I think I came up with an implementation that would work on the GPU, since it uses PyTorch. However, reading the True Contribution Details post, exposure dissimilarity only seems to be relevant when combined multiplicatively with FNCv3, so it might not make sense to use it on its own.
Any feedback is more than welcome!
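For context, the per-era quantity the loop below tries to reproduce is, as far as I can tell from that post, `1 - u.dot(e) / e.dot(e)`, where `u` and `e` are the per-feature correlations of the model's predictions and the example predictions. A minimal pandas sketch; the function name and the exact formula are my own reading of the linked post, not anything official:

```python
import pandas as pd

# Hypothetical reference implementation; the name and the
# 1 - u.dot(e) / e.dot(e) form are my reading of the linked post.
def exposure_dissimilarity_per_era(df: pd.DataFrame, pred_col: str,
                                   example_col: str, feature_cols: list) -> float:
    u = df[feature_cols].corrwith(df[pred_col])      # model's feature exposures
    e = df[feature_cols].corrwith(df[example_col])   # example preds' exposures
    return 1 - u.dot(e) / e.dot(e)                   # 0 when exposures match
```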
```python
import numpy as np
import torch

# Assumed already defined: `model`, `optimizer`, `epochs`, `feature_cols`,
# `numerair_tb` (a differentiable correlation loss from earlier in the thread),
# and `train_data` with an `erano` column and a precomputed `example_preds` column.
era_list = list(train_data.erano.unique())

# Center the features so the per-feature correlations below are valid.
for f in feature_cols:
    train_data[f] -= 0.5

for epoch in range(epochs):
    np.random.shuffle(era_list)
    batch_count = 0
    acc_loss_train = 0.0
    for era in era_list:
        batch_count += 1
        era_df = train_data[train_data.erano == era]
        # get features and target for this era and put them in tensors
        features = torch.tensor(era_df.filter(like='feature').values, dtype=torch.float32)
        target = torch.tensor(era_df['target'].values, dtype=torch.float32)
        # zero gradient buffer and get model output
        optimizer.zero_grad()
        model.train()
        model_output = model(features).squeeze(-1)  # flatten (n, 1) -> (n,) for the dot products below
        orig_loss = -numerair_tb(model_output, target)  # negated: minimizing this maximizes corr
        # dissimilarity: per-feature exposures of the example predictions...
        example_preds = torch.as_tensor(era_df['example_preds'].values, dtype=torch.float32)
        example_preds = example_preds - example_preds.mean()
        corr_example_preds = (features.T * example_preds).sum(dim=1) / (
            (features.T * features.T).sum(dim=1) * (example_preds * example_preds).sum()
        ).sqrt()
        # ...and of the model's predictions
        preds = model_output - model_output.mean()
        corr_preds = (features.T * preds).sum(dim=1) / (
            (features.T * features.T).sum(dim=1) * (preds * preds).sum()
        ).sqrt()
        # torch.pinverse needs a 2-D tensor, so use plain dot products instead;
        # this is the dot(u, e) / dot(e, e) ratio from the linked post (the
        # constant 1 in "1 - ratio" has no gradient, so it can be dropped)
        num = torch.dot(corr_preds, corr_example_preds)
        denom = torch.dot(corr_example_preds, corr_example_preds)
        dissimilarity = num / denom
        # final loss: orig_loss is already negated, so add it as-is
        loss = orig_loss + dissimilarity
        acc_loss_train += loss.item()
        loss.backward()
        optimizer.step()
    loss_train = acc_loss_train / batch_count
```
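One possible follow-up, since nothing era-specific except the model output changes across epochs: the per-era tensors and `corr_example_preds` could be cached once before the epoch loop (which is also the natural place to move everything to the GPU with `.to(device)`) instead of being rebuilt from the DataFrame every epoch. A sketch under the same assumptions as above, with `device` being assumed, e.g. `torch.device("cuda")`:

```python
# Build the per-era tensors once, before the epoch loop.
era_cache = {}
for era in era_list:
    era_df = train_data[train_data.erano == era]
    features = torch.tensor(era_df.filter(like='feature').values,
                            dtype=torch.float32, device=device)
    target = torch.tensor(era_df['target'].values,
                          dtype=torch.float32, device=device)
    example_preds = torch.as_tensor(era_df['example_preds'].values,
                                    dtype=torch.float32).to(device)
    example_preds = example_preds - example_preds.mean()
    corr_example_preds = (features.T * example_preds).sum(dim=1) / (
        (features.T * features.T).sum(dim=1) * (example_preds * example_preds).sum()
    ).sqrt()
    era_cache[era] = (features, target, corr_example_preds)

# Inside the training loop, the DataFrame lookups then become:
# features, target, corr_example_preds = era_cache[era]
```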