Here is some code showing how you can use PyTorch to create custom objective functions for XGBoost. Objective functions for XGBoost must return a gradient and the diagonal of the Hessian (i.e. matrix of second derivatives). Internally XGBoost uses the Hessian diagonal to rescale the gradient. The Hessian is very expensive to compute, so we replace it with all ones. This basically forces XGBoost to do standard gradient descent rather than the fancier second order version it usually uses. It works fine, but makes it a bit more sensitive to step size, so watch things carefully. Below we make a function to use Adjusted Sharpe as a cost function for XGBoost. Because the Adjusted Sharpe calculation is not defined for a constant initial condition, we first fit a model using the standard least squares cost-function and then start from there (i.e. the base margin) and fit additional trees to improve the in-sample adjusted Sharpe. This should be enough to get you going, have fun!
import numpy as np
import pandas as pd
from xgboost import XGBRegressor
import torch
from torch.autograd import grad
trainval=pd.read_parquet("numerai_training_validation_target_nomi.parquet")
train = trainval[trainval.data_type=='train']
target = "target_nomi"
feature_columns = [c for c in trainval if c.startswith("feature")]
# fit an initial model
model_init = XGBRegressor(max_depth=5, learning_rate=0.01, n_estimators=2000, colsample_bytree=0.1, nthread=6)
model_init.fit(train[feature_columns], train[target])
# get prediction from initial model as starting point to improve upon
base_margin = model_init.predict(train[feature_columns])
# get indexes for each era
era_idx = [np.where(train.era==uera)[0] for uera in train.era.unique()]
# define adjusted sharpe in terms of cost adjusted numerai sharpe
def numerai_sharpe(x):
return (x.mean() -0.010415154) / x.std()
def skew(x):
mx = x.mean()
m2 = ((x-mx)**2).mean()
m3 = ((x-mx)**3).mean()
return m3/(m2**1.5)
def kurtosis(x):
mx = x.mean()
m4 = ((x-mx)**4).mean()
m2 = ((x-mx)**2).mean()
return (m4/(m2**2))-3
def adj_sharpe(x):
return numerai_sharpe(x) * (1 + ((skew(x) / 6) * numerai_sharpe(x)) - ((kurtosis(x) / 24) * (numerai_sharpe(x) ** 2)))
# use correlation as the measure of fit
def corr(pred, target):
pred_n = pred - pred.mean(dim=0)
pred_n = pred_n / pred_n.norm(dim=0)
target_n = target - target.mean(dim=0)
target_n = target_n / target_n.norm(dim=0)
l = torch.matmul(pred_n.T, target_n)
return l
# definte a custom objective for XGBoost
def adj_sharpe_obj(ytrue, ypred):
# convert to pytorch tensors
ypred_th = torch.tensor(ypred, requires_grad=True)
ytrue_th = torch.tensor(ytrue)
all_corrs = []
# get correlations in each era
for ee in era_idx:
score = corr(ypred_th[ee], ytrue_th[ee])
all_corrs.append(score)
all_corrs = torch.stack(all_corrs)
# calculate adjusted sharpe using correlations
loss = -adj_sharpe(all_corrs)
print(f'Current loss:{loss}')
# calculate gradient and convert to numpy
loss_grads = grad(loss, ypred_th, create_graph=True)[0]
loss_grads = loss_grads.detach().numpy()
# return gradient and ones instead of Hessian diagonal
return loss_grads, np.ones(loss_grads.shape)
model_adj_sharpe = XGBRegressor(max_depth=5, learning_rate=0.01, n_estimators=200, nthread=6, colsample_bytree=0.1, objective=adj_sharpe_obj)
model_adj_sharpe.fit(train[feature_columns], train[target], base_margin=base_margin)