I have been playing around with tournament data for a while, and I thought I would share the script I use to download it in case anyone is interested. It is simple, but who can be bothered to check the API? So here is the script.
.
from numerapi import NumerAPI
import pandas as pd
import json
# API client used for every query below.  No credentials are passed here;
# to authenticate, add public_id='...' and secret_key='...' to the call.
napi = NumerAPI(verbosity="info")
# First round number to download.
START_ROUND = 280
# Last round number to download (inclusive: the loop below iterates
# range(START_ROUND, END_ROUND+1)).
END_ROUND = 333
# Value sent as the `tournament` variable of the GraphQL query.
# NOTE(review): 8 presumably identifies the main Numerai tournament - confirm.
TOURNAMENT = 8
# GraphQL query for the details of a single round.  It takes two required
# integer variables ($roundNumber, $tournament) and requests round-level
# fields plus, under `models`, one entry per model with its name, stake
# value and performance metrics.
query = """
query($roundNumber: Int!, $tournament: Int!) {
roundDetails (roundNumber: $roundNumber, tournament: $tournament) {
roundNumber
tournament
roundTarget
status
totalStakes
totalAtStake
totalPayout
payoutFactor
models {
modelName
selectedStakeValue
tc
correlation
corr60
fnc
fncV3
mmc
}
}
}
"""
# Download the per-model results of every round in [START_ROUND, END_ROUND]
# and write three kinds of CSV output to the current directory:
#   round-<N>.csv            raw per-model rows of round N
#   rounds.csv               one summary row per downloaded round
#   round-<START>-<END>.csv  all per-model rows that have no missing value
allPerfs = []  # per-model DataFrame of each downloaded round
rounds = []    # summary dict of each downloaded round
for round_num in range(START_ROUND, END_ROUND + 1):
    print("roundNumber ", round_num)
    arguments = {'roundNumber': round_num, 'tournament': TOURNAMENT}
    roundDetails = napi.raw_query(query, arguments)['data']['roundDetails']

    # A round with no details or no models yields a frame without the metric
    # columns; skip it instead of aborting the whole download.
    if not roundDetails or not roundDetails.get('models'):
        print("no model data for round ", round_num, "- skipped")
        continue

    perf = pd.DataFrame(roundDetails['models'])
    perf['roundNumber'] = round_num
    perf.to_csv(f'round-{round_num}.csv', index=False)
    allPerfs.append(perf)

    # The metric columns may not arrive with a numeric dtype, so convert them
    # explicitly in memory (no need to re-read the CSV just written).
    # Values that cannot be parsed become NaN, which mean()/median()/corr()
    # ignore.
    stats = perf[['selectedStakeValue', 'tc', 'correlation']].apply(
        pd.to_numeric, errors='coerce')
    # Optional: restrict the statistics to models with a positive stake.
    # stats = stats[stats.selectedStakeValue > 0]

    # Round-level fields (everything except the per-model list) plus
    # aggregate statistics over the round's models.
    r = {k: v for k, v in roundDetails.items() if k != 'models'}
    r['stake.mean'] = stats.selectedStakeValue.mean()
    r['stake.median'] = stats.selectedStakeValue.median()
    r['tc.mean'] = stats.tc.mean()
    r['tc.median'] = stats.tc.median()
    r['correlation.mean'] = stats.correlation.mean()
    r['correlation.median'] = stats.correlation.median()
    # Correlation between TC and CORR across the models of this round.
    r['tcVScorr'] = stats.tc.corr(stats.correlation)
    rounds.append(r)

pd.DataFrame(rounds).to_csv('rounds.csv', index=False)
# pd.concat() raises on an empty list, so only write the combined file when
# at least one round was downloaded.  Rows with any missing value are dropped.
if allPerfs:
    pd.concat(allPerfs).dropna(how='any').to_csv(
        f'round-{START_ROUND}-{END_ROUND}.csv', index=False)