Interesting era distribution. Am I missing something? I was expecting the eras to be continuous. Or maybe the numbers do not matter? I expected a longer train era.
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
from plotly.subplots import make_subplots
# Render figures through plotly's "browser" renderer.
pio.renderers.default = "browser"

# Stack both data sets into one frame so all eras share a single axis.
training_data = pd.read_csv("numerai_training_data.csv")
tournament_data = pd.read_csv("numerai_tournament_data.csv")
df = pd.concat([training_data, tournament_data], ignore_index=True)

# "eraX" has no numeric suffix; relabel it "era0" so that every era label
# can be parsed as an integer below.
df.loc[df["era"] == "eraX", "era"] = "era0"

# Strip the 3-character "era" prefix and keep the numeric part.
df["era_value"] = df["era"].str[3:].astype(int)

max_era = df["era_value"].max()
print(max_era)

# Rows whose era parsed as 0 (the relabelled "eraX" rows) are moved to one
# past the highest era value.
df.loc[df["era_value"] == 0, "era_value"] = max_era + 1

fig = px.scatter(
    df,
    x="era_value",
    y="data_type",
    color="data_type",
    title="Interesting era distribution",
)
fig.show()