# Forked from Mcompetitions/M6-methods
# File: RPS and IR calculation.py
###########################################################################
# Code for computing the RPS and IR scores for a given evaluation period
###########################################################################
#For simplicity, in this example it is assumed that the data provided cover a single evaluation period.
#This period is specified through the min/max date of the asset prices data set.
#If you wish to compute RPS/IR for multiple periods, you'll have to execute
#the script multiple times, each time using a different, appropriate input.
import pandas as pd
import numpy as np
from statistics import stdev
# Inputs: the asset price history (as provided by the M6 submission platform)
# and the submission file (laid out like the template the platform provides).
asset_data = pd.read_csv("assets_m6.csv")
submission_data = pd.read_csv("template.csv")

# Second set of names bound to the same two frames; the evaluation calls at the
# end of the script refer to the inputs through both sets of names.
hist_data, submission = asset_data, submission_data
#Function for computing RPS
def RPS_calculation(hist_data, submission, asset_no=100):
    """Compute the Ranked Probability Score (RPS) of a submission for one period.

    The evaluation period is the span between the smallest and largest value
    of ``hist_data.date``.

    Parameters
    ----------
    hist_data : pandas.DataFrame
        Price history with the columns ``symbol``, ``date`` and ``price``.
        Dates are compared with ``<=``/``min``/``max`` as stored, so they must
        order correctly in their stored form.
    submission : pandas.DataFrame
        One row per asset with an ``ID`` column; the columns at positions 1-5
        are read as the forecast probabilities of quintiles 1-5.
        The frame is not modified.
    asset_no : int, default 100
        If ``hist_data`` has this many rows or fewer, no score is computed.

    Returns
    -------
    float or dict
        ``numpy.nan`` when ``hist_data`` has ``asset_no`` rows or fewer.
        Otherwise ``{'RPS': mean score over all assets, 'details': copy of
        submission with an added per-asset 'RPS' column}``.

    Raises
    ------
    IndexError
        If an asset is missing on some date and has no price on or before
        that date to carry forward.
    """
    if hist_data.shape[0] <= asset_no:
        return np.nan

    asset_id = pd.unique(hist_data.symbol)

    # Carry the last known price forward for every (date, asset) pair that is
    # missing, so each asset has a price on every date of the period.
    carried_rows = []
    for date in pd.unique(hist_data.date):
        on_date = hist_data[hist_data.date == date]
        if len(on_date) >= len(asset_id):
            continue
        quoted = set(on_date.symbol.values)
        for asset in asset_id:
            if asset in quoted:
                continue
            history = hist_data[hist_data.symbol == asset].sort_values(by='date')
            history = history[history.date <= date]
            carried_rows.append({'date': date,
                                 'symbol': asset,
                                 'price': history.price.iloc[-1]})
    if carried_rows:
        # DataFrame.append no longer exists in pandas >= 2.0; concatenate once.
        hist_data = pd.concat([hist_data, pd.DataFrame(carried_rows)],
                              ignore_index=True)

    asset_id = sorted(asset_id)

    # Compute percentage returns between the first and the last date.
    min_date = min(hist_data.date)
    max_date = max(hist_data.date)
    period_returns = []
    for asset in asset_id:
        asset_prices = hist_data.loc[hist_data.symbol == asset]
        open_price = float(asset_prices.loc[asset_prices.date == min_date].price.iloc[0])
        close_price = float(asset_prices.loc[asset_prices.date == max_date].price.iloc[0])
        period_returns.append((close_price - open_price) / open_price)

    # Position of each asset (1 = lowest return). Tied assets share the lowest
    # position of their group and jointly occupy the following positions.
    position = pd.Series(period_returns, index=asset_id).rank(method='min')
    tie_size = position.map(position.value_counts())

    # Quintile edges are derived from the number of ranked assets, so that a
    # tie among the best performers does not shift them.
    total_ranks = len(asset_id)

    # Evaluate submission on a copy so the caller's frame stays untouched.
    details = submission.copy()
    rps_by_id = {}
    for aid in asset_id:
        target = np.cumsum(_quintile_shares(int(position.loc[aid]),
                                            int(tie_size.loc[aid]),
                                            total_ranks))
        forecast_rows = details.loc[details.ID == aid].iloc[:, 1:6].values
        if len(forecast_rows) == 0:
            # No forecast for this asset: its score is undefined.
            rps_by_id[aid] = np.nan
            continue
        frc = np.cumsum(forecast_rows[0].astype(float))
        rps_by_id[aid] = float(np.mean((target - frc) ** 2))

    # Attach scores by ID, not by row position, so the submission rows may
    # come in any order.
    details["RPS"] = details["ID"].map(rps_by_id)

    output = {'RPS': np.mean(list(rps_by_id.values())),
              'details': details}
    return output


def _quintile_shares(start, size, total_ranks):
    """Return the fraction of positions start..start+size-1 in each quintile."""
    occupied = np.arange(start, start + size)
    edges = ([1]
             + [int(q * total_ranks + 1) for q in (0.2, 0.4, 0.6, 0.8)]
             + [int(total_ranks + 1)])
    # Every quintile is checked independently: a tie group that straddles an
    # edge contributes to both neighbouring quintiles and the shares sum to 1.
    return np.array([np.mean((occupied >= low) & (occupied < high))
                     for low, high in zip(edges[:-1], edges[1:])])
#Function for computing IR
def IR_calculation(hist_data, submission):
    """Compute the Information Ratio (IR) of a submission's investment weights.

    Parameters
    ----------
    hist_data : pandas.DataFrame
        Price history with the columns ``symbol``, ``date`` and ``price``.
        Rows are ordered per asset by sorting on ``date`` as stored.
    submission : pandas.DataFrame
        One row per asset with the columns ``ID`` and ``Decision`` (the
        weight applied to that asset's period-to-period percentage returns).

    Returns
    -------
    dict
        ``{'IR': sum of the portfolio log returns divided by their sample
        standard deviation, 'details': list of the portfolio log returns}``.

    Raises
    ------
    IndexError
        If an asset is missing on some date and has no price on or before
        that date to carry forward, or if an asset has no row in
        ``submission``.
    statistics.StatisticsError
        If fewer than two portfolio returns are available.
    """
    asset_id = pd.unique(hist_data.symbol)

    # Carry the last known price forward for every (date, asset) pair that is
    # missing, so each asset has a price on every date of the period.
    carried_rows = []
    for date in pd.unique(hist_data.date):
        on_date = hist_data[hist_data.date == date]
        if len(on_date) >= len(asset_id):
            continue
        quoted = set(on_date.symbol.values)
        for asset in asset_id:
            if asset in quoted:
                continue
            history = hist_data[hist_data.symbol == asset].sort_values(by='date')
            history = history[history.date <= date]
            carried_rows.append({'date': date,
                                 'symbol': asset,
                                 'price': history.price.iloc[-1]})
    if carried_rows:
        # DataFrame.append no longer exists in pandas >= 2.0; concatenate once.
        hist_data = pd.concat([hist_data, pd.DataFrame(carried_rows)],
                              ignore_index=True)

    asset_id = sorted(asset_id)

    # Investment weights
    weights = submission[["ID", "Decision"]]

    # One column per asset, one row per time step (aligned by position after
    # sorting each asset's prices by date).
    weighted_returns = {}
    for asset in asset_id:
        prices = (hist_data.loc[hist_data.symbol == asset]
                  .sort_values(by='date')
                  .price.reset_index(drop=True))
        weight = weights.loc[weights.ID == asset].Decision.values[0]
        weighted_returns[asset] = prices.pct_change() * weight
    RET = pd.DataFrame(weighted_returns)

    # Portfolio return per time step; the first step has no previous price
    # and is dropped before taking logs.
    ret = np.log(1 + RET.sum(axis=1).iloc[1:])

    sum_ret = sum(ret)
    sdp = stdev(ret)

    output = {'IR': sum_ret / sdp,
              'details': list(ret)}
    return output
# Run evaluation: score the loaded submission against the loaded price data.
# NOTE: both lines are bare expressions; when the file is executed as a plain
# script their values are computed and then discarded.
RPS_calculation(asset_data, submission_data)["RPS"]
IR_calculation(hist_data=hist_data, submission=submission)["IR"]