-
Notifications
You must be signed in to change notification settings - Fork 105
/
environment.py
127 lines (94 loc) · 4.16 KB
/
environment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import numpy as np
import pandas as pd
from utils import portfolio
class CryptoEnvironment:
    """Price-history environment built from a CSV of crypto asset prices.

    The CSV must contain a ``Date`` or ``date`` column, which becomes the
    row index; every remaining column is kept as one asset's series.
    """

    def __init__(self, prices = './data/crypto_portfolio.csv', capital = 1e6):
        """Store the configuration and eagerly load the price table.

        Args:
            prices: Path of the CSV file passed to ``pandas.read_csv``.
            capital: Stored on the instance; not used by any method here.
        """
        self.prices = prices
        self.capital = capital
        self.data = self.load_data()

    def load_data(self):
        """Read ``self.prices`` and index the rows by the date column.

        Returns:
            DataFrame indexed by the ``Date`` (preferred) or ``date`` column,
            with that column removed from the data columns.

        Raises:
            KeyError: If the file has neither a ``Date`` nor a ``date`` column.
        """
        data = pd.read_csv(self.prices)
        # Check for the column explicitly rather than catching every
        # exception: a blanket handler would also hide unrelated failures.
        for date_column in ('Date', 'date'):
            if date_column in data.columns:
                return data.set_index(date_column)
        raise KeyError("expected a 'Date' or 'date' column in %r" % (self.prices,))

    def preprocess_state(self, state):
        """Return ``state`` unchanged (hook for subclasses/customisation)."""
        return state

    def get_state(self, t, lookback, is_cov_matrix = True, is_raw_time_series = False):
        """Build the observation for the ``lookback`` rows preceding row ``t``.

        Args:
            t: Exclusive end position (row number) of the window.
            lookback: Number of rows in the window; must not exceed ``t``.
            is_cov_matrix: If true, return the covariance matrix of the
                window's row-over-row percentage changes.
            is_raw_time_series: Only consulted when ``is_cov_matrix`` is
                false. If true, return the raw window; otherwise return its
                percentage changes.

        Returns:
            A DataFrame: covariance matrix, raw window, or percentage changes
            (the latter two passed through ``preprocess_state``).

        Raises:
            AssertionError: If ``lookback`` is greater than ``t``.
        """
        # Raised explicitly (instead of ``assert``) so the guard survives
        # ``python -O``; a negative slice start would silently wrap around.
        if lookback > t:
            raise AssertionError('lookback must not exceed t')

        window = self.data.iloc[t - lookback:t]
        if is_cov_matrix:
            return window.pct_change().dropna().cov()
        if is_raw_time_series:
            return self.preprocess_state(window)
        return self.preprocess_state(window.pct_change().dropna())

    def get_reward(self, action, action_t, reward_t, alpha = 0.01):
        """Evaluate portfolio weights over rows ``action_t`` to ``reward_t``.

        Args:
            action: Portfolio weights, one per asset column.
            action_t: Start row position of the evaluation period (inclusive).
            reward_t: End row position of the evaluation period (exclusive).
            alpha: Accepted for interface compatibility; not used.

        Returns:
            Tuple ``(portfolio_returns, rew)`` where ``portfolio_returns`` is
            the dot product of the per-row percentage changes with the
            weights, and ``rew`` is each asset's relative price change
            between the first and last row of the period.
        """
        # Positional slicing, consistent with get_state.
        data_period = self.data.iloc[action_t:reward_t]
        returns = data_period.pct_change().dropna()

        first_prices = data_period.values[0]
        last_prices = data_period.values[-1]
        rew = (last_prices - first_prices) / first_prices

        return np.dot(returns, action), rew
class ETFEnvironment:
    """Price-history environment built from comma-separated ETF data files.

    Prices come from ``prices``; asset names and row labels are taken from
    the header and first column of ``returns``.
    """

    def __init__(self, volumes = './data/volumes.txt',
                 prices = './data/prices.txt',
                 returns = './data/returns.txt',
                 capital = 1e6):
        """Store the configuration and eagerly load the price table.

        Args:
            volumes: Path stored on the instance; not read by ``load_data``.
            prices: Path of the comma-separated price file.
            returns: Path of the CSV whose header/index label the price table.
            capital: Stored on the instance; not used by any method here.
        """
        self.returns = returns
        self.prices = prices
        self.volumes = volumes
        self.capital = capital
        self.data = self.load_data()

    def load_data(self):
        """Assemble the price table.

        The first two rows and the first column of the price file are
        skipped; the remaining numbers are labelled with the column names
        and index of the returns file.

        Returns:
            DataFrame of prices with one column per asset.
        """
        prices = np.genfromtxt(self.prices, delimiter=',')[2:, 1:]
        # Only the labels of the returns file are needed. The former
        # ``returns.as_matrix()`` call was removed: its result was unused and
        # the method no longer exists in pandas >= 1.0.
        returns = pd.read_csv(self.returns, index_col=0)
        assets = np.array(returns.columns)
        dates = np.array(returns.index)
        return pd.DataFrame(prices, columns=assets, index=dates)

    def preprocess_state(self, state):
        """Return ``state`` unchanged (hook for subclasses/customisation)."""
        return state

    def get_state(self, t, lookback, is_cov_matrix = True, is_raw_time_series = False):
        """Build the observation for the ``lookback`` rows preceding row ``t``.

        Args:
            t: Exclusive end position (row number) of the window.
            lookback: Number of rows in the window; must not exceed ``t``.
            is_cov_matrix: If true, return the covariance matrix of the
                window's row-over-row percentage changes.
            is_raw_time_series: Only consulted when ``is_cov_matrix`` is
                false. If true, return the raw window; otherwise return its
                percentage changes.

        Returns:
            A DataFrame: covariance matrix, raw window, or percentage changes
            (the latter two passed through ``preprocess_state``).

        Raises:
            AssertionError: If ``lookback`` is greater than ``t``.
        """
        # Raised explicitly (instead of ``assert``) so the guard survives
        # ``python -O``; a negative slice start would silently wrap around.
        if lookback > t:
            raise AssertionError('lookback must not exceed t')

        window = self.data.iloc[t - lookback:t]
        if is_cov_matrix:
            return window.pct_change().dropna().cov()
        if is_raw_time_series:
            return self.preprocess_state(window)
        return self.preprocess_state(window.pct_change().dropna())

    @staticmethod
    def _portfolio_stats(returns, weights):
        """Return ``[mean return, volatility, return / volatility]`` for weights."""
        weights = np.array(weights)
        mean_returns = returns.mean()
        covariance = returns.cov()
        p_ret = np.sum(mean_returns * weights)
        p_vol = np.sqrt(np.dot(weights.T, np.dot(covariance, weights)))
        return np.array([p_ret, p_vol, p_ret / p_vol])

    def get_reward(self, action, action_t, reward_t):
        """Evaluate portfolio weights over rows ``action_t`` to ``reward_t``.

        Args:
            action: Portfolio weights, one per asset column.
            action_t: Start row position of the evaluation period (inclusive).
            reward_t: End row position of the evaluation period (exclusive).

        Returns:
            Tuple ``(portfolio_returns, rew)`` where ``portfolio_returns`` is
            the dot product of the per-row percentage changes with the
            weights, and ``rew`` is the period's return/volatility ratio
            repeated once per asset column.
        """
        # Positional slicing, consistent with get_state.
        returns = self.data.iloc[action_t:reward_t].pct_change().dropna()
        ratio = self._portfolio_stats(returns, action)[-1]
        rew = np.array([ratio] * len(self.data.columns))
        return np.dot(returns, action), rew