-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathformulas.py
258 lines (206 loc) · 8.14 KB
/
formulas.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
from statsmodels.tsa.seasonal import STL
from statsmodels.tsa.stattools import adfuller
import tensorflow as tf
def seq2seq_window_dataset(series, window_size, batch_size=32,
shuffle_buffer=1000):
series = tf.expand_dims(series, axis=-1)
ds = tf.data.Dataset.from_tensor_slices(series)
ds = ds.window(window_size + 1, shift=1, drop_remainder=True)
ds = ds.flat_map(lambda w: w.batch(window_size + 1))
ds = ds.shuffle(shuffle_buffer)
ds = ds.map(lambda w: (w[:-1], w[1:]))
return ds.batch(batch_size).prefetch(1)
def window_dataset(series, window_size, batch_size=32,
shuffle_buffer=1000):
dataset = tf.data.Dataset.from_tensor_slices(series)
dataset = dataset.window(window_size + 1, shift=1, drop_remainder=True)
dataset = dataset.flat_map(lambda window: window.batch(window_size + 1))
dataset = dataset.shuffle(shuffle_buffer)
dataset = dataset.map(lambda window: (window[:-1], window[-1]))
dataset = dataset.batch(batch_size).prefetch(1)
return dataset
def model_forecast(model, series, window_size):
ds = tf.data.Dataset.from_tensor_slices(series)
ds = ds.window(window_size, shift=1, drop_remainder=True)
ds = ds.flat_map(lambda w: w.batch(window_size))
ds = ds.batch(32).prefetch(1)
forecast = model.predict(ds)
return forecast
def test_stationarity(timeseries, window = 12, cutoff = 0.01):
#Determing rolling statistics
rolmean = timeseries.rolling(window).mean()
rolstd = timeseries.rolling(window).std()
#Plot rolling statistics:
fig = plt.figure(figsize=(12, 8))
orig = plt.plot(timeseries, color='blue',label='Original')
mean = plt.plot(rolmean, color='red', label='Rolling Mean')
std = plt.plot(rolstd, color='black', label = 'Rolling Std')
plt.legend(loc='best')
plt.title('Rolling Mean & Standard Deviation')
plt.show()
#Perform Dickey-Fuller test:
print('Results of Dickey-Fuller Test:')
dftest = adfuller(timeseries, autolag='AIC', maxlag = 20 )
dfoutput = pd.Series(dftest[0:4], index=['Test Statistic','p-value','#Lags Used','Number of Observations Used'])
for key,value in dftest[4].items():
dfoutput['Critical Value (%s)'%key] = value
pvalue = dftest[1]
if pvalue < cutoff:
print('p-value = %.4f. The series is likely stationary.' % pvalue)
else:
print('p-value = %.4f. The series is likely non-stationary.' % pvalue)
print(dfoutput)
def plot_series(time, series, format="-", start=0, end=None, label=None):
"""[Plot the series data over a time range]
Args:
time (data range): [The entire time span of the data in range format]
series ([integers]): [Series value corresponding to its point on the time axis]
format (str, optional): [Graph type]. Defaults to "-".
start (int, optional): [Time to start time series data]. Defaults to 0.
end ([type], optional): [Where to stop time data]. Defaults to None.
label ([str], optional): [Label name of series]. Defaults to None.
"""
plt.plot(time[start:end], series[start:end], format, label=label)
plt.xlabel("Time")
plt.ylabel("Value")
if label:
plt.legend(fontsize=14)
plt.grid(True)
def moving_average_forecast(series, window_size):
"""Forecasts the mean of the last few values.
If window_size=1, then this is equivalent to naive forecast
This implementation is *much* faster than the previous one"""
mov = np.cumsum(series)
mov[window_size:] = mov[window_size:] - mov[:-window_size]
return mov[window_size - 1:-1] / window_size
def sequential_window_dataset(series, window_size):
ds = tf.data.Dataset.from_tensor_slices(series)
ds = ds.window(window_size + 1, shift=window_size, drop_remainder=True)
ds = ds.flat_map(lambda window: window.batch(window_size + 1))
ds = ds.map(lambda window: (window[:-1], window[1:]))
return ds.batch(1).prefetch(1)
def calculate_returns(close):
"""
Compute returns for each ticker and date in close.
Parameters
----------
close : DataFrame
Close prices for each ticker and date
Returns
-------
returns : DataFrame
Returns for each ticker and date
"""
# TODO: Implement Function
return (close - close.shift(1))/close.shift(1)
def resample_prices(close_prices, freq='M'):
"""
Resample close prices for each ticker at specified frequency.
Parameters
----------
close_prices : DataFrame
Close prices for each ticker and date
freq : str
What frequency to sample at
For valid freq choices, see http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases
Returns
-------
prices_resampled : DataFrame
Resampled prices for each ticker and date
"""
return close_prices.resample(freq).last()
def compute_log_returns(prices):
"""
Compute log returns for each ticker.
Parameters
----------
prices : DataFrame
Prices for each ticker and date
Returns
-------
log_returns : DataFrame
Log returns for each ticker and date
"""
r_t = np.log(prices) - np.log(prices.shift(1))
return r_t
def portfolio_returns(df_long, df_short, lookahead_returns, n_stocks):
"""
Compute expected returns for the portfolio, assuming equal investment in each long/short stock.
Parameters
----------
df_long : DataFrame
Top stocks for each ticker and date marked with a 1
df_short : DataFrame
Bottom stocks for each ticker and date marked with a 1
lookahead_returns : DataFrame
Lookahead returns for each ticker and date
n_stocks: int
The number number of stocks chosen for each month
Returns
-------
portfolio_returns : DataFrame
Expected portfolio returns for each ticker and date
"""
return (lookahead_returns*(df_long - df_short)) / n_stocks
def get_top_n(prev_returns, top_n):
"""
Select the top performing stocks
Parameters
----------
prev_returns : DataFrame
Previous shifted returns for each ticker and date
top_n : int
The number of top performing stocks to get
Returns
-------
top_stocks : DataFrame
Top stocks for each ticker and date marked with a 1
"""
# TODO: Implement Function
top_stocks = prev_returns.apply(lambda x: x.nlargest(top_n), axis=1)
top_stocks = top_stocks.applymap(lambda x: 0 if pd.isna(x) else 1)
top_stocks = top_stocks.astype(int)
return top_stocks
def analyze_alpha(expected_portfolio_returns_by_date):
"""
Perform a t-test with the null hypothesis being that the expected mean return is zero.
Parameters
----------
expected_portfolio_returns_by_date : Pandas Series
Expected portfolio returns for each date
Returns
-------
t_value
T-statistic from t-test
p_value
Corresponding p-value
"""
t_statistic,p_value = stats.ttest_1samp(expected_portfolio_returns_by_date, 0)
return t_statistic,p_value/2
def seasonal_trend_decomp_plot(dataframe,target_series, freq, seasonal_smoother, period):
"""[summary]
Args:
dataframe ([pandas dataframe]): [dataframe holding all of your data]
target_series ([series]): [Name of column in data frame you want to build plot from like 'Adj Close']
freq ([int]): [How do you want to resample data - 'D', 'W','M']
seasonal_smoother ([type]): [Length of smoother in whatever units as defined by freq]
period ([type]): [Periodicity of the sequence (for monthly = 12/year)]
Returns:
[STL plot]: [Seasonal-Trend decomposition using LOESS (STL)]
"""
df = dataframe.set_index('Date')
df = df.resample(freq).last()
target = df[target_series]
stl = STL(target, seasonal = seasonal_smoother)
res = stl.fit()
res.plot()
return
def resample_series(dataframe, target,freq):
df = dataframe.set_index('Date')
df = df.resample(freq).last()
target = df[target]
return target