-
Notifications
You must be signed in to change notification settings - Fork 2
/
nevergrad-tutorial.py
203 lines (131 loc) · 5.58 KB
/
nevergrad-tutorial.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
#!/usr/bin/env python
# coding: utf-8
# In[4]:
# Imports
import pandas as pd
import numpy as np
import plotly.express as px
from numpy import mean, std, absolute
from sklearn import metrics
from sklearn.model_selection import cross_val_score, TimeSeriesSplit
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split
from sklearn.utils.validation import check_is_fitted, check_array
import nevergrad as ng
import statsmodels.tsa.api as tsa
# In[5]:
# Load the raw dataset from the CSV file next to this script and preview it
df = pd.read_csv(filepath_or_buffer='dataset.csv')
df.head()
# In[6]:
# Automatically get the Media Variables
# Media variables are every column whose name contains the substring 'spend'
spend_var = [name for name in df.columns if 'spend' in name]
y = df['revenue']
# Time-series plot of revenue together with the two media spend columns
fig = px.line(
    df,
    x='days',
    y=["revenue", "facebook_spend", "google_spend"],
)
fig.show()
# In[7]:
# Create and split data variables
# Features are the media spend columns, the target is revenue
X, y = df[spend_var], df['revenue']
# Partition both into train and test parts (no test size or seed is passed)
X_train, X_test, y_train, y_test = train_test_split(X, y)
# In[8]:
# Function to return Adstocked variables
def adstock(x, theta):
    """Return the adstocked (carry-over) version of a media series.

    Thin wrapper around statsmodels' ``recursive_filter``: ``theta`` is
    passed as the single autoregressive coefficient, so per the statsmodels
    documentation each output is the current input plus ``theta`` times the
    previous output.

    Args:
        x: Series/array of media values in time order.
        theta: Carry-over coefficient handed to ``recursive_filter``.

    Returns:
        Whatever ``tsa.filters.recursive_filter(x, theta)`` returns.
    """
    return tsa.filters.recursive_filter(x, theta)
# Function to return Saturated variables
def saturation(x, beta):
    """Return the saturated version of ``x`` by raising it to the power ``beta``.

    Args:
        x: Scalar or array-like supporting the ``**`` operator.
        beta: Exponent applied to ``x``.

    Returns:
        ``x ** beta``, with the same shape/type semantics as the ``**``
        operator of ``x``.
    """
    return x ** beta
# Function to return model's NRMSE
def nrmse(y_actual, y_pred):
    """Return the normalized root mean square error, rounded to 3 decimals.

    The root of scikit-learn's mean squared error between ``y_actual`` and
    ``y_pred`` is divided by the mean of ``y_actual``.

    Args:
        y_actual: Observed target values.
        y_pred: Predicted target values.

    Returns:
        The rounded NRMSE value.
    """
    rmse = np.sqrt(metrics.mean_squared_error(y_actual, y_pred))
    return round(rmse / np.mean(y_actual), 3)
# Function to return model's MAPE
def mape(y_actual, y_pred):
    """Return the mean absolute error relative to the mean actual, rounded to 3 decimals.

    NOTE: despite the name, this is not a per-observation percentage error.
    It is scikit-learn's mean absolute error divided by the mean of
    ``y_actual``.

    Args:
        y_actual: Observed target values.
        y_pred: Predicted target values.

    Returns:
        The rounded ratio ``MAE / mean(y_actual)``.
    """
    mae = metrics.mean_absolute_error(y_actual, y_pred)
    return round(mae / np.mean(y_actual), 3)
# Function to return model's R^2
def rsquared(y_actual, y_pred):
    """Return the R^2 score of the predictions, rounded to 3 decimals.

    Args:
        y_actual: Observed target values.
        y_pred: Predicted target values.

    Returns:
        ``metrics.r2_score(y_actual, y_pred)`` rounded to 3 decimals.
    """
    return round(metrics.r2_score(y_actual, y_pred), 3)
# In[9]:
# Create a dictionary to hold transformed columns
new_X = {}


# One function does all the modeling so that every hyperparameter lives in a
# single signature and Nevergrad can tune all of them in one run.
def build_model(
    alpha,
    facebook_spend_theta,
    facebook_spend_beta,
    google_spend_theta,
    google_spend_beta,
    random_state=0,
):
    """Fit a Ridge model on transformed media spend and return its error.

    Each media column of the global ``df`` is adstocked first and saturated
    second; the transformed columns are stored in the module-level ``new_X``
    dictionary. A Ridge regression is fitted on the training part of a
    train/test split and scored on the full transformed dataset.

    Args:
        alpha: Regularisation strength passed to ``Ridge``.
        facebook_spend_theta: Adstock coefficient for ``facebook_spend``.
        facebook_spend_beta: Saturation exponent for ``facebook_spend``.
        google_spend_theta: Adstock coefficient for ``google_spend``.
        google_spend_beta: Saturation exponent for ``google_spend``.
        random_state: Seed for the train/test split. It is fixed by default
            so that every optimiser trial is scored on the same partition
            and the objective is reproducible.

    Returns:
        The value of ``mape`` for the predictions; this is the quantity the
        optimiser minimises.
    """
    # Adstock first and saturation second
    new_X["facebook_spend"] = saturation(
        adstock(df["facebook_spend"], facebook_spend_theta), facebook_spend_beta
    )
    new_X["google_spend"] = saturation(
        adstock(df["google_spend"], google_spend_theta), google_spend_beta
    )

    # Cast the dictionary to a DataFrame and append the output column
    new_df = pd.DataFrame.from_dict(new_X).join(df['revenue'])
    X = new_df[spend_var]
    y = new_df['revenue']

    # Only the training part is needed here; the test part is discarded
    X_train, _, y_train, _ = train_test_split(X, y, random_state=random_state)

    # Fit the model using the transformed data
    model = Ridge(alpha=alpha)
    model.fit(X_train, y_train)

    # Score on the full dataset without writing a column into the global df:
    # this function is called once per optimiser trial.
    prediction = model.predict(X)
    return mape(y, prediction)
# In[10]:
# Define the list of hyperparameters to optimize
# List must be the same as the ones in the function's definition, same order recommended too
instrum = ng.p.Instrumentation(
    # Ridge requires a non-negative regularisation strength, so the search
    # is bounded below at 0 and started from Ridge's default of 1.0.
    alpha=ng.p.Scalar(init=1.0, lower=0.0),
    facebook_spend_theta=ng.p.Scalar(lower=0, upper=1),
    facebook_spend_beta=ng.p.Scalar(lower=0, upper=1),
    google_spend_theta=ng.p.Scalar(lower=0, upper=1),
    google_spend_beta=ng.p.Scalar(lower=0, upper=1),
)
# Define an optimizer (NGOpt as default); budget is the number of trials (recommended 2500+)
optimizer = ng.optimizers.NGOpt(parametrization=instrum, budget=2500)
# Pass the function to minimize
# Nevergrad maps the instrumented keyword arguments onto build_model's parameters
recommendation = optimizer.minimize(build_model)
# In[11]:
# Input Nevergrad's recommended values to create the optimised model
# Evaluate the objective once more with the hyperparameters Nevergrad recommends
# (the keyword-argument half of the recommendation's value)
model_mape = build_model(**recommendation.kwargs)
print('MAPE: ', model_mape)
# In[12]:
# Once the minimized variable is good define the function again with more outputs to return
# Rebuild the model with recommended values and use it as you wish
new_X = {}


def build_model(
    alpha,
    facebook_spend_theta,
    facebook_spend_beta,
    google_spend_theta,
    google_spend_beta,
    random_state=0,
):
    """Fit the Ridge model and return all KPIs, the model and the predictions.

    Same pipeline as the optimisation objective (adstock, then saturation,
    then Ridge), but returns everything needed for inspection instead of a
    single value to minimise.

    Args:
        alpha: Regularisation strength passed to ``Ridge``.
        facebook_spend_theta: Adstock coefficient for ``facebook_spend``.
        facebook_spend_beta: Saturation exponent for ``facebook_spend``.
        google_spend_theta: Adstock coefficient for ``google_spend``.
        google_spend_beta: Saturation exponent for ``google_spend``.
        random_state: Seed for the train/test split, fixed by default so the
            rebuilt model is reproducible.

    Returns:
        Tuple ``(mape_val, nrmse_val, rsquared_val, model, result)`` where
        ``model`` is the fitted ``Ridge`` instance and ``result`` is a copy
        of the global ``df`` with an added ``prediction`` column.
    """
    # Adstock first and saturation second
    new_X["facebook_spend"] = saturation(
        adstock(df["facebook_spend"], facebook_spend_theta), facebook_spend_beta
    )
    new_X["google_spend"] = saturation(
        adstock(df["google_spend"], google_spend_theta), google_spend_beta
    )

    new_df = pd.DataFrame.from_dict(new_X).join(df['revenue'])
    X = new_df[spend_var]
    y = new_df['revenue']

    # Only the training part is used for fitting; the test part is discarded
    X_train, _, y_train, _ = train_test_split(X, y, random_state=random_state)

    model = Ridge(alpha=alpha)
    model.fit(X_train, y_train)

    # Work on a copy so the global df does not gain a 'prediction' column
    result = df.copy()
    result['prediction'] = model.predict(X)

    nrmse_val = nrmse(result['revenue'], result['prediction'])
    mape_val = mape(result['revenue'], result['prediction'])
    rsquared_val = rsquared(result['revenue'], result['prediction'])
    return mape_val, nrmse_val, rsquared_val, model, result
# Rebuild the model with the recommended hyperparameters and unpack every output
model_mape, model_nrmse, model_rsq, model, result = build_model(
    **recommendation.kwargs
)
# Plot actual revenue against the model's predictions over time
fig = px.line(
    result,
    x='days',
    y=["revenue", 'prediction'],
)
fig.show()
# Report the KPIs of the final model
print('R^2: ', model_rsq)
print('MAPE: ', model_mape)
print('NRMS: ', model_nrmse)