from sys import platform
if platform == "linux" or platform == "linux2":
    # Linux: nothing extra needed
    pass
elif platform == "darwin":
    # macOS: caffeine stops the machine from sleeping during long training runs
    import caffeine
elif platform == "win32":
    # Windows: nothing extra needed
    pass
import os
import sys
import numpy as np
import theano
import theano.tensor as T
import data
import utils
from models.mlp import MLP, Hidden_Layer
__docformat__ = 'restructuredtext en'
__doc__ = """
Trains a multi-layer perceptron (MLP) on the weather data.

Data set:
    - 582 events
    - 10092 features
    - binary TRUE|FALSE outcome

The features are preprocessed weather data of unknown provenance; the outcome
indicates whether an ice-storm event occurred.

Notes:
    - This MLP does not seem capable of learning the structure of the data
    - Validation error remains close to 50% (chance level for a balanced
      binary task)
"""
# Save locations
## built model
MODEL = data.model_dir
MODEL_ID = os.path.splitext(os.path.basename(__file__))[0]
## visualising runtime parameters
DATA_DIR = data.data_dir
PLOT_DIR = data.plot_dir
# Parameter settings
## model parameters
n_hidden = 10000
n_in = 10092
n_out = 2
## training parameters
n_epochs = 10000
batch_size = 10
learning_rate = 0.1
l1_reg = 0.00
L2_reg = 0.00
## early-stopping parameters
patience = 50000              # look at this many examples regardless
patience_increase = 2         # wait this much longer once a new best is found
improvement_threshold = 0.995 # a relative improvement of this much counts as significant
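# How these interact, as a sketch (assuming utils.training.train follows the
# classic Theano-tutorial early-stopping loop; the real logic lives in
# utils.training):
#     validation_frequency = min(n_train_batches, patience // 2)
#     if this_loss < best_loss * improvement_threshold:   # significant gain
#         patience = max(patience, iter * patience_increase)
#     if patience <= iter:    # iter counts minibatches seen so far
#         stop training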
# sampling frequency for the runtime plots
freq = 1
if __name__ == "__main__":
logger = utils.logs.get_logger(__name__,
update_stream_level=utils.logs.logging.DEBUG)
logger.info('Loading data ...')
source = data.Load_Data(location=data.data_loc,
# search_pat='day1'
)
datasets = source.all()
train_set_x, train_set_y = datasets[0]
valid_set_x, valid_set_y = datasets[1]
test_set_x, test_set_y = datasets[2]
# compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size
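    # Worked example with a hypothetical split: batch_size = 10, so a training
    # split of, say, 400 of the 582 events would give 400 // 10 = 40
    # minibatches; any remainder smaller than batch_size is dropped by the
    # floor division.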
logger.info('Building the model ...')
# allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as a matrix of feature rows
    y = T.ivector('y')   # the labels are presented as a 1D vector of [int] labels
rng = np.random.RandomState(1234)
# construct the MLP class
classifier = MLP(
rng=rng,
inputs=x,
n_in=n_in,
n_hidden=n_hidden,
n_out=n_out
)
    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); the cost is
    # expressed here symbolically
    cost = (classifier.negativeLogLikelihood(y) + l1_reg * classifier.l1 +
            L2_reg * classifier.L2_sqr)
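    # In formula form (assuming classifier.l1 and classifier.L2_sqr follow the
    # usual tutorial definitions, i.e. sums over both weight matrices):
    #     cost = NLL(y) + l1_reg * (|W_hidden| + |W_logit|)
    #                   + L2_reg * (||W_hidden||^2 + ||W_logit||^2)
    # With both coefficients set to 0.00 above, the model is effectively
    # unregularized.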
# compiling a Theano function that computes the mistakes that are made
# by the model on a minibatch
logger.debug('building test model')
test_model = theano.function(
inputs=[index],
outputs=classifier.errors(y),
givens={
x: test_set_x[index * batch_size:(index + 1) * batch_size],
y: test_set_y[index * batch_size:(index + 1) * batch_size]
}
)
logger.debug('building validate model')
validate_model = theano.function(
inputs=[index],
outputs=classifier.errors(y),
givens={
x: valid_set_x[index * batch_size:(index + 1) * batch_size],
y: valid_set_y[index * batch_size:(index + 1) * batch_size]
}
)
    # compute the gradient of cost with respect to theta (stored in params);
    # the resulting gradients are collected in the list gparams
gparams = [T.grad(cost, param) for param in classifier.params]
    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs: given two equal-length lists
    # A = [a1, a2, a3, a4] and B = [b1, b2, b3, b4], zip pairs them
    # element-wise into C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)]
updates = [
(param, param - learning_rate * gparam)
for param, gparam in zip(classifier.params, gparams)
]
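    # This is plain stochastic gradient descent: each parameter takes a step
    # of size learning_rate against its own gradient,
    #     param <- param - learning_rate * d(cost)/d(param),
    # with no momentum or learning-rate schedule.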
    # compiling a Theano function `train_model` that returns the cost and, at
    # the same time, updates the parameters of the model according to the
    # rules defined in `updates`; the mean of each gradient is returned as
    # well so the updates can be monitored
logger.debug('building training model')
train_model = theano.function(
inputs=[index],
outputs=[cost] + [g.mean() for g in gparams],
updates=updates,
givens={
x: train_set_x[index * batch_size: (index + 1) * batch_size],
y: train_set_y[index * batch_size: (index + 1) * batch_size]
}
)
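    # Usage note (a sketch; the actual loop is assumed to live in
    # utils.training.train, called below): train_model(i) performs one SGD
    # step on minibatch i and returns [cost_value, mean_grad_0, mean_grad_1, ...].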
logger.info('Training the model ...')
    visualise_weights = {  # dict of images to create
        'inputLayer' + '_weights': {        # input -> hidden-layer image
            'x': classifier.hiddenLayer.w,  # the parameter to visualise
            'img_shape': (29*2, 29*2*3),    # product of tuple == # input nodes
            'tile_shape': (40, 32),         # max number is # nodes in next layer
            'tile_spacing': (1, 1),         # x,y gap between tiles
            'runtime_plots': True
        },
        'logitLayer' + '_weights': {        # hidden -> logistic-layer image
            'x': classifier.logitLayer.w,
            'img_shape': (100, 100),        # product of tuple == # hidden nodes
            'tile_shape': (1, 2),
            'tile_spacing': (1, 1)
        }
    }
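    # Shape sanity check (follows from the settings above):
    #     inputLayer: 58 * 174  = 10092 pixels per tile == n_in;
    #                 40 * 32   = 1280 tiles <= n_hidden (10000)
    #     logitLayer: 100 * 100 = 10000 pixels per tile == n_hidden;
    #                 1 * 2     = 2 tiles == n_out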
# visualise cost during runtime
visualise_cost = { # visualising the cost
'cost':{'freq':freq} # frequency of sampling
}
# visualise arbitrary parameters at runtime
visualise_params = {
'hiddenLayer' + '_weights': {
'freq':freq,
'x':classifier.hiddenLayer.w
},
'hiddenLayer' + '_bias': {
'freq':freq,
'x': classifier.hiddenLayer.b
},
'logitLayer' + '_weights': {
'freq':freq,
'x': classifier.logitLayer.w
},
'logitLayer' + '_bias': {
'freq':freq,
'x':classifier.logitLayer.b
}
}
visualise_updates = {
'hiddenLayer' + '_weights': {
'update_position':0
},
'hiddenLayer' + '_bias': {
'update_position':1
},
'logitLayer' + '_weights': {
'update_position':2
},
'logitLayer' + '_bias': {
'update_position':3
}
}
param_man = utils.visualise.Visualise_Runtime(
plot_dir=PLOT_DIR,
data_dir=DATA_DIR
)
param_man.initalise(
run_id = MODEL_ID,
default_freq = min(n_train_batches, patience // 2),
params = visualise_params,
cost = visualise_cost,
imgs = visualise_weights,
updates = visualise_updates
)
utils.training.train(classifier, train_model, validate_model, test_model,
n_train_batches, n_valid_batches, n_test_batches,
n_epochs, learning_rate,
patience, patience_increase, improvement_threshold,
MODEL, MODEL_ID, logger,
visualise=param_man
)