imdb_bilstm.py
import numpy as np
from mozi.datasets.imdb import IMDB
from mozi.model import Sequential
from mozi.layers.linear import Linear
from mozi.layers.noise import Dropout
from mozi.layers.activation import RELU, Sigmoid
from mozi.layers.normalization import BatchNormalization
from mozi.layers.embedding import Embedding
from mozi.env import setenv
from mozi.layers.recurrent import BiLSTM, LSTM
from mozi.layers.misc import Transform, Flatten, Reshape
from mozi.learning_method import SGD
from mozi.log import Log
from mozi.train_object import TrainObject
from mozi.cost import mse, error
import theano.tensor as T
'''
Train a stacked bidirectional LSTM on the IMDB sentiment classification task.

The dataset is actually too small for an LSTM to offer any advantage over
simpler, much faster methods such as TF-IDF + LogReg.

Notes:
- RNNs are tricky. The choice of batch size is important, and the choice of
  loss and optimizer is critical. Most configurations won't converge.
- The LSTM loss decrease during training can look quite different from what
  you see with CNNs/MLPs/etc. It is more or less a sigmoid rather than an
  inverse exponential.

GPU command:
    THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32 python imdb_bilstm.py

250s/epoch on GPU (GT 650M), vs. 400s/epoch on CPU (2.4GHz Core i7).
'''
def train():
    max_features = 20000
    maxseqlen = 100  # cut texts after this number of words (among the top max_features most common words)
    batch_size = 16
    word_vec_len = 256
    iter_class = 'SequentialRecurrentIterator'
    seq_len = 10

    data = IMDB(pad_zero=True, maxlen=maxseqlen, nb_words=max_features, batch_size=batch_size,
                train_valid_test_ratio=[8, 2, 0], iter_class=iter_class, seq_len=seq_len)
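    # With pad_zero=True every review is zero-padded to maxseqlen word indices,
    # so each input batch is a (batch_size, maxseqlen) integer matrix, matching
    # the T.matrix() input variable below.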

    print('Build model...')
    model = Sequential(input_var=T.matrix(), output_var=T.matrix())
    model.add(Embedding(max_features, word_vec_len))
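    # Embedding looks up each of the max_features word indices as a
    # word_vec_len-dimensional vector, so each example becomes a
    # (maxseqlen, word_vec_len) sequence of word vectors.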

    # MLP layers
    model.add(Transform((word_vec_len,)))  # transform the 3D embedding output to 2D input for the MLP
    model.add(Linear(word_vec_len, 100))
    model.add(RELU())
    model.add(BatchNormalization(dim=100, layer_type='fc'))
    model.add(Linear(100, 100))
    model.add(RELU())
    model.add(BatchNormalization(dim=100, layer_type='fc'))
    model.add(Linear(100, word_vec_len))
    model.add(RELU())
    model.add(Transform((maxseqlen, word_vec_len)))  # transform back from 2D to 3D for the recurrent input
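    # Together the Transform pair lets the fully-connected block run
    # independently on every timestep: presumably (batch, maxseqlen,
    # word_vec_len) is folded into (batch * maxseqlen, word_vec_len) for the
    # Linear layers, then unfolded back to 3D for the recurrent layers.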

    # Stacked BiLSTM layers
    model.add(BiLSTM(word_vec_len, 50, output_mode='concat', return_sequences=True))
    model.add(BiLSTM(100, 24, output_mode='sum', return_sequences=True))
    model.add(LSTM(24, 24, return_sequences=True))
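    # output_mode='concat' joins the forward and backward 50-unit states into
    # 100 features per timestep (hence the second BiLSTM's input size of 100),
    # while output_mode='sum' adds the two 24-unit directions, keeping 24.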

    # MLP classifier head: flatten all timesteps and predict one sentiment probability
    model.add(Reshape((24 * maxseqlen,)))
    model.add(BatchNormalization(dim=24 * maxseqlen, layer_type='fc'))
    model.add(Linear(24 * maxseqlen, 50))
    model.add(RELU())
    model.add(Dropout(0.2))
    model.add(Linear(50, 1))
    model.add(Sigmoid())

    # build learning method
    decay_batch = int(data.train.X.shape[0] * 5 / batch_size)
    learning_method = SGD(learning_rate=0.1, momentum=0.9,
                          lr_decay_factor=1.0, decay_batch=decay_batch)
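    # decay_batch is the number of minibatches in 5 epochs of training; since
    # lr_decay_factor=1.0, the scheduled decay leaves the 0.1 learning rate
    # effectively unchanged.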

    # Build Logger
    log = Log(experiment_name='MLP',
              description='This is a tutorial',
              save_outputs=True,      # log all the outputs from the screen
              save_model=True,        # save the best model
              save_epoch_error=True,  # log error at every epoch
              save_to_database={'name': 'Example.sqlite3',
                                'records': {'Batch_Size': batch_size,
                                            'Learning_Rate': learning_method.learning_rate,
                                            'Momentum': learning_method.momentum}})

    # put everything into the train object
    train_object = TrainObject(model=model,
                               log=log,
                               dataset=data,
                               train_cost=mse,
                               valid_cost=error,
                               learning_method=learning_method,
                               stop_criteria={'max_epoch': 100,
                                              'epoch_look_back': 5,
                                              'percent_decrease': 0.01})
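    # stop_criteria: train for at most 100 epochs, stopping early once the
    # validation error fails to improve by at least 1% (percent_decrease)
    # within a 5-epoch look-back window.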

    # finally run the code
    train_object.setup()
    train_object.run()

if __name__ == '__main__':
    setenv()
    train()