# CREDITS: https://machinelearningmastery.com/sequence-classification-lstm-recurrent-neural-networks-python-keras/
# import libraries
import numpy
import keras
from keras.datasets import imdb
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout, Embedding
from keras.preprocessing import sequence
# CNN-related imports
from keras.layers import Conv1D, MaxPooling1D
# print keras version
print(keras.__version__)
# 2.4.3
# CREDITS: https://keras.io/datasets/#imdb-movie-reviews-sentiment-classification
# load dataset
top_words = 5000
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=top_words)
print(X_train[1])
""" [1, 194, 1153, 194, 2, 78, 228, 5, 6, 1463, 4369, 2, 134, 26, 4, 715, 8, 118, 1634, 14, 394, 20, 13, 119, 954, 189, 102, 5, 207, 110, 3103, 21, 14, 69, 188, 8, 30, 23, 7, 4, 249, 126, 93, 4, 114, 9, 2300, 1523, 5,
647, 4, 116, 9, 35, 2, 4, 229, 9, 340, 1322, 4, 118, 9, 4, 130, 4901, 19, 4, 1002, 5, 89, 29, 952, 46, 37,
4, 455, 9, 45, 43, 38, 1543, 1905, 398, 4, 1649, 26, 2, 5, 163, 11, 3215, 2, 4, 1153, 9, 194, 775, 7, 2, 2, 349, 2637, 148, 605, 2, 2, 15, 123, 125, 68, 2, 2, 15, 349, 165, 4362, 98, 5, 4, 228, 9, 43, 2, 1157, 15,
299, 120, 5, 120, 174, 11, 220, 175, 136, 50, 9, 4373, 228, 2, 5, 2, 656, 245, 2350, 5, 4, 2, 131, 152, 491, 18, 2, 32, 2, 1212, 14, 9, 6, 371, 78, 22, 625, 64, 1382, 9, 8, 168, 145, 23, 4, 1690, 15, 16, 4, 1355, 5, 28, 6, 52, 154, 462, 33, 89, 78, 285, 16, 145, 95] """
print(type(X_train[1]))
# <class 'list'>
print(len(X_train[1]))
# 189
print(X_train.shape)
# (25000,)
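# Optional sanity check (a sketch, not part of the original script): decode a
# review back to words using the documented imdb encoding: load_data offsets
# word indices by 3 (0 = padding, 1 = start-of-sequence, 2 = out-of-vocabulary)
word_index = imdb.get_word_index()
index_word = {i + 3: w for w, i in word_index.items()}
index_word.update({0: '<pad>', 1: '<start>', 2: '<oov>'})
print(' '.join(index_word.get(i, '<oov>') for i in X_train[1][:20]))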
# largest word index in the data (must be below top_words)
# (the original expression max(numpy.max(X_test)) only inspected one review
# and breaks on ragged object arrays in newer numpy)
print(max(max(seq) for seq in X_test))
# 4998
# padding
padding_length = 600
X_train = sequence.pad_sequences(X_train, maxlen=padding_length)
X_test = sequence.pad_sequences(X_test, maxlen=padding_length)
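# Note: pad_sequences pads and truncates at the front by default
# (padding='pre', truncating='pre'); a toy illustration:
#   sequence.pad_sequences([[1, 2, 3]], maxlen=5)          -> [[0 0 1 2 3]]
#   sequence.pad_sequences([[1, 2, 3, 4, 5, 6]], maxlen=5) -> [[2 3 4 5 6]]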
# view a datapoint after padding
print(X_train.shape)
# (25000, 600)
print(X_train[1])
""" [ 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 1 194 1153 194 2 78 228 5 6
1463 4369 2 134 26 4 715 8 118 1634 14 394 20 13
119 954 189 102 5 207 110 3103 21 14 69 188 8 30
116 9 35 2 4 229 9 340 1322 4 118 9 4 130
4901 19 4 1002 5 89 29 952 46 37 4 455 9 45
43 38 1543 1905 398 4 1649 26 2 5 163 11 3215 2
4 1153 9 194 775 7 2 2 349 2637 148 605 2 2
15 123 125 68 2 2 15 349 165 4362 98 5 4 228
9 43 2 1157 15 299 120 5 120 174 11 220 175 136
50 9 4373 228 2 5 2 656 245 2350 5 4 2 131
152 491 18 2 32 2 1212 14 9 6 371 78 22 625
64 1382 9 8 168 145 23 4 1690 15 16 4 1355 5
28 6 52 154 462 33 89 78 285 16 145 95] """
# create model
embedding_layer_outputs = 32
model = Sequential()
model.add(Embedding(top_words, embedding_layer_outputs, input_length=padding_length))
# model.add(Dropout(0.2))
model.add(Conv1D(filters=32, kernel_size=3, padding='same', activation='relu'))
model.add(MaxPooling1D(pool_size=2))
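# the Conv1D/MaxPooling1D pair above learns local 3-word features and halves
# the sequence length (600 -> 300 steps) before the LSTM reads it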
model.add(LSTM(100))
# model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()
"""
Model: "sequential_3"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
embedding_3 (Embedding) (None, 600, 32) 160000
_________________________________________________________________
conv1d (Conv1D) (None, 600, 32) 3104
_________________________________________________________________
max_pooling1d (MaxPooling1D) (None, 300, 32) 0
_________________________________________________________________
lstm_3 (LSTM) (None, 100) 53200
_________________________________________________________________
dense_3 (Dense) (None, 1) 101
=================================================================
Total params: 216,405
Trainable params: 216,405
Non-trainable params: 0
_________________________________________________________________
"""
# Refer: https://datascience.stackexchange.com/questions/10615/number-of-parameters-in-an-lstm-model for how the LSTM parameter count is derived
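# Worked out for this model, matching the summary above:
#   Embedding: top_words * embedding_dim                 = 5000 * 32            = 160,000
#   Conv1D:    (kernel_size * in_channels + 1) * filters = (3*32 + 1) * 32      = 3,104
#   LSTM:      4 * (input_dim + units + 1) * units       = 4 * (32+100+1) * 100 = 53,200
#   Dense:     inputs + bias                             = 100 + 1              = 101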
# NOTE: the session log below records an initial 3-epoch run followed by this
# 10-epoch run, i.e. 13 epochs of training in total
model.fit(X_train, y_train, epochs=10, batch_size=64)
"""
>>> model.fit(X_train,y_train, epochs=3, batch_size=64)
Epoch 1/3
2021-10-01 15:22:00.183536: W tensorflow/stream_executor/gpu/redzone_allocator.cc:314] Internal: Invoking GPU asm compilation is supported on Cuda non-Windows platforms only
Relying on driver to perform ptx compilation.
Modify $PATH to customize ptxas location.
This message will be only logged once.
391/391 [==============================] - 9s 23ms/step - loss: 0.4404 - accuracy: 0.7782
Epoch 2/3
391/391 [==============================] - 9s 22ms/step - loss: 0.2691 - accuracy: 0.8922
Epoch 3/3
<tensorflow.python.keras.callbacks.History object at 0x00000199C5BA38C8>
>>> model.fit(X_train,y_train, epochs=10, batch_size=64)
Epoch 1/10
391/391 [==============================] - 9s 22ms/step - loss: 0.1786 - accuracy: 0.9352
Epoch 2/10
391/391 [==============================] - 9s 22ms/step - loss: 0.1489 - accuracy: 0.9473
Epoch 3/10
391/391 [==============================] - 9s 22ms/step - loss: 0.1258 - accuracy: 0.9572
Epoch 4/10
391/391 [==============================] - 9s 22ms/step - loss: 0.1091 - accuracy: 0.9642
Epoch 5/10
391/391 [==============================] - 9s 22ms/step - loss: 0.0921 - accuracy: 0.9704
Epoch 6/10
391/391 [==============================] - 9s 22ms/step - loss: 0.0691 - accuracy: 0.9800
Epoch 7/10
391/391 [==============================] - 9s 22ms/step - loss: 0.0621 - accuracy: 0.9819
Epoch 8/10
391/391 [==============================] - 9s 22ms/step - loss: 0.0505 - accuracy: 0.9856
Epoch 9/10
391/391 [==============================] - 9s 22ms/step - loss: 0.0550 - accuracy: 0.9839
Epoch 10/10
391/391 [==============================] - 9s 22ms/step - loss: 0.0353 - accuracy: 0.9912
<tensorflow.python.keras.callbacks.History object at 0x00000199D400E3C8>
"""
scores = model.evaluate(X_test, y_test, verbose=0)
print(scores)  # [test loss, test accuracy]
# [0.5717116594314575, 0.8641600012779236]
print("Accuracy: %.2f%%" % (scores[1]*100))
# Accuracy: 86.42%
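# The model reaches ~99% training accuracy but only ~86% test accuracy, a clear
# sign of overfitting; re-enabling the commented-out Dropout layers above is one remedy.
# Minimal inference sketch: score one (already padded) test review.
prob = float(model.predict(X_test[:1])[0][0])  # sigmoid probability of a positive review
print('positive' if prob >= 0.5 else 'negative', prob)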