-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlstm_mnist_trainer.py
110 lines (85 loc) · 3.92 KB
/
lstm_mnist_trainer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
'''
@brief LSTM-based handwritten digit recognition.
The 28x28-pixel MNIST data set is used.
At every time-step, one column slice of the image is fed to the LSTM.
@author NAZIM YILDIZ
@version 0001
'''
import logging
logging.getLogger("tensorflow").setLevel(logging.ERROR)
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM
from tensorflow import keras
from datetime import datetime
# Name under which the trained model (and its TensorBoard run) is stored
model_name = 'lstm_mnist_v3'
# Load the MNIST handwritten-digit data set that ships with Keras
mnist = tf.keras.datasets.mnist
(X_train, Y_train), (X_test, Y_test) = mnist.load_data()
# Each pixel is an 8-bit intensity in [0, 255]; divide by 255.0 to
# normalize the inputs into [0, 1].  Only the Xs are scaled — the
# labels stay as integer class ids for now.
X_train = X_train / 255.0
X_test = X_test / 255.0
for tag, arr in (('X_train', X_train), ('Y_train', Y_train),
                 ('X_test', X_test), ('Y_test', Y_test)):
    print(tag + '.shape = ', arr.shape)
# Sequence layout fed to the LSTM.  For a (samples, timesteps, features)
# tensor the time axis is 1 and the feature axis is 2.
# NOTE(review): the original read Tx from shape[2] and features from
# shape[1] — swapped semantically, but harmless here since MNIST images
# are square (28x28), so both values are 28.
Tx = X_train.shape[1]        # time-steps per sample (one image column per step)
features = X_train.shape[2]  # features per time-step (pixels per column)
class_count = 10             # digits 0-9

# One-hot encode the integer labels.  np.eye(k)[y] picks row y of the
# k x k identity matrix for every label at once, replacing the original
# per-sample Python loops with one vectorized indexing operation
# (same float64 result as the np.zeros + assignment version).
Y_train_oh = np.eye(class_count)[Y_train]
Y_test_oh = np.eye(class_count)[Y_test]
print('Y_train_oh.shape = ', Y_train_oh.shape)
print('Y_test_oh.shape = ', Y_test_oh.shape)
# Build the classifier with the Sequential API.
model_lstm = Sequential()
# Two stacked LSTM layers encode the image column-by-column; the first
# returns the full sequence so the second layer can consume it, the
# second returns only its final hidden state.
model_lstm.add(LSTM(64, input_shape=(Tx, features), activation='tanh',
                    return_sequences=True, stateful=False, name="Layer0_LSTM"))
model_lstm.add(LSTM(units=64, activation='tanh', return_sequences=False,
                    name="Layer1_LSTM"))
model_lstm.add(Dropout(0.2))
model_lstm.add(Dense(128, activation='tanh'))
model_lstm.add(Dropout(0.3))
model_lstm.add(Dense(64, activation='tanh'))
model_lstm.add(Dropout(0.2))
# Output layer: softmax probabilities over the 10 digit classes.
model_lstm.add(Dense(class_count, activation='softmax', name="LayerF_Dense"))

# Candidate optimizers.  The legacy `lr=` and `decay=` keyword arguments
# are deprecated and rejected by TF >= 2.11 Keras optimizers; use
# `learning_rate=` instead.  InverseTimeDecay with decay_steps=1
# reproduces the legacy `decay` formula exactly: lr / (1 + decay * step).
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=1e-3, decay_steps=1, decay_rate=1e-5)
opt_sgd = tf.keras.optimizers.SGD(learning_rate=lr_schedule, momentum=0.9,
                                  name="SGD")
# NOTE(review): the name "RMPSprop" is a typo in the original, kept
# as-is since it is a runtime label.
opt_rms = tf.keras.optimizers.RMSprop(learning_rate=1e-3, rho=0.9,
                                      momentum=0.0, epsilon=1e-7,
                                      centered=False, name="RMPSprop")
opt_adm = tf.keras.optimizers.Adam(learning_rate=lr_schedule, beta_1=0.9,
                                   beta_2=0.999, epsilon=1e-7,
                                   amsgrad=False, name="Adam")
# Candidate losses: CategoricalCrossentropy expects one-hot targets
# (what we feed), SparseCategoricalCrossentropy expects integer ids.
loss_cce = tf.keras.losses.CategoricalCrossentropy(from_logits=False,
                                                   label_smoothing=0.0,
                                                   reduction="auto")
loss_sce = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False,
                                                         reduction="auto")
# Candidate metrics; only CategoricalAccuracy matches one-hot targets
# and is the one actually compiled in below.
metric1 = tf.keras.metrics.Accuracy()
metric2 = tf.keras.metrics.CategoricalAccuracy()
metric3 = tf.keras.metrics.BinaryAccuracy(threshold=0.7)
metric4 = tf.keras.metrics.TopKCategoricalAccuracy(k=5)
# `metrics` takes a list per the Keras API.
model_lstm.compile(loss=loss_cce, optimizer=opt_sgd, metrics=[metric2])
# Resume training from a previously saved checkpoint instead of from
# scratch (uncomment to use):
#model_lstm = keras.models.load_model('model_outputs/' + model_name)

# TensorBoard logging — one timestamped run directory per invocation.
log_dir = "logs/fit/" + model_name + '_' + datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

# Train: 50 epochs, mini-batches of 32, validating against the test split.
history = model_lstm.fit(x=X_train,
                         y=Y_train_oh,
                         epochs=50,
                         batch_size=32,
                         validation_data=(X_test, Y_test_oh),
                         callbacks=[tensorboard_callback],
                         shuffle=True)

# Final held-out evaluation (silent), reported as a name->value dict.
score = model_lstm.evaluate(x=X_test, y=Y_test_oh, verbose=0, return_dict=True)
print(score)

# Persist the trained model for later reuse.
model_lstm.save('./model_outputs/' + model_name)

# Spot-check: predicted digit vs. ground truth for the first 20 test images.
preds = model_lstm.predict(X_test[0:20, :, :])
pred_numbers = np.argmax(preds, axis=1)
print('Y_test = ', Y_test[0:20])
print('pred_numbers = ', pred_numbers)