train_modelnet_AE.py
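"""Train a 3D convolutional autoencoder (AE3D) on voxelized ModelNet data.

The script builds the encoder and decoder from the structure dictionaries
below, distributes training across all visible GPUs with
tf.distribute.MirroredStrategy, and saves weights to `save_path` at epoch
boundaries (and every 1000 iterations).
"""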
import numpy as np
import time, sys, os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
# 0 = all messages are logged (default behavior)
# 1 = INFO messages are not printed
# 2 = INFO and WARNING messages are not printed
# 3 = INFO, WARNING, and ERROR messages are not printed
from src.dataset_loader.modelnet_dataset import dataLoader
import src.module.AE3D as AE3D
import tensorflow as tf
tf.get_logger().warning('test')
# WARNING:tensorflow:test
tf.get_logger().setLevel('ERROR')
tf.get_logger().warning('test')
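# Note: TF_CPP_MIN_LOG_LEVEL above only silences the C++ backend; the Python-side
# logger is configured separately through tf.get_logger(). The first warning('test')
# is printed, the second is suppressed once the level is raised to ERROR.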

latent_dim = 16

# =========== autoencoder architecture example (from 3D GAN) ===============
encoder_structure = {
    'name': 'encoder3D',
    'input_shape': [64, 64, 64, 1],  # or [None, None, None, 1]
    'filter_num_list': [64, 128, 256, 512, latent_dim],
    'filter_size_list': [4, 4, 4, 4, 4],
    'strides_list': [2, 2, 2, 2, 1],
    'final_pool': 'average',
    'activation': 'elu',
    'final_activation': 'None',
}
decoder_structure = {
    'name': 'decoder3D',
    'input_dim': latent_dim,  # must be the same as encoder filter_num_list[-1]
    'output_shape': [64, 64, 64, 1],
    'filter_num_list': [512, 256, 128, 64, 1],
    'filter_size_list': [4, 4, 4, 4, 4],
    'strides_list': [1, 2, 2, 2, 2],
    'activation': 'elu',
    'final_activation': 'sigmoid',
}
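
# Rough geometry of the example above (assuming 'same'-style padding inside
# src.module.AE3D, which is an assumption, not something verified here):
# the stride-2 convolutions shrink the 64^3 grid 64 -> 32 -> 16 -> 8 -> 4, the
# final stride-1 layer keeps 4^3, and the 'average' final pool reduces it to a
# latent vector of size latent_dim = 16. The decoder mirrors this path,
# presumably with transposed convolutions, back to a 64^3 occupancy grid
# squashed into (0, 1) by the sigmoid.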

def train(training_epoch=1000,
          learning_rate=1e-4, BATCH_SIZE_PER_REPLICA=32,
          dataset_path=None,
          encoder_structure=None, decoder_structure=None,
          save_path=None, load_path=None):
    # Build the autoencoder under the MirroredStrategy scope so that its
    # variables are created once and mirrored across all available GPUs.
    strategy = tf.distribute.MirroredStrategy()
    with strategy.scope():
        model = AE3D.AE3D(encoder_structure=encoder_structure,
                          decoder_structure=decoder_structure,
                          BATCH_SIZE_PER_REPLICA=BATCH_SIZE_PER_REPLICA, strategy=strategy,
                          learning_rate=learning_rate)
    data_loader = dataLoader(data_path=dataset_path)

    if load_path is not None:
        print('load weights...')
        model.loadModel(load_path=load_path)
        print('done!')

    loss = np.zeros(3)
    precision, recall = 0.0, 0.0
    epoch, iteration, run_time = 0., 0., 0.
    print('start training...')
    while epoch < training_epoch:
        start_time = time.time()
        # The global batch is split evenly across replicas by the strategy.
        batch_size = BATCH_SIZE_PER_REPLICA * strategy.num_replicas_in_sync
        batch_data = data_loader.getNextBatch(batchSize=batch_size)
        input_images = batch_data['input_images']
        output_images = input_images  # autoencoder: the target is the input itself
        epoch_curr = data_loader.epoch
        data_start = data_loader.batchStart
        data_length = data_loader.dataLength
        train_dataset = tf.data.Dataset.from_tensor_slices(
            (input_images, output_images)).batch(batch_size)
        train_dist_dataset = strategy.experimental_distribute_dataset(train_dataset)

        # At every epoch boundary (or every 1000 iterations) reset the running
        # averages and, if requested, save the current weights.
        if epoch_curr != epoch or ((iteration + 1) % 1000 == 0 and (iteration + 1) != 1):
            print('')
            iteration = 0
            loss = loss * 0.0
            run_time = 0.0
            if save_path is not None:
                print('save model...')
                model.saveModel(save_path)
        epoch = epoch_curr

        rloss_temp, ploss_temp, total_loss_temp, pr_temp, rc_temp = model.distributed_fit(
            inputs=next(iter(train_dist_dataset)))
        end_time = time.time()

        # Running (per-epoch) averages of the losses, metrics, and step time.
        loss = (loss * iteration + np.array([rloss_temp, ploss_temp, total_loss_temp])) / (iteration + 1.0)
        precision = (precision * iteration + pr_temp) / (iteration + 1.0)
        recall = (recall * iteration + rc_temp) / (iteration + 1.0)
        run_time = (run_time * iteration + (end_time - start_time)) / (iteration + 1.0)

        sys.stdout.write(
            "Epoch:{:03d} iter:{:04d} runtime:{:.2f} ".format(int(epoch + 1), int(iteration + 1), run_time))
        sys.stdout.write("batch:{} cur/tot:{:05d}/{:05d} ".format(batch_size, data_start, data_length))
        sys.stdout.write(
            "rloss:{:.4f}, ploss:{:.4f}, tloss:{:.4f} ".format(loss[0], loss[1], loss[2]))
        sys.stdout.write(" pr:{:.4f}, rc:{:.4f} \r".format(precision, recall))
        sys.stdout.flush()

        # NaN guard: NaN is the only value that does not compare equal to itself.
        if np.sum(loss) != np.sum(loss):
            print('')
            print('NaN')
            return
        iteration = iteration + 1.0


if __name__ == "__main__":
    sys.exit(train(
        training_epoch=1000,
        learning_rate=1e-3, BATCH_SIZE_PER_REPLICA=110,
        dataset_path='/media/yonsei/4TB_HDD/dataset/modelNet/',
        encoder_structure=encoder_structure,
        decoder_structure=decoder_structure,
        save_path='./weights/AE3D/',
        # load_path='./weights/AE3D/',
        # load_path=None,
    ))
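
# To resume training from previously saved weights, pass the save directory
# back in as load_path (a sketch based on the commented-out lines above; the
# paths are just the examples used in this script):
#
#   train(training_epoch=1000,
#         learning_rate=1e-3, BATCH_SIZE_PER_REPLICA=110,
#         dataset_path='/media/yonsei/4TB_HDD/dataset/modelNet/',
#         encoder_structure=encoder_structure,
#         decoder_structure=decoder_structure,
#         save_path='./weights/AE3D/',
#         load_path='./weights/AE3D/')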