-
Notifications
You must be signed in to change notification settings - Fork 35
/
Copy pathcfg.py
84 lines (74 loc) · 4.76 KB
/
cfg.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
class flickr_cfg:
    """Settings bundle for experiments on the Flickr30k dataset.

    Instantiating the class populates plain instance attributes holding
    data locations, output locations, training hyper-parameters and model
    dimensions. No file-system access happens here.
    """

    def __init__(self):
        self.id = "Flickr30k"

        # ---------------- data locations ----------------
        self.path_to_descriptors = "./DATA/Flickr30k/flickr30k_incp3/"
        self.path_to_images = "./DATA/Flickr30k/flickr30k-images/"
        self.descriptor_suffix = "_incp_v3"

        # ---------------- annotations and splits ----------------
        self.annotations_path = "./DATA/Flickr30k/results_20130124.token"
        # Files describing the train / val / test splits (the origin of the
        # splits is not recorded in this file).
        self.train_file = "./DATA/Flickr30k/imgs_train.txt"
        self.val_file = "./DATA/Flickr30k/imgs_val.txt"
        self.test_file = "./DATA/Flickr30k/imgs_test.txt"

        # ---------------- outputs ----------------
        # Files will be overwritten in the case of multiple runs.
        self.experiment = "./experiments/flickr30k/"
        # Vocabulary, model and results paths all point at the experiment
        # directory.
        self.vocab_path = self.model_path = self.results_path = self.experiment

        # ---------------- training ----------------
        self.word_count_threshold = 1
        self.learning_rate = 5e-4
        self.n_epochs = 10
        self.batch_size = 100
        self.max_to_keep = 40
        # A fractional value allows saving the model several times per epoch.
        # That makes sense because an epoch here means all sentences of the
        # training data (5 for each image).
        self.save_every_n_epoch = 0.2
        # When enabled, all features are explicitly added to a dictionary
        # instead of relying on the system file cache. The option is
        # RAM-consuming and not effective; the reader will be rewritten.
        self.use_hard_cache = False

        # ---------------- model ----------------
        # Dimensionality of input features (2048 for InceptionV3 and ResNet).
        self.dim_image = 2048
        # LSTM hidden state size.
        self.dim_hidden = 1300
        # Input word dimensionality.
        self.dim_word_emb = 300
        # Dimensionality of the used convolutional layer (8x8).
        self.n_frame_step = 64
        # Affects training time; can be reduced since training sentences are
        # significantly shorter.
        self.n_lstm_step = 25
        self.forget_bias = 1
        self.cell_clip = None
        self.input_keep_prob = self.output_keep_prob = 0.5
class msr_vtt_cfg:
    """Settings bundle for experiments on the MSR-VTT dataset.

    Instantiating the class populates plain instance attributes holding
    data locations, output locations, training hyper-parameters and model
    dimensions. No file-system access happens here.
    """

    def __init__(self):
        self.id = "MSR-VTT"

        # Common prefix for every dataset path below; kept local and not
        # stored on the instance.
        root = "./DATA/MSR-VTT/"
        self.descriptor_suffix = "_incp_v3"

        # ---------------- annotations and splits ----------------
        self.trainval_annotations = root + "train_val_videodatainfo.json"
        self.test_annotations = root + "test_videodatainfo.json"
        self.path_to_trainval_descriptors = root + "trainval_descriptors/"
        self.path_to_test_descriptors = root + "test_descriptors/"
        self.path_to_trainval_video = root + "TrainValVideo/"
        self.path_to_test_video = root + "TestVideo/"

        # ---------------- outputs ----------------
        # Files will be overwritten in the case of multiple runs.
        self.experiment = "./experiments/msr-vtt/"
        # Vocabulary, model and results paths all point at the experiment
        # directory.
        self.vocab_path = self.model_path = self.results_path = self.experiment

        # ---------------- training ----------------
        self.word_count_threshold = 1
        self.learning_rate = 5e-4
        self.n_epochs = 10
        self.batch_size = 100
        # Number of snapshots to keep.
        self.max_to_keep = 30
        # A fractional value allows saving the model several times per epoch.
        # That makes sense because an epoch here means all sentences of the
        # training data (20 for each video).
        self.save_every_n_epoch = 0.1
        # When enabled, all features are explicitly added to a dictionary
        # instead of relying on the system file cache. The option is
        # RAM-consuming and not effective; the reader will be rewritten.
        self.use_hard_cache = True

        # ---------------- model ----------------
        # Dimensionality of input features (2048 for InceptionV3 and ResNet).
        self.dim_image = 2048
        # LSTM hidden state size.
        self.dim_hidden = 1300
        # Input word dimensionality.
        self.dim_word_emb = 300
        # Number of frames used for training.
        self.n_frame_step = 26
        # Affects training time; can be reduced since training sentences are
        # significantly shorter.
        self.n_lstm_step = 25
        self.forget_bias = 1
        self.cell_clip = None
        self.input_keep_prob = self.output_keep_prob = 0.5