-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathconfig.py
46 lines (40 loc) · 965 Bytes
/
config.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# coding:utf-8
"""
Configuration constants.

All settings are plain module-level names. The derived values in this
section (num_samples_per_epoch, MAX_LEN_SPEECH, MAX_LEN_VIDEO) are computed
once, at import time, from the base values defined next to them.
"""

# ---- Model / dataset selection ----
MODULE = 'TCN'  # use TCN-based model
type_visual_encoder = 'TCN'  # TCN or LSTM
DATASET = 'AVSpeech'
aim_path = './data'  # AVMS as the root

# ---- Run mode switches ----
finetune = False
inference = False

# ---- Batching / schedule ----
MAX_EPOCH = 10000
BATCH_SIZE = 32
BATCH_SIZE_TEST = 32
SHUFFLE_BATCH = True
DATA_AUG = True
num_steps_per_epoch = 3000
# Derived: steps per epoch times the training batch size.
num_samples_per_epoch = int(BATCH_SIZE * num_steps_per_epoch)

# ---- Signal lengths and rates ----
FRAME_RATE = 16000  # presumably the audio sample rate in Hz -- TODO confirm
VIDEO_RATE = 25  # presumably video frames per second -- TODO confirm
MAX_LEN = 3  # presumably a clip length in seconds -- TODO confirm
# Derived: MAX_LEN scaled by each rate, truncated to an int.
MAX_LEN_SPEECH = int(MAX_LEN * FRAME_RATE)
MAX_LEN_VIDEO = int(VIDEO_RATE * MAX_LEN)

# ---- Mixture settings ----
dB = 5  # NOTE(review): meaning not visible here; verify against the data loader
mix_spk = 2  # presumably the number of speakers per mixture -- TODO confirm
# Settings for the low-latency model.
# Normalisation choice: cLN, BN, LN (only in causal settings), gLN (non-causal)
mode_LN = 'gLN'
causal = False
# Hyper-parameters for the SS model.
WIN_LEN = 16
VISUAL_DIM = 64
SKIP = True

# Depth settings, paired as (layer, stack) for the main and visual branches.
layer, stack = 8, 3
visual_layer, visual_stack = 4, 1

# Modality fusion: which scheme to use and where it is applied.
MODAL_FUSION = 'CF'  # CF or DCF
FUSION_POSITION = '8'  # '0','8','16' (kept as a string, not an int)
# Loss settings.
# NOTE(review): low_latency is listed under the loss heading; how it relates
# to the `causal` flag is not visible in this file -- confirm with its users.
low_latency = False
loss_type = 'sisnr'