-
Notifications
You must be signed in to change notification settings - Fork 3
/
config.py
68 lines (54 loc) · 1.38 KB
/
config.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import os
import torch
# PATHS
PATH_LINCE_DATASET = os.environ.get("PATH_LINCE_DATASET", "./data/lince/ner")
PATH_GLUECOS_LID = os.environ.get("PATH_GLUECOS_LID_DATASET", "./data/GLUECoS/LID/Romanized")
PATH_GLUECOS_NER = os.environ.get("PATH_GLUECOS_NER_DATASET", "./data/GLUECoS/NER/Romanized")
PATH_BASE_MODELS = os.environ.get("PATH_BASE_MODELS", "./base_models")
PATH_CACHE_DATASET = os.environ.get("PATH_CACHE_DATASET", "./data/cache")
PATH_EXPERIMENTS = os.environ.get("PATH_EXPERIMENTS", "./runs")
# HARDWARE
NUM_WORKERS = min(4, int(os.cpu_count() / 2))
AVAIL_GPUS = min(1, torch.cuda.device_count())
# HYPERPARAMS
GLOBAL_SEED = 42
MAX_EPOCHS = 30
BATCH_SIZE = 32
LEARNING_RATE = 3e-5
WARM_RESTARTS = 50 # Restart after 50 epochs
WEIGHT_DECAY = 0
DROPOUT_RATE = 1e-3
BASE_MODEL = "bert-base-multilingual-cased" # mBERT
MAX_SEQUENCE_LENGTH = 64
PADDING = "max_length"
LABEL2ID = {
"O": 0,
"B-PERSON": 1,
"I-PERSON": 2,
"B-ORGANISATION": 3,
"I-ORGANISATION": 4,
"B-PLACE": 5,
"I-PLACE": 6,
}
LID2ID = {
"hi": 0,
"en": 1,
"rest": 2
}
GLC_NER_LABEL2ID = {
"Other": 0,
"B-Per": 1,
"I-Per": 2,
"B-Org": 3,
"I-Org": 4,
"B-Loc": 5,
"I-Loc": 6
}
GLC_LID_LABEL2ID = {
"EN": 0,
"HI": 1,
"OTHER": 2
}
K_CROSSFOLD_VALIDATION_SPLITS = 10
# PROJECT CONFIGURATION
PROJECT_NAME = "MetaLearning-CodeMix"