-
Notifications
You must be signed in to change notification settings - Fork 7
/
acl2020.yaml
36 lines (31 loc) · 1.08 KB
/
acl2020.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# general options
# booleans use the canonical lowercase YAML form (true/false)
debug: false
verbose: false
data_version_name: acl2020
uncased: true
# preprocess avro options
process_avro__log_every: 1000
# directory name under data/
process_avro__opiec_dir: OPIEC-Clean
# process only this many OPIEC avro files (only for creating smaller prototype
# data); otherwise set to -1
process_avro__debug_file_nr: -1
# number of workers for preprocessing
process_avro__nr_of_workers: 6
# number of tokens for relations
process_avro__len_relation_word: 10
# number of tokens for subject entities
process_avro__len_subject_word: 10
# number of tokens for object entities
process_avro__len_object_word: 10
# NOTE(review): the three mention-related thresholds below are undocumented;
# confirm their exact semantics against the avro preprocessing code
process_avro__minimum_total_count_for_mentions_per_entity: 5
process_avro__the_top_k_mentions_per_entity: 10
process_avro__min_mention_occurrence_count: 5
# preprocess triples options (vocabulary sizes for mention and relation tokens)
process_triples__mention_tokens_vocab_size: 200000
process_triples__relation_tokens_vocab_size: 50000
# sample_evaluation options (minimum relation token length for testing and
# size of the evaluation data)
sample_evaluation__min_relation_token_length_for_testing: 3
sample_evaluation__eval_data_size: 10000
# create_elasticsearch_index options
create_elasticsearch_index__host: localhost
# NOTE(review): unlike the other keys, num_workers carries no step prefix;
# confirm which step(s) read it
num_workers: 10