-
Notifications
You must be signed in to change notification settings - Fork 9
/
config.ini
55 lines (35 loc) · 2.19 KB
/
config.ini
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# Corpus term statistics file holding the df (document frequency) and cf (corpus frequency) of each term; only df is used here, for calculating the idf.
CORPUS_TERM_DFCF_FILE = ./rob04-title/rob04.title.galago.df.cf
# Query data file. An example line is "301 international organized crime": the first column is the query id and the remaining columns are the query words. The query words are stemmed with Krovetz in Indri.
QUERY_DATA_FILE = ./rob04-title/rob04.title.krovetz.txt
# Document data file, in the same format as the query data file: the first column is the document id and the remaining columns are the document words. The document words are stemmed with Krovetz in Indri.
DOC_DATA_FILE = ./rob04-title/rob04.title.galago.docset
# Initial ranked list generated by Galago with the QL algorithm. An example line is "302 Q0 FBIS4-67701 1 -6.91998700 galago"; although each line holds six values, only the first column (query id) and the third column (doc id) are used.
RERANK_DATA_FILE = ./rob04-title/rob04.title.galago.2k.out
# Qrels file provided as ground truth. An example line is "301 0 FBIS3-10082 1": the first column is the query id, the second column is always 0 and can be ignored, the third column is the document id, and the last column is the relevance degree.
QREL_FILE = ./rob04-title/qrels.rob04.txt
# IDCG file containing the idcg score of each query, calculated in advance. An example line is "601 8.959596": the first column is the query id and the second column is the corresponding idcg score.
QREL_IDCG_FILE = ./rob04-title/rob04.qrels.idcg
# Word embedding file generated by word2vec; the corpus is the Robust 2004 corpus.
WORD_EMBED_FILE = ./wordembedding/rob04.wv.cbow.d300.w10.n10.m10.i10.W.bin
# File in which the resulting ranked list is saved.
SAVE_RANKLIST_FILE = DRMM-LCH-IDF-rob04-title.ranklist
# NOTE(review): undocumented flag; presumably a 0/1 switch controlling whether all queries are included in the calculation - confirm against the code that reads this file.
CAL_ALL_Q = 0
# Number of documents in the corpus.
CORPUS_DOC_COUNT = 521855
# Learning rate of w1.
LR_W1 = 0.02
# Learning rate of w2.
LR_W2 = 0.002
# Mini-batch size.
MINI_BATCH = 20
# Activation function. 0: sigmoid, 1: tanh, 2: relu.
ACTIVATION_FUNC_TYPE = 1
# NOTE(review): undocumented; presumably the number of cross-validation folds - confirm.
FOLD_SIZE = 5
# NOTE(review): undocumented; presumably the maximum number of training iterations - confirm.
MAX_ITERATION = 4
# Task type. 0: robust04, 1: clueweb09.
# NOTE(review): the original comment also listed "2:" with no description; the meaning of value 2 is unknown - confirm.
TASK_TYPE = 0
# For every query, randomly sample 10 documents for each positive document.
SAMPLE_PERPOSITIVE_LIMITED = 10
# NOTE(review): undocumented; presumably the upper limit on samples drawn per query - confirm.
SAMPLE_PERQUERY_LIMITED = 1000
# NOTE(review): undocumented; presumably the upper limit on samples drawn in total - confirm.
SAMPLE_TOTAL_LIMITED = 10000