-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathindex
executable file
·49 lines (40 loc) · 1.63 KB
/
index
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
#!/usr/bin/env bash
#
# Index every collection described by a JSON document into Solr with Anserini.
#
# Arguments:
#   $1 - not read by this script
#   $2 - JSON text with a top-level "collections" array; each element supplies
#        "name" (one of the names handled in the case statement below) and
#        "path" (handed to IndexCollection as -input)
#
# For each collection, a Solr collection of the same name is created and
# IndexCollection is run against it. Once every collection has been processed,
# all Solr instances are stopped.
#
# Exit status:
#   1   - the number of collections could not be read from the JSON
#   255 - a collection name is not supported
#   otherwise the status of the final `solr stop` command
#
# `set -e` is deliberately not enabled: a failing `solr create` does not
# prevent the indexing step from being attempted.

JSON=$2

readonly ANSERINI_HOME="/work/anserini"
readonly SOLR_BIN="${ANSERINI_HOME}/solrini/bin/solr"
readonly INDEX_COLLECTION_BIN="${ANSERINI_HOME}/target/appassembler/bin/IndexCollection"

# Loop-invariant, so computed once instead of once per collection.
THREADS=$(nproc)

# The number of collections. The JSON is fed to jq quoted so that the shell
# neither re-splits whitespace inside it nor expands glob characters in it.
NUM_COLLECTIONS=$(jq -r '.collections | length' <<<"${JSON}")

# jq prints nothing when the JSON cannot be parsed; refuse to continue rather
# than silently indexing zero collections.
if ! [[ ${NUM_COLLECTIONS} =~ ^[0-9]+$ ]]; then
  echo "Could not read the collection list from the JSON argument" >&2
  exit 1
fi

# For each collection...
for ((i = 0; i < NUM_COLLECTIONS; i++)); do
  COLLECTION_NAME=$(jq -r ".collections[${i}].name" <<<"${JSON}")
  COLLECTION_PATH=$(jq -r ".collections[${i}].path" <<<"${JSON}")

  # Map the collection name to the -collection / -generator values passed to
  # IndexCollection.
  case "${COLLECTION_NAME}" in
    core17)
      COLLECTION="NewYorkTimesCollection"
      GENERATOR="JsoupGenerator"
      ;;
    core18)
      COLLECTION="WashingtonPostCollection"
      GENERATOR="WapoGenerator"
      ;;
    robust04)
      COLLECTION="TrecCollection"
      GENERATOR="JsoupGenerator"
      ;;
    gov2)
      COLLECTION="TrecwebCollection"
      GENERATOR="JsoupGenerator"
      ;;
    cw09b)
      COLLECTION="ClueWeb09Collection"
      GENERATOR="JsoupGenerator"
      ;;
    cw12b)
      COLLECTION="ClueWeb12Collection"
      GENERATOR="JsoupGenerator"
      ;;
    *)
      echo "Unsupported collection: ${COLLECTION_NAME}" >&2
      # 255 is the status bash reports for `exit -1`; written out explicitly
      # because negative exit statuses are not portable.
      exit 255
      ;;
  esac

  "${SOLR_BIN}" create -c "${COLLECTION_NAME}" -n anserini -force

  sh "${INDEX_COLLECTION_BIN}" \
    -collection "${COLLECTION}" -generator "${GENERATOR}" \
    -threads "${THREADS}" \
    -solr -solr.batch 1000 -solr.commitWithin 300 -solr.index "${COLLECTION_NAME}" -solr.zkUrl localhost:9983 \
    -input "${COLLECTION_PATH}" \
    -storePositions -storeDocvectors -storeRawDocs
done

# Stop Solr
"${SOLR_BIN}" stop -all