This repository provides the implementation of 2GTI, the method proposed in the paper "Optimizing Guided Traversal for Fast Learned Sparse Retrieval". The implementation is built on top of the PISA search engine.
The index file for 2GTI can be found here.
# Prerequisite: DATA must name the directory that holds the 2GTI collection,
# msmarco_dev.queries and msmarco.lex (all three are read below).
: "${DATA:?set DATA to the directory containing the 2GTI collection and MS MARCO files}"

# Configure and build out-of-tree. `mkdir -p` lets the steps be re-run when
# build/ already exists; the && chain stops at the first failing step so that
# cmake/make never run from the wrong directory.
mkdir -p build &&
  cd build &&
  cmake .. &&
  make &&
  cd bin

# Generate the two index artifacts used by the query tools below:
# 2GTI.wand and 2GTI.index. "$DATA" is quoted so paths containing spaces work.
./create_wand_data -c "$DATA/2GTI" -o 2GTI.wand -s gt -b 512
./compress_inverted_index -c "$DATA/2GTI" -o 2GTI.index -e block_simdbp

# Calculate the relevance in parallel; the output is redirected to 2GTI.trec.
./evaluate_queries -e block_simdbp -i 2GTI.index -w 2GTI.wand \
  -q "$DATA/msmarco_dev.queries" -k 10 -a maxscore --weighted -s gt \
  --documents "$DATA/msmarco.lex" --alpha 1 --beta 0.3 --gamma 0.05 > 2GTI.trec

# Measure the query latency in a single thread (same index, queries and
# alpha/beta/gamma settings as the evaluation run above).
./queries -e block_simdbp -i 2GTI.index -w 2GTI.wand \
  -q "$DATA/msmarco_dev.queries" -k 10 -a maxscore --weighted -s gt \
  --alpha 1 --beta 0.3 --gamma 0.05
(2GTI) Yifan Qiao, Yingrui Yang, Haixin Lin, Tao Yang. 2023. Optimizing Guided Traversal for Fast Learned Sparse Retrieval. Proc. of the ACM Web Conference 2023 (WWW ’23), May 1–5, 2023, Austin, TX, USA.
@inproceedings{2GTI2023,
author = {Qiao, Yifan and Yang, Yingrui and Lin, Haixin and Yang, Tao},
title = {Optimizing Guided Traversal for Fast Learned Sparse Retrieval},
booktitle = {Proceedings of the ACM Web Conference 2023 (WWW ’23), May 1–5, 2023, Austin, TX, USA},
year = {2023},
url = {https://doi.org/10.1145/3543507.3583497}
}
(DT) Yifan Qiao, Yingrui Yang, Haixin Lin, Tianbo Xiong, Xiyue Wang, and Tao Yang. 2022. Dual Skipping Guidance for Document Retrieval with Learned Sparse Representations. ArXiv abs/2204.11154 (April 2022).
@misc{DT2022,
author = {Qiao, Yifan and Yang, Yingrui and Lin, Haixin and Xiong, Tianbo and Wang, Xiyue and Yang, Tao},
title = {Dual Skipping Guidance for Document Retrieval with Learned Sparse Representations},
publisher = {arXiv},
year = {2022},
url = {https://arxiv.org/abs/2204.11154}
}