From 9f869766b23b0fa61d91e78fcc337b1e3be3fca8 Mon Sep 17 00:00:00 2001 From: Tor Egge Date: Fri, 20 Sep 2024 15:06:47 +0200 Subject: [PATCH] Pass word along with counts to posting list reader. --- searchlib/src/vespa/searchlib/diskindex/fieldreader.cpp | 2 +- .../src/vespa/searchlib/diskindex/zc4_posting_reader.cpp | 4 ++-- searchlib/src/vespa/searchlib/diskindex/zc4_posting_reader.h | 2 +- .../src/vespa/searchlib/diskindex/zc4_posting_reader_base.cpp | 4 +++- .../src/vespa/searchlib/diskindex/zc4_posting_reader_base.h | 3 ++- searchlib/src/vespa/searchlib/diskindex/zcposting.cpp | 4 ++-- searchlib/src/vespa/searchlib/diskindex/zcposting.h | 2 +- searchlib/src/vespa/searchlib/index/postinglistfile.h | 2 +- .../src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp | 2 +- 9 files changed, 14 insertions(+), 11 deletions(-) diff --git a/searchlib/src/vespa/searchlib/diskindex/fieldreader.cpp b/searchlib/src/vespa/searchlib/diskindex/fieldreader.cpp index e8ee607764a3..33faf9e0946f 100644 --- a/searchlib/src/vespa/searchlib/diskindex/fieldreader.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/fieldreader.cpp @@ -53,7 +53,7 @@ FieldReader::readCounts() { PostingListCounts counts; _dictFile->readWord(_word, _oldWordNum, counts); - _oldposoccfile->readCounts(counts); + _oldposoccfile->read_word_and_counts(_word, counts); if (_oldWordNum != noWordNumHigh()) { _wordNum = _wordNumMapper.map(_oldWordNum); assert(_wordNum != noWordNum()); diff --git a/searchlib/src/vespa/searchlib/diskindex/zc4_posting_reader.cpp b/searchlib/src/vespa/searchlib/diskindex/zc4_posting_reader.cpp index a5325f3265e3..c49ccf97864c 100644 --- a/searchlib/src/vespa/searchlib/diskindex/zc4_posting_reader.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/zc4_posting_reader.cpp @@ -90,9 +90,9 @@ Zc4PostingReader::read_word_start() template void -Zc4PostingReader::set_counts(const PostingListCounts &counts) +Zc4PostingReader::set_word_and_counts(const std::string& word, const index::PostingListCounts& 
counts) { - Zc4PostingReaderBase::set_counts(*_decodeContext, counts); + Zc4PostingReaderBase::set_word_and_counts(*_decodeContext, word, counts); } template diff --git a/searchlib/src/vespa/searchlib/diskindex/zc4_posting_reader.h b/searchlib/src/vespa/searchlib/diskindex/zc4_posting_reader.h index ce381c9680e1..c0af398e589b 100644 --- a/searchlib/src/vespa/searchlib/diskindex/zc4_posting_reader.h +++ b/searchlib/src/vespa/searchlib/diskindex/zc4_posting_reader.h @@ -34,7 +34,7 @@ class Zc4PostingReader : public Zc4PostingReaderBase Zc4PostingReader &operator=(Zc4PostingReader &&) = delete; ~Zc4PostingReader(); void read_doc_id_and_features(index::DocIdAndFeatures &features); - void set_counts(const index::PostingListCounts &counts); + void set_word_and_counts(const std::string& word, const index::PostingListCounts& counts); void set_decode_features(DecodeContext *decode_features); DecodeContext &get_decode_features() const { return *_decodeContext; } }; diff --git a/searchlib/src/vespa/searchlib/diskindex/zc4_posting_reader_base.cpp b/searchlib/src/vespa/searchlib/diskindex/zc4_posting_reader_base.cpp index 9128b356c6ac..bbc745a9b48a 100644 --- a/searchlib/src/vespa/searchlib/diskindex/zc4_posting_reader_base.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/zc4_posting_reader_base.cpp @@ -190,6 +190,7 @@ Zc4PostingReaderBase::Zc4PostingReaderBase(bool dynamic_k) _l4_skip(), _chunkNo(0), _features_size(0), + _word(), _counts(), _residue(0) { @@ -295,9 +296,10 @@ Zc4PostingReaderBase::read_word_start(DecodeContext64Base &decode_context) } void -Zc4PostingReaderBase::set_counts(DecodeContext64Base &decode_context, const PostingListCounts &counts) +Zc4PostingReaderBase::set_word_and_counts(bitcompression::DecodeContext64Base &decode_context, const std::string& word, const index::PostingListCounts &counts) { assert(!_has_more && _residue == 0); // Previous words must have been read. 
+ _word = word; _counts = counts; assert((_counts._numDocs == 0) == (_counts._bitLength == 0)); if (_counts._numDocs > 0) { diff --git a/searchlib/src/vespa/searchlib/diskindex/zc4_posting_reader_base.h b/searchlib/src/vespa/searchlib/diskindex/zc4_posting_reader_base.h index a4308ecf6b82..c9a2caeeb293 100644 --- a/searchlib/src/vespa/searchlib/diskindex/zc4_posting_reader_base.h +++ b/searchlib/src/vespa/searchlib/diskindex/zc4_posting_reader_base.h @@ -110,6 +110,7 @@ class Zc4PostingReaderBase // Variable for validating chunk information while reading uint64_t _features_size; + std::string _word; index::PostingListCounts _counts; uint32_t _residue; // Number of unread documents after word header @@ -124,7 +125,7 @@ class Zc4PostingReaderBase Zc4PostingReaderBase &operator=(Zc4PostingReaderBase &&) = delete; ~Zc4PostingReaderBase(); void read_doc_id_and_features(index::DocIdAndFeatures &features); - void set_counts(bitcompression::DecodeContext64Base &decode_context, const index::PostingListCounts &counts); + void set_word_and_counts(bitcompression::DecodeContext64Base &decode_context, const std::string& word, const index::PostingListCounts& counts); ComprFileReadContext &get_read_context() { return _readContext; } Zc4PostingParams &get_posting_params() { return _posting_params; } }; diff --git a/searchlib/src/vespa/searchlib/diskindex/zcposting.cpp b/searchlib/src/vespa/searchlib/diskindex/zcposting.cpp index 98259997bfbe..f3fe9e201a9c 100644 --- a/searchlib/src/vespa/searchlib/diskindex/zcposting.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/zcposting.cpp @@ -57,9 +57,9 @@ Zc4PostingSeqRead::readDocIdAndFeatures(DocIdAndFeatures &features) } void -Zc4PostingSeqRead::readCounts(const PostingListCounts &counts) +Zc4PostingSeqRead::read_word_and_counts(const std::string& word, const PostingListCounts& counts) { - _reader.set_counts(counts); + _reader.set_word_and_counts(word, counts); } diff --git a/searchlib/src/vespa/searchlib/diskindex/zcposting.h 
b/searchlib/src/vespa/searchlib/diskindex/zcposting.h index 2c2eca93fb66..35a963f72304 100644 --- a/searchlib/src/vespa/searchlib/diskindex/zcposting.h +++ b/searchlib/src/vespa/searchlib/diskindex/zcposting.h @@ -37,7 +37,7 @@ class Zc4PostingSeqRead : public index::PostingListFileSeqRead using PostingListParams = index::PostingListParams; void readDocIdAndFeatures(DocIdAndFeatures &features) override; - void readCounts(const PostingListCounts &counts) override; // Fill in for next word + void read_word_and_counts(const std::string& word, const PostingListCounts &counts) override; // Fill in for next word bool open(const std::string &name, const TuneFileSeqRead &tuneFileRead) override; bool close() override; void getParams(PostingListParams &params) override; diff --git a/searchlib/src/vespa/searchlib/index/postinglistfile.h b/searchlib/src/vespa/searchlib/index/postinglistfile.h index 1750e7bc7b81..4c61d831a7a5 100644 --- a/searchlib/src/vespa/searchlib/index/postinglistfile.h +++ b/searchlib/src/vespa/searchlib/index/postinglistfile.h @@ -34,7 +34,7 @@ class PostingListFileSeqRead { /** * Read counts for a word. */ - virtual void readCounts(const PostingListCounts &counts) = 0; + virtual void read_word_and_counts(const std::string& word, const PostingListCounts& counts) = 0; /** * Open posting list file for sequential read. 
diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp b/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp index 13d84f46524d..95170b8b9797 100644 --- a/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp +++ b/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp @@ -224,7 +224,7 @@ FakeZcFilterOcc::validate_read(const FakeWord &fw) const PostingListCounts counts; counts._bitLength = _compressedBits; counts._numDocs = _hitDocs; - reader.set_counts(counts); + reader.set_word_and_counts(fw.getName(), counts); auto word_pos_iterator(fw._wordPosFeatures.begin()); auto word_pos_iterator_end(fw._wordPosFeatures.end()); DocIdAndPosOccFeatures check_features;