diff --git a/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp b/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp index 3e1ff1b1f327..dcee24b2d2a7 100644 --- a/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp +++ b/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -65,6 +66,7 @@ using attribute::BasicType; using attribute::CollectionType; using attribute::Config; using attribute::HitEstimate; +using attribute::PostingListSearchContext; using attribute::SearchContextParams; using attribute::test::AttributeBuilder; using fef::MatchData; @@ -161,6 +163,7 @@ class SearchContextTest : public ::testing::Test ConfigMap _floatCfg; ConfigMap _stringCfg; static std::string _test_dir; + static bool _default_preserve_weight; /* preserve-weight flag value captured in SetUpTestSuite() and written back by SetUp() */ static AttributePtr create_as(const AttributeVector& attr, const std::string& name_suffix); @@ -271,6 +274,9 @@ class SearchContextTest : public ::testing::Test // test prefix search void testPrefixSearch(const std::string& name, const Config& cfg); + // Test prefix search with weight information + void test_weighted_prefix_search(const std::string& name, const Config& cfg); + // test fuzzy search void testFuzzySearch(const std::string& name, const Config& cfg); @@ -306,9 +312,11 @@ class SearchContextTest : public ::testing::Test ~SearchContextTest() override; static void SetUpTestSuite(); static void TearDownTestSuite(); + void SetUp() override; /* writes _default_preserve_weight back into PostingListSearchContext */ }; std::string SearchContextTest::_test_dir = "test_data"; +bool SearchContextTest::_default_preserve_weight = false; SearchContextTest::SearchContextTest() : _integerCfg(), @@ -327,6 +335,7 @@ SearchContextTest::SetUpTestSuite() { std::filesystem::remove_all(_test_dir); std::filesystem::create_directory(_test_dir); + _default_preserve_weight = PostingListSearchContext::get_preserve_weight(); /* remember the flag value seen at suite start so SetUp() can restore it */ } void @@ -335,6 +344,12 @@ SearchContextTest::TearDownTestSuite() 
std::filesystem::remove_all(_test_dir); } +void +SearchContextTest::SetUp() +{ + PostingListSearchContext::set_preserve_weight(_default_preserve_weight); /* undo any set_preserve_weight() call left behind by a previously run test */ +} + void SearchContextTest::addReservedDoc(AttributeVector &ptr) { @@ -1547,6 +1562,67 @@ TEST_F(SearchContextTest, test_prefix_search) } } +void +SearchContextTest::test_weighted_prefix_search(const std::string& name, const Config& cfg) +{ /* Builds a string attribute from cfg, runs PREFIXTERM searches with the preserve-weight flag off and on, and checks the weight unpacked for docid 1. */ + SCOPED_TRACE(name); + auto attr = AttributeBuilder(name, cfg).get(); + auto string_attr = std::dynamic_pointer_cast(attr); + ASSERT_TRUE(string_attr); + attr->addDocs(800); + uint32_t docid = 0; + std::string val_a("a"); + std::string val_A("A"); + std::string val_aa("aa"); + std::string val_aaa("aaa"); + std::string val_AAA("AAA"); + std::string val_aaaa("aaaa"); + for (docid = 1; docid < 10; ++docid) { /* docids 1..9 get values; in the multi-value case only docid 1 also gets val_aaa, val_AAA and val_aaaa */ + if (attr->hasMultiValue()) { + string_attr->append(docid, val_a, 3); + string_attr->append(docid, val_A, 2); + string_attr->append(docid, val_aa, 10); + if (docid == 1) { + string_attr->append(docid, val_aaa, 300); + string_attr->append(docid, val_AAA, 200); + string_attr->append(docid, val_aaaa, 1000); + } + } else { + string_attr->update(docid, val_aaa); + } + } + attr->commit(); + + for (auto preserve_weight : { false, true }) { + SCOPED_TRACE(std::string("preserve_weight=") + (preserve_weight ? "true" : "false")); + PostingListSearchContext::set_preserve_weight(preserve_weight); /* not reset in this function; SetUp() writes the saved default back */ + for (auto common_word : { false, true }) { + SCOPED_TRACE(std::string("common_word=") + (common_word ? "true" : "false")); + TermFieldMatchData md; + auto sc = getSearch(*attr, common_word ? val_a : val_aaa, TermType::PREFIXTERM); /* common_word selects the prefix val_a, otherwise the prefix val_aaa */ + sc->fetchPostings(queryeval::ExecuteInfo::FULL, true); + auto itr = sc->createIterator(&md, true); + itr->initRange(1, attr->getCommittedDocIdLimit()); + EXPECT_TRUE(itr->seek(1)); + itr->unpack(1); + EXPECT_EQ(1, md.getDocId()); /* Expected weight for docid 1 (next statement): 1 when preserve_weight is false, common_word is true and cfg.fastSearch() is true. Otherwise: the sum of the appended weights for weighted sets, 1 per appended value for other multi-value attributes (presumably one per value matching the prefix - confirm), and 1 for single-value attributes. */ + int32_t expected_weight = (preserve_weight || !common_word || !cfg.fastSearch()) ? + (attr->hasWeightedSetType() ? + (common_word ? 
(1000 + 300 + 200 + 10 + 3 + 2) : (1000 + 300 + 200)) : + (attr->hasMultiValue() ? (common_word ? (1 + 1 + 1 + 1 + 1 + 1) : (1 + 1 + 1)) : 1)) : + 1; + EXPECT_EQ(expected_weight, md.getWeight()); + } + } +} + +TEST_F(SearchContextTest, test_weighted_prefix_search) +{ /* runs the weighted prefix check once for every entry in _stringCfg */ + for (const auto& cfg : _stringCfg) { + test_weighted_prefix_search(cfg.first, cfg.second); + } +} + //----------------------------------------------------------------------------- // Test fuzzy search //----------------------------------------------------------------------------- diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.cpp b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.cpp index c66a82732bef..ec5e4ea1da5b 100644 --- a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.cpp +++ b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.cpp @@ -9,6 +9,8 @@ namespace search::attribute { using vespalib::btree::BTreeNode; +bool PostingListSearchContext::_preserve_weight = false; /* weight preservation stays off unless set_preserve_weight(true) is called */ + PostingListSearchContext:: PostingListSearchContext(const IEnumStoreDictionary& dictionary, bool has_btree_dictionary, uint32_t docIdLimit, uint64_t numValues, bool useBitVector, const ISearchContext &baseSearchCtx) diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h index 70f99213498d..fc3999775c28 100644 --- a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h +++ b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h @@ -50,6 +50,7 @@ class PostingListSearchContext : public IPostingListSearchContext EntryRef _frozenRoot; // Posting list in tree form bool _useBitVector; mutable std::optional _estimated_hits; // Snapshot of size of posting lists in range + static bool _preserve_weight; // Use temporary posting list with weight information PostingListSearchContext(const IEnumStoreDictionary& dictionary, bool 
has_btree_dictionary, uint32_t docIdLimit, uint64_t numValues, bool useBitVector, const ISearchContext &baseSearchCtx); @@ -78,6 +79,11 @@ class PostingListSearchContext : public IPostingListSearchContext * by looking at the posting lists in the range [lower, upper>. */ virtual size_t calc_estimated_hits_in_range() const = 0; + +public: + // Used by unit tests. + static bool get_preserve_weight() noexcept { return _preserve_weight; } + static void set_preserve_weight(bool value) noexcept { _preserve_weight = value; } /* _preserve_weight is static, so the value set here is shared by every PostingListSearchContext */ }; @@ -100,6 +106,8 @@ class PostingListSearchContextT : public PostingListSearchContext */ PostingListMerger _merger; + static constexpr bool merged_array_has_weight = !std::is_same_v; + PostingListSearchContextT(const IEnumStoreDictionary& dictionary, uint32_t docIdLimit, uint64_t numValues, const PostingStore& posting_store, bool useBitVector, const ISearchContext &baseSearchCtx); diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp index 2b8c4608352b..dbe282e23625 100644 --- a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp +++ b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp @@ -170,8 +170,9 @@ PostingListSearchContextT::fetchPostings(const ExecuteInfo & exec_info, b if (!_merger.merge_done() && _uniqueValues >= 2u && this->_dictionary.get_has_btree_dictionary()) { if (strict || use_posting_lists_when_non_strict(exec_info)) { size_t sum = estimated_hits_in_range(); + bool force_array = merged_array_has_weight && _preserve_weight && !_useBitVector; /* take the reserveArray/fillArray branch even when sum is not below the threshold, provided merged_array_has_weight and _preserve_weight are true and _useBitVector is false */ //TODO Honour soft_doom and forward it to merge code - if (sum < (_docIdLimit * threshold_for_using_array)) { + if (sum < (_docIdLimit * threshold_for_using_array) || force_array) { _merger.reserveArray(_uniqueValues, sum); fillArray(); } else {