From c219fc51324905a5f10b3df40a6466510a15aa38 Mon Sep 17 00:00:00 2001 From: PragmaTwice Date: Fri, 12 Apr 2024 16:48:49 +0900 Subject: [PATCH 1/2] Unify index info structure between indexer and sema checker --- src/search/index_info.h | 61 +++++++++++++++++++++++++++ src/search/indexer.cc | 43 +++++++++---------- src/search/indexer.h | 25 +++-------- src/search/ir.h | 3 ++ src/search/ir_sema_checker.h | 43 +++---------------- tests/cppunit/indexer_test.cc | 28 ++++++------ tests/cppunit/ir_sema_checker_test.cc | 2 - 7 files changed, 114 insertions(+), 91 deletions(-) create mode 100644 src/search/index_info.h diff --git a/src/search/index_info.h b/src/search/index_info.h new file mode 100644 index 00000000000..e6e95b72348 --- /dev/null +++ b/src/search/index_info.h @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#pragma once + +#include +#include +#include + +#include "search_encoding.h" + +namespace kqir { + +struct IndexInfo; + +struct FieldInfo { + std::string name; + IndexInfo *index = nullptr; + std::unique_ptr metadata; + + FieldInfo(std::string name, std::unique_ptr &&metadata) + : name(std::move(name)), metadata(std::move(metadata)) {} +}; + +struct IndexInfo { + using FieldMap = std::map; + + std::string name; + SearchMetadata metadata; + FieldMap fields; + redis::SearchPrefixesMetadata prefixes; + + IndexInfo(std::string name, SearchMetadata metadata) : name(std::move(name)), metadata(std::move(metadata)) {} + + void Add(FieldInfo &&field) { + const auto &name = field.name; + field.index = this; + fields.emplace(name, std::move(field)); + } +}; + +using IndexMap = std::map; + +} // namespace kqir diff --git a/src/search/indexer.cc b/src/search/indexer.cc index df8f8eb3b2f..27462221e47 100644 --- a/src/search/indexer.cc +++ b/src/search/indexer.cc @@ -77,7 +77,7 @@ rocksdb::Status FieldValueRetriever::Retrieve(std::string_view field, std::strin } } -StatusOr IndexUpdater::Record(std::string_view key, const std::string &ns) { +StatusOr IndexUpdater::Record(std::string_view key, const std::string &ns) const { Database db(indexer->storage, ns); RedisType type = kRedisNone; @@ -87,16 +87,16 @@ StatusOr IndexUpdater::Record(std::string_view key, c // key not exist if (type == kRedisNone) return FieldValues(); - if (type != static_cast(metadata.on_data_type)) { + if (type != static_cast(info->metadata.on_data_type)) { // not the expected type, stop record return {Status::TypeMismatched}; } - auto retriever = GET_OR_RET(FieldValueRetriever::Create(metadata.on_data_type, key, indexer->storage, ns)); + auto retriever = GET_OR_RET(FieldValueRetriever::Create(info->metadata.on_data_type, key, indexer->storage, ns)); FieldValues values; - for (const auto &[field, info] : fields) { - if (info->noindex) { + for (const auto &[field, i] : info->fields) { + if (i.metadata->noindex) { continue; } @@ -112,20 +112,20 @@ StatusOr IndexUpdater::Record(std::string_view key, c } Status IndexUpdater::UpdateIndex(const std::string &field, std::string_view key, std::string_view original, - std::string_view current, const std::string &ns) { + std::string_view current, const std::string &ns) const { if (original == current) { // the value of this field is unchanged, no need to update return Status::OK(); } - auto iter = fields.find(field); - if (iter == fields.end()) { + auto iter = info->fields.find(field); + if (iter == info->fields.end()) { return {Status::NotOK, "No such field to do index updating"}; } - auto *metadata = iter->second.get(); + auto *metadata = iter->second.metadata.get(); auto *storage = indexer->storage; - auto ns_key = ComposeNamespaceKey(ns, name, storage->IsSlotIdEncoded()); + auto ns_key = ComposeNamespaceKey(ns, info->name, storage->IsSlotIdEncoded()); if (auto tag = dynamic_cast(metadata)) { const char delim[] = {tag->separator, '\0'}; auto original_tags = util::Split(original, delim); @@ -163,14 +163,14 @@ Status IndexUpdater::UpdateIndex(const std::string &field, std::string_view key, for (const auto &tag : tags_to_delete) { auto sub_key = ConstructTagFieldSubkey(field, tag, key); - auto index_key = InternalKey(ns_key, sub_key, this->metadata.version, storage->IsSlotIdEncoded()); + auto index_key = InternalKey(ns_key, sub_key, info->metadata.version, storage->IsSlotIdEncoded()); batch->Delete(cf_handle, index_key.Encode()); } for (const auto &tag : tags_to_add) { auto sub_key = ConstructTagFieldSubkey(field, tag, key); - auto index_key = InternalKey(ns_key, sub_key, this->metadata.version, storage->IsSlotIdEncoded()); + auto index_key = InternalKey(ns_key, sub_key, info->metadata.version, storage->IsSlotIdEncoded()); batch->Put(cf_handle, index_key.Encode(), Slice()); } @@ -184,7 +184,7 @@ Status IndexUpdater::UpdateIndex(const std::string &field, std::string_view key, if (!original.empty()) { auto original_num = GET_OR_RET(ParseFloat(std::string(original.begin(), original.end()))); auto sub_key = ConstructNumericFieldSubkey(field, original_num, key); - auto index_key = InternalKey(ns_key, sub_key, this->metadata.version, storage->IsSlotIdEncoded()); + auto index_key = InternalKey(ns_key, sub_key, info->metadata.version, storage->IsSlotIdEncoded()); batch->Delete(cf_handle, index_key.Encode()); } @@ -192,7 +192,7 @@ Status IndexUpdater::UpdateIndex(const std::string &field, std::string_view key, if (!current.empty()) { auto current_num = GET_OR_RET(ParseFloat(std::string(current.begin(), current.end()))); auto sub_key = ConstructNumericFieldSubkey(field, current_num, key); - auto index_key = InternalKey(ns_key, sub_key, this->metadata.version, storage->IsSlotIdEncoded()); + auto index_key = InternalKey(ns_key, sub_key, info->metadata.version, storage->IsSlotIdEncoded()); batch->Put(cf_handle, index_key.Encode(), Slice()); } @@ -206,11 +206,11 @@ Status IndexUpdater::UpdateIndex(const std::string &field, std::string_view key, return Status::OK(); } -Status IndexUpdater::Update(const FieldValues &original, std::string_view key, const std::string &ns) { +Status IndexUpdater::Update(const FieldValues &original, std::string_view key, const std::string &ns) const { auto current = GET_OR_RET(Record(key, ns)); - for (const auto &[field, info] : fields) { - if (info->noindex) { + for (const auto &[field, i] : info->fields) { + if (i.metadata->noindex) { continue; } @@ -230,9 +230,8 @@ Status IndexUpdater::Update(const FieldValues &original, std::string_view key, c } void GlobalIndexer::Add(IndexUpdater updater) { - auto &up = updaters.emplace_back(std::move(updater)); - for (const auto &prefix : up.prefixes) { - prefix_map.insert(prefix, &up); + for (const auto &prefix : updater.info->prefixes.prefixes) { + prefix_map.insert(prefix, updater); } } @@ -240,14 +239,14 @@ StatusOr GlobalIndexer::Record(std::string_view key auto iter = prefix_map.longest_prefix(key); if (iter != prefix_map.end()) { auto updater = iter.value(); - return std::make_pair(updater, GET_OR_RET(updater->Record(key, ns))); + return std::make_pair(updater, GET_OR_RET(updater.Record(key, ns))); } return {Status::NoPrefixMatched}; } Status GlobalIndexer::Update(const RecordResult &original, std::string_view key, const std::string &ns) { - return original.first->Update(original.second, key, ns); + return original.first.Update(original.second, key, ns); } } // namespace redis diff --git a/src/search/indexer.h b/src/search/indexer.h index c404ecbbc07..d238955707b 100644 --- a/src/search/indexer.h +++ b/src/search/indexer.h @@ -29,6 +29,7 @@ #include "commands/commander.h" #include "config/config.h" +#include "index_info.h" #include "indexer.h" #include "search/search_encoding.h" #include "server/server.h" @@ -69,32 +70,20 @@ struct FieldValueRetriever { struct IndexUpdater { using FieldValues = std::map; - std::string name; - SearchMetadata metadata; - std::vector prefixes; - std::map> fields; + const kqir::IndexInfo *info = nullptr; GlobalIndexer *indexer = nullptr; - IndexUpdater(const IndexUpdater &) = delete; - IndexUpdater(IndexUpdater &&) = default; - - IndexUpdater &operator=(IndexUpdater &&) = default; - IndexUpdater &operator=(const IndexUpdater &) = delete; - - ~IndexUpdater() = default; - - StatusOr Record(std::string_view key, const std::string &ns); + StatusOr Record(std::string_view key, const std::string &ns) const; Status UpdateIndex(const std::string &field, std::string_view key, std::string_view original, - std::string_view current, const std::string &ns); - Status Update(const FieldValues &original, std::string_view key, const std::string &ns); + std::string_view current, const std::string &ns) const; + Status Update(const FieldValues &original, std::string_view key, const std::string &ns) const; }; struct GlobalIndexer { using FieldValues = IndexUpdater::FieldValues; - using RecordResult = std::pair; + using RecordResult = std::pair; - std::deque updaters; - tsl::htrie_map prefix_map; + tsl::htrie_map prefix_map; engine::Storage *storage = nullptr; diff --git a/src/search/ir.h b/src/search/ir.h index 6bcabff72d6..2b85b458034 100644 --- a/src/search/ir.h +++ b/src/search/ir.h @@ -34,6 +34,7 @@ #include "fmt/core.h" #include "ir_iterator.h" +#include "search/index_info.h" #include "string_util.h" #include "type_util.h" @@ -76,6 +77,7 @@ struct Ref : Node {}; struct FieldRef : Ref { std::string name; + const FieldInfo *info = nullptr; explicit FieldRef(std::string name) : name(std::move(name)) {} @@ -348,6 +350,7 @@ struct SelectClause : Node { struct IndexRef : Ref { std::string name; + const IndexInfo *info = nullptr; explicit IndexRef(std::string name) : name(std::move(name)) {} diff --git a/src/search/ir_sema_checker.h b/src/search/ir_sema_checker.h index 21c0ea0fc7e..471c3a3f714 100644 --- a/src/search/ir_sema_checker.h +++ b/src/search/ir_sema_checker.h @@ -23,49 +23,18 @@ #include #include +#include "index_info.h" #include "ir.h" #include "search_encoding.h" #include "storage/redis_metadata.h" namespace kqir { -struct IndexInfo; - -struct FieldInfo { - std::string name; - IndexInfo *index = nullptr; - std::unique_ptr metadata; - - FieldInfo(std::string name, std::unique_ptr &&metadata) - : name(std::move(name)), metadata(std::move(metadata)) {} -}; - -struct IndexInfo { - using FieldMap = std::map; - - std::string name; - SearchMetadata metadata; - FieldMap fields; - - IndexInfo(std::string name, SearchMetadata metadata) : name(std::move(name)), metadata(std::move(metadata)) {} - - void Add(FieldInfo &&field) { - const auto &name = field.name; - field.index = this; - fields.emplace(name, std::move(field)); - } -}; - -using IndexMap = std::map; - struct SemaChecker { const IndexMap &index_map; const IndexInfo *current_index = nullptr; - using Result = std::map>; - Result result; - explicit SemaChecker(const IndexMap &index_map) : index_map(index_map) {} Status Check(Node *node) { @@ -73,7 +42,7 @@ struct SemaChecker { auto index_name = v->index->name; if (auto iter = index_map.find(index_name); iter != index_map.end()) { current_index = &iter->second; - result.emplace(v->index.get(), current_index); + v->index->info = current_index; GET_OR_RET(Check(v->select.get())); GET_OR_RET(Check(v->query_expr.get())); @@ -88,7 +57,7 @@ struct SemaChecker { if (auto iter = current_index->fields.find(v->field->name); iter == current_index->fields.end()) { return {Status::NotOK, fmt::format("field `{}` not found in index `{}`", v->field->name, current_index->name)}; } else { - result.emplace(v->field.get(), &iter->second); + v->field->info = &iter->second; } } else if (auto v = dynamic_cast(node)) { for (const auto &n : v->inners) { @@ -106,7 +75,7 @@ struct SemaChecker { } else if (auto meta = dynamic_cast(iter->second.metadata.get()); !meta) { return {Status::NotOK, fmt::format("field `{}` is not a tag field", v->field->name)}; } else { - result.emplace(v->field.get(), &iter->second); + v->field->info = &iter->second; if (v->tag->val.empty()) { return {Status::NotOK, "tag cannot be an empty string"}; @@ -122,14 +91,14 @@ struct SemaChecker { } else if (!dynamic_cast(iter->second.metadata.get())) { return {Status::NotOK, fmt::format("field `{}` is not a numeric field", v->field->name)}; } else { - result.emplace(v->field.get(), &iter->second); + v->field->info = &iter->second; } } else if (auto v = dynamic_cast(node)) { for (const auto &n : v->fields) { if (auto iter = current_index->fields.find(n->name); iter == current_index->fields.end()) { return {Status::NotOK, fmt::format("field `{}` not found in index `{}`", n->name, current_index->name)}; } else { - result.emplace(n.get(), &iter->second); + n->info = &iter->second; } } } else if (auto v [[maybe_unused]] = dynamic_cast(node)) { diff --git a/tests/cppunit/indexer_test.cc b/tests/cppunit/indexer_test.cc index f30b45cad25..b827102a730 100644 --- a/tests/cppunit/indexer_test.cc +++ b/tests/cppunit/indexer_test.cc @@ -25,32 +25,36 @@ #include +#include "search/index_info.h" #include "search/search_encoding.h" #include "storage/redis_metadata.h" #include "types/redis_hash.h" struct IndexerTest : TestBase { redis::GlobalIndexer indexer; + kqir::IndexMap map; std::string ns = "index_test"; IndexerTest() : indexer(storage_.get()) { SearchMetadata hash_field_meta(false); hash_field_meta.on_data_type = SearchOnDataType::HASH; - std::map> hash_fields; - hash_fields.emplace("x", std::make_unique()); - hash_fields.emplace("y", std::make_unique()); + kqir::IndexInfo hash_info("hashtest", hash_field_meta); + hash_info.Add(kqir::FieldInfo("x", std::make_unique())); + hash_info.Add(kqir::FieldInfo("y", std::make_unique())); - redis::IndexUpdater hash_updater{"hashtest", hash_field_meta, {"idxtesthash"}, std::move(hash_fields), &indexer}; + map.emplace("hashtest", std::move(hash_info)); + + redis::IndexUpdater hash_updater{&map.at("hashtest"), &indexer}; SearchMetadata json_field_meta(false); json_field_meta.on_data_type = SearchOnDataType::JSON; - std::map> json_fields; - json_fields.emplace("$.x", std::make_unique()); - json_fields.emplace("$.y", std::make_unique()); + kqir::IndexInfo json_info("jsontest", json_field_meta); + json_info.Add(kqir::FieldInfo("$.x", std::make_unique())); + json_info.Add(kqir::FieldInfo("$.y", std::make_unique())); - redis::IndexUpdater json_updater{"jsontest", json_field_meta, {"idxtestjson"}, std::move(json_fields), &indexer}; + redis::IndexUpdater json_updater{&map.at("jsontest"), &indexer}; indexer.Add(std::move(hash_updater)); indexer.Add(std::move(json_updater)); @@ -72,7 +76,7 @@ TEST_F(IndexerTest, HashTag) { { auto s = indexer.Record(key1, ns); ASSERT_TRUE(s); - ASSERT_EQ(s->first->name, idxname); + ASSERT_EQ(s->first.info->name, idxname); ASSERT_TRUE(s->second.empty()); uint64_t cnt = 0; @@ -111,7 +115,7 @@ TEST_F(IndexerTest, HashTag) { { auto s = indexer.Record(key1, ns); ASSERT_TRUE(s); - ASSERT_EQ(s->first->name, idxname); + ASSERT_EQ(s->first.info->name, idxname); ASSERT_EQ(s->second.size(), 1); ASSERT_EQ(s->second["x"], "food,kitChen,Beauty"); @@ -179,7 +183,7 @@ TEST_F(IndexerTest, JsonTag) { { auto s = indexer.Record(key1, ns); ASSERT_TRUE(s); - ASSERT_EQ(s->first->name, idxname); + ASSERT_EQ(s->first.info->name, idxname); ASSERT_TRUE(s->second.empty()); auto s_set = db.Set(key1, "$", R"({"x": "food,kitChen,Beauty"})"); @@ -217,7 +221,7 @@ TEST_F(IndexerTest, JsonTag) { { auto s = indexer.Record(key1, ns); ASSERT_TRUE(s); - ASSERT_EQ(s->first->name, idxname); + ASSERT_EQ(s->first.info->name, idxname); ASSERT_EQ(s->second.size(), 1); ASSERT_EQ(s->second["$.x"], "food,kitChen,Beauty"); diff --git a/tests/cppunit/ir_sema_checker_test.cc b/tests/cppunit/ir_sema_checker_test.cc index db222f6cadd..75beb68693e 100644 --- a/tests/cppunit/ir_sema_checker_test.cc +++ b/tests/cppunit/ir_sema_checker_test.cc @@ -76,7 +76,5 @@ TEST(SemaCheckerTest, Simple) { auto root = *Parse("select f1 from ia where f1 hastag \"a\" and f2 = 1 order by f3"); ASSERT_EQ(checker.Check(root.get()).Msg(), "ok"); - - ASSERT_EQ(checker.result.size(), 5); } } From 168df3beadd90992e46baf6fdedfc606f51e0bd4 Mon Sep 17 00:00:00 2001 From: PragmaTwice Date: Fri, 12 Apr 2024 20:27:45 +0900 Subject: [PATCH 2/2] fix --- src/search/indexer.cc | 1 + src/search/indexer.h | 2 ++ tests/cppunit/indexer_test.cc | 8 ++++++-- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/search/indexer.cc b/src/search/indexer.cc index 27462221e47..3e7bbf1fb46 100644 --- a/src/search/indexer.cc +++ b/src/search/indexer.cc @@ -230,6 +230,7 @@ Status IndexUpdater::Update(const FieldValues &original, std::string_view key, c } void GlobalIndexer::Add(IndexUpdater updater) { + updater.indexer = this; for (const auto &prefix : updater.info->prefixes.prefixes) { prefix_map.insert(prefix, updater); } diff --git a/src/search/indexer.h b/src/search/indexer.h index d238955707b..dc1f03e0bdc 100644 --- a/src/search/indexer.h +++ b/src/search/indexer.h @@ -73,6 +73,8 @@ struct IndexUpdater { const kqir::IndexInfo *info = nullptr; GlobalIndexer *indexer = nullptr; + explicit IndexUpdater(const kqir::IndexInfo *info) : info(info) {} + StatusOr Record(std::string_view key, const std::string &ns) const; Status UpdateIndex(const std::string &field, std::string_view key, std::string_view original, std::string_view current, const std::string &ns) const; diff --git a/tests/cppunit/indexer_test.cc b/tests/cppunit/indexer_test.cc index b827102a730..ae5a045ef56 100644 --- a/tests/cppunit/indexer_test.cc +++ b/tests/cppunit/indexer_test.cc @@ -42,10 +42,11 @@ struct IndexerTest : TestBase { kqir::IndexInfo hash_info("hashtest", hash_field_meta); hash_info.Add(kqir::FieldInfo("x", std::make_unique())); hash_info.Add(kqir::FieldInfo("y", std::make_unique())); + hash_info.prefixes.prefixes.emplace_back("idxtesthash"); map.emplace("hashtest", std::move(hash_info)); - redis::IndexUpdater hash_updater{&map.at("hashtest"), &indexer}; + redis::IndexUpdater hash_updater{&map.at("hashtest")}; SearchMetadata json_field_meta(false); json_field_meta.on_data_type = SearchOnDataType::JSON; @@ -53,8 +54,11 @@ struct IndexerTest : TestBase { kqir::IndexInfo json_info("jsontest", json_field_meta); json_info.Add(kqir::FieldInfo("$.x", std::make_unique())); json_info.Add(kqir::FieldInfo("$.y", std::make_unique())); + json_info.prefixes.prefixes.emplace_back("idxtestjson"); - redis::IndexUpdater json_updater{&map.at("jsontest"), &indexer}; + map.emplace("jsontest", std::move(json_info)); + + redis::IndexUpdater json_updater{&map.at("jsontest")}; indexer.Add(std::move(hash_updater)); indexer.Add(std::move(json_updater));