diff --git a/CMakeLists.txt b/CMakeLists.txt index ea4d1bec892..3e384e03665 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -138,6 +138,7 @@ include(cmake/fmt.cmake) include(cmake/jsoncons.cmake) include(cmake/xxhash.cmake) include(cmake/span.cmake) +include(cmake/trie.cmake) if (ENABLE_LUAJIT) include(cmake/luajit.cmake) @@ -169,6 +170,7 @@ list(APPEND EXTERNAL_LIBS Threads::Threads) list(APPEND EXTERNAL_LIBS ${Backtrace_LIBRARY}) list(APPEND EXTERNAL_LIBS xxhash) list(APPEND EXTERNAL_LIBS span-lite) +list(APPEND EXTERNAL_LIBS tsl_hat_trie) # Add git sha to version.h find_package(Git REQUIRED) diff --git a/NOTICE b/NOTICE index c3ab6e56d30..a25a60d1e70 100644 --- a/NOTICE +++ b/NOTICE @@ -65,6 +65,7 @@ The text of each license is also included in licenses/LICENSE-[project].txt * fmt(https://github.com/fmtlib/fmt) * LuaJIT(https://github.com/KvrocksLabs/LuaJIT) * lua(https://github.com/KvrocksLabs/lua, alternative to LuaJIT) +* hat-trie(https://github.com/Tessil/hat-trie) ================================================================ Boost Software License Version 1.0 diff --git a/cmake/trie.cmake b/cmake/trie.cmake new file mode 100644 index 00000000000..30d63429c25 --- /dev/null +++ b/cmake/trie.cmake @@ -0,0 +1,27 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +include_guard() + +include(cmake/utils.cmake) + +FetchContent_DeclareGitHubWithMirror(trie + Tessil/hat-trie 906e6abd1e7063f1dacd3a6b270aa654b525eb0a + MD5=a930364e9f6b60371319664bddf78000 +) + +FetchContent_MakeAvailableWithArgs(trie) diff --git a/licenses/LICENSE-hat-trie.txt b/licenses/LICENSE-hat-trie.txt new file mode 100644 index 00000000000..e9c5ae95f36 --- /dev/null +++ b/licenses/LICENSE-hat-trie.txt @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2017 Thibaut Goetghebuer-Planchon + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/src/common/status.h b/src/common/status.h index 37eae9d8281..ade19f86d0e 100644 --- a/src/common/status.h +++ b/src/common/status.h @@ -66,6 +66,9 @@ class [[nodiscard]] Status { // Blocking BlockingCmd, + + // Search + NoPrefixMatched, }; Status() : impl_{nullptr} {} diff --git a/src/search/indexer.cc b/src/search/indexer.cc new file mode 100644 index 00000000000..f608d3df6dc --- /dev/null +++ b/src/search/indexer.cc @@ -0,0 +1,118 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#include "indexer.h" + +#include + +#include "storage/redis_metadata.h" +#include "types/redis_hash.h" + +namespace redis { + +StatusOr FieldValueRetriever::Create(SearchOnDataType type, std::string_view key, + engine::Storage *storage, const std::string &ns) { + if (type == SearchOnDataType::HASH) { + Hash db(storage, ns); + std::string ns_key = db.AppendNamespacePrefix(key); + HashMetadata metadata(false); + auto s = db.GetMetadata(ns_key, &metadata); + if (!s.ok()) return {Status::NotOK, s.ToString()}; + return FieldValueRetriever(db, metadata, key); + } else if (type == SearchOnDataType::JSON) { + Json db(storage, ns); + std::string ns_key = db.AppendNamespacePrefix(key); + JsonMetadata metadata(false); + JsonValue value; + auto s = db.read(ns_key, &metadata, &value); + if (!s.ok()) return {Status::NotOK, s.ToString()}; + return FieldValueRetriever(value); + } else { + assert(false && "unreachable code: unexpected SearchOnDataType"); + __builtin_unreachable(); + } +} + +rocksdb::Status FieldValueRetriever::Retrieve(std::string_view field, std::string *output) { + if (std::holds_alternative(db)) { + auto &[hash, metadata, key] = std::get(db); + std::string ns_key = hash.AppendNamespacePrefix(key); + LatestSnapShot ss(hash.storage_); + rocksdb::ReadOptions read_options; + read_options.snapshot = ss.GetSnapShot(); + std::string sub_key = InternalKey(ns_key, field, metadata.version, hash.storage_->IsSlotIdEncoded()).Encode(); + return hash.storage_->Get(read_options, sub_key, output); + } else if (std::holds_alternative(db)) { + auto &value = std::get(db); + auto s = value.Get(field); + if (!s.IsOK()) return rocksdb::Status::Corruption(s.Msg()); + if (s->value.size() != 1) + return rocksdb::Status::NotFound("json value specified by the field (json path) should exist and be unique"); + *output = s->value[0].as_string(); + return rocksdb::Status::OK(); + } else { + __builtin_unreachable(); + } +} + +StatusOr IndexUpdater::Record(std::string_view key, const std::string &ns) { + Database db(indexer->storage, ns); + + RedisType type = kRedisNone; + auto s = db.Type(key, &type); + if (!s.ok()) return {Status::NotOK, s.ToString()}; + + if (type != static_cast(on_data_type)) { + // not the expected type, stop record + return {Status::NotOK, "this data type cannot be indexed"}; + } + + auto retriever = GET_OR_RET(FieldValueRetriever::Create(on_data_type, key, indexer->storage, ns)); + + FieldValues values; + for (const auto &[field, info] : fields) { + std::string value; + auto s = retriever.Retrieve(field, &value); + if (s.IsNotFound()) continue; + if (!s.ok()) return {Status::NotOK, s.ToString()}; + + values.emplace(field, value); + } + + return values; +} + +void GlobalIndexer::Add(IndexUpdater updater) { + auto &up = updaters.emplace_back(std::move(updater)); + for (const auto &prefix : up.prefixes) { + prefix_map.emplace(prefix, &up); + } +} + +StatusOr GlobalIndexer::Record(std::string_view key, const std::string &ns) { + auto iter = prefix_map.longest_prefix(key); + if (iter != prefix_map.end()) { + return iter.value()->Record(key, ns); + } + + return {Status::NoPrefixMatched}; +} + +} // namespace redis diff --git a/src/search/indexer.h b/src/search/indexer.h new file mode 100644 index 00000000000..e153d55584c --- /dev/null +++ b/src/search/indexer.h @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +#pragma once + +#include + +#include +#include +#include +#include + +#include "commands/commander.h" +#include "config/config.h" +#include "indexer.h" +#include "search/search_encoding.h" +#include "server/server.h" +#include "storage/redis_metadata.h" +#include "storage/storage.h" +#include "types/redis_hash.h" +#include "types/redis_json.h" + +namespace redis { + +struct GlobalIndexer; + +struct FieldValueRetriever { + struct HashData { + Hash hash; + HashMetadata metadata; + std::string_view key; + + HashData(Hash hash, HashMetadata metadata, std::string_view key) + : hash(std::move(hash)), metadata(std::move(metadata)), key(key) {} + }; + using JsonData = JsonValue; + + using Variant = std::variant; + Variant db; + + static StatusOr Create(SearchOnDataType type, std::string_view key, engine::Storage *storage, + const std::string &ns); + + explicit FieldValueRetriever(Hash hash, HashMetadata metadata, std::string_view key) + : db(std::in_place_type, std::move(hash), std::move(metadata), key) {} + + explicit FieldValueRetriever(JsonValue json) : db(std::in_place_type, std::move(json)) {} + + rocksdb::Status Retrieve(std::string_view field, std::string *output); +}; + +struct IndexUpdater { + using FieldValues = std::map; + + SearchOnDataType on_data_type; + std::vector prefixes; + std::map> fields; + GlobalIndexer *indexer = nullptr; + + StatusOr Record(std::string_view key, const std::string &ns); +}; + +struct GlobalIndexer { + std::deque updaters; + tsl::htrie_map prefix_map; + + engine::Storage *storage = nullptr; + + explicit GlobalIndexer(engine::Storage *storage) : storage(storage) {} + + void Add(IndexUpdater updater); + StatusOr Record(std::string_view key, const std::string &ns); +}; + +} // namespace redis diff --git a/src/search/search_encoding.h b/src/search/search_encoding.h index deea106b02d..2acec050dde 100644 --- a/src/search/search_encoding.h +++ b/src/search/search_encoding.h @@ -73,9 +73,9 @@ struct SearchFieldMetadata { void DecodeFlag(uint8_t flag) { noindex = flag & 1; } - void Encode(std::string *dst) const { PutFixed8(dst, MakeFlag()); } + virtual void Encode(std::string *dst) const { PutFixed8(dst, MakeFlag()); } - rocksdb::Status Decode(Slice *input) { + virtual rocksdb::Status Decode(Slice *input) { uint8_t flag = 0; if (!GetFixed8(input, &flag)) { return rocksdb::Status::Corruption(kErrorInsufficientLength); @@ -96,13 +96,13 @@ struct SearchTagFieldMetadata : SearchFieldMetadata { char separator = ','; bool case_sensitive = false; - void Encode(std::string *dst) const { + void Encode(std::string *dst) const override { SearchFieldMetadata::Encode(dst); PutFixed8(dst, separator); PutFixed8(dst, case_sensitive); } - rocksdb::Status Decode(Slice *input) { + rocksdb::Status Decode(Slice *input) override { if (auto s = SearchFieldMetadata::Decode(input); !s.ok()) { return s; } diff --git a/src/types/redis_hash.h b/src/types/redis_hash.h index fc004ed840d..8ae0a066cce 100644 --- a/src/types/redis_hash.h +++ b/src/types/redis_hash.h @@ -66,6 +66,8 @@ class Hash : public SubKeyScanner { private: rocksdb::Status GetMetadata(const Slice &ns_key, HashMetadata *metadata); + + friend struct FieldValueRetriever; }; } // namespace redis diff --git a/src/types/redis_json.h b/src/types/redis_json.h index a2135b756ee..8d0f15cb6dc 100644 --- a/src/types/redis_json.h +++ b/src/types/redis_json.h @@ -76,6 +76,8 @@ class Json : public Database { rocksdb::Status numop(JsonValue::NumOpEnum op, const std::string &user_key, const std::string &path, const std::string &value, JsonValue *result); std::vector readMulti(const std::vector &ns_keys, std::vector &values); + + friend struct FieldValueRetriever; }; } // namespace redis