Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Unify index info structure between indexer and sema checker #2240

Merged
merged 3 commits into from
Apr 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 61 additions & 0 deletions src/search/index_info.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*/

#pragma once

#include <map>
#include <memory>
#include <string>

#include "search_encoding.h"

namespace kqir {

struct IndexInfo;

/// Describes a single field of a search index: its name, the index it belongs
/// to, and the field-level metadata it owns.
struct FieldInfo {
  /// Name of the field; also used as the key when the field is stored in
  /// IndexInfo::fields.
  std::string name;

  /// Non-owning back-pointer to the index containing this field. It stays null
  /// until the field is registered through IndexInfo::Add.
  IndexInfo *index = nullptr;

  /// Field metadata, exclusively owned by this FieldInfo.
  std::unique_ptr<redis::SearchFieldMetadata> metadata;

  /// Builds a field that is not yet attached to any index.
  ///
  /// @param name      field name, taken by value and moved into place
  /// @param metadata  field metadata whose ownership is transferred to this object
  FieldInfo(std::string name, std::unique_ptr<redis::SearchFieldMetadata> &&metadata)
      : name{std::move(name)}, metadata{std::move(metadata)} {}
};

struct IndexInfo {
using FieldMap = std::map<std::string, FieldInfo>;

std::string name;
SearchMetadata metadata;
FieldMap fields;
redis::SearchPrefixesMetadata prefixes;

IndexInfo(std::string name, SearchMetadata metadata) : name(std::move(name)), metadata(std::move(metadata)) {}

void Add(FieldInfo &&field) {
const auto &name = field.name;
field.index = this;
fields.emplace(name, std::move(field));
}
};

using IndexMap = std::map<std::string, IndexInfo>;

} // namespace kqir
44 changes: 22 additions & 22 deletions src/search/indexer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ rocksdb::Status FieldValueRetriever::Retrieve(std::string_view field, std::strin
}
}

StatusOr<IndexUpdater::FieldValues> IndexUpdater::Record(std::string_view key, const std::string &ns) {
StatusOr<IndexUpdater::FieldValues> IndexUpdater::Record(std::string_view key, const std::string &ns) const {
Database db(indexer->storage, ns);

RedisType type = kRedisNone;
Expand All @@ -87,16 +87,16 @@ StatusOr<IndexUpdater::FieldValues> IndexUpdater::Record(std::string_view key, c
// key not exist
if (type == kRedisNone) return FieldValues();

if (type != static_cast<RedisType>(metadata.on_data_type)) {
if (type != static_cast<RedisType>(info->metadata.on_data_type)) {
// not the expected type, stop record
return {Status::TypeMismatched};
}

auto retriever = GET_OR_RET(FieldValueRetriever::Create(metadata.on_data_type, key, indexer->storage, ns));
auto retriever = GET_OR_RET(FieldValueRetriever::Create(info->metadata.on_data_type, key, indexer->storage, ns));

FieldValues values;
for (const auto &[field, info] : fields) {
if (info->noindex) {
for (const auto &[field, i] : info->fields) {
if (i.metadata->noindex) {
continue;
}

Expand All @@ -112,20 +112,20 @@ StatusOr<IndexUpdater::FieldValues> IndexUpdater::Record(std::string_view key, c
}

Status IndexUpdater::UpdateIndex(const std::string &field, std::string_view key, std::string_view original,
std::string_view current, const std::string &ns) {
std::string_view current, const std::string &ns) const {
if (original == current) {
// the value of this field is unchanged, no need to update
return Status::OK();
}

auto iter = fields.find(field);
if (iter == fields.end()) {
auto iter = info->fields.find(field);
if (iter == info->fields.end()) {
return {Status::NotOK, "No such field to do index updating"};
}

auto *metadata = iter->second.get();
auto *metadata = iter->second.metadata.get();
auto *storage = indexer->storage;
auto ns_key = ComposeNamespaceKey(ns, name, storage->IsSlotIdEncoded());
auto ns_key = ComposeNamespaceKey(ns, info->name, storage->IsSlotIdEncoded());
if (auto tag = dynamic_cast<SearchTagFieldMetadata *>(metadata)) {
const char delim[] = {tag->separator, '\0'};
auto original_tags = util::Split(original, delim);
Expand Down Expand Up @@ -163,14 +163,14 @@ Status IndexUpdater::UpdateIndex(const std::string &field, std::string_view key,

for (const auto &tag : tags_to_delete) {
auto sub_key = ConstructTagFieldSubkey(field, tag, key);
auto index_key = InternalKey(ns_key, sub_key, this->metadata.version, storage->IsSlotIdEncoded());
auto index_key = InternalKey(ns_key, sub_key, info->metadata.version, storage->IsSlotIdEncoded());

batch->Delete(cf_handle, index_key.Encode());
}

for (const auto &tag : tags_to_add) {
auto sub_key = ConstructTagFieldSubkey(field, tag, key);
auto index_key = InternalKey(ns_key, sub_key, this->metadata.version, storage->IsSlotIdEncoded());
auto index_key = InternalKey(ns_key, sub_key, info->metadata.version, storage->IsSlotIdEncoded());

batch->Put(cf_handle, index_key.Encode(), Slice());
}
Expand All @@ -184,15 +184,15 @@ Status IndexUpdater::UpdateIndex(const std::string &field, std::string_view key,
if (!original.empty()) {
auto original_num = GET_OR_RET(ParseFloat(std::string(original.begin(), original.end())));
auto sub_key = ConstructNumericFieldSubkey(field, original_num, key);
auto index_key = InternalKey(ns_key, sub_key, this->metadata.version, storage->IsSlotIdEncoded());
auto index_key = InternalKey(ns_key, sub_key, info->metadata.version, storage->IsSlotIdEncoded());

batch->Delete(cf_handle, index_key.Encode());
}

if (!current.empty()) {
auto current_num = GET_OR_RET(ParseFloat(std::string(current.begin(), current.end())));
auto sub_key = ConstructNumericFieldSubkey(field, current_num, key);
auto index_key = InternalKey(ns_key, sub_key, this->metadata.version, storage->IsSlotIdEncoded());
auto index_key = InternalKey(ns_key, sub_key, info->metadata.version, storage->IsSlotIdEncoded());

batch->Put(cf_handle, index_key.Encode(), Slice());
}
Expand All @@ -206,11 +206,11 @@ Status IndexUpdater::UpdateIndex(const std::string &field, std::string_view key,
return Status::OK();
}

Status IndexUpdater::Update(const FieldValues &original, std::string_view key, const std::string &ns) {
Status IndexUpdater::Update(const FieldValues &original, std::string_view key, const std::string &ns) const {
auto current = GET_OR_RET(Record(key, ns));

for (const auto &[field, info] : fields) {
if (info->noindex) {
for (const auto &[field, i] : info->fields) {
if (i.metadata->noindex) {
continue;
}

Expand All @@ -230,24 +230,24 @@ Status IndexUpdater::Update(const FieldValues &original, std::string_view key, c
}

void GlobalIndexer::Add(IndexUpdater updater) {
auto &up = updaters.emplace_back(std::move(updater));
for (const auto &prefix : up.prefixes) {
prefix_map.insert(prefix, &up);
updater.indexer = this;
for (const auto &prefix : updater.info->prefixes.prefixes) {
prefix_map.insert(prefix, updater);
}
}

StatusOr<GlobalIndexer::RecordResult> GlobalIndexer::Record(std::string_view key, const std::string &ns) {
auto iter = prefix_map.longest_prefix(key);
if (iter != prefix_map.end()) {
auto updater = iter.value();
return std::make_pair(updater, GET_OR_RET(updater->Record(key, ns)));
return std::make_pair(updater, GET_OR_RET(updater.Record(key, ns)));
}

return {Status::NoPrefixMatched};
}

Status GlobalIndexer::Update(const RecordResult &original, std::string_view key, const std::string &ns) {
return original.first->Update(original.second, key, ns);
return original.first.Update(original.second, key, ns);
}

} // namespace redis
25 changes: 8 additions & 17 deletions src/search/indexer.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@

#include "commands/commander.h"
#include "config/config.h"
#include "index_info.h"
#include "indexer.h"
#include "search/search_encoding.h"
#include "server/server.h"
Expand Down Expand Up @@ -69,32 +70,22 @@ struct FieldValueRetriever {
struct IndexUpdater {
using FieldValues = std::map<std::string, std::string>;

std::string name;
SearchMetadata metadata;
std::vector<std::string> prefixes;
std::map<std::string, std::unique_ptr<SearchFieldMetadata>> fields;
const kqir::IndexInfo *info = nullptr;
GlobalIndexer *indexer = nullptr;

IndexUpdater(const IndexUpdater &) = delete;
IndexUpdater(IndexUpdater &&) = default;
explicit IndexUpdater(const kqir::IndexInfo *info) : info(info) {}

IndexUpdater &operator=(IndexUpdater &&) = default;
IndexUpdater &operator=(const IndexUpdater &) = delete;

~IndexUpdater() = default;

StatusOr<FieldValues> Record(std::string_view key, const std::string &ns);
StatusOr<FieldValues> Record(std::string_view key, const std::string &ns) const;
Status UpdateIndex(const std::string &field, std::string_view key, std::string_view original,
std::string_view current, const std::string &ns);
Status Update(const FieldValues &original, std::string_view key, const std::string &ns);
std::string_view current, const std::string &ns) const;
Status Update(const FieldValues &original, std::string_view key, const std::string &ns) const;
};

struct GlobalIndexer {
using FieldValues = IndexUpdater::FieldValues;
using RecordResult = std::pair<IndexUpdater *, FieldValues>;
using RecordResult = std::pair<IndexUpdater, FieldValues>;

std::deque<IndexUpdater> updaters;
tsl::htrie_map<char, IndexUpdater *> prefix_map;
tsl::htrie_map<char, IndexUpdater> prefix_map;

engine::Storage *storage = nullptr;

Expand Down
3 changes: 3 additions & 0 deletions src/search/ir.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@

#include "fmt/core.h"
#include "ir_iterator.h"
#include "search/index_info.h"
#include "string_util.h"
#include "type_util.h"

Expand Down Expand Up @@ -76,6 +77,7 @@ struct Ref : Node {};

struct FieldRef : Ref {
std::string name;
const FieldInfo *info = nullptr;

explicit FieldRef(std::string name) : name(std::move(name)) {}

Expand Down Expand Up @@ -348,6 +350,7 @@ struct SelectClause : Node {

struct IndexRef : Ref {
std::string name;
const IndexInfo *info = nullptr;

explicit IndexRef(std::string name) : name(std::move(name)) {}

Expand Down
43 changes: 6 additions & 37 deletions src/search/ir_sema_checker.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,57 +23,26 @@
#include <map>
#include <memory>

#include "index_info.h"
#include "ir.h"
#include "search_encoding.h"
#include "storage/redis_metadata.h"

namespace kqir {

struct IndexInfo;

struct FieldInfo {
std::string name;
IndexInfo *index = nullptr;
std::unique_ptr<redis::SearchFieldMetadata> metadata;

FieldInfo(std::string name, std::unique_ptr<redis::SearchFieldMetadata> &&metadata)
: name(std::move(name)), metadata(std::move(metadata)) {}
};

struct IndexInfo {
using FieldMap = std::map<std::string, FieldInfo>;

std::string name;
SearchMetadata metadata;
FieldMap fields;

IndexInfo(std::string name, SearchMetadata metadata) : name(std::move(name)), metadata(std::move(metadata)) {}

void Add(FieldInfo &&field) {
const auto &name = field.name;
field.index = this;
fields.emplace(name, std::move(field));
}
};

using IndexMap = std::map<std::string, IndexInfo>;

struct SemaChecker {
const IndexMap &index_map;

const IndexInfo *current_index = nullptr;

using Result = std::map<const Node *, std::variant<const FieldInfo *, const IndexInfo *>>;
Result result;

explicit SemaChecker(const IndexMap &index_map) : index_map(index_map) {}

Status Check(Node *node) {
if (auto v = dynamic_cast<SearchStmt *>(node)) {
auto index_name = v->index->name;
if (auto iter = index_map.find(index_name); iter != index_map.end()) {
current_index = &iter->second;
result.emplace(v->index.get(), current_index);
v->index->info = current_index;

GET_OR_RET(Check(v->select.get()));
GET_OR_RET(Check(v->query_expr.get()));
Expand All @@ -88,7 +57,7 @@ struct SemaChecker {
if (auto iter = current_index->fields.find(v->field->name); iter == current_index->fields.end()) {
return {Status::NotOK, fmt::format("field `{}` not found in index `{}`", v->field->name, current_index->name)};
} else {
result.emplace(v->field.get(), &iter->second);
v->field->info = &iter->second;
}
} else if (auto v = dynamic_cast<AndExpr *>(node)) {
for (const auto &n : v->inners) {
Expand All @@ -106,7 +75,7 @@ struct SemaChecker {
} else if (auto meta = dynamic_cast<redis::SearchTagFieldMetadata *>(iter->second.metadata.get()); !meta) {
return {Status::NotOK, fmt::format("field `{}` is not a tag field", v->field->name)};
} else {
result.emplace(v->field.get(), &iter->second);
v->field->info = &iter->second;

if (v->tag->val.empty()) {
return {Status::NotOK, "tag cannot be an empty string"};
Expand All @@ -122,14 +91,14 @@ struct SemaChecker {
} else if (!dynamic_cast<redis::SearchNumericFieldMetadata *>(iter->second.metadata.get())) {
return {Status::NotOK, fmt::format("field `{}` is not a numeric field", v->field->name)};
} else {
result.emplace(v->field.get(), &iter->second);
v->field->info = &iter->second;
}
} else if (auto v = dynamic_cast<SelectClause *>(node)) {
for (const auto &n : v->fields) {
if (auto iter = current_index->fields.find(n->name); iter == current_index->fields.end()) {
return {Status::NotOK, fmt::format("field `{}` not found in index `{}`", n->name, current_index->name)};
} else {
result.emplace(n.get(), &iter->second);
n->info = &iter->second;
}
}
} else if (auto v [[maybe_unused]] = dynamic_cast<BoolLiteral *>(node)) {
Expand Down
Loading
Loading