Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve bkd #1

Closed
wants to merge 18 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
2548e27
[fix](memory) Fix work load group meaningless GC #27307
xinyiZzz Nov 21, 2023
3e8177b
Revert "[improvement](routine-load) add routine load rows check (#258…
sollhui Nov 21, 2023
d809bee
[refactor](profilev2) add BlocksProduced RowsProduced counter #27291
Mryange Nov 21, 2023
cee8cc4
[Bug](insert)fix insert wrong data on mv when stmt have multiple valu…
BiteTheDDDDt Nov 21, 2023
dd2e9f6
[Fix](statistics)Fix analyze sql including key word bug (#27321)
Jibing-Li Nov 21, 2023
b63f009
[regression](invered index) add test null index case and fix fault in…
airborne12 Nov 21, 2023
fcf7691
[pipeline](close) change the close order in pipeline engine (#27290)
HappenLee Nov 21, 2023
402095b
[doc](fix) fix date trunc doc (#27317)
zy-kkk Nov 21, 2023
016dccb
[fix](null_equal) fix wrong result and coredump of operator <=> (#27312)
jacktengg Nov 21, 2023
7212822
[pipelineX](bug) Fix potential memory access problem (#27326)
Gabriel39 Nov 21, 2023
7e707f5
[fix](fe ut) Fix OlapQueryCacheTest failed (#27305)
xinyiZzz Nov 21, 2023
e58983e
[improvement](publish version) reduce publish version not exist log (…
yujun777 Nov 21, 2023
f7a1c3e
[FIX](jsonb)fix jsonb is not in predict column #27325
amorynan Nov 21, 2023
1ebb54a
[fix](null equal) fix coredump of pushing eq_for_null (#27341)
jacktengg Nov 21, 2023
6fb57c2
[fix](docs) fix auto partition version label #27358
LemonLiTree Nov 21, 2023
0a1908f
[Improvement](inverted index) enable bkd index reader cache and refac…
Nov 21, 2023
ebe6610
[Improvement](inverted index) enable bkd index reader cache and refac…
airborne12 Nov 21, 2023
cca061c
fix bkd null index case
airborne12 Nov 21, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
151 changes: 135 additions & 16 deletions be/src/olap/rowset/segment_v2/inverted_index_cache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

#include <CLucene/debug/mem.h>
#include <CLucene/search/IndexSearcher.h>
#include <CLucene/util/bkd/bkd_reader.h>
// IWYU pragma: no_include <bthread/errno.h>
#include <errno.h> // IWYU pragma: keep
#include <string.h>
Expand All @@ -31,6 +32,7 @@
#include "olap/olap_common.h"
#include "olap/rowset/segment_v2/inverted_index_compound_directory.h"
#include "olap/rowset/segment_v2/inverted_index_compound_reader.h"
#include "olap/rowset/segment_v2/inverted_index_desc.h"
#include "runtime/exec_env.h"
#include "runtime/thread_context.h"
#include "util/defer_op.h"
Expand All @@ -39,19 +41,78 @@
namespace doris {
namespace segment_v2 {

IndexSearcherPtr InvertedIndexSearcherCache::build_index_searcher(const io::FileSystemSPtr& fs,
const std::string& index_dir,
const std::string& file_name) {
DorisCompoundReader* directory =
Status FulltextIndexSearcherBuilder::build(const io::FileSystemSPtr& fs,

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: method 'build' can be made static [readability-convert-member-functions-to-static]

Suggested change
Status FulltextIndexSearcherBuilder::build(const io::FileSystemSPtr& fs,
static Status FulltextIndexSearcherBuilder::build(const io::FileSystemSPtr& fs,

const std::string& index_dir,
const std::string& file_name,
OptionalIndexSearcherPtr& output_searcher) {
auto* directory =
new DorisCompoundReader(DorisCompoundDirectory::getDirectory(fs, index_dir.c_str()),
file_name.c_str(), config::inverted_index_read_buffer_size);
auto closeDirectory = true;
auto index_searcher =
std::make_shared<lucene::search::IndexSearcher>(directory, closeDirectory);
if (!index_searcher) {
_CLDECDELETE(directory)
output_searcher = std::nullopt;
return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>(
"FulltextIndexSearcherBuilder build index_searcher error.");
}
// NOTE: need to cl_refcount-- here, so that directory will be deleted when
// index_searcher is destroyed
_CLDECDELETE(directory)
return index_searcher;
output_searcher = index_searcher;
return Status::OK();
}

// Opens the BKD index packed inside the compound file `index_dir`/`file_name`
// and hands the loaded reader back through `output_searcher`.
//
// Parameters:
//   fs              - file system passed to DorisCompoundDirectory::getDirectory
//   index_dir       - directory that contains the compound index file
//   file_name       - name of the compound index file
//   output_searcher - receives an IndexSearcherPtr holding the bkd_reader on
//                     success; reset to std::nullopt on every failure path
//
// Returns:
//   Status::OK()                 - reader built and stored in output_searcher
//   Status::EndOfFile            - read_meta() returned 0 (empty bkd index)
//   INVERTED_INDEX_CLUCENE_ERROR - an input could not be opened, or a
//                                  CLuceneError was thrown while building
//
// Not static: this definition overrides the virtual IndexSearcherBuilder::build.
Status BKDIndexSearcherBuilder::build(const io::FileSystemSPtr& fs, const std::string& index_dir,
                                      const std::string& file_name,
                                      OptionalIndexSearcherPtr& output_searcher) {
    try {
        // std::make_unique never yields nullptr (allocation failure throws), so
        // the reader is used directly without a null check.
        auto compound_reader = std::make_unique<DorisCompoundReader>(
                DorisCompoundDirectory::getDirectory(fs, index_dir.c_str()), file_name.c_str(),
                config::inverted_index_read_buffer_size);

        CLuceneError err;
        std::unique_ptr<lucene::store::IndexInput> data_in;
        std::unique_ptr<lucene::store::IndexInput> meta_in;
        std::unique_ptr<lucene::store::IndexInput> index_in;

        // The || chain short-circuits: opening stops at the first input that
        // fails, and `err` is reported for that failure.
        if (!compound_reader->openInput(
                    InvertedIndexDescriptor::get_temporary_bkd_index_data_file_name().c_str(),
                    data_in, err) ||
            !compound_reader->openInput(
                    InvertedIndexDescriptor::get_temporary_bkd_index_meta_file_name().c_str(),
                    meta_in, err) ||
            !compound_reader->openInput(
                    InvertedIndexDescriptor::get_temporary_bkd_index_file_name().c_str(), index_in,
                    err)) {
            // LOG is a stream, not a fmt sink: a "{}" placeholder would be printed
            // literally, so the error text is appended with operator<<.
            LOG(ERROR) << "open bkd index input error: " << err.what();
            output_searcher = std::nullopt;
            return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>(
                    "open bkd index input error");
        }

        // NOTE(review): release() passes the raw data input to bkd_reader;
        // presumably bkd_reader takes ownership of it - confirm.
        auto bkd_reader = std::make_shared<lucene::util::bkd::bkd_reader>(data_in.release());
        if (0 == bkd_reader->read_meta(meta_in.get())) {
            VLOG_NOTICE << "bkd index file is empty:" << compound_reader->toString();
            output_searcher = std::nullopt;
            return Status::EndOfFile("bkd index file is empty");
        }

        bkd_reader->read_index(index_in.get());
        output_searcher = IndexSearcherPtr {bkd_reader};
        return Status::OK();
    } catch (const CLuceneError& e) {
        // Keep the out-parameter consistent with the other failure paths.
        output_searcher = std::nullopt;
        return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>(
                "BKDIndexSearcherBuilder build error: {}", e.what());
    }
}

InvertedIndexSearcherCache* InvertedIndexSearcherCache::create_global_instance(
Expand Down Expand Up @@ -98,13 +159,18 @@ InvertedIndexSearcherCache::InvertedIndexSearcherCache(size_t capacity, uint32_t
}
}

Status InvertedIndexSearcherCache::get_index_searcher(const io::FileSystemSPtr& fs,
const std::string& index_dir,
const std::string& file_name,
InvertedIndexCacheHandle* cache_handle,
OlapReaderStatistics* stats, bool use_cache) {
Status InvertedIndexSearcherCache::get_index_searcher(
const io::FileSystemSPtr& fs, const std::string& index_dir, const std::string& file_name,
InvertedIndexCacheHandle* cache_handle, OlapReaderStatistics* stats,
InvertedIndexReaderType reader_type, bool use_cache) {
auto file_path = index_dir + "/" + file_name;

bool exists = false;
RETURN_IF_ERROR(fs->exists(file_path, &exists));
if (!exists) {
LOG(WARNING) << "inverted index: " << file_path << " not exist.";
return Status::Error<ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND>(
"inverted index input file {} not found", file_path);
}
using namespace std::chrono;
auto start_time = steady_clock::now();
Defer cost {[&]() {
Expand All @@ -119,14 +185,40 @@ Status InvertedIndexSearcherCache::get_index_searcher(const io::FileSystemSPtr&
}

cache_handle->owned = !use_cache;
IndexSearcherPtr index_searcher = nullptr;
IndexSearcherPtr index_searcher;
std::unique_ptr<IndexSearcherBuilder> index_builder = nullptr;
auto mem_tracker =
std::unique_ptr<MemTracker>(new MemTracker("InvertedIndexSearcherCacheWithRead"));
#ifndef BE_TEST
{
SCOPED_RAW_TIMER(&stats->inverted_index_searcher_open_timer);
SCOPED_CONSUME_MEM_TRACKER(mem_tracker.get());
index_searcher = build_index_searcher(fs, index_dir, file_name);
switch (reader_type) {
case InvertedIndexReaderType::STRING_TYPE:
case InvertedIndexReaderType::FULLTEXT: {
index_builder = std::make_unique<FulltextIndexSearcherBuilder>();
break;
}
case InvertedIndexReaderType::BKD: {
index_builder = std::make_unique<BKDIndexSearcherBuilder>();
break;
}

default:
LOG(ERROR) << "InvertedIndexReaderType:" << reader_type_to_string(reader_type)
<< " is not support for InvertedIndexSearcherCache";
return Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>(
"InvertedIndexSearcherCache do not support reader type.");
}
OptionalIndexSearcherPtr result;
RETURN_IF_ERROR(index_builder->build(fs, index_dir, file_name, result));
if (!result.has_value()) {
LOG(ERROR) << "InvertedIndexReaderType:" << reader_type_to_string(reader_type)
<< " build for InvertedIndexSearcherCache error";
return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>(
"InvertedIndexSearcherCache build error.");
}
index_searcher = *result;
}
#endif

Expand All @@ -144,7 +236,8 @@ Status InvertedIndexSearcherCache::get_index_searcher(const io::FileSystemSPtr&

Status InvertedIndexSearcherCache::insert(const io::FileSystemSPtr& fs,
const std::string& index_dir,
const std::string& file_name) {
const std::string& file_name,
InvertedIndexReaderType reader_type) {
auto file_path = index_dir + "/" + file_name;

using namespace std::chrono;
Expand All @@ -156,13 +249,39 @@ Status InvertedIndexSearcherCache::insert(const io::FileSystemSPtr& fs,

InvertedIndexSearcherCache::CacheKey cache_key(file_path);
IndexCacheValuePtr cache_value = std::make_unique<InvertedIndexSearcherCache::CacheValue>();
IndexSearcherPtr index_searcher = nullptr;
IndexSearcherPtr index_searcher;
std::unique_ptr<IndexSearcherBuilder> builder = nullptr;
auto mem_tracker =
std::unique_ptr<MemTracker>(new MemTracker("InvertedIndexSearcherCacheWithInsert"));
#ifndef BE_TEST
{
SCOPED_CONSUME_MEM_TRACKER(mem_tracker.get());
index_searcher = build_index_searcher(fs, index_dir, file_name);
switch (reader_type) {
case InvertedIndexReaderType::STRING_TYPE:
case InvertedIndexReaderType::FULLTEXT: {
builder = std::make_unique<FulltextIndexSearcherBuilder>();
break;
}
case InvertedIndexReaderType::BKD: {
builder = std::make_unique<BKDIndexSearcherBuilder>();
break;
}

default:
LOG(ERROR) << "InvertedIndexReaderType:" << reader_type_to_string(reader_type)
<< " is not support for InvertedIndexSearcherCache";
return Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>(
"InvertedIndexSearcherCache do not support reader type.");
}
OptionalIndexSearcherPtr result;
RETURN_IF_ERROR(builder->build(fs, index_dir, file_name, result));
if (!result.has_value()) {
LOG(ERROR) << "InvertedIndexReaderType:" << reader_type_to_string(reader_type)
<< " build for InvertedIndexSearcherCache error";
return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>(
"InvertedIndexSearcherCache build error.");
}
index_searcher = *result;
}
#endif

Expand Down
45 changes: 36 additions & 9 deletions be/src/olap/rowset/segment_v2/inverted_index_cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,11 @@

#include <atomic>
#include <memory>
#include <optional>
#include <roaring/roaring.hh>
#include <string>
#include <utility>
#include <variant>

#include "common/config.h"
#include "common/status.h"
Expand All @@ -54,16 +56,44 @@ namespace lucene {
namespace search {
class IndexSearcher;
} // namespace search
namespace util {
namespace bkd {
Comment on lines +59 to +60

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: nested namespaces can be concatenated [modernize-concat-nested-namespaces]

Suggested change
namespace util {
namespace bkd {
namespace util::bkd {

be/src/olap/rowset/segment_v2/inverted_index_cache.h:61:

- }
- } // namespace util
+ } // namespace util

class bkd_reader;
}
} // namespace util
} // namespace lucene

namespace doris {
struct OlapReaderStatistics;

namespace segment_v2 {
using IndexSearcherPtr = std::shared_ptr<lucene::search::IndexSearcher>;
using FulltextIndexSearcherPtr = std::shared_ptr<lucene::search::IndexSearcher>;
using BKDIndexSearcherPtr = std::shared_ptr<lucene::util::bkd::bkd_reader>;
using IndexSearcherPtr = std::variant<FulltextIndexSearcherPtr, BKDIndexSearcherPtr>;
using OptionalIndexSearcherPtr = std::optional<IndexSearcherPtr>;

class InvertedIndexCacheHandle;

// Abstract builder for one kind of inverted index searcher.
// Concrete builders (FulltextIndexSearcherBuilder, BKDIndexSearcherBuilder)
// implement build() for their index format.
class IndexSearcherBuilder {
public:
    // Virtual so a concrete builder can be destroyed through a base pointer.
    virtual ~IndexSearcherBuilder() = default;

    // Opens the index stored in `index_dir`/`file_name` on `fs`.
    // The searcher is returned through `output_searcher`; the Status reports
    // whether the build succeeded.
    virtual Status build(const io::FileSystemSPtr& fs, const std::string& index_dir,
                         const std::string& file_name,
                         OptionalIndexSearcherPtr& output_searcher) = 0;
};

// Builder used by InvertedIndexSearcherCache for the FULLTEXT and STRING_TYPE
// reader types.
class FulltextIndexSearcherBuilder : public IndexSearcherBuilder {
public:
// Wraps a DorisCompoundReader for `index_dir`/`file_name` in a
// lucene::search::IndexSearcher and stores it in `output_searcher`.
Status build(const io::FileSystemSPtr& fs, const std::string& index_dir,
const std::string& file_name, OptionalIndexSearcherPtr& output_searcher) override;
};

// Builder used by InvertedIndexSearcherCache for the BKD reader type.
class BKDIndexSearcherBuilder : public IndexSearcherBuilder {
public:
// Opens the bkd data, meta and index inputs inside the compound file
// `index_dir`/`file_name` and stores the resulting
// lucene::util::bkd::bkd_reader in `output_searcher`.
Status build(const io::FileSystemSPtr& fs, const std::string& index_dir,
const std::string& file_name, OptionalIndexSearcherPtr& output_searcher) override;
};

class InvertedIndexSearcherCache : public LRUCachePolicy {
public:
// The cache key of index_searcher lru cache
Expand All @@ -73,7 +103,7 @@ class InvertedIndexSearcherCache : public LRUCachePolicy {
};

// The cache value of index_searcher lru cache.
// Holding a opened index_searcher.
// Holding an opened index_searcher.
struct CacheValue : public LRUCacheValueBase {
IndexSearcherPtr index_searcher;
};
Expand All @@ -95,19 +125,16 @@ class InvertedIndexSearcherCache : public LRUCachePolicy {
return ExecEnv::GetInstance()->get_inverted_index_searcher_cache();
}

static IndexSearcherPtr build_index_searcher(const io::FileSystemSPtr& fs,
const std::string& index_dir,
const std::string& file_name);

InvertedIndexSearcherCache(size_t capacity, uint32_t num_shards);

Status get_index_searcher(const io::FileSystemSPtr& fs, const std::string& index_dir,
const std::string& file_name, InvertedIndexCacheHandle* cache_handle,
OlapReaderStatistics* stats, bool use_cache = true);
OlapReaderStatistics* stats, InvertedIndexReaderType reader_type,
bool use_cache = true);

// function `insert` called after inverted index writer close
Status insert(const io::FileSystemSPtr& fs, const std::string& index_dir,
const std::string& file_name);
const std::string& file_name, InvertedIndexReaderType reader_type);

// function `erase` called after compaction remove segment
Status erase(const std::string& index_file_path);
Expand Down Expand Up @@ -211,7 +238,7 @@ class InvertedIndexQueryCache : public LRUCachePolicy {
key_buf.append("/");
key_buf.append(column_name);
key_buf.append("/");
auto query_type_str = InvertedIndexQueryType_toString(query_type);
auto query_type_str = query_type_to_string(query_type);
if (query_type_str.empty()) {
return "";
}
Expand Down
46 changes: 45 additions & 1 deletion be/src/olap/rowset/segment_v2/inverted_index_query_type.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,50 @@
namespace doris {
namespace segment_v2 {

// Kind of inverted index reader. The numeric values are fixed explicitly.
enum class InvertedIndexReaderType {
    UNKNOWN = -1,
    FULLTEXT = 0,
    STRING_TYPE = 1,
    BKD = 2,
};

// Compile-time name lookup, one explicit specialization per enumerator.
// The primary template is declared only, so a value without a specialization
// has no definition to call.
template <InvertedIndexReaderType T>
constexpr const char* InvertedIndexReaderTypeToString();

template <>
constexpr const char* InvertedIndexReaderTypeToString<InvertedIndexReaderType::UNKNOWN>() {
    return "UNKNOWN";
}

template <>
constexpr const char* InvertedIndexReaderTypeToString<InvertedIndexReaderType::FULLTEXT>() {
    return "FULLTEXT";
}

template <>
constexpr const char* InvertedIndexReaderTypeToString<InvertedIndexReaderType::STRING_TYPE>() {
    return "STRING_TYPE";
}

template <>
constexpr const char* InvertedIndexReaderTypeToString<InvertedIndexReaderType::BKD>() {
    return "BKD";
}

// Runtime counterpart of InvertedIndexReaderTypeToString<>.
// Returns the enumerator's name, or an empty string when `query_type` holds a
// value that is not one of the four enumerators.
// (The parameter is a reader type despite its name.)
inline std::string reader_type_to_string(InvertedIndexReaderType query_type) {
    using Type = InvertedIndexReaderType;

    // Stays empty when no case below matches.
    const char* label = "";
    // No `default:` on purpose: the switch lists every enumerator.
    switch (query_type) {
    case Type::UNKNOWN:
        label = InvertedIndexReaderTypeToString<Type::UNKNOWN>();
        break;
    case Type::FULLTEXT:
        label = InvertedIndexReaderTypeToString<Type::FULLTEXT>();
        break;
    case Type::STRING_TYPE:
        label = InvertedIndexReaderTypeToString<Type::STRING_TYPE>();
        break;
    case Type::BKD:
        label = InvertedIndexReaderTypeToString<Type::BKD>();
        break;
    }
    return label;
}

enum class InvertedIndexQueryType {
UNKNOWN_QUERY = -1,
EQUAL_QUERY = 0,
Expand All @@ -34,7 +78,7 @@ enum class InvertedIndexQueryType {
MATCH_PHRASE_QUERY = 7,
};

inline std::string InvertedIndexQueryType_toString(InvertedIndexQueryType query_type) {
inline std::string query_type_to_string(InvertedIndexQueryType query_type) {
switch (query_type) {
case InvertedIndexQueryType::UNKNOWN_QUERY: {
return "UNKNOWN";
Expand Down
Loading
Loading