-
Notifications
You must be signed in to change notification settings - Fork 2
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Improve bkd #1
Improve bkd #1
Changes from all commits
2548e27
3e8177b
d809bee
cee8cc4
dd2e9f6
b63f009
fcf7691
402095b
016dccb
7212822
7e707f5
e58983e
f7a1c3e
1ebb54a
6fb57c2
0a1908f
ebe6610
cca061c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -19,6 +19,7 @@ | |||||
|
||||||
#include <CLucene/debug/mem.h> | ||||||
#include <CLucene/search/IndexSearcher.h> | ||||||
#include <CLucene/util/bkd/bkd_reader.h> | ||||||
// IWYU pragma: no_include <bthread/errno.h> | ||||||
#include <errno.h> // IWYU pragma: keep | ||||||
#include <string.h> | ||||||
|
@@ -31,6 +32,7 @@ | |||||
#include "olap/olap_common.h" | ||||||
#include "olap/rowset/segment_v2/inverted_index_compound_directory.h" | ||||||
#include "olap/rowset/segment_v2/inverted_index_compound_reader.h" | ||||||
#include "olap/rowset/segment_v2/inverted_index_desc.h" | ||||||
#include "runtime/exec_env.h" | ||||||
#include "runtime/thread_context.h" | ||||||
#include "util/defer_op.h" | ||||||
|
@@ -39,19 +41,78 @@ | |||||
namespace doris { | ||||||
namespace segment_v2 { | ||||||
|
||||||
IndexSearcherPtr InvertedIndexSearcherCache::build_index_searcher(const io::FileSystemSPtr& fs, | ||||||
const std::string& index_dir, | ||||||
const std::string& file_name) { | ||||||
DorisCompoundReader* directory = | ||||||
Status FulltextIndexSearcherBuilder::build(const io::FileSystemSPtr& fs, | ||||||
const std::string& index_dir, | ||||||
const std::string& file_name, | ||||||
OptionalIndexSearcherPtr& output_searcher) { | ||||||
auto* directory = | ||||||
new DorisCompoundReader(DorisCompoundDirectory::getDirectory(fs, index_dir.c_str()), | ||||||
file_name.c_str(), config::inverted_index_read_buffer_size); | ||||||
auto closeDirectory = true; | ||||||
auto index_searcher = | ||||||
std::make_shared<lucene::search::IndexSearcher>(directory, closeDirectory); | ||||||
if (!index_searcher) { | ||||||
_CLDECDELETE(directory) | ||||||
output_searcher = std::nullopt; | ||||||
return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>( | ||||||
"FulltextIndexSearcherBuilder build index_searcher error."); | ||||||
} | ||||||
// NOTE: need to cl_refcount-- here, so that directory will be deleted when | ||||||
// index_searcher is destroyed | ||||||
_CLDECDELETE(directory) | ||||||
return index_searcher; | ||||||
output_searcher = index_searcher; | ||||||
return Status::OK(); | ||||||
} | ||||||
|
||||||
// Builds a lucene::util::bkd::bkd_reader from the BKD data/meta/index inputs
// stored inside the compound index file `file_name` under `index_dir`.
//
// @param fs               file system the index directory is opened on
// @param index_dir        directory containing the compound index file
// @param file_name        name of the compound index file
// @param output_searcher  receives the reader on success; std::nullopt on
//                         every failure path
// @return Status::OK() on success;
//         INVERTED_INDEX_CLUCENE_ERROR if an input cannot be opened or CLucene
//         throws a CLuceneError;
//         Status::EndOfFile if read_meta() returns 0 (logged as an empty index).
//
// NOTE(review): clang-tidy reports that this method "can be made static"
// (readability-convert-member-functions-to-static). It is declared `override`
// in BKDIndexSearcherBuilder, so it has to remain a non-static member.
Status BKDIndexSearcherBuilder::build(const io::FileSystemSPtr& fs, const std::string& index_dir,
                                      const std::string& file_name,
                                      OptionalIndexSearcherPtr& output_searcher) {
    // Start from an empty result so that every early return and the catch
    // handler below report "no searcher" consistently.
    output_searcher = std::nullopt;
    try {
        // std::make_unique never yields a null pointer (allocation failure
        // throws), so no null check is needed on the result.
        auto compound_reader = std::make_unique<DorisCompoundReader>(
                DorisCompoundDirectory::getDirectory(fs, index_dir.c_str()), file_name.c_str(),
                config::inverted_index_read_buffer_size);

        CLuceneError err;
        std::unique_ptr<lucene::store::IndexInput> data_in;
        std::unique_ptr<lucene::store::IndexInput> meta_in;
        std::unique_ptr<lucene::store::IndexInput> index_in;

        // The || chain short-circuits, so `err` describes the first input that
        // failed to open.
        if (!compound_reader->openInput(
                    InvertedIndexDescriptor::get_temporary_bkd_index_data_file_name().c_str(),
                    data_in, err) ||
            !compound_reader->openInput(
                    InvertedIndexDescriptor::get_temporary_bkd_index_meta_file_name().c_str(),
                    meta_in, err) ||
            !compound_reader->openInput(
                    InvertedIndexDescriptor::get_temporary_bkd_index_file_name().c_str(), index_in,
                    err)) {
            // Stream logging does not expand fmt-style "{}" placeholders, so the
            // message is written without one.
            LOG(ERROR) << "open bkd index input error: " << err.what();
            return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>(
                    "open bkd index input error");
        }

        // The raw data input is released to the reader; presumably bkd_reader
        // takes ownership of it - TODO confirm against the CLucene bkd_reader API.
        auto bkd_reader = std::make_shared<lucene::util::bkd::bkd_reader>(data_in.release());
        if (0 == bkd_reader->read_meta(meta_in.get())) {
            VLOG_NOTICE << "bkd index file is empty:" << compound_reader->toString();
            return Status::EndOfFile("bkd index file is empty");
        }

        bkd_reader->read_index(index_in.get());
        output_searcher = IndexSearcherPtr {bkd_reader};
        return Status::OK();
    } catch (const CLuceneError& e) {
        return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>(
                "BKDIndexSearcherBuilder build error: {}", e.what());
    }
}
|
||||||
InvertedIndexSearcherCache* InvertedIndexSearcherCache::create_global_instance( | ||||||
|
@@ -98,13 +159,18 @@ InvertedIndexSearcherCache::InvertedIndexSearcherCache(size_t capacity, uint32_t | |||||
} | ||||||
} | ||||||
|
||||||
Status InvertedIndexSearcherCache::get_index_searcher(const io::FileSystemSPtr& fs, | ||||||
const std::string& index_dir, | ||||||
const std::string& file_name, | ||||||
InvertedIndexCacheHandle* cache_handle, | ||||||
OlapReaderStatistics* stats, bool use_cache) { | ||||||
Status InvertedIndexSearcherCache::get_index_searcher( | ||||||
const io::FileSystemSPtr& fs, const std::string& index_dir, const std::string& file_name, | ||||||
InvertedIndexCacheHandle* cache_handle, OlapReaderStatistics* stats, | ||||||
InvertedIndexReaderType reader_type, bool use_cache) { | ||||||
auto file_path = index_dir + "/" + file_name; | ||||||
|
||||||
bool exists = false; | ||||||
RETURN_IF_ERROR(fs->exists(file_path, &exists)); | ||||||
if (!exists) { | ||||||
LOG(WARNING) << "inverted index: " << file_path << " not exist."; | ||||||
return Status::Error<ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND>( | ||||||
"inverted index input file {} not found", file_path); | ||||||
} | ||||||
using namespace std::chrono; | ||||||
auto start_time = steady_clock::now(); | ||||||
Defer cost {[&]() { | ||||||
|
@@ -119,14 +185,40 @@ Status InvertedIndexSearcherCache::get_index_searcher(const io::FileSystemSPtr& | |||||
} | ||||||
|
||||||
cache_handle->owned = !use_cache; | ||||||
IndexSearcherPtr index_searcher = nullptr; | ||||||
IndexSearcherPtr index_searcher; | ||||||
std::unique_ptr<IndexSearcherBuilder> index_builder = nullptr; | ||||||
auto mem_tracker = | ||||||
std::unique_ptr<MemTracker>(new MemTracker("InvertedIndexSearcherCacheWithRead")); | ||||||
#ifndef BE_TEST | ||||||
{ | ||||||
SCOPED_RAW_TIMER(&stats->inverted_index_searcher_open_timer); | ||||||
SCOPED_CONSUME_MEM_TRACKER(mem_tracker.get()); | ||||||
index_searcher = build_index_searcher(fs, index_dir, file_name); | ||||||
switch (reader_type) { | ||||||
case InvertedIndexReaderType::STRING_TYPE: | ||||||
case InvertedIndexReaderType::FULLTEXT: { | ||||||
index_builder = std::make_unique<FulltextIndexSearcherBuilder>(); | ||||||
break; | ||||||
} | ||||||
case InvertedIndexReaderType::BKD: { | ||||||
index_builder = std::make_unique<BKDIndexSearcherBuilder>(); | ||||||
break; | ||||||
} | ||||||
|
||||||
default: | ||||||
LOG(ERROR) << "InvertedIndexReaderType:" << reader_type_to_string(reader_type) | ||||||
<< " is not support for InvertedIndexSearcherCache"; | ||||||
return Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>( | ||||||
"InvertedIndexSearcherCache do not support reader type."); | ||||||
} | ||||||
OptionalIndexSearcherPtr result; | ||||||
RETURN_IF_ERROR(index_builder->build(fs, index_dir, file_name, result)); | ||||||
if (!result.has_value()) { | ||||||
LOG(ERROR) << "InvertedIndexReaderType:" << reader_type_to_string(reader_type) | ||||||
<< " build for InvertedIndexSearcherCache error"; | ||||||
return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>( | ||||||
"InvertedIndexSearcherCache build error."); | ||||||
} | ||||||
index_searcher = *result; | ||||||
} | ||||||
#endif | ||||||
|
||||||
|
@@ -144,7 +236,8 @@ Status InvertedIndexSearcherCache::get_index_searcher(const io::FileSystemSPtr& | |||||
|
||||||
Status InvertedIndexSearcherCache::insert(const io::FileSystemSPtr& fs, | ||||||
const std::string& index_dir, | ||||||
const std::string& file_name) { | ||||||
const std::string& file_name, | ||||||
InvertedIndexReaderType reader_type) { | ||||||
auto file_path = index_dir + "/" + file_name; | ||||||
|
||||||
using namespace std::chrono; | ||||||
|
@@ -156,13 +249,39 @@ Status InvertedIndexSearcherCache::insert(const io::FileSystemSPtr& fs, | |||||
|
||||||
InvertedIndexSearcherCache::CacheKey cache_key(file_path); | ||||||
IndexCacheValuePtr cache_value = std::make_unique<InvertedIndexSearcherCache::CacheValue>(); | ||||||
IndexSearcherPtr index_searcher = nullptr; | ||||||
IndexSearcherPtr index_searcher; | ||||||
std::unique_ptr<IndexSearcherBuilder> builder = nullptr; | ||||||
auto mem_tracker = | ||||||
std::unique_ptr<MemTracker>(new MemTracker("InvertedIndexSearcherCacheWithInsert")); | ||||||
#ifndef BE_TEST | ||||||
{ | ||||||
SCOPED_CONSUME_MEM_TRACKER(mem_tracker.get()); | ||||||
index_searcher = build_index_searcher(fs, index_dir, file_name); | ||||||
switch (reader_type) { | ||||||
case InvertedIndexReaderType::STRING_TYPE: | ||||||
case InvertedIndexReaderType::FULLTEXT: { | ||||||
builder = std::make_unique<FulltextIndexSearcherBuilder>(); | ||||||
break; | ||||||
} | ||||||
case InvertedIndexReaderType::BKD: { | ||||||
builder = std::make_unique<BKDIndexSearcherBuilder>(); | ||||||
break; | ||||||
} | ||||||
|
||||||
default: | ||||||
LOG(ERROR) << "InvertedIndexReaderType:" << reader_type_to_string(reader_type) | ||||||
<< " is not support for InvertedIndexSearcherCache"; | ||||||
return Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>( | ||||||
"InvertedIndexSearcherCache do not support reader type."); | ||||||
} | ||||||
OptionalIndexSearcherPtr result; | ||||||
RETURN_IF_ERROR(builder->build(fs, index_dir, file_name, result)); | ||||||
if (!result.has_value()) { | ||||||
LOG(ERROR) << "InvertedIndexReaderType:" << reader_type_to_string(reader_type) | ||||||
<< " build for InvertedIndexSearcherCache error"; | ||||||
return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>( | ||||||
"InvertedIndexSearcherCache build error."); | ||||||
} | ||||||
index_searcher = *result; | ||||||
} | ||||||
#endif | ||||||
|
||||||
|
Original file line number | Diff line number | Diff line change | ||||||
---|---|---|---|---|---|---|---|---|
|
@@ -34,9 +34,11 @@ | |||||||
|
||||||||
#include <atomic> | ||||||||
#include <memory> | ||||||||
#include <optional> | ||||||||
#include <roaring/roaring.hh> | ||||||||
#include <string> | ||||||||
#include <utility> | ||||||||
#include <variant> | ||||||||
|
||||||||
#include "common/config.h" | ||||||||
#include "common/status.h" | ||||||||
|
@@ -54,16 +56,44 @@ namespace lucene { | |||||||
namespace search { | ||||||||
class IndexSearcher; | ||||||||
} // namespace search | ||||||||
// Forward declaration of the CLucene BKD reader, so this header can name the
// type (e.g. inside std::shared_ptr) without including its definition.
namespace util::bkd {
class bkd_reader;
} // namespace util::bkd
} // namespace lucene | ||||||||
|
||||||||
namespace doris { | ||||||||
struct OlapReaderStatistics; | ||||||||
|
||||||||
namespace segment_v2 { | ||||||||
using IndexSearcherPtr = std::shared_ptr<lucene::search::IndexSearcher>; | ||||||||
// Shared handle to a CLucene IndexSearcher.
using FulltextIndexSearcherPtr = std::shared_ptr<lucene::search::IndexSearcher>; | ||||||||
// Shared handle to a CLucene bkd_reader.
using BKDIndexSearcherPtr = std::shared_ptr<lucene::util::bkd::bkd_reader>; | ||||||||
// A searcher handle that holds exactly one of the two pointer kinds above.
using IndexSearcherPtr = std::variant<FulltextIndexSearcherPtr, BKDIndexSearcherPtr>; | ||||||||
// An IndexSearcherPtr that may be absent (std::nullopt).
using OptionalIndexSearcherPtr = std::optional<IndexSearcherPtr>; | ||||||||
|
||||||||
class InvertedIndexCacheHandle; | ||||||||
|
||||||||
class IndexSearcherBuilder { | ||||||||
public: | ||||||||
virtual Status build(const io::FileSystemSPtr& fs, const std::string& index_dir, | ||||||||
const std::string& file_name, | ||||||||
OptionalIndexSearcherPtr& output_searcher) = 0; | ||||||||
virtual ~IndexSearcherBuilder() = default; | ||||||||
}; | ||||||||
|
||||||||
class FulltextIndexSearcherBuilder : public IndexSearcherBuilder { | ||||||||
public: | ||||||||
Status build(const io::FileSystemSPtr& fs, const std::string& index_dir, | ||||||||
const std::string& file_name, OptionalIndexSearcherPtr& output_searcher) override; | ||||||||
}; | ||||||||
|
||||||||
class BKDIndexSearcherBuilder : public IndexSearcherBuilder { | ||||||||
public: | ||||||||
Status build(const io::FileSystemSPtr& fs, const std::string& index_dir, | ||||||||
const std::string& file_name, OptionalIndexSearcherPtr& output_searcher) override; | ||||||||
}; | ||||||||
|
||||||||
class InvertedIndexSearcherCache : public LRUCachePolicy { | ||||||||
public: | ||||||||
// The cache key of index_searcher lru cache | ||||||||
|
@@ -73,7 +103,7 @@ class InvertedIndexSearcherCache : public LRUCachePolicy { | |||||||
}; | ||||||||
|
||||||||
// The cache value of index_searcher lru cache. | ||||||||
// Holding a opened index_searcher. | ||||||||
// Holding an opened index_searcher. | ||||||||
struct CacheValue : public LRUCacheValueBase { | ||||||||
IndexSearcherPtr index_searcher; | ||||||||
}; | ||||||||
|
@@ -95,19 +125,16 @@ class InvertedIndexSearcherCache : public LRUCachePolicy { | |||||||
return ExecEnv::GetInstance()->get_inverted_index_searcher_cache(); | ||||||||
} | ||||||||
|
||||||||
static IndexSearcherPtr build_index_searcher(const io::FileSystemSPtr& fs, | ||||||||
const std::string& index_dir, | ||||||||
const std::string& file_name); | ||||||||
|
||||||||
InvertedIndexSearcherCache(size_t capacity, uint32_t num_shards); | ||||||||
|
||||||||
Status get_index_searcher(const io::FileSystemSPtr& fs, const std::string& index_dir, | ||||||||
const std::string& file_name, InvertedIndexCacheHandle* cache_handle, | ||||||||
OlapReaderStatistics* stats, bool use_cache = true); | ||||||||
OlapReaderStatistics* stats, InvertedIndexReaderType reader_type, | ||||||||
bool use_cache = true); | ||||||||
|
||||||||
// function `insert` called after inverted index writer close | ||||||||
Status insert(const io::FileSystemSPtr& fs, const std::string& index_dir, | ||||||||
const std::string& file_name); | ||||||||
const std::string& file_name, InvertedIndexReaderType reader_type); | ||||||||
|
||||||||
// function `erase` called after compaction remove segment | ||||||||
Status erase(const std::string& index_file_path); | ||||||||
|
@@ -211,7 +238,7 @@ class InvertedIndexQueryCache : public LRUCachePolicy { | |||||||
key_buf.append("/"); | ||||||||
key_buf.append(column_name); | ||||||||
key_buf.append("/"); | ||||||||
auto query_type_str = InvertedIndexQueryType_toString(query_type); | ||||||||
auto query_type_str = query_type_to_string(query_type); | ||||||||
if (query_type_str.empty()) { | ||||||||
return ""; | ||||||||
} | ||||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
warning: method 'build' can be made static [readability-convert-member-functions-to-static]