Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions be/src/olap/rowset/segment_v2/column_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -938,7 +938,7 @@ Status ArrayColumnWriter::init() {
if (_opts.need_inverted_index) {
auto* writer = dynamic_cast<ScalarColumnWriter*>(_item_writer.get());
if (writer != nullptr) {
RETURN_IF_ERROR(IndexColumnWriter::create(get_field(), &_inverted_index_builder,
RETURN_IF_ERROR(IndexColumnWriter::create(get_field(), &_inverted_index_writer,
_opts.index_file_writer,
_opts.inverted_indexes[0]));
}
Expand All @@ -956,7 +956,7 @@ Status ArrayColumnWriter::init() {

Status ArrayColumnWriter::write_inverted_index() {
if (_opts.need_inverted_index) {
return _inverted_index_builder->finish();
return _inverted_index_writer->finish();
}
return Status::OK();
}
Expand Down Expand Up @@ -988,7 +988,7 @@ Status ArrayColumnWriter::append_data(const uint8_t** ptr, size_t num_rows) {
// now only support nested type is scala
if (writer != nullptr) {
//NOTE: use array field name as index field, but item_writer size should be used when moving item_data_ptr
RETURN_IF_ERROR(_inverted_index_builder->add_array_values(
RETURN_IF_ERROR(_inverted_index_writer->add_array_values(
_item_writer->get_field()->size(), reinterpret_cast<const void*>(data),
reinterpret_cast<const uint8_t*>(nested_null_map), offsets_ptr, num_rows));
}
Expand Down Expand Up @@ -1025,7 +1025,7 @@ Status ArrayColumnWriter::append_nullable(const uint8_t* null_map, const uint8_t
RETURN_IF_ERROR(append_data(ptr, num_rows));
if (is_nullable()) {
if (_opts.need_inverted_index) {
RETURN_IF_ERROR(_inverted_index_builder->add_array_nulls(null_map, num_rows));
RETURN_IF_ERROR(_inverted_index_writer->add_array_nulls(null_map, num_rows));
}
RETURN_IF_ERROR(_null_writer->append_data(&null_map, num_rows));
}
Expand Down
2 changes: 1 addition & 1 deletion be/src/olap/rowset/segment_v2/column_writer.h
Original file line number Diff line number Diff line change
Expand Up @@ -492,7 +492,7 @@ class ArrayColumnWriter final : public ColumnWriter {
std::unique_ptr<OffsetColumnWriter> _offset_writer;
std::unique_ptr<ScalarColumnWriter> _null_writer;
std::unique_ptr<ColumnWriter> _item_writer;
std::unique_ptr<IndexColumnWriter> _inverted_index_builder;
std::unique_ptr<IndexColumnWriter> _inverted_index_writer;
std::unique_ptr<AnnIndexColumnWriter> _ann_index_writer;
ColumnWriterOptions _opts;
};
Expand Down
114 changes: 65 additions & 49 deletions be/src/olap/rowset/segment_v2/index_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

#include "common/exception.h"
#include "olap/field.h"
#include "olap/rowset/segment_v2/ann_index/ann_index_writer.h"
#include "olap/rowset/segment_v2/inverted_index_writer.h"

namespace doris::segment_v2 {
Expand All @@ -40,10 +41,11 @@ bool IndexColumnWriter::check_support_inverted_index(const TabletColumn& column)
}

bool IndexColumnWriter::check_support_ann_index(const TabletColumn& column) {
// bellow types are not supported in inverted index for extracted columns
// only array are supported in ann index
return column.is_array_type();
}

// create index writer
Status IndexColumnWriter::create(const Field* field, std::unique_ptr<IndexColumnWriter>* res,
IndexFileWriter* index_file_writer,
const TabletIndex* index_meta) {
Expand All @@ -62,64 +64,78 @@ Status IndexColumnWriter::create(const Field* field, std::unique_ptr<IndexColumn
field_name = std::to_string(field->unique_id());
}
}
bool single_field = true;
if (type == FieldType::OLAP_FIELD_TYPE_ARRAY) {
const auto* array_typeinfo = dynamic_cast<const ArrayTypeInfo*>(typeinfo);
DBUG_EXECUTE_IF("InvertedIndexColumnWriter::create_array_typeinfo_is_nullptr",
{ array_typeinfo = nullptr; })
if (array_typeinfo != nullptr) {
typeinfo = array_typeinfo->item_type_info();
type = typeinfo->type();
single_field = false;
} else {
return Status::NotSupported("unsupported array type for inverted index: " +
std::to_string(int(type)));

if (index_meta->is_inverted_index()) {
bool single_field = true;
if (type == FieldType::OLAP_FIELD_TYPE_ARRAY) {
const auto* array_typeinfo = dynamic_cast<const ArrayTypeInfo*>(typeinfo);
DBUG_EXECUTE_IF("InvertedIndexColumnWriter::create_array_typeinfo_is_nullptr",
{ array_typeinfo = nullptr; })
if (array_typeinfo != nullptr) {
typeinfo = array_typeinfo->item_type_info();
type = typeinfo->type();
single_field = false;
} else {
return Status::NotSupported("unsupported array type for inverted index: " +
std::to_string(int(type)));
}
}
}

DBUG_EXECUTE_IF("InvertedIndexColumnWriter::create_unsupported_type_for_inverted_index",
{ type = FieldType::OLAP_FIELD_TYPE_JSONB; })
switch (type) {
DBUG_EXECUTE_IF("InvertedIndexColumnWriter::create_unsupported_type_for_inverted_index",
{ type = FieldType::OLAP_FIELD_TYPE_JSONB; })
switch (type) {
#define M(TYPE) \
case TYPE: \
*res = std::make_unique<InvertedIndexColumnWriter<TYPE>>(field_name, index_file_writer, \
index_meta, single_field); \
break;
M(FieldType::OLAP_FIELD_TYPE_TINYINT)
M(FieldType::OLAP_FIELD_TYPE_SMALLINT)
M(FieldType::OLAP_FIELD_TYPE_INT)
M(FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT)
M(FieldType::OLAP_FIELD_TYPE_BIGINT)
M(FieldType::OLAP_FIELD_TYPE_LARGEINT)
M(FieldType::OLAP_FIELD_TYPE_CHAR)
M(FieldType::OLAP_FIELD_TYPE_VARCHAR)
M(FieldType::OLAP_FIELD_TYPE_STRING)
M(FieldType::OLAP_FIELD_TYPE_DATE)
M(FieldType::OLAP_FIELD_TYPE_DATETIME)
M(FieldType::OLAP_FIELD_TYPE_DECIMAL)
M(FieldType::OLAP_FIELD_TYPE_DATEV2)
M(FieldType::OLAP_FIELD_TYPE_DATETIMEV2)
M(FieldType::OLAP_FIELD_TYPE_DECIMAL32)
M(FieldType::OLAP_FIELD_TYPE_DECIMAL64)
M(FieldType::OLAP_FIELD_TYPE_DECIMAL128I)
M(FieldType::OLAP_FIELD_TYPE_DECIMAL256)
M(FieldType::OLAP_FIELD_TYPE_BOOL)
M(FieldType::OLAP_FIELD_TYPE_IPV4)
M(FieldType::OLAP_FIELD_TYPE_IPV6)
M(FieldType::OLAP_FIELD_TYPE_FLOAT)
M(FieldType::OLAP_FIELD_TYPE_DOUBLE)
M(FieldType::OLAP_FIELD_TYPE_TINYINT)
M(FieldType::OLAP_FIELD_TYPE_SMALLINT)
M(FieldType::OLAP_FIELD_TYPE_INT)
M(FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT)
M(FieldType::OLAP_FIELD_TYPE_BIGINT)
M(FieldType::OLAP_FIELD_TYPE_LARGEINT)
M(FieldType::OLAP_FIELD_TYPE_CHAR)
M(FieldType::OLAP_FIELD_TYPE_VARCHAR)
M(FieldType::OLAP_FIELD_TYPE_STRING)
M(FieldType::OLAP_FIELD_TYPE_DATE)
M(FieldType::OLAP_FIELD_TYPE_DATETIME)
M(FieldType::OLAP_FIELD_TYPE_DECIMAL)
M(FieldType::OLAP_FIELD_TYPE_DATEV2)
M(FieldType::OLAP_FIELD_TYPE_DATETIMEV2)
M(FieldType::OLAP_FIELD_TYPE_DECIMAL32)
M(FieldType::OLAP_FIELD_TYPE_DECIMAL64)
M(FieldType::OLAP_FIELD_TYPE_DECIMAL128I)
M(FieldType::OLAP_FIELD_TYPE_DECIMAL256)
M(FieldType::OLAP_FIELD_TYPE_BOOL)
M(FieldType::OLAP_FIELD_TYPE_IPV4)
M(FieldType::OLAP_FIELD_TYPE_IPV6)
M(FieldType::OLAP_FIELD_TYPE_FLOAT)
M(FieldType::OLAP_FIELD_TYPE_DOUBLE)
#undef M
default:
return Status::NotSupported("unsupported type for inverted index: " +
std::to_string(int(type)));
}
if (*res != nullptr) {
auto st = (*res)->init();
if (!st.ok()) {
(*res)->close_on_error();
return st;
default:
return Status::NotSupported("unsupported type for inverted index: " +
std::to_string(int(type)));
}
if (*res != nullptr) {
auto st = (*res)->init();
if (!st.ok()) {
(*res)->close_on_error();
return st;
}
}
} else if (index_meta->is_ann_index()) {
DCHECK(type == FieldType::OLAP_FIELD_TYPE_ARRAY);
*res = std ::make_unique<AnnIndexColumnWriter>(index_file_writer, index_meta);
if (*res != nullptr) {
auto st = (*res)->init();
if (!st.ok()) {
(*res)->close_on_error();
return st;
}
}
}

return Status::OK();
}

Expand Down
42 changes: 28 additions & 14 deletions be/src/olap/rowset/segment_v2/segment_iterator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -733,6 +733,13 @@ Status SegmentIterator::_get_row_ranges_by_column_conditions() {
return Status::OK();
}

// Reports whether column `cid` can be served by an ANN index: the column must
// have an index iterator, and that iterator must yield a reader for
// AnnIndexReaderType::ANN.
bool SegmentIterator::_column_has_ann_index(int32_t cid) {
    const auto& index_iterator = _index_iterators[cid];
    if (index_iterator == nullptr) {
        // No index iterator was created for this column.
        return false;
    }
    return static_cast<bool>(index_iterator->get_reader(AnnIndexReaderType::ANN));
}

Status SegmentIterator::_apply_ann_topn_predicate() {
if (_ann_topn_runtime == nullptr) {
return Status::OK();
Expand All @@ -742,7 +749,7 @@ Status SegmentIterator::_apply_ann_topn_predicate() {
size_t src_col_idx = _ann_topn_runtime->get_src_column_idx();
ColumnId src_cid = _schema->column_id(src_col_idx);
IndexIterator* ann_index_iterator = _index_iterators[src_cid].get();
bool has_ann_index = ann_index_iterator != nullptr;
bool has_ann_index = _column_has_ann_index(src_cid);
bool has_common_expr_push_down = !_common_expr_ctxs_push_down.empty();
bool has_column_predicate = std::any_of(_is_pred_column.begin(), _is_pred_column.end(),
[](bool is_pred) { return is_pred; });
Expand Down Expand Up @@ -1418,6 +1425,13 @@ Status SegmentIterator::_init_bitmap_index_iterators() {
return Status::OK();
}
for (auto cid : _schema->column_ids()) {
const auto& col = _opts.tablet_schema->column(cid);
int col_uid = col.unique_id() >= 0 ? col.unique_id() : col.parent_unique_id();
// The column is not in this segment
if (!_segment->_tablet_schema->has_column_unique_id(col_uid)) {
continue;
}

if (_bitmap_index_iterators[cid] == nullptr) {
RETURN_IF_ERROR(_segment->new_bitmap_index_iterator(
_opts.tablet_schema->column(cid), _opts, &_bitmap_index_iterators[cid]));
Expand Down Expand Up @@ -1480,14 +1494,14 @@ Status SegmentIterator::_init_index_iterators() {
for (auto cid : _schema->column_ids()) {
if (_index_iterators[cid] == nullptr) {
const auto& column = _opts.tablet_schema->column(cid);
int32_t col_unique_id =
column.is_extracted_column() ? column.parent_unique_id() : column.unique_id();
RETURN_IF_ERROR(_segment->new_index_iterator(
column,
_segment->_tablet_schema->ann_index(col_unique_id, column.suffix_path()), _opts,
&_index_iterators[cid]));
if (_index_iterators[cid] != nullptr) {
_index_iterators[cid]->set_context(_index_query_context);
const auto* index_meta = _segment->_tablet_schema->ann_index(column);
if (index_meta) {
RETURN_IF_ERROR(_segment->new_index_iterator(column, index_meta, _opts,
&_index_iterators[cid]));

if (_index_iterators[cid] != nullptr) {
_index_iterators[cid]->set_context(_index_query_context);
}
}
}
}
Expand Down Expand Up @@ -1681,22 +1695,22 @@ Status SegmentIterator::_seek_columns(const std::vector<ColumnId>& column_ids, r
* This is an estimate, if we want more precise cost, statistics collection is necessary(this is a todo).
* In short, when returned non-pred columns contains string/hll/bitmap, we using Lazy Materialization.
* Otherwise, we disable it.
*
*
* When Lazy Materialization enable, we need to read column at least two times.
* First time to read Pred col, second time to read non-pred.
* Here's an interesting question to research, whether read Pred col once is the best plan.
* (why not read Pred col twice or more?)
*
* When Lazy Materialization disable, we just need to read once.
*
*
*
*
* 2 Whether the predicate type can be evaluate in a fast way(using SIMD to eval pred)
* Such as integer type and float type, they can be eval fast.
* But for BloomFilter/string/date, they eval slow.
* If a type can be eval fast, we use vectorization to eval it.
* Otherwise, we use short-circuit to eval it.
*
*
*
*
*/

// todo(wb) need a UT here
Expand Down
5 changes: 4 additions & 1 deletion be/src/olap/rowset/segment_v2/segment_iterator.h
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,6 @@ class SegmentIterator : public RowwiseIterator {
[[nodiscard]] Status _init_bitmap_index_iterators();
[[nodiscard]] Status _init_index_iterators();

Status _apply_ann_topn_predicate();
// calculate row ranges that fall into requested key ranges using short key index
[[nodiscard]] Status _get_row_ranges_by_keys();
[[nodiscard]] Status _prepare_seek(const StorageReadOptions::KeyRange& key_range);
Expand All @@ -192,13 +191,17 @@ class SegmentIterator : public RowwiseIterator {
// calculate row ranges that satisfy requested column conditions using various column index
[[nodiscard]] Status _get_row_ranges_by_column_conditions();
[[nodiscard]] Status _get_row_ranges_from_conditions(RowRanges* condition_row_ranges);

[[nodiscard]] Status _apply_bitmap_index();
[[nodiscard]] Status _apply_inverted_index();
[[nodiscard]] Status _apply_inverted_index_on_column_predicate(
ColumnPredicate* pred, std::vector<ColumnPredicate*>& remaining_predicates,
bool* continue_apply);
[[nodiscard]] Status _apply_ann_topn_predicate();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[[nodiscard]] is not necessary here, since class Status has already been marked as [[nodiscard]]. You can open another PR to remove all the redundant [[nodiscard]] annotations on functions that return Status.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, got it.

[[nodiscard]] Status _apply_index_expr();

bool _column_has_fulltext_index(int32_t cid);
bool _column_has_ann_index(int32_t cid);
bool _downgrade_without_index(Status res, bool need_remaining = false);
inline bool _inverted_index_not_support_pred_type(const PredicateType& type);
bool _is_literal_node(const TExprNodeType::type& node_type);
Expand Down
4 changes: 2 additions & 2 deletions be/src/olap/tablet_schema.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1544,7 +1544,8 @@ void TabletSchema::update_tablet_columns(const TabletSchema& tablet_schema,

bool TabletSchema::has_inverted_index_with_index_id(int64_t index_id) const {
for (size_t i = 0; i < _indexes.size(); i++) {
if (_indexes[i]->index_type() == IndexType::INVERTED &&
if ((_indexes[i]->index_type() == IndexType::INVERTED ||
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The function name has_inverted_index_with_index_id is unchanged even though it now also matches ANN indexes. Should it be renamed, or do we need a separate method?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To keep this change as small as possible (and easier to review), the function has not been renamed for now. The same naming problem exists in the FE as well as the BE.

_indexes[i]->index_type() == IndexType::ANN) &&
_indexes[i]->index_id() == index_id) {
return true;
}
Expand Down Expand Up @@ -1645,7 +1646,6 @@ const TabletIndex* TabletSchema::ann_index(int32_t col_unique_id,
}

const TabletIndex* TabletSchema::ann_index(const TabletColumn& col) const {
// Some columns(Float, Double, JSONB ...) from the variant do not support inverted index
if (!segment_v2::IndexColumnWriter::check_support_ann_index(col)) {
return nullptr;
}
Expand Down
2 changes: 2 additions & 0 deletions be/src/olap/tablet_schema.h
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,8 @@ class TabletIndex : public MetadataAdder<TabletIndex> {

// True when this index's type is IndexType::INVERTED.
bool is_inverted_index() const {
    return _index_type == IndexType::INVERTED;
}

// True when this index's type is IndexType::ANN.
bool is_ann_index() const {
    return _index_type == IndexType::ANN;
}

void remove_parser_and_analyzer() {
_properties.erase(INVERTED_INDEX_PARSER_KEY);
_properties.erase(INVERTED_INDEX_PARSER_KEY_ALIAS);
Expand Down
Loading
Loading