Skip to content

Commit 1731994

Browse files
branch-4.0: [ann] Ann index only scan #57243 (#57758)
cherry pick from #57243
1 parent 67be2a1 commit 1731994

File tree

18 files changed

+1101
-53
lines changed

18 files changed

+1101
-53
lines changed

be/src/olap/rowset/segment_v2/segment_iterator.cpp

Lines changed: 69 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#include <memory>
2929
#include <numeric>
3030
#include <set>
31+
#include <unordered_map>
3132
#include <utility>
3233
#include <vector>
3334

@@ -701,6 +702,8 @@ Status SegmentIterator::_apply_ann_topn_predicate() {
701702
"Ann topn can not be evaluated by ann index, has_ann_index: {}, "
702703
"has_common_expr_push_down: {}, has_column_predicate: {}",
703704
has_ann_index, has_common_expr_push_down, has_column_predicate);
705+
// Disable index-only scan on ann indexed column.
706+
_need_read_data_indices[src_cid] = true;
704707
return Status::OK();
705708
}
706709

@@ -712,11 +715,15 @@ Status SegmentIterator::_apply_ann_topn_predicate() {
712715
if (_ann_topn_runtime->is_asc()) {
713716
VLOG_DEBUG << fmt::format(
714717
"Asc topn for inner product can not be evaluated by ann index");
718+
// Disable index-only scan on ann indexed column.
719+
_need_read_data_indices[src_cid] = true;
715720
return Status::OK();
716721
}
717722
} else {
718723
if (!_ann_topn_runtime->is_asc()) {
719724
VLOG_DEBUG << fmt::format("Desc topn for l2/cosine can not be evaluated by ann index");
725+
// Disable index-only scan on ann indexed column.
726+
_need_read_data_indices[src_cid] = true;
720727
return Status::OK();
721728
}
722729
}
@@ -727,6 +734,8 @@ Status SegmentIterator::_apply_ann_topn_predicate() {
727734
"ann index",
728735
metric_to_string(_ann_topn_runtime->get_metric_type()),
729736
metric_to_string(ann_index_reader->get_metric_type()));
737+
// Disable index-only scan on ann indexed column.
738+
_need_read_data_indices[src_cid] = true;
730739
return Status::OK();
731740
}
732741

@@ -738,6 +747,8 @@ Status SegmentIterator::_apply_ann_topn_predicate() {
738747
"to "
739748
"filter",
740749
pre_size, rows_of_segment);
750+
// Disable index-only scan on ann indexed column.
751+
_need_read_data_indices[src_cid] = true;
741752
return Status::OK();
742753
}
743754
vectorized::IColumn::MutablePtr result_column;
@@ -772,6 +783,10 @@ Status SegmentIterator::_apply_ann_topn_predicate() {
772783
virtual_column_iter->prepare_materialization(std::move(result_column),
773784
std::move(result_row_ids));
774785

786+
_need_read_data_indices[src_cid] = false;
787+
VLOG_DEBUG << fmt::format(
788+
"Enable ANN index-only scan for src column cid {} (skip reading data pages)", src_cid);
789+
775790
return Status::OK();
776791
}
777792

@@ -1044,9 +1059,9 @@ Status SegmentIterator::_apply_index_expr() {
10441059
segment_v2::AnnIndexStats ann_index_stats;
10451060
for (const auto& expr_ctx : _common_expr_ctxs_push_down) {
10461061
size_t origin_rows = _row_bitmap.cardinality();
1047-
RETURN_IF_ERROR(expr_ctx->evaluate_ann_range_search(_index_iterators, _schema->column_ids(),
1048-
_column_iterators, _row_bitmap,
1049-
ann_index_stats));
1062+
RETURN_IF_ERROR(expr_ctx->evaluate_ann_range_search(
1063+
_index_iterators, _schema->column_ids(), _column_iterators,
1064+
_common_expr_to_slotref_map, _row_bitmap, ann_index_stats));
10501065
_opts.stats->rows_ann_index_range_filtered += (origin_rows - _row_bitmap.cardinality());
10511066
_opts.stats->ann_index_load_ns += ann_index_stats.load_index_costs_ns.value();
10521067
_opts.stats->ann_index_range_search_ns += ann_index_stats.search_costs_ns.value();
@@ -1057,7 +1072,7 @@ Status SegmentIterator::_apply_index_expr() {
10571072
}
10581073

10591074
for (auto it = _common_expr_ctxs_push_down.begin(); it != _common_expr_ctxs_push_down.end();) {
1060-
if ((*it)->root()->has_been_executed()) {
1075+
if ((*it)->root()->ann_range_search_executedd()) {
10611076
_opts.stats->ann_index_range_search_cnt++;
10621077
it = _common_expr_ctxs_push_down.erase(it);
10631078
} else {
@@ -1808,14 +1823,6 @@ Status SegmentIterator::_vec_init_lazy_materialization() {
18081823
if (pred_id_set.find(cid) != pred_id_set.end()) {
18091824
_predicate_column_ids.push_back(cid);
18101825
}
1811-
// In the past, if schema columns > pred columns, the _lazy_materialization_read maybe == false, but
1812-
// we make sure using _lazy_materialization_read= true now, so these logic may never happens. I comment
1813-
// these lines and we could delete them in the future to make the code more clear.
1814-
// else if (non_pred_set.find(cid) != non_pred_set.end()) {
1815-
// _predicate_column_ids.push_back(cid);
1816-
// // when _lazy_materialization_read = false, non-predicate column should also be filtered by sel idx, so we regard it as pred columns
1817-
// _is_pred_column[cid] = true;
1818-
// }
18191826
}
18201827
} else if (_is_need_expr_eval) {
18211828
DCHECK(!_is_need_vec_eval && !_is_need_short_eval);
@@ -2029,8 +2036,9 @@ Status SegmentIterator::_output_non_pred_columns(vectorized::Block* block) {
20292036
if (column_in_block_is_nothing || column_is_normal) {
20302037
block->replace_by_position(loc, std::move(_current_return_columns[cid]));
20312038
VLOG_DEBUG << fmt::format(
2032-
"Output non-predicate column, cid: {}, loc: {}, col_name: {}", cid, loc,
2033-
_schema->column(cid)->name());
2039+
"Output non-predicate column, cid: {}, loc: {}, col_name: {}, rows {}", cid,
2040+
loc, _schema->column(cid)->name(),
2041+
block->get_by_position(loc).column->size());
20342042
}
20352043
// Means virtual column in block has been materialized(maybe by common expr).
20362044
// so do nothing here.
@@ -2073,6 +2081,8 @@ Status SegmentIterator::_read_columns_by_index(uint32_t nrows_read_limit, uint16
20732081

20742082
for (auto cid : _predicate_column_ids) {
20752083
auto& column = _current_return_columns[cid];
2084+
VLOG_DEBUG << fmt::format("Reading column {}, col_name {}", cid,
2085+
_schema->column(cid)->name());
20762086
if (!_virtual_column_exprs.contains(cid)) {
20772087
if (_no_need_read_key_data(cid, column, nrows_read)) {
20782088
VLOG_DEBUG << fmt::format("Column {} no need to read.", cid);
@@ -2822,6 +2832,8 @@ void SegmentIterator::_calculate_expr_in_remaining_conjunct_root() {
28222832
if (root_expr == nullptr) {
28232833
continue;
28242834
}
2835+
_common_expr_to_slotref_map[root_expr_ctx.get()] =
2836+
std::unordered_map<ColumnId, vectorized::VExpr*>();
28252837

28262838
std::stack<vectorized::VExprSPtr> stack;
28272839
stack.emplace(root_expr);
@@ -2831,10 +2843,53 @@ void SegmentIterator::_calculate_expr_in_remaining_conjunct_root() {
28312843
stack.pop();
28322844

28332845
for (const auto& child : expr->children()) {
2846+
if (child->is_virtual_slot_ref()) {
2847+
// Expand virtual slot ref to its underlying expression tree and
2848+
// collect real slot refs used inside. We still associate those
2849+
// slot refs with the current parent expr node for inverted index
2850+
// tracking, just like normal slot refs.
2851+
auto* vir_slot_ref = assert_cast<vectorized::VirtualSlotRef*>(child.get());
2852+
auto vir_expr = vir_slot_ref->get_virtual_column_expr();
2853+
if (vir_expr) {
2854+
std::stack<vectorized::VExprSPtr> vir_stack;
2855+
vir_stack.emplace(vir_expr);
2856+
2857+
while (!vir_stack.empty()) {
2858+
const auto& vir_node = vir_stack.top();
2859+
vir_stack.pop();
2860+
2861+
for (const auto& vir_child : vir_node->children()) {
2862+
if (vir_child->is_slot_ref()) {
2863+
auto* inner_slot_ref =
2864+
assert_cast<vectorized::VSlotRef*>(vir_child.get());
2865+
_common_expr_inverted_index_status[_schema->column_id(
2866+
inner_slot_ref->column_id())][expr.get()] = false;
2867+
_common_expr_to_slotref_map[root_expr_ctx.get()]
2868+
[inner_slot_ref->column_id()] =
2869+
expr.get();
2870+
// Print debug info for virtual slot expansion
2871+
LOG(INFO) << fmt::format(
2872+
"common_expr_ctx_ptr: {}, expr_ptr: {}, "
2873+
"virtual_slotref_ptr: {}, inner_slotref_ptr: {}, "
2874+
"column_id: {}",
2875+
fmt::ptr(root_expr_ctx.get()), fmt::ptr(expr.get()),
2876+
fmt::ptr(child.get()), fmt::ptr(vir_child.get()),
2877+
inner_slot_ref->column_id());
2878+
}
2879+
2880+
if (!vir_child->children().empty()) {
2881+
vir_stack.emplace(vir_child);
2882+
}
2883+
}
2884+
}
2885+
}
2886+
}
28342887
if (child->is_slot_ref()) {
28352888
auto* column_slot_ref = assert_cast<vectorized::VSlotRef*>(child.get());
28362889
_common_expr_inverted_index_status[_schema->column_id(
28372890
column_slot_ref->column_id())][expr.get()] = false;
2891+
_common_expr_to_slotref_map[root_expr_ctx.get()][column_slot_ref->column_id()] =
2892+
expr.get();
28382893
}
28392894
}
28402895

be/src/olap/rowset/segment_v2/segment_iterator.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -506,6 +506,9 @@ class SegmentIterator : public RowwiseIterator {
506506

507507
// key is column uid, value is the sparse column cache
508508
std::unordered_map<int32_t, PathToSparseColumnCacheUPtr> _variant_sparse_column_cache;
509+
510+
std::unordered_map<vectorized::VExprContext*, std::unordered_map<ColumnId, vectorized::VExpr*>>
511+
_common_expr_to_slotref_map;
509512
};
510513

511514
} // namespace segment_v2

be/src/olap/rowset/segment_v2/virtual_column_iterator.cpp

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include <cstring>
2222
#include <memory>
2323

24+
#include "common/logging.h"
2425
#include "vec/columns/column.h"
2526
#include "vec/columns/column_nothing.h"
2627

@@ -79,12 +80,15 @@ void VirtualColumnIterator::prepare_materialization(vectorized::IColumn::Ptr col
7980

8081
_size = n;
8182

82-
std::string msg;
83-
for (const auto& pair : _row_id_to_idx) {
84-
msg += fmt::format("{}: {}, ", pair.first, pair.second);
83+
if (VLOG_DEBUG_IS_ON) {
84+
std::string msg;
85+
for (const auto& pair : _row_id_to_idx) {
86+
msg += fmt::format("{}: {}, ", pair.first, pair.second);
87+
}
88+
89+
VLOG_DEBUG << fmt::format("virtual column iterator, row_idx_to_idx:\n{}", msg);
8590
}
8691

87-
VLOG_DEBUG << fmt::format("virtual column iterator, row_idx_to_idx:\n{}", msg);
8892
_filter = doris::vectorized::IColumn::Filter(_size, 0);
8993
}
9094

be/src/pipeline/exec/olap_scan_operator.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -444,7 +444,7 @@ Status OlapScanLocalState::_init_scanners(std::list<vectorized::ScannerSPtr>* sc
444444
auto& p = _parent->cast<OlapScanOperatorX>();
445445

446446
for (auto uid : p._olap_scan_node.output_column_unique_ids) {
447-
_maybe_read_column_ids.emplace(uid);
447+
_output_column_ids.emplace(uid);
448448
}
449449

450450
// ranges constructed from scan keys

be/src/pipeline/exec/olap_scan_operator.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ class OlapScanLocalState final : public ScanLocalState<OlapScanLocalState> {
111111
OlapScanKeys _scan_keys;
112112
std::vector<FilterOlapParam<TCondition>> _olap_filters;
113113
// If column id in this set, indicate that we need to read data after index filtering
114-
std::set<int32_t> _maybe_read_column_ids;
114+
std::set<int32_t> _output_column_ids;
115115

116116
std::unique_ptr<RuntimeProfile> _segment_profile;
117117
std::unique_ptr<RuntimeProfile> _index_filter_profile;

be/src/vec/exec/scan/olap_scanner.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -339,7 +339,7 @@ Status OlapScanner::_init_tablet_reader_params(
339339
_tablet_reader_params.vir_col_idx_to_type = _vir_col_idx_to_type;
340340
_tablet_reader_params.score_runtime = _score_runtime;
341341
_tablet_reader_params.output_columns =
342-
((pipeline::OlapScanLocalState*)_local_state)->_maybe_read_column_ids;
342+
((pipeline::OlapScanLocalState*)_local_state)->_output_column_ids;
343343
_tablet_reader_params.ann_topn_runtime = _ann_topn_runtime;
344344
for (const auto& ele :
345345
((pipeline::OlapScanLocalState*)_local_state)->_cast_types_for_variants) {

be/src/vec/exprs/vectorized_fn_call.cpp

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -605,15 +605,31 @@ Status VectorizedFnCall::evaluate_ann_range_search(
605605
}
606606
virtual_column_iterator->prepare_materialization(std::move(distance_col),
607607
std::move(result.row_ids));
608+
_virtual_column_is_fulfilled = true;
608609
} else {
609-
DCHECK(this->op() != TExprOpcode::LE && this->op() != TExprOpcode::LT)
610-
<< "Should not have distance";
610+
// Whether the ANN index should have produced distance depends on metric and operator:
611+
// - L2: distance is produced for LE/LT; not produced for GE/GT
612+
// - IP: distance is produced for GE/GT; not produced for LE/LT
613+
#ifndef NDEBUG
614+
const bool should_have_distance =
615+
(range_search_runtime.is_le_or_lt &&
616+
range_search_runtime.metric_type == AnnIndexMetric::L2) ||
617+
(!range_search_runtime.is_le_or_lt &&
618+
range_search_runtime.metric_type == AnnIndexMetric::IP);
619+
// If we expected distance but didn't get it, assert in debug to catch logic errors.
620+
DCHECK(!should_have_distance) << "Expected distance from ANN index but got none";
621+
#endif
622+
_virtual_column_is_fulfilled = false;
611623
}
624+
} else {
625+
// Dest is not virtual column.
626+
_virtual_column_is_fulfilled = true;
612627
}
613628

614629
_has_been_executed = true;
615-
VLOG_DEBUG << fmt::format("Ann range search filtered {} rows, origin {} rows",
616-
origin_num - row_bitmap.cardinality(), origin_num);
630+
VLOG_DEBUG << fmt::format(
631+
"Ann range search filtered {} rows, origin {} rows, virtual column is full-filled: {}",
632+
origin_num - row_bitmap.cardinality(), origin_num, _virtual_column_is_fulfilled);
617633

618634
ann_index_stats = *stats;
619635
return Status::OK();

be/src/vec/exprs/vexpr.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1002,9 +1002,13 @@ void VExpr::prepare_ann_range_search(const doris::VectorSearchUserParams& params
10021002
}
10031003
}
10041004

1005-
bool VExpr::has_been_executed() {
1005+
bool VExpr::ann_range_search_executedd() {
10061006
return _has_been_executed;
10071007
}
10081008

1009+
bool VExpr::ann_dist_is_fulfilled() const {
1010+
return _virtual_column_is_fulfilled;
1011+
}
1012+
10091013
#include "common/compile_check_end.h"
10101014
} // namespace doris::vectorized

be/src/vec/exprs/vexpr.h

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,8 @@ class VExpr {
167167

168168
bool is_slot_ref() const { return _node_type == TExprNodeType::SLOT_REF; }
169169

170+
bool is_virtual_slot_ref() const { return _node_type == TExprNodeType::VIRTUAL_SLOT_REF; }
171+
170172
bool is_column_ref() const { return _node_type == TExprNodeType::COLUMN_REF; }
171173

172174
virtual bool is_literal() const { return false; }
@@ -308,7 +310,9 @@ class VExpr {
308310
segment_v2::AnnRangeSearchRuntime& range_search_runtime,
309311
bool& suitable_for_ann_index);
310312

311-
bool has_been_executed();
313+
bool ann_range_search_executedd();
314+
315+
bool ann_dist_is_fulfilled() const;
312316

313317
protected:
314318
/// Simple debug string that provides no expr subclass-specific information
@@ -394,7 +398,12 @@ class VExpr {
394398
uint32_t _index_unique_id = 0;
395399
bool _enable_inverted_index_query = true;
396400

401+
// Indicates whether the expr row_bitmap has been updated.
397402
bool _has_been_executed = false;
403+
// Indicates whether the virtual column is fulfilled.
404+
// NOTE, if there is no virtual column in the expr tree, and expr
405+
// is evaluated by ann index, this flag is still true.
406+
bool _virtual_column_is_fulfilled = false;
398407
};
399408

400409
} // namespace vectorized

be/src/vec/exprs/vexpr_context.cpp

Lines changed: 39 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@
2525
#include "common/cast_set.h"
2626
#include "common/compiler_util.h" // IWYU pragma: keep
2727
#include "common/exception.h"
28+
#include "common/status.h"
29+
#include "olap/olap_common.h"
2830
#include "runtime/runtime_state.h"
2931
#include "runtime/thread_context.h"
3032
#include "udf/udf.h"
@@ -463,12 +465,45 @@ Status VExprContext::evaluate_ann_range_search(
463465
const std::vector<std::unique_ptr<segment_v2::IndexIterator>>& cid_to_index_iterators,
464466
const std::vector<ColumnId>& idx_to_cid,
465467
const std::vector<std::unique_ptr<segment_v2::ColumnIterator>>& column_iterators,
468+
const std::unordered_map<vectorized::VExprContext*,
469+
std::unordered_map<ColumnId, vectorized::VExpr*>>&
470+
common_expr_to_slotref_map,
466471
roaring::Roaring& row_bitmap, segment_v2::AnnIndexStats& ann_index_stats) {
467-
if (_root != nullptr) {
468-
return _root->evaluate_ann_range_search(_ann_range_search_runtime, cid_to_index_iterators,
469-
idx_to_cid, column_iterators, row_bitmap,
470-
ann_index_stats);
472+
if (_root == nullptr) {
473+
return Status::OK();
474+
}
475+
476+
RETURN_IF_ERROR(_root->evaluate_ann_range_search(
477+
_ann_range_search_runtime, cid_to_index_iterators, idx_to_cid, column_iterators,
478+
row_bitmap, ann_index_stats));
479+
480+
if (!_root->ann_range_search_executedd()) {
481+
return Status::OK();
482+
}
483+
484+
if (!_root->ann_dist_is_fulfilled()) {
485+
// Do not perform index scan in this case.
486+
return Status::OK();
487+
}
488+
489+
auto src_col_idx = _ann_range_search_runtime.src_col_idx;
490+
auto slot_ref_map_it = common_expr_to_slotref_map.find(this);
491+
if (slot_ref_map_it == common_expr_to_slotref_map.end()) {
492+
return Status::OK();
493+
}
494+
auto& slot_ref_map = slot_ref_map_it->second;
495+
ColumnId cid = idx_to_cid[src_col_idx];
496+
if (slot_ref_map.find(cid) == slot_ref_map.end()) {
497+
return Status::OK();
471498
}
499+
const VExpr* slot_ref_expr_addr = slot_ref_map.find(cid)->second;
500+
_inverted_index_context->set_true_for_inverted_index_status(slot_ref_expr_addr,
501+
idx_to_cid[cid]);
502+
503+
VLOG_DEBUG << fmt::format(
504+
"Evaluate ann range search for expr {}, src_col_idx {}, cid {}, row_bitmap "
505+
"cardinality {}",
506+
_root->debug_string(), src_col_idx, cid, row_bitmap.cardinality());
472507
return Status::OK();
473508
}
474509

0 commit comments

Comments
 (0)