2828#include < memory>
2929#include < numeric>
3030#include < set>
31+ #include < unordered_map>
3132#include < utility>
3233#include < vector>
3334
@@ -701,6 +702,8 @@ Status SegmentIterator::_apply_ann_topn_predicate() {
701702 " Ann topn can not be evaluated by ann index, has_ann_index: {}, "
702703 " has_common_expr_push_down: {}, has_column_predicate: {}" ,
703704 has_ann_index, has_common_expr_push_down, has_column_predicate);
705+ // Disable index-only scan on ann indexed column.
706+ _need_read_data_indices[src_cid] = true ;
704707 return Status::OK ();
705708 }
706709
@@ -712,11 +715,15 @@ Status SegmentIterator::_apply_ann_topn_predicate() {
712715 if (_ann_topn_runtime->is_asc ()) {
713716 VLOG_DEBUG << fmt::format (
714717 " Asc topn for inner product can not be evaluated by ann index" );
718+ // Disable index-only scan on ann indexed column.
719+ _need_read_data_indices[src_cid] = true ;
715720 return Status::OK ();
716721 }
717722 } else {
718723 if (!_ann_topn_runtime->is_asc ()) {
719724 VLOG_DEBUG << fmt::format (" Desc topn for l2/cosine can not be evaluated by ann index" );
725+ // Disable index-only scan on ann indexed column.
726+ _need_read_data_indices[src_cid] = true ;
720727 return Status::OK ();
721728 }
722729 }
@@ -727,6 +734,8 @@ Status SegmentIterator::_apply_ann_topn_predicate() {
727734 " ann index" ,
728735 metric_to_string (_ann_topn_runtime->get_metric_type ()),
729736 metric_to_string (ann_index_reader->get_metric_type ()));
737+ // Disable index-only scan on ann indexed column.
738+ _need_read_data_indices[src_cid] = true ;
730739 return Status::OK ();
731740 }
732741
@@ -738,6 +747,8 @@ Status SegmentIterator::_apply_ann_topn_predicate() {
738747 " to "
739748 " filter" ,
740749 pre_size, rows_of_segment);
750+ // Disable index-only scan on ann indexed column.
751+ _need_read_data_indices[src_cid] = true ;
741752 return Status::OK ();
742753 }
743754 vectorized::IColumn::MutablePtr result_column;
@@ -772,6 +783,10 @@ Status SegmentIterator::_apply_ann_topn_predicate() {
772783 virtual_column_iter->prepare_materialization (std::move (result_column),
773784 std::move (result_row_ids));
774785
786+ _need_read_data_indices[src_cid] = false ;
787+ VLOG_DEBUG << fmt::format (
788+ " Enable ANN index-only scan for src column cid {} (skip reading data pages)" , src_cid);
789+
775790 return Status::OK ();
776791}
777792
@@ -1044,9 +1059,9 @@ Status SegmentIterator::_apply_index_expr() {
10441059 segment_v2::AnnIndexStats ann_index_stats;
10451060 for (const auto & expr_ctx : _common_expr_ctxs_push_down) {
10461061 size_t origin_rows = _row_bitmap.cardinality ();
1047- RETURN_IF_ERROR (expr_ctx->evaluate_ann_range_search (_index_iterators, _schema-> column_ids (),
1048- _column_iterators, _row_bitmap ,
1049- ann_index_stats));
1062+ RETURN_IF_ERROR (expr_ctx->evaluate_ann_range_search (
1063+ _index_iterators, _schema-> column_ids (), _column_iterators ,
1064+ _common_expr_to_slotref_map, _row_bitmap, ann_index_stats));
10501065 _opts.stats ->rows_ann_index_range_filtered += (origin_rows - _row_bitmap.cardinality ());
10511066 _opts.stats ->ann_index_load_ns += ann_index_stats.load_index_costs_ns .value ();
10521067 _opts.stats ->ann_index_range_search_ns += ann_index_stats.search_costs_ns .value ();
@@ -1057,7 +1072,7 @@ Status SegmentIterator::_apply_index_expr() {
10571072 }
10581073
10591074 for (auto it = _common_expr_ctxs_push_down.begin (); it != _common_expr_ctxs_push_down.end ();) {
1060- if ((*it)->root ()->has_been_executed ()) {
1075+ if ((*it)->root ()->ann_range_search_executedd ()) {
10611076 _opts.stats ->ann_index_range_search_cnt ++;
10621077 it = _common_expr_ctxs_push_down.erase (it);
10631078 } else {
@@ -1808,14 +1823,6 @@ Status SegmentIterator::_vec_init_lazy_materialization() {
18081823 if (pred_id_set.find (cid) != pred_id_set.end ()) {
18091824 _predicate_column_ids.push_back (cid);
18101825 }
1811- // In the past, if schema columns > pred columns, the _lazy_materialization_read maybe == false, but
1812- // we make sure using _lazy_materialization_read= true now, so these logic may never happens. I comment
1813- // these lines and we could delete them in the future to make the code more clear.
1814- // else if (non_pred_set.find(cid) != non_pred_set.end()) {
1815- // _predicate_column_ids.push_back(cid);
1816- // // when _lazy_materialization_read = false, non-predicate column should also be filtered by sel idx, so we regard it as pred columns
1817- // _is_pred_column[cid] = true;
1818- // }
18191826 }
18201827 } else if (_is_need_expr_eval) {
18211828 DCHECK (!_is_need_vec_eval && !_is_need_short_eval);
@@ -2029,8 +2036,9 @@ Status SegmentIterator::_output_non_pred_columns(vectorized::Block* block) {
20292036 if (column_in_block_is_nothing || column_is_normal) {
20302037 block->replace_by_position (loc, std::move (_current_return_columns[cid]));
20312038 VLOG_DEBUG << fmt::format (
2032- " Output non-predicate column, cid: {}, loc: {}, col_name: {}" , cid, loc,
2033- _schema->column (cid)->name ());
2039+ " Output non-predicate column, cid: {}, loc: {}, col_name: {}, rows {}" , cid,
2040+ loc, _schema->column (cid)->name (),
2041+ block->get_by_position (loc).column ->size ());
20342042 }
20352043 // Means virtual column in block has been materialized(maybe by common expr).
20362044 // so do nothing here.
@@ -2073,6 +2081,8 @@ Status SegmentIterator::_read_columns_by_index(uint32_t nrows_read_limit, uint16
20732081
20742082 for (auto cid : _predicate_column_ids) {
20752083 auto & column = _current_return_columns[cid];
2084+ VLOG_DEBUG << fmt::format (" Reading column {}, col_name {}" , cid,
2085+ _schema->column (cid)->name ());
20762086 if (!_virtual_column_exprs.contains (cid)) {
20772087 if (_no_need_read_key_data (cid, column, nrows_read)) {
20782088 VLOG_DEBUG << fmt::format (" Column {} no need to read." , cid);
@@ -2822,6 +2832,8 @@ void SegmentIterator::_calculate_expr_in_remaining_conjunct_root() {
28222832 if (root_expr == nullptr ) {
28232833 continue ;
28242834 }
2835+ _common_expr_to_slotref_map[root_expr_ctx.get ()] =
2836+ std::unordered_map<ColumnId, vectorized::VExpr*>();
28252837
28262838 std::stack<vectorized::VExprSPtr> stack;
28272839 stack.emplace (root_expr);
@@ -2831,10 +2843,53 @@ void SegmentIterator::_calculate_expr_in_remaining_conjunct_root() {
28312843 stack.pop ();
28322844
28332845 for (const auto & child : expr->children ()) {
2846+ if (child->is_virtual_slot_ref ()) {
2847+ // Expand virtual slot ref to its underlying expression tree and
2848+ // collect real slot refs used inside. We still associate those
2849+ // slot refs with the current parent expr node for inverted index
2850+ // tracking, just like normal slot refs.
2851+ auto * vir_slot_ref = assert_cast<vectorized::VirtualSlotRef*>(child.get ());
2852+ auto vir_expr = vir_slot_ref->get_virtual_column_expr ();
2853+ if (vir_expr) {
2854+ std::stack<vectorized::VExprSPtr> vir_stack;
2855+ vir_stack.emplace (vir_expr);
2856+
2857+ while (!vir_stack.empty ()) {
2858+ const auto & vir_node = vir_stack.top ();
2859+ vir_stack.pop ();
2860+
2861+ for (const auto & vir_child : vir_node->children ()) {
2862+ if (vir_child->is_slot_ref ()) {
2863+ auto * inner_slot_ref =
2864+ assert_cast<vectorized::VSlotRef*>(vir_child.get ());
2865+ _common_expr_inverted_index_status[_schema->column_id (
2866+ inner_slot_ref->column_id ())][expr.get ()] = false ;
2867+ _common_expr_to_slotref_map[root_expr_ctx.get ()]
2868+ [inner_slot_ref->column_id ()] =
2869+ expr.get ();
2870+ // Print debug info for virtual slot expansion
2871+ LOG (INFO) << fmt::format (
2872+ " common_expr_ctx_ptr: {}, expr_ptr: {}, "
2873+ " virtual_slotref_ptr: {}, inner_slotref_ptr: {}, "
2874+ " column_id: {}" ,
2875+ fmt::ptr (root_expr_ctx.get ()), fmt::ptr (expr.get ()),
2876+ fmt::ptr (child.get ()), fmt::ptr (vir_child.get ()),
2877+ inner_slot_ref->column_id ());
2878+ }
2879+
2880+ if (!vir_child->children ().empty ()) {
2881+ vir_stack.emplace (vir_child);
2882+ }
2883+ }
2884+ }
2885+ }
2886+ }
28342887 if (child->is_slot_ref ()) {
28352888 auto * column_slot_ref = assert_cast<vectorized::VSlotRef*>(child.get ());
28362889 _common_expr_inverted_index_status[_schema->column_id (
28372890 column_slot_ref->column_id ())][expr.get ()] = false ;
2891+ _common_expr_to_slotref_map[root_expr_ctx.get ()][column_slot_ref->column_id ()] =
2892+ expr.get ();
28382893 }
28392894 }
28402895
0 commit comments