From 990223d5fca409917f20290bdbeeb4ec439dca5a Mon Sep 17 00:00:00 2001 From: Gabriel Date: Mon, 24 Nov 2025 15:21:55 +0800 Subject: [PATCH 1/4] [refactor](scan) Remove colname_to_value_range from OlapTableScan --- be/src/pipeline/exec/olap_scan_operator.cpp | 19 ++++++++++++------- be/src/pipeline/exec/scan_operator.cpp | 15 ++++----------- be/src/pipeline/exec/scan_operator.h | 7 +------ 3 files changed, 17 insertions(+), 24 deletions(-) diff --git a/be/src/pipeline/exec/olap_scan_operator.cpp b/be/src/pipeline/exec/olap_scan_operator.cpp index 9080fbd581b9b5..63b2a6f68de3a6 100644 --- a/be/src/pipeline/exec/olap_scan_operator.cpp +++ b/be/src/pipeline/exec/olap_scan_operator.cpp @@ -892,10 +892,15 @@ Status OlapScanLocalState::_build_key_ranges_and_filters() { for (int column_index = 0; column_index < column_names.size() && !_scan_keys.has_range_value() && !eos && !should_break; ++column_index) { - auto iter = _colname_to_value_range.find(column_names[column_index]); - if (_colname_to_value_range.end() == iter) { + if (p._colname_to_slot_id.find(column_names[column_index]) == + p._colname_to_slot_id.end()) { break; } + auto iter = _slot_id_to_value_range.find(p._colname_to_slot_id[column_names[column_index]]); + if (_slot_id_to_value_range.end() == iter) { + break; + } + const auto& value_range = iter->second.second; RETURN_IF_ERROR(std::visit( [&](auto&& range) { @@ -908,11 +913,11 @@ Status OlapScanLocalState::_build_key_ranges_and_filters() { _scan_keys.extend_scan_key(temp_range, p._max_scan_key_num, &exact_range, &eos, &should_break)); if (exact_range) { - _colname_to_value_range.erase(iter->first); + _slot_id_to_value_range.erase(iter->first); } } else { // if exceed max_pushdown_conditions_per_column, use whole_value_rang instead - // and will not erase from _colname_to_value_range, it must be not exact_range + // and will not erase from _slot_id_to_value_range, it must be not exact_range temp_range.set_whole_value_range(); RETURN_IF_ERROR( _scan_keys.extend_scan_key(temp_range, p._max_scan_key_num, @@ -920,16 +925,16 @@ Status OlapScanLocalState::_build_key_ranges_and_filters() { } return Status::OK(); }, - iter->second)); + value_range)); } if (eos) { _eos = true; _scan_dependency->set_ready(); } - for (auto& iter : _colname_to_value_range) { + for (auto& iter : _slot_id_to_value_range) { std::vector> filters; - std::visit([&](auto&& range) { range.to_olap_filter(filters); }, iter.second); + std::visit([&](auto&& range) { range.to_olap_filter(filters); }, iter.second.second); for (const auto& filter : filters) { _olap_filters.emplace_back(filter); diff --git a/be/src/pipeline/exec/scan_operator.cpp b/be/src/pipeline/exec/scan_operator.cpp index 886d2f919e5a32..bdfcd51c08a077 100644 --- a/be/src/pipeline/exec/scan_operator.cpp +++ b/be/src/pipeline/exec/scan_operator.cpp @@ -236,17 +236,6 @@ Status ScanLocalState::_normalize_conjuncts(RuntimeState* state) { } ++it; } - for (auto& it : _slot_id_to_value_range) { - std::visit( - [&](auto&& range) { - if (range.is_empty_value_range()) { - _eos = true; - _scan_dependency->set_ready(); - } - }, - it.second.second); - _colname_to_value_range[it.second.first->col_name()] = it.second.second; - } return Status::OK(); } @@ -360,6 +349,10 @@ Status ScanLocalState::_normalize_predicate( _normalize_function_filters(cur_expr, context, slot, &pdt), status); } + if (value_range.is_empty_value_range()) { + _eos = true; + _scan_dependency->set_ready(); + } }, *range); RETURN_IF_ERROR(status); diff --git a/be/src/pipeline/exec/scan_operator.h b/be/src/pipeline/exec/scan_operator.h index b7647c40e2a7d1..a40ed1921354b6 100644 --- a/be/src/pipeline/exec/scan_operator.h +++ b/be/src/pipeline/exec/scan_operator.h @@ -320,21 +320,16 @@ class ScanLocalState : public ScanLocalStateBase { // Parsed from conjuncts phmap::flat_hash_map> _slot_id_to_value_range; - // column -> ColumnValueRange - // We use _colname_to_value_range to store a column and its conresponding value ranges. - std::unordered_map _colname_to_value_range; // But if a col is with value range, eg: 1 < col < 10, which is "!is_fixed_range", // in this case we can not merge "1 < col < 10" with "col not in (2)". // So we have to save "col not in (2)" to another structure: "_not_in_value_ranges". // When the data source try to use the value ranges, it should use both ranges in - // "_colname_to_value_range" and in "_not_in_value_ranges" + // "_slot_id_to_value_range" and in "_not_in_value_ranges" std::vector _not_in_value_ranges; std::atomic _eos = false; - std::mutex _block_lock; - std::vector> _filter_dependencies; // ScanLocalState owns the ownership of scanner, scanner context only has its weakptr From eb27550588851a98903688e66deeebe9a8068826 Mon Sep 17 00:00:00 2001 From: Gabriel Date: Mon, 24 Nov 2025 15:26:28 +0800 Subject: [PATCH 2/4] update --- be/src/pipeline/exec/olap_scan_operator.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/be/src/pipeline/exec/olap_scan_operator.cpp b/be/src/pipeline/exec/olap_scan_operator.cpp index 63b2a6f68de3a6..bba02fac00e59d 100644 --- a/be/src/pipeline/exec/olap_scan_operator.cpp +++ b/be/src/pipeline/exec/olap_scan_operator.cpp @@ -896,7 +896,8 @@ Status OlapScanLocalState::_build_key_ranges_and_filters() { p._colname_to_slot_id.end()) { break; } - auto iter = _slot_id_to_value_range.find(p._colname_to_slot_id[column_names[column_index]]); + auto iter = + _slot_id_to_value_range.find(p._colname_to_slot_id[column_names[column_index]]); if (_slot_id_to_value_range.end() == iter) { break; } From 17887cfbec0e8e8c1ebcbbfbc0191c148c509fe5 Mon Sep 17 00:00:00 2001 From: Gabriel Date: Tue, 25 Nov 2025 15:26:11 +0800 Subject: [PATCH 3/4] update --- be/src/exec/olap_common.h | 4 ++-- be/src/pipeline/exec/scan_operator.cpp | 15 +++++++++++---- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/be/src/exec/olap_common.h b/be/src/exec/olap_common.h index 5d9f3741c511bc..912e87c6ab7cc5 100644 --- a/be/src/exec/olap_common.h +++ b/be/src/exec/olap_common.h @@ -192,7 +192,7 @@ class ColumnValueRange { size_t get_fixed_value_size() const { return _fixed_values.size(); } - void to_olap_filter(std::vector>& filters) { + void to_olap_filter(std::vector>& filters) const { if (is_fixed_value_range()) { // 1. convert to in filter condition to_in_condition(filters, true); @@ -259,7 +259,7 @@ class ColumnValueRange { } } - void to_in_condition(std::vector>& filters, bool is_in = true) { + void to_in_condition(std::vector>& filters, bool is_in = true) const { TCondition condition; condition.__set_column_name(_column_name); condition.__set_condition_op(is_in ? "*=" : "!*="); diff --git a/be/src/pipeline/exec/scan_operator.cpp b/be/src/pipeline/exec/scan_operator.cpp index bdfcd51c08a077..6ba563a0ac9873 100644 --- a/be/src/pipeline/exec/scan_operator.cpp +++ b/be/src/pipeline/exec/scan_operator.cpp @@ -237,6 +237,17 @@ Status ScanLocalState::_normalize_conjuncts(RuntimeState* state) { ++it; } + for (auto& it : _slot_id_to_value_range) { + std::visit( + [&](auto&& range) { + if (range.is_empty_value_range()) { + _eos = true; + _scan_dependency->set_ready(); + } + }, + it.second.second); + } + return Status::OK(); } @@ -349,10 +360,6 @@ Status ScanLocalState::_normalize_predicate( _normalize_function_filters(cur_expr, context, slot, &pdt), status); } - if (value_range.is_empty_value_range()) { - _eos = true; - _scan_dependency->set_ready(); - } }, *range); RETURN_IF_ERROR(status); From 87fdf843188c626e67e7a34fdfb532731bbf7964 Mon Sep 17 00:00:00 2001 From: Gabriel Date: Tue, 25 Nov 2025 15:34:46 +0800 Subject: [PATCH 4/4] update --- be/src/exec/olap_common.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/be/src/exec/olap_common.h b/be/src/exec/olap_common.h index 912e87c6ab7cc5..5f14b56a14af5f 100644 --- a/be/src/exec/olap_common.h +++ b/be/src/exec/olap_common.h @@ -259,7 +259,8 @@ class ColumnValueRange { } } - void to_in_condition(std::vector>& filters, bool is_in = true) const { + void to_in_condition(std::vector>& filters, + bool is_in = true) const { TCondition condition; condition.__set_column_name(_column_name); condition.__set_condition_op(is_in ? "*=" : "!*=");