From a51ef94dbb6cda379df516b6937896c8b702c0d6 Mon Sep 17 00:00:00 2001 From: Qi Chen Date: Fri, 18 Apr 2025 11:25:43 +0800 Subject: [PATCH] [fix](orc-reader) Fix orc lazy materialization should not be bundled with pushdown. (#49835) Problem Summary: The ORC reader currently couples predicate pushdown with lazy materialization: lazy materialization is enabled only when the conditions can also be pushed down. This is unreasonable; the two should be orthogonal. - Fix: ORC lazy materialization should not be bundled with pushdown. - Fix lazy materialization for Hive ACID tables. --- be/src/apache-orc | 2 +- be/src/vec/exec/format/orc/vorc_reader.cpp | 22 +++++++++++++++++++--- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/be/src/apache-orc b/be/src/apache-orc index a4808d90ac067e..9c31ca7e3b8a53 160000 --- a/be/src/apache-orc +++ b/be/src/apache-orc @@ -1 +1 @@ -Subproject commit a4808d90ac067e2999a61278981cfb453f226d23 +Subproject commit 9c31ca7e3b8a53bb97923b5e5a9c2e684b331179 diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp b/be/src/vec/exec/format/orc/vorc_reader.cpp index 193756bc64da63..85b5f14a2a6445 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.cpp +++ b/be/src/vec/exec/format/orc/vorc_reader.cpp @@ -695,7 +695,8 @@ bool static build_search_argument(std::vector& predicates, int ind bool OrcReader::_init_search_argument( const std::unordered_map* colname_to_value_range) { - if ((!_enable_filter_by_min_max) || colname_to_value_range->empty()) { + if ((!_enable_filter_by_min_max) || _colname_to_value_range == nullptr || + colname_to_value_range->empty()) { return false; } std::vector predicates; @@ -797,12 +798,25 @@ Status OrcReader::set_fill_columns( visit_slot(conjunct->root().get()); } + if (_is_acid) { + _lazy_read_ctx.predicate_orc_columns.insert( + _lazy_read_ctx.predicate_orc_columns.end(), + TransactionalHive::READ_ROW_COLUMN_NAMES.begin(), + TransactionalHive::READ_ROW_COLUMN_NAMES.end()); + } + for (auto& 
read_col : _read_cols_lower_case) { _lazy_read_ctx.all_read_columns.emplace_back(read_col); if (predicate_columns.size() > 0) { auto iter = predicate_columns.find(read_col); if (iter == predicate_columns.end()) { - _lazy_read_ctx.lazy_read_columns.emplace_back(read_col); + if (!_is_acid || + std::find(TransactionalHive::READ_ROW_COLUMN_NAMES_LOWER_CASE.begin(), + TransactionalHive::READ_ROW_COLUMN_NAMES_LOWER_CASE.end(), + read_col) == + TransactionalHive::READ_ROW_COLUMN_NAMES_LOWER_CASE.end()) { + _lazy_read_ctx.lazy_read_columns.emplace_back(read_col); + } } else { _lazy_read_ctx.predicate_columns.first.emplace_back(iter->first); _lazy_read_ctx.predicate_columns.second.emplace_back(iter->second.second); @@ -859,8 +873,10 @@ Status OrcReader::set_fill_columns( _lazy_read_ctx.can_lazy_read = true; } - if (_colname_to_value_range == nullptr || !_init_search_argument(_colname_to_value_range)) { + if (_lazy_read_ctx.conjuncts.empty()) { _lazy_read_ctx.can_lazy_read = false; + } else { + _init_search_argument(_colname_to_value_range); } try { _row_reader_options.range(_range_start_offset, _range_size);