From 89da2278073a61c94a1cc0cbfc1a1f19ab11afa9 Mon Sep 17 00:00:00 2001 From: Socrates Date: Mon, 10 Feb 2025 10:03:54 +0800 Subject: [PATCH] [enhance](runtime filter) impl partition pruning in runtime filer (#47025) This PR implements partition pruning through runtime filters. When executing a SQL query like: ```sql SELECT count(*) FROM int_partition_table WHERE partition_col = ( SELECT partition_col FROM int_partition_table GROUP BY partition_col HAVING count(*) > 0 ORDER BY partition_col DESC LIMIT 1 ) ``` During execution, the backend (BE) will receive a dynamic runtime filter condition `partition_col = xxx`. Since partition_col is a partitioning column, we can use its value to determine if the partition can be pruned. Additionally, this mechanism also supports filtering queries like: ```sql SELECT count(*) FROM int_partition_table WHERE func(partition_col) = xxx ``` If func cannot be evaluated at the frontend (FE), the frontend will not perform partition pruning. However, since the backend can compute func, this mechanism allows us to handle pruning scenarios that are not possible at the frontend, providing a more efficient pruning process on the backend side. --- be/src/vec/core/block.cpp | 4 +- be/src/vec/exec/scan/vfile_scanner.cpp | 162 +++++++++++++++++- be/src/vec/exec/scan/vfile_scanner.h | 12 +- .../partition_col=2025-01-01/000000_0 | Bin 0 -> 587 bytes .../partition_col=2025-01-02/000000_0 | Bin 0 -> 581 bytes .../partition_col=2025-01-03/000000_0 | Bin 0 -> 600 bytes .../partition_col=100.01/000000_0 | Bin 0 -> 587 bytes .../partition_col=200.02/000000_0 | Bin 0 -> 581 bytes .../partition_col=300.03/000000_0 | Bin 0 -> 600 bytes .../partition_col=1/000000_0 | Bin 0 -> 587 bytes .../partition_col=2/000000_0 | Bin 0 -> 581 bytes .../partition_col=3/000000_0 | Bin 0 -> 600 bytes .../partition_col=A/000000_0 | Bin 0 -> 587 bytes .../partition_col=B/000000_0 | Bin 0 -> 581 bytes .../partition_col=C/000000_0 | Bin 0 -> 600 bytes .../org/apache/doris/qe/SessionVariable.java | 16 +- gensrc/thrift/PaloInternalService.thrift | 2 + ..._hive_runtime_filter_partition_pruning.out | 61 +++++++ ...ve_runtime_filter_partition_pruning.groovy | 107 ++++++++++++ 19 files changed, 354 insertions(+), 10 deletions(-) create mode 100644 docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/date_partition_table/partition_col=2025-01-01/000000_0 create mode 100644 docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/date_partition_table/partition_col=2025-01-02/000000_0 create mode 100644 docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/date_partition_table/partition_col=2025-01-03/000000_0 create mode 100644 docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/decimal_partition_table/partition_col=100.01/000000_0 create mode 100644 docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/decimal_partition_table/partition_col=200.02/000000_0 create mode 100644 docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/decimal_partition_table/partition_col=300.03/000000_0 create mode 100644 docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/int_partition_table/partition_col=1/000000_0 create mode 100644 docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/int_partition_table/partition_col=2/000000_0 create mode 100644 docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/int_partition_table/partition_col=3/000000_0 create mode 100644 docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/string_partition_table/partition_col=A/000000_0 create mode 100644 docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/string_partition_table/partition_col=B/000000_0 create mode 100644 docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/string_partition_table/partition_col=C/000000_0 create mode 100644 regression-test/data/external_table_p0/hive/test_hive_runtime_filter_partition_pruning.out create mode 100644 regression-test/suites/external_table_p0/hive/test_hive_runtime_filter_partition_pruning.groovy diff --git a/be/src/vec/core/block.cpp b/be/src/vec/core/block.cpp index 95cedf4c362f7e..5b4d858673f406 100644 --- a/be/src/vec/core/block.cpp +++ b/be/src/vec/core/block.cpp @@ -147,7 +147,7 @@ void Block::initialize_index_by_name() { void Block::insert(size_t position, const ColumnWithTypeAndName& elem) { if (position > data.size()) { throw Exception(ErrorCode::INTERNAL_ERROR, - "invalid input position, position={}, data.size{}, names={}", position, + "invalid input position, position={}, data.size={}, names={}", position, data.size(), dump_names()); } @@ -164,7 +164,7 @@ void Block::insert(size_t position, const ColumnWithTypeAndName& elem) { void Block::insert(size_t position, ColumnWithTypeAndName&& elem) { if (position > data.size()) { throw Exception(ErrorCode::INTERNAL_ERROR, - "invalid input position, position={}, data.size{}, names={}", position, + "invalid input position, position={}, data.size={}, names={}", position, data.size(), dump_names()); } diff --git a/be/src/vec/exec/scan/vfile_scanner.cpp b/be/src/vec/exec/scan/vfile_scanner.cpp index a22777672b6b4f..8a1f64a55b4054 100644 --- a/be/src/vec/exec/scan/vfile_scanner.cpp +++ b/be/src/vec/exec/scan/vfile_scanner.cpp @@ -20,22 +20,29 @@ #include #include #include +#include #include #include +#include +#include #include #include #include +#include #include +#include #include #include "common/compiler_util.h" // IWYU pragma: keep #include "common/config.h" #include "common/logging.h" +#include "common/status.h" #include "io/cache/block_file_cache_profile.h" #include "runtime/descriptors.h" #include "runtime/runtime_state.h" #include "runtime/types.h" +#include "util/runtime_profile.h" #include "vec/aggregate_functions/aggregate_function.h" #include "vec/columns/column.h" #include "vec/columns/column_nullable.h" @@ -67,6 +74,7 @@ #include "vec/exec/scan/vscan_node.h" #include "vec/exprs/vexpr.h" #include "vec/exprs/vexpr_context.h" +#include "vec/exprs/vexpr_fwd.h" #include "vec/exprs/vslot_ref.h" #include "vec/functions/function.h" #include "vec/functions/function_string.h" @@ -130,12 +138,17 @@ Status VFileScanner::prepare(RuntimeState* state, const VExprContextSPtrs& conju ADD_TIMER_WITH_LEVEL(_local_state->scanner_profile(), "FileScannerPreFilterTimer", 1); _convert_to_output_block_timer = ADD_TIMER_WITH_LEVEL(_local_state->scanner_profile(), "FileScannerConvertOuputBlockTime", 1); + _runtime_filter_partition_prune_timer = ADD_TIMER_WITH_LEVEL( + _local_state->scanner_profile(), "FileScannerRuntimeFilterPartitionPruningTime", 1); _empty_file_counter = ADD_COUNTER_WITH_LEVEL(_local_state->scanner_profile(), "EmptyFileNum", TUnit::UNIT, 1); _not_found_file_counter = ADD_COUNTER_WITH_LEVEL(_local_state->scanner_profile(), "NotFoundFileNum", TUnit::UNIT, 1); _file_counter = ADD_COUNTER_WITH_LEVEL(_local_state->scanner_profile(), "FileNumber", TUnit::UNIT, 1); + _runtime_filter_partition_pruned_range_counter = + ADD_COUNTER_WITH_LEVEL(_local_state->scanner_profile(), + "RuntimeFilterPartitionPrunedRangeNum", TUnit::UNIT, 1); _file_cache_statistics.reset(new io::FileCacheStatistics()); _io_ctx.reset(new io::IOContext()); @@ -174,6 +187,113 @@ Status VFileScanner::prepare(RuntimeState* state, const VExprContextSPtrs& conju return Status::OK(); } +// check if the expr is a partition pruning expr +bool VFileScanner::_check_partition_prune_expr(const VExprSPtr& expr) { + if (expr->is_slot_ref()) { + auto* slot_ref = static_cast(expr.get()); + return _partition_slot_index_map.find(slot_ref->slot_id()) != + _partition_slot_index_map.end(); + } + if (expr->is_literal()) { + return true; + } + return std::ranges::all_of(expr->children(), [this](const auto& child) { + return _check_partition_prune_expr(child); + }); +} + +void VFileScanner::_init_runtime_filter_partition_prune_ctxs() { + _runtime_filter_partition_prune_ctxs.clear(); + for (auto& conjunct : _conjuncts) { + auto impl = conjunct->root()->get_impl(); + // If impl is not null, which means this a conjuncts from runtime filter. + auto expr = impl ? impl : conjunct->root(); + if (_check_partition_prune_expr(expr)) { + _runtime_filter_partition_prune_ctxs.emplace_back(conjunct); + } + } +} + +void VFileScanner::_init_runtime_filter_partition_prune_block() { + // init block with empty column + for (auto const* slot_desc : _real_tuple_desc->slots()) { + if (!slot_desc->need_materialize()) { + // should be ignored from reading + continue; + } + _runtime_filter_partition_prune_block.insert( + ColumnWithTypeAndName(slot_desc->get_empty_mutable_column(), + slot_desc->get_data_type_ptr(), slot_desc->col_name())); + } +} + +Status VFileScanner::_process_runtime_filters_partition_prune(bool& can_filter_all) { + SCOPED_TIMER(_runtime_filter_partition_prune_timer); + if (_runtime_filter_partition_prune_ctxs.empty() || _partition_col_descs.empty()) { + return Status::OK(); + } + size_t partition_value_column_size = 1; + + // 1. Get partition key values to string columns. + std::unordered_map parititon_slot_id_to_column; + for (auto const& partition_col_desc : _partition_col_descs) { + const auto& [partition_value, partition_slot_desc] = partition_col_desc.second; + auto test_serde = partition_slot_desc->get_data_type_ptr()->get_serde(); + auto partition_value_column = partition_slot_desc->get_data_type_ptr()->create_column(); + auto* col_ptr = static_cast(partition_value_column.get()); + Slice slice(partition_value.data(), partition_value.size()); + int num_deserialized = 0; + RETURN_IF_ERROR(test_serde->deserialize_column_from_fixed_json( + *col_ptr, slice, partition_value_column_size, &num_deserialized, {})); + parititon_slot_id_to_column[partition_slot_desc->id()] = std::move(partition_value_column); + } + + // 2. Fill _runtime_filter_partition_prune_block from the partition column, then execute conjuncts and filter block. + // 2.1 Fill _runtime_filter_partition_prune_block from the partition column to match the conjuncts executing. + size_t index = 0; + bool first_column_filled = false; + for (auto const* slot_desc : _real_tuple_desc->slots()) { + if (!slot_desc->need_materialize()) { + // should be ignored from reading + continue; + } + if (parititon_slot_id_to_column.find(slot_desc->id()) != + parititon_slot_id_to_column.end()) { + auto data_type = slot_desc->get_data_type_ptr(); + auto partition_value_column = std::move(parititon_slot_id_to_column[slot_desc->id()]); + if (data_type->is_nullable()) { + _runtime_filter_partition_prune_block.insert( + index, ColumnWithTypeAndName( + ColumnNullable::create( + std::move(partition_value_column), + ColumnUInt8::create(partition_value_column_size, 0)), + data_type, slot_desc->col_name())); + } else { + _runtime_filter_partition_prune_block.insert( + index, ColumnWithTypeAndName(std::move(partition_value_column), data_type, + slot_desc->col_name())); + } + if (index == 0) { + first_column_filled = true; + } + } + index++; + } + + // 2.2 Execute conjuncts. + if (!first_column_filled) { + // VExprContext.execute has an optimization, the filtering is executed when block->rows() > 0 + // The following process may be tricky and time-consuming, but we have no other way. + _runtime_filter_partition_prune_block.get_by_position(0).column->assume_mutable()->resize( + partition_value_column_size); + } + IColumn::Filter result_filter(_runtime_filter_partition_prune_block.rows(), 1); + RETURN_IF_ERROR(VExprContext::execute_conjuncts(_runtime_filter_partition_prune_ctxs, nullptr, + &_runtime_filter_partition_prune_block, + &result_filter, &can_filter_all)); + return Status::OK(); +} + Status VFileScanner::_process_conjuncts_for_dict_filter() { _slot_id_to_filter_conjuncts.clear(); _not_single_slot_filter_conjuncts.clear(); @@ -237,6 +357,11 @@ Status VFileScanner::open(RuntimeState* state) { RETURN_IF_ERROR(_split_source->get_next(&_first_scan_range, &_current_range)); if (_first_scan_range) { RETURN_IF_ERROR(_init_expr_ctxes()); + if (_state->query_options().enable_runtime_filter_partition_prune && + !_partition_slot_index_map.empty()) { + _init_runtime_filter_partition_prune_ctxs(); + _init_runtime_filter_partition_prune_block(); + } } else { // there's no scan range in split source. stop scanner directly. _scanner_eof = true; @@ -716,6 +841,29 @@ Status VFileScanner::_get_next_reader() { const TFileRangeDesc& range = _current_range; _current_range_path = range.path; + if (!_partition_slot_descs.empty()) { + // we need get partition columns first for runtime filter partition pruning + RETURN_IF_ERROR(_generate_parititon_columns()); + + if (_state->query_options().enable_runtime_filter_partition_prune) { + // if enable_runtime_filter_partition_prune is true, we need to check whether this range can be filtered out + // by runtime filter partition prune + if (_push_down_conjuncts.size() < _conjuncts.size()) { + // there are new runtime filters, need to re-init runtime filter partition pruning ctxs + _init_runtime_filter_partition_prune_ctxs(); + } + + bool can_filter_all = false; + RETURN_IF_ERROR(_process_runtime_filters_partition_prune(can_filter_all)); + if (can_filter_all) { + // this range can be filtered out by runtime filter partition pruning + // so we need to skip this range + COUNTER_UPDATE(_runtime_filter_partition_pruned_range_counter, 1); + continue; + } + } + } + // create reader for specific format Status init_status; // for compatibility, if format_type is not set in range, use the format type of params @@ -976,7 +1124,8 @@ Status VFileScanner::_get_next_reader() { _missing_cols.clear(); RETURN_IF_ERROR(_cur_reader->get_columns(&_name_to_col_type, &_missing_cols)); _cur_reader->set_push_down_agg_type(_get_push_down_agg_type()); - RETURN_IF_ERROR(_generate_fill_columns()); + RETURN_IF_ERROR(_generate_missing_columns()); + RETURN_IF_ERROR(_cur_reader->set_fill_columns(_partition_col_descs, _missing_col_descs)); if (VLOG_NOTICE_IS_ON && !_missing_cols.empty() && _is_load) { fmt::memory_buffer col_buf; for (auto& col : _missing_cols) { @@ -1006,10 +1155,8 @@ Status VFileScanner::_get_next_reader() { return Status::OK(); } -Status VFileScanner::_generate_fill_columns() { +Status VFileScanner::_generate_parititon_columns() { _partition_col_descs.clear(); - _missing_col_descs.clear(); - const TFileRangeDesc& range = _current_range; if (range.__isset.columns_from_path && !_partition_slot_descs.empty()) { for (const auto& slot_desc : _partition_slot_descs) { @@ -1030,7 +1177,11 @@ Status VFileScanner::_generate_fill_columns() { } } } + return Status::OK(); +} +Status VFileScanner::_generate_missing_columns() { + _missing_col_descs.clear(); if (!_missing_cols.empty()) { for (auto slot_desc : _real_tuple_desc->slots()) { if (!slot_desc->is_materialized()) { @@ -1048,8 +1199,7 @@ Status VFileScanner::_generate_fill_columns() { _missing_col_descs.emplace(slot_desc->col_name(), it->second); } } - - return _cur_reader->set_fill_columns(_partition_col_descs, _missing_col_descs); + return Status::OK(); } Status VFileScanner::_init_expr_ctxes() { diff --git a/be/src/vec/exec/scan/vfile_scanner.h b/be/src/vec/exec/scan/vfile_scanner.h index f68cc4a66e3910..ca7a03c68c022d 100644 --- a/be/src/vec/exec/scan/vfile_scanner.h +++ b/be/src/vec/exec/scan/vfile_scanner.h @@ -38,6 +38,7 @@ #include "vec/core/block.h" #include "vec/exec/format/generic_reader.h" #include "vec/exec/scan/vscanner.h" +#include "vec/exprs/vexpr_fwd.h" namespace doris { class RuntimeState; @@ -160,6 +161,8 @@ class VFileScanner : public VScanner { Block _src_block; VExprContextSPtrs _push_down_conjuncts; + VExprContextSPtrs _runtime_filter_partition_prune_ctxs; + Block _runtime_filter_partition_prune_block; std::unique_ptr _file_cache_statistics; std::unique_ptr _io_ctx; @@ -175,9 +178,11 @@ class VFileScanner : public VScanner { RuntimeProfile::Counter* _fill_missing_columns_timer = nullptr; RuntimeProfile::Counter* _pre_filter_timer = nullptr; RuntimeProfile::Counter* _convert_to_output_block_timer = nullptr; + RuntimeProfile::Counter* _runtime_filter_partition_prune_timer = nullptr; RuntimeProfile::Counter* _empty_file_counter = nullptr; RuntimeProfile::Counter* _not_found_file_counter = nullptr; RuntimeProfile::Counter* _file_counter = nullptr; + RuntimeProfile::Counter* _runtime_filter_partition_pruned_range_counter = nullptr; const std::unordered_map* _col_name_to_slot_id = nullptr; // single slot filter conjuncts @@ -205,7 +210,12 @@ class VFileScanner : public VScanner { Status _convert_to_output_block(Block* block); Status _truncate_char_or_varchar_columns(Block* block); void _truncate_char_or_varchar_column(Block* block, int idx, int len); - Status _generate_fill_columns(); + Status _generate_parititon_columns(); + Status _generate_missing_columns(); + bool _check_partition_prune_expr(const VExprSPtr& expr); + void _init_runtime_filter_partition_prune_ctxs(); + void _init_runtime_filter_partition_prune_block(); + Status _process_runtime_filters_partition_prune(bool& is_partition_pruned); Status _process_conjuncts_for_dict_filter(); Status _process_late_arrival_conjuncts(); void _get_slot_ids(VExpr* expr, std::vector* slot_ids); diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/date_partition_table/partition_col=2025-01-01/000000_0 b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/date_partition_table/partition_col=2025-01-01/000000_0 new file mode 100644 index 0000000000000000000000000000000000000000..db221ac2e908cba7f6b2121092b0b18a54948519 GIT binary patch literal 587 zcmaJ26ah;_;fn|%g)hJ>qxzl&4i0Wn%tE;a z5>Or7*}Og>&t^dIA!!Y19|PG(FF1mw4SLOM z%V5C)h2778%weyH{9cp=q!Q)uISEsi=xG=;{be*{ig=L*XLq+Nli)((=*z(We1J6>GxZ9(6YQ^)gcJ2 M4|;`V=)oF&0cjLy#Q*>R literal 0 HcmV?d00001 diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/date_partition_table/partition_col=2025-01-02/000000_0 b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/date_partition_table/partition_col=2025-01-02/000000_0 new file mode 100644 index 0000000000000000000000000000000000000000..1ea988102c76638e2e7ed62e2bd3c947a32748c9 GIT binary patch literal 581 zcmaJ<&r1S96n?Jjx{!x1v+Tkg3{(tio7MG4un1X3NC$~J$EYJNxmv4k)ImB(2mgcm zYkRXwNA}Vxwvx(kQ~XK5DguT=+sm-05DY*Zv%=u5hfJ0WLlO|XcCC5 z25D^RRVb#eLq2{oQ&&9l`v_T+Bq7cu(P>3o#xZbTz+ezIP4)Zhs3`?(qu*Y#1ki5E z5>c$%n}eYriN|SgC<0D2AWB{_6&y}7=%`~Jh&)QwF-S#xG#6lK7PY*BPLW;#O}8YR zVA@z1i-PX`vjvNyVjO{L3ZDr|GMfhRn7G9YR;cMv6L}<_aK-lk-u`tuES?}!O{u0dgGY?_eEq3SrsGntJhgYi=m$7qE?$%4J#F$g!PTeiLy-K~*?bqC1t;~-tj{>X@yd?lS H_m7md-i8OjhEp=c>J8rSZ0#YR`k zKll$W^q;&15*kcQ=CY zde6}u@w*^m+u5x&67yx&D64?qa$kQKl- zh9n!)!Gs|RE2`oYu?qAm60wqu6DUhh9GoI{{@IJ7f^WD4JQZI!mE@Na^NZ#pQ%;(4 z&~w&TlcvsimEa|HVJl@|#)E^^=dUdzB_q9OB#abQ-nk3?h=rBNAF|5BC}48#IP&%{ zFOHDhzMWeO_D6wcR1CeMYo*S_A9OX{GH9(26ah;_;fn|%g)hJ>qxzl&4i0Wn%tE;a z5>Or7*}Og>&t^dIA!!Y19|PG(FF1mw4SLOM z%V5C)h2778%weyH{9cp=q!Q)uISEsi=xG=;{be*{ig=L*XLq+Nli)((=*z(We1J6>GxZ9(6YQ^)gcJ2 M4|;`V=)oF&0cjLy#Q*>R literal 0 HcmV?d00001 diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/decimal_partition_table/partition_col=200.02/000000_0 b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/decimal_partition_table/partition_col=200.02/000000_0 new file mode 100644 index 0000000000000000000000000000000000000000..1ea988102c76638e2e7ed62e2bd3c947a32748c9 GIT binary patch literal 581 zcmaJ<&r1S96n?Jjx{!x1v+Tkg3{(tio7MG4un1X3NC$~J$EYJNxmv4k)ImB(2mgcm zYkRXwNA}Vxwvx(kQ~XK5DguT=+sm-05DY*Zv%=u5hfJ0WLlO|XcCC5 z25D^RRVb#eLq2{oQ&&9l`v_T+Bq7cu(P>3o#xZbTz+ezIP4)Zhs3`?(qu*Y#1ki5E z5>c$%n}eYriN|SgC<0D2AWB{_6&y}7=%`~Jh&)QwF-S#xG#6lK7PY*BPLW;#O}8YR zVA@z1i-PX`vjvNyVjO{L3ZDr|GMfhRn7G9YR;cMv6L}<_aK-lk-u`tuES?}!O{u0dgGY?_eEq3SrsGntJhgYi=m$7qE?$%4J#F$g!PTeiLy-K~*?bqC1t;~-tj{>X@yd?lS H_m7md-i8OjhEp=c>J8rSZ0#YR`k zKll$W^q;&15*kcQ=CY zde6}u@w*^m+u5x&67yx&D64?qa$kQKl- zh9n!)!Gs|RE2`oYu?qAm60wqu6DUhh9GoI{{@IJ7f^WD4JQZI!mE@Na^NZ#pQ%;(4 z&~w&TlcvsimEa|HVJl@|#)E^^=dUdzB_q9OB#abQ-nk3?h=rBNAF|5BC}48#IP&%{ zFOHDhzMWeO_D6wcR1CeMYo*S_A9OX{GH9(26ah;_;fn|%g)hJ>qxzl&4i0Wn%tE;a z5>Or7*}Og>&t^dIA!!Y19|PG(FF1mw4SLOM z%V5C)h2778%weyH{9cp=q!Q)uISEsi=xG=;{be*{ig=L*XLq+Nli)((=*z(We1J6>GxZ9(6YQ^)gcJ2 M4|;`V=)oF&0cjLy#Q*>R literal 0 HcmV?d00001 diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/int_partition_table/partition_col=2/000000_0 b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/int_partition_table/partition_col=2/000000_0 new file mode 100644 index 0000000000000000000000000000000000000000..1ea988102c76638e2e7ed62e2bd3c947a32748c9 GIT binary patch literal 581 zcmaJ<&r1S96n?Jjx{!x1v+Tkg3{(tio7MG4un1X3NC$~J$EYJNxmv4k)ImB(2mgcm zYkRXwNA}Vxwvx(kQ~XK5DguT=+sm-05DY*Zv%=u5hfJ0WLlO|XcCC5 z25D^RRVb#eLq2{oQ&&9l`v_T+Bq7cu(P>3o#xZbTz+ezIP4)Zhs3`?(qu*Y#1ki5E z5>c$%n}eYriN|SgC<0D2AWB{_6&y}7=%`~Jh&)QwF-S#xG#6lK7PY*BPLW;#O}8YR zVA@z1i-PX`vjvNyVjO{L3ZDr|GMfhRn7G9YR;cMv6L}<_aK-lk-u`tuES?}!O{u0dgGY?_eEq3SrsGntJhgYi=m$7qE?$%4J#F$g!PTeiLy-K~*?bqC1t;~-tj{>X@yd?lS H_m7md-i8OjhEp=c>J8rSZ0#YR`k zKll$W^q;&15*kcQ=CY zde6}u@w*^m+u5x&67yx&D64?qa$kQKl- zh9n!)!Gs|RE2`oYu?qAm60wqu6DUhh9GoI{{@IJ7f^WD4JQZI!mE@Na^NZ#pQ%;(4 z&~w&TlcvsimEa|HVJl@|#)E^^=dUdzB_q9OB#abQ-nk3?h=rBNAF|5BC}48#IP&%{ zFOHDhzMWeO_D6wcR1CeMYo*S_A9OX{GH9(26ah;_;fn|%g)hJ>qxzl&4i0Wn%tE;a z5>Or7*}Og>&t^dIA!!Y19|PG(FF1mw4SLOM z%V5C)h2778%weyH{9cp=q!Q)uISEsi=xG=;{be*{ig=L*XLq+Nli)((=*z(We1J6>GxZ9(6YQ^)gcJ2 M4|;`V=)oF&0cjLy#Q*>R literal 0 HcmV?d00001 diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/string_partition_table/partition_col=B/000000_0 b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/string_partition_table/partition_col=B/000000_0 new file mode 100644 index 0000000000000000000000000000000000000000..1ea988102c76638e2e7ed62e2bd3c947a32748c9 GIT binary patch literal 581 zcmaJ<&r1S96n?Jjx{!x1v+Tkg3{(tio7MG4un1X3NC$~J$EYJNxmv4k)ImB(2mgcm zYkRXwNA}Vxwvx(kQ~XK5DguT=+sm-05DY*Zv%=u5hfJ0WLlO|XcCC5 z25D^RRVb#eLq2{oQ&&9l`v_T+Bq7cu(P>3o#xZbTz+ezIP4)Zhs3`?(qu*Y#1ki5E z5>c$%n}eYriN|SgC<0D2AWB{_6&y}7=%`~Jh&)QwF-S#xG#6lK7PY*BPLW;#O}8YR zVA@z1i-PX`vjvNyVjO{L3ZDr|GMfhRn7G9YR;cMv6L}<_aK-lk-u`tuES?}!O{u0dgGY?_eEq3SrsGntJhgYi=m$7qE?$%4J#F$g!PTeiLy-K~*?bqC1t;~-tj{>X@yd?lS H_m7md-i8OjhEp=c>J8rSZ0#YR`k zKll$W^q;&15*kcQ=CY zde6}u@w*^m+u5x&67yx&D64?qa$kQKl- zh9n!)!Gs|RE2`oYu?qAm60wqu6DUhh9GoI{{@IJ7f^WD4JQZI!mE@Na^NZ#pQ%;(4 z&~w&TlcvsimEa|HVJl@|#)E^^=dUdzB_q9OB#abQ-nk3?h=rBNAF|5BC}48#IP&%{ zFOHDhzMWeO_D6wcR1CeMYo*S_A9OX{GH9( 0 + order by partition_col desc limit 1); + """ + qt_runtime_filter_partition_pruning_decimal2 """ + select count(*) from decimal_partition_table where partition_col in + (select partition_col from decimal_partition_table + group by partition_col having count(*) > 0 + order by partition_col desc limit 2); + """ + qt_runtime_filter_partition_pruning_decimal3 """ + select count(*) from decimal_partition_table where abs(partition_col) = + (select partition_col from decimal_partition_table + group by partition_col having count(*) > 0 + order by partition_col desc limit 1); + """ + qt_runtime_filter_partition_pruning_int1 """ + select count(*) from int_partition_table where partition_col = + (select partition_col from int_partition_table + group by partition_col having count(*) > 0 + order by partition_col desc limit 1); + """ + qt_runtime_filter_partition_pruning_int2 """ + select count(*) from int_partition_table where partition_col in + (select partition_col from int_partition_table + group by partition_col having count(*) > 0 + order by partition_col desc limit 2); + """ + qt_runtime_filter_partition_pruning_int3 """ + select count(*) from int_partition_table where abs(partition_col) = + (select partition_col from int_partition_table + group by partition_col having count(*) > 0 + order by partition_col desc limit 1); + """ + qt_runtime_filter_partition_pruning_string1 """ + select count(*) from string_partition_table where partition_col = + (select partition_col from string_partition_table + group by partition_col having count(*) > 0 + order by partition_col desc limit 1); + """ + qt_runtime_filter_partition_pruning_string2 """ + select count(*) from string_partition_table where partition_col in + (select partition_col from string_partition_table + group by partition_col having count(*) > 0 + order by partition_col desc limit 2); + """ + qt_runtime_filter_partition_pruning_date1 """ + select count(*) from date_partition_table where partition_col = + (select partition_col from date_partition_table + group by partition_col having count(*) > 0 + order by partition_col desc limit 1); + """ + qt_runtime_filter_partition_pruning_decimal2 """ + select count(*) from date_partition_table where partition_col in + (select partition_col from date_partition_table + group by partition_col having count(*) > 0 + order by partition_col desc limit 2); + """ + } + + String enabled = context.config.otherConfigs.get("enableHiveTest") + if (enabled == null || !enabled.equalsIgnoreCase("true")) { + logger.info("diable Hive test.") + return; + } + + for (String hivePrefix : ["hive2", "hive3"]) { + try { + String hms_port = context.config.otherConfigs.get(hivePrefix + "HmsPort") + String catalog_name = "${hivePrefix}_test_hive_runtime_filter_partition_pruning" + String externalEnvIp = context.config.otherConfigs.get("externalEnvIp") + + sql """drop catalog if exists ${catalog_name}""" + sql """create catalog if not exists ${catalog_name} properties ( + "type"="hms", + 'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hms_port}' + );""" + sql """use `${catalog_name}`.`partition_tables`""" + + test_runtime_filter_partition_pruning() + + } finally { + } + } +} +