diff --git a/be/src/runtime/exec_env.h b/be/src/runtime/exec_env.h index c5f9f77a783f0c..f3255be929194d 100644 --- a/be/src/runtime/exec_env.h +++ b/be/src/runtime/exec_env.h @@ -322,6 +322,7 @@ class ExecEnv { static void set_tracking_memory(bool tracking_memory) { _s_tracking_memory.store(tracking_memory, std::memory_order_release); } + void set_orc_memory_pool(orc::MemoryPool* pool) { _orc_memory_pool = pool; } void set_non_block_close_thread_pool(std::unique_ptr&& pool) { _non_block_close_thread_pool = std::move(pool); } diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp b/be/src/vec/exec/format/orc/vorc_reader.cpp index 911620a61002f3..b53ce3a0a50619 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.cpp +++ b/be/src/vec/exec/format/orc/vorc_reader.cpp @@ -18,13 +18,14 @@ #include "vorc_reader.h" #include -#include #include +#include #include #include #include #include +#include // IWYU pragma: no_include #include // IWYU pragma: keep #include @@ -33,12 +34,11 @@ #include #include #include -#include +#include #include "cctz/civil_time.h" #include "cctz/time_zone.h" #include "common/exception.h" -#include "exec/olap_utils.h" #include "exprs/create_predicate_function.h" #include "exprs/hybrid_set.h" #include "gutil/strings/substitute.h" @@ -55,6 +55,7 @@ #include "runtime/descriptors.h" #include "runtime/primitive_type.h" #include "runtime/thread_context.h" +#include "util/runtime_profile.h" #include "util/slice.h" #include "util/timezone_utils.h" #include "vec/columns/column.h" @@ -72,15 +73,13 @@ #include "vec/data_types/data_type_nullable.h" #include "vec/data_types/data_type_struct.h" #include "vec/exec/format/orc/orc_file_reader.h" -#include "vec/exec/format/orc/orc_memory_pool.h" #include "vec/exec/format/table/transactional_hive_common.h" #include "vec/exprs/vbloom_predicate.h" #include "vec/exprs/vdirect_in_predicate.h" #include "vec/exprs/vectorized_fn_call.h" +#include "vec/exprs/vexpr_fwd.h" #include "vec/exprs/vin_predicate.h" -#include "vec/exprs/vliteral.h" #include "vec/exprs/vruntimefilter_wrapper.h" -#include "vec/exprs/vslot_ref.h" #include "vec/runtime/vdatetime_value.h" namespace doris { @@ -265,6 +264,10 @@ void OrcReader::_init_profile() { ADD_CHILD_TIMER_WITH_LEVEL(_profile, "DecodeNullMapTime", orc_profile, 1); _orc_profile.filter_block_time = ADD_CHILD_TIMER_WITH_LEVEL(_profile, "FilterBlockTime", orc_profile, 1); + _orc_profile.selected_row_group_count = + ADD_COUNTER_WITH_LEVEL(_profile, "SelectedRowGroupCount", TUnit::UNIT, 1); + _orc_profile.evaluated_row_group_count = + ADD_COUNTER_WITH_LEVEL(_profile, "EvaluatedRowGroupCount", TUnit::UNIT, 1); } } @@ -288,6 +291,7 @@ Status OrcReader::_create_file_reader() { try { orc::ReaderOptions options; options.setMemoryPool(*ExecEnv::GetInstance()->orc_memory_pool()); + options.setReaderMetrics(&_reader_metrics); _reader = orc::createReader( std::unique_ptr(_file_input_stream.release()), options); } catch (std::exception& e) { @@ -376,7 +380,7 @@ Status OrcReader::_init_read_columns() { std::vector orc_cols; std::vector orc_cols_lower_case; bool is_hive1_orc = false; - _init_orc_cols(root_type, orc_cols, orc_cols_lower_case, _type_map, &is_hive1_orc); + _init_orc_cols(root_type, orc_cols, orc_cols_lower_case, _type_map, &is_hive1_orc, false); // In old version slot_name_to_schema_pos may not be set in _scan_params // TODO, should be removed in 2.2 or later @@ -432,7 +436,7 @@ Status OrcReader::_init_read_columns() { void OrcReader::_init_orc_cols(const orc::Type& type, std::vector& orc_cols, std::vector& orc_cols_lower_case, std::unordered_map& type_map, - bool* is_hive1_orc) { + bool* is_hive1_orc, bool should_add_acid_prefix) const { bool hive1_orc = true; for (int i = 0; i < type.getSubtypeCount(); ++i) { orc_cols.emplace_back(type.getFieldName(i)); @@ -442,11 +446,17 @@ void OrcReader::_init_orc_cols(const orc::Type& type, std::vector& } orc_cols_lower_case.emplace_back(std::move(filed_name_lower_case)); auto file_name = type.getFieldName(i); + if (should_add_acid_prefix) { + file_name = fmt::format( + "{}.{}", TransactionalHive::ACID_COLUMN_NAMES[TransactionalHive::ROW_OFFSET], + file_name); + } type_map.emplace(std::move(file_name), type.getSubtype(i)); if (_is_acid) { const orc::Type* sub_type = type.getSubtype(i); if (sub_type->getKind() == orc::TypeKind::STRUCT) { - _init_orc_cols(*sub_type, orc_cols, orc_cols_lower_case, type_map, is_hive1_orc); + _init_orc_cols(*sub_type, orc_cols, orc_cols_lower_case, type_map, is_hive1_orc, + true); } } } @@ -500,8 +510,10 @@ static std::unordered_map TYPEKIND_TO_PRE {orc::TypeKind::BOOLEAN, orc::PredicateDataType::BOOLEAN}}; template -std::tuple convert_to_orc_literal(const orc::Type* type, const void* value, - int precision, int scale) { +std::tuple convert_to_orc_literal(const orc::Type* type, + StringRef& literal_data, int precision, + int scale) { + const auto* value = literal_data.data; try { switch (type->getKind()) { case orc::TypeKind::BOOLEAN: { @@ -549,8 +561,7 @@ std::tuple convert_to_orc_literal(const orc::Type* type, con case orc::TypeKind::VARCHAR: { if (primitive_type == TYPE_STRING || primitive_type == TYPE_CHAR || primitive_type == TYPE_VARCHAR) { - StringRef* string_value = (StringRef*)value; - return std::make_tuple(true, orc::Literal(string_value->data, string_value->size)); + return std::make_tuple(true, orc::Literal(literal_data.data, literal_data.size)); } return std::make_tuple(false, orc::Literal(false)); } @@ -629,192 +640,358 @@ std::tuple convert_to_orc_literal(const orc::Type* type, con } } -template -std::vector value_range_to_predicate( - const ColumnValueRange& col_val_range, const orc::Type* type) { - std::vector predicates; +std::tuple OrcReader::_make_orc_literal( + const VSlotRef* slot_ref, const VLiteral* literal) { + DCHECK(_col_name_to_file_col_name.contains(slot_ref->expr_name())); + auto file_col_name = _col_name_to_file_col_name[slot_ref->expr_name()]; + if (!_type_map.contains(file_col_name)) { + LOG(WARNING) << "Column " << slot_ref->expr_name() << " not found in _type_map"; + return std::make_tuple(false, orc::Literal(false), orc::PredicateDataType::LONG); + } + DCHECK(_type_map.contains(file_col_name)); + const auto* orc_type = _type_map[file_col_name]; + if (!TYPEKIND_TO_PREDICATE_TYPE.contains(orc_type->getKind())) { + LOG(WARNING) << "Unsupported Push Down Orc Type [TypeKind=" << orc_type->getKind() << "]"; + return std::make_tuple(false, orc::Literal(false), orc::PredicateDataType::LONG); + } + const auto predicate_type = TYPEKIND_TO_PREDICATE_TYPE[orc_type->getKind()]; + if (literal == nullptr) { + // only get the predicate_type + return std::make_tuple(true, orc::Literal(true), predicate_type); + } + // this only happens when the literals of in_predicate contains null value, like in (1, null) + if (literal->get_column_ptr()->is_null_at(0)) { + return std::make_tuple(false, orc::Literal(false), predicate_type); + } + auto literal_data = literal->get_column_ptr()->get_data_at(0); + auto* slot = _tuple_descriptor->slots()[slot_ref->column_id()]; + auto slot_type = slot->type(); + switch (slot_type.type) { +#define M(NAME) \ + case TYPE_##NAME: { \ + auto [valid, orc_literal] = convert_to_orc_literal( \ + orc_type, literal_data, slot_type.precision, slot_type.scale); \ + return std::make_tuple(valid, orc_literal, predicate_type); \ + } +#define APPLY_FOR_PRIMITIVE_TYPE(M) \ + M(TINYINT) \ + M(SMALLINT) \ + M(INT) \ + M(BIGINT) \ + M(LARGEINT) \ + M(DATE) \ + M(DATETIME) \ + M(DATEV2) \ + M(DATETIMEV2) \ + M(VARCHAR) \ + M(STRING) \ + M(HLL) \ + M(DECIMAL32) \ + M(DECIMAL64) \ + M(DECIMAL128I) \ + M(DECIMAL256) \ + M(DECIMALV2) \ + M(BOOLEAN) \ + M(IPV4) \ + M(IPV6) + APPLY_FOR_PRIMITIVE_TYPE(M) +#undef M + default: { + VLOG_CRITICAL << "Unsupported Convert Orc Literal [ColName=" << slot->col_name() << "]"; + return std::make_tuple(false, orc::Literal(false), predicate_type); + } + } +} - PrimitiveType src_type = OrcReader::convert_to_doris_type(type).type; - if (src_type != primitive_type) { - if (!(is_string_type(src_type) && is_string_type(primitive_type))) { - // not support schema change - return predicates; - } +// check if the slot of expr can be pushed down to orc reader and make orc predicate type +bool OrcReader::_check_slot_can_push_down(const VExprSPtr& expr) { + if (!expr->children()[0]->is_slot_ref()) { + return false; + } + const auto* slot_ref = static_cast(expr->children()[0].get()); + // check if the slot exists in orc file and not partition column + if (!_col_name_to_file_col_name.contains(slot_ref->expr_name()) || + _lazy_read_ctx.predicate_partition_columns.contains(slot_ref->expr_name())) { + return false; } + auto [valid, _, predicate_type] = _make_orc_literal(slot_ref, nullptr); + if (valid) { + _vslot_ref_to_orc_predicate_data_type[slot_ref] = predicate_type; + } + return valid; +} - orc::PredicateDataType predicate_data_type; - auto type_it = TYPEKIND_TO_PREDICATE_TYPE.find(type->getKind()); - if (type_it == TYPEKIND_TO_PREDICATE_TYPE.end()) { - // Unsupported type - return predicates; - } else { - predicate_data_type = type_it->second; +// check if the literal of expr can be pushed down to orc reader and make orc literal +bool OrcReader::_check_literal_can_push_down(const VExprSPtr& expr, uint16_t child_id) { + if (!expr->children()[child_id]->is_literal()) { + return false; + } + // the slot has been checked in _check_slot_can_push_down before calling this function + const auto* slot_ref = static_cast(expr->children()[0].get()); + const auto* literal = static_cast(expr->children()[child_id].get()); + auto [valid, orc_literal, _] = _make_orc_literal(slot_ref, literal); + if (valid) { + _vliteral_to_orc_literal.insert(std::make_pair(literal, orc_literal)); } + return valid; +} - if (col_val_range.is_fixed_value_range()) { - OrcPredicate in_predicate; - in_predicate.col_name = col_val_range.column_name(); - in_predicate.data_type = predicate_data_type; - in_predicate.op = SQLFilterOp::FILTER_IN; - for (const auto& value : col_val_range.get_fixed_value_set()) { - auto [valid, literal] = convert_to_orc_literal( - type, &value, col_val_range.precision(), col_val_range.scale()); - if (valid) { - in_predicate.literals.push_back(literal); - } - } - if (!in_predicate.literals.empty()) { - predicates.emplace_back(in_predicate); - } - return predicates; +// check if there are rest children of expr can be pushed down to orc reader +bool OrcReader::_check_rest_children_can_push_down(const VExprSPtr& expr) { + if (expr->children().size() < 2) { + return false; } - const auto& high_value = col_val_range.get_range_max_value(); - const auto& low_value = col_val_range.get_range_min_value(); - const auto& high_op = col_val_range.get_range_high_op(); - const auto& low_op = col_val_range.get_range_low_op(); + bool at_least_one_child_can_push_down = false; + for (size_t i = 1; i < expr->children().size(); ++i) { + if (_check_literal_can_push_down(expr, i)) { + at_least_one_child_can_push_down = true; + } + } + return at_least_one_child_can_push_down; +} - // orc can only push down is_null. When col_value_range._contain_null = true, only indicating that - // value can be null, not equals null, so ignore _contain_null in col_value_range - if (col_val_range.is_high_value_maximum() && high_op == SQLFilterOp::FILTER_LESS_OR_EQUAL && - col_val_range.is_low_value_mininum() && low_op == SQLFilterOp::FILTER_LARGER_OR_EQUAL) { - return predicates; +// check if the expr can be pushed down to orc reader +bool OrcReader::_check_expr_can_push_down(const VExprSPtr& expr) { + if (expr == nullptr) { + return false; } - if (low_value < high_value) { - if (!col_val_range.is_low_value_mininum() || - SQLFilterOp::FILTER_LARGER_OR_EQUAL != low_op) { - auto [valid, low_literal] = convert_to_orc_literal( - type, &low_value, col_val_range.precision(), col_val_range.scale()); - if (valid) { - OrcPredicate low_predicate; - low_predicate.col_name = col_val_range.column_name(); - low_predicate.data_type = predicate_data_type; - low_predicate.op = low_op; - low_predicate.literals.emplace_back(low_literal); - predicates.emplace_back(low_predicate); - } - } - if (!col_val_range.is_high_value_maximum() || - SQLFilterOp::FILTER_LESS_OR_EQUAL != high_op) { - auto [valid, high_literal] = convert_to_orc_literal( - type, &high_value, col_val_range.precision(), col_val_range.scale()); - if (valid) { - OrcPredicate high_predicate; - high_predicate.col_name = col_val_range.column_name(); - high_predicate.data_type = predicate_data_type; - high_predicate.op = high_op; - high_predicate.literals.emplace_back(high_literal); - predicates.emplace_back(high_predicate); - } + switch (expr->op()) { + case TExprOpcode::COMPOUND_AND: + // at least one child can be pushed down + return std::ranges::any_of(expr->children(), [this](const auto& child) { + return _check_expr_can_push_down(child); + }); + case TExprOpcode::COMPOUND_OR: + // all children must be pushed down + return std::ranges::all_of(expr->children(), [this](const auto& child) { + return _check_expr_can_push_down(child); + }); + case TExprOpcode::COMPOUND_NOT: + DCHECK_EQ(expr->children().size(), 1); + return _check_expr_can_push_down(expr->children()[0]); + + case TExprOpcode::GE: + case TExprOpcode::GT: + case TExprOpcode::LE: + case TExprOpcode::LT: + case TExprOpcode::EQ: + case TExprOpcode::NE: + case TExprOpcode::FILTER_IN: + case TExprOpcode::FILTER_NOT_IN: + // can't push down if expr is null aware predicate + return expr->node_type() != TExprNodeType::NULL_AWARE_BINARY_PRED && + expr->node_type() != TExprNodeType::NULL_AWARE_IN_PRED && + _check_slot_can_push_down(expr) && _check_rest_children_can_push_down(expr); + + case TExprOpcode::INVALID_OPCODE: + if (expr->node_type() == TExprNodeType::FUNCTION_CALL) { + auto fn_name = expr->fn().name.function_name; + // only support is_null_pred and is_not_null_pred + if (fn_name == "is_null_pred" || fn_name == "is_not_null_pred") { + return _check_slot_can_push_down(expr); + } + VLOG_CRITICAL << "Unsupported function [funciton=" << fn_name << "]"; } + return false; + default: + VLOG_CRITICAL << "Unsupported Opcode [OpCode=" << expr->op() << "]"; + return false; + } +} + +void OrcReader::_build_less_than(const VExprSPtr& expr, + std::unique_ptr& builder) { + DCHECK(expr->children().size() == 2); + DCHECK(expr->children()[0]->is_slot_ref()); + DCHECK(expr->children()[1]->is_literal()); + const auto* slot_ref = static_cast(expr->children()[0].get()); + const auto* literal = static_cast(expr->children()[1].get()); + DCHECK(_vslot_ref_to_orc_predicate_data_type.contains(slot_ref)); + auto predicate_type = _vslot_ref_to_orc_predicate_data_type[slot_ref]; + DCHECK(_vliteral_to_orc_literal.contains(literal)); + auto orc_literal = _vliteral_to_orc_literal.find(literal)->second; + builder->lessThan(slot_ref->expr_name(), predicate_type, orc_literal); +} + +void OrcReader::_build_less_than_equals(const VExprSPtr& expr, + std::unique_ptr& builder) { + DCHECK(expr->children().size() == 2); + DCHECK(expr->children()[0]->is_slot_ref()); + DCHECK(expr->children()[1]->is_literal()); + const auto* slot_ref = static_cast(expr->children()[0].get()); + const auto* literal = static_cast(expr->children()[1].get()); + DCHECK(_vslot_ref_to_orc_predicate_data_type.contains(slot_ref)); + auto predicate_type = _vslot_ref_to_orc_predicate_data_type[slot_ref]; + DCHECK(_vliteral_to_orc_literal.contains(literal)); + auto orc_literal = _vliteral_to_orc_literal.find(literal)->second; + builder->lessThanEquals(slot_ref->expr_name(), predicate_type, orc_literal); +} + +void OrcReader::_build_equals(const VExprSPtr& expr, + std::unique_ptr& builder) { + DCHECK(expr->children().size() == 2); + DCHECK(expr->children()[0]->is_slot_ref()); + DCHECK(expr->children()[1]->is_literal()); + const auto* slot_ref = static_cast(expr->children()[0].get()); + const auto* literal = static_cast(expr->children()[1].get()); + DCHECK(_vslot_ref_to_orc_predicate_data_type.contains(slot_ref)); + auto predicate_type = _vslot_ref_to_orc_predicate_data_type[slot_ref]; + DCHECK(_vliteral_to_orc_literal.contains(literal)); + auto orc_literal = _vliteral_to_orc_literal.find(literal)->second; + builder->equals(slot_ref->expr_name(), predicate_type, orc_literal); +} + +void OrcReader::_build_filter_in(const VExprSPtr& expr, + std::unique_ptr& builder) { + DCHECK(expr->children().size() >= 2); + DCHECK(expr->children()[0]->is_slot_ref()); + const auto* slot_ref = static_cast(expr->children()[0].get()); + std::vector literals; + DCHECK(_vslot_ref_to_orc_predicate_data_type.contains(slot_ref)); + orc::PredicateDataType predicate_type = _vslot_ref_to_orc_predicate_data_type[slot_ref]; + for (size_t i = 1; i < expr->children().size(); ++i) { + DCHECK(expr->children()[i]->is_literal()); + const auto* literal = static_cast(expr->children()[i].get()); + if (_vliteral_to_orc_literal.contains(literal)) { + auto orc_literal = _vliteral_to_orc_literal.find(literal)->second; + literals.emplace_back(orc_literal); + } + } + DCHECK(!literals.empty()); + if (literals.size() == 1) { + builder->equals(slot_ref->expr_name(), predicate_type, literals[0]); + } else { + builder->in(slot_ref->expr_name(), predicate_type, literals); } - return predicates; } -bool static build_search_argument(std::vector& predicates, int index, - std::unique_ptr& builder) { - if (index >= predicates.size()) { +void OrcReader::_build_is_null(const VExprSPtr& expr, + std::unique_ptr& builder) { + DCHECK(expr->children().size() == 1); + DCHECK(expr->children()[0]->is_slot_ref()); + const auto* slot_ref = static_cast(expr->children()[0].get()); + DCHECK(_vslot_ref_to_orc_predicate_data_type.contains(slot_ref)); + auto predicate_type = _vslot_ref_to_orc_predicate_data_type[slot_ref]; + builder->isNull(slot_ref->expr_name(), predicate_type); +} + +bool OrcReader::_build_search_argument(const VExprSPtr& expr, + std::unique_ptr& builder) { + // OPTIMIZE: check expr only once + if (!_check_expr_can_push_down(expr)) { return false; } - if (index < predicates.size() - 1) { + switch (expr->op()) { + case TExprOpcode::COMPOUND_AND: { builder->startAnd(); + bool at_least_one_can_push_down = false; + for (const auto& child : expr->children()) { + if (_build_search_argument(child, builder)) { + at_least_one_can_push_down = true; + } + } + DCHECK(at_least_one_can_push_down); + builder->end(); + break; } - OrcPredicate& predicate = predicates[index]; - switch (predicate.op) { - case SQLFilterOp::FILTER_IN: { - if (predicate.literals.size() == 1) { - builder->equals(predicate.col_name, predicate.data_type, predicate.literals[0]); - } else { - builder->in(predicate.col_name, predicate.data_type, predicate.literals); + case TExprOpcode::COMPOUND_OR: { + builder->startOr(); + bool all_can_push_down = true; + for (const auto& child : expr->children()) { + if (!_build_search_argument(child, builder)) { + all_can_push_down = false; + } } + DCHECK(all_can_push_down); + builder->end(); break; } - case SQLFilterOp::FILTER_LESS: - builder->lessThan(predicate.col_name, predicate.data_type, predicate.literals[0]); + case TExprOpcode::COMPOUND_NOT: { + DCHECK_EQ(expr->children().size(), 1); + builder->startNot(); + auto res = _build_search_argument(expr->children()[0], builder); + DCHECK(res); + builder->end(); break; - case SQLFilterOp::FILTER_LESS_OR_EQUAL: - builder->lessThanEquals(predicate.col_name, predicate.data_type, predicate.literals[0]); + } + case TExprOpcode::GE: + builder->startNot(); + _build_less_than(expr, builder); + builder->end(); break; - case SQLFilterOp::FILTER_LARGER: { + case TExprOpcode::GT: builder->startNot(); - builder->lessThanEquals(predicate.col_name, predicate.data_type, predicate.literals[0]); + _build_less_than_equals(expr, builder); builder->end(); break; - } - case SQLFilterOp::FILTER_LARGER_OR_EQUAL: { + case TExprOpcode::LE: + _build_less_than_equals(expr, builder); + break; + case TExprOpcode::LT: + _build_less_than(expr, builder); + break; + case TExprOpcode::EQ: + _build_equals(expr, builder); + break; + case TExprOpcode::NE: builder->startNot(); - builder->lessThan(predicate.col_name, predicate.data_type, predicate.literals[0]); + _build_equals(expr, builder); builder->end(); break; - } - default: - return false; - } - if (index < predicates.size() - 1) { - bool can_build = build_search_argument(predicates, index + 1, builder); - if (!can_build) { - return false; - } + case TExprOpcode::FILTER_IN: + _build_filter_in(expr, builder); + break; + case TExprOpcode::FILTER_NOT_IN: + builder->startNot(); + _build_filter_in(expr, builder); builder->end(); + break; + // is null and is not null is represented as function call + case TExprOpcode::INVALID_OPCODE: + DCHECK(expr->node_type() == TExprNodeType::FUNCTION_CALL); + if (expr->fn().name.function_name == "is_null_pred") { + _build_is_null(expr, builder); + } else if (expr->fn().name.function_name == "is_not_null_pred") { + builder->startNot(); + _build_is_null(expr, builder); + builder->end(); + } else { + // should not reach here, because _check_expr_can_push_down has already checked + __builtin_unreachable(); + } + break; + + default: + // should not reach here, because _check_expr_can_push_down has already checked + __builtin_unreachable(); } return true; } -bool OrcReader::_init_search_argument( - const std::unordered_map* colname_to_value_range) { - if ((!_enable_filter_by_min_max) || colname_to_value_range->empty()) { - return false; - } - std::vector predicates; - auto& root_type = _reader->getType(); - std::unordered_map type_map; - for (int i = 0; i < root_type.getSubtypeCount(); ++i) { - type_map.emplace(get_field_name_lower_case(&root_type, i), root_type.getSubtype(i)); - } - for (auto& col_name : _lazy_read_ctx.all_read_columns) { - auto iter = colname_to_value_range->find(col_name); - if (iter == colname_to_value_range->end()) { - continue; - } - auto type_it = type_map.find(_col_name_to_file_col_name[col_name]); - if (type_it == type_map.end()) { - continue; +bool OrcReader::_init_search_argument(const VExprContextSPtrs& conjuncts) { + // build search argument, if any expr can not be pushed down, return false + auto builder = orc::SearchArgumentFactory::newBuilder(); + bool at_least_one_can_push_down = false; + builder->startAnd(); + for (const auto& expr_ctx : conjuncts) { + _vslot_ref_to_orc_predicate_data_type.clear(); + _vliteral_to_orc_literal.clear(); + if (_build_search_argument(expr_ctx->root(), builder)) { + at_least_one_can_push_down = true; } - std::visit( - [&](auto& range) { - std::vector value_predicates = - value_range_to_predicate(range, type_it->second); - for (auto& range_predicate : value_predicates) { - predicates.emplace_back(range_predicate); - } - }, - iter->second); } - if (predicates.empty()) { + if (!at_least_one_can_push_down) { + // if all exprs can not be pushed down, builder->end() will throw exception return false; } + builder->end(); - if (_is_hive1_orc_or_use_idx) { - // use hive 1.x version orc file, need to convert column name to internal column name - for (OrcPredicate& it : predicates) { - it.col_name = _col_name_to_file_col_name[it.col_name]; - } - } - - // check if all column names in predicates are same as orc file - DCHECK(std::all_of(predicates.begin(), predicates.end(), [&](const OrcPredicate& predicate) { - return type_map.contains(predicate.col_name); - })); - - std::unique_ptr builder = orc::SearchArgumentFactory::newBuilder(); - if (build_search_argument(predicates, 0, builder)) { - std::unique_ptr sargs = builder->build(); - _row_reader_options.searchArgument(std::move(sargs)); - return true; - } else { - return false; - } + auto sargs = builder->build(); + _profile->add_info_string("OrcReader SearchArgument: ", sargs->toString()); + _row_reader_options.searchArgument(std::move(sargs)); + return true; } Status OrcReader::set_fill_columns( @@ -846,7 +1023,7 @@ Status OrcReader::set_fill_columns( visit_slot(child.get()); } } else if (VInPredicate* in_predicate = typeid_cast(filter_impl)) { - if (in_predicate->children().size() > 0) { + if (!in_predicate->children().empty()) { visit_slot(in_predicate->children()[0].get()); } } else { @@ -865,12 +1042,25 @@ Status OrcReader::set_fill_columns( visit_slot(conjunct->root().get()); } + if (_is_acid) { + _lazy_read_ctx.predicate_orc_columns.insert( + _lazy_read_ctx.predicate_orc_columns.end(), + TransactionalHive::READ_ROW_COLUMN_NAMES.begin(), + TransactionalHive::READ_ROW_COLUMN_NAMES.end()); + } + for (auto& read_col : _read_cols_lower_case) { _lazy_read_ctx.all_read_columns.emplace_back(read_col); if (predicate_columns.size() > 0) { auto iter = predicate_columns.find(read_col); if (iter == predicate_columns.end()) { - _lazy_read_ctx.lazy_read_columns.emplace_back(read_col); + if (!_is_acid || + std::find(TransactionalHive::READ_ROW_COLUMN_NAMES_LOWER_CASE.begin(), + TransactionalHive::READ_ROW_COLUMN_NAMES_LOWER_CASE.end(), + read_col) == + TransactionalHive::READ_ROW_COLUMN_NAMES_LOWER_CASE.end()) { + _lazy_read_ctx.lazy_read_columns.emplace_back(read_col); + } } else { _lazy_read_ctx.predicate_columns.first.emplace_back(iter->first); _lazy_read_ctx.predicate_columns.second.emplace_back(iter->second.second); @@ -926,8 +1116,22 @@ Status OrcReader::set_fill_columns( _lazy_read_ctx.can_lazy_read = true; } - if (_colname_to_value_range == nullptr || !_init_search_argument(_colname_to_value_range)) { + if (_lazy_read_ctx.conjuncts.empty()) { _lazy_read_ctx.can_lazy_read = false; + } else if (_enable_filter_by_min_max) { + auto res = _init_search_argument(_lazy_read_ctx.conjuncts); + if (_state->query_options().check_orc_init_sargs_success && !res) { + std::stringstream ss; + for (const auto& conjunct : _lazy_read_ctx.conjuncts) { + ss << conjunct->root()->debug_string() << "\n"; + } + std::string conjuncts_str = ss.str(); + return Status::InternalError( + "Session variable check_orc_init_sargs_success is set, but " + "_init_search_argument returns false because all exprs can not be pushed " + "down:\n " + + conjuncts_str); + } } try { _row_reader_options.range(_range_start_offset, _range_size); @@ -1069,7 +1273,8 @@ Status OrcReader::_fill_partition_columns( if (num_deserialized != rows) { return Status::InternalError( "Failed to fill partition column: {}={} ." - "Number of rows expected to be written : {}, number of rows actually written : " + "Number of rows expected to be written : {}, number of rows actually " + "written : " "{}", slot_desc->col_name(), value, num_deserialized, rows); } @@ -1675,6 +1880,12 @@ std::string OrcReader::get_field_name_lower_case(const orc::Type* orc_type, int Status OrcReader::get_next_block(Block* block, size_t* read_rows, bool* eof) { RETURN_IF_ERROR(get_next_block_impl(block, read_rows, eof)); + if (*eof) { + COUNTER_UPDATE(_orc_profile.selected_row_group_count, + _reader_metrics.SelectedRowGroupCount); + COUNTER_UPDATE(_orc_profile.evaluated_row_group_count, + _reader_metrics.EvaluatedRowGroupCount); + } if (_orc_filter) { RETURN_IF_ERROR(_orc_filter->get_status()); } diff --git a/be/src/vec/exec/format/orc/vorc_reader.h b/be/src/vec/exec/format/orc/vorc_reader.h index 4ebfb68a22f347..98c31645b41183 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.h +++ b/be/src/vec/exec/format/orc/vorc_reader.h @@ -18,9 +18,9 @@ #pragma once #include -#include -#include +#include +#include #include #include #include @@ -41,6 +41,7 @@ #include "orc/Reader.hh" #include "orc/Type.hh" #include "orc/Vector.hh" +#include "orc/sargs/Literal.hh" #include "runtime/types.h" #include "util/runtime_profile.h" #include "vec/aggregate_functions/aggregate_function.h" @@ -51,6 +52,8 @@ #include "vec/exec/format/format_common.h" #include "vec/exec/format/generic_reader.h" #include "vec/exec/format/table/transactional_hive_reader.h" +#include "vec/exprs/vliteral.h" +#include "vec/exprs/vslot_ref.h" namespace doris { class RuntimeState; @@ -80,13 +83,6 @@ namespace doris::vectorized { class ORCFileInputStream; -struct OrcPredicate { - std::string col_name; - orc::PredicateDataType data_type; - std::vector literals; - SQLFilterOp op; -}; - struct LazyReadContext { VExprContextSPtrs conjuncts; bool can_lazy_read = false; @@ -227,6 +223,8 @@ class OrcReader : public GenericReader { RuntimeProfile::Counter* decode_value_time = nullptr; RuntimeProfile::Counter* decode_null_map_time = nullptr; RuntimeProfile::Counter* filter_block_time = nullptr; + RuntimeProfile::Counter* selected_row_group_count = nullptr; + RuntimeProfile::Counter* evaluated_row_group_count = nullptr; }; class ORCFilterImpl : public orc::ORCFilter { @@ -287,11 +285,30 @@ class OrcReader : public GenericReader { void _init_orc_cols(const orc::Type& type, std::vector& orc_cols, std::vector& orc_cols_lower_case, std::unordered_map& type_map, - bool* is_hive1_orc); + bool* is_hive1_orc, bool should_add_acid_prefix) const; static bool _check_acid_schema(const orc::Type& type); static const orc::Type& _remove_acid(const orc::Type& type); - bool _init_search_argument( - const std::unordered_map* colname_to_value_range); + + // functions for building search argument until _init_search_argument + std::tuple _make_orc_literal( + const VSlotRef* slot_ref, const VLiteral* literal); + bool _check_slot_can_push_down(const VExprSPtr& expr); + bool _check_literal_can_push_down(const VExprSPtr& expr, uint16_t child_id); + bool _check_rest_children_can_push_down(const VExprSPtr& expr); + bool _check_expr_can_push_down(const VExprSPtr& expr); + void _build_less_than(const VExprSPtr& expr, + std::unique_ptr& builder); + void _build_less_than_equals(const VExprSPtr& expr, + std::unique_ptr& builder); + void _build_equals(const VExprSPtr& expr, std::unique_ptr& builder); + void _build_filter_in(const VExprSPtr& expr, + std::unique_ptr& builder); + void _build_is_null(const VExprSPtr& expr, + std::unique_ptr& builder); + bool _build_search_argument(const VExprSPtr& expr, + std::unique_ptr& builder); + bool _init_search_argument(const VExprContextSPtrs& conjuncts); + void _init_bloom_filter( std::unordered_map* colname_to_value_range); void _init_system_properties(); @@ -584,6 +601,7 @@ class OrcReader : public GenericReader { std::unique_ptr _file_input_stream; Statistics _statistics; OrcProfile _orc_profile; + orc::ReaderMetrics _reader_metrics; std::unique_ptr _batch; std::unique_ptr _reader; @@ -629,6 +647,9 @@ class OrcReader : public GenericReader { std::unordered_map _table_col_to_file_col; //support iceberg position delete . std::vector* _position_delete_ordered_rowids = nullptr; + std::unordered_map + _vslot_ref_to_orc_predicate_data_type; + std::unordered_map _vliteral_to_orc_literal; // If you set "orc_tiny_stripe_threshold_bytes" = 0, the use tiny stripes merge io optimization will not be used. int64_t _orc_tiny_stripe_threshold_bytes = 8L * 1024L * 1024L; diff --git a/be/test/exec/test_data/orc_scanner/orders.orc b/be/test/exec/test_data/orc_scanner/orders.orc new file mode 100644 index 00000000000000..6fad5043288d43 Binary files /dev/null and b/be/test/exec/test_data/orc_scanner/orders.orc differ diff --git a/be/test/testutil/desc_tbl_builder.cpp b/be/test/testutil/desc_tbl_builder.cpp index 4cba9a44a4b0d1..6404d1c5449165 100644 --- a/be/test/testutil/desc_tbl_builder.cpp +++ b/be/test/testutil/desc_tbl_builder.cpp @@ -17,20 +17,9 @@ #include "testutil/desc_tbl_builder.h" -#include -#include -#include +#include -#include - -#include "common/object_pool.h" #include "common/status.h" -#include "gtest/gtest_pred_impl.h" -#include "runtime/define_primitive_type.h" -#include "runtime/descriptors.h" -#include "util/bit_util.h" - -using std::vector; namespace doris { @@ -44,7 +33,7 @@ TupleDescBuilder& DescriptorTblBuilder::declare_tuple() { // item_id of -1 indicates no itemTupleId static TSlotDescriptor make_slot_descriptor(int id, int parent_id, const TypeDescriptor& type, - int slot_idx, int item_id) { + const std::string& name, int slot_idx, int item_id) { int null_byte = slot_idx / 8; int null_bit = slot_idx % 8; TSlotDescriptor slot_desc; @@ -58,6 +47,7 @@ static TSlotDescriptor make_slot_descriptor(int id, int parent_id, const TypeDes slot_desc.__set_nullIndicatorBit(null_bit); slot_desc.__set_slotIdx(slot_idx); slot_desc.__set_isMaterialized(true); + slot_desc.__set_colName(name); // if (item_id != -1) { // slot_desc.__set_itemTupleId(item_id); // } @@ -78,8 +68,9 @@ DescriptorTbl* DescriptorTblBuilder::build() { int tuple_id = 0; int slot_id = 0; - for (int i = 0; i < _tuples_descs.size(); ++i) { - build_tuple(_tuples_descs[i]->slot_types(), &thrift_desc_tbl, &tuple_id, &slot_id); + for (auto& _tuples_desc : _tuples_descs) { + build_tuple(_tuples_desc->slot_types(), _tuples_desc->slot_names(), &thrift_desc_tbl, + &tuple_id, &slot_id); } Status status = DescriptorTbl::create(_obj_pool, thrift_desc_tbl, &desc_tbl); @@ -87,7 +78,8 @@ DescriptorTbl* DescriptorTblBuilder::build() { return desc_tbl; } -TTupleDescriptor DescriptorTblBuilder::build_tuple(const vector& slot_types, +TTupleDescriptor DescriptorTblBuilder::build_tuple(const std::vector& slot_types, + const std::vector& slot_names, TDescriptorTable* thrift_desc_tbl, int* next_tuple_id, int* slot_id) { // We never materialize struct slots (there's no in-memory representation of structs, @@ -95,7 +87,8 @@ TTupleDescriptor DescriptorTblBuilder::build_tuple(const vector& // still have a struct item type. In this case, the array item tuple contains the // "inlined" struct fields. if (slot_types.size() == 1 && slot_types[0].type == TYPE_STRUCT) { - return build_tuple(slot_types[0].children, thrift_desc_tbl, next_tuple_id, slot_id); + return build_tuple(slot_types[0].children, slot_types[0].field_names, thrift_desc_tbl, + next_tuple_id, slot_id); } int tuple_id = *next_tuple_id; @@ -111,7 +104,7 @@ TTupleDescriptor DescriptorTblBuilder::build_tuple(const vector& // } thrift_desc_tbl->slotDescriptors.push_back( - make_slot_descriptor(*slot_id, tuple_id, slot_types[i], i, item_id)); + make_slot_descriptor(*slot_id, tuple_id, slot_types[i], slot_names[i], i, item_id)); thrift_desc_tbl->__isset.slotDescriptors = true; ++(*slot_id); } diff --git a/be/test/testutil/desc_tbl_builder.h b/be/test/testutil/desc_tbl_builder.h index c29ef9acd43eb5..968b29bd00143c 100644 --- a/be/test/testutil/desc_tbl_builder.h +++ b/be/test/testutil/desc_tbl_builder.h @@ -20,15 +20,16 @@ #include +#include #include +#include "common/object_pool.h" +#include "runtime/descriptors.h" #include "runtime/types.h" namespace doris { -class ObjectPool; class TupleDescBuilder; -class DescriptorTbl; // Aids in the construction of a DescriptorTbl by declaring tuples and slots // associated with those tuples. @@ -40,6 +41,7 @@ class DescriptorTbl; // DescriptorTblBuilder builder; // builder.declare_tuple() << TYPE_TINYINT << TYPE_TIMESTAMP; // gets TupleId 0 // builder.declare_tuple() << TYPE_FLOAT; // gets TupleId 1 +// builder.declare_tuple() << std::make_tuple(TYPE_INT, "col1") << std::make_tuple(TYPE_STRING, "col2"); // gets Tuple with type and name // DescriptorTbl desc_tbl = builder.build(); class DescriptorTblBuilder { public: @@ -57,20 +59,31 @@ class DescriptorTblBuilder { std::vector _tuples_descs; TTupleDescriptor build_tuple(const std::vector& slot_types, + const std::vector& slot_names, TDescriptorTable* thrift_desc_tbl, int* tuple_id, int* slot_id); }; class TupleDescBuilder { public: + using SlotType = std::tuple; + TupleDescBuilder& operator<<(const SlotType& slot) { + _slot_types.push_back(std::get<0>(slot)); + _slot_names.push_back(std::get<1>(slot)); + return *this; + } + TupleDescBuilder& operator<<(const TypeDescriptor& slot_type) { _slot_types.push_back(slot_type); + _slot_names.emplace_back(""); return *this; } std::vector slot_types() const { return _slot_types; } + std::vector slot_names() const { return _slot_names; } private: std::vector _slot_types; + std::vector _slot_names; }; } // end namespace doris diff --git a/be/test/vec/exec/orc/orc_convert_to_orc_literal_test.cpp b/be/test/vec/exec/orc/orc_convert_to_orc_literal_test.cpp index ac79f22a6bbc8c..947fb957ee57ae 100644 --- a/be/test/vec/exec/orc/orc_convert_to_orc_literal_test.cpp +++ b/be/test/vec/exec/orc/orc_convert_to_orc_literal_test.cpp @@ -41,7 +41,7 @@ TEST_F(OrcReaderConvertToOrcLiteralTest, ConvertTypesTest) { StringRef literal_data(reinterpret_cast(&tiny_value), sizeof(tiny_value)); auto orc_type_ptr = createPrimitiveType(orc::TypeKind::BYTE); auto [success, literal] = - convert_to_orc_literal(orc_type_ptr.get(), literal_data.data, 0, 0); + convert_to_orc_literal(orc_type_ptr.get(), literal_data, 0, 0); ASSERT_TRUE(success); ASSERT_EQ(literal.getLong(), 127); } @@ -52,7 +52,7 @@ TEST_F(OrcReaderConvertToOrcLiteralTest, ConvertTypesTest) { StringRef literal_data(reinterpret_cast(&small_value), sizeof(small_value)); auto orc_type_ptr = createPrimitiveType(orc::TypeKind::SHORT); auto [success, literal] = - convert_to_orc_literal(orc_type_ptr.get(), literal_data.data, 0, 0); + convert_to_orc_literal(orc_type_ptr.get(), literal_data, 0, 0); ASSERT_TRUE(success); ASSERT_EQ(literal.getLong(), 32000); } @@ -63,7 +63,7 @@ TEST_F(OrcReaderConvertToOrcLiteralTest, ConvertTypesTest) { StringRef literal_data(reinterpret_cast(&int_value), sizeof(int_value)); auto orc_type_ptr = createPrimitiveType(orc::TypeKind::INT); auto [success, literal] = - convert_to_orc_literal(orc_type_ptr.get(), literal_data.data, 0, 0); + convert_to_orc_literal(orc_type_ptr.get(), literal_data, 0, 0); ASSERT_TRUE(success); ASSERT_EQ(literal.getLong(), 2147483647); } @@ -74,7 +74,7 @@ TEST_F(OrcReaderConvertToOrcLiteralTest, ConvertTypesTest) { StringRef literal_data(reinterpret_cast(&big_value), sizeof(big_value)); auto orc_type_ptr = createPrimitiveType(orc::TypeKind::LONG); auto [success, literal] = - convert_to_orc_literal(orc_type_ptr.get(), literal_data.data, 0, 0); + convert_to_orc_literal(orc_type_ptr.get(), literal_data, 0, 0); ASSERT_TRUE(success); ASSERT_EQ(literal.getLong(), 9223372036854775807LL); } @@ -84,7 +84,7 @@ TEST_F(OrcReaderConvertToOrcLiteralTest, ConvertTypesTest) { StringRef literal_data(reinterpret_cast(&float_value), sizeof(float_value)); auto orc_type_ptr = createPrimitiveType(orc::TypeKind::FLOAT); auto [success, literal] = - convert_to_orc_literal(orc_type_ptr.get(), literal_data.data, 0, 0); + convert_to_orc_literal(orc_type_ptr.get(), literal_data, 0, 0); ASSERT_TRUE(success); ASSERT_NEAR(literal.getFloat(), 3.14159f, 0.0001); } @@ -95,7 +95,7 @@ TEST_F(OrcReaderConvertToOrcLiteralTest, ConvertTypesTest) { StringRef literal_data(reinterpret_cast(&double_value), sizeof(double_value)); auto orc_type_ptr = createPrimitiveType(orc::TypeKind::DOUBLE); auto [success, literal] = - convert_to_orc_literal(orc_type_ptr.get(), literal_data.data, 0, 0); + convert_to_orc_literal(orc_type_ptr.get(), literal_data, 0, 0); ASSERT_TRUE(success); ASSERT_DOUBLE_EQ(literal.getFloat(), 3.14159265358979323846); } @@ -105,12 +105,24 @@ TEST_F(OrcReaderConvertToOrcLiteralTest, ConvertTypesTest) { StringRef literal_data(str_value.data(), str_value.size()); auto orc_type_ptr = createPrimitiveType(orc::TypeKind::STRING); auto [success, literal] = - convert_to_orc_literal(orc_type_ptr.get(), (void*)&literal_data, 0, 0); + convert_to_orc_literal(orc_type_ptr.get(), literal_data, 0, 0); ASSERT_TRUE(success); ASSERT_EQ(std::string(literal.getString().data(), literal.getString().length()), "Hello, World!"); } + // VARCHAR test + { + std::string str_value = "VARCHAR test"; + StringRef literal_data(str_value.data(), str_value.size()); + auto orc_type_ptr = createPrimitiveType(orc::TypeKind::VARCHAR); + auto [success, literal] = + convert_to_orc_literal(orc_type_ptr.get(), literal_data, 0, 0); + ASSERT_TRUE(success); + ASSERT_EQ(std::string(literal.getString().data(), literal.getString().length()), + "VARCHAR test"); + } + // DECIMAL32 test { int32_t decimal32_value = 12345; @@ -118,7 +130,7 @@ TEST_F(OrcReaderConvertToOrcLiteralTest, ConvertTypesTest) { sizeof(decimal32_value)); auto orc_type_ptr = createPrimitiveType(orc::TypeKind::DECIMAL); auto [success, literal] = - convert_to_orc_literal(orc_type_ptr.get(), literal_data.data, 9, 4); + convert_to_orc_literal(orc_type_ptr.get(), literal_data, 9, 4); ASSERT_TRUE(success); ASSERT_EQ(literal.getDecimal().toString(), "1.2345"); } @@ -129,8 +141,8 @@ TEST_F(OrcReaderConvertToOrcLiteralTest, ConvertTypesTest) { StringRef literal_data(reinterpret_cast(&decimal64_value), sizeof(decimal64_value)); auto orc_type_ptr = createPrimitiveType(orc::TypeKind::DECIMAL); - auto [success, literal] = convert_to_orc_literal(orc_type_ptr.get(), - literal_data.data, 18, 6); + auto [success, literal] = + convert_to_orc_literal(orc_type_ptr.get(), literal_data, 18, 6); ASSERT_TRUE(success); ASSERT_EQ(literal.getDecimal().toString(), "123456789.012345"); } @@ -141,8 +153,8 @@ TEST_F(OrcReaderConvertToOrcLiteralTest, ConvertTypesTest) { StringRef literal_data(reinterpret_cast(&decimal128_value), sizeof(decimal128_value)); auto orc_type_ptr = createPrimitiveType(orc::TypeKind::DECIMAL); - auto [success, literal] = convert_to_orc_literal( - orc_type_ptr.get(), literal_data.data, 38, 9); + auto [success, literal] = + convert_to_orc_literal(orc_type_ptr.get(), literal_data, 38, 9); ASSERT_TRUE(success); ASSERT_EQ(literal.getDecimal().toString(), "1.234512345"); } @@ -154,7 +166,7 @@ TEST_F(OrcReaderConvertToOrcLiteralTest, ConvertTypesTest) { StringRef literal_data(reinterpret_cast(&date_value), sizeof(date_value)); auto orc_type_ptr = createPrimitiveType(orc::TypeKind::DATE); auto [success, literal] = - convert_to_orc_literal(orc_type_ptr.get(), literal_data.data, 0, 0); + convert_to_orc_literal(orc_type_ptr.get(), literal_data, 0, 0); ASSERT_TRUE(success); // Verify converted day offset @@ -165,7 +177,7 @@ TEST_F(OrcReaderConvertToOrcLiteralTest, ConvertTypesTest) { date_value.from_date_str("0001-01-01", 10); literal_data = StringRef(reinterpret_cast(&date_value), sizeof(date_value)); std::tie(success, literal) = - convert_to_orc_literal(orc_type_ptr.get(), literal_data.data, 0, 0); + convert_to_orc_literal(orc_type_ptr.get(), literal_data, 0, 0); ASSERT_TRUE(success); //-719162 ASSERT_EQ(literal.getDate(), -719162); @@ -173,7 +185,7 @@ TEST_F(OrcReaderConvertToOrcLiteralTest, ConvertTypesTest) { date_value.from_date_str("9999-12-31", 10); literal_data = StringRef(reinterpret_cast(&date_value), sizeof(date_value)); std::tie(success, literal) = - convert_to_orc_literal(orc_type_ptr.get(), literal_data.data, 0, 0); + convert_to_orc_literal(orc_type_ptr.get(), literal_data, 0, 0); ASSERT_TRUE(success); // ASSERT_EQ(literal.getDate(), 2932896); } @@ -187,7 +199,7 @@ TEST_F(OrcReaderConvertToOrcLiteralTest, ConvertTypesTest) { sizeof(datetime_value)); auto orc_type_ptr = createPrimitiveType(orc::TypeKind::TIMESTAMP); auto [success, literal] = - convert_to_orc_literal(orc_type_ptr.get(), literal_data.data, 0, 0); + convert_to_orc_literal(orc_type_ptr.get(), literal_data, 0, 0); ASSERT_TRUE(success); // Verify seconds and nanoseconds @@ -198,7 +210,7 @@ TEST_F(OrcReaderConvertToOrcLiteralTest, ConvertTypesTest) { literal_data = StringRef(reinterpret_cast(&datetime_value), sizeof(datetime_value)); std::tie(success, literal) = - convert_to_orc_literal(orc_type_ptr.get(), literal_data.data, 0, 0); + convert_to_orc_literal(orc_type_ptr.get(), literal_data, 0, 0); ASSERT_TRUE(success); ASSERT_EQ(literal.getTimestamp().getMillis(), 1710374400000); // @@ -207,10 +219,307 @@ TEST_F(OrcReaderConvertToOrcLiteralTest, ConvertTypesTest) { literal_data = StringRef(reinterpret_cast(&datetime_value), sizeof(datetime_value)); std::tie(success, literal) = - convert_to_orc_literal(orc_type_ptr.get(), literal_data.data, 0, 0); + convert_to_orc_literal(orc_type_ptr.get(), literal_data, 0, 0); ASSERT_TRUE(success); ASSERT_EQ(literal.getTimestamp().getMillis(), 1709208000000); // } + + // Type mismatch test + { + // Try to convert INT type to STRING + int32_t int_value = 42; + StringRef literal_data(reinterpret_cast(&int_value), sizeof(int_value)); + auto orc_type_ptr = createPrimitiveType(orc::TypeKind::STRING); + auto [success, literal] = + convert_to_orc_literal(orc_type_ptr.get(), literal_data, 0, 0); + ASSERT_FALSE(success); + } + + // Try to convert FLOAT to DOUBLE + { + float float_value = 3.14f; + StringRef literal_data(reinterpret_cast(&float_value), sizeof(float_value)); + auto orc_type_ptr = createPrimitiveType(orc::TypeKind::DOUBLE); + auto [success, literal] = + convert_to_orc_literal(orc_type_ptr.get(), literal_data, 0, 0); + ASSERT_FALSE(success); + } + + // Try to convert DATE to TIMESTAMP + { + VecDateTimeValue date_value; + date_value.from_date_str("2024-03-14", 10); + StringRef literal_data(reinterpret_cast(&date_value), sizeof(date_value)); + auto orc_type_ptr = createPrimitiveType(orc::TypeKind::TIMESTAMP); + auto [success, literal] = + convert_to_orc_literal(orc_type_ptr.get(), literal_data, 0, 0); + ASSERT_FALSE(success); + } + + { + // TINYINT -> other integer types + int8_t tiny_value = 42; + StringRef literal_data(reinterpret_cast(&tiny_value), sizeof(tiny_value)); + + // TINYINT -> SHORT + { + auto orc_type_ptr = createPrimitiveType(orc::TypeKind::SHORT); + auto [success, literal] = + convert_to_orc_literal(orc_type_ptr.get(), literal_data, 0, 0); + ASSERT_TRUE(success); + } + + // TINYINT -> INT + { + auto orc_type_ptr = createPrimitiveType(orc::TypeKind::INT); + auto [success, literal] = + convert_to_orc_literal(orc_type_ptr.get(), literal_data, 0, 0); + ASSERT_TRUE(success); + } + + // TINYINT -> LONG + { + auto orc_type_ptr = createPrimitiveType(orc::TypeKind::LONG); + auto [success, literal] = + convert_to_orc_literal(orc_type_ptr.get(), literal_data, 0, 0); + ASSERT_TRUE(success); + } + } + + // 2. Converting between floating point and integer types + { + // FLOAT -> integer types + float float_value = 3.14f; + StringRef literal_data(reinterpret_cast(&float_value), sizeof(float_value)); + + // FLOAT -> INT + { + auto orc_type_ptr = createPrimitiveType(orc::TypeKind::INT); + auto [success, literal] = + convert_to_orc_literal(orc_type_ptr.get(), literal_data, 0, 0); + ASSERT_FALSE(success); + } + + // FLOAT -> LONG + { + auto orc_type_ptr = createPrimitiveType(orc::TypeKind::LONG); + auto [success, literal] = + convert_to_orc_literal(orc_type_ptr.get(), literal_data, 0, 0); + ASSERT_FALSE(success); + } + + // INT -> FLOAT + int32_t int_value = 42; + literal_data = StringRef(reinterpret_cast(&int_value), sizeof(int_value)); + { + auto orc_type_ptr = createPrimitiveType(orc::TypeKind::FLOAT); + auto [success, literal] = + convert_to_orc_literal(orc_type_ptr.get(), literal_data, 0, 0); + ASSERT_FALSE(success); + } + } + + // 3. Conversion between string and numeric types + { + // STRING -> numeric types + std::string str_value = "123"; + StringRef literal_data(str_value.data(), str_value.size()); + + // STRING -> INT + { + auto orc_type_ptr = createPrimitiveType(orc::TypeKind::INT); + auto [success, literal] = + convert_to_orc_literal(orc_type_ptr.get(), literal_data, 0, 0); + ASSERT_FALSE(success); + } + + // STRING -> FLOAT + { + auto orc_type_ptr = createPrimitiveType(orc::TypeKind::FLOAT); + auto [success, literal] = + convert_to_orc_literal(orc_type_ptr.get(), literal_data, 0, 0); + ASSERT_FALSE(success); + } + + // INT -> STRING + int32_t int_value = 42; + literal_data = StringRef(reinterpret_cast(&int_value), sizeof(int_value)); + { + auto orc_type_ptr = createPrimitiveType(orc::TypeKind::STRING); + auto [success, literal] = + convert_to_orc_literal(orc_type_ptr.get(), literal_data, 0, 0); + ASSERT_FALSE(success); + } + } + + // 4. Conversion between date/time types and other types + { + // DATE -> other types + VecDateTimeValue date_value; + date_value.from_date_str("2024-03-14", 10); + StringRef literal_data(reinterpret_cast(&date_value), sizeof(date_value)); + + // DATE -> INT + { + auto orc_type_ptr = createPrimitiveType(orc::TypeKind::INT); + auto [success, literal] = + convert_to_orc_literal(orc_type_ptr.get(), literal_data, 0, 0); + ASSERT_FALSE(success); + } + + // DATE -> STRING + { + auto orc_type_ptr = createPrimitiveType(orc::TypeKind::STRING); + auto [success, literal] = + convert_to_orc_literal(orc_type_ptr.get(), literal_data, 0, 0); + ASSERT_FALSE(success); + } + + // INT -> DATE + int32_t int_value = 42; + literal_data = StringRef(reinterpret_cast(&int_value), sizeof(int_value)); + { + auto orc_type_ptr = createPrimitiveType(orc::TypeKind::DATE); + auto [success, literal] = + convert_to_orc_literal(orc_type_ptr.get(), literal_data, 0, 0); + ASSERT_FALSE(success); + } + } + + // 5. Conversion between Decimal and other types + { + // DECIMAL -> other types + int128_t decimal_value = 123456789; + StringRef literal_data(reinterpret_cast(&decimal_value), + sizeof(decimal_value)); + + // DECIMAL -> INT + { + auto orc_type_ptr = createPrimitiveType(orc::TypeKind::INT); + auto [success, literal] = convert_to_orc_literal(orc_type_ptr.get(), + literal_data, 0, 0); + ASSERT_FALSE(success); + } + + // DECIMAL -> FLOAT + { + auto orc_type_ptr = createPrimitiveType(orc::TypeKind::FLOAT); + auto [success, literal] = convert_to_orc_literal(orc_type_ptr.get(), + literal_data, 0, 0); + ASSERT_FALSE(success); + } + + // INT -> DECIMAL + int32_t int_value = 42; + literal_data = StringRef(reinterpret_cast(&int_value), sizeof(int_value)); + { + auto orc_type_ptr = createPrimitiveType(orc::TypeKind::DECIMAL); + auto [success, literal] = + convert_to_orc_literal(orc_type_ptr.get(), literal_data, 10, 2); + ASSERT_FALSE(success); + } + } + + // 6. Conversion between BOOLEAN and other types + { + // BOOLEAN -> other types + uint8_t bool_value = true; + StringRef literal_data(reinterpret_cast(&bool_value), sizeof(bool_value)); + + // BOOLEAN -> INT + { + auto orc_type_ptr = createPrimitiveType(orc::TypeKind::INT); + auto [success, literal] = + convert_to_orc_literal(orc_type_ptr.get(), literal_data, 0, 0); + ASSERT_FALSE(success); + } + + // BOOLEAN -> STRING + { + auto orc_type_ptr = createPrimitiveType(orc::TypeKind::STRING); + auto [success, literal] = + convert_to_orc_literal(orc_type_ptr.get(), literal_data, 0, 0); + ASSERT_FALSE(success); + } + + // INT -> BOOLEAN + int32_t int_value = 1; + literal_data = StringRef(reinterpret_cast(&int_value), sizeof(int_value)); + { + auto orc_type_ptr = createPrimitiveType(orc::TypeKind::BOOLEAN); + auto [success, literal] = + convert_to_orc_literal(orc_type_ptr.get(), literal_data, 0, 0); + ASSERT_FALSE(success); + } + } + + // 7. Conversion between TIMESTAMP and other types + { + // TIMESTAMP -> other types + VecDateTimeValue datetime_value; + datetime_value.from_date_str("2024-03-14 15:30:45", 19); + StringRef literal_data(reinterpret_cast(&datetime_value), + sizeof(datetime_value)); + + // TIMESTAMP -> INT + { + auto orc_type_ptr = createPrimitiveType(orc::TypeKind::INT); + auto [success, literal] = + convert_to_orc_literal(orc_type_ptr.get(), literal_data, 0, 0); + ASSERT_FALSE(success); + } + + // TIMESTAMP -> STRING + { + auto orc_type_ptr = createPrimitiveType(orc::TypeKind::STRING); + auto [success, literal] = + convert_to_orc_literal(orc_type_ptr.get(), literal_data, 0, 0); + ASSERT_FALSE(success); + } + + // INT -> TIMESTAMP + int64_t int_value = 1615737045; + literal_data = StringRef(reinterpret_cast(&int_value), sizeof(int_value)); + { + auto orc_type_ptr = createPrimitiveType(orc::TypeKind::TIMESTAMP); + auto [success, literal] = + convert_to_orc_literal(orc_type_ptr.get(), literal_data, 0, 0); + ASSERT_FALSE(success); + } + } + + // 8. Conversion between VARCHAR and other types + { + // VARCHAR -> other types + std::string varchar_value = "test string"; + StringRef literal_data(varchar_value.data(), varchar_value.size()); + + // VARCHAR -> INT + { + auto orc_type_ptr = createPrimitiveType(orc::TypeKind::INT); + auto [success, literal] = + convert_to_orc_literal(orc_type_ptr.get(), literal_data, 0, 0); + ASSERT_FALSE(success); + } + + // VARCHAR -> DECIMAL + { + auto orc_type_ptr = createPrimitiveType(orc::TypeKind::DECIMAL); + auto [success, literal] = + convert_to_orc_literal(orc_type_ptr.get(), literal_data, 10, 2); + ASSERT_FALSE(success); + } + + // INT -> VARCHAR + int32_t int_value = 42; + literal_data = StringRef(reinterpret_cast(&int_value), sizeof(int_value)); + { + auto orc_type_ptr = createPrimitiveType(orc::TypeKind::VARCHAR); + auto [success, literal] = + convert_to_orc_literal(orc_type_ptr.get(), literal_data, 0, 0); + ASSERT_FALSE(success); + } + } } } // namespace vectorized } // namespace doris diff --git a/be/test/vec/exec/orc_reader_test.cpp b/be/test/vec/exec/orc_reader_test.cpp new file mode 100644 index 00000000000000..ff7452ae625428 --- /dev/null +++ b/be/test/vec/exec/orc_reader_test.cpp @@ -0,0 +1,161 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include + +#include +#include +#include +#include + +#include "orc/sargs/SearchArgument.hh" +#include "runtime/define_primitive_type.h" +#include "runtime/exec_env.h" +#include "runtime/runtime_state.h" +#include "testutil/desc_tbl_builder.h" +#include "vec/exec/format/orc/orc_memory_pool.h" +#include "vec/exec/format/orc/vorc_reader.h" +#include "vec/exprs/vexpr_context.h" +#include "vec/exprs/vexpr_fwd.h" +#include "vec/utils/util.hpp" +namespace doris::vectorized { +class OrcReaderTest : public testing::Test { +public: + OrcReaderTest() = default; + ~OrcReaderTest() override = default; + +private: + static constexpr const char* CANNOT_PUSH_DOWN_ERROR = "can't push down"; + std::string build_search_argument(const std::string& expr) { + // build orc_reader for table orders + std::vector column_names = { + "o_orderkey", "o_custkey", "o_orderstatus", "o_totalprice", "o_orderdate", + "o_orderpriority", "o_clerk", "o_shippriority", "o_comment"}; + ObjectPool object_pool; + DescriptorTblBuilder builder(&object_pool); + builder.declare_tuple() << std::make_tuple(TYPE_INT, "o_orderkey") + << std::make_tuple(TYPE_INT, "o_custkey") + << std::make_tuple(TYPE_STRING, "o_orderstatus") + << std::make_tuple(TYPE_DOUBLE, "o_totalprice") + << std::make_tuple(TYPE_DATE, "o_orderdate") + << std::make_tuple(TYPE_STRING, "o_orderpriority") + << std::make_tuple(TYPE_STRING, "o_clerk") + << std::make_tuple(TYPE_INT, "o_shippriority") + << std::make_tuple(TYPE_STRING, "o_comment"); + DescriptorTbl* desc_tbl = builder.build(); + auto* tuple_desc = const_cast(desc_tbl->get_tuple_descriptor(0)); + RowDescriptor row_desc(tuple_desc, false); + TFileScanRangeParams params; + TFileRangeDesc range; + range.path = "./be/test/exec/test_data/orc_scanner/orders.orc"; + range.start_offset = 0; + range.size = 1293; + auto reader = OrcReader::create_unique(params, range, "", nullptr, true); + auto status = reader->init_reader(&column_names, nullptr, {}, false, tuple_desc, &row_desc, + nullptr, nullptr); + EXPECT_TRUE(status.ok()); + + // deserialize expr + auto exprx = apache::thrift::from_json_string(expr); + VExprContextSPtr context; + status = VExpr::create_expr_tree(exprx, context); + EXPECT_TRUE(status.ok()); + + // prepare expr context + RuntimeState state; + state.set_desc_tbl(desc_tbl); + status = context->prepare(&state, row_desc); + EXPECT_TRUE(status.ok()); + + // build search argument + auto sarg_builder = orc::SearchArgumentFactory::newBuilder(); + auto res = reader->_build_search_argument(context->root(), sarg_builder); + if (!res) { + return CANNOT_PUSH_DOWN_ERROR; + } + return sarg_builder->build()->toString(); + } +}; + +TEST_F(OrcReaderTest, test_build_search_argument) { + ExecEnv::GetInstance()->set_orc_memory_pool(new ORCMemoryPool()); + std::vector + exprs = + { + // select count(o_orderkey) from tpch1_orc.orders where o_orderkey < 100 or o_orderkey > 5999900 or o_orderkey in (1000000, 2000000, 3000000); + R"|({"1":{"lst":["rec",13,{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":3},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":3},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":11},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"lt"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"5":{"tf":0},"7":{"str":"lt(int, int)"},"11":{"i64":0},"13":{"tf":1},"14":{"tf":0},"15":{"tf":0},"16":{"i64":360}}},"28":{"i32":5},"29":{"tf":1}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":0},"2":{"i32":0},"3":{"i32":-1}}},"20":{"i32":-1},"29":{"tf":1},"36":{"str":"o_orderkey"}},{"1":{"i32":9},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"10":{"rec":{"1":{"i64":100}}},"20":{"i32":-1},"29":{"tf":0}},{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":13},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"gt"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"5":{"tf":0},"7":{"str":"gt(int, int)"},"11":{"i64":0},"13":{"tf":1},"14":{"tf":0},"15":{"tf":0},"16":{"i64":360}}},"28":{"i32":5},"29":{"tf":1}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":0},"2":{"i32":0},"3":{"i32":-1}}},"20":{"i32":-1},"29":{"tf":1},"36":{"str":"o_orderkey"}},{"1":{"i32":9},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"10":{"rec":{"1":{"i64":5999900}}},"20":{"i32":-1},"29":{"tf":0}},{"1":{"i32":11},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":5},"4":{"i32":4},"11":{"rec":{"1":{"tf":0}}},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":0},"2":{"i32":0},"3":{"i32":-1}}},"20":{"i32":-1},"29":{"tf":1},"36":{"str":"o_orderkey"}},{"1":{"i32":9},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"10":{"rec":{"1":{"i64":1000000}}},"20":{"i32":-1},"29":{"tf":0}},{"1":{"i32":9},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"10":{"rec":{"1":{"i64":2000000}}},"20":{"i32":-1},"29":{"tf":0}},{"1":{"i32":9},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"10":{"rec":{"1":{"i64":3000000}}},"20":{"i32":-1},"29":{"tf":0}}]}})|", + // select count(o_orderkey) from tpch1_orc.orders where o_orderkey is null or (o_orderkey between 100 and 1000 and o_orderkey not in (200, 300, 400)); + R"|({"1":{"lst":["rec",16,{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":3},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":20},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"4":{"i32":1},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"is_null_pred"}}},"2":{"i32":0},"3":{"lst":["rec",1,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"5":{"tf":0},"7":{"str":"is_null_pred(int)"},"11":{"i64":0},"13":{"tf":1},"14":{"tf":0},"15":{"tf":0},"16":{"i64":360}}},"29":{"tf":0}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":0},"2":{"i32":0},"3":{"i32":-1}}},"20":{"i32":-1},"29":{"tf":1},"36":{"str":"o_orderkey"}},{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":2},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":2},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":14},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"ge"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"5":{"tf":0},"7":{"str":"ge(int, int)"},"11":{"i64":0},"13":{"tf":1},"14":{"tf":0},"15":{"tf":0},"16":{"i64":360}}},"28":{"i32":5},"29":{"tf":1}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":0},"2":{"i32":0},"3":{"i32":-1}}},"20":{"i32":-1},"29":{"tf":1},"36":{"str":"o_orderkey"}},{"1":{"i32":9},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"10":{"rec":{"1":{"i64":100}}},"20":{"i32":-1},"29":{"tf":0}},{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":12},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"le"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"5":{"tf":0},"7":{"str":"le(int, int)"},"11":{"i64":0},"13":{"tf":1},"14":{"tf":0},"15":{"tf":0},"16":{"i64":360}}},"28":{"i32":5},"29":{"tf":1}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":0},"2":{"i32":0},"3":{"i32":-1}}},"20":{"i32":-1},"29":{"tf":1},"36":{"str":"o_orderkey"}},{"1":{"i32":9},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"10":{"rec":{"1":{"i64":1000}}},"20":{"i32":-1},"29":{"tf":0}},{"1":{"i32":11},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":6},"4":{"i32":4},"11":{"rec":{"1":{"tf":1}}},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":0},"2":{"i32":0},"3":{"i32":-1}}},"20":{"i32":-1},"29":{"tf":1},"36":{"str":"o_orderkey"}},{"1":{"i32":9},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"10":{"rec":{"1":{"i64":200}}},"20":{"i32":-1},"29":{"tf":0}},{"1":{"i32":9},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"10":{"rec":{"1":{"i64":300}}},"20":{"i32":-1},"29":{"tf":0}},{"1":{"i32":9},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"10":{"rec":{"1":{"i64":400}}},"20":{"i32":-1},"29":{"tf":0}}]}})|", + // select count(o_orderkey) from tpch1_orc.orders where o_orderkey is null or (o_orderkey between 1000000 and 1200000 and o_orderkey != 1100000); + R"|({"1":{"lst":["rec",14,{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":3},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":20},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"4":{"i32":1},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"is_null_pred"}}},"2":{"i32":0},"3":{"lst":["rec",1,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"5":{"tf":0},"7":{"str":"is_null_pred(int)"},"11":{"i64":0},"13":{"tf":1},"14":{"tf":0},"15":{"tf":0},"16":{"i64":360}}},"29":{"tf":0}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":0},"2":{"i32":0},"3":{"i32":-1}}},"20":{"i32":-1},"29":{"tf":1},"36":{"str":"o_orderkey"}},{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":2},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":2},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":14},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"ge"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"5":{"tf":0},"7":{"str":"ge(int, int)"},"11":{"i64":0},"13":{"tf":1},"14":{"tf":0},"15":{"tf":0},"16":{"i64":360}}},"28":{"i32":5},"29":{"tf":1}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":0},"2":{"i32":0},"3":{"i32":-1}}},"20":{"i32":-1},"29":{"tf":1},"36":{"str":"o_orderkey"}},{"1":{"i32":9},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"10":{"rec":{"1":{"i64":1000000}}},"20":{"i32":-1},"29":{"tf":0}},{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":12},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"le"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"5":{"tf":0},"7":{"str":"le(int, int)"},"11":{"i64":0},"13":{"tf":1},"14":{"tf":0},"15":{"tf":0},"16":{"i64":360}}},"28":{"i32":5},"29":{"tf":1}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":0},"2":{"i32":0},"3":{"i32":-1}}},"20":{"i32":-1},"29":{"tf":1},"36":{"str":"o_orderkey"}},{"1":{"i32":9},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"10":{"rec":{"1":{"i64":1200000}}},"20":{"i32":-1},"29":{"tf":0}},{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":10},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"ne"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"5":{"tf":0},"7":{"str":"ne(int, int)"},"11":{"i64":0},"13":{"tf":1},"14":{"tf":0},"15":{"tf":0},"16":{"i64":360}}},"28":{"i32":5},"29":{"tf":1}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":0},"2":{"i32":0},"3":{"i32":-1}}},"20":{"i32":-1},"29":{"tf":1},"36":{"str":"o_orderkey"}},{"1":{"i32":9},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"10":{"rec":{"1":{"i64":1100000}}},"20":{"i32":-1},"29":{"tf":0}}]}})|", + // SELECT count(o_orderkey) FROM tpch1_orc.orders WHERE o_orderkey IN (1000000, 2000000, 3000000) OR (o_orderdate >= '1994-01-01' AND o_orderdate <= '1994-12-31'); + R"|({"1":{"lst":["rec",13,{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":3},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":11},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":5},"4":{"i32":4},"11":{"rec":{"1":{"tf":0}}},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":0},"2":{"i32":0},"3":{"i32":-1}}},"20":{"i32":-1},"29":{"tf":1},"36":{"str":"o_orderkey"}},{"1":{"i32":9},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"10":{"rec":{"1":{"i64":1000000}}},"20":{"i32":-1},"29":{"tf":0}},{"1":{"i32":9},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"10":{"rec":{"1":{"i64":2000000}}},"20":{"i32":-1},"29":{"tf":0}},{"1":{"i32":9},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"10":{"rec":{"1":{"i64":3000000}}},"20":{"i32":-1},"29":{"tf":0}},{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":2},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":14},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"ge"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":26}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":26}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"5":{"tf":0},"7":{"str":"ge(datev2, datev2)"},"11":{"i64":0},"13":{"tf":1},"14":{"tf":0},"15":{"tf":0},"16":{"i64":360}}},"28":{"i32":26},"29":{"tf":1}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":26}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":4},"2":{"i32":0},"3":{"i32":-1}}},"20":{"i32":-1},"29":{"tf":1},"36":{"str":"o_orderdate"}},{"1":{"i32":7},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":26}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"8":{"rec":{"1":{"str":"1994-01-01"}}},"20":{"i32":-1},"29":{"tf":0}},{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":12},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"le"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":26}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":26}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"5":{"tf":0},"7":{"str":"le(datev2, datev2)"},"11":{"i64":0},"13":{"tf":1},"14":{"tf":0},"15":{"tf":0},"16":{"i64":360}}},"28":{"i32":26},"29":{"tf":1}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":26}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":4},"2":{"i32":0},"3":{"i32":-1}}},"20":{"i32":-1},"29":{"tf":1},"36":{"str":"o_orderdate"}},{"1":{"i32":7},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":26}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"8":{"rec":{"1":{"str":"1994-12-31"}}},"20":{"i32":-1},"29":{"tf":0}}]}})|", + // select count(o_orderkey) from tpch1_orc.orders where o_orderkey < 2 or (o_comment like '%delayed%' and o_orderpriority = '1-URGENT'); + R"|({"1":{"lst":["rec",11,{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":3},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":11},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"lt"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"5":{"tf":0},"7":{"str":"lt(int, int)"},"11":{"i64":0},"13":{"tf":1},"14":{"tf":0},"15":{"tf":0},"16":{"i64":360}}},"28":{"i32":5},"29":{"tf":1}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":0},"2":{"i32":0},"3":{"i32":-1}}},"20":{"i32":-1},"29":{"tf":1},"36":{"str":"o_orderkey"}},{"1":{"i32":9},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"10":{"rec":{"1":{"i64":2}}},"20":{"i32":-1},"29":{"tf":0}},{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":2},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":20},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"like"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":23},"2":{"i32":2147483643}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":15},"2":{"i32":9}}}}]},"3":{"i64":9}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"5":{"tf":0},"7":{"str":"like(text, varchar(9))"},"9":{"rec":{"1":{"str":""}}},"11":{"i64":0},"13":{"tf":1},"14":{"tf":0},"15":{"tf":0},"16":{"i64":360}}},"29":{"tf":1}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":23},"2":{"i32":2147483643}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":8},"2":{"i32":0},"3":{"i32":-1}}},"20":{"i32":-1},"29":{"tf":1},"36":{"str":"o_comment"}},{"1":{"i32":17},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":15},"2":{"i32":65533}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"16":{"rec":{"1":{"str":"%delayed%"}}},"20":{"i32":-1},"29":{"tf":0}},{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":9},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"eq"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":23},"2":{"i32":2147483643}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":15},"2":{"i32":65533}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"5":{"tf":0},"7":{"str":"eq(text, varchar(65533))"},"11":{"i64":0},"13":{"tf":1},"14":{"tf":0},"15":{"tf":0},"16":{"i64":360}}},"28":{"i32":23},"29":{"tf":1}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":23},"2":{"i32":2147483643}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":5},"2":{"i32":0},"3":{"i32":-1}}},"20":{"i32":-1},"29":{"tf":1},"36":{"str":"o_orderpriority"}},{"1":{"i32":17},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":15},"2":{"i32":65533}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"16":{"rec":{"1":{"str":"1-URGENT"}}},"20":{"i32":-1},"29":{"tf":0}}]}})|", + // select count(o_orderkey) from tpch1_orc.orders where o_orderkey < 2 or (o_totalprice < 173665.47 and o_custkey >= 36901); + R"|({"1":{"lst":["rec",11,{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":3},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":11},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"lt"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"5":{"tf":0},"7":{"str":"lt(int, int)"},"11":{"i64":0},"13":{"tf":1},"14":{"tf":0},"15":{"tf":0},"16":{"i64":360}}},"28":{"i32":5},"29":{"tf":1}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":0},"2":{"i32":0},"3":{"i32":-1}}},"20":{"i32":-1},"29":{"tf":1},"36":{"str":"o_orderkey"}},{"1":{"i32":9},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"10":{"rec":{"1":{"i64":2}}},"20":{"i32":-1},"29":{"tf":0}},{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":2},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":11},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"lt"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":30},"3":{"i32":12},"4":{"i32":2}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":30},"3":{"i32":12},"4":{"i32":2}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"5":{"tf":0},"7":{"str":"lt(decimalv3(12,2), decimalv3(12,2))"},"11":{"i64":0},"13":{"tf":1},"14":{"tf":0},"15":{"tf":0},"16":{"i64":360}}},"28":{"i32":30},"29":{"tf":1}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":30},"3":{"i32":12},"4":{"i32":2}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":3},"2":{"i32":0},"3":{"i32":-1}}},"20":{"i32":-1},"29":{"tf":1},"36":{"str":"o_totalprice"}},{"1":{"i32":10},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":30},"3":{"i32":12},"4":{"i32":2}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"19":{"rec":{"1":{"str":"173665.47"}}},"20":{"i32":-1},"29":{"tf":0}},{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":14},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"ge"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"5":{"tf":0},"7":{"str":"ge(int, int)"},"11":{"i64":0},"13":{"tf":1},"14":{"tf":0},"15":{"tf":0},"16":{"i64":360}}},"28":{"i32":5},"29":{"tf":1}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":1},"2":{"i32":0},"3":{"i32":-1}}},"20":{"i32":-1},"29":{"tf":1},"36":{"str":"o_custkey"}},{"1":{"i32":9},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"10":{"rec":{"1":{"i64":36901}}},"20":{"i32":-1},"29":{"tf":0}}]}})|", + // select count(o_orderkey) from tpch1_orc.orders where o_orderkey < 1 + 1; + R"|({"1":{"lst":["rec",3,{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":11},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"lt"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"5":{"tf":0},"7":{"str":"lt(int, int)"},"11":{"i64":0},"13":{"tf":1},"14":{"tf":0},"15":{"tf":0},"16":{"i64":360}}},"28":{"i32":5},"29":{"tf":1}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":0},"2":{"i32":0},"3":{"i32":-1}}},"20":{"i32":-1},"29":{"tf":1},"36":{"str":"o_orderkey"}},{"1":{"i32":9},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"10":{"rec":{"1":{"i64":2}}},"20":{"i32":-1},"29":{"tf":0}}]}})|", + // select count(o_orderkey) from tpch1_orc.orders where o_orderkey in (null, 25); + R"|({"1":{"lst":["rec",4,{"1":{"i32":11},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":5},"4":{"i32":3},"11":{"rec":{"1":{"tf":0}}},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":0},"2":{"i32":0},"3":{"i32":-1}}},"20":{"i32":-1},"29":{"tf":1},"36":{"str":"o_orderkey"}},{"1":{"i32":15},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":9},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"10":{"rec":{"1":{"i64":25}}},"20":{"i32":-1},"29":{"tf":0}}]}})|", + // SELECT count(o_orderkey) FROM tpch1_orc.orders WHERE o_comment LIKE '%delayed%' OR o_orderpriority = '1-URGENT'; + R"|({"1":{"lst":["rec",7,{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":3},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":20},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"like"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":23},"2":{"i32":2147483643}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":15},"2":{"i32":9}}}}]},"3":{"i64":9}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"5":{"tf":0},"7":{"str":"like(text, varchar(9))"},"9":{"rec":{"1":{"str":""}}},"11":{"i64":0},"13":{"tf":1},"14":{"tf":0},"15":{"tf":0},"16":{"i64":360}}},"29":{"tf":1}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":23},"2":{"i32":2147483643}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":8},"2":{"i32":0},"3":{"i32":-1}}},"20":{"i32":-1},"29":{"tf":1},"36":{"str":"o_comment"}},{"1":{"i32":17},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":15},"2":{"i32":65533}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"16":{"rec":{"1":{"str":"%delayed%"}}},"20":{"i32":-1},"29":{"tf":0}},{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":9},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"eq"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":23},"2":{"i32":2147483643}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":15},"2":{"i32":65533}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"5":{"tf":0},"7":{"str":"eq(text, varchar(65533))"},"11":{"i64":0},"13":{"tf":1},"14":{"tf":0},"15":{"tf":0},"16":{"i64":360}}},"28":{"i32":23},"29":{"tf":1}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":23},"2":{"i32":2147483643}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":5},"2":{"i32":0},"3":{"i32":-1}}},"20":{"i32":-1},"29":{"tf":1},"36":{"str":"o_orderpriority"}},{"1":{"i32":17},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":15},"2":{"i32":65533}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"16":{"rec":{"1":{"str":"1-URGENT"}}},"20":{"i32":-1},"29":{"tf":0}}]}})|", + // select count(o_orderkey) from tpch1_orc.orders where o_orderkey between 1 and 100 or random() > 0.5; + R"|({"1":{"lst":["rec",11,{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":3},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":2},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":14},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"ge"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"5":{"tf":0},"7":{"str":"ge(int, int)"},"11":{"i64":0},"13":{"tf":1},"14":{"tf":0},"15":{"tf":0},"16":{"i64":360}}},"28":{"i32":5},"29":{"tf":1}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":0},"2":{"i32":0},"3":{"i32":-1}}},"20":{"i32":-1},"29":{"tf":1},"36":{"str":"o_orderkey"}},{"1":{"i32":9},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"10":{"rec":{"1":{"i64":1}}},"20":{"i32":-1},"29":{"tf":0}},{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":12},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"le"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"5":{"tf":0},"7":{"str":"le(int, int)"},"11":{"i64":0},"13":{"tf":1},"14":{"tf":0},"15":{"tf":0},"16":{"i64":360}}},"28":{"i32":5},"29":{"tf":1}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":0},"2":{"i32":0},"3":{"i32":-1}}},"20":{"i32":-1},"29":{"tf":1},"36":{"str":"o_orderkey"}},{"1":{"i32":9},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"10":{"rec":{"1":{"i64":100}}},"20":{"i32":-1},"29":{"tf":0}},{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":13},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"gt"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":8}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":8}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"5":{"tf":0},"7":{"str":"gt(double, double)"},"11":{"i64":0},"13":{"tf":1},"14":{"tf":0},"15":{"tf":0},"16":{"i64":360}}},"28":{"i32":8},"29":{"tf":0}},{"1":{"i32":20},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":8}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"random"}}},"2":{"i32":0},"3":{"lst":["rec",0]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":8}}}}]},"3":{"i64":-1}}},"5":{"tf":0},"7":{"str":"random()"},"9":{"rec":{"1":{"str":""}}},"11":{"i64":0},"13":{"tf":1},"14":{"tf":0},"15":{"tf":0},"16":{"i64":360}}},"29":{"tf":0}},{"1":{"i32":8},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":8}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"9":{"rec":{"1":{"dbl":0.5}}},"20":{"i32":-1},"29":{"tf":0}}]}})|", + // select count(o_orderkey) from tpch1_orc.orders where lower(o_orderpriority) = '1-urgent'; + R"|({"1":{"lst":["rec",4,{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":9},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"eq"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":15},"2":{"i32":65533}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":15},"2":{"i32":65533}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"5":{"tf":0},"7":{"str":"eq(varchar(65533), varchar(65533))"},"11":{"i64":0},"13":{"tf":1},"14":{"tf":0},"15":{"tf":0},"16":{"i64":360}}},"28":{"i32":15},"29":{"tf":1}},{"1":{"i32":20},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":15},"2":{"i32":65533}}}}]},"3":{"i64":-1}}},"4":{"i32":1},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"lower"}}},"2":{"i32":0},"3":{"lst":["rec",1,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":23},"2":{"i32":2147483643}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":15},"2":{"i32":65533}}}}]},"3":{"i64":-1}}},"5":{"tf":0},"7":{"str":"lower(text)"},"9":{"rec":{"1":{"str":""}}},"11":{"i64":0},"13":{"tf":1},"14":{"tf":0},"15":{"tf":0},"16":{"i64":360}}},"29":{"tf":1}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":23},"2":{"i32":2147483643}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":5},"2":{"i32":0},"3":{"i32":-1}}},"20":{"i32":-1},"29":{"tf":1},"36":{"str":"o_orderpriority"}},{"1":{"i32":17},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":15},"2":{"i32":65533}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"16":{"rec":{"1":{"str":"1-urgent"}}},"20":{"i32":-1},"29":{"tf":0}}]}})|", + // select count(o_orderkey) from tpch1_orc.orders where o_orderkey * 2 < 60; + R"|({"1":{"lst":["rec",5,{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":11},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"lt"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":6}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":6}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"5":{"tf":0},"7":{"str":"lt(bigint, bigint)"},"11":{"i64":0},"13":{"tf":1},"14":{"tf":0},"15":{"tf":0},"16":{"i64":360}}},"28":{"i32":6},"29":{"tf":1}},{"1":{"i32":1},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":6}}}}]},"3":{"i64":-1}}},"3":{"i32":55},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"multiply"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":3}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":6}}}}]},"3":{"i64":-1}}},"5":{"tf":0},"7":{"str":"multiply(int, tinyint)"},"11":{"i64":0},"13":{"tf":1},"14":{"tf":0},"15":{"tf":0},"16":{"i64":360}}},"29":{"tf":1}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":0},"2":{"i32":0},"3":{"i32":-1}}},"20":{"i32":-1},"29":{"tf":1},"36":{"str":"o_orderkey"}},{"1":{"i32":9},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":3}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"10":{"rec":{"1":{"i64":2}}},"20":{"i32":-1},"29":{"tf":0}},{"1":{"i32":9},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":6}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"10":{"rec":{"1":{"i64":60}}},"20":{"i32":-1},"29":{"tf":0}}]}})|", + // select count(o_orderkey) from tpch1_orc.orders where o_orderdate is not null; + R"|({"1":{"lst":["rec",4,{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":9},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"eq"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":27},"3":{"i32":18},"4":{"i32":0}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":27},"3":{"i32":18},"4":{"i32":0}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"5":{"tf":0},"7":{"str":"eq(datetimev2(0), datetimev2(0))"},"11":{"i64":0},"13":{"tf":1},"14":{"tf":0},"15":{"tf":0},"16":{"i64":360}}},"28":{"i32":27},"29":{"tf":1}},{"1":{"i32":5},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":27},"3":{"i32":18},"4":{"i32":0}}}}]},"3":{"i64":-1}}},"3":{"i32":4},"4":{"i32":1},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"casttodatetimev2"}}},"2":{"i32":0},"3":{"lst":["rec",1,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":26}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":27},"3":{"i32":18},"4":{"i32":0}}}}]},"3":{"i64":-1}}},"5":{"tf":0},"7":{"str":"casttodatetimev2(datev2)"},"11":{"i64":0},"13":{"tf":1},"14":{"tf":0},"15":{"tf":0},"16":{"i64":360}}},"29":{"tf":1}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":26}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":4},"2":{"i32":0},"3":{"i32":-1}}},"20":{"i32":-1},"29":{"tf":1},"36":{"str":"o_orderdate"}},{"1":{"i32":7},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":27},"3":{"i32":18},"4":{"i32":0}}}}]},"3":{"i64":-1}}},"4":{"i32":0},"8":{"rec":{"1":{"str":"2024-11-12 21:13:02"}}},"20":{"i32":-1},"29":{"tf":0}}]}})|", + }; + std::vector result_search_arguments = { + "leaf-0 = (o_orderkey < 100), leaf-1 = (o_orderkey <= 5999900), leaf-2 " + "= (o_orderkey " + "in " + "[1000000, 2000000, 3000000]), expr = (or leaf-0 (not leaf-1) leaf-2)", + "leaf-0 = (o_orderkey is null), leaf-1 = (o_orderkey < 100), leaf-2 = " + "(o_orderkey <= " + "1000), leaf-3 = (o_orderkey in [200, 300, 400]), expr = (and (or " + "leaf-0 (not leaf-1)) " + "(or leaf-0 leaf-2) (or leaf-0 (not leaf-3)))", + "leaf-0 = (o_orderkey is null), leaf-1 = (o_orderkey < 1000000), leaf-2 = (o_orderkey " + "<= 1200000), leaf-3 = (o_orderkey = 1100000), expr = (and (or leaf-0 (not leaf-1)) " + "(or leaf-0 leaf-2) (or leaf-0 (not leaf-3)))", + "leaf-0 = (o_orderkey in [1000000, 2000000, 3000000]), leaf-1 = (o_orderdate < " + "17121205), leaf-2 = (o_orderdate <= 17121205), expr = (and (or leaf-0 (not leaf-1)) " + "(or leaf-0 leaf-2))", + "leaf-0 = (o_orderkey < 2), leaf-1 = (o_orderpriority = 1-URGENT), expr = (or leaf-0 " + "leaf-1)", + "leaf-0 = (o_orderkey < 2), leaf-1 = (o_custkey < 36901), expr = (or leaf-0 (not " + "leaf-1))", + "leaf-0 = (o_orderkey < 2), expr = leaf-0", + "leaf-0 = (o_orderkey = 25), expr = leaf-0", + CANNOT_PUSH_DOWN_ERROR, + CANNOT_PUSH_DOWN_ERROR, + CANNOT_PUSH_DOWN_ERROR, + CANNOT_PUSH_DOWN_ERROR, + CANNOT_PUSH_DOWN_ERROR, + }; + for (int i = 0; i < exprs.size(); i++) { + auto search_argument = build_search_argument(exprs[i]); + ASSERT_EQ(search_argument, result_search_arguments[i]); + } +} + +} // namespace doris::vectorized diff --git a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/orc_predicate/orc_predicate_table.hql b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/orc_predicate/orc_predicate_table.hql index a946b25ff1af04..6a1c9dce521415 100644 --- a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/orc_predicate/orc_predicate_table.hql +++ b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/orc_predicate/orc_predicate_table.hql @@ -14,3 +14,25 @@ create table type_changed_table ( ) stored as orc; insert into type_changed_table values (1, 'Alice'), (2, 'Bob'), (3, 'Charlie'); ALTER TABLE type_changed_table CHANGE COLUMN id id STRING; + +CREATE TABLE table_a ( + id INT, + age INT +) STORED AS ORC; + +INSERT INTO table_a VALUES +(1, null), +(2, 18), +(3, null), +(4, 25); + +CREATE TABLE table_b ( + id INT, + age INT +) STORED AS ORC; + +INSERT INTO table_b VALUES +(1, null), +(2, null), +(3, 1000000), +(4, 100); diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index a2e158f64e6c4a..4f4cea552be70a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -489,6 +489,8 @@ public class SessionVariable implements Serializable, Writable { public static final String ENABLE_ORC_FILTER_BY_MIN_MAX = "enable_orc_filter_by_min_max"; + public static final String CHECK_ORC_INIT_SARGS_SUCCESS = "check_orc_init_sargs_success"; + public static final String INLINE_CTE_REFERENCED_THRESHOLD = "inline_cte_referenced_threshold"; public static final String ENABLE_CTE_MATERIALIZE = "enable_cte_materialize"; @@ -1872,6 +1874,14 @@ public void setEnableLeftZigZag(boolean enableLeftZigZag) { needForward = true) public boolean enableOrcFilterByMinMax = true; + @VariableMgr.VarAttr( + name = CHECK_ORC_INIT_SARGS_SUCCESS, + description = {"是否检查orc init sargs是否成功。默认为 false。", + "Whether to check whether orc init sargs is successful. " + + "The default value is false."}, + needForward = true) + public boolean checkOrcInitSargsSuccess = false; + @VariableMgr.VarAttr( name = EXTERNAL_TABLE_ANALYZE_PART_NUM, description = {"收集外表统计信息行数时选取的采样分区数,默认 -1 表示全部分区", @@ -3513,6 +3523,14 @@ public void setEnableOrcFilterByMinMax(boolean enableOrcFilterByMinMax) { this.enableOrcFilterByMinMax = enableOrcFilterByMinMax; } + public boolean isCheckOrcInitSargsSuccess() { + return checkOrcInitSargsSuccess; + } + + public void setCheckOrcInitSargsSuccess(boolean checkOrcInitSargsSuccess) { + this.checkOrcInitSargsSuccess = checkOrcInitSargsSuccess; + } + public String getSqlDialect() { return sqlDialect; } @@ -4146,6 +4164,7 @@ public TQueryOptions toThrift() { tResult.setEnableOrcLazyMat(enableOrcLazyMat); tResult.setEnableParquetFilterByMinMax(enableParquetFilterByMinMax); tResult.setEnableOrcFilterByMinMax(enableOrcFilterByMinMax); + tResult.setCheckOrcInitSargsSuccess(checkOrcInitSargsSuccess); tResult.setEnableDeleteSubPredicateV2(enableDeleteSubPredicateV2); tResult.setTruncateCharOrVarcharColumns(truncateCharOrVarcharColumns); diff --git a/gensrc/thrift/PaloInternalService.thrift b/gensrc/thrift/PaloInternalService.thrift index db3edcc937332c..508d64f772ae28 100644 --- a/gensrc/thrift/PaloInternalService.thrift +++ b/gensrc/thrift/PaloInternalService.thrift @@ -362,6 +362,7 @@ struct TQueryOptions { 150: optional bool enable_runtime_filter_partition_prune = true; 163: optional bool inverted_index_compatible_read = false + 164: optional bool check_orc_init_sargs_success = false // upgrade options. keep them same in every branch. 200: optional bool new_is_ip_address_in_range = false; diff --git a/regression-test/data/external_table_p0/hive/test_hive_orc.out b/regression-test/data/external_table_p0/hive/test_hive_orc.out index 066c5d4b4d3b5f..03942dbe9fb979 100644 --- a/regression-test/data/external_table_p0/hive/test_hive_orc.out +++ b/regression-test/data/external_table_p0/hive/test_hive_orc.out @@ -110,6 +110,81 @@ tablets tinyint_col 179 182 182 187 183 181 177 183 177 187 183 202 202 186 528 -- !string_col_dict_plain_mixed3 -- 10240 +-- !predicate_pushdown1 -- +55 + +-- !predicate_pushdown2 -- +228 + +-- !predicate_pushdown3 -- +53 + +-- !predicate_pushdown4 -- +50000 + +-- !predicate_pushdown5 -- +90425 + +-- !predicate_pushdown6 -- +279428 + +-- !predicate_pushdown7 -- +300343 + +-- !predicate_pushdown8 -- +1533 + +-- !predicate_pushdown_in1 -- +0 + +-- !predicate_pushdown_in2 -- +1634 + +-- !predicate_pushdown_in3 -- +1597 + +-- !orc_all_types_tinyint_col_is_null -- +366 + +-- !orc_all_types_smallint_col_is_null -- +359 + +-- !orc_all_types_int_col_is_null -- +361 + +-- !orc_all_types_bigint_col_is_null -- +377 + +-- !orc_all_types_boolean_col_is_null -- +369 + +-- !orc_all_types_float_col_is_null -- +340 + +-- !orc_all_types_double_col_is_null -- +395 + +-- !orc_all_types_string_col_is_null -- +347 + +-- !orc_all_types_binary_col_is_null -- +362 + +-- !orc_all_types_timestamp_col_is_null -- +339 + +-- !orc_all_types_decimal_col_is_null -- +367 + +-- !orc_all_types_char_col_is_null -- +0 + +-- !orc_all_types_varchar_col_is_null -- +0 + +-- !orc_all_types_date_col_is_null -- +378 + -- !select_top50 -- 4 55 999742610 400899305488827731 false 6.5976813E8 7.8723304616937395E17 \N base tennis pit vertical friday 2022-08-19T07:29:58 \N tablets smallint_col 2019-02-07 [7.53124931825377e+17] ["NbSSBtwzpxNSkkwga"] tablets smallint_col 2 49 999613702 105493714032727452 \N 6.3322381E8 9.8642324410240179E17 Unveil bright recruit participate. Suspect impression camera mathematical revelation. Fault live2 elbow debt west hydrogen current. how literary 2022-09-03T17:20:21 481707.1065 tablets boolean_col 2020-01-12 [] ["HoMrAnn", "wteEFvIwoZsVpVQdscMb", null, "zcGFmv", "kGEBBckbMtX", "hrEtCGFdPWZK"] tablets boolean_col @@ -273,6 +348,81 @@ tablets tinyint_col 179 182 182 187 183 181 177 183 177 187 183 202 202 186 528 -- !string_col_dict_plain_mixed3 -- 10240 +-- !predicate_pushdown1 -- +55 + +-- !predicate_pushdown2 -- +228 + +-- !predicate_pushdown3 -- +53 + +-- !predicate_pushdown4 -- +50000 + +-- !predicate_pushdown5 -- +90425 + +-- !predicate_pushdown6 -- +279428 + +-- !predicate_pushdown7 -- +300343 + +-- !predicate_pushdown8 -- +1533 + +-- !predicate_pushdown_in1 -- +0 + +-- !predicate_pushdown_in2 -- +1634 + +-- !predicate_pushdown_in3 -- +1597 + +-- !orc_all_types_tinyint_col_is_null -- +366 + +-- !orc_all_types_smallint_col_is_null -- +359 + +-- !orc_all_types_int_col_is_null -- +361 + +-- !orc_all_types_bigint_col_is_null -- +377 + +-- !orc_all_types_boolean_col_is_null -- +369 + +-- !orc_all_types_float_col_is_null -- +340 + +-- !orc_all_types_double_col_is_null -- +395 + +-- !orc_all_types_string_col_is_null -- +347 + +-- !orc_all_types_binary_col_is_null -- +362 + +-- !orc_all_types_timestamp_col_is_null -- +339 + +-- !orc_all_types_decimal_col_is_null -- +367 + +-- !orc_all_types_char_col_is_null -- +0 + +-- !orc_all_types_varchar_col_is_null -- +0 + +-- !orc_all_types_date_col_is_null -- +378 + -- !select_top50 -- 4 55 999742610 400899305488827731 false 6.5976813E8 7.8723304616937395E17 \N base tennis pit vertical friday 2022-08-19T07:29:58 \N tablets smallint_col 2019-02-07 [7.53124931825377e+17] ["NbSSBtwzpxNSkkwga"] tablets smallint_col 2 49 999613702 105493714032727452 \N 6.3322381E8 9.8642324410240179E17 Unveil bright recruit participate. Suspect impression camera mathematical revelation. Fault live2 elbow debt west hydrogen current. how literary 2022-09-03T17:20:21 481707.1065 tablets boolean_col 2020-01-12 [] ["HoMrAnn", "wteEFvIwoZsVpVQdscMb", null, "zcGFmv", "kGEBBckbMtX", "hrEtCGFdPWZK"] tablets boolean_col diff --git a/regression-test/data/external_table_p0/hive/test_hive_orc_predicate.out b/regression-test/data/external_table_p0/hive/test_hive_orc_predicate.out index 8060ddd620c1a2..7d21967daade2d 100644 --- a/regression-test/data/external_table_p0/hive/test_hive_orc_predicate.out +++ b/regression-test/data/external_table_p0/hive/test_hive_orc_predicate.out @@ -17,6 +17,8 @@ 1 \N 1 \N 3 \N 1 \N +-- !predicate_full_acid_push_down -- + -- !lazy_materialization_for_list_type -- ["c", "a"] ["b", "c"] @@ -77,6 +79,9 @@ 1 \N 1 \N 3 \N 1 \N +-- !predicate_full_acid_push_down -- +2 BB 20230101 + -- !lazy_materialization_for_list_type -- ["c", "a"] ["b", "c"] diff --git a/regression-test/suites/external_table_p0/hive/test_hive_orc.groovy b/regression-test/suites/external_table_p0/hive/test_hive_orc.groovy index 0f837c0abd3088..8d85feaa77ae9a 100644 --- a/regression-test/suites/external_table_p0/hive/test_hive_orc.groovy +++ b/regression-test/suites/external_table_p0/hive/test_hive_orc.groovy @@ -81,6 +81,39 @@ suite("test_hive_orc", "all_types,p0,external,hive,external_docker,external_dock qt_string_col_dict_plain_mixed3 """select count(col2) from string_col_dict_plain_mixed_orc where col1 like '%Test%';""" } + def predicate_pushdown = { + qt_predicate_pushdown1 """ select count(o_orderkey) from tpch1_orc.orders where o_orderkey is not null and (o_orderkey < 100 or o_orderkey > 5999900 or o_orderkey in (1000000, 2000000, 3000000)); """ + qt_predicate_pushdown2 """ select count(o_orderkey) from tpch1_orc.orders where o_orderkey is null or (o_orderkey between 100 and 1000 and o_orderkey not in (200, 300, 400)); """ + qt_predicate_pushdown3 """ select count(o_orderkey) from tpch1_orc.orders where o_orderkey is not null and (o_orderkey < 100 or o_orderkey > 5999900 or o_orderkey = 3000000); """ + qt_predicate_pushdown4 """ select count(o_orderkey) from tpch1_orc.orders where o_orderkey is null or (o_orderkey between 1000000 and 1200000 and o_orderkey != 1100000); """ + qt_predicate_pushdown5 """ SELECT count(o_orderkey) FROM tpch1_orc.orders WHERE (o_orderdate >= '1994-01-01' AND o_orderdate <= '1994-12-31') AND (o_orderpriority = '5-LOW' OR o_orderpriority = '3-MEDIUM') AND o_totalprice > 2000;""" + qt_predicate_pushdown6 """ SELECT count(o_orderkey) FROM tpch1_orc.orders WHERE o_orderstatus <> 'F' AND o_custkey < 54321; """ + qt_predicate_pushdown7 """ SELECT count(o_orderkey) FROM tpch1_orc.orders WHERE o_comment LIKE '%delayed%' OR o_orderpriority = '1-URGENT'; """ + qt_predicate_pushdown8 """ SELECT count(o_orderkey) FROM tpch1_orc.orders WHERE o_orderkey IN (1000000, 2000000, 3000000) OR o_clerk = 'Clerk#000000470'; """ + + qt_predicate_pushdown_in1 """ select count(*) from orc_all_types where boolean_col in (null); """ + qt_predicate_pushdown_in2 """ select count(*) from orc_all_types where boolean_col in (null, 0); """ + qt_predicate_pushdown_in3 """ select count(*) from orc_all_types where boolean_col in (null, 1); """ + + def test_col_is_null = { String col -> + "qt_orc_all_types_${col}_is_null" """ select count(*) from orc_all_types where ${col} is null; """ + } + test_col_is_null("tinyint_col") + test_col_is_null("smallint_col") + test_col_is_null("int_col") + test_col_is_null("bigint_col") + test_col_is_null("boolean_col") + test_col_is_null("float_col") + test_col_is_null("double_col") + test_col_is_null("string_col") + test_col_is_null("binary_col") + test_col_is_null("timestamp_col") + test_col_is_null("decimal_col") + test_col_is_null("char_col") + test_col_is_null("varchar_col") + test_col_is_null("date_col") + } + String enabled = context.config.otherConfigs.get("enableHiveTest") if (enabled == null || !enabled.equalsIgnoreCase("true")) { logger.info("diable Hive test.") @@ -108,6 +141,7 @@ suite("test_hive_orc", "all_types,p0,external,hive,external_docker,external_dock only_partition_col() decimals() string_col_dict_plain_mixed() + predicate_pushdown() sql """drop catalog if exists ${catalog_name}""" diff --git a/regression-test/suites/external_table_p0/hive/test_hive_orc_predicate.groovy b/regression-test/suites/external_table_p0/hive/test_hive_orc_predicate.groovy index 899c2d6ad40265..3f45e6ff439ac3 100644 --- a/regression-test/suites/external_table_p0/hive/test_hive_orc_predicate.groovy +++ b/regression-test/suites/external_table_p0/hive/test_hive_orc_predicate.groovy @@ -43,6 +43,17 @@ suite("test_hive_orc_predicate", "p0,external,hive,external_docker,external_dock qt_predicate_changed_type2 """ select * from type_changed_table where id = '2';""" qt_predicate_changed_type3 """ select * from type_changed_table where id = '3';""" + qt_predicate_null_aware_equal_in_rt """select * from table_a inner join table_b on table_a.age <=> table_b.age and table_b.id in (1,3) order by table_a.id;""" + + // use check_orc_init_sargs_success to test full acid push down + sql """use `${catalog_name}`.`default`""" + if (hivePrefix == "hive3") { + sql """ set check_orc_init_sargs_success = true; """ + qt_predicate_full_acid_push_down """ select * from orc_full_acid_par where value = 'BB' order by id;""" + sql """ set check_orc_init_sargs_success = false; """ + } + + sql """use `${catalog_name}`.`multi_catalog`""" qt_lazy_materialization_for_list_type """ select l from complex_data_orc where id > 2 order by id; """ qt_lazy_materialization_for_map_type """ select m from complex_data_orc where id > 2 order by id; """ qt_lazy_materialization_for_list_and_map_type """ select * from complex_data_orc where id > 2 order by id; """ @@ -50,6 +61,7 @@ suite("test_hive_orc_predicate", "p0,external,hive,external_docker,external_dock sql """drop catalog if exists ${catalog_name}""" } finally { + sql """ set check_orc_init_sargs_success = false; """ } } }