diff --git a/be/src/exec/olap_utils.h b/be/src/exec/olap_utils.h index ddf8562fea1daa..444df52a009f4d 100644 --- a/be/src/exec/olap_utils.h +++ b/be/src/exec/olap_utils.h @@ -104,23 +104,23 @@ inline SQLFilterOp to_olap_filter_type(TExprOpcode::type type, bool opposite) { return FILTER_IN; } -inline SQLFilterOp to_olap_filter_type(const std::string& function_name, bool opposite) { +inline SQLFilterOp to_olap_filter_type(const std::string& function_name) { if (function_name == "lt") { - return opposite ? FILTER_LARGER : FILTER_LESS; + return FILTER_LESS; } else if (function_name == "gt") { - return opposite ? FILTER_LESS : FILTER_LARGER; + return FILTER_LARGER; } else if (function_name == "le") { - return opposite ? FILTER_LARGER_OR_EQUAL : FILTER_LESS_OR_EQUAL; + return FILTER_LESS_OR_EQUAL; } else if (function_name == "ge") { - return opposite ? FILTER_LESS_OR_EQUAL : FILTER_LARGER_OR_EQUAL; + return FILTER_LARGER_OR_EQUAL; } else if (function_name == "eq") { - return opposite ? FILTER_NOT_IN : FILTER_IN; + return FILTER_IN; } else if (function_name == "ne") { - return opposite ? FILTER_IN : FILTER_NOT_IN; + return FILTER_NOT_IN; } else if (function_name == "in") { - return opposite ? FILTER_NOT_IN : FILTER_IN; + return FILTER_IN; } else if (function_name == "not_in") { - return opposite ? FILTER_IN : FILTER_NOT_IN; + return FILTER_NOT_IN; } else { DCHECK(false) << "Function Name: " << function_name; return FILTER_IN; diff --git a/be/src/exprs/create_predicate_function.h b/be/src/exprs/create_predicate_function.h index 422792b2c209f7..2c79566a013f4c 100644 --- a/be/src/exprs/create_predicate_function.h +++ b/be/src/exprs/create_predicate_function.h @@ -268,11 +268,13 @@ std::shared_ptr create_olap_column_predicate( const TabletColumn* column, bool) { // currently only support like predicate if constexpr (PT == TYPE_CHAR) { - return LikeColumnPredicate::create_shared( - filter->_opposite, column_id, filter->_fn_ctx, filter->_string_param); + return LikeColumnPredicate::create_shared(filter->_opposite, column_id, + column->name(), filter->_fn_ctx, + filter->_string_param); } else if constexpr (PT == TYPE_VARCHAR || PT == TYPE_STRING) { - return LikeColumnPredicate::create_shared( - filter->_opposite, column_id, filter->_fn_ctx, filter->_string_param); + return LikeColumnPredicate::create_shared(filter->_opposite, column_id, + column->name(), filter->_fn_ctx, + filter->_string_param); } throw Exception(ErrorCode::INTERNAL_ERROR, "function filter do not support type {}", PT); } diff --git a/be/src/olap/accept_null_predicate.h b/be/src/olap/accept_null_predicate.h index 79792443637894..b223cd3a401aef 100644 --- a/be/src/olap/accept_null_predicate.h +++ b/be/src/olap/accept_null_predicate.h @@ -41,7 +41,8 @@ class AcceptNullPredicate : public ColumnPredicate { public: AcceptNullPredicate(const std::shared_ptr& nested) - : ColumnPredicate(nested->column_id(), nested->primitive_type(), nested->opposite()), + : ColumnPredicate(nested->column_id(), nested->col_name(), nested->primitive_type(), + nested->opposite()), _nested {nested} {} AcceptNullPredicate(const AcceptNullPredicate& other, uint32_t col_id) : ColumnPredicate(other, col_id), diff --git a/be/src/olap/bitmap_filter_predicate.h b/be/src/olap/bitmap_filter_predicate.h index 730233b5c75f91..9afaac4608220e 100644 --- a/be/src/olap/bitmap_filter_predicate.h +++ b/be/src/olap/bitmap_filter_predicate.h @@ -33,9 +33,9 @@ class BitmapFilterColumnPredicate final : public ColumnPredicate { using CppType = typename PrimitiveTypeTraits::CppType; using SpecificFilter = BitmapFilterFunc; - BitmapFilterColumnPredicate(uint32_t column_id, + BitmapFilterColumnPredicate(uint32_t column_id, std::string col_name, const std::shared_ptr& filter) - : ColumnPredicate(column_id, T), + : ColumnPredicate(column_id, col_name, T), _filter(filter), _specific_filter(assert_cast(_filter.get())) {} ~BitmapFilterColumnPredicate() override = default; diff --git a/be/src/olap/bloom_filter_predicate.h b/be/src/olap/bloom_filter_predicate.h index eae433203aef10..e25afc878aa066 100644 --- a/be/src/olap/bloom_filter_predicate.h +++ b/be/src/olap/bloom_filter_predicate.h @@ -35,9 +35,9 @@ class BloomFilterColumnPredicate final : public ColumnPredicate { ENABLE_FACTORY_CREATOR(BloomFilterColumnPredicate); using SpecificFilter = BloomFilterFunc; - BloomFilterColumnPredicate(uint32_t column_id, + BloomFilterColumnPredicate(uint32_t column_id, std::string col_name, const std::shared_ptr& filter) - : ColumnPredicate(column_id, T), + : ColumnPredicate(column_id, col_name, T), _filter(filter), _specific_filter(assert_cast(_filter.get())) {} ~BloomFilterColumnPredicate() override = default; diff --git a/be/src/olap/column_predicate.h b/be/src/olap/column_predicate.h index 47a9bec5bd193d..692729a8987d23 100644 --- a/be/src/olap/column_predicate.h +++ b/be/src/olap/column_predicate.h @@ -197,9 +197,12 @@ struct PredicateTypeTraits { class ColumnPredicate : public std::enable_shared_from_this { public: - explicit ColumnPredicate(uint32_t column_id, PrimitiveType primitive_type, + explicit ColumnPredicate(uint32_t column_id, std::string col_name, PrimitiveType primitive_type, bool opposite = false) - : _column_id(column_id), _primitive_type(primitive_type), _opposite(opposite) { + : _column_id(column_id), + _col_name(col_name), + _primitive_type(primitive_type), + _opposite(opposite) { reset_judge_selectivity(); } ColumnPredicate(const ColumnPredicate& other, uint32_t col_id) : ColumnPredicate(other) { @@ -316,6 +319,7 @@ class ColumnPredicate : public std::enable_shared_from_this { DCHECK(false) << "should not reach here"; } uint32_t column_id() const { return _column_id; } + std::string col_name() const { return _col_name; } bool opposite() const { return _opposite; } @@ -421,6 +425,7 @@ class ColumnPredicate : public std::enable_shared_from_this { } uint32_t _column_id; + const std::string _col_name; PrimitiveType _primitive_type; // TODO: the value is only in delete condition, better be template value bool _opposite; diff --git a/be/src/olap/comparison_predicate.h b/be/src/olap/comparison_predicate.h index f4cf674ab8299c..e146ea40df945e 100644 --- a/be/src/olap/comparison_predicate.h +++ b/be/src/olap/comparison_predicate.h @@ -35,8 +35,9 @@ class ComparisonPredicateBase final : public ColumnPredicate { public: ENABLE_FACTORY_CREATOR(ComparisonPredicateBase); using T = typename PrimitiveTypeTraits::CppType; - ComparisonPredicateBase(uint32_t column_id, const T& value, bool opposite = false) - : ColumnPredicate(column_id, Type, opposite), _value(value) {} + ComparisonPredicateBase(uint32_t column_id, std::string col_name, const T& value, + bool opposite = false) + : ColumnPredicate(column_id, col_name, Type, opposite), _value(value) {} ComparisonPredicateBase(const ComparisonPredicateBase& other, uint32_t col_id) : ColumnPredicate(other, col_id), _value(other._value) {} ComparisonPredicateBase(const ComparisonPredicateBase& other) = delete; diff --git a/be/src/olap/delete_handler.cpp b/be/src/olap/delete_handler.cpp index b65bc89c64eacf..2b40351296ddca 100644 --- a/be/src/olap/delete_handler.cpp +++ b/be/src/olap/delete_handler.cpp @@ -252,42 +252,44 @@ Status convert(const vectorized::DataTypePtr& data_type, const std::list(index, type, v, true, arena); \ + predicate = create_comparison_predicate0(index, col_name, type, v, \ + true, arena); \ return Status::OK(); \ case PredicateType::NE: \ - predicate = \ - create_comparison_predicate0(index, type, v, true, arena); \ + predicate = create_comparison_predicate0(index, col_name, type, v, \ + true, arena); \ return Status::OK(); \ case PredicateType::GT: \ - predicate = \ - create_comparison_predicate0(index, type, v, true, arena); \ + predicate = create_comparison_predicate0(index, col_name, type, v, \ + true, arena); \ return Status::OK(); \ case PredicateType::GE: \ - predicate = \ - create_comparison_predicate0(index, type, v, true, arena); \ + predicate = create_comparison_predicate0(index, col_name, type, v, \ + true, arena); \ return Status::OK(); \ case PredicateType::LT: \ - predicate = \ - create_comparison_predicate0(index, type, v, true, arena); \ + predicate = create_comparison_predicate0(index, col_name, type, v, \ + true, arena); \ return Status::OK(); \ case PredicateType::LE: \ - predicate = \ - create_comparison_predicate0(index, type, v, true, arena); \ + predicate = create_comparison_predicate0(index, col_name, type, v, \ + true, arena); \ return Status::OK(); \ default: \ return Status::Error( \ "invalid condition operator. operator={}", type_to_op_str(res.condition_op)); \ } \ } -Status parse_to_predicate(const uint32_t index, const vectorized::DataTypePtr& type, +Status parse_to_predicate(const uint32_t index, const std::string col_name, + const vectorized::DataTypePtr& type, DeleteHandler::ConditionParseResult& res, vectorized::Arena& arena, std::shared_ptr& predicate) { DCHECK_EQ(res.value_str.size(), 1); if (res.condition_op == PredicateType::IS_NULL || res.condition_op == PredicateType::IS_NOT_NULL) { - predicate = NullPredicate::create_shared( - index, res.condition_op == PredicateType::IS_NOT_NULL, type->get_primitive_type()); + predicate = NullPredicate::create_shared(index, col_name, + res.condition_op == PredicateType::IS_NOT_NULL, + type->get_primitive_type()); return Status::OK(); } StringRef v; @@ -318,28 +320,28 @@ Status parse_to_predicate(const uint32_t index, const vectorized::DataTypePtr& t RETURN_IF_ERROR(convert(type, res.value_str.front(), arena, v)); switch (res.condition_op) { case PredicateType::EQ: - predicate = - create_comparison_predicate0(index, type, v, true, arena); + predicate = create_comparison_predicate0(index, col_name, type, v, + true, arena); return Status::OK(); case PredicateType::NE: - predicate = - create_comparison_predicate0(index, type, v, true, arena); + predicate = create_comparison_predicate0(index, col_name, type, v, + true, arena); return Status::OK(); case PredicateType::GT: - predicate = - create_comparison_predicate0(index, type, v, true, arena); + predicate = create_comparison_predicate0(index, col_name, type, v, + true, arena); return Status::OK(); case PredicateType::GE: - predicate = - create_comparison_predicate0(index, type, v, true, arena); + predicate = create_comparison_predicate0(index, col_name, type, v, + true, arena); return Status::OK(); case PredicateType::LT: - predicate = - create_comparison_predicate0(index, type, v, true, arena); + predicate = create_comparison_predicate0(index, col_name, type, v, + true, arena); return Status::OK(); case PredicateType::LE: - predicate = - create_comparison_predicate0(index, type, v, true, arena); + predicate = create_comparison_predicate0(index, col_name, type, v, + true, arena); return Status::OK(); default: return Status::Error( @@ -356,7 +358,8 @@ Status parse_to_predicate(const uint32_t index, const vectorized::DataTypePtr& t #undef CONVERT_CASE } -Status parse_to_in_predicate(const uint32_t index, const vectorized::DataTypePtr& type, +Status parse_to_in_predicate(const uint32_t index, const std::string& col_name, + const vectorized::DataTypePtr& type, DeleteHandler::ConditionParseResult& res, vectorized::Arena& arena, std::shared_ptr& predicate) { DCHECK_GT(res.value_str.size(), 1); @@ -364,13 +367,15 @@ Status parse_to_in_predicate(const uint32_t index, const vectorized::DataTypePtr case PredicateType::IN_LIST: { std::shared_ptr set; RETURN_IF_ERROR(convert(type, res.value_str, arena, set)); - predicate = create_in_list_predicate(index, type, set, true); + predicate = + create_in_list_predicate(index, col_name, type, set, true); break; } case PredicateType::NOT_IN_LIST: { std::shared_ptr set; RETURN_IF_ERROR(convert(type, res.value_str, arena, set)); - predicate = create_in_list_predicate(index, type, set, true); + predicate = create_in_list_predicate(index, col_name, type, set, + true); break; } default: @@ -741,7 +746,7 @@ Status DeleteHandler::_parse_column_pred(TabletSchemaSPtr complete_schema, const auto& column = complete_schema->column_by_uid(col_unique_id); uint32_t index = complete_schema->field_index(col_unique_id); std::shared_ptr predicate; - RETURN_IF_ERROR(parse_to_predicate(index, column.get_vec_type(), condition, + RETURN_IF_ERROR(parse_to_predicate(index, column.name(), column.get_vec_type(), condition, _predicate_arena, predicate)); if (predicate != nullptr) { delete_conditions->column_predicate_vec.push_back(predicate); @@ -800,8 +805,8 @@ Status DeleteHandler::init(TabletSchemaSPtr tablet_schema, const auto& column = tablet_schema->column_by_uid(col_unique_id); uint32_t index = tablet_schema->field_index(col_unique_id); std::shared_ptr predicate; - RETURN_IF_ERROR(parse_to_in_predicate(index, column.get_vec_type(), condition, - _predicate_arena, predicate)); + RETURN_IF_ERROR(parse_to_in_predicate(index, column.name(), column.get_vec_type(), + condition, _predicate_arena, predicate)); temp.column_predicate_vec.push_back(predicate); } diff --git a/be/src/olap/in_list_predicate.h b/be/src/olap/in_list_predicate.h index 7e8baa1122792a..193a024855eb71 100644 --- a/be/src/olap/in_list_predicate.h +++ b/be/src/olap/in_list_predicate.h @@ -77,32 +77,10 @@ class InListPredicateBase final : public ColumnPredicate { std::is_same_v, StringSet>, HybridSet, vectorized::PredicateColumnType>>>>; - template - InListPredicateBase(uint32_t column_id, const ConditionType& conditions, - const ConvertFunc& convert, bool is_opposite, - const vectorized::DataTypePtr& data_type, vectorized::Arena& arena) - : ColumnPredicate(column_id, Type, is_opposite), - _min_value(type_limit::max()), - _max_value(type_limit::min()) { - _values = std::make_shared(false); - for (const auto& condition : conditions) { - T tmp; - if constexpr (Type == TYPE_STRING || Type == TYPE_CHAR) { - tmp = convert(data_type, condition, arena); - } else if constexpr (Type == TYPE_DECIMAL32 || Type == TYPE_DECIMAL64 || - Type == TYPE_DECIMAL128I || Type == TYPE_DECIMAL256) { - tmp = convert(data_type, condition); - } else { - tmp = convert(condition); - } - _values->insert(&tmp); - _update_min_max(tmp); - } - } - - InListPredicateBase(uint32_t column_id, const std::shared_ptr& hybrid_set, - bool is_opposite, size_t char_length = 0) - : ColumnPredicate(column_id, Type, is_opposite), + InListPredicateBase(uint32_t column_id, std::string col_name, + const std::shared_ptr& hybrid_set, bool is_opposite, + size_t char_length = 0) + : ColumnPredicate(column_id, col_name, Type, is_opposite), _min_value(type_limit::max()), _max_value(type_limit::min()) { CHECK(hybrid_set != nullptr); diff --git a/be/src/olap/like_column_predicate.cpp b/be/src/olap/like_column_predicate.cpp index 9359fef6b04978..813acaabca64d1 100644 --- a/be/src/olap/like_column_predicate.cpp +++ b/be/src/olap/like_column_predicate.cpp @@ -26,9 +26,9 @@ namespace doris { template -LikeColumnPredicate::LikeColumnPredicate(bool opposite, uint32_t column_id, +LikeColumnPredicate::LikeColumnPredicate(bool opposite, uint32_t column_id, std::string col_name, doris::FunctionContext* fn_ctx, doris::StringRef val) - : ColumnPredicate(column_id, T, opposite), pattern(val) { + : ColumnPredicate(column_id, col_name, T, opposite), pattern(val) { static_assert(T == TYPE_VARCHAR || T == TYPE_CHAR || T == TYPE_STRING, "LikeColumnPredicate only supports the following types: TYPE_VARCHAR, TYPE_CHAR, " "TYPE_STRING"); diff --git a/be/src/olap/like_column_predicate.h b/be/src/olap/like_column_predicate.h index 0e7a0480f43cd6..cdcc52bfa7dba9 100644 --- a/be/src/olap/like_column_predicate.h +++ b/be/src/olap/like_column_predicate.h @@ -47,8 +47,8 @@ template class LikeColumnPredicate final : public ColumnPredicate { public: ENABLE_FACTORY_CREATOR(LikeColumnPredicate); - LikeColumnPredicate(bool opposite, uint32_t column_id, doris::FunctionContext* fn_ctx, - doris::StringRef val); + LikeColumnPredicate(bool opposite, uint32_t column_id, std::string col_name, + doris::FunctionContext* fn_ctx, doris::StringRef val); ~LikeColumnPredicate() override = default; LikeColumnPredicate(const LikeColumnPredicate& other, uint32_t col_id) : ColumnPredicate(other, col_id) { diff --git a/be/src/olap/null_predicate.cpp b/be/src/olap/null_predicate.cpp index b2db30383c6716..ff17496229c44a 100644 --- a/be/src/olap/null_predicate.cpp +++ b/be/src/olap/null_predicate.cpp @@ -31,8 +31,9 @@ using namespace doris::vectorized; namespace doris { -NullPredicate::NullPredicate(uint32_t column_id, bool is_null, PrimitiveType type, bool opposite) - : ColumnPredicate(column_id, type), _is_null(opposite != is_null) {} +NullPredicate::NullPredicate(uint32_t column_id, std::string col_name, bool is_null, + PrimitiveType type, bool opposite) + : ColumnPredicate(column_id, col_name, type), _is_null(opposite != is_null) {} PredicateType NullPredicate::type() const { return _is_null ? PredicateType::IS_NULL : PredicateType::IS_NOT_NULL; diff --git a/be/src/olap/null_predicate.h b/be/src/olap/null_predicate.h index 3def7e9be3ae34..e0217f6c1f4907 100644 --- a/be/src/olap/null_predicate.h +++ b/be/src/olap/null_predicate.h @@ -46,7 +46,8 @@ class IColumn; class NullPredicate final : public ColumnPredicate { public: ENABLE_FACTORY_CREATOR(NullPredicate); - NullPredicate(uint32_t column_id, bool is_null, PrimitiveType type, bool opposite = false); + NullPredicate(uint32_t column_id, std::string col_name, bool is_null, PrimitiveType type, + bool opposite = false); NullPredicate(const NullPredicate& other) = delete; NullPredicate(const NullPredicate& other, uint32_t column_id) : ColumnPredicate(other, column_id), _is_null(other._is_null) {} diff --git a/be/src/olap/predicate_creator.cpp b/be/src/olap/predicate_creator.cpp index b2e35de6d1f5c7..9cf9d6a3412c2b 100644 --- a/be/src/olap/predicate_creator.cpp +++ b/be/src/olap/predicate_creator.cpp @@ -20,7 +20,7 @@ namespace doris { std::shared_ptr create_bloom_filter_predicate( - const uint32_t cid, const vectorized::DataTypePtr& data_type, + const uint32_t cid, const std::string col_name, const vectorized::DataTypePtr& data_type, const std::shared_ptr& filter) { // Do the necessary type conversion, for CAST(STRING AS CHAR), we do nothing here but change the data type to the target type CHAR std::shared_ptr filter_olap; @@ -28,73 +28,80 @@ std::shared_ptr create_bloom_filter_predicate( filter_olap->light_copy(filter.get()); switch (data_type->get_primitive_type()) { case TYPE_TINYINT: { - return BloomFilterColumnPredicate::create_shared(cid, filter_olap); + return BloomFilterColumnPredicate::create_shared(cid, col_name, filter_olap); } case TYPE_SMALLINT: { - return BloomFilterColumnPredicate::create_shared(cid, filter_olap); + return BloomFilterColumnPredicate::create_shared(cid, col_name, filter_olap); } case TYPE_INT: { - return BloomFilterColumnPredicate::create_shared(cid, filter_olap); + return BloomFilterColumnPredicate::create_shared(cid, col_name, filter_olap); } case TYPE_BIGINT: { - return BloomFilterColumnPredicate::create_shared(cid, filter_olap); + return BloomFilterColumnPredicate::create_shared(cid, col_name, filter_olap); } case TYPE_LARGEINT: { - return BloomFilterColumnPredicate::create_shared(cid, filter_olap); + return BloomFilterColumnPredicate::create_shared(cid, col_name, filter_olap); } case TYPE_FLOAT: { - return BloomFilterColumnPredicate::create_shared(cid, filter_olap); + return BloomFilterColumnPredicate::create_shared(cid, col_name, filter_olap); } case TYPE_DOUBLE: { - return BloomFilterColumnPredicate::create_shared(cid, filter_olap); + return BloomFilterColumnPredicate::create_shared(cid, col_name, filter_olap); } case TYPE_DECIMALV2: { - return BloomFilterColumnPredicate::create_shared(cid, filter_olap); + return BloomFilterColumnPredicate::create_shared(cid, col_name, + filter_olap); } case TYPE_DECIMAL32: { - return BloomFilterColumnPredicate::create_shared(cid, filter_olap); + return BloomFilterColumnPredicate::create_shared(cid, col_name, + filter_olap); } case TYPE_DECIMAL64: { - return BloomFilterColumnPredicate::create_shared(cid, filter_olap); + return BloomFilterColumnPredicate::create_shared(cid, col_name, + filter_olap); } case TYPE_DECIMAL128I: { - return BloomFilterColumnPredicate::create_shared(cid, filter_olap); + return BloomFilterColumnPredicate::create_shared(cid, col_name, + filter_olap); } case TYPE_DECIMAL256: { - return BloomFilterColumnPredicate::create_shared(cid, filter_olap); + return BloomFilterColumnPredicate::create_shared(cid, col_name, + filter_olap); } case TYPE_CHAR: { - return BloomFilterColumnPredicate::create_shared(cid, filter_olap); + return BloomFilterColumnPredicate::create_shared(cid, col_name, filter_olap); } case TYPE_VARCHAR: { - return BloomFilterColumnPredicate::create_shared(cid, filter_olap); + return BloomFilterColumnPredicate::create_shared(cid, col_name, filter_olap); } case TYPE_STRING: { - return BloomFilterColumnPredicate::create_shared(cid, filter_olap); + return BloomFilterColumnPredicate::create_shared(cid, col_name, filter_olap); } case TYPE_DATE: { - return BloomFilterColumnPredicate::create_shared(cid, filter_olap); + return BloomFilterColumnPredicate::create_shared(cid, col_name, filter_olap); } case TYPE_DATEV2: { - return BloomFilterColumnPredicate::create_shared(cid, filter_olap); + return BloomFilterColumnPredicate::create_shared(cid, col_name, filter_olap); } case TYPE_DATETIME: { - return BloomFilterColumnPredicate::create_shared(cid, filter_olap); + return BloomFilterColumnPredicate::create_shared(cid, col_name, filter_olap); } case TYPE_DATETIMEV2: { - return BloomFilterColumnPredicate::create_shared(cid, filter_olap); + return BloomFilterColumnPredicate::create_shared(cid, col_name, + filter_olap); } case TYPE_TIMESTAMPTZ: { - return BloomFilterColumnPredicate::create_shared(cid, filter_olap); + return BloomFilterColumnPredicate::create_shared(cid, col_name, + filter_olap); } case TYPE_BOOLEAN: { - return BloomFilterColumnPredicate::create_shared(cid, filter_olap); + return BloomFilterColumnPredicate::create_shared(cid, col_name, filter_olap); } case TYPE_IPV4: { - return BloomFilterColumnPredicate::create_shared(cid, filter_olap); + return BloomFilterColumnPredicate::create_shared(cid, col_name, filter_olap); } case TYPE_IPV6: { - return BloomFilterColumnPredicate::create_shared(cid, filter_olap); + return BloomFilterColumnPredicate::create_shared(cid, col_name, filter_olap); } default: throw Exception(ErrorCode::INVALID_ARGUMENT, @@ -105,20 +112,20 @@ std::shared_ptr create_bloom_filter_predicate( } std::shared_ptr create_bitmap_filter_predicate( - const uint32_t cid, const vectorized::DataTypePtr& data_type, + const uint32_t cid, const std::string col_name, const vectorized::DataTypePtr& data_type, const std::shared_ptr& filter) { switch (data_type->get_primitive_type()) { case TYPE_TINYINT: { - return BitmapFilterColumnPredicate::create_shared(cid, filter); + return BitmapFilterColumnPredicate::create_shared(cid, col_name, filter); } case TYPE_SMALLINT: { - return BitmapFilterColumnPredicate::create_shared(cid, filter); + return BitmapFilterColumnPredicate::create_shared(cid, col_name, filter); } case TYPE_INT: { - return BitmapFilterColumnPredicate::create_shared(cid, filter); + return BitmapFilterColumnPredicate::create_shared(cid, col_name, filter); } case TYPE_BIGINT: { - return BitmapFilterColumnPredicate::create_shared(cid, filter); + return BitmapFilterColumnPredicate::create_shared(cid, col_name, filter); } default: throw Exception(ErrorCode::INVALID_ARGUMENT, diff --git a/be/src/olap/predicate_creator.h b/be/src/olap/predicate_creator.h index eb9dbcbbc2c9f0..c225dcfc3d9b20 100644 --- a/be/src/olap/predicate_creator.h +++ b/be/src/olap/predicate_creator.h @@ -49,110 +49,120 @@ namespace doris { template std::shared_ptr create_in_list_predicate(const uint32_t cid, + const std::string col_name, const std::shared_ptr& set, bool is_opposite, size_t char_length = 0) { auto set_size = set->size(); if (set_size == 1) { - return InListPredicateBase::create_shared(cid, set, is_opposite, char_length); + return InListPredicateBase::create_shared(cid, col_name, set, is_opposite, + char_length); } else if (set_size == 2) { - return InListPredicateBase::create_shared(cid, set, is_opposite, char_length); + return InListPredicateBase::create_shared(cid, col_name, set, is_opposite, + char_length); } else if (set_size == 3) { - return InListPredicateBase::create_shared(cid, set, is_opposite, char_length); + return InListPredicateBase::create_shared(cid, col_name, set, is_opposite, + char_length); } else if (set_size == 4) { - return InListPredicateBase::create_shared(cid, set, is_opposite, char_length); + return InListPredicateBase::create_shared(cid, col_name, set, is_opposite, + char_length); } else if (set_size == 5) { - return InListPredicateBase::create_shared(cid, set, is_opposite, char_length); + return InListPredicateBase::create_shared(cid, col_name, set, is_opposite, + char_length); } else if (set_size == 6) { - return InListPredicateBase::create_shared(cid, set, is_opposite, char_length); + return InListPredicateBase::create_shared(cid, col_name, set, is_opposite, + char_length); } else if (set_size == 7) { - return InListPredicateBase::create_shared(cid, set, is_opposite, char_length); + return InListPredicateBase::create_shared(cid, col_name, set, is_opposite, + char_length); } else if (set_size == FIXED_CONTAINER_MAX_SIZE) { - return InListPredicateBase::create_shared(cid, set, is_opposite, char_length); + return InListPredicateBase::create_shared(cid, col_name, set, is_opposite, + char_length); } else { return InListPredicateBase::create_shared( - cid, set, is_opposite, char_length); + cid, col_name, set, is_opposite, char_length); } } template std::shared_ptr create_in_list_predicate(const uint32_t cid, + const std::string col_name, const vectorized::DataTypePtr& data_type, const std::shared_ptr set, bool is_opposite) { switch (data_type->get_primitive_type()) { case TYPE_TINYINT: { - return create_in_list_predicate(cid, set, is_opposite); + return create_in_list_predicate(cid, col_name, set, is_opposite); } case TYPE_SMALLINT: { - return create_in_list_predicate(cid, set, is_opposite); + return create_in_list_predicate(cid, col_name, set, is_opposite); } case TYPE_INT: { - return create_in_list_predicate(cid, set, is_opposite); + return create_in_list_predicate(cid, col_name, set, is_opposite); } case TYPE_BIGINT: { - return create_in_list_predicate(cid, set, is_opposite); + return create_in_list_predicate(cid, col_name, set, is_opposite); } case TYPE_LARGEINT: { - return create_in_list_predicate(cid, set, is_opposite); + return create_in_list_predicate(cid, col_name, set, is_opposite); } case TYPE_FLOAT: { - return create_in_list_predicate(cid, set, is_opposite); + return create_in_list_predicate(cid, col_name, set, is_opposite); } case TYPE_DOUBLE: { - return create_in_list_predicate(cid, set, is_opposite); + return create_in_list_predicate(cid, col_name, set, is_opposite); } case TYPE_DECIMALV2: { - return create_in_list_predicate(cid, set, is_opposite); + return create_in_list_predicate(cid, col_name, set, is_opposite); } case TYPE_DECIMAL32: { - return create_in_list_predicate(cid, set, is_opposite); + return create_in_list_predicate(cid, col_name, set, is_opposite); } case TYPE_DECIMAL64: { - return create_in_list_predicate(cid, set, is_opposite); + return create_in_list_predicate(cid, col_name, set, is_opposite); } case TYPE_DECIMAL128I: { - return create_in_list_predicate(cid, set, is_opposite); + return create_in_list_predicate(cid, col_name, set, is_opposite); } case TYPE_DECIMAL256: { - return create_in_list_predicate(cid, set, is_opposite); + return create_in_list_predicate(cid, col_name, set, is_opposite); } case TYPE_CHAR: { return create_in_list_predicate( - cid, set, is_opposite, + cid, col_name, set, is_opposite, assert_cast( vectorized::remove_nullable(data_type).get()) ->len()); } case TYPE_VARCHAR: { - return create_in_list_predicate(cid, set, is_opposite); + return create_in_list_predicate(cid, col_name, set, is_opposite); } case TYPE_STRING: { - return create_in_list_predicate(cid, set, is_opposite); + return create_in_list_predicate(cid, col_name, set, is_opposite); } case TYPE_DATE: { - return create_in_list_predicate(cid, set, is_opposite); + return create_in_list_predicate(cid, col_name, set, is_opposite); } case TYPE_DATEV2: { - return create_in_list_predicate(cid, set, is_opposite); + return create_in_list_predicate(cid, col_name, set, is_opposite); } case TYPE_DATETIME: { - return create_in_list_predicate(cid, set, is_opposite); + return create_in_list_predicate(cid, col_name, set, is_opposite); } case TYPE_DATETIMEV2: { - return create_in_list_predicate(cid, set, is_opposite); + return create_in_list_predicate(cid, col_name, set, is_opposite); } case TYPE_TIMESTAMPTZ: { - return create_in_list_predicate(cid, set, is_opposite); + return create_in_list_predicate(cid, col_name, set, is_opposite); } case TYPE_BOOLEAN: { - return create_in_list_predicate(cid, set, is_opposite); + return create_in_list_predicate(cid, col_name, set, is_opposite); } case TYPE_IPV4: { - return create_in_list_predicate(cid, set, is_opposite); + return create_in_list_predicate(cid, col_name, set, is_opposite); } case TYPE_IPV6: { - return create_in_list_predicate(cid, set, is_opposite); + return create_in_list_predicate(cid, col_name, set, is_opposite); } default: throw Exception(Status::InternalError("Unsupported type {} for in_predicate", @@ -163,57 +173,67 @@ std::shared_ptr create_in_list_predicate(const uint32_t cid, template std::shared_ptr create_comparison_predicate0( - const uint32_t cid, const vectorized::DataTypePtr& data_type, StringRef& value, - bool opposite, vectorized::Arena& arena) { + const uint32_t cid, const std::string col_name, const vectorized::DataTypePtr& data_type, + StringRef& value, bool opposite, vectorized::Arena& arena) { switch (data_type->get_primitive_type()) { case TYPE_TINYINT: { return ComparisonPredicateBase::create_shared( - cid, *(typename PrimitiveTypeTraits::CppType*)value.data, opposite); + cid, col_name, *(typename PrimitiveTypeTraits::CppType*)value.data, + opposite); } case TYPE_SMALLINT: { return ComparisonPredicateBase::create_shared( - cid, *(typename PrimitiveTypeTraits::CppType*)value.data, opposite); + cid, col_name, *(typename PrimitiveTypeTraits::CppType*)value.data, + opposite); } case TYPE_INT: { return ComparisonPredicateBase::create_shared( - cid, *(typename PrimitiveTypeTraits::CppType*)value.data, opposite); + cid, col_name, *(typename PrimitiveTypeTraits::CppType*)value.data, + opposite); } case TYPE_BIGINT: { return ComparisonPredicateBase::create_shared( - cid, *(typename PrimitiveTypeTraits::CppType*)value.data, opposite); + cid, col_name, *(typename PrimitiveTypeTraits::CppType*)value.data, + opposite); } case TYPE_LARGEINT: { return ComparisonPredicateBase::create_shared( - cid, *(typename PrimitiveTypeTraits::CppType*)value.data, opposite); + cid, col_name, *(typename PrimitiveTypeTraits::CppType*)value.data, + opposite); } case TYPE_FLOAT: { return ComparisonPredicateBase::create_shared( - cid, *(typename PrimitiveTypeTraits::CppType*)value.data, opposite); + cid, col_name, *(typename PrimitiveTypeTraits::CppType*)value.data, + opposite); } case TYPE_DOUBLE: { return ComparisonPredicateBase::create_shared( - cid, *(typename PrimitiveTypeTraits::CppType*)value.data, opposite); + cid, col_name, *(typename PrimitiveTypeTraits::CppType*)value.data, + opposite); } case TYPE_DECIMALV2: { return ComparisonPredicateBase::create_shared( - cid, *(typename PrimitiveTypeTraits::CppType*)value.data, opposite); + cid, col_name, *(typename PrimitiveTypeTraits::CppType*)value.data, + opposite); } case TYPE_DECIMAL32: { return ComparisonPredicateBase::create_shared( - cid, *(typename PrimitiveTypeTraits::CppType*)value.data, opposite); + cid, col_name, *(typename PrimitiveTypeTraits::CppType*)value.data, + opposite); } case TYPE_DECIMAL64: { return ComparisonPredicateBase::create_shared( - cid, *(typename PrimitiveTypeTraits::CppType*)value.data, opposite); + cid, col_name, *(typename PrimitiveTypeTraits::CppType*)value.data, + opposite); } case TYPE_DECIMAL128I: { return ComparisonPredicateBase::create_shared( - cid, *(typename PrimitiveTypeTraits::CppType*)value.data, - opposite); + cid, col_name, + *(typename PrimitiveTypeTraits::CppType*)value.data, opposite); } case TYPE_DECIMAL256: { return ComparisonPredicateBase::create_shared( - cid, *(typename PrimitiveTypeTraits::CppType*)value.data, + cid, col_name, *(typename PrimitiveTypeTraits::CppType*)value.data, opposite); } case TYPE_CHAR: { @@ -227,53 +247,59 @@ std::shared_ptr create_comparison_predicate0( memset(buffer, 0, target); memcpy(buffer, value.data, value.size); StringRef v = {buffer, target}; - return ComparisonPredicateBase::create_shared(cid, v, opposite); + return ComparisonPredicateBase::create_shared(cid, col_name, v, opposite); } case TYPE_VARCHAR: { char* buffer = arena.alloc(value.size); memcpy(buffer, value.data, value.size); StringRef v = {buffer, value.size}; - return ComparisonPredicateBase::create_shared(cid, v, opposite); + return ComparisonPredicateBase::create_shared(cid, col_name, v, opposite); } case TYPE_STRING: { char* buffer = arena.alloc(value.size); memcpy(buffer, value.data, value.size); StringRef v = {buffer, value.size}; - return ComparisonPredicateBase::create_shared(cid, v, opposite); + return ComparisonPredicateBase::create_shared(cid, col_name, v, opposite); } case TYPE_DATE: { return ComparisonPredicateBase::create_shared( - cid, *(typename PrimitiveTypeTraits::CppType*)value.data, opposite); + cid, col_name, *(typename PrimitiveTypeTraits::CppType*)value.data, + opposite); } case TYPE_DATEV2: { return ComparisonPredicateBase::create_shared( - cid, *(typename PrimitiveTypeTraits::CppType*)value.data, opposite); + cid, col_name, *(typename PrimitiveTypeTraits::CppType*)value.data, + opposite); } case TYPE_DATETIME: { return ComparisonPredicateBase::create_shared( - cid, *(typename PrimitiveTypeTraits::CppType*)value.data, opposite); + cid, col_name, *(typename PrimitiveTypeTraits::CppType*)value.data, + opposite); } case TYPE_DATETIMEV2: { return ComparisonPredicateBase::create_shared( - cid, *(typename PrimitiveTypeTraits::CppType*)value.data, + cid, col_name, *(typename PrimitiveTypeTraits::CppType*)value.data, opposite); } case TYPE_TIMESTAMPTZ: { return ComparisonPredicateBase::create_shared( - cid, *(typename PrimitiveTypeTraits::CppType*)value.data, - opposite); + cid, col_name, + *(typename PrimitiveTypeTraits::CppType*)value.data, opposite); } case TYPE_BOOLEAN: { return ComparisonPredicateBase::create_shared( - cid, *(typename PrimitiveTypeTraits::CppType*)value.data, opposite); + cid, col_name, *(typename PrimitiveTypeTraits::CppType*)value.data, + opposite); } case TYPE_IPV4: { return ComparisonPredicateBase::create_shared( - cid, *(typename PrimitiveTypeTraits::CppType*)value.data, opposite); + cid, col_name, *(typename PrimitiveTypeTraits::CppType*)value.data, + opposite); } case TYPE_IPV6: { return ComparisonPredicateBase::create_shared( - cid, *(typename PrimitiveTypeTraits::CppType*)value.data, opposite); + cid, col_name, *(typename PrimitiveTypeTraits::CppType*)value.data, + opposite); } default: throw Exception(Status::InternalError("Unsupported type {} for comparison_predicate", @@ -291,11 +317,11 @@ std::shared_ptr build_set() { } std::shared_ptr create_bloom_filter_predicate( - const uint32_t cid, const vectorized::DataTypePtr& data_type, + const uint32_t cid, const std::string col_name, const vectorized::DataTypePtr& data_type, const std::shared_ptr& filter); std::shared_ptr create_bitmap_filter_predicate( - const uint32_t cid, const vectorized::DataTypePtr& data_type, + const uint32_t cid, const std::string col_name, const vectorized::DataTypePtr& data_type, const std::shared_ptr& filter); #include "common/compile_check_end.h" } //namespace doris diff --git a/be/src/olap/push_handler.cpp b/be/src/olap/push_handler.cpp index 06af26699177a1..e494f7db7d7126 100644 --- a/be/src/olap/push_handler.cpp +++ b/be/src/olap/push_handler.cpp @@ -645,8 +645,8 @@ Status PushBrokerReader::_get_next_reader() { _io_ctx.get(), _runtime_state.get()); init_status = parquet_reader->init_reader( - _all_col_names, &_col_name_to_block_idx, _push_down_exprs, _real_tuple_desc, - _default_val_row_desc.get(), _col_name_to_slot_id, + _all_col_names, &_col_name_to_block_idx, _push_down_exprs, _slot_id_to_predicates, + _or_predicates, _real_tuple_desc, _default_val_row_desc.get(), _col_name_to_slot_id, &_not_single_slot_filter_conjuncts, &_slot_id_to_filter_conjuncts, vectorized::TableSchemaChangeHelper::ConstNode::get_instance(), false); _cur_reader = std::move(parquet_reader); diff --git a/be/src/olap/push_handler.h b/be/src/olap/push_handler.h index 92a1fd2cfde908..0b60a81c62465a 100644 --- a/be/src/olap/push_handler.h +++ b/be/src/olap/push_handler.h @@ -141,6 +141,8 @@ class PushBrokerReader { std::vector _all_col_names; std::unordered_map _col_name_to_block_idx; vectorized::VExprContextSPtrs _push_down_exprs; + phmap::flat_hash_map>> _slot_id_to_predicates; + std::vector> _or_predicates; const std::unordered_map* _col_name_to_slot_id; // single slot filter conjuncts std::unordered_map _slot_id_to_filter_conjuncts; diff --git a/be/src/olap/shared_predicate.h b/be/src/olap/shared_predicate.h index c06591cc79c728..46cda6653b9e5f 100644 --- a/be/src/olap/shared_predicate.h +++ b/be/src/olap/shared_predicate.h @@ -36,8 +36,8 @@ class SharedPredicate final : public ColumnPredicate { ENABLE_FACTORY_CREATOR(SharedPredicate); public: - SharedPredicate(uint32_t column_id) - : ColumnPredicate(column_id, PrimitiveType::INVALID_TYPE), + SharedPredicate(uint32_t column_id, std::string col_name) + : ColumnPredicate(column_id, col_name, PrimitiveType::INVALID_TYPE), _mtx(std::make_shared()) {} SharedPredicate(const ColumnPredicate& other) = delete; SharedPredicate(const SharedPredicate& other, uint32_t column_id) diff --git a/be/src/pipeline/exec/file_scan_operator.cpp b/be/src/pipeline/exec/file_scan_operator.cpp index 06db998710f559..ec3262706678a6 100644 --- a/be/src/pipeline/exec/file_scan_operator.cpp +++ b/be/src/pipeline/exec/file_scan_operator.cpp @@ -32,6 +32,72 @@ namespace doris::pipeline { #include "common/compile_check_begin.h" +PushDownType FileScanLocalState::_should_push_down_binary_predicate( + vectorized::VectorizedFnCall* fn_call, vectorized::VExprContext* expr_ctx, + StringRef* constant_val, const std::set fn_name) const { + if (!fn_name.contains(fn_call->fn().name.function_name)) { + return PushDownType::UNACCEPTABLE; + } + DCHECK(constant_val->data == nullptr) << "constant_val should not have a value"; + const auto& children = fn_call->children(); + DCHECK(children.size() == 2); + DCHECK_EQ(children[0]->node_type(), TExprNodeType::SLOT_REF); + if (children[1]->is_constant()) { + std::shared_ptr const_col_wrapper; + THROW_IF_ERROR(children[1]->get_const_col(expr_ctx, &const_col_wrapper)); + const auto* const_column = + assert_cast(const_col_wrapper->column_ptr.get()); + *constant_val = const_column->get_data_at(0); + return PushDownType::PARTIAL_ACCEPTABLE; + } else { + // only handle constant value + return PushDownType::UNACCEPTABLE; + } +} + +bool FileScanLocalState::_should_push_down_or_predicate_recursively( + const vectorized::VExprSPtr& expr) const { + if (expr->node_type() == TExprNodeType::COMPOUND_PRED && + expr->op() == TExprOpcode::COMPOUND_OR) { + return std::ranges::all_of(expr->children(), [this](const vectorized::VExprSPtr& it) { + return _should_push_down_or_predicate_recursively(it); + }); + } else if (expr->node_type() == TExprNodeType::COMPOUND_PRED && + expr->op() == TExprOpcode::COMPOUND_AND) { + return std::ranges::any_of(expr->children(), [this](const vectorized::VExprSPtr& it) { + return _should_push_down_or_predicate_recursively(it); + }); + } else { + auto children = expr->children(); + if (children.empty() || children[0]->node_type() != TExprNodeType::SLOT_REF) { + // not a slot ref(column) + return false; + } + std::shared_ptr slot_ref = + std::dynamic_pointer_cast(children[0]); + auto entry = _slot_id_to_predicates.find(slot_ref->slot_id()); + if (_slot_id_to_predicates.end() == entry) { + return false; + } + if (is_complex_type(slot_ref->data_type()->get_primitive_type())) { + return false; + } + return true; + } +} + +PushDownType FileScanLocalState::_should_push_down_or_predicate( + const vectorized::VExprContext* expr_ctx) const { + auto expr = expr_ctx->root()->get_impl() ? expr_ctx->root()->get_impl() : expr_ctx->root(); + if (expr->node_type() == TExprNodeType::COMPOUND_PRED && + expr->op() == TExprOpcode::COMPOUND_OR) { + if (_should_push_down_or_predicate_recursively(expr)) { + return PushDownType::PARTIAL_ACCEPTABLE; + } + } + return PushDownType::UNACCEPTABLE; +} + int FileScanLocalState::max_scanners_concurrency(RuntimeState* state) const { // For select * from table limit 10; should just use one thread. if (should_run_serial()) { diff --git a/be/src/pipeline/exec/file_scan_operator.h b/be/src/pipeline/exec/file_scan_operator.h index a2d834bb0d1bf2..c682f30f409266 100644 --- a/be/src/pipeline/exec/file_scan_operator.h +++ b/be/src/pipeline/exec/file_scan_operator.h @@ -60,6 +60,31 @@ class FileScanLocalState final : public ScanLocalState { private: friend class vectorized::FileScanner; + PushDownType _should_push_down_bloom_filter() const override { + return PushDownType::PARTIAL_ACCEPTABLE; + } + PushDownType _should_push_down_topn_filter() const override { + return PushDownType::PARTIAL_ACCEPTABLE; + } + PushDownType _should_push_down_bitmap_filter() const override { + return PushDownType::PARTIAL_ACCEPTABLE; + } + PushDownType _should_push_down_is_null_predicate( + vectorized::VectorizedFnCall* fn_call) const override { + return fn_call->fn().name.function_name == "is_null_pred" || + fn_call->fn().name.function_name == "is_not_null_pred" + ? PushDownType::PARTIAL_ACCEPTABLE + : PushDownType::UNACCEPTABLE; + } + PushDownType _should_push_down_in_predicate() const override { + return PushDownType::PARTIAL_ACCEPTABLE; + } + PushDownType _should_push_down_binary_predicate( + vectorized::VectorizedFnCall* fn_call, vectorized::VExprContext* expr_ctx, + StringRef* constant_val, const std::set fn_name) const override; + PushDownType _should_push_down_or_predicate( + const vectorized::VExprContext* expr_ctx) const override; + bool _should_push_down_or_predicate_recursively(const vectorized::VExprSPtr& expr) const; std::shared_ptr _split_source = nullptr; int _max_scanners; // A in memory cache to save some common components diff --git a/be/src/pipeline/exec/mock_scan_operator.h b/be/src/pipeline/exec/mock_scan_operator.h index 9a7c51952ee219..65e6cd32782f4a 100644 --- a/be/src/pipeline/exec/mock_scan_operator.h +++ b/be/src/pipeline/exec/mock_scan_operator.h @@ -33,13 +33,49 @@ class MockScanLocalState final : public ScanLocalState { bool _is_key_column(const std::string& col_name) override { return true; } private: - PushDownType _should_push_down_bloom_filter() override { return PushDownType::ACCEPTABLE; } + PushDownType _should_push_down_bloom_filter() const override { + return PushDownType::ACCEPTABLE; + } - PushDownType _should_push_down_bitmap_filter() override { return PushDownType::ACCEPTABLE; } - - PushDownType _should_push_down_is_null_predicate() override { return PushDownType::ACCEPTABLE; } + PushDownType _should_push_down_bitmap_filter() const override { + return PushDownType::ACCEPTABLE; + } bool _should_push_down_common_expr() override { return true; } + PushDownType _should_push_down_topn_filter() const override { return PushDownType::ACCEPTABLE; } + + PushDownType _should_push_down_is_null_predicate( + vectorized::VectorizedFnCall* fn_call) const override { + return fn_call->fn().name.function_name == "is_null_pred" || + fn_call->fn().name.function_name == "is_not_null_pred" + ? PushDownType::ACCEPTABLE + : PushDownType::UNACCEPTABLE; + } + PushDownType _should_push_down_in_predicate() const override { + return PushDownType::ACCEPTABLE; + } + PushDownType _should_push_down_binary_predicate( + vectorized::VectorizedFnCall* fn_call, vectorized::VExprContext* expr_ctx, + StringRef* constant_val, const std::set fn_name) const override { + if (!fn_name.contains(fn_call->fn().name.function_name)) { + return PushDownType::UNACCEPTABLE; + } + DCHECK(constant_val->data == nullptr) << "constant_val should not have a value"; + const auto& children = fn_call->children(); + DCHECK(children.size() == 2); + DCHECK_EQ(children[0]->node_type(), TExprNodeType::SLOT_REF); + if (children[1]->is_constant()) { + std::shared_ptr const_col_wrapper; + THROW_IF_ERROR(children[1]->get_const_col(expr_ctx, &const_col_wrapper)); + const auto* const_column = assert_cast( + const_col_wrapper->column_ptr.get()); + *constant_val = const_column->get_data_at(0); + return PushDownType::ACCEPTABLE; + } else { + // only handle constant value + return PushDownType::UNACCEPTABLE; + } + } }; class MockScanOperatorX final : public ScanOperatorX { diff --git a/be/src/pipeline/exec/olap_scan_operator.cpp b/be/src/pipeline/exec/olap_scan_operator.cpp index 962a2fe14ce47e..923f0b5f42b689 100644 --- a/be/src/pipeline/exec/olap_scan_operator.cpp +++ b/be/src/pipeline/exec/olap_scan_operator.cpp @@ -83,6 +83,29 @@ Status OlapScanLocalState::init(RuntimeState* state, LocalStateInfo& info) { return Status::OK(); } +PushDownType OlapScanLocalState::_should_push_down_binary_predicate( + vectorized::VectorizedFnCall* fn_call, vectorized::VExprContext* expr_ctx, + StringRef* constant_val, const std::set fn_name) const { + if (!fn_name.contains(fn_call->fn().name.function_name)) { + return PushDownType::UNACCEPTABLE; + } + DCHECK(constant_val->data == nullptr) << "constant_val should not have a value"; + const auto& children = fn_call->children(); + DCHECK(children.size() == 2); + DCHECK_EQ(children[0]->node_type(), TExprNodeType::SLOT_REF); + if (children[1]->is_constant()) { + std::shared_ptr const_col_wrapper; + THROW_IF_ERROR(children[1]->get_const_col(expr_ctx, &const_col_wrapper)); + const auto* const_column = + assert_cast(const_col_wrapper->column_ptr.get()); + *constant_val = const_column->get_data_at(0); + return PushDownType::ACCEPTABLE; + } else { + // only handle constant value + return PushDownType::UNACCEPTABLE; + } +} + Status OlapScanLocalState::_init_profile() { RETURN_IF_ERROR(ScanLocalState::_init_profile()); // Rows read from storage. diff --git a/be/src/pipeline/exec/olap_scan_operator.h b/be/src/pipeline/exec/olap_scan_operator.h index 4d6910b065f143..e649e9e6169e0b 100644 --- a/be/src/pipeline/exec/olap_scan_operator.h +++ b/be/src/pipeline/exec/olap_scan_operator.h @@ -78,12 +78,28 @@ class OlapScanLocalState final : public ScanLocalState { doris::FunctionContext** fn_ctx, PushDownType& pdt) override; - PushDownType _should_push_down_bloom_filter() override { return PushDownType::ACCEPTABLE; } - PushDownType _should_push_down_topn_filter() override { return PushDownType::ACCEPTABLE; } + PushDownType _should_push_down_bloom_filter() const override { + return PushDownType::ACCEPTABLE; + } + PushDownType _should_push_down_topn_filter() const override { return PushDownType::ACCEPTABLE; } - PushDownType _should_push_down_bitmap_filter() override { return PushDownType::ACCEPTABLE; } + PushDownType _should_push_down_bitmap_filter() const override { + return PushDownType::ACCEPTABLE; + } - PushDownType _should_push_down_is_null_predicate() override { return PushDownType::ACCEPTABLE; } + PushDownType _should_push_down_is_null_predicate( + vectorized::VectorizedFnCall* fn_call) const override { + return fn_call->fn().name.function_name == "is_null_pred" || + fn_call->fn().name.function_name == "is_not_null_pred" + ? PushDownType::ACCEPTABLE + : PushDownType::UNACCEPTABLE; + } + PushDownType _should_push_down_in_predicate() const override { + return PushDownType::ACCEPTABLE; + } + PushDownType _should_push_down_binary_predicate( + vectorized::VectorizedFnCall* fn_call, vectorized::VExprContext* expr_ctx, + StringRef* constant_val, const std::set fn_name) const override; bool _should_push_down_common_expr() override; diff --git a/be/src/pipeline/exec/scan_operator.cpp b/be/src/pipeline/exec/scan_operator.cpp index 9c61e75d114719..122ca9dd444160 100644 --- a/be/src/pipeline/exec/scan_operator.cpp +++ b/be/src/pipeline/exec/scan_operator.cpp @@ -261,7 +261,8 @@ Status ScanLocalState::_normalize_conjuncts(RuntimeState* state) { auto& conjunct = *it; if (conjunct->root()) { vectorized::VExprSPtr new_root; - RETURN_IF_ERROR(_normalize_predicate(conjunct.get(), new_root)); + RETURN_IF_ERROR( + _normalize_predicate(conjunct.get(), conjunct->root(), new_root, nullptr)); if (new_root) { conjunct->set_root(new_root); if (_should_push_down_common_expr() && @@ -295,131 +296,170 @@ Status ScanLocalState::_normalize_conjuncts(RuntimeState* state) { template Status ScanLocalState::_normalize_predicate(vectorized::VExprContext* context, - vectorized::VExprSPtr& output_expr) { - const auto expr_root = context->root(); - static constexpr auto is_leaf = [](auto&& expr) { return !expr->is_and_expr(); }; - if (expr_root != nullptr) { - if (is_leaf(expr_root)) { - if (dynamic_cast(expr_root.get())) { - // If the expr has virtual slot ref, we need to keep it in the tree. - output_expr = expr_root; - return Status::OK(); + const vectorized::VExprSPtr& root, + vectorized::VExprSPtr& output_expr, + MutilColumnBlockPredicate* parent) { + auto expr_root = root->is_rf_wrapper() ? root->get_impl() : root; + if (expr_root->node_type() == TExprNodeType::COMPOUND_PRED && + expr_root->op() == TExprOpcode::COMPOUND_OR) { + if (_should_push_down_or_predicate(context) != PushDownType::UNACCEPTABLE) { + std::unique_ptr new_root = + OrBlockColumnPredicate::create_unique(); + DCHECK_GE(expr_root->get_num_children(), 1); + for (auto& child : expr_root->children()) { + vectorized::VExprSPtr tmp = nullptr; + RETURN_IF_ERROR(_normalize_predicate(context, child, tmp, new_root.get())); + DCHECK_NE(tmp, nullptr); } - - SlotDescriptor* slot = nullptr; - ColumnValueRangeType* range = nullptr; - PushDownType pdt = PushDownType::UNACCEPTABLE; - RETURN_IF_ERROR(_eval_const_conjuncts(context, &pdt)); - if (pdt == PushDownType::ACCEPTABLE) { - output_expr = nullptr; - return Status::OK(); + if (parent) { + parent->add_column_predicate(std::move(new_root)); + } else { + _or_predicates.emplace_back(std::move(new_root)); } - std::shared_ptr slotref; - for (const auto& child : expr_root->children()) { - if (vectorized::VExpr::expr_without_cast(child)->node_type() != - TExprNodeType::SLOT_REF) { - // not a slot ref(column) - continue; - } - slotref = std::dynamic_pointer_cast( - vectorized::VExpr::expr_without_cast(child)); + } + } else if (expr_root->node_type() == TExprNodeType::COMPOUND_PRED && + expr_root->op() == TExprOpcode::COMPOUND_AND) { + if (!parent) { + // AndPredicate is illegal on scan operator unless it is a child of OrPredicate + return Status::InternalError( + "And expr must have parent MutilColumnBlockPredicate, but now {}", + expr_root->debug_string()); + } + std::unique_ptr new_root = + AndBlockColumnPredicate::create_unique(); + DCHECK_GE(expr_root->get_num_children(), 1); + for (const auto& child : expr_root->children()) { + vectorized::VExprSPtr tmp = nullptr; + RETURN_IF_ERROR(_normalize_predicate(context, child, tmp, new_root.get())); + } + DCHECK_GE(new_root->num_of_column_predicate(), 1); + parent->add_column_predicate(std::move(new_root)); + } else { + PushDownType pdt = PushDownType::UNACCEPTABLE; + if (dynamic_cast(expr_root.get())) { + // If the expr has virtual slot ref, we need to keep it in the tree. + output_expr = expr_root; + return Status::OK(); + } + + SlotDescriptor* slot = nullptr; + ColumnValueRangeType* range = nullptr; + RETURN_IF_ERROR(_eval_const_conjuncts(context, &pdt)); + if (pdt == PushDownType::ACCEPTABLE) { + output_expr = nullptr; + return Status::OK(); + } + std::shared_ptr slotref; + for (const auto& child : expr_root->children()) { + if (vectorized::VExpr::expr_without_cast(child)->node_type() != + TExprNodeType::SLOT_REF) { + // not a slot ref(column) + continue; } - if (_is_predicate_acting_on_slot(expr_root->children(), &slot, &range)) { - Status status = Status::OK(); - std::visit( - [&](auto& value_range) { - RETURN_IF_PUSH_DOWN( - _normalize_in_and_eq_predicate( - context, slot, _slot_id_to_predicates[slot->id()], - value_range, &pdt), - status); - RETURN_IF_PUSH_DOWN( - _normalize_not_in_and_not_eq_predicate( - context, slot, _slot_id_to_predicates[slot->id()], - value_range, &pdt), - status); - RETURN_IF_PUSH_DOWN( - _normalize_is_null_predicate(context, slot, - _slot_id_to_predicates[slot->id()], - value_range, &pdt), - status); - RETURN_IF_PUSH_DOWN( - _normalize_noneq_binary_predicate( - context, slot, _slot_id_to_predicates[slot->id()], - value_range, &pdt), - status); - RETURN_IF_PUSH_DOWN(_normalize_bitmap_filter( - context, slot, - _slot_id_to_predicates[slot->id()], &pdt), - status); - RETURN_IF_PUSH_DOWN(_normalize_bloom_filter( - context, slot, - _slot_id_to_predicates[slot->id()], &pdt), - status); - RETURN_IF_PUSH_DOWN(_normalize_bloom_filter( - context, slot, - _slot_id_to_predicates[slot->id()], &pdt), - status); - RETURN_IF_PUSH_DOWN(_normalize_topn_filter( - context, slot, - _slot_id_to_predicates[slot->id()], &pdt), + slotref = std::dynamic_pointer_cast( + vectorized::VExpr::expr_without_cast(child)); + } + if (_is_predicate_acting_on_slot(expr_root->children(), &slot, &range)) { + Status status = Status::OK(); + std::visit( + [&](auto& value_range) { + auto r = root; + RETURN_IF_PUSH_DOWN( + _normalize_in_and_eq_predicate(context, r, slot, + _slot_id_to_predicates[slot->id()], + value_range, &pdt, parent), + status); + RETURN_IF_PUSH_DOWN( + _normalize_not_in_and_not_eq_predicate( + context, r, slot, _slot_id_to_predicates[slot->id()], + value_range, &pdt, parent), + status); + RETURN_IF_PUSH_DOWN( + _normalize_is_null_predicate(context, r, slot, + _slot_id_to_predicates[slot->id()], + value_range, &pdt, parent), + status); + RETURN_IF_PUSH_DOWN( + _normalize_noneq_binary_predicate( + context, r, slot, _slot_id_to_predicates[slot->id()], + value_range, &pdt, parent), + status); + RETURN_IF_PUSH_DOWN( + _normalize_bitmap_filter(context, r, slot, + _slot_id_to_predicates[slot->id()], &pdt, + parent), + status); + RETURN_IF_PUSH_DOWN( + _normalize_bloom_filter(context, r, slot, + _slot_id_to_predicates[slot->id()], &pdt, + parent), + status); + RETURN_IF_PUSH_DOWN( + _normalize_topn_filter(context, r, slot, + _slot_id_to_predicates[slot->id()], &pdt, + parent), + status); + if (state()->enable_function_pushdown()) { + RETURN_IF_PUSH_DOWN(_normalize_function_filters(context, slot, &pdt), status); - if (state()->enable_function_pushdown()) { - RETURN_IF_PUSH_DOWN( - _normalize_function_filters(context, slot, &pdt), status); - } - }, - *range); - RETURN_IF_ERROR(status); - } - if (pdt == PushDownType::ACCEPTABLE && slotref != nullptr && - slotref->data_type()->get_primitive_type() == PrimitiveType::TYPE_VARIANT) { - // remaining it in the expr tree, in order to filter by function if the pushdown - // predicate is not applied - output_expr = expr_root; // remaining in conjunct tree - return Status::OK(); - } + } + }, + *range); + RETURN_IF_ERROR(status); + } + if (pdt == PushDownType::ACCEPTABLE && slotref != nullptr && + slotref->data_type()->get_primitive_type() == PrimitiveType::TYPE_VARIANT) { + // remaining it in the expr tree, in order to filter by function if the pushdown + // predicate is not applied + output_expr = expr_root; // remaining in conjunct tree + return Status::OK(); + } - if (pdt == PushDownType::ACCEPTABLE && (_is_key_column(slot->col_name()))) { - output_expr = nullptr; - return Status::OK(); - } else { - // for PARTIAL_ACCEPTABLE and UNACCEPTABLE, do not remove expr from the tree - output_expr = expr_root; - return Status::OK(); - } + if (pdt == PushDownType::ACCEPTABLE && (_is_key_column(slot->col_name()))) { + output_expr = nullptr; + return Status::OK(); } else { - return Status::InternalError("conjunct root should not and expr, but now {}", - expr_root->debug_string()); + // for PARTIAL_ACCEPTABLE and UNACCEPTABLE, do not remove expr from the tree + output_expr = root; + return Status::OK(); } } - output_expr = expr_root; + output_expr = root; return Status::OK(); } template Status ScanLocalState::_normalize_bloom_filter( - vectorized::VExprContext* expr_ctx, SlotDescriptor* slot, - std::vector>& predicates, PushDownType* pdt) { - auto expr = expr_ctx->root()->is_rf_wrapper() ? expr_ctx->root()->get_impl() : expr_ctx->root(); + vectorized::VExprContext* expr_ctx, vectorized::VExprSPtr& root, SlotDescriptor* slot, + std::vector>& predicates, PushDownType* pdt, + MutilColumnBlockPredicate* parent) { + std::shared_ptr pred = nullptr; + Defer defer = [&]() { + if (parent && pred) { + DCHECK(*pdt != PushDownType::UNACCEPTABLE) << root->debug_string(); + parent->add_column_predicate(SingleColumnBlockPredicate::create_unique(pred)); + } else if (pred) { + DCHECK(*pdt != PushDownType::UNACCEPTABLE) << root->debug_string(); + predicates.emplace_back(pred); + } + }; + auto expr = root->is_rf_wrapper() ? root->get_impl() : root; if (TExprNodeType::BLOOM_PRED == expr->node_type()) { DCHECK(expr->get_num_children() == 1); - DCHECK(expr_ctx->root()->is_rf_wrapper()); - PushDownType temp_pdt = _should_push_down_bloom_filter(); - auto* rf_wrapper = assert_cast(expr_ctx->root().get()); - if (temp_pdt != PushDownType::UNACCEPTABLE) { - auto* rf_expr = assert_cast(expr_ctx->root().get()); - predicates.emplace_back( - create_bloom_filter_predicate(slot->id(), - slot->type()->get_primitive_type() == TYPE_VARIANT - ? expr->get_child(0)->data_type() - : slot->type(), - expr->get_bloom_filter_func())); - predicates.back()->attach_profile_counter( - rf_wrapper->filter_id(), rf_wrapper->predicate_filtered_rows_counter(), - rf_wrapper->predicate_input_rows_counter(), - rf_expr->predicate_always_true_rows_counter()); - *pdt = temp_pdt; + DCHECK(root->is_rf_wrapper()); + *pdt = _should_push_down_bloom_filter(); + if (*pdt != PushDownType::UNACCEPTABLE) { + auto* rf_expr = assert_cast(root.get()); + pred = create_bloom_filter_predicate( + _parent->intermediate_row_desc().get_column_id(slot->id()), slot->col_name(), + slot->type()->get_primitive_type() == TYPE_VARIANT + ? expr->get_child(0)->data_type() + : slot->type(), + expr->get_bloom_filter_func()); + pred->attach_profile_counter(rf_expr->filter_id(), + rf_expr->predicate_filtered_rows_counter(), + rf_expr->predicate_input_rows_counter(), + rf_expr->predicate_always_true_rows_counter()); } } return Status::OK(); @@ -427,18 +467,28 @@ Status ScanLocalState::_normalize_bloom_filter( template Status ScanLocalState::_normalize_topn_filter( - vectorized::VExprContext* expr_ctx, SlotDescriptor* slot, - std::vector>& predicates, PushDownType* pdt) { - auto expr = expr_ctx->root()->is_rf_wrapper() ? expr_ctx->root()->get_impl() : expr_ctx->root(); + vectorized::VExprContext* expr_ctx, vectorized::VExprSPtr& root, SlotDescriptor* slot, + std::vector>& predicates, PushDownType* pdt, + MutilColumnBlockPredicate* parent) { + std::shared_ptr pred = nullptr; + Defer defer = [&]() { + if (parent && pred) { + DCHECK(*pdt != PushDownType::UNACCEPTABLE) << root->debug_string(); + parent->add_column_predicate(SingleColumnBlockPredicate::create_unique(pred)); + } else if (pred) { + DCHECK(*pdt != PushDownType::UNACCEPTABLE) << root->debug_string(); + predicates.emplace_back(pred); + } + }; + auto expr = root->is_rf_wrapper() ? root->get_impl() : root; if (expr->is_topn_filter()) { - PushDownType temp_pdt = _should_push_down_topn_filter(); - if (temp_pdt != PushDownType::UNACCEPTABLE) { + *pdt = _should_push_down_topn_filter(); + if (*pdt != PushDownType::UNACCEPTABLE) { auto& p = _parent->cast(); - auto& pred = _state->get_query_ctx()->get_runtime_predicate( + auto& tmp = _state->get_query_ctx()->get_runtime_predicate( assert_cast(expr.get())->source_node_id()); - if (_push_down_topn(pred)) { - predicates.emplace_back(pred.get_predicate(p.node_id())); - *pdt = temp_pdt; + if (_push_down_topn(tmp)) { + pred = tmp.get_predicate(p.node_id()); } } } @@ -447,27 +497,36 @@ Status ScanLocalState::_normalize_topn_filter( template Status ScanLocalState::_normalize_bitmap_filter( - vectorized::VExprContext* expr_ctx, SlotDescriptor* slot, - std::vector>& predicates, PushDownType* pdt) { - auto expr = expr_ctx->root()->is_rf_wrapper() ? expr_ctx->root()->get_impl() : expr_ctx->root(); + vectorized::VExprContext* expr_ctx, vectorized::VExprSPtr& root, SlotDescriptor* slot, + std::vector>& predicates, PushDownType* pdt, + MutilColumnBlockPredicate* parent) { + std::shared_ptr pred = nullptr; + Defer defer = [&]() { + if (parent && pred) { + DCHECK(*pdt != PushDownType::UNACCEPTABLE) << root->debug_string(); + parent->add_column_predicate(SingleColumnBlockPredicate::create_unique(pred)); + } else if (pred) { + DCHECK(*pdt != PushDownType::UNACCEPTABLE) << root->debug_string(); + predicates.emplace_back(pred); + } + }; + auto expr = root->is_rf_wrapper() ? root->get_impl() : root; if (TExprNodeType::BITMAP_PRED == expr->node_type()) { - DCHECK(expr->get_num_children() == 1); - DCHECK(expr_ctx->root()->is_rf_wrapper()); - PushDownType temp_pdt = _should_push_down_bitmap_filter(); - auto* rf_wrapper = assert_cast(expr_ctx->root().get()); - if (temp_pdt != PushDownType::UNACCEPTABLE) { - auto* rf_expr = assert_cast(expr_ctx->root().get()); - predicates.emplace_back(create_bitmap_filter_predicate( - slot->id(), + *pdt = _should_push_down_bitmap_filter(); + if (*pdt != PushDownType::UNACCEPTABLE) { + DCHECK(expr->get_num_children() == 1); + DCHECK(root->is_rf_wrapper()); + auto* rf_expr = assert_cast(root.get()); + pred = create_bitmap_filter_predicate( + _parent->intermediate_row_desc().get_column_id(slot->id()), slot->col_name(), slot->type()->get_primitive_type() == TYPE_VARIANT ? expr->get_child(0)->data_type() : slot->type(), - expr->get_bitmap_filter_func())); - predicates.back()->attach_profile_counter( - rf_wrapper->filter_id(), rf_wrapper->predicate_filtered_rows_counter(), - rf_wrapper->predicate_input_rows_counter(), - rf_expr->predicate_always_true_rows_counter()); - *pdt = temp_pdt; + expr->get_bitmap_filter_func()); + pred->attach_profile_counter(rf_expr->filter_id(), + rf_expr->predicate_filtered_rows_counter(), + rf_expr->predicate_input_rows_counter(), + rf_expr->predicate_always_true_rows_counter()); } } return Status::OK(); @@ -491,7 +550,7 @@ Status ScanLocalState::_normalize_function_filters(vectorized::VExprCon StringRef val; PushDownType temp_pdt; RETURN_IF_ERROR(_should_push_down_function_filter( - reinterpret_cast(fn_expr), expr_ctx, &val, &fn_ctx, + assert_cast(fn_expr), expr_ctx, &val, &fn_ctx, temp_pdt)); if (temp_pdt != PushDownType::UNACCEPTABLE) { std::string col = slot->col_name(); @@ -599,9 +658,19 @@ Status ScanLocalState::_eval_const_conjuncts(vectorized::VExprContext* template template Status ScanLocalState::_normalize_in_and_eq_predicate( - vectorized::VExprContext* expr_ctx, SlotDescriptor* slot, + vectorized::VExprContext* expr_ctx, vectorized::VExprSPtr& root, SlotDescriptor* slot, std::vector>& predicates, ColumnValueRange& range, - PushDownType* pdt) { + PushDownType* pdt, MutilColumnBlockPredicate* parent) { + std::shared_ptr pred = nullptr; + Defer defer = [&]() { + if (parent && pred) { + DCHECK(*pdt != PushDownType::UNACCEPTABLE) << root->debug_string(); + parent->add_column_predicate(SingleColumnBlockPredicate::create_unique(pred)); + } else if (pred) { + DCHECK(*pdt != PushDownType::UNACCEPTABLE) << root->debug_string(); + predicates.emplace_back(pred); + } + }; auto temp_range = ColumnValueRange::create_empty_column_value_range( slot->is_nullable(), range.precision(), range.scale()); @@ -610,9 +679,13 @@ Status ScanLocalState::_normalize_in_and_eq_predicate( return Status::OK(); } - auto expr = expr_ctx->root()->is_rf_wrapper() ? expr_ctx->root()->get_impl() : expr_ctx->root(); + auto expr = root->is_rf_wrapper() ? root->get_impl() : root; // 1. Normalize in conjuncts like 'where col in (v1, v2, v3)' if (TExprNodeType::IN_PRED == expr->node_type()) { + *pdt = _should_push_down_in_predicate(); + if (*pdt == PushDownType::UNACCEPTABLE) { + return Status::OK(); + } HybridSetBase::IteratorBase* iter = nullptr; auto hybrid_set = expr->get_set_func(); @@ -621,34 +694,18 @@ Status ScanLocalState::_normalize_in_and_eq_predicate( if (hybrid_set->size() <= _parent->cast()._max_pushdown_conditions_per_column) { iter = hybrid_set->begin(); - } else { - predicates.emplace_back(create_in_list_predicate( - slot->id(), - slot->type()->get_primitive_type() == TYPE_VARIANT - ? expr->get_child(0)->data_type() - : slot->type(), - expr->get_set_func(), false)); - if (expr_ctx->root()->is_rf_wrapper()) { - auto* rf_wrapper = - assert_cast(expr_ctx->root().get()); - predicates.back()->attach_profile_counter( - rf_wrapper->filter_id(), rf_wrapper->predicate_filtered_rows_counter(), - rf_wrapper->predicate_input_rows_counter(), - rf_wrapper->predicate_always_true_rows_counter()); - } - *pdt = PushDownType::ACCEPTABLE; - return Status::OK(); } } else { // normal in predicate - auto* pred = assert_cast(expr.get()); - if (_should_push_down_in_predicate(pred, false) == PushDownType::UNACCEPTABLE) { + auto* tmp = assert_cast(expr.get()); + if (tmp->is_not_in()) { + *pdt = PushDownType::UNACCEPTABLE; return Status::OK(); } // begin to push InPredicate value into ColumnValueRange auto* state = reinterpret_cast( - expr_ctx->fn_context(pred->fn_context_index()) + expr_ctx->fn_context(tmp->fn_context_index()) ->get_function_state(FunctionContext::FRAGMENT_LOCAL)); // xx in (col, xx, xx) should not be push down @@ -660,37 +717,31 @@ Status ScanLocalState::_normalize_in_and_eq_predicate( iter = state->hybrid_set->begin(); } - while (iter->has_next()) { - // column in (nullptr) is always false so continue to - // dispose next item - DCHECK(iter->get_value() != nullptr); - const auto* value = iter->get_value(); - RETURN_IF_ERROR(_change_value_range( - temp_range, value, ColumnValueRange::add_fixed_value_range, "")); - iter->next(); + if (iter && !parent) { + while (iter->has_next()) { + // column in (nullptr) is always false so continue to + // dispose next item + DCHECK(iter->get_value() != nullptr); + const auto* value = iter->get_value(); + RETURN_IF_ERROR(_change_value_range( + temp_range, value, ColumnValueRange::add_fixed_value_range, "")); + iter->next(); + } + range.intersection(temp_range); } - range.intersection(temp_range); - predicates.emplace_back(create_in_list_predicate( - slot->id(), + pred = create_in_list_predicate( + _parent->intermediate_row_desc().get_column_id(slot->id()), slot->col_name(), slot->type()->get_primitive_type() == TYPE_VARIANT ? expr->get_child(0)->data_type() : slot->type(), - hybrid_set, false)); - *pdt = PushDownType::ACCEPTABLE; + hybrid_set, false); } else if (TExprNodeType::BINARY_PRED == expr->node_type()) { DCHECK(expr->get_num_children() == 2); - auto eq_checker = [](const std::string& fn_name) { return fn_name == "eq"; }; - StringRef value; - int slot_ref_child = -1; - - PushDownType temp_pdt; - RETURN_IF_ERROR(_should_push_down_binary_predicate( - assert_cast(expr.get()), expr_ctx, &value, - &slot_ref_child, eq_checker, temp_pdt)); - if (temp_pdt == PushDownType::UNACCEPTABLE) { + *pdt = _should_push_down_binary_predicate( + assert_cast(expr.get()), expr_ctx, &value, {"eq"}); + if (*pdt == PushDownType::UNACCEPTABLE) { return Status::OK(); } - DCHECK(slot_ref_child >= 0); // where A = nullptr should return empty result set auto fn_name = std::string(""); if (value.data != nullptr) { @@ -700,99 +751,66 @@ Status ScanLocalState::_normalize_in_and_eq_predicate( "PrimitiveType {} meet invalid input value size {}, expect size {}", T, value.size, sizeof(typename PrimitiveTypeTraits::CppType)); } - predicates.emplace_back(create_comparison_predicate0( - slot->id(), + pred = create_comparison_predicate0( + _parent->intermediate_row_desc().get_column_id(slot->id()), slot->col_name(), slot->type()->get_primitive_type() == TYPE_VARIANT ? expr->get_child(0)->data_type() : slot->type(), - value, false, _arena)); + value, false, _arena); - if constexpr (T == TYPE_CHAR || T == TYPE_VARCHAR || T == TYPE_STRING || - T == TYPE_HLL) { - auto val = StringRef(value.data, value.size); - RETURN_IF_ERROR(_change_value_range( - temp_range, reinterpret_cast(&val), - ColumnValueRange::add_fixed_value_range, fn_name)); - } else { - if (sizeof(typename PrimitiveTypeTraits::CppType) != value.size) { - return Status::InternalError( - "PrimitiveType {} meet invalid input value size {}, expect size {}", T, - value.size, sizeof(typename PrimitiveTypeTraits::CppType)); + if (!parent) { + if constexpr (T == TYPE_CHAR || T == TYPE_VARCHAR || T == TYPE_STRING || + T == TYPE_HLL) { + auto val = StringRef(value.data, value.size); + RETURN_IF_ERROR(_change_value_range( + temp_range, reinterpret_cast(&val), + ColumnValueRange::add_fixed_value_range, fn_name)); + } else { + if (sizeof(typename PrimitiveTypeTraits::CppType) != value.size) { + return Status::InternalError( + "PrimitiveType {} meet invalid input value size {}, expect size {}", + T, value.size, sizeof(typename PrimitiveTypeTraits::CppType)); + } + RETURN_IF_ERROR(_change_value_range( + temp_range, reinterpret_cast(value.data), + ColumnValueRange::add_fixed_value_range, fn_name)); } - RETURN_IF_ERROR(_change_value_range( - temp_range, reinterpret_cast(value.data), - ColumnValueRange::add_fixed_value_range, fn_name)); + range.intersection(temp_range); } - range.intersection(temp_range); } else { + *pdt = PushDownType::UNACCEPTABLE; _eos = true; _scan_dependency->set_ready(); } - *pdt = temp_pdt; } return Status::OK(); } -template -Status ScanLocalState::_should_push_down_binary_predicate( - vectorized::VectorizedFnCall* fn_call, vectorized::VExprContext* expr_ctx, - StringRef* constant_val, int* slot_ref_child, - const std::function& fn_checker, PushDownType& pdt) { - if (!fn_checker(fn_call->fn().name.function_name)) { - pdt = PushDownType::UNACCEPTABLE; - return Status::OK(); - } - DCHECK(constant_val->data == nullptr) << "constant_val should not have a value"; - const auto& children = fn_call->children(); - DCHECK(children.size() == 2); - for (int i = 0; i < 2; i++) { - if (vectorized::VExpr::expr_without_cast(children[i])->node_type() != - TExprNodeType::SLOT_REF) { - // not a slot ref(column) - continue; - } - if (!children[1 - i]->is_constant()) { - // only handle constant value - pdt = PushDownType::UNACCEPTABLE; - return Status::OK(); - } else { - std::shared_ptr const_col_wrapper; - RETURN_IF_ERROR(children[1 - i]->get_const_col(expr_ctx, &const_col_wrapper)); - if (const auto* const_column = check_and_get_column( - const_col_wrapper->column_ptr.get())) { - *slot_ref_child = i; - *constant_val = const_column->get_data_at(0); - } else { - pdt = PushDownType::UNACCEPTABLE; - return Status::OK(); - } - } - } - pdt = PushDownType::ACCEPTABLE; - return Status::OK(); -} - -template -PushDownType ScanLocalState::_should_push_down_in_predicate(vectorized::VInPredicate* pred, - bool is_not_in) { - if (pred->is_not_in() != is_not_in) { - return PushDownType::UNACCEPTABLE; - } - return PushDownType::ACCEPTABLE; -} - template template Status ScanLocalState::_normalize_not_in_and_not_eq_predicate( - vectorized::VExprContext* expr_ctx, SlotDescriptor* slot, + vectorized::VExprContext* expr_ctx, vectorized::VExprSPtr& root, SlotDescriptor* slot, std::vector>& predicates, ColumnValueRange& range, - PushDownType* pdt) { + PushDownType* pdt, MutilColumnBlockPredicate* parent) { + std::shared_ptr pred = nullptr; + Defer defer = [&]() { + if (parent && pred) { + DCHECK(*pdt != PushDownType::UNACCEPTABLE) << root->debug_string(); + parent->add_column_predicate(SingleColumnBlockPredicate::create_unique(pred)); + } else if (pred) { + DCHECK(*pdt != PushDownType::UNACCEPTABLE) << root->debug_string(); + predicates.emplace_back(pred); + } + }; bool is_fixed_range = range.is_fixed_value_range(); - PushDownType temp_pdt = PushDownType::UNACCEPTABLE; - auto expr = expr_ctx->root()->is_rf_wrapper() ? expr_ctx->root()->get_impl() : expr_ctx->root(); + auto expr = root->is_rf_wrapper() ? root->get_impl() : root; // 1. Normalize in conjuncts like 'where col in (v1, v2, v3)' if (TExprNodeType::IN_PRED == expr->node_type()) { + *pdt = _should_push_down_in_predicate(); + if (*pdt == PushDownType::UNACCEPTABLE) { + return Status::OK(); + } /// `VDirectInPredicate` here should not be pushed down. /// here means the `VDirectInPredicate` is too big to be converted into `ColumnValueRange`. /// For non-key columns and `_storage_no_merge()` is false, this predicate should not be pushed down. @@ -801,15 +819,15 @@ Status ScanLocalState::_normalize_not_in_and_not_eq_predicate( return Status::OK(); } - auto* pred = assert_cast(expr.get()); - if ((_should_push_down_in_predicate(pred, true)) == PushDownType::UNACCEPTABLE) { + auto* tmp = assert_cast(expr.get()); + if (!tmp->is_not_in()) { *pdt = PushDownType::UNACCEPTABLE; return Status::OK(); } // begin to push InPredicate value into ColumnValueRange auto* state = reinterpret_cast( - expr_ctx->fn_context(pred->fn_context_index()) + expr_ctx->fn_context(tmp->fn_context_index()) ->get_function_state(FunctionContext::FRAGMENT_LOCAL)); // xx in (col, xx, xx) should not be push down @@ -824,35 +842,33 @@ Status ScanLocalState::_normalize_not_in_and_not_eq_predicate( _eos = true; _scan_dependency->set_ready(); } - predicates.emplace_back(create_in_list_predicate( - slot->id(), + pred = create_in_list_predicate( + _parent->intermediate_row_desc().get_column_id(slot->id()), slot->col_name(), slot->type()->get_primitive_type() == TYPE_VARIANT ? expr->get_child(0)->data_type() : slot->type(), - state->hybrid_set, false)); - while (iter->has_next()) { - // column not in (nullptr) is always true - DCHECK(iter->get_value() != nullptr); - const auto value = iter->get_value(); - if (is_fixed_range) { - RETURN_IF_ERROR(_change_value_range( - range, value, ColumnValueRange::remove_fixed_value_range, fn_name)); + state->hybrid_set, false); + if (!parent) { + while (iter->has_next()) { + // column not in (nullptr) is always true + DCHECK(iter->get_value() != nullptr); + const auto value = iter->get_value(); + if (is_fixed_range) { + RETURN_IF_ERROR(_change_value_range( + range, value, ColumnValueRange::remove_fixed_value_range, fn_name)); + } + iter->next(); } - iter->next(); } } else if (TExprNodeType::BINARY_PRED == expr->node_type()) { DCHECK(expr->get_num_children() == 2); - auto ne_checker = [](const std::string& fn_name) { return fn_name == "ne"; }; StringRef value; - int slot_ref_child = -1; - RETURN_IF_ERROR(_should_push_down_binary_predicate( - assert_cast(expr.get()), expr_ctx, &value, - &slot_ref_child, ne_checker, temp_pdt)); - if (temp_pdt == PushDownType::UNACCEPTABLE) { + *pdt = _should_push_down_binary_predicate( + assert_cast(expr.get()), expr_ctx, &value, {"ne"}); + if (*pdt == PushDownType::UNACCEPTABLE) { return Status::OK(); } - DCHECK(slot_ref_child >= 0); // where A = nullptr should return empty result set if (value.data != nullptr) { if (!is_string_type(T) && @@ -861,37 +877,38 @@ Status ScanLocalState::_normalize_not_in_and_not_eq_predicate( "PrimitiveType {} meet invalid input value size {}, expect size {}", T, value.size, sizeof(typename PrimitiveTypeTraits::CppType)); } - predicates.emplace_back(create_comparison_predicate0( - slot->id(), + pred = create_comparison_predicate0( + _parent->intermediate_row_desc().get_column_id(slot->id()), slot->col_name(), slot->type()->get_primitive_type() == TYPE_VARIANT ? expr->get_child(0)->data_type() : slot->type(), - value, false, _arena)); - auto fn_name = std::string(""); - if constexpr (T == TYPE_CHAR || T == TYPE_VARCHAR || T == TYPE_STRING || - T == TYPE_HLL) { - auto val = StringRef(value.data, value.size); - if (is_fixed_range) { - RETURN_IF_ERROR(_change_value_range( - range, reinterpret_cast(&val), - ColumnValueRange::remove_fixed_value_range, fn_name)); - } - } else { - if (is_fixed_range) { - RETURN_IF_ERROR(_change_value_range( - range, reinterpret_cast(value.data), - ColumnValueRange::remove_fixed_value_range, fn_name)); + value, false, _arena); + if (!parent) { + auto fn_name = std::string(""); + if constexpr (T == TYPE_CHAR || T == TYPE_VARCHAR || T == TYPE_STRING || + T == TYPE_HLL) { + auto val = StringRef(value.data, value.size); + if (is_fixed_range) { + RETURN_IF_ERROR(_change_value_range( + range, reinterpret_cast(&val), + ColumnValueRange::remove_fixed_value_range, fn_name)); + } + } else { + if (is_fixed_range) { + RETURN_IF_ERROR(_change_value_range( + range, reinterpret_cast(value.data), + ColumnValueRange::remove_fixed_value_range, fn_name)); + } } } } else { + *pdt = PushDownType::UNACCEPTABLE; _eos = true; _scan_dependency->set_ready(); } } else { *pdt = PushDownType::UNACCEPTABLE; - return Status::OK(); } - *pdt = PushDownType::ACCEPTABLE; return Status::OK(); } @@ -900,8 +917,7 @@ template ::_change_value_range(ColumnValueRange& temp_range, const void* value, const ChangeFixedValueRangeFunc& func, - const std::string& fn_name, - int slot_ref_child) { + const std::string& fn_name) { if constexpr (PrimitiveType == TYPE_DATE) { VecDateTimeValue tmp_value; memcpy(&tmp_value, value, sizeof(VecDateTimeValue)); @@ -917,7 +933,7 @@ Status ScanLocalState::_change_value_range(ColumnValueRange::CppType*>( &tmp_value)); } @@ -927,7 +943,7 @@ Status ScanLocalState::_change_value_range(ColumnValueRange::CppType*>( value)); } else { - func(temp_range, to_olap_filter_type(fn_name, slot_ref_child), + func(temp_range, to_olap_filter_type(fn_name), reinterpret_cast::CppType*>( reinterpret_cast(value))); } @@ -935,7 +951,7 @@ Status ScanLocalState::_change_value_range(ColumnValueRange(value)); } else { - func(temp_range, to_olap_filter_type(fn_name, slot_ref_child), + func(temp_range, to_olap_filter_type(fn_name), reinterpret_cast(value)); } } else if constexpr ((PrimitiveType == TYPE_DECIMALV2) || (PrimitiveType == TYPE_CHAR) || @@ -954,7 +970,7 @@ Status ScanLocalState::_change_value_range(ColumnValueRange::CppType*>( value)); } else { - func(temp_range, to_olap_filter_type(fn_name, slot_ref_child), + func(temp_range, to_olap_filter_type(fn_name), reinterpret_cast::CppType*>( value)); } @@ -968,30 +984,50 @@ Status ScanLocalState::_change_value_range(ColumnValueRange template Status ScanLocalState::_normalize_is_null_predicate( - vectorized::VExprContext* expr_ctx, SlotDescriptor* slot, + vectorized::VExprContext* expr_ctx, vectorized::VExprSPtr& root, SlotDescriptor* slot, std::vector>& predicates, ColumnValueRange& range, - PushDownType* pdt) { - auto expr = expr_ctx->root()->is_rf_wrapper() ? expr_ctx->root()->get_impl() : expr_ctx->root(); - PushDownType temp_pdt = _should_push_down_is_null_predicate(); - if (temp_pdt == PushDownType::UNACCEPTABLE) { + PushDownType* pdt, MutilColumnBlockPredicate* parent) { + std::shared_ptr pred = nullptr; + Defer defer = [&]() { + if (parent && pred) { + DCHECK(*pdt != PushDownType::UNACCEPTABLE) << root->debug_string(); + parent->add_column_predicate(SingleColumnBlockPredicate::create_unique(pred)); + } else if (pred) { + DCHECK(*pdt != PushDownType::UNACCEPTABLE) << root->debug_string(); + predicates.emplace_back(pred); + } + }; + auto expr = root->is_rf_wrapper() ? root->get_impl() : root; + if (auto fn_call = dynamic_cast(expr.get())) { + *pdt = _should_push_down_is_null_predicate(fn_call); + } else { + *pdt = PushDownType::UNACCEPTABLE; + } + + if (*pdt == PushDownType::UNACCEPTABLE) { return Status::OK(); } - if (auto fn_call = dynamic_cast(expr.get())) { - if (fn_call->fn().name.function_name == "is_null_pred") { - predicates.emplace_back(NullPredicate::create_shared(slot->id(), true, T)); + auto fn_call = assert_cast(expr.get()); + if (fn_call->fn().name.function_name == "is_null_pred") { + pred = NullPredicate::create_shared( + _parent->intermediate_row_desc().get_column_id(slot->id()), slot->col_name(), true, + T); + if (!parent) { auto temp_range = ColumnValueRange::create_empty_column_value_range( slot->is_nullable(), range.precision(), range.scale()); temp_range.set_contain_null(true); range.intersection(temp_range); - *pdt = temp_pdt; - } else if (fn_call->fn().name.function_name == "is_not_null_pred") { - predicates.emplace_back(NullPredicate::create_shared(slot->id(), false, T)); + } + } else if (fn_call->fn().name.function_name == "is_not_null_pred") { + pred = NullPredicate::create_shared( + _parent->intermediate_row_desc().get_column_id(slot->id()), slot->col_name(), false, + T); + if (!parent) { auto temp_range = ColumnValueRange::create_empty_column_value_range( slot->is_nullable(), range.precision(), range.scale()); temp_range.set_contain_null(false); range.intersection(temp_range); - *pdt = temp_pdt; } } return Status::OK(); @@ -1000,77 +1036,88 @@ Status ScanLocalState::_normalize_is_null_predicate( template template Status ScanLocalState::_normalize_noneq_binary_predicate( - vectorized::VExprContext* expr_ctx, SlotDescriptor* slot, + vectorized::VExprContext* expr_ctx, vectorized::VExprSPtr& root, SlotDescriptor* slot, std::vector>& predicates, ColumnValueRange& range, - PushDownType* pdt) { - auto expr = expr_ctx->root()->is_rf_wrapper() ? expr_ctx->root()->get_impl() : expr_ctx->root(); + PushDownType* pdt, MutilColumnBlockPredicate* parent) { + std::shared_ptr pred = nullptr; + Defer defer = [&]() { + if (parent && pred) { + DCHECK(*pdt != PushDownType::UNACCEPTABLE) << root->debug_string(); + parent->add_column_predicate(SingleColumnBlockPredicate::create_unique(pred)); + } else if (pred) { + DCHECK(*pdt != PushDownType::UNACCEPTABLE) << root->debug_string(); + predicates.emplace_back(pred); + } + }; + auto expr = root->is_rf_wrapper() ? root->get_impl() : root; if (TExprNodeType::BINARY_PRED == expr->node_type()) { DCHECK(expr->get_num_children() == 2); - auto noneq_checker = [](const std::string& fn_name) { - return fn_name != "ne" && fn_name != "eq" && fn_name != "eq_for_null"; - }; StringRef value; - int slot_ref_child = -1; - PushDownType temp_pdt; - RETURN_IF_ERROR(_should_push_down_binary_predicate( + *pdt = _should_push_down_binary_predicate( assert_cast(expr.get()), expr_ctx, &value, - &slot_ref_child, noneq_checker, temp_pdt)); - if (temp_pdt != PushDownType::UNACCEPTABLE) { - DCHECK(slot_ref_child >= 0); - const std::string& function_name = - assert_cast(expr.get())->fn().name.function_name; - - // where A = nullptr should return empty result set - if (value.data != nullptr) { - if (function_name == "lt") { - predicates.emplace_back(create_comparison_predicate0( - slot->id(), - slot->type()->get_primitive_type() == TYPE_VARIANT - ? expr->get_child(0)->data_type() - : slot->type(), - value, false, _arena)); - } else if (function_name == "gt") { - predicates.emplace_back(create_comparison_predicate0( - slot->id(), - slot->type()->get_primitive_type() == TYPE_VARIANT - ? expr->get_child(0)->data_type() - : slot->type(), - value, false, _arena)); - } else if (function_name == "le") { - predicates.emplace_back(create_comparison_predicate0( - slot->id(), - slot->type()->get_primitive_type() == TYPE_VARIANT - ? expr->get_child(0)->data_type() - : slot->type(), - value, false, _arena)); - } else if (function_name == "ge") { - predicates.emplace_back(create_comparison_predicate0( - slot->id(), - slot->type()->get_primitive_type() == TYPE_VARIANT - ? expr->get_child(0)->data_type() - : slot->type(), - value, false, _arena)); - } else { - throw Exception( - Status::InternalError("Unsupported function name: {}", function_name)); - } + {"lt", "gt", "le", "ge"}); + if (*pdt == PushDownType::UNACCEPTABLE) { + return Status::OK(); + } + const std::string& function_name = + assert_cast(expr.get())->fn().name.function_name; + + // where A = nullptr should return empty result set + if (value.data != nullptr) { + if (function_name == "lt") { + pred = create_comparison_predicate0( + _parent->intermediate_row_desc().get_column_id(slot->id()), + slot->col_name(), + slot->type()->get_primitive_type() == TYPE_VARIANT + ? expr->get_child(0)->data_type() + : slot->type(), + value, false, _arena); + } else if (function_name == "gt") { + pred = create_comparison_predicate0( + _parent->intermediate_row_desc().get_column_id(slot->id()), + slot->col_name(), + slot->type()->get_primitive_type() == TYPE_VARIANT + ? expr->get_child(0)->data_type() + : slot->type(), + value, false, _arena); + } else if (function_name == "le") { + pred = create_comparison_predicate0( + _parent->intermediate_row_desc().get_column_id(slot->id()), + slot->col_name(), + slot->type()->get_primitive_type() == TYPE_VARIANT + ? expr->get_child(0)->data_type() + : slot->type(), + value, false, _arena); + } else if (function_name == "ge") { + pred = create_comparison_predicate0( + _parent->intermediate_row_desc().get_column_id(slot->id()), + slot->col_name(), + slot->type()->get_primitive_type() == TYPE_VARIANT + ? expr->get_child(0)->data_type() + : slot->type(), + value, false, _arena); + } else { + throw Exception( + Status::InternalError("Unsupported function name: {}", function_name)); + } + if (!parent) { if constexpr (T == TYPE_CHAR || T == TYPE_VARCHAR || T == TYPE_STRING || T == TYPE_HLL) { auto val = StringRef(value.data, value.size); RETURN_IF_ERROR(_change_value_range(range, reinterpret_cast(&val), ColumnValueRange::add_value_range, - function_name, slot_ref_child)); + function_name)); } else { RETURN_IF_ERROR(_change_value_range( range, reinterpret_cast(value.data), - ColumnValueRange::add_value_range, function_name, slot_ref_child)); + ColumnValueRange::add_value_range, function_name)); } - *pdt = temp_pdt; - } else { - _eos = true; - _scan_dependency->set_ready(); } + } else { + *pdt = PushDownType::UNACCEPTABLE; + _eos = true; + _scan_dependency->set_ready(); } } return Status::OK(); diff --git a/be/src/pipeline/exec/scan_operator.h b/be/src/pipeline/exec/scan_operator.h index 8a984cedc6cddd..f1f1624e423220 100644 --- a/be/src/pipeline/exec/scan_operator.h +++ b/be/src/pipeline/exec/scan_operator.h @@ -202,18 +202,31 @@ class ScanLocalState : public ScanLocalStateBase { virtual bool _storage_no_merge() { return false; } virtual bool _push_down_topn(const vectorized::RuntimePredicate& predicate) { return false; } virtual bool _is_key_column(const std::string& col_name) { return false; } - virtual PushDownType _should_push_down_bloom_filter() { return PushDownType::UNACCEPTABLE; } - virtual PushDownType _should_push_down_topn_filter() { return PushDownType::UNACCEPTABLE; } - virtual PushDownType _should_push_down_bitmap_filter() { return PushDownType::UNACCEPTABLE; } - virtual PushDownType _should_push_down_is_null_predicate() { + virtual PushDownType _should_push_down_bloom_filter() const { return PushDownType::UNACCEPTABLE; } - Status _should_push_down_binary_predicate( + virtual PushDownType _should_push_down_topn_filter() const { + return PushDownType::UNACCEPTABLE; + } + virtual PushDownType _should_push_down_bitmap_filter() const { + return PushDownType::UNACCEPTABLE; + } + virtual PushDownType _should_push_down_is_null_predicate( + vectorized::VectorizedFnCall* fn_call) const { + return PushDownType::UNACCEPTABLE; + } + virtual PushDownType _should_push_down_in_predicate() const { + return PushDownType::UNACCEPTABLE; + } + virtual PushDownType _should_push_down_or_predicate( + const vectorized::VExprContext* expr_ctx) const { + return PushDownType::UNACCEPTABLE; + } + virtual PushDownType _should_push_down_binary_predicate( vectorized::VectorizedFnCall* fn_call, vectorized::VExprContext* expr_ctx, - StringRef* constant_val, int* slot_ref_child, - const std::function& fn_checker, PushDownType& pdt); - - PushDownType _should_push_down_in_predicate(vectorized::VInPredicate* in_pred, bool is_not_in); + StringRef* constant_val, const std::set fn_name) const { + return PushDownType::UNACCEPTABLE; + } virtual Status _should_push_down_function_filter(vectorized::VectorizedFnCall* fn_call, vectorized::VExprContext* expr_ctx, @@ -234,20 +247,26 @@ class ScanLocalState : public ScanLocalStateBase { } Status _normalize_conjuncts(RuntimeState* state); + // Normalize a conjunct and try to convert it to column predicate recursively. Status _normalize_predicate(vectorized::VExprContext* context, - vectorized::VExprSPtr& output_expr); + const vectorized::VExprSPtr& root, + vectorized::VExprSPtr& output_expr, + MutilColumnBlockPredicate* parent); Status _eval_const_conjuncts(vectorized::VExprContext* expr_ctx, PushDownType* pdt); - Status _normalize_bloom_filter(vectorized::VExprContext* expr_ctx, SlotDescriptor* slot, + Status _normalize_bloom_filter(vectorized::VExprContext* expr_ctx, vectorized::VExprSPtr& root, + SlotDescriptor* slot, std::vector>& predicates, - PushDownType* pdt); - Status _normalize_topn_filter(vectorized::VExprContext* expr_ctx, SlotDescriptor* slot, + PushDownType* pdt, MutilColumnBlockPredicate* parent); + Status _normalize_topn_filter(vectorized::VExprContext* expr_ctx, vectorized::VExprSPtr& root, + SlotDescriptor* slot, std::vector>& predicates, - PushDownType* pdt); + PushDownType* pdt, MutilColumnBlockPredicate* parent); - Status _normalize_bitmap_filter(vectorized::VExprContext* expr_ctx, SlotDescriptor* slot, + Status _normalize_bitmap_filter(vectorized::VExprContext* expr_ctx, vectorized::VExprSPtr& root, + SlotDescriptor* slot, std::vector>& predicates, - PushDownType* pdt); + PushDownType* pdt, MutilColumnBlockPredicate* parent); Status _normalize_function_filters(vectorized::VExprContext* expr_ctx, SlotDescriptor* slot, PushDownType* pdt); @@ -256,29 +275,32 @@ class ScanLocalState : public ScanLocalStateBase { SlotDescriptor** slot_desc, ColumnValueRangeType** range); template - Status _normalize_in_and_eq_predicate(vectorized::VExprContext* expr_ctx, SlotDescriptor* slot, + Status _normalize_in_and_eq_predicate(vectorized::VExprContext* expr_ctx, + vectorized::VExprSPtr& root, SlotDescriptor* slot, std::vector>& predicates, - ColumnValueRange& range, PushDownType* pdt); + ColumnValueRange& range, PushDownType* pdt, + MutilColumnBlockPredicate* parent); template Status _normalize_not_in_and_not_eq_predicate( - vectorized::VExprContext* expr_ctx, SlotDescriptor* slot, + vectorized::VExprContext* expr_ctx, vectorized::VExprSPtr& root, SlotDescriptor* slot, std::vector>& predicates, ColumnValueRange& range, - PushDownType* pdt); + PushDownType* pdt, MutilColumnBlockPredicate* parent); template Status _normalize_noneq_binary_predicate( - vectorized::VExprContext* expr_ctx, SlotDescriptor* slot, + vectorized::VExprContext* expr_ctx, vectorized::VExprSPtr& root, SlotDescriptor* slot, std::vector>& predicates, ColumnValueRange& range, - PushDownType* pdt); + PushDownType* pdt, MutilColumnBlockPredicate* parent); template - Status _normalize_is_null_predicate(vectorized::VExprContext* expr_ctx, SlotDescriptor* slot, + Status _normalize_is_null_predicate(vectorized::VExprContext* expr_ctx, + vectorized::VExprSPtr& root, SlotDescriptor* slot, std::vector>& predicates, - ColumnValueRange& range, PushDownType* pdt); + ColumnValueRange& range, PushDownType* pdt, + MutilColumnBlockPredicate* parent); template Status _change_value_range(ColumnValueRange& range, const void* value, - const ChangeFixedValueRangeFunc& func, const std::string& fn_name, - int slot_ref_child = -1); + const ChangeFixedValueRangeFunc& func, const std::string& fn_name); Status _prepare_scanners(); @@ -313,6 +335,7 @@ class ScanLocalState : public ScanLocalStateBase { // Parsed from conjuncts phmap::flat_hash_map _slot_id_to_value_range; phmap::flat_hash_map>> _slot_id_to_predicates; + std::vector> _or_predicates; std::atomic _eos = false; diff --git a/be/src/runtime/runtime_predicate.cpp b/be/src/runtime/runtime_predicate.cpp index e8ff7a284a7839..40855755fe29d4 100644 --- a/be/src/runtime/runtime_predicate.cpp +++ b/be/src/runtime/runtime_predicate.cpp @@ -68,7 +68,7 @@ Status RuntimePredicate::init_target( slot_id_to_slot_desc[get_texpr(target_node_id).nodes[0].slot_ref.slot_id] ->col_name(); _contexts[target_node_id].predicate = - SharedPredicate::create_shared(cast_set(column_id)); + SharedPredicate::create_shared(cast_set(column_id), ""); } _detected_target = true; return Status::OK(); @@ -178,7 +178,7 @@ StringRef RuntimePredicate::_get_string_ref(const Field& field, const PrimitiveT } throw Exception(ErrorCode::INTERNAL_ERROR, "meet invalid type, type={}", type_to_string(type)); - return StringRef(); + return {}; } bool RuntimePredicate::_init(PrimitiveType type) { @@ -218,8 +218,8 @@ Status RuntimePredicate::update(const Field& value) { const auto& column = *DORIS_TRY(ctx.tablet_schema->column(ctx.col_name)); auto str_ref = _get_string_ref(_orderby_extrem, _type); std::shared_ptr pred = - _pred_constructor(ctx.predicate->column_id(), column.get_vec_type(), str_ref, false, - _predicate_arena); + _pred_constructor(ctx.predicate->column_id(), column.name(), column.get_vec_type(), + str_ref, false, _predicate_arena); // For NULLS FIRST, wrap a AcceptNullPredicate to return true for NULL // since ORDER BY ASC/DESC should get NULL first but pred returns NULL diff --git a/be/src/runtime/runtime_predicate.h b/be/src/runtime/runtime_predicate.h index aa1e52522f8550..1e20bf800e13e8 100644 --- a/be/src/runtime/runtime_predicate.h +++ b/be/src/runtime/runtime_predicate.h @@ -155,8 +155,8 @@ class RuntimePredicate { Field _orderby_extrem {PrimitiveType::TYPE_NULL}; Arena _predicate_arena; std::function( - const int cid, const vectorized::DataTypePtr& data_type, StringRef& value, - bool opposite, vectorized::Arena& arena)> + const int cid, const std::string& col_name, const vectorized::DataTypePtr& data_type, + StringRef& value, bool opposite, vectorized::Arena& arena)> _pred_constructor; bool _detected_source = false; bool _detected_target = false; diff --git a/be/src/vec/exec/format/generic_reader.cpp b/be/src/vec/exec/format/generic_reader.cpp deleted file mode 100644 index 8414dc8599cc28..00000000000000 --- a/be/src/vec/exec/format/generic_reader.cpp +++ /dev/null @@ -1,294 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "vec/exec/format/generic_reader.h" - -#include "olap/predicate_creator.h" -#include "vec/data_types/data_type.h" -#include "vec/data_types/data_type_nullable.h" -#include "vec/data_types/data_type_string.h" -#include "vec/exprs/vruntimefilter_wrapper.h" -#include "vec/exprs/vslot_ref.h" -#include "vec/exprs/vtopn_pred.h" - -namespace doris::vectorized { -#include "common/compile_check_begin.h" - -Status ExprPushDownHelper::_extract_predicates(const VExprSPtr& expr, int& cid, - DataTypePtr& data_type, - std::vector& values, bool null_pred, - bool& parsed) const { - parsed = false; - values.clear(); - if (!expr->children()[0]->is_slot_ref()) [[unlikely]] { - return Status::OK(); - } - const auto* slot_ref = assert_cast(expr->children()[0].get()); - cid = slot_ref->column_id(); - values.reserve(expr->children().size() - 1); - data_type = remove_nullable(slot_ref->data_type()); - if (null_pred) { - DCHECK_EQ(expr->children().size(), 1); - parsed = true; - } - for (size_t child_id = 1; child_id < expr->children().size(); child_id++) { - auto child_expr = expr->children()[child_id]; - if (!child_expr->is_literal()) { - return Status::OK(); - } - const auto* literal = static_cast(child_expr.get()); - if (literal->get_column_ptr()->is_null_at(0)) { - continue; - } - values.emplace_back(literal->get_column_ptr()->get_data_at(0)); - parsed = true; - } - return Status::OK(); -} - -Status ExprPushDownHelper::convert_predicates(const VExprSPtrs& exprs, - std::unique_ptr& root, - Arena& arena) { - if (exprs.empty()) { - return Status::OK(); - } - - int cid; - DataTypePtr data_type; - std::vector values; - bool parsed = false; - for (const auto& expr : exprs) { - cid = -1; - values.clear(); - parsed = false; - switch (expr->node_type()) { - case TExprNodeType::BINARY_PRED: { - RETURN_IF_ERROR(_extract_predicates(expr, cid, data_type, values, false, parsed)); - if (parsed) { - std::shared_ptr predicate; - if (expr->op() == TExprOpcode::EQ) { - predicate = create_comparison_predicate0( - cid, data_type, values[0], false, arena); - } else if (expr->op() == TExprOpcode::NE) { - predicate = create_comparison_predicate0( - cid, data_type, values[0], false, arena); - } else if (expr->op() == TExprOpcode::LT) { - predicate = create_comparison_predicate0( - cid, data_type, values[0], false, arena); - } else if (expr->op() == TExprOpcode::LE) { - predicate = create_comparison_predicate0( - cid, data_type, values[0], false, arena); - } else if (expr->op() == TExprOpcode::GT) { - predicate = create_comparison_predicate0( - cid, data_type, values[0], false, arena); - } else if (expr->op() == TExprOpcode::GE) { - predicate = create_comparison_predicate0( - cid, data_type, values[0], false, arena); - } else { - break; - } - root->add_column_predicate(SingleColumnBlockPredicate::create_unique(predicate)); - } - break; - } - case TExprNodeType::IN_PRED: { - switch (expr->op()) { - case TExprOpcode::FILTER_IN: { - std::shared_ptr set; - RETURN_IF_ERROR(_extract_predicates(expr, cid, data_type, values, false, parsed)); - if (parsed) { - switch (data_type->get_primitive_type()) { -#define BUILD_SET_CASE(PType) \ - case PType: { \ - set = build_set(); \ - break; \ - } - BUILD_SET_CASE(TYPE_TINYINT); - BUILD_SET_CASE(TYPE_SMALLINT); - BUILD_SET_CASE(TYPE_INT); - BUILD_SET_CASE(TYPE_BIGINT); - BUILD_SET_CASE(TYPE_LARGEINT); - BUILD_SET_CASE(TYPE_FLOAT); - BUILD_SET_CASE(TYPE_DOUBLE); - BUILD_SET_CASE(TYPE_CHAR); - BUILD_SET_CASE(TYPE_STRING); - BUILD_SET_CASE(TYPE_DATE); - BUILD_SET_CASE(TYPE_DATETIME); - BUILD_SET_CASE(TYPE_DATEV2); - BUILD_SET_CASE(TYPE_DATETIMEV2); - BUILD_SET_CASE(TYPE_BOOLEAN); - BUILD_SET_CASE(TYPE_IPV4); - BUILD_SET_CASE(TYPE_IPV6); - BUILD_SET_CASE(TYPE_DECIMALV2); - BUILD_SET_CASE(TYPE_DECIMAL32); - BUILD_SET_CASE(TYPE_DECIMAL64); - BUILD_SET_CASE(TYPE_DECIMAL128I); - BUILD_SET_CASE(TYPE_DECIMAL256); - case TYPE_VARCHAR: { - set = build_set(); - break; - } -#undef BUILD_SET_CASE - default: - throw Exception(Status::Error( - "unsupported data type in delete handler. type={}", - type_to_string(data_type->get_primitive_type()))); - } - if (is_string_type(data_type->get_primitive_type())) { - for (size_t i = 0; i < values.size(); i++) { - set->insert(reinterpret_cast(&values[i])); - } - } else { - for (size_t i = 0; i < values.size(); i++) { - set->insert(reinterpret_cast(values[i].data)); - } - } - root->add_column_predicate(SingleColumnBlockPredicate::create_unique( - create_in_list_predicate(cid, data_type, set, - false))); - } - break; - } - default: { - break; - } - } - break; - } - case TExprNodeType::COMPOUND_PRED: { - switch (expr->op()) { - case TExprOpcode::COMPOUND_AND: { - for (const auto& child : expr->children()) { - RETURN_IF_ERROR(convert_predicates({child}, root, arena)); - } - break; - } - case TExprOpcode::COMPOUND_OR: { - std::unique_ptr new_root = - OrBlockColumnPredicate::create_unique(); - for (const auto& child : expr->children()) { - RETURN_IF_ERROR(convert_predicates({child}, new_root, arena)); - } - root->add_column_predicate(std::move(new_root)); - break; - } - default: { - break; - } - } - break; - } - case TExprNodeType::FUNCTION_CALL: { - auto fn_name = expr->fn().name.function_name; - // only support `is null` and `is not null` - if (fn_name == "is_null_pred" || fn_name == "is_not_null_pred") { - RETURN_IF_ERROR(_extract_predicates(expr, cid, data_type, values, true, parsed)); - if (parsed) { - root->add_column_predicate(SingleColumnBlockPredicate::create_unique( - NullPredicate::create_shared(cid, true, data_type->get_primitive_type(), - fn_name == "is_not_null_pred"))); - } - } - break; - } - default: - break; - } - } - - return Status::OK(); -} - -bool ExprPushDownHelper::check_expr_can_push_down(const VExprSPtr& expr) const { - if (expr == nullptr) { - return false; - } - - switch (expr->node_type()) { - case TExprNodeType::BINARY_PRED: - case TExprNodeType::IN_PRED: { - switch (expr->op()) { - case TExprOpcode::GE: - case TExprOpcode::GT: - case TExprOpcode::LE: - case TExprOpcode::LT: - case TExprOpcode::EQ: - case TExprOpcode::FILTER_IN: - return _check_slot_can_push_down(expr) && _check_other_children_is_literal(expr); - default: { - return false; - } - } - } - case TExprNodeType::COMPOUND_PRED: { - switch (expr->op()) { - case TExprOpcode::COMPOUND_AND: { - // at least one child can be pushed down - return std::ranges::any_of(expr->children(), [this](const auto& child) { - return check_expr_can_push_down(child); - }); - } - case TExprOpcode::COMPOUND_OR: { - // all children must be pushed down - return std::ranges::all_of(expr->children(), [this](const auto& child) { - return check_expr_can_push_down(child); - }); - } - default: { - return false; - } - } - } - case TExprNodeType::FUNCTION_CALL: { - auto fn_name = expr->fn().name.function_name; - // only support `is null` and `is not null` - if (fn_name == "is_null_pred" || fn_name == "is_not_null_pred") { - return _check_slot_can_push_down(expr); - } - return false; - } - default: { - return false; - } - } -} - -bool ExprPushDownHelper::_check_slot_can_push_down(const VExprSPtr& expr) const { - if (!expr->children()[0]->is_slot_ref()) { - return false; - } - - const auto* slot_ref = assert_cast(expr->children()[0].get()); - // check if the slot exists in parquet file. - if (!_exists_in_file(slot_ref)) { - return false; - } - return _type_matches(slot_ref); -} - -bool ExprPushDownHelper::_check_other_children_is_literal(const VExprSPtr& expr) const { - for (size_t child_id = 1; child_id < expr->children().size(); child_id++) { - auto child_expr = expr->children()[child_id]; - if (!child_expr->is_literal()) { - return false; - } - } - return true; -} - -#include "common/compile_check_end.h" -} // namespace doris::vectorized diff --git a/be/src/vec/exec/format/generic_reader.h b/be/src/vec/exec/format/generic_reader.h index a582ccc2b24ef1..620112a71e7999 100644 --- a/be/src/vec/exec/format/generic_reader.h +++ b/be/src/vec/exec/format/generic_reader.h @@ -112,24 +112,5 @@ class GenericReader : public ProfileCollector { FileMetaCache* _meta_cache = nullptr; }; -class ExprPushDownHelper { -public: - ExprPushDownHelper() = default; - virtual ~ExprPushDownHelper() = default; - bool check_expr_can_push_down(const VExprSPtr& expr) const; - Status convert_predicates(const VExprSPtrs& exprs, - std::unique_ptr& root, Arena& arena); - -protected: - virtual bool _exists_in_file(const VSlotRef*) const = 0; - virtual bool _type_matches(const VSlotRef*) const = 0; - -private: - bool _check_slot_can_push_down(const VExprSPtr& expr) const; - bool _check_other_children_is_literal(const VExprSPtr& expr) const; - Status _extract_predicates(const VExprSPtr& expr, int& cid, DataTypePtr& data_type, - std::vector& values, bool null_pred, bool& parsed) const; -}; - #include "common/compile_check_end.h" } // namespace doris::vectorized diff --git a/be/src/vec/exec/format/parquet/vparquet_group_reader.h b/be/src/vec/exec/format/parquet/vparquet_group_reader.h index d47fa87becee4d..0ea390c9bff86b 100644 --- a/be/src/vec/exec/format/parquet/vparquet_group_reader.h +++ b/be/src/vec/exec/format/parquet/vparquet_group_reader.h @@ -91,6 +91,9 @@ class RowGroupReader : public ProfileCollector { fill_partition_columns; std::unordered_map fill_missing_columns; + phmap::flat_hash_map>> + slot_id_to_predicates; + std::vector> or_predicates; bool can_lazy_read = false; // block->rows() returns the number of rows of the first column, // so we should check and resize the first column diff --git a/be/src/vec/exec/format/parquet/vparquet_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_reader.cpp index 75f7365dade572..21a49673f0837f 100644 --- a/be/src/vec/exec/format/parquet/vparquet_reader.cpp +++ b/be/src/vec/exec/format/parquet/vparquet_reader.cpp @@ -326,8 +326,11 @@ void ParquetReader::_init_file_description() { Status ParquetReader::init_reader( const std::vector& all_column_names, std::unordered_map* col_name_to_block_idx, - const VExprContextSPtrs& conjuncts, const TupleDescriptor* tuple_descriptor, - const RowDescriptor* row_descriptor, + const VExprContextSPtrs& conjuncts, + phmap::flat_hash_map>>& + slot_id_to_predicates, + std::vector>& or_predicates, + const TupleDescriptor* tuple_descriptor, const RowDescriptor* row_descriptor, const std::unordered_map* colname_to_slot_id, const VExprContextSPtrs* not_single_slot_filter_conjuncts, const std::unordered_map* slot_id_to_filter_conjuncts, @@ -380,21 +383,23 @@ Status ParquetReader::init_reader( } // build column predicates for column lazy read _lazy_read_ctx.conjuncts = conjuncts; + _lazy_read_ctx.slot_id_to_predicates = slot_id_to_predicates; + _lazy_read_ctx.or_predicates = or_predicates; return Status::OK(); } -bool ParquetReader::_exists_in_file(const VSlotRef* slot_ref) const { +bool ParquetReader::_exists_in_file(const std::string& expr_name) const { // `_read_table_columns_set` is used to ensure that only columns actually read are subject to min-max filtering. // This primarily handles cases where partition columns also exist in a file. The reason it's not modified // in `_table_info_node_ptr` is that Iceberg、Hudi has inconsistent requirements for this node; // Iceberg partition evolution need read partition columns from a file. // hudi set `hoodie.datasource.write.drop.partition.columns=false` not need read partition columns from a file. - return _table_info_node_ptr->children_column_exists(slot_ref->expr_name()) && - _read_table_columns_set.contains(slot_ref->expr_name()); + return _table_info_node_ptr->children_column_exists(expr_name) && + _read_table_columns_set.contains(expr_name); } -bool ParquetReader::_type_matches(const VSlotRef* slot_ref) const { - auto* slot = _tuple_descriptor->slots()[slot_ref->column_id()]; +bool ParquetReader::_type_matches(const int cid) const { + auto* slot = _tuple_descriptor->slots()[cid]; auto table_col_type = remove_nullable(slot->type()); const auto& file_col_name = _table_info_node_ptr->children_file_column_name(slot->col_name()); @@ -412,11 +417,12 @@ Status ParquetReader::_update_lazy_read_ctx(const VExprContextSPtrs& new_conjunc new_lazy_read_ctx.fill_missing_columns = std::move(_lazy_read_ctx.fill_missing_columns); _lazy_read_ctx = std::move(new_lazy_read_ctx); - _top_runtime_vexprs.clear(); _push_down_predicates.clear(); // std::unordered_map> std::unordered_map> predicate_columns; + + // TODO(gabriel): we should try to clear too much structs which are used to represent conjuncts and predicates. // visit_slot for lazy mat. std::function visit_slot = [&](VExpr* expr) { if (expr->is_slot_ref()) { @@ -466,31 +472,26 @@ Status ParquetReader::_update_lazy_read_ctx(const VExprContextSPtrs& new_conjunc VExprSPtr new_in_slot = nullptr; if (direct_in_predicate->get_slot_in_expr(new_in_slot)) { expr = new_in_slot; - } else { - continue; } - } else { - continue; - } - } else if (VTopNPred* topn_pred = typeid_cast(expr.get())) { - // top runtime filter : only le && ge. - DCHECK(topn_pred->children().size() > 0); - visit_slot(topn_pred->children()[0].get()); - - if (topn_pred->children()[0]->is_slot_ref()) { - // can min-max filter row group and page index. - // Since the filtering conditions for topn are dynamic, the filtering is - // delayed until create next row group reader. - _top_runtime_vexprs.emplace_back(expr); } - continue; - } else { + } else if (VTopNPred* topn_pred = typeid_cast(expr.get()); + topn_pred == nullptr) { visit_slot(expr.get()); } - - if (check_expr_can_push_down(expr)) { - _push_down_predicates.push_back(AndBlockColumnPredicate::create_unique()); - RETURN_IF_ERROR(convert_predicates({expr}, _push_down_predicates.back(), _arena)); + } + if (!_lazy_read_ctx.slot_id_to_predicates.empty()) { + auto and_pred = AndBlockColumnPredicate::create_unique(); + for (const auto& entry : _lazy_read_ctx.slot_id_to_predicates) { + for (const auto& pred : entry.second) { + if (!_exists_in_file(pred->col_name()) || !_type_matches(pred->column_id())) { + continue; + } + and_pred->add_column_predicate( + SingleColumnBlockPredicate::create_unique(pred->clone(pred->column_id()))); + } + } + if (and_pred->num_of_column_predicate() > 0) { + _push_down_predicates.push_back(std::move(and_pred)); } } @@ -719,28 +720,11 @@ Status ParquetReader::_next_row_group_reader() { RETURN_IF_ERROR(_update_lazy_read_ctx(new_push_down_conjuncts)); } - size_t before_predicate_size = _push_down_predicates.size(); - _push_down_predicates.reserve(before_predicate_size + _top_runtime_vexprs.size()); - for (const auto& vexpr : _top_runtime_vexprs) { - VTopNPred* topn_pred = assert_cast(vexpr.get()); - VExprSPtr binary_expr; - if (topn_pred->get_binary_expr(binary_expr)) { - // for min-max filter. - if (check_expr_can_push_down(binary_expr)) { - _push_down_predicates.push_back(AndBlockColumnPredicate::create_unique()); - RETURN_IF_ERROR(convert_predicates({binary_expr}, _push_down_predicates.back(), - _arena)); - } - } - } - candidate_row_ranges.clear(); // The range of lines to be read is determined by the push down predicate. RETURN_IF_ERROR(_process_min_max_bloom_filter( _current_row_group_index, row_group, _push_down_predicates, &candidate_row_ranges)); - _push_down_predicates.resize(before_predicate_size); - std::function column_compressed_size = [&row_group, &column_compressed_size](const FieldSchema* field) -> int64_t { if (field->physical_column_index >= 0) { diff --git a/be/src/vec/exec/format/parquet/vparquet_reader.h b/be/src/vec/exec/format/parquet/vparquet_reader.h index e5c94f6f3af6bd..d6f16d595b8931 100644 --- a/be/src/vec/exec/format/parquet/vparquet_reader.h +++ b/be/src/vec/exec/format/parquet/vparquet_reader.h @@ -70,7 +70,7 @@ class VExprContext; namespace doris::vectorized { #include "common/compile_check_begin.h" -class ParquetReader : public GenericReader, public ExprPushDownHelper { +class ParquetReader : public GenericReader { ENABLE_FACTORY_CREATOR(ParquetReader); public: @@ -118,8 +118,11 @@ class ParquetReader : public GenericReader, public ExprPushDownHelper { Status init_reader( const std::vector& all_column_names, std::unordered_map* col_name_to_block_idx, - const VExprContextSPtrs& conjuncts, const TupleDescriptor* tuple_descriptor, - const RowDescriptor* row_descriptor, + const VExprContextSPtrs& conjuncts, + phmap::flat_hash_map>>& + slot_id_to_predicates, + std::vector>& or_predicates, + const TupleDescriptor* tuple_descriptor, const RowDescriptor* row_descriptor, const std::unordered_map* colname_to_slot_id, const VExprContextSPtrs* not_single_slot_filter_conjuncts, const std::unordered_map* slot_id_to_filter_conjuncts, @@ -257,8 +260,8 @@ class ParquetReader : public GenericReader, public ExprPushDownHelper { Status _set_read_one_line_impl() override { return Status::OK(); } - bool _exists_in_file(const VSlotRef* slot) const override; - bool _type_matches(const VSlotRef*) const override; + bool _exists_in_file(const std::string& expr_name) const; + bool _type_matches(const int cid) const; // update lazy read context when runtime filter changed Status _update_lazy_read_ctx(const VExprContextSPtrs& new_conjuncts); @@ -348,7 +351,6 @@ class ParquetReader : public GenericReader, public ExprPushDownHelper { std::unordered_map* _col_name_to_block_idx = nullptr; // Since the filtering conditions for topn are dynamic, the filtering is delayed until create next row group reader. - VExprSPtrs _top_runtime_vexprs; std::vector> _push_down_predicates; Arena _arena; diff --git a/be/src/vec/exec/format/table/hive_reader.cpp b/be/src/vec/exec/format/table/hive_reader.cpp index ac004230bd0aab..1af8479668357e 100644 --- a/be/src/vec/exec/format/table/hive_reader.cpp +++ b/be/src/vec/exec/format/table/hive_reader.cpp @@ -213,8 +213,11 @@ ColumnIdResult HiveOrcReader::_create_column_ids_by_top_level_col_index( Status HiveParquetReader::init_reader( const std::vector& read_table_col_names, std::unordered_map* col_name_to_block_idx, - const VExprContextSPtrs& conjuncts, const TupleDescriptor* tuple_descriptor, - const RowDescriptor* row_descriptor, + const VExprContextSPtrs& conjuncts, + phmap::flat_hash_map>>& + slot_id_to_predicates, + std::vector>& or_predicates, + const TupleDescriptor* tuple_descriptor, const RowDescriptor* row_descriptor, const std::unordered_map* colname_to_slot_id, const VExprContextSPtrs* not_single_slot_filter_conjuncts, const std::unordered_map* slot_id_to_filter_conjuncts) { @@ -285,9 +288,10 @@ Status HiveParquetReader::init_reader( RETURN_IF_ERROR(init_row_filters()); return parquet_reader->init_reader( - read_table_col_names, col_name_to_block_idx, conjuncts, tuple_descriptor, - row_descriptor, colname_to_slot_id, not_single_slot_filter_conjuncts, - slot_id_to_filter_conjuncts, table_info_node_ptr, true, column_ids, filter_column_ids); + read_table_col_names, col_name_to_block_idx, conjuncts, slot_id_to_predicates, + or_predicates, tuple_descriptor, row_descriptor, colname_to_slot_id, + not_single_slot_filter_conjuncts, slot_id_to_filter_conjuncts, table_info_node_ptr, + true, column_ids, filter_column_ids); } ColumnIdResult HiveParquetReader::_create_column_ids(const FieldDescriptor* field_desc, diff --git a/be/src/vec/exec/format/table/hive_reader.h b/be/src/vec/exec/format/table/hive_reader.h index a47ac8164eb486..8a3385b1d626c8 100644 --- a/be/src/vec/exec/format/table/hive_reader.h +++ b/be/src/vec/exec/format/table/hive_reader.h @@ -88,8 +88,11 @@ class HiveParquetReader final : public HiveReader { Status init_reader( const std::vector& read_table_col_names, std::unordered_map* col_name_to_block_idx, - const VExprContextSPtrs& conjuncts, const TupleDescriptor* tuple_descriptor, - const RowDescriptor* row_descriptor, + const VExprContextSPtrs& conjuncts, + phmap::flat_hash_map>>& + slot_id_to_predicates, + std::vector>& or_predicates, + const TupleDescriptor* tuple_descriptor, const RowDescriptor* row_descriptor, const std::unordered_map* colname_to_slot_id, const VExprContextSPtrs* not_single_slot_filter_conjuncts, const std::unordered_map* slot_id_to_filter_conjuncts); diff --git a/be/src/vec/exec/format/table/hudi_reader.cpp b/be/src/vec/exec/format/table/hudi_reader.cpp index d7bd32ae4987e7..9a2b708d72af9a 100644 --- a/be/src/vec/exec/format/table/hudi_reader.cpp +++ b/be/src/vec/exec/format/table/hudi_reader.cpp @@ -33,8 +33,11 @@ Status HudiReader::get_next_block_inner(Block* block, size_t* read_rows, bool* e Status HudiParquetReader::init_reader( const std::vector& read_table_col_names, std::unordered_map* col_name_to_block_idx, - const VExprContextSPtrs& conjuncts, const TupleDescriptor* tuple_descriptor, - const RowDescriptor* row_descriptor, + const VExprContextSPtrs& conjuncts, + phmap::flat_hash_map>>& + slot_id_to_predicates, + std::vector>& or_predicates, + const TupleDescriptor* tuple_descriptor, const RowDescriptor* row_descriptor, const std::unordered_map* colname_to_slot_id, const VExprContextSPtrs* not_single_slot_filter_conjuncts, const std::unordered_map* slot_id_to_filter_conjuncts) { @@ -47,10 +50,10 @@ Status HudiParquetReader::init_reader( RETURN_IF_ERROR(gen_table_info_node_by_field_id( _params, _range.table_format_params.hudi_params.schema_id, tuple_descriptor, *field_desc)); - return parquet_reader->init_reader(read_table_col_names, col_name_to_block_idx, conjuncts, - tuple_descriptor, row_descriptor, colname_to_slot_id, - not_single_slot_filter_conjuncts, - slot_id_to_filter_conjuncts, table_info_node_ptr); + return parquet_reader->init_reader( + read_table_col_names, col_name_to_block_idx, conjuncts, slot_id_to_predicates, + or_predicates, tuple_descriptor, row_descriptor, colname_to_slot_id, + not_single_slot_filter_conjuncts, slot_id_to_filter_conjuncts, table_info_node_ptr); } #include "common/compile_check_end.h" diff --git a/be/src/vec/exec/format/table/hudi_reader.h b/be/src/vec/exec/format/table/hudi_reader.h index ff15eb78c933ae..08d9035612b055 100644 --- a/be/src/vec/exec/format/table/hudi_reader.h +++ b/be/src/vec/exec/format/table/hudi_reader.h @@ -51,8 +51,11 @@ class HudiParquetReader final : public HudiReader { Status init_reader( const std::vector& read_table_col_names, std::unordered_map* col_name_to_block_idx, - const VExprContextSPtrs& conjuncts, const TupleDescriptor* tuple_descriptor, - const RowDescriptor* row_descriptor, + const VExprContextSPtrs& conjuncts, + phmap::flat_hash_map>>& + slot_id_to_predicates, + std::vector>& or_predicates, + const TupleDescriptor* tuple_descriptor, const RowDescriptor* row_descriptor, const std::unordered_map* colname_to_slot_id, const VExprContextSPtrs* not_single_slot_filter_conjuncts, const std::unordered_map* slot_id_to_filter_conjuncts); diff --git a/be/src/vec/exec/format/table/iceberg_reader.cpp b/be/src/vec/exec/format/table/iceberg_reader.cpp index 7a6f0fcb0e028c..8b9800a7ab3dde 100644 --- a/be/src/vec/exec/format/table/iceberg_reader.cpp +++ b/be/src/vec/exec/format/table/iceberg_reader.cpp @@ -177,10 +177,12 @@ Status IcebergTableReader::_equality_delete_base( init_schema = true; } if (auto* parquet_reader = typeid_cast(delete_reader.get())) { + phmap::flat_hash_map>> tmp; + std::vector> or_predicates; RETURN_IF_ERROR(parquet_reader->init_reader( - equality_delete_col_names, &delete_col_name_to_block_idx, {}, nullptr, nullptr, - nullptr, nullptr, nullptr, TableSchemaChangeHelper::ConstNode::get_instance(), - false)); + equality_delete_col_names, &delete_col_name_to_block_idx, {}, tmp, + or_predicates, nullptr, nullptr, nullptr, nullptr, nullptr, + TableSchemaChangeHelper::ConstNode::get_instance(), false)); } else if (auto* orc_reader = typeid_cast(delete_reader.get())) { RETURN_IF_ERROR(orc_reader->init_reader(&equality_delete_col_names, &delete_col_name_to_block_idx, {}, false, {}, @@ -443,8 +445,11 @@ void IcebergTableReader::_gen_position_delete_file_range(Block& block, DeleteFil Status IcebergParquetReader::init_reader( const std::vector& file_col_names, std::unordered_map* col_name_to_block_idx, - const VExprContextSPtrs& conjuncts, const TupleDescriptor* tuple_descriptor, - const RowDescriptor* row_descriptor, + const VExprContextSPtrs& conjuncts, + phmap::flat_hash_map>>& + slot_id_to_predicates, + std::vector>& or_predicates, + const TupleDescriptor* tuple_descriptor, const RowDescriptor* row_descriptor, const std::unordered_map* colname_to_slot_id, const VExprContextSPtrs* not_single_slot_filter_conjuncts, const std::unordered_map* slot_id_to_filter_conjuncts) { @@ -487,9 +492,10 @@ Status IcebergParquetReader::init_reader( } } return parquet_reader->init_reader( - _all_required_col_names, _col_name_to_block_idx, conjuncts, tuple_descriptor, - row_descriptor, colname_to_slot_id, not_single_slot_filter_conjuncts, - slot_id_to_filter_conjuncts, table_info_node_ptr, true, column_ids, filter_column_ids); + _all_required_col_names, _col_name_to_block_idx, conjuncts, slot_id_to_predicates, + or_predicates, tuple_descriptor, row_descriptor, colname_to_slot_id, + not_single_slot_filter_conjuncts, slot_id_to_filter_conjuncts, table_info_node_ptr, + true, column_ids, filter_column_ids); } ColumnIdResult IcebergParquetReader::_create_column_ids(const FieldDescriptor* field_desc, @@ -559,10 +565,12 @@ Status IcebergParquetReader ::_read_position_delete_file(const TFileRangeDesc* d ParquetReader parquet_delete_reader(_profile, _params, *delete_range, READ_DELETE_FILE_BATCH_SIZE, &_state->timezone_obj(), _io_ctx, _state, _meta_cache); + phmap::flat_hash_map>> tmp; + std::vector> or_predicates; RETURN_IF_ERROR(parquet_delete_reader.init_reader( delete_file_col_names, const_cast*>(&DELETE_COL_NAME_TO_BLOCK_IDX), - {}, nullptr, nullptr, nullptr, nullptr, nullptr, + {}, tmp, or_predicates, nullptr, nullptr, nullptr, nullptr, nullptr, TableSchemaChangeHelper::ConstNode::get_instance(), false)); std::unordered_map> diff --git a/be/src/vec/exec/format/table/iceberg_reader.h b/be/src/vec/exec/format/table/iceberg_reader.h index 56d04cbab82f34..29f41ef714a1a7 100644 --- a/be/src/vec/exec/format/table/iceberg_reader.h +++ b/be/src/vec/exec/format/table/iceberg_reader.h @@ -173,8 +173,11 @@ class IcebergParquetReader final : public IcebergTableReader { Status init_reader( const std::vector& file_col_names, std::unordered_map* col_name_to_block_idx, - const VExprContextSPtrs& conjuncts, const TupleDescriptor* tuple_descriptor, - const RowDescriptor* row_descriptor, + const VExprContextSPtrs& conjuncts, + phmap::flat_hash_map>>& + slot_id_to_predicates, + std::vector>& or_predicates, + const TupleDescriptor* tuple_descriptor, const RowDescriptor* row_descriptor, const std::unordered_map* colname_to_slot_id, const VExprContextSPtrs* not_single_slot_filter_conjuncts, const std::unordered_map* slot_id_to_filter_conjuncts); diff --git a/be/src/vec/exec/format/table/paimon_reader.h b/be/src/vec/exec/format/table/paimon_reader.h index d5e2ec5a35da42..734c98c20edabd 100644 --- a/be/src/vec/exec/format/table/paimon_reader.h +++ b/be/src/vec/exec/format/table/paimon_reader.h @@ -104,8 +104,11 @@ class PaimonParquetReader final : public PaimonReader { Status init_reader( const std::vector& read_table_col_names, std::unordered_map* col_name_to_block_idx, - const VExprContextSPtrs& conjuncts, const TupleDescriptor* tuple_descriptor, - const RowDescriptor* row_descriptor, + const VExprContextSPtrs& conjuncts, + phmap::flat_hash_map>>& + slot_id_to_predicates, + std::vector>& or_predicates, + const TupleDescriptor* tuple_descriptor, const RowDescriptor* row_descriptor, const std::unordered_map* colname_to_slot_id, const VExprContextSPtrs* not_single_slot_filter_conjuncts, const std::unordered_map* slot_id_to_filter_conjuncts) { @@ -119,10 +122,10 @@ class PaimonParquetReader final : public PaimonReader { _params, _range.table_format_params.paimon_params.schema_id, tuple_descriptor, *field_desc)); - return parquet_reader->init_reader(read_table_col_names, col_name_to_block_idx, conjuncts, - tuple_descriptor, row_descriptor, colname_to_slot_id, - not_single_slot_filter_conjuncts, - slot_id_to_filter_conjuncts, table_info_node_ptr); + return parquet_reader->init_reader( + read_table_col_names, col_name_to_block_idx, conjuncts, slot_id_to_predicates, + or_predicates, tuple_descriptor, row_descriptor, colname_to_slot_id, + not_single_slot_filter_conjuncts, slot_id_to_filter_conjuncts, table_info_node_ptr); } }; #include "common/compile_check_end.h" diff --git a/be/src/vec/exec/scan/file_scanner.cpp b/be/src/vec/exec/scan/file_scanner.cpp index 5a65b380c0e784..414620b0cbe15b 100644 --- a/be/src/vec/exec/scan/file_scanner.cpp +++ b/be/src/vec/exec/scan/file_scanner.cpp @@ -40,6 +40,7 @@ #include "exec/rowid_fetcher.h" #include "io/cache/block_file_cache_profile.h" #include "olap/rowset/segment_v2/column_reader.h" +#include "pipeline/exec/file_scan_operator.h" #include "runtime/descriptors.h" #include "runtime/runtime_state.h" #include "util/runtime_profile.h" @@ -1213,15 +1214,23 @@ Status FileScanner::_init_parquet_reader(std::unique_ptr&& parque const TFileRangeDesc& range = _current_range; Status init_status = Status::OK(); + phmap::flat_hash_map>> slot_id_to_predicates = + _local_state + ? _local_state->cast()._slot_id_to_predicates + : phmap::flat_hash_map>> {}; + std::vector> or_predicates = + _local_state ? _local_state->cast()._or_predicates + : std::vector> {}; if (range.__isset.table_format_params && range.table_format_params.table_format_type == "iceberg") { std::unique_ptr iceberg_reader = IcebergParquetReader::create_unique( std::move(parquet_reader), _profile, _state, *_params, range, _kv_cache, _io_ctx.get(), file_meta_cache_ptr); init_status = iceberg_reader->init_reader( - _file_col_names, &_src_block_name_to_idx, _push_down_conjuncts, _real_tuple_desc, - _default_val_row_desc.get(), _col_name_to_slot_id, - &_not_single_slot_filter_conjuncts, &_slot_id_to_filter_conjuncts); + _file_col_names, &_src_block_name_to_idx, _push_down_conjuncts, + slot_id_to_predicates, or_predicates, _real_tuple_desc, _default_val_row_desc.get(), + _col_name_to_slot_id, &_not_single_slot_filter_conjuncts, + &_slot_id_to_filter_conjuncts); _cur_reader = std::move(iceberg_reader); } else if (range.__isset.table_format_params && range.table_format_params.table_format_type == "paimon") { @@ -1229,9 +1238,10 @@ Status FileScanner::_init_parquet_reader(std::unique_ptr&& parque std::move(parquet_reader), _profile, _state, *_params, range, _io_ctx.get(), file_meta_cache_ptr); init_status = paimon_reader->init_reader( - _file_col_names, &_src_block_name_to_idx, _push_down_conjuncts, _real_tuple_desc, - _default_val_row_desc.get(), _col_name_to_slot_id, - &_not_single_slot_filter_conjuncts, &_slot_id_to_filter_conjuncts); + _file_col_names, &_src_block_name_to_idx, _push_down_conjuncts, + slot_id_to_predicates, or_predicates, _real_tuple_desc, _default_val_row_desc.get(), + _col_name_to_slot_id, &_not_single_slot_filter_conjuncts, + &_slot_id_to_filter_conjuncts); RETURN_IF_ERROR(paimon_reader->init_row_filters()); _cur_reader = std::move(paimon_reader); } else if (range.__isset.table_format_params && @@ -1240,18 +1250,20 @@ Status FileScanner::_init_parquet_reader(std::unique_ptr&& parque std::move(parquet_reader), _profile, _state, *_params, range, _io_ctx.get(), file_meta_cache_ptr); init_status = hudi_reader->init_reader( - _file_col_names, &_src_block_name_to_idx, _push_down_conjuncts, _real_tuple_desc, - _default_val_row_desc.get(), _col_name_to_slot_id, - &_not_single_slot_filter_conjuncts, &_slot_id_to_filter_conjuncts); + _file_col_names, &_src_block_name_to_idx, _push_down_conjuncts, + slot_id_to_predicates, or_predicates, _real_tuple_desc, _default_val_row_desc.get(), + _col_name_to_slot_id, &_not_single_slot_filter_conjuncts, + &_slot_id_to_filter_conjuncts); _cur_reader = std::move(hudi_reader); } else if (range.table_format_params.table_format_type == "hive") { auto hive_reader = HiveParquetReader::create_unique(std::move(parquet_reader), _profile, _state, *_params, range, _io_ctx.get(), &_is_file_slot, file_meta_cache_ptr); init_status = hive_reader->init_reader( - _file_col_names, &_src_block_name_to_idx, _push_down_conjuncts, _real_tuple_desc, - _default_val_row_desc.get(), _col_name_to_slot_id, - &_not_single_slot_filter_conjuncts, &_slot_id_to_filter_conjuncts); + _file_col_names, &_src_block_name_to_idx, _push_down_conjuncts, + slot_id_to_predicates, or_predicates, _real_tuple_desc, _default_val_row_desc.get(), + _col_name_to_slot_id, &_not_single_slot_filter_conjuncts, + &_slot_id_to_filter_conjuncts); _cur_reader = std::move(hive_reader); } else if (range.table_format_params.table_format_type == "tvf") { const FieldDescriptor* parquet_meta = nullptr; @@ -1265,9 +1277,10 @@ Status FileScanner::_init_parquet_reader(std::unique_ptr&& parque RETURN_IF_ERROR(TableSchemaChangeHelper::BuildTableInfoUtil::by_parquet_name( _real_tuple_desc, *parquet_meta, tvf_info_node)); init_status = parquet_reader->init_reader( - _file_col_names, &_src_block_name_to_idx, _push_down_conjuncts, _real_tuple_desc, - _default_val_row_desc.get(), _col_name_to_slot_id, - &_not_single_slot_filter_conjuncts, &_slot_id_to_filter_conjuncts, tvf_info_node); + _file_col_names, &_src_block_name_to_idx, _push_down_conjuncts, + slot_id_to_predicates, or_predicates, _real_tuple_desc, _default_val_row_desc.get(), + _col_name_to_slot_id, &_not_single_slot_filter_conjuncts, + &_slot_id_to_filter_conjuncts, tvf_info_node); _cur_reader = std::move(parquet_reader); } else if (_is_load) { const FieldDescriptor* parquet_meta = nullptr; @@ -1295,9 +1308,10 @@ Status FileScanner::_init_parquet_reader(std::unique_ptr&& parque } init_status = parquet_reader->init_reader( - _file_col_names, &_src_block_name_to_idx, _push_down_conjuncts, _real_tuple_desc, - _default_val_row_desc.get(), _col_name_to_slot_id, - &_not_single_slot_filter_conjuncts, &_slot_id_to_filter_conjuncts, load_info_node); + _file_col_names, &_src_block_name_to_idx, _push_down_conjuncts, + slot_id_to_predicates, or_predicates, _real_tuple_desc, _default_val_row_desc.get(), + _col_name_to_slot_id, &_not_single_slot_filter_conjuncts, + &_slot_id_to_filter_conjuncts, load_info_node); _cur_reader = std::move(parquet_reader); } diff --git a/be/test/olap/block_column_predicate_test.cpp b/be/test/olap/block_column_predicate_test.cpp index d056a6f6e712f7..6ad31db343ca58 100644 --- a/be/test/olap/block_column_predicate_test.cpp +++ b/be/test/olap/block_column_predicate_test.cpp @@ -83,7 +83,7 @@ TEST_F(BlockColumnPredicateTest, SINGLE_COLUMN_VEC) { int rows = 10; int col_idx = 0; std::shared_ptr pred( - new ComparisonPredicateBase(col_idx, value)); + new ComparisonPredicateBase(col_idx, "", value)); SingleColumnBlockPredicate single_column_block_pred(pred); std::vector sel_idx(rows); @@ -111,9 +111,9 @@ TEST_F(BlockColumnPredicateTest, AND_MUTI_COLUMN_VEC) { int rows = 10; int col_idx = 0; std::shared_ptr less_pred( - new ComparisonPredicateBase(col_idx, less_value)); + new ComparisonPredicateBase(col_idx, "", less_value)); std::shared_ptr great_pred( - new ComparisonPredicateBase(col_idx, great_value)); + new ComparisonPredicateBase(col_idx, "", great_value)); auto single_less_pred = SingleColumnBlockPredicate::create_unique(less_pred); auto single_great_pred = SingleColumnBlockPredicate::create_unique(great_pred); @@ -146,9 +146,9 @@ TEST_F(BlockColumnPredicateTest, OR_MUTI_COLUMN_VEC) { int rows = 10; int col_idx = 0; std::shared_ptr less_pred( - new ComparisonPredicateBase(col_idx, less_value)); + new ComparisonPredicateBase(col_idx, "", less_value)); std::shared_ptr great_pred( - new ComparisonPredicateBase(col_idx, great_value)); + new ComparisonPredicateBase(col_idx, "", great_value)); auto single_less_pred = SingleColumnBlockPredicate::create_unique(less_pred); auto single_great_pred = SingleColumnBlockPredicate::create_unique(great_pred); @@ -181,11 +181,11 @@ TEST_F(BlockColumnPredicateTest, OR_AND_MUTI_COLUMN_VEC) { int rows = 10; int col_idx = 0; std::shared_ptr less_pred( - new ComparisonPredicateBase(0, less_value)); + new ComparisonPredicateBase(0, "", less_value)); std::shared_ptr great_pred( - new ComparisonPredicateBase(0, great_value)); + new ComparisonPredicateBase(0, "", great_value)); std::shared_ptr less_pred1( - new ComparisonPredicateBase(0, great_value)); + new ComparisonPredicateBase(0, "", great_value)); // Test for and or single // (column < 5 and column > 3) or column < 3 @@ -248,11 +248,11 @@ TEST_F(BlockColumnPredicateTest, AND_OR_MUTI_COLUMN_VEC) { int rows = 10; int col_idx = 0; std::shared_ptr less_pred( - new ComparisonPredicateBase(0, less_value)); + new ComparisonPredicateBase(0, "", less_value)); std::shared_ptr great_pred( - new ComparisonPredicateBase(0, great_value)); + new ComparisonPredicateBase(0, "", great_value)); std::shared_ptr less_pred1( - new ComparisonPredicateBase(0, great_value)); + new ComparisonPredicateBase(0, "", great_value)); // Test for and or single // (column < 5 or column < 3) and column > 3 @@ -305,7 +305,8 @@ void single_column_predicate_test_func(const std::pair::CppType check_value, bool expect_match) { int col_idx = 0; - std::shared_ptr pred(new ComparisonPredicateBase(col_idx, check_value)); + std::shared_ptr pred( + new ComparisonPredicateBase(col_idx, "", check_value)); SingleColumnBlockPredicate single_column_block_pred(pred); bool matched = single_column_block_pred.evaluate_and(statistic); @@ -1331,7 +1332,8 @@ void single_column_predicate_test_func(const segment_v2::BloomFilter* bf, typename PrimitiveTypeTraits::CppType check_value, bool expect_match) { int col_idx = 0; - std::shared_ptr pred(new ComparisonPredicateBase(col_idx, check_value)); + std::shared_ptr pred( + new ComparisonPredicateBase(col_idx, "", check_value)); SingleColumnBlockPredicate single_column_block_pred(pred); bool matched = single_column_block_pred.evaluate_and(bf); @@ -1388,7 +1390,7 @@ TEST_F(BlockColumnPredicateTest, PARQUET_COMPARISON_PREDICATE) { int value = 5; int col_idx = 0; std::shared_ptr pred( - new ComparisonPredicateBase(col_idx, value)); + new ComparisonPredicateBase(col_idx, "", value)); SingleColumnBlockPredicate single_column_block_pred(pred); std::unique_ptr parquet_field_col1 = std::make_unique(); @@ -1465,7 +1467,7 @@ TEST_F(BlockColumnPredicateTest, PARQUET_COMPARISON_PREDICATE) { int value = 5; int col_idx = 0; std::shared_ptr pred( - new ComparisonPredicateBase(col_idx, value)); + new ComparisonPredicateBase(col_idx, "", value)); SingleColumnBlockPredicate single_column_block_pred(pred); std::unique_ptr parquet_field_col1 = std::make_unique(); @@ -1534,7 +1536,7 @@ TEST_F(BlockColumnPredicateTest, PARQUET_COMPARISON_PREDICATE) { int value = 5; int col_idx = 0; std::shared_ptr pred( - new ComparisonPredicateBase(col_idx, value)); + new ComparisonPredicateBase(col_idx, "", value)); SingleColumnBlockPredicate single_column_block_pred(pred); std::unique_ptr parquet_field_col1 = std::make_unique(); @@ -1603,7 +1605,7 @@ TEST_F(BlockColumnPredicateTest, PARQUET_COMPARISON_PREDICATE) { int value = 5; int col_idx = 0; std::shared_ptr pred( - new ComparisonPredicateBase(col_idx, value)); + new ComparisonPredicateBase(col_idx, "", value)); SingleColumnBlockPredicate single_column_block_pred(pred); std::unique_ptr parquet_field_col1 = std::make_unique(); @@ -1675,7 +1677,7 @@ TEST_F(BlockColumnPredicateTest, PARQUET_COMPARISON_PREDICATE) { float value = 5.0; int col_idx = 0; std::shared_ptr pred( - new ComparisonPredicateBase(col_idx, value)); + new ComparisonPredicateBase(col_idx, "", value)); SingleColumnBlockPredicate single_column_block_pred(pred); std::unique_ptr parquet_field_col1 = std::make_unique(); @@ -1769,7 +1771,7 @@ TEST_F(BlockColumnPredicateTest, PARQUET_COMPARISON_PREDICATE) { float value = 5; int col_idx = 0; std::shared_ptr pred( - new ComparisonPredicateBase(col_idx, value)); + new ComparisonPredicateBase(col_idx, "", value)); SingleColumnBlockPredicate single_column_block_pred(pred); std::unique_ptr parquet_field_col1 = std::make_unique(); @@ -1838,7 +1840,7 @@ TEST_F(BlockColumnPredicateTest, PARQUET_COMPARISON_PREDICATE) { float value = 5.0; int col_idx = 0; std::shared_ptr pred( - new ComparisonPredicateBase(col_idx, value)); + new ComparisonPredicateBase(col_idx, "", value)); SingleColumnBlockPredicate single_column_block_pred(pred); std::unique_ptr parquet_field_col1 = std::make_unique(); @@ -1907,7 +1909,7 @@ TEST_F(BlockColumnPredicateTest, PARQUET_COMPARISON_PREDICATE) { float value = 5.0; int col_idx = 0; std::shared_ptr pred( - new ComparisonPredicateBase(col_idx, value)); + new ComparisonPredicateBase(col_idx, "", value)); SingleColumnBlockPredicate single_column_block_pred(pred); std::unique_ptr parquet_field_col1 = std::make_unique(); @@ -1983,7 +1985,7 @@ TEST_F(BlockColumnPredicateTest, PARQUET_IN_PREDICATE) { hybrid_set->insert(&value); std::shared_ptr pred( new InListPredicateBase( - col_idx, hybrid_set, false)); + col_idx, "", hybrid_set, false)); SingleColumnBlockPredicate single_column_block_pred(pred); std::unique_ptr parquet_field_col1 = std::make_unique(); @@ -2029,7 +2031,7 @@ TEST_F(BlockColumnPredicateTest, PARQUET_IN_PREDICATE) { hybrid_set->insert(&value); std::shared_ptr pred( new InListPredicateBase( - col_idx, hybrid_set, false)); + col_idx, "", hybrid_set, false)); SingleColumnBlockPredicate single_column_block_pred(pred); std::unique_ptr parquet_field_col1 = std::make_unique(); @@ -2076,7 +2078,7 @@ TEST_F(BlockColumnPredicateTest, PARQUET_COMPARISON_PREDICATE_BLOOM_FILTER) { const int value = 42; const int col_idx = 0; std::shared_ptr pred( - new ComparisonPredicateBase(col_idx, value)); + new ComparisonPredicateBase(col_idx, "", value)); SingleColumnBlockPredicate single_column_block_pred(pred); auto parquet_field = std::make_unique(); @@ -2238,7 +2240,7 @@ TEST_F(BlockColumnPredicateTest, PARQUET_IN_PREDICATE_BLOOM_FILTER) { const int included_value = 7; hybrid_set->insert(&included_value); std::shared_ptr pred( - new InListPredicateBase(col_idx, hybrid_set, + new InListPredicateBase(col_idx, "", hybrid_set, false)); SingleColumnBlockPredicate single_column_block_pred(pred); @@ -2370,7 +2372,7 @@ TEST_F(BlockColumnPredicateTest, NULL_PREDICATE) { { int col_idx = 0; std::shared_ptr pred( - new NullPredicate(col_idx, true, PrimitiveType::TYPE_INT)); + new NullPredicate(col_idx, "", true, PrimitiveType::TYPE_INT)); SingleColumnBlockPredicate single_column_block_pred(pred); std::unique_ptr parquet_field_col1 = std::make_unique(); @@ -2408,7 +2410,7 @@ TEST_F(BlockColumnPredicateTest, NULL_PREDICATE) { { int col_idx = 0; std::shared_ptr pred( - new NullPredicate(col_idx, false, PrimitiveType::TYPE_INT)); + new NullPredicate(col_idx, "", false, PrimitiveType::TYPE_INT)); SingleColumnBlockPredicate single_column_block_pred(pred); std::unique_ptr parquet_field_col1 = std::make_unique(); @@ -2464,12 +2466,12 @@ TEST_F(BlockColumnPredicateTest, COMBINED_PREDICATE) { int col_idx = 0; int value = 5; std::shared_ptr pred( - new ComparisonPredicateBase(col_idx, value)); + new ComparisonPredicateBase(col_idx, "", value)); true_predicate = std::make_unique(pred); std::unique_ptr false_predicate; std::shared_ptr pred2( - new ComparisonPredicateBase(col_idx, value)); + new ComparisonPredicateBase(col_idx, "", value)); false_predicate = std::make_unique(pred2); std::unique_ptr parquet_field_col1 = @@ -2509,12 +2511,12 @@ TEST_F(BlockColumnPredicateTest, COMBINED_PREDICATE) { int col_idx = 0; int value = 5; std::shared_ptr pred( - new ComparisonPredicateBase(col_idx, value)); + new ComparisonPredicateBase(col_idx, "", value)); true_predicate = std::make_unique(pred); std::unique_ptr true_predicate2; std::shared_ptr pred2( - new ComparisonPredicateBase(col_idx, value)); + new ComparisonPredicateBase(col_idx, "", value)); true_predicate2 = std::make_unique(pred2); std::unique_ptr parquet_field_col1 = @@ -2554,12 +2556,12 @@ TEST_F(BlockColumnPredicateTest, COMBINED_PREDICATE) { int col_idx = 0; int value = 5; std::shared_ptr pred( - new ComparisonPredicateBase(col_idx, value)); + new ComparisonPredicateBase(col_idx, "", value)); true_predicate = std::make_unique(pred); std::unique_ptr false_predicate; std::shared_ptr pred2( - new ComparisonPredicateBase(col_idx, value)); + new ComparisonPredicateBase(col_idx, "", value)); false_predicate = std::make_unique(pred2); std::unique_ptr parquet_field_col1 = @@ -2599,12 +2601,12 @@ TEST_F(BlockColumnPredicateTest, COMBINED_PREDICATE) { int col_idx = 0; int value = 5; std::shared_ptr pred( - new ComparisonPredicateBase(col_idx, value)); + new ComparisonPredicateBase(col_idx, "", value)); false_predicate2 = std::make_unique(pred); std::unique_ptr false_predicate; std::shared_ptr pred2( - new ComparisonPredicateBase(col_idx, value)); + new ComparisonPredicateBase(col_idx, "", value)); false_predicate = std::make_unique(pred2); std::unique_ptr parquet_field_col1 = @@ -2644,7 +2646,7 @@ TEST_F(BlockColumnPredicateTest, COMBINED_PREDICATE) { int value = 5; std::unique_ptr false_predicate; std::shared_ptr pred2( - new ComparisonPredicateBase(col_idx, value)); + new ComparisonPredicateBase(col_idx, "", value)); false_predicate = std::make_unique(pred2); std::unique_ptr parquet_field_col1 = diff --git a/be/test/olap/date_bloom_filter_test.cpp b/be/test/olap/date_bloom_filter_test.cpp index a75bf852ac5239..b5539fff686298 100644 --- a/be/test/olap/date_bloom_filter_test.cpp +++ b/be/test/olap/date_bloom_filter_test.cpp @@ -180,7 +180,7 @@ TEST_F(DateBloomFilterTest, query_index_test) { auto test = [&](const std::string& query_string, bool result) { auto date = timestamp_from_date(query_string); std::unique_ptr> date_pred( - new ComparisonPredicateBase(0, date)); + new ComparisonPredicateBase(0, "", date)); EXPECT_EQ(date_pred->evaluate_and(bf.get()), result); }; test("2024-11-08", true); @@ -202,7 +202,7 @@ TEST_F(DateBloomFilterTest, query_index_test) { auto test = [&](const std::string& query_string, bool result) { auto datetime = timestamp_from_datetime(query_string); std::unique_ptr> date_pred( - new ComparisonPredicateBase(0, datetime)); + new ComparisonPredicateBase(0, "", datetime)); EXPECT_EQ(date_pred->evaluate_and(bf.get()), result); }; test("2024-11-08 09:00:00", true); @@ -276,17 +276,17 @@ TEST_F(DateBloomFilterTest, in_list_predicate_test) { test_positive({"2024-11-08", "2024-11-09"}); std::unique_ptr> date_pred0( - new InListPredicateBase(0, hybrid_set, + new InListPredicateBase(0, "", hybrid_set, false)); EXPECT_EQ(date_pred0->evaluate_and(bf.get()), true); test_positive({"2024-11-08"}); std::unique_ptr> date_pred1( - new InListPredicateBase(0, hybrid_set, + new InListPredicateBase(0, "", hybrid_set, false)); EXPECT_EQ(date_pred1->evaluate_and(bf.get()), true); test_positive({"2024-11-09"}); std::unique_ptr> date_pred2( - new InListPredicateBase(0, hybrid_set, + new InListPredicateBase(0, "", hybrid_set, false)); EXPECT_EQ(date_pred2->evaluate_and(bf.get()), true); @@ -301,19 +301,19 @@ TEST_F(DateBloomFilterTest, in_list_predicate_test) { test_negative({"2024-11-20"}); std::unique_ptr> date_pred00( - new InListPredicateBase(0, hybrid_set, + new InListPredicateBase(0, "", hybrid_set, false)); EXPECT_EQ(date_pred00->evaluate_and(bf.get()), false); test_negative({"2024-11-08", "2024-11-20"}); std::unique_ptr> date_pred10( - new InListPredicateBase(0, hybrid_set, + new InListPredicateBase(0, "", hybrid_set, false)); EXPECT_EQ(date_pred10->evaluate_and(bf.get()), true); test_negative({"2024-11-20", "2024-11-21"}); std::unique_ptr> date_pred20( - new InListPredicateBase(0, hybrid_set, + new InListPredicateBase(0, "", hybrid_set, false)); EXPECT_EQ(date_pred20->evaluate_and(bf.get()), false); @@ -344,17 +344,17 @@ TEST_F(DateBloomFilterTest, in_list_predicate_test) { test_positive({"2024-11-08 09:00:00", "2024-11-09 09:00:00"}); std::unique_ptr> datetime_pred0(new InListPredicateBase( - 0, hybrid_set, false)); + 0, "", hybrid_set, false)); EXPECT_EQ(datetime_pred0->evaluate_and(bf.get()), true); test_positive({"2024-11-08 09:00:00"}); std::unique_ptr> datetime_pred1(new InListPredicateBase( - 0, hybrid_set, false)); + 0, "", hybrid_set, false)); EXPECT_EQ(datetime_pred1->evaluate_and(bf.get()), true); test_positive({"2024-11-09 09:00:00"}); std::unique_ptr> datetime_pred2(new InListPredicateBase( - 0, hybrid_set, false)); + 0, "", hybrid_set, false)); EXPECT_EQ(datetime_pred2->evaluate_and(bf.get()), true); // Test negative cases @@ -370,17 +370,17 @@ TEST_F(DateBloomFilterTest, in_list_predicate_test) { test_negative({"2024-11-20 09:00:00"}); std::unique_ptr> datetime_pred33(new InListPredicateBase( - 0, hybrid_set, false)); + 0, "", hybrid_set, false)); EXPECT_EQ(datetime_pred33->evaluate_and(bf.get()), false); test_negative({"2024-11-08 09:00:00", "2024-11-20 09:00:00"}); std::unique_ptr> datetime_pred34(new InListPredicateBase( - 0, hybrid_set, false)); + 0, "", hybrid_set, false)); EXPECT_EQ(datetime_pred34->evaluate_and(bf.get()), true); test_negative({"2024-11-20 09:00:00", "2024-11-21 09:00:00"}); std::unique_ptr> datetime_pred45(new InListPredicateBase( - 0, hybrid_set, false)); + 0, "", hybrid_set, false)); EXPECT_EQ(datetime_pred45->evaluate_and(bf.get()), false); } } diff --git a/be/test/pipeline/operator/scan_normalize_predicate_test.cpp b/be/test/pipeline/operator/scan_normalize_predicate_test.cpp index f7274bd96b115b..081444ed02ad65 100644 --- a/be/test/pipeline/operator/scan_normalize_predicate_test.cpp +++ b/be/test/pipeline/operator/scan_normalize_predicate_test.cpp @@ -55,7 +55,8 @@ TEST_F(ScanNormalizePredicate, test1) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = MockSlotRef::create_mock_context(0, std::make_shared()); - auto st = local_state->_normalize_predicate(conjunct_expr_root.get(), new_root); + auto st = local_state->_normalize_predicate(conjunct_expr_root.get(), + conjunct_expr_root->root(), new_root, nullptr); EXPECT_TRUE(st) << st.msg(); std::cout << new_root->debug_string() << std::endl; } @@ -83,7 +84,8 @@ TEST_F(ScanNormalizePredicate, test_eval_const_conjuncts1) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = ctx; - auto st = local_state->_normalize_predicate(conjunct_expr_root.get(), new_root); + auto st = local_state->_normalize_predicate(conjunct_expr_root.get(), + conjunct_expr_root->root(), new_root, nullptr); EXPECT_EQ(new_root, nullptr); EXPECT_TRUE(local_state->_scan_dependency->ready()); @@ -112,7 +114,8 @@ TEST_F(ScanNormalizePredicate, test_eval_const_conjuncts2) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = ctx; - auto st = local_state->_normalize_predicate(conjunct_expr_root.get(), new_root); + auto st = local_state->_normalize_predicate(conjunct_expr_root.get(), + conjunct_expr_root->root(), new_root, nullptr); EXPECT_EQ(new_root, nullptr); EXPECT_TRUE(local_state->_scan_dependency->ready()); EXPECT_TRUE(local_state->_eos); @@ -159,7 +162,8 @@ TEST_F(ScanNormalizePredicate, test_eval_const_conjuncts4) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = ctx; - auto st = local_state->_normalize_predicate(conjunct_expr_root.get(), new_root); + auto st = local_state->_normalize_predicate(conjunct_expr_root.get(), + conjunct_expr_root->root(), new_root, nullptr); EXPECT_TRUE(st.ok()); std::cout << st.msg() << std::endl; } @@ -200,7 +204,8 @@ TEST_F(ScanNormalizePredicate, test_is_predicate_acting_on_slot1) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = ctx; - auto st = local_state->_normalize_predicate(conjunct_expr_root.get(), new_root); + auto st = local_state->_normalize_predicate(conjunct_expr_root.get(), + conjunct_expr_root->root(), new_root, nullptr); EXPECT_TRUE(st.ok()); std::cout << st.msg() << std::endl; } @@ -258,7 +263,8 @@ TEST_F(ScanNormalizePredicate, test_is_predicate_acting_on_slot2) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = ctx; - auto st = local_state->_normalize_predicate(conjunct_expr_root.get(), new_root); + auto st = local_state->_normalize_predicate(conjunct_expr_root.get(), + conjunct_expr_root->root(), new_root, nullptr); EXPECT_TRUE(st.ok()); std::cout << st.msg() << std::endl; } @@ -281,7 +287,8 @@ TEST_F(ScanNormalizePredicate, test_is_predicate_acting_on_slot2) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = ctx; - auto st = local_state->_normalize_predicate(conjunct_expr_root.get(), new_root); + auto st = local_state->_normalize_predicate(conjunct_expr_root.get(), + conjunct_expr_root->root(), new_root, nullptr); EXPECT_TRUE(st.ok()); std::cout << st.msg() << std::endl; } @@ -342,7 +349,8 @@ TEST_F(ScanNormalizePredicate, test_is_predicate_acting_on_slot3) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = ctx; - EXPECT_TRUE(local_state->_normalize_predicate(conjunct_expr_root.get(), new_root)); + EXPECT_TRUE(local_state->_normalize_predicate( + conjunct_expr_root.get(), conjunct_expr_root->root(), new_root, nullptr)); } EXPECT_TRUE(local_state->_scan_dependency->ready()); EXPECT_TRUE(local_state->_eos); @@ -380,7 +388,8 @@ TEST_F(ScanNormalizePredicate, test_is_predicate_acting_on_slot4) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = ctx; - EXPECT_TRUE(local_state->_normalize_predicate(conjunct_expr_root.get(), new_root)); + EXPECT_TRUE(local_state->_normalize_predicate( + conjunct_expr_root.get(), conjunct_expr_root->root(), new_root, nullptr)); } EXPECT_TRUE(local_state->_slot_id_to_value_range.contains(SlotId)); @@ -437,7 +446,8 @@ TEST_F(ScanNormalizePredicate, test_is_predicate_acting_on_slot5) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = ctx; - EXPECT_TRUE(local_state->_normalize_predicate(conjunct_expr_root.get(), new_root)); + EXPECT_TRUE(local_state->_normalize_predicate( + conjunct_expr_root.get(), conjunct_expr_root->root(), new_root, nullptr)); } EXPECT_TRUE(local_state->_slot_id_to_value_range.contains(SlotId)); @@ -500,7 +510,8 @@ TEST_F(ScanNormalizePredicate, test_is_predicate_acting_on_slot6) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = ctx; - EXPECT_TRUE(local_state->_normalize_predicate(conjunct_expr_root.get(), new_root)); + EXPECT_TRUE(local_state->_normalize_predicate( + conjunct_expr_root.get(), conjunct_expr_root->root(), new_root, nullptr)); } EXPECT_TRUE(local_state->_slot_id_to_value_range.contains(SlotId)); @@ -563,7 +574,8 @@ TEST_F(ScanNormalizePredicate, test_is_predicate_acting_on_slot7) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = ctx; - EXPECT_TRUE(local_state->_normalize_predicate(conjunct_expr_root.get(), new_root)); + EXPECT_TRUE(local_state->_normalize_predicate( + conjunct_expr_root.get(), conjunct_expr_root->root(), new_root, nullptr)); } EXPECT_TRUE(local_state->_scan_dependency->ready()); @@ -608,7 +620,8 @@ TEST_F(ScanNormalizePredicate, test_is_predicate_acting_on_slot8) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = ctx; - EXPECT_TRUE(local_state->_normalize_predicate(conjunct_expr_root.get(), new_root)); + EXPECT_TRUE(local_state->_normalize_predicate( + conjunct_expr_root.get(), conjunct_expr_root->root(), new_root, nullptr)); } auto& output_range = local_state->_slot_id_to_value_range[SlotId]; @@ -660,7 +673,8 @@ TEST_F(ScanNormalizePredicate, test_is_predicate_acting_on_slot10) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = ctx; - EXPECT_TRUE(local_state->_normalize_predicate(conjunct_expr_root.get(), new_root)); + EXPECT_TRUE(local_state->_normalize_predicate( + conjunct_expr_root.get(), conjunct_expr_root->root(), new_root, nullptr)); } } @@ -701,7 +715,8 @@ TEST_F(ScanNormalizePredicate, test_is_predicate_acting_on_slot11) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = ctx; - EXPECT_TRUE(local_state->_normalize_predicate(conjunct_expr_root.get(), new_root)); + EXPECT_TRUE(local_state->_normalize_predicate( + conjunct_expr_root.get(), conjunct_expr_root->root(), new_root, nullptr)); } } @@ -745,7 +760,8 @@ TEST_F(ScanNormalizePredicate, test_is_predicate_acting_on_slot12) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = ctx; - EXPECT_TRUE(local_state->_normalize_predicate(conjunct_expr_root.get(), new_root)); + EXPECT_TRUE(local_state->_normalize_predicate( + conjunct_expr_root.get(), conjunct_expr_root->root(), new_root, nullptr)); } auto& output_range = local_state->_slot_id_to_value_range[SlotId]; @@ -801,7 +817,8 @@ TEST_F(ScanNormalizePredicate, test_is_predicate_acting_on_slot13) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = ctx; - EXPECT_TRUE(local_state->_normalize_predicate(conjunct_expr_root.get(), new_root)); + EXPECT_TRUE(local_state->_normalize_predicate( + conjunct_expr_root.get(), conjunct_expr_root->root(), new_root, nullptr)); } auto& output_range = local_state->_slot_id_to_value_range[SlotId]; @@ -857,7 +874,8 @@ TEST_F(ScanNormalizePredicate, test_is_predicate_acting_on_slot14) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = ctx; - EXPECT_TRUE(local_state->_normalize_predicate(conjunct_expr_root.get(), new_root)); + EXPECT_TRUE(local_state->_normalize_predicate( + conjunct_expr_root.get(), conjunct_expr_root->root(), new_root, nullptr)); } auto& output_range = local_state->_slot_id_to_value_range[SlotId]; @@ -917,7 +935,8 @@ TEST_F(ScanNormalizePredicate, test_is_predicate_acting_on_slot15) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = ctx; - EXPECT_TRUE(local_state->_normalize_predicate(conjunct_expr_root.get(), new_root)); + EXPECT_TRUE(local_state->_normalize_predicate( + conjunct_expr_root.get(), conjunct_expr_root->root(), new_root, nullptr)); } auto& output_range = local_state->_slot_id_to_value_range[SlotId]; @@ -982,7 +1001,8 @@ TEST_F(ScanNormalizePredicate, test_double_predicate) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = ctx; - auto st = local_state->_normalize_predicate(conjunct_expr_root.get(), new_root); + auto st = local_state->_normalize_predicate(conjunct_expr_root.get(), + conjunct_expr_root->root(), new_root, nullptr); EXPECT_TRUE(st.ok()); EXPECT_EQ(new_root, nullptr); @@ -1023,7 +1043,8 @@ TEST_F(ScanNormalizePredicate, test_double_predicate) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = ctx; - auto st = local_state->_normalize_predicate(conjunct_expr_root.get(), new_root); + auto st = local_state->_normalize_predicate(conjunct_expr_root.get(), + conjunct_expr_root->root(), new_root, nullptr); EXPECT_TRUE(st.ok()); EXPECT_EQ(new_root, nullptr); EXPECT_TRUE(local_state->_slot_id_to_value_range.contains(SlotId)); @@ -1065,7 +1086,8 @@ TEST_F(ScanNormalizePredicate, test_double_predicate) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = ctx; - EXPECT_TRUE(local_state->_normalize_predicate(conjunct_expr_root.get(), new_root)); + EXPECT_TRUE(local_state->_normalize_predicate( + conjunct_expr_root.get(), conjunct_expr_root->root(), new_root, nullptr)); EXPECT_EQ(new_root, nullptr); EXPECT_TRUE(local_state->_slot_id_to_value_range.contains(SlotId)); } @@ -1091,7 +1113,8 @@ TEST_F(ScanNormalizePredicate, test_double_predicate) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = ctx; - auto st = local_state->_normalize_predicate(conjunct_expr_root.get(), new_root); + auto st = local_state->_normalize_predicate(conjunct_expr_root.get(), + conjunct_expr_root->root(), new_root, nullptr); EXPECT_TRUE(st.ok()); EXPECT_EQ(new_root, nullptr); EXPECT_TRUE(local_state->_slot_id_to_value_range.contains(SlotId)); @@ -1116,7 +1139,8 @@ TEST_F(ScanNormalizePredicate, test_double_predicate) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = ctx; - EXPECT_TRUE(local_state->_normalize_predicate(conjunct_expr_root.get(), new_root)); + EXPECT_TRUE(local_state->_normalize_predicate( + conjunct_expr_root.get(), conjunct_expr_root->root(), new_root, nullptr)); auto& output_range = local_state->_slot_id_to_value_range[SlotId]; std::visit( [](auto&& arg) { @@ -1154,7 +1178,8 @@ TEST_F(ScanNormalizePredicate, test_double_predicate) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = ctx; - EXPECT_TRUE(local_state->_normalize_predicate(conjunct_expr_root.get(), new_root)); + EXPECT_TRUE(local_state->_normalize_predicate( + conjunct_expr_root.get(), conjunct_expr_root->root(), new_root, nullptr)); auto& output_range = local_state->_slot_id_to_value_range[SlotId]; std::visit( [](auto&& arg) { @@ -1195,7 +1220,8 @@ TEST_F(ScanNormalizePredicate, test_double_predicate) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = ctx; - auto st = local_state->_normalize_predicate(conjunct_expr_root.get(), new_root); + auto st = local_state->_normalize_predicate(conjunct_expr_root.get(), + conjunct_expr_root->root(), new_root, nullptr); EXPECT_TRUE(st.ok()); EXPECT_EQ(new_root, nullptr); @@ -1254,7 +1280,8 @@ TEST_F(ScanNormalizePredicate, test_double_predicate) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = ctx; - auto st = local_state->_normalize_predicate(conjunct_expr_root.get(), new_root); + auto st = local_state->_normalize_predicate(conjunct_expr_root.get(), + conjunct_expr_root->root(), new_root, nullptr); EXPECT_TRUE(st.ok()); EXPECT_EQ(new_root, nullptr); @@ -1310,7 +1337,8 @@ TEST_F(ScanNormalizePredicate, test_double_predicate) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = ctx; - auto st = local_state->_normalize_predicate(conjunct_expr_root.get(), new_root); + auto st = local_state->_normalize_predicate(conjunct_expr_root.get(), + conjunct_expr_root->root(), new_root, nullptr); EXPECT_TRUE(st.ok()); EXPECT_EQ(new_root, nullptr); @@ -1369,7 +1397,8 @@ TEST_F(ScanNormalizePredicate, test_double_predicate) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = ctx; - auto st = local_state->_normalize_predicate(conjunct_expr_root.get(), new_root); + auto st = local_state->_normalize_predicate(conjunct_expr_root.get(), + conjunct_expr_root->root(), new_root, nullptr); EXPECT_TRUE(st.ok()); EXPECT_EQ(new_root, nullptr); @@ -1450,7 +1479,8 @@ TEST_F(ScanNormalizePredicate, test_timestamptz_predicate) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = ctx; - auto st = local_state->_normalize_predicate(conjunct_expr_root.get(), new_root); + auto st = local_state->_normalize_predicate(conjunct_expr_root.get(), + conjunct_expr_root->root(), new_root, nullptr); EXPECT_TRUE(st.ok()); EXPECT_EQ(new_root, nullptr); @@ -1492,7 +1522,8 @@ TEST_F(ScanNormalizePredicate, test_timestamptz_predicate) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = ctx; - auto st = local_state->_normalize_predicate(conjunct_expr_root.get(), new_root); + auto st = local_state->_normalize_predicate(conjunct_expr_root.get(), + conjunct_expr_root->root(), new_root, nullptr); EXPECT_TRUE(st.ok()); EXPECT_EQ(new_root, nullptr); EXPECT_TRUE(local_state->_slot_id_to_value_range.contains(SlotId)); @@ -1536,7 +1567,8 @@ TEST_F(ScanNormalizePredicate, test_timestamptz_predicate) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = ctx; - EXPECT_TRUE(local_state->_normalize_predicate(conjunct_expr_root.get(), new_root)); + EXPECT_TRUE(local_state->_normalize_predicate( + conjunct_expr_root.get(), conjunct_expr_root->root(), new_root, nullptr)); EXPECT_EQ(new_root, nullptr); EXPECT_TRUE(local_state->_slot_id_to_value_range.contains(SlotId)); } @@ -1563,7 +1595,8 @@ TEST_F(ScanNormalizePredicate, test_timestamptz_predicate) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = ctx; - auto st = local_state->_normalize_predicate(conjunct_expr_root.get(), new_root); + auto st = local_state->_normalize_predicate(conjunct_expr_root.get(), + conjunct_expr_root->root(), new_root, nullptr); EXPECT_TRUE(st.ok()); EXPECT_EQ(new_root, nullptr); EXPECT_TRUE(local_state->_slot_id_to_value_range.contains(SlotId)); @@ -1592,7 +1625,8 @@ TEST_F(ScanNormalizePredicate, test_timestamptz_predicate) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = ctx; - EXPECT_TRUE(local_state->_normalize_predicate(conjunct_expr_root.get(), new_root)); + EXPECT_TRUE(local_state->_normalize_predicate( + conjunct_expr_root.get(), conjunct_expr_root->root(), new_root, nullptr)); auto& output_range = local_state->_slot_id_to_value_range[SlotId]; std::visit( [](auto&& arg) { @@ -1631,7 +1665,8 @@ TEST_F(ScanNormalizePredicate, test_timestamptz_predicate) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = ctx; - EXPECT_TRUE(local_state->_normalize_predicate(conjunct_expr_root.get(), new_root)); + EXPECT_TRUE(local_state->_normalize_predicate( + conjunct_expr_root.get(), conjunct_expr_root->root(), new_root, nullptr)); auto& output_range = local_state->_slot_id_to_value_range[SlotId]; std::visit( [](auto&& arg) { @@ -1674,7 +1709,8 @@ TEST_F(ScanNormalizePredicate, test_timestamptz_predicate) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = ctx; - auto st = local_state->_normalize_predicate(conjunct_expr_root.get(), new_root); + auto st = local_state->_normalize_predicate(conjunct_expr_root.get(), + conjunct_expr_root->root(), new_root, nullptr); EXPECT_TRUE(st.ok()); EXPECT_EQ(new_root, nullptr); @@ -1734,7 +1770,8 @@ TEST_F(ScanNormalizePredicate, test_timestamptz_predicate) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = ctx; - auto st = local_state->_normalize_predicate(conjunct_expr_root.get(), new_root); + auto st = local_state->_normalize_predicate(conjunct_expr_root.get(), + conjunct_expr_root->root(), new_root, nullptr); EXPECT_TRUE(st.ok()); EXPECT_EQ(new_root, nullptr); @@ -1791,7 +1828,8 @@ TEST_F(ScanNormalizePredicate, test_timestamptz_predicate) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = ctx; - auto st = local_state->_normalize_predicate(conjunct_expr_root.get(), new_root); + auto st = local_state->_normalize_predicate(conjunct_expr_root.get(), + conjunct_expr_root->root(), new_root, nullptr); EXPECT_TRUE(st.ok()); EXPECT_EQ(new_root, nullptr); @@ -1851,7 +1889,8 @@ TEST_F(ScanNormalizePredicate, test_timestamptz_predicate) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = ctx; - auto st = local_state->_normalize_predicate(conjunct_expr_root.get(), new_root); + auto st = local_state->_normalize_predicate(conjunct_expr_root.get(), + conjunct_expr_root->root(), new_root, nullptr); EXPECT_TRUE(st.ok()); EXPECT_EQ(new_root, nullptr); diff --git a/be/test/vec/exec/format/parquet/parquet_expr_test.cpp b/be/test/vec/exec/format/parquet/parquet_expr_test.cpp index a4f2ac1d34237c..ebc949f706cb95 100644 --- a/be/test/vec/exec/format/parquet/parquet_expr_test.cpp +++ b/be/test/vec/exec/format/parquet/parquet_expr_test.cpp @@ -281,9 +281,11 @@ class ParquetExprTest : public testing::Test { &ctz, nullptr, nullptr); p_reader->set_file_reader(local_file_reader); colname_to_slot_id.emplace("int64_col", 2); - static_cast(p_reader->init_reader(column_names, &col_name_to_block_idx, {}, - tuple_desc, nullptr, &colname_to_slot_id, nullptr, - nullptr)); + phmap::flat_hash_map>> tmp; + std::vector> or_predicates; + static_cast(p_reader->init_reader(column_names, &col_name_to_block_idx, {}, tmp, + or_predicates, tuple_desc, nullptr, + &colname_to_slot_id, nullptr, nullptr)); size_t meta_size; static_cast(parse_thrift_footer(p_reader->_file_reader, &doris_file_metadata, @@ -401,126 +403,6 @@ TEST_F(ParquetExprTest, test_min_max) { } } -TEST_F(ParquetExprTest, test_ne) { - auto slot_ref = std::make_shared(0, std::make_shared()); - auto fn_eq = MockFnCall::create("ne"); - auto const_val = std::make_shared( - ColumnHelper::create_column_with_name({100})); - - slot_ref->set_expr_name("int32_all_null_col"); - fn_eq->add_child(slot_ref); - fn_eq->add_child(const_val); - fn_eq->_node_type = TExprNodeType::BINARY_PRED; - fn_eq->_opcode = TExprOpcode::NE; - slot_ref->_slot_id = 1; - EXPECT_FALSE(fn_eq->is_constant()); - - auto ctx = VExprContext::create_shared(fn_eq); - ctx->_prepared = true; - ctx->_opened = true; - ASSERT_FALSE(p_reader->check_expr_can_push_down(ctx->root())); -} - -TEST_F(ParquetExprTest, test_eq) { - auto slot_ref = std::make_shared(0, std::make_shared()); - auto fn_eq = MockFnCall::create("eq"); - auto const_val = std::make_shared( - ColumnHelper::create_column_with_name({100})); - slot_ref->set_expr_name("int32_all_null_col"); - fn_eq->add_child(slot_ref); - fn_eq->add_child(const_val); - fn_eq->_node_type = TExprNodeType::BINARY_PRED; - fn_eq->_opcode = TExprOpcode::EQ; - slot_ref->_slot_id = 1; - slot_ref->_column_id = 1; - EXPECT_FALSE(fn_eq->is_constant()); - - auto ctx = VExprContext::create_shared(fn_eq); - ctx->_prepared = true; - ctx->_opened = true; - ASSERT_TRUE(p_reader->check_expr_can_push_down(ctx->root())); -} - -TEST_F(ParquetExprTest, test_le) { - auto slot_ref = std::make_shared(0, std::make_shared()); - auto fn_eq = MockFnCall::create("le"); - auto const_val = std::make_shared( - ColumnHelper::create_column_with_name({100})); - slot_ref->set_expr_name("int32_all_null_col"); - fn_eq->add_child(slot_ref); - fn_eq->add_child(const_val); - fn_eq->_node_type = TExprNodeType::BINARY_PRED; - fn_eq->_opcode = TExprOpcode::LE; - slot_ref->_slot_id = 1; - slot_ref->_column_id = 1; - EXPECT_FALSE(fn_eq->is_constant()); - - auto ctx = VExprContext::create_shared(fn_eq); - ctx->_prepared = true; - ctx->_opened = true; - ASSERT_TRUE(p_reader->check_expr_can_push_down(ctx->root())); -} - -TEST_F(ParquetExprTest, test_ge) { - auto slot_ref = std::make_shared(0, std::make_shared()); - auto fn_eq = MockFnCall::create("ge"); - auto const_val = std::make_shared( - ColumnHelper::create_column_with_name({100})); - slot_ref->set_expr_name("int32_all_null_col"); - fn_eq->add_child(slot_ref); - fn_eq->add_child(const_val); - fn_eq->_node_type = TExprNodeType::BINARY_PRED; - fn_eq->_opcode = TExprOpcode::GE; - slot_ref->_slot_id = 1; - slot_ref->_column_id = 1; - EXPECT_FALSE(fn_eq->is_constant()); - - auto ctx = VExprContext::create_shared(fn_eq); - ctx->_prepared = true; - ctx->_opened = true; - ASSERT_TRUE(p_reader->check_expr_can_push_down(ctx->root())); -} - -TEST_F(ParquetExprTest, test_gt) { - auto slot_ref = std::make_shared(0, std::make_shared()); - auto fn_eq = MockFnCall::create("gt"); - auto const_val = std::make_shared( - ColumnHelper::create_column_with_name({100})); - slot_ref->set_expr_name("int32_all_null_col"); - fn_eq->add_child(slot_ref); - fn_eq->add_child(const_val); - fn_eq->_node_type = TExprNodeType::BINARY_PRED; - fn_eq->_opcode = TExprOpcode::GT; - slot_ref->_slot_id = 1; - slot_ref->_column_id = 1; - EXPECT_FALSE(fn_eq->is_constant()); - - auto ctx = VExprContext::create_shared(fn_eq); - ctx->_prepared = true; - ctx->_opened = true; - ASSERT_TRUE(p_reader->check_expr_can_push_down(ctx->root())); -} - -TEST_F(ParquetExprTest, test_lt) { - auto slot_ref = std::make_shared(0, std::make_shared()); - auto fn_eq = MockFnCall::create("lt"); - auto const_val = std::make_shared( - ColumnHelper::create_column_with_name({100})); - slot_ref->set_expr_name("int32_all_null_col"); - fn_eq->add_child(slot_ref); - fn_eq->add_child(const_val); - fn_eq->_node_type = TExprNodeType::BINARY_PRED; - fn_eq->_opcode = TExprOpcode::LT; - slot_ref->_slot_id = 1; - slot_ref->_column_id = 1; - EXPECT_FALSE(fn_eq->is_constant()); - - auto ctx = VExprContext::create_shared(fn_eq); - ctx->_prepared = true; - ctx->_opened = true; - ASSERT_TRUE(p_reader->check_expr_can_push_down(ctx->root())); -} - TEST_F(ParquetExprTest, test_ge_2) { // int64_col = 10000000001 [10000000000 , 10000000000+3) // int64_col = 10000000001 [10000000000 , 10000000000+3) int loc = 2; @@ -540,7 +422,6 @@ TEST_F(ParquetExprTest, test_ge_2) { // int64_col = 10000000001 [10000000000 , auto ctx = VExprContext::create_shared(fn_eq); ctx->_prepared = true; ctx->_opened = true; - ASSERT_TRUE(p_reader->check_expr_can_push_down(ctx->root())); { const std::function& @@ -592,7 +473,6 @@ TEST_F(ParquetExprTest, test_lt_2) { // string_col < name_1 auto ctx = VExprContext::create_shared(fn_eq); ctx->_prepared = true; ctx->_opened = true; - ASSERT_TRUE(p_reader->check_expr_can_push_down(ctx->root())); { const std::function& @@ -636,8 +516,6 @@ TEST_F(ParquetExprTest, test_is_null) { // int32_all_null_col is null ctx->_prepared = true; ctx->_opened = true; - ASSERT_TRUE(p_reader->check_expr_can_push_down(ctx->root())); - { const std::function& get_stat_func = @@ -687,8 +565,6 @@ TEST_F(ParquetExprTest, test_is_not_null) { // int32_all_null_col is not null ctx->_prepared = true; ctx->_opened = true; - ASSERT_TRUE(p_reader->check_expr_can_push_down(ctx->root())); - { const std::function& get_stat_func = @@ -738,8 +614,6 @@ TEST_F(ParquetExprTest, test_is_null_2) { // int32_partial_null_col is null ctx->_prepared = true; ctx->_opened = true; - ASSERT_TRUE(p_reader->check_expr_can_push_down(ctx->root())); - { const std::function& get_stat_func = @@ -1174,69 +1048,28 @@ TEST_F(ParquetExprTest, test_expr_push_down_eq_bool) { } TEST_F(ParquetExprTest, test_expr_push_down_and) { + std::unique_ptr pred = AndBlockColumnPredicate::create_unique(); auto and_expr = std::make_shared(); and_expr->_op = TExprOpcode::COMPOUND_AND; and_expr->_opcode = TExprOpcode::COMPOUND_AND; and_expr->_node_type = TExprNodeType::COMPOUND_PRED; // x <= 10000000002 { - auto slot_ref = std::make_shared(2, std::make_shared()); - auto fn_le = MockFnCall::create("le"); - auto const_val = std::make_shared( - ColumnHelper::create_column_with_name({10000000002})); - slot_ref->set_expr_name("int64_col"); - fn_le->add_child(slot_ref); - fn_le->add_child(const_val); - fn_le->_node_type = TExprNodeType::BINARY_PRED; - fn_le->_opcode = TExprOpcode::LE; - slot_ref->_slot_id = 2; - slot_ref->_column_id = 2; - EXPECT_FALSE(fn_le->is_constant()); - - auto ctx = VExprContext::create_shared(fn_le); - ctx->_prepared = true; - ctx->_opened = true; - and_expr->add_child(ctx->root()); + pred->add_column_predicate(SingleColumnBlockPredicate::create_unique( + ComparisonPredicateBase::create_shared( + 2, "", 10000000002))); } { // x > 100 - auto slot_ref = std::make_shared(2, std::make_shared()); - auto fn_le = MockFnCall::create("gt"); - auto const_val = std::make_shared( - ColumnHelper::create_column_with_name({100})); - slot_ref->set_expr_name("int64_col"); - fn_le->add_child(slot_ref); - fn_le->add_child(const_val); - fn_le->_node_type = TExprNodeType::BINARY_PRED; - fn_le->_opcode = TExprOpcode::GT; - slot_ref->_slot_id = 2; - slot_ref->_column_id = 2; - EXPECT_FALSE(fn_le->is_constant()); - - auto ctx = VExprContext::create_shared(fn_le); - ctx->_prepared = true; - ctx->_opened = true; - and_expr->add_child(ctx->root()); + pred->add_column_predicate(SingleColumnBlockPredicate::create_unique( + ComparisonPredicateBase::create_shared(2, "", + 100))); } { // x >= 900 - auto slot_ref = std::make_shared(2, std::make_shared()); - auto fn_le = MockFnCall::create("ge"); - auto const_val = std::make_shared( - ColumnHelper::create_column_with_name({900})); - slot_ref->set_expr_name("int64_col"); - fn_le->add_child(slot_ref); - fn_le->add_child(const_val); - fn_le->_node_type = TExprNodeType::BINARY_PRED; - fn_le->_opcode = TExprOpcode::GE; - slot_ref->_slot_id = 2; - slot_ref->_column_id = 2; - EXPECT_FALSE(fn_le->is_constant()); - - auto ctx = VExprContext::create_shared(fn_le); - ctx->_prepared = true; - ctx->_opened = true; - and_expr->add_child(ctx->root()); + pred->add_column_predicate(SingleColumnBlockPredicate::create_unique( + ComparisonPredicateBase::create_shared(2, "", + 900))); } const std::function& get_stat_func = @@ -1250,15 +1083,8 @@ TEST_F(ParquetExprTest, test_expr_push_down_and) { } return true; }; - ASSERT_TRUE(p_reader->check_expr_can_push_down(and_expr)); - p_reader->_enable_filter_by_min_max = true; - std::map>> push_down_simple_predicates; - push_down_simple_predicates.emplace(2, std::vector> {}); - p_reader->_push_down_predicates.push_back(AndBlockColumnPredicate::create_unique()); - ASSERT_TRUE(p_reader->convert_predicates({and_expr}, p_reader->_push_down_predicates.back(), - p_reader->_arena) - .ok()); + p_reader->_push_down_predicates.push_back(std::move(pred)); bool filter_group = false; bool filtered_by_min_max = false; @@ -1333,13 +1159,12 @@ TEST_F(ParquetExprTest, test_expr_push_down_or_string) { } return true; }; - ASSERT_TRUE(p_reader->check_expr_can_push_down(or_expr)); } TEST_F(ParquetExprTest, test_bloom_filter_skipped_when_range_miss) { const int col_idx = 2; const int64_t predicate_value = 10000000001; - ComparisonPredicateBase eq_pred(col_idx, predicate_value); + ComparisonPredicateBase eq_pred(col_idx, "", predicate_value); ParquetPredicate::ColumnStat stat; stat.ctz = &ctz; @@ -1381,7 +1206,7 @@ TEST_F(ParquetExprTest, test_bloom_filter_skipped_when_range_miss) { TEST_F(ParquetExprTest, test_bloom_filter_rejects_value) { const int col_idx = 2; const int64_t predicate_value = 10000000001; - ComparisonPredicateBase eq_pred(col_idx, predicate_value); + ComparisonPredicateBase eq_pred(col_idx, "", predicate_value); ParquetPredicate::ColumnStat stat; stat.ctz = &ctz; @@ -1432,7 +1257,7 @@ TEST_F(ParquetExprTest, test_bloom_filter_rejects_value) { TEST_F(ParquetExprTest, test_bloom_filter_accepts_value) { const int col_idx = 2; const int64_t predicate_value = 10000000001; - ComparisonPredicateBase eq_pred(col_idx, predicate_value); + ComparisonPredicateBase eq_pred(col_idx, "", predicate_value); ParquetPredicate::ColumnStat stat; stat.ctz = &ctz; @@ -1483,7 +1308,7 @@ TEST_F(ParquetExprTest, test_bloom_filter_accepts_value) { TEST_F(ParquetExprTest, test_bloom_filter_skipped_when_min_max_evicts_rowgroup) { const int col_idx = 2; const int64_t predicate_value = 10000000001; - ComparisonPredicateBase eq_pred(col_idx, predicate_value); + ComparisonPredicateBase eq_pred(col_idx, "", predicate_value); ParquetPredicate::ColumnStat stat; stat.ctz = &ctz; @@ -1526,7 +1351,7 @@ TEST_F(ParquetExprTest, test_bloom_filter_skipped_when_min_max_evicts_rowgroup) TEST_F(ParquetExprTest, test_bloom_filter_loader_called_when_min_max_allows) { const int col_idx = 2; const int64_t predicate_value = 10000000001; - ComparisonPredicateBase eq_pred(col_idx, predicate_value); + ComparisonPredicateBase eq_pred(col_idx, "", predicate_value); ParquetPredicate::ColumnStat stat; stat.ctz = &ctz; @@ -1577,7 +1402,7 @@ TEST_F(ParquetExprTest, test_bloom_filter_loader_called_when_min_max_allows) { TEST_F(ParquetExprTest, test_bloom_filter_loader_not_called_when_missing_metadata) { const int col_idx = 2; const int64_t predicate_value = 10000000001; - ComparisonPredicateBase eq_pred(col_idx, predicate_value); + ComparisonPredicateBase eq_pred(col_idx, "", predicate_value); ParquetPredicate::ColumnStat stat; stat.ctz = &ctz; @@ -1619,7 +1444,7 @@ TEST_F(ParquetExprTest, test_bloom_filter_loader_not_called_when_missing_metadat TEST_F(ParquetExprTest, test_bloom_filter_loader_resets_on_failure) { const int col_idx = 2; const int64_t predicate_value = 10000000001; - ComparisonPredicateBase eq_pred(col_idx, predicate_value); + ComparisonPredicateBase eq_pred(col_idx, "", predicate_value); ParquetPredicate::ColumnStat stat; stat.ctz = &ctz; @@ -1666,7 +1491,7 @@ TEST_F(ParquetExprTest, test_bloom_filter_loader_resets_on_failure) { TEST_F(ParquetExprTest, test_bloom_filter_not_supported_type) { const int col_idx = 6; // bool column const bool predicate_value = true; - ComparisonPredicateBase eq_pred(col_idx, predicate_value); + ComparisonPredicateBase eq_pred(col_idx, "", predicate_value); ParquetPredicate::ColumnStat stat; stat.ctz = &ctz; @@ -1706,7 +1531,7 @@ TEST_F(ParquetExprTest, test_bloom_filter_not_supported_type) { TEST_F(ParquetExprTest, test_bloom_filter_min_max_overlap_but_no_loader) { const int col_idx = 2; const int64_t predicate_value = 10000000001; - ComparisonPredicateBase eq_pred(col_idx, predicate_value); + ComparisonPredicateBase eq_pred(col_idx, "", predicate_value); ParquetPredicate::ColumnStat stat; stat.ctz = &ctz; @@ -1748,7 +1573,7 @@ TEST_F(ParquetExprTest, test_in_list_predicate_uses_bloom_filter) { set->insert(&v); } - InListPredicateBase in_pred(col_idx, set, false); + InListPredicateBase in_pred(col_idx, "", set, false); ParquetPredicate::ColumnStat stat; stat.ctz = &ctz; @@ -1801,7 +1626,7 @@ TEST_F(ParquetExprTest, test_in_list_predicate_no_loader_on_range_miss) { set->insert(&v); } - InListPredicateBase in_pred(col_idx, set, false); + InListPredicateBase in_pred(col_idx, "", set, false); ParquetPredicate::ColumnStat stat; stat.ctz = &ctz; @@ -1843,7 +1668,7 @@ TEST_F(ParquetExprTest, test_in_list_predicate_no_loader_on_range_miss) { TEST_F(ParquetExprTest, test_bloom_filter_reused_after_first_load) { const int col_idx = 2; const int64_t predicate_value = 10000000001; - ComparisonPredicateBase eq_pred(col_idx, predicate_value); + ComparisonPredicateBase eq_pred(col_idx, "", predicate_value); ParquetPredicate::ColumnStat stat; stat.ctz = &ctz; diff --git a/be/test/vec/exec/format/parquet/parquet_read_lines.cpp b/be/test/vec/exec/format/parquet/parquet_read_lines.cpp index 5213d659bb2e18..745fed9ad7fa31 100644 --- a/be/test/vec/exec/format/parquet/parquet_read_lines.cpp +++ b/be/test/vec/exec/format/parquet/parquet_read_lines.cpp @@ -152,8 +152,11 @@ static void read_parquet_lines(std::vector numeric_types, runtime_state.set_desc_tbl(desc_tbl); std::unordered_map colname_to_value_range; - static_cast(p_reader->init_reader(column_names, &col_name_to_block_idx, {}, nullptr, - nullptr, nullptr, nullptr, nullptr)); + phmap::flat_hash_map>> tmp; + std::vector> or_predicates; + static_cast(p_reader->init_reader(column_names, &col_name_to_block_idx, {}, tmp, + or_predicates, nullptr, nullptr, nullptr, nullptr, + nullptr)); std::unordered_map> partition_columns; std::unordered_map missing_columns; diff --git a/be/test/vec/exec/format/parquet/parquet_reader_test.cpp b/be/test/vec/exec/format/parquet/parquet_reader_test.cpp index 09d056431f5141..351068f4bf8357 100644 --- a/be/test/vec/exec/format/parquet/parquet_reader_test.cpp +++ b/be/test/vec/exec/format/parquet/parquet_reader_test.cpp @@ -151,8 +151,11 @@ TEST_F(ParquetReaderTest, normal) { RuntimeState runtime_state((TQueryOptions()), TQueryGlobals()); runtime_state.set_desc_tbl(desc_tbl); - static_cast(p_reader->init_reader(column_names, &col_name_to_block_idx, {}, nullptr, - nullptr, nullptr, nullptr, nullptr)); + phmap::flat_hash_map>> tmp; + std::vector> or_predicates; + static_cast(p_reader->init_reader(column_names, &col_name_to_block_idx, {}, tmp, + or_predicates, nullptr, nullptr, nullptr, nullptr, + nullptr)); std::unordered_map> partition_columns; std::unordered_map missing_columns; @@ -215,8 +218,10 @@ TEST_F(ParquetReaderTest, uuid_varbinary) { RuntimeState runtime_state = RuntimeState(TQueryOptions(), TQueryGlobals()); runtime_state.set_desc_tbl(desc_tbl); - st = p_reader->init_reader(column_names, &col_name_to_block_idx, {}, nullptr, nullptr, nullptr, - nullptr, nullptr); + phmap::flat_hash_map>> tmp; + std::vector> or_predicates; + st = p_reader->init_reader(column_names, &col_name_to_block_idx, {}, tmp, or_predicates, + nullptr, nullptr, nullptr, nullptr, nullptr); EXPECT_TRUE(st.ok()) << st; std::unordered_map> partition_columns; @@ -288,8 +293,10 @@ TEST_F(ParquetReaderTest, varbinary_varbinary) { RuntimeState runtime_state = RuntimeState(TQueryOptions(), TQueryGlobals()); runtime_state.set_desc_tbl(desc_tbl); - st = p_reader->init_reader(column_names, &col_name_to_block_idx, {}, nullptr, nullptr, nullptr, - nullptr, nullptr); + phmap::flat_hash_map>> tmp; + std::vector> or_predicates; + st = p_reader->init_reader(column_names, &col_name_to_block_idx, {}, tmp, or_predicates, + nullptr, nullptr, nullptr, nullptr, nullptr); EXPECT_TRUE(st.ok()) << st; std::unordered_map> partition_columns; @@ -363,8 +370,10 @@ TEST_F(ParquetReaderTest, varbinary_string) { RuntimeState runtime_state = RuntimeState(TQueryOptions(), TQueryGlobals()); runtime_state.set_desc_tbl(desc_tbl); - st = p_reader->init_reader(column_names, &col_name_to_block_idx, {}, nullptr, nullptr, nullptr, - nullptr, nullptr); + phmap::flat_hash_map>> tmp; + std::vector> or_predicates; + st = p_reader->init_reader(column_names, &col_name_to_block_idx, {}, tmp, or_predicates, + nullptr, nullptr, nullptr, nullptr, nullptr); EXPECT_TRUE(st.ok()) << st; std::unordered_map> partition_columns; @@ -438,8 +447,10 @@ TEST_F(ParquetReaderTest, varbinary_string2) { RuntimeState runtime_state = RuntimeState(TQueryOptions(), TQueryGlobals()); runtime_state.set_desc_tbl(desc_tbl); - st = p_reader->init_reader(column_names, &col_name_to_block_idx, {}, nullptr, nullptr, nullptr, - nullptr, nullptr); + phmap::flat_hash_map>> tmp; + std::vector> or_predicates; + st = p_reader->init_reader(column_names, &col_name_to_block_idx, {}, tmp, or_predicates, + nullptr, nullptr, nullptr, nullptr, nullptr); EXPECT_TRUE(st.ok()) << st; std::unordered_map> partition_columns; diff --git a/be/test/vec/exec/format/table/hive/hive_reader_test.cpp b/be/test/vec/exec/format/table/hive/hive_reader_test.cpp index b1cfe06e22787c..421f4218742fed 100644 --- a/be/test/vec/exec/format/table/hive/hive_reader_test.cpp +++ b/be/test/vec/exec/format/table/hive/hive_reader_test.cpp @@ -572,9 +572,12 @@ TEST_F(HiveReaderTest, read_hive_parquet_file) { const VExprContextSPtrs* not_single_slot_filter_conjuncts = nullptr; const std::unordered_map* slot_id_to_filter_conjuncts = nullptr; - st = hive_reader->init_reader(table_col_names, &col_name_to_block_idx, conjuncts, - tuple_descriptor, row_descriptor, colname_to_slot_id, - not_single_slot_filter_conjuncts, slot_id_to_filter_conjuncts); + phmap::flat_hash_map>> tmp; + std::vector> or_predicates; + st = hive_reader->init_reader(table_col_names, &col_name_to_block_idx, conjuncts, tmp, + or_predicates, tuple_descriptor, row_descriptor, + colname_to_slot_id, not_single_slot_filter_conjuncts, + slot_id_to_filter_conjuncts); ASSERT_TRUE(st.ok()) << st; std::unordered_map> diff --git a/be/test/vec/exec/format/table/iceberg/iceberg_reader_test.cpp b/be/test/vec/exec/format/table/iceberg/iceberg_reader_test.cpp index 11d12ad0387816..40373f05ad93e6 100644 --- a/be/test/vec/exec/format/table/iceberg/iceberg_reader_test.cpp +++ b/be/test/vec/exec/format/table/iceberg/iceberg_reader_test.cpp @@ -572,9 +572,12 @@ TEST_F(IcebergReaderTest, read_iceberg_parquet_file) { const VExprContextSPtrs* not_single_slot_filter_conjuncts = nullptr; const std::unordered_map* slot_id_to_filter_conjuncts = nullptr; - st = iceberg_reader->init_reader(table_col_names, &col_name_to_block_idx, conjuncts, - tuple_descriptor, row_descriptor, colname_to_slot_id, - not_single_slot_filter_conjuncts, slot_id_to_filter_conjuncts); + phmap::flat_hash_map>> tmp; + std::vector> or_predicates; + st = iceberg_reader->init_reader(table_col_names, &col_name_to_block_idx, conjuncts, tmp, + or_predicates, tuple_descriptor, row_descriptor, + colname_to_slot_id, not_single_slot_filter_conjuncts, + slot_id_to_filter_conjuncts); ASSERT_TRUE(st.ok()) << st; std::unordered_map>