diff --git a/be/src/exec/olap_common.h b/be/src/exec/olap_common.h index 07be79763593f6..dfb85d0f0eb0d0 100644 --- a/be/src/exec/olap_common.h +++ b/be/src/exec/olap_common.h @@ -425,6 +425,7 @@ class OlapScanKeys { _end_include(true), _is_convertible(true) {} + // TODO(gabriel): use ColumnPredicate to extend scan key template Status extend_scan_key(ColumnValueRange& range, int32_t max_scan_key_num, bool* exact_value, bool* eos, bool* should_break); diff --git a/be/src/exprs/bitmapfilter_predicate.h b/be/src/exprs/bitmapfilter_predicate.h index b695883205fbae..e8f149ce87f694 100644 --- a/be/src/exprs/bitmapfilter_predicate.h +++ b/be/src/exprs/bitmapfilter_predicate.h @@ -19,6 +19,7 @@ #include +#include "common/cast_set.h" #include "runtime/define_primitive_type.h" #include "runtime/primitive_type.h" #include "runtime_filter/runtime_filter_definitions.h" @@ -67,7 +68,8 @@ class BitmapFilterFunc : public BitmapFilterFuncBase { if (right < 0) { return false; } - return _bitmap_value->contains_any(std::max(left, (CppType)0), right); + return _bitmap_value->contains_any(cast_set(std::max(left, (CppType)0)), + cast_set(right)); } private: diff --git a/be/src/exprs/create_predicate_function.h b/be/src/exprs/create_predicate_function.h index bcb4ec49e75c99..71434978ccecb2 100644 --- a/be/src/exprs/create_predicate_function.h +++ b/be/src/exprs/create_predicate_function.h @@ -231,55 +231,56 @@ inline auto create_bitmap_filter(PrimitiveType type) { } template -ColumnPredicate* create_olap_column_predicate(uint32_t column_id, - const std::shared_ptr& filter, - const TabletColumn*, bool null_aware) { +std::shared_ptr create_olap_column_predicate( + uint32_t column_id, const std::shared_ptr& filter, const TabletColumn*, + bool null_aware) { std::shared_ptr filter_olap; filter_olap.reset(create_bloom_filter(PT, null_aware)); filter_olap->light_copy(filter.get()); // create a new filter to match the input filter and PT. 
For example, filter may be varchar, but PT is char - return new BloomFilterColumnPredicate(column_id, filter_olap); + return BloomFilterColumnPredicate::create_shared(column_id, filter_olap); } template -ColumnPredicate* create_olap_column_predicate(uint32_t column_id, - const std::shared_ptr& filter, - const TabletColumn*, bool) { +std::shared_ptr create_olap_column_predicate( + uint32_t column_id, const std::shared_ptr& filter, + const TabletColumn*, bool) { if constexpr (PT == TYPE_TINYINT || PT == TYPE_SMALLINT || PT == TYPE_INT || PT == TYPE_BIGINT) { - return new BitmapFilterColumnPredicate(column_id, filter); + return BitmapFilterColumnPredicate::create_shared(column_id, filter); } else { throw Exception(ErrorCode::INTERNAL_ERROR, "bitmap filter do not support type {}", PT); } } template -ColumnPredicate* create_olap_column_predicate(uint32_t column_id, - const std::shared_ptr& filter, - const TabletColumn* column, bool) { +std::shared_ptr create_olap_column_predicate( + uint32_t column_id, const std::shared_ptr& filter, + const TabletColumn* column, bool) { return create_in_list_predicate(column_id, filter, column->length()); } template -ColumnPredicate* create_olap_column_predicate(uint32_t column_id, - const std::shared_ptr& filter, - const TabletColumn* column, bool) { +std::shared_ptr create_olap_column_predicate( + uint32_t column_id, const std::shared_ptr& filter, + const TabletColumn* column, bool) { // currently only support like predicate if constexpr (PT == TYPE_CHAR) { - return new LikeColumnPredicate(filter->_opposite, column_id, filter->_fn_ctx, - filter->_string_param); + return LikeColumnPredicate::create_shared( + filter->_opposite, column_id, filter->_fn_ctx, filter->_string_param); } else if constexpr (PT == TYPE_VARCHAR || PT == TYPE_STRING) { - return new LikeColumnPredicate(filter->_opposite, column_id, filter->_fn_ctx, - filter->_string_param); + return LikeColumnPredicate::create_shared( + filter->_opposite, column_id, 
filter->_fn_ctx, filter->_string_param); } throw Exception(ErrorCode::INTERNAL_ERROR, "function filter do not support type {}", PT); } template -ColumnPredicate* create_column_predicate(uint32_t column_id, const std::shared_ptr& filter, - FieldType type, const TabletColumn* column, - bool null_aware = false) { +std::shared_ptr create_column_predicate(uint32_t column_id, + const std::shared_ptr& filter, + FieldType type, const TabletColumn* column, + bool null_aware = false) { switch (type) { #define M(NAME) \ case FieldType::OLAP_FIELD_##NAME: { \ diff --git a/be/src/olap/accept_null_predicate.h b/be/src/olap/accept_null_predicate.h index 85135f9440aca2..79792443637894 100644 --- a/be/src/olap/accept_null_predicate.h +++ b/be/src/olap/accept_null_predicate.h @@ -40,8 +40,27 @@ class AcceptNullPredicate : public ColumnPredicate { ENABLE_FACTORY_CREATOR(AcceptNullPredicate); public: - AcceptNullPredicate(ColumnPredicate* nested) - : ColumnPredicate(nested->column_id(), nested->opposite()), _nested {nested} {} + AcceptNullPredicate(const std::shared_ptr& nested) + : ColumnPredicate(nested->column_id(), nested->primitive_type(), nested->opposite()), + _nested {nested} {} + AcceptNullPredicate(const AcceptNullPredicate& other, uint32_t col_id) + : ColumnPredicate(other, col_id), + _nested(assert_cast(other)._nested + ? assert_cast(other)._nested->clone( + col_id) + : nullptr) {} + AcceptNullPredicate(const AcceptNullPredicate& other) = delete; + ~AcceptNullPredicate() override = default; + std::shared_ptr clone(uint32_t col_id) const override { + return AcceptNullPredicate::create_shared(*this, col_id); + } + std::string debug_string() const override { + auto n = _nested; + fmt::memory_buffer debug_string_buffer; + fmt::format_to(debug_string_buffer, "AcceptNullPredicate({}, nested={})", + ColumnPredicate::debug_string(), n ? 
n->debug_string() : "null"); + return fmt::to_string(debug_string_buffer); + } PredicateType type() const override { return _nested->type(); } @@ -173,11 +192,7 @@ class AcceptNullPredicate : public ColumnPredicate { return _nested->evaluate(column, sel, size); } - std::string _debug_string() const override { - return "passnull predicate for " + _nested->debug_string(); - } - - std::unique_ptr _nested; + std::shared_ptr _nested; }; } //namespace doris diff --git a/be/src/olap/bitmap_filter_predicate.h b/be/src/olap/bitmap_filter_predicate.h index 506e8b8c6f3563..730233b5c75f91 100644 --- a/be/src/olap/bitmap_filter_predicate.h +++ b/be/src/olap/bitmap_filter_predicate.h @@ -27,17 +27,32 @@ namespace doris { template -class BitmapFilterColumnPredicate : public ColumnPredicate { +class BitmapFilterColumnPredicate final : public ColumnPredicate { public: + ENABLE_FACTORY_CREATOR(BitmapFilterColumnPredicate); using CppType = typename PrimitiveTypeTraits::CppType; using SpecificFilter = BitmapFilterFunc; BitmapFilterColumnPredicate(uint32_t column_id, const std::shared_ptr& filter) - : ColumnPredicate(column_id), + : ColumnPredicate(column_id, T), _filter(filter), _specific_filter(assert_cast(_filter.get())) {} ~BitmapFilterColumnPredicate() override = default; + BitmapFilterColumnPredicate(const BitmapFilterColumnPredicate& other, uint32_t col_id) + : ColumnPredicate(other, col_id), + _filter(other._filter), + _specific_filter(assert_cast(_filter.get())) {} + BitmapFilterColumnPredicate(const BitmapFilterColumnPredicate& other) = delete; + std::shared_ptr clone(uint32_t col_id) const override { + return BitmapFilterColumnPredicate::create_shared(*this, col_id); + } + std::string debug_string() const override { + fmt::memory_buffer debug_string_buffer; + fmt::format_to(debug_string_buffer, "BitmapFilterColumnPredicate({})", + ColumnPredicate::debug_string()); + return fmt::to_string(debug_string_buffer); + } PredicateType type() const override { return 
PredicateType::BITMAP_FILTER; } @@ -85,10 +100,6 @@ class BitmapFilterColumnPredicate : public ColumnPredicate { return new_size; } - std::string _debug_string() const override { - return "BitmapFilterColumnPredicate(" + type_to_string(T) + ")"; - } - std::shared_ptr _filter; SpecificFilter* _specific_filter; // owned by _filter diff --git a/be/src/olap/block_column_predicate.h b/be/src/olap/block_column_predicate.h index 29af97a2d8fd00..ee73daeb4504e0 100644 --- a/be/src/olap/block_column_predicate.h +++ b/be/src/olap/block_column_predicate.h @@ -60,7 +60,7 @@ class BlockColumnPredicate { virtual void get_all_column_ids(std::set& column_id_set) const = 0; virtual void get_all_column_predicate( - std::set& predicate_set) const = 0; + std::set>& predicate_set) const = 0; virtual uint16_t evaluate(vectorized::MutableColumns& block, uint16_t* sel, uint16_t selected_size) const { @@ -118,13 +118,15 @@ class SingleColumnBlockPredicate : public BlockColumnPredicate { ENABLE_FACTORY_CREATOR(SingleColumnBlockPredicate); public: - explicit SingleColumnBlockPredicate(const ColumnPredicate* pre) : _predicate(pre) {} + explicit SingleColumnBlockPredicate(const std::shared_ptr& pre) + : _predicate(pre) {} void get_all_column_ids(std::set& column_id_set) const override { column_id_set.insert(_predicate->column_id()); } - void get_all_column_predicate(std::set& predicate_set) const override { + void get_all_column_predicate( + std::set>& predicate_set) const override { predicate_set.insert(_predicate); } @@ -154,7 +156,7 @@ class SingleColumnBlockPredicate : public BlockColumnPredicate { } private: - const ColumnPredicate* _predicate = nullptr; + const std::shared_ptr _predicate = nullptr; }; class MutilColumnBlockPredicate : public BlockColumnPredicate { @@ -185,7 +187,8 @@ class MutilColumnBlockPredicate : public BlockColumnPredicate { } } - void get_all_column_predicate(std::set& predicate_set) const override { + void get_all_column_predicate( + std::set>& predicate_set) const 
override { for (auto& child_block_predicate : _block_column_predicate_vec) { child_block_predicate->get_all_column_predicate(predicate_set); } diff --git a/be/src/olap/bloom_filter_predicate.h b/be/src/olap/bloom_filter_predicate.h index 972ff3845dd82f..eae433203aef10 100644 --- a/be/src/olap/bloom_filter_predicate.h +++ b/be/src/olap/bloom_filter_predicate.h @@ -30,16 +30,31 @@ namespace doris { template -class BloomFilterColumnPredicate : public ColumnPredicate { +class BloomFilterColumnPredicate final : public ColumnPredicate { public: + ENABLE_FACTORY_CREATOR(BloomFilterColumnPredicate); using SpecificFilter = BloomFilterFunc; BloomFilterColumnPredicate(uint32_t column_id, const std::shared_ptr& filter) - : ColumnPredicate(column_id), + : ColumnPredicate(column_id, T), _filter(filter), _specific_filter(assert_cast(_filter.get())) {} ~BloomFilterColumnPredicate() override = default; + BloomFilterColumnPredicate(const BloomFilterColumnPredicate& other, uint32_t col_id) + : ColumnPredicate(other, col_id), + _filter(other._filter), + _specific_filter(assert_cast(_filter.get())) {} + BloomFilterColumnPredicate(const BloomFilterColumnPredicate& other) = delete; + std::shared_ptr clone(uint32_t col_id) const override { + return BloomFilterColumnPredicate::create_shared(*this, col_id); + } + std::string debug_string() const override { + fmt::memory_buffer debug_string_buffer; + fmt::format_to(debug_string_buffer, "BloomFilterColumnPredicate({})", + ColumnPredicate::debug_string()); + return fmt::to_string(debug_string_buffer); + } PredicateType type() const override { return PredicateType::BF; } @@ -76,8 +91,6 @@ class BloomFilterColumnPredicate : public ColumnPredicate { return new_size; } - std::string _debug_string() const override { return "BloomFilter(" + type_to_string(T) + ")"; } - std::shared_ptr _filter; SpecificFilter* _specific_filter; // owned by _filter }; diff --git a/be/src/olap/column_predicate.h b/be/src/olap/column_predicate.h index 
b99c93b1056e0e..04a798d373c0e4 100644 --- a/be/src/olap/column_predicate.h +++ b/be/src/olap/column_predicate.h @@ -160,14 +160,20 @@ struct PredicateTypeTraits { class ColumnPredicate { public: - explicit ColumnPredicate(uint32_t column_id, bool opposite = false) - : _column_id(column_id), _opposite(opposite) { + explicit ColumnPredicate(uint32_t column_id, PrimitiveType primitive_type, + bool opposite = false) + : _column_id(column_id), _primitive_type(primitive_type), _opposite(opposite) { reset_judge_selectivity(); } + ColumnPredicate(const ColumnPredicate& other, uint32_t col_id) : ColumnPredicate(other) { + _column_id = col_id; + } virtual ~ColumnPredicate() = default; virtual PredicateType type() const = 0; + virtual PrimitiveType primitive_type() const { return _primitive_type; } + virtual std::shared_ptr clone(uint32_t col_id) const = 0; //evaluate predicate on inverted virtual Status evaluate(const vectorized::IndexFieldNameAndTypePair& name_with_type, @@ -178,6 +184,16 @@ class ColumnPredicate { } virtual double get_ignore_threshold() const { return 0; } + // Return a human-readable description of this predicate: column id, data type, predicate type, opposite flag and runtime filter id.
+ virtual std::string debug_string() const { + fmt::memory_buffer debug_string_buffer; + fmt::format_to(debug_string_buffer, + "Column ID: {}, Data Type: {}, PredicateType: {}, opposite: {}, Runtime " + "Filter ID: {}", + _column_id, type_to_string(primitive_type()), pred_type_string(type()), + _opposite, _runtime_filter_id); + return fmt::to_string(debug_string_buffer); + } // evaluate predicate on IColumn // a short circuit eval way @@ -266,14 +282,6 @@ class ColumnPredicate { bool opposite() const { return _opposite; } - std::string debug_string() const { - return _debug_string() + - fmt::format(", column_id={}, opposite={}, can_ignore={}, runtime_filter_id={}", - _column_id, _opposite, _can_ignore(), _runtime_filter_id); - } - - int get_runtime_filter_id() const { return _runtime_filter_id; } - void attach_profile_counter( int filter_id, std::shared_ptr predicate_filtered_rows_counter, std::shared_ptr predicate_input_rows_counter, @@ -347,7 +355,6 @@ class ColumnPredicate { virtual bool is_runtime_filter() const { return _can_ignore(); } protected: - virtual std::string _debug_string() const = 0; virtual bool _can_ignore() const { return _runtime_filter_id != -1; } virtual uint16_t _evaluate_inner(const vectorized::IColumn& column, uint16_t* sel, uint16_t size) const { @@ -377,6 +384,7 @@ class ColumnPredicate { } uint32_t _column_id; + PrimitiveType _primitive_type; // TODO: the value is only in delete condition, better be template value bool _opposite; int _runtime_filter_id = -1; @@ -399,6 +407,9 @@ class ColumnPredicate { std::make_shared(TUnit::UNIT, 0); std::shared_ptr _predicate_always_true_rows_counter = std::make_shared(TUnit::UNIT, 0); + +private: + ColumnPredicate(const ColumnPredicate& other) = default; }; } //namespace doris diff --git a/be/src/olap/comparison_predicate.h b/be/src/olap/comparison_predicate.h index 12db94a5f716dc..ef9729543afd1a 100644 --- a/be/src/olap/comparison_predicate.h +++ b/be/src/olap/comparison_predicate.h @@ -31,11 +31,25 
@@ namespace doris { #include "common/compile_check_begin.h" template -class ComparisonPredicateBase : public ColumnPredicate { +class ComparisonPredicateBase final : public ColumnPredicate { public: + ENABLE_FACTORY_CREATOR(ComparisonPredicateBase); using T = typename PrimitiveTypeTraits::CppType; ComparisonPredicateBase(uint32_t column_id, const T& value, bool opposite = false) - : ColumnPredicate(column_id, opposite), _value(value) {} + : ColumnPredicate(column_id, Type, opposite), _value(value) {} + ComparisonPredicateBase(const ComparisonPredicateBase& other, uint32_t col_id) + : ColumnPredicate(other, col_id), _value(other._value) {} + ComparisonPredicateBase(const ComparisonPredicateBase& other) = delete; + std::shared_ptr clone(uint32_t col_id) const override { + DCHECK(_segment_id_to_cached_code.empty()); + return ComparisonPredicateBase::create_shared(*this, col_id); + } + std::string debug_string() const override { + fmt::memory_buffer debug_string_buffer; + fmt::format_to(debug_string_buffer, "ComparisonPredicateBase({})", + ColumnPredicate::debug_string()); + return fmt::to_string(debug_string_buffer); + } PredicateType type() const override { return PT; } @@ -695,12 +709,6 @@ class ComparisonPredicateBase : public ColumnPredicate { return code; } - std::string _debug_string() const override { - std::string info = - "ComparisonPredicateBase(" + type_to_string(Type) + ", " + type_to_string(PT) + ")"; - return info; - } - mutable phmap::parallel_flat_hash_map< std::pair, int32_t, phmap::priv::hash_default_hash>, diff --git a/be/src/olap/delete_handler.cpp b/be/src/olap/delete_handler.cpp index a6b24128ef29d0..9f3fcdecef65de 100644 --- a/be/src/olap/delete_handler.cpp +++ b/be/src/olap/delete_handler.cpp @@ -369,7 +369,7 @@ Status DeleteHandler::_parse_column_pred(TabletSchemaSPtr complete_schema, condition.__set_column_unique_id(col_unique_id); const auto& column = complete_schema->column_by_uid(col_unique_id); uint32_t index = 
complete_schema->field_index(col_unique_id); - auto* predicate = + auto predicate = parse_to_predicate(column.get_vec_type(), index, condition, _predicate_arena, true); if (predicate != nullptr) { delete_conditions->column_predicate_vec.push_back(predicate); @@ -457,19 +457,13 @@ DeleteHandler::~DeleteHandler() { return; } - for (auto& cond : _del_conds) { - for (const auto* pred : cond.column_predicate_vec) { - delete pred; - } - } - _del_conds.clear(); _is_inited = false; } void DeleteHandler::get_delete_conditions_after_version( int64_t version, AndBlockColumnPredicate* and_block_column_predicate_ptr, - std::unordered_map>* + std::unordered_map>>* del_predicates_for_zone_map) const { for (const auto& del_cond : _del_conds) { if (del_cond.filter_version > version) { @@ -484,7 +478,7 @@ void DeleteHandler::get_delete_conditions_after_version( del_cond.column_predicate_vec[0]->column_id()) < 1) { del_predicates_for_zone_map->insert( {del_cond.column_predicate_vec[0]->column_id(), - std::vector {}}); + std::vector> {}}); } (*del_predicates_for_zone_map)[del_cond.column_predicate_vec[0]->column_id()] .push_back(del_cond.column_predicate_vec[0]); @@ -498,7 +492,8 @@ void DeleteHandler::get_delete_conditions_after_version( // // TODO: need refactor design and code to use more version delete and more column delete to filter zone page. std::for_each(del_cond.column_predicate_vec.cbegin(), del_cond.column_predicate_vec.cend(), - [&or_column_predicate](const ColumnPredicate* predicate) { + [&or_column_predicate]( + const std::shared_ptr predicate) { or_column_predicate->add_column_predicate( SingleColumnBlockPredicate::create_unique(predicate)); }); diff --git a/be/src/olap/delete_handler.h b/be/src/olap/delete_handler.h index d1c6a866cf2216..7f793ea0f11181 100644 --- a/be/src/olap/delete_handler.h +++ b/be/src/olap/delete_handler.h @@ -39,7 +39,7 @@ class TCondition; // Represent a delete condition. 
struct DeleteConditions { int64_t filter_version = 0; // The version of this condition - std::vector column_predicate_vec; + std::vector> column_predicate_vec; }; // This class is used for checking whether a row should be deleted. @@ -111,7 +111,7 @@ class DeleteHandler { void get_delete_conditions_after_version( int64_t version, AndBlockColumnPredicate* and_block_column_predicate_ptr, - std::unordered_map>* + std::unordered_map>>* del_predicates_for_zone_map) const; private: diff --git a/be/src/olap/in_list_predicate.h b/be/src/olap/in_list_predicate.h index 2246d0e2fccc15..2566830659903d 100644 --- a/be/src/olap/in_list_predicate.h +++ b/be/src/olap/in_list_predicate.h @@ -62,15 +62,26 @@ namespace doris { * @tparam PT * @tparam HybridSetType */ -template -class InListPredicateBase : public ColumnPredicate { +template +class InListPredicateBase final : public ColumnPredicate { public: + ENABLE_FACTORY_CREATOR(InListPredicateBase); using T = typename PrimitiveTypeTraits::CppType; + using HybridSetType = std::conditional_t< + N >= 1 && N <= FIXED_CONTAINER_MAX_SIZE, + std::conditional_t< + std::is_same_v, StringSet>, + HybridSet, + vectorized::PredicateColumnType>>>, + std::conditional_t< + std::is_same_v, StringSet>, + HybridSet, + vectorized::PredicateColumnType>>>>; template InListPredicateBase(uint32_t column_id, const ConditionType& conditions, const ConvertFunc& convert, bool is_opposite, const vectorized::DataTypePtr& data_type, vectorized::Arena& arena) - : ColumnPredicate(column_id, is_opposite), + : ColumnPredicate(column_id, Type, is_opposite), _min_value(type_limit::max()), _max_value(type_limit::min()) { _values = std::make_shared(false); @@ -90,8 +101,8 @@ class InListPredicateBase : public ColumnPredicate { } InListPredicateBase(uint32_t column_id, const std::shared_ptr& hybrid_set, - size_t char_length = 0) - : ColumnPredicate(column_id, false), + bool is_opposite, size_t char_length = 0) + : ColumnPredicate(column_id, Type, is_opposite), 
_min_value(type_limit::max()), _max_value(type_limit::min()) { CHECK(hybrid_set != nullptr); @@ -132,8 +143,26 @@ class InListPredicateBase : public ColumnPredicate { iter->next(); } } + InListPredicateBase(const InListPredicateBase& other, uint32_t col_id) + : ColumnPredicate(other, col_id) { + _values = other._values; + _min_value = other._min_value; + _max_value = other._max_value; + _temp_datas = other._temp_datas; + DCHECK(_segment_id_to_value_in_dict_flags.empty()); + } + InListPredicateBase(const InListPredicateBase& other) = delete; + std::shared_ptr clone(uint32_t col_id) const override { + return InListPredicateBase::create_shared(*this, col_id); + } ~InListPredicateBase() override = default; + std::string debug_string() const override { + fmt::memory_buffer debug_string_buffer; + fmt::format_to(debug_string_buffer, "InListPredicateBase({})", + ColumnPredicate::debug_string()); + return fmt::to_string(debug_string_buffer); + } PredicateType type() const override { return PT; } @@ -656,10 +685,6 @@ class InListPredicateBase : public ColumnPredicate { } } - std::string _debug_string() const override { - return "InListPredicate(" + type_to_string(Type) + ", " + type_to_string(PT) + ")"; - } - void _update_min_max(const T& value) { if (Compare::greater(value, _max_value)) { _max_value = value; @@ -681,33 +706,17 @@ class InListPredicateBase : public ColumnPredicate { template -ColumnPredicate* _create_in_list_predicate(uint32_t column_id, const ConditionType& conditions, - const ConvertFunc& convert, bool is_opposite, - const vectorized::DataTypePtr& data_type, - vectorized::Arena& arena) { - using T = typename PrimitiveTypeTraits::CppType; - if constexpr (N >= 1 && N <= FIXED_CONTAINER_MAX_SIZE) { - using Set = std::conditional_t< - std::is_same_v, StringSet>, - HybridSet, - vectorized::PredicateColumnType>>>; - return new InListPredicateBase(column_id, conditions, convert, is_opposite, - data_type, arena); - } else { - using Set = std::conditional_t< - 
std::is_same_v, StringSet>, - HybridSet, - vectorized::PredicateColumnType>>>; - return new InListPredicateBase(column_id, conditions, convert, is_opposite, - data_type, arena); - } +std::shared_ptr _create_in_list_predicate( + uint32_t column_id, const ConditionType& conditions, const ConvertFunc& convert, + bool is_opposite, const vectorized::DataTypePtr& data_type, vectorized::Arena& arena) { + return InListPredicateBase::create_shared(column_id, conditions, convert, + is_opposite, data_type, arena); } template -ColumnPredicate* create_in_list_predicate(uint32_t column_id, const ConditionType& conditions, - const ConvertFunc& convert, bool is_opposite, - const vectorized::DataTypePtr& data_type, - vectorized::Arena& arena) { +std::shared_ptr create_in_list_predicate( + uint32_t column_id, const ConditionType& conditions, const ConvertFunc& convert, + bool is_opposite, const vectorized::DataTypePtr& data_type, vectorized::Arena& arena) { if (conditions.size() == 1) { return _create_in_list_predicate( column_id, conditions, convert, is_opposite, data_type, arena); @@ -740,29 +749,16 @@ ColumnPredicate* create_in_list_predicate(uint32_t column_id, const ConditionTyp } template -ColumnPredicate* _create_in_list_predicate(uint32_t column_id, - const std::shared_ptr& hybrid_set, - size_t char_length = 0) { - using T = typename PrimitiveTypeTraits::CppType; - if constexpr (N >= 1 && N <= FIXED_CONTAINER_MAX_SIZE) { - using Set = std::conditional_t< - std::is_same_v, StringSet>, - HybridSet, - vectorized::PredicateColumnType>>>; - return new InListPredicateBase(column_id, hybrid_set, char_length); - } else { - using Set = std::conditional_t< - std::is_same_v, StringSet>, - HybridSet, - vectorized::PredicateColumnType>>>; - return new InListPredicateBase(column_id, hybrid_set, char_length); - } +std::shared_ptr _create_in_list_predicate( + uint32_t column_id, const std::shared_ptr& hybrid_set, + size_t char_length = 0) { + return 
InListPredicateBase::create_shared(column_id, hybrid_set, char_length); } template -ColumnPredicate* create_in_list_predicate(uint32_t column_id, - const std::shared_ptr& hybrid_set, - size_t char_length = 0) { +std::shared_ptr create_in_list_predicate( + uint32_t column_id, const std::shared_ptr& hybrid_set, + size_t char_length = 0) { if (hybrid_set->size() == 1) { return _create_in_list_predicate(column_id, hybrid_set, char_length); } else if (hybrid_set->size() == 2) { diff --git a/be/src/olap/iterators.h b/be/src/olap/iterators.h index 03c821e3c03935..8fc880b719051e 100644 --- a/be/src/olap/iterators.h +++ b/be/src/olap/iterators.h @@ -103,9 +103,10 @@ class StorageReadOptions { AndBlockColumnPredicate::create_shared(); // reader's column predicate, nullptr if not existed // used to fiter rows in row block - std::vector column_predicates; + std::vector> column_predicates; std::unordered_map> col_id_to_predicates; - std::unordered_map> del_predicates_for_zone_map; + std::unordered_map>> + del_predicates_for_zone_map; TPushAggOp::type push_down_agg_type_opt = TPushAggOp::NONE; // REQUIRED (null is not allowed) diff --git a/be/src/olap/like_column_predicate.cpp b/be/src/olap/like_column_predicate.cpp index a2bc50735efb08..9359fef6b04978 100644 --- a/be/src/olap/like_column_predicate.cpp +++ b/be/src/olap/like_column_predicate.cpp @@ -28,7 +28,7 @@ namespace doris { template LikeColumnPredicate::LikeColumnPredicate(bool opposite, uint32_t column_id, doris::FunctionContext* fn_ctx, doris::StringRef val) - : ColumnPredicate(column_id, opposite), pattern(val) { + : ColumnPredicate(column_id, T, opposite), pattern(val) { static_assert(T == TYPE_VARCHAR || T == TYPE_CHAR || T == TYPE_STRING, "LikeColumnPredicate only supports the following types: TYPE_VARCHAR, TYPE_CHAR, " "TYPE_STRING"); diff --git a/be/src/olap/like_column_predicate.h b/be/src/olap/like_column_predicate.h index 267b7ac1ea126d..0e7a0480f43cd6 100644 --- a/be/src/olap/like_column_predicate.h +++ 
b/be/src/olap/like_column_predicate.h @@ -44,11 +44,29 @@ namespace doris { class FunctionContext; template -class LikeColumnPredicate : public ColumnPredicate { +class LikeColumnPredicate final : public ColumnPredicate { public: + ENABLE_FACTORY_CREATOR(LikeColumnPredicate); LikeColumnPredicate(bool opposite, uint32_t column_id, doris::FunctionContext* fn_ctx, doris::StringRef val); ~LikeColumnPredicate() override = default; + LikeColumnPredicate(const LikeColumnPredicate& other, uint32_t col_id) + : ColumnPredicate(other, col_id) { + _origin = other._origin; + pattern = other.pattern; + _state = other._state; + _opposite = other._opposite; + } + LikeColumnPredicate(const LikeColumnPredicate& other) = delete; + std::shared_ptr clone(uint32_t col_id) const override { + return LikeColumnPredicate::create_shared(*this, col_id); + } + std::string debug_string() const override { + fmt::memory_buffer debug_string_buffer; + fmt::format_to(debug_string_buffer, "LikeColumnPredicate({}, pattern={}, origin={})", + ColumnPredicate::debug_string(), pattern, _origin); + return fmt::to_string(debug_string_buffer); + } PredicateType type() const override { return PredicateType::EQ; } void evaluate_vec(const vectorized::IColumn& column, uint16_t size, bool* flags) const override; @@ -171,11 +189,6 @@ class LikeColumnPredicate : public ColumnPredicate { std::shared_mutex> _segment_id_to_cached_res_flags; - std::string _debug_string() const override { - std::string info = "LikeColumnPredicate"; - return info; - } - std::string _origin; // lifetime controlled by scan node using StateType = vectorized::LikeState; @@ -187,7 +200,7 @@ class LikeColumnPredicate : public ColumnPredicate { // Hyperscan API. So here _like_state is separate for each instance of // LikeColumnPredicate. 
vectorized::LikeSearchState _like_state; - std::unique_ptr _page_ng_bf; // for ngram-bf index + std::shared_ptr _page_ng_bf; // for ngram-bf index }; } // namespace doris diff --git a/be/src/olap/null_predicate.cpp b/be/src/olap/null_predicate.cpp index 602964241213a6..b2db30383c6716 100644 --- a/be/src/olap/null_predicate.cpp +++ b/be/src/olap/null_predicate.cpp @@ -31,8 +31,8 @@ using namespace doris::vectorized; namespace doris { -NullPredicate::NullPredicate(uint32_t column_id, bool is_null, bool opposite) - : ColumnPredicate(column_id), _is_null(opposite != is_null) {} +NullPredicate::NullPredicate(uint32_t column_id, bool is_null, PrimitiveType type, bool opposite) + : ColumnPredicate(column_id, type), _is_null(opposite != is_null) {} PredicateType NullPredicate::type() const { return _is_null ? PredicateType::IS_NULL : PredicateType::IS_NOT_NULL; diff --git a/be/src/olap/null_predicate.h b/be/src/olap/null_predicate.h index 73c762eeb89845..3def7e9be3ae34 100644 --- a/be/src/olap/null_predicate.h +++ b/be/src/olap/null_predicate.h @@ -43,9 +43,23 @@ namespace vectorized { class IColumn; } // namespace vectorized -class NullPredicate : public ColumnPredicate { +class NullPredicate final : public ColumnPredicate { public: - NullPredicate(uint32_t column_id, bool is_null, bool opposite = false); + ENABLE_FACTORY_CREATOR(NullPredicate); + NullPredicate(uint32_t column_id, bool is_null, PrimitiveType type, bool opposite = false); + NullPredicate(const NullPredicate& other) = delete; + NullPredicate(const NullPredicate& other, uint32_t column_id) + : ColumnPredicate(other, column_id), _is_null(other._is_null) {} + ~NullPredicate() override = default; + std::shared_ptr clone(uint32_t column_id) const override { + return NullPredicate::create_shared(*this, column_id); + } + std::string debug_string() const override { + fmt::memory_buffer debug_string_buffer; + fmt::format_to(debug_string_buffer, "NullPredicate({}, is_null={})", + ColumnPredicate::debug_string(), 
_is_null); + return fmt::to_string(debug_string_buffer); + } PredicateType type() const override; @@ -122,11 +136,6 @@ class NullPredicate : public ColumnPredicate { uint16_t _evaluate_inner(const vectorized::IColumn& column, uint16_t* sel, uint16_t size) const override; - std::string _debug_string() const override { - std::string info = "NullPredicate(" + std::string(_is_null ? "is_null" : "not_null") + ")"; - return info; - } - bool _is_null; //true for null, false for not null }; diff --git a/be/src/olap/predicate_creator.cpp b/be/src/olap/predicate_creator.cpp new file mode 100644 index 00000000000000..e5ce9bc98b87a7 --- /dev/null +++ b/be/src/olap/predicate_creator.cpp @@ -0,0 +1,125 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "olap/predicate_creator.h" + +namespace doris { + +std::shared_ptr create_bloom_filter_predicate( + const uint32_t cid, const vectorized::DataTypePtr& data_type, + const std::shared_ptr& filter) { + // Do the necessary type conversion, for CAST(STRING AS CHAR), we do nothing here but change the data type to the target type CHAR + std::shared_ptr filter_olap; + filter_olap.reset(create_bloom_filter(data_type->get_primitive_type(), false)); + filter_olap->light_copy(filter.get()); + switch (data_type->get_primitive_type()) { + case TYPE_TINYINT: { + return BloomFilterColumnPredicate::create_shared(cid, filter_olap); + } + case TYPE_SMALLINT: { + return BloomFilterColumnPredicate::create_shared(cid, filter_olap); + } + case TYPE_INT: { + return BloomFilterColumnPredicate::create_shared(cid, filter_olap); + } + case TYPE_BIGINT: { + return BloomFilterColumnPredicate::create_shared(cid, filter_olap); + } + case TYPE_LARGEINT: { + return BloomFilterColumnPredicate::create_shared(cid, filter_olap); + } + case TYPE_FLOAT: { + return BloomFilterColumnPredicate::create_shared(cid, filter_olap); + } + case TYPE_DOUBLE: { + return BloomFilterColumnPredicate::create_shared(cid, filter_olap); + } + case TYPE_DECIMALV2: { + return BloomFilterColumnPredicate::create_shared(cid, filter_olap); + } + case TYPE_DECIMAL32: { + return BloomFilterColumnPredicate::create_shared(cid, filter_olap); + } + case TYPE_DECIMAL64: { + return BloomFilterColumnPredicate::create_shared(cid, filter_olap); + } + case TYPE_DECIMAL128I: { + return BloomFilterColumnPredicate::create_shared(cid, filter_olap); + } + case TYPE_DECIMAL256: { + return BloomFilterColumnPredicate::create_shared(cid, filter_olap); + } + case TYPE_CHAR: { + return BloomFilterColumnPredicate::create_shared(cid, filter_olap); + } + case TYPE_VARCHAR: { + return BloomFilterColumnPredicate::create_shared(cid, filter_olap); + } + case TYPE_STRING: { + return BloomFilterColumnPredicate::create_shared(cid, filter_olap); + } 
+ case TYPE_DATE: { + return BloomFilterColumnPredicate::create_shared(cid, filter_olap); + } + case TYPE_DATEV2: { + return BloomFilterColumnPredicate::create_shared(cid, filter_olap); + } + case TYPE_DATETIME: { + return BloomFilterColumnPredicate::create_shared(cid, filter_olap); + } + case TYPE_DATETIMEV2: { + return BloomFilterColumnPredicate::create_shared(cid, filter_olap); + } + case TYPE_BOOLEAN: { + return BloomFilterColumnPredicate::create_shared(cid, filter_olap); + } + case TYPE_IPV4: { + return BloomFilterColumnPredicate::create_shared(cid, filter_olap); + } + case TYPE_IPV6: { + return BloomFilterColumnPredicate::create_shared(cid, filter_olap); + } + default: + return nullptr; + } +} + +std::shared_ptr create_bitmap_filter_predicate( + const uint32_t cid, const vectorized::DataTypePtr& data_type, + const std::shared_ptr& filter) { + switch (data_type->get_primitive_type()) { + case TYPE_TINYINT: { + return BitmapFilterColumnPredicate::create_shared(cid, filter); + } + case TYPE_SMALLINT: { + return BitmapFilterColumnPredicate::create_shared(cid, filter); + } + case TYPE_INT: { + return BitmapFilterColumnPredicate::create_shared(cid, filter); + } + case TYPE_BIGINT: { + return BitmapFilterColumnPredicate::create_shared(cid, filter); + } + default: + throw Exception(ErrorCode::INVALID_ARGUMENT, + fmt::format("Cannot use bitmap filter for type: {}", + type_to_string(data_type->get_primitive_type()))); + return nullptr; + } +} + +} // namespace doris diff --git a/be/src/olap/predicate_creator.h b/be/src/olap/predicate_creator.h index 2f2abb6ca74a46..93bd667511380c 100644 --- a/be/src/olap/predicate_creator.h +++ b/be/src/olap/predicate_creator.h @@ -49,9 +49,9 @@ namespace doris { template class PredicateCreator { public: - virtual ColumnPredicate* create(const vectorized::DataTypePtr& data_type, int index, - const ConditionType& conditions, bool opposite, - vectorized::Arena& arena) = 0; + virtual std::shared_ptr create(const vectorized::DataTypePtr& 
data_type, + int index, const ConditionType& conditions, + bool opposite, vectorized::Arena& arena) = 0; virtual ~PredicateCreator() = default; }; @@ -59,15 +59,16 @@ template class IntegerPredicateCreator : public PredicateCreator { public: using CppType = typename PrimitiveTypeTraits::CppType; - ColumnPredicate* create(const vectorized::DataTypePtr& data_type, int index, - const ConditionType& conditions, bool opposite, - vectorized::Arena& arena) override { + std::shared_ptr create(const vectorized::DataTypePtr& data_type, int index, + const ConditionType& conditions, bool opposite, + vectorized::Arena& arena) override { if constexpr (PredicateTypeTraits::is_list(PT)) { return create_in_list_predicate( index, conditions, convert, opposite, data_type, arena); } else { static_assert(PredicateTypeTraits::is_comparison(PT)); - return new ComparisonPredicateBase(index, convert(conditions), opposite); + return ComparisonPredicateBase::create_shared(index, convert(conditions), + opposite); } } @@ -104,16 +105,16 @@ template class DecimalPredicateCreator : public PredicateCreator { public: using CppType = typename PrimitiveTypeTraits::CppType; - ColumnPredicate* create(const vectorized::DataTypePtr& data_type, int index, - const ConditionType& conditions, bool opposite, - vectorized::Arena& arena) override { + std::shared_ptr create(const vectorized::DataTypePtr& data_type, int index, + const ConditionType& conditions, bool opposite, + vectorized::Arena& arena) override { if constexpr (PredicateTypeTraits::is_list(PT)) { return create_in_list_predicate( index, conditions, convert, opposite, data_type, arena); } else { static_assert(PredicateTypeTraits::is_comparison(PT)); - return new ComparisonPredicateBase(index, convert(data_type, conditions), - opposite); + return ComparisonPredicateBase::create_shared( + index, convert(data_type, conditions), opposite); } } @@ -130,20 +131,21 @@ class DecimalPredicateCreator : public PredicateCreator { template class 
StringPredicateCreator : public PredicateCreator { public: - ColumnPredicate* create(const vectorized::DataTypePtr& data_type, int index, - const ConditionType& conditions, bool opposite, - vectorized::Arena& arena) override { + std::shared_ptr create(const vectorized::DataTypePtr& data_type, int index, + const ConditionType& conditions, bool opposite, + vectorized::Arena& arena) override { if constexpr (PredicateTypeTraits::is_list(PT)) { return create_in_list_predicate( index, conditions, convert, opposite, data_type, arena); } else { static_assert(PredicateTypeTraits::is_comparison(PT)); - return new ComparisonPredicateBase( + return ComparisonPredicateBase::create_shared( index, convert(data_type, conditions, arena), opposite); } } private: + // TODO(gabriel): remove conversion static StringRef convert(const vectorized::DataTypePtr& data_type, const std::string& condition, vectorized::Arena& arena) { size_t length = condition.length(); @@ -170,15 +172,16 @@ struct CustomPredicateCreator : public PredicateCreator { CustomPredicateCreator(const std::function& convert) : _convert(convert) {} - ColumnPredicate* create(const vectorized::DataTypePtr& data_type, int index, - const ConditionType& conditions, bool opposite, - vectorized::Arena& arena) override { + std::shared_ptr create(const vectorized::DataTypePtr& data_type, int index, + const ConditionType& conditions, bool opposite, + vectorized::Arena& arena) override { if constexpr (PredicateTypeTraits::is_list(PT)) { return create_in_list_predicate( index, conditions, _convert, opposite, data_type, arena); } else { static_assert(PredicateTypeTraits::is_comparison(PT)); - return new ComparisonPredicateBase(index, _convert(conditions), opposite); + return ComparisonPredicateBase::create_shared(index, _convert(conditions), + opposite); } } @@ -296,25 +299,26 @@ std::unique_ptr> get_creator( } template -ColumnPredicate* create_predicate(const vectorized::DataTypePtr& data_type, int index, - const ConditionType& 
conditions, bool opposite, - vectorized::Arena& arena) { +std::shared_ptr create_predicate(const vectorized::DataTypePtr& data_type, + int index, const ConditionType& conditions, + bool opposite, vectorized::Arena& arena) { return get_creator(data_type)->create(data_type, index, conditions, opposite, arena); } template -ColumnPredicate* create_comparison_predicate(const vectorized::DataTypePtr& data_type, int index, - const std::string& condition, bool opposite, - vectorized::Arena& arena) { +std::shared_ptr create_comparison_predicate( + const vectorized::DataTypePtr& data_type, int index, const std::string& condition, + bool opposite, vectorized::Arena& arena) { static_assert(PredicateTypeTraits::is_comparison(PT)); return create_predicate(data_type, index, condition, opposite, arena); } template -ColumnPredicate* create_list_predicate(const vectorized::DataTypePtr& data_type, int index, - const std::vector& conditions, bool opposite, - vectorized::Arena& arena) { +std::shared_ptr create_list_predicate(const vectorized::DataTypePtr& data_type, + int index, + const std::vector& conditions, + bool opposite, vectorized::Arena& arena) { static_assert(PredicateTypeTraits::is_list(PT)); return create_predicate>(data_type, index, conditions, opposite, arena); @@ -322,12 +326,15 @@ ColumnPredicate* create_list_predicate(const vectorized::DataTypePtr& data_type, // This method is called in reader and in deletehandler. // The "column" parameter might represent a column resulting from the decomposition of a variant column. 
-inline ColumnPredicate* parse_to_predicate(const vectorized::DataTypePtr& data_type, uint32_t index, - const TCondition& condition, vectorized::Arena& arena, - bool opposite = false) { +inline std::shared_ptr parse_to_predicate(const vectorized::DataTypePtr& data_type, + uint32_t index, + const TCondition& condition, + vectorized::Arena& arena, + bool opposite = false) { if (to_lower(condition.condition_op) == "is") { - return new NullPredicate(index, to_lower(condition.condition_values[0]) == "null", - opposite); + return NullPredicate::create_shared(index, + to_lower(condition.condition_values[0]) == "null", + data_type->get_primitive_type(), opposite); } if ((condition.condition_op == "*=" || condition.condition_op == "!*=") && @@ -358,5 +365,236 @@ inline ColumnPredicate* parse_to_predicate(const vectorized::DataTypePtr& data_t } return create(data_type, index, condition.condition_values[0], opposite, arena); } + +template +std::shared_ptr create_in_list_predicate(const uint32_t cid, + const std::shared_ptr& set, + bool is_opposite, + size_t char_length = 0) { + auto set_size = set->size(); + if (set_size == 1) { + return InListPredicateBase::create_shared(cid, set, is_opposite, char_length); + } else if (set_size == 2) { + return InListPredicateBase::create_shared(cid, set, is_opposite, char_length); + } else if (set_size == 3) { + return InListPredicateBase::create_shared(cid, set, is_opposite, char_length); + } else if (set_size == 4) { + return InListPredicateBase::create_shared(cid, set, is_opposite, char_length); + } else if (set_size == 5) { + return InListPredicateBase::create_shared(cid, set, is_opposite, char_length); + } else if (set_size == 6) { + return InListPredicateBase::create_shared(cid, set, is_opposite, char_length); + } else if (set_size == 7) { + return InListPredicateBase::create_shared(cid, set, is_opposite, char_length); + } else if (set_size == FIXED_CONTAINER_MAX_SIZE) { + return InListPredicateBase::create_shared(cid, set, 
is_opposite, char_length); + } else { + return InListPredicateBase::create_shared( + cid, set, is_opposite, char_length); + } +} + +template +std::shared_ptr create_in_list_predicate(const uint32_t cid, + const vectorized::DataTypePtr& data_type, + const std::shared_ptr set, + bool is_opposite) { + switch (data_type->get_primitive_type()) { + case TYPE_TINYINT: { + return create_in_list_predicate(cid, set, is_opposite); + } + case TYPE_SMALLINT: { + return create_in_list_predicate(cid, set, is_opposite); + } + case TYPE_INT: { + return create_in_list_predicate(cid, set, is_opposite); + } + case TYPE_BIGINT: { + return create_in_list_predicate(cid, set, is_opposite); + } + case TYPE_LARGEINT: { + return create_in_list_predicate(cid, set, is_opposite); + } + case TYPE_FLOAT: { + return create_in_list_predicate(cid, set, is_opposite); + } + case TYPE_DOUBLE: { + return create_in_list_predicate(cid, set, is_opposite); + } + case TYPE_DECIMALV2: { + return create_in_list_predicate(cid, set, is_opposite); + } + case TYPE_DECIMAL32: { + return create_in_list_predicate(cid, set, is_opposite); + } + case TYPE_DECIMAL64: { + return create_in_list_predicate(cid, set, is_opposite); + } + case TYPE_DECIMAL128I: { + return create_in_list_predicate(cid, set, is_opposite); + } + case TYPE_DECIMAL256: { + return create_in_list_predicate(cid, set, is_opposite); + } + case TYPE_CHAR: { + return create_in_list_predicate( + cid, set, is_opposite, + assert_cast( + vectorized::remove_nullable(data_type).get()) + ->len()); + } + case TYPE_VARCHAR: { + return create_in_list_predicate(cid, set, is_opposite); + } + case TYPE_STRING: { + return create_in_list_predicate(cid, set, is_opposite); + } + case TYPE_DATE: { + return create_in_list_predicate(cid, set, is_opposite); + } + case TYPE_DATEV2: { + return create_in_list_predicate(cid, set, is_opposite); + } + case TYPE_DATETIME: { + return create_in_list_predicate(cid, set, is_opposite); + } + case TYPE_DATETIMEV2: { + return 
create_in_list_predicate(cid, set, is_opposite); + } + case TYPE_BOOLEAN: { + return create_in_list_predicate(cid, set, is_opposite); + } + case TYPE_IPV4: { + return create_in_list_predicate(cid, set, is_opposite); + } + case TYPE_IPV6: { + return create_in_list_predicate(cid, set, is_opposite); + } + default: + throw Exception(Status::InternalError("Unsupported type {} for in_predicate", + type_to_string(data_type->get_primitive_type()))); + return nullptr; + } +} + +template +std::shared_ptr create_comparison_predicate0( + const uint32_t cid, const vectorized::DataTypePtr& data_type, StringRef& value, + bool opposite, vectorized::Arena& arena) { + switch (data_type->get_primitive_type()) { + case TYPE_TINYINT: { + return ComparisonPredicateBase::create_shared( + cid, *(typename PrimitiveTypeTraits::CppType*)value.data, opposite); + } + case TYPE_SMALLINT: { + return ComparisonPredicateBase::create_shared( + cid, *(typename PrimitiveTypeTraits::CppType*)value.data, opposite); + } + case TYPE_INT: { + return ComparisonPredicateBase::create_shared( + cid, *(typename PrimitiveTypeTraits::CppType*)value.data, opposite); + } + case TYPE_BIGINT: { + return ComparisonPredicateBase::create_shared( + cid, *(typename PrimitiveTypeTraits::CppType*)value.data, opposite); + } + case TYPE_LARGEINT: { + return ComparisonPredicateBase::create_shared( + cid, *(typename PrimitiveTypeTraits::CppType*)value.data, opposite); + } + case TYPE_FLOAT: { + return ComparisonPredicateBase::create_shared( + cid, *(typename PrimitiveTypeTraits::CppType*)value.data, opposite); + } + case TYPE_DOUBLE: { + return ComparisonPredicateBase::create_shared( + cid, *(typename PrimitiveTypeTraits::CppType*)value.data, opposite); + } + case TYPE_DECIMALV2: { + return ComparisonPredicateBase::create_shared( + cid, *(typename PrimitiveTypeTraits::CppType*)value.data, opposite); + } + case TYPE_DECIMAL32: { + return ComparisonPredicateBase::create_shared( + cid, *(typename 
PrimitiveTypeTraits::CppType*)value.data, opposite); + } + case TYPE_DECIMAL64: { + return ComparisonPredicateBase::create_shared( + cid, *(typename PrimitiveTypeTraits::CppType*)value.data, opposite); + } + case TYPE_DECIMAL128I: { + return ComparisonPredicateBase::create_shared( + cid, *(typename PrimitiveTypeTraits::CppType*)value.data, + opposite); + } + case TYPE_DECIMAL256: { + return ComparisonPredicateBase::create_shared( + cid, *(typename PrimitiveTypeTraits::CppType*)value.data, + opposite); + } + case TYPE_CHAR: { + // TODO(gabriel): Use std::string instead of StringRef + size_t target = assert_cast( + vectorized::remove_nullable(data_type).get()) + ->len(); + StringRef v = value; + if (target > value.size) { + char* buffer = arena.alloc(target); + memset(buffer, 0, target); + memcpy(buffer, value.data, value.size); + v = {buffer, target}; + } + + return ComparisonPredicateBase::create_shared(cid, v, opposite); + } + case TYPE_VARCHAR: { + return ComparisonPredicateBase::create_shared(cid, value, opposite); + } + case TYPE_STRING: { + return ComparisonPredicateBase::create_shared(cid, value, opposite); + } + case TYPE_DATE: { + return ComparisonPredicateBase::create_shared( + cid, *(typename PrimitiveTypeTraits::CppType*)value.data, opposite); + } + case TYPE_DATEV2: { + return ComparisonPredicateBase::create_shared( + cid, *(typename PrimitiveTypeTraits::CppType*)value.data, opposite); + } + case TYPE_DATETIME: { + return ComparisonPredicateBase::create_shared( + cid, *(typename PrimitiveTypeTraits::CppType*)value.data, opposite); + } + case TYPE_DATETIMEV2: { + return ComparisonPredicateBase::create_shared( + cid, *(typename PrimitiveTypeTraits::CppType*)value.data, + opposite); + } + case TYPE_BOOLEAN: { + return ComparisonPredicateBase::create_shared( + cid, *(typename PrimitiveTypeTraits::CppType*)value.data, opposite); + } + case TYPE_IPV4: { + return ComparisonPredicateBase::create_shared( + cid, *(typename 
PrimitiveTypeTraits::CppType*)value.data, opposite); + } + case TYPE_IPV6: { + return ComparisonPredicateBase::create_shared( + cid, *(typename PrimitiveTypeTraits::CppType*)value.data, opposite); + } + default: + throw Exception(Status::InternalError("Unsupported type {} for comparison_predicate", + type_to_string(data_type->get_primitive_type()))); + return nullptr; + } +} + +std::shared_ptr create_bloom_filter_predicate( + const uint32_t cid, const vectorized::DataTypePtr& data_type, + const std::shared_ptr& filter); + +std::shared_ptr create_bitmap_filter_predicate( + const uint32_t cid, const vectorized::DataTypePtr& data_type, + const std::shared_ptr& filter); #include "common/compile_check_end.h" } //namespace doris diff --git a/be/src/olap/rowset/rowset_reader_context.h b/be/src/olap/rowset/rowset_reader_context.h index fffa16d0f5cb3d..846c721ca341ae 100644 --- a/be/src/olap/rowset/rowset_reader_context.h +++ b/be/src/olap/rowset/rowset_reader_context.h @@ -58,9 +58,9 @@ struct RowsetReaderContext { TPushAggOp::type push_down_agg_type_opt = TPushAggOp::NONE; // column name -> column predicate // adding column_name for predicate to make use of column selectivity - const std::vector* predicates = nullptr; + const std::vector>* predicates = nullptr; // value column predicate in UNIQUE table - const std::vector* value_predicates = nullptr; + const std::vector>* value_predicates = nullptr; const std::vector* lower_bound_keys = nullptr; const std::vector* is_lower_keys_included = nullptr; const std::vector* upper_bound_keys = nullptr; diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp b/be/src/olap/rowset/segment_v2/column_reader.cpp index 313ae7f3f37fe9..2ddea2988c4ad7 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.cpp +++ b/be/src/olap/rowset/segment_v2/column_reader.cpp @@ -381,8 +381,8 @@ Status ColumnReader::read_page(const ColumnIteratorOptions& iter_opts, const Pag Status ColumnReader::get_row_ranges_by_zone_map( const 
AndBlockColumnPredicate* col_predicates, - const std::vector* delete_predicates, RowRanges* row_ranges, - const ColumnIteratorOptions& iter_opts) { + const std::vector>* delete_predicates, + RowRanges* row_ranges, const ColumnIteratorOptions& iter_opts) { std::vector page_indexes; RETURN_IF_ERROR( _get_filtered_pages(col_predicates, delete_predicates, &page_indexes, iter_opts)); @@ -448,8 +448,9 @@ Status ColumnReader::match_condition(const AndBlockColumnPredicate* col_predicat return Status::OK(); } -Status ColumnReader::prune_predicates_by_zone_map(std::vector& predicates, - const int column_id, bool* pruned) const { +Status ColumnReader::prune_predicates_by_zone_map( + std::vector>& predicates, const int column_id, + bool* pruned) const { *pruned = false; if (_zone_map_index == nullptr) { return Status::OK(); @@ -558,7 +559,7 @@ bool ColumnReader::_zone_map_match_condition(const ZoneMapPB& zone_map, Status ColumnReader::_get_filtered_pages( const AndBlockColumnPredicate* col_predicates, - const std::vector* delete_predicates, + const std::vector>* delete_predicates, std::vector* page_indexes, const ColumnIteratorOptions& iter_opts) { RETURN_IF_ERROR(_load_zone_map_index(_use_index_page_cache, _opts.kept_in_memory, iter_opts)); @@ -1945,7 +1946,8 @@ Status FileColumnIterator::_read_dict_data() { Status FileColumnIterator::get_row_ranges_by_zone_map( const AndBlockColumnPredicate* col_predicates, - const std::vector* delete_predicates, RowRanges* row_ranges) { + const std::vector>* delete_predicates, + RowRanges* row_ranges) { if (_reader->has_zone_map()) { RETURN_IF_ERROR(_reader->get_row_ranges_by_zone_map(col_predicates, delete_predicates, row_ranges, _opts)); diff --git a/be/src/olap/rowset/segment_v2/column_reader.h b/be/src/olap/rowset/segment_v2/column_reader.h index 14ea18f112ef92..dc49562b7a995c 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.h +++ b/be/src/olap/rowset/segment_v2/column_reader.h @@ -193,10 +193,10 @@ class ColumnReader : public 
MetadataAdder, // get row ranges with zone map // - cond_column is user's query predicate // - delete_condition is a delete predicate of one version - Status get_row_ranges_by_zone_map(const AndBlockColumnPredicate* col_predicates, - const std::vector* delete_predicates, - RowRanges* row_ranges, - const ColumnIteratorOptions& iter_opts); + Status get_row_ranges_by_zone_map( + const AndBlockColumnPredicate* col_predicates, + const std::vector>* delete_predicates, + RowRanges* row_ranges, const ColumnIteratorOptions& iter_opts); // get row ranges with bloom filter index Status get_row_ranges_by_bloom_filter(const AndBlockColumnPredicate* col_predicates, @@ -207,7 +207,7 @@ class ColumnReader : public MetadataAdder, bool is_empty() const { return _num_rows == 0; } - Status prune_predicates_by_zone_map(std::vector& predicates, + Status prune_predicates_by_zone_map(std::vector>& predicates, const int column_id, bool* pruned) const; CompressionTypePB get_compression() const { return _meta_compression; } @@ -258,10 +258,10 @@ class ColumnReader : public MetadataAdder, Status _parse_zone_map_skip_null(const ZoneMapPB& zone_map, WrapperField* min_value_container, WrapperField* max_value_container) const; - Status _get_filtered_pages(const AndBlockColumnPredicate* col_predicates, - const std::vector* delete_predicates, - std::vector* page_indexes, - const ColumnIteratorOptions& iter_opts); + Status _get_filtered_pages( + const AndBlockColumnPredicate* col_predicates, + const std::vector>* delete_predicates, + std::vector* page_indexes, const ColumnIteratorOptions& iter_opts); Status _calculate_row_ranges(const std::vector& page_indexes, RowRanges* row_ranges, const ColumnIteratorOptions& iter_opts); @@ -345,7 +345,8 @@ class ColumnIterator { virtual Status get_row_ranges_by_zone_map( const AndBlockColumnPredicate* col_predicates, - const std::vector* delete_predicates, RowRanges* row_ranges) { + const std::vector>* delete_predicates, + RowRanges* row_ranges) { return 
Status::OK(); } @@ -436,9 +437,10 @@ class FileColumnIterator final : public ColumnIterator { // get row ranges by zone map // - cond_column is user's query predicate // - delete_condition is delete predicate of one version - Status get_row_ranges_by_zone_map(const AndBlockColumnPredicate* col_predicates, - const std::vector* delete_predicates, - RowRanges* row_ranges) override; + Status get_row_ranges_by_zone_map( + const AndBlockColumnPredicate* col_predicates, + const std::vector>* delete_predicates, + RowRanges* row_ranges) override; Status get_row_ranges_by_bloom_filter(const AndBlockColumnPredicate* col_predicates, RowRanges* row_ranges) override; diff --git a/be/src/olap/rowset/segment_v2/segment.cpp b/be/src/olap/rowset/segment_v2/segment.cpp index 378e1053dfab38..e5a9b8a9d7f1d1 100644 --- a/be/src/olap/rowset/segment_v2/segment.cpp +++ b/be/src/olap/rowset/segment_v2/segment.cpp @@ -283,7 +283,7 @@ Status Segment::new_iterator(SchemaSPtr schema, const StorageReadOptions& read_o AndBlockColumnPredicate and_predicate; and_predicate.add_column_predicate( - SingleColumnBlockPredicate::create_unique(runtime_predicate.get())); + SingleColumnBlockPredicate::create_unique(runtime_predicate)); std::shared_ptr reader; Status st = get_column_reader( read_options.tablet_schema->column(runtime_predicate->column_id()), &reader, @@ -340,7 +340,7 @@ Status Segment::new_iterator(SchemaSPtr schema, const StorageReadOptions& read_o options_with_pruned_predicates.column_predicates = pruned_predicates; //because column_predicates is changed, we need to rebuild col_id_to_predicates so that inverted index will not go through it. 
options_with_pruned_predicates.col_id_to_predicates.clear(); - for (auto* pred : options_with_pruned_predicates.column_predicates) { + for (auto pred : options_with_pruned_predicates.column_predicates) { if (!options_with_pruned_predicates.col_id_to_predicates.contains( pred->column_id())) { options_with_pruned_predicates.col_id_to_predicates.insert( diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index cbaad52e36f5d0..be7fe93d6cceae 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -424,7 +424,7 @@ Status SegmentIterator::_init_impl(const StorageReadOptions& opts) { void SegmentIterator::_initialize_predicate_results() { // Initialize from _col_predicates - for (auto* pred : _col_predicates) { + for (auto pred : _col_predicates) { int cid = pred->column_id(); _column_predicate_index_exec_status[cid][pred] = false; } @@ -950,7 +950,7 @@ Status SegmentIterator::_get_row_ranges_from_conditions(RowRanges* condition_row _opts)) { AndBlockColumnPredicate and_predicate; and_predicate.add_column_predicate( - SingleColumnBlockPredicate::create_unique(runtime_predicate.get())); + SingleColumnBlockPredicate::create_unique(runtime_predicate)); RowRanges column_rp_row_ranges = RowRanges::create_single(num_rows()); RETURN_IF_ERROR(_column_iterators[runtime_predicate->column_id()] @@ -1010,7 +1010,7 @@ Status SegmentIterator::_extract_common_expr_columns(const vectorized::VExprSPtr return Status::OK(); } -bool SegmentIterator::_check_apply_by_inverted_index(ColumnPredicate* pred) { +bool SegmentIterator::_check_apply_by_inverted_index(std::shared_ptr pred) { if (_opts.runtime_state && !_opts.runtime_state->query_options().enable_inverted_index_query) { return false; } @@ -1038,8 +1038,8 @@ bool SegmentIterator::_check_apply_by_inverted_index(ColumnPredicate* pred) { } // Function filter no apply inverted index - if (dynamic_cast*>(pred) 
!= nullptr || - dynamic_cast*>(pred) != nullptr) { + if (dynamic_cast*>(pred.get()) != nullptr || + dynamic_cast*>(pred.get()) != nullptr) { return false; } @@ -1151,8 +1151,8 @@ inline bool SegmentIterator::_inverted_index_not_support_pred_type(const Predica } Status SegmentIterator::_apply_inverted_index_on_column_predicate( - ColumnPredicate* pred, std::vector& remaining_predicates, - bool* continue_apply) { + std::shared_ptr pred, + std::vector>& remaining_predicates, bool* continue_apply) { if (!_check_apply_by_inverted_index(pred)) { remaining_predicates.emplace_back(pred); } else { @@ -1240,8 +1240,8 @@ bool SegmentIterator::_need_read_data(ColumnId cid) { } Status SegmentIterator::_apply_inverted_index() { - std::vector remaining_predicates; - std::set no_need_to_pass_column_predicate_set; + std::vector> remaining_predicates; + std::set> no_need_to_pass_column_predicate_set; for (auto pred : _col_predicates) { if (no_need_to_pass_column_predicate_set.count(pred) > 0) { @@ -1671,9 +1671,9 @@ Status SegmentIterator::_vec_init_lazy_materialization() { std::set del_cond_id_set; _opts.delete_condition_predicates->get_all_column_ids(del_cond_id_set); - std::set delete_predicate_set {}; + std::set> delete_predicate_set {}; _opts.delete_condition_predicates->get_all_column_predicate(delete_predicate_set); - for (const auto* const predicate : delete_predicate_set) { + for (auto predicate : delete_predicate_set) { if (PredicateTypeTraits::is_range(predicate->type())) { _delete_range_column_ids.push_back(predicate->column_id()); } else if (PredicateTypeTraits::is_bloom_filter(predicate->type())) { @@ -1693,7 +1693,7 @@ Status SegmentIterator::_vec_init_lazy_materialization() { auto& runtime_predicate = _opts.runtime_state->get_query_ctx()->get_runtime_predicate(id); _col_predicates.push_back( - runtime_predicate.get_predicate(_opts.topn_filter_target_node_id).get()); + runtime_predicate.get_predicate(_opts.topn_filter_target_node_id)); VLOG_DEBUG << fmt::format( 
"After appending topn filter to col_predicates, " "col_predicates size: {}, col_predicate: {}", @@ -1706,7 +1706,7 @@ Status SegmentIterator::_vec_init_lazy_materialization() { std::set short_cir_pred_col_id_set; // using set for distinct cid std::set vec_pred_col_id_set; - for (auto* predicate : _col_predicates) { + for (auto predicate : _col_predicates) { auto cid = predicate->column_id(); _is_pred_column[cid] = true; pred_column_ids.insert(cid); @@ -1858,7 +1858,7 @@ Status SegmentIterator::_vec_init_lazy_materialization() { return Status::OK(); } -bool SegmentIterator::_can_evaluated_by_vectorized(ColumnPredicate* predicate) { +bool SegmentIterator::_can_evaluated_by_vectorized(std::shared_ptr predicate) { auto cid = predicate->column_id(); FieldType field_type = _schema->column(cid)->type(); if (field_type == FieldType::OLAP_FIELD_TYPE_VARIANT) { @@ -2279,7 +2279,7 @@ uint16_t SegmentIterator::_evaluate_short_circuit_predicate(uint16_t* vec_sel_ro } uint16_t original_size = selected_size; - for (auto* predicate : _short_cir_eval_predicate) { + for (auto predicate : _short_cir_eval_predicate) { auto column_id = predicate->column_id(); auto& short_cir_column = _current_return_columns[column_id]; selected_size = predicate->evaluate(*short_cir_column, vec_sel_rowid_idx, selected_size); @@ -2818,7 +2818,7 @@ void SegmentIterator::_convert_dict_code_for_predicate_if_necessary() { } void SegmentIterator::_convert_dict_code_for_predicate_if_necessary_impl( - ColumnPredicate* predicate) { + std::shared_ptr predicate) { auto& column = _current_return_columns[predicate->column_id()]; auto* col_ptr = column.get(); diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.h b/be/src/olap/rowset/segment_v2/segment_iterator.h index 7863b758d0802e..5ef63c3c6ec4c2 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.h +++ b/be/src/olap/rowset/segment_v2/segment_iterator.h @@ -138,7 +138,7 @@ class SegmentIterator : public RowwiseIterator { _update_profile(profile, 
_pre_eval_block_predicate, "PreEvaluatePredicates"); if (_opts.delete_condition_predicates != nullptr) { - std::set delete_predicate_set; + std::set> delete_predicate_set; _opts.delete_condition_predicates->get_all_column_predicate(delete_predicate_set); _update_profile(profile, delete_predicate_set, "DeleteConditionPredicates"); } @@ -192,7 +192,8 @@ class SegmentIterator : public RowwiseIterator { [[nodiscard]] Status _apply_inverted_index(); [[nodiscard]] Status _apply_inverted_index_on_column_predicate( - ColumnPredicate* pred, std::vector& remaining_predicates, + std::shared_ptr pred, + std::vector>& remaining_predicates, bool* continue_apply); [[nodiscard]] Status _apply_ann_topn_predicate(); [[nodiscard]] Status _apply_index_expr(); @@ -277,7 +278,7 @@ class SegmentIterator : public RowwiseIterator { return Status::OK(); } - bool _can_evaluated_by_vectorized(ColumnPredicate* predicate); + bool _can_evaluated_by_vectorized(std::shared_ptr predicate); [[nodiscard]] Status _extract_common_expr_columns(const vectorized::VExprSPtr& expr); // same with _extract_common_expr_columns, but only extract columns that can be used for index @@ -292,9 +293,10 @@ class SegmentIterator : public RowwiseIterator { // Dictionary column should do something to initial. 
void _convert_dict_code_for_predicate_if_necessary(); - void _convert_dict_code_for_predicate_if_necessary_impl(ColumnPredicate* predicate); + void _convert_dict_code_for_predicate_if_necessary_impl( + std::shared_ptr predicate); - bool _check_apply_by_inverted_index(ColumnPredicate* pred); + bool _check_apply_by_inverted_index(std::shared_ptr pred); void _output_index_result_column_for_expr(uint16_t* sel_rowid_idx, uint16_t select_size, vectorized::Block* block); @@ -424,8 +426,8 @@ class SegmentIterator : public RowwiseIterator { std::map _need_read_data_indices; std::vector _is_common_expr_column; vectorized::MutableColumns _current_return_columns; - std::vector _pre_eval_block_predicate; - std::vector _short_cir_eval_predicate; + std::vector> _pre_eval_block_predicate; + std::vector> _short_cir_eval_predicate; std::vector _delete_range_column_ids; std::vector _delete_bloom_filter_column_ids; // when lazy materialization is enabled, segmentIter need to read data at least twice @@ -446,7 +448,7 @@ class SegmentIterator : public RowwiseIterator { StorageReadOptions _opts; // make a copy of `_opts.column_predicates` in order to make local changes - std::vector _col_predicates; + std::vector> _col_predicates; vectorized::VExprContextSPtrs _common_expr_ctxs_push_down; bool _enable_common_expr_pushdown = false; std::vector _remaining_conjunct_roots; @@ -475,7 +477,7 @@ class SegmentIterator : public RowwiseIterator { std::unique_ptr _pool; // used to collect filter information. - std::vector _filter_info_id; + std::vector> _filter_info_id; bool _record_rowids = false; int64_t _tablet_id = 0; std::set _output_columns; @@ -486,7 +488,7 @@ class SegmentIterator : public RowwiseIterator { * column and column_predicates on it. * a boolean value to indicate whether the column has been read by the index. 
*/ - std::unordered_map> + std::unordered_map, bool>> _column_predicate_index_exec_status; /* diff --git a/be/src/olap/shared_predicate.h b/be/src/olap/shared_predicate.h index 45eae1b7f80ff9..2b0c32c8246450 100644 --- a/be/src/olap/shared_predicate.h +++ b/be/src/olap/shared_predicate.h @@ -32,30 +32,57 @@ namespace doris { // SharedPredicate only used on topn runtime predicate. // Runtime predicate globally share one predicate, to ensure that updates can be real-time. // At the beginning nested predicate may be nullptr, in which case predicate always returns true. -class SharedPredicate : public ColumnPredicate { +class SharedPredicate final : public ColumnPredicate { ENABLE_FACTORY_CREATOR(SharedPredicate); public: - SharedPredicate(uint32_t column_id) : ColumnPredicate(column_id) {} + SharedPredicate(uint32_t column_id) + : ColumnPredicate(column_id, PrimitiveType::INVALID_TYPE), + _mtx(std::make_shared()) {} + SharedPredicate(const ColumnPredicate& other) = delete; + SharedPredicate(const SharedPredicate& other, uint32_t column_id) + : ColumnPredicate(other, column_id), + _mtx(std::make_shared()), + _nested(assert_cast(other)._nested + ? other._nested->clone(column_id) + : nullptr) {} + ~SharedPredicate() override = default; + std::string debug_string() const override { + std::shared_lock lock(*_mtx); + fmt::memory_buffer debug_string_buffer; + fmt::format_to(debug_string_buffer, "SharedPredicate({}, nested={})", + ColumnPredicate::debug_string(), _nested ? 
_nested->debug_string() : "null"); + return fmt::to_string(debug_string_buffer); + } + std::shared_ptr clone(uint32_t column_id) const override { + return SharedPredicate::create_shared(*this, column_id); + } PredicateType type() const override { - std::shared_lock lock(_mtx); + std::shared_lock lock(*_mtx); if (!_nested) { // topn filter is le or ge return PredicateType::LE; } return _nested->type(); } + PrimitiveType primitive_type() const override { + std::shared_lock lock(*_mtx); + if (!_nested) { + return PrimitiveType::INVALID_TYPE; + } + return _nested->primitive_type(); + } - void set_nested(ColumnPredicate* nested) { - std::unique_lock lock(_mtx); - _nested.reset(nested); + void set_nested(const std::shared_ptr& nested) { + std::unique_lock lock(*_mtx); + _nested = nested; } Status evaluate(const vectorized::IndexFieldNameAndTypePair& name_with_type, IndexIterator* iterator, uint32_t num_rows, roaring::Roaring* bitmap) const override { - std::shared_lock lock(_mtx); + std::shared_lock lock(*_mtx); if (!_nested) { return Status::OK(); } @@ -64,7 +91,7 @@ class SharedPredicate : public ColumnPredicate { void evaluate_and(const vectorized::IColumn& column, const uint16_t* sel, uint16_t size, bool* flags) const override { - std::shared_lock lock(_mtx); + std::shared_lock lock(*_mtx); if (!_nested) { return; } @@ -77,7 +104,7 @@ class SharedPredicate : public ColumnPredicate { } bool evaluate_and(const std::pair& statistic) const override { - std::shared_lock lock(_mtx); + std::shared_lock lock(*_mtx); if (!_nested) { return ColumnPredicate::evaluate_and(statistic); } @@ -85,7 +112,7 @@ class SharedPredicate : public ColumnPredicate { } bool evaluate_del(const std::pair& statistic) const override { - std::shared_lock lock(_mtx); + std::shared_lock lock(*_mtx); if (!_nested) { return ColumnPredicate::evaluate_del(statistic); } @@ -93,7 +120,7 @@ class SharedPredicate : public ColumnPredicate { } bool evaluate_and(const BloomFilter* bf) const override { - 
std::shared_lock lock(_mtx); + std::shared_lock lock(*_mtx); if (!_nested) { return ColumnPredicate::evaluate_and(bf); } @@ -101,7 +128,7 @@ class SharedPredicate : public ColumnPredicate { } bool can_do_bloom_filter(bool ngram) const override { - std::shared_lock lock(_mtx); + std::shared_lock lock(*_mtx); if (!_nested) { return ColumnPredicate::can_do_bloom_filter(ngram); } @@ -110,7 +137,7 @@ class SharedPredicate : public ColumnPredicate { void evaluate_vec(const vectorized::IColumn& column, uint16_t size, bool* flags) const override { - std::shared_lock lock(_mtx); + std::shared_lock lock(*_mtx); if (!_nested) { for (uint16_t i = 0; i < size; ++i) { flags[i] = true; @@ -122,7 +149,7 @@ class SharedPredicate : public ColumnPredicate { void evaluate_and_vec(const vectorized::IColumn& column, uint16_t size, bool* flags) const override { - std::shared_lock lock(_mtx); + std::shared_lock lock(*_mtx); if (!_nested) { return; } @@ -130,7 +157,7 @@ class SharedPredicate : public ColumnPredicate { } std::string get_search_str() const override { - std::shared_lock lock(_mtx); + std::shared_lock lock(*_mtx); if (!_nested) { DCHECK(false) << "should not reach here"; } @@ -140,22 +167,14 @@ class SharedPredicate : public ColumnPredicate { private: uint16_t _evaluate_inner(const vectorized::IColumn& column, uint16_t* sel, uint16_t size) const override { - std::shared_lock lock(_mtx); + std::shared_lock lock(*_mtx); if (!_nested) { return size; } return _nested->evaluate(column, sel, size); } - std::string _debug_string() const override { - std::shared_lock lock(_mtx); - if (!_nested) { - return "shared_predicate(unknow)"; - } - return "shared_predicate(" + _nested->debug_string() + ")"; - } - - mutable std::shared_mutex _mtx; + mutable std::shared_ptr _mtx; std::shared_ptr _nested; }; diff --git a/be/src/olap/tablet_reader.cpp b/be/src/olap/tablet_reader.cpp index 63744b6f420a7f..015b72f163b4b4 100644 --- a/be/src/olap/tablet_reader.cpp +++ b/be/src/olap/tablet_reader.cpp 
@@ -81,9 +81,9 @@ std::string TabletReader::ReaderParams::to_string() const { ss << " end_keys=" << key; } - for (auto& condition : conditions) { - ss << " conditions=" << apache::thrift::ThriftDebugString(condition.filter); - } + // for (auto& condition : conditions) { + // ss << " conditions=" << apache::thrift::ThriftDebugString(condition.filter); + // } return ss.str(); } @@ -102,15 +102,6 @@ std::string TabletReader::KeysParam::to_string() const { return ss.str(); } -TabletReader::~TabletReader() { - for (auto* pred : _col_predicates) { - delete pred; - } - for (auto* pred : _value_col_predicates) { - delete pred; - } -} - Status TabletReader::init(const ReaderParams& read_params) { SCOPED_RAW_TIMER(&_stats.tablet_reader_init_timer_ns); @@ -521,47 +512,18 @@ Status TabletReader::_init_orderby_keys_param(const ReaderParams& read_params) { Status TabletReader::_init_conditions_param(const ReaderParams& read_params) { SCOPED_RAW_TIMER(&_stats.tablet_reader_init_conditions_param_timer_ns); - std::vector predicates; - - auto parse_and_emplace_predicates = [this, &predicates](auto& params) { - for (const auto& param : params) { - ColumnPredicate* predicate = _parse_to_predicate({param.column_name, param.filter}); - predicate->attach_profile_counter(param.runtime_filter_id, param.filtered_rows_counter, - param.input_rows_counter, - param.always_true_rows_counter); - predicates.emplace_back(predicate); - } - }; - - for (const auto& param : read_params.conditions) { - TCondition tmp_cond = param.filter; - RETURN_IF_ERROR(_tablet_schema->have_column(tmp_cond.column_name)); - // The "column" parameter might represent a column resulting from the decomposition of a variant column. - // Instead of using a "unique_id" for identification, we are utilizing a "path" to denote this column. 
- const auto& column = *DORIS_TRY(_tablet_schema->column(tmp_cond.column_name)); - const auto& mcolumn = materialize_column(column); - uint32_t index = _tablet_schema->field_index(tmp_cond.column_name); - ColumnPredicate* predicate = - parse_to_predicate(mcolumn.get_vec_type(), index, tmp_cond, _predicate_arena); - // record condition value into predicate_params in order to pushdown segment_iterator, - // _gen_predicate_result_sign will build predicate result unique sign with condition value - predicate->attach_profile_counter(param.runtime_filter_id, param.filtered_rows_counter, - param.input_rows_counter, param.always_true_rows_counter); - predicates.emplace_back(predicate); - } - parse_and_emplace_predicates(read_params.bloom_filters); - parse_and_emplace_predicates(read_params.bitmap_filters); - parse_and_emplace_predicates(read_params.in_filters); - + std::vector> predicates; + std::copy(read_params.predicates.cbegin(), read_params.predicates.cend(), + std::inserter(predicates, predicates.begin())); // Function filter push down to storage engine - auto is_like_predicate = [](ColumnPredicate* _pred) { - return dynamic_cast*>(_pred) != nullptr || - dynamic_cast*>(_pred) != nullptr; + auto is_like_predicate = [](std::shared_ptr _pred) { + return dynamic_cast*>(_pred.get()) != nullptr || + dynamic_cast*>(_pred.get()) != nullptr; }; for (const auto& filter : read_params.function_filters) { predicates.emplace_back(_parse_to_predicate(filter)); - auto* pred = predicates.back(); + auto pred = predicates.back(); const auto& col = _tablet_schema->column(pred->column_id()); const auto* tablet_index = _tablet_schema->get_ngram_bf_index(col.unique_id()); @@ -582,7 +544,7 @@ Status TabletReader::_init_conditions_param(const ReaderParams& read_params) { } } - for (auto* predicate : predicates) { + for (auto predicate : predicates) { auto column = _tablet_schema->column(predicate->column_id()); if (column.aggregation() != FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE) { 
_value_col_predicates.push_back(predicate); @@ -600,39 +562,12 @@ Status TabletReader::_init_conditions_param(const ReaderParams& read_params) { return Status::OK(); } -ColumnPredicate* TabletReader::_parse_to_predicate( - const std::pair>& bloom_filter) { - int32_t index = _tablet_schema->field_index(bloom_filter.first); - if (index < 0) { - return nullptr; - } - const TabletColumn& column = materialize_column(_tablet_schema->column(index)); - return create_column_predicate(index, bloom_filter.second, column.type(), &column); -} - -ColumnPredicate* TabletReader::_parse_to_predicate( - const std::pair>& in_filter) { - int32_t index = _tablet_schema->field_index(in_filter.first); - if (index < 0) { - return nullptr; - } - const TabletColumn& column = materialize_column(_tablet_schema->column(index)); - return create_column_predicate(index, in_filter.second, column.type(), &column); -} - -ColumnPredicate* TabletReader::_parse_to_predicate( - const std::pair>& bitmap_filter) { - int32_t index = _tablet_schema->field_index(bitmap_filter.first); - if (index < 0) { - return nullptr; - } - const TabletColumn& column = materialize_column(_tablet_schema->column(index)); - return create_column_predicate(index, bitmap_filter.second, column.type(), &column); -} - -ColumnPredicate* TabletReader::_parse_to_predicate(const FunctionFilter& function_filter) { +std::shared_ptr TabletReader::_parse_to_predicate( + const FunctionFilter& function_filter) { int32_t index = _tablet_schema->field_index(function_filter._col_name); if (index < 0) { + throw Exception(Status::InternalError("Column {} not found in tablet schema", + function_filter._col_name)); return nullptr; } const TabletColumn& column = materialize_column(_tablet_schema->column(index)); diff --git a/be/src/olap/tablet_reader.h b/be/src/olap/tablet_reader.h index 593afa9dbe54fd..0dad5424611a81 100644 --- a/be/src/olap/tablet_reader.h +++ b/be/src/olap/tablet_reader.h @@ -138,10 +138,7 @@ class TabletReader { bool 
start_key_include = false; bool end_key_include = false; - std::vector> conditions; - std::vector>> bloom_filters; - std::vector>> bitmap_filters; - std::vector>> in_filters; + std::vector> predicates; std::vector function_filters; std::vector delete_predicates; // slots that cast may be eliminated in storage layer @@ -212,7 +209,7 @@ class TabletReader { TabletReader() = default; - virtual ~TabletReader(); + virtual ~TabletReader() = default; TabletReader(const TabletReader&) = delete; void operator=(const TabletReader&) = delete; @@ -266,16 +263,8 @@ class TabletReader { Status _init_conditions_param(const ReaderParams& read_params); - ColumnPredicate* _parse_to_predicate( - const std::pair>& bloom_filter); - - ColumnPredicate* _parse_to_predicate( - const std::pair>& bitmap_filter); - - ColumnPredicate* _parse_to_predicate( - const std::pair>& in_filter); - - virtual ColumnPredicate* _parse_to_predicate(const FunctionFilter& function_filter); + virtual std::shared_ptr _parse_to_predicate( + const FunctionFilter& function_filter); Status _init_delete_condition(const ReaderParams& read_params); @@ -308,8 +297,8 @@ class TabletReader { KeysParam _keys_param; std::vector _is_lower_keys_included; std::vector _is_upper_keys_included; - std::vector _col_predicates; - std::vector _value_col_predicates; + std::vector> _col_predicates; + std::vector> _value_col_predicates; DeleteHandler _delete_handler; // Indicates whether the tablets has do a aggregation in storage engine. 
diff --git a/be/src/pipeline/exec/olap_scan_operator.cpp b/be/src/pipeline/exec/olap_scan_operator.cpp index 4c6f1409d2bc32..f96f372d26c01a 100644 --- a/be/src/pipeline/exec/olap_scan_operator.cpp +++ b/be/src/pipeline/exec/olap_scan_operator.cpp @@ -812,30 +812,21 @@ void OlapScanLocalState::set_scan_ranges(RuntimeState* state, } } -static std::string olap_filter_to_string(const doris::TCondition& condition) { - auto op_name = condition.condition_op; - if (condition.condition_op == "*=") { - op_name = "IN"; - } else if (condition.condition_op == "!*=") { - op_name = "NOT IN"; - } - return fmt::format("{{{} {} {}}}", condition.column_name, op_name, - condition.condition_values.size() > 128 - ? "[more than 128 elements]" - : to_string(condition.condition_values)); -} - -static std::string olap_filters_to_string(const std::vector>& filters) { - std::string filters_string; - filters_string += "["; - for (auto it = filters.cbegin(); it != filters.cend(); it++) { - if (it != filters.cbegin()) { - filters_string += ", "; +static std::string predicates_to_string( + const phmap::flat_hash_map>>& + slot_id_to_predicates) { + fmt::memory_buffer debug_string_buffer; + for (const auto& [slot_id, predicates] : slot_id_to_predicates) { + if (predicates.empty()) { + continue; + } + fmt::format_to(debug_string_buffer, "Slot ID: {}: [", slot_id); + for (const auto& predicate : predicates) { + fmt::format_to(debug_string_buffer, "{{{}}}, ", predicate->debug_string()); } - filters_string += olap_filter_to_string(it->filter); + fmt::format_to(debug_string_buffer, "] "); } - filters_string += "]"; - return filters_string; + return fmt::to_string(debug_string_buffer); } static std::string tablets_id_to_string( @@ -895,6 +886,7 @@ Status OlapScanLocalState::_build_key_ranges_and_filters() { if (_slot_id_to_value_range.end() == iter) { break; } + DCHECK(_slot_id_to_predicates.count(iter->first) > 0); const auto& value_range = iter->second.second; RETURN_IF_ERROR(std::visit( @@ -908,7 
+900,21 @@ Status OlapScanLocalState::_build_key_ranges_and_filters() { _scan_keys.extend_scan_key(temp_range, p._max_scan_key_num, &exact_range, &eos, &should_break)); if (exact_range) { - _slot_id_to_value_range.erase(iter->first); + auto key = iter->first; + _slot_id_to_value_range.erase(key); + + std::vector> new_predicates; + for (const auto& it : _slot_id_to_predicates[key]) { + if (it->type() == PredicateType::NOT_IN_LIST || + it->type() == PredicateType::NE) { + new_predicates.push_back(it); + } + } + if (new_predicates.empty()) { + _slot_id_to_predicates.erase(key); + } else { + _slot_id_to_predicates[key] = new_predicates; + } } } else { // if exceed max_pushdown_conditions_per_column, use whole_value_rang instead @@ -926,21 +932,6 @@ Status OlapScanLocalState::_build_key_ranges_and_filters() { _eos = true; _scan_dependency->set_ready(); } - - for (auto& iter : _slot_id_to_value_range) { - std::vector> filters; - std::visit([&](auto&& range) { range.to_olap_filter(filters); }, iter.second.second); - - for (const auto& filter : filters) { - _olap_filters.emplace_back(filter); - } - } - - // Append value ranges in "_not_in_value_ranges" - for (auto& range : _not_in_value_ranges) { - std::visit([&](auto&& the_range) { the_range.to_in_condition(_olap_filters, false); }, - range); - } } else { custom_profile()->add_info_string("PushDownAggregate", push_down_agg_to_string(p._push_down_agg_type)); @@ -948,7 +939,7 @@ Status OlapScanLocalState::_build_key_ranges_and_filters() { if (state()->enable_profile()) { custom_profile()->add_info_string("PushDownPredicates", - olap_filters_to_string(_olap_filters)); + predicates_to_string(_slot_id_to_predicates)); custom_profile()->add_info_string("KeyRanges", _scan_keys.debug_string()); custom_profile()->add_info_string("TabletIds", tablets_id_to_string(_scan_ranges)); } diff --git a/be/src/pipeline/exec/olap_scan_operator.h b/be/src/pipeline/exec/olap_scan_operator.h index b5fc420c4aec59..210a725a8bf060 100644 --- 
a/be/src/pipeline/exec/olap_scan_operator.h +++ b/be/src/pipeline/exec/olap_scan_operator.h @@ -109,7 +109,6 @@ class OlapScanLocalState final : public ScanLocalState { std::atomic_bool _sync_tablet = false; std::vector> _cond_ranges; OlapScanKeys _scan_keys; - std::vector> _olap_filters; // If column id in this set, indicate that we need to read data after index filtering std::set _output_column_ids; diff --git a/be/src/pipeline/exec/scan_operator.cpp b/be/src/pipeline/exec/scan_operator.cpp index 6e321bba3228a0..54f652af0f7480 100644 --- a/be/src/pipeline/exec/scan_operator.cpp +++ b/be/src/pipeline/exec/scan_operator.cpp @@ -25,6 +25,10 @@ #include #include "common/global_types.h" +#include "olap/bloom_filter_predicate.h" +#include "olap/in_list_predicate.h" +#include "olap/null_predicate.h" +#include "olap/predicate_creator.h" #include "pipeline/exec/es_scan_operator.h" #include "pipeline/exec/file_scan_operator.h" #include "pipeline/exec/group_commit_scan_operator.h" @@ -241,11 +245,16 @@ Status ScanLocalState::_normalize_conjuncts(RuntimeState* state) { } } init_value_range(slot, slot->type()); + _slot_id_to_predicates.insert( + {slot->id(), std::vector>()}); } get_cast_types_for_variants(); for (const auto& [colname, type] : _cast_types_for_variants) { init_value_range(p._slot_id_to_slot_desc[p._colname_to_slot_id[colname]], type); + _slot_id_to_predicates.insert( + {p._slot_id_to_slot_desc[p._colname_to_slot_id[colname]]->id(), + std::vector>()}); } RETURN_IF_ERROR(_get_topn_filters(state)); @@ -254,7 +263,7 @@ Status ScanLocalState::_normalize_conjuncts(RuntimeState* state) { auto& conjunct = *it; if (conjunct->root()) { vectorized::VExprSPtr new_root; - RETURN_IF_ERROR(_normalize_predicate(conjunct->root(), conjunct.get(), new_root)); + RETURN_IF_ERROR(_normalize_predicate(conjunct.get(), new_root)); if (new_root) { conjunct->set_root(new_root); if (_should_push_down_common_expr() && @@ -287,62 +296,58 @@ Status 
ScanLocalState::_normalize_conjuncts(RuntimeState* state) { } template -Status ScanLocalState::_normalize_predicate( - const vectorized::VExprSPtr& conjunct_expr_root, vectorized::VExprContext* context, - vectorized::VExprSPtr& output_expr) { +Status ScanLocalState::_normalize_predicate(vectorized::VExprContext* context, + vectorized::VExprSPtr& output_expr) { + const auto expr_root = context->root(); static constexpr auto is_leaf = [](auto&& expr) { return !expr->is_and_expr(); }; - auto in_predicate_checker = [](const vectorized::VExprSPtrs& children, - std::shared_ptr& slot, - vectorized::VExprSPtr& child_contains_slot) { + auto in_predicate_checker = [&](const vectorized::VExprSPtrs& children, + SlotDescriptor** slot_desc, ColumnValueRangeType** range) { if (children.empty() || vectorized::VExpr::expr_without_cast(children[0])->node_type() != TExprNodeType::SLOT_REF) { // not a slot ref(column) return false; } - slot = std::dynamic_pointer_cast( - vectorized::VExpr::expr_without_cast(children[0])); - child_contains_slot = children[0]; - return true; + std::shared_ptr slot = + std::dynamic_pointer_cast( + vectorized::VExpr::expr_without_cast(children[0])); + *slot_desc = + _parent->cast()._slot_id_to_slot_desc[slot->slot_id()]; + return _is_predicate_acting_on_slot(slot, children[0], range); }; - auto eq_predicate_checker = [](const vectorized::VExprSPtrs& children, - std::shared_ptr& slot, - vectorized::VExprSPtr& child_contains_slot) { - for (const auto& child : children) { - if (vectorized::VExpr::expr_without_cast(child)->node_type() != - TExprNodeType::SLOT_REF) { - // not a slot ref(column) - continue; - } - slot = std::dynamic_pointer_cast( - vectorized::VExpr::expr_without_cast(child)); - CHECK(slot != nullptr); - child_contains_slot = child; - return true; + auto eq_predicate_checker = [&](const vectorized::VExprSPtrs& children, + SlotDescriptor** slot_desc, ColumnValueRangeType** range) { + if (children.empty() || 
vectorized::VExpr::expr_without_cast(children[0])->node_type() != + TExprNodeType::SLOT_REF) { + // not a slot ref(column) + return false; } - return false; + std::shared_ptr slot = + std::dynamic_pointer_cast( + vectorized::VExpr::expr_without_cast(children[0])); + CHECK(slot != nullptr); + *slot_desc = + _parent->cast()._slot_id_to_slot_desc[slot->slot_id()]; + return _is_predicate_acting_on_slot(slot, children[0], range); }; - if (conjunct_expr_root != nullptr) { - if (is_leaf(conjunct_expr_root)) { - auto impl = conjunct_expr_root->get_impl(); - // If impl is not null, which means this is a conjunct from runtime filter. - vectorized::VExpr* cur_expr = impl ? impl.get() : conjunct_expr_root.get(); - if (dynamic_cast(cur_expr)) { + if (expr_root != nullptr) { + if (is_leaf(expr_root)) { + if (dynamic_cast(expr_root.get())) { // If the expr has virtual slot ref, we need to keep it in the tree. - output_expr = conjunct_expr_root; + output_expr = expr_root; return Status::OK(); } SlotDescriptor* slot = nullptr; ColumnValueRangeType* range = nullptr; PushDownType pdt = PushDownType::UNACCEPTABLE; - RETURN_IF_ERROR(_eval_const_conjuncts(cur_expr, context, &pdt)); + RETURN_IF_ERROR(_eval_const_conjuncts(context, &pdt)); if (pdt == PushDownType::ACCEPTABLE) { output_expr = nullptr; return Status::OK(); } std::shared_ptr slotref; - for (const auto& child : cur_expr->children()) { + for (const auto& child : expr_root->children()) { if (vectorized::VExpr::expr_without_cast(child)->node_type() != TExprNodeType::SLOT_REF) { // not a slot ref(column) @@ -351,20 +356,20 @@ Status ScanLocalState::_normalize_predicate( slotref = std::dynamic_pointer_cast( vectorized::VExpr::expr_without_cast(child)); } - if (_is_predicate_acting_on_slot(cur_expr, in_predicate_checker, &slot, &range) || - _is_predicate_acting_on_slot(cur_expr, eq_predicate_checker, &slot, &range)) { + if (in_predicate_checker(expr_root->children(), &slot, &range) || + eq_predicate_checker(expr_root->children(), 
&slot, &range)) { Status status = Status::OK(); std::visit( [&](auto& value_range) { bool need_set_runtime_filter_id = value_range.is_whole_value_range() && - conjunct_expr_root->is_rf_wrapper(); + expr_root->is_rf_wrapper(); Defer set_runtime_filter_id {[&]() { // rf predicates is always appended to the end of conjuncts. We need to ensure that there is no non-rf predicate after rf-predicate // If it is not a whole range, it means that the column has other non-rf predicates, so it cannot be marked as rf predicate. // If the range where non-rf predicates are located is incorrectly marked as rf, can_ignore will return true, resulting in the predicate not taking effect and getting an incorrect result. if (need_set_runtime_filter_id) { auto* rf_expr = assert_cast( - conjunct_expr_root.get()); + expr_root.get()); DCHECK(rf_expr->predicate_filtered_rows_counter() != nullptr); DCHECK(rf_expr->predicate_input_rows_counter() != nullptr); value_range.attach_profile_counter( @@ -374,27 +379,38 @@ Status ScanLocalState::_normalize_predicate( rf_expr->predicate_always_true_rows_counter()); } }}; - RETURN_IF_PUSH_DOWN(_normalize_in_and_eq_predicate( - cur_expr, context, slot, value_range, &pdt), - status); - RETURN_IF_PUSH_DOWN(_normalize_not_in_and_not_eq_predicate( - cur_expr, context, slot, value_range, &pdt), - status); - RETURN_IF_PUSH_DOWN(_normalize_is_null_predicate( - cur_expr, context, slot, value_range, &pdt), - status); - RETURN_IF_PUSH_DOWN(_normalize_noneq_binary_predicate( - cur_expr, context, slot, value_range, &pdt), - status); RETURN_IF_PUSH_DOWN( - _normalize_bitmap_filter(cur_expr, context, slot, &pdt), + _normalize_in_and_eq_predicate( + context, slot, _slot_id_to_predicates[slot->id()], + value_range, &pdt), + status); + RETURN_IF_PUSH_DOWN( + _normalize_not_in_and_not_eq_predicate( + context, slot, _slot_id_to_predicates[slot->id()], + value_range, &pdt), status); RETURN_IF_PUSH_DOWN( - _normalize_bloom_filter(cur_expr, context, slot, &pdt), status); + 
_normalize_is_null_predicate(context, slot, + _slot_id_to_predicates[slot->id()], + value_range, &pdt), + status); + RETURN_IF_PUSH_DOWN( + _normalize_noneq_binary_predicate( + context, slot, _slot_id_to_predicates[slot->id()], + value_range, &pdt), + status); + RETURN_IF_PUSH_DOWN(_normalize_bitmap_filter( + context, slot, + _slot_id_to_predicates[slot->id()], &pdt), + status); + RETURN_IF_PUSH_DOWN(_normalize_bloom_filter( + context, slot, + _slot_id_to_predicates[slot->id()], &pdt), + status); + if (state()->enable_function_pushdown()) { RETURN_IF_PUSH_DOWN( - _normalize_function_filters(cur_expr, context, slot, &pdt), - status); + _normalize_function_filters(context, slot, &pdt), status); } }, *range); @@ -404,7 +420,7 @@ Status ScanLocalState::_normalize_predicate( slotref->data_type()->get_primitive_type() == PrimitiveType::TYPE_VARIANT) { // remaining it in the expr tree, in order to filter by function if the pushdown // predicate is not applied - output_expr = conjunct_expr_root; // remaining in conjunct tree + output_expr = expr_root; // remaining in conjunct tree return Status::OK(); } @@ -413,32 +429,39 @@ Status ScanLocalState::_normalize_predicate( return Status::OK(); } else { // for PARTIAL_ACCEPTABLE and UNACCEPTABLE, do not remove expr from the tree - output_expr = conjunct_expr_root; + output_expr = expr_root; return Status::OK(); } } else { return Status::InternalError("conjunct root should not and expr, but now {}", - conjunct_expr_root->debug_string()); + expr_root->debug_string()); } } - output_expr = conjunct_expr_root; + output_expr = expr_root; return Status::OK(); } template -Status ScanLocalState::_normalize_bloom_filter(vectorized::VExpr* expr, - vectorized::VExprContext* expr_ctx, - SlotDescriptor* slot, PushDownType* pdt) { +Status ScanLocalState::_normalize_bloom_filter( + vectorized::VExprContext* expr_ctx, SlotDescriptor* slot, + std::vector>& predicates, PushDownType* pdt) { + auto expr = expr_ctx->root()->is_rf_wrapper() ? 
expr_ctx->root()->get_impl() : expr_ctx->root(); if (TExprNodeType::BLOOM_PRED == expr->node_type()) { DCHECK(expr->get_num_children() == 1); DCHECK(expr_ctx->root()->is_rf_wrapper()); PushDownType temp_pdt = _should_push_down_bloom_filter(); + auto* rf_wrapper = assert_cast(expr_ctx->root().get()); if (temp_pdt != PushDownType::UNACCEPTABLE) { auto* rf_expr = assert_cast(expr_ctx->root().get()); - _filter_predicates.bloom_filters.emplace_back( - slot->col_name(), expr->get_bloom_filter_func(), rf_expr->filter_id(), - rf_expr->predicate_filtered_rows_counter(), - rf_expr->predicate_input_rows_counter(), + predicates.emplace_back( + create_bloom_filter_predicate(slot->id(), + slot->type()->get_primitive_type() == TYPE_VARIANT + ? expr->get_child(0)->data_type() + : slot->type(), + expr->get_bloom_filter_func())); + predicates.back()->attach_profile_counter( + rf_wrapper->filter_id(), rf_wrapper->predicate_filtered_rows_counter(), + rf_wrapper->predicate_input_rows_counter(), rf_expr->predicate_always_true_rows_counter()); *pdt = temp_pdt; } @@ -447,19 +470,26 @@ Status ScanLocalState::_normalize_bloom_filter(vectorized::VExpr* expr, } template -Status ScanLocalState::_normalize_bitmap_filter(vectorized::VExpr* expr, - vectorized::VExprContext* expr_ctx, - SlotDescriptor* slot, PushDownType* pdt) { +Status ScanLocalState::_normalize_bitmap_filter( + vectorized::VExprContext* expr_ctx, SlotDescriptor* slot, + std::vector>& predicates, PushDownType* pdt) { + auto expr = expr_ctx->root()->is_rf_wrapper() ? 
expr_ctx->root()->get_impl() : expr_ctx->root(); if (TExprNodeType::BITMAP_PRED == expr->node_type()) { DCHECK(expr->get_num_children() == 1); DCHECK(expr_ctx->root()->is_rf_wrapper()); PushDownType temp_pdt = _should_push_down_bitmap_filter(); + auto* rf_wrapper = assert_cast(expr_ctx->root().get()); if (temp_pdt != PushDownType::UNACCEPTABLE) { auto* rf_expr = assert_cast(expr_ctx->root().get()); - _filter_predicates.bitmap_filters.emplace_back( - slot->col_name(), expr->get_bitmap_filter_func(), rf_expr->filter_id(), - rf_expr->predicate_filtered_rows_counter(), - rf_expr->predicate_input_rows_counter(), + predicates.emplace_back(create_bitmap_filter_predicate( + slot->id(), + slot->type()->get_primitive_type() == TYPE_VARIANT + ? expr->get_child(0)->data_type() + : slot->type(), + expr->get_bitmap_filter_func())); + predicates.back()->attach_profile_counter( + rf_wrapper->filter_id(), rf_wrapper->predicate_filtered_rows_counter(), + rf_wrapper->predicate_input_rows_counter(), rf_expr->predicate_always_true_rows_counter()); *pdt = temp_pdt; } @@ -468,12 +498,12 @@ Status ScanLocalState::_normalize_bitmap_filter(vectorized::VExpr* expr } template -Status ScanLocalState::_normalize_function_filters(vectorized::VExpr* expr, - vectorized::VExprContext* expr_ctx, +Status ScanLocalState::_normalize_function_filters(vectorized::VExprContext* expr_ctx, SlotDescriptor* slot, PushDownType* pdt) { + auto expr = expr_ctx->root()->is_rf_wrapper() ? 
expr_ctx->root()->get_impl() : expr_ctx->root(); bool opposite = false; - vectorized::VExpr* fn_expr = expr; + vectorized::VExpr* fn_expr = expr.get(); if (TExprNodeType::COMPOUND_PRED == expr->node_type() && expr->fn().name.function_name == "not") { fn_expr = fn_expr->children()[0].get(); @@ -498,52 +528,38 @@ Status ScanLocalState::_normalize_function_filters(vectorized::VExpr* e template bool ScanLocalState::_is_predicate_acting_on_slot( - vectorized::VExpr* expr, - const std::function&, vectorized::VExprSPtr&)>& - checker, - SlotDescriptor** slot_desc, ColumnValueRangeType** range) { - std::shared_ptr slot_ref; - vectorized::VExprSPtr child_contains_slot; - if (!checker(expr->children(), slot_ref, child_contains_slot)) { - // not a slot ref(column) + const std::shared_ptr& slot_ref, + const vectorized::VExprSPtr& child_contains_slot, ColumnValueRangeType** range) { + auto entry = _slot_id_to_predicates.find(slot_ref->slot_id()); + if (_slot_id_to_predicates.end() == entry) { return false; } - - // slot_ref is a specific expr - // child_contains_slot may include a cast expr - - auto entry = _slot_id_to_value_range.find(slot_ref->slot_id()); - if (_slot_id_to_value_range.end() == entry) { + if (is_complex_type(slot_ref->data_type()->get_primitive_type())) { return false; } - // if the slot is a complex type(array/map/struct), we do not push down the predicate, because - // we delete pack these type into predict column, and origin pack action is wrong. we should - // make sense to push down this complex type after we delete predict column. 
- if (is_complex_type(slot_ref->data_type()->get_primitive_type())) { + auto& p = _parent->cast(); + auto sid_to_range = _slot_id_to_value_range.find(slot_ref->slot_id()); + if (_slot_id_to_value_range.end() == sid_to_range) { return false; } - *slot_desc = entry->second.first; + *range = &(sid_to_range->second.second); + SlotDescriptor* src_slot_desc = p._slot_id_to_slot_desc[slot_ref->slot_id()]; DCHECK(child_contains_slot != nullptr); if (child_contains_slot->data_type()->get_primitive_type() != - (*slot_desc)->type()->get_primitive_type() || + src_slot_desc->type()->get_primitive_type() || child_contains_slot->data_type()->get_precision() != - (*slot_desc)->type()->get_precision() || - child_contains_slot->data_type()->get_scale() != (*slot_desc)->type()->get_scale()) { - if (!_ignore_cast(*slot_desc, child_contains_slot.get())) { - // the type of predicate not match the slot's type - return false; - } - } else if ((child_contains_slot->data_type()->get_primitive_type() == - PrimitiveType::TYPE_DATETIME || - child_contains_slot->data_type()->get_primitive_type() == - PrimitiveType::TYPE_DATETIMEV2) && - child_contains_slot->node_type() == doris::TExprNodeType::CAST_EXPR) { + src_slot_desc->type()->get_precision() || + child_contains_slot->data_type()->get_scale() != src_slot_desc->type()->get_scale()) { + return _ignore_cast(src_slot_desc, child_contains_slot.get()); + } + if ((child_contains_slot->data_type()->get_primitive_type() == PrimitiveType::TYPE_DATETIME || + child_contains_slot->data_type()->get_primitive_type() == + PrimitiveType::TYPE_DATETIMEV2) && + child_contains_slot->node_type() == doris::TExprNodeType::CAST_EXPR) { // Expr `CAST(CAST(datetime_col AS DATE) AS DATETIME) = datetime_literal` should not be // push down. 
return false; } - *range = &(entry->second.second); return true; } @@ -566,38 +582,20 @@ std::string ScanLocalState::debug_string(int indentation_level) const { template bool ScanLocalState::_ignore_cast(SlotDescriptor* slot, vectorized::VExpr* expr) { - if (is_string_type(slot->type()->get_primitive_type()) && - is_string_type(expr->data_type()->get_primitive_type())) { - return true; - } // only one level cast expr could push down for variant type // check if expr is cast and it's children is slot if (slot->type()->get_primitive_type() == PrimitiveType::TYPE_VARIANT) { return expr->node_type() == TExprNodeType::CAST_EXPR && expr->children().at(0)->is_slot_ref(); } - if (slot->type()->get_primitive_type() == PrimitiveType::TYPE_ARRAY) { - if (assert_cast( - vectorized::remove_nullable(slot->type()).get()) - ->get_nested_type() - ->get_primitive_type() == expr->data_type()->get_primitive_type()) { - return true; - } - if (is_string_type(assert_cast( - vectorized::remove_nullable(slot->type()).get()) - ->get_nested_type() - ->get_primitive_type()) && - is_string_type(expr->data_type()->get_primitive_type())) { - return true; - } - } return false; } template -Status ScanLocalState::_eval_const_conjuncts(vectorized::VExpr* vexpr, - vectorized::VExprContext* expr_ctx, +Status ScanLocalState::_eval_const_conjuncts(vectorized::VExprContext* expr_ctx, PushDownType* pdt) { + auto vexpr = + expr_ctx->root()->is_rf_wrapper() ? 
expr_ctx->root()->get_impl() : expr_ctx->root(); // Used to handle constant expressions, such as '1 = 1' _eval_const_conjuncts does not handle cases like 'colA = 1' const char* constant_val = nullptr; if (vexpr->is_constant()) { @@ -645,11 +643,10 @@ Status ScanLocalState::_eval_const_conjuncts(vectorized::VExpr* vexpr, template template -Status ScanLocalState::_normalize_in_and_eq_predicate(vectorized::VExpr* expr, - vectorized::VExprContext* expr_ctx, - SlotDescriptor* slot, - ColumnValueRange& range, - PushDownType* pdt) { +Status ScanLocalState::_normalize_in_and_eq_predicate( + vectorized::VExprContext* expr_ctx, SlotDescriptor* slot, + std::vector>& predicates, ColumnValueRange& range, + PushDownType* pdt) { auto temp_range = ColumnValueRange::create_empty_column_value_range( slot->is_nullable(), range.precision(), range.scale()); @@ -658,6 +655,7 @@ Status ScanLocalState::_normalize_in_and_eq_predicate(vectorized::VExpr return Status::OK(); } + auto expr = expr_ctx->root()->is_rf_wrapper() ? expr_ctx->root()->get_impl() : expr_ctx->root(); // 1. Normalize in conjuncts like 'where col in (v1, v2, v3)' if (TExprNodeType::IN_PRED == expr->node_type()) { HybridSetBase::IteratorBase* iter = nullptr; @@ -669,32 +667,27 @@ Status ScanLocalState::_normalize_in_and_eq_predicate(vectorized::VExpr _parent->cast()._max_pushdown_conditions_per_column) { iter = hybrid_set->begin(); } else { - int runtime_filter_id = -1; - std::shared_ptr predicate_filtered_rows_counter = nullptr; - std::shared_ptr predicate_input_rows_counter = nullptr; - std::shared_ptr predicate_always_true_rows_counter = - nullptr; + predicates.emplace_back(create_in_list_predicate( + slot->id(), + slot->type()->get_primitive_type() == TYPE_VARIANT + ? 
expr->get_child(0)->data_type() + : slot->type(), + expr->get_set_func(), false)); if (expr_ctx->root()->is_rf_wrapper()) { - auto* rf_expr = + auto* rf_wrapper = assert_cast(expr_ctx->root().get()); - runtime_filter_id = rf_expr->filter_id(); - predicate_filtered_rows_counter = rf_expr->predicate_filtered_rows_counter(); - predicate_input_rows_counter = rf_expr->predicate_input_rows_counter(); - predicate_always_true_rows_counter = - rf_expr->predicate_always_true_rows_counter(); + predicates.back()->attach_profile_counter( + rf_wrapper->filter_id(), rf_wrapper->predicate_filtered_rows_counter(), + rf_wrapper->predicate_input_rows_counter(), + rf_wrapper->predicate_always_true_rows_counter()); } - _filter_predicates.in_filters.emplace_back( - slot->col_name(), expr->get_set_func(), runtime_filter_id, - predicate_filtered_rows_counter, predicate_input_rows_counter, - predicate_always_true_rows_counter); *pdt = PushDownType::ACCEPTABLE; return Status::OK(); } } else { // normal in predicate - auto* pred = static_cast(expr); - PushDownType temp_pdt = _should_push_down_in_predicate(pred, false); - if (temp_pdt == PushDownType::UNACCEPTABLE) { + auto* pred = assert_cast(expr.get()); + if (_should_push_down_in_predicate(pred, false) == PushDownType::UNACCEPTABLE) { return Status::OK(); } @@ -708,6 +701,7 @@ Status ScanLocalState::_normalize_in_and_eq_predicate(vectorized::VExpr return Status::OK(); } + hybrid_set = state->hybrid_set; iter = state->hybrid_set->begin(); } @@ -721,6 +715,11 @@ Status ScanLocalState::_normalize_in_and_eq_predicate(vectorized::VExpr iter->next(); } range.intersection(temp_range); + predicates.emplace_back(create_in_list_predicate( + slot->id(), + slot->type()->get_primitive_type() == TYPE_VARIANT ? 
expr->get_child(0)->data_type() + : slot->type(), + hybrid_set, false)); *pdt = PushDownType::ACCEPTABLE; } else if (TExprNodeType::BINARY_PRED == expr->node_type()) { DCHECK(expr->get_num_children() == 2); @@ -731,7 +730,7 @@ Status ScanLocalState::_normalize_in_and_eq_predicate(vectorized::VExpr PushDownType temp_pdt; RETURN_IF_ERROR(_should_push_down_binary_predicate( - reinterpret_cast(expr), expr_ctx, &value, + assert_cast(expr.get()), expr_ctx, &value, &slot_ref_child, eq_checker, temp_pdt)); if (temp_pdt == PushDownType::UNACCEPTABLE) { return Status::OK(); @@ -740,6 +739,19 @@ Status ScanLocalState::_normalize_in_and_eq_predicate(vectorized::VExpr // where A = nullptr should return empty result set auto fn_name = std::string(""); if (value.data != nullptr) { + if (!is_string_type(T) && + sizeof(typename PrimitiveTypeTraits::CppType) != value.size) { + return Status::InternalError( + "PrimitiveType {} meet invalid input value size {}, expect size {}", T, + value.size, sizeof(typename PrimitiveTypeTraits::CppType)); + } + predicates.emplace_back(create_comparison_predicate0( + slot->id(), + slot->type()->get_primitive_type() == TYPE_VARIANT + ? 
expr->get_child(0)->data_type() + : slot->type(), + value, false, _arena)); + if constexpr (T == TYPE_CHAR || T == TYPE_VARCHAR || T == TYPE_STRING || T == TYPE_HLL) { auto val = StringRef(value.data, value.size); @@ -818,12 +830,12 @@ PushDownType ScanLocalState::_should_push_down_in_predicate(vectorized: template template Status ScanLocalState::_normalize_not_in_and_not_eq_predicate( - vectorized::VExpr* expr, vectorized::VExprContext* expr_ctx, SlotDescriptor* slot, - ColumnValueRange& range, PushDownType* pdt) { + vectorized::VExprContext* expr_ctx, SlotDescriptor* slot, + std::vector>& predicates, ColumnValueRange& range, + PushDownType* pdt) { bool is_fixed_range = range.is_fixed_value_range(); - auto not_in_range = ColumnValueRange::create_empty_column_value_range( - range.column_name(), slot->is_nullable(), range.precision(), range.scale()); PushDownType temp_pdt = PushDownType::UNACCEPTABLE; + auto expr = expr_ctx->root()->is_rf_wrapper() ? expr_ctx->root()->get_impl() : expr_ctx->root(); // 1. Normalize in conjuncts like 'where col in (v1, v2, v3)' if (TExprNodeType::IN_PRED == expr->node_type()) { /// `VDirectInPredicate` here should not be pushed down. 
@@ -834,18 +846,20 @@ Status ScanLocalState::_normalize_not_in_and_not_eq_predicate( return Status::OK(); } - vectorized::VInPredicate* pred = static_cast(expr); - if ((temp_pdt = _should_push_down_in_predicate(pred, true)) == PushDownType::UNACCEPTABLE) { + auto* pred = assert_cast(expr.get()); + if ((_should_push_down_in_predicate(pred, true)) == PushDownType::UNACCEPTABLE) { + *pdt = PushDownType::UNACCEPTABLE; return Status::OK(); } // begin to push InPredicate value into ColumnValueRange - vectorized::InState* state = reinterpret_cast( + auto* state = reinterpret_cast( expr_ctx->fn_context(pred->fn_context_index()) ->get_function_state(FunctionContext::FRAGMENT_LOCAL)); // xx in (col, xx, xx) should not be push down if (!state->use_set) { + *pdt = PushDownType::UNACCEPTABLE; return Status::OK(); } @@ -855,6 +869,11 @@ Status ScanLocalState::_normalize_not_in_and_not_eq_predicate( _eos = true; _scan_dependency->set_ready(); } + predicates.emplace_back(create_in_list_predicate( + slot->id(), + slot->type()->get_primitive_type() == TYPE_VARIANT ? 
expr->get_child(0)->data_type() + : slot->type(), + state->hybrid_set, false)); while (iter->has_next()) { // column not in (nullptr) is always true DCHECK(iter->get_value() != nullptr); @@ -862,9 +881,6 @@ Status ScanLocalState::_normalize_not_in_and_not_eq_predicate( if (is_fixed_range) { RETURN_IF_ERROR(_change_value_range( range, value, ColumnValueRange::remove_fixed_value_range, fn_name)); - } else { - RETURN_IF_ERROR(_change_value_range( - not_in_range, value, ColumnValueRange::add_fixed_value_range, fn_name)); } iter->next(); } @@ -875,7 +891,7 @@ Status ScanLocalState::_normalize_not_in_and_not_eq_predicate( StringRef value; int slot_ref_child = -1; RETURN_IF_ERROR(_should_push_down_binary_predicate( - reinterpret_cast(expr), expr_ctx, &value, + assert_cast(expr.get()), expr_ctx, &value, &slot_ref_child, ne_checker, temp_pdt)); if (temp_pdt == PushDownType::UNACCEPTABLE) { return Status::OK(); @@ -884,6 +900,18 @@ Status ScanLocalState::_normalize_not_in_and_not_eq_predicate( DCHECK(slot_ref_child >= 0); // where A = nullptr should return empty result set if (value.data != nullptr) { + if (!is_string_type(T) && + sizeof(typename PrimitiveTypeTraits::CppType) != value.size) { + return Status::InternalError( + "PrimitiveType {} meet invalid input value size {}, expect size {}", T, + value.size, sizeof(typename PrimitiveTypeTraits::CppType)); + } + predicates.emplace_back(create_comparison_predicate0( + slot->id(), + slot->type()->get_primitive_type() == TYPE_VARIANT + ? 
expr->get_child(0)->data_type() + : slot->type(), + value, false, _arena)); auto fn_name = std::string(""); if constexpr (T == TYPE_CHAR || T == TYPE_VARCHAR || T == TYPE_STRING || T == TYPE_HLL) { @@ -892,20 +920,12 @@ Status ScanLocalState::_normalize_not_in_and_not_eq_predicate( RETURN_IF_ERROR(_change_value_range( range, reinterpret_cast(&val), ColumnValueRange::remove_fixed_value_range, fn_name)); - } else { - RETURN_IF_ERROR(_change_value_range( - not_in_range, reinterpret_cast(&val), - ColumnValueRange::add_fixed_value_range, fn_name)); } } else { if (is_fixed_range) { RETURN_IF_ERROR(_change_value_range( range, reinterpret_cast(value.data), ColumnValueRange::remove_fixed_value_range, fn_name)); - } else { - RETURN_IF_ERROR(_change_value_range( - not_in_range, reinterpret_cast(value.data), - ColumnValueRange::add_fixed_value_range, fn_name)); } } } else { @@ -913,17 +933,10 @@ Status ScanLocalState::_normalize_not_in_and_not_eq_predicate( _scan_dependency->set_ready(); } } else { + *pdt = PushDownType::UNACCEPTABLE; return Status::OK(); } - - if (is_fixed_range || - not_in_range.get_fixed_value_size() <= - _parent->cast()._max_pushdown_conditions_per_column) { - if (!is_fixed_range) { - _not_in_value_ranges.push_back(not_in_range); - } - *pdt = temp_pdt; - } + *pdt = PushDownType::ACCEPTABLE; return Status::OK(); } @@ -998,26 +1011,26 @@ Status ScanLocalState::_change_value_range(ColumnValueRange template -Status ScanLocalState::_normalize_is_null_predicate(vectorized::VExpr* expr, - vectorized::VExprContext* expr_ctx, - SlotDescriptor* slot, - ColumnValueRange& range, - PushDownType* pdt) { +Status ScanLocalState::_normalize_is_null_predicate( + vectorized::VExprContext* expr_ctx, SlotDescriptor* slot, + std::vector>& predicates, ColumnValueRange& range, + PushDownType* pdt) { + auto expr = expr_ctx->root()->is_rf_wrapper() ? 
expr_ctx->root()->get_impl() : expr_ctx->root(); PushDownType temp_pdt = _should_push_down_is_null_predicate(); if (temp_pdt == PushDownType::UNACCEPTABLE) { return Status::OK(); } - if (TExprNodeType::FUNCTION_CALL == expr->node_type()) { - if (reinterpret_cast(expr)->fn().name.function_name == - "is_null_pred") { + if (auto fn_call = dynamic_cast(expr.get())) { + if (fn_call->fn().name.function_name == "is_null_pred") { + predicates.emplace_back(NullPredicate::create_shared(slot->id(), true, T)); auto temp_range = ColumnValueRange::create_empty_column_value_range( slot->is_nullable(), range.precision(), range.scale()); temp_range.set_contain_null(true); range.intersection(temp_range); *pdt = temp_pdt; - } else if (reinterpret_cast(expr)->fn().name.function_name == - "is_not_null_pred") { + } else if (fn_call->fn().name.function_name == "is_not_null_pred") { + predicates.emplace_back(NullPredicate::create_shared(slot->id(), false, T)); auto temp_range = ColumnValueRange::create_empty_column_value_range( slot->is_nullable(), range.precision(), range.scale()); temp_range.set_contain_null(false); @@ -1031,8 +1044,10 @@ Status ScanLocalState::_normalize_is_null_predicate(vectorized::VExpr* template template Status ScanLocalState::_normalize_noneq_binary_predicate( - vectorized::VExpr* expr, vectorized::VExprContext* expr_ctx, SlotDescriptor* slot, - ColumnValueRange& range, PushDownType* pdt) { + vectorized::VExprContext* expr_ctx, SlotDescriptor* slot, + std::vector>& predicates, ColumnValueRange& range, + PushDownType* pdt) { + auto expr = expr_ctx->root()->is_rf_wrapper() ? 
expr_ctx->root()->get_impl() : expr_ctx->root(); if (TExprNodeType::BINARY_PRED == expr->node_type()) { DCHECK(expr->get_num_children() == 2); @@ -1043,25 +1058,57 @@ Status ScanLocalState::_normalize_noneq_binary_predicate( int slot_ref_child = -1; PushDownType temp_pdt; RETURN_IF_ERROR(_should_push_down_binary_predicate( - reinterpret_cast(expr), expr_ctx, &value, + assert_cast(expr.get()), expr_ctx, &value, &slot_ref_child, noneq_checker, temp_pdt)); if (temp_pdt != PushDownType::UNACCEPTABLE) { DCHECK(slot_ref_child >= 0); - const std::string& fn_name = - reinterpret_cast(expr)->fn().name.function_name; + const std::string& function_name = + assert_cast(expr.get())->fn().name.function_name; // where A = nullptr should return empty result set if (value.data != nullptr) { + if (function_name == "lt") { + predicates.emplace_back(create_comparison_predicate0( + slot->id(), + slot->type()->get_primitive_type() == TYPE_VARIANT + ? expr->get_child(0)->data_type() + : slot->type(), + value, false, _arena)); + } else if (function_name == "gt") { + predicates.emplace_back(create_comparison_predicate0( + slot->id(), + slot->type()->get_primitive_type() == TYPE_VARIANT + ? expr->get_child(0)->data_type() + : slot->type(), + value, false, _arena)); + } else if (function_name == "le") { + predicates.emplace_back(create_comparison_predicate0( + slot->id(), + slot->type()->get_primitive_type() == TYPE_VARIANT + ? expr->get_child(0)->data_type() + : slot->type(), + value, false, _arena)); + } else if (function_name == "ge") { + predicates.emplace_back(create_comparison_predicate0( + slot->id(), + slot->type()->get_primitive_type() == TYPE_VARIANT + ? 
expr->get_child(0)->data_type() + : slot->type(), + value, false, _arena)); + } else { + throw Exception( + Status::InternalError("Unsupported function name: {}", function_name)); + } if constexpr (T == TYPE_CHAR || T == TYPE_VARCHAR || T == TYPE_STRING || T == TYPE_HLL) { auto val = StringRef(value.data, value.size); RETURN_IF_ERROR(_change_value_range(range, reinterpret_cast(&val), ColumnValueRange::add_value_range, - fn_name, slot_ref_child)); + function_name, slot_ref_child)); } else { RETURN_IF_ERROR(_change_value_range( range, reinterpret_cast(value.data), - ColumnValueRange::add_value_range, fn_name, slot_ref_child)); + ColumnValueRange::add_value_range, function_name, slot_ref_child)); } *pdt = temp_pdt; } else { diff --git a/be/src/pipeline/exec/scan_operator.h b/be/src/pipeline/exec/scan_operator.h index cad47313d073bc..32506b7b110b24 100644 --- a/be/src/pipeline/exec/scan_operator.h +++ b/be/src/pipeline/exec/scan_operator.h @@ -37,6 +37,7 @@ namespace doris::vectorized { #include "common/compile_check_begin.h" class ScannerDelegate; +class OlapScanner; } // namespace doris::vectorized namespace doris::pipeline { @@ -246,47 +247,44 @@ class ScanLocalState : public ScanLocalStateBase { } Status _normalize_conjuncts(RuntimeState* state); - Status _normalize_predicate(const vectorized::VExprSPtr& conjunct_expr_root, - vectorized::VExprContext* context, + Status _normalize_predicate(vectorized::VExprContext* context, vectorized::VExprSPtr& output_expr); - Status _eval_const_conjuncts(vectorized::VExpr* vexpr, vectorized::VExprContext* expr_ctx, - PushDownType* pdt); + Status _eval_const_conjuncts(vectorized::VExprContext* expr_ctx, PushDownType* pdt); - Status _normalize_bloom_filter(vectorized::VExpr* expr, vectorized::VExprContext* expr_ctx, - SlotDescriptor* slot, PushDownType* pdt); + Status _normalize_bloom_filter(vectorized::VExprContext* expr_ctx, SlotDescriptor* slot, + std::vector>& predicates, + PushDownType* pdt); - Status 
_normalize_bitmap_filter(vectorized::VExpr* expr, vectorized::VExprContext* expr_ctx, - SlotDescriptor* slot, PushDownType* pdt); + Status _normalize_bitmap_filter(vectorized::VExprContext* expr_ctx, SlotDescriptor* slot, + std::vector>& predicates, + PushDownType* pdt); - Status _normalize_function_filters(vectorized::VExpr* expr, vectorized::VExprContext* expr_ctx, - SlotDescriptor* slot, PushDownType* pdt); + Status _normalize_function_filters(vectorized::VExprContext* expr_ctx, SlotDescriptor* slot, + PushDownType* pdt); - bool _is_predicate_acting_on_slot( - vectorized::VExpr* expr, - const std::function&, - vectorized::VExprSPtr&)>& checker, - SlotDescriptor** slot_desc, ColumnValueRangeType** range); + bool _is_predicate_acting_on_slot(const std::shared_ptr& slot_ref, + const vectorized::VExprSPtr& child_contains_slot, + ColumnValueRangeType** range); template - Status _normalize_in_and_eq_predicate(vectorized::VExpr* expr, - vectorized::VExprContext* expr_ctx, SlotDescriptor* slot, + Status _normalize_in_and_eq_predicate(vectorized::VExprContext* expr_ctx, SlotDescriptor* slot, + std::vector>& predicates, ColumnValueRange& range, PushDownType* pdt); template - Status _normalize_not_in_and_not_eq_predicate(vectorized::VExpr* expr, - vectorized::VExprContext* expr_ctx, - SlotDescriptor* slot, ColumnValueRange& range, - PushDownType* pdt); + Status _normalize_not_in_and_not_eq_predicate( + vectorized::VExprContext* expr_ctx, SlotDescriptor* slot, + std::vector>& predicates, ColumnValueRange& range, + PushDownType* pdt); template - Status _normalize_noneq_binary_predicate(vectorized::VExpr* expr, - vectorized::VExprContext* expr_ctx, - SlotDescriptor* slot, ColumnValueRange& range, - PushDownType* pdt); + Status _normalize_noneq_binary_predicate( + vectorized::VExprContext* expr_ctx, SlotDescriptor* slot, + std::vector>& predicates, ColumnValueRange& range, + PushDownType* pdt); template - Status _normalize_is_null_predicate(vectorized::VExpr* expr, 
vectorized::VExprContext* expr_ctx, - SlotDescriptor* slot, ColumnValueRange& range, - PushDownType* pdt); + Status _normalize_is_null_predicate(vectorized::VExprContext* expr_ctx, SlotDescriptor* slot, + std::vector>& predicates, + ColumnValueRange& range, PushDownType* pdt); bool _ignore_cast(SlotDescriptor* slot, vectorized::VExpr* expr); @@ -318,8 +316,6 @@ class ScanLocalState : public ScanLocalStateBase { std::shared_ptr _scanner_ctx = nullptr; - FilterPredicates _filter_predicates {}; - // Save all function predicates which may be pushed down to data source. std::vector _push_down_functions; @@ -330,13 +326,7 @@ class ScanLocalState : public ScanLocalStateBase { // Parsed from conjuncts phmap::flat_hash_map> _slot_id_to_value_range; - - // But if a col is with value range, eg: 1 < col < 10, which is "!is_fixed_range", - // in this case we can not merge "1 < col < 10" with "col not in (2)". - // So we have to save "col not in (2)" to another structure: "_not_in_value_ranges". - // When the data source try to use the value ranges, it should use both ranges in - // "_slot_id_to_value_range" and in "_not_in_value_ranges" - std::vector _not_in_value_ranges; + phmap::flat_hash_map>> _slot_id_to_predicates; std::atomic _eos = false; @@ -344,6 +334,7 @@ class ScanLocalState : public ScanLocalStateBase { // ScanLocalState owns the ownership of scanner, scanner context only has its weakptr std::list> _scanners; + vectorized::Arena _arena; }; template @@ -403,6 +394,7 @@ class ScanOperatorX : public OperatorX { protected: using LocalState = LocalStateType; + friend class vectorized::OlapScanner; ScanOperatorX(ObjectPool* pool, const TPlanNode& tnode, int operator_id, const DescriptorTbl& descs, int parallel_tasks = 0); virtual ~ScanOperatorX() = default; diff --git a/be/src/runtime/runtime_predicate.cpp b/be/src/runtime/runtime_predicate.cpp index c3f8707957ec68..fbf18f04b0128a 100644 --- a/be/src/runtime/runtime_predicate.cpp +++ b/be/src/runtime/runtime_predicate.cpp 
@@ -35,23 +35,23 @@ RuntimePredicate::RuntimePredicate(const TTopnFilterDesc& desc) _contexts[p.first].expr = p.second; } - PrimitiveType type = thrift_to_type(desc.target_node_id_to_target_expr.begin() - ->second.nodes[0] - .type.types[0] - .scalar_type.type); - if (!_init(type)) { + _type = thrift_to_type(desc.target_node_id_to_target_expr.begin() + ->second.nodes[0] + .type.types[0] + .scalar_type.type); + if (!_init(_type)) { std::stringstream ss; desc.target_node_id_to_target_expr.begin()->second.nodes[0].printTo(ss); - throw Exception(ErrorCode::INTERNAL_ERROR, "meet invalid type, type={}, expr={}", int(type), - ss.str()); + throw Exception(ErrorCode::INTERNAL_ERROR, "meet invalid type, type={}, expr={}", + type_to_string(_type), ss.str()); } // For ASC sort, create runtime predicate col_name <= max_top_value // since values that > min_top_value are large than any value in current topn values // For DESC sort, create runtime predicate col_name >= min_top_value // since values that < min_top_value are less than any value in current topn values - _pred_constructor = _is_asc ? create_comparison_predicate - : create_comparison_predicate; + _pred_constructor = _is_asc ? 
create_comparison_predicate0 + : create_comparison_predicate0; } void RuntimePredicate::init_target( @@ -66,141 +66,102 @@ void RuntimePredicate::init_target( _detected_target = true; } -template -std::string get_normal_value(const Field& field) { - using ValueType = typename PrimitiveTypeTraits::CppType; - return cast_to_string(field.get(), 0); -} - -std::string get_date_value(const Field& field) { - using ValueType = typename PrimitiveTypeTraits::CppType; - ValueType value; - Int64 v = field.get(); - auto* p = (VecDateTimeValue*)&v; - value.from_olap_date(p->to_olap_date()); - value.cast_to_date(); - return cast_to_string(value, 0); -} - -std::string get_datetime_value(const Field& field) { - using ValueType = typename PrimitiveTypeTraits::CppType; - ValueType value; - Int64 v = field.get(); - auto* p = (VecDateTimeValue*)&v; - value.from_olap_datetime(p->to_olap_datetime()); - value.to_datetime(); - return cast_to_string(value, 0); -} - -std::string get_time_value(const Field& field) { - using ValueType = typename PrimitiveTypeTraits::CppType; - ValueType value = field.get(); - return cast_to_string(value, 0); -} - -std::string get_decimalv2_value(const Field& field) { - // can NOT use PrimitiveTypeTraits::CppType since - // it is DecimalV2Value and Decimal128V2 can not convert to it implicitly - using ValueType = Decimal128V2::NativeType; - auto v = field.get>(); - // use TYPE_DECIMAL128I instead of TYPE_DECIMALV2 since v.get_scale() - // is always 9 for DECIMALV2 - return cast_to_string(v.get_value(), v.get_scale()); -} - -template -std::string get_decimal_value(const Field& field) { - using ValueType = typename PrimitiveTypeTraits::CppType; - auto v = field.get>(); - return cast_to_string(v.get_value(), v.get_scale()); -} - -bool RuntimePredicate::_init(PrimitiveType type) { - // set get value function +StringRef RuntimePredicate::_get_string_ref(const Field& field, const PrimitiveType type) { switch (type) { case PrimitiveType::TYPE_BOOLEAN: { - 
_get_value_fn = get_normal_value; - break; + const auto& v = field.get::CppType>(); + return StringRef((char*)&v, sizeof(v)); } case PrimitiveType::TYPE_TINYINT: { - _get_value_fn = get_normal_value; - break; + const auto& v = field.get::CppType>(); + return StringRef((char*)&v, sizeof(v)); } case PrimitiveType::TYPE_SMALLINT: { - _get_value_fn = get_normal_value; - break; + const auto& v = field.get::CppType>(); + return StringRef((char*)&v, sizeof(v)); } case PrimitiveType::TYPE_INT: { - _get_value_fn = get_normal_value; - break; + const auto& v = field.get::CppType>(); + return StringRef((char*)&v, sizeof(v)); } case PrimitiveType::TYPE_BIGINT: { - _get_value_fn = get_normal_value; - break; + const auto& v = field.get::CppType>(); + return StringRef((char*)&v, sizeof(v)); } case PrimitiveType::TYPE_LARGEINT: { - _get_value_fn = get_normal_value; - break; + const auto& v = field.get::CppType>(); + return StringRef((char*)&v, sizeof(v)); } case PrimitiveType::TYPE_CHAR: case PrimitiveType::TYPE_VARCHAR: case PrimitiveType::TYPE_STRING: { - _get_value_fn = [](const Field& field) { return field.get(); }; - break; + const auto& v = field.get(); + auto length = v.size(); + char* buffer = _predicate_arena.alloc(length); + memset(buffer, 0, length); + memcpy(buffer, v.data(), v.length()); + + return {buffer, length}; } case PrimitiveType::TYPE_DATEV2: { - _get_value_fn = get_normal_value; - break; + const auto& v = field.get::CppType>(); + return StringRef((char*)&v, sizeof(v)); } case PrimitiveType::TYPE_DATETIMEV2: { - _get_value_fn = get_normal_value; - break; + const auto& v = field.get::CppType>(); + return StringRef((char*)&v, sizeof(v)); } case PrimitiveType::TYPE_DATE: { - _get_value_fn = get_date_value; - break; + const auto& v = field.get::CppType>(); + return StringRef((char*)&v, sizeof(v)); } case PrimitiveType::TYPE_DATETIME: { - _get_value_fn = get_datetime_value; - break; + const auto& v = field.get::CppType>(); + return StringRef((char*)&v, sizeof(v)); } 
case PrimitiveType::TYPE_TIMEV2: { - _get_value_fn = get_time_value; - break; + const auto& v = field.get::CppType>(); + return StringRef((char*)&v, sizeof(v)); } case PrimitiveType::TYPE_DECIMAL32: { - _get_value_fn = get_decimal_value; - break; + const auto& v = field.get::CppType>(); + return StringRef((char*)&v, sizeof(v)); } case PrimitiveType::TYPE_DECIMAL64: { - _get_value_fn = get_decimal_value; - break; + const auto& v = field.get::CppType>(); + return StringRef((char*)&v, sizeof(v)); } case PrimitiveType::TYPE_DECIMALV2: { - _get_value_fn = get_decimalv2_value; - break; + const auto& v = field.get::CppType>(); + return StringRef((char*)&v, sizeof(v)); } case PrimitiveType::TYPE_DECIMAL128I: { - _get_value_fn = get_decimal_value; - break; + const auto& v = field.get::CppType>(); + return StringRef((char*)&v, sizeof(v)); } case PrimitiveType::TYPE_DECIMAL256: { - _get_value_fn = get_decimal_value; - break; + const auto& v = field.get::CppType>(); + return StringRef((char*)&v, sizeof(v)); } case PrimitiveType::TYPE_IPV4: { - _get_value_fn = get_normal_value; - break; + const auto& v = field.get::CppType>(); + return StringRef((char*)&v, sizeof(v)); } case PrimitiveType::TYPE_IPV6: { - _get_value_fn = get_normal_value; - break; + const auto& v = field.get::CppType>(); + return StringRef((char*)&v, sizeof(v)); } default: - return false; + break; } - return true; + throw Exception(ErrorCode::INTERNAL_ERROR, "meet invalid type, type={}", type_to_string(type)); + return StringRef(); +} + +bool RuntimePredicate::_init(PrimitiveType type) { + return is_int_or_bool(type) || is_decimal(type) || is_string_type(type) || is_date_type(type) || + is_time_type(type) || is_ip(type); } Status RuntimePredicate::update(const Field& value) { @@ -233,18 +194,19 @@ Status RuntimePredicate::update(const Field& value) { continue; } const auto& column = *DORIS_TRY(ctx.tablet_schema->column(ctx.col_name)); - std::unique_ptr pred { - _pred_constructor(column.get_vec_type(), 
ctx.predicate->column_id(), - _get_value_fn(_orderby_extrem), false, _predicate_arena)}; + auto str_ref = _get_string_ref(_orderby_extrem, _type); + std::shared_ptr pred = + _pred_constructor(ctx.predicate->column_id(), column.get_vec_type(), str_ref, false, + _predicate_arena); // For NULLS FIRST, wrap a AcceptNullPredicate to return true for NULL // since ORDER BY ASC/DESC should get NULL first but pred returns NULL // and NULL in where predicate will be treated as FALSE if (_nulls_first) { - pred = AcceptNullPredicate::create_unique(pred.release()); + pred = AcceptNullPredicate::create_shared(pred); } - ((SharedPredicate*)ctx.predicate.get())->set_nested(pred.release()); + ((SharedPredicate*)ctx.predicate.get())->set_nested(pred); } return Status::OK(); } diff --git a/be/src/runtime/runtime_predicate.h b/be/src/runtime/runtime_predicate.h index 51c79e1b426199..adf90e9095a481 100644 --- a/be/src/runtime/runtime_predicate.h +++ b/be/src/runtime/runtime_predicate.h @@ -110,6 +110,7 @@ class RuntimePredicate { } private: + StringRef _get_string_ref(const Field& field, const PrimitiveType type); void check_target_node_id(int32_t target_node_id) const { if (!_contexts.contains(target_node_id)) { std::string msg = "context target node ids: ["; @@ -153,13 +154,14 @@ class RuntimePredicate { Field _orderby_extrem {PrimitiveType::TYPE_NULL}; Arena _predicate_arena; - std::function _get_value_fn; - std::function + std::function( + const int cid, const vectorized::DataTypePtr& data_type, StringRef& value, + bool opposite, vectorized::Arena& arena)> _pred_constructor; bool _detected_source = false; bool _detected_target = false; bool _has_value = false; + PrimitiveType _type; }; } // namespace vectorized diff --git a/be/src/vec/exec/format/generic_reader.cpp b/be/src/vec/exec/format/generic_reader.cpp index 8b3339faede6e0..3daa68320f113d 100644 --- a/be/src/vec/exec/format/generic_reader.cpp +++ b/be/src/vec/exec/format/generic_reader.cpp @@ -60,7 +60,7 @@ Status 
ExprPushDownHelper::_extract_predicates(const VExprSPtr& expr, int& cid, } Status ExprPushDownHelper::convert_predicates( - const VExprSPtrs& exprs, std::vector>& predicates, + const VExprSPtrs& exprs, std::vector>& predicates, std::unique_ptr& root, Arena& arena) { if (exprs.empty()) { return Status::OK(); @@ -95,10 +95,9 @@ Status ExprPushDownHelper::convert_predicates( RETURN_IF_ERROR(_extract_predicates(expr, cid, data_type, values, false, parsed)); if (parsed) { // TODO(gabriel): Use string view - predicates.push_back(std::unique_ptr( - create(data_type, cid, values[0].to_string(), false, arena))); + predicates.push_back(create(data_type, cid, values[0].to_string(), false, arena)); root->add_column_predicate( - SingleColumnBlockPredicate::create_unique(predicates.back().get())); + SingleColumnBlockPredicate::create_unique(predicates.back())); } break; } @@ -112,11 +111,10 @@ Status ExprPushDownHelper::convert_predicates( for (size_t i = 0; i < conditions.size(); i++) { conditions[i] = values[i].to_string(); } - predicates.push_back(std::unique_ptr( - create_list_predicate( - data_type, cid, conditions, false, arena))); + predicates.push_back(create_list_predicate( + data_type, cid, conditions, false, arena)); root->add_column_predicate( - SingleColumnBlockPredicate::create_unique(predicates.back().get())); + SingleColumnBlockPredicate::create_unique(predicates.back())); } break; } @@ -155,10 +153,11 @@ Status ExprPushDownHelper::convert_predicates( if (fn_name == "is_null_pred" || fn_name == "is_not_null_pred") { RETURN_IF_ERROR(_extract_predicates(expr, cid, data_type, values, true, parsed)); if (parsed) { - predicates.push_back(std::unique_ptr( - new NullPredicate(cid, true, fn_name == "is_not_null_pred"))); + predicates.push_back( + NullPredicate::create_shared(cid, true, data_type->get_primitive_type(), + fn_name == "is_not_null_pred")); root->add_column_predicate( - SingleColumnBlockPredicate::create_unique(predicates.back().get())); + 
SingleColumnBlockPredicate::create_unique(predicates.back())); } } break; diff --git a/be/src/vec/exec/format/generic_reader.h b/be/src/vec/exec/format/generic_reader.h index b21971b7a3f18b..92d3040c4d8998 100644 --- a/be/src/vec/exec/format/generic_reader.h +++ b/be/src/vec/exec/format/generic_reader.h @@ -118,7 +118,7 @@ class ExprPushDownHelper { virtual ~ExprPushDownHelper() = default; bool check_expr_can_push_down(const VExprSPtr& expr) const; Status convert_predicates(const VExprSPtrs& exprs, - std::vector>& predicates, + std::vector>& predicates, std::unique_ptr& root, Arena& arena); protected: diff --git a/be/src/vec/exec/format/parquet/vparquet_reader.h b/be/src/vec/exec/format/parquet/vparquet_reader.h index 26cc7a7436d2ca..49d282dce484c3 100644 --- a/be/src/vec/exec/format/parquet/vparquet_reader.h +++ b/be/src/vec/exec/format/parquet/vparquet_reader.h @@ -339,7 +339,7 @@ class ParquetReader : public GenericReader, public ExprPushDownHelper { // Since the filtering conditions for topn are dynamic, the filtering is delayed until create next row group reader. 
VExprSPtrs _top_runtime_vexprs; std::vector> _push_down_predicates; - std::vector> _useless_predicates; + std::vector> _useless_predicates; Arena _arena; }; #include "common/compile_check_end.h" diff --git a/be/src/vec/exec/scan/olap_scanner.cpp b/be/src/vec/exec/scan/olap_scanner.cpp index afe61f60e66a87..9f72d5caeddf86 100644 --- a/be/src/vec/exec/scan/olap_scanner.cpp +++ b/be/src/vec/exec/scan/olap_scanner.cpp @@ -76,10 +76,7 @@ OlapScanner::OlapScanner(pipeline::ScanLocalStateBase* parent, OlapScanner::Para .version = {0, params.version}, .start_key {}, .end_key {}, - .conditions {}, - .bloom_filters {}, - .bitmap_filters {}, - .in_filters {}, + .predicates {}, .function_filters {}, .delete_predicates {}, .target_cast_type_for_variants {}, @@ -272,9 +269,10 @@ Status OlapScanner::prepare() { } // Initialize tablet_reader_params - RETURN_IF_ERROR(_init_tablet_reader_params(_key_ranges, local_state->_olap_filters, - local_state->_filter_predicates, - local_state->_push_down_functions)); + RETURN_IF_ERROR(_init_tablet_reader_params( + local_state->_parent->cast()._slot_id_to_slot_desc, + _key_ranges, local_state->_slot_id_to_predicates, + local_state->_push_down_functions)); } // add read columns in profile @@ -320,9 +318,10 @@ Status OlapScanner::open(RuntimeState* state) { // it will be called under tablet read lock because capture rs readers need Status OlapScanner::_init_tablet_reader_params( + const phmap::flat_hash_map& slot_id_to_slot_desc, const std::vector& key_ranges, - const std::vector>& filters, - const pipeline::FilterPredicates& filter_predicates, + const phmap::flat_hash_map>>& + slot_to_predicates, const std::vector& function_filters) { // if the table with rowset [0-x] or [0-1] [2-y], and [0-1] is empty const bool single_version = _tablet_reader_params.has_single_version(); @@ -366,27 +365,26 @@ Status OlapScanner::_init_tablet_reader_params( ((pipeline::OlapScanLocalState*)_local_state)->_cast_types_for_variants) { 
_tablet_reader_params.target_cast_type_for_variants[ele.first] = ele.second; }; - // Condition - for (auto& filter : filters) { - _tablet_reader_params.conditions.push_back(filter); + auto& tablet_schema = _tablet_reader_params.tablet_schema; + for (auto& predicates : slot_to_predicates) { + const int sid = predicates.first; + DCHECK(slot_id_to_slot_desc.contains(sid)); + int32_t index = + tablet_schema->field_index(slot_id_to_slot_desc.find(sid)->second->col_name()); + if (index < 0) { + throw Exception( + Status::InternalError("Column {} not found in tablet schema", + slot_id_to_slot_desc.find(sid)->second->col_name())); + } + for (auto& predicate : predicates.second) { + _tablet_reader_params.predicates.push_back(predicate->clone(index)); + } } - std::copy(filter_predicates.bloom_filters.cbegin(), filter_predicates.bloom_filters.cend(), - std::inserter(_tablet_reader_params.bloom_filters, - _tablet_reader_params.bloom_filters.begin())); - std::copy(filter_predicates.bitmap_filters.cbegin(), filter_predicates.bitmap_filters.cend(), - std::inserter(_tablet_reader_params.bitmap_filters, - _tablet_reader_params.bitmap_filters.begin())); - - std::copy(filter_predicates.in_filters.cbegin(), filter_predicates.in_filters.cend(), - std::inserter(_tablet_reader_params.in_filters, - _tablet_reader_params.in_filters.begin())); - std::copy(function_filters.cbegin(), function_filters.cend(), std::inserter(_tablet_reader_params.function_filters, _tablet_reader_params.function_filters.begin())); - auto& tablet_schema = _tablet_reader_params.tablet_schema; // Merge the columns in delete predicate that not in latest schema in to current tablet schema for (auto& del_pred : _tablet_reader_params.delete_predicates) { tablet_schema->merge_dropped_columns(*del_pred->tablet_schema()); diff --git a/be/src/vec/exec/scan/olap_scanner.h b/be/src/vec/exec/scan/olap_scanner.h index 27e09f298172f2..4b8d866ba25fa7 100644 --- a/be/src/vec/exec/scan/olap_scanner.h +++ 
b/be/src/vec/exec/scan/olap_scanner.h @@ -88,10 +88,12 @@ class OlapScanner : public Scanner { void _collect_profile_before_close() override; private: - Status _init_tablet_reader_params(const std::vector& key_ranges, - const std::vector>& filters, - const pipeline::FilterPredicates& filter_predicates, - const std::vector& function_filters); + Status _init_tablet_reader_params( + const phmap::flat_hash_map& slot_id_to_slot_desc, + const std::vector& key_ranges, + const phmap::flat_hash_map>>& + predicates, + const std::vector& function_filters); [[nodiscard]] Status _init_return_columns(); [[nodiscard]] Status _init_variant_columns(); diff --git a/be/src/vec/exprs/vexpr_context.h b/be/src/vec/exprs/vexpr_context.h index 73720abaed3203..6859a7cdf016d1 100644 --- a/be/src/vec/exprs/vexpr_context.h +++ b/be/src/vec/exprs/vexpr_context.h @@ -180,7 +180,7 @@ class VExprContext { [[nodiscard]] Status execute_const_expr(ColumnWithTypeAndName& result); - VExprSPtr root() { return _root; } + VExprSPtr root() const { return _root; } void set_root(const VExprSPtr& expr) { _root = expr; } void set_index_context(std::shared_ptr index_context) { _index_context = std::move(index_context); diff --git a/be/src/vec/functions/in.h b/be/src/vec/functions/in.h index 6324cdfb97f2d8..e52841df682458 100644 --- a/be/src/vec/functions/in.h +++ b/be/src/vec/functions/in.h @@ -56,7 +56,7 @@ using ColumnString = ColumnStr; struct InState { bool use_set = true; - std::unique_ptr hybrid_set; + std::shared_ptr hybrid_set; }; template diff --git a/be/test/olap/block_column_predicate_test.cpp b/be/test/olap/block_column_predicate_test.cpp index 5e054c9543b00c..39c03f839f44fc 100644 --- a/be/test/olap/block_column_predicate_test.cpp +++ b/be/test/olap/block_column_predicate_test.cpp @@ -80,9 +80,9 @@ TEST_F(BlockColumnPredicateTest, SINGLE_COLUMN_VEC) { int value = 5; int rows = 10; int col_idx = 0; - std::unique_ptr pred( + std::shared_ptr pred( new ComparisonPredicateBase(col_idx, value)); - 
SingleColumnBlockPredicate single_column_block_pred(pred.get()); + SingleColumnBlockPredicate single_column_block_pred(pred); std::vector sel_idx(rows); uint16_t selected_size = rows; @@ -108,12 +108,12 @@ TEST_F(BlockColumnPredicateTest, AND_MUTI_COLUMN_VEC) { int great_value = 3; int rows = 10; int col_idx = 0; - std::unique_ptr less_pred( + std::shared_ptr less_pred( new ComparisonPredicateBase(col_idx, less_value)); - std::unique_ptr great_pred( + std::shared_ptr great_pred( new ComparisonPredicateBase(col_idx, great_value)); - auto single_less_pred = SingleColumnBlockPredicate::create_unique(less_pred.get()); - auto single_great_pred = SingleColumnBlockPredicate::create_unique(great_pred.get()); + auto single_less_pred = SingleColumnBlockPredicate::create_unique(less_pred); + auto single_great_pred = SingleColumnBlockPredicate::create_unique(great_pred); AndBlockColumnPredicate and_block_column_pred; and_block_column_pred.add_column_predicate(std::move(single_less_pred)); @@ -143,12 +143,12 @@ TEST_F(BlockColumnPredicateTest, OR_MUTI_COLUMN_VEC) { int great_value = 3; int rows = 10; int col_idx = 0; - std::unique_ptr less_pred( + std::shared_ptr less_pred( new ComparisonPredicateBase(col_idx, less_value)); - std::unique_ptr great_pred( + std::shared_ptr great_pred( new ComparisonPredicateBase(col_idx, great_value)); - auto single_less_pred = SingleColumnBlockPredicate::create_unique(less_pred.get()); - auto single_great_pred = SingleColumnBlockPredicate::create_unique(great_pred.get()); + auto single_less_pred = SingleColumnBlockPredicate::create_unique(less_pred); + auto single_great_pred = SingleColumnBlockPredicate::create_unique(great_pred); OrBlockColumnPredicate or_block_column_pred; or_block_column_pred.add_column_predicate(std::move(single_less_pred)); @@ -178,25 +178,25 @@ TEST_F(BlockColumnPredicateTest, OR_AND_MUTI_COLUMN_VEC) { int great_value = 3; int rows = 10; int col_idx = 0; - std::unique_ptr less_pred( + std::shared_ptr less_pred( new 
ComparisonPredicateBase(0, less_value)); - std::unique_ptr great_pred( + std::shared_ptr great_pred( new ComparisonPredicateBase(0, great_value)); - std::unique_ptr less_pred1( + std::shared_ptr less_pred1( new ComparisonPredicateBase(0, great_value)); // Test for and or single // (column < 5 and column > 3) or column < 3 auto and_block_column_pred = AndBlockColumnPredicate::create_unique(); and_block_column_pred->add_column_predicate( - SingleColumnBlockPredicate::create_unique(less_pred.get())); + SingleColumnBlockPredicate::create_unique(less_pred)); and_block_column_pred->add_column_predicate( - SingleColumnBlockPredicate::create_unique(great_pred.get())); + SingleColumnBlockPredicate::create_unique(great_pred)); OrBlockColumnPredicate or_block_column_pred; or_block_column_pred.add_column_predicate(std::move(and_block_column_pred)); or_block_column_pred.add_column_predicate( - SingleColumnBlockPredicate::create_unique(less_pred1.get())); + SingleColumnBlockPredicate::create_unique(less_pred1)); std::vector sel_idx(rows); uint16_t selected_size = rows; @@ -220,13 +220,13 @@ TEST_F(BlockColumnPredicateTest, OR_AND_MUTI_COLUMN_VEC) { // column < 3 or (column < 5 and column > 3) auto and_block_column_pred1 = AndBlockColumnPredicate::create_unique(); and_block_column_pred1->add_column_predicate( - SingleColumnBlockPredicate::create_unique(less_pred.get())); + SingleColumnBlockPredicate::create_unique(less_pred)); and_block_column_pred1->add_column_predicate( - SingleColumnBlockPredicate::create_unique(great_pred.get())); + SingleColumnBlockPredicate::create_unique(great_pred)); OrBlockColumnPredicate or_block_column_pred1; or_block_column_pred1.add_column_predicate( - SingleColumnBlockPredicate::create_unique(less_pred1.get())); + SingleColumnBlockPredicate::create_unique(less_pred1)); or_block_column_pred1.add_column_predicate(std::move(and_block_column_pred1)); selected_size = or_block_column_pred1.evaluate(block, sel_idx.data(), selected_size); @@ -245,25 +245,25 
@@ TEST_F(BlockColumnPredicateTest, AND_OR_MUTI_COLUMN_VEC) { int great_value = 3; int rows = 10; int col_idx = 0; - std::unique_ptr less_pred( + std::shared_ptr less_pred( new ComparisonPredicateBase(0, less_value)); - std::unique_ptr great_pred( + std::shared_ptr great_pred( new ComparisonPredicateBase(0, great_value)); - std::unique_ptr less_pred1( + std::shared_ptr less_pred1( new ComparisonPredicateBase(0, great_value)); // Test for and or single // (column < 5 or column < 3) and column > 3 auto or_block_column_pred = OrBlockColumnPredicate::create_unique(); or_block_column_pred->add_column_predicate( - SingleColumnBlockPredicate::create_unique(less_pred.get())); + SingleColumnBlockPredicate::create_unique(less_pred)); or_block_column_pred->add_column_predicate( - SingleColumnBlockPredicate::create_unique(less_pred1.get())); + SingleColumnBlockPredicate::create_unique(less_pred1)); AndBlockColumnPredicate and_block_column_pred; and_block_column_pred.add_column_predicate(std::move(or_block_column_pred)); and_block_column_pred.add_column_predicate( - SingleColumnBlockPredicate::create_unique(great_pred.get())); + SingleColumnBlockPredicate::create_unique(great_pred)); std::vector sel_idx(rows); uint16_t selected_size = rows; @@ -285,13 +285,13 @@ TEST_F(BlockColumnPredicateTest, AND_OR_MUTI_COLUMN_VEC) { // column > 3 and (column < 5 or column < 3) auto or_block_column_pred1 = OrBlockColumnPredicate::create_unique(); or_block_column_pred1->add_column_predicate( - SingleColumnBlockPredicate::create_unique(less_pred.get())); + SingleColumnBlockPredicate::create_unique(less_pred)); or_block_column_pred1->add_column_predicate( - SingleColumnBlockPredicate::create_unique(less_pred1.get())); + SingleColumnBlockPredicate::create_unique(less_pred1)); AndBlockColumnPredicate and_block_column_pred1; and_block_column_pred1.add_column_predicate( - SingleColumnBlockPredicate::create_unique(great_pred.get())); + SingleColumnBlockPredicate::create_unique(great_pred)); 
and_block_column_pred1.add_column_predicate(std::move(or_block_column_pred1)); EXPECT_EQ(selected_size, 1); @@ -303,8 +303,8 @@ void single_column_predicate_test_func(const std::pair::CppType check_value, bool expect_match) { int col_idx = 0; - std::unique_ptr pred(new ComparisonPredicateBase(col_idx, check_value)); - SingleColumnBlockPredicate single_column_block_pred(pred.get()); + std::shared_ptr pred(new ComparisonPredicateBase(col_idx, check_value)); + SingleColumnBlockPredicate single_column_block_pred(pred); bool matched = single_column_block_pred.evaluate_and(statistic); EXPECT_EQ(matched, expect_match); @@ -1213,9 +1213,9 @@ TEST_F(BlockColumnPredicateTest, PARQUET_COMPARISON_PREDICATE) { {// EQ int value = 5; int col_idx = 0; - std::unique_ptr pred( + std::shared_ptr pred( new ComparisonPredicateBase(col_idx, value)); - SingleColumnBlockPredicate single_column_block_pred(pred.get()); + SingleColumnBlockPredicate single_column_block_pred(pred); std::unique_ptr parquet_field_col1 = std::make_unique(); parquet_field_col1->name = "col1"; @@ -1290,9 +1290,9 @@ TEST_F(BlockColumnPredicateTest, PARQUET_COMPARISON_PREDICATE) { // NE int value = 5; int col_idx = 0; - std::unique_ptr pred( + std::shared_ptr pred( new ComparisonPredicateBase(col_idx, value)); - SingleColumnBlockPredicate single_column_block_pred(pred.get()); + SingleColumnBlockPredicate single_column_block_pred(pred); std::unique_ptr parquet_field_col1 = std::make_unique(); parquet_field_col1->name = "col1"; @@ -1359,9 +1359,9 @@ TEST_F(BlockColumnPredicateTest, PARQUET_COMPARISON_PREDICATE) { // GE int value = 5; int col_idx = 0; - std::unique_ptr pred( + std::shared_ptr pred( new ComparisonPredicateBase(col_idx, value)); - SingleColumnBlockPredicate single_column_block_pred(pred.get()); + SingleColumnBlockPredicate single_column_block_pred(pred); std::unique_ptr parquet_field_col1 = std::make_unique(); parquet_field_col1->name = "col1"; @@ -1428,9 +1428,9 @@ TEST_F(BlockColumnPredicateTest, 
PARQUET_COMPARISON_PREDICATE) { // LE int value = 5; int col_idx = 0; - std::unique_ptr pred( + std::shared_ptr pred( new ComparisonPredicateBase(col_idx, value)); - SingleColumnBlockPredicate single_column_block_pred(pred.get()); + SingleColumnBlockPredicate single_column_block_pred(pred); std::unique_ptr parquet_field_col1 = std::make_unique(); parquet_field_col1->name = "col1"; @@ -1500,9 +1500,9 @@ TEST_F(BlockColumnPredicateTest, PARQUET_COMPARISON_PREDICATE) { // EQ float value = 5.0; int col_idx = 0; - std::unique_ptr pred( + std::shared_ptr pred( new ComparisonPredicateBase(col_idx, value)); - SingleColumnBlockPredicate single_column_block_pred(pred.get()); + SingleColumnBlockPredicate single_column_block_pred(pred); std::unique_ptr parquet_field_col1 = std::make_unique(); parquet_field_col1->name = "col1"; @@ -1594,9 +1594,9 @@ TEST_F(BlockColumnPredicateTest, PARQUET_COMPARISON_PREDICATE) { // NE float value = 5; int col_idx = 0; - std::unique_ptr pred( + std::shared_ptr pred( new ComparisonPredicateBase(col_idx, value)); - SingleColumnBlockPredicate single_column_block_pred(pred.get()); + SingleColumnBlockPredicate single_column_block_pred(pred); std::unique_ptr parquet_field_col1 = std::make_unique(); parquet_field_col1->name = "col1"; @@ -1663,9 +1663,9 @@ TEST_F(BlockColumnPredicateTest, PARQUET_COMPARISON_PREDICATE) { // GE float value = 5.0; int col_idx = 0; - std::unique_ptr pred( + std::shared_ptr pred( new ComparisonPredicateBase(col_idx, value)); - SingleColumnBlockPredicate single_column_block_pred(pred.get()); + SingleColumnBlockPredicate single_column_block_pred(pred); std::unique_ptr parquet_field_col1 = std::make_unique(); parquet_field_col1->name = "col1"; @@ -1732,9 +1732,9 @@ TEST_F(BlockColumnPredicateTest, PARQUET_COMPARISON_PREDICATE) { // LE float value = 5.0; int col_idx = 0; - std::unique_ptr pred( + std::shared_ptr pred( new ComparisonPredicateBase(col_idx, value)); - SingleColumnBlockPredicate 
single_column_block_pred(pred.get()); + SingleColumnBlockPredicate single_column_block_pred(pred); std::unique_ptr parquet_field_col1 = std::make_unique(); parquet_field_col1->name = "col1"; @@ -1807,11 +1807,10 @@ TEST_F(BlockColumnPredicateTest, PARQUET_IN_PREDICATE) { int col_idx = 0; auto hybrid_set = std::make_shared>(false); hybrid_set->insert(&value); - std::unique_ptr pred( - new InListPredicateBase>(col_idx, - hybrid_set)); - SingleColumnBlockPredicate single_column_block_pred(pred.get()); + std::shared_ptr pred( + new InListPredicateBase( + col_idx, hybrid_set, false)); + SingleColumnBlockPredicate single_column_block_pred(pred); std::unique_ptr parquet_field_col1 = std::make_unique(); parquet_field_col1->name = "col1"; @@ -1854,11 +1853,10 @@ TEST_F(BlockColumnPredicateTest, PARQUET_IN_PREDICATE) { int col_idx = 0; auto hybrid_set = std::make_shared>(false); hybrid_set->insert(&value); - std::unique_ptr pred( - new InListPredicateBase>(col_idx, - hybrid_set)); - SingleColumnBlockPredicate single_column_block_pred(pred.get()); + std::shared_ptr pred( + new InListPredicateBase( + col_idx, hybrid_set, false)); + SingleColumnBlockPredicate single_column_block_pred(pred); std::unique_ptr parquet_field_col1 = std::make_unique(); parquet_field_col1->name = "col1"; @@ -1903,9 +1901,9 @@ TEST_F(BlockColumnPredicateTest, PARQUET_IN_PREDICATE) { TEST_F(BlockColumnPredicateTest, PARQUET_COMPARISON_PREDICATE_BLOOM_FILTER) { const int value = 42; const int col_idx = 0; - std::unique_ptr pred( + std::shared_ptr pred( new ComparisonPredicateBase(col_idx, value)); - SingleColumnBlockPredicate single_column_block_pred(pred.get()); + SingleColumnBlockPredicate single_column_block_pred(pred); auto parquet_field = std::make_unique(); parquet_field->name = "col1"; @@ -2065,10 +2063,10 @@ TEST_F(BlockColumnPredicateTest, PARQUET_IN_PREDICATE_BLOOM_FILTER) { auto hybrid_set = std::make_shared>(false); const int included_value = 7; hybrid_set->insert(&included_value); - 
std::unique_ptr pred( - new InListPredicateBase>(col_idx, hybrid_set)); - SingleColumnBlockPredicate single_column_block_pred(pred.get()); + std::shared_ptr pred( + new InListPredicateBase(col_idx, hybrid_set, + false)); + SingleColumnBlockPredicate single_column_block_pred(pred); auto parquet_field = std::make_unique(); parquet_field->name = "col1"; @@ -2197,8 +2195,9 @@ TEST_F(BlockColumnPredicateTest, PARQUET_IN_PREDICATE_BLOOM_FILTER) { TEST_F(BlockColumnPredicateTest, NULL_PREDICATE) { { int col_idx = 0; - std::unique_ptr pred(new NullPredicate(col_idx, true)); - SingleColumnBlockPredicate single_column_block_pred(pred.get()); + std::shared_ptr pred( + new NullPredicate(col_idx, true, PrimitiveType::TYPE_INT)); + SingleColumnBlockPredicate single_column_block_pred(pred); std::unique_ptr parquet_field_col1 = std::make_unique(); parquet_field_col1->name = "col1"; @@ -2234,8 +2233,9 @@ TEST_F(BlockColumnPredicateTest, NULL_PREDICATE) { } { int col_idx = 0; - std::unique_ptr pred(new NullPredicate(col_idx, false)); - SingleColumnBlockPredicate single_column_block_pred(pred.get()); + std::shared_ptr pred( + new NullPredicate(col_idx, false, PrimitiveType::TYPE_INT)); + SingleColumnBlockPredicate single_column_block_pred(pred); std::unique_ptr parquet_field_col1 = std::make_unique(); parquet_field_col1->name = "col1"; @@ -2289,14 +2289,14 @@ TEST_F(BlockColumnPredicateTest, COMBINED_PREDICATE) { std::unique_ptr true_predicate; int col_idx = 0; int value = 5; - std::unique_ptr pred( + std::shared_ptr pred( new ComparisonPredicateBase(col_idx, value)); - true_predicate = std::make_unique(pred.get()); + true_predicate = std::make_unique(pred); std::unique_ptr false_predicate; - std::unique_ptr pred2( + std::shared_ptr pred2( new ComparisonPredicateBase(col_idx, value)); - false_predicate = std::make_unique(pred2.get()); + false_predicate = std::make_unique(pred2); std::unique_ptr parquet_field_col1 = std::make_unique(); @@ -2334,14 +2334,14 @@ 
TEST_F(BlockColumnPredicateTest, COMBINED_PREDICATE) { std::unique_ptr true_predicate; int col_idx = 0; int value = 5; - std::unique_ptr pred( + std::shared_ptr pred( new ComparisonPredicateBase(col_idx, value)); - true_predicate = std::make_unique(pred.get()); + true_predicate = std::make_unique(pred); std::unique_ptr true_predicate2; - std::unique_ptr pred2( + std::shared_ptr pred2( new ComparisonPredicateBase(col_idx, value)); - true_predicate2 = std::make_unique(pred2.get()); + true_predicate2 = std::make_unique(pred2); std::unique_ptr parquet_field_col1 = std::make_unique(); @@ -2379,14 +2379,14 @@ TEST_F(BlockColumnPredicateTest, COMBINED_PREDICATE) { std::unique_ptr true_predicate; int col_idx = 0; int value = 5; - std::unique_ptr pred( + std::shared_ptr pred( new ComparisonPredicateBase(col_idx, value)); - true_predicate = std::make_unique(pred.get()); + true_predicate = std::make_unique(pred); std::unique_ptr false_predicate; - std::unique_ptr pred2( + std::shared_ptr pred2( new ComparisonPredicateBase(col_idx, value)); - false_predicate = std::make_unique(pred2.get()); + false_predicate = std::make_unique(pred2); std::unique_ptr parquet_field_col1 = std::make_unique(); @@ -2424,14 +2424,14 @@ TEST_F(BlockColumnPredicateTest, COMBINED_PREDICATE) { std::unique_ptr false_predicate2; int col_idx = 0; int value = 5; - std::unique_ptr pred( + std::shared_ptr pred( new ComparisonPredicateBase(col_idx, value)); - false_predicate2 = std::make_unique(pred.get()); + false_predicate2 = std::make_unique(pred); std::unique_ptr false_predicate; - std::unique_ptr pred2( + std::shared_ptr pred2( new ComparisonPredicateBase(col_idx, value)); - false_predicate = std::make_unique(pred2.get()); + false_predicate = std::make_unique(pred2); std::unique_ptr parquet_field_col1 = std::make_unique(); @@ -2469,9 +2469,9 @@ TEST_F(BlockColumnPredicateTest, COMBINED_PREDICATE) { int col_idx = 0; int value = 5; std::unique_ptr false_predicate; - std::unique_ptr pred2( + std::shared_ptr 
pred2( new ComparisonPredicateBase(col_idx, value)); - false_predicate = std::make_unique(pred2.get()); + false_predicate = std::make_unique(pred2); std::unique_ptr parquet_field_col1 = std::make_unique(); diff --git a/be/test/olap/date_bloom_filter_test.cpp b/be/test/olap/date_bloom_filter_test.cpp index 6ef6eacb3e7858..1dff9938007299 100644 --- a/be/test/olap/date_bloom_filter_test.cpp +++ b/be/test/olap/date_bloom_filter_test.cpp @@ -263,44 +263,58 @@ TEST_F(DateBloomFilterTest, in_list_predicate_test) { EXPECT_TRUE(bf_iter->read_bloom_filter(0, &bf).ok()); // Test positive cases - auto test_positive = [&](const std::vector& values, bool result) { - auto hybrid_set = std::make_shared>(false); + auto hybrid_set = std::make_shared>(false); + auto test_positive = [&](const std::vector& values) { + hybrid_set = std::make_shared>(false); for (const auto& value : values) { auto v = timestamp_from_date(value); hybrid_set->insert(&v); } - std::unique_ptr>> - date_pred(new InListPredicateBase>( - 0, hybrid_set)); - EXPECT_EQ(date_pred->evaluate_and(bf.get()), result); }; - test_positive({"2024-11-08", "2024-11-09"}, true); - test_positive({"2024-11-08"}, true); - test_positive({"2024-11-09"}, true); - - auto test_negative = [&](const std::vector& values, bool result) { - auto hybrid_set = std::make_shared>(false); + test_positive({"2024-11-08", "2024-11-09"}); + std::unique_ptr> date_pred0( + new InListPredicateBase(0, hybrid_set, + false)); + EXPECT_EQ(date_pred0->evaluate_and(bf.get()), true); + test_positive({"2024-11-08"}); + std::unique_ptr> date_pred1( + new InListPredicateBase(0, hybrid_set, + false)); + EXPECT_EQ(date_pred1->evaluate_and(bf.get()), true); + test_positive({"2024-11-09"}); + std::unique_ptr> date_pred2( + new InListPredicateBase(0, hybrid_set, + false)); + EXPECT_EQ(date_pred2->evaluate_and(bf.get()), true); + + auto test_negative = [&](const std::vector& values) { + hybrid_set = std::make_shared>(false); for (const auto& value : values) { auto v = 
timestamp_from_date(value); hybrid_set->insert(&v); } + }; - std::unique_ptr>> - date_pred(new InListPredicateBase>( - 0, hybrid_set)); + test_negative({"2024-11-20"}); + std::unique_ptr> date_pred00( + new InListPredicateBase(0, hybrid_set, + false)); - EXPECT_EQ(date_pred->evaluate_and(bf.get()), result); - }; + EXPECT_EQ(date_pred00->evaluate_and(bf.get()), false); + test_negative({"2024-11-08", "2024-11-20"}); + std::unique_ptr> date_pred10( + new InListPredicateBase(0, hybrid_set, + false)); + + EXPECT_EQ(date_pred10->evaluate_and(bf.get()), true); + test_negative({"2024-11-20", "2024-11-21"}); + std::unique_ptr> date_pred20( + new InListPredicateBase(0, hybrid_set, + false)); - test_negative({"2024-11-20"}, false); - test_negative({"2024-11-08", "2024-11-20"}, true); - test_negative({"2024-11-20", "2024-11-21"}, false); + EXPECT_EQ(date_pred20->evaluate_and(bf.get()), false); } // Test DATETIME column with IN predicate @@ -316,42 +330,56 @@ TEST_F(DateBloomFilterTest, in_list_predicate_test) { EXPECT_TRUE(bf_iter->read_bloom_filter(0, &bf).ok()); // Test positive cases - auto test_positive = [&](const std::vector& values, bool result) { - auto hybrid_set = std::make_shared>(false); + auto hybrid_set = std::make_shared>(false); + auto test_positive = [&](const std::vector& values) { + hybrid_set = std::make_shared>(false); for (const auto& value : values) { auto v = timestamp_from_datetime(value); hybrid_set->insert(&v); } - std::unique_ptr>> - datetime_pred(new InListPredicateBase>( - 0, hybrid_set)); - EXPECT_EQ(datetime_pred->evaluate_and(bf.get()), result); }; - test_positive({"2024-11-08 09:00:00", "2024-11-09 09:00:00"}, true); - test_positive({"2024-11-08 09:00:00"}, true); - test_positive({"2024-11-09 09:00:00"}, true); + test_positive({"2024-11-08 09:00:00", "2024-11-09 09:00:00"}); + std::unique_ptr> + datetime_pred0(new InListPredicateBase( + 0, hybrid_set, false)); + EXPECT_EQ(datetime_pred0->evaluate_and(bf.get()), true); + 
test_positive({"2024-11-08 09:00:00"}); + std::unique_ptr> + datetime_pred1(new InListPredicateBase( + 0, hybrid_set, false)); + EXPECT_EQ(datetime_pred1->evaluate_and(bf.get()), true); + test_positive({"2024-11-09 09:00:00"}); + std::unique_ptr> + datetime_pred2(new InListPredicateBase( + 0, hybrid_set, false)); + EXPECT_EQ(datetime_pred2->evaluate_and(bf.get()), true); // Test negative cases - auto test_negative = [&](const std::vector& values, bool result) { - auto hybrid_set = std::make_shared>(false); + hybrid_set = std::make_shared>(false); + auto test_negative = [&](const std::vector& values) { + hybrid_set = std::make_shared>(false); for (const auto& value : values) { auto v = timestamp_from_datetime(value); hybrid_set->insert(&v); } - std::unique_ptr>> - datetime_pred(new InListPredicateBase>( - 0, hybrid_set)); - EXPECT_EQ(datetime_pred->evaluate_and(bf.get()), result); }; - test_negative({"2024-11-20 09:00:00"}, false); - test_negative({"2024-11-08 09:00:00", "2024-11-20 09:00:00"}, true); - test_negative({"2024-11-20 09:00:00", "2024-11-21 09:00:00"}, false); + test_negative({"2024-11-20 09:00:00"}); + std::unique_ptr> + datetime_pred33(new InListPredicateBase( + 0, hybrid_set, false)); + EXPECT_EQ(datetime_pred33->evaluate_and(bf.get()), false); + test_negative({"2024-11-08 09:00:00", "2024-11-20 09:00:00"}); + std::unique_ptr> + datetime_pred34(new InListPredicateBase( + 0, hybrid_set, false)); + EXPECT_EQ(datetime_pred34->evaluate_and(bf.get()), true); + test_negative({"2024-11-20 09:00:00", "2024-11-21 09:00:00"}); + std::unique_ptr> + datetime_pred45(new InListPredicateBase( + 0, hybrid_set, false)); + EXPECT_EQ(datetime_pred45->evaluate_and(bf.get()), false); } } diff --git a/be/test/pipeline/operator/scan_normalize_predicate_test.cpp b/be/test/pipeline/operator/scan_normalize_predicate_test.cpp index eb2d5a7aac1e11..809e08581702bd 100644 --- a/be/test/pipeline/operator/scan_normalize_predicate_test.cpp +++ 
b/be/test/pipeline/operator/scan_normalize_predicate_test.cpp @@ -53,8 +53,7 @@ TEST_F(ScanNormalizePredicate, test1) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = MockSlotRef::create_mock_context(0, std::make_shared()); - auto st = local_state->_normalize_predicate(conjunct_expr_root->root(), - conjunct_expr_root.get(), new_root); + auto st = local_state->_normalize_predicate(conjunct_expr_root.get(), new_root); EXPECT_TRUE(st) << st.msg(); std::cout << new_root->debug_string() << std::endl; } @@ -82,8 +81,7 @@ TEST_F(ScanNormalizePredicate, test_eval_const_conjuncts1) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = ctx; - auto st = local_state->_normalize_predicate(conjunct_expr_root->root(), - conjunct_expr_root.get(), new_root); + auto st = local_state->_normalize_predicate(conjunct_expr_root.get(), new_root); EXPECT_EQ(new_root, nullptr); EXPECT_TRUE(local_state->_scan_dependency->ready()); @@ -112,8 +110,7 @@ TEST_F(ScanNormalizePredicate, test_eval_const_conjuncts2) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = ctx; - auto st = local_state->_normalize_predicate(conjunct_expr_root->root(), - conjunct_expr_root.get(), new_root); + auto st = local_state->_normalize_predicate(conjunct_expr_root.get(), new_root); EXPECT_EQ(new_root, nullptr); EXPECT_TRUE(local_state->_scan_dependency->ready()); EXPECT_TRUE(local_state->_eos); @@ -137,7 +134,7 @@ TEST_F(ScanNormalizePredicate, test_eval_const_conjuncts3) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = ctx; // There is a DCHECK in the code to ensure size must be equal to 1, wait for this part of the code to be removed later - // auto st = local_state->_normalize_predicate(conjunct_expr_root->root(), + // auto st = local_state->_normalize_predicate( // conjunct_expr_root.get(), new_root); // EXPECT_FALSE(st.ok()); // std::cout << st.msg() << std::endl; @@ -160,8 +157,7 @@ TEST_F(ScanNormalizePredicate, test_eval_const_conjuncts4) { vectorized::VExprSPtr new_root; 
auto conjunct_expr_root = ctx; - auto st = local_state->_normalize_predicate(conjunct_expr_root->root(), - conjunct_expr_root.get(), new_root); + auto st = local_state->_normalize_predicate(conjunct_expr_root.get(), new_root); EXPECT_TRUE(st.ok()); std::cout << st.msg() << std::endl; } @@ -181,6 +177,8 @@ TEST_F(ScanNormalizePredicate, test_is_predicate_acting_on_slot1) { ColumnValueRange range("mock", false, 0, 0); local_state->_slot_id_to_value_range[SlotId] = std::make_pair(&slot_desc, range); + local_state->_slot_id_to_predicates[SlotId] = std::vector>(); + op->_slot_id_to_slot_desc[SlotId] = local_state->_slot_id_to_value_range[SlotId].first; { auto slot_ref = std::make_shared(0, std::make_shared()); @@ -200,8 +198,7 @@ TEST_F(ScanNormalizePredicate, test_is_predicate_acting_on_slot1) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = ctx; - auto st = local_state->_normalize_predicate(conjunct_expr_root->root(), - conjunct_expr_root.get(), new_root); + auto st = local_state->_normalize_predicate(conjunct_expr_root.get(), new_root); EXPECT_TRUE(st.ok()); std::cout << st.msg() << std::endl; } @@ -238,6 +235,8 @@ TEST_F(ScanNormalizePredicate, test_is_predicate_acting_on_slot2) { ColumnValueRange range("mock", false, 0, 0); local_state->_slot_id_to_value_range[SlotId] = std::make_pair(&slot_desc, range); + local_state->_slot_id_to_predicates[SlotId] = std::vector>(); + op->_slot_id_to_slot_desc[SlotId] = local_state->_slot_id_to_value_range[SlotId].first; { auto slot_ref = std::make_shared(0, std::make_shared()); @@ -257,8 +256,7 @@ TEST_F(ScanNormalizePredicate, test_is_predicate_acting_on_slot2) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = ctx; - auto st = local_state->_normalize_predicate(conjunct_expr_root->root(), - conjunct_expr_root.get(), new_root); + auto st = local_state->_normalize_predicate(conjunct_expr_root.get(), new_root); EXPECT_TRUE(st.ok()); std::cout << st.msg() << std::endl; } @@ -281,8 +279,7 @@ 
TEST_F(ScanNormalizePredicate, test_is_predicate_acting_on_slot2) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = ctx; - auto st = local_state->_normalize_predicate(conjunct_expr_root->root(), - conjunct_expr_root.get(), new_root); + auto st = local_state->_normalize_predicate(conjunct_expr_root.get(), new_root); EXPECT_TRUE(st.ok()); std::cout << st.msg() << std::endl; } @@ -318,6 +315,8 @@ TEST_F(ScanNormalizePredicate, test_is_predicate_acting_on_slot3) { ColumnValueRange range("mock", false, 0, 0); local_state->_slot_id_to_value_range[SlotId] = std::make_pair(&slot_desc, range); + local_state->_slot_id_to_predicates[SlotId] = std::vector>(); + op->_slot_id_to_slot_desc[SlotId] = local_state->_slot_id_to_value_range[SlotId].first; local_state->_scan_dependency = Dependency::create_shared(0, 0, "DEPENDENCY"); @@ -341,8 +340,7 @@ TEST_F(ScanNormalizePredicate, test_is_predicate_acting_on_slot3) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = ctx; - EXPECT_TRUE(local_state->_normalize_predicate(conjunct_expr_root->root(), - conjunct_expr_root.get(), new_root)); + EXPECT_TRUE(local_state->_normalize_predicate(conjunct_expr_root.get(), new_root)); } EXPECT_TRUE(local_state->_scan_dependency->ready()); EXPECT_TRUE(local_state->_eos); @@ -362,6 +360,8 @@ TEST_F(ScanNormalizePredicate, test_is_predicate_acting_on_slot4) { ColumnValueRange range("mock", false, 0, 0); local_state->_slot_id_to_value_range[SlotId] = std::make_pair(&slot_desc, range); + local_state->_slot_id_to_predicates[SlotId] = std::vector>(); + op->_slot_id_to_slot_desc[SlotId] = local_state->_slot_id_to_value_range[SlotId].first; { auto slot_ref = std::make_shared(0, std::make_shared()); @@ -378,8 +378,7 @@ TEST_F(ScanNormalizePredicate, test_is_predicate_acting_on_slot4) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = ctx; - EXPECT_TRUE(local_state->_normalize_predicate(conjunct_expr_root->root(), - conjunct_expr_root.get(), new_root)); + 
EXPECT_TRUE(local_state->_normalize_predicate(conjunct_expr_root.get(), new_root)); } EXPECT_TRUE(local_state->_slot_id_to_value_range.contains(SlotId)); @@ -418,6 +417,8 @@ TEST_F(ScanNormalizePredicate, test_is_predicate_acting_on_slot5) { ColumnValueRange range("mock", false, 0, 0); local_state->_slot_id_to_value_range[SlotId] = std::make_pair(&slot_desc, range); + local_state->_slot_id_to_predicates[SlotId] = std::vector>(); + op->_slot_id_to_slot_desc[SlotId] = local_state->_slot_id_to_value_range[SlotId].first; { auto slot_ref = std::make_shared(0, std::make_shared()); @@ -434,8 +435,7 @@ TEST_F(ScanNormalizePredicate, test_is_predicate_acting_on_slot5) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = ctx; - EXPECT_TRUE(local_state->_normalize_predicate(conjunct_expr_root->root(), - conjunct_expr_root.get(), new_root)); + EXPECT_TRUE(local_state->_normalize_predicate(conjunct_expr_root.get(), new_root)); } EXPECT_TRUE(local_state->_slot_id_to_value_range.contains(SlotId)); @@ -474,6 +474,8 @@ TEST_F(ScanNormalizePredicate, test_is_predicate_acting_on_slot6) { ColumnValueRange range("mock", false, 0, 0); local_state->_slot_id_to_value_range[SlotId] = std::make_pair(&slot_desc, range); + local_state->_slot_id_to_predicates[SlotId] = std::vector>(); + op->_slot_id_to_slot_desc[SlotId] = local_state->_slot_id_to_value_range[SlotId].first; { auto slot_ref = std::make_shared(0, std::make_shared()); @@ -496,8 +498,7 @@ TEST_F(ScanNormalizePredicate, test_is_predicate_acting_on_slot6) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = ctx; - EXPECT_TRUE(local_state->_normalize_predicate(conjunct_expr_root->root(), - conjunct_expr_root.get(), new_root)); + EXPECT_TRUE(local_state->_normalize_predicate(conjunct_expr_root.get(), new_root)); } EXPECT_TRUE(local_state->_slot_id_to_value_range.contains(SlotId)); @@ -530,6 +531,8 @@ TEST_F(ScanNormalizePredicate, test_is_predicate_acting_on_slot7) { ColumnValueRange range("mock", false, 0, 0); 
local_state->_slot_id_to_value_range[SlotId] = std::make_pair(&slot_desc, range); + local_state->_slot_id_to_predicates[SlotId] = std::vector>(); + op->_slot_id_to_slot_desc[SlotId] = local_state->_slot_id_to_value_range[SlotId].first; { auto slot_ref = std::make_shared(0, std::make_shared()); @@ -558,8 +561,7 @@ TEST_F(ScanNormalizePredicate, test_is_predicate_acting_on_slot7) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = ctx; - EXPECT_TRUE(local_state->_normalize_predicate(conjunct_expr_root->root(), - conjunct_expr_root.get(), new_root)); + EXPECT_TRUE(local_state->_normalize_predicate(conjunct_expr_root.get(), new_root)); } EXPECT_TRUE(local_state->_scan_dependency->ready()); @@ -586,6 +588,8 @@ TEST_F(ScanNormalizePredicate, test_is_predicate_acting_on_slot8) { EXPECT_TRUE(range.add_fixed_value(1000)); local_state->_slot_id_to_value_range[SlotId] = std::make_pair(&slot_desc, range); + local_state->_slot_id_to_predicates[SlotId] = std::vector>(); + op->_slot_id_to_slot_desc[SlotId] = local_state->_slot_id_to_value_range[SlotId].first; { auto slot_ref = std::make_shared(0, std::make_shared()); @@ -602,8 +606,7 @@ TEST_F(ScanNormalizePredicate, test_is_predicate_acting_on_slot8) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = ctx; - EXPECT_TRUE(local_state->_normalize_predicate(conjunct_expr_root->root(), - conjunct_expr_root.get(), new_root)); + EXPECT_TRUE(local_state->_normalize_predicate(conjunct_expr_root.get(), new_root)); } auto& output_range = local_state->_slot_id_to_value_range[SlotId].second; @@ -637,6 +640,8 @@ TEST_F(ScanNormalizePredicate, test_is_predicate_acting_on_slot10) { ColumnValueRange range("mock", false, 0, 0); local_state->_slot_id_to_value_range[SlotId] = std::make_pair(&slot_desc, range); + local_state->_slot_id_to_predicates[SlotId] = std::vector>(); + op->_slot_id_to_slot_desc[SlotId] = local_state->_slot_id_to_value_range[SlotId].first; { auto slot_ref = std::make_shared(0, std::make_shared()); @@ -653,27 
+658,8 @@ TEST_F(ScanNormalizePredicate, test_is_predicate_acting_on_slot10) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = ctx; - EXPECT_TRUE(local_state->_normalize_predicate(conjunct_expr_root->root(), - conjunct_expr_root.get(), new_root)); + EXPECT_TRUE(local_state->_normalize_predicate(conjunct_expr_root.get(), new_root)); } - - auto& output_range = local_state->_not_in_value_ranges.front(); - std::visit( - [](auto&& arg) { - using T = std::decay_t; - if constexpr (std::is_same_v>) { - EXPECT_EQ(arg._fixed_values.size(), 3); - auto it = arg._fixed_values.begin(); - EXPECT_EQ(*it, 1); - ++it; - EXPECT_EQ(*it, 10); - ++it; - EXPECT_EQ(*it, 100); - } else { - FAIL() << "unexpected type"; - } - }, - output_range); } TEST_F(ScanNormalizePredicate, test_is_predicate_acting_on_slot11) { @@ -692,6 +678,8 @@ TEST_F(ScanNormalizePredicate, test_is_predicate_acting_on_slot11) { ColumnValueRange range("mock", false, 0, 0); local_state->_slot_id_to_value_range[SlotId] = std::make_pair(&slot_desc, range); + local_state->_slot_id_to_predicates[SlotId] = std::vector>(); + op->_slot_id_to_slot_desc[SlotId] = local_state->_slot_id_to_value_range[SlotId].first; { auto slot_ref = std::make_shared(0, std::make_shared()); @@ -711,23 +699,8 @@ TEST_F(ScanNormalizePredicate, test_is_predicate_acting_on_slot11) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = ctx; - EXPECT_TRUE(local_state->_normalize_predicate(conjunct_expr_root->root(), - conjunct_expr_root.get(), new_root)); + EXPECT_TRUE(local_state->_normalize_predicate(conjunct_expr_root.get(), new_root)); } - - auto& output_range = local_state->_not_in_value_ranges.front(); - std::visit( - [](auto&& arg) { - using T = std::decay_t; - if constexpr (std::is_same_v>) { - EXPECT_EQ(arg._fixed_values.size(), 1); - auto it = arg._fixed_values.begin(); - EXPECT_EQ(*it, 100); - } else { - FAIL() << "unexpected type"; - } - }, - output_range); } TEST_F(ScanNormalizePredicate, test_is_predicate_acting_on_slot12) 
{ @@ -749,6 +722,8 @@ TEST_F(ScanNormalizePredicate, test_is_predicate_acting_on_slot12) { EXPECT_TRUE(range.add_fixed_value(100)); local_state->_slot_id_to_value_range[SlotId] = std::make_pair(&slot_desc, range); + local_state->_slot_id_to_predicates[SlotId] = std::vector>(); + op->_slot_id_to_slot_desc[SlotId] = local_state->_slot_id_to_value_range[SlotId].first; { auto slot_ref = std::make_shared(0, std::make_shared()); @@ -768,8 +743,7 @@ TEST_F(ScanNormalizePredicate, test_is_predicate_acting_on_slot12) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = ctx; - EXPECT_TRUE(local_state->_normalize_predicate(conjunct_expr_root->root(), - conjunct_expr_root.get(), new_root)); + EXPECT_TRUE(local_state->_normalize_predicate(conjunct_expr_root.get(), new_root)); } auto& output_range = local_state->_slot_id_to_value_range[SlotId].second; @@ -804,6 +778,8 @@ TEST_F(ScanNormalizePredicate, test_is_predicate_acting_on_slot13) { EXPECT_TRUE(range.add_fixed_value(100)); local_state->_slot_id_to_value_range[SlotId] = std::make_pair(&slot_desc, range); + local_state->_slot_id_to_predicates[SlotId] = std::vector>(); + op->_slot_id_to_slot_desc[SlotId] = local_state->_slot_id_to_value_range[SlotId].first; { auto slot_ref = std::make_shared(0, std::make_shared()); @@ -823,8 +799,7 @@ TEST_F(ScanNormalizePredicate, test_is_predicate_acting_on_slot13) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = ctx; - EXPECT_TRUE(local_state->_normalize_predicate(conjunct_expr_root->root(), - conjunct_expr_root.get(), new_root)); + EXPECT_TRUE(local_state->_normalize_predicate(conjunct_expr_root.get(), new_root)); } auto& output_range = local_state->_slot_id_to_value_range[SlotId].second; @@ -859,6 +834,8 @@ TEST_F(ScanNormalizePredicate, test_is_predicate_acting_on_slot14) { EXPECT_TRUE(range.add_fixed_value(100)); local_state->_slot_id_to_value_range[SlotId] = std::make_pair(&slot_desc, range); + local_state->_slot_id_to_predicates[SlotId] = std::vector>(); + 
op->_slot_id_to_slot_desc[SlotId] = local_state->_slot_id_to_value_range[SlotId].first; { auto slot_ref = std::make_shared(0, std::make_shared()); @@ -878,8 +855,7 @@ TEST_F(ScanNormalizePredicate, test_is_predicate_acting_on_slot14) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = ctx; - EXPECT_TRUE(local_state->_normalize_predicate(conjunct_expr_root->root(), - conjunct_expr_root.get(), new_root)); + EXPECT_TRUE(local_state->_normalize_predicate(conjunct_expr_root.get(), new_root)); } auto& output_range = local_state->_slot_id_to_value_range[SlotId].second; @@ -918,6 +894,8 @@ TEST_F(ScanNormalizePredicate, test_is_predicate_acting_on_slot15) { EXPECT_TRUE(range.add_fixed_value(100)); local_state->_slot_id_to_value_range[SlotId] = std::make_pair(&slot_desc, range); + local_state->_slot_id_to_predicates[SlotId] = std::vector>(); + op->_slot_id_to_slot_desc[SlotId] = local_state->_slot_id_to_value_range[SlotId].first; { auto slot_ref = std::make_shared(0, std::make_shared()); @@ -937,8 +915,7 @@ TEST_F(ScanNormalizePredicate, test_is_predicate_acting_on_slot15) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = ctx; - EXPECT_TRUE(local_state->_normalize_predicate(conjunct_expr_root->root(), - conjunct_expr_root.get(), new_root)); + EXPECT_TRUE(local_state->_normalize_predicate(conjunct_expr_root.get(), new_root)); } auto& output_range = local_state->_slot_id_to_value_range[SlotId].second; @@ -982,6 +959,9 @@ TEST_F(ScanNormalizePredicate, test_double_predicate) { auto local_state = std::make_shared(state.get(), op.get()); ColumnValueRange range("mock", false, 0, 0); local_state->_slot_id_to_value_range[SlotId] = std::make_pair(&slot_desc, range); + local_state->_slot_id_to_predicates[SlotId] = + std::vector>(); + op->_slot_id_to_slot_desc[SlotId] = local_state->_slot_id_to_value_range[SlotId].first; auto slot_ref = std::make_shared(0, std::make_shared()); auto fn_eq = MockFnCall::create("eq"); @@ -1000,8 +980,7 @@ TEST_F(ScanNormalizePredicate, 
test_double_predicate) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = ctx; - auto st = local_state->_normalize_predicate(conjunct_expr_root->root(), - conjunct_expr_root.get(), new_root); + auto st = local_state->_normalize_predicate(conjunct_expr_root.get(), new_root); EXPECT_TRUE(st.ok()); EXPECT_EQ(new_root, nullptr); @@ -1025,6 +1004,9 @@ TEST_F(ScanNormalizePredicate, test_double_predicate) { auto local_state = std::make_shared(state.get(), op.get()); ColumnValueRange range("mock", false, 0, 0); local_state->_slot_id_to_value_range[SlotId] = std::make_pair(&slot_desc, range); + local_state->_slot_id_to_predicates[SlotId] = + std::vector>(); + op->_slot_id_to_slot_desc[SlotId] = local_state->_slot_id_to_value_range[SlotId].first; auto slot_ref = std::make_shared(0, std::make_shared()); auto ctx = MockInExpr::create_with_ctx( ColumnHelper::create_column(test_values)); @@ -1039,8 +1021,7 @@ TEST_F(ScanNormalizePredicate, test_double_predicate) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = ctx; - auto st = local_state->_normalize_predicate(conjunct_expr_root->root(), - conjunct_expr_root.get(), new_root); + auto st = local_state->_normalize_predicate(conjunct_expr_root.get(), new_root); EXPECT_TRUE(st.ok()); EXPECT_EQ(new_root, nullptr); EXPECT_TRUE(local_state->_slot_id_to_value_range.contains(SlotId)); @@ -1062,6 +1043,9 @@ TEST_F(ScanNormalizePredicate, test_double_predicate) { auto local_state = std::make_shared(state.get(), op.get()); ColumnValueRange range("mock", false, 0, 0); local_state->_slot_id_to_value_range[SlotId] = std::make_pair(&slot_desc, range); + local_state->_slot_id_to_predicates[SlotId] = + std::vector>(); + op->_slot_id_to_slot_desc[SlotId] = local_state->_slot_id_to_value_range[SlotId].first; auto slot_ref = std::make_shared(0, std::make_shared()); auto fn_eq = MockFnCall::create("ne"); auto const_val = std::make_shared( @@ -1079,30 +1063,18 @@ TEST_F(ScanNormalizePredicate, test_double_predicate) { 
vectorized::VExprSPtr new_root; auto conjunct_expr_root = ctx; - EXPECT_TRUE(local_state->_normalize_predicate(conjunct_expr_root->root(), - conjunct_expr_root.get(), new_root)); + EXPECT_TRUE(local_state->_normalize_predicate(conjunct_expr_root.get(), new_root)); EXPECT_EQ(new_root, nullptr); EXPECT_TRUE(local_state->_slot_id_to_value_range.contains(SlotId)); - - auto& output_range = local_state->_not_in_value_ranges.front(); - std::visit( - [&](auto&& arg) { - using T = std::decay_t; - if constexpr (std::is_same_v>) { - EXPECT_EQ(arg._fixed_values.size(), 1); - auto it = arg._fixed_values.begin(); - EXPECT_TRUE(Compare::equal(*it, const_v)); - } else { - FAIL() << "unexpected type"; - } - }, - output_range); } // test not in { auto local_state = std::make_shared(state.get(), op.get()); ColumnValueRange range("mock", false, 0, 0); local_state->_slot_id_to_value_range[SlotId] = std::make_pair(&slot_desc, range); + local_state->_slot_id_to_predicates[SlotId] = + std::vector>(); + op->_slot_id_to_slot_desc[SlotId] = local_state->_slot_id_to_value_range[SlotId].first; auto slot_ref = std::make_shared(0, std::make_shared()); auto ctx = MockInExpr::create_with_ctx( ColumnHelper::create_column(test_values), true); @@ -1117,23 +1089,10 @@ TEST_F(ScanNormalizePredicate, test_double_predicate) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = ctx; - auto st = local_state->_normalize_predicate(conjunct_expr_root->root(), - conjunct_expr_root.get(), new_root); + auto st = local_state->_normalize_predicate(conjunct_expr_root.get(), new_root); EXPECT_TRUE(st.ok()); EXPECT_EQ(new_root, nullptr); EXPECT_TRUE(local_state->_slot_id_to_value_range.contains(SlotId)); - - auto& output_range = local_state->_not_in_value_ranges.front(); - std::visit( - [&](auto&& arg) { - using T = std::decay_t; - if constexpr (std::is_same_v>) { - EXPECT_EQ(arg._fixed_values.size(), test_values.size()); - } else { - FAIL() << "unexpected type"; - } - }, - output_range); } // test is null { @@ 
-1155,8 +1114,7 @@ TEST_F(ScanNormalizePredicate, test_double_predicate) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = ctx; - EXPECT_TRUE(local_state->_normalize_predicate(conjunct_expr_root->root(), - conjunct_expr_root.get(), new_root)); + EXPECT_TRUE(local_state->_normalize_predicate(conjunct_expr_root.get(), new_root)); auto& output_range = local_state->_slot_id_to_value_range[SlotId].second; std::visit( [](auto&& arg) { @@ -1172,46 +1130,48 @@ TEST_F(ScanNormalizePredicate, test_double_predicate) { output_range); } // test is not null - { - auto local_state = std::make_shared(state.get(), op.get()); - ColumnValueRange range("mock", true, 0, 0); - local_state->_slot_id_to_value_range[SlotId] = std::make_pair(&nullable_slot_desc, range); - auto slot_ref = std::make_shared( - 0, std::make_shared(std::make_shared())); - auto fn_eq = MockFnCall::create("is_not_null_pred"); - - fn_eq->add_child(slot_ref); - fn_eq->_node_type = TExprNodeType::FUNCTION_CALL; - slot_ref->_slot_id = SlotId; - EXPECT_FALSE(fn_eq->is_constant()); - - auto ctx = VExprContext::create_shared(fn_eq); - ctx->_prepared = true; - ctx->_opened = true; - - vectorized::VExprSPtr new_root; - auto conjunct_expr_root = ctx; - EXPECT_TRUE(local_state->_normalize_predicate(conjunct_expr_root->root(), - conjunct_expr_root.get(), new_root)); - auto& output_range = local_state->_slot_id_to_value_range[SlotId].second; - std::visit( - [](auto&& arg) { - using T = std::decay_t; - if constexpr (std::is_same_v>) { - EXPECT_FALSE(arg.is_fixed_value_range()); - EXPECT_FALSE(arg.contain_null()); - } else { - FAIL() << "unexpected type"; - } - }, - output_range); - } + // { + // auto local_state = std::make_shared(state.get(), op.get()); + // ColumnValueRange range("mock", true, 0, 0); + // local_state->_slot_id_to_value_range[SlotId] = std::make_pair(&nullable_slot_desc, range); + // auto slot_ref = std::make_shared( + // 0, std::make_shared(std::make_shared())); + // auto fn_eq = 
MockFnCall::create("is_not_null_pred"); + // + // fn_eq->add_child(slot_ref); + // fn_eq->_node_type = TExprNodeType::FUNCTION_CALL; + // slot_ref->_slot_id = SlotId; + // EXPECT_FALSE(fn_eq->is_constant()); + // + // auto ctx = VExprContext::create_shared(fn_eq); + // ctx->_prepared = true; + // ctx->_opened = true; + // + // vectorized::VExprSPtr new_root; + // auto conjunct_expr_root = ctx; + // EXPECT_TRUE(local_state->_normalize_predicate(conjunct_expr_root.get(), new_root)); + // auto& output_range = local_state->_slot_id_to_value_range[SlotId].second; + // std::visit( + // [](auto&& arg) { + // using T = std::decay_t; + // if constexpr (std::is_same_v>) { + // EXPECT_FALSE(arg.is_fixed_value_range()); + // EXPECT_FALSE(arg.contain_null()); + // } else { + // FAIL() << "unexpected type"; + // } + // }, + // output_range); + // } // test less for (auto const_v : test_values) { // std::cout << "test less const_v=" << const_v << std::endl; auto local_state = std::make_shared(state.get(), op.get()); ColumnValueRange range("mock", false, 0, 0); local_state->_slot_id_to_value_range[SlotId] = std::make_pair(&slot_desc, range); + local_state->_slot_id_to_predicates[SlotId] = + std::vector>(); + op->_slot_id_to_slot_desc[SlotId] = local_state->_slot_id_to_value_range[SlotId].first; auto slot_ref = std::make_shared(0, std::make_shared()); auto fn_eq = MockFnCall::create("lt"); @@ -1230,8 +1190,7 @@ TEST_F(ScanNormalizePredicate, test_double_predicate) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = ctx; - auto st = local_state->_normalize_predicate(conjunct_expr_root->root(), - conjunct_expr_root.get(), new_root); + auto st = local_state->_normalize_predicate(conjunct_expr_root.get(), new_root); EXPECT_TRUE(st.ok()); EXPECT_EQ(new_root, nullptr); @@ -1269,6 +1228,9 @@ TEST_F(ScanNormalizePredicate, test_double_predicate) { auto local_state = std::make_shared(state.get(), op.get()); ColumnValueRange range("mock", false, 0, 0); 
local_state->_slot_id_to_value_range[SlotId] = std::make_pair(&slot_desc, range); + local_state->_slot_id_to_predicates[SlotId] = + std::vector>(); + op->_slot_id_to_slot_desc[SlotId] = local_state->_slot_id_to_value_range[SlotId].first; auto slot_ref = std::make_shared(0, std::make_shared()); auto fn_eq = MockFnCall::create("le"); @@ -1287,8 +1249,7 @@ TEST_F(ScanNormalizePredicate, test_double_predicate) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = ctx; - auto st = local_state->_normalize_predicate(conjunct_expr_root->root(), - conjunct_expr_root.get(), new_root); + auto st = local_state->_normalize_predicate(conjunct_expr_root.get(), new_root); EXPECT_TRUE(st.ok()); EXPECT_EQ(new_root, nullptr); @@ -1323,6 +1284,9 @@ TEST_F(ScanNormalizePredicate, test_double_predicate) { auto local_state = std::make_shared(state.get(), op.get()); ColumnValueRange range("mock", false, 0, 0); local_state->_slot_id_to_value_range[SlotId] = std::make_pair(&slot_desc, range); + local_state->_slot_id_to_predicates[SlotId] = + std::vector>(); + op->_slot_id_to_slot_desc[SlotId] = local_state->_slot_id_to_value_range[SlotId].first; auto slot_ref = std::make_shared(0, std::make_shared()); auto fn_eq = MockFnCall::create("gt"); @@ -1341,8 +1305,7 @@ TEST_F(ScanNormalizePredicate, test_double_predicate) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = ctx; - auto st = local_state->_normalize_predicate(conjunct_expr_root->root(), - conjunct_expr_root.get(), new_root); + auto st = local_state->_normalize_predicate(conjunct_expr_root.get(), new_root); EXPECT_TRUE(st.ok()); EXPECT_EQ(new_root, nullptr); @@ -1380,6 +1343,9 @@ TEST_F(ScanNormalizePredicate, test_double_predicate) { auto local_state = std::make_shared(state.get(), op.get()); ColumnValueRange range("mock", false, 0, 0); local_state->_slot_id_to_value_range[SlotId] = std::make_pair(&slot_desc, range); + local_state->_slot_id_to_predicates[SlotId] = + std::vector>(); + op->_slot_id_to_slot_desc[SlotId] = 
local_state->_slot_id_to_value_range[SlotId].first; auto slot_ref = std::make_shared(0, std::make_shared()); auto fn_eq = MockFnCall::create("ge"); @@ -1398,8 +1364,7 @@ TEST_F(ScanNormalizePredicate, test_double_predicate) { vectorized::VExprSPtr new_root; auto conjunct_expr_root = ctx; - auto st = local_state->_normalize_predicate(conjunct_expr_root->root(), - conjunct_expr_root.get(), new_root); + auto st = local_state->_normalize_predicate(conjunct_expr_root.get(), new_root); EXPECT_TRUE(st.ok()); EXPECT_EQ(new_root, nullptr); diff --git a/be/test/testutil/mock/mock_in_expr.h b/be/test/testutil/mock/mock_in_expr.h index 7f31d99c6cdaaf..8542cff046ee0e 100644 --- a/be/test/testutil/mock/mock_in_expr.h +++ b/be/test/testutil/mock/mock_in_expr.h @@ -30,7 +30,7 @@ class VExprContext; // use to mock a slot ref expr class MockInExpr final : public VInPredicate { public: - MockInExpr() = default; + MockInExpr() { _node_type = TExprNodeType::IN_PRED; } Status execute(VExprContext* context, Block* block, int* result_column_id) const override { return Status::OK(); diff --git a/be/test/vec/exec/format/parquet/parquet_expr_test.cpp b/be/test/vec/exec/format/parquet/parquet_expr_test.cpp index b5291a52c7093f..a4bed91ae3f76f 100644 --- a/be/test/vec/exec/format/parquet/parquet_expr_test.cpp +++ b/be/test/vec/exec/format/parquet/parquet_expr_test.cpp @@ -1250,8 +1250,8 @@ TEST_F(ParquetExprTest, test_expr_push_down_and) { ASSERT_TRUE(p_reader->check_expr_can_push_down(and_expr)); p_reader->_enable_filter_by_min_max = true; - std::map>> push_down_simple_predicates; - push_down_simple_predicates.emplace(2, std::vector> {}); + std::map>> push_down_simple_predicates; + push_down_simple_predicates.emplace(2, std::vector> {}); p_reader->_push_down_predicates.push_back(AndBlockColumnPredicate::create_unique()); ASSERT_TRUE(p_reader->convert_predicates({and_expr}, push_down_simple_predicates[2], p_reader->_push_down_predicates.back(), @@ -1746,8 +1746,7 @@ TEST_F(ParquetExprTest, 
test_in_list_predicate_uses_bloom_filter) { set->insert(&v); } - InListPredicateBase> - in_pred(col_idx, set); + InListPredicateBase in_pred(col_idx, set, false); ParquetPredicate::ColumnStat stat; stat.ctz = &ctz; @@ -1800,8 +1799,7 @@ TEST_F(ParquetExprTest, test_in_list_predicate_no_loader_on_range_miss) { set->insert(&v); } - InListPredicateBase> - in_pred(col_idx, set); + InListPredicateBase in_pred(col_idx, set, false); ParquetPredicate::ColumnStat stat; stat.ctz = &ctz;