diff --git a/be/src/vec/aggregate_functions/aggregate_function.h b/be/src/vec/aggregate_functions/aggregate_function.h index 12d629b42c89f8..05f1bd2a602c68 100644 --- a/be/src/vec/aggregate_functions/aggregate_function.h +++ b/be/src/vec/aggregate_functions/aggregate_function.h @@ -170,9 +170,6 @@ class IAggregateFunction { virtual void deserialize_and_merge_from_column(AggregateDataPtr __restrict place, const IColumn& column, Arena* arena) const = 0; - /// Returns true if a function requires Arena to handle own states (see add(), merge(), deserialize()). - virtual bool allocates_memory_in_arena() const { return false; } - /// Inserts results into a column. virtual void insert_result_into(ConstAggregateDataPtr __restrict place, IColumn& to) const = 0; diff --git a/be/src/vec/aggregate_functions/aggregate_function_binary.h b/be/src/vec/aggregate_functions/aggregate_function_binary.h index a5b6e2b1e0e316..9fba9d11a1013a 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_binary.h +++ b/be/src/vec/aggregate_functions/aggregate_function_binary.h @@ -62,12 +62,12 @@ struct AggregateFunctionBinary String get_name() const override { return StatFunc::Data::name(); } + void reset(AggregateDataPtr __restrict place) const override { this->data(place).reset(); } + DataTypePtr get_return_type() const override { return std::make_shared>(); } - bool allocates_memory_in_arena() const override { return false; } - void add(AggregateDataPtr __restrict place, const IColumn** columns, ssize_t row_num, Arena*) const override { this->data(place).add( diff --git a/be/src/vec/aggregate_functions/aggregate_function_collect.h b/be/src/vec/aggregate_functions/aggregate_function_collect.h index 5a1d2ba19530f7..68de426ea1fdcf 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_collect.h +++ b/be/src/vec/aggregate_functions/aggregate_function_collect.h @@ -626,8 +626,6 @@ class AggregateFunctionCollect return std::make_shared(make_nullable(return_type)); } - bool 
allocates_memory_in_arena() const override { return ENABLE_ARENA; } - void add(AggregateDataPtr __restrict place, const IColumn** columns, ssize_t row_num, Arena* arena) const override { auto& data = this->data(place); diff --git a/be/src/vec/aggregate_functions/aggregate_function_corr.cpp b/be/src/vec/aggregate_functions/aggregate_function_corr.cpp index 8237f588298064..a454afb45f22e0 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_corr.cpp +++ b/be/src/vec/aggregate_functions/aggregate_function_corr.cpp @@ -76,6 +76,15 @@ struct CorrMoment { } static String name() { return "corr"; } + + void reset() { + m0 = {}; + x1 = {}; + y1 = {}; + xy = {}; + x2 = {}; + y2 = {}; + } }; AggregateFunctionPtr create_aggregate_corr_function(const std::string& name, diff --git a/be/src/vec/aggregate_functions/aggregate_function_distinct.h b/be/src/vec/aggregate_functions/aggregate_function_distinct.h index 6193b28a131e9f..ec6936a128c869 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_distinct.h +++ b/be/src/vec/aggregate_functions/aggregate_function_distinct.h @@ -338,8 +338,6 @@ class AggregateFunctionDistinct DataTypePtr get_return_type() const override { return nested_func->get_return_type(); } - bool allocates_memory_in_arena() const override { return true; } - AggregateFunctionPtr transmit_to_stable() override { return AggregateFunctionPtr(new AggregateFunctionDistinct( nested_func, IAggregateFunction::argument_types)); diff --git a/be/src/vec/aggregate_functions/aggregate_function_foreach.h b/be/src/vec/aggregate_functions/aggregate_function_foreach.h index 4261ef24343b95..7f746e53daac70 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_foreach.h +++ b/be/src/vec/aggregate_functions/aggregate_function_foreach.h @@ -219,10 +219,6 @@ class AggregateFunctionForEach : public IAggregateFunctionDataHelperallocates_memory_in_arena(); - } - void add(AggregateDataPtr __restrict place, const IColumn** columns, ssize_t row_num, Arena* arena) 
const override { std::vector nested(num_arguments); diff --git a/be/src/vec/aggregate_functions/aggregate_function_group_array_intersect.h b/be/src/vec/aggregate_functions/aggregate_function_group_array_intersect.h index d8cf91865f1ed2..94b34caff78645 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_group_array_intersect.h +++ b/be/src/vec/aggregate_functions/aggregate_function_group_array_intersect.h @@ -93,6 +93,11 @@ struct AggregateFunctionGroupArrayIntersectData { Set value; bool init = false; + void reset() { + init = false; + value = std::make_unique(); + } + void process_col_data(auto& column_data, size_t offset, size_t arr_size, bool& init, Set& set) { const bool is_column_data_nullable = column_data.is_nullable(); @@ -163,7 +168,7 @@ class AggregateFunctionGroupArrayIntersect DataTypePtr get_return_type() const override { return argument_type; } - bool allocates_memory_in_arena() const override { return false; } + void reset(AggregateDataPtr __restrict place) const override { this->data(place).reset(); } void add(AggregateDataPtr __restrict place, const IColumn** columns, ssize_t row_num, Arena*) const override { @@ -331,6 +336,11 @@ struct AggregateFunctionGroupArrayIntersectGenericData { : value(std::make_unique()) {} Set value; bool init = false; + + void reset() { + init = false; + value = std::make_unique(); + } }; /** Template parameter with true value should be used for columns that store their elements in memory continuously. 
@@ -357,7 +367,7 @@ class AggregateFunctionGroupArrayIntersectGeneric DataTypePtr get_return_type() const override { return input_data_type; } - bool allocates_memory_in_arena() const override { return true; } + void reset(AggregateDataPtr __restrict place) const override { this->data(place).reset(); } void add(AggregateDataPtr __restrict place, const IColumn** columns, ssize_t row_num, Arena* arena) const override { diff --git a/be/src/vec/aggregate_functions/aggregate_function_null.h b/be/src/vec/aggregate_functions/aggregate_function_null.h index 9888dd1da1c728..014a3e9c603b88 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_null.h +++ b/be/src/vec/aggregate_functions/aggregate_function_null.h @@ -177,10 +177,6 @@ class AggregateFunctionNullBaseInline : public IAggregateFunctionHelper nested_function->insert_result_into(nested_place(place), to); } } - - bool allocates_memory_in_arena() const override { - return nested_function->allocates_memory_in_arena(); - } }; /** There are two cases: for single argument and variadic. @@ -329,10 +325,6 @@ class AggregateFunctionNullVariadicInline final arena); } - bool allocates_memory_in_arena() const override { - return this->nested_function->allocates_memory_in_arena(); - } - private: // The array length is fixed in the implementation of some aggregate functions. // Therefore we choose 256 as the appropriate maximum length limit. 
diff --git a/be/src/vec/aggregate_functions/aggregate_function_orthogonal_bitmap.h b/be/src/vec/aggregate_functions/aggregate_function_orthogonal_bitmap.h index deb53241abb92f..a81bdcddaa3310 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_orthogonal_bitmap.h +++ b/be/src/vec/aggregate_functions/aggregate_function_orthogonal_bitmap.h @@ -52,6 +52,11 @@ struct AggOrthBitmapBaseData { public: using ColVecData = std::conditional_t, ColumnVector, ColumnString>; + void reset() { + bitmap = {}; + first_init = true; + } + void add(const IColumn** columns, size_t row_num) { const auto& bitmap_col = assert_cast(*columns[0]); @@ -99,6 +104,11 @@ struct AggOrthBitMapIntersect : public AggOrthBitmapBaseData { static DataTypePtr get_return_type() { return std::make_shared(); } + void reset() { + AggOrthBitmapBaseData::reset(); + result.reset(); + } + void merge(const AggOrthBitMapIntersect& rhs) { if (rhs.first_init) { return; @@ -120,7 +130,8 @@ struct AggOrthBitMapIntersect : public AggOrthBitmapBaseData { void get(IColumn& to) const { auto& column = assert_cast(to); - column.get_data().emplace_back(result); + column.get_data().emplace_back(result.empty() ? 
AggOrthBitmapBaseData::bitmap.intersect() + : result); } private: @@ -170,6 +181,11 @@ struct AggOrthBitMapIntersectCount : public AggOrthBitmapBaseData { static DataTypePtr get_return_type() { return std::make_shared(); } + void reset() { + AggOrthBitmapBaseData::reset(); + result = 0; + } + void merge(const AggOrthBitMapIntersectCount& rhs) { if (rhs.first_init) { return; @@ -225,6 +241,11 @@ struct AggOrthBitmapExprCalBaseData { } } + void reset() { + bitmap_expr_cal = {}; + first_init = true; + } + protected: doris::BitmapExprCalculation bitmap_expr_cal; bool first_init = true; @@ -263,6 +284,11 @@ struct AggOrthBitMapExprCal : public AggOrthBitmapExprCalBaseData { ->bitmap_expr_cal.bitmap_calculate()); } + void reset() { + AggOrthBitmapExprCalBaseData::reset(); + result.reset(); + } + private: BitmapValue result; }; @@ -299,6 +325,11 @@ struct AggOrthBitMapExprCalCount : public AggOrthBitmapExprCalBaseData { ->bitmap_expr_cal.bitmap_calculate_count()); } + void reset() { + AggOrthBitmapExprCalBaseData::reset(); + result = 0; + } + private: int64_t result = 0; }; @@ -330,6 +361,11 @@ struct OrthBitmapUnionCountData { column.get_data().emplace_back(result ? 
result : value.cardinality()); } + void reset() { + value.reset(); + result = 0; + } + private: BitmapValue value; int64_t result = 0; @@ -347,6 +383,8 @@ class AggFunctionOrthBitmapFunc final DataTypePtr get_return_type() const override { return Impl::get_return_type(); } + void reset(AggregateDataPtr __restrict place) const override { this->data(place).reset(); } + void add(AggregateDataPtr __restrict place, const IColumn** columns, ssize_t row_num, Arena*) const override { this->data(place).init_add_key(columns, row_num, _argument_size); diff --git a/be/src/vec/aggregate_functions/aggregate_function_uniq.h b/be/src/vec/aggregate_functions/aggregate_function_uniq.h index 356e0ead2d3d56..e97923a08e6a2d 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_uniq.h +++ b/be/src/vec/aggregate_functions/aggregate_function_uniq.h @@ -76,6 +76,8 @@ struct AggregateFunctionUniqExactData { Set set; static String get_name() { return "multi_distinct"; } + + void reset() { set.clear(); } }; namespace detail { @@ -115,6 +117,8 @@ class AggregateFunctionUniq final DataTypePtr get_return_type() const override { return std::make_shared(); } + void reset(AggregateDataPtr __restrict place) const override { this->data(place).reset(); } + void add(AggregateDataPtr __restrict place, const IColumn** columns, ssize_t row_num, Arena*) const override { detail::OneAdder::add(this->data(place), *columns[0], row_num); diff --git a/be/src/vec/aggregate_functions/aggregate_function_uniq_distribute_key.h b/be/src/vec/aggregate_functions/aggregate_function_uniq_distribute_key.h index 3eaa6418f0b7ca..4c3fa67e1626ae 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_uniq_distribute_key.h +++ b/be/src/vec/aggregate_functions/aggregate_function_uniq_distribute_key.h @@ -68,6 +68,11 @@ struct AggregateFunctionUniqDistributeKeyData { Set set; UInt64 count = 0; + + void reset() { + set.clear(); + count = 0; + } }; template @@ -83,6 +88,8 @@ class AggregateFunctionUniqDistributeKey 
final DataTypePtr get_return_type() const override { return std::make_shared(); } + void reset(AggregateDataPtr __restrict place) const override { this->data(place).reset(); } + void add(AggregateDataPtr __restrict place, const IColumn** columns, ssize_t row_num, Arena*) const override { detail::OneAdder::add(this->data(place), *columns[0], row_num); diff --git a/be/src/vec/aggregate_functions/aggregate_function_window_funnel.h b/be/src/vec/aggregate_functions/aggregate_function_window_funnel.h index 2afa82e9a6dbc7..c0f0a4e7e20ad0 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_window_funnel.h +++ b/be/src/vec/aggregate_functions/aggregate_function_window_funnel.h @@ -84,10 +84,9 @@ struct WindowFunnelState { bool enable_mode; WindowFunnelMode window_funnel_mode; mutable vectorized::MutableBlock mutable_block; - ColumnVector::Container* timestamp_column_data; + ColumnVector::Container* timestamp_column_data = nullptr; std::vector::Container*> event_columns_datas; SortDescription sort_description {1}; - bool sorted; WindowFunnelState() { event_count = 0; @@ -97,20 +96,15 @@ struct WindowFunnelState { sort_description[0].column_number = 0; sort_description[0].direction = 1; sort_description[0].nulls_direction = -1; - sorted = false; } WindowFunnelState(int arg_event_count) : WindowFunnelState() { event_count = arg_event_count; + event_columns_datas.resize(event_count); auto timestamp_column = ColumnVector::create(); - timestamp_column_data = - &assert_cast&>(*timestamp_column).get_data(); MutableColumns event_columns; for (int i = 0; i < event_count; i++) { - auto event_column = ColumnVector::create(); - event_columns_datas.emplace_back( - &assert_cast&>(*event_column).get_data()); - event_columns.emplace_back(std::move(event_column)); + event_columns.emplace_back(ColumnVector::create()); } Block tmp_block; tmp_block.insert({std::move(timestamp_column), @@ -122,15 +116,18 @@ struct WindowFunnelState { } mutable_block = 
MutableBlock(std::move(tmp_block)); + _reset_columns_ptr(); } - void reset() { - window = 0; - mutable_block.clear(); - timestamp_column_data = nullptr; - event_columns_datas.clear(); - sorted = false; + void _reset_columns_ptr() { + auto& ts_column = mutable_block.get_column_by_position(0); + timestamp_column_data = &assert_cast&>(*ts_column).get_data(); + for (int i = 0; i != event_count; i++) { + auto& event_column = mutable_block.get_column_by_position(i + 1); + event_columns_datas[i] = &assert_cast&>(*event_column).get_data(); + } } + void reset() { mutable_block.clear_column_data(); } void add(const IColumn** arg_columns, ssize_t row_num, int64_t win, WindowFunnelMode mode) { window = win; @@ -146,26 +143,23 @@ struct WindowFunnelState { } void sort() { - if (sorted) { - return; - } - Block tmp_block = mutable_block.to_block(); auto block = tmp_block.clone_without_columns(); sort_block(tmp_block, block, sort_description, 0); - mutable_block = MutableBlock(std::move(block)); - sorted = true; + mutable_block = std::move(block); + _reset_columns_ptr(); } template - int _match_event_list(size_t& start_row, size_t row_count, - const NativeType* timestamp_data) const { + int _match_event_list(size_t& start_row, size_t row_count) const { int matched_count = 0; DateValueType start_timestamp; DateValueType end_timestamp; TimeInterval interval(SECOND, window, false); int column_idx = 1; + + const NativeType* timestamp_data = timestamp_column_data->data(); const auto& first_event_column = mutable_block.get_column_by_position(column_idx); const auto& first_event_data = assert_cast&>(*first_event_column).get_data(); @@ -250,14 +244,9 @@ struct WindowFunnelState { int _get_internal() const { size_t start_row = 0; int max_found_event_count = 0; - const auto& ts_column = mutable_block.get_column_by_position(0)->get_ptr(); - const auto& timestamp_data = - assert_cast&>(*ts_column).get_data().data(); - auto row_count = mutable_block.rows(); while (start_row < row_count) { - 
auto found_event_count = - _match_event_list(start_row, row_count, timestamp_data); + auto found_event_count = _match_event_list(start_row, row_count); if (found_event_count == event_count) { return found_event_count; } @@ -324,6 +313,7 @@ struct WindowFunnelState { status = block.serialize( 5, &pblock, &uncompressed_bytes, &compressed_bytes, segment_v2::CompressionTypePB::ZSTD); // ZSTD for better compression ratio + block.clear_column_data(); if (!status.ok()) { throw doris::Exception(ErrorCode::INTERNAL_ERROR, status.to_string()); return; @@ -336,6 +326,9 @@ struct WindowFunnelState { auto data_bytes = buff.size(); write_var_uint(data_bytes, out); out.write(buff.data(), data_bytes); + + mutable_block = std::move(block); + const_cast*>(this)->_reset_columns_ptr(); } void read(BufferReadable& in) { @@ -366,6 +359,7 @@ struct WindowFunnelState { throw doris::Exception(ErrorCode::INTERNAL_ERROR, status.to_string()); } mutable_block = MutableBlock(std::move(block)); + _reset_columns_ptr(); } }; diff --git a/regression-test/data/nereids_p0/sql_functions/aggregate_functions/test_aggregate_window_functions.out b/regression-test/data/nereids_p0/sql_functions/aggregate_functions/test_aggregate_window_functions.out new file mode 100644 index 00000000000000..6729ea26bc1005 --- /dev/null +++ b/regression-test/data/nereids_p0/sql_functions/aggregate_functions/test_aggregate_window_functions.out @@ -0,0 +1,822 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !agg_window_approx_count_distinct -- +beijing 3 +beijing 3 +beijing 3 +beijing 3 +chongqing 2 +chongqing 2 +hubei 3 +hubei 3 +hubei 3 +sichuan 2 +sichuan 2 +sichuan 2 +sichuan 2 + +-- !agg_window_count_by_enum -- +beijing [{"cbe":{"liuxiang":1},"notnull":1,"null":0,"all":1}] +beijing [{"cbe":{"liuxiang2":1,"liuxiang":1},"notnull":2,"null":0,"all":2}] +beijing [{"cbe":{"wangmang":2,"liuxiang2":1,"liuxiang":1},"notnull":4,"null":0,"all":4}] +beijing [{"cbe":{"wangmang":2,"liuxiang2":1,"liuxiang":1},"notnull":4,"null":0,"all":4}] +chongqing [{"cbe":{"lisi":1},"notnull":1,"null":0,"all":1}] +chongqing [{"cbe":{"lisi2":1,"lisi":1},"notnull":2,"null":0,"all":2}] +hubei [{"cbe":{"quyuan":1},"notnull":1,"null":0,"all":1}] +hubei [{"cbe":{"wangwu":1,"quyuan":1},"notnull":2,"null":0,"all":2}] +hubei [{"cbe":{"wangwu2":1,"wangwu":1,"quyuan":1},"notnull":3,"null":0,"all":3}] +sichuan [{"cbe":{"zhangsan":2},"notnull":2,"null":1,"all":3}] +sichuan [{"cbe":{"zhangsan":2},"notnull":2,"null":1,"all":3}] +sichuan [{"cbe":{"zhuge":1,"zhangsan":2},"notnull":3,"null":1,"all":4}] +sichuan [{"cbe":{},"notnull":0,"null":1,"all":1}] + +-- !agg_window_avg_weighted -- +1 4.787199468947368E8 +1 4.787199468947368E8 +1 4.787199468947368E8 +1 4.787199468947368E8 +1 4.787199468947368E8 +1 4.787199468947368E8 +1 4.787199468947368E8 +2 2.7563070204347825E8 +2 2.7563070204347825E8 +2 2.7563070204347825E8 +2 2.7563070204347825E8 +2 2.7563070204347825E8 +3 3.4926324084193546E8 +3 3.4926324084193546E8 +3 3.4926324084193546E8 +3 3.4926324084193546E8 +3 3.4926324084193546E8 + +-- !agg_window_corr -- +1 0.7619441542729813 +1 0.7619441542729813 +1 0.7619441542729813 +1 0.7619441542729813 +1 0.7619441542729813 +1 0.7619441542729813 +1 0.7619441542729813 +2 0.4220928990265549 +2 0.4220928990265549 +2 0.4220928990265549 +2 0.4220928990265549 +2 0.4220928990265549 +3 0.9986446796711458 +3 0.9986446796711458 +3 0.9986446796711458 +3 0.9986446796711458 +3 
0.9986446796711458 + +-- !agg_window_covar_samp -- +1 9.266666666666666E10 +1 9.266666666666666E10 +1 9.266666666666666E10 +1 9.266666666666666E10 +1 9.266666666666666E10 +1 9.266666666666666E10 +1 9.266666666666666E10 +2 1.85E10 +2 1.85E10 +2 1.85E10 +2 1.85E10 +2 1.85E10 +3 1.5666666662333337E11 +3 1.5666666662333337E11 +3 1.5666666662333337E11 +3 1.5666666662333337E11 +3 1.5666666662333337E11 + +-- !agg_window_covar_pop -- +1 7.722222222222223E10 +1 7.722222222222223E10 +1 7.722222222222223E10 +1 7.722222222222223E10 +1 7.722222222222223E10 +1 7.722222222222223E10 +1 7.722222222222223E10 +2 1.48E10 +2 1.48E10 +2 1.48E10 +2 1.48E10 +2 1.48E10 +3 1.174999999675E11 +3 1.174999999675E11 +3 1.174999999675E11 +3 1.174999999675E11 +3 1.174999999675E11 + +-- !agg_window_variance_pop -- +1 2.2448979591836736E16 +1 2.2448979591836736E16 +1 2.2448979591836736E16 +1 2.2448979591836736E16 +1 2.2448979591836736E16 +1 2.2448979591836736E16 +1 2.2448979591836736E16 +2 1.36E16 +2 1.36E16 +2 1.36E16 +2 1.36E16 +2 1.36E16 +3 1.0399999990400002E16 +3 1.0399999990400002E16 +3 1.0399999990400002E16 +3 1.0399999990400002E16 +3 1.0399999990400002E16 + +-- !agg_window_stddev_pop -- +1 1.498298354528788E8 +1 1.498298354528788E8 +1 1.498298354528788E8 +1 1.498298354528788E8 +1 1.498298354528788E8 +1 1.498298354528788E8 +1 1.498298354528788E8 +2 1.16619037896906E8 +2 1.16619037896906E8 +2 1.16619037896906E8 +2 1.16619037896906E8 +2 1.16619037896906E8 +3 1.0198039022478783E8 +3 1.0198039022478783E8 +3 1.0198039022478783E8 +3 1.0198039022478783E8 +3 1.0198039022478783E8 + +-- !agg_window_variance_samp -- +1 2.6190476190476192E16 +1 2.6190476190476192E16 +1 2.6190476190476192E16 +1 2.6190476190476192E16 +1 2.6190476190476192E16 +1 2.6190476190476192E16 +1 2.6190476190476192E16 +2 1.7E16 +2 1.7E16 +2 1.7E16 +2 1.7E16 +2 1.7E16 +3 1.2999999988000002E16 +3 1.2999999988000002E16 +3 1.2999999988000002E16 +3 1.2999999988000002E16 +3 1.2999999988000002E16 + +-- !agg_window_stddev_samp -- +1 
1.618347187425374E8 +1 1.618347187425374E8 +1 1.618347187425374E8 +1 1.618347187425374E8 +1 1.618347187425374E8 +1 1.618347187425374E8 +1 1.618347187425374E8 +2 1.3038404810405298E8 +2 1.3038404810405298E8 +2 1.3038404810405298E8 +2 1.3038404810405298E8 +2 1.3038404810405298E8 +3 1.1401754245729032E8 +3 1.1401754245729032E8 +3 1.1401754245729032E8 +3 1.1401754245729032E8 +3 1.1401754245729032E8 + +-- !agg_window_group_bit_or -- +1 2044 +1 2044 +1 2044 +1 2044 +1 2044 +1 2044 +1 2044 +2 1020 +2 1020 +2 1020 +2 1020 +2 1020 +3 4088 +3 4088 +3 4088 +3 4088 +3 4088 + +-- !agg_window_group_bit_and -- +1 0 +1 0 +1 0 +1 0 +1 0 +1 0 +1 0 +2 0 +2 0 +2 0 +2 0 +2 0 +3 128 +3 128 +3 128 +3 128 +3 128 + +-- !agg_window_group_bit_xor -- +1 1392 +1 1392 +1 1392 +1 1392 +1 1392 +1 1392 +1 1392 +2 708 +2 708 +2 708 +2 708 +2 708 +3 3912 +3 3912 +3 3912 +3 3912 +3 3912 + +-- !agg_window_bitmap_agg -- +1 100,200,300,1000,2000 +1 100,200,300,1000,2000 +1 100,200,300,1000,2000 +1 100,200,300,1000,2000 +1 100,200,300,1000,2000 +1 100,200,300,1000,2000 +1 100,200,300,1000,2000 +2 100,300,400,500,1000 +2 100,300,400,500,1000 +2 100,300,400,500,1000 +2 100,300,400,500,1000 +2 100,300,400,500,1000 +3 200,1000,2000,3000 +3 200,1000,2000,3000 +3 200,1000,2000,3000 +3 200,1000,2000,3000 +3 200,1000,2000,3000 + +-- !agg_window_bitmap_union_int -- +1 5 +1 5 +1 5 +1 5 +1 5 +1 5 +1 5 +2 5 +2 5 +2 5 +2 5 +2 5 +3 4 +3 4 +3 4 +3 4 +3 4 + +-- !agg_window_histogram -- +1 {"num_buckets":5,"buckets":[{"lower":"100","upper":"100","ndv":1,"count":1,"pre_sum":0},{"lower":"200","upper":"200","ndv":1,"count":2,"pre_sum":1},{"lower":"300","upper":"300","ndv":1,"count":1,"pre_sum":3},{"lower":"1000","upper":"1000","ndv":1,"count":1,"pre_sum":4},{"lower":"2000","upper":"2000","ndv":1,"count":1,"pre_sum":5}]} +1 
{"num_buckets":5,"buckets":[{"lower":"100","upper":"100","ndv":1,"count":1,"pre_sum":0},{"lower":"200","upper":"200","ndv":1,"count":2,"pre_sum":1},{"lower":"300","upper":"300","ndv":1,"count":1,"pre_sum":3},{"lower":"1000","upper":"1000","ndv":1,"count":1,"pre_sum":4},{"lower":"2000","upper":"2000","ndv":1,"count":1,"pre_sum":5}]} +1 {"num_buckets":5,"buckets":[{"lower":"100","upper":"100","ndv":1,"count":1,"pre_sum":0},{"lower":"200","upper":"200","ndv":1,"count":2,"pre_sum":1},{"lower":"300","upper":"300","ndv":1,"count":1,"pre_sum":3},{"lower":"1000","upper":"1000","ndv":1,"count":1,"pre_sum":4},{"lower":"2000","upper":"2000","ndv":1,"count":1,"pre_sum":5}]} +1 {"num_buckets":5,"buckets":[{"lower":"100","upper":"100","ndv":1,"count":1,"pre_sum":0},{"lower":"200","upper":"200","ndv":1,"count":2,"pre_sum":1},{"lower":"300","upper":"300","ndv":1,"count":1,"pre_sum":3},{"lower":"1000","upper":"1000","ndv":1,"count":1,"pre_sum":4},{"lower":"2000","upper":"2000","ndv":1,"count":1,"pre_sum":5}]} +1 {"num_buckets":5,"buckets":[{"lower":"100","upper":"100","ndv":1,"count":1,"pre_sum":0},{"lower":"200","upper":"200","ndv":1,"count":2,"pre_sum":1},{"lower":"300","upper":"300","ndv":1,"count":1,"pre_sum":3},{"lower":"1000","upper":"1000","ndv":1,"count":1,"pre_sum":4},{"lower":"2000","upper":"2000","ndv":1,"count":1,"pre_sum":5}]} +1 {"num_buckets":5,"buckets":[{"lower":"100","upper":"100","ndv":1,"count":1,"pre_sum":0},{"lower":"200","upper":"200","ndv":1,"count":2,"pre_sum":1},{"lower":"300","upper":"300","ndv":1,"count":1,"pre_sum":3},{"lower":"1000","upper":"1000","ndv":1,"count":1,"pre_sum":4},{"lower":"2000","upper":"2000","ndv":1,"count":1,"pre_sum":5}]} +1 
{"num_buckets":5,"buckets":[{"lower":"100","upper":"100","ndv":1,"count":1,"pre_sum":0},{"lower":"200","upper":"200","ndv":1,"count":2,"pre_sum":1},{"lower":"300","upper":"300","ndv":1,"count":1,"pre_sum":3},{"lower":"1000","upper":"1000","ndv":1,"count":1,"pre_sum":4},{"lower":"2000","upper":"2000","ndv":1,"count":1,"pre_sum":5}]} +2 {"num_buckets":5,"buckets":[{"lower":"100","upper":"100","ndv":1,"count":1,"pre_sum":0},{"lower":"300","upper":"300","ndv":1,"count":1,"pre_sum":1},{"lower":"400","upper":"400","ndv":1,"count":1,"pre_sum":2},{"lower":"500","upper":"500","ndv":1,"count":1,"pre_sum":3},{"lower":"1000","upper":"1000","ndv":1,"count":1,"pre_sum":4}]} +2 {"num_buckets":5,"buckets":[{"lower":"100","upper":"100","ndv":1,"count":1,"pre_sum":0},{"lower":"300","upper":"300","ndv":1,"count":1,"pre_sum":1},{"lower":"400","upper":"400","ndv":1,"count":1,"pre_sum":2},{"lower":"500","upper":"500","ndv":1,"count":1,"pre_sum":3},{"lower":"1000","upper":"1000","ndv":1,"count":1,"pre_sum":4}]} +2 {"num_buckets":5,"buckets":[{"lower":"100","upper":"100","ndv":1,"count":1,"pre_sum":0},{"lower":"300","upper":"300","ndv":1,"count":1,"pre_sum":1},{"lower":"400","upper":"400","ndv":1,"count":1,"pre_sum":2},{"lower":"500","upper":"500","ndv":1,"count":1,"pre_sum":3},{"lower":"1000","upper":"1000","ndv":1,"count":1,"pre_sum":4}]} +2 {"num_buckets":5,"buckets":[{"lower":"100","upper":"100","ndv":1,"count":1,"pre_sum":0},{"lower":"300","upper":"300","ndv":1,"count":1,"pre_sum":1},{"lower":"400","upper":"400","ndv":1,"count":1,"pre_sum":2},{"lower":"500","upper":"500","ndv":1,"count":1,"pre_sum":3},{"lower":"1000","upper":"1000","ndv":1,"count":1,"pre_sum":4}]} +2 
{"num_buckets":5,"buckets":[{"lower":"100","upper":"100","ndv":1,"count":1,"pre_sum":0},{"lower":"300","upper":"300","ndv":1,"count":1,"pre_sum":1},{"lower":"400","upper":"400","ndv":1,"count":1,"pre_sum":2},{"lower":"500","upper":"500","ndv":1,"count":1,"pre_sum":3},{"lower":"1000","upper":"1000","ndv":1,"count":1,"pre_sum":4}]} +3 {"num_buckets":4,"buckets":[{"lower":"200","upper":"200","ndv":1,"count":1,"pre_sum":0},{"lower":"1000","upper":"1000","ndv":1,"count":1,"pre_sum":1},{"lower":"2000","upper":"2000","ndv":1,"count":1,"pre_sum":2},{"lower":"3000","upper":"3000","ndv":1,"count":1,"pre_sum":3}]} +3 {"num_buckets":4,"buckets":[{"lower":"200","upper":"200","ndv":1,"count":1,"pre_sum":0},{"lower":"1000","upper":"1000","ndv":1,"count":1,"pre_sum":1},{"lower":"2000","upper":"2000","ndv":1,"count":1,"pre_sum":2},{"lower":"3000","upper":"3000","ndv":1,"count":1,"pre_sum":3}]} +3 {"num_buckets":4,"buckets":[{"lower":"200","upper":"200","ndv":1,"count":1,"pre_sum":0},{"lower":"1000","upper":"1000","ndv":1,"count":1,"pre_sum":1},{"lower":"2000","upper":"2000","ndv":1,"count":1,"pre_sum":2},{"lower":"3000","upper":"3000","ndv":1,"count":1,"pre_sum":3}]} +3 {"num_buckets":4,"buckets":[{"lower":"200","upper":"200","ndv":1,"count":1,"pre_sum":0},{"lower":"1000","upper":"1000","ndv":1,"count":1,"pre_sum":1},{"lower":"2000","upper":"2000","ndv":1,"count":1,"pre_sum":2},{"lower":"3000","upper":"3000","ndv":1,"count":1,"pre_sum":3}]} +3 {"num_buckets":4,"buckets":[{"lower":"200","upper":"200","ndv":1,"count":1,"pre_sum":0},{"lower":"1000","upper":"1000","ndv":1,"count":1,"pre_sum":1},{"lower":"2000","upper":"2000","ndv":1,"count":1,"pre_sum":2},{"lower":"3000","upper":"3000","ndv":1,"count":1,"pre_sum":3}]} + +-- !agg_window_max_by -- +1 \N \N +1 100 123456789.0000000001 +1 1000 523456789.0000000005 +1 200 223456789.0000000004 +1 200 223456789.0000000004 +1 2000 523456789.0000000005 +1 300 423456789.0000000005 +2 100 323456789.0000000005 +2 1000 423456789.0000000005 +2 300 
223456789.0000000005 +2 400 123456789.0000000005 +2 500 123456789.0000000006 +3 \N \N +3 1000 223456789.6000000005 +3 200 123456789.1000000005 +3 2000 323456789.1000000005 +3 3000 423456789.2000000005 + +-- !agg_window_min_by -- +1 \N \N +1 100 123456789.0000000001 +1 1000 123456789.0000000001 +1 200 123456789.0000000001 +1 200 123456789.0000000001 +1 2000 123456789.0000000001 +1 300 123456789.0000000001 +2 100 323456789.0000000005 +2 1000 323456789.0000000005 +2 300 323456789.0000000005 +2 400 323456789.0000000005 +2 500 323456789.0000000005 +3 \N \N +3 1000 123456789.1000000005 +3 200 123456789.1000000005 +3 2000 123456789.1000000005 +3 3000 123456789.1000000005 + +-- !agg_window_any_value -- +1 123456789.0000000001 +1 123456789.0000000001 +1 123456789.0000000001 +1 123456789.0000000001 +1 123456789.0000000001 +1 123456789.0000000001 +1 123456789.0000000001 +2 123456789.0000000005 +2 123456789.0000000005 +2 123456789.0000000005 +2 123456789.0000000005 +2 123456789.0000000005 +3 123456789.1000000005 +3 123456789.1000000005 +3 123456789.1000000005 +3 123456789.1000000005 +3 123456789.1000000005 + +-- !agg_window_percentile -- +1 5.23456789E8 +1 5.23456789E8 +1 5.23456789E8 +1 5.23456789E8 +1 5.23456789E8 +1 5.23456789E8 +1 5.23456789E8 +2 4.03456789E8 +2 4.03456789E8 +2 4.03456789E8 +2 4.03456789E8 +2 4.03456789E8 +3 4.03456789E8 +3 4.03456789E8 +3 4.03456789E8 +3 4.03456789E8 +3 4.03456789E8 + +-- !agg_window_percentile_array -- +1 [273456789, 423456789, 523456789] +1 [273456789, 423456789, 523456789] +1 [273456789, 423456789, 523456789] +1 [273456789, 423456789, 523456789] +1 [273456789, 423456789, 523456789] +1 [273456789, 423456789, 523456789] +1 [273456789, 423456789, 523456789] +2 [123456789, 223456789, 323456789] +2 [123456789, 223456789, 323456789] +2 [123456789, 223456789, 323456789] +2 [123456789, 223456789, 323456789] +2 [123456789, 223456789, 323456789] +3 [223456789, 223456789, 323456789] +3 [223456789, 223456789, 323456789] +3 [223456789, 223456789, 
323456789] +3 [223456789, 223456789, 323456789] +3 [223456789, 223456789, 323456789] + +-- !agg_window_percentile_approx -- +1 5.234568E8 +1 5.234568E8 +1 5.234568E8 +1 5.234568E8 +1 5.234568E8 +1 5.234568E8 +1 5.234568E8 +2 4.234568E8 +2 4.234568E8 +2 4.234568E8 +2 4.234568E8 +2 4.234568E8 +3 4.234568E8 +3 4.234568E8 +3 4.234568E8 +3 4.234568E8 +3 4.234568E8 + +-- !agg_window_percentile_approx_weighted -- +1 5.234568E8 +1 5.234568E8 +1 5.234568E8 +1 5.234568E8 +1 5.234568E8 +1 5.234568E8 +1 5.234568E8 +2 4.234568E8 +2 4.234568E8 +2 4.234568E8 +2 4.234568E8 +2 4.234568E8 +3 4.234568E8 +3 4.234568E8 +3 4.234568E8 +3 4.234568E8 +3 4.234568E8 + +-- !agg_window_topn -- +1 {"523456789.0000000005":3,"423456789.0000000005":1,"323456789.0000000002":1} +1 {"523456789.0000000005":3,"423456789.0000000005":1,"323456789.0000000002":1} +1 {"523456789.0000000005":3,"423456789.0000000005":1,"323456789.0000000002":1} +1 {"523456789.0000000005":3,"423456789.0000000005":1,"323456789.0000000002":1} +1 {"523456789.0000000005":3,"423456789.0000000005":1,"323456789.0000000002":1} +1 {"523456789.0000000005":3,"423456789.0000000005":1,"323456789.0000000002":1} +1 {"523456789.0000000005":3,"423456789.0000000005":1,"323456789.0000000002":1} +2 {"423456789.0000000005":1,"323456789.0000000005":1,"223456789.0000000005":1} +2 {"423456789.0000000005":1,"323456789.0000000005":1,"223456789.0000000005":1} +2 {"423456789.0000000005":1,"323456789.0000000005":1,"223456789.0000000005":1} +2 {"423456789.0000000005":1,"323456789.0000000005":1,"223456789.0000000005":1} +2 {"423456789.0000000005":1,"323456789.0000000005":1,"223456789.0000000005":1} +3 {"223456789.6000000005":2,"423456789.2000000005":1,"323456789.1000000005":1} +3 {"223456789.6000000005":2,"423456789.2000000005":1,"323456789.1000000005":1} +3 {"223456789.6000000005":2,"423456789.2000000005":1,"323456789.1000000005":1} +3 {"223456789.6000000005":2,"423456789.2000000005":1,"323456789.1000000005":1} +3 
{"223456789.6000000005":2,"423456789.2000000005":1,"323456789.1000000005":1} + +-- !agg_window_topn_weighted -- +1 [523456789, 423456789, 323456789] +1 [523456789, 423456789, 323456789] +1 [523456789, 423456789, 323456789] +1 [523456789, 423456789, 323456789] +1 [523456789, 423456789, 323456789] +1 [523456789, 423456789, 323456789] +1 [523456789, 423456789, 323456789] +2 [423456789, 123456789, 223456789] +2 [423456789, 123456789, 223456789] +2 [423456789, 123456789, 223456789] +2 [423456789, 123456789, 223456789] +2 [423456789, 123456789, 223456789] +3 [423456789.2, 323456789.1, 223456789.6] +3 [423456789.2, 323456789.1, 223456789.6] +3 [423456789.2, 323456789.1, 223456789.6] +3 [423456789.2, 323456789.1, 223456789.6] +3 [423456789.2, 323456789.1, 223456789.6] + +-- !agg_window_topn_array -- +1 [523456789.0000000005, 423456789.0000000005, 323456789.0000000002] +1 [523456789.0000000005, 423456789.0000000005, 323456789.0000000002] +1 [523456789.0000000005, 423456789.0000000005, 323456789.0000000002] +1 [523456789.0000000005, 423456789.0000000005, 323456789.0000000002] +1 [523456789.0000000005, 423456789.0000000005, 323456789.0000000002] +1 [523456789.0000000005, 423456789.0000000005, 323456789.0000000002] +1 [523456789.0000000005, 423456789.0000000005, 323456789.0000000002] +2 [423456789.0000000005, 323456789.0000000005, 223456789.0000000005] +2 [423456789.0000000005, 323456789.0000000005, 223456789.0000000005] +2 [423456789.0000000005, 323456789.0000000005, 223456789.0000000005] +2 [423456789.0000000005, 323456789.0000000005, 223456789.0000000005] +2 [423456789.0000000005, 323456789.0000000005, 223456789.0000000005] +3 [223456789.6000000005, 423456789.2000000005, 323456789.1000000005] +3 [223456789.6000000005, 423456789.2000000005, 323456789.1000000005] +3 [223456789.6000000005, 423456789.2000000005, 323456789.1000000005] +3 [223456789.6000000005, 423456789.2000000005, 323456789.1000000005] +3 [223456789.6000000005, 423456789.2000000005, 323456789.1000000005] + +-- 
!agg_window_multi_distinct_count -- +1 5 +1 5 +1 5 +1 5 +1 5 +1 5 +1 5 +2 5 +2 5 +2 5 +2 5 +2 5 +3 4 +3 4 +3 4 +3 4 +3 4 + +-- !agg_window_multi_distinct_sum -- +1 11320987615.0000000119 +1 1617283945.0000000017 +1 3234567890.0000000034 +1 4851851835.0000000051 +1 6469135780.0000000068 +1 8086419725.0000000085 +1 9703703670.0000000102 +2 1217283945.0000000026 +2 2434567890.0000000052 +2 3651851835.0000000078 +2 4869135780.0000000104 +2 6086419725.0000000130 +3 1093827157.0000000020 +3 2187654314.0000000040 +3 3281481471.0000000060 +3 4375308628.0000000080 +3 5469135785.0000000100 + +-- !agg_window_bitmap_union -- +1 1,2,3 +1 1,2,3 +1 1,2,3 +1 1,2,3 +2 2,3,4 +2 2,3,4 +2 2,3,4 +3 3,4,5,6 +3 3,4,5,6 +3 3,4,5,6 +3 3,4,5,6 + +-- !agg_window_bitmap_intersect -- +1 1 +1 1 +1 1 +1 1 +2 2 +2 2 +2 2 +3 +3 +3 +3 + +-- !agg_window_group_bitmap_xor -- +1 1,3 +1 1,3 +1 1,3 +1 1,3 +2 2,4 +2 2,4 +2 2,4 +3 3,4,5,6 +3 3,4,5,6 +3 3,4,5,6 +3 3,4,5,6 + +-- !agg_window_bitmap_union_count -- +1 3 +1 3 +1 3 +1 3 +2 3 +2 3 +2 3 +3 4 +3 4 +3 4 +3 4 + +-- !agg_window_collect_list -- +1 ["1,2", "1,2", "1,3"] +1 ["1,2", "1,2"] +1 ["1,2", "1,2"] +1 [] +2 ["2,3", "2,3", "2,4"] +2 ["2,3", "2,3"] +2 ["2,3", "2,3"] +3 ["3", "4", "5", "6"] +3 ["3", "4", "5"] +3 ["3", "4"] +3 ["3"] + +-- !agg_window_collect_set -- +1 ["1,2"] +1 ["1,2"] +1 ["1,3", "1,2"] +1 [] +2 ["2,3", "2,4"] +2 ["2,3"] +2 ["2,3"] +3 ["3", "4"] +3 ["3"] +3 ["5", "3", "4"] +3 ["5", "3", "6", "4"] + +-- !agg_window_array_agg -- +1 [null, "1,2", "1,2", "1,3"] +1 [null, "1,2", "1,2"] +1 [null, "1,2", "1,2"] +1 [null] +2 ["2,3", "2,3", "2,4"] +2 ["2,3", "2,3"] +2 ["2,3", "2,3"] +3 ["3", "4", "5", "6"] +3 ["3", "4", "5"] +3 ["3", "4"] +3 ["3"] + +-- !agg_window_group_concat -- +1 \N +1 1,2,1,2 +1 1,2,1,2 +1 1,2,1,2,1,3 +2 2,3,2,3 +2 2,3,2,3 +2 2,3,2,3,2,4 +3 3 +3 3,4 +3 3,4,5 +3 3,4,5,6 + +-- !agg_window_sum0 -- +1 12 +1 12 +1 12 +1 12 +2 16 +2 16 +2 16 +3 25 +3 25 +3 25 +3 25 + +-- !agg_window_group_array_intersect -- +1 [] +1 [] +1 [] 
+1 [] +2 ["2"] +2 ["2"] +2 ["2"] +3 ["3", "4"] +3 ["3", "4"] +3 ["3", "4"] +3 ["3", "4"] + +-- !window_func_hll_union_agg -- +beijing linux 3 +beijing macos 3 +beijing windows 3 +hebei windows 1 +jiangsu macos 1 +shanghai linux 2 +shanghai windows 2 +shanxi windows 1 +shanxi windows 1 + +-- !window_func_hll_union -- +beijing linux 3 +beijing macos 3 +beijing windows 3 +hebei windows 1 +jiangsu macos 1 +shanghai linux 2 +shanghai windows 2 +shanxi windows 1 +shanxi windows 1 + +-- !map_agg -- +1 {"LA":"V1_1", "LB":"V1_2", "LC":"V1_3"} +1 {"LA":"V1_1", "LB":"V1_2", "LC":"V1_3"} +1 {"LA":"V1_1", "LB":"V1_2", "LC":"V1_3"} +2 {"LA":"V2_1", "LB":"V2_2", "LC":"V2_3"} +2 {"LA":"V2_1", "LB":"V2_2", "LC":"V2_3"} +2 {"LA":"V2_1", "LB":"V2_2", "LC":"V2_3"} +3 {"LA":"V3_1", "LB":"V3_2", "LC":"V3_3"} +3 {"LA":"V3_1", "LB":"V3_2", "LC":"V3_3"} +3 {"LA":"V3_1", "LB":"V3_2", "LC":"V3_3"} +4 {"LA":"V4_1", "LB":"V4_2", "LC":"V4_3"} +4 {"LA":"V4_1", "LB":"V4_2", "LC":"V4_3"} +4 {"LA":"V4_1", "LB":"V4_2", "LC":"V4_3"} +5 {"LA":"V5_1", "LB":"V5_2", "LC":"V5_3"} +5 {"LA":"V5_1", "LB":"V5_2", "LC":"V5_3"} +5 {"LA":"V5_1", "LB":"V5_2", "LC":"V5_3"} + +-- !agg_window_quantile_union -- +20220201 0 1.0 +20220201 1 1.0 + +-- !agg_window_retention_0 -- +1 [1, 1] +2 [1, 0] +3 [0, 0] +4 [0, 0] +5 [1, 1] + +-- !agg_window_retention_1 -- +1 [1, 1] +1 [1, 1] +2 [1, 0] +2 [1, 0] +3 [0, 0] +3 [0, 0] +4 [0, 0] +5 [1, 1] +5 [1, 1] +5 [1, 1] + +-- !agg_window_sequence_match -- +1 true +1 true +1 true +1 true +1 true +1 true +1 true +1 true +2 false +2 false +2 false +2 false +2 false + +-- !agg_window_sequence_count -- +1 2 +1 2 +1 2 +1 2 +1 2 +1 2 +1 2 +1 2 +2 0 +2 0 +2 0 +2 0 +2 0 + +-- !agg_window_sum_foreach -- +1 [1, 2, 3] +2 [20] +3 [100] +4 \N +5 [null, 2] + +-- !agg_window_sum_foreach2 -- +1 \N +2 [1, 2, 3] +3 [21, 2, 3] +4 [120] +5 [100] + +-- !agg_window_covar_foreach -- +1 [0, 0, 0] +2 [0] +3 [0] +4 \N +5 [null, 0] + +-- !agg_window_group_concat_state1 -- +1 string1,string1,string2,string2 +1 
string1,string1,string2,string2 +1 string1,string1,string2,string2 +1 string1,string1,string2,string2 +2 string3,string3 +2 string3,string3 + +-- !agg_window_group_concat_state_merge -- +1 string1,string1,string2,string2 +1 string1,string1,string2,string2 +1 string1,string1,string2,string2 +1 string1,string1,string2,string2 +2 string3,string3 +2 string3,string3 + +-- !agg_window_orthogonal_bitmap1 -- +1 1,2 +1 1,2 +1 1,2 +2 2,3 +2 2,3 +2 2,3 + +-- !agg_window_orthogonal_bitmap2 -- +1 2 +1 2 +1 2 +2 2 +2 2 +2 2 + +-- !agg_window_orthogonal_bitmap3 -- +1 4 +1 4 +1 4 +2 5 +2 5 +2 5 + +-- !agg_window_window_funnel -- +100123 2 +100123 2 +100123 2 +100123 2 +100123 2 +100125 3 +100125 3 +100125 3 +100126 2 +100126 2 +100127 2 +100127 2 + diff --git a/regression-test/suites/nereids_p0/sql_functions/aggregate_functions/test_aggregate_window_functions.groovy b/regression-test/suites/nereids_p0/sql_functions/aggregate_functions/test_aggregate_window_functions.groovy new file mode 100644 index 00000000000000..a2016ba739609f --- /dev/null +++ b/regression-test/suites/nereids_p0/sql_functions/aggregate_functions/test_aggregate_window_functions.groovy @@ -0,0 +1,522 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+
+suite("test_aggregate_window_functions") { // Runs aggregate functions through OVER(...) clauses; each order_qt_<tag> call has a matching "-- !<tag> --" section in the expected-output file.
+    sql "SET enable_nereids_planner=true" // run on the Nereids planner
+    sql "SET enable_fallback_to_original_planner=false" // and do not fall back to the original planner
+
+    // approx_count_distinct: fixture of (id, name, province) rows with one NULL name, queried per province partition.
+    sql """
+        drop table if exists test_aggregate_window_functions;
+    """
+    sql """
+        create table test_aggregate_window_functions (
+            id bigint,
+            name varchar(20),
+            province varchar(20)
+        ) distributed by hash(id) properties('replication_num' = '1');
+    """
+    sql """
+        insert into test_aggregate_window_functions values
+            (1, 'zhangsan', "sichuan"),
+            (4, 'zhangsan', "sichuan"),
+            (11, 'zhuge', "sichuan"),
+            (13, null, "sichuan"),
+            (2, 'lisi', "chongqing"),
+            (5, 'lisi2', "chongqing"),
+            (3, 'wangwu', "hubei"),
+            (6, 'wangwu2', "hubei"),
+            (12, "quyuan", 'hubei'),
+            (7, 'liuxiang', "beijing"),
+            (8, 'wangmang', "beijing"),
+            (9, 'liuxiang2', "beijing"),
+            (10, 'wangmang', "beijing");
+    """
+    order_qt_agg_window_approx_count_distinct "select province, approx_count_distinct(name) over(partition by province) from test_aggregate_window_functions;"
+
+    // count_by_enum: same table; the window also orders by name inside each province partition.
+    order_qt_agg_window_count_by_enum "select province, count_by_enum(name) over(partition by province order by name) from test_aggregate_window_functions;"
+
+    // avg_weighted: rebuilds the table as (id, price decimalv3(38, 10), count); ids 1 and 3 each contain one NULL count. This fixture is reused by every query down to multi_distinct_sum.
+    sql """
+        drop table if exists test_aggregate_window_functions;
+    """
+    sql """
+        create table test_aggregate_window_functions (
+            id bigint,
+            price decimalv3(38, 10),
+            count bigint
+        ) distributed by hash(id) properties('replication_num' = '1');
+    """
+    sql """
+        insert into test_aggregate_window_functions values
+            (1, 123456789.0000000001, 100),
+            (1, 223456789.0000000004, 200),
+            (1, 323456789.0000000002, 200),
+            (1, 423456789.0000000005, 300),
+            (1, 523456789.0000000005, null),
+            (1, 523456789.0000000005, 1000),
+            (1, 523456789.0000000005, 2000),
+            (2, 123456789.0000000005, 400),
+            (2, 123456789.0000000006, 500),
+            (2, 223456789.0000000005, 300),
+            (2, 323456789.0000000005, 100),
+            (2, 423456789.0000000005, 1000),
+            (3, 123456789.1000000005, 200),
+            (3, 223456789.6000000005, 1000),
+            (3, 223456789.6000000005, null),
+            (3, 323456789.1000000005, 2000),
+            (3, 423456789.2000000005, 3000);
+    """
+    order_qt_agg_window_avg_weighted "select id, avg_weighted(price, count) over(partition by id) from test_aggregate_window_functions;"
+
+    // corr: two-argument statistic over (price, count), one window per id
+    order_qt_agg_window_corr "select id, corr(price, count) over(partition by id) from test_aggregate_window_functions;"
+    // covar_samp
+    order_qt_agg_window_covar_samp "select id, covar_samp(price, count) over(partition by id) from test_aggregate_window_functions;"
+    // covar_pop
+    order_qt_agg_window_covar_pop "select id, covar_pop(price, count) over(partition by id) from test_aggregate_window_functions;"
+
+    // variance_pop: single-argument statistics over price, one window per id
+    order_qt_agg_window_variance_pop "select id, variance_pop(price) over(partition by id) from test_aggregate_window_functions;"
+    // stddev_pop
+    order_qt_agg_window_stddev_pop "select id, stddev_pop(price) over(partition by id) from test_aggregate_window_functions;"
+
+    // variance_samp
+    order_qt_agg_window_variance_samp "select id, variance_samp(price) over(partition by id) from test_aggregate_window_functions;"
+    // stddev_samp
+    order_qt_agg_window_stddev_samp "select id, stddev_samp(price) over(partition by id) from test_aggregate_window_functions;"
+
+    // group_bit_or: bitwise aggregates over the bigint count column
+    order_qt_agg_window_group_bit_or "select id, group_bit_or(count) over(partition by id) from test_aggregate_window_functions;"
+    // group_bit_and
+    order_qt_agg_window_group_bit_and "select id, group_bit_and(count) over(partition by id) from test_aggregate_window_functions;"
+    // group_bit_xor
+    order_qt_agg_window_group_bit_xor "select id, group_bit_xor(count) over(partition by id) from test_aggregate_window_functions;"
+
+    // bitmap_agg: the window result is wrapped in bitmap_to_string before being selected
+    order_qt_agg_window_bitmap_agg "select id, bitmap_to_string(bitmap_agg(count) over(partition by id)) from test_aggregate_window_functions;"
+
+    // BITMAP_UNION_INT
+    order_qt_agg_window_bitmap_union_int "select id, bitmap_union_int(count) over(partition by id) from test_aggregate_window_functions;"
+
+    // histogram
+    order_qt_agg_window_histogram "select id, histogram(count) over(partition by id) from test_aggregate_window_functions;"
+
+    // max_by: these three windows carry an ORDER BY (count, count, price) in addition to the id partition
+    order_qt_agg_window_max_by "select id, count, max_by(price, count) over(partition by id order by count) from test_aggregate_window_functions;"
+    // min_by
+    order_qt_agg_window_min_by "select id, count, min_by(price, count) over(partition by id order by count) from test_aggregate_window_functions;"
+    // any_value
+    order_qt_agg_window_any_value "select id, any_value(price) over(partition by id order by price) from test_aggregate_window_functions;"
+
+    // percentile: the level 0.95 is passed as a literal
+    order_qt_agg_window_percentile "select id, percentile(price, 0.95) over(partition by id) from test_aggregate_window_functions;"
+    // percentile_array: levels passed as array(0.25, 0.5, 0.75)
+    order_qt_agg_window_percentile_array "select id, percentile_array(price, array(0.25, 0.5, 0.75)) over(partition by id) from test_aggregate_window_functions;"
+    // percentile_approx
+    order_qt_agg_window_percentile_approx "select id, percentile_approx(price, 0.95) over(partition by id) from test_aggregate_window_functions;"
+    // percentile_approx_weighted: count is the second argument
+    order_qt_agg_window_percentile_approx_weighted "select id, percentile_approx_weighted(price, count, 0.95) over(partition by id) from test_aggregate_window_functions;"
+
+    // topn: all three variants are called with 3 as the last argument
+    order_qt_agg_window_topn "select id, topn(price, 3) over(partition by id) from test_aggregate_window_functions;"
+    // topn_weighted
+    order_qt_agg_window_topn_weighted "select id, topn_weighted(price, count, 3) over(partition by id) from test_aggregate_window_functions;"
+    // topn_array
+    order_qt_agg_window_topn_array "select id, topn_array(price, 3) over(partition by id) from test_aggregate_window_functions;"
+
+    // multi_distinct_count
+    order_qt_agg_window_multi_distinct_count "select id, multi_distinct_count(price) over(partition by id) from test_aggregate_window_functions;"
+
+    // multi_distinct_count_distribute_key, FE not implemented yet. NOTE(review): the two disabled queries below share one tag, and the first calls multi_distinct_distribute_key (no "count") - confirm both before enabling.
+    // order_qt_agg_window_multi_distinct_count_distribute_key "select id, multi_distinct_distribute_key(id) over(partition by id) from test_aggregate_window_functions;"
+    // order_qt_agg_window_multi_distinct_count_distribute_key "select id, multi_distinct_count_distribute_key(price) over(partition by id) from test_aggregate_window_functions;"
+
+    // multi_distinct_sum
+    order_qt_agg_window_multi_distinct_sum "select id, multi_distinct_sum(price) over(partition by id) from test_aggregate_window_functions;"
+    // Rebuild the table as (id, id2, user_ids varchar(64)); id 1 has one NULL user_ids. Used by the bitmap_*, collect_*, array_agg, group_concat and sum0 checks.
+    sql """
+        drop table if exists test_aggregate_window_functions;
+    """
+    sql """
+        create table test_aggregate_window_functions (
+            id bigint,
+            id2 bigint,
+            user_ids varchar(64)
+        ) distributed by hash(id) properties('replication_num' = '1');
+    """
+    sql """
+        insert into test_aggregate_window_functions values
+            (1, 1, '1,2'),
+            (1, 3, '1,2'),
+            (1, 2, '1,3'),
+            (1, 6, null),
+            (2, 2, '2,3'),
+            (2, 5, '2,3'),
+            (2, 9, '2,4'),
+            (3, 10, '3'),
+            (3, 1, '4'),
+            (3, 5, '5'),
+            (3, 9, '6');
+    """
+    // bitmap_union: user_ids is converted with bitmap_from_string before aggregation in all four bitmap checks
+    order_qt_agg_window_bitmap_union "select id, bitmap_to_string(bitmap_union(bitmap_from_string(user_ids)) over(partition by id)) from test_aggregate_window_functions;"
+    // bitmap_intersect
+    order_qt_agg_window_bitmap_intersect "select id, bitmap_to_string(bitmap_intersect(bitmap_from_string(user_ids)) over(partition by id)) from test_aggregate_window_functions;"
+    // group_bitmap_xor
+    order_qt_agg_window_group_bitmap_xor "select id, bitmap_to_string(group_bitmap_xor(bitmap_from_string(user_ids)) over(partition by id)) from test_aggregate_window_functions;"
+    // bitmap_union_count
+    order_qt_agg_window_bitmap_union_count "select id, bitmap_union_count(bitmap_from_string(user_ids)) over(partition by id) from test_aggregate_window_functions;"
+
+    // collect_list: these windows order by user_ids inside each id partition
+    order_qt_agg_window_collect_list "select id, collect_list(user_ids) over(partition by id order by user_ids) from test_aggregate_window_functions;"
+    // collect_set
+    order_qt_agg_window_collect_set "select id, collect_set(user_ids) over(partition by id order by user_ids) from test_aggregate_window_functions;"
+    // array_agg
+    order_qt_agg_window_array_agg "select id, array_agg(user_ids) over(partition by id order by user_ids) from test_aggregate_window_functions;"
+
+    // group_concat
+    order_qt_agg_window_group_concat "select id, group_concat(user_ids) over(partition by id order by user_ids) from test_aggregate_window_functions;"
+    // group_concat distinct (disabled; the recorded error follows)
+    // DISTINCT not allowed in analytic function: group_concat(line 1, pos 11)
+    // order_qt_agg_window_group_concat_distinct "select id, group_concat(distinct user_ids) over(partition by id) from test_aggregate_window_functions;"
+    // group_concat order by (disabled; the recorded error follows)
+    // java.sql.SQLException: errCode = 2, detailMessage = Cannot invoke "org.apache.doris.analysis.Expr.getChildren()" because "root" is null
+    // order_qt_agg_window_group_concat_order_by "select id, group_concat(user_ids order by id2) over(partition by id) from test_aggregate_window_functions;"
+    // sum0
+    order_qt_agg_window_sum0 "select id, sum0(id2) over(partition by id) from test_aggregate_window_functions;"
+
+    // group_array_intersect. NOTE(review): the user_ids column below is declared as bare "array" with no element type; this looks truncated - verify against the upstream file.
+    sql """
+        drop table if exists test_aggregate_window_functions;
+    """
+    sql """
+        create table test_aggregate_window_functions (
+            id bigint,
+            user_ids array
+        ) distributed by hash(id) properties('replication_num' = '1');
+    """
+    sql """
+        insert into test_aggregate_window_functions values
+            (1, [1,2]),
+            (1, [1,2]),
+            (1, [1,3]),
+            (1, null),
+            (2, [2,3]),
+            (2, [2,3]),
+            (2, [2,4]),
+            (3, [3,4]),
+            (3, [4,3,null]),
+            (3, [5,3,4]),
+            (3, [3,6,4]);
+    """
+    order_qt_agg_window_group_array_intersect "select id, array_sort(group_array_intersect(user_ids) over(partition by id)) from test_aggregate_window_functions;"
+
+    // hll_union_agg / hll_union: separate aggregate-key table test_window_func_hll with an hll_union column; the last inserted row uses hll_empty().
+    sql """drop TABLE if EXISTS test_window_func_hll;"""
+    sql """
+        create table test_window_func_hll(
+            dt date,
+            id int,
+            name char(10),
+            province char(10),
+            os char(10),
+            pv hll hll_union
+        )
+        Aggregate KEY (dt,id,name,province,os)
+        distributed by hash(id) buckets 10
+        PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1",
+            "storage_format" = "V2"
+        )
+    """
+    sql """
+        insert into test_window_func_hll
+        SELECT
+            dt,id,name,province,os,pv
+        from (
+            SELECT '2022-05-05' as dt,'10001' as id,'test01' as name,'beijing' as province,'windows' as os,hll_hash('windows') as pv
+            union all
+            SELECT '2022-05-05' as dt,'10002' as id,'test01' as name,'beijing' as province,'linux' as os,hll_hash('linux') as pv
+            union all
+            SELECT '2022-05-05' as dt,'10003' as id,'test01' as name,'beijing' as province,'macos' as os,hll_hash('macos') as pv
+            union all
+            SELECT '2022-05-05' as dt,'10004' as id,'test01' as name,'hebei' as province,'windows' as os,hll_hash('windows') as pv
+            union all
+            SELECT '2022-05-06' as dt,'10001' as id,'test01' as name,'shanghai' as province,'windows' as os,hll_hash('windows') as pv
+            union all
+            SELECT '2022-05-06' as dt,'10002' as id,'test01' as name,'shanghai' as province,'linux' as os,hll_hash('linux') as pv
+            union all
+            SELECT '2022-05-06' as dt,'10003' as id,'test01' as name,'jiangsu' as province,'macos' as os,hll_hash('macos') as pv
+            union all
+            SELECT '2022-05-06' as dt,'10004' as id,'test01' as name,'shanxi' as province,'windows' as os,hll_hash('windows') as pv
+            union all
+            SELECT '2022-05-07' as dt,'10005' as id,'test01' as name,'shanxi' as province,'windows' as os,hll_empty() as pv
+        ) as a
+    """
+    order_qt_window_func_hll_union_agg "select province, os, hll_union_agg(pv) over(partition by province) from test_window_func_hll;"
+    order_qt_window_func_hll_union "select province, os, hll_cardinality(hll_union(pv) over(partition by province)) from test_window_func_hll;"
+
+    // map_agg: separate table test_map_agg holding three (label_name, value_field) pairs for each id 1..5.
+    sql "DROP TABLE IF EXISTS `test_map_agg`;"
+    sql """
+        CREATE TABLE IF NOT EXISTS `test_map_agg` (
+            `id` int(11) NOT NULL,
+            `label_name` varchar(32) NOT NULL,
+            `value_field` string
+        )
+        DISTRIBUTED BY HASH(`id`)
+        PROPERTIES (
+        "replication_num" = "1"
+        );
+    """
+
+    sql """
+        insert into `test_map_agg` values
+            (1, "LA", "V1_1"),
+            (1, "LB", "V1_2"),
+            (1, "LC", "V1_3"),
+            (2, "LA", "V2_1"),
+            (2, "LB", "V2_2"),
+            (2, "LC", "V2_3"),
+            (3, "LA", "V3_1"),
+            (3, "LB", "V3_2"),
+            (3, "LC", "V3_3"),
+            (4, "LA", "V4_1"),
+            (4, "LB", "V4_2"),
+            (4, "LC", "V4_3"),
+            (5, "LA", "V5_1"),
+            (5, "LB", "V5_2"),
+            (5, "LC", "V5_3");
+    """
+    order_qt_map_agg "select id, map_agg(label_name, value_field) over(partition by id) from test_map_agg;"
+
+    // quantile_state: aggregate-key table with a QUANTILE_UNION column; one value is inserted under key (20220201, 0) and five under key (20220201, 1).
+    sql """
+        drop table if exists test_aggregate_window_functions;
+    """
+    sql """
+        CREATE TABLE test_aggregate_window_functions (
+            `dt` int(11) NULL COMMENT "",
+            `id` int(11) NULL COMMENT "",
+            `price` quantile_state QUANTILE_UNION NOT NULL COMMENT ""
+        ) ENGINE=OLAP
+        AGGREGATE KEY(`dt`, `id`)
+        DISTRIBUTED BY HASH(`dt`)
+        PROPERTIES (
+            "replication_num" = "1"
+        );
+    """
+    sql """INSERT INTO test_aggregate_window_functions values(20220201,0, to_quantile_state(1, 2048))"""
+    sql """INSERT INTO test_aggregate_window_functions values(20220201,1, to_quantile_state(-1, 2048)),
+            (20220201,1, to_quantile_state(0, 2048)),(20220201,1, to_quantile_state(1, 2048)),
+            (20220201,1, to_quantile_state(2, 2048)),(20220201,1, to_quantile_state(3, 2048))
+        """
+
+    // quantile_union. NOTE(review): despite the agg_window_ tag this query uses GROUP BY and has no OVER clause - confirm whether a window variant was intended.
+    order_qt_agg_window_quantile_union """select dt, id, quantile_percent(quantile_union(price), 0.5) from test_aggregate_window_functions group by dt, id;"""
+
+    // retention: retention_0 evaluates the expression with GROUP BY id; retention_1 evaluates the same expression with over (partition by id).
+    sql """
+        drop table if exists test_aggregate_window_functions;
+    """
+    sql """
+        CREATE TABLE test_aggregate_window_functions(
+            id TINYINT,
+            action STRING,
+            time DATETIME
+        ) DUPLICATE KEY(id)
+        DISTRIBUTED BY HASH(id)
+        PROPERTIES (
+            "replication_num" = "1"
+        );
+    """
+
+    sql """
+        INSERT INTO test_aggregate_window_functions VALUES
+            (1,'pv','2022-01-01 08:00:05'),
+            (2,'pv','2022-01-01 10:20:08'),
+            (1,'buy','2022-01-02 15:30:10'),
+            (2,'pv','2022-01-02 17:30:05'),
+            (3,'buy','2022-01-01 05:30:09'),
+            (3,'buy','2022-01-02 08:10:15'),
+            (4,'pv','2022-01-02 21:09:15'),
+            (5,'pv','2022-01-01 22:10:53'),
+            (5,'pv','2022-01-02 19:10:52'),
+            (5,'buy','2022-01-02 20:00:50');
+    """
+    order_qt_agg_window_retention_0 """
+        select id, retention(action='pv' and to_date(time)='2022-01-01',
+                             action='buy' and to_date(time)='2022-01-02') as retention
+        from test_aggregate_window_functions
+        group by id;"""
+    order_qt_agg_window_retention_1 """
+        select id, retention(action='pv' and to_date(time)='2022-01-01',
+                             action='buy' and to_date(time)='2022-01-02') over (partition by id) as retention
+        from test_aggregate_window_functions;"""
+
+    // sequence_match and sequence_count: both use pattern '(?1)(?2)' with conditions number = 1 and number = 5, partitioned by uid.
+    sql """
+        drop table if exists test_aggregate_window_functions;
+    """
+    sql """
+        CREATE TABLE test_aggregate_window_functions(
+            `uid` int,
+            `date` datetime,
+            `number` int
+        ) DISTRIBUTED BY HASH(uid)
+        PROPERTIES (
+            "replication_num" = "1"
+        );
+    """
+    sql """
+        INSERT INTO test_aggregate_window_functions values
+            (1, '2022-11-01 10:41:00', 1),
+            (1, '2022-11-01 11:41:00', 5),
+            (1, '2022-11-01 12:41:00', 7),
+            (1, '2022-11-01 12:42:00', 9),
+            (1, '2022-11-01 12:52:00', 1),
+            (1, '2022-11-01 13:41:00', 4),
+            (1, '2022-11-01 13:51:00', 3),
+            (1, '2022-11-01 14:51:00', 5),
+            (2, '2022-11-01 20:41:00', 1),
+            (2, '2022-11-01 23:51:00', 3),
+            (2, '2022-11-01 22:41:00', 7),
+            (2, '2022-11-01 22:42:00', 9),
+            (2, '2022-11-01 23:41:00', 4);
+    """
+    order_qt_agg_window_sequence_match "select uid, sequence_match('(?1)(?2)', date, number = 1, number = 5) over(partition by uid) from test_aggregate_window_functions;"
+    order_qt_agg_window_sequence_count "select uid, sequence_count('(?1)(?2)', date, number = 1, number = 5) over(partition by uid) from test_aggregate_window_functions;"
+    // Fixture for the *_foreach checks. NOTE(review): columns a, b and s below read "array" / "array>" with no element type; this looks truncated - verify against the upstream file.
+    sql """
+        drop table if exists test_aggregate_window_functions;
+    """
+    sql """
+        CREATE TABLE test_aggregate_window_functions(
+            `id` INT(11) null COMMENT "",
+            `a` array null COMMENT "",
+            `b` array> null COMMENT "",
+            `s` array null COMMENT ""
+        ) ENGINE=OLAP
+        DUPLICATE KEY(`id`)
+        DISTRIBUTED BY HASH(`id`) BUCKETS 1
+        PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1",
+            "storage_format" = "V2"
+        );
+    """
+    sql """
+        insert into test_aggregate_window_functions values
+            (1,[1,2,3],[[1],[1,2,3],[2]],["ab","123","114514"]),
+            (2,[20],[[2]],["cd"]),
+            (3,[100],[[1]],["efg"]) ,
+            (4,null,[null],null),
+            (5,[null,2],[[2],null],[null,'c']);
+    """
+    // sum_foreach: the second query has no partition and uses the frame "rows between 2 preceding and 1 preceding" ordered by id.
+    order_qt_agg_window_sum_foreach "select id, sum_foreach(a) over(partition by id) from test_aggregate_window_functions;"
+    order_qt_agg_window_sum_foreach2 "select id, sum_foreach(a) over(order by id rows between 2 preceding and 1 preceding) from test_aggregate_window_functions;"
+    // covar_foreach: column a is passed as both arguments
+    order_qt_agg_window_covar_foreach "select id, covar_foreach(a, a) over(partition by id) from test_aggregate_window_functions;"
+    // Fixture for the group_concat state/union/merge checks: six rows, i.e. three distinct rows each inserted twice. NOTE(review): kastr is declared as bare "array" - verify upstream.
+    sql "drop table if exists test_aggregate_window_functions"
+
+    sql """
+        CREATE TABLE IF NOT EXISTS `test_aggregate_window_functions` (
+            `kint` int(11) not null,
+            `kbint` int(11) not null,
+            `kstr` string not null,
+            `kstr2` string not null,
+            `kastr` array not null
+        ) engine=olap
+        DISTRIBUTED BY HASH(`kint`) BUCKETS 4
+        properties("replication_num" = "1");
+    """
+
+    sql """
+        INSERT INTO `test_aggregate_window_functions` VALUES
+        ( 1, 1, 'string1', 'string3', ['s11', 's12', 's13'] ),
+        ( 1, 2, 'string2', 'string1', ['s21', 's22', 's23'] ),
+        ( 2, 3, 'string3', 'string2', ['s31', 's32', 's33'] ),
+        ( 1, 1, 'string1', 'string3', ['s11', 's12', 's13'] ),
+        ( 1, 2, 'string2', 'string1', ['s21', 's22', 's23'] ),
+        ( 2, 3, 'string3', 'string2', ['s31', 's32', 's33'] );
+    """
+
+    order_qt_agg_window_group_concat_state1 "select kint, group_concat(kstr) over(partition by kint) from test_aggregate_window_functions;"
+    sql "select kint, group_concat_union(group_concat_state(kstr)) over(partition by kint) from test_aggregate_window_functions;" // plain sql call with no order_qt_ tag, so there is no expected-output section for it
+    order_qt_agg_window_group_concat_state_merge "select kint, group_concat_merge(group_concat_state(kstr)) over(partition by kint) from test_aggregate_window_functions;"
+    // Fixture for the orthogonal_bitmap_* checks: aggregate-key table (tag_group, bucket) with a BITMAP_UNION members column.
+    sql """
+        drop table if exists test_aggregate_window_functions;
+    """
+    sql """ CREATE TABLE IF NOT EXISTS test_aggregate_window_functions (
+        tag_group bigint(20) NULL COMMENT "标签组",
+        bucket int(11) NOT NULL COMMENT "分桶字段",
+        members bitmap BITMAP_UNION COMMENT "人群") ENGINE=OLAP
+        AGGREGATE KEY(tag_group, bucket)
+        DISTRIBUTED BY HASH(bucket) BUCKETS 64
+        PROPERTIES (
+        "replication_allocation" = "tag.location.default: 1");
+    """
+    sql """
+        insert into test_aggregate_window_functions values
+        (1, 1, bitmap_from_string('1,2,3,4')),
+        (2, 1, bitmap_from_string('1,2,3')),
+        (3, 1, bitmap_from_string('1,2')),
+        (1, 2, bitmap_from_string('2,3,4,5,6')),
+        (2, 2, bitmap_from_string('2,3,4')),
+        (3, 2, bitmap_from_string('2,3'));
+    """
+    order_qt_agg_window_orthogonal_bitmap1 "select bucket, bitmap_to_string(orthogonal_bitmap_intersect(members, tag_group, 1, 2, 3) over(partition by bucket)) from test_aggregate_window_functions;"
+    order_qt_agg_window_orthogonal_bitmap2 "select bucket, orthogonal_bitmap_intersect_count(members, tag_group, 1, 2, 3) over(partition by bucket) from test_aggregate_window_functions;"
+    order_qt_agg_window_orthogonal_bitmap3 "select bucket, orthogonal_bitmap_union_count(members) over(partition by bucket) from test_aggregate_window_functions;"
+
+    // window_funnel: called with 3600, "fixed", event_timestamp and four event_name conditions, partitioned by user_id.
+    sql """
+        drop table if exists test_aggregate_window_functions;
+    """
+    sql """
+        CREATE TABLE test_aggregate_window_functions(
+            user_id BIGINT,
+            event_name VARCHAR(64),
+            event_timestamp datetime,
+            phone_brand varchar(64),
+            tab_num int
+        ) distributed by hash(event_timestamp) buckets 3 properties("replication_num"="1");
+    """
+    sql """
+        INSERT INTO test_aggregate_window_functions VALUES
+            (100123, '登录', '2022-05-14 10:01:00', 'HONOR', 1),
+            (100123, '访问', '2022-05-14 10:02:00', 'HONOR', 2),
+            (100123, '登录2', '2022-05-14 10:03:00', 'HONOR', 3),
+            (100123, '下单', '2022-05-14 10:04:00', "HONOR", 4),
+            (100123, '付款', '2022-05-14 10:10:00', 'HONOR', 4),
+            (100125, '登录', '2022-05-15 11:00:00', 'XIAOMI', 1),
+            (100125, '访问', '2022-05-15 11:01:00', 'XIAOMI', 2),
+            (100125, '下单', '2022-05-15 11:02:00', 'XIAOMI', 6),
+            (100126, '登录', '2022-05-15 12:00:00', 'IPHONE', 1),
+            (100126, '访问', '2022-05-15 12:01:00', 'HONOR', 2),
+            (100127, '登录', '2022-05-15 11:30:00', 'VIVO', 1),
+            (100127, '访问', '2022-05-15 11:31:00', 'VIVO', 5);
+    """
+    order_qt_agg_window_window_funnel """
+        select user_id, window_funnel(3600, "fixed", event_timestamp, event_name = '登录', event_name = '访问', event_name = '下单', event_name = '付款') over(partition by user_id) from test_aggregate_window_functions;
+    """
+
+}
\ No newline at end of file