From 4e130db24e3a745831f2e60133229995a7366789 Mon Sep 17 00:00:00 2001 From: zhangstar333 Date: Wed, 18 Jun 2025 15:09:26 +0800 Subject: [PATCH 1/7] update --- be/src/vec/aggregate_functions/aggregate_function_bitmap.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/be/src/vec/aggregate_functions/aggregate_function_bitmap.h b/be/src/vec/aggregate_functions/aggregate_function_bitmap.h index a442fc3e1c55b2..80bc6583d8e19b 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_bitmap.h +++ b/be/src/vec/aggregate_functions/aggregate_function_bitmap.h @@ -50,6 +50,7 @@ struct AggregateFunctionBitmapUnionOp { template static void add(BitmapValue& res, const T& data, bool& is_first) { res.add(data); + is_first = false; } static void add(BitmapValue& res, const BitmapValue& data, bool& is_first) { @@ -63,6 +64,7 @@ struct AggregateFunctionBitmapUnionOp { static void add_batch(BitmapValue& res, std::vector& data, bool& is_first) { res.fastunion(data); + is_first = false; } static void merge(BitmapValue& res, const BitmapValue& data, bool& is_first) { From ee48b8d56e21143c5b033a0ba8659a0315df56d4 Mon Sep 17 00:00:00 2001 From: zhangstar333 Date: Wed, 18 Jun 2025 16:39:04 +0800 Subject: [PATCH 2/7] [Bug](distinct) fix distinct function with over return error result --- .../aggregate_function_bitmap.h | 2 -- .../aggregate_function_distinct.h | 1 + .../window_functions/test_sum.out | 19 ++++++++++++++++ .../window_functions/test_sum.groovy | 22 +++++++++++++++++++ 4 files changed, 42 insertions(+), 2 deletions(-) diff --git a/be/src/vec/aggregate_functions/aggregate_function_bitmap.h b/be/src/vec/aggregate_functions/aggregate_function_bitmap.h index 80bc6583d8e19b..a442fc3e1c55b2 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_bitmap.h +++ b/be/src/vec/aggregate_functions/aggregate_function_bitmap.h @@ -50,7 +50,6 @@ struct AggregateFunctionBitmapUnionOp { template static void add(BitmapValue& res, const T& data, bool& is_first) { res.add(data); - is_first = false; } static void add(BitmapValue& res, const BitmapValue& data, bool& is_first) { @@ -64,7 +63,6 @@ struct AggregateFunctionBitmapUnionOp { static void add_batch(BitmapValue& res, std::vector& data, bool& is_first) { res.fastunion(data); - is_first = false; } static void merge(BitmapValue& res, const BitmapValue& data, bool& is_first) { diff --git a/be/src/vec/aggregate_functions/aggregate_function_distinct.h b/be/src/vec/aggregate_functions/aggregate_function_distinct.h index ac49f8213c6719..4f2124e8a25108 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_distinct.h +++ b/be/src/vec/aggregate_functions/aggregate_function_distinct.h @@ -322,6 +322,7 @@ class AggregateFunctionDistinct nested_func->add_batch_single_place(arguments[0]->size(), get_nested_place(place), arguments_raw.data(), &arena); nested_func->insert_result_into(get_nested_place(place), to); + nested_func->reset(get_nested_place(place)); } size_t size_of_data() const override { return prefix_size + nested_func->size_of_data(); } diff --git a/regression-test/data/query_p0/sql_functions/window_functions/test_sum.out b/regression-test/data/query_p0/sql_functions/window_functions/test_sum.out index 9185f64fa6e8ee..672c114b85ceda 100644 --- a/regression-test/data/query_p0/sql_functions/window_functions/test_sum.out +++ b/regression-test/data/query_p0/sql_functions/window_functions/test_sum.out @@ -4,3 +4,22 @@ 2 1243.500 3 24453.325 +-- !sql_window_muti1 -- +a +a +a +a + +-- !sql_window_muti2 -- +1 +1 +1 +1 + +-- !sql_window_muti3 -- +1 +1 +1 +1 + + diff --git a/regression-test/suites/query_p0/sql_functions/window_functions/test_sum.groovy b/regression-test/suites/query_p0/sql_functions/window_functions/test_sum.groovy index 3611400568dcbe..e61f586181abb6 100644 --- a/regression-test/suites/query_p0/sql_functions/window_functions/test_sum.groovy +++ b/regression-test/suites/query_p0/sql_functions/window_functions/test_sum.groovy @@ -21,5 +21,27 @@ suite("test_sum") { (partition by k1 order by k3 range between current row and unbounded following) as w from test_query_db.test order by k1, w """ + + sql "create database if not exists multi_db" + sql "use multi_db" + sql "DROP TABLE IF EXISTS multi" + sql """ + CREATE TABLE multi ( + id int, + v1 int, + v2 varchar + ) ENGINE = OLAP + DUPLICATE KEY(id) COMMENT 'OLAP' + DISTRIBUTED BY HASH(id) BUCKETS 2 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ); + """ + sql """ + insert into multi values (1, 1, 'a'),(1, 1, 'a'), (2, 1, 'a'), (3, 1, 'a'); + """ + qt_sql_window_muti1 """ select multi_distinct_group_concat(v2) over() from multi; """ + qt_sql_window_muti2 """ select multi_distinct_sum(v1) over() from multi; """ + qt_sql_window_muti3 """ select multi_distinct_count(v1) over() from multi; """ } From 73e5d299b955e24b2e6587b6cc21517c86321c81 Mon Sep 17 00:00:00 2001 From: zhangstar333 Date: Wed, 18 Jun 2025 19:53:51 +0800 Subject: [PATCH 3/7] update --- .../test_aggregate_window_functions.out | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/regression-test/data/nereids_p0/sql_functions/aggregate_functions/test_aggregate_window_functions.out b/regression-test/data/nereids_p0/sql_functions/aggregate_functions/test_aggregate_window_functions.out index ae6d1ad35a222f..006cea921b6835 100644 --- a/regression-test/data/nereids_p0/sql_functions/aggregate_functions/test_aggregate_window_functions.out +++ b/regression-test/data/nereids_p0/sql_functions/aggregate_functions/test_aggregate_window_functions.out @@ -505,23 +505,23 @@ sichuan [{"cbe":{},"notnull":0,"null":1,"all":1}] 3 4 -- !agg_window_multi_distinct_sum -- -1 11320987615.0000000119 1 1617283945.0000000017 -1 3234567890.0000000034 -1 4851851835.0000000051 -1 6469135780.0000000068 -1 8086419725.0000000085 -1 9703703670.0000000102 +1 1617283945.0000000017 +1 1617283945.0000000017 +1 1617283945.0000000017 +1 1617283945.0000000017 +1 1617283945.0000000017 +1 1617283945.0000000017 +2 1217283945.0000000026 2 1217283945.0000000026 -2 2434567890.0000000052 -2 3651851835.0000000078 -2 4869135780.0000000104 -2 6086419725.0000000130 +2 1217283945.0000000026 +2 1217283945.0000000026 +2 1217283945.0000000026 +3 1093827157.0000000020 +3 1093827157.0000000020 +3 1093827157.0000000020 +3 1093827157.0000000020 3 1093827157.0000000020 -3 2187654314.0000000040 -3 3281481471.0000000060 -3 4375308628.0000000080 -3 5469135785.0000000100 -- !agg_window_bitmap_union -- 1 1,2,3 From 9318fed02282bbbbc109aa1f0babe98f634fe100 Mon Sep 17 00:00:00 2001 From: zhangstar333 Date: Wed, 18 Jun 2025 19:55:40 +0800 Subject: [PATCH 4/7] update --- .../data/query_p0/sql_functions/window_functions/test_sum.out | 1 - 1 file changed, 1 deletion(-) diff --git a/regression-test/data/query_p0/sql_functions/window_functions/test_sum.out b/regression-test/data/query_p0/sql_functions/window_functions/test_sum.out index 672c114b85ceda..84f4bccb2d1049 100644 --- a/regression-test/data/query_p0/sql_functions/window_functions/test_sum.out +++ b/regression-test/data/query_p0/sql_functions/window_functions/test_sum.out @@ -22,4 +22,3 @@ a 1 1 - From 58ddc5fc188cf70dc296fc0eb6cbb465000c38cd Mon Sep 17 00:00:00 2001 From: zhangstar333 Date: Thu, 19 Jun 2025 11:30:51 +0800 Subject: [PATCH 5/7] update --- .../aggregate_functions/aggregate_function_distinct.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/be/src/vec/aggregate_functions/aggregate_function_distinct.h b/be/src/vec/aggregate_functions/aggregate_function_distinct.h index 4f2124e8a25108..bd1aa508594023 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_distinct.h +++ b/be/src/vec/aggregate_functions/aggregate_function_distinct.h @@ -270,6 +270,7 @@ class AggregateFunctionDistinct size_t prefix_size; AggregateFunctionPtr nested_func; size_t arguments_num; + mutable bool has_new_elements = false; AggregateDataPtr get_nested_place(AggregateDataPtr __restrict place) const noexcept { return place + prefix_size; @@ -293,11 +294,13 @@ class AggregateFunctionDistinct void add(AggregateDataPtr __restrict place, const IColumn** columns, ssize_t row_num, Arena* arena) const override { this->data(place).add(columns, arguments_num, row_num, arena); + has_new_elements = true; } void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena* arena) const override { this->data(place).merge(this->data(rhs), arena); + has_new_elements = true; } void serialize(ConstAggregateDataPtr __restrict place, BufferWritable& buf) const override { @@ -319,10 +322,12 @@ class AggregateFunctionDistinct assert(!arguments.empty()); Arena arena; - nested_func->add_batch_single_place(arguments[0]->size(), get_nested_place(place), - arguments_raw.data(), &arena); + if (has_new_elements) { + nested_func->add_batch_single_place(arguments[0]->size(), get_nested_place(place), + arguments_raw.data(), &arena); + has_new_elements = false; + } nested_func->insert_result_into(get_nested_place(place), to); - nested_func->reset(get_nested_place(place)); } size_t size_of_data() const override { return prefix_size + nested_func->size_of_data(); } From c578792b036568f2b5bb60ab07adb6379b6855eb Mon Sep 17 00:00:00 2001 From: zhangstar333 Date: Thu, 19 Jun 2025 11:58:28 +0800 Subject: [PATCH 6/7] update --- .../aggregate_function_distinct.h | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/be/src/vec/aggregate_functions/aggregate_function_distinct.h b/be/src/vec/aggregate_functions/aggregate_function_distinct.h index bd1aa508594023..bec69da75d7fe2 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_distinct.h +++ b/be/src/vec/aggregate_functions/aggregate_function_distinct.h @@ -65,6 +65,8 @@ struct AggregateFunctionDistinctSingleNumericData { using Self = AggregateFunctionDistinctSingleNumericData; Container data; + void clear() { data.clear(); } + void add(const IColumn** columns, size_t /* columns_num */, size_t row_num, Arena*) { const auto& vec = assert_cast&, TypeCheckOnRelease::DISABLE>(*columns[0]) @@ -135,6 +137,8 @@ struct AggregateFunctionDistinctGenericData { using Self = AggregateFunctionDistinctGenericData; Container data; + void clear() { data.clear(); } + void merge(const Self& rhs, Arena* arena) { DCHECK(!stable); if constexpr (!stable) { @@ -270,7 +274,6 @@ class AggregateFunctionDistinct size_t prefix_size; AggregateFunctionPtr nested_func; size_t arguments_num; - mutable bool has_new_elements = false; AggregateDataPtr get_nested_place(AggregateDataPtr __restrict place) const noexcept { return place + prefix_size; @@ -294,13 +297,11 @@ class AggregateFunctionDistinct void add(AggregateDataPtr __restrict place, const IColumn** columns, ssize_t row_num, Arena* arena) const override { this->data(place).add(columns, arguments_num, row_num, arena); - has_new_elements = true; } void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena* arena) const override { this->data(place).merge(this->data(rhs), arena); - has_new_elements = true; } void serialize(ConstAggregateDataPtr __restrict place, BufferWritable& buf) const override { @@ -322,12 +323,15 @@ class AggregateFunctionDistinct assert(!arguments.empty()); Arena arena; - if (has_new_elements) { - nested_func->add_batch_single_place(arguments[0]->size(), get_nested_place(place), - arguments_raw.data(), &arena); - has_new_elements = false; - } + nested_func->add_batch_single_place(arguments[0]->size(), get_nested_place(place), + arguments_raw.data(), &arena); nested_func->insert_result_into(get_nested_place(place), to); + this->data(place).clear(); + } + + void reset(AggregateDataPtr place) const override { + this->data(place).clear(); + nested_func->reset(get_nested_place(place)); } size_t size_of_data() const override { return prefix_size + nested_func->size_of_data(); } From 571fbb837d2bb62cc18c7160b02068297099c99f Mon Sep 17 00:00:00 2001 From: zhangstar333 Date: Tue, 24 Jun 2025 14:31:22 +0800 Subject: [PATCH 7/7] update revire --- be/src/vec/aggregate_functions/aggregate_function_distinct.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/be/src/vec/aggregate_functions/aggregate_function_distinct.h b/be/src/vec/aggregate_functions/aggregate_function_distinct.h index bec69da75d7fe2..616d8a1e9a913b 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_distinct.h +++ b/be/src/vec/aggregate_functions/aggregate_function_distinct.h @@ -326,6 +326,9 @@ class AggregateFunctionDistinct nested_func->add_batch_single_place(arguments[0]->size(), get_nested_place(place), arguments_raw.data(), &arena); nested_func->insert_result_into(get_nested_place(place), to); + // for distinct agg function, the real calculate is add_batch_single_place at last step of insert_result_into function. + // but with distinct agg and over() window function together, the result will be inserted into many times with different rows + // so we need to clear the data, thus not to affect the next insert_result_into this->data(place).clear(); }