From 2c7f025169de8848a17b60ff59daf97bfd91cd8b Mon Sep 17 00:00:00 2001 From: zhangstar333 Date: Wed, 25 Jun 2025 19:38:35 +0800 Subject: [PATCH] [Bug](distinct) fix distinct function with over return error result (#51875) ### What problem does this PR solve? Problem Summary: Before this fix the result was not correct (see the output below): after inserting the result rows, all state of the nested function should be reset, so that the result of the next row is not affected by the previous one. ``` mysql> select id, v1, multi_distinct_group_concat(v2) over() from multi; +------+------+----------------------------------------+ | id | v1 | multi_distinct_group_concat(v2) over() | +------+------+----------------------------------------+ | 3 | 1 | a | | 1 | 1 | a,a | | 1 | 1 | a,a,a | | 2 | 1 | a,a,a,a | +------+------+----------------------------------------+ 4 rows in set (0.21 sec) mysql> select id, v1, multi_distinct_sum(v1) over() from multi; +------+------+-------------------------------+ | id | v1 | multi_distinct_sum(v1) over() | +------+------+-------------------------------+ | 1 | 1 | 1 | | 1 | 1 | 2 | | 2 | 1 | 3 | | 3 | 1 | 4 | +------+------+-------------------------------+ 4 rows in set (0.06 sec) ``` --- .../aggregate_function_distinct.h | 13 +++++++++ .../test_aggregate_window_functions.out | 28 +++++++++---------- .../window_functions/test_sum.out | 18 ++++++++++++ .../window_functions/test_sum.groovy | 22 +++++++++++++++ 4 files changed, 67 insertions(+), 14 deletions(-) diff --git a/be/src/vec/aggregate_functions/aggregate_function_distinct.h b/be/src/vec/aggregate_functions/aggregate_function_distinct.h index d4f1c18df454e9..e6befdf671f6cf 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_distinct.h +++ b/be/src/vec/aggregate_functions/aggregate_function_distinct.h @@ -63,6 +63,8 @@ struct AggregateFunctionDistinctSingleNumericData { using Self = AggregateFunctionDistinctSingleNumericData; Container data; + void clear() { data.clear(); } + void add(const IColumn** columns, size_t /* 
columns_num */, size_t row_num, Arena*) { const auto& vec = assert_cast&, TypeCheckOnRelease::DISABLE>(*columns[0]) @@ -132,6 +134,8 @@ struct AggregateFunctionDistinctGenericData { using Self = AggregateFunctionDistinctGenericData; Container data; + void clear() { data.clear(); } + void merge(const Self& rhs, Arena* arena) { DCHECK(!stable); if constexpr (!stable) { @@ -319,6 +323,15 @@ class AggregateFunctionDistinct nested_func->add_batch_single_place(arguments[0]->size(), get_nested_place(place), arguments_raw.data(), &arena); nested_func->insert_result_into(get_nested_place(place), to); + // for distinct agg function, the real calculate is add_batch_single_place at last step of insert_result_into function. + // but with distinct agg and over() window function together, the result will be inserted into many times with different rows + // so we need to clear the data, thus not to affect the next insert_result_into + this->data(place).clear(); + } + + void reset(AggregateDataPtr place) const override { + this->data(place).clear(); + nested_func->reset(get_nested_place(place)); } size_t size_of_data() const override { return prefix_size + nested_func->size_of_data(); } diff --git a/regression-test/data/nereids_p0/sql_functions/aggregate_functions/test_aggregate_window_functions.out b/regression-test/data/nereids_p0/sql_functions/aggregate_functions/test_aggregate_window_functions.out index ae6d1ad35a222f..006cea921b6835 100644 --- a/regression-test/data/nereids_p0/sql_functions/aggregate_functions/test_aggregate_window_functions.out +++ b/regression-test/data/nereids_p0/sql_functions/aggregate_functions/test_aggregate_window_functions.out @@ -505,23 +505,23 @@ sichuan [{"cbe":{},"notnull":0,"null":1,"all":1}] 3 4 -- !agg_window_multi_distinct_sum -- -1 11320987615.0000000119 1 1617283945.0000000017 -1 3234567890.0000000034 -1 4851851835.0000000051 -1 6469135780.0000000068 -1 8086419725.0000000085 -1 9703703670.0000000102 +1 1617283945.0000000017 +1 
1617283945.0000000017 +1 1617283945.0000000017 +1 1617283945.0000000017 +1 1617283945.0000000017 +1 1617283945.0000000017 +2 1217283945.0000000026 2 1217283945.0000000026 -2 2434567890.0000000052 -2 3651851835.0000000078 -2 4869135780.0000000104 -2 6086419725.0000000130 +2 1217283945.0000000026 +2 1217283945.0000000026 +2 1217283945.0000000026 +3 1093827157.0000000020 +3 1093827157.0000000020 +3 1093827157.0000000020 +3 1093827157.0000000020 3 1093827157.0000000020 -3 2187654314.0000000040 -3 3281481471.0000000060 -3 4375308628.0000000080 -3 5469135785.0000000100 -- !agg_window_bitmap_union -- 1 1,2,3 diff --git a/regression-test/data/query_p0/sql_functions/window_functions/test_sum.out b/regression-test/data/query_p0/sql_functions/window_functions/test_sum.out index 9185f64fa6e8ee..84f4bccb2d1049 100644 --- a/regression-test/data/query_p0/sql_functions/window_functions/test_sum.out +++ b/regression-test/data/query_p0/sql_functions/window_functions/test_sum.out @@ -4,3 +4,21 @@ 2 1243.500 3 24453.325 +-- !sql_window_muti1 -- +a +a +a +a + +-- !sql_window_muti2 -- +1 +1 +1 +1 + +-- !sql_window_muti3 -- +1 +1 +1 +1 + diff --git a/regression-test/suites/query_p0/sql_functions/window_functions/test_sum.groovy b/regression-test/suites/query_p0/sql_functions/window_functions/test_sum.groovy index 3611400568dcbe..e61f586181abb6 100644 --- a/regression-test/suites/query_p0/sql_functions/window_functions/test_sum.groovy +++ b/regression-test/suites/query_p0/sql_functions/window_functions/test_sum.groovy @@ -21,5 +21,27 @@ suite("test_sum") { (partition by k1 order by k3 range between current row and unbounded following) as w from test_query_db.test order by k1, w """ + + sql "create database if not exists multi_db" + sql "use multi_db" + sql "DROP TABLE IF EXISTS multi" + sql """ + CREATE TABLE multi ( + id int, + v1 int, + v2 varchar + ) ENGINE = OLAP + DUPLICATE KEY(id) COMMENT 'OLAP' + DISTRIBUTED BY HASH(id) BUCKETS 2 + PROPERTIES ( + "replication_allocation" = 
"tag.location.default: 1" + ); + """ + sql """ + insert into multi values (1, 1, 'a'),(1, 1, 'a'), (2, 1, 'a'), (3, 1, 'a'); + """ + qt_sql_window_muti1 """ select multi_distinct_group_concat(v2) over() from multi; """ + qt_sql_window_muti2 """ select multi_distinct_sum(v1) over() from multi; """ + qt_sql_window_muti3 """ select multi_distinct_count(v1) over() from multi; """ }