From 11919be6f96d03cb6e29b22a43077de5259c1352 Mon Sep 17 00:00:00 2001 From: Gabriel Date: Thu, 6 Feb 2025 09:39:34 +0800 Subject: [PATCH] [fix](local shuffle) Set serial execution for schema scan operator (#47498) `select * from information_schema.workload_group_resource_usage;` got duplicate rows if `enable_local_shuffle` is true. This is caused by a serial schema scan operator which is not set correctly. A regression test case could not be used because the `workload_group_resource_usage` will not get a stable result. --- be/src/pipeline/exec/schema_scan_operator.cpp | 4 +++- .../suites/query_p0/system/test_query_sys.groovy | 8 ++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/be/src/pipeline/exec/schema_scan_operator.cpp b/be/src/pipeline/exec/schema_scan_operator.cpp index 53b05b55d35d2c..dc65a57f4e788b 100644 --- a/be/src/pipeline/exec/schema_scan_operator.cpp +++ b/be/src/pipeline/exec/schema_scan_operator.cpp @@ -83,7 +83,9 @@ SchemaScanOperatorX::SchemaScanOperatorX(ObjectPool* pool, const TPlanNode& tnod _common_scanner_param(new SchemaScannerCommonParam()), _tuple_id(tnode.schema_scan_node.tuple_id), _tuple_idx(0), - _slot_num(0) {} + _slot_num(0) { + Base::_is_serial_operator = tnode.__isset.is_serial_operator && tnode.is_serial_operator; +} Status SchemaScanOperatorX::init(const TPlanNode& tnode, RuntimeState* state) { RETURN_IF_ERROR(Base::init(tnode, state)); diff --git a/regression-test/suites/query_p0/system/test_query_sys.groovy b/regression-test/suites/query_p0/system/test_query_sys.groovy index 9fbe5e8435a7b9..8c2c260f27c9fe 100644 --- a/regression-test/suites/query_p0/system/test_query_sys.groovy +++ b/regression-test/suites/query_p0/system/test_query_sys.groovy @@ -53,4 +53,12 @@ suite("test_query_sys", "query,p0") { sql "select * from http_stream('format'='csv');" exception "No Alive backends" } + + // `workload_group_resource_usage` will be refresh 30s after BE startup so sleep 30s to get a stable result + sleep(30000) + sql """set parallel_pipeline_task_num=8""" + def rows1 = sql """ select count(*) from information_schema.workload_group_resource_usage; """ + sql """set parallel_pipeline_task_num=1""" + def rows2 = sql """ select count(*) from information_schema.workload_group_resource_usage; """ + assertEquals(rows1, rows2) }