From a85027f88e65f88ab9527a03883c8764889fbe9b Mon Sep 17 00:00:00 2001 From: Hu Shenggang Date: Sat, 17 May 2025 12:11:37 +0800 Subject: [PATCH] [fix](join) The clear_column_data function may unintentionally clear columns shared by others --- .../exec/join/process_hash_table_probe_impl.h | 20 ++-- .../join/mark_join/right_semi_mark_join.out | 63 +++++++++++ .../mark_join/right_semi_mark_join.groovy | 104 ++++++++++++++++++ 3 files changed, 178 insertions(+), 9 deletions(-) create mode 100644 regression-test/data/query_p0/join/mark_join/right_semi_mark_join.out create mode 100644 regression-test/suites/query_p0/join/mark_join/right_semi_mark_join.groovy diff --git a/be/src/pipeline/exec/join/process_hash_table_probe_impl.h b/be/src/pipeline/exec/join/process_hash_table_probe_impl.h index 62fa5505d81b54..29c24205972534 100644 --- a/be/src/pipeline/exec/join/process_hash_table_probe_impl.h +++ b/be/src/pipeline/exec/join/process_hash_table_probe_impl.h @@ -285,6 +285,14 @@ Status ProcessHashTableProbe::process(HashTableType& hash_table_ctx, JoinOpType == TJoinOp::NULL_AWARE_LEFT_SEMI_JOIN) && hash_table_ctx.hash_table ->empty_build_side(); // empty build side will return false to instead null + + if constexpr (JoinOpType == TJoinOp::RIGHT_SEMI_JOIN || + JoinOpType == TJoinOp::RIGHT_ANTI_JOIN) { + if (mark_join_flags.empty()) { + mark_join_flags.resize(hash_table_ctx.hash_table->size(), 0); + } + } + return do_mark_join_conjuncts(output_block, ignore_null_map ? 
nullptr : null_map); } else if (_have_other_join_conjunct) { return do_other_join_conjuncts(output_block, hash_table_ctx.hash_table->get_visited()); @@ -491,12 +499,6 @@ Status ProcessHashTableProbe::do_mark_join_conjuncts(vectorized::Blo } } - if constexpr (is_right_half_join) { - if (mark_join_flags.empty() && _build_block != nullptr) { - mark_join_flags.resize(_build_block->rows(), 0); - } - } - auto filter_column = vectorized::ColumnUInt8::create(row_count, 0); auto* __restrict filter_map = filter_column->get_data().data(); for (size_t i = 0; i != row_count; ++i) { @@ -547,7 +549,7 @@ Status ProcessHashTableProbe::do_mark_join_conjuncts(vectorized::Blo } } // For right semi/anti join, no rows will be output in probe phase. - output_block->clear_column_data(); + output_block->clear(); return Status::OK(); } else { if constexpr (is_anti_join) { @@ -721,8 +723,8 @@ Status ProcessHashTableProbe::finish_probing(HashTableType& hash_tab if constexpr (JoinOpType == TJoinOp::RIGHT_ANTI_JOIN || JoinOpType == TJoinOp::RIGHT_SEMI_JOIN) { if (is_mark_join) { - if (mark_join_flags.empty() && _build_block != nullptr) { - mark_join_flags.resize(_build_block->rows(), 0); + if (mark_join_flags.empty()) { + mark_join_flags.resize(hash_table_ctx.hash_table->size(), 0); } // mark column is nullable diff --git a/regression-test/data/query_p0/join/mark_join/right_semi_mark_join.out b/regression-test/data/query_p0/join/mark_join/right_semi_mark_join.out new file mode 100644 index 00000000000000..e00e19be11e07e --- /dev/null +++ b/regression-test/data/query_p0/join/mark_join/right_semi_mark_join.out @@ -0,0 +1,63 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !test -- +\N \N +0 18332 +1 \N +1 \N +1 \N +2 -56 +3 72 +4 -5581 +5 -62 +5 -62 +6 22979 +6 22979 +6 22979 +6 22979 +6 22979 +6 22979 +6 22979 +6 22979 +7 -41 +7 -41 +7 -41 +7 -41 +8 -54 +9 -6236 +9 -6236 +9 -6236 +9 -6236 +9 -6236 +9 -6236 +10 \N +10 \N +10 \N +10 \N +10 \N +10 \N +10 \N +10 \N +10 \N +11 \N +11 \N +11 \N +12 \N +12 \N +12 \N +12 \N +12 \N +12 \N +12 \N +13 -2343514 +13 -2343514 +13 -2343514 +13 -2343514 +13 -2343514 +13 -2343514 +14 -3361960 +14 -3361960 +14 -3361960 +14 -3361960 +14 -3361960 +14 -3361960 + diff --git a/regression-test/suites/query_p0/join/mark_join/right_semi_mark_join.groovy b/regression-test/suites/query_p0/join/mark_join/right_semi_mark_join.groovy new file mode 100644 index 00000000000000..3557475cdd22c2 --- /dev/null +++ b/regression-test/suites/query_p0/join/mark_join/right_semi_mark_join.groovy @@ -0,0 +1,104 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ + +suite("right_semi_mark_join") { /* Regression suite: creates and fills tbl1, tbl2 and tbl3, then runs one FULL OUTER JOIN query whose ON clause ORs a comparison with a correlated IN-subquery. NOTE(review): judging by the suite name this plan shape is meant to reach the right semi mark join path; confirm. */ + sql "drop table if exists tbl1;" + sql "drop table if exists tbl2;" + sql "drop table if exists tbl3;" /* drop-if-exists first, so the creates below do not collide with tables left by an earlier run */ + + sql """ + create table tbl1 (pk int, col1 bigint, col2 bigint) engine = olap DUPLICATE KEY(pk) distributed by hash(pk) buckets 10 properties("replication_num" = "1"); + """ /* tbl1: duplicate-key table on pk, hash-distributed by pk into 10 buckets, replication_num 1 */ + + sql """ + insert into + tbl1(pk, col1, col2) + values + (0, null, 18332), (1, 788547, null), (2, 4644959, -56), (3, 8364628, 72), (4, null, -5581), + (5, 2344024, -62), (6, -2689177, 22979), (7, 1320, -41), (8, null, -54), (9, 12, -6236), + (10, -8321648, null), (11, 153691, null), (12, -8056, null), (13, -12, -2343514), (14, -35, -3361960); + """ /* 15 rows, pk 0..14; col1 is NULL for pk 0, 4, 8 and col2 is NULL for pk 1, 10, 11, 12 */ + + sql """ + create table tbl2 ( + pk int, col1 bigint, col2 bigint + ) engine = olap + distributed by hash(pk) buckets 4 + properties("replication_num" = "1"); + """ /* tbl2: same three columns, no key clause given, hash-distributed by pk into 4 buckets */ + + sql """ + insert into + tbl2(pk, col1, col2) + values + (0, 108, 31161), (1, 1479175, 6764263), (2, 110, 25), (3, 110, -18656), (4, null, -51), + (5, 21, 27), (6, -6950217, 1585978), (7, null, null), (8, null, 3453467), (9, null, -6701140); + """ /* 10 rows, pk 0..9; col1 is NULL for pk 4, 7, 8, 9 and col2 is NULL for pk 7 */ + + sql """ + create table tbl3 ( + pk int, col1 bigint, col2 bigint, col3 bigint + ) engine = olap + DUPLICATE KEY(pk) distributed by hash(pk) buckets 10 + properties("replication_num" = "1"); + """ /* tbl3: declares an extra col3 column that the insert below does not list */ + + sql """ + insert into + tbl3(pk, col1, col2) + values + (0, 55, -58), (1, 49, 29792), (2, 95, 32361), (3, 31243, -27428), (4, -27400, null), + (5, 31243, null), (6, null, -27428), (7, null, 7), (8, 31243, -21951), (9, 13186, 24466), + (10, null, -8), (11, null, null), (12, -18, 32361), (13, null, -18), (14, 21681, 14079), + (15, 31241, -17653), (16, 5825, 13559), (17, null, -10508), (18, null, 20682), (19, 31243, -98), + (73, -32480, 24424), (74, 31, -27428), (75, 31243, -718), (76, null, 20822), (77, 31243, -27428), + (78, -15934, null), (79, 78, -27428), (80, 8572, -27428), (81, 31243, 4077), (82, null, 114), + (83, 10, -71), (84, -32489, 32361), (85, null, null), (86, -22984, 32361), (87, 
26607, -27428), + (5, 31243, null), (6, null, -27428), (7, null, 7), (8, 31243, -21951), (9, 13186, 24466), + (10, null, -8), (11, null, null), (12, -18, 32361), (13, null, -18), (14, 21681, 14079), + (15, 31241, -17653), (16, 5825, 13559), (17, null, -10508), (18, null, 20682), (19, 31243, -98), + (73, -32480, 24424), (74, 31, -27428), (75, 31243, -718), (76, null, 20822), (77, 31243, -27428), + (78, -15934, null), (79, 78, -27428), (80, 8572, -27428), (81, 31243, 4077), (82, null, 114), + (83, 10, -71), (84, -32489, 32361), (85, null, null), (86, -22984, 32361), (87, 26607, -27428), + (10, null, -8), (11, null, null), (12, -18, 32361), (13, null, -18), (14, 21681, 14079), + (15, 31241, -17653), (16, 5825, 13559), (17, null, -10508), (18, null, 20682), (19, 31243, -98), + (73, -32480, 24424), (74, 31, -27428), (75, 31243, -718), (76, null, 20822), (77, 31243, -27428), + (78, -15934, null), (79, 78, -27428), (80, 8572, -27428), (81, 31243, 4077), (82, null, 114), + (83, 10, -71), (84, -32489, 32361), (85, null, null), (86, -22984, 32361), (87, 26607, -27428); + """ /* several of the VALUES groups above are repeated verbatim, so tbl3 ends up holding duplicate rows */ + + qt_test """ + SELECT + T1.pk AS C1, + T1.col2 AS C2 + FROM + tbl1 AS T1 FULL + OUTER JOIN tbl2 AS T2 ON T1.col1 <= T2.col2 + OR T2.col1 IN ( + SELECT + T3.col2 + FROM + tbl3 AS T3 + WHERE + T2.col2 = T3.col1 + ) + ORDER BY + C1, + C2 DESC; + """ /* Only T1 columns are projected and the result is ordered by C1, then C2 descending. NOTE(review): presumably the qt_test result is compared with the !test section of right_semi_mark_join.out; confirm against the regression framework. */ +} \ No newline at end of file