diff --git a/be/src/pipeline/exec/hashjoin_probe_operator.cpp b/be/src/pipeline/exec/hashjoin_probe_operator.cpp index a98c72157712b3..85280c8295cae8 100644 --- a/be/src/pipeline/exec/hashjoin_probe_operator.cpp +++ b/be/src/pipeline/exec/hashjoin_probe_operator.cpp @@ -78,6 +78,7 @@ Status HashJoinProbeLocalState::open(RuntimeState* state) { _process_hashtable_ctx_variants = std::make_unique(); auto& p = _parent->cast(); + _right_col_idx = p._right_col_idx; std::visit( [&](auto&& join_op_variants, auto have_other_join_conjunct) { using JoinOpType = std::decay_t; @@ -536,7 +537,7 @@ Status HashJoinProbeOperatorX::init(const TPlanNode& tnode, RuntimeState* state) const bool probe_dispose_null = _match_all_probe || _build_unique || _join_op == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN || _join_op == TJoinOp::NULL_AWARE_LEFT_SEMI_JOIN || _join_op == TJoinOp::LEFT_ANTI_JOIN || - _join_op == TJoinOp::LEFT_SEMI_JOIN; + _join_op == TJoinOp::LEFT_SEMI_JOIN || _is_mark_join; const std::vector& eq_join_conjuncts = tnode.hash_join_node.eq_join_conjuncts; std::vector probe_not_ignore_null(eq_join_conjuncts.size()); size_t conjuncts_index = 0; @@ -668,14 +669,34 @@ Status HashJoinProbeOperatorX::prepare(RuntimeState* state) { } } - const int right_col_idx = - (_is_right_semi_anti && !_have_other_join_conjunct) ? 0 : _left_table_data_types.size(); + _right_col_idx = (_is_right_semi_anti && !_have_other_join_conjunct && + (!_is_mark_join || _mark_join_conjuncts.empty())) + ? 0 + : _left_table_data_types.size(); size_t idx = 0; for (const auto* slot : slots_to_check) { auto data_type = slot->get_data_type_ptr(); - const auto slot_on_left = idx < right_col_idx; + const auto slot_on_left = idx < _right_col_idx; + + if (slot_on_left) { + if (idx >= _left_table_data_types.size()) { + return Status::InternalError( + "Join node(id={}, OP={}) intermediate slot({}, #{})'s on left table " + "idx out bound of _left_table_data_types: {} vs {}", + _node_id, _join_op, slot->col_name(), slot->id(), idx, + _left_table_data_types.size()); + } + } else if (idx - _right_col_idx >= _right_table_data_types.size()) { + return Status::InternalError( + "Join node(id={}, OP={}) intermediate slot({}, #{})'s on right table " + "idx out bound of _right_table_data_types: {} vs {}(idx = {}, _right_col_idx = " + "{})", + _node_id, _join_op, slot->col_name(), slot->id(), idx - _right_col_idx, + _right_table_data_types.size(), idx, _right_col_idx); + } + auto target_data_type = slot_on_left ? _left_table_data_types[idx] - : _right_table_data_types[idx - right_col_idx]; + : _right_table_data_types[idx - _right_col_idx]; ++idx; if (data_type->equals(*target_data_type)) { continue; diff --git a/be/src/pipeline/exec/hashjoin_probe_operator.h b/be/src/pipeline/exec/hashjoin_probe_operator.h index 3c75a8d10dea8b..7b73b3bf321faa 100644 --- a/be/src/pipeline/exec/hashjoin_probe_operator.h +++ b/be/src/pipeline/exec/hashjoin_probe_operator.h @@ -138,6 +138,9 @@ class HashJoinProbeLocalState final std::unique_ptr _process_hashtable_ctx_variants = std::make_unique(); + // Index of column(slot) from right table in the `_intermediate_row_desc`. + size_t _right_col_idx; + RuntimeProfile::Counter* _probe_expr_call_timer = nullptr; RuntimeProfile::Counter* _probe_next_timer = nullptr; RuntimeProfile::Counter* _probe_side_output_timer = nullptr; @@ -213,6 +216,9 @@ class HashJoinProbeOperatorX final : public JoinProbeOperatorX _right_output_slot_flags; std::vector _right_table_column_names; const std::vector _partition_exprs; + + // Index of column(slot) from right table in the `_intermediate_row_desc`. + size_t _right_col_idx; }; } // namespace pipeline diff --git a/be/src/vec/common/hash_table/join_hash_table.h b/be/src/vec/common/hash_table/join_hash_table.h index 317987541cdbe1..88695ef9b43119 100644 --- a/be/src/vec/common/hash_table/join_hash_table.h +++ b/be/src/vec/common/hash_table/join_hash_table.h @@ -101,24 +101,23 @@ class JoinHashTable { } } - if constexpr (with_other_conjuncts || - (is_mark_join && JoinOpType != TJoinOp::RIGHT_SEMI_JOIN)) { - if constexpr (!with_other_conjuncts) { - constexpr bool is_null_aware_join = - JoinOpType == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN || - JoinOpType == TJoinOp::NULL_AWARE_LEFT_SEMI_JOIN; - constexpr bool is_left_half_join = JoinOpType == TJoinOp::LEFT_SEMI_JOIN || - JoinOpType == TJoinOp::LEFT_ANTI_JOIN; - - /// For null aware join or left half(semi/anti) join without other conjuncts and without - /// mark join conjunct. - /// If one row on probe side has one match in build side, we should stop searching the - /// hash table for this row. - if (is_null_aware_join || (is_left_half_join && !has_mark_join_conjunct)) { - return _find_batch_conjunct( - keys, build_idx_map, probe_idx, build_idx, probe_rows, probe_idxs, - build_idxs); - } + if constexpr (with_other_conjuncts) { + return _find_batch_conjunct( + keys, build_idx_map, probe_idx, build_idx, probe_rows, probe_idxs, build_idxs); + } else if constexpr (is_mark_join) { + constexpr bool is_null_aware_join = JoinOpType == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN || + JoinOpType == TJoinOp::NULL_AWARE_LEFT_SEMI_JOIN; + constexpr bool is_left_half_join = + JoinOpType == TJoinOp::LEFT_SEMI_JOIN || JoinOpType == TJoinOp::LEFT_ANTI_JOIN; + + /// For null aware join or left half(semi/anti) join without other conjuncts and without + /// mark join conjunct. + /// If one row on probe side has one match in build side, we should stop searching the + /// hash table for this row. + if (is_null_aware_join || (is_left_half_join && !has_mark_join_conjunct)) { + return _find_batch_conjunct( + keys, build_idx_map, probe_idx, build_idx, probe_rows, probe_idxs, + build_idxs); } return _find_batch_conjunct( @@ -339,14 +338,7 @@ class JoinHashTable { auto do_the_probe = [&]() { while (build_idx && matched_cnt < batch_size) { - if constexpr (JoinOpType == TJoinOp::RIGHT_ANTI_JOIN || - JoinOpType == TJoinOp::RIGHT_SEMI_JOIN) { - if (!visited[build_idx] && keys[probe_idx] == build_keys[build_idx]) { - probe_idxs[matched_cnt] = probe_idx; - build_idxs[matched_cnt] = build_idx; - matched_cnt++; - } - } else if constexpr (need_judge_null) { + if constexpr (need_judge_null) { if (build_idx == bucket_size) { build_idxs[matched_cnt] = build_idx; probe_idxs[matched_cnt] = probe_idx; diff --git a/be/src/vec/exec/join/process_hash_table_probe.h b/be/src/vec/exec/join/process_hash_table_probe.h index ee46197617a92b..0228310390c81d 100644 --- a/be/src/vec/exec/join/process_hash_table_probe.h +++ b/be/src/vec/exec/join/process_hash_table_probe.h @@ -23,6 +23,7 @@ #include "vec/columns/column.h" #include "vec/columns/columns_number.h" #include "vec/common/arena.h" +#include "vec/common/custom_allocator.h" namespace doris { namespace vectorized { @@ -132,8 +133,15 @@ struct ProcessHashTableProbe { RuntimeProfile::Counter* _probe_side_output_timer = nullptr; RuntimeProfile::Counter* _probe_process_hashtable_timer = nullptr; - int _right_col_idx; + // See `HashJoinProbeOperatorX::_right_col_idx` + const int _right_col_idx; + int _right_col_len; + + // For right semi with mark join conjunct, we need to store the mark join flags + // in the hash table. + // -1 means null, 0 means false, 1 means true + DorisVector mark_join_flags; }; } // namespace vectorized diff --git a/be/src/vec/exec/join/process_hash_table_probe_impl.h b/be/src/vec/exec/join/process_hash_table_probe_impl.h index 9a15df637aaf60..519239ff172ee6 100644 --- a/be/src/vec/exec/join/process_hash_table_probe_impl.h +++ b/be/src/vec/exec/join/process_hash_table_probe_impl.h @@ -57,9 +57,7 @@ ProcessHashTableProbe::ProcessHashTableProbe(Parent* parent, _build_side_output_timer(parent->_build_side_output_timer), _probe_side_output_timer(parent->_probe_side_output_timer), _probe_process_hashtable_timer(parent->_probe_process_hashtable_timer), - _right_col_idx((_is_right_semi_anti && !_have_other_join_conjunct) - ? 0 - : _parent->left_table_data_types().size()), + _right_col_idx(parent->_right_col_idx), _right_col_len(_parent->right_table_data_types().size()) {} template @@ -67,11 +65,10 @@ void ProcessHashTableProbe::build_side_output_column( MutableColumns& mcol, const std::vector& output_slot_flags, int size, bool have_other_join_conjunct, bool is_mark_join) { SCOPED_TIMER(_build_side_output_timer); - constexpr auto is_semi_anti_join = JoinOpType == TJoinOp::RIGHT_ANTI_JOIN || - JoinOpType == TJoinOp::RIGHT_SEMI_JOIN || - JoinOpType == TJoinOp::LEFT_ANTI_JOIN || + constexpr auto is_semi_anti_join = JoinOpType == TJoinOp::LEFT_ANTI_JOIN || + JoinOpType == TJoinOp::LEFT_SEMI_JOIN || JoinOpType == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN || - JoinOpType == TJoinOp::LEFT_SEMI_JOIN; + JoinOpType == TJoinOp::NULL_AWARE_LEFT_SEMI_JOIN; constexpr auto probe_all = JoinOpType == TJoinOp::LEFT_OUTER_JOIN || JoinOpType == TJoinOp::FULL_OUTER_JOIN; @@ -209,7 +206,7 @@ Status ProcessHashTableProbe::do_process(HashTableType& hash (JoinOpType == doris::TJoinOp::LEFT_ANTI_JOIN || JoinOpType == doris::TJoinOp::LEFT_SEMI_JOIN || JoinOpType == doris::TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN || - (is_mark_join && JoinOpType != doris::TJoinOp::RIGHT_SEMI_JOIN))); + (is_mark_join))); } auto& mcol = mutable_block.mutable_columns(); @@ -268,8 +265,8 @@ Status ProcessHashTableProbe::do_process(HashTableType& hash build_side_output_column(mcol, *_right_output_slot_flags, current_offset, with_other_conjuncts, is_mark_join); - if constexpr (with_other_conjuncts || (JoinOpType != TJoinOp::RIGHT_SEMI_JOIN && - JoinOpType != TJoinOp::RIGHT_ANTI_JOIN)) { + if (with_other_conjuncts || !_parent->_mark_join_conjuncts.empty() || + (JoinOpType != TJoinOp::RIGHT_SEMI_JOIN && JoinOpType != TJoinOp::RIGHT_ANTI_JOIN)) { auto check_all_match_one = [](const std::vector& vecs, uint32_t probe_idx, int size) { if (!size || vecs[0] != probe_idx || vecs[size - 1] != probe_idx + size - 1) { @@ -291,7 +288,7 @@ Status ProcessHashTableProbe::do_process(HashTableType& hash output_block->swap(mutable_block.to_block()); - if constexpr (is_mark_join && JoinOpType != TJoinOp::RIGHT_SEMI_JOIN) { + if constexpr (is_mark_join) { return do_mark_join_conjuncts( output_block, hash_table_ctx.hash_table->get_bucket_size()); } else if constexpr (with_other_conjuncts) { @@ -363,21 +360,31 @@ template template Status ProcessHashTableProbe::do_mark_join_conjuncts( Block* output_block, size_t hash_table_bucket_size) { - DCHECK(JoinOpType == TJoinOp::LEFT_ANTI_JOIN || - JoinOpType == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN || - JoinOpType == TJoinOp::LEFT_SEMI_JOIN || - JoinOpType == TJoinOp::NULL_AWARE_LEFT_SEMI_JOIN); + if (JoinOpType != TJoinOp::LEFT_ANTI_JOIN && JoinOpType != TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN && + JoinOpType != TJoinOp::LEFT_SEMI_JOIN && JoinOpType != TJoinOp::NULL_AWARE_LEFT_SEMI_JOIN && + JoinOpType != TJoinOp::RIGHT_SEMI_JOIN && JoinOpType != TJoinOp::RIGHT_ANTI_JOIN) { + return Status::InternalError("join type {} is not supported", JoinOpType); + } constexpr bool is_anti_join = JoinOpType == TJoinOp::LEFT_ANTI_JOIN || - JoinOpType == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN; + JoinOpType == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN || + JoinOpType == TJoinOp::RIGHT_ANTI_JOIN; constexpr bool is_null_aware_join = JoinOpType == TJoinOp::NULL_AWARE_LEFT_SEMI_JOIN || JoinOpType == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN; + constexpr bool is_right_half_join = + JoinOpType == TJoinOp::RIGHT_SEMI_JOIN || JoinOpType == TJoinOp::RIGHT_ANTI_JOIN; const auto row_count = output_block->rows(); if (!row_count) { return Status::OK(); } + if constexpr (is_right_half_join) { + if (mark_join_flags.empty() && _build_block != nullptr) { + mark_join_flags.resize(_build_block->rows(), 0); + } + } + auto mark_column_mutable = output_block->get_by_position(_parent->_mark_column_id).column->assume_mutable(); auto& mark_column = assert_cast(*mark_column_mutable); @@ -455,36 +462,70 @@ Status ProcessHashTableProbe::do_mark_join_conjuncts( const bool should_be_null_if_build_side_has_null = *_has_null_in_build_side && is_null_aware_join && !with_other_conjuncts; for (size_t i = 0; i != row_count; ++i) { - bool not_matched_before = _parent->_last_probe_match != _probe_indexs[i]; - if (_build_indexs[i] == 0) { - bool has_null_mark_value = _parent->_last_probe_null_mark == _probe_indexs[i]; - if (not_matched_before) { - filter_map[i] = true; - mark_null_map[i] = has_null_mark_value || should_be_null_if_build_side_has_null; - mark_filter_data[i] = false; + if constexpr (is_right_half_join) { + const auto& build_index = _build_indexs[i]; + if (build_index == 0) { + continue; + } + + if (mark_join_flags[build_index] == 1) { + continue; + } + + if (mark_null_map[i]) { + mark_join_flags[build_index] = -1; + } else if (mark_filter_data[i]) { + mark_join_flags[build_index] = 1; } } else { - if (mark_null_map[i]) { // is null - _parent->_last_probe_null_mark = _probe_indexs[i]; - } else { - if (mark_filter_data[i] && not_matched_before) { - _parent->_last_probe_match = _probe_indexs[i]; + bool not_matched_before = _parent->_last_probe_match != _probe_indexs[i]; + if (_build_indexs[i] == 0) { + bool has_null_mark_value = _parent->_last_probe_null_mark == _probe_indexs[i]; + if (not_matched_before) { filter_map[i] = true; + mark_null_map[i] = has_null_mark_value || should_be_null_if_build_side_has_null; + mark_filter_data[i] = false; + } + } else { + if (mark_null_map[i]) { // is null + _parent->_last_probe_null_mark = _probe_indexs[i]; + } else { + if (mark_filter_data[i] && not_matched_before) { + _parent->_last_probe_match = _probe_indexs[i]; + filter_map[i] = true; + } } } } } - if constexpr (is_anti_join) { - // flip the mark column - for (size_t i = 0; i != row_count; ++i) { - mark_filter_data[i] ^= 1; // not null/ null + if constexpr (is_right_half_join) { + if constexpr (is_anti_join) { + // flip the mark column + for (size_t i = 0; i != row_count; ++i) { + if (mark_join_flags[i] == -1) { + // -1 means null. + continue; + } + + mark_join_flags[i] ^= 1; + } + } + // For right semi/anti join, no rows will be output in probe phase. + output_block->swap(vectorized::Block()); + return Status::OK(); + } else { + if constexpr (is_anti_join) { + // flip the mark column + for (size_t i = 0; i != row_count; ++i) { + mark_filter_data[i] ^= 1; // not null/ null + } } - } - auto result_column_id = output_block->columns(); - output_block->insert({std::move(filter_column), std::make_shared(), ""}); - return Block::filter_block(output_block, result_column_id, result_column_id); + auto result_column_id = output_block->columns(); + output_block->insert({std::move(filter_column), std::make_shared(), ""}); + return Block::filter_block(output_block, result_column_id, result_column_id); + } } template @@ -638,8 +679,31 @@ Status ProcessHashTableProbe::process_data_in_hashtable( _build_indexs.data() + block_size); } + if constexpr (JoinOpType == TJoinOp::RIGHT_ANTI_JOIN || + JoinOpType == TJoinOp::RIGHT_SEMI_JOIN) { + if (is_mark_join) { + if (mark_join_flags.empty() && _build_block != nullptr) { + mark_join_flags.resize(_build_block->rows(), 0); + } + + // mark column is nullable + auto* mark_column = assert_cast( + mcol[_parent->_mark_column_id].get()); + mark_column->resize(block_size); + auto* null_map = mark_column->get_null_map_data().data(); + auto* data = assert_cast(mark_column->get_nested_column()) + .get_data() + .data(); + for (size_t i = 0; i != block_size; ++i) { + const auto build_index = _build_indexs[i]; + null_map[i] = mark_join_flags[build_index] == -1; + data[i] = mark_join_flags[build_index] == 1; + } + } + } + // just resize the left table column in case with other conjunct to make block size is not zero - if (_is_right_semi_anti && _have_other_join_conjunct) { + if (_is_right_semi_anti && _right_col_idx != 0) { for (int i = 0; i < _right_col_idx; ++i) { mcol[i]->resize(block_size); } diff --git a/be/src/vec/exec/join/vhash_join_node.cpp b/be/src/vec/exec/join/vhash_join_node.cpp index 9fec942d1619f9..410b5ddf7ed79c 100644 --- a/be/src/vec/exec/join/vhash_join_node.cpp +++ b/be/src/vec/exec/join/vhash_join_node.cpp @@ -292,6 +292,11 @@ Status HashJoinNode::prepare(RuntimeState* state) { _right_table_data_types = VectorizedUtils::get_data_types(child(1)->row_desc()); _left_table_data_types = VectorizedUtils::get_data_types(child(0)->row_desc()); _right_table_column_names = VectorizedUtils::get_column_names(child(1)->row_desc()); + + _right_col_idx = (_is_right_semi_anti && !_have_other_join_conjunct && + (!_is_mark_join || _mark_join_conjuncts.empty())) + ? 0 + : _left_table_data_types.size(); // Hash Table Init _hash_table_init(state); _construct_mutable_join_block(); diff --git a/be/src/vec/exec/join/vhash_join_node.h b/be/src/vec/exec/join/vhash_join_node.h index 5cfac9f9abaa70..74b1428fc8c025 100644 --- a/be/src/vec/exec/join/vhash_join_node.h +++ b/be/src/vec/exec/join/vhash_join_node.h @@ -358,6 +358,9 @@ class HashJoinNode final : public VJoinNodeBase { int64_t _build_side_last_mem_used = 0; MutableBlock _build_side_mutable_block; + // Index of column(slot) from right table in the `_intermediate_row_desc`. + size_t _right_col_idx; + SharedHashTableContextPtr _shared_hash_table_context = nullptr; Status _materialize_build_side(RuntimeState* state) override; diff --git a/regression-test/data/query_p0/join/mark_join/mark_join.out b/regression-test/data/query_p0/join/mark_join/mark_join.out index ed3575d0e14476..ea3b7ddd6a6d1a 100644 --- a/regression-test/data/query_p0/join/mark_join/mark_join.out +++ b/regression-test/data/query_p0/join/mark_join/mark_join.out @@ -17,3 +17,50 @@ 3 -3 \N c 3 3 \N c +-- !test_right_semi_mark_join -- +1 v1 o1 \N \N +2 v2 o2 \N \N +3 v3 o3 \N \N +4 v4 o4 \N \N +5 v5 o5 \N \N +6 v1 \N \N \N +7 v2 \N \N \N +8 v3 \N \N \N +9 v4 \N \N \N +10 v5 \N \N \N + +-- !test_right_semi_mark_join_2 -- +1 v1 o1 \N \N +2 v2 o2 \N \N +3 v3 o3 \N \N +4 v4 o4 \N \N +5 v5 o5 \N \N +6 v1 \N \N \N +7 v2 \N \N \N +8 v3 \N \N \N +9 v4 \N \N \N +10 v5 \N \N \N + +-- !test_right_semi_mark_join_no_null -- +1 v1 o1 false true +2 v2 o2 false true +3 v3 o3 false true +4 v4 o4 false true +5 v5 o5 false true +6 v1 \N \N \N +7 v2 \N \N \N +8 v3 \N \N \N +9 v4 \N \N \N +10 v5 \N \N \N + +-- !test_right_semi_mark_join_no_null_2 -- +1 v1 o1 false true +2 v2 o2 false true +3 v3 o3 false true +4 v4 o4 false true +5 v5 o5 false true +6 v1 \N \N \N +7 v2 \N \N \N +8 v3 \N \N \N +9 v4 \N \N \N +10 v5 \N \N \N \ No newline at end of file diff --git a/regression-test/suites/query_p0/join/mark_join/mark_join.groovy b/regression-test/suites/query_p0/join/mark_join/mark_join.groovy index 9759a0e9b4cd70..0292fd4ae30780 100644 --- a/regression-test/suites/query_p0/join/mark_join/mark_join.groovy +++ b/regression-test/suites/query_p0/join/mark_join/mark_join.groovy @@ -61,4 +61,218 @@ suite("mark_join") { qt_test """ select * from t1 where t1.k1 not in (select t2.k3 from t2 where t2.k2 = t1.k2) or k1 < 10 order by k1, k2; """ + + sql "drop table if exists tbl1;" + sql "drop table if exists tbl2;" + sql "drop table if exists tbl3;" + + sql """ + CREATE TABLE `tbl1` ( + `unit_name` varchar(1080) NULL, + `cur_unit_name` varchar(1080) NOT NULL + ) ENGINE=OLAP + DUPLICATE KEY(`unit_name`) + DISTRIBUTED BY RANDOM BUCKETS AUTO + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ); + """ + + sql """ + CREATE TABLE `tbl2` ( + `org_code` varchar(150) NOT NULL , + `org_name` varchar(300) NULL + ) ENGINE=OLAP + DUPLICATE KEY(`org_code`) + DISTRIBUTED BY HASH(`org_code`) BUCKETS 4 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ); + """ + + sql """ + CREATE TABLE `tbl3` ( + `id` bigint NOT NULL, + `acntm_name` varchar(500) NULL , + `vendor_name` varchar(500) NULL + ) ENGINE=OLAP + DUPLICATE KEY(`id`) + DISTRIBUTED BY HASH(`id`) BUCKETS AUTO + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ); + """ + + sql """ + insert into tbl1 (unit_name, cur_unit_name) values + ('v1', 'o1'), + ('v2', 'o2'), + ('v3', 'o3'), + ('v4', 'o4'), + ('v5', 'o5'), + (null, 'o1'), + ('v1', 'o1'), + ('v2', 'o2'), + ('v3', 'o3'), + ('v4', 'o4'), + ('v5', 'o5'), + (null, 'o1'), + (null, 'o2'), + (null, 'o3'), + (null, 'o4'), + (null, 'o5'), + ('v1', 'o1'), + ('v2', 'o2'), + ('v3', 'o3'), + ('v4', 'o4'), + ('v5', 'o5'); + """ + + sql """ + insert into tbl2(org_code, org_name) values + ('v1', 'o1'), + ('v2', 'o2'), + ('v3', 'o3'), + ('v4', 'o4'), + ('v5', 'o5'), + ('v1', null), + ('v2', null), + ('v3', null), + ('v4', null), + ('v5', null); + """ + + sql """ + insert into tbl3 (id, vendor_name, acntm_name) + values(1, 'o1', 'v1'), + (2, 'o2', 'v2'), + (3, 'o3', 'v3'), + (4, 'o4', 'v4'), + (5, 'o5', 'v5'), + (6, null, 'v1'), + (7, null, 'v2'), + (8, null, 'v3'), + (9, null, 'v4'), + (10, null, 'v5'); + """ + + sql " analyze table tbl1 with sync;" + sql " analyze table tbl2 with sync;" + sql " analyze table tbl3 with sync;" + + sql "set disable_join_reorder=0;" + qt_test_right_semi_mark_join """ + select + tbl3.id, + tbl3.acntm_name, + tbl3.vendor_name, + tbl3.vendor_name in ( + select + tbl1.unit_name + from + tbl2 + join tbl1 on tbl1.cur_unit_name = tbl2.org_name + where + tbl2.org_code = tbl3.acntm_name + ) v1, + tbl3.vendor_name not in ( + select + tbl1.unit_name + from + tbl2 + join tbl1 on tbl1.cur_unit_name = tbl2.org_name + where + tbl2.org_code = tbl3.acntm_name + ) v2 + from + tbl3 order by 1,2,3,4,5; + """ + + sql "set disable_join_reorder=1;" + qt_test_right_semi_mark_join_2 """ + select + tbl3.id, + tbl3.acntm_name, + tbl3.vendor_name, + tbl3.vendor_name in ( + select + tbl1.unit_name + from + tbl2 + join tbl1 on tbl1.cur_unit_name = tbl2.org_name + where + tbl2.org_code = tbl3.acntm_name + ) v1, + tbl3.vendor_name not in ( + select + tbl1.unit_name + from + tbl2 + join tbl1 on tbl1.cur_unit_name = tbl2.org_name + where + tbl2.org_code = tbl3.acntm_name + ) v2 + from + tbl3 order by 1,2,3,4,5; + """ + + sql "set disable_join_reorder=0;" + qt_test_right_semi_mark_join_no_null """ + select + tbl3.id, + tbl3.acntm_name, + tbl3.vendor_name, + tbl3.vendor_name in ( + select + tbl1.unit_name + from + tbl2 + join tbl1 on tbl1.cur_unit_name = tbl2.org_name + where + tbl2.org_code = tbl3.acntm_name + and tbl1.unit_name is not null + ) v1, + tbl3.vendor_name not in ( + select + tbl1.unit_name + from + tbl2 + join tbl1 on tbl1.cur_unit_name = tbl2.org_name + where + tbl2.org_code = tbl3.acntm_name + and tbl1.unit_name is not null + ) v2 + from + tbl3 order by 1,2,3,4,5; + """ + + sql "set disable_join_reorder=1;" + qt_test_right_semi_mark_join_no_null_2 """ + select + tbl3.id, + tbl3.acntm_name, + tbl3.vendor_name, + tbl3.vendor_name in ( + select + tbl1.unit_name + from + tbl2 + join tbl1 on tbl1.cur_unit_name = tbl2.org_name + where + tbl2.org_code = tbl3.acntm_name + and tbl1.unit_name is not null + ) v1, + tbl3.vendor_name not in ( + select + tbl1.unit_name + from + tbl2 + join tbl1 on tbl1.cur_unit_name = tbl2.org_name + where + tbl2.org_code = tbl3.acntm_name + and tbl1.unit_name is not null + ) v2 + from + tbl3 order by 1,2,3,4,5; + """ }