Skip to content

Commit

Permalink
[fix](join) incorrect result of left semi/anti join with empty build …
Browse files Browse the repository at this point in the history
…side
  • Loading branch information
mrhhsg committed Dec 22, 2023
1 parent b1c5747 commit 82a270d
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 4 deletions.
37 changes: 37 additions & 0 deletions be/src/vec/common/hash_table/hash_map.h
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,9 @@ class JoinHashMapTable : public HashMapTable<Key, Cell, Hash, Grower, Allocator>
template <int JoinOpType>
void prepare_build(size_t num_elem, int batch_size, bool has_null_key) {
_has_null_key = has_null_key;

// the first row in build side is not really from build side table
_empty_build_side = num_elem <= 1;
max_batch_size = batch_size;
bucket_size = calc_bucket_size(num_elem + 1);
first.resize(bucket_size + 1);
Expand Down Expand Up @@ -262,6 +265,15 @@ class JoinHashMapTable : public HashMapTable<Key, Cell, Hash, Grower, Allocator>
uint32_t* __restrict probe_idxs, bool& probe_visited,
uint32_t* __restrict build_idxs,
doris::vectorized::ColumnFilterHelper* mark_column) {
if constexpr (JoinOpType == doris::TJoinOp::LEFT_ANTI_JOIN ||
JoinOpType == doris::TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN) {
if (_empty_build_side) {
return _process_left_anti_join_for_empty_build_side<
JoinOpType, with_other_conjuncts, is_mark_join>(
probe_idx, probe_rows, probe_idxs, build_idxs, mark_column);
}
}

if constexpr (is_mark_join) {
return _find_batch_mark<JoinOpType, with_other_conjuncts>(
keys, build_idx_map, probe_idx, probe_rows, probe_idxs, build_idxs,
Expand Down Expand Up @@ -367,6 +379,30 @@ class JoinHashMapTable : public HashMapTable<Key, Cell, Hash, Grower, Allocator>
return std::tuple {probe_idx, 0U, matched_cnt};
}

template <int JoinOpType, bool with_other_conjuncts, bool is_mark_join>
auto _process_left_anti_join_for_empty_build_side(
int probe_idx, int probe_rows, uint32_t* __restrict probe_idxs,
uint32_t* __restrict build_idxs, doris::vectorized::ColumnFilterHelper* mark_column) {
static_assert(JoinOpType == doris::TJoinOp::LEFT_ANTI_JOIN ||
JoinOpType == doris::TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN);
auto matched_cnt = 0;
const auto batch_size = max_batch_size;

while (probe_idx < probe_rows && matched_cnt < batch_size) {
probe_idxs[matched_cnt] = probe_idx++;
if constexpr (is_mark_join) {
build_idxs[matched_cnt] = 0;
}
++matched_cnt;
}

if constexpr (is_mark_join && !with_other_conjuncts) {
mark_column->resize_fill(matched_cnt, 1);
}

return std::tuple {probe_idx, 0U, matched_cnt};
}

auto _find_batch_right_semi_anti(const Key* __restrict keys,
const uint32_t* __restrict build_idx_map, int probe_idx,
int probe_rows) {
Expand Down Expand Up @@ -532,6 +568,7 @@ class JoinHashMapTable : public HashMapTable<Key, Cell, Hash, Grower, Allocator>
Cell cell;
doris::vectorized::Arena* pool;
bool _has_null_key = false;
bool _empty_build_side = true;
};

template <typename Key, typename Mapped, typename Hash = DefaultHash<Key>,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,10 @@

-- !select --

-- !anti_emtpy_right --
\N
1
3

-- !semi_emtpy_right --

Original file line number Diff line number Diff line change
Expand Up @@ -60,11 +60,21 @@ suite("test_null_aware_left_anti_join") {
sql """ set parallel_pipeline_task_num=2; """
qt_select """ select ${tableName2}.k1 from ${tableName2} where k1 not in (select ${tableName1}.k1 from ${tableName1}) order by ${tableName2}.k1; """

sql """
drop table if exists ${tableName2};
// In a left anti join, if the right side is empty, all rows (NULLs included) of the left side should be output.
qt_anti_emtpy_right """
select
*
from ${tableName1} t1 where k1 not in (
select k1 from ${tableName2} t2 where t2.k1 > 2
) order by 1;
"""

sql """
drop table if exists ${tableName1};
// In a left semi join, if the right side is empty, no rows should be output.
qt_semi_emtpy_right """
select
*
from ${tableName1} t1 where k1 in (
select k1 from ${tableName2} t2 where t2.k1 > 2
) order by 1;
"""
}

0 comments on commit 82a270d

Please sign in to comment.