Skip to content

Commit

Permalink
[vec][opt] opt hash join build resize hash table before insert data (#…
Browse files Browse the repository at this point in the history
…9735)

Co-authored-by: lihaopeng <lihaopeng@baidu.com>
  • Loading branch information
HappenLee and lihaopeng authored May 23, 2022
1 parent fdd5bc0 commit 5039ec4
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 2 deletions.
6 changes: 6 additions & 0 deletions be/src/vec/common/hash_table/hash_table.h
Original file line number Diff line number Diff line change
Expand Up @@ -732,6 +732,12 @@ class HashTable : private boost::noncopyable,
}

public:
/// Grow the table ahead of a bulk insertion of `num_elem` elements.
///
/// Nothing happens unless `add_elem_size_overflow(num_elem)` reports true;
/// in that case the table is resized once, with a request of the current
/// buffer size (`grower.buf_size()`) plus `num_elem`.
/// NOTE(review): presumably the overflow check means "adding this many
/// elements would exceed the current capacity threshold" — confirm against
/// the definition of `add_elem_size_overflow`.
void expanse_for_add_elem(size_t num_elem) {
    // Fast path: no resize requested for this many additional elements.
    if (!add_elem_size_overflow(num_elem)) {
        return;
    }

    const size_t requested_size = grower.buf_size() + num_elem;
    resize(requested_size);
}

/// Insert a value. In the case of any more complex values, it is better to use the `emplace` function.
std::pair<LookupResult, bool> ALWAYS_INLINE insert(const value_type& x) {
std::pair<LookupResult, bool> res;
Expand Down
8 changes: 6 additions & 2 deletions be/src/vec/exec/join/vhash_join_node.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,10 @@ struct ProcessHashTableBuild {
KeyGetter key_getter(_build_raw_ptrs, _join_node->_build_key_sz, nullptr);

SCOPED_TIMER(_join_node->_build_table_insert_timer);
// Only when not build_unique do we need to expand the hash table before inserting data.
if constexpr (!build_unique) {
hash_table_ctx.hash_table.expanse_for_add_elem(_rows);
}
hash_table_ctx.hash_table.reset_resize_timer();

vector<int>& inserted_rows = _join_node->_inserted_rows[&_acquired_block];
Expand Down Expand Up @@ -1002,7 +1006,7 @@ Status HashJoinNode::_hash_table_build(RuntimeState* state) {

// make one block for each 4 gigabytes
constexpr static auto BUILD_BLOCK_MAX_SIZE = 4 * 1024UL * 1024UL * 1024UL;
if (_mem_used - last_mem_used > BUILD_BLOCK_MAX_SIZE) {
if (UNLIKELY(_mem_used - last_mem_used > BUILD_BLOCK_MAX_SIZE)) {
_build_blocks.emplace_back(mutable_block.to_block());
// TODO: Reconsider whether we should do this processing only after we receive all build blocks,
// and decide which approach is better.
Expand Down Expand Up @@ -1118,7 +1122,7 @@ Status HashJoinNode::extract_probe_join_column(Block& block, NullMap& null_map,
Status HashJoinNode::_process_build_block(RuntimeState* state, Block& block, uint8_t offset) {
SCOPED_TIMER(_build_table_timer);
size_t rows = block.rows();
if (rows == 0) {
if (UNLIKELY(rows == 0)) {
return Status::OK();
}
COUNTER_UPDATE(_build_rows_counter, rows);
Expand Down

0 comments on commit 5039ec4

Please sign in to comment.