Skip to content

Commit

Permalink
Keep the old hash for one-level hash tables; use the new hash for two-level hash tables
Browse files Browse the repository at this point in the history
Signed-off-by: guo-shaoge <shaoge1994@163.com>
  • Loading branch information
guo-shaoge committed Dec 4, 2024
1 parent 71b6ecd commit 40ceb08
Show file tree
Hide file tree
Showing 5 changed files with 44 additions and 30 deletions.
3 changes: 2 additions & 1 deletion dbms/src/Common/HashTable/HashTable.h
Original file line number Diff line number Diff line change
Expand Up @@ -1020,7 +1020,8 @@ class HashTable
}

/// Copy the cell from another hash table. It is assumed that the cell is not zero, and also that there was no such key in the table yet.
void ALWAYS_INLINE insertUniqueNonZero(const Cell * cell, size_t hash_value)
template <typename InsertCellType>
void ALWAYS_INLINE insertUniqueNonZero(const InsertCellType * cell, size_t hash_value)
{
size_t place_value = findEmptyCell(grower.place(hash_value));

Expand Down
9 changes: 5 additions & 4 deletions dbms/src/Common/HashTable/TwoLevelHashTable.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,9 +115,9 @@ class TwoLevelHashTable : private boost::noncopyable

/// Copy the data from another (normal) hash table. It should have the same hash function.
template <typename Source>
explicit TwoLevelHashTable(const Source & src)
explicit TwoLevelHashTable(Source & src)
{
typename Source::const_iterator it = src.begin();
typename Source::iterator it = src.begin();

/// It is assumed that the zero key (stored separately) is first in iteration order.
if (it != src.end() && it.getPtr()->isZero(src))
Expand All @@ -128,8 +128,9 @@ class TwoLevelHashTable : private boost::noncopyable

for (; it != src.end(); ++it)
{
const Cell * cell = it.getPtr();
size_t hash_value = cell->getHash(src);
auto * cell = it.getPtr();
// size_t hash_value = cell->getHash(src);
size_t hash_value = Hash::operator()(cell->getKey());
size_t buck = getBucketFromHash(hash_value);
impls[buck].insertUniqueNonZero(cell, hash_value);
}
Expand Down
30 changes: 21 additions & 9 deletions dbms/src/Common/HashTable/TwoLevelStringHashTable.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,32 +65,40 @@ class TwoLevelStringHashTable : private boost::noncopyable
TwoLevelStringHashTable() = default;

template <typename Source>
explicit TwoLevelStringHashTable(const Source & src)
explicit TwoLevelStringHashTable(Source & src)
{
if (src.m0.hasZero())
impls[0].m0.setHasZero(*src.m0.zeroValue());

for (auto & v : src.m1)
{
size_t hash_value = v.getHash(src.m1);
// size_t hash_value = v.getHash(src.m1);
const size_t hash_value = ImplTable::T1::Hash::operator()(v.getKey());
v.setHash(hash_value);
size_t buck = getBucketFromHash(hash_value);
impls[buck].m1.insertUniqueNonZero(&v, hash_value);
}
for (auto & v : src.m2)
{
size_t hash_value = v.getHash(src.m2);
// size_t hash_value = v.getHash(src.m2);
const size_t hash_value = ImplTable::T2::Hash::operator()(v.getKey());
v.setHash(hash_value);
size_t buck = getBucketFromHash(hash_value);
impls[buck].m2.insertUniqueNonZero(&v, hash_value);
}
for (auto & v : src.m3)
{
size_t hash_value = v.getHash(src.m3);
// size_t hash_value = v.getHash(src.m3);
const size_t hash_value = ImplTable::T3::Hash::operator()(v.getKey());
v.setHash(hash_value);
size_t buck = getBucketFromHash(hash_value);
impls[buck].m3.insertUniqueNonZero(&v, hash_value);
}
for (auto & v : src.ms)
{
size_t hash_value = v.getHash(src.ms);
// size_t hash_value = v.getHash(src.ms);
const size_t hash_value = ImplTable::Ts::Hash::operator()(v.getKey());
v.setHash(hash_value);
size_t buck = getBucketFromHash(hash_value);
impls[buck].ms.insertUniqueNonZero(&v, hash_value);
}
Expand Down Expand Up @@ -296,7 +304,8 @@ struct StringHashTableSubMapSelector<0, true, Data>
template <typename Data>
struct StringHashTableSubMapSelector<1, true, Data>
{
using Hash = StringHashTableHash;
// using Hash = StringHashTableHash;
using Hash = HashWithMixSeed<StringKey8>;

static typename Data::Impl::T1 & getSubMap(size_t hashval, Data & data)
{
Expand All @@ -308,7 +317,8 @@ struct StringHashTableSubMapSelector<1, true, Data>
template <typename Data>
struct StringHashTableSubMapSelector<2, true, Data>
{
using Hash = StringHashTableHash;
// using Hash = StringHashTableHash;
using Hash = HashWithMixSeed<StringKey16>;

static typename Data::Impl::T2 & getSubMap(size_t hashval, Data & data)
{
Expand All @@ -320,7 +330,8 @@ struct StringHashTableSubMapSelector<2, true, Data>
template <typename Data>
struct StringHashTableSubMapSelector<3, true, Data>
{
using Hash = StringHashTableHash;
// using Hash = StringHashTableHash;
using Hash = HashWithMixSeed<StringKey24>;

static typename Data::Impl::T3 & getSubMap(size_t hashval, Data & data)
{
Expand All @@ -332,7 +343,8 @@ struct StringHashTableSubMapSelector<3, true, Data>
template <typename Data>
struct StringHashTableSubMapSelector<4, true, Data>
{
using Hash = StringHashTableHash;
// using Hash = StringHashTableHash;
using Hash = StringRefHash;

static typename Data::Impl::Ts & getSubMap(size_t hashval, Data & data)
{
Expand Down
30 changes: 15 additions & 15 deletions dbms/src/Interpreters/Aggregator.h
Original file line number Diff line number Diff line change
Expand Up @@ -88,16 +88,16 @@ using AggregatedDataWithInt256Key = HashMap<Int256, AggregateDataPtr, HashCRC32<
using AggregatedDataWithKeys128 = HashMap<UInt128, AggregateDataPtr, HashCRC32<UInt128>>;
using AggregatedDataWithKeys256 = HashMap<UInt256, AggregateDataPtr, HashCRC32<UInt256>>;

using AggregatedDataWithUInt32KeyTwoLevel = TwoLevelHashMap<UInt32, AggregateDataPtr, HashCRC32<UInt32>>;
using AggregatedDataWithUInt64KeyTwoLevel = TwoLevelHashMap<UInt64, AggregateDataPtr, HashCRC32<UInt64>>;
using AggregatedDataWithUInt32KeyTwoLevel = TwoLevelHashMap<UInt32, AggregateDataPtr, HashWithMixSeed<UInt32>>;
using AggregatedDataWithUInt64KeyTwoLevel = TwoLevelHashMap<UInt64, AggregateDataPtr, HashWithMixSeed<UInt64>>;

using AggregatedDataWithInt256KeyTwoLevel = TwoLevelHashMap<Int256, AggregateDataPtr, HashCRC32<Int256>>;
using AggregatedDataWithInt256KeyTwoLevel = TwoLevelHashMap<Int256, AggregateDataPtr, HashWithMixSeed<Int256>>;

using AggregatedDataWithShortStringKeyTwoLevel = TwoLevelStringHashMap<AggregateDataPtr>;
using AggregatedDataWithStringKeyTwoLevel = TwoLevelHashMapWithSavedHash<StringRef, AggregateDataPtr>;

using AggregatedDataWithKeys128TwoLevel = TwoLevelHashMap<UInt128, AggregateDataPtr, HashCRC32<UInt128>>;
using AggregatedDataWithKeys256TwoLevel = TwoLevelHashMap<UInt256, AggregateDataPtr, HashCRC32<UInt256>>;
using AggregatedDataWithKeys128TwoLevel = TwoLevelHashMap<UInt128, AggregateDataPtr, HashWithMixSeed<UInt128>>;
using AggregatedDataWithKeys256TwoLevel = TwoLevelHashMap<UInt256, AggregateDataPtr, HashWithMixSeed<UInt256>>;

/** Variants with better hash function, using more than 32 bits for hash.
* Used for the merging phase of external aggregation, where the number of keys may be far greater than 4 billion,
Expand Down Expand Up @@ -125,7 +125,7 @@ struct AggregationMethodOneNumber
AggregationMethodOneNumber() = default;

template <typename Other>
explicit AggregationMethodOneNumber(const Other & other)
explicit AggregationMethodOneNumber(Other & other)
: data(other.data)
{}

Expand Down Expand Up @@ -179,7 +179,7 @@ struct AggregationMethodString
AggregationMethodString() = default;

template <typename Other>
explicit AggregationMethodString(const Other & other)
explicit AggregationMethodString(Other & other)
: data(other.data)
{}

Expand Down Expand Up @@ -227,7 +227,7 @@ struct AggregationMethodStringNoCache
AggregationMethodStringNoCache() = default;

template <typename Other>
explicit AggregationMethodStringNoCache(const Other & other)
explicit AggregationMethodStringNoCache(Other & other)
: data(other.data)
{}

Expand Down Expand Up @@ -275,7 +275,7 @@ struct AggregationMethodOneKeyStringNoCache
AggregationMethodOneKeyStringNoCache() = default;

template <typename Other>
explicit AggregationMethodOneKeyStringNoCache(const Other & other)
explicit AggregationMethodOneKeyStringNoCache(Other & other)
: data(other.data)
{}

Expand Down Expand Up @@ -325,7 +325,7 @@ struct AggregationMethodMultiStringNoCache
AggregationMethodMultiStringNoCache() = default;
template <typename Other>
explicit AggregationMethodMultiStringNoCache(const Other & other)
explicit AggregationMethodMultiStringNoCache(Other & other)
: data(other.data)
{}
Expand Down Expand Up @@ -355,7 +355,7 @@ struct AggregationMethodFastPathTwoKeysNoCache
AggregationMethodFastPathTwoKeysNoCache() = default;

template <typename Other>
explicit AggregationMethodFastPathTwoKeysNoCache(const Other & other)
explicit AggregationMethodFastPathTwoKeysNoCache(Other & other)
: data(other.data)
{}

Expand Down Expand Up @@ -475,7 +475,7 @@ struct AggregationMethodFixedString
AggregationMethodFixedString() = default;

template <typename Other>
explicit AggregationMethodFixedString(const Other & other)
explicit AggregationMethodFixedString(Other & other)
: data(other.data)
{}

Expand Down Expand Up @@ -523,7 +523,7 @@ struct AggregationMethodFixedStringNoCache
AggregationMethodFixedStringNoCache() = default;

template <typename Other>
explicit AggregationMethodFixedStringNoCache(const Other & other)
explicit AggregationMethodFixedStringNoCache(Other & other)
: data(other.data)
{}

Expand Down Expand Up @@ -572,7 +572,7 @@ struct AggregationMethodKeysFixed
AggregationMethodKeysFixed() = default;

template <typename Other>
explicit AggregationMethodKeysFixed(const Other & other)
explicit AggregationMethodKeysFixed(Other & other)
: data(other.data)
{}

Expand Down Expand Up @@ -679,7 +679,7 @@ struct AggregationMethodSerialized
AggregationMethodSerialized() = default;

template <typename Other>
explicit AggregationMethodSerialized(const Other & other)
explicit AggregationMethodSerialized(Other & other)
: data(other.data)
{}

Expand Down
2 changes: 1 addition & 1 deletion dbms/src/Interpreters/Settings.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ struct Settings
M(SettingLoadBalancing, load_balancing, LoadBalancing::RANDOM, "Which replicas (among healthy replicas) to preferably send a query to (on the first attempt) for distributed processing.") \
\
M(SettingUInt64, group_by_two_level_threshold, 100000, "From what number of keys, a two-level aggregation starts. 0 - the threshold is not set.") \
M(SettingUInt64, group_by_two_level_threshold_bytes, 100000000, "From what size of the aggregation state in bytes, a two-level aggregation begins to be used. 0 - the threshold is not set. " \
M(SettingUInt64, group_by_two_level_threshold_bytes, 32000000, "From what size of the aggregation state in bytes, a two-level aggregation begins to be used. 0 - the threshold is not set. " \
"Two-level aggregation is used when at least one of the thresholds is triggered.") \
M(SettingUInt64, aggregation_memory_efficient_merge_threads, 0, "Number of threads to use for merge intermediate aggregation results in memory efficient mode. When bigger, then more memory is " \
"consumed. 0 means - same as 'max_threads'.") \
Expand Down

0 comments on commit 40ceb08

Please sign in to comment.