forked from UWQuickstep/quickstep
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathPartitionedHashTablePool.hpp
173 lines (150 loc) · 5.64 KB
/
PartitionedHashTablePool.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
**/
#ifndef QUICKSTEP_STORAGE_PARTITIONED_HASH_TABLE_POOL_HPP_
#define QUICKSTEP_STORAGE_PARTITIONED_HASH_TABLE_POOL_HPP_
#include <algorithm>
#include <cstddef>
#include <memory>
#include <vector>
#include "storage/HashTableBase.hpp"
#include "storage/HashTableFactory.hpp"
#include "utility/Macros.hpp"
#include "glog/logging.h"
namespace quickstep {
class AggregationHandle;
class StorageManager;
class Type;
/** \addtogroup Storage
* @{
*/
/**
* @brief A pool of HashTables used for a single aggregation handle. Each
* HashTable represents values from a given partition, which is
* determined by the keys in the group by clause.
**/
class PartitionedHashTablePool {
public:
/**
* @brief Constructor.
*
* @note This constructor is relevant for the HashTable specialized for
* aggregation.
*
* @param estimated_num_entries The maximum number of entries in a hash table.
* @param num_partitions The number of partitions (i.e. number of HashTables)
* @param hash_table_impl_type The type of hash table implementation.
* @param group_by_types A vector of pointer of types which form the group by
* key.
* @param handles The aggregation handles.
* @param storage_manager A pointer to the storage manager.
**/
PartitionedHashTablePool(const std::size_t estimated_num_entries,
const std::size_t num_partitions,
const HashTableImplType hash_table_impl_type,
const std::vector<const Type *> &group_by_types,
const std::vector<AggregationHandle *> &handles,
StorageManager *storage_manager)
: estimated_num_entries_(
setHashTableSize(estimated_num_entries, num_partitions)),
num_partitions_(num_partitions),
hash_table_impl_type_(hash_table_impl_type),
group_by_types_(group_by_types),
handles_(handles),
storage_manager_(DCHECK_NOTNULL(storage_manager)) {
initializeAllHashTables();
}
/**
* @brief Check out a hash table for insertion.
*
* @param partition_id The ID of the partitioned HashTable.
*
* @return A hash table pointer for the given HashTable.
**/
AggregationStateHashTableBase* getHashTable(const std::size_t partition_id) {
DCHECK_LT(partition_id, num_partitions_);
DCHECK_LT(partition_id, hash_tables_.size());
return hash_tables_[partition_id].get();
}
/**
* @brief Get all the hash tables from the pool.
*
* @warning The caller should ensure that this call is made when no hash table
* is being checked in or checked out from the pool. In other words
* the hash table pool is in read-only state.
*
* @param All the hash tables in the pool.
*
**/
std::vector<std::unique_ptr<AggregationStateHashTableBase>>*
getAllHashTables() {
return &hash_tables_;
}
/**
* @brief Get the number of partitions used for the aggregation.
**/
inline std::size_t getNumPartitions() const {
return num_partitions_;
}
/**
* @brief Get the total memory consumed by the hash tables in this pool.
**/
std::size_t getMemoryConsumptionPoolBytes() const {
std::size_t memory = 0;
for (std::size_t ht_id = 0; ht_id < hash_tables_.size(); ++ht_id) {
if (hash_tables_[ht_id] != nullptr) {
memory += hash_tables_[ht_id]->getMemoryConsumptionBytes();
}
}
return memory;
}
private:
void initializeAllHashTables() {
for (std::size_t part_num = 0; part_num < num_partitions_; ++part_num) {
AggregationStateHashTableBase *part_hash_table = createNewHashTable();
hash_tables_.push_back(
std::unique_ptr<AggregationStateHashTableBase>(part_hash_table));
}
}
AggregationStateHashTableBase* createNewHashTable() {
return AggregationStateHashTableFactory::CreateResizable(
hash_table_impl_type_,
group_by_types_,
estimated_num_entries_,
handles_,
storage_manager_);
}
inline std::size_t setHashTableSize(const std::size_t overall_estimate,
const std::size_t num_partitions) const {
CHECK_NE(num_partitions, 0Lu);
// The minimum size of the hash table is set to 100.
return std::max(static_cast<std::size_t>(overall_estimate / num_partitions),
100Lu);
}
std::vector<std::unique_ptr<AggregationStateHashTableBase>> hash_tables_;
const std::size_t estimated_num_entries_;
const std::size_t num_partitions_;
const HashTableImplType hash_table_impl_type_;
const std::vector<const Type *> group_by_types_;
const std::vector<AggregationHandle *> handles_;
StorageManager *storage_manager_;
DISALLOW_COPY_AND_ASSIGN(PartitionedHashTablePool);
};
/** @} */
} // namespace quickstep
#endif // QUICKSTEP_STORAGE_PARTITIONED_HASH_TABLE_POOL_HPP_