-
Notifications
You must be signed in to change notification settings - Fork 6.3k
/
filter_policy.h
238 lines (207 loc) · 10 KB
/
filter_policy.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
//
// A database can be configured with a custom FilterPolicy object.
// This object is responsible for creating a small filter from a set
// of keys. These filters are stored in rocksdb and are consulted
// automatically by rocksdb to decide whether or not to read some
// information from disk. In many cases, a filter can cut down the
// number of disk seeks form a handful to a single disk seek per
// DB::Get() call.
//
// Most people will want to use the builtin bloom filter support (see
// NewBloomFilterPolicy() below).
#pragma once
#include <stdlib.h>
#include <algorithm>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>
#include "rocksdb/advanced_options.h"
#include "rocksdb/status.h"
namespace ROCKSDB_NAMESPACE {
class Slice;
struct BlockBasedTableOptions;
struct ConfigOptions;
// A class that takes a bunch of keys, then generates filter
class FilterBitsBuilder {
public:
virtual ~FilterBitsBuilder() {}
// Add Key to filter, you could use any way to store the key.
// Such as: storing hashes or original keys
// Keys are in sorted order and duplicated keys are possible.
virtual void AddKey(const Slice& key) = 0;
// Generate the filter using the keys that are added
// The return value of this function would be the filter bits,
// The ownership of actual data is set to buf
virtual Slice Finish(std::unique_ptr<const char[]>* buf) = 0;
// Approximate the number of keys that can be added and generate a filter
// <= the specified number of bytes. Callers (including RocksDB) should
// only use this result for optimizing performance and not as a guarantee.
// This default implementation is for compatibility with older custom
// FilterBitsBuilders only implementing deprecated CalculateNumEntry.
virtual size_t ApproximateNumEntries(size_t bytes) {
bytes = std::min(bytes, size_t{0xffffffff});
return static_cast<size_t>(CalculateNumEntry(static_cast<uint32_t>(bytes)));
}
// Old, DEPRECATED version of ApproximateNumEntries. This is not
// called by RocksDB except as the default implementation of
// ApproximateNumEntries for API compatibility.
virtual int CalculateNumEntry(const uint32_t bytes) {
// DEBUG: ideally should not rely on this implementation
assert(false);
// RELEASE: something reasonably conservative: 2 bytes per entry
return static_cast<int>(bytes / 2);
}
};
// A class that checks if a key can be in filter
// It should be initialized by Slice generated by BitsBuilder
class FilterBitsReader {
public:
virtual ~FilterBitsReader() {}
// Check if the entry match the bits in filter
virtual bool MayMatch(const Slice& entry) = 0;
// Check if an array of entries match the bits in filter
virtual void MayMatch(int num_keys, Slice** keys, bool* may_match) {
for (int i = 0; i < num_keys; ++i) {
may_match[i] = MayMatch(*keys[i]);
}
}
};
// Contextual information passed to BloomFilterPolicy at filter building time.
// Used in overriding FilterPolicy::GetBuilderWithContext(). References other
// structs because this is expected to be a temporary, stack-allocated object.
struct FilterBuildingContext {
// This constructor is for internal use only and subject to change.
FilterBuildingContext(const BlockBasedTableOptions& table_options);
// Options for the table being built
const BlockBasedTableOptions& table_options;
// Name of the column family for the table (or empty string if unknown)
std::string column_family_name;
// The compactions style in effect for the table
CompactionStyle compaction_style = kCompactionStyleLevel;
// The table level at time of constructing the SST file, or -1 if unknown.
// (The table file could later be used at a different level.)
int level_at_creation = -1;
// An optional logger for reporting errors, warnings, etc.
Logger* info_log = nullptr;
};
// We add a new format of filter block called full filter block
// This new interface gives you more space of customization
//
// For the full filter block, you can plug in your version by implement
// the FilterBitsBuilder and FilterBitsReader
//
// There are two sets of interface in FilterPolicy
// Set 1: CreateFilter, KeyMayMatch: used for blockbased filter
// Set 2: GetFilterBitsBuilder, GetFilterBitsReader, they are used for
// full filter.
// Set 1 MUST be implemented correctly, Set 2 is optional
// RocksDB would first try using functions in Set 2. if they return nullptr,
// it would use Set 1 instead.
// You can choose filter type in NewBloomFilterPolicy
class FilterPolicy {
public:
virtual ~FilterPolicy();
// Creates a new FilterPolicy based on the input value string and returns the
// result The value might be an ID, and ID with properties, or an old-style
// policy string.
// The value describes the FilterPolicy being created.
// For BloomFilters, value may be a ":"-delimited value of the form:
// "bloomfilter:[bits_per_key]:[use_block_based_builder]",
// e.g. ""bloomfilter:4:true"
// The above string is equivalent to calling NewBloomFilterPolicy(4, true).
static Status CreateFromString(const ConfigOptions& config_options,
const std::string& value,
std::shared_ptr<const FilterPolicy>* result);
// Return the name of this policy. Note that if the filter encoding
// changes in an incompatible way, the name returned by this method
// must be changed. Otherwise, old incompatible filters may be
// passed to methods of this type.
virtual const char* Name() const = 0;
// keys[0,n-1] contains a list of keys (potentially with duplicates)
// that are ordered according to the user supplied comparator.
// Append a filter that summarizes keys[0,n-1] to *dst.
//
// Warning: do not change the initial contents of *dst. Instead,
// append the newly constructed filter to *dst.
virtual void CreateFilter(const Slice* keys, int n,
std::string* dst) const = 0;
// "filter" contains the data appended by a preceding call to
// CreateFilter() on this class. This method must return true if
// the key was in the list of keys passed to CreateFilter().
// This method may return true or false if the key was not on the
// list, but it should aim to return false with a high probability.
virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const = 0;
// Return a new FilterBitsBuilder for full or partitioned filter blocks, or
// nullptr if using block-based filter.
// NOTE: This function is only called by GetBuilderWithContext() below for
// custom FilterPolicy implementations. Thus, it is not necessary to
// override this function if overriding GetBuilderWithContext().
virtual FilterBitsBuilder* GetFilterBitsBuilder() const { return nullptr; }
// A newer variant of GetFilterBitsBuilder that allows a FilterPolicy
// to customize the builder for contextual constraints and hints.
// (Name changed to avoid triggering -Werror=overloaded-virtual.)
// If overriding GetFilterBitsBuilder() suffices, it is not necessary to
// override this function.
virtual FilterBitsBuilder* GetBuilderWithContext(
const FilterBuildingContext&) const {
return GetFilterBitsBuilder();
}
// Return a new FilterBitsReader for full or partitioned filter blocks, or
// nullptr if using block-based filter.
// As here, the input slice should NOT be deleted by FilterPolicy.
virtual FilterBitsReader* GetFilterBitsReader(
const Slice& /*contents*/) const {
return nullptr;
}
};
// Return a new filter policy that uses a bloom filter with approximately
// the specified number of bits per key.
//
// bits_per_key: average bits allocated per key in bloom filter. A good
// choice is 9.9, which yields a filter with ~ 1% false positive rate.
// When format_version < 5, the value will be rounded to the nearest
// integer. Recommend using no more than three decimal digits after the
// decimal point, as in 6.667.
//
// use_block_based_builder: use deprecated block based filter (true) rather
// than full or partitioned filter (false).
//
// Callers must delete the result after any database that is using the
// result has been closed.
//
// Note: if you are using a custom comparator that ignores some parts
// of the keys being compared, you must not use NewBloomFilterPolicy()
// and must provide your own FilterPolicy that also ignores the
// corresponding parts of the keys. For example, if the comparator
// ignores trailing spaces, it would be incorrect to use a
// FilterPolicy (like NewBloomFilterPolicy) that does not ignore
// trailing spaces in keys.
extern const FilterPolicy* NewBloomFilterPolicy(
double bits_per_key, bool use_block_based_builder = false);
// An EXPERIMENTAL new Bloom alternative that saves about 30% space
// compared to Bloom filters, with about 3-4x construction time and
// similar query times. For example, if you pass in 10 for
// bloom_equivalent_bits_per_key, you'll get the same 0.95% FP rate
// as Bloom filter but only using about 7 bits per key. (This
// way of configuring the new filter is considered experimental
// and/or transitional, so is expected to go away.)
//
// Ribbon filters are ignored by previous versions of RocksDB, as if
// no filter was used.
//
// Note: this policy can generate Bloom filters in some cases.
// For very small filters (well under 1KB), Bloom fallback is by
// design, as the current Ribbon schema is not optimized to save vs.
// Bloom for such small filters. Other cases of Bloom fallback should
// be exceptional and log an appropriate warning.
extern const FilterPolicy* NewExperimentalRibbonFilterPolicy(
double bloom_equivalent_bits_per_key);
} // namespace ROCKSDB_NAMESPACE