Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 26a5b2e

Browse files
committedMar 10, 2023
draft
1 parent 4f882d4 commit 26a5b2e

File tree

11 files changed

+133
-31
lines changed

11 files changed

+133
-31
lines changed
 

‎HISTORY.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
### New Features
1010
* Add statistics rocksdb.secondary.cache.filter.hits, rocksdb.secondary.cache.index.hits, and rocksdb.secondary.cache.data.hits
11-
* Add new statistics rocksdb.table.prefetch.tail.read.bytes to report number of bytes read in prefetching contents from the end of SST table during table open
11+
* Add new statistics rocksdb.table.open.prefetch.tail.read.bytes, rocksdb.table.open.prefetch.tail.{miss|hit}
1212

1313
## 8.0.0 (02/19/2023)
1414
### Behavior changes

‎file/file_prefetch_buffer.cc

+40-1
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,9 @@ Status FilePrefetchBuffer::Prefetch(const IOOptions& opts,
162162

163163
Status s = Read(opts, reader, rate_limiter_priority, read_len, chunk_len,
164164
rounddown_offset, curr_);
165+
if (s.ok() && usage_ == FilePrefetchBufferUsage::kTableOpenPrefetchTail) {
166+
RecordInHistogram(stats_, TABLE_OPEN_PREFETCH_TAIL_READ_BYTES, read_len);
167+
}
165168
return s;
166169
}
167170

@@ -612,7 +615,13 @@ bool FilePrefetchBuffer::TryReadFromCache(const IOOptions& opts,
612615
if (track_min_offset_ && offset < min_offset_read_) {
613616
min_offset_read_ = static_cast<size_t>(offset);
614617
}
615-
if (!enable_ || (offset < bufs_[curr_].offset_)) {
618+
if (!enable_) {
619+
return false;
620+
}
621+
if (offset < bufs_[curr_].offset_) {
622+
if (usage_ == FilePrefetchBufferUsage::kTableOpenPrefetchTail) {
623+
RecordTick(stats_, TABLE_OPEN_PREFETCH_TAIL_MISS);
624+
}
616625
return false;
617626
}
618627

@@ -635,6 +644,9 @@ bool FilePrefetchBuffer::TryReadFromCache(const IOOptions& opts,
635644
if (!IsEligibleForPrefetch(offset, n)) {
636645
// Ignore status as Prefetch is not called.
637646
s.PermitUncheckedError();
647+
if (usage_ == FilePrefetchBufferUsage::kTableOpenPrefetchTail) {
648+
RecordTick(stats_, TABLE_OPEN_PREFETCH_TAIL_MISS);
649+
}
638650
return false;
639651
}
640652
}
@@ -648,17 +660,26 @@ bool FilePrefetchBuffer::TryReadFromCache(const IOOptions& opts,
648660
#ifndef NDEBUG
649661
IGNORE_STATUS_IF_ERROR(s);
650662
#endif
663+
if (usage_ == FilePrefetchBufferUsage::kTableOpenPrefetchTail) {
664+
RecordTick(stats_, TABLE_OPEN_PREFETCH_TAIL_MISS);
665+
}
651666
return false;
652667
}
653668
readahead_size_ = std::min(max_readahead_size_, readahead_size_ * 2);
654669
} else {
670+
if (usage_ == FilePrefetchBufferUsage::kTableOpenPrefetchTail) {
671+
RecordTick(stats_, TABLE_OPEN_PREFETCH_TAIL_MISS);
672+
}
655673
return false;
656674
}
657675
}
658676
UpdateReadPattern(offset, n, false /*decrease_readaheadsize*/);
659677

660678
uint64_t offset_in_buffer = offset - bufs_[curr_].offset_;
661679
*result = Slice(bufs_[curr_].buffer_.BufferStart() + offset_in_buffer, n);
680+
if (usage_ == FilePrefetchBufferUsage::kTableOpenPrefetchTail) {
681+
RecordTick(stats_, TABLE_OPEN_PREFETCH_TAIL_HIT);
682+
}
662683
return true;
663684
}
664685

@@ -684,11 +705,17 @@ bool FilePrefetchBuffer::TryReadFromCacheAsync(
684705
bufs_[curr_].buffer_.Clear();
685706
bufs_[curr_ ^ 1].buffer_.Clear();
686707
explicit_prefetch_submitted_ = false;
708+
if (usage_ == FilePrefetchBufferUsage::kTableOpenPrefetchTail) {
709+
RecordTick(stats_, TABLE_OPEN_PREFETCH_TAIL_MISS);
710+
}
687711
return false;
688712
}
689713
}
690714

691715
if (!explicit_prefetch_submitted_ && offset < bufs_[curr_].offset_) {
716+
if (usage_ == FilePrefetchBufferUsage::kTableOpenPrefetchTail) {
717+
RecordTick(stats_, TABLE_OPEN_PREFETCH_TAIL_MISS);
718+
}
692719
return false;
693720
}
694721

@@ -714,6 +741,9 @@ bool FilePrefetchBuffer::TryReadFromCacheAsync(
714741
if (!IsEligibleForPrefetch(offset, n)) {
715742
// Ignore status as Prefetch is not called.
716743
s.PermitUncheckedError();
744+
if (usage_ == FilePrefetchBufferUsage::kTableOpenPrefetchTail) {
745+
RecordTick(stats_, TABLE_OPEN_PREFETCH_TAIL_MISS);
746+
}
717747
return false;
718748
}
719749
}
@@ -729,10 +759,16 @@ bool FilePrefetchBuffer::TryReadFromCacheAsync(
729759
#ifndef NDEBUG
730760
IGNORE_STATUS_IF_ERROR(s);
731761
#endif
762+
if (usage_ == FilePrefetchBufferUsage::kTableOpenPrefetchTail) {
763+
RecordTick(stats_, TABLE_OPEN_PREFETCH_TAIL_MISS);
764+
}
732765
return false;
733766
}
734767
prefetched = explicit_prefetch_submitted_ ? false : true;
735768
} else {
769+
if (usage_ == FilePrefetchBufferUsage::kTableOpenPrefetchTail) {
770+
RecordTick(stats_, TABLE_OPEN_PREFETCH_TAIL_MISS);
771+
}
736772
return false;
737773
}
738774
}
@@ -748,6 +784,9 @@ bool FilePrefetchBuffer::TryReadFromCacheAsync(
748784
if (prefetched) {
749785
readahead_size_ = std::min(max_readahead_size_, readahead_size_ * 2);
750786
}
787+
if (usage_ == FilePrefetchBufferUsage::kTableOpenPrefetchTail) {
788+
RecordTick(stats_, TABLE_OPEN_PREFETCH_TAIL_HIT);
789+
}
751790
return true;
752791
}
753792

‎file/file_prefetch_buffer.h

+16-8
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,11 @@ struct BufferInfo {
5454
uint32_t pos_ = 0;
5555
};
5656

57+
enum FilePrefetchBufferUsage {  // Tags what a FilePrefetchBuffer is used for; checked before recording TABLE_OPEN_PREFETCH_TAIL_* statistics.
58+
kTableOpenPrefetchTail,  // Passed by BlockBasedTable::PrefetchTail; turns on the tail read-bytes histogram and the hit/miss tickers.
59+
kUnknown,  // Default value of the constructor's `usage` argument.
60+
};
61+
5762
// FilePrefetchBuffer is a smart buffer to store and read data from a file.
5863
class FilePrefetchBuffer {
5964
public:
@@ -78,13 +83,13 @@ class FilePrefetchBuffer {
7883
// and max_readahead_size are passed in.
7984
// A user can construct a FilePrefetchBuffer without any arguments, but use
8085
// `Prefetch` to load data into the buffer.
81-
FilePrefetchBuffer(size_t readahead_size = 0, size_t max_readahead_size = 0,
82-
bool enable = true, bool track_min_offset = false,
83-
bool implicit_auto_readahead = false,
84-
uint64_t num_file_reads = 0,
85-
uint64_t num_file_reads_for_auto_readahead = 0,
86-
FileSystem* fs = nullptr, SystemClock* clock = nullptr,
87-
Statistics* stats = nullptr)
86+
FilePrefetchBuffer(
87+
size_t readahead_size = 0, size_t max_readahead_size = 0,
88+
bool enable = true, bool track_min_offset = false,
89+
bool implicit_auto_readahead = false, uint64_t num_file_reads = 0,
90+
uint64_t num_file_reads_for_auto_readahead = 0, FileSystem* fs = nullptr,
91+
SystemClock* clock = nullptr, Statistics* stats = nullptr,
92+
FilePrefetchBufferUsage usage = FilePrefetchBufferUsage::kUnknown)
8893
: curr_(0),
8994
readahead_size_(readahead_size),
9095
initial_auto_readahead_size_(readahead_size),
@@ -100,7 +105,8 @@ class FilePrefetchBuffer {
100105
explicit_prefetch_submitted_(false),
101106
fs_(fs),
102107
clock_(clock),
103-
stats_(stats) {
108+
stats_(stats),
109+
usage_(usage) {
104110
assert((num_file_reads_ >= num_file_reads_for_auto_readahead_ + 1) ||
105111
(num_file_reads_ == 0));
106112
// If ReadOptions.async_io is enabled, data is asynchronously filled in
@@ -442,5 +448,7 @@ class FilePrefetchBuffer {
442448
FileSystem* fs_;
443449
SystemClock* clock_;
444450
Statistics* stats_;
451+
452+
FilePrefetchBufferUsage usage_;
445453
};
446454
} // namespace ROCKSDB_NAMESPACE

‎file/prefetch_test.cc

+23
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,7 @@ TEST_P(PrefetchTest, Basic) {
125125
std::unique_ptr<Env> env(new CompositeEnvWrapper(env_, fs));
126126
Options options;
127127
SetGenericOptions(env.get(), use_direct_io, options);
128+
options.statistics = CreateDBStatistics();
128129

129130
const int kNumKeys = 1100;
130131
int buff_prefetch_count = 0;
@@ -167,9 +168,25 @@ TEST_P(PrefetchTest, Basic) {
167168
Slice least(start_key.data(), start_key.size());
168169
Slice greatest(end_key.data(), end_key.size());
169170

171+
HistogramData prev_table_open_prefetch_tail_read;
172+
options.statistics->histogramData(TABLE_OPEN_PREFETCH_TAIL_READ_BYTES,
173+
&prev_table_open_prefetch_tail_read);
174+
const uint64_t prev_table_open_prefetch_tail_miss =
175+
options.statistics->getTickerCount(TABLE_OPEN_PREFETCH_TAIL_MISS);
176+
const uint64_t prev_table_open_prefetch_tail_hit =
177+
options.statistics->getTickerCount(TABLE_OPEN_PREFETCH_TAIL_HIT);
178+
170179
// commenting out the line below causes the example to work correctly
171180
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), &least, &greatest));
172181

182+
HistogramData cur_table_open_prefetch_tail_read;
183+
options.statistics->histogramData(TABLE_OPEN_PREFETCH_TAIL_READ_BYTES,
184+
&cur_table_open_prefetch_tail_read);
185+
const uint64_t cur_table_open_prefetch_tail_miss =
186+
options.statistics->getTickerCount(TABLE_OPEN_PREFETCH_TAIL_MISS);
187+
const uint64_t cur_table_open_prefetch_tail_hit =
188+
options.statistics->getTickerCount(TABLE_OPEN_PREFETCH_TAIL_HIT);
189+
173190
if (support_prefetch && !use_direct_io) {
174191
// If underlying file system supports prefetch, and directIO is not enabled
175192
// make sure prefetch() is called and FilePrefetchBuffer is not used.
@@ -182,6 +199,12 @@ TEST_P(PrefetchTest, Basic) {
182199
// used.
183200
ASSERT_FALSE(fs->IsPrefetchCalled());
184201
ASSERT_GT(buff_prefetch_count, 0);
202+
ASSERT_GT(cur_table_open_prefetch_tail_read.count,
203+
prev_table_open_prefetch_tail_read.count);
204+
ASSERT_GT(cur_table_open_prefetch_tail_hit,
205+
prev_table_open_prefetch_tail_hit);
206+
ASSERT_GE(cur_table_open_prefetch_tail_miss,
207+
prev_table_open_prefetch_tail_miss);
185208
buff_prefetch_count = 0;
186209
}
187210

‎include/rocksdb/statistics.h

+12-3
Original file line numberDiff line numberDiff line change
@@ -420,6 +420,15 @@ enum Tickers : uint32_t {
420420
SECONDARY_CACHE_INDEX_HITS,
421421
SECONDARY_CACHE_DATA_HITS,
422422

423+
// Number of reads into the prefetched tail (see
424+
// `TABLE_OPEN_PREFETCH_TAIL_READ_BYTES`)
425+
// that cannot find their data during table open
426+
TABLE_OPEN_PREFETCH_TAIL_MISS,
427+
// Number of reads into the prefetched tail (see
428+
// `TABLE_OPEN_PREFETCH_TAIL_READ_BYTES`)
429+
// that find their data during table open
430+
TABLE_OPEN_PREFETCH_TAIL_HIT,
431+
423432
TICKER_ENUM_MAX
424433
};
425434

@@ -533,9 +542,9 @@ enum Histograms : uint32_t {
533542
// Wait time for aborting async read in FilePrefetchBuffer destructor
534543
ASYNC_PREFETCH_ABORT_MICROS,
535544

536-
// Number of bytes read in prefetching contents from the end of SST table
537-
// during table open
538-
TABLE_PREFETCH_TAIL_READ_BYTES,
545+
// Number of bytes read in RocksDB's prefetching contents (as opposed to file
546+
// system's prefetch) from the end of SST table during block based table open
547+
TABLE_OPEN_PREFETCH_TAIL_READ_BYTES,
539548

540549
HISTOGRAM_ENUM_MAX
541550
};

‎java/rocksjni/portal.h

+11-2
Original file line numberDiff line numberDiff line change
@@ -5125,6 +5125,10 @@ class TickerTypeJni {
51255125
return -0x38;
51265126
case ROCKSDB_NAMESPACE::Tickers::SECONDARY_CACHE_DATA_HITS:
51275127
return -0x39;
5128+
case ROCKSDB_NAMESPACE::Tickers::TABLE_OPEN_PREFETCH_TAIL_MISS:
5129+
return -0x3A;
5130+
case ROCKSDB_NAMESPACE::Tickers::TABLE_OPEN_PREFETCH_TAIL_HIT:
5131+
return -0x3B;
51285132
case ROCKSDB_NAMESPACE::Tickers::TICKER_ENUM_MAX:
51295133
// 0x5F was the max value in the initial copy of tickers to Java.
51305134
// Since these values are exposed directly to Java clients, we keep
@@ -5482,6 +5486,10 @@ class TickerTypeJni {
54825486
return ROCKSDB_NAMESPACE::Tickers::SECONDARY_CACHE_INDEX_HITS;
54835487
case -0x39:
54845488
return ROCKSDB_NAMESPACE::Tickers::SECONDARY_CACHE_DATA_HITS;
5489+
case -0x3A:
5490+
return ROCKSDB_NAMESPACE::Tickers::TABLE_OPEN_PREFETCH_TAIL_MISS;
5491+
case -0x3B:
5492+
return ROCKSDB_NAMESPACE::Tickers::TABLE_OPEN_PREFETCH_TAIL_HIT;
54855493
case 0x5F:
54865494
// 0x5F was the max value in the initial copy of tickers to Java.
54875495
// Since these values are exposed directly to Java clients, we keep
@@ -5609,7 +5617,7 @@ class HistogramTypeJni {
56095617
return 0x37;
56105618
case ASYNC_PREFETCH_ABORT_MICROS:
56115619
return 0x38;
5612-
case ROCKSDB_NAMESPACE::Histograms::TABLE_PREFETCH_TAIL_READ_BYTES:
5620+
case ROCKSDB_NAMESPACE::Histograms::TABLE_OPEN_PREFETCH_TAIL_READ_BYTES:
56135621
return 0x39;
56145622
case ROCKSDB_NAMESPACE::Histograms::HISTOGRAM_ENUM_MAX:
56155623
// 0x1F for backwards compatibility on current minor version.
@@ -5728,7 +5736,8 @@ class HistogramTypeJni {
57285736
case 0x38:
57295737
return ROCKSDB_NAMESPACE::Histograms::ASYNC_PREFETCH_ABORT_MICROS;
57305738
case 0x39:
5731-
return ROCKSDB_NAMESPACE::Histograms::TABLE_PREFETCH_TAIL_READ_BYTES;
5739+
return ROCKSDB_NAMESPACE::Histograms::
5740+
TABLE_OPEN_PREFETCH_TAIL_READ_BYTES;
57325741
case 0x1F:
57335742
// 0x1F for backwards compatibility on current minor version.
57345743
return ROCKSDB_NAMESPACE::Histograms::HISTOGRAM_ENUM_MAX;

‎java/src/main/java/org/rocksdb/HistogramType.java

+4-3
Original file line numberDiff line numberDiff line change
@@ -163,10 +163,11 @@ public enum HistogramType {
163163
ASYNC_READ_BYTES((byte) 0x33),
164164

165165
/**
166-
* Number of bytes read in prefetching contents from the end of SST table
167-
* during table open
166+
* Number of bytes read in RocksDB's prefetching contents
167+
* (as opposed to file system's prefetch)
168+
* from the end of SST table during block based table open
168169
*/
169-
TABLE_PREFETCH_TAIL_READ_BYTES((byte) 0x39),
170+
TABLE_OPEN_PREFETCH_TAIL_READ_BYTES((byte) 0x39),
170171

171172
// 0x1F for backwards compatibility on current minor version.
172173
HISTOGRAM_ENUM_MAX((byte) 0x1F);

‎java/src/main/java/org/rocksdb/TickerType.java

+14
Original file line numberDiff line numberDiff line change
@@ -740,6 +740,20 @@ public enum TickerType {
740740
*/
741741
BLOB_DB_CACHE_BYTES_WRITE((byte) -0x34),
742742

743+
/**
744+
* Number of reads into the prefetched tail (see
745+
* `TABLE_OPEN_PREFETCH_TAIL_READ_BYTES`)
746+
* that cannot find their data during table open
747+
*/
748+
TABLE_OPEN_PREFETCH_TAIL_MISS((byte) -0x3A),
749+
750+
/**
751+
* Number of reads into the prefetched tail (see
752+
* `TABLE_OPEN_PREFETCH_TAIL_READ_BYTES`)
753+
* that find their data during table open
754+
*/
755+
TABLE_OPEN_PREFETCH_TAIL_HIT((byte) -0x3B),
756+
743757
TICKER_ENUM_MAX((byte) 0x5F);
744758

745759
private final byte value;

‎monitoring/statistics.cc

+6-2
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,10 @@ const std::vector<std::pair<Tickers, std::string>> TickersNameMap = {
216216
{ASYNC_READ_ERROR_COUNT, "rocksdb.async.read.error.count"},
217217
{SECONDARY_CACHE_FILTER_HITS, "rocksdb.secondary.cache.filter.hits"},
218218
{SECONDARY_CACHE_INDEX_HITS, "rocksdb.secondary.cache.index.hits"},
219-
{SECONDARY_CACHE_DATA_HITS, "rocksdb.secondary.cache.data.hits"}};
219+
{SECONDARY_CACHE_DATA_HITS, "rocksdb.secondary.cache.data.hits"},
220+
{TABLE_OPEN_PREFETCH_TAIL_MISS, "rocksdb.table.open.prefetch.tail.miss"},
221+
{TABLE_OPEN_PREFETCH_TAIL_HIT, "rocksdb.table.open.prefetch.tail.hit"},
222+
};
220223

221224
const std::vector<std::pair<Histograms, std::string>> HistogramsNameMap = {
222225
{DB_GET, "rocksdb.db.get.micros"},
@@ -272,7 +275,8 @@ const std::vector<std::pair<Histograms, std::string>> HistogramsNameMap = {
272275
{MULTIGET_IO_BATCH_SIZE, "rocksdb.multiget.io.batch.size"},
273276
{NUM_LEVEL_READ_PER_MULTIGET, "rocksdb.num.level.read.per.multiget"},
274277
{ASYNC_PREFETCH_ABORT_MICROS, "rocksdb.async.prefetch.abort.micros"},
275-
{TABLE_PREFETCH_TAIL_READ_BYTES, "rocksdb.table.prefetch.tail.read.bytes"},
278+
{TABLE_OPEN_PREFETCH_TAIL_READ_BYTES,
279+
"rocksdb.table.open.prefetch.tail.read.bytes"},
276280
};
277281

278282
std::shared_ptr<Statistics> CreateDBStatistics() {

‎table/block_based/block_based_table_reader.cc

+6-5
Original file line numberDiff line numberDiff line change
@@ -834,7 +834,6 @@ Status BlockBasedTable::PrefetchTail(
834834
if (!file->use_direct_io() && !force_direct_prefetch) {
835835
if (!file->Prefetch(prefetch_off, prefetch_len, ro.rate_limiter_priority)
836836
.IsNotSupported()) {
837-
RecordInHistogram(stats, TABLE_PREFETCH_TAIL_READ_BYTES, prefetch_len);
838837
prefetch_buffer->reset(new FilePrefetchBuffer(
839838
0 /* readahead_size */, 0 /* max_readahead_size */,
840839
false /* enable */, true /* track_min_offset */));
@@ -843,17 +842,19 @@ Status BlockBasedTable::PrefetchTail(
843842
}
844843

845844
// Use `FilePrefetchBuffer`
846-
prefetch_buffer->reset(
847-
new FilePrefetchBuffer(0 /* readahead_size */, 0 /* max_readahead_size */,
848-
true /* enable */, true /* track_min_offset */));
845+
prefetch_buffer->reset(new FilePrefetchBuffer(
846+
0 /* readahead_size */, 0 /* max_readahead_size */, true /* enable */,
847+
true /* track_min_offset */, false /* implicit_auto_readahead */,
848+
0 /* num_file_reads */, 0 /* num_file_reads_for_auto_readahead */,
849+
nullptr /* fs */, nullptr /* clock */, stats,
850+
FilePrefetchBufferUsage::kTableOpenPrefetchTail));
849851

850852
IOOptions opts;
851853
Status s = file->PrepareIOOptions(ro, opts);
852854
if (s.ok()) {
853855
s = (*prefetch_buffer)
854856
->Prefetch(opts, file, prefetch_off, prefetch_len,
855857
ro.rate_limiter_priority);
856-
RecordInHistogram(stats, TABLE_PREFETCH_TAIL_READ_BYTES, prefetch_len);
857858
}
858859
return s;
859860
}

‎table/block_based/block_based_table_reader_test.cc

-6
Original file line numberDiff line numberDiff line change
@@ -232,18 +232,12 @@ TEST_P(BlockBasedTableReaderTest, MultiGet) {
232232

233233
std::unique_ptr<BlockBasedTable> table;
234234
Options options;
235-
options.statistics = CreateDBStatistics();
236235
ImmutableOptions ioptions(options);
237236
FileOptions foptions;
238237
foptions.use_direct_reads = use_direct_reads_;
239238
InternalKeyComparator comparator(options.comparator);
240239
NewBlockBasedTableReader(foptions, ioptions, comparator, table_name, &table);
241240

242-
HistogramData table_prefetch_tail_read_bytes;
243-
options.statistics->histogramData(TABLE_PREFETCH_TAIL_READ_BYTES,
244-
&table_prefetch_tail_read_bytes);
245-
ASSERT_EQ(table_prefetch_tail_read_bytes.count, 1);
246-
247241
// Ensure that keys are not in cache before MultiGet.
248242
for (auto& key : keys) {
249243
ASSERT_FALSE(table->TEST_KeyInCache(ReadOptions(), key));

0 commit comments

Comments
 (0)
Please sign in to comment.