Skip to content

Commit 151242c

Browse files
hx235 authored and facebook-github-bot committed
Group rocksdb.sst.read.micros stat by IOActivity flush and compaction (#11288)
Summary: **Context:** The existing stat rocksdb.sst.read.micros does not reflect each of compaction and flush cases but aggregate them, which is not so helpful for us to understand IO read behavior of each of them. **Summary** - Update `StopWatch` and `RandomAccessFileReader` to record `rocksdb.sst.read.micros` and `rocksdb.file.{flush/compaction}.read.micros` - Fixed the default histogram in `RandomAccessFileReader` - New field `ReadOptions/IOOptions::io_activity`; Pass `ReadOptions` through paths under db open, flush and compaction to where we can prepare `IOOptions` and pass it to `RandomAccessFileReader` - Use `thread_status_util` for assertion in `DbStressFSWrapper` for continuous testing on we are passing correct `io_activity` under db open, flush and compaction Pull Request resolved: #11288 Test Plan: - **Stress test** - **Db bench 1: rocksdb.sst.read.micros COUNT ≈ sum of rocksdb.file.read.flush.micros's and rocksdb.file.read.compaction.micros's.** (without blob) - May not be exactly the same due to `HistogramStat::Add` only guarantees atomic not accuracy across threads. 
``` ./db_bench -db=/dev/shm/testdb/ -statistics=true -benchmarks="fillseq" -key_size=32 -value_size=512 -num=50000 -write_buffer_size=655 -target_file_size_base=655 -disable_auto_compactions=false -compression_type=none -bloom_bits=3 (-use_plain_table=1 -prefix_size=10) ``` ``` // BlockBasedTable rocksdb.sst.read.micros P50 : 2.009374 P95 : 4.968548 P99 : 8.110362 P100 : 43.000000 COUNT : 40456 SUM : 114805 rocksdb.file.read.flush.micros P50 : 1.871841 P95 : 3.872407 P99 : 5.540541 P100 : 43.000000 COUNT : 2250 SUM : 6116 rocksdb.file.read.compaction.micros P50 : 2.023109 P95 : 5.029149 P99 : 8.196910 P100 : 26.000000 COUNT : 38206 SUM : 108689 // PlainTable Does not apply ``` - **Db bench 2: performance** **Read** SETUP: db with 900 files ``` ./db_bench -db=/dev/shm/testdb/ -benchmarks="fillseq" -key_size=32 -value_size=512 -num=50000 -write_buffer_size=655 -disable_auto_compactions=true -target_file_size_base=655 -compression_type=none ``` run till convergence ``` ./db_bench -seed=1678564177044286 -use_existing_db=true -db=/dev/shm/testdb -benchmarks=readrandom[-X60] -statistics=true -num=1000000 -disable_auto_compactions=true -compression_type=none -bloom_bits=3 ``` Pre-change `readrandom [AVG 60 runs] : 21568 (± 248) ops/sec` Post-change (no regression, -0.3%) `readrandom [AVG 60 runs] : 21486 (± 236) ops/sec` **Compaction/Flush** run till convergence ``` ./db_bench -db=/dev/shm/testdb2/ -seed=1678564177044286 -benchmarks="fillseq[-X60]" -key_size=32 -value_size=512 -num=50000 -write_buffer_size=655 -disable_auto_compactions=false -target_file_size_base=655 -compression_type=none rocksdb.sst.read.micros COUNT : 33820 rocksdb.sst.read.flush.micros COUNT : 1800 rocksdb.sst.read.compaction.micros COUNT : 32020 ``` Pre-change `fillseq [AVG 46 runs] : 1391 (± 214) ops/sec; 0.7 (± 0.1) MB/sec` Post-change (no regression, ~-0.4%) `fillseq [AVG 46 runs] : 1385 (± 216) ops/sec; 0.7 (± 0.1) MB/sec` Reviewed By: ajkr Differential Revision: D44007011 Pulled By: hx235 
fbshipit-source-id: a54c89e4846dfc9a135389edf3f3eedfea257132
1 parent 0a774a1 commit 151242c

File tree

116 files changed

+1181
-545
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

116 files changed

+1181
-545
lines changed

HISTORY.md

+1
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
### New Features
1818
* Add experimental `PerfContext` counters `iter_{next|prev|seek}_count` for db iterator, each counting the times of corresponding API being called.
1919
* Allow runtime changes to whether `WriteBufferManager` allows stall or not by calling `SetAllowStall()`
20+
* New statistics `rocksdb.file.read.{flush|compaction}.micros` that measure read time of block-based SST tables or blob files during flush or compaction.
2021

2122
### Bug Fixes
2223
* In block cache tracing, fixed some cases of bad hit/miss information (and more) with MultiGet.

db/blob/blob_file_cache.cc

+2-2
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ BlobFileCache::BlobFileCache(Cache* cache,
3737
}
3838

3939
Status BlobFileCache::GetBlobFileReader(
40-
uint64_t blob_file_number,
40+
const ReadOptions& read_options, uint64_t blob_file_number,
4141
CacheHandleGuard<BlobFileReader>* blob_file_reader) {
4242
assert(blob_file_reader);
4343
assert(blob_file_reader->IsEmpty());
@@ -73,7 +73,7 @@ Status BlobFileCache::GetBlobFileReader(
7373
{
7474
assert(file_options_);
7575
const Status s = BlobFileReader::Create(
76-
*immutable_options_, *file_options_, column_family_id_,
76+
*immutable_options_, read_options, *file_options_, column_family_id_,
7777
blob_file_read_hist_, blob_file_number, io_tracer_, &reader);
7878
if (!s.ok()) {
7979
RecordTick(statistics, NO_FILE_ERRORS);

db/blob/blob_file_cache.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,8 @@ class BlobFileCache {
3232
BlobFileCache(const BlobFileCache&) = delete;
3333
BlobFileCache& operator=(const BlobFileCache&) = delete;
3434

35-
Status GetBlobFileReader(uint64_t blob_file_number,
35+
Status GetBlobFileReader(const ReadOptions& read_options,
36+
uint64_t blob_file_number,
3637
CacheHandleGuard<BlobFileReader>* blob_file_reader);
3738

3839
private:

db/blob/blob_file_cache_test.cc

+17-8
Original file line numberDiff line numberDiff line change
@@ -118,15 +118,18 @@ TEST_F(BlobFileCacheTest, GetBlobFileReader) {
118118
// First try: reader should be opened and put in cache
119119
CacheHandleGuard<BlobFileReader> first;
120120

121-
ASSERT_OK(blob_file_cache.GetBlobFileReader(blob_file_number, &first));
121+
const ReadOptions read_options;
122+
ASSERT_OK(blob_file_cache.GetBlobFileReader(read_options, blob_file_number,
123+
&first));
122124
ASSERT_NE(first.GetValue(), nullptr);
123125
ASSERT_EQ(options.statistics->getTickerCount(NO_FILE_OPENS), 1);
124126
ASSERT_EQ(options.statistics->getTickerCount(NO_FILE_ERRORS), 0);
125127

126128
// Second try: reader should be served from cache
127129
CacheHandleGuard<BlobFileReader> second;
128130

129-
ASSERT_OK(blob_file_cache.GetBlobFileReader(blob_file_number, &second));
131+
ASSERT_OK(blob_file_cache.GetBlobFileReader(read_options, blob_file_number,
132+
&second));
130133
ASSERT_NE(second.GetValue(), nullptr);
131134
ASSERT_EQ(options.statistics->getTickerCount(NO_FILE_OPENS), 1);
132135
ASSERT_EQ(options.statistics->getTickerCount(NO_FILE_ERRORS), 0);
@@ -163,19 +166,21 @@ TEST_F(BlobFileCacheTest, GetBlobFileReader_Race) {
163166
CacheHandleGuard<BlobFileReader> first;
164167
CacheHandleGuard<BlobFileReader> second;
165168

169+
const ReadOptions read_options;
166170
SyncPoint::GetInstance()->SetCallBack(
167171
"BlobFileCache::GetBlobFileReader:DoubleCheck", [&](void* /* arg */) {
168172
// Disabling sync points to prevent infinite recursion
169173
SyncPoint::GetInstance()->DisableProcessing();
170-
171-
ASSERT_OK(blob_file_cache.GetBlobFileReader(blob_file_number, &second));
174+
ASSERT_OK(blob_file_cache.GetBlobFileReader(read_options,
175+
blob_file_number, &second));
172176
ASSERT_NE(second.GetValue(), nullptr);
173177
ASSERT_EQ(options.statistics->getTickerCount(NO_FILE_OPENS), 1);
174178
ASSERT_EQ(options.statistics->getTickerCount(NO_FILE_ERRORS), 0);
175179
});
176180
SyncPoint::GetInstance()->EnableProcessing();
177181

178-
ASSERT_OK(blob_file_cache.GetBlobFileReader(blob_file_number, &first));
182+
ASSERT_OK(blob_file_cache.GetBlobFileReader(read_options, blob_file_number,
183+
&first));
179184
ASSERT_NE(first.GetValue(), nullptr);
180185
ASSERT_EQ(options.statistics->getTickerCount(NO_FILE_OPENS), 1);
181186
ASSERT_EQ(options.statistics->getTickerCount(NO_FILE_ERRORS), 0);
@@ -213,8 +218,10 @@ TEST_F(BlobFileCacheTest, GetBlobFileReader_IOError) {
213218

214219
CacheHandleGuard<BlobFileReader> reader;
215220

221+
const ReadOptions read_options;
216222
ASSERT_TRUE(
217-
blob_file_cache.GetBlobFileReader(blob_file_number, &reader).IsIOError());
223+
blob_file_cache.GetBlobFileReader(read_options, blob_file_number, &reader)
224+
.IsIOError());
218225
ASSERT_EQ(reader.GetValue(), nullptr);
219226
ASSERT_EQ(options.statistics->getTickerCount(NO_FILE_OPENS), 1);
220227
ASSERT_EQ(options.statistics->getTickerCount(NO_FILE_ERRORS), 1);
@@ -253,8 +260,10 @@ TEST_F(BlobFileCacheTest, GetBlobFileReader_CacheFull) {
253260
// strict_capacity_limit is set
254261
CacheHandleGuard<BlobFileReader> reader;
255262

256-
ASSERT_TRUE(blob_file_cache.GetBlobFileReader(blob_file_number, &reader)
257-
.IsMemoryLimit());
263+
const ReadOptions read_options;
264+
ASSERT_TRUE(
265+
blob_file_cache.GetBlobFileReader(read_options, blob_file_number, &reader)
266+
.IsMemoryLimit());
258267
ASSERT_EQ(reader.GetValue(), nullptr);
259268
ASSERT_EQ(options.statistics->getTickerCount(NO_FILE_OPENS), 1);
260269
ASSERT_EQ(options.statistics->getTickerCount(NO_FILE_ERRORS), 1);

db/blob/blob_file_reader.cc

+39-20
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,10 @@
2626
namespace ROCKSDB_NAMESPACE {
2727

2828
Status BlobFileReader::Create(
29-
const ImmutableOptions& immutable_options, const FileOptions& file_options,
30-
uint32_t column_family_id, HistogramImpl* blob_file_read_hist,
31-
uint64_t blob_file_number, const std::shared_ptr<IOTracer>& io_tracer,
29+
const ImmutableOptions& immutable_options, const ReadOptions& read_options,
30+
const FileOptions& file_options, uint32_t column_family_id,
31+
HistogramImpl* blob_file_read_hist, uint64_t blob_file_number,
32+
const std::shared_ptr<IOTracer>& io_tracer,
3233
std::unique_ptr<BlobFileReader>* blob_file_reader) {
3334
assert(blob_file_reader);
3435
assert(!*blob_file_reader);
@@ -52,15 +53,17 @@ Status BlobFileReader::Create(
5253
CompressionType compression_type = kNoCompression;
5354

5455
{
55-
const Status s = ReadHeader(file_reader.get(), column_family_id, statistics,
56-
&compression_type);
56+
const Status s =
57+
ReadHeader(file_reader.get(), read_options, column_family_id,
58+
statistics, &compression_type);
5759
if (!s.ok()) {
5860
return s;
5961
}
6062
}
6163

6264
{
63-
const Status s = ReadFooter(file_reader.get(), file_size, statistics);
65+
const Status s =
66+
ReadFooter(file_reader.get(), read_options, file_size, statistics);
6467
if (!s.ok()) {
6568
return s;
6669
}
@@ -134,6 +137,7 @@ Status BlobFileReader::OpenFile(
134137
}
135138

136139
Status BlobFileReader::ReadHeader(const RandomAccessFileReader* file_reader,
140+
const ReadOptions& read_options,
137141
uint32_t column_family_id,
138142
Statistics* statistics,
139143
CompressionType* compression_type) {
@@ -151,9 +155,10 @@ Status BlobFileReader::ReadHeader(const RandomAccessFileReader* file_reader,
151155
constexpr size_t read_size = BlobLogHeader::kSize;
152156

153157
// TODO: rate limit reading headers from blob files.
154-
const Status s = ReadFromFile(file_reader, read_offset, read_size,
155-
statistics, &header_slice, &buf, &aligned_buf,
156-
Env::IO_TOTAL /* rate_limiter_priority */);
158+
const Status s =
159+
ReadFromFile(file_reader, read_options, read_offset, read_size,
160+
statistics, &header_slice, &buf, &aligned_buf,
161+
Env::IO_TOTAL /* rate_limiter_priority */);
157162
if (!s.ok()) {
158163
return s;
159164
}
@@ -187,6 +192,7 @@ Status BlobFileReader::ReadHeader(const RandomAccessFileReader* file_reader,
187192
}
188193

189194
Status BlobFileReader::ReadFooter(const RandomAccessFileReader* file_reader,
195+
const ReadOptions& read_options,
190196
uint64_t file_size, Statistics* statistics) {
191197
assert(file_size >= BlobLogHeader::kSize + BlobLogFooter::kSize);
192198
assert(file_reader);
@@ -202,9 +208,10 @@ Status BlobFileReader::ReadFooter(const RandomAccessFileReader* file_reader,
202208
constexpr size_t read_size = BlobLogFooter::kSize;
203209

204210
// TODO: rate limit reading footers from blob files.
205-
const Status s = ReadFromFile(file_reader, read_offset, read_size,
206-
statistics, &footer_slice, &buf, &aligned_buf,
207-
Env::IO_TOTAL /* rate_limiter_priority */);
211+
const Status s =
212+
ReadFromFile(file_reader, read_options, read_offset, read_size,
213+
statistics, &footer_slice, &buf, &aligned_buf,
214+
Env::IO_TOTAL /* rate_limiter_priority */);
208215
if (!s.ok()) {
209216
return s;
210217
}
@@ -232,6 +239,7 @@ Status BlobFileReader::ReadFooter(const RandomAccessFileReader* file_reader,
232239
}
233240

234241
Status BlobFileReader::ReadFromFile(const RandomAccessFileReader* file_reader,
242+
const ReadOptions& read_options,
235243
uint64_t read_offset, size_t read_size,
236244
Statistics* statistics, Slice* slice,
237245
Buffer* buf, AlignedBuf* aligned_buf,
@@ -246,17 +254,23 @@ Status BlobFileReader::ReadFromFile(const RandomAccessFileReader* file_reader,
246254

247255
Status s;
248256

257+
IOOptions io_options;
258+
s = file_reader->PrepareIOOptions(read_options, io_options);
259+
if (!s.ok()) {
260+
return s;
261+
}
262+
249263
if (file_reader->use_direct_io()) {
250264
constexpr char* scratch = nullptr;
251265

252-
s = file_reader->Read(IOOptions(), read_offset, read_size, slice, scratch,
266+
s = file_reader->Read(io_options, read_offset, read_size, slice, scratch,
253267
aligned_buf, rate_limiter_priority);
254268
} else {
255269
buf->reset(new char[read_size]);
256270
constexpr AlignedBuf* aligned_scratch = nullptr;
257271

258-
s = file_reader->Read(IOOptions(), read_offset, read_size, slice,
259-
buf->get(), aligned_scratch, rate_limiter_priority);
272+
s = file_reader->Read(io_options, read_offset, read_size, slice, buf->get(),
273+
aligned_scratch, rate_limiter_priority);
260274
}
261275

262276
if (!s.ok()) {
@@ -324,8 +338,13 @@ Status BlobFileReader::GetBlob(
324338
Status s;
325339
constexpr bool for_compaction = true;
326340

341+
IOOptions io_options;
342+
s = file_reader_->PrepareIOOptions(read_options, io_options);
343+
if (!s.ok()) {
344+
return s;
345+
}
327346
prefetched = prefetch_buffer->TryReadFromCache(
328-
IOOptions(), file_reader_.get(), record_offset,
347+
io_options, file_reader_.get(), record_offset,
329348
static_cast<size_t>(record_size), &record_slice, &s,
330349
read_options.rate_limiter_priority, for_compaction);
331350
if (!s.ok()) {
@@ -338,10 +357,10 @@ Status BlobFileReader::GetBlob(
338357
PERF_COUNTER_ADD(blob_read_count, 1);
339358
PERF_COUNTER_ADD(blob_read_byte, record_size);
340359
PERF_TIMER_GUARD(blob_read_time);
341-
const Status s = ReadFromFile(file_reader_.get(), record_offset,
342-
static_cast<size_t>(record_size), statistics_,
343-
&record_slice, &buf, &aligned_buf,
344-
read_options.rate_limiter_priority);
360+
const Status s = ReadFromFile(
361+
file_reader_.get(), read_options, record_offset,
362+
static_cast<size_t>(record_size), statistics_, &record_slice, &buf,
363+
&aligned_buf, read_options.rate_limiter_priority);
345364
if (!s.ok()) {
346365
return s;
347366
}

db/blob/blob_file_reader.h

+5-1
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ class Statistics;
2929
class BlobFileReader {
3030
public:
3131
static Status Create(const ImmutableOptions& immutable_options,
32+
const ReadOptions& read_options,
3233
const FileOptions& file_options,
3334
uint32_t column_family_id,
3435
HistogramImpl* blob_file_read_hist,
@@ -74,15 +75,18 @@ class BlobFileReader {
7475
std::unique_ptr<RandomAccessFileReader>* file_reader);
7576

7677
static Status ReadHeader(const RandomAccessFileReader* file_reader,
78+
const ReadOptions& read_options,
7779
uint32_t column_family_id, Statistics* statistics,
7880
CompressionType* compression_type);
7981

8082
static Status ReadFooter(const RandomAccessFileReader* file_reader,
81-
uint64_t file_size, Statistics* statistics);
83+
const ReadOptions& read_options, uint64_t file_size,
84+
Statistics* statistics);
8285

8386
using Buffer = std::unique_ptr<char[]>;
8487

8588
static Status ReadFromFile(const RandomAccessFileReader* file_reader,
89+
const ReadOptions& read_options,
8690
uint64_t read_offset, size_t read_size,
8791
Statistics* statistics, Slice* slice, Buffer* buf,
8892
AlignedBuf* aligned_buf,

0 commit comments

Comments
 (0)