Skip to content

Commit b4f5774

Browse files
committed
Add and populate the new SST_READ_FLUSH_MICROS and SST_READ_COMPACTION_MICROS stats by IOActivity; derive IOActivity from TableReaderCaller; fall back to the SST_READ_MICROS stat in TableCache when the IOActivity is unknown
1 parent 92195a9 commit b4f5774

23 files changed

+141
-91
lines changed

HISTORY.md

+1
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
### New Features
1111
* Add statistics rocksdb.secondary.cache.filter.hits, rocksdb.secondary.cache.index.hits, and rocksdb.secondary.cache.data.hits
1212
* Added a new PerfContext counter `internal_merge_count_point_lookups` which tracks the number of Merge operands applied while serving point lookup queries.
13+
* Added new statistics rocksdb.sst.read.{flush|compaction}.micros, which group rocksdb.sst.read.micros measurements by flush and compaction activity.
1314

1415
## 8.0.0 (02/19/2023)
1516
### Behavior changes

db/blob/blob_file_reader.cc

+3-3
Original file line numberDiff line numberDiff line change
@@ -126,9 +126,9 @@ Status BlobFileReader::OpenFile(
126126

127127
file_reader->reset(new RandomAccessFileReader(
128128
std::move(file), blob_file_path, immutable_options.clock, io_tracer,
129-
immutable_options.stats, BLOB_DB_BLOB_FILE_READ_MICROS,
130-
blob_file_read_hist, immutable_options.rate_limiter.get(),
131-
immutable_options.listeners));
129+
immutable_options.stats, IOActivity::kBlob,
130+
Histograms::HISTOGRAM_ENUM_MAX, blob_file_read_hist,
131+
immutable_options.rate_limiter.get(), immutable_options.listeners));
132132

133133
return Status::OK();
134134
}

db/builder.cc

+3-4
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ Status BuildTable(
5757
const std::string& dbname, VersionSet* versions,
5858
const ImmutableDBOptions& db_options, const TableBuilderOptions& tboptions,
5959
const FileOptions& file_options, TableCache* table_cache,
60-
InternalIterator* iter,
60+
TableReaderCaller table_reader_caller, InternalIterator* iter,
6161
std::vector<std::unique_ptr<FragmentedRangeTombstoneIterator>>
6262
range_del_iters,
6363
FileMetaData* meta, std::vector<BlobFileAddition>* blob_file_additions,
@@ -256,8 +256,7 @@ Status BuildTable(
256256
approx_opts.files_size_error_margin = 0.1;
257257
meta->compensated_range_deletion_size += versions->ApproximateSize(
258258
approx_opts, version, kv.first.Encode(), tombstone_end.Encode(),
259-
0 /* start_level */, -1 /* end_level */,
260-
TableReaderCaller::kFlush);
259+
0 /* start_level */, -1 /* end_level */, table_reader_caller);
261260
}
262261
last_tombstone_start_user_key = range_del_it->start_key();
263262
}
@@ -376,7 +375,7 @@ Status BuildTable(
376375
nullptr,
377376
(internal_stats == nullptr) ? nullptr
378377
: internal_stats->GetFileReadHist(0),
379-
TableReaderCaller::kFlush, /*arena=*/nullptr,
378+
table_reader_caller, /*arena=*/nullptr,
380379
/*skip_filter=*/false, tboptions.level_at_creation,
381380
MaxFileSizeForL0MetaPin(mutable_cf_options),
382381
/*smallest_compaction_key=*/nullptr,

db/builder.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ extern Status BuildTable(
5454
const std::string& dbname, VersionSet* versions,
5555
const ImmutableDBOptions& db_options, const TableBuilderOptions& tboptions,
5656
const FileOptions& file_options, TableCache* table_cache,
57-
InternalIterator* iter,
57+
TableReaderCaller table_reader_caller, InternalIterator* iter,
5858
std::vector<std::unique_ptr<FragmentedRangeTombstoneIterator>>
5959
range_del_iters,
6060
FileMetaData* meta, std::vector<BlobFileAddition>* blob_file_additions,

db/convenience.cc

+2-1
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,8 @@ Status VerifySstFileChecksum(const Options& options,
5757
new RandomAccessFileReader(
5858
std::move(file), file_path, ioptions.clock, nullptr /* io_tracer */,
5959
nullptr /* stats */, IOActivity::kUnknown,
60-
nullptr /* file_read_hist */, ioptions.rate_limiter.get()));
60+
Histograms::HISTOGRAM_ENUM_MAX, nullptr /* file_read_hist */,
61+
ioptions.rate_limiter.get()));
6162
const bool kImmortal = true;
6263
auto reader_options = TableReaderOptions(
6364
ioptions, options.prefix_extractor, env_options, internal_comparator,

db/db_impl/db_impl_open.cc

+11-10
Original file line numberDiff line numberDiff line change
@@ -1538,16 +1538,17 @@ Status DBImpl::WriteLevel0TableForRecovery(int job_id, ColumnFamilyData* cfd,
15381538
SeqnoToTimeMapping empty_seqno_time_mapping;
15391539
Version* version = cfd->current();
15401540
version->Ref();
1541-
s = BuildTable(
1542-
dbname_, versions_.get(), immutable_db_options_, tboptions,
1543-
file_options_for_compaction_, cfd->table_cache(), iter.get(),
1544-
std::move(range_del_iters), &meta, &blob_file_additions,
1545-
snapshot_seqs, earliest_write_conflict_snapshot, kMaxSequenceNumber,
1546-
snapshot_checker, paranoid_file_checks, cfd->internal_stats(), &io_s,
1547-
io_tracer_, BlobFileCreationReason::kRecovery,
1548-
empty_seqno_time_mapping, &event_logger_, job_id, Env::IO_HIGH,
1549-
nullptr /* table_properties */, write_hint,
1550-
nullptr /*full_history_ts_low*/, &blob_callback_, version);
1541+
s = BuildTable(dbname_, versions_.get(), immutable_db_options_, tboptions,
1542+
file_options_for_compaction_, cfd->table_cache(),
1543+
TableReaderCaller::kFlush, iter.get(),
1544+
std::move(range_del_iters), &meta, &blob_file_additions,
1545+
snapshot_seqs, earliest_write_conflict_snapshot,
1546+
kMaxSequenceNumber, snapshot_checker, paranoid_file_checks,
1547+
cfd->internal_stats(), &io_s, io_tracer_,
1548+
BlobFileCreationReason::kRecovery,
1549+
empty_seqno_time_mapping, &event_logger_, job_id,
1550+
Env::IO_HIGH, nullptr /* table_properties */, write_hint,
1551+
nullptr /*full_history_ts_low*/, &blob_callback_, version);
15511552
version->Unref();
15521553
LogFlush(immutable_db_options_.info_log);
15531554
ROCKS_LOG_DEBUG(immutable_db_options_.info_log,

db/flush_job.cc

+12-11
Original file line numberDiff line numberDiff line change
@@ -930,17 +930,18 @@ Status FlushJob::WriteLevel0Table() {
930930
meta_.fd.GetNumber());
931931
const SequenceNumber job_snapshot_seq =
932932
job_context_->GetJobSnapshotSequence();
933-
s = BuildTable(
934-
dbname_, versions_, db_options_, tboptions, file_options_,
935-
cfd_->table_cache(), iter.get(), std::move(range_del_iters), &meta_,
936-
&blob_file_additions, existing_snapshots_,
937-
earliest_write_conflict_snapshot_, job_snapshot_seq,
938-
snapshot_checker_, mutable_cf_options_.paranoid_file_checks,
939-
cfd_->internal_stats(), &io_s, io_tracer_,
940-
BlobFileCreationReason::kFlush, seqno_to_time_mapping_, event_logger_,
941-
job_context_->job_id, io_priority, &table_properties_, write_hint,
942-
full_history_ts_low, blob_callback_, base_, &num_input_entries,
943-
&memtable_payload_bytes, &memtable_garbage_bytes);
933+
s = BuildTable(dbname_, versions_, db_options_, tboptions, file_options_,
934+
cfd_->table_cache(), TableReaderCaller::kFlush, iter.get(),
935+
std::move(range_del_iters), &meta_, &blob_file_additions,
936+
existing_snapshots_, earliest_write_conflict_snapshot_,
937+
job_snapshot_seq, snapshot_checker_,
938+
mutable_cf_options_.paranoid_file_checks,
939+
cfd_->internal_stats(), &io_s, io_tracer_,
940+
BlobFileCreationReason::kFlush, seqno_to_time_mapping_,
941+
event_logger_, job_context_->job_id, io_priority,
942+
&table_properties_, write_hint, full_history_ts_low,
943+
blob_callback_, base_, &num_input_entries,
944+
&memtable_payload_bytes, &memtable_garbage_bytes);
944945
// TODO: Cleanup io_status in BuildTable and table builders
945946
assert(!s.ok() || io_s.ok());
946947
io_s.PermitUncheckedError();

db/repair.cc

+8-8
Original file line numberDiff line numberDiff line change
@@ -456,13 +456,14 @@ class Repairer {
456456
SeqnoToTimeMapping empty_seqno_time_mapping;
457457
status = BuildTable(
458458
dbname_, /* versions */ nullptr, immutable_db_options_, tboptions,
459-
file_options_, table_cache_.get(), iter.get(),
460-
std::move(range_del_iters), &meta, nullptr /* blob_file_additions */,
461-
{}, kMaxSequenceNumber, kMaxSequenceNumber, snapshot_checker,
462-
false /* paranoid_file_checks*/, nullptr /* internal_stats */, &io_s,
463-
nullptr /*IOTracer*/, BlobFileCreationReason::kRecovery,
464-
empty_seqno_time_mapping, nullptr /* event_logger */, 0 /* job_id */,
465-
Env::IO_HIGH, nullptr /* table_properties */, write_hint);
459+
file_options_, table_cache_.get(), TableReaderCaller::kFlush,
460+
iter.get(), std::move(range_del_iters), &meta,
461+
nullptr /* blob_file_additions */, {}, kMaxSequenceNumber,
462+
kMaxSequenceNumber, snapshot_checker, false /* paranoid_file_checks*/,
463+
nullptr /* internal_stats */, &io_s, nullptr /*IOTracer*/,
464+
BlobFileCreationReason::kRecovery, empty_seqno_time_mapping,
465+
nullptr /* event_logger */, 0 /* job_id */, Env::IO_HIGH,
466+
nullptr /* table_properties */, write_hint);
466467
ROCKS_LOG_INFO(db_options_.info_log,
467468
"Log #%" PRIu64 ": %d ops saved to Table #%" PRIu64 " %s",
468469
log, counter, meta.fd.GetNumber(),
@@ -809,4 +810,3 @@ Status RepairDB(const std::string& dbname, const Options& options) {
809810
}
810811

811812
} // namespace ROCKSDB_NAMESPACE
812-

db/table_cache.cc

+45-25
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ Status TableCache::GetTableReader(
9292
const InternalKeyComparator& internal_comparator,
9393
const FileMetaData& file_meta, bool sequential_mode, bool record_read_stats,
9494
HistogramImpl* file_read_hist, std::unique_ptr<TableReader>* table_reader,
95+
TableReaderCaller caller,
9596
const std::shared_ptr<const SliceTransform>& prefix_extractor,
9697
bool skip_filters, int level, bool prefetch_index_and_filter_in_cache,
9798
size_t max_file_size_for_l0_meta_pin, Temperature file_temperature) {
@@ -124,11 +125,23 @@ Status TableCache::GetTableReader(
124125
if (!sequential_mode && ioptions_.advise_random_on_open) {
125126
file->Hint(FSRandomAccessFile::kRandom);
126127
}
127-
StopWatch sw(ioptions_.clock, ioptions_.stats, TABLE_OPEN_IO_MICROS);
128+
IOActivity io_activity;
129+
if (caller == TableReaderCaller::kFlush) {
130+
io_activity = IOActivity::kFlush;
131+
} else if (caller == TableReaderCaller::kCompaction ||
132+
caller == TableReaderCaller::kCompactionRefill) {
133+
io_activity = IOActivity::kCompaction;
134+
} else {
135+
io_activity = IOActivity::kUnknown;
136+
}
137+
StopWatch sw(ioptions_.clock, ioptions_.stats, {TABLE_OPEN_IO_MICROS});
128138
std::unique_ptr<RandomAccessFileReader> file_reader(
129139
new RandomAccessFileReader(
130140
std::move(file), fname, ioptions_.clock, io_tracer_,
131-
record_read_stats ? ioptions_.stats : nullptr, SST_READ_MICROS,
141+
record_read_stats ? ioptions_.stats : nullptr, io_activity,
142+
io_activity == IOActivity::kUnknown
143+
? Histograms::SST_READ_MICROS
144+
: Histograms::HISTOGRAM_ENUM_MAX,
132145
file_read_hist, ioptions_.rate_limiter.get(), ioptions_.listeners,
133146
file_temperature, level == ioptions_.num_levels - 1));
134147
UniqueId64x2 expected_unique_id;
@@ -156,6 +169,7 @@ Status TableCache::FindTable(
156169
const ReadOptions& ro, const FileOptions& file_options,
157170
const InternalKeyComparator& internal_comparator,
158171
const FileMetaData& file_meta, TypedHandle** handle,
172+
TableReaderCaller caller,
159173
const std::shared_ptr<const SliceTransform>& prefix_extractor,
160174
const bool no_io, bool record_read_stats, HistogramImpl* file_read_hist,
161175
bool skip_filters, int level, bool prefetch_index_and_filter_in_cache,
@@ -182,7 +196,7 @@ Status TableCache::FindTable(
182196
Status s =
183197
GetTableReader(ro, file_options, internal_comparator, file_meta,
184198
false /* sequential mode */, record_read_stats,
185-
file_read_hist, &table_reader, prefix_extractor,
199+
file_read_hist, &table_reader, caller, prefix_extractor,
186200
skip_filters, level, prefetch_index_and_filter_in_cache,
187201
max_file_size_for_l0_meta_pin, file_temperature);
188202
if (!s.ok()) {
@@ -226,7 +240,7 @@ InternalIterator* TableCache::NewIterator(
226240
table_reader = fd.table_reader;
227241
if (table_reader == nullptr) {
228242
s = FindTable(
229-
options, file_options, icomparator, file_meta, &handle,
243+
options, file_options, icomparator, file_meta, &handle, caller,
230244
prefix_extractor, options.read_tier == kBlockCacheTier /* no_io */,
231245
!for_compaction /* record_read_stats */, file_read_hist, skip_filters,
232246
level, true /* prefetch_index_and_filter_in_cache */,
@@ -317,7 +331,7 @@ Status TableCache::GetRangeTombstoneIterator(
317331
TypedHandle* handle = nullptr;
318332
if (t == nullptr) {
319333
s = FindTable(options, file_options_, internal_comparator, file_meta,
320-
&handle);
334+
&handle, TableReaderCaller::kUncategorized);
321335
if (s.ok()) {
322336
t = cache_.Value(handle);
323337
}
@@ -430,7 +444,8 @@ Status TableCache::Get(
430444
assert(s.ok());
431445
if (t == nullptr) {
432446
s = FindTable(options, file_options_, internal_comparator, file_meta,
433-
&handle, prefix_extractor,
447+
&handle, TableReaderCaller::kUncategorized,
448+
prefix_extractor,
434449
options.read_tier == kBlockCacheTier /* no_io */,
435450
true /* record_read_stats */, file_read_hist, skip_filters,
436451
level, true /* prefetch_index_and_filter_in_cache */,
@@ -531,12 +546,13 @@ Status TableCache::MultiGetFilter(
531546
MultiGetContext::Range tombstone_range(*mget_range, mget_range->begin(),
532547
mget_range->end());
533548
if (t == nullptr) {
534-
s = FindTable(
535-
options, file_options_, internal_comparator, file_meta, &handle,
536-
prefix_extractor, options.read_tier == kBlockCacheTier /* no_io */,
537-
true /* record_read_stats */, file_read_hist, /*skip_filters=*/false,
538-
level, true /* prefetch_index_and_filter_in_cache */,
539-
/*max_file_size_for_l0_meta_pin=*/0, file_meta.temperature);
549+
s = FindTable(options, file_options_, internal_comparator, file_meta,
550+
&handle, TableReaderCaller::kUncategorized, prefix_extractor,
551+
options.read_tier == kBlockCacheTier /* no_io */,
552+
true /* record_read_stats */, file_read_hist,
553+
/*skip_filters=*/false, level,
554+
true /* prefetch_index_and_filter_in_cache */,
555+
/*max_file_size_for_l0_meta_pin=*/0, file_meta.temperature);
540556
if (s.ok()) {
541557
t = cache_.Value(handle);
542558
}
@@ -574,8 +590,10 @@ Status TableCache::GetTableProperties(
574590
}
575591

576592
TypedHandle* table_handle = nullptr;
577-
Status s = FindTable(ReadOptions(), file_options, internal_comparator,
578-
file_meta, &table_handle, prefix_extractor, no_io);
593+
Status s =
594+
FindTable(ReadOptions(), file_options, internal_comparator, file_meta,
595+
&table_handle, TableReaderCaller::kUncategorized,
596+
prefix_extractor, no_io);
579597
if (!s.ok()) {
580598
return s;
581599
}
@@ -593,7 +611,8 @@ Status TableCache::ApproximateKeyAnchors(
593611
TableReader* t = file_meta.fd.table_reader;
594612
TypedHandle* handle = nullptr;
595613
if (t == nullptr) {
596-
s = FindTable(ro, file_options_, internal_comparator, file_meta, &handle);
614+
s = FindTable(ro, file_options_, internal_comparator, file_meta, &handle,
615+
TableReaderCaller::kUncategorized);
597616
if (s.ok()) {
598617
t = cache_.Value(handle);
599618
}
@@ -619,8 +638,9 @@ size_t TableCache::GetMemoryUsageByTableReader(
619638
}
620639

621640
TypedHandle* table_handle = nullptr;
622-
Status s = FindTable(ReadOptions(), file_options, internal_comparator,
623-
file_meta, &table_handle, prefix_extractor, true);
641+
Status s = FindTable(
642+
ReadOptions(), file_options, internal_comparator, file_meta,
643+
&table_handle, TableReaderCaller::kUncategorized, prefix_extractor, true);
624644
if (!s.ok()) {
625645
return 0;
626646
}
@@ -644,10 +664,10 @@ uint64_t TableCache::ApproximateOffsetOf(
644664
TypedHandle* table_handle = nullptr;
645665
if (table_reader == nullptr) {
646666
const bool for_compaction = (caller == TableReaderCaller::kCompaction);
647-
Status s =
648-
FindTable(ReadOptions(), file_options_, internal_comparator, file_meta,
649-
&table_handle, prefix_extractor, false /* no_io */,
650-
!for_compaction /* record_read_stats */);
667+
Status s = FindTable(
668+
ReadOptions(), file_options_, internal_comparator, file_meta,
669+
&table_handle, TableReaderCaller::kUncategorized, prefix_extractor,
670+
false /* no_io */, !for_compaction /* record_read_stats */);
651671
if (s.ok()) {
652672
table_reader = cache_.Value(table_handle);
653673
}
@@ -672,10 +692,10 @@ uint64_t TableCache::ApproximateSize(
672692
TypedHandle* table_handle = nullptr;
673693
if (table_reader == nullptr) {
674694
const bool for_compaction = (caller == TableReaderCaller::kCompaction);
675-
Status s =
676-
FindTable(ReadOptions(), file_options_, internal_comparator, file_meta,
677-
&table_handle, prefix_extractor, false /* no_io */,
678-
!for_compaction /* record_read_stats */);
695+
Status s = FindTable(
696+
ReadOptions(), file_options_, internal_comparator, file_meta,
697+
&table_handle, TableReaderCaller::kUncategorized, prefix_extractor,
698+
false /* no_io */, !for_compaction /* record_read_stats */);
679699
if (s.ok()) {
680700
table_reader = cache_.Value(table_handle);
681701
}

db/table_cache.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,7 @@ class TableCache {
164164
Status FindTable(
165165
const ReadOptions& ro, const FileOptions& toptions,
166166
const InternalKeyComparator& internal_comparator,
167-
const FileMetaData& file_meta, TypedHandle**,
167+
const FileMetaData& file_meta, TypedHandle**, TableReaderCaller caller,
168168
const std::shared_ptr<const SliceTransform>& prefix_extractor = nullptr,
169169
const bool no_io = false, bool record_read_stats = true,
170170
HistogramImpl* file_read_hist = nullptr, bool skip_filters = false,
@@ -234,7 +234,7 @@ class TableCache {
234234
const InternalKeyComparator& internal_comparator,
235235
const FileMetaData& file_meta, bool sequential_mode,
236236
bool record_read_stats, HistogramImpl* file_read_hist,
237-
std::unique_ptr<TableReader>* table_reader,
237+
std::unique_ptr<TableReader>* table_reader, TableReaderCaller caller,
238238
const std::shared_ptr<const SliceTransform>& prefix_extractor = nullptr,
239239
bool skip_filters = false, int level = -1,
240240
bool prefetch_index_and_filter_in_cache = true,

db/table_cache_sync_and_async.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,8 @@ DEFINE_SYNC_AND_ASYNC(Status, TableCache::MultiGet)
6565
if (t == nullptr) {
6666
assert(handle == nullptr);
6767
s = FindTable(options, file_options_, internal_comparator, file_meta,
68-
&handle, prefix_extractor,
68+
&handle, TableReaderCaller::kUncategorized,
69+
prefix_extractor,
6970
options.read_tier == kBlockCacheTier /* no_io */,
7071
true /* record_read_stats */, file_read_hist, skip_filters,
7172
level, true /* prefetch_index_and_filter_in_cache */,

db/version_builder.cc

+2-1
Original file line numberDiff line numberDiff line change
@@ -1326,7 +1326,8 @@ class VersionBuilder::Rep {
13261326
statuses[file_idx] = table_cache_->FindTable(
13271327
ReadOptions(), file_options_,
13281328
*(base_vstorage_->InternalComparator()), *file_meta, &handle,
1329-
prefix_extractor, false /*no_io */, true /* record_read_stats */,
1329+
TableReaderCaller::kUncategorized, prefix_extractor,
1330+
false /*no_io */, true /* record_read_stats */,
13301331
internal_stats->GetFileReadHist(level), false, level,
13311332
prefetch_index_and_filter_in_cache, max_file_size_for_l0_meta_pin,
13321333
file_meta->temperature);

0 commit comments

Comments
 (0)