diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index 3bc2be1f047..fe685aa4ec3 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -5897,6 +5897,7 @@ Status DBImpl::IngestExternalFile( Status DBImpl::IngestExternalFiles( const std::vector& args) { + PERF_TIMER_GUARD(file_ingestion_nanos); // TODO: plumb Env::IOActivity, Env::IOPriority const WriteOptions write_options; @@ -6041,6 +6042,7 @@ Status DBImpl::IngestExternalFiles( if (two_write_queues_) { nonmem_write_thread_.EnterUnbatched(&nonmem_w, &mutex_); } + PERF_TIMER_GUARD(file_ingestion_blocking_live_writes_nanos); // When unordered_write is enabled, the keys are writing to memtable in an // unordered way. If the ingestion job checks memtable key range before the @@ -6203,6 +6205,7 @@ Status DBImpl::IngestExternalFiles( nonmem_write_thread_.ExitUnbatched(&nonmem_w); } write_thread_.ExitUnbatched(&w); + PERF_TIMER_STOP(file_ingestion_blocking_live_writes_nanos); if (status.ok()) { for (auto& job : ingestion_jobs) { diff --git a/db/external_sst_file_basic_test.cc b/db/external_sst_file_basic_test.cc index a6c42cbd6e1..cf46e0af199 100644 --- a/db/external_sst_file_basic_test.cc +++ b/db/external_sst_file_basic_test.cc @@ -11,6 +11,7 @@ #include "port/stack_trace.h" #include "rocksdb/advanced_options.h" #include "rocksdb/options.h" +#include "rocksdb/perf_context.h" #include "rocksdb/sst_file_writer.h" #include "test_util/testharness.h" #include "test_util/testutil.h" @@ -261,13 +262,28 @@ TEST_F(ExternalSSTFileBasicTest, Basic) { DestroyAndReopen(options); // Add file using file path + SetPerfLevel(kEnableTimeExceptForMutex); + PerfContext* perf_ctx = get_perf_context(); + perf_ctx->Reset(); s = DeprecatedAddFile({file1}); + ASSERT_GT(perf_context.file_ingestion_nanos, 0); + ASSERT_GT(perf_context.file_ingestion_blocking_live_writes_nanos, 0); ASSERT_OK(s) << s.ToString(); ASSERT_EQ(db_->GetLatestSequenceNumber(), 0U); for (int k = 0; k < 100; k++) { ASSERT_EQ(Get(Key(k)), Key(k) + "_val"); } + // Re-ingest the file just to check the perf context not enabled at and below + // kEnableWait. + SetPerfLevel(kEnableWait); + perf_ctx->Reset(); + IngestExternalFileOptions opts; + opts.allow_global_seqno = true; + opts.allow_blocking_flush = true; + ASSERT_OK(db_->IngestExternalFile({file1}, opts)); + ASSERT_EQ(perf_context.file_ingestion_nanos, 0); + ASSERT_EQ(perf_context.file_ingestion_blocking_live_writes_nanos, 0); DestroyAndRecreateExternalSSTFilesDir(); } diff --git a/include/rocksdb/iostats_context.h b/include/rocksdb/iostats_context.h index 559d44c57a6..592bc0c4670 100644 --- a/include/rocksdb/iostats_context.h +++ b/include/rocksdb/iostats_context.h @@ -10,8 +10,18 @@ #include "rocksdb/perf_level.h" +/* + * NOTE: + * If you plan to add new metrics, please read documentation in perf_level.h and + * try to come up with a metric name that follows the naming conventions + * mentioned there. It helps to indicate the metric's starting enabling P + * erfLevel. Document this starting PerfLevel if the metric name cannot meet the + * naming conventions. + */ + // A thread local context for gathering io-stats efficiently and transparently. // Use SetPerfLevel(PerfLevel::kEnableTime) to enable time stats. +// namespace ROCKSDB_NAMESPACE { diff --git a/include/rocksdb/perf_context.h b/include/rocksdb/perf_context.h index 42452be1517..5647eba5bdd 100644 --- a/include/rocksdb/perf_context.h +++ b/include/rocksdb/perf_context.h @@ -20,6 +20,12 @@ namespace ROCKSDB_NAMESPACE { * add/remove fields to this structure, builds would fail. The way to fix the * builds would be to add the appropriate fields to the * DEF_PERF_CONTEXT_LEVEL_METRICS() macro in the perf_context.cc file. + * + * If you plan to add new metrics, please read documentation in perf_level.h and + * try to come up with a metric name that follows the naming conventions + * mentioned there. It helps to indicate the metric's starting enabling P + * erfLevel. Document this starting PerfLevel if the metric name cannot meet the + * naming conventions. */ // Break down performance counters by level and store per-level perf context in @@ -189,6 +195,7 @@ struct PerfContextBase { // total nanos spent on writing to WAL uint64_t write_wal_time; // total nanos spent on writing to mem tables + // This metric gets collected starting from PerfLevel::kEnableWait uint64_t write_memtable_time; // total nanos spent on delaying or throttling write uint64_t write_delay_time; @@ -230,6 +237,8 @@ struct PerfContextBase { uint64_t bloom_sst_miss_count; // Time spent waiting on key locks in transaction lock manager. + // This metric gets collected starting from + // PerfLevel::kEnableTimeExceptForMutex uint64_t key_lock_wait_time; // number of times acquiring a lock was blocked by another transaction. uint64_t key_lock_wait_count; @@ -275,6 +284,12 @@ struct PerfContextBase { uint64_t decrypt_data_nanos; uint64_t number_async_seek; + + // Metrics for file ingestion + // Time spent end to end in an IngestExternalFile call. + uint64_t file_ingestion_nanos; + // Time IngestExternalFile blocked live writes. + uint64_t file_ingestion_blocking_live_writes_nanos; }; struct PerfContext : public PerfContextBase { diff --git a/include/rocksdb/perf_level.h b/include/rocksdb/perf_level.h index abd40faac25..0ebf1c57a41 100644 --- a/include/rocksdb/perf_level.h +++ b/include/rocksdb/perf_level.h @@ -14,19 +14,39 @@ namespace ROCKSDB_NAMESPACE { // How much perf stats to collect. Affects perf_context and iostats_context. +// These levels are incremental, which means a new set of metrics will get +// collected when PerfLevel is upgraded from level n to level n + 1. +// Each level's documentation specifies the incremental set of metrics it +// enables. As an example, kEnableWait will also enable collecting all the +// metrics that kEnableCount enables, and its documentation only specifies which +// extra metrics it also enables. +// +// These metrics are identified with some naming conventions, but not all +// metrics follow exactly this convention. The metrics' own documentation should +// be source of truth if they diverge. enum PerfLevel : unsigned char { - kUninitialized = 0, // unknown setting - kDisable = 1, // disable perf stats - kEnableCount = 2, // enable only count stats - kEnableWait = 3, // measure time spent by user threads - // blocked in RocksDB, and not external - // resources such as mutexes and IO - kEnableTimeExceptForMutex = 4, // Other than count stats, also enable time - // stats except for mutexes - // Other than time, also measure CPU time counters. Still don't measure - // time (neither wall time nor CPU time) for mutexes. + // Unknown setting + kUninitialized = 0, + // Disable perf stats + kDisable = 1, + // Starts enabling count metrics. These metrics usually don't have time + // related keywords, and are likely to have keywords like "count" or "byte". + kEnableCount = 2, + // Starts enabling metrics that measure time spent by user threads blocked in + // RocksDB waiting for RocksDB to take actions, as opposed to waiting for + // external resources such as mutexes and IO. + // These metrics usually have this pattern: "_[wait|delay]_*_[time|nanos]". + kEnableWait = 3, + // Starts enabling metrics that measure the end to end time of an operation. + // These metrics' names have keywords "time" or "nanos". Check other time + // measuring metrics with similar but more specific naming conventions. + kEnableTimeExceptForMutex = 4, + // Starts enabling metrics that measure the cpu time of an operation. These + // metrics' name usually this pattern "_cpu_*_[time|nanos]". kEnableTimeAndCPUTimeExceptForMutex = 5, - kEnableTime = 6, // enable count and time stats + // Starts enabling metrics that measure time for mutex. These metrics' name + // usually have this pattern: "_[mutex|condition]_*_[time|nanos]". + kEnableTime = 6, kOutOfBounds = 7 // N.B. Must always be the last value! }; diff --git a/monitoring/perf_context.cc b/monitoring/perf_context.cc index 2f25f68c238..59f5f19f66d 100644 --- a/monitoring/perf_context.cc +++ b/monitoring/perf_context.cc @@ -153,7 +153,9 @@ struct PerfContextByLevelInt { defCmd(iter_seek_count) \ defCmd(encrypt_data_nanos) \ defCmd(decrypt_data_nanos) \ - defCmd(number_async_seek) + defCmd(number_async_seek) \ + defCmd(file_ingestion_nanos) \ + defCmd(file_ingestion_blocking_live_writes_nanos) // clang-format on struct PerfContextInt { diff --git a/unreleased_history/public_api_changes/ingestion_time_in_perf_context.md b/unreleased_history/public_api_changes/ingestion_time_in_perf_context.md new file mode 100644 index 00000000000..5184ecc23ee --- /dev/null +++ b/unreleased_history/public_api_changes/ingestion_time_in_perf_context.md @@ -0,0 +1 @@ +Add `file_ingestion_nanos` and `file_ingestion_blocking_live_writes_nanos` in PerfContext to observe file ingestions \ No newline at end of file