Skip to content

Commit

Permalink
Add time measuring metrics for file ingestion in PerfContext
Browse files Browse the repository at this point in the history
  • Loading branch information
jowlyzhang committed Dec 19, 2024
1 parent 1d919ac commit 2cf4597
Show file tree
Hide file tree
Showing 7 changed files with 79 additions and 12 deletions.
3 changes: 3 additions & 0 deletions db/db_impl/db_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5897,6 +5897,7 @@ Status DBImpl::IngestExternalFile(

Status DBImpl::IngestExternalFiles(
const std::vector<IngestExternalFileArg>& args) {
PERF_TIMER_GUARD(file_ingestion_nanos);
// TODO: plumb Env::IOActivity, Env::IOPriority
const WriteOptions write_options;

Expand Down Expand Up @@ -6041,6 +6042,7 @@ Status DBImpl::IngestExternalFiles(
if (two_write_queues_) {
nonmem_write_thread_.EnterUnbatched(&nonmem_w, &mutex_);
}
PERF_TIMER_GUARD(file_ingestion_blocking_live_writes_nanos);

// When unordered_write is enabled, the keys are writing to memtable in an
// unordered way. If the ingestion job checks memtable key range before the
Expand Down Expand Up @@ -6203,6 +6205,7 @@ Status DBImpl::IngestExternalFiles(
nonmem_write_thread_.ExitUnbatched(&nonmem_w);
}
write_thread_.ExitUnbatched(&w);
PERF_TIMER_STOP(file_ingestion_blocking_live_writes_nanos);

if (status.ok()) {
for (auto& job : ingestion_jobs) {
Expand Down
16 changes: 16 additions & 0 deletions db/external_sst_file_basic_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include "port/stack_trace.h"
#include "rocksdb/advanced_options.h"
#include "rocksdb/options.h"
#include "rocksdb/perf_context.h"
#include "rocksdb/sst_file_writer.h"
#include "test_util/testharness.h"
#include "test_util/testutil.h"
Expand Down Expand Up @@ -261,13 +262,28 @@ TEST_F(ExternalSSTFileBasicTest, Basic) {

DestroyAndReopen(options);
// Add file using file path
SetPerfLevel(kEnableTimeExceptForMutex);
PerfContext* perf_ctx = get_perf_context();
perf_ctx->Reset();
s = DeprecatedAddFile({file1});
ASSERT_GT(perf_context.file_ingestion_nanos, 0);
ASSERT_GT(perf_context.file_ingestion_blocking_live_writes_nanos, 0);
ASSERT_OK(s) << s.ToString();
ASSERT_EQ(db_->GetLatestSequenceNumber(), 0U);
for (int k = 0; k < 100; k++) {
ASSERT_EQ(Get(Key(k)), Key(k) + "_val");
}

// Re-ingest the file just to check the perf context not enabled at and below
// kEnableWait.
SetPerfLevel(kEnableWait);
perf_ctx->Reset();
IngestExternalFileOptions opts;
opts.allow_global_seqno = true;
opts.allow_blocking_flush = true;
ASSERT_OK(db_->IngestExternalFile({file1}, opts));
ASSERT_EQ(perf_context.file_ingestion_nanos, 0);
ASSERT_EQ(perf_context.file_ingestion_blocking_live_writes_nanos, 0);
DestroyAndRecreateExternalSSTFilesDir();
}

Expand Down
10 changes: 10 additions & 0 deletions include/rocksdb/iostats_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,18 @@

#include "rocksdb/perf_level.h"

/*
* NOTE:
* If you plan to add new metrics, please read documentation in perf_level.h and
* try to come up with a metric name that follows the naming conventions
* mentioned there. It helps to indicate the metric's starting enabling P
* erfLevel. Document this starting PerfLevel if the metric name cannot meet the
* naming conventions.
*/

// A thread local context for gathering io-stats efficiently and transparently.
// Use SetPerfLevel(PerfLevel::kEnableTime) to enable time stats.
//

namespace ROCKSDB_NAMESPACE {

Expand Down
15 changes: 15 additions & 0 deletions include/rocksdb/perf_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,12 @@ namespace ROCKSDB_NAMESPACE {
* add/remove fields to this structure, builds would fail. The way to fix the
* builds would be to add the appropriate fields to the
* DEF_PERF_CONTEXT_LEVEL_METRICS() macro in the perf_context.cc file.
*
* If you plan to add new metrics, please read documentation in perf_level.h and
* try to come up with a metric name that follows the naming conventions
* mentioned there. It helps to indicate the metric's starting enabling P
* erfLevel. Document this starting PerfLevel if the metric name cannot meet the
* naming conventions.
*/

// Break down performance counters by level and store per-level perf context in
Expand Down Expand Up @@ -189,6 +195,7 @@ struct PerfContextBase {
// total nanos spent on writing to WAL
uint64_t write_wal_time;
// total nanos spent on writing to mem tables
// This metric gets collected starting from PerfLevel::kEnableWait
uint64_t write_memtable_time;
// total nanos spent on delaying or throttling write
uint64_t write_delay_time;
Expand Down Expand Up @@ -230,6 +237,8 @@ struct PerfContextBase {
uint64_t bloom_sst_miss_count;

// Time spent waiting on key locks in transaction lock manager.
// This metric gets collected starting from
// PerfLevel::kEnableTimeExceptForMutex
uint64_t key_lock_wait_time;
// number of times acquiring a lock was blocked by another transaction.
uint64_t key_lock_wait_count;
Expand Down Expand Up @@ -275,6 +284,12 @@ struct PerfContextBase {
uint64_t decrypt_data_nanos;

uint64_t number_async_seek;

// Metrics for file ingestion
// Time spent end to end in an IngestExternalFile call.
uint64_t file_ingestion_nanos;
// Time IngestExternalFile blocked live writes.
uint64_t file_ingestion_blocking_live_writes_nanos;
};

struct PerfContext : public PerfContextBase {
Expand Down
42 changes: 31 additions & 11 deletions include/rocksdb/perf_level.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,39 @@
namespace ROCKSDB_NAMESPACE {

// How much perf stats to collect. Affects perf_context and iostats_context.
// These levels are incremental, which means a new set of metrics will get
// collected when PerfLevel is upgraded from level n to level n + 1.
// Each level's documentation specifies the incremental set of metrics it
// enables. As an example, kEnableWait will also enable collecting all the
// metrics that kEnableCount enables, and its documentation only specifies which
// extra metrics it also enables.
//
// These metrics are identified with some naming conventions, but not all
// metrics follow exactly this convention. The metrics' own documentation should
// be source of truth if they diverge.
enum PerfLevel : unsigned char {
kUninitialized = 0, // unknown setting
kDisable = 1, // disable perf stats
kEnableCount = 2, // enable only count stats
kEnableWait = 3, // measure time spent by user threads
// blocked in RocksDB, and not external
// resources such as mutexes and IO
kEnableTimeExceptForMutex = 4, // Other than count stats, also enable time
// stats except for mutexes
// Other than time, also measure CPU time counters. Still don't measure
// time (neither wall time nor CPU time) for mutexes.
// Unknown setting
kUninitialized = 0,
// Disable perf stats
kDisable = 1,
// Starts enabling count metrics. These metrics usually don't have time
// related keywords, and are likely to have keywords like "count" or "byte".
kEnableCount = 2,
// Starts enabling metrics that measure time spent by user threads blocked in
// RocksDB waiting for RocksDB to take actions, as opposed to waiting for
// external resources such as mutexes and IO.
// These metrics usually have this pattern: "_[wait|delay]_*_[time|nanos]".
kEnableWait = 3,
// Starts enabling metrics that measure the end to end time of an operation.
// These metrics' names have keywords "time" or "nanos". Check other time
// measuring metrics with similar but more specific naming conventions.
kEnableTimeExceptForMutex = 4,
// Starts enabling metrics that measure the cpu time of an operation. These
// metrics' name usually this pattern "_cpu_*_[time|nanos]".
kEnableTimeAndCPUTimeExceptForMutex = 5,
kEnableTime = 6, // enable count and time stats
// Starts enabling metrics that measure time for mutex. These metrics' name
// usually have this pattern: "_[mutex|condition]_*_[time|nanos]".
kEnableTime = 6,
kOutOfBounds = 7 // N.B. Must always be the last value!
};

Expand Down
4 changes: 3 additions & 1 deletion monitoring/perf_context.cc
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,9 @@ struct PerfContextByLevelInt {
defCmd(iter_seek_count) \
defCmd(encrypt_data_nanos) \
defCmd(decrypt_data_nanos) \
defCmd(number_async_seek)
defCmd(number_async_seek) \
defCmd(file_ingestion_nanos) \
defCmd(file_ingestion_blocking_live_writes_nanos)
// clang-format on

struct PerfContextInt {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add `file_ingestion_nanos` and `file_ingestion_blocking_live_writes_nanos` in PerfContext to observe file ingestions

0 comments on commit 2cf4597

Please sign in to comment.