Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add time measuring metrics for file ingestion in PerfContext #13219

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions db/db_impl/db_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5897,6 +5897,7 @@ Status DBImpl::IngestExternalFile(

Status DBImpl::IngestExternalFiles(
const std::vector<IngestExternalFileArg>& args) {
PERF_TIMER_GUARD(file_ingestion_nanos);
// TODO: plumb Env::IOActivity, Env::IOPriority
const WriteOptions write_options;

Expand Down Expand Up @@ -6041,6 +6042,7 @@ Status DBImpl::IngestExternalFiles(
if (two_write_queues_) {
nonmem_write_thread_.EnterUnbatched(&nonmem_w, &mutex_);
}
PERF_TIMER_GUARD(file_ingestion_blocking_live_writes_nanos);

// When unordered_write is enabled, the keys are writing to memtable in an
// unordered way. If the ingestion job checks memtable key range before the
Expand Down Expand Up @@ -6203,6 +6205,7 @@ Status DBImpl::IngestExternalFiles(
nonmem_write_thread_.ExitUnbatched(&nonmem_w);
}
write_thread_.ExitUnbatched(&w);
PERF_TIMER_STOP(file_ingestion_blocking_live_writes_nanos);

if (status.ok()) {
for (auto& job : ingestion_jobs) {
Expand Down
16 changes: 16 additions & 0 deletions db/external_sst_file_basic_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include "port/stack_trace.h"
#include "rocksdb/advanced_options.h"
#include "rocksdb/options.h"
#include "rocksdb/perf_context.h"
#include "rocksdb/sst_file_writer.h"
#include "test_util/testharness.h"
#include "test_util/testutil.h"
Expand Down Expand Up @@ -261,13 +262,28 @@ TEST_F(ExternalSSTFileBasicTest, Basic) {

DestroyAndReopen(options);
// Add file using file path
SetPerfLevel(kEnableTimeExceptForMutex);
PerfContext* perf_ctx = get_perf_context();
perf_ctx->Reset();
s = DeprecatedAddFile({file1});
ASSERT_GT(perf_context.file_ingestion_nanos, 0);
ASSERT_GT(perf_context.file_ingestion_blocking_live_writes_nanos, 0);
ASSERT_OK(s) << s.ToString();
ASSERT_EQ(db_->GetLatestSequenceNumber(), 0U);
for (int k = 0; k < 100; k++) {
ASSERT_EQ(Get(Key(k)), Key(k) + "_val");
}

// Re-ingest the file just to check the perf context not enabled at and below
// kEnableWait.
SetPerfLevel(kEnableWait);
perf_ctx->Reset();
IngestExternalFileOptions opts;
opts.allow_global_seqno = true;
opts.allow_blocking_flush = true;
ASSERT_OK(db_->IngestExternalFile({file1}, opts));
ASSERT_EQ(perf_context.file_ingestion_nanos, 0);
ASSERT_EQ(perf_context.file_ingestion_blocking_live_writes_nanos, 0);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I assumed your test ensure blocking live writes will happen so this proves your point.

DestroyAndRecreateExternalSSTFilesDir();
}

Expand Down
10 changes: 10 additions & 0 deletions include/rocksdb/iostats_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,18 @@

#include "rocksdb/perf_level.h"

/*
* NOTE:
* If you plan to add new metrics, please read documentation in perf_level.h and
* try to come up with a metric name that follows the naming conventions
* mentioned there. It helps to indicate the metric's starting enabling P
* erfLevel. Document this starting PerfLevel if the metric name cannot meet the
* naming conventions.
*/

// A thread local context for gathering io-stats efficiently and transparently.
// Use SetPerfLevel(PerfLevel::kEnableTime) to enable time stats.
//

namespace ROCKSDB_NAMESPACE {

Expand Down
15 changes: 15 additions & 0 deletions include/rocksdb/perf_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,12 @@ namespace ROCKSDB_NAMESPACE {
* add/remove fields to this structure, builds would fail. The way to fix the
* builds would be to add the appropriate fields to the
* DEF_PERF_CONTEXT_LEVEL_METRICS() macro in the perf_context.cc file.
*
* If you plan to add new metrics, please read documentation in perf_level.h and
* try to come up with a metric name that follows the naming conventions
* mentioned there. It helps to indicate the metric's starting enabling P
* erfLevel. Document this starting PerfLevel if the metric name cannot meet the
* naming conventions.
*/

// Break down performance counters by level and store per-level perf context in
Expand Down Expand Up @@ -189,6 +195,7 @@ struct PerfContextBase {
// total nanos spent on writing to WAL
uint64_t write_wal_time;
// total nanos spent on writing to mem tables
// This metric gets collected starting from PerfLevel::kEnableWait
uint64_t write_memtable_time;
// total nanos spent on delaying or throttling write
uint64_t write_delay_time;
Expand Down Expand Up @@ -230,6 +237,8 @@ struct PerfContextBase {
uint64_t bloom_sst_miss_count;

// Time spent waiting on key locks in transaction lock manager.
// This metric gets collected starting from
// PerfLevel::kEnableTimeExceptForMutex
uint64_t key_lock_wait_time;
// number of times acquiring a lock was blocked by another transaction.
uint64_t key_lock_wait_count;
Expand Down Expand Up @@ -275,6 +284,12 @@ struct PerfContextBase {
uint64_t decrypt_data_nanos;

uint64_t number_async_seek;

// Metrics for file ingestion
// Time spent end to end in an IngestExternalFile call.
uint64_t file_ingestion_nanos;
// Time IngestExternalFile blocked live writes.
uint64_t file_ingestion_blocking_live_writes_nanos;
};

struct PerfContext : public PerfContextBase {
Expand Down
42 changes: 31 additions & 11 deletions include/rocksdb/perf_level.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,39 @@
namespace ROCKSDB_NAMESPACE {

// How much perf stats to collect. Affects perf_context and iostats_context.
// These levels are incremental, which means a new set of metrics will get
// collected when PerfLevel is upgraded from level n to level n + 1.
// Each level's documentation specifies the incremental set of metrics it
// enables. As an example, kEnableWait will also enable collecting all the
// metrics that kEnableCount enables, and its documentation only specifies which
// extra metrics it also enables.
//
// These metrics are identified with some naming conventions, but not all
// metrics follow exactly this convention. The metrics' own documentation should
// be source of truth if they diverge.
enum PerfLevel : unsigned char {
kUninitialized = 0, // unknown setting
kDisable = 1, // disable perf stats
kEnableCount = 2, // enable only count stats
kEnableWait = 3, // measure time spent by user threads
// blocked in RocksDB, and not external
// resources such as mutexes and IO
kEnableTimeExceptForMutex = 4, // Other than count stats, also enable time
// stats except for mutexes
// Other than time, also measure CPU time counters. Still don't measure
// time (neither wall time nor CPU time) for mutexes.
// Unknown setting
kUninitialized = 0,
// Disable perf stats
kDisable = 1,
// Starts enabling count metrics. These metrics usually don't have time
// related keywords, and are likely to have keywords like "count" or "byte".
kEnableCount = 2,
// Starts enabling metrics that measure time spent by user threads blocked in
// RocksDB waiting for RocksDB to take actions, as opposed to waiting for
// external resources such as mutexes and IO.
// These metrics usually have this pattern: "_[wait|delay]_*_[time|nanos]".
kEnableWait = 3,
// Starts enabling metrics that measure the end to end time of an operation.
// These metrics' names have keywords "time" or "nanos". Check other time
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: "time" or time unit e.g, nanos, micros.

// measuring metrics with similar but more specific naming conventions.
kEnableTimeExceptForMutex = 4,
// Starts enabling metrics that measure the cpu time of an operation. These
// metrics' name usually this pattern "_cpu_*_[time|nanos]".
kEnableTimeAndCPUTimeExceptForMutex = 5,
kEnableTime = 6, // enable count and time stats
// Starts enabling metrics that measure time for mutex. These metrics' name
// usually have this pattern: "_[mutex|condition]_*_[time|nanos]".
kEnableTime = 6,
kOutOfBounds = 7 // N.B. Must always be the last value!
};

Expand Down
4 changes: 3 additions & 1 deletion monitoring/perf_context.cc
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,9 @@ struct PerfContextByLevelInt {
defCmd(iter_seek_count) \
defCmd(encrypt_data_nanos) \
defCmd(decrypt_data_nanos) \
defCmd(number_async_seek)
defCmd(number_async_seek) \
defCmd(file_ingestion_nanos) \
defCmd(file_ingestion_blocking_live_writes_nanos)
// clang-format on

struct PerfContextInt {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add `file_ingestion_nanos` and `file_ingestion_blocking_live_writes_nanos` in PerfContext to observe file ingestions
Loading