Skip to content

Commit c8bc2b6

Browse files
jowlyzhangfacebook-github-bot
authored andcommitted
Add time measuring metrics for file ingestion in PerfContext (facebook#13219)
Summary: As titled. And also added some documentation for an approach to name perf context metrics that can help identify the starting `PerfLevel` that enables collecting it. Pull Request resolved: facebook#13219 Test Plan: Unit test Reviewed By: hx235 Differential Revision: D67362022 Pulled By: jowlyzhang fbshipit-source-id: 7ed1bb475b5497961612d4e331600609da42074b
1 parent 1acd315 commit c8bc2b6

File tree

7 files changed

+107
-13
lines changed

7 files changed

+107
-13
lines changed

db/db_impl/db_impl.cc

+3
Original file line numberDiff line numberDiff line change
@@ -5897,6 +5897,7 @@ Status DBImpl::IngestExternalFile(
58975897

58985898
Status DBImpl::IngestExternalFiles(
58995899
const std::vector<IngestExternalFileArg>& args) {
5900+
PERF_TIMER_GUARD(file_ingestion_nanos);
59005901
// TODO: plumb Env::IOActivity, Env::IOPriority
59015902
const WriteOptions write_options;
59025903

@@ -6041,6 +6042,7 @@ Status DBImpl::IngestExternalFiles(
60416042
if (two_write_queues_) {
60426043
nonmem_write_thread_.EnterUnbatched(&nonmem_w, &mutex_);
60436044
}
6045+
PERF_TIMER_GUARD(file_ingestion_blocking_live_writes_nanos);
60446046

60456047
// When unordered_write is enabled, the keys are writing to memtable in an
60466048
// unordered way. If the ingestion job checks memtable key range before the
@@ -6203,6 +6205,7 @@ Status DBImpl::IngestExternalFiles(
62036205
nonmem_write_thread_.ExitUnbatched(&nonmem_w);
62046206
}
62056207
write_thread_.ExitUnbatched(&w);
6208+
PERF_TIMER_STOP(file_ingestion_blocking_live_writes_nanos);
62066209

62076210
if (status.ok()) {
62086211
for (auto& job : ingestion_jobs) {

db/external_sst_file_basic_test.cc

+44-1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include "port/stack_trace.h"
1212
#include "rocksdb/advanced_options.h"
1313
#include "rocksdb/options.h"
14+
#include "rocksdb/perf_context.h"
1415
#include "rocksdb/sst_file_writer.h"
1516
#include "test_util/testharness.h"
1617
#include "test_util/testutil.h"
@@ -260,14 +261,56 @@ TEST_F(ExternalSSTFileBasicTest, Basic) {
260261
ASSERT_NOK(s) << s.ToString();
261262

262263
DestroyAndReopen(options);
264+
265+
SyncPoint::GetInstance()->LoadDependency({
266+
{"DBImpl::IngestExternalFile:AfterIncIngestFileCounter",
267+
"ExternalSSTFileBasicTest.LiveWriteStart"},
268+
{"ExternalSSTFileBasicTest.LiveWriteStart",
269+
"DBImpl::IngestExternalFiles:InstallSVForFirstCF:0"},
270+
});
271+
SyncPoint::GetInstance()->EnableProcessing();
272+
PerfContext* write_thread_perf_context;
273+
std::thread write_thread([&] {
274+
TEST_SYNC_POINT("ExternalSSTFileBasicTest.LiveWriteStart");
275+
SetPerfLevel(kEnableWait);
276+
write_thread_perf_context = get_perf_context();
277+
write_thread_perf_context->Reset();
278+
ASSERT_OK(db_->Put(WriteOptions(), "bar", "v2"));
279+
ASSERT_GT(write_thread_perf_context->write_thread_wait_nanos, 0);
280+
// Test sync points were used to make sure this live write enter write
281+
// thread after the file ingestion entered write thread. So by the time this
282+
// live write finishes, the latest seqno is 1 means file ingestion used
283+
// seqno 0.
284+
ASSERT_EQ(db_->GetLatestSequenceNumber(), 1U);
285+
});
286+
263287
// Add file using file path
288+
SetPerfLevel(kEnableTimeExceptForMutex);
289+
PerfContext* perf_ctx = get_perf_context();
290+
perf_ctx->Reset();
264291
s = DeprecatedAddFile({file1});
292+
ASSERT_GT(perf_context.file_ingestion_nanos, 0);
293+
ASSERT_GT(perf_context.file_ingestion_blocking_live_writes_nanos, 0);
265294
ASSERT_OK(s) << s.ToString();
266-
ASSERT_EQ(db_->GetLatestSequenceNumber(), 0U);
267295
for (int k = 0; k < 100; k++) {
268296
ASSERT_EQ(Get(Key(k)), Key(k) + "_val");
269297
}
270298

299+
write_thread.join();
300+
ASSERT_GT(perf_context.file_ingestion_blocking_live_writes_nanos,
301+
write_thread_perf_context->write_thread_wait_nanos);
302+
SyncPoint::GetInstance()->DisableProcessing();
303+
304+
// Re-ingest the file just to check the perf context not enabled at and below
305+
// kEnableWait.
306+
SetPerfLevel(kEnableWait);
307+
perf_ctx->Reset();
308+
IngestExternalFileOptions opts;
309+
opts.allow_global_seqno = true;
310+
opts.allow_blocking_flush = true;
311+
ASSERT_OK(db_->IngestExternalFile({file1}, opts));
312+
ASSERT_EQ(perf_context.file_ingestion_nanos, 0);
313+
ASSERT_EQ(perf_context.file_ingestion_blocking_live_writes_nanos, 0);
271314
DestroyAndRecreateExternalSSTFilesDir();
272315
}
273316

include/rocksdb/iostats_context.h

+10
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,18 @@
1010

1111
#include "rocksdb/perf_level.h"
1212

13+
/*
14+
* NOTE:
15+
* If you plan to add new metrics, please read documentation in perf_level.h and
16+
* try to come up with a metric name that follows the naming conventions
17+
* mentioned there. It helps to indicate the metric's starting enabling P
18+
* erfLevel. Document this starting PerfLevel if the metric name cannot meet the
19+
* naming conventions.
20+
*/
21+
1322
// A thread local context for gathering io-stats efficiently and transparently.
1423
// Use SetPerfLevel(PerfLevel::kEnableTime) to enable time stats.
24+
//
1525

1626
namespace ROCKSDB_NAMESPACE {
1727

include/rocksdb/perf_context.h

+15
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,12 @@ namespace ROCKSDB_NAMESPACE {
2020
* add/remove fields to this structure, builds would fail. The way to fix the
2121
* builds would be to add the appropriate fields to the
2222
* DEF_PERF_CONTEXT_LEVEL_METRICS() macro in the perf_context.cc file.
23+
*
24+
* If you plan to add new metrics, please read documentation in perf_level.h and
25+
* try to come up with a metric name that follows the naming conventions
26+
* mentioned there. It helps to indicate the metric's starting enabling P
27+
* erfLevel. Document this starting PerfLevel if the metric name cannot meet the
28+
* naming conventions.
2329
*/
2430

2531
// Break down performance counters by level and store per-level perf context in
@@ -189,6 +195,7 @@ struct PerfContextBase {
189195
// total nanos spent on writing to WAL
190196
uint64_t write_wal_time;
191197
// total nanos spent on writing to mem tables
198+
// This metric gets collected starting from PerfLevel::kEnableWait
192199
uint64_t write_memtable_time;
193200
// total nanos spent on delaying or throttling write
194201
uint64_t write_delay_time;
@@ -230,6 +237,8 @@ struct PerfContextBase {
230237
uint64_t bloom_sst_miss_count;
231238

232239
// Time spent waiting on key locks in transaction lock manager.
240+
// This metric gets collected starting from
241+
// PerfLevel::kEnableTimeExceptForMutex
233242
uint64_t key_lock_wait_time;
234243
// number of times acquiring a lock was blocked by another transaction.
235244
uint64_t key_lock_wait_count;
@@ -275,6 +284,12 @@ struct PerfContextBase {
275284
uint64_t decrypt_data_nanos;
276285

277286
uint64_t number_async_seek;
287+
288+
// Metrics for file ingestion
289+
// Time spent end to end in an IngestExternalFile call.
290+
uint64_t file_ingestion_nanos;
291+
// Time IngestExternalFile blocked live writes.
292+
uint64_t file_ingestion_blocking_live_writes_nanos;
278293
};
279294

280295
struct PerfContext : public PerfContextBase {

include/rocksdb/perf_level.h

+31-11
Original file line numberDiff line numberDiff line change
@@ -14,19 +14,39 @@
1414
namespace ROCKSDB_NAMESPACE {
1515

1616
// How much perf stats to collect. Affects perf_context and iostats_context.
17+
// These levels are incremental, which means a new set of metrics will get
18+
// collected when PerfLevel is upgraded from level n to level n + 1.
19+
// Each level's documentation specifies the incremental set of metrics it
20+
// enables. As an example, kEnableWait will also enable collecting all the
21+
// metrics that kEnableCount enables, and its documentation only specifies which
22+
// extra metrics it also enables.
23+
//
24+
// These metrics are identified with some naming conventions, but not all
25+
// metrics follow exactly this convention. The metrics' own documentation should
26+
// be source of truth if they diverge.
1727
enum PerfLevel : unsigned char {
18-
kUninitialized = 0, // unknown setting
19-
kDisable = 1, // disable perf stats
20-
kEnableCount = 2, // enable only count stats
21-
kEnableWait = 3, // measure time spent by user threads
22-
// blocked in RocksDB, and not external
23-
// resources such as mutexes and IO
24-
kEnableTimeExceptForMutex = 4, // Other than count stats, also enable time
25-
// stats except for mutexes
26-
// Other than time, also measure CPU time counters. Still don't measure
27-
// time (neither wall time nor CPU time) for mutexes.
28+
// Unknown setting
29+
kUninitialized = 0,
30+
// Disable perf stats
31+
kDisable = 1,
32+
// Starts enabling count metrics. These metrics usually don't have time
33+
// related keywords, and are likely to have keywords like "count" or "byte".
34+
kEnableCount = 2,
35+
// Starts enabling metrics that measure time spent by user threads blocked in
36+
// RocksDB waiting for RocksDB to take actions, as opposed to waiting for
37+
// external resources such as mutexes and IO.
38+
// These metrics usually have this pattern: "_[wait|delay]_*_[time|nanos]".
39+
kEnableWait = 3,
40+
// Starts enabling metrics that measure the end to end time of an operation.
41+
// These metrics' names have keywords "time" or "nanos". Check other time
42+
// measuring metrics with similar but more specific naming conventions.
43+
kEnableTimeExceptForMutex = 4,
44+
// Starts enabling metrics that measure the cpu time of an operation. These
45+
// metrics' name usually this pattern "_cpu_*_[time|nanos]".
2846
kEnableTimeAndCPUTimeExceptForMutex = 5,
29-
kEnableTime = 6, // enable count and time stats
47+
// Starts enabling metrics that measure time for mutex. These metrics' name
48+
// usually have this pattern: "_[mutex|condition]_*_[time|nanos]".
49+
kEnableTime = 6,
3050
kOutOfBounds = 7 // N.B. Must always be the last value!
3151
};
3252

monitoring/perf_context.cc

+3-1
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,9 @@ struct PerfContextByLevelInt {
153153
defCmd(iter_seek_count) \
154154
defCmd(encrypt_data_nanos) \
155155
defCmd(decrypt_data_nanos) \
156-
defCmd(number_async_seek)
156+
defCmd(number_async_seek) \
157+
defCmd(file_ingestion_nanos) \
158+
defCmd(file_ingestion_blocking_live_writes_nanos)
157159
// clang-format on
158160

159161
struct PerfContextInt {
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Add `file_ingestion_nanos` and `file_ingestion_blocking_live_writes_nanos` in PerfContext to observe file ingestions

0 commit comments

Comments
 (0)