From 3de4cc218406258204579ef1f3ef109d2b8e9390 Mon Sep 17 00:00:00 2001
From: LemonHX
Date: Fri, 13 Aug 2021 16:02:26 +0800
Subject: [PATCH] Add bitmap control for perf context (#237)

* `PerfFlag` implementation alongside the `PerfLevel`

Design spec is at: https://docs.google.com/document/d/1JYmWMIZwYV0AZW6rNv_oFVZLBCAkxLLYslt39eEZstM/edit?usp=sharing

Flag values that are not below FLAG_END are ignored by
EnablePerfFlag/DisablePerfFlag and reported as unset by CheckPerfFlag.

Signed-off-by: lemonhx
Co-authored-by: Xinye Tao
---
 CMakeLists.txt                   |   1 +
 db/c.cc                          |  12 ++++
 db/perf_context_test.cc          |  20 ++++++
 include/rocksdb/c.h              |   3 +
 include/rocksdb/perf_flag.h      |  20 ++++++
 include/rocksdb/perf_flag_defs.h | 111 +++++++++++++++++++++++++++++++
 monitoring/iostats_context_imp.h |  13 ++--
 monitoring/perf_context_imp.h    |  68 +++++++++----------
 monitoring/perf_flag.cc          |  32 +++++++++
 monitoring/perf_flag_imp.h       |  15 +++++
 monitoring/perf_step_timer.h     |   6 +-
 src.mk                           |   1 +
 12 files changed, 261 insertions(+), 41 deletions(-)
 create mode 100644 include/rocksdb/perf_flag.h
 create mode 100644 include/rocksdb/perf_flag_defs.h
 create mode 100644 monitoring/perf_flag.cc
 create mode 100644 monitoring/perf_flag_imp.h

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 441173a29a1..b66f0dfaee9 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -585,6 +585,7 @@ set(SOURCES
         monitoring/iostats_context.cc
         monitoring/perf_context.cc
         monitoring/perf_level.cc
+        monitoring/perf_flag.cc
         monitoring/persistent_stats_history.cc
         monitoring/statistics.cc
         monitoring/thread_status_impl.cc
diff --git a/db/c.cc b/db/c.cc
index 144d8f90ded..63fb575f855 100644
--- a/db/c.cc
+++ b/db/c.cc
@@ -25,6 +25,7 @@
 #include "rocksdb/merge_operator.h"
 #include "rocksdb/options.h"
 #include "rocksdb/perf_context.h"
+#include "rocksdb/perf_flag.h"
 #include "rocksdb/rate_limiter.h"
 #include "rocksdb/slice_transform.h"
 #include "rocksdb/statistics.h"
@@ -113,6 +114,9 @@ using rocksdb::Checkpoint;
 using rocksdb::TransactionLogIterator;
 using rocksdb::BatchResult;
 using rocksdb::PerfLevel;
+using rocksdb::EnablePerfFlag;
+using rocksdb::DisablePerfFlag;
+using rocksdb::CheckPerfFlag;
 using rocksdb::PerfContext;
 using rocksdb::MemoryUtil;
 
@@ -2748,6 +2752,14 @@ void rocksdb_set_perf_level(int v) {
   SetPerfLevel(level);
 }
 
+void rocksdb_enable_perf_flag(uint64_t flag) { EnablePerfFlag(flag); }
+
+void rocksdb_disable_perf_flag(uint64_t flag) { DisablePerfFlag(flag); }
+
+int rocksdb_check_perf_flag(uint64_t flag) {
+  return static_cast<int>(CheckPerfFlag(flag));
+}
+
 rocksdb_perfcontext_t* rocksdb_perfcontext_create() {
   rocksdb_perfcontext_t* context = new rocksdb_perfcontext_t;
   context->rep = rocksdb::get_perf_context();
diff --git a/db/perf_context_test.cc b/db/perf_context_test.cc
index 94eabff7ff5..4f0d74e52f8 100644
--- a/db/perf_context_test.cc
+++ b/db/perf_context_test.cc
@@ -938,6 +938,26 @@ TEST_F(PerfContextTest, CPUTimer) {
     ASSERT_EQ(count, get_perf_context()->iter_seek_cpu_nanos);
   }
 }
+
+TEST_F(PerfContextTest, BitMapControl) {
+  DestroyDB(kDbName, Options());
+  auto db = OpenDb();
+  WriteOptions write_options;
+  SetPerfLevel(PerfLevel::kDisable);
+  EnablePerfFlag(flag_user_key_comparison_count);
+  EnablePerfFlag(flag_write_wal_time);
+
+  for (int i = 0; i < FLAGS_total_keys; ++i) {
+    std::string i_str = ToString(i);
+    std::string key = "k" + i_str;
+    std::string value = "v" + i_str;
+
+    ASSERT_OK(db->Put(write_options, key, value));
+  }
+  ASSERT_GT(get_perf_context()->user_key_comparison_count, 0);
+  ASSERT_GT(get_perf_context()->write_wal_time, 0);
+}
+
 }  // namespace rocksdb
 
 int main(int argc, char** argv) {
diff --git a/include/rocksdb/c.h b/include/rocksdb/c.h
index f21dbcf7d4a..1db70697df1 100644
--- a/include/rocksdb/c.h
+++ b/include/rocksdb/c.h
@@ -1120,6 +1120,9 @@ enum {
 };
 
 extern ROCKSDB_LIBRARY_API void rocksdb_set_perf_level(int);
+extern ROCKSDB_LIBRARY_API void rocksdb_enable_perf_flag(uint64_t);
+extern ROCKSDB_LIBRARY_API void rocksdb_disable_perf_flag(uint64_t);
+extern ROCKSDB_LIBRARY_API int rocksdb_check_perf_flag(uint64_t);
 extern ROCKSDB_LIBRARY_API rocksdb_perfcontext_t* rocksdb_perfcontext_create();
 extern ROCKSDB_LIBRARY_API void rocksdb_perfcontext_reset(
     rocksdb_perfcontext_t* context);
diff --git a/include/rocksdb/perf_flag.h b/include/rocksdb/perf_flag.h
new file mode 100644
index 00000000000..57536b9a7c5
--- /dev/null
+++ b/include/rocksdb/perf_flag.h
@@ -0,0 +1,20 @@
+// complements to perf_level
+#pragma once
+
+#include <stdint.h>
+
+#include "perf_flag_defs.h"
+
+// Byte of the perf_flags bitmap that holds the bit for `flag`.
+#define GET_FLAG(flag) perf_flags[(uint64_t)(flag) >> 3]
+
+// FLAGS_LEN = ceiling(FLAG_END / bits(uint8_t))
+#define FLAGS_LEN (((uint64_t)FLAG_END + 7) >> 3)
+
+namespace rocksdb {
+// `flag` is one of the flag_* values from perf_flag_defs.h. Values that are
+// not below FLAG_END are ignored by Enable/Disable and reported as unset.
+void EnablePerfFlag(uint64_t flag);
+void DisablePerfFlag(uint64_t flag);
+bool CheckPerfFlag(uint64_t flag);
+}  // namespace rocksdb
diff --git a/include/rocksdb/perf_flag_defs.h b/include/rocksdb/perf_flag_defs.h
new file mode 100644
index 00000000000..d2cddf98e25
--- /dev/null
+++ b/include/rocksdb/perf_flag_defs.h
@@ -0,0 +1,111 @@
+#pragma once
+#include <stdint.h>
+
+// Bit positions in the perf_flags bitmap. The PERF_* and IOSTATS_* macros
+// paste `flag_##metric`, so every metric name used with them needs a
+// matching `flag_<metric>` enumerator here.
+enum {
+  flag_user_key_comparison_count = 0,
+  flag_block_cache_hit_count,
+  flag_block_read_count,
+  flag_block_read_byte,
+  flag_block_read_time,
+  flag_block_cache_index_hit_count,
+  flag_index_block_read_count,
+  flag_block_cache_filter_hit_count,
+  flag_filter_block_read_count,
+  flag_compression_dict_block_read_count,
+  flag_block_checksum_time,
+  flag_block_decompress_time,
+  flag_get_read_bytes,
+  flag_multiget_read_bytes,
+  flag_iter_read_bytes,
+  flag_internal_key_skipped_count,
+  flag_internal_delete_skipped_count,
+  flag_internal_recent_skipped_count,
+  flag_internal_merge_count,
+  flag_get_snapshot_time,
+  flag_get_from_memtable_time,
+  flag_get_from_memtable_count,
+  flag_get_post_process_time,
+  flag_get_from_output_files_time,
+  flag_seek_on_memtable_time,
+  flag_seek_on_memtable_count,
+  flag_next_on_memtable_count,
+  flag_prev_on_memtable_count,
+  flag_seek_child_seek_time,
+  flag_seek_child_seek_count,
+  flag_seek_min_heap_time,
+  flag_seek_max_heap_time,
+  flag_seek_internal_seek_time,
+  flag_find_next_user_entry_time,
+  flag_write_wal_time,
+  flag_write_memtable_time,
+  flag_write_delay_time,
+  flag_write_scheduling_flushes_compactions_time,
+  flag_write_pre_and_post_process_time,
+  flag_write_thread_wait_nanos,
+  flag_db_mutex_lock_nanos,
+  flag_db_condition_wait_nanos,
+  flag_merge_operator_time_nanos,
+  flag_read_index_block_nanos,
+  flag_read_filter_block_nanos,
+  flag_new_table_block_iter_nanos,
+  flag_new_table_iterator_nanos,
+  flag_block_seek_nanos,
+  flag_find_table_nanos,
+  flag_bloom_memtable_hit_count,
+  flag_bloom_memtable_miss_count,
+  flag_bloom_sst_hit_count,
+  flag_bloom_sst_miss_count,
+  flag_key_lock_wait_time,
+  flag_key_lock_wait_count,
+  flag_env_new_sequential_file_nanos,
+  flag_env_new_random_access_file_nanos,
+  flag_env_new_writable_file_nanos,
+  flag_env_reuse_writable_file_nanos,
+  flag_env_new_random_rw_file_nanos,
+  flag_env_new_directory_nanos,
+  flag_env_file_exists_nanos,
+  flag_env_get_children_nanos,
+  flag_env_get_children_file_attributes_nanos,
+  flag_env_delete_file_nanos,
+  flag_env_create_dir_nanos,
+  flag_env_create_dir_if_missing_nanos,
+  flag_env_delete_dir_nanos,
+  flag_env_get_file_size_nanos,
+  flag_env_get_file_modification_time_nanos,
+  flag_env_rename_file_nanos,
+  flag_env_link_file_nanos,
+  flag_env_lock_file_nanos,
+  flag_env_unlock_file_nanos,
+  flag_env_new_logger_nanos,
+  flag_get_cpu_nanos,
+  flag_iter_next_cpu_nanos,
+  flag_iter_prev_cpu_nanos,
+  flag_iter_seek_cpu_nanos,
+  flag_encrypt_data_nanos,
+  flag_decrypt_data_nanos,
+
+  flag_get_from_table_nanos,
+  flag_user_key_return_count,
+  flag_block_cache_miss_count,
+  flag_bloom_filter_full_positive,
+  flag_bloom_filter_useful,
+  flag_bloom_filter_full_true_positive,
+
+  flag_bytes_read,
+  flag_bytes_written,
+  flag_open_nanos,
+  flag_allocate_nanos,
+  flag_write_nanos,
+  flag_read_nanos,
+  flag_range_sync_nanos,
+  flag_prepare_write_nanos,
+  flag_fsync_nanos,
+  flag_logger_nanos,
+  flag_cpu_read_nanos,
+  flag_cpu_write_nanos,
+  // Should always be the last
+  FLAG_END
+};
diff --git a/monitoring/iostats_context_imp.h b/monitoring/iostats_context_imp.h
index 19e34209b1b..41b432543ba 100644
--- a/monitoring/iostats_context_imp.h
+++ b/monitoring/iostats_context_imp.h
@@ -33,15 +33,16 @@ extern __thread IOStatsContext iostats_context;
 #define IOSTATS(metric) (iostats_context.metric)
 
 // Declare and set start time of the timer
-#define IOSTATS_TIMER_GUARD(metric)                                     \
-  PerfStepTimer iostats_step_timer_##metric(&(iostats_context.metric)); \
+#define IOSTATS_TIMER_GUARD(metric)                                        \
+  PerfStepTimer iostats_step_timer_##metric(&(iostats_context.metric),     \
+                                            CheckPerfFlag(flag_##metric)); \
   iostats_step_timer_##metric.Start();
 
 // Declare and set start time of the timer
-#define IOSTATS_CPU_TIMER_GUARD(metric, env)           \
-  PerfStepTimer iostats_step_timer_##metric(           \
-      &(iostats_context.metric), env, true,            \
-      PerfLevel::kEnableTimeAndCPUTimeExceptForMutex); \
+#define IOSTATS_CPU_TIMER_GUARD(metric, env)                              \
+  PerfStepTimer iostats_step_timer_##metric(                              \
+      &(iostats_context.metric), CheckPerfFlag(flag_##metric), env, true, \
+      PerfLevel::kEnableTimeAndCPUTimeExceptForMutex);                    \
   iostats_step_timer_##metric.Start();
 
 #else  // ROCKSDB_SUPPORT_THREAD_LOCAL
diff --git a/monitoring/perf_context_imp.h b/monitoring/perf_context_imp.h
index e0ff8afc58e..ed61e6ec26f 100644
--- a/monitoring/perf_context_imp.h
+++ b/monitoring/perf_context_imp.h
@@ -37,29 +37,31 @@ extern thread_local PerfContext perf_context;
 #define PERF_TIMER_START(metric) perf_step_timer_##metric.Start();
 
 // Declare and set start time of the timer
-#define PERF_TIMER_GUARD(metric)                                  \
-  PerfStepTimer perf_step_timer_##metric(&(perf_context.metric)); \
+#define PERF_TIMER_GUARD(metric)                                        \
+  PerfStepTimer perf_step_timer_##metric(&(perf_context.metric),        \
+                                         CheckPerfFlag(flag_##metric)); \
   perf_step_timer_##metric.Start();
 
 // Declare and set start time of the timer
-#define PERF_TIMER_GUARD_WITH_ENV(metric, env)                         \
-  PerfStepTimer perf_step_timer_##metric(&(perf_context.metric), env); \
+#define PERF_TIMER_GUARD_WITH_ENV(metric, env)                               \
+  PerfStepTimer perf_step_timer_##metric(&(perf_context.metric),             \
+                                         CheckPerfFlag(flag_##metric), env); \
   perf_step_timer_##metric.Start();
 
 // Declare and set start time of the timer
-#define PERF_CPU_TIMER_GUARD(metric, env)              \
-  PerfStepTimer perf_step_timer_##metric(              \
-      &(perf_context.metric), env, true,               \
-      PerfLevel::kEnableTimeAndCPUTimeExceptForMutex); \
+#define PERF_CPU_TIMER_GUARD(metric, env)                              \
+  PerfStepTimer perf_step_timer_##metric(                              \
+      &(perf_context.metric), CheckPerfFlag(flag_##metric), env, true, \
+      PerfLevel::kEnableTimeAndCPUTimeExceptForMutex);                 \
   perf_step_timer_##metric.Start();
 
-#define PERF_CONDITIONAL_TIMER_FOR_MUTEX_GUARD(metric, condition, stats,       \
-                                               ticker_type)                    \
-  PerfStepTimer perf_step_timer_##metric(&(perf_context.metric), nullptr,      \
-                                         false, PerfLevel::kEnableTime, stats, \
-                                         ticker_type);                         \
-  if (condition) {                                                             \
-    perf_step_timer_##metric.Start();                                          \
+#define PERF_CONDITIONAL_TIMER_FOR_MUTEX_GUARD(metric, condition, stats,    \
+                                               ticker_type)                 \
+  PerfStepTimer perf_step_timer_##metric(                                   \
+      &(perf_context.metric), CheckPerfFlag(flag_##metric), nullptr, false, \
+      PerfLevel::kEnableTime, stats, ticker_type);                          \
+  if (condition) {                                                          \
+    perf_step_timer_##metric.Start();                                       \
   }
 
 // Update metric with time elapsed since last START. start time is reset
@@ -67,27 +69,27 @@ extern thread_local PerfContext perf_context;
 #define PERF_TIMER_MEASURE(metric) perf_step_timer_##metric.Measure();
 
 // Increase metric value
-#define PERF_COUNTER_ADD(metric, value)        \
-  if (perf_level >= PerfLevel::kEnableCount) { \
-    perf_context.metric += value;              \
+#define PERF_COUNTER_ADD(metric, value)                                        \
+  if (perf_level >= PerfLevel::kEnableCount || CheckPerfFlag(flag_##metric)) { \
+    perf_context.metric += value;                                              \
   }
 
 // Increase metric value
-#define PERF_COUNTER_BY_LEVEL_ADD(metric, value, level)               \
-  if (perf_level >= PerfLevel::kEnableCount &&                        \
-      perf_context.per_level_perf_context_enabled &&                  \
-      perf_context.level_to_perf_context) {                           \
-    if ((*(perf_context.level_to_perf_context)).find(level) !=        \
-        (*(perf_context.level_to_perf_context)).end()) {              \
-      (*(perf_context.level_to_perf_context))[level].metric += value; \
-    }                                                                 \
-    else {                                                            \
-      PerfContextByLevel empty_context;                               \
-      (*(perf_context.level_to_perf_context))[level] = empty_context; \
-      (*(perf_context.level_to_perf_context))[level].metric += value; \
-    }                                                                 \
-  }                                                                   \
+#define PERF_COUNTER_BY_LEVEL_ADD(metric, value, level)               \
+  if ((perf_level >= PerfLevel::kEnableCount ||                       \
+       CheckPerfFlag(flag_##metric)) &&                               \
+      perf_context.per_level_perf_context_enabled &&                  \
+      perf_context.level_to_perf_context) {                           \
+    if ((*(perf_context.level_to_perf_context)).find(level) !=        \
+        (*(perf_context.level_to_perf_context)).end()) {              \
+      (*(perf_context.level_to_perf_context))[level].metric += value; \
+    } else {                                                          \
+      PerfContextByLevel empty_context;                               \
+      (*(perf_context.level_to_perf_context))[level] = empty_context; \
+      (*(perf_context.level_to_perf_context))[level].metric += value; \
+    }                                                                 \
+  }
 
 #endif
 
-}
+}  // namespace rocksdb
diff --git a/monitoring/perf_flag.cc b/monitoring/perf_flag.cc
new file mode 100644
index 00000000000..a3bdbda353e
--- /dev/null
+++ b/monitoring/perf_flag.cc
@@ -0,0 +1,32 @@
+#include "rocksdb/perf_flag.h"
+
+namespace rocksdb {
+// One bit per flag_* enumerator. A set bit enables collection of that metric
+// even when the current PerfLevel would skip it.
+#ifdef ROCKSDB_SUPPORT_THREAD_LOCAL
+__thread uint8_t perf_flags[FLAGS_LEN] = {0};
+#else
+uint8_t perf_flags[FLAGS_LEN] = {0};
+#endif
+
+// In all three functions GET_FLAG picks the byte (flag >> 3) and `flag & 7`
+// picks the bit inside it. Values >= FLAG_END are rejected so that arbitrary
+// integers passed through the C API can never index outside perf_flags.
+void EnablePerfFlag(uint64_t flag) {
+  if (flag < static_cast<uint64_t>(FLAG_END)) {
+    GET_FLAG(flag) |= static_cast<uint8_t>(1u << (flag & 7u));
+  }
+}
+
+void DisablePerfFlag(uint64_t flag) {
+  if (flag < static_cast<uint64_t>(FLAG_END)) {
+    GET_FLAG(flag) &= static_cast<uint8_t>(~(1u << (flag & 7u)));
+  }
+}
+
+bool CheckPerfFlag(uint64_t flag) {
+  return flag < static_cast<uint64_t>(FLAG_END) &&
+         (GET_FLAG(flag) & (1u << (flag & 7u))) != 0;
+}
+
+}  // namespace rocksdb
diff --git a/monitoring/perf_flag_imp.h b/monitoring/perf_flag_imp.h
new file mode 100644
index 00000000000..453c5e03db8
--- /dev/null
+++ b/monitoring/perf_flag_imp.h
@@ -0,0 +1,15 @@
+#pragma once
+
+#include <cstdint>
+
+#include "rocksdb/perf_flag.h"
+
+namespace rocksdb {
+// Bitmap of enabled perf flags (thread-local when the platform supports it);
+// defined in monitoring/perf_flag.cc.
+#ifdef ROCKSDB_SUPPORT_THREAD_LOCAL
+extern __thread uint8_t perf_flags[FLAGS_LEN];
+#else
+extern uint8_t perf_flags[FLAGS_LEN];
+#endif
+}  // namespace rocksdb
diff --git a/monitoring/perf_step_timer.h b/monitoring/perf_step_timer.h
index 6501bd54aba..85fb2b13cbd 100644
--- a/monitoring/perf_step_timer.h
+++ b/monitoring/perf_step_timer.h
@@ -4,6 +4,7 @@
 //  (found in the LICENSE.Apache file in the root directory).
 //
 #pragma once
+#include "monitoring/perf_flag_imp.h"
 #include "monitoring/perf_level_imp.h"
 #include "rocksdb/env.h"
 #include "util/stop_watch.h"
@@ -13,10 +14,11 @@ namespace rocksdb {
 class PerfStepTimer {
  public:
   explicit PerfStepTimer(
-      uint64_t* metric, Env* env = nullptr, bool use_cpu_time = false,
+      uint64_t* metric, bool enable_flag = false, Env* env = nullptr,
+      bool use_cpu_time = false,
       PerfLevel enable_level = PerfLevel::kEnableTimeExceptForMutex,
       Statistics* statistics = nullptr, uint32_t ticker_type = 0)
-      : perf_counter_enabled_(perf_level >= enable_level),
+      : perf_counter_enabled_(perf_level >= enable_level || enable_flag),
         use_cpu_time_(use_cpu_time),
         env_((perf_counter_enabled_ || statistics != nullptr)
                  ? ((env != nullptr) ? env : Env::Default())
diff --git a/src.mk b/src.mk
index 12bb2e0601a..13923612f6d 100644
--- a/src.mk
+++ b/src.mk
@@ -94,6 +94,7 @@ LIB_SOURCES =                                                   \
   monitoring/iostats_context.cc                                 \
   monitoring/perf_context.cc                                    \
   monitoring/perf_level.cc                                      \
+  monitoring/perf_flag.cc                                       \
   monitoring/persistent_stats_history.cc                        \
   monitoring/statistics.cc                                      \
   monitoring/thread_status_impl.cc                              \