From 05daa123323b1471bde4723dc441763d687fd825 Mon Sep 17 00:00:00 2001
From: Hui Xiao
Date: Wed, 30 Aug 2023 14:57:08 -0700
Subject: [PATCH] Change compaction_readahead_size default value to 2MB
 (#11762)

Summary:
**Context/Summary:**
After https://github.com/facebook/rocksdb/pull/11631, we rely on
`compaction_readahead_size` to decide how much to read ahead for compaction
reads in the non-direct IO case. https://github.com/facebook/rocksdb/pull/11658
therefore also sanitized a `compaction_readahead_size` of 0 to 2MB under
non-direct IO, consistent with the existing sanitization under direct IO.

However, this makes it impossible to disable compaction readahead, and it adds
one more scenario where `Options.compaction_readahead_size=0` at DB open and
`SetDBOptions("compaction_readahead_size", "0")` have inconsistent effects:
- `SetDBOptions("compaction_readahead_size", "0")` disables compaction
  readahead because its logic never goes through the sanitization above, while
  `Options.compaction_readahead_size=0` does.

Therefore we decided to do this PR: change the default
`compaction_readahead_size` to 2MB and stop sanitizing an explicit 0, so that
0 actually disables compaction readahead.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/11762

Test Plan: Modified existing UTs to cover this PR

Reviewed By: ajkr

Differential Revision: D48759560

Pulled By: hx235

fbshipit-source-id: b3f85e58bda362a6fa1dc26bd8a87aa0e171af79
---
 db/db_compaction_test.cc                           |  7 +--
 db/db_impl/db_impl_open.cc                         |  7 ---
 db/db_options_test.cc                              | 51 +++++++++++--------
 include/rocksdb/options.h                          |  4 +-
 ...fered_io_compaction_readahead_size_zero.md      |  1 +
 ...compaction_readahead_size_option_change.md      |  1 +
 6 files changed, 34 insertions(+), 37 deletions(-)
 create mode 100644 unreleased_history/behavior_changes/buffered_io_compaction_readahead_size_zero.md
 create mode 100644 unreleased_history/public_api_changes/compaction_readahead_size_option_change.md

diff --git a/db/db_compaction_test.cc b/db/db_compaction_test.cc
index 24445ecdb70..8713b9b8045 100644
--- a/db/db_compaction_test.cc
+++ b/db/db_compaction_test.cc
@@ -6025,23 +6025,18 @@ TEST_P(DBCompactionDirectIOTest, DirectIO) {
   options.use_direct_io_for_flush_and_compaction = GetParam();
   options.env = MockEnv::Create(Env::Default());
   Reopen(options);
-  bool readahead = false;
   SyncPoint::GetInstance()->SetCallBack(
       "CompactionJob::OpenCompactionOutputFile", [&](void* arg) {
         bool* use_direct_writes = static_cast<bool*>(arg);
         ASSERT_EQ(*use_direct_writes,
                   options.use_direct_io_for_flush_and_compaction);
       });
-  if (options.use_direct_io_for_flush_and_compaction) {
-    SyncPoint::GetInstance()->SetCallBack(
-        "SanitizeOptions:direct_io", [&](void* /*arg*/) { readahead = true; });
-  }
   SyncPoint::GetInstance()->EnableProcessing();
   CreateAndReopenWithCF({"pikachu"}, options);
   MakeTables(3, "p", "q", 1);
   ASSERT_EQ("1,1,1", FilesPerLevel(1));
   Compact(1, "p", "q");
-  ASSERT_EQ(readahead, options.use_direct_reads);
+  ASSERT_EQ(false, options.use_direct_reads);
   ASSERT_EQ("0,0,1", FilesPerLevel(1));
   Destroy(options);
   delete options.env;
diff --git a/db/db_impl/db_impl_open.cc b/db/db_impl/db_impl_open.cc
index 8db53dac93f..d9d1f932afa 100644
--- a/db/db_impl/db_impl_open.cc
+++ b/db/db_impl/db_impl_open.cc
@@ -143,13 +143,6 @@ DBOptions SanitizeOptions(const std::string& dbname, const DBOptions& src,
     result.wal_dir = result.wal_dir.substr(0, result.wal_dir.size() - 1);
   }
 
-  if (result.compaction_readahead_size == 0) {
-    if (result.use_direct_reads) {
-      TEST_SYNC_POINT_CALLBACK("SanitizeOptions:direct_io", nullptr);
-    }
-    result.compaction_readahead_size = 1024 * 1024 * 2;
-  }
-
   // Force flush on DB open if 2PC is enabled, since with 2PC we have no
   // guarantee that consecutive log files have consecutive sequence id, which
   // make recovery complicated.
diff --git a/db/db_options_test.cc b/db/db_options_test.cc
index b7c132aeef3..c3910a9787b 100644
--- a/db/db_options_test.cc
+++ b/db/db_options_test.cc
@@ -1034,30 +1034,37 @@ TEST_F(DBOptionsTest, SetFIFOCompactionOptions) {
 }
 
 TEST_F(DBOptionsTest, CompactionReadaheadSizeChange) {
-  SpecialEnv env(env_);
-  Options options;
-  options.env = &env;
-
-  options.compaction_readahead_size = 0;
-  options.level0_file_num_compaction_trigger = 2;
-  const std::string kValue(1024, 'v');
-  Reopen(options);
+  for (bool use_direct_reads : {true, false}) {
+    SpecialEnv env(env_);
+    Options options;
+    options.env = &env;
+
+    options.use_direct_reads = use_direct_reads;
+    options.level0_file_num_compaction_trigger = 2;
+    const std::string kValue(1024, 'v');
+    Status s = TryReopen(options);
+    if (use_direct_reads && (s.IsNotSupported() || s.IsInvalidArgument())) {
+      continue;
+    } else {
+      ASSERT_OK(s);
+    }
 
-  ASSERT_EQ(1024 * 1024 * 2,
-            dbfull()->GetDBOptions().compaction_readahead_size);
-  ASSERT_OK(dbfull()->SetDBOptions({{"compaction_readahead_size", "256"}}));
-  ASSERT_EQ(256, dbfull()->GetDBOptions().compaction_readahead_size);
-  for (int i = 0; i < 1024; i++) {
-    ASSERT_OK(Put(Key(i), kValue));
-  }
-  ASSERT_OK(Flush());
-  for (int i = 0; i < 1024 * 2; i++) {
-    ASSERT_OK(Put(Key(i), kValue));
+    ASSERT_EQ(1024 * 1024 * 2,
+              dbfull()->GetDBOptions().compaction_readahead_size);
+    ASSERT_OK(dbfull()->SetDBOptions({{"compaction_readahead_size", "256"}}));
+    ASSERT_EQ(256, dbfull()->GetDBOptions().compaction_readahead_size);
+    for (int i = 0; i < 1024; i++) {
+      ASSERT_OK(Put(Key(i), kValue));
+    }
+    ASSERT_OK(Flush());
+    for (int i = 0; i < 1024 * 2; i++) {
+      ASSERT_OK(Put(Key(i), kValue));
+    }
+    ASSERT_OK(Flush());
+    ASSERT_OK(dbfull()->TEST_WaitForCompact());
+    ASSERT_EQ(256, env_->compaction_readahead_size_);
+    Close();
   }
-  ASSERT_OK(Flush());
-  ASSERT_OK(dbfull()->TEST_WaitForCompact());
-  ASSERT_EQ(256, env_->compaction_readahead_size_);
-  Close();
 }
 
 TEST_F(DBOptionsTest, FIFOTtlBackwardCompatible) {
diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h
index 8f62c2dfb82..d11ccc62f53 100644
--- a/include/rocksdb/options.h
+++ b/include/rocksdb/options.h
@@ -955,10 +955,10 @@ struct DBOptions {
   // running RocksDB on spinning disks, you should set this to at least 2MB.
   // That way RocksDB's compaction is doing sequential instead of random reads.
   //
-  // Default: 0
+  // Default: 2MB
   //
   // Dynamically changeable through SetDBOptions() API.
-  size_t compaction_readahead_size = 0;
+  size_t compaction_readahead_size = 2 * 1024 * 1024;
 
   // This is a maximum buffer size that is used by WinMmapReadableFile in
   // unbuffered disk I/O mode. We need to maintain an aligned buffer for
diff --git a/unreleased_history/behavior_changes/buffered_io_compaction_readahead_size_zero.md b/unreleased_history/behavior_changes/buffered_io_compaction_readahead_size_zero.md
new file mode 100644
index 00000000000..430101766fc
--- /dev/null
+++ b/unreleased_history/behavior_changes/buffered_io_compaction_readahead_size_zero.md
@@ -0,0 +1 @@
+Compaction read performance will regress when `Options::compaction_readahead_size` is explicitly set to 0
diff --git a/unreleased_history/public_api_changes/compaction_readahead_size_option_change.md b/unreleased_history/public_api_changes/compaction_readahead_size_option_change.md
new file mode 100644
index 00000000000..f86fd82ea14
--- /dev/null
+++ b/unreleased_history/public_api_changes/compaction_readahead_size_option_change.md
@@ -0,0 +1 @@
+`Options::compaction_readahead_size` 's default value is changed from 0 to 2MB.
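Note for readers (not part of the patch): below is a minimal sketch of the user-visible behavior after this change, assuming a RocksDB build that includes this commit. The DB path and the bare `main()` wrapper are illustrative only; the option and API names (`compaction_readahead_size`, `DB::Open`, `SetDBOptions`, `GetDBOptions`) are the ones exercised by the tests above.

```cpp
#include <cassert>

#include "rocksdb/db.h"
#include "rocksdb/options.h"

int main() {
  rocksdb::Options options;
  options.create_if_missing = true;

  // New default after this patch: 2MB of compaction readahead.
  assert(options.compaction_readahead_size == 2 * 1024 * 1024);

  // An explicit 0 at DB open is no longer sanitized back up to 2MB, so it now
  // disables compaction readahead (with the read-performance regression noted
  // in the behavior-change release note).
  options.compaction_readahead_size = 0;

  rocksdb::DB* db = nullptr;
  rocksdb::Status s =
      rocksdb::DB::Open(options, "/tmp/readahead_demo", &db);  // path is illustrative
  assert(s.ok());

  // The dynamic path agrees with the open-time path: "0" also means
  // "disable compaction readahead" here.
  s = db->SetDBOptions({{"compaction_readahead_size", "0"}});
  assert(s.ok());
  assert(db->GetDBOptions().compaction_readahead_size == 0);

  delete db;
  return 0;
}
```

A non-zero value passed through the same `SetDBOptions()` map (as in the updated `CompactionReadaheadSizeChange` test, which sets `"256"`) re-enables readahead at that size.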