From 39f15a5bb9ea858b1875a3b451f2df7cd902165e Mon Sep 17 00:00:00 2001 From: Hui Xiao Date: Tue, 29 Aug 2023 14:34:10 -0700 Subject: [PATCH] Change default value of compaction_readahead_size --- db/db_compaction_test.cc | 7 +-- db/db_impl/db_impl_open.cc | 7 --- db/db_options_test.cc | 51 +++++++++++-------- include/rocksdb/options.h | 5 +- ...fered_io_compaction_readahead_size_zero.md | 1 + ...compaction_readahead_size_option_change.md | 1 + 6 files changed, 34 insertions(+), 38 deletions(-) create mode 100644 unreleased_history/behavior_changes/buffered_io_compaction_readahead_size_zero.md create mode 100644 unreleased_history/public_api_changes/compaction_readahead_size_option_change.md diff --git a/db/db_compaction_test.cc b/db/db_compaction_test.cc index 24445ecdb70e..8713b9b80453 100644 --- a/db/db_compaction_test.cc +++ b/db/db_compaction_test.cc @@ -6025,23 +6025,18 @@ TEST_P(DBCompactionDirectIOTest, DirectIO) { options.use_direct_io_for_flush_and_compaction = GetParam(); options.env = MockEnv::Create(Env::Default()); Reopen(options); - bool readahead = false; SyncPoint::GetInstance()->SetCallBack( "CompactionJob::OpenCompactionOutputFile", [&](void* arg) { bool* use_direct_writes = static_cast(arg); ASSERT_EQ(*use_direct_writes, options.use_direct_io_for_flush_and_compaction); }); - if (options.use_direct_io_for_flush_and_compaction) { - SyncPoint::GetInstance()->SetCallBack( - "SanitizeOptions:direct_io", [&](void* /*arg*/) { readahead = true; }); - } SyncPoint::GetInstance()->EnableProcessing(); CreateAndReopenWithCF({"pikachu"}, options); MakeTables(3, "p", "q", 1); ASSERT_EQ("1,1,1", FilesPerLevel(1)); Compact(1, "p", "q"); - ASSERT_EQ(readahead, options.use_direct_reads); + ASSERT_EQ(false, options.use_direct_reads); ASSERT_EQ("0,0,1", FilesPerLevel(1)); Destroy(options); delete options.env; diff --git a/db/db_impl/db_impl_open.cc b/db/db_impl/db_impl_open.cc index e6d97b125700..d0fba7344223 100644 --- a/db/db_impl/db_impl_open.cc +++ b/db/db_impl/db_impl_open.cc @@ -143,13 +143,6 @@ DBOptions SanitizeOptions(const std::string& dbname, const DBOptions& src, result.wal_dir = result.wal_dir.substr(0, result.wal_dir.size() - 1); } - if (result.compaction_readahead_size == 0) { - if (result.use_direct_reads) { - TEST_SYNC_POINT_CALLBACK("SanitizeOptions:direct_io", nullptr); - } - result.compaction_readahead_size = 1024 * 1024 * 2; - } - // Force flush on DB open if 2PC is enabled, since with 2PC we have no // guarantee that consecutive log files have consecutive sequence id, which // make recovery complicated. diff --git a/db/db_options_test.cc b/db/db_options_test.cc index b7c132aeef3b..c3910a9787bc 100644 --- a/db/db_options_test.cc +++ b/db/db_options_test.cc @@ -1034,30 +1034,37 @@ TEST_F(DBOptionsTest, SetFIFOCompactionOptions) { } TEST_F(DBOptionsTest, CompactionReadaheadSizeChange) { - SpecialEnv env(env_); - Options options; - options.env = &env; - - options.compaction_readahead_size = 0; - options.level0_file_num_compaction_trigger = 2; - const std::string kValue(1024, 'v'); - Reopen(options); + for (bool use_direct_reads : {true, false}) { + SpecialEnv env(env_); + Options options; + options.env = &env; + + options.use_direct_reads = use_direct_reads; + options.level0_file_num_compaction_trigger = 2; + const std::string kValue(1024, 'v'); + Status s = TryReopen(options); + if (use_direct_reads && (s.IsNotSupported() || s.IsInvalidArgument())) { + continue; + } else { + ASSERT_OK(s); + } - ASSERT_EQ(1024 * 1024 * 2, - dbfull()->GetDBOptions().compaction_readahead_size); - ASSERT_OK(dbfull()->SetDBOptions({{"compaction_readahead_size", "256"}})); - ASSERT_EQ(256, dbfull()->GetDBOptions().compaction_readahead_size); - for (int i = 0; i < 1024; i++) { - ASSERT_OK(Put(Key(i), kValue)); - } - ASSERT_OK(Flush()); - for (int i = 0; i < 1024 * 2; i++) { - ASSERT_OK(Put(Key(i), kValue)); + ASSERT_EQ(1024 * 1024 * 2, + dbfull()->GetDBOptions().compaction_readahead_size); + ASSERT_OK(dbfull()->SetDBOptions({{"compaction_readahead_size", "256"}})); + ASSERT_EQ(256, dbfull()->GetDBOptions().compaction_readahead_size); + for (int i = 0; i < 1024; i++) { + ASSERT_OK(Put(Key(i), kValue)); + } + ASSERT_OK(Flush()); + for (int i = 0; i < 1024 * 2; i++) { + ASSERT_OK(Put(Key(i), kValue)); + } + ASSERT_OK(Flush()); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); + ASSERT_EQ(256, env_->compaction_readahead_size_); + Close(); } - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ(256, env_->compaction_readahead_size_); - Close(); } TEST_F(DBOptionsTest, FIFOTtlBackwardCompatible) { diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index 4dee0c952853..79fd021716a7 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -952,16 +952,15 @@ struct DBOptions { AccessHint access_hint_on_compaction_start = NORMAL; // The size RocksDB uses to perform readahead during compaction read. - // If set zero, RocksDB will sanitize it to be 2MB during db open. // If you're // running RocksDB on spinning disks, you should set this to at least 2MB. // That way RocksDB's compaction is doing sequential instead of random reads. // // - // Default: 0 + // Default: 2MB // // Dynamically changeable through SetDBOptions() API. - size_t compaction_readahead_size = 0; + size_t compaction_readahead_size = 2 * 1024 * 1024; // This is a maximum buffer size that is used by WinMmapReadableFile in // unbuffered disk I/O mode. We need to maintain an aligned buffer for diff --git a/unreleased_history/behavior_changes/buffered_io_compaction_readahead_size_zero.md b/unreleased_history/behavior_changes/buffered_io_compaction_readahead_size_zero.md new file mode 100644 index 000000000000..34117d4bd884 --- /dev/null +++ b/unreleased_history/behavior_changes/buffered_io_compaction_readahead_size_zero.md @@ -0,0 +1 @@ +As `Options::access_hint_on_compaction_start` is made to have no effect since #11658, under non-direct IO, setting `Options::compaction_readahead_size` to 0 may regress compaction read performance, because it will not have the readahead issued by Kernel, which is the behavior prior to #11658, nor the intended replacement of Kernel-issued readahead by RocksDB-issued readahead introduced in #11631 now requiring `Options::compaction_readahead_size > 0` diff --git a/unreleased_history/public_api_changes/compaction_readahead_size_option_change.md b/unreleased_history/public_api_changes/compaction_readahead_size_option_change.md new file mode 100644 index 000000000000..f86fd82ea147 --- /dev/null +++ b/unreleased_history/public_api_changes/compaction_readahead_size_option_change.md @@ -0,0 +1 @@ +`Options::compaction_readahead_size` 's default value is changed from 0 to 2MB.