From 268c2d1d8d3545cc64652452db6100882930eab6 Mon Sep 17 00:00:00 2001 From: Hui Xiao Date: Mon, 28 Aug 2023 15:18:03 -0700 Subject: [PATCH] Use special sentinel for default & handle set 0 differently for direc and non-direct IO --- db/db_impl/db_impl_open.cc | 7 +-- db/db_options_test.cc | 54 +++++++++++-------- include/rocksdb/options.h | 9 ++-- ...compaction_readahead_size_option_change.md | 1 + 4 files changed, 43 insertions(+), 28 deletions(-) create mode 100644 unreleased_history/behavior_changes/compaction_readahead_size_option_change.md diff --git a/db/db_impl/db_impl_open.cc b/db/db_impl/db_impl_open.cc index e6d97b125700..502a46d30beb 100644 --- a/db/db_impl/db_impl_open.cc +++ b/db/db_impl/db_impl_open.cc @@ -143,11 +143,12 @@ DBOptions SanitizeOptions(const std::string& dbname, const DBOptions& src, result.wal_dir = result.wal_dir.substr(0, result.wal_dir.size() - 1); } - if (result.compaction_readahead_size == 0) { - if (result.use_direct_reads) { + if (result.compaction_readahead_size == Options().compaction_readahead_size || + (result.use_direct_reads && result.compaction_readahead_size == 0)) { + if (result.use_direct_reads && result.compaction_readahead_size == 0) { TEST_SYNC_POINT_CALLBACK("SanitizeOptions:direct_io", nullptr); } - result.compaction_readahead_size = 1024 * 1024 * 2; + result.compaction_readahead_size = 2 * 1024 * 1024; } // Force flush on DB open if 2PC is enabled, since with 2PC we have no diff --git a/db/db_options_test.cc b/db/db_options_test.cc index b7c132aeef3b..92b9e2dac447 100644 --- a/db/db_options_test.cc +++ b/db/db_options_test.cc @@ -1034,30 +1034,40 @@ TEST_F(DBOptionsTest, SetFIFOCompactionOptions) { } TEST_F(DBOptionsTest, CompactionReadaheadSizeChange) { - SpecialEnv env(env_); - Options options; - options.env = &env; - - options.compaction_readahead_size = 0; - options.level0_file_num_compaction_trigger = 2; const std::string kValue(1024, 'v'); - Reopen(options); - - ASSERT_EQ(1024 * 1024 * 2, - dbfull()->GetDBOptions().compaction_readahead_size); - ASSERT_OK(dbfull()->SetDBOptions({{"compaction_readahead_size", "256"}})); - ASSERT_EQ(256, dbfull()->GetDBOptions().compaction_readahead_size); - for (int i = 0; i < 1024; i++) { - ASSERT_OK(Put(Key(i), kValue)); - } - ASSERT_OK(Flush()); - for (int i = 0; i < 1024 * 2; i++) { - ASSERT_OK(Put(Key(i), kValue)); + for (bool direct_io : {true, false}) { + SpecialEnv env(env_); + Options options; + options.env = &env; + options.level0_file_num_compaction_trigger = 2; + options.use_direct_reads = direct_io; + Reopen(options); + // Verify the default value of `Option::compaction_readahead_size` + ASSERT_EQ(2 * 1024 * 1024, + dbfull()->GetDBOptions().compaction_readahead_size); + Close(); + options.compaction_readahead_size = 0; + Reopen(options); + // Verify the effect of setting `Option::compaction_readahead_size` to be + // zero under direct IO and non-Direct IO + ASSERT_EQ(direct_io ? 2 * 1024 * 1024 : 0, + dbfull()->GetDBOptions().compaction_readahead_size); + // Verify `Option::compaction_readahead_size` is set dynamically to positive + // value correctly + ASSERT_OK(dbfull()->SetDBOptions({{"compaction_readahead_size", "256"}})); + ASSERT_EQ(256, dbfull()->GetDBOptions().compaction_readahead_size); + for (int i = 0; i < 1024; i++) { + ASSERT_OK(Put(Key(i), kValue)); + } + ASSERT_OK(Flush()); + for (int i = 0; i < 1024 * 2; i++) { + ASSERT_OK(Put(Key(i), kValue)); + } + ASSERT_OK(Flush()); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); + ASSERT_EQ(256, env_->compaction_readahead_size_); + Close(); } - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ(256, env_->compaction_readahead_size_); - Close(); } TEST_F(DBOptionsTest, FIFOTtlBackwardCompatible) { diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index 4dee0c952853..79ecb2f3ed45 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -952,16 +952,19 @@ struct DBOptions { AccessHint access_hint_on_compaction_start = NORMAL; // The size RocksDB uses to perform readahead during compaction read. - // If set zero, RocksDB will sanitize it to be 2MB during db open. // If you're // running RocksDB on spinning disks, you should set this to at least 2MB. // That way RocksDB's compaction is doing sequential instead of random reads. // // - // Default: 0 + // Default: 2MB + // + // For direct IO, if set 0, RocksDB will sanitize the value to the default. + // For non-direct IO, if set 0, RocksDB will disable compaction readahead + // instead. // // Dynamically changeable through SetDBOptions() API. - size_t compaction_readahead_size = 0; + size_t compaction_readahead_size = 0xfffffffffffffffe; // This is a maximum buffer size that is used by WinMmapReadableFile in // unbuffered disk I/O mode. We need to maintain an aligned buffer for diff --git a/unreleased_history/behavior_changes/compaction_readahead_size_option_change.md b/unreleased_history/behavior_changes/compaction_readahead_size_option_change.md new file mode 100644 index 000000000000..879067b986df --- /dev/null +++ b/unreleased_history/behavior_changes/compaction_readahead_size_option_change.md @@ -0,0 +1 @@ +`Options::compaction_readahead_size` 's default value is changed from 0 to 2MB. If it's explictly set 0, for direct IO, RocksDB will sanitize the value to the default, while for non-direct IO, RocksDB will disable compaction readahead instead.