From 6c20a2a4d80e9d0f629931e92f95bd6100bc3746 Mon Sep 17 00:00:00 2001 From: Jakob Meier Date: Tue, 30 Aug 2022 11:45:37 +0200 Subject: [PATCH 1/4] feat(estimator): compact RocksDB in testbed setup Run compaction on the DB after inserting all records from the state dump. This brings the layout of SST files to a more stable and optimal layout. See #4771 for more details. --- core/store/src/db.rs | 8 ++++++++ core/store/src/db/rocksdb.rs | 8 ++++++++ core/store/src/lib.rs | 5 +++++ runtime/runtime-params-estimator/src/testbed.rs | 1 + 4 files changed, 22 insertions(+) diff --git a/core/store/src/db.rs b/core/store/src/db.rs index 9ce57f63c7d..33db821145d 100644 --- a/core/store/src/db.rs +++ b/core/store/src/db.rs @@ -137,6 +137,14 @@ pub trait Database: Sync + Send { /// This is a no-op for in-memory databases. fn flush(&self) -> io::Result<()>; + /// Compact database representation. + /// + /// If the database supports it a form of compaction, calling this function + /// is blocking until compaction finishes. Otherwise, this is a no-op. + fn compact(&self) -> io::Result<()> { + Ok(()) + } + /// Returns statistics about the database if available. fn get_store_statistics(&self) -> Option; } diff --git a/core/store/src/db/rocksdb.rs b/core/store/src/db/rocksdb.rs index 6947193cd0b..3350a8eaf26 100644 --- a/core/store/src/db/rocksdb.rs +++ b/core/store/src/db/rocksdb.rs @@ -247,6 +247,14 @@ impl Database for RocksDB { self.db.write(batch).map_err(into_other) } + fn compact(&self) -> io::Result<()> { + let none = Option::<&[u8]>::None; + for col in DBCol::iter() { + self.db.compact_range_cf(self.cf_handle(col), none, none); + } + Ok(()) + } + fn flush(&self) -> io::Result<()> { self.db.flush().map_err(into_other) } diff --git a/core/store/src/lib.rs b/core/store/src/lib.rs index da067177aac..6518024c07e 100644 --- a/core/store/src/lib.rs +++ b/core/store/src/lib.rs @@ -181,6 +181,11 @@ impl Store { self.storage.flush() } + /// Blocking compaction request if supported by storage. + pub fn compact(&self) -> io::Result<()> { + self.storage.compact() + } + pub fn get_store_statistics(&self) -> Option { self.storage.get_store_statistics() } diff --git a/runtime/runtime-params-estimator/src/testbed.rs b/runtime/runtime-params-estimator/src/testbed.rs index f46054789d4..4e146869fce 100644 --- a/runtime/runtime-params-estimator/src/testbed.rs +++ b/runtime/runtime-params-estimator/src/testbed.rs @@ -28,6 +28,7 @@ impl RuntimeTestbed { pub fn from_state_dump(dump_dir: &Path) -> Self { let workdir = tempfile::Builder::new().prefix("runtime_testbed").tempdir().unwrap(); let StateDump { store, roots } = StateDump::from_dir(dump_dir, workdir.path()); + store.compact().expect("compaction failed"); let tries = ShardTries::test(store, 1); assert!(roots.len() <= 1, "Parameter estimation works with one shard only."); From 97e8334d12458db20ef568b56bce58ae99d26f50 Mon Sep 17 00:00:00 2001 From: Jakob Meier Date: Tue, 30 Aug 2022 12:23:26 +0200 Subject: [PATCH 2/4] remove whitespace --- core/store/src/db.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/store/src/db.rs b/core/store/src/db.rs index 33db821145d..9d26919df05 100644 --- a/core/store/src/db.rs +++ b/core/store/src/db.rs @@ -138,7 +138,7 @@ pub trait Database: Sync + Send { fn flush(&self) -> io::Result<()>; /// Compact database representation. - /// + /// /// If the database supports it a form of compaction, calling this function /// is blocking until compaction finishes. Otherwise, this is a no-op. fn compact(&self) -> io::Result<()> { From c65db1795cd6bc34fc9de64f3a9907b01ab571af Mon Sep 17 00:00:00 2001 From: Jakob Meier Date: Tue, 30 Aug 2022 12:51:40 +0200 Subject: [PATCH 3/4] address reviewer comments - short comment why `compact()` is called - document the issue in `runtime/runtime-params-estimator/src/lib.rs` - remove default implementation of `Database::copmact` --- core/store/src/db.rs | 4 +--- core/store/src/db/testdb.rs | 4 ++++ runtime/runtime-params-estimator/src/lib.rs | 8 ++++++++ runtime/runtime-params-estimator/src/testbed.rs | 1 + 4 files changed, 14 insertions(+), 3 deletions(-) diff --git a/core/store/src/db.rs b/core/store/src/db.rs index 9d26919df05..b9dd731c31a 100644 --- a/core/store/src/db.rs +++ b/core/store/src/db.rs @@ -141,9 +141,7 @@ pub trait Database: Sync + Send { /// /// If the database supports it a form of compaction, calling this function /// is blocking until compaction finishes. Otherwise, this is a no-op. - fn compact(&self) -> io::Result<()> { - Ok(()) - } + fn compact(&self) -> io::Result<()>; /// Returns statistics about the database if available. fn get_store_statistics(&self) -> Option; diff --git a/core/store/src/db/testdb.rs b/core/store/src/db/testdb.rs index 5c703ab8c3a..dea66b90e4e 100644 --- a/core/store/src/db/testdb.rs +++ b/core/store/src/db/testdb.rs @@ -88,6 +88,10 @@ impl Database for TestDB { Ok(()) } + fn compact(&self) -> io::Result<()> { + Ok(()) + } + fn get_store_statistics(&self) -> Option { None } diff --git a/runtime/runtime-params-estimator/src/lib.rs b/runtime/runtime-params-estimator/src/lib.rs index 2c5c429cee0..ec5fbe3fd57 100644 --- a/runtime/runtime-params-estimator/src/lib.rs +++ b/runtime/runtime-params-estimator/src/lib.rs @@ -38,6 +38,14 @@ //! costs and don't want to just run everything in order (as that would be to //! slow), we have a very simple manual caching infrastructure in place. //! +//! To run estimations on a non-empty DB with standardised content, we first +//! dump all records to a `StateDump` written to a file. Then for each +//! iteration of a an estimation, we first load the records from this dump into +//! a fresh database. Afterwards, it is crucial to run compaction on RocksDB +//! before starting measrments. Otherwise the SST file layout can be very +//! ineifficient, as there wass no time to restructure them. Also, compaction +//! may start during the measurement and makes the results unstable. +//! //! Notes on code architecture: //! //! To keep estimations comprehensible, each estimation has a simple function diff --git a/runtime/runtime-params-estimator/src/testbed.rs b/runtime/runtime-params-estimator/src/testbed.rs index 4e146869fce..c637e29cdcd 100644 --- a/runtime/runtime-params-estimator/src/testbed.rs +++ b/runtime/runtime-params-estimator/src/testbed.rs @@ -28,6 +28,7 @@ impl RuntimeTestbed { pub fn from_state_dump(dump_dir: &Path) -> Self { let workdir = tempfile::Builder::new().prefix("runtime_testbed").tempdir().unwrap(); let StateDump { store, roots } = StateDump::from_dir(dump_dir, workdir.path()); + // Ensure decent RocksDB SST file layout. store.compact().expect("compaction failed"); let tries = ShardTries::test(store, 1); From e42dcd0343c20bae8cd9e34d94f52a38cb554001 Mon Sep 17 00:00:00 2001 From: Jakob Meier Date: Tue, 30 Aug 2022 13:10:28 +0200 Subject: [PATCH 4/4] fix typos in comment and elaborate a bit more --- runtime/runtime-params-estimator/src/lib.rs | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/runtime/runtime-params-estimator/src/lib.rs b/runtime/runtime-params-estimator/src/lib.rs index ec5fbe3fd57..1db2fe91adf 100644 --- a/runtime/runtime-params-estimator/src/lib.rs +++ b/runtime/runtime-params-estimator/src/lib.rs @@ -42,10 +42,13 @@ //! dump all records to a `StateDump` written to a file. Then for each //! iteration of a an estimation, we first load the records from this dump into //! a fresh database. Afterwards, it is crucial to run compaction on RocksDB -//! before starting measrments. Otherwise the SST file layout can be very -//! ineifficient, as there wass no time to restructure them. Also, compaction -//! may start during the measurement and makes the results unstable. -//! +//! before starting measurements. Otherwise, the SST file layout can be very +//! inefficient, as there was no time to restructure them. We assume that in +//! production, the inflow of new data is not as bulky and therefore it should +//! always be reasonably compacted. Also, without forcing it before +//! measurements start, compaction may start during the measurement and makes +//! the results unstable. +//! //! Notes on code architecture: //! //! To keep estimations comprehensible, each estimation has a simple function