From 5974027bedc715a0dfca92a26d59626171b9b0b3 Mon Sep 17 00:00:00 2001 From: Piotr Sarna Date: Tue, 3 Oct 2023 16:23:58 +0200 Subject: [PATCH 1/2] bottomless: checkpoint before initializing bottomless Due to a bug in wallog recovery, we need to checkpoint the database *strictly before* we initialize bottomless. A proper fix should be to use our virtual WAL methods for checkpointing, but there's an initialization cycle and resolving it will be a larger patch - a connection with WAL methods wants us to already have the replication logger created, and replication logger wants to perform a checkpoint on creation. As a mid-term solution, we just perform the forbidden regular checkpoint before bottomless is ever launched. Combined with the fact that bottomless treats existing databases as the source of truth, it just creates a new backup generation and continues working properly. The following scenario was buggy before: 1. We leave the db in the state where some WAL frames still exist in data-wal file 2. We restart sqld 3. bottomless is initialized, it reuses the existing db and WAL frames and uploads them to S3, to avoid creating a potentially costly snapshot 4. ReplicationLogger::new() incorrectly calls sqlite3_wal_checkpoint which swipes data from under bottomless. 5. Bottomless thinks it hasn't checkpointed and continues to write WAL frames. As a result, it writes garbage to S3, because the db was checkpointed outside of bottomless control --- bottomless/src/replicator.rs | 5 +++-- sqld/src/namespace/mod.rs | 12 ++++++++++++ sqld/src/replication/primary/logger.rs | 5 ++++- 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/bottomless/src/replicator.rs b/bottomless/src/replicator.rs index 633d8ff9..bfc138ef 100644 --- a/bottomless/src/replicator.rs +++ b/bottomless/src/replicator.rs @@ -1302,8 +1302,9 @@ impl Replicator { }, }; - tracing::info!("Restoring from generation {}", generation); - self.restore_from(generation, timestamp).await + let (action, recovered) = self.restore_from(generation, timestamp).await?; + tracing::info!("Restoring from generation {generation}: action={action:?}, recovered={recovered}"); + Ok((action, recovered)) } pub async fn get_last_consistent_frame(&self, generation: &Uuid) -> Result { diff --git a/sqld/src/namespace/mod.rs b/sqld/src/namespace/mod.rs index 6651fe6d..b1c550e7 100644 --- a/sqld/src/namespace/mod.rs +++ b/sqld/src/namespace/mod.rs @@ -656,6 +656,18 @@ impl Namespace { tokio::fs::create_dir_all(&db_path).await?; + // FIXME: due to a bug in logger::checkpoint_db we call regular checkpointing code + // instead of our virtual WAL one. It's a bit tangled to fix right now, because + // we need WAL context for checkpointing, and WAL context needs the ReplicationLogger... + // So instead we checkpoint early, *before* bottomless gets initialized. That way + // we're sure bottomless won't try to back up any existing WAL frames and will instead + // treat the existing db file as the source of truth. + if config.bottomless_replication.is_some() { + tracing::debug!("Checkpointing before initializing bottomless"); + crate::replication::primary::logger::checkpoint_db(&db_path.join("data"))?; + tracing::debug!("Checkpointed before initializing bottomless"); + } + let bottomless_replicator = if let Some(options) = &config.bottomless_replication { let options = make_bottomless_options(options, name.clone()); let (replicator, did_recover) = diff --git a/sqld/src/replication/primary/logger.rs b/sqld/src/replication/primary/logger.rs index 9f69b876..580aa8e2 100644 --- a/sqld/src/replication/primary/logger.rs +++ b/sqld/src/replication/primary/logger.rs @@ -77,6 +77,7 @@ unsafe impl WalHook for ReplicationLoggerHook { assert_eq!(page_size, 4096); let wal_ptr = wal as *mut _; let last_valid_frame = wal.hdr.mxFrame; + tracing::trace!("Last valid frame before applying: {last_valid_frame}"); let ctx = Self::wal_extract_ctx(wal); let mut frame_count = 0; @@ -948,7 +949,9 @@ impl ReplicationLogger { } } -fn checkpoint_db(data_path: &Path) -> anyhow::Result<()> { +// FIXME: calling rusqlite::Connection's checkpoint here is a bug, +// we need to always call our virtual WAL methods. +pub fn checkpoint_db(data_path: &Path) -> anyhow::Result<()> { let wal_path = match data_path.parent() { Some(path) => path.join("data-wal"), None => return Ok(()), From 87abb69bba3f8bc81f22995761df5541b28fd2d3 Mon Sep 17 00:00:00 2001 From: Piotr Sarna Date: Tue, 3 Oct 2023 16:38:11 +0200 Subject: [PATCH 2/2] fmt fix --- bottomless/src/replicator.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bottomless/src/replicator.rs b/bottomless/src/replicator.rs index bfc138ef..76a25f1b 100644 --- a/bottomless/src/replicator.rs +++ b/bottomless/src/replicator.rs @@ -1303,7 +1303,9 @@ impl Replicator { }; let (action, recovered) = self.restore_from(generation, timestamp).await?; - tracing::info!("Restoring from generation {generation}: action={action:?}, recovered={recovered}"); + tracing::info!( + "Restoring from generation {generation}: action={action:?}, recovered={recovered}" + ); Ok((action, recovered)) }