Skip to content

Commit

Permalink
aptos-debugger: gen-replay-verify-jobs command
Browse files Browse the repository at this point in the history
  • Loading branch information
msmouse committed Sep 12, 2024
1 parent 3da5ac6 commit 175e9be
Show file tree
Hide file tree
Showing 3 changed files with 134 additions and 0 deletions.
4 changes: 4 additions & 0 deletions storage/backup/backup-cli/src/metadata/view.rs
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,10 @@ impl MetadataView {
self.compaction_timestamps.clone()
}

/// Returns all state snapshot backup metadata held by this view as a borrowed slice.
///
/// No filtering or copying is done here; callers get a direct view of
/// `self.state_snapshot_backups`.
// NOTE(review): callers appear to rely on this slice being ordered by epoch/version —
// confirm that the view sorts `state_snapshot_backups` when it is built.
pub fn all_state_snapshots(&self) -> &[StateSnapshotBackupMeta] {
&self.state_snapshot_backups
}

pub fn select_state_snapshot(
&self,
target_version: Version,
Expand Down
126 changes: 126 additions & 0 deletions storage/db-tool/src/gen_replay_verify_jobs.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
// Copyright (c) Aptos Foundation
// SPDX-License-Identifier: Apache-2.0

use aptos_backup_cli::{
metadata::{
cache::{sync_and_load, MetadataCacheOpt},
StateSnapshotBackupMeta,
},
storage::DBToolStorageOpt,
utils::ConcurrentDownloadsOpt,
};
use aptos_logger::warn;
use aptos_types::transaction::Version;
use clap::Parser;
use itertools::Itertools;
use std::{io::Write, iter::once, path::PathBuf};

// Command-line options for the `GenReplayVerifyJobs` db-tool subcommand, which splits the
// backed-up transaction history into version ranges ("jobs") for replay-verify.
//
// Plain `//` comments are used on purpose: clap's derive turns `///` doc comments into
// help/about text, which would change the CLI output.
#[derive(Parser)]
pub struct Opt {
// Passed to `sync_and_load` to control the local metadata cache.
#[clap(flatten)]
metadata_cache_opt: MetadataCacheOpt,
// Backup storage to read metadata from; initialized via `init_storage()`.
#[clap(flatten)]
storage: DBToolStorageOpt,
// Download concurrency handed to `sync_and_load`.
#[clap(flatten)]
concurrent_downloads: ConcurrentDownloadsOpt,
// Lower bound on job ranges: a snapshot-to-snapshot range whose begin version is below
// this value is dropped entirely (together with everything older). `None` means 0.
#[clap(
long,
help = "The first transaction version required to be replayed and verified. [Defaults to 0]"
)]
start_version: Option<Version>,
// Soft cap on the number of versions per job: adjacent snapshot ranges are merged while
// the merged span stays within this size, and a single range at or above it is cut down
// to exactly this many versions.
#[clap(
long,
help = "Target number of transactions for each job to replay",
default_value = "20000000"
)]
target_job_size: u64,
// Number of most recent epochs (counting the latest snapshot epoch) to consider;
// snapshots from older epochs are skipped.
#[clap(
long,
help = "Determines the oldest epoch to replay, relative to the latest",
default_value = "4000"
)]
max_epochs: u64,
// Destination file; each job is written as one "<begin> <end>" line.
#[clap(long, help = "Output job ranges")]
output_file: PathBuf,
}

impl Opt {
    /// Generates replay-verify job ranges and writes them to `output_file`.
    ///
    /// Loads the backup metadata, keeps the state snapshots from the most recent
    /// `max_epochs` epochs, appends a synthetic "end" snapshot at
    /// `latest_transaction_version + 1`, and walks consecutive snapshot pairs from newest to
    /// oldest. Adjacent pairs are merged into one job while the merged span does not exceed
    /// `target_job_size`; a single pair spanning `target_job_size` or more is truncated to
    /// `target_job_size` versions starting at its begin snapshot (and a warning is logged).
    /// The walk stops at the first pair whose begin version is below `start_version`.
    ///
    /// Each job is written as one `"<begin> <end>"` line, newest job first.
    ///
    /// # Errors
    ///
    /// Returns an error if storage initialization or metadata loading fails, if the backup
    /// storage contains no transaction backups or no state snapshots, or if the output file
    /// cannot be created, written, or flushed.
    pub async fn run(self) -> anyhow::Result<()> {
        let storage = self.storage.init_storage().await?;
        let metadata_view = sync_and_load(
            &self.metadata_cache_opt,
            storage,
            self.concurrent_downloads.get(),
        )
        .await?;

        let storage_state = metadata_view.get_storage_state()?;
        // Missing backups are a property of the data, not a programming bug: report them as
        // errors instead of panicking.
        let global_end_version = storage_state
            .latest_transaction_version
            .ok_or_else(|| anyhow::anyhow!("No transaction backups."))?
            + 1;
        let latest_epoch = storage_state
            .latest_state_snapshot_epoch
            .ok_or_else(|| anyhow::anyhow!("No state snapshots."))?;
        // Clamp so that `latest_epoch + 1 - max_epochs` cannot underflow.
        let max_epochs = self.max_epochs.min(latest_epoch + 1);
        let global_min_epoch = latest_epoch + 1 - max_epochs;
        let start_version = self.start_version.unwrap_or(0);
        let target_job_size = self.target_job_size;

        // Buffered so that each job line is not a separate write syscall.
        let mut file = std::io::BufWriter::new(std::fs::File::create(&self.output_file)?);

        // Synthetic snapshot marking the exclusive end of the backed-up transactions, so the
        // newest real snapshot also gets a range that follows it.
        let fake_end = StateSnapshotBackupMeta {
            epoch: latest_epoch,
            version: global_end_version,
            manifest: "".to_string(),
        };
        // Bound to a local so the borrowed iterator below can outlive a single statement.
        let snapshots = metadata_view
            .all_state_snapshots()
            .iter()
            .skip_while(|s| s.epoch < global_min_epoch)
            .chain(once(&fake_end))
            .collect_vec();

        // NOTE(review): `end.version - begin.version` assumes snapshot versions never exceed
        // `global_end_version` and are non-decreasing — confirm against the metadata view.
        let job_ranges = snapshots
            .iter()
            .rev()
            .tuple_windows()
            // to simplify things, if start_version appears in the middle of a range, give up the range
            .take_while(|(_end, begin)| begin.version >= start_version)
            .peekable()
            .batching(|it| {
                let (end, mut begin) = it.next()?;
                if end.version - begin.version >= target_job_size {
                    // cut big range short, this hopefully automatically skips load tests
                    let msg = if end.epoch - begin.epoch > 15 {
                        "!!! Need more snapshots !!!"
                    } else {
                        ""
                    };
                    warn!(
                        begin = begin,
                        end = end,
                        "Big gap between snapshots. {} versions in {} epochs. {}",
                        end.version - begin.version,
                        end.epoch - begin.epoch,
                        msg,
                    );
                    Some((begin.version, begin.version + target_job_size))
                } else {
                    // Greedily absorb older ranges while the merged job stays within the
                    // target size.
                    while let Some((_prev_end, prev_begin)) = it.peek() {
                        if end.version - prev_begin.version > target_job_size {
                            break;
                        }
                        begin = prev_begin;
                        let _ = it.next();
                    }
                    Some((begin.version, end.version))
                }
            });

        for (begin, end) in job_ranges {
            writeln!(file, "{} {}", begin, end)?;
        }
        // Flush explicitly: dropping a `BufWriter` silently discards flush errors.
        file.flush()?;

        Ok(())
    }
}
4 changes: 4 additions & 0 deletions storage/db-tool/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ extern crate core;
mod backup;
mod backup_maintenance;
mod bootstrap;
mod gen_replay_verify_jobs;
mod replay_verify;
pub mod restore;
#[cfg(test)]
Expand Down Expand Up @@ -33,6 +34,8 @@ pub enum DBTool {

ReplayVerify(replay_verify::Opt),

GenReplayVerifyJobs(gen_replay_verify_jobs::Opt),

#[clap(subcommand)]
Restore(restore::Command),
}
Expand All @@ -49,6 +52,7 @@ impl DBTool {
info!("Replay verify result: {:?}", ret);
ret
},
DBTool::GenReplayVerifyJobs(cmd) => cmd.run().await,
DBTool::Restore(cmd) => cmd.run().await,
}
}
Expand Down

0 comments on commit 175e9be

Please sign in to comment.