Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds diff to accounts-hash-cache-tool #1772

Merged
merged 1 commit into from
Jun 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion accounts-db/accounts-hash-cache-tool/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "agave-accounts-hash-cache-tool"
description = "Tool to inspect accounts hash cache files"
description = "Tool for accounts hash cache files"
publish = false
version = { workspace = true }
authors = { workspace = true }
Expand Down
292 changes: 240 additions & 52 deletions accounts-db/accounts-hash-cache-tool/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,75 +1,91 @@
use {
bytemuck::Zeroable as _,
clap::{crate_description, crate_name, value_t_or_exit, App, Arg},
clap::{
crate_description, crate_name, value_t_or_exit, App, AppSettings, Arg, ArgMatches,
SubCommand,
},
solana_accounts_db::{CacheHashDataFileEntry, CacheHashDataFileHeader},
std::{
collections::HashMap,
fs::File,
io::{self, BufReader, Read as _},
mem::size_of,
num::Saturating,
path::Path,
},
};

fn main() {
let matches = App::new(crate_name!())
.about(crate_description!())
.version(solana_version::version!())
.arg(
Arg::with_name("path")
.index(1)
.takes_value(true)
.value_name("PATH")
.help("Accounts hash cache file to inspect"),
.global_setting(AppSettings::ArgRequiredElseHelp)
.global_setting(AppSettings::ColoredHelp)
.global_setting(AppSettings::InferSubcommands)
.global_setting(AppSettings::UnifiedHelpMessage)
.global_setting(AppSettings::VersionlessSubcommands)
.subcommand(
SubCommand::with_name("inspect")
.about(
"Inspect an accounts hash cache file and display \
each account's address, hash, and balance",
)
.arg(
Arg::with_name("force")
.long("force")
.takes_value(false)
.help("Continue even if sanity checks fail"),
)
.arg(
Arg::with_name("path")
.index(1)
.takes_value(true)
.value_name("PATH")
.help("Accounts hash cache file to inspect"),
),
)
.arg(
Arg::with_name("force")
.long("force")
.takes_value(false)
.help("Continue even if sanity checks fail"),
.subcommand(
SubCommand::with_name("diff")
.about("Diff two accounts hash cache files")
.arg(
Arg::with_name("path1")
.index(1)
.takes_value(true)
.value_name("PATH1")
.help("Accounts hash cache file 1 to diff"),
)
.arg(
Arg::with_name("path2")
.index(2)
.takes_value(true)
.value_name("PATH2")
.help("Accounts hash cache file 2 to diff"),
),
)
.get_matches();

let force = matches.is_present("force");
let path = value_t_or_exit!(matches, "path", String);

let file = File::open(&path).unwrap_or_else(|err| {
eprintln!("Failed to open accounts hash cache file '{path}': {err}");
std::process::exit(1);
});
let actual_file_size = file
.metadata()
.unwrap_or_else(|err| {
eprintln!("Failed to query file metadata: {err}");
std::process::exit(1);
})
.len();
let mut reader = BufReader::new(file);

let header = {
let mut header = CacheHashDataFileHeader::zeroed();
reader
.read_exact(bytemuck::bytes_of_mut(&mut header))
.unwrap_or_else(|err| {
eprintln!("Failed to read header: {err}");
std::process::exit(1);
});
header
};

// Sanity checks -- ensure the actual file size matches the expected file size
let expected_file_size = size_of::<CacheHashDataFileHeader>()
.saturating_add(size_of::<CacheHashDataFileEntry>().saturating_mul(header.count));
if actual_file_size != expected_file_size as u64 {
eprintln!(
"Failed sanitization: actual file size does not match expected file size! \
actual: {actual_file_size}, expected: {expected_file_size}",
);
if !force {
std::process::exit(1);
match matches.subcommand() {
("inspect", Some(subcommand_matches)) => do_inspect(&matches, subcommand_matches)
.map_err(|err| format!("inspection failed: {err}")),
("diff", Some(subcommand_matches)) => {
do_diff(&matches, subcommand_matches).map_err(|err| format!("diff failed: {err}"))
}
eprintln!("Forced. Continuing... Results may be incorrect.");
_ => unreachable!(),
}
.unwrap_or_else(|err| {
eprintln!("Error: {err}");
std::process::exit(1);
});
}

fn do_inspect(
_app_matches: &ArgMatches<'_>,
subcommand_matches: &ArgMatches<'_>,
) -> Result<(), String> {
let force = subcommand_matches.is_present("force");
let path = value_t_or_exit!(subcommand_matches, "path", String);
let (mut reader, header) = open_file(&path, force)
.map_err(|err| format!("failed to open accounts hash cache file '{path}': {err}"))?;
let count_width = (header.count as f64).log10().ceil() as usize;
let mut count = Saturating(0usize);
loop {
Expand All @@ -80,10 +96,13 @@ fn main() {
Err(err) => {
if err.kind() == io::ErrorKind::UnexpectedEof && count.0 == header.count {
// we've hit the expected end of the file
break;
} else {
eprintln!("Failed to read entry {count}: {err}");
return Err(format!(
"failed to read entry {count}, expected {}: {err}",
header.count,
));
}
break;
}
};
println!(
Expand All @@ -96,4 +115,173 @@ fn main() {
}

println!("actual entries: {count}, expected: {}", header.count);
Ok(())
}

fn do_diff(
_app_matches: &ArgMatches<'_>,
subcommand_matches: &ArgMatches<'_>,
) -> Result<(), String> {
let force = false; // skipping sanity checks is not supported when diffing
let path1 = value_t_or_exit!(subcommand_matches, "path1", String);
let path2 = value_t_or_exit!(subcommand_matches, "path2", String);
let (mut reader1, header1) = open_file(&path1, force)
.map_err(|err| format!("failed to open accounts hash cache file 1 '{path1}': {err}"))?;
let (mut reader2, header2) = open_file(&path2, force)
.map_err(|err| format!("failed to open accounts hash cache file 2 '{path2}': {err}"))?;
// Note: Purposely open both files before reading either one. This way, if there's an error
// opening file 2, we can bail early without having to wait for file 1 to be read completely.

// extract the entries from both files
let do_extract = |num, reader: &mut BufReader<_>, header: &CacheHashDataFileHeader| {
let mut entries = HashMap::<_, _>::default();
loop {
let mut entry = CacheHashDataFileEntry::zeroed();
let result = reader.read_exact(bytemuck::bytes_of_mut(&mut entry));
match result {
Ok(()) => {}
Err(err) => {
if err.kind() == io::ErrorKind::UnexpectedEof && entries.len() == header.count {
// we've hit the expected end of the file
break;
} else {
return Err(format!(
"failed to read entry {}, expected {}: {err}",
entries.len(),
header.count,
));
}
}
};
let CacheHashDataFileEntry {
hash,
lamports,
pubkey,
} = entry;
let old_value = entries.insert(pubkey, (hash, lamports));
if let Some(old_value) = old_value {
let new_value = entries.get(&pubkey);
return Err(format!("found duplicate pubkey in file {num}: {pubkey}, old value: {old_value:?}, new value: {new_value:?}"));
}
}
Ok(entries)
};
let entries1 = do_extract(1, &mut reader1, &header1)?;
let entries2 = do_extract(2, &mut reader2, &header2)?;

// compute the differences between the files
let do_compute = |lhs: &HashMap<_, (_, _)>, rhs: &HashMap<_, (_, _)>| {
let mut unique_entries = Vec::new();
let mut mismatch_entries = Vec::new();
for (lhs_key, lhs_value) in lhs.iter() {
if let Some(rhs_value) = rhs.get(lhs_key) {
if lhs_value != rhs_value {
mismatch_entries.push((
CacheHashDataFileEntry {
hash: lhs_value.0,
lamports: lhs_value.1,
pubkey: *lhs_key,
},
CacheHashDataFileEntry {
hash: rhs_value.0,
lamports: rhs_value.1,
pubkey: *lhs_key,
},
));
}
} else {
unique_entries.push(CacheHashDataFileEntry {
hash: lhs_value.0,
lamports: lhs_value.1,
pubkey: *lhs_key,
});
}
}
unique_entries.sort_unstable_by(|a, b| a.pubkey.cmp(&b.pubkey));
mismatch_entries.sort_unstable_by(|a, b| a.0.pubkey.cmp(&b.0.pubkey));
(unique_entries, mismatch_entries)
};
let (unique_entries1, mismatch_entries) = do_compute(&entries1, &entries2);
let (unique_entries2, _) = do_compute(&entries2, &entries1);

// display the unique entries in each file
let do_print = |entries: &[CacheHashDataFileEntry]| {
let count_width = (entries.len() as f64).log10().ceil() as usize;
if entries.is_empty() {
println!("(none)");
} else {
for (i, entry) in entries.iter().enumerate() {
println!(
"{i:count_width$}: pubkey: {:44}, hash: {:44}, lamports: {}",
entry.pubkey.to_string(),
entry.hash.0.to_string(),
entry.lamports,
);
}
}
};
println!("Unique entries in file 1:");
do_print(&unique_entries1);
println!("Unique entries in file 2:");
do_print(&unique_entries2);

println!("Mismatch values:");
let count_width = (mismatch_entries.len() as f64).log10().ceil() as usize;
if mismatch_entries.is_empty() {
println!("(none)");
} else {
for (i, (lhs, rhs)) in mismatch_entries.iter().enumerate() {
println!(
"{i:count_width$}: pubkey: {:44}, hash: {:44}, lamports: {}",
lhs.pubkey.to_string(),
lhs.hash.0.to_string(),
lhs.lamports,
);
println!(
"{i:count_width$}: file 2: {:44}, hash: {:44}, lamports: {}",
"(same)".to_string(),
rhs.hash.0.to_string(),
rhs.lamports,
);
}
}

Ok(())
}

fn open_file(
path: impl AsRef<Path>,
force: bool,
) -> Result<(BufReader<File>, CacheHashDataFileHeader), String> {
let file = File::open(path).map_err(|err| format!("{err}"))?;
let actual_file_size = file
.metadata()
.map_err(|err| format!("failed to query file metadata: {err}"))?
.len();
let mut reader = BufReader::new(file);

let header = {
let mut header = CacheHashDataFileHeader::zeroed();
reader
.read_exact(bytemuck::bytes_of_mut(&mut header))
.map_err(|err| format!("failed to read header: {err}"))?;
header
};

// Sanity checks -- ensure the actual file size matches the expected file size
let expected_file_size = size_of::<CacheHashDataFileHeader>()
.saturating_add(size_of::<CacheHashDataFileEntry>().saturating_mul(header.count));
if actual_file_size != expected_file_size as u64 {
let err_msg = format!(
"failed sanitization: actual file size does not match expected file size! \
actual: {actual_file_size}, expected: {expected_file_size}",
);
if force {
eprintln!("Warning: {err_msg}\nForced. Continuing... Results may be incorrect.");
} else {
return Err(err_msg);
}
}

Ok((reader, header))
}