From 5f0354528eb1b84b206fe2c8031ccab87895a5e4 Mon Sep 17 00:00:00 2001 From: jim Date: Thu, 15 Jun 2023 17:12:41 -0400 Subject: [PATCH] v0.1.4 --- CHANGELOG.md | 12 +++++++++++- Cargo.lock | 2 +- Cargo.toml | 2 +- src/chain.rs | 4 ++-- src/file_io.rs | 2 +- src/main.rs | 10 +++++----- src/triangle.rs | 2 +- 7 files changed, 22 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b2dccb5..6104acc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,14 @@ -### v0.1.3 (pre)released - 2023-05-09 +### v0.1.4 released - 2023-06-14 + +#### Major +* skani triangle had a bug where, if more than 5000 queries were present and neither --sparse nor -E was specified, the intermediate batch of 5000 queries would be written in sparse mode. +* skani triangle -o was writing an upper-triangle matrix instead of a lower-triangle one (skani triangle > res gives the lower triangle). Matrices are now consistently lower triangle. +* Changed to lto = true for release mode. I see a 5-10% speedup from this. + +#### Minor +* Updated some dependencies so skani no longer depends on old crates that will be deprecated. + +### v0.1.3 released - 2023-05-09 #### Major * Fixed a bug where memory was blowing up in `dist` and `triangle` when the marker-index was activated. For big datasets, there could be > 100 GBs of wasted memory. diff --git a/Cargo.lock b/Cargo.lock index ffae3a2..3a9b0a3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1563,7 +1563,7 @@ dependencies = [ [[package]] name = "skani" -version = "0.1.3" +version = "0.1.4" dependencies = [ "assert_cmd", "bincode", diff --git a/Cargo.toml b/Cargo.toml index 9c98b53..7915867 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "skani" -version = "0.1.3" +version = "0.1.4" edition = "2021" license = "MIT OR Apache-2.0" description = "skani is a fast tool for calculating ANI between metagenomic sequences, such as metagenome-assembled genomes (MAGs). 
It is extremely fast and is robust against incompleteness and fragmentation, giving accurate ANI estimates." diff --git a/src/chain.rs b/src/chain.rs index 4fcc531..c475dc2 100644 --- a/src/chain.rs +++ b/src/chain.rs @@ -750,7 +750,7 @@ fn get_anchors( for anchor in anchors { if last_query_contig != anchor.query_contig || anchor.query_pos > curr_end_point { if query_positions_all[last_query_contig as usize].is_empty() { - warn!("{}", &query_sketch.contigs[(last_query_contig as usize)]); + warn!("{}", &query_sketch.contigs[last_query_contig as usize]); continue; } let mut _num_seeds_in_block = 0; @@ -800,7 +800,7 @@ fn get_anchors( let mut seed_pos_in_block = vec![]; loop { if query_positions_all[last_query_contig as usize].is_empty() { - warn!("{}", &query_sketch.contigs[(last_query_contig as usize)]); + warn!("{}", &query_sketch.contigs[last_query_contig as usize]); continue; } if running_counter >= query_positions_all[last_query_contig as usize].len() { diff --git a/src/file_io.rs b/src/file_io.rs index 8abde38..1255fba 100644 --- a/src/file_io.rs +++ b/src/file_io.rs @@ -414,7 +414,7 @@ pub fn write_phyllip_matrix( write!(&mut af_file, "{}", name).unwrap(); let end = sketches.len(); for j in 0..end { - let full_cond = (full_matrix && i >= j) || (i < j); + let full_cond = (full_matrix && i >= j) || (i > j); if i == j { if full_cond { write!(&mut ani_file, "\t{:.2}", perfect).unwrap(); diff --git a/src/main.rs b/src/main.rs index 4259ea2..71ab456 100644 --- a/src/main.rs +++ b/src/main.rs @@ -21,7 +21,7 @@ static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc; fn main() { let matches = Command::new("skani") .setting(AppSettings::ArgRequiredElseHelp) - .version("0.1.3") + .version("0.1.4") .about("fast, robust ANI calculation and database searching for metagenomic contigs and assemblies. 
\n\nQuick ANI calculation:\nskani dist genome1.fa genome2.fa \n\nMemory-efficient database search:\nskani sketch genomes/* -o database; skani search -d database query1.fa query2.fa ...\n\nAll-to-all comparison:\nskani triangle genomes/*") .subcommand( SubCommand::with_name("help").setting(AppSettings::Hidden) @@ -112,7 +112,7 @@ fn main() { ) .help_heading("MISC") - .arg(Arg::new("v").short('v').help("Debug level verbosity.")) + .arg(Arg::new("v").short('v').long("debug").help("Debug level verbosity.")) .arg(Arg::new("trace").long("trace").help("Trace level verbosity.")) ) @@ -299,7 +299,7 @@ fn main() { .help(H_NO_FULL_INDEX), ) .help_heading("MISC") - .arg(Arg::new("v").short('v').help("Debug level verbosity.")) + .arg(Arg::new("v").short('v').long("debug").help("Debug level verbosity.")) .arg(Arg::new("trace").long("trace").help("Trace level verbosity.")) ) .subcommand( @@ -451,7 +451,7 @@ fn main() { .help("Estimate median identity instead of average (mean) identity."), ) .help_heading("MISC") - .arg(Arg::new("v").short('v').help("Debug level verbosity.")) + .arg(Arg::new("v").short('v').long("debug").help("Debug level verbosity.")) .arg(Arg::new("trace").long("trace").help("Trace level verbosity.")) ) .subcommand( @@ -571,7 +571,7 @@ fn main() { .help("Estimate median identity instead of average (mean) identity."), ) .help_heading("MISC") - .arg(Arg::new("v").short('v').help("Debug level verbosity.")) + .arg(Arg::new("v").short('v').long("debug").help("Debug level verbosity.")) .arg(Arg::new("trace").long("trace").help("Trace level verbosity.")) ) diff --git a/src/triangle.rs b/src/triangle.rs index 74e4b3b..710706a 100644 --- a/src/triangle.rs +++ b/src/triangle.rs @@ -103,7 +103,7 @@ pub fn triangle(command_params: CommandParams, mut sketch_params: SketchParams) } if c % 100 == 0 && c != 0 { info!("{} query sequences processed.", c); - if c % INTERMEDIATE_WRITE_COUNT == 0 && c != 0 { + if c % INTERMEDIATE_WRITE_COUNT == 0 && c != 0 && command_params.sparse{ 
let moved_anis: FxHashMap<_,_>; { let mut locked = anis.lock().unwrap();