From c2193a94e0f0f7148fe3ceb423e2218737fac11c Mon Sep 17 00:00:00 2001 From: Rob Patro Date: Fri, 16 Feb 2024 15:28:42 -0500 Subject: [PATCH] update dist, update deps, reorganize help menu, add defaults --- .github/workflows/release.yml | 6 +-- Cargo.lock | 40 ++++++++-------- Cargo.toml | 6 +-- clean_build.sh | 10 ++++ src/main.rs | 14 +++--- src/piscem_commands.rs | 89 ++++++++++++++++++++++++++--------- 6 files changed, 110 insertions(+), 55 deletions(-) create mode 100755 clean_build.sh diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index aab1204..7d2b810 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -62,7 +62,7 @@ jobs: # we specify bash to get pipefail; it guards against the `curl` command # failing. otherwise `sh` won't catch that `curl` returned non-0 shell: bash - run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/axodotdev/cargo-dist/releases/download/v0.9.0/cargo-dist-installer.sh | sh" + run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/axodotdev/cargo-dist/releases/download/v0.10.0/cargo-dist-installer.sh | sh" # sure would be cool if github gave us proper conditionals... # so here's a doubly-nested ternary-via-truthiness to try to provide the best possible # functionality based on whether this is a pull_request, and whether it's from a fork. @@ -161,7 +161,7 @@ jobs: with: submodules: recursive - name: Install cargo-dist - run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/axodotdev/cargo-dist/releases/download/v0.9.0/cargo-dist-installer.sh | sh" + run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/axodotdev/cargo-dist/releases/download/v0.10.0/cargo-dist-installer.sh | sh" # Get all the local artifacts for the global tasks to use (for e.g. checksums) - name: Fetch local artifacts uses: actions/download-artifact@v4 @@ -206,7 +206,7 @@ jobs: with: submodules: recursive - name: Install cargo-dist - run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/axodotdev/cargo-dist/releases/download/v0.9.0/cargo-dist-installer.sh | sh" + run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/axodotdev/cargo-dist/releases/download/v0.10.0/cargo-dist-installer.sh | sh" # Fetch artifacts from scratch-storage - name: Fetch artifacts uses: actions/download-artifact@v4 diff --git a/Cargo.lock b/Cargo.lock index 97cd32b..af7a7c1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -33,9 +33,9 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.5" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2faccea4cc4ab4a667ce676a30e8ec13922a692c99bb8f5b11f1502c72e04220" +checksum = "8901269c6307e8d93993578286ac0edf7f195079ffff5ebdeea6a59ffb7e36bc" [[package]] name = "anstyle-parse" @@ -140,9 +140,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "clap" -version = "4.4.18" +version = "4.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e578d6ec4194633722ccf9544794b71b1385c3c027efe0c55db226fc880865c" +checksum = "c918d541ef2913577a0f9566e9ce27cb35b6df072075769e0b26cb5a554520da" dependencies = [ "clap_builder", "clap_derive", @@ -150,9 +150,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.4.18" +version = "4.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4df4df40ec50c46000231c914968278b1eb05098cf8f1b3a518a95030e71d1c7" +checksum = "9f3e7391dad68afb0c2ede1bf619f579a3dc9c2ec67f089baa397123a2f3d1eb" dependencies = [ "anstream", "anstyle", @@ -163,9 +163,9 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.4.7" +version = "4.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf9804afaaf59a91e75b022a30fb7229a7901f60c755489cc61c9b423b836442" +checksum = "307bc0538d5f0f83b8248db3087aa92fe504e4691294d0c96c0eabc33f47ba47" dependencies = [ "heck", "proc-macro2", @@ -175,9 +175,9 @@ dependencies = [ [[package]] name = "clap_lex" -version = "0.6.0" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "702fc72eb24e5a1e48ce58027a675bc24edd52096d5397d4aea7c6dd9eca0bd1" +checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce" [[package]] name = "cmake" @@ -205,9 +205,9 @@ dependencies = [ [[package]] name = "crc32fast" -version = "1.3.2" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" +checksum = "b3855a8a784b474f333699ef2bbca9db2c4a1f6d9088a90a2d25b1eb53111eaa" dependencies = [ "cfg-if", ] @@ -270,9 +270,9 @@ checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" [[package]] name = "hermit-abi" -version = "0.3.4" +version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d3d0e0f38255e7fa3cf31335b3a56f05febd18025f4db5ef7a0cfb4f8da651f" +checksum = "bd5256b483761cd23699d0da46cc6fd2ee3be420bbe6d020ae4a091e70b7e9fd" [[package]] name = "hex" @@ -412,9 +412,9 @@ dependencies = [ [[package]] name = "pkg-config" -version = "0.3.29" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2900ede94e305130c13ddd391e0ab7cbaeb783945ae07a279c268cb05109c6cb" +checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" [[package]] name = "prepare_fasta" @@ -587,15 +587,15 @@ checksum = "e6ecd384b10a64542d77071bd64bd7b231f4ed5940fba55e98c3de13824cf3d7" [[package]] name = "strsim" -version = "0.10.0" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" +checksum = "5ee073c9e4cd00e28217186dbe12796d692868f432bf2e97ee73bed0c56dfa01" [[package]] name = "syn" -version = "2.0.48" +version = "2.0.49" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f3531638e407dfc0814761abb7c00a5b54992b849452a0646b7f65c9f770f3f" +checksum = "915aea9e586f80826ee59f8453c1101f9d1c4b3964cd2460185ee8e299ada496" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index c6f4244..1ee6a82 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "piscem" -version = "0.7.2" +version = "0.7.3" edition = "2021" build = "build.rs" repository = "https://github.com/COMBINE-lab/piscem" @@ -13,7 +13,7 @@ cmake = "0.1" [dependencies] num_cpus = "1.16.0" anyhow = "1.0" -clap = { version = "4.4.18", features = ["cargo", "derive", "env", "wrap_help"] } +clap = { version = "4.5.1", features = ["cargo", "derive", "env", "wrap_help"] } tracing = "0.1.40" tracing-subscriber = { version = "0.3.18", default-features = true, features = ["env-filter"] } prepare_fasta = "0.1.0" @@ -29,7 +29,7 @@ inherits = "release" # Config for 'cargo dist' [workspace.metadata.dist] # The preferred cargo-dist version to use in CI (Cargo.toml SemVer syntax) -cargo-dist-version = "0.9.0" +cargo-dist-version = "0.10.0" # The installers to generate for each app installers = ["shell"] # Target platforms to build apps for (Rust target-triple syntax) diff --git a/clean_build.sh b/clean_build.sh new file mode 100755 index 0000000..8583f96 --- /dev/null +++ b/clean_build.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) + +echo "cleaning external dependency directories of cuttlefish and piscem-cpp." +rm -fr ${SCRIPT_DIR}/cuttlefish/external/* +rm -fr ${SCRIPT_DIR}/piscem-cpp/external/zlib-cloudflare + +echo "invoking cargo clean" +cargo clean --target-dir ${SCRIPT_DIR}/target diff --git a/src/main.rs b/src/main.rs index ee2901f..9ab838a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -4,7 +4,6 @@ use std::io; use std::os::raw::{c_char, c_int}; use std::path::PathBuf; -use prepare_fasta; use anyhow::{bail, Result}; use clap::{Parser, Subcommand}; use tracing::{error, info, warn, Level}; @@ -175,12 +174,13 @@ fn main() -> Result<(), anyhow::Error> { if let Some(seqs) = ref_seqs { if !seqs.is_empty() { - let out_stem = PathBuf::from(output.as_path().to_string_lossy().into_owned() + ".sigs"); - let configs = prepare_fasta::RecordParseConfig{ - input: seqs.clone(), - output_stem: out_stem, - polya_clip_length: None - }; + let out_stem = + PathBuf::from(output.as_path().to_string_lossy().into_owned() + ".sigs"); + let configs = prepare_fasta::RecordParseConfig { + input: seqs.clone(), + output_stem: out_stem, + polya_clip_length: None, + }; info!("Computing and recording reference signatures..."); prepare_fasta::parse_records(configs)?; info!("done."); diff --git a/src/piscem_commands.rs b/src/piscem_commands.rs index 57b1513..691ddbe 100644 --- a/src/piscem_commands.rs +++ b/src/piscem_commands.rs @@ -1,4 +1,4 @@ -use anyhow::{bail, Result}; +use anyhow::{anyhow, bail, Result}; use clap::{ArgGroup, Args}; use std::ffi::CString; use std::path::{Path, PathBuf}; @@ -30,6 +30,19 @@ pub trait AsArgv { fn as_argv(&self) -> Result>; } +fn klen_is_good(s: &str) -> Result { + let k: usize = s + .parse() + .map_err(|_| anyhow!("`{s}` can't be parsed as a number"))?; + if k > 31 { + bail!("klen = {k} must be <= 31"); + } else if (k & 1) == 0 { + bail!("klen = {k} must be odd"); + } else { + Ok(k) + } +} + #[derive(Args, Clone, Debug)] #[command(arg_required_else_help = true)] #[command(group( @@ -39,28 +52,33 @@ pub trait AsArgv { ))] pub(crate) struct BuildOpts { /// ',' separated list of reference FASTA files - #[arg(short = 's', long, value_delimiter = ',', required = true)] + #[arg(short = 's', long, help_heading = "Input", value_delimiter = ',')] pub ref_seqs: Option>, /// ',' separated list of files (each listing input FASTA files) - #[arg(short = 'l', long, value_delimiter = ',', required = true)] + #[arg(short = 'l', long, help_heading = "Input", value_delimiter = ',')] pub ref_lists: Option>, /// ',' separated list of directories (all FASTA files in each directory will be indexed, /// but not recursively). - #[arg(short = 'd', long, value_delimiter = ',', required = true)] + #[arg(short = 'd', long, help_heading = "Input", value_delimiter = ',')] pub ref_dirs: Option>, - /// length of k-mer to use - #[arg(short, long)] + /// length of k-mer to use, must be <= 31 and odd + #[arg(short, long, help_heading = "Index Construction Parameters", default_value_t = 31, value_parser = klen_is_good)] pub klen: usize, - /// length of minimizer to use - #[arg(short, long)] + /// length of minimizer to use; must be < `klen` + #[arg( + short, + long, + help_heading = "Index Construction Parameters", + default_value_t = 19 + )] pub mlen: usize, /// number of threads to use - #[arg(short, long)] + #[arg(short, long, help_heading = "Index Construction Parameters")] pub threads: usize, /// output file stem @@ -69,20 +87,20 @@ pub(crate) struct BuildOpts { /// retain the reduced format GFA files produced by cuttlefish that /// describe the reference cDBG (the default is to remove these). - #[arg(long)] + #[arg(long, help_heading = "Indexing Details")] pub keep_intermediate_dbg: bool, /// working directory where temporary files should be placed. - #[arg(short = 'w', long, default_value_os_t = PathBuf::from("."))] + #[arg(short = 'w', long, help_heading = "Indexing Details", default_value_os_t = PathBuf::from("./workdir.noindex"))] pub work_dir: PathBuf, /// overwite an existing index if the output path is the same. - #[arg(long)] + #[arg(long, help_heading = "Indexing Details")] pub overwrite: bool, /// skip the construction of the equivalence class lookup table /// when building the index (not recommended). - #[arg(long)] + #[arg(long, help_heading = "Index Construction Parameters")] pub no_ec_table: bool, /// path to (optional) ',' sparated list of decoy sequences used to insert poison @@ -90,17 +108,20 @@ pub(crate) struct BuildOpts { #[arg(long, value_delimiter = ',')] pub decoy_paths: Option>, - /// index construction seed (seed value passed to SSHash index construction; useful if empty /// buckets occur). - #[arg(long = "seed", default_value_t = 1)] + #[arg( + long = "seed", + help_heading = "Index Construction Parameters", + default_value_t = 1 + )] pub seed: u64, } #[derive(Args, Clone, Debug)] pub(crate) struct MapSCOpts { /// input index prefix - #[arg(short, long)] + #[arg(short, long, help_heading = "Input")] pub index: String, /// geometry of barcode, umi and read @@ -108,11 +129,23 @@ pub(crate) struct MapSCOpts { pub geometry: String, /// path to list of read 1 files - #[arg(short = '1', long, value_delimiter = ',', required = true)] + #[arg( + short = '1', + long, + help_heading = "Input", + value_delimiter = ',', + required = true + )] pub read1: Vec, /// path to list of read 2 files - #[arg(short = '2', long, value_delimiter = ',', required = true)] + #[arg( + short = '2', + long, + help_heading = "Input", + value_delimiter = ',', + required = true + )] pub read2: Vec, /// number of threads to use @@ -178,19 +211,31 @@ pub(crate) struct MapSCOpts { ))] pub(crate) struct MapBulkOpts { /// input index prefix - #[arg(short, long)] + #[arg(short, long, help_heading = "Input")] pub index: String, /// path to list of read 1 files - #[arg(short = '1', long, value_delimiter = ',', requires = "read2")] + #[arg( + short = '1', + long, + help_heading = "Input", + value_delimiter = ',', + requires = "read2" + )] pub read1: Option>, /// path to list of read 2 files - #[arg(short = '2', long, value_delimiter = ',', requires = "read1")] + #[arg( + short = '2', + long, + help_heading = "Input", + value_delimiter = ',', + requires = "read1" + )] pub read2: Option>, /// path to list of read unpaired read files - #[arg(short = 'r', long, value_delimiter = ',', conflicts_with_all = ["read1", "read2"])] + #[arg(short = 'r', long, help_heading = "Input", value_delimiter = ',', conflicts_with_all = ["read1", "read2"])] pub reads: Option>, /// number of threads to use