diff --git a/Cargo.lock b/Cargo.lock index 3d80bc6..8537395 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -77,9 +77,9 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.4" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7079075b41f533b8c61d2a4d073c4676e1f8b249ff94a393b0595db304e0dd87" +checksum = "2faccea4cc4ab4a667ce676a30e8ec13922a692c99bb8f5b11f1502c72e04220" [[package]] name = "anstyle-parse" @@ -1189,6 +1189,21 @@ dependencies = [ "adler", ] +[[package]] +name = "minreq" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb3371dfc7b772c540da1380123674a8e20583aca99907087d990ca58cf44203" +dependencies = [ + "log", + "once_cell", + "rustls", + "rustls-webpki", + "serde", + "serde_json", + "webpki-roots", +] + [[package]] name = "multiversion" version = "0.7.3" @@ -2048,6 +2063,20 @@ version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" +[[package]] +name = "ring" +version = "0.17.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "688c63d65483050968b2a8937f7995f443e27041a0f7700aa59b0822aedebb74" +dependencies = [ + "cc", + "getrandom", + "libc", + "spin", + "untrusted", + "windows-sys 0.48.0", +] + [[package]] name = "roers" version = "0.3.0" @@ -2105,6 +2134,28 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "rustls" +version = "0.21.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9d5a6813c0759e4609cd494e8e725babae6a2ca7b62a5536a13daaec6fcb7ba" +dependencies = [ + "log", + "ring", + "rustls-webpki", + "sct", +] + +[[package]] +name = "rustls-webpki" +version = "0.101.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" +dependencies = [ + "ring", + "untrusted", +] + [[package]] name = "rustversion" version = "1.0.14" @@ -2129,6 +2180,16 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "sct" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" +dependencies = [ + "ring", + "untrusted", +] + [[package]] name = "semver" version = "1.0.21" @@ -2264,6 +2325,7 @@ dependencies = [ "jrsonnet-cli", "jrsonnet-evaluator", "jrsonnet-parser", + "minreq", "phf", "roers", "semver", @@ -2302,6 +2364,12 @@ dependencies = [ "version_check", ] +[[package]] +name = "spin" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" + [[package]] name = "sqlparser" version = "0.38.0" @@ -2630,6 +2698,12 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85" +[[package]] +name = "untrusted" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" + [[package]] name = "utf8parse" version = "0.2.1" @@ -2714,6 +2788,12 @@ version = "0.2.90" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4d91413b1c31d7539ba5ef2451af3f0b833a005eb27a631cec32bc0635a8602b" +[[package]] +name = "webpki-roots" +version = "0.25.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1778a42e8b3b90bff8d0f5032bf22250792889a5cdc752aa0020c84abe3aaf10" + [[package]] name = "which" version = "6.0.0" diff --git a/Cargo.toml b/Cargo.toml index 88a4f53..e267ec9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -53,6 +53,7 @@ phf = { version = "0.11.2", features = ["macros"] } chrono = "0.4.33" tabled = "0.15.0" csv = "1.3.0" +minreq = { version = "2.11.0", features = ["serde", "serde_json", "https", "json-using-serde"] } [profile.release] lto = "thin" diff --git a/resources/permit_list_info.json b/resources/permit_list_info.json new file mode 100644 index 0000000..69f467d --- /dev/null +++ b/resources/permit_list_info.json @@ -0,0 +1,10 @@ +{ + "10xv2" : { + "filename" : "10x_v2_permit.txt", + "url" : "https://umd.box.com/shared/static/jbs2wszgbj7k4ic2hass9ts6nhqkwq1p" + }, + "10xv3" : { + "filename" : "10x_v3_permit.txt", + "url" : "https://umd.box.com/shared/static/vc9zd4qyjj581gvtolw5kj638wmg4f3s" + } +} diff --git a/src/simpleaf_commands.rs b/src/simpleaf_commands.rs index 46fe11c..6dbc6d5 100644 --- a/src/simpleaf_commands.rs +++ b/src/simpleaf_commands.rs @@ -395,6 +395,17 @@ pub struct IndexOpts { )] pub decoy_paths: Option>, + /// seed value to use in SSHash index construction + /// (try changing this in the rare event index build fails). + #[arg( + long = "seed", + conflicts_with = "use_piscem", + help_heading = "Piscem Index Options", + default_value_t = 1, + display_order = 4 + )] + pub hash_seed: u64, + /// path to output directory (will be created if it doesn't exist) #[arg(short, long, display_order = 1)] pub output: PathBuf, diff --git a/src/simpleaf_commands/indexing.rs b/src/simpleaf_commands/indexing.rs index e26c09a..bc0142f 100644 --- a/src/simpleaf_commands/indexing.rs +++ b/src/simpleaf_commands/indexing.rs @@ -90,11 +90,11 @@ pub fn build_ref_and_index(af_home_path: &Path, opts: IndexOpts) -> anyhow::Resu run_fun!(mkdir -p $outref)?; let roers_opts = roers::AugRefOpts { - /// The path to a genome fasta file. + // The path to a genome fasta file. genome: fasta.clone(), - /// The path to a gene annotation gtf/gff3 file. + // The path to a gene annotation gtf/gff3 file. genes: gtf.clone(), - /// The path to the output directory (will be created if it doesn't exist). + // The path to the output directory (will be created if it doesn't exist). out_dir: outref.clone(), aug_type, no_transcript: false, @@ -193,7 +193,9 @@ pub fn build_ref_and_index(af_home_path: &Path, opts: IndexOpts) -> anyhow::Resu .arg("-o") .arg(&output_index_stem) .arg("-s") - .arg(&ref_seq); + .arg(&ref_seq) + .arg("--seed") + .arg(opts.hash_seed.to_string()); // if the user requested to overwrite, then pass this option if opts.overwrite { @@ -222,7 +224,7 @@ pub fn build_ref_and_index(af_home_path: &Path, opts: IndexOpts) -> anyhow::Resu // piscem version is at least 0.7.0 if let Some(decoy_paths) = opts.decoy_paths { if let Ok(_piscem_ver) = prog_utils::check_version_constraints( - "alevin-fry", + "piscem", ">=0.7.0, <1.0.0", &piscem_prog_info.version, ) { diff --git a/src/utils/af_utils.rs b/src/utils/af_utils.rs index cb9412d..d4c0b54 100644 --- a/src/utils/af_utils.rs +++ b/src/utils/af_utils.rs @@ -1,4 +1,4 @@ -use anyhow::{anyhow, bail, Result}; +use anyhow::{bail, Result}; use cmd_lib::run_fun; use phf::phf_map; use seq_geom_parser::{AppendToCmdArgs, FragmentGeomDesc, PiscemGeomDesc, SalmonSeparateGeomDesc}; @@ -6,6 +6,10 @@ use seq_geom_xform::{FifoXFormData, FragmentGeomDescExt}; use std::path::{Path, PathBuf}; use tracing::error; +use crate::utils::prog_utils; +//use ureq; +//use minreq::Response; + /// The map from pre-specified chemistry types that salmon knows /// to the corresponding command line flag that salmon should be passed /// to use this chemistry. @@ -163,39 +167,99 @@ pub fn add_chemistry_to_args_piscem(chem_str: &str, cmd: &mut std::process::Comm } pub fn get_permit_if_absent(af_home: &Path, chem: &Chemistry) -> Result { - let chem_file; - let dl_url; + // check if the file already exists + let odir = af_home.join("plist"); match chem { Chemistry::TenxV2 => { - chem_file = "10x_v2_permit.txt"; - dl_url = "https://umd.box.com/shared/static/jbs2wszgbj7k4ic2hass9ts6nhqkwq1p"; + let chem_file = "10x_v2_permit.txt"; + if odir.join(chem_file).exists() { + return Ok(PermitListResult::AlreadyPresent(odir.join(chem_file))); + } } Chemistry::TenxV3 => { - chem_file = "10x_v3_permit.txt"; - dl_url = "https://umd.box.com/shared/static/vc9zd4qyjj581gvtolw5kj638wmg4f3s"; + let chem_file = "10x_v3_permit.txt"; + if odir.join(chem_file).exists() { + return Ok(PermitListResult::AlreadyPresent(odir.join(chem_file))); + } } _ => { return Ok(PermitListResult::UnregisteredChemistry); } } - let odir = af_home.join("plist"); - if odir.join(chem_file).exists() { - Ok(PermitListResult::AlreadyPresent(odir.join(chem_file))) - } else { - run_fun!(mkdir -p $odir)?; - let mut dl_cmd = std::process::Command::new("wget"); - dl_cmd - .arg("-v") - .arg("-O") - .arg(odir.join(chem_file).to_string_lossy().to_string()) - .arg("-L") - .arg(dl_url); - let r = dl_cmd.output()?; - if !r.status.success() { - return Err(anyhow!("failed to download permit list {:?}", r.status)); + // the file doesn't exist, so get the json file that gives us + // the chemistry name to permit list URL mapping. + let permit_dict_url = "https://raw.githubusercontent.com/COMBINE-lab/simpleaf/dev/resources/permit_list_info.json"; + let permit_dict: serde_json::Value = minreq::get(permit_dict_url) + .send()? + .json::()?; + let opt_chem_file: Option; + let opt_dl_url: Option; + // parse the JSON appropriately based on the chemistry we have + match chem { + Chemistry::TenxV2 => { + if let Some(d) = permit_dict.get("10xv2") { + opt_chem_file = d + .get("filename") + .expect("value for filename field should be a string") + .as_str() + .map(|cf| cf.to_string()); + opt_dl_url = d + .get("url") + .expect("value for url field should be a string") + .as_str() + .map(|url| url.to_string()); + } else { + bail!( + "could not obtain \"10xv2\" key from the fetched permit_dict at {} = {:?}", + permit_dict_url, + permit_dict + ) + } } - Ok(PermitListResult::DownloadSuccessful(odir.join(chem_file))) + Chemistry::TenxV3 => { + if let Some(d) = permit_dict.get("10xv3") { + opt_chem_file = d + .get("filename") + .expect("value for filename field should be a string") + .as_str() + .map(|cf| cf.to_string()); + opt_dl_url = d + .get("url") + .expect("value for url field should be a string") + .as_str() + .map(|url| url.to_string()); + } else { + bail!( + "could not obtain \"10xv3\" key from the fetched permit_dict at {} = {:?}", + permit_dict_url, + permit_dict + ) + } + } + _ => { + return Ok(PermitListResult::UnregisteredChemistry); + } + } + + // actually download the permit list if we need it and don't have it. + if let (Some(chem_file), Some(dl_url)) = (opt_chem_file, opt_dl_url) { + if odir.join(&chem_file).exists() { + Ok(PermitListResult::AlreadyPresent(odir.join(&chem_file))) + } else { + run_fun!(mkdir -p $odir)?; + + let output_file = odir.join(&chem_file).to_string_lossy().to_string(); + prog_utils::download_to_file(dl_url, &output_file)?; + + Ok(PermitListResult::DownloadSuccessful(odir.join(&chem_file))) + } + } else { + bail!( + "could not properly parse the permit dictionary obtained from {} = {:?}", + permit_dict_url, + permit_dict + ); } } diff --git a/src/utils/prog_utils.rs b/src/utils/prog_utils.rs index 682605b..c51da67 100644 --- a/src/utils/prog_utils.rs +++ b/src/utils/prog_utils.rs @@ -1,4 +1,4 @@ -use anyhow::{anyhow, Context, Result}; +use anyhow::{anyhow, bail, Context, Result}; use cmd_lib::run_fun; use semver::{Version, VersionReq}; use serde::{Deserialize, Serialize}; @@ -7,7 +7,7 @@ use std::ffi::{OsStr, OsString}; use std::path::{Path, PathBuf}; use std::process::Command; use std::sync::Once; -use tracing::{error, info}; +use tracing::{debug, error, info}; use which::which; // The below functions are taken from the [`execute`](https://crates.io/crates/execute) @@ -48,6 +48,38 @@ pub fn shell>(cmd: S) -> Command { command } +pub fn download_to_file>(url: T, filename: &str) -> Result<()> { + let url = url.as_ref(); + + debug!( + "Downloading file from {} and writing to file {}", + url, filename + ); + + let request = minreq::get(url).with_timeout(120).send()?; + match request.status_code { + 200..=299 => { + // success + debug!( + "Obtained status code {} from final url {}", + request.status_code, request.url + ); + } + x => { + bail!( + "could not obtain the permit list; HTTP status code {}, reason {}", + x, + request.reason_phrase + ); + } + } + + let mut out_file = std::fs::File::create(filename)?; + use std::io::Write; + out_file.write_all(request.as_bytes())?; + Ok(()) +} + pub fn get_cmd_line_string(prog: &std::process::Command) -> String { let mut prog_vec = vec![prog.get_program().to_string_lossy().to_string()]; prog_vec.extend( diff --git a/src/utils/workflow_utils.rs b/src/utils/workflow_utils.rs index 7915d4a..791e1d3 100644 --- a/src/utils/workflow_utils.rs +++ b/src/utils/workflow_utils.rs @@ -1291,23 +1291,8 @@ pub fn get_protocol_estuary>( run_cmd!(mkdir -p $pe_dir)?; } - // download github repo as a zip file - let mut dl_cmd = std::process::Command::new("wget"); - dl_cmd - .arg("-v") - .arg("-O") - .arg(pe_zip_file.to_string_lossy().to_string()) - .arg("-L") - .arg(dl_url); - match prog_utils::execute_command(&mut dl_cmd, CommandVerbosityLevel::Quiet) { - Ok(_output) => {} - Err(e) => { - return Err(anyhow!( - "failed to download protocol-estuary GitHub repository; error: {:?}", - e - )); - } - } + let out_fname = pe_zip_file.to_string_lossy().to_string(); + prog_utils::download_to_file(dl_url, &out_fname)?; // unzip let mut unzip_cmd = std::process::Command::new("unzip");