Skip to content

Commit

Permalink
Merge pull request #129 from COMBINE-lab/dev
Browse files (browse the repository at this point in the history)
Dev
  • Loading branch information
rob-p authored Feb 1, 2024
2 parents 636320e + 1235098 commit 4bc344e
Show file tree
Hide file tree
Showing 8 changed files with 234 additions and 49 deletions.
84 changes: 82 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ phf = { version = "0.11.2", features = ["macros"] }
chrono = "0.4.33"
tabled = "0.15.0"
csv = "1.3.0"
minreq = { version = "2.11.0", features = ["serde", "serde_json", "https", "json-using-serde"] }

[profile.release]
lto = "thin"
Expand Down
10 changes: 10 additions & 0 deletions resources/permit_list_info.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{
"10xv2" : {
"filename" : "10x_v2_permit.txt",
"url" : "https://umd.box.com/shared/static/jbs2wszgbj7k4ic2hass9ts6nhqkwq1p"
},
"10xv3" : {
"filename" : "10x_v3_permit.txt",
"url" : "https://umd.box.com/shared/static/vc9zd4qyjj581gvtolw5kj638wmg4f3s"
}
}
11 changes: 11 additions & 0 deletions src/simpleaf_commands.rs
Original file line number Diff line number Diff line change
Expand Up @@ -395,6 +395,17 @@ pub struct IndexOpts {
)]
pub decoy_paths: Option<Vec<PathBuf>>,

/// seed value to use in SSHash index construction
/// (try changing this in the rare event index build fails).
#[arg(
long = "seed",
conflicts_with = "use_piscem",
help_heading = "Piscem Index Options",
default_value_t = 1,
display_order = 4
)]
pub hash_seed: u64,

/// path to output directory (will be created if it doesn't exist)
#[arg(short, long, display_order = 1)]
pub output: PathBuf,
Expand Down
12 changes: 7 additions & 5 deletions src/simpleaf_commands/indexing.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,11 +90,11 @@ pub fn build_ref_and_index(af_home_path: &Path, opts: IndexOpts) -> anyhow::Resu
run_fun!(mkdir -p $outref)?;

let roers_opts = roers::AugRefOpts {
/// The path to a genome fasta file.
// The path to a genome fasta file.
genome: fasta.clone(),
/// The path to a gene annotation gtf/gff3 file.
// The path to a gene annotation gtf/gff3 file.
genes: gtf.clone(),
/// The path to the output directory (will be created if it doesn't exist).
// The path to the output directory (will be created if it doesn't exist).
out_dir: outref.clone(),
aug_type,
no_transcript: false,
Expand Down Expand Up @@ -193,7 +193,9 @@ pub fn build_ref_and_index(af_home_path: &Path, opts: IndexOpts) -> anyhow::Resu
.arg("-o")
.arg(&output_index_stem)
.arg("-s")
.arg(&ref_seq);
.arg(&ref_seq)
.arg("--seed")
.arg(opts.hash_seed.to_string());

// if the user requested to overwrite, then pass this option
if opts.overwrite {
Expand Down Expand Up @@ -222,7 +224,7 @@ pub fn build_ref_and_index(af_home_path: &Path, opts: IndexOpts) -> anyhow::Resu
// piscem version is at least 0.7.0
if let Some(decoy_paths) = opts.decoy_paths {
if let Ok(_piscem_ver) = prog_utils::check_version_constraints(
"alevin-fry",
"piscem",
">=0.7.0, <1.0.0",
&piscem_prog_info.version,
) {
Expand Down
110 changes: 87 additions & 23 deletions src/utils/af_utils.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
use anyhow::{anyhow, bail, Result};
use anyhow::{bail, Result};
use cmd_lib::run_fun;
use phf::phf_map;
use seq_geom_parser::{AppendToCmdArgs, FragmentGeomDesc, PiscemGeomDesc, SalmonSeparateGeomDesc};
use seq_geom_xform::{FifoXFormData, FragmentGeomDescExt};
use std::path::{Path, PathBuf};
use tracing::error;

use crate::utils::prog_utils;
//use ureq;
//use minreq::Response;

/// The map from pre-specified chemistry types that salmon knows
/// to the corresponding command line flag that salmon should be passed
/// to use this chemistry.
Expand Down Expand Up @@ -163,39 +167,99 @@ pub fn add_chemistry_to_args_piscem(chem_str: &str, cmd: &mut std::process::Comm
}

pub fn get_permit_if_absent(af_home: &Path, chem: &Chemistry) -> Result<PermitListResult> {
let chem_file;
let dl_url;
// check if the file already exists
let odir = af_home.join("plist");
match chem {
Chemistry::TenxV2 => {
chem_file = "10x_v2_permit.txt";
dl_url = "https://umd.box.com/shared/static/jbs2wszgbj7k4ic2hass9ts6nhqkwq1p";
let chem_file = "10x_v2_permit.txt";
if odir.join(chem_file).exists() {
return Ok(PermitListResult::AlreadyPresent(odir.join(chem_file)));
}
}
Chemistry::TenxV3 => {
chem_file = "10x_v3_permit.txt";
dl_url = "https://umd.box.com/shared/static/vc9zd4qyjj581gvtolw5kj638wmg4f3s";
let chem_file = "10x_v3_permit.txt";
if odir.join(chem_file).exists() {
return Ok(PermitListResult::AlreadyPresent(odir.join(chem_file)));
}
}
_ => {
return Ok(PermitListResult::UnregisteredChemistry);
}
}

let odir = af_home.join("plist");
if odir.join(chem_file).exists() {
Ok(PermitListResult::AlreadyPresent(odir.join(chem_file)))
} else {
run_fun!(mkdir -p $odir)?;
let mut dl_cmd = std::process::Command::new("wget");
dl_cmd
.arg("-v")
.arg("-O")
.arg(odir.join(chem_file).to_string_lossy().to_string())
.arg("-L")
.arg(dl_url);
let r = dl_cmd.output()?;
if !r.status.success() {
return Err(anyhow!("failed to download permit list {:?}", r.status));
// the file doesn't exist, so get the json file that gives us
// the chemistry name to permit list URL mapping.
let permit_dict_url = "https://raw.githubusercontent.com/COMBINE-lab/simpleaf/dev/resources/permit_list_info.json";
let permit_dict: serde_json::Value = minreq::get(permit_dict_url)
.send()?
.json::<serde_json::Value>()?;
let opt_chem_file: Option<String>;
let opt_dl_url: Option<String>;
// parse the JSON appropriately based on the chemistry we have
match chem {
Chemistry::TenxV2 => {
if let Some(d) = permit_dict.get("10xv2") {
opt_chem_file = d
.get("filename")
.expect("value for filename field should be a string")
.as_str()
.map(|cf| cf.to_string());
opt_dl_url = d
.get("url")
.expect("value for url field should be a string")
.as_str()
.map(|url| url.to_string());
} else {
bail!(
"could not obtain \"10xv2\" key from the fetched permit_dict at {} = {:?}",
permit_dict_url,
permit_dict
)
}
}
Ok(PermitListResult::DownloadSuccessful(odir.join(chem_file)))
Chemistry::TenxV3 => {
if let Some(d) = permit_dict.get("10xv3") {
opt_chem_file = d
.get("filename")
.expect("value for filename field should be a string")
.as_str()
.map(|cf| cf.to_string());
opt_dl_url = d
.get("url")
.expect("value for url field should be a string")
.as_str()
.map(|url| url.to_string());
} else {
bail!(
"could not obtain \"10xv3\" key from the fetched permit_dict at {} = {:?}",
permit_dict_url,
permit_dict
)
}
}
_ => {
return Ok(PermitListResult::UnregisteredChemistry);
}
}

// actually download the permit list if we need it and don't have it.
if let (Some(chem_file), Some(dl_url)) = (opt_chem_file, opt_dl_url) {
if odir.join(&chem_file).exists() {
Ok(PermitListResult::AlreadyPresent(odir.join(&chem_file)))
} else {
run_fun!(mkdir -p $odir)?;

let output_file = odir.join(&chem_file).to_string_lossy().to_string();
prog_utils::download_to_file(dl_url, &output_file)?;

Ok(PermitListResult::DownloadSuccessful(odir.join(&chem_file)))
}
} else {
bail!(
"could not properly parse the permit dictionary obtained from {} = {:?}",
permit_dict_url,
permit_dict
);
}
}

Expand Down
Loading

0 comments on commit 4bc344e

Please sign in to comment.