Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

implement logic to separate RnaChemistry::Other() and CustomChemistry. #173

Merged
merged 5 commits into from
Dec 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 45 additions & 3 deletions resources/chemistries.json
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@
}
},
"10xv2": {
"geometry": "__builtin",
"geometry": "1{b[16]u[10]x:}2{r:}",
"expected_ori": "fw",
"plist_name": "dda0309f511ded5d801081a55c66b9a44cab4edbf0e07a9223f539e248d8e090",
"version": "0.1.0",
Expand All @@ -80,7 +80,7 @@
"remote_url": "https://umd.box.com/shared/static/jbs2wszgbj7k4ic2hass9ts6nhqkwq1p"
},
"10xv3": {
"geometry": "__builtin",
"geometry": "1{b[16]u[12]x:}2{r:}",
"expected_ori": "fw",
"plist_name": "2c9dfb98babe5a57ae763778adb9ebb7bfa531e105823bc26163892089333f8c",
"version": "0.1.0",
Expand All @@ -100,7 +100,7 @@
"remote_url": "https://umd.box.com/shared/static/cbpv1c4zi6ty81nvcgy3pyta2oj7vea1.txt"
},
"10xv4-3p": {
"geometry": "__builtin",
"geometry": "1{b[16]u[12]x:}2{r:}",
"expected_ori": "fw",
"plist_name": "0bfa4a0bea1d636e7ec1908aabb94307c53778185c12fc164da32dd085848131",
"version": "0.1.0",
Expand Down Expand Up @@ -138,5 +138,47 @@
"meta": {
"cr_filename": "737K-arc-v1.txt"
}
},
"dropseq": {
"geometry": "1{b[12]u[8]x:}2{r:}",
"expected_ori": "fw",
"plist_name": null,
"remote_url": null,
"version": "0.1.0"
},
"indropv2": {
"geometry": "1{r:}2{b[8-11]f[GAGTGATTGCTTGTGACGCCTT]b[8]u[6]x:}",
"expected_ori": "fw",
"plist_name": null,
"remote_url": null,
"version": "0.1.0"
},
"celseq2": {
"geometry": "1{u[6]b[6]x:}2{r:}",
"expected_ori": "fw",
"plist_name": null,
"remote_url": null,
"version": "0.1.0"
},
"splitseqv1": {
"geometry": "1{r[66]}2{u[10]b[8]f[CACCGGCTACAAAGCGTAGCCGCATGCTGA]b[8]f[TAGGTGCACGAACTCTCCGGTCTCGTAAGC]b[8]}",
"expected_ori": "fw",
"plist_name": null,
"remote_url": null,
"version": "0.1.0"
},
"splitseqv2": {
"geometry": "1{r[66]}2{u[10]b[8]f[CACCGGCTACAAAGCGTAGCCGCATGCTGA]b[8]f[TAGGTGCACGAACTCTGACACC]b[8]}",
"expected_ori": "fw",
"plist_name": null,
"remote_url": null,
"version": "0.1.0"
},
"sciseq3": {
"geometry": "1{b[9-10]f[GTCTCG]u[8]b[10]x:}2{r:}",
"expected_ori": "fw",
"plist_name": null,
"remote_url": null,
"version": "0.1.0"
}
}
4 changes: 2 additions & 2 deletions src/simpleaf_commands.rs
Original file line number Diff line number Diff line change
Expand Up @@ -479,7 +479,7 @@ pub struct ChemistryAddOpts {
/// the geometry to which the chemistry maps, wrapped in quotes.
#[arg(short, long)]
pub geometry: String,
/// the expected orientation to give to the chemistry
/// the expected orientation indicating the direction of biological reads to reference sequences.
#[arg(short, long, value_parser = clap::builder::PossibleValuesParser::new(["fw", "rc", "both"]))]
pub expected_ori: String,
/// the (fully-qualified) path to a local file that will be copied into
Expand All @@ -497,7 +497,7 @@ pub struct ChemistryAddOpts {
/// optionally assign a version number to this chemistry. A chemistry's
/// entry can be updated in the future by adding it again with a higher
/// version number.
#[arg(long)]
#[arg(long, default_value = "0.0.0")]
pub version: Option<String>,
}

Expand Down
17 changes: 7 additions & 10 deletions src/simpleaf_commands/chemistry.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::utils::af_utils::*;
use crate::utils::chem_utils::{
custom_chem_hm_to_json, get_custom_chem_hm, get_single_custom_chem_from_file, CustomChemistry,
custom_chem_hm_into_json, get_custom_chem_hm, get_single_custom_chem_from_file, CustomChemistry,
};
use crate::utils::constants::*;
use crate::utils::prog_utils::{self, download_to_file_compute_hash};
Expand Down Expand Up @@ -56,11 +56,8 @@ pub fn add_chemistry(
let chem_p = af_home_path.join(CHEMISTRIES_PATH);

if let Some(existing_entry) = get_single_custom_chem_from_file(&chem_p, &name)? {
let existing_ver_str = existing_entry
.version()
.clone()
.unwrap_or("0.0.0".to_string());
let existing_ver = Version::parse(existing_ver_str.as_ref()).with_context( || format!("could not parse version {} found in existing chemistries.json file. Please correct this entry", existing_ver_str))?;
let existing_ver_str = existing_entry.version();
let existing_ver = Version::parse(existing_ver_str).with_context( || format!("could not parse version {} found in existing chemistries.json file. Please correct this entry", existing_ver_str))?;
if add_ver <= existing_ver {
info!("Attempting to add chemistry with version {:#} which is <= than the existing version ({:#}) for this chemistry. Skipping addition", add_ver, existing_ver);
return Ok(());
Expand Down Expand Up @@ -155,10 +152,10 @@ pub fn add_chemistry(
let custom_chem = CustomChemistry {
name,
geometry,
expected_ori: Some(ExpectedOri::from_str(&add_opts.expected_ori)?),
expected_ori: ExpectedOri::from_str(&add_opts.expected_ori)?,
plist_name: local_plist,
remote_pl_url: add_opts.remote_url,
version: Some(version),
version,
meta: None,
};

Expand All @@ -180,7 +177,7 @@ pub fn add_chemistry(
}

// convert the custom chemistry hashmap to json
let v = custom_chem_hm_to_json(&chem_hm)?;
let v = custom_chem_hm_into_json(chem_hm)?;

// write out the new custom chemistry file
let mut custom_chem_file = std::fs::File::create(&chem_p)
Expand Down Expand Up @@ -385,7 +382,7 @@ pub fn remove_chemistry(
chem_hm.remove(&name);

// convert the custom chemistry hashmap to json
let v = custom_chem_hm_to_json(&chem_hm)?;
let v = custom_chem_hm_into_json(chem_hm)?;

// write out the new custom chemistry file
let mut custom_chem_file = std::fs::File::create(&chem_p)
Expand Down
4 changes: 2 additions & 2 deletions src/simpleaf_commands/inspect.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use crate::atac::commands::AtacChemistry;
use crate::utils::constants::CHEMISTRIES_PATH;
use crate::utils::{
af_utils::RnaChemistry,
chem_utils::{custom_chem_hm_to_json, get_custom_chem_hm},
chem_utils::{custom_chem_hm_into_json, get_custom_chem_hm},
prog_utils::*,
};
use anyhow::Result;
Expand All @@ -19,7 +19,7 @@ pub fn inspect_simpleaf(version: &str, af_home_path: PathBuf) -> Result<()> {
let chem_info_value = if custom_chem_p.is_file() {
// parse the chemistry json file
let custom_chem_hm = get_custom_chem_hm(&custom_chem_p)?;
let v = custom_chem_hm_to_json(&custom_chem_hm)?;
let v = custom_chem_hm_into_json(custom_chem_hm)?;
json!({
"custom_chem_path" : custom_chem_p.display().to_string(),
"custom_geometries" : v
Expand Down
74 changes: 5 additions & 69 deletions src/simpleaf_commands/quant.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::utils::{af_utils::*, chem_utils::*};
use crate::utils::af_utils::*;

use crate::utils::prog_utils;
use crate::utils::prog_utils::{CommandVerbosityLevel, ReqProgs};
Expand Down Expand Up @@ -179,12 +179,6 @@ impl CBListInfo {
}
}

/// Describes which kind of index, if any, is in use; two of the three
/// variants carry a `PathBuf`.
///
/// NOTE(review): the code that constructs and consumes this enum is not
/// visible in this hunk, so the per-variant notes below are inferred from
/// the variant names only — confirm against the callers.
enum IndexType {
/// presumably the location of a salmon index — TODO confirm
Salmon(PathBuf),
/// presumably the location of a piscem index — TODO confirm
Piscem(PathBuf),
/// no index path is associated with this variant
NoIndex,
}

fn push_advanced_piscem_options(
piscem_quant_cmd: &mut std::process::Command,
opts: &MapQuantOpts,
Expand Down Expand Up @@ -387,79 +381,21 @@ pub fn map_and_quant(af_home_path: &Path, opts: MapQuantOpts) -> anyhow::Result<
// the chemistries file
let custom_chem_p = af_home_path.join(CHEMISTRIES_PATH);

let chem = match opts.chemistry.as_str() {
"10xv2" => Chemistry::Rna(RnaChemistry::TenxV2),
"10xv2-5p" => Chemistry::Rna(RnaChemistry::TenxV25P),
"10xv3" => Chemistry::Rna(RnaChemistry::TenxV3),
"10xv3-5p" => Chemistry::Rna(RnaChemistry::TenxV35P),
"10xv4-3p" => Chemistry::Rna(RnaChemistry::TenxV43P),
s => {
// we try to extract the single record for the chemistry and ignore the rest
if let Some(chem) =
get_single_custom_chem_from_file(&custom_chem_p, opts.chemistry.as_str())?
{
info!(
"custom chemistry {} maps to geometry {}",
s,
chem.geometry()
);
Chemistry::Custom(chem)
} else {
Chemistry::Custom(CustomChemistry::simple_custom(s).with_context(|| {
format!(
"Could not parse the provided chemistry {}. Please ensure it is a valid chemistry string wrapped by quotes or that it is defined in the custom_chemistries.json file.",
s
)
})?)
}
}
};
let chem = Chemistry::from_str(&index_type, &custom_chem_p, &opts.chemistry)?;

let ori: ExpectedOri;
// if the user set the orientation, then
// use that explicitly
if let Some(o) = opts.expected_ori.clone() {
ori = ExpectedOri::from_str(&o).with_context(|| {
if let Some(o) = &opts.expected_ori {
ori = ExpectedOri::from_str(o).with_context(|| {
format!(
"Could not parse orientation {}. It must be one of the following: {:?}",
o,
ExpectedOri::all_to_str().join(", ")
)
})?;
} else {
// otherwise, this was not set explicitly. In that case
// if we have 10xv2, 10xv3, or 10xv4 (3') chemistry, set ori = "fw"
// if we have 10xv2-5p or 10xv3-5p chemistry, set ori = "fw"
// otherwise set ori = "both"
match &chem {
Chemistry::Rna(RnaChemistry::TenxV2)
| Chemistry::Rna(RnaChemistry::TenxV3)
| Chemistry::Rna(RnaChemistry::TenxV43P) => {
ori = ExpectedOri::Forward;
}
Chemistry::Rna(RnaChemistry::TenxV25P) | Chemistry::Rna(RnaChemistry::TenxV35P) => {
// NOTE: This is because we assume the piscem encoding
// that is, these are treated as potentially paired-end protocols and
// we infer the orientation of the fragment = orientation of read 1.
// So, while the direction we want is the same as the 3' protocols
// above, we separate out the case statement here for clarity.
// Further, we may consider changing this or making it more robust if
// and when we propagate more information about paired-end mappings.
ori = ExpectedOri::Forward;
}
Chemistry::Rna(RnaChemistry::Other(_)) => ori = ExpectedOri::default(),
Chemistry::Custom(cc) => {
// if the custom chemistry has an orientation, use that
if let Some(o) = cc.expected_ori() {
ori = o.clone();
} else {
ori = ExpectedOri::default();
}
}
_ => {
bail!("Encountered non-RNA chemistry in simpleaf quant. This should not happen. Please report this to simpleaf GitHub issues.");
}
}
ori = chem.expected_ori();
}

let mut filter_meth_opt = None;
Expand Down
Loading