From 5629d97a98664f240dba2b7115c0e9689aa9055b Mon Sep 17 00:00:00 2001 From: an-altosian Date: Sat, 21 Dec 2024 18:24:20 +0000 Subject: [PATCH 1/4] known chem to RnaChemistry::Other. Custom chem to CustomChemistry --- src/simpleaf_commands.rs | 4 +- src/simpleaf_commands/chemistry.rs | 17 +- src/simpleaf_commands/inspect.rs | 4 +- src/simpleaf_commands/quant.rs | 70 +------ src/utils/af_utils.rs | 111 +++++++++-- src/utils/chem_utils.rs | 295 +++++++++++++++-------------- 6 files changed, 262 insertions(+), 239 deletions(-) diff --git a/src/simpleaf_commands.rs b/src/simpleaf_commands.rs index 84c5d5a..343a316 100644 --- a/src/simpleaf_commands.rs +++ b/src/simpleaf_commands.rs @@ -479,7 +479,7 @@ pub struct ChemistryAddOpts { /// the geometry to which the chemistry maps, wrapped in quotes. #[arg(short, long)] pub geometry: String, - /// the expected orientation to give to the chemistry + /// the expected orientation indicating the direction to the reference sequences. #[arg(short, long, value_parser = clap::builder::PossibleValuesParser::new(["fw", "rc", "both"]))] pub expected_ori: String, /// the (fully-qualified) path to a local file that will be copied into @@ -497,7 +497,7 @@ pub struct ChemistryAddOpts { /// optionally assign a version number to this chemistry. A chemistry's /// entry can be updated in the future by adding it again with a higher /// version number. - #[arg(long)] + #[arg(long, default_value = "0.0.0")] pub version: Option, } diff --git a/src/simpleaf_commands/chemistry.rs b/src/simpleaf_commands/chemistry.rs index ef757ec..4813b2f 100644 --- a/src/simpleaf_commands/chemistry.rs +++ b/src/simpleaf_commands/chemistry.rs @@ -1,6 +1,6 @@ use crate::utils::af_utils::*; use crate::utils::chem_utils::{ - custom_chem_hm_to_json, get_custom_chem_hm, get_single_custom_chem_from_file, CustomChemistry, + custom_chem_hm_into_json, get_custom_chem_hm, get_single_custom_chem_from_file, CustomChemistry, }; use crate::utils::constants::*; use crate::utils::prog_utils::{self, download_to_file_compute_hash}; @@ -56,11 +56,8 @@ pub fn add_chemistry( let chem_p = af_home_path.join(CHEMISTRIES_PATH); if let Some(existing_entry) = get_single_custom_chem_from_file(&chem_p, &name)? { - let existing_ver_str = existing_entry - .version() - .clone() - .unwrap_or("0.0.0".to_string()); - let existing_ver = Version::parse(existing_ver_str.as_ref()).with_context( || format!("could not parse version {} found in existing chemistries.json file. Please correct this entry", existing_ver_str))?; + let existing_ver_str = existing_entry.version(); + let existing_ver = Version::parse(existing_ver_str).with_context( || format!("could not parse version {} found in existing chemistries.json file. Please correct this entry", existing_ver_str))?; if add_ver <= existing_ver { info!("Attempting to add chemistry with version {:#} which is <= than the existing version ({:#}) for this chemistry. Skipping addition", add_ver, existing_ver); return Ok(()); @@ -155,10 +152,10 @@ pub fn add_chemistry( let custom_chem = CustomChemistry { name, geometry, - expected_ori: Some(ExpectedOri::from_str(&add_opts.expected_ori)?), + expected_ori: ExpectedOri::from_str(&add_opts.expected_ori)?, plist_name: local_plist, remote_pl_url: add_opts.remote_url, - version: Some(version), + version, meta: None, }; @@ -180,7 +177,7 @@ pub fn add_chemistry( } // convert the custom chemistry hashmap to json - let v = custom_chem_hm_to_json(&chem_hm)?; + let v = custom_chem_hm_into_json(chem_hm)?; // write out the new custom chemistry file let mut custom_chem_file = std::fs::File::create(&chem_p) @@ -381,7 +378,7 @@ pub fn remove_chemistry( chem_hm.remove(&name); // convert the custom chemistry hashmap to json - let v = custom_chem_hm_to_json(&chem_hm)?; + let v = custom_chem_hm_into_json(chem_hm)?; // write out the new custom chemistry file let mut custom_chem_file = std::fs::File::create(&chem_p) diff --git a/src/simpleaf_commands/inspect.rs b/src/simpleaf_commands/inspect.rs index 03028ef..c5e5988 100644 --- a/src/simpleaf_commands/inspect.rs +++ b/src/simpleaf_commands/inspect.rs @@ -2,7 +2,7 @@ use crate::atac::commands::AtacChemistry; use crate::utils::constants::CHEMISTRIES_PATH; use crate::utils::{ af_utils::RnaChemistry, - chem_utils::{custom_chem_hm_to_json, get_custom_chem_hm}, + chem_utils::{custom_chem_hm_into_json, get_custom_chem_hm}, prog_utils::*, }; use anyhow::Result; @@ -19,7 +19,7 @@ pub fn inspect_simpleaf(version: &str, af_home_path: PathBuf) -> Result<()> { let chem_info_value = if custom_chem_p.is_file() { // parse the chemistry json file let custom_chem_hm = get_custom_chem_hm(&custom_chem_p)?; - let v = custom_chem_hm_to_json(&custom_chem_hm)?; + let v = custom_chem_hm_into_json(custom_chem_hm)?; json!({ "custom_chem_path" : custom_chem_p.display().to_string(), "custom_geometries" : v diff --git a/src/simpleaf_commands/quant.rs b/src/simpleaf_commands/quant.rs index ad0fc67..760fcc4 100644 --- a/src/simpleaf_commands/quant.rs +++ b/src/simpleaf_commands/quant.rs @@ -1,4 +1,4 @@ -use crate::utils::{af_utils::*, chem_utils::*}; +use crate::utils::af_utils::*; use crate::utils::prog_utils; use crate::utils::prog_utils::{CommandVerbosityLevel, ReqProgs}; @@ -179,12 +179,6 @@ impl CBListInfo { } } -enum IndexType { - Salmon(PathBuf), - Piscem(PathBuf), - NoIndex, -} - fn push_advanced_piscem_options( piscem_quant_cmd: &mut std::process::Command, opts: &MapQuantOpts, @@ -387,33 +381,7 @@ pub fn map_and_quant(af_home_path: &Path, opts: MapQuantOpts) -> anyhow::Result< // the chemistries file let custom_chem_p = af_home_path.join(CHEMISTRIES_PATH); - let chem = match opts.chemistry.as_str() { - "10xv2" => Chemistry::Rna(RnaChemistry::TenxV2), - "10xv2-5p" => Chemistry::Rna(RnaChemistry::TenxV25P), - "10xv3" => Chemistry::Rna(RnaChemistry::TenxV3), - "10xv3-5p" => Chemistry::Rna(RnaChemistry::TenxV35P), - "10xv4-3p" => Chemistry::Rna(RnaChemistry::TenxV43P), - s => { - // we try to extract the single record for the chemistry and ignore the rest - if let Some(chem) = - get_single_custom_chem_from_file(&custom_chem_p, opts.chemistry.as_str())? - { - info!( - "custom chemistry {} maps to geometry {}", - s, - chem.geometry() - ); - Chemistry::Custom(chem) - } else { - Chemistry::Custom(CustomChemistry::simple_custom(s).with_context(|| { - format!( - "Could not parse the provided chemistry {}. Please ensure it is a valid chemistry string wrapped by quotes or that it is defined in the custom_chemistries.json file.", - s - ) - })?) - } - } - }; + let chem = Chemistry::from_str(&index_type, &custom_chem_p, &opts.chemistry)?; let ori: ExpectedOri; // if the user set the orientation, then @@ -427,39 +395,7 @@ pub fn map_and_quant(af_home_path: &Path, opts: MapQuantOpts) -> anyhow::Result< ) })?; } else { - // otherwise, this was not set explicitly. In that case - // if we have 10xv2, 10xv3, or 10xv4 (3') chemistry, set ori = "fw" - // if we have 10xv2-5p or 10xv3-5p chemistry, set ori = "fw" - // otherwise set ori = "both" - match &chem { - Chemistry::Rna(RnaChemistry::TenxV2) - | Chemistry::Rna(RnaChemistry::TenxV3) - | Chemistry::Rna(RnaChemistry::TenxV43P) => { - ori = ExpectedOri::Forward; - } - Chemistry::Rna(RnaChemistry::TenxV25P) | Chemistry::Rna(RnaChemistry::TenxV35P) => { - // NOTE: This is because we assume the piscem encoding - // that is, these are treated as potentially paired-end protocols and - // we infer the orientation of the fragment = orientation of read 1. - // So, while the direction we want is the same as the 3' protocols - // above, we separate out the case statement here for clarity. - // Further, we may consider changing this or making it more robust if - // and when we propagate more information about paired-end mappings. - ori = ExpectedOri::Forward; - } - Chemistry::Rna(RnaChemistry::Other(_)) => ori = ExpectedOri::default(), - Chemistry::Custom(cc) => { - // if the custom chemistry has an orientation, use that - if let Some(o) = cc.expected_ori() { - ori = o.clone(); - } else { - ori = ExpectedOri::default(); - } - } - _ => { - bail!("Encountered non-RNA chemistry in simpleaf quant. This should not happen. Please report this to simpleaf GitHub issues."); - } - } + ori = chem.expected_ori(); } let mut filter_meth_opt = None; diff --git a/src/utils/af_utils.rs b/src/utils/af_utils.rs index 3f084df..fe47a38 100644 --- a/src/utils/af_utils.rs +++ b/src/utils/af_utils.rs @@ -53,6 +53,25 @@ static KNOWN_CHEM_MAP_PISCEM: phf::Map<&'static str, &'static str> = phf_map! { "10xv4-3p" => "chromium_v4_3p" }; +pub enum IndexType { + Salmon(PathBuf), + Piscem(PathBuf), + NoIndex, +} + +// Note that It is totally valid, even though +// "MapperType" instead of "IndexType" would be +// a better place to implement this method. +impl IndexType { + pub fn is_known_chem(&self, chem: &str) -> bool { + match self { + IndexType::Salmon(_) => KNOWN_CHEM_MAP_SALMON.contains_key(chem), + IndexType::Piscem(_) => KNOWN_CHEM_MAP_PISCEM.contains_key(chem), + IndexType::NoIndex => false, + } + } +} + /// The types of "mappers" we know about #[derive(Debug, Clone)] pub enum MapperType { @@ -142,19 +161,6 @@ impl QueryInRegistry for Chemistry { } } -/// The builtin geometry types that have special handling to -/// reduce necessary options in the common case, as well as the -/// `Other` variant that covers custom geometries. -#[derive(EnumIter, Clone, PartialEq)] -pub enum RnaChemistry { - TenxV2, - TenxV25P, - TenxV3, - TenxV35P, - TenxV43P, - Other(String), // this will never be used because we have Chemistry::Custom -} - impl QueryInRegistry for RnaChemistry { fn registry_key(&self) -> &str { self.as_str() @@ -178,6 +184,85 @@ impl Chemistry { Chemistry::Custom(custom_chem) => custom_chem.geometry(), } } + + pub fn expected_ori(&self) -> ExpectedOri { + match self { + Chemistry::Custom(custom_chem) => custom_chem.expected_ori().clone(), + Chemistry::Rna(RnaChemistry::TenxV2) + | Chemistry::Rna(RnaChemistry::TenxV3) + | Chemistry::Rna(RnaChemistry::TenxV43P) => ExpectedOri::Forward, + Chemistry::Rna(RnaChemistry::TenxV25P) | Chemistry::Rna(RnaChemistry::TenxV35P) => { + // NOTE: This is because we assume the piscem encoding + // that is, these are treated as potentially paired-end protocols and + // we infer the orientation of the fragment = orientation of read 1. + // So, while the direction we want is the same as the 3' protocols + // above, we separate out the case statement here for clarity. + // Further, we may consider changing this or making it more robust if + // and when we propagate more information about paired-end mappings. + ExpectedOri::Forward + } + _ => ExpectedOri::default(), + } + } + + pub fn from_str( + index_type: &IndexType, + custom_chem_p: &Path, + chem_str: &str, + ) -> Result { + // First, we check if the chemistry is a 10x chem + let chem = match chem_str { + // 10xv2, 10xv3 and 10xv4-3p are valid in both mappers + "10xv2" => Chemistry::Rna(RnaChemistry::TenxV2), + "10xv3" => Chemistry::Rna(RnaChemistry::TenxV3), + "10xv4-3p" => Chemistry::Rna(RnaChemistry::TenxV43P), + // TODO: we want to keep the 10xv2-5p and 10xv3-5p + // only because we want to directly assign their orientation as fw. + // If we think we can retrieve its ori from chemistries.json, delete it + // otherwise, delete the comment. + "10xv3-5p" => Chemistry::Rna(RnaChemistry::TenxV35P), + "10xv2-5p" => Chemistry::Rna(RnaChemistry::TenxV25P), + s => { + // first, we check if the chemistry is a known chemistry for the given mapper + // Second, we check if its a registered custom chemistry + // Third, we check if its a custom chemistry string + if index_type.is_known_chem(s) { + Chemistry::Rna(RnaChemistry::Other(s.to_string())) + } else if let Some(chem) = + get_single_custom_chem_from_file(custom_chem_p, chem_str)? + { + info!( + "custom chemistry {} maps to geometry {}", + s, + chem.geometry() + ); + Chemistry::Custom(chem) + } else { + Chemistry::Custom(CustomChemistry::simple_custom(s).with_context(|| { + format!( + "Could not parse the provided chemistry {}. Please ensure it is a valid chemistry string wrapped by quotes or that it is defined in the custom_chemistries.json file.", + s + ) + })?) + } + } + }; + + Ok(chem) + } +} + +/// The builtin geometry types that have special handling to +/// reduce necessary options in the common case, as well as the +/// `Other` variant that covers custom geometries. +#[derive(EnumIter, Clone, PartialEq)] +pub enum RnaChemistry { + TenxV2, + TenxV25P, + TenxV3, + TenxV35P, + TenxV43P, + Other(String), // this will never be used because we have Chemistry::Custom } impl RnaChemistry { diff --git a/src/utils/chem_utils.rs b/src/utils/chem_utils.rs index f60f187..c697a26 100644 --- a/src/utils/chem_utils.rs +++ b/src/utils/chem_utils.rs @@ -1,11 +1,12 @@ -use crate::utils::af_utils::{parse_resource_json_file, validate_geometry, ExpectedOri}; +use crate::utils::af_utils::{ + extract_geometry, parse_resource_json_file, validate_geometry, ExpectedOri, +}; use crate::utils::constants::*; use anyhow::{anyhow, bail, Context, Result}; use semver::Version; use serde_json::{json, Map, Value}; use std::collections::HashMap; use std::path::Path; -use tracing::info; // TODO: Change to main repo when we are ready @@ -26,8 +27,8 @@ pub trait QueryInRegistry { pub struct CustomChemistry { pub name: String, pub geometry: String, - pub expected_ori: Option, - pub version: Option, + pub expected_ori: ExpectedOri, + pub version: String, pub plist_name: Option, pub remote_pl_url: Option, pub meta: Option, @@ -42,13 +43,12 @@ impl QueryInRegistry for CustomChemistry { #[allow(dead_code)] impl CustomChemistry { pub fn simple_custom(geometry: &str) -> Result { - // TODO: once we ensure the geometry must be a valid geometry, we do validation here - // extract_geometry(geometry)?; + extract_geometry(geometry)?; Ok(CustomChemistry { name: geometry.to_string(), geometry: geometry.to_string(), - expected_ori: None, - version: None, + expected_ori: ExpectedOri::default(), + version: CustomChemistry::default_version(), plist_name: None, remote_pl_url: None, meta: None, @@ -62,11 +62,11 @@ impl CustomChemistry { self.name.as_str() } - pub fn expected_ori(&self) -> &Option { + pub fn expected_ori(&self) -> &ExpectedOri { &self.expected_ori } - pub fn version(&self) -> &Option { + pub fn version(&self) -> &String { &self.version } @@ -82,6 +82,128 @@ impl CustomChemistry { &self.remote_pl_url } } +// IO +impl CustomChemistry { + pub fn into_value(self) -> Value { + let mut value = json!({ + GEOMETRY_KEY: self.geometry + }); + value[EXPECTED_ORI_KEY] = json!(self.expected_ori.to_string()); + value[VERSION_KEY] = json!(self.version); + value[LOCAL_PL_PATH_KEY] = if let Some(lpp) = self.plist_name { + json!(lpp) + } else { + json!(null) + }; + value[REMOTE_PL_URL_KEY] = if let Some(rpu) = self.remote_pl_url { + json!(rpu) + } else { + json!(null) + }; + if let Some(meta) = self.meta { + value[META_KEY] = meta; + } + value + } + + /// Parse the value that corresponds to a key in the top-level custom chemistry JSON object. + /// The key ONLY is used for error messages and assigning the name field of the CustomChemistry struct. + /// The value must be an json value object with a valid geometry field that can be parsed into a CustomChemistry struct. + /// The value corresponding to this key can be either + /// 1. An object having the associated / expected keys + /// 2. A string representing the geometry + /// The second case here is legacy from older versions of simpleaf and deprecated, so we should + /// warn by default when we see it. + pub fn from_value(key: &str, value: &Value) -> Result { + match value { + // deprecated case. Need to warn and return an error + Value::String(record_v) => { + match validate_geometry(record_v) { + Ok(_) => Err(anyhow!( + "Found string version of custom chemistry {}: {}. This is deprecated. Please add the chemistry again using simpleaf chem add.", + key, + record_v + )), + Err(_) => Err(anyhow!( + "Found invalid custom chemistry record for {}: {}", + key, + record_v + )), + } + } + + Value::Object(obj) => { + let geometry = try_get_str_from_json( + GEOMETRY_KEY, obj, + FieldType::Mandatory, + None + )?; + + let geometry = geometry.unwrap(); // we made this Some, safe to unwrap + // check if geometry is valid + validate_geometry(&geometry)?; + + let expected_ori = try_get_str_from_json( + EXPECTED_ORI_KEY, + obj, + FieldType::Optional, + Some(ExpectedOri::default().to_string()), + )? + .unwrap(); // we made this Some, safe to unwrap + + let expected_ori = + ExpectedOri::from_str(&expected_ori) + .with_context(|| { + format!( + "Found invalid {} string for the custom chemistry {}: {}. It should be one of {}", + EXPECTED_ORI_KEY, + key, &expected_ori, + ExpectedOri::all_to_str().join(", ") + ) + })?; + + let version = try_get_str_from_json( + VERSION_KEY, + obj, + FieldType::Optional, + Some(CustomChemistry::default_version()), + )?.unwrap(); // we made this Some, safe to unwrap + + Version::parse(&version).with_context(|| { + format!( + "Found invalid {} string for the custom chemistry {}: {}", + VERSION_KEY, + key, + &version + ) + })?; + + let plist_name = + try_get_str_from_json(LOCAL_PL_PATH_KEY, obj, FieldType::Optional, None)?; + + let remote_pl_url = + try_get_str_from_json(REMOTE_PL_URL_KEY, obj, FieldType::Optional, None)?; + + let meta = obj.get(META_KEY).cloned(); + + Ok(CustomChemistry { + name: key.to_string(), + geometry, + expected_ori, + version, + plist_name, + remote_pl_url, + meta, + }) + } + _ => Err(anyhow!( + "Found invalid custom chemistry record for {}: {}.", + key, + value + )), + } + } +} /// This function gets the custom chemistry from the `af_home_path` directory. /// If the file doesn't exist, it downloads the file from the `url` and saves it @@ -115,7 +237,7 @@ pub fn get_custom_chem_hm_from_value(v: Value) -> Result { - if mandatory == FieldType::Mandatory { + if default.is_none() && mandatory.is_mandatory() { Err(anyhow!( "The mandatory field {} is null in the json object {:#?}", key, @@ -154,145 +276,28 @@ pub fn try_get_str_from_json( } } -/// Takes a key and value from the top-level custom chemistry JSON object, and returns the -/// CustomChemistry struct corresponding to this key. -/// The value corresponding to this key can be either -/// 1. An object having the associated / expected keys -/// 2. A string representing the geometry -/// The second case here is legacy from older versions of simpleaf and deprecated, so we should -/// warn by default when we see it. -pub fn parse_single_custom_chem_from_value(key: &str, value: &Value) -> Result { - match value { - // deprecated case. Need to warn and return an error - Value::String(record_v) => { - match validate_geometry(record_v) { - Ok(_) => Err(anyhow!( - "Found string version of custom chemistry {}: {}. This is deprecated. Please add the chemistry again using simpleaf chem add.", - key, - record_v - )), - Err(_) => Err(anyhow!( - "Found invalid custom chemistry record for {}: {}", - key, - record_v - )), - } - } - - Value::Object(obj) => { - let geometry = try_get_str_from_json(GEOMETRY_KEY, obj, FieldType::Mandatory, None)?; - let geometry = geometry.unwrap(); // we made this Some, safe to unwrap - // check if geometry is valid - validate_geometry(&geometry)?; - - let expected_ori = try_get_str_from_json( - EXPECTED_ORI_KEY, - obj, - FieldType::Optional, - Some(ExpectedOri::default().to_string()), - )? - .unwrap(); // we made this Some, safe to unwrap - - let expected_ori = Some( - ExpectedOri::from_str(&expected_ori) - .with_context(|| { - format!( - "Found invalid {} string for the custom chemistry {}: {}. It should be one of {}", - EXPECTED_ORI_KEY, - key, &expected_ori, - ExpectedOri::all_to_str().join(", ") - ) - })? - ); - - let version = try_get_str_from_json( - VERSION_KEY, - obj, - FieldType::Optional, - Some(CustomChemistry::default_version()), - )?; - if let Some(version) = &version { - Version::parse(version).with_context(|| { - format!( - "Found invalid {} string for the custom chemistry {}: {}", - VERSION_KEY, - key, - &version - ) - })?; - }; - - let plist_name = - try_get_str_from_json(LOCAL_PL_PATH_KEY, obj, FieldType::Optional, None)?; - - let remote_pl_url = - try_get_str_from_json(REMOTE_PL_URL_KEY, obj, FieldType::Optional, None)?; - - let meta = obj.get(META_KEY).cloned(); - - Ok(CustomChemistry { - name: key.to_string(), - geometry, - expected_ori, - version, - plist_name, - remote_pl_url, - meta, - }) - } - _ => Err(anyhow!( - "Found invalid custom chemistry record for {}: {}.", - key, - value - )), - } -} - #[derive(Debug, Clone, PartialEq)] pub(crate) enum FieldType { Mandatory, Optional, } -pub fn custom_chem_hm_to_json(custom_chem_hm: &HashMap) -> Result { +impl FieldType { + pub fn is_mandatory(&self) -> bool { + match self { + FieldType::Mandatory => true, + FieldType::Optional => false, + } + } +} + +pub fn custom_chem_hm_into_json(custom_chem_hm: HashMap) -> Result { // first create the name to geometry mapping let v: Value = custom_chem_hm - .iter() + .into_iter() .map(|(k, v)| { - let mut value = json!({ - GEOMETRY_KEY: v.geometry.clone() - }); - value[EXPECTED_ORI_KEY] = if let Some(eo) = &v.expected_ori { - json!(eo.as_str()) - } else { - info!( - "`expected_ori` is missing for custom chemistry {}; Set as {}", - k, - ExpectedOri::default().as_str() - ); - json!(ExpectedOri::default().as_str()) - }; - value[VERSION_KEY] = if let Some(ver) = &v.version { - json!(ver) - } else { - info!( - "`version` is missing for custom chemistry {}; Set as {}", - k, - CustomChemistry::default_version() - ); - json!(CustomChemistry::default_version()) - }; - value[LOCAL_PL_PATH_KEY] = if let Some(lpp) = &v.plist_name { - json!(lpp) - } else { - json!(null) - }; - value[REMOTE_PL_URL_KEY] = if let Some(rpu) = &v.remote_pl_url { - json!(rpu) - } else { - json!(null) - }; - (k.clone(), value) + let value = v.into_value(); + (k, value) }) .collect(); @@ -302,11 +307,11 @@ pub fn custom_chem_hm_to_json(custom_chem_hm: &HashMap) /// This function tries to extract the custom chemistry with the specified name from the custom_chemistries.json file in the `af_home_path` directory. pub fn get_single_custom_chem_from_file( custom_chem_p: &Path, - chem_name: &str, + key: &str, ) -> Result> { let v: Value = parse_resource_json_file(custom_chem_p, Some(CHEMISTRIES_URL))?; - if let Some(chem_v) = v.get(chem_name) { - let custom_chem = parse_single_custom_chem_from_value(chem_name, chem_v)?; + if let Some(chem_v) = v.get(key) { + let custom_chem = CustomChemistry::from_value(key, chem_v)?; Ok(Some(custom_chem)) } else { Ok(None) From e06c4d9a4cf6e8701027a185a76c9dddf48c9b57 Mon Sep 17 00:00:00 2001 From: an-altosian Date: Sat, 21 Dec 2024 19:13:11 +0000 Subject: [PATCH 2/4] tweak around --- src/simpleaf_commands.rs | 2 +- src/simpleaf_commands/quant.rs | 4 ++-- src/utils/af_utils.rs | 5 +++-- src/utils/chem_utils.rs | 7 +------ 4 files changed, 7 insertions(+), 11 deletions(-) diff --git a/src/simpleaf_commands.rs b/src/simpleaf_commands.rs index 8f1b0e8..f4e5f7b 100644 --- a/src/simpleaf_commands.rs +++ b/src/simpleaf_commands.rs @@ -479,7 +479,7 @@ pub struct ChemistryAddOpts { /// the geometry to which the chemistry maps, wrapped in quotes. #[arg(short, long)] pub geometry: String, - /// the expected orientation indicating the direction to the reference sequences. + /// the expected orientation indicating the direction of biological reads to reference sequences. #[arg(short, long, value_parser = clap::builder::PossibleValuesParser::new(["fw", "rc", "both"]))] pub expected_ori: String, /// the (fully-qualified) path to a local file that will be copied into diff --git a/src/simpleaf_commands/quant.rs b/src/simpleaf_commands/quant.rs index 760fcc4..d01e449 100644 --- a/src/simpleaf_commands/quant.rs +++ b/src/simpleaf_commands/quant.rs @@ -386,8 +386,8 @@ pub fn map_and_quant(af_home_path: &Path, opts: MapQuantOpts) -> anyhow::Result< let ori: ExpectedOri; // if the user set the orientation, then // use that explicitly - if let Some(o) = opts.expected_ori.clone() { - ori = ExpectedOri::from_str(&o).with_context(|| { + if let Some(o) = &opts.expected_ori { + ori = ExpectedOri::from_str(o).with_context(|| { format!( "Could not parse orientation {}. It must be one of the following: {:?}", o, diff --git a/src/utils/af_utils.rs b/src/utils/af_utils.rs index cda2b5e..1c75cb6 100644 --- a/src/utils/af_utils.rs +++ b/src/utils/af_utils.rs @@ -14,7 +14,7 @@ use strum_macros::EnumIter; use tracing::{error, info, warn}; use crate::atac::commands::AtacChemistry; -use crate::utils::chem_utils::CustomChemistry; +use crate::utils::chem_utils::{get_single_custom_chem_from_file, CustomChemistry}; use crate::utils::{self, prog_utils}; use super::chem_utils::{QueryInRegistry, LOCAL_PL_PATH_KEY, REMOTE_PL_URL_KEY}; @@ -201,6 +201,7 @@ impl Chemistry { // and when we propagate more information about paired-end mappings. ExpectedOri::Forward } + Chemistry::Rna(RnaChemistry::Other(_)) => ExpectedOri::default(), _ => ExpectedOri::default(), } } @@ -225,7 +226,7 @@ impl Chemistry { s => { // first, we check if the chemistry is a known chemistry for the given mapper // Second, we check if its a registered custom chemistry - // Third, we check if its a custom chemistry string + // Third, we check if its a custom geometry string if index_type.is_known_chem(s) { Chemistry::Rna(RnaChemistry::Other(s.to_string())) } else if let Some(chem) = diff --git a/src/utils/chem_utils.rs b/src/utils/chem_utils.rs index c697a26..777e309 100644 --- a/src/utils/chem_utils.rs +++ b/src/utils/chem_utils.rs @@ -107,13 +107,8 @@ impl CustomChemistry { } /// Parse the value that corresponds to a key in the top-level custom chemistry JSON object. - /// The key ONLY is used for error messages and assigning the name field of the CustomChemistry struct. + /// The key is ONLY used for error messages and assigning the name field of the CustomChemistry struct. /// The value must be an json value object with a valid geometry field that can be parsed into a CustomChemistry struct. - /// The value corresponding to this key can be either - /// 1. An object having the associated / expected keys - /// 2. A string representing the geometry - /// The second case here is legacy from older versions of simpleaf and deprecated, so we should - /// warn by default when we see it. pub fn from_value(key: &str, value: &Value) -> Result { match value { // deprecated case. Need to warn and return an error From f2b24bbacfe87d27cf88f725533e193fd03abc80 Mon Sep 17 00:00:00 2001 From: an-altosian Date: Sat, 21 Dec 2024 19:53:01 +0000 Subject: [PATCH 3/4] propose chems for known chems in chemistries.json. Need review --- resources/chemistries.json | 55 +++++++++++++++++++++++++++++++++++--- 1 file changed, 52 insertions(+), 3 deletions(-) diff --git a/resources/chemistries.json b/resources/chemistries.json index 7572e6f..e9020c9 100644 --- a/resources/chemistries.json +++ b/resources/chemistries.json @@ -60,7 +60,7 @@ } }, "10xv2": { - "geometry": "__builtin", + "geometry": "1{b[16]u[10]x:}2{r:}", "expected_ori": "fw", "plist_name": "dda0309f511ded5d801081a55c66b9a44cab4edbf0e07a9223f539e248d8e090", "version": "0.1.0", @@ -80,7 +80,7 @@ "remote_url": "https://umd.box.com/shared/static/jbs2wszgbj7k4ic2hass9ts6nhqkwq1p" }, "10xv3": { - "geometry": "__builtin", + "geometry": "1{b[16]u[12]x:}2{r:}", "expected_ori": "fw", "plist_name": "2c9dfb98babe5a57ae763778adb9ebb7bfa531e105823bc26163892089333f8c", "version": "0.1.0", @@ -100,7 +100,7 @@ "remote_url": "https://umd.box.com/shared/static/cbpv1c4zi6ty81nvcgy3pyta2oj7vea1.txt" }, "10xv4-3p": { - "geometry": "__builtin", + "geometry": "1{b[16]u[12]x:}2{r:}", "expected_ori": "fw", "plist_name": "0bfa4a0bea1d636e7ec1908aabb94307c53778185c12fc164da32dd085848131", "version": "0.1.0", @@ -138,5 +138,54 @@ "meta": { "cr_filename": "737K-arc-v1.txt" } + }, + "dropseq": { + "geometry": "1{b[12]u[8]x:}2{r:}", + "expected_ori": "fw", + "plist_name": null, + "remote_url": null, + "version": "0.1.0" + }, + "indropv2": { + "geometry": "1{r:}2{b[8-11]f[GAGTGATTGCTTGTGACGCCTT]+b[8]u[6]x:}", + "expected_ori": "fw", + "plist_name": null, + "remote_url": null, + "version": "0.1.0" + }, + "gemcode": { + "geometry": "__builtin", + "expected_ori": "fw", + "plist_name": null, + "remote_url": null, + "version": "0.1.0" + }, + "celseq2": { + "geometry": "1{b[8]x:}2{r:}", + "expected_ori": "fw", + "plist_name": null, + "remote_url": null, + "version": "0.1.0" + }, + "splitseqv1": { + "geometry": "1{r[66]}2{u[10]b[8]f[CACCGGCTACAAAGCGTAGCCGCATGCTGA]b[8]f[TAGGTGCACGAACTCTCCGGTCTCGTAAGC]b[8]}", + "expected_ori": "fw", + "plist_name": null, + "remote_url": null, + "version": "0.1.0" + }, + "splitseqv2": { + "geometry": "1{r[66]}2{u[10]b[8]f[CACCGGCTACAAAGCGTAGCCGCATGCTGA]b[8]f[TAGGTGCACGAACTCTGACACC]b[8]}", + "expected_ori": "fw", + "plist_name": null, + "remote_url": null, + "version": "0.1.0" + }, + "sciseq3": { + "geometry": "1{b[9-10]f[GTCTCG]u[8][10]x:}2{r:}", + "expected_ori": "fw", + "plist_name": null, + "remote_url": null, + "version": "0.1.0" } } From f3daf2d97744edf3edb2abbccdcf4931ac18222e Mon Sep 17 00:00:00 2001 From: an-altosian Date: Sat, 21 Dec 2024 20:20:54 +0000 Subject: [PATCH 4/4] exclude gemcode and celseq from salmon known chem as af could not handle them --- resources/chemistries.json | 11 ++--------- src/utils/af_utils.rs | 34 ++++++++++++++++++++++++++++++++-- 2 files changed, 34 insertions(+), 11 deletions(-) diff --git a/resources/chemistries.json b/resources/chemistries.json index e9020c9..f5c376b 100644 --- a/resources/chemistries.json +++ b/resources/chemistries.json @@ -147,21 +147,14 @@ "version": "0.1.0" }, "indropv2": { - "geometry": "1{r:}2{b[8-11]f[GAGTGATTGCTTGTGACGCCTT]+b[8]u[6]x:}", - "expected_ori": "fw", - "plist_name": null, - "remote_url": null, - "version": "0.1.0" - }, - "gemcode": { - "geometry": "__builtin", + "geometry": "1{r:}2{b[8-11]f[GAGTGATTGCTTGTGACGCCTT]b[8]u[6]x:}", "expected_ori": "fw", "plist_name": null, "remote_url": null, "version": "0.1.0" }, "celseq2": { - "geometry": "1{b[8]x:}2{r:}", + "geometry": "1{u[6]b[6]x:}2{r:}", "expected_ori": "fw", "plist_name": null, "remote_url": null, diff --git a/src/utils/af_utils.rs b/src/utils/af_utils.rs index 1c75cb6..d8d7a63 100644 --- a/src/utils/af_utils.rs +++ b/src/utils/af_utils.rs @@ -34,8 +34,11 @@ static KNOWN_CHEM_MAP_SALMON: phf::Map<&'static str, &'static str> = phf_map! { "dropseq" => "--dropseq", "indropv2" => "--indropV2", "citeseq" => "--citeseq", - "gemcode" => "--gemcode", - "celseq" => "--celseq", + + // commented out because they are UMI free + // "gemcode" => "--gemcode", + // "celseq" => "--celseq", + "celseq2" => "--celseq2", "splitseqv1" => "--splitseqV1", "splitseqv2" => "--splitseqV2", @@ -53,6 +56,7 @@ static KNOWN_CHEM_MAP_PISCEM: phf::Map<&'static str, &'static str> = phf_map! { "10xv4-3p" => "chromium_v4_3p" }; +#[derive(Debug, Clone, PartialEq)] pub enum IndexType { Salmon(PathBuf), Piscem(PathBuf), @@ -70,6 +74,28 @@ impl IndexType { IndexType::NoIndex => false, } } + pub fn is_unsupported_known_chem(&self, chem: &str) -> bool { + match self { + IndexType::Salmon(_) => KNOWN_CHEM_MAP_PISCEM.contains_key(chem), + IndexType::Piscem(_) => KNOWN_CHEM_MAP_SALMON.contains_key(chem), + IndexType::NoIndex => true, + } + } + pub fn as_str(&self) -> &str { + match self { + IndexType::Salmon(_) => "salmon", + IndexType::Piscem(_) => "piscem", + IndexType::NoIndex => "no_index", + } + } + + pub fn counterpart(&self) -> IndexType { + match self { + IndexType::Salmon(p) => IndexType::Piscem(p.clone()), + IndexType::Piscem(p) => IndexType::Salmon(p.clone()), + IndexType::NoIndex => IndexType::NoIndex, + } + } } /// The types of "mappers" we know about @@ -239,6 +265,10 @@ impl Chemistry { ); Chemistry::Custom(chem) } else { + // we want to bail with an error if the chemistry is not supported + if index_type.is_unsupported_known_chem(s) { + bail!("The chemistry {} is not supported by the given mapper {}. Please switch to {}", s, index_type.as_str(), index_type.counterpart().as_str()); + } Chemistry::Custom(CustomChemistry::simple_custom(s).with_context(|| { format!( "Could not parse the provided chemistry {}. Please ensure it is a valid chemistry string wrapped by quotes or that it is defined in the custom_chemistries.json file.",