Skip to content

Commit

Permalink
Fixed more issues with ontologies
Browse files Browse the repository at this point in the history
  • Loading branch information
douweschulte committed Oct 27, 2023
1 parent 90cbd0f commit 2312e35
Show file tree
Hide file tree
Showing 15 changed files with 134 additions and 90 deletions.
20 changes: 6 additions & 14 deletions src/build/gnome.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use std::{collections::HashMap, ffi::OsString, io::Write, path::Path};

use crate::{build::csv::parse_csv, glycan::*};

use super::{obo::OboOntology, GnoComposition, Modification};
use super::{obo::OboOntology, ontology_modification::OntologyList, GnoComposition, Modification};

pub fn build_gnome_ontology(out_dir: &OsString, debug: bool) {
// Get all the basic info
Expand All @@ -26,21 +26,10 @@ pub fn build_gnome_ontology(out_dir: &OsString, debug: bool) {
let final_mods = mods
.into_values()
.filter(|m| m.mass.is_some())
.take(10)
.map(|m| (0_usize, m.code_name.clone(), m.into_mod()))
.collect::<Vec<_>>();
file.write_all(&bincode::serialize(&final_mods).unwrap())
file.write_all(&bincode::serialize::<OntologyList>(&final_mods).unwrap())
.unwrap();
// let mut writer = BufWriter::new(file);
// writeln!(
// writer,
// "pub const GNOME_ONTOLOGY: &[(usize, &str, Modification)] = &["
// )
// .unwrap();
// for modification in mods.values().filter(|m| m.mass.is_some()) {
// writeln!(writer, "{},", modification.to_code()).unwrap();
// }
// writeln!(writer, "];").unwrap();
}

fn find_mass(mods: &HashMap<String, GNOmeModification>, mut name: String) -> Option<f64> {
Expand Down Expand Up @@ -138,7 +127,10 @@ impl GNOmeModification {
if let Some(structure) = self.structure {
Modification::Gno(GnoComposition::Structure(structure), self.code_name)
} else if let Some(mass) = self.mass {
Modification::Gno(GnoComposition::Mass(mass), self.code_name)
Modification::Gno(
GnoComposition::Mass(crate::system::f64::da(mass)),
self.code_name,
)
} else {
panic!("unreachable")
}
Expand Down
16 changes: 10 additions & 6 deletions src/build/ontology_modification.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,17 @@ pub struct OntologyModification {
}

impl OntologyModification {
pub fn into_mod(self) -> Modification {
Modification::Predefined(
self.diff_formula,
self.rules,
self.ontology,
self.code_name,
pub fn into_mod(self) -> (usize, String, Modification) {
(
self.id,
self.code_name.to_ascii_lowercase(),
Modification::Predefined(
self.diff_formula,
self.rules,
self.ontology,
self.code_name,
self.id,
),
)
}
}
Expand Down
4 changes: 2 additions & 2 deletions src/build/psi_mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use crate::{formula::MolecularFormula, ELEMENT_PARSE_LIST};

use super::{
obo::OboOntology,
ontology_modification::{OntologyModification, PlacementRule, Position},
ontology_modification::{OntologyList, OntologyModification, PlacementRule, Position},
};

pub fn build_psi_mod_ontology(out_dir: &OsString, debug: bool) {
Expand All @@ -13,7 +13,7 @@ pub fn build_psi_mod_ontology(out_dir: &OsString, debug: bool) {
let dest_path = Path::new(&out_dir).join("psimod.dat");
let mut file = std::fs::File::create(dest_path).unwrap();
let final_mods = mods.into_iter().map(|m| m.into_mod()).collect::<Vec<_>>();
file.write_all(&bincode::serialize(&final_mods).unwrap())
file.write_all(&bincode::serialize::<OntologyList>(&final_mods).unwrap())
.unwrap();
}

Expand Down
4 changes: 2 additions & 2 deletions src/build/unimod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use crate::{formula::MolecularFormula, glycan::MonoSaccharide, print, Element};

use super::{
obo::OboOntology,
ontology_modification::{OntologyModification, PlacementRule},
ontology_modification::{OntologyList, OntologyModification, PlacementRule},
};

pub fn build_unimod_ontology(out_dir: &OsString, debug: bool) {
Expand All @@ -15,7 +15,7 @@ pub fn build_unimod_ontology(out_dir: &OsString, debug: bool) {
let dest_path = Path::new(&out_dir).join("unimod.dat");
let mut file = std::fs::File::create(dest_path).unwrap();
let final_mods = mods.into_iter().map(|m| m.into_mod()).collect::<Vec<_>>();
file.write_all(&bincode::serialize(&final_mods).unwrap())
file.write_all(&bincode::serialize::<OntologyList>(&final_mods).unwrap())
.unwrap();
}

Expand Down
29 changes: 25 additions & 4 deletions src/complex_peptide.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use std::fmt::Display;

use itertools::Itertools;

use crate::{
Expand All @@ -21,6 +23,25 @@ pub enum ComplexPeptide {
Multimeric(Vec<LinearPeptide>),
}

/// Textual rendering of a [`ComplexPeptide`].
///
/// A `Singular` peptide is written using the `Display` output of the peptide it
/// wraps. A `Multimeric` peptide writes each contained peptide in order,
/// separated by a single `+`.
impl Display for ComplexPeptide {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Handle the single-peptide case up front so the loop below only deals
        // with the multimeric list.
        let peptides = match self {
            Self::Singular(single) => return write!(f, "{single}"),
            Self::Multimeric(peptides) => peptides,
        };
        for (index, peptide) in peptides.iter().enumerate() {
            // The separator goes between peptides, so skip it before the first one.
            if index > 0 {
                write!(f, "+")?;
            }
            write!(f, "{peptide}")?;
        }
        Ok(())
    }
}

impl ComplexPeptide {
/// [Pro Forma specification](https://github.com/HUPO-PSI/ProForma)
/// Only a subset of the specification is supported (see `proforma_grammar.md` for an overview of what is supported); some features of the specification cannot be represented.
Expand Down Expand Up @@ -134,7 +155,7 @@ impl ComplexPeptide {
}
ch => {
peptide.sequence.push(SequenceElement::new(
ch.try_into().map_err(|_| {
ch.try_into().map_err(|()| {
CustomError::error(
"Invalid amino acid",
"This character is not a valid amino acid",
Expand Down Expand Up @@ -207,7 +228,7 @@ impl ComplexPeptide {
.flat_err()?;
for aa in line[at_index..end_index].split(',') {
global_modifications.push(GlobalModification::Fixed(
aa.try_into().map_err(|_| {
aa.try_into().map_err(|()| {
CustomError::error(
"Invalid global modification",
"The location could not be read as an amino acid",
Expand Down Expand Up @@ -398,7 +419,7 @@ impl ComplexPeptide {
chars[offset+count_len..].iter()
.take_while(|c| c.is_ascii_alphabetic())
.count();
let element: Element = std::str::from_utf8(&chars[offset+count_len..offset+count_len+element_len]).unwrap().try_into().map_err(|_| CustomError::error(
let element: Element = std::str::from_utf8(&chars[offset+count_len..offset+count_len+element_len]).unwrap().try_into().map_err(|()| CustomError::error(
"Invalid adduct ion",
"Invalid element symbol",
Context::line(0, line, offset+count_len, element_len),
Expand Down Expand Up @@ -489,7 +510,7 @@ impl ComplexPeptide {
}
(false, ch) => {
peptide.sequence.push(SequenceElement::new(
ch.try_into().map_err(|_| CustomError::error(
ch.try_into().map_err(|()| CustomError::error(
"Invalid amino acid",
"This character is not a valid amino acid",
Context::line(0, line, index, 1),
Expand Down
4 changes: 2 additions & 2 deletions src/formula.rs
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ fn to_subscript_num(input: isize) -> String {
if *c == b'-' {
output.push('\u{208B}');
} else {
output.push(char::from_u32(*c as u32 + 0x2080 - 0x30).unwrap());
output.push(char::from_u32(u32::from(*c) + 0x2080 - 0x30).unwrap());
}
}
output
Expand All @@ -235,7 +235,7 @@ fn to_superscript_num(input: isize) -> String {
} else if *c == b'3' {
output.push('\u{00B3}');
} else {
output.push(char::from_u32(*c as u32 + 0x2070 - 0x30).unwrap());
output.push(char::from_u32(u32::from(*c) + 0x2070 - 0x30).unwrap());
}
}
output
Expand Down
34 changes: 19 additions & 15 deletions src/glycan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -424,24 +424,24 @@ mod test {
assert_eq!(
GlycanStructure::from_str("Hep(Hex)").unwrap(),
GlycanStructure {
sugar: MonoSaccharide::new(BaseSugar::Heptose(None), &[]),
sugar: MonoSaccharide::new(BaseSugar::Heptose(None), &[]).with_name("Hep"),
branches: vec![GlycanStructure {
sugar: MonoSaccharide::new(BaseSugar::Hexose(None), &[]),
sugar: MonoSaccharide::new(BaseSugar::Hexose(None), &[]).with_name("Hex"),
branches: Vec::new()
}],
}
);
assert_eq!(
GlycanStructure::from_str("Hex(Hex,Hep)").unwrap(),
GlycanStructure {
sugar: MonoSaccharide::new(BaseSugar::Hexose(None), &[]),
sugar: MonoSaccharide::new(BaseSugar::Hexose(None), &[]).with_name("Hex"),
branches: vec![
GlycanStructure {
sugar: MonoSaccharide::new(BaseSugar::Hexose(None), &[]),
sugar: MonoSaccharide::new(BaseSugar::Hexose(None), &[]).with_name("Hex"),
branches: Vec::new()
},
GlycanStructure {
sugar: MonoSaccharide::new(BaseSugar::Heptose(None), &[]),
sugar: MonoSaccharide::new(BaseSugar::Heptose(None), &[]).with_name("Hep"),
branches: Vec::new()
}
],
Expand All @@ -450,17 +450,18 @@ mod test {
assert_eq!(
GlycanStructure::from_str("Hex(Hex(Hex),Hep)").unwrap(),
GlycanStructure {
sugar: MonoSaccharide::new(BaseSugar::Hexose(None), &[]),
sugar: MonoSaccharide::new(BaseSugar::Hexose(None), &[]).with_name("Hex"),
branches: vec![
GlycanStructure {
sugar: MonoSaccharide::new(BaseSugar::Hexose(None), &[]),
sugar: MonoSaccharide::new(BaseSugar::Hexose(None), &[]).with_name("Hex"),
branches: vec![GlycanStructure {
sugar: MonoSaccharide::new(BaseSugar::Hexose(None), &[]),
sugar: MonoSaccharide::new(BaseSugar::Hexose(None), &[])
.with_name("Hex"),
branches: Vec::new()
}]
},
GlycanStructure {
sugar: MonoSaccharide::new(BaseSugar::Heptose(None), &[]),
sugar: MonoSaccharide::new(BaseSugar::Heptose(None), &[]).with_name("Hep"),
branches: Vec::new()
}
],
Expand All @@ -469,21 +470,24 @@ mod test {
assert_eq!(
GlycanStructure::from_str("Hep(Hex(Hex(Hex(Hep),Hex)))").unwrap(),
GlycanStructure {
sugar: MonoSaccharide::new(BaseSugar::Heptose(None), &[]),
sugar: MonoSaccharide::new(BaseSugar::Heptose(None), &[]).with_name("Hep"),
branches: vec![GlycanStructure {
sugar: MonoSaccharide::new(BaseSugar::Hexose(None), &[]),
sugar: MonoSaccharide::new(BaseSugar::Hexose(None), &[]).with_name("Hex"),
branches: vec![GlycanStructure {
sugar: MonoSaccharide::new(BaseSugar::Hexose(None), &[]),
sugar: MonoSaccharide::new(BaseSugar::Hexose(None), &[]).with_name("Hex"),
branches: vec![
GlycanStructure {
sugar: MonoSaccharide::new(BaseSugar::Hexose(None), &[]),
sugar: MonoSaccharide::new(BaseSugar::Hexose(None), &[])
.with_name("Hex"),
branches: vec![GlycanStructure {
sugar: MonoSaccharide::new(BaseSugar::Heptose(None), &[]),
sugar: MonoSaccharide::new(BaseSugar::Heptose(None), &[])
.with_name("Hep"),
branches: Vec::new(),
}],
},
GlycanStructure {
sugar: MonoSaccharide::new(BaseSugar::Hexose(None), &[]),
sugar: MonoSaccharide::new(BaseSugar::Hexose(None), &[])
.with_name("Hex"),
branches: Vec::new(),
},
],
Expand Down
57 changes: 33 additions & 24 deletions src/isobaric_sets.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,14 +94,17 @@ pub fn find_isobaric_sets(
.iter()
.flat_map(|aa| {
let mut options = vec![SequenceElement::new(*aa, None)];
options.extend(modifications.iter().filter_map(|m| {
can_be_placed(m, *aa, 0, 1).then(|| SequenceElement {
aminoacid: *aa,
ambiguous: None,
modifications: vec![m.clone()],
possible_modifications: Vec::new(),
})
}));
options.extend(
modifications
.iter()
.filter(|&m| can_be_placed(m, *aa, 0, 1))
.map(|m| SequenceElement {
aminoacid: *aa,
ambiguous: None,
modifications: vec![m.clone()],
possible_modifications: Vec::new(),
}),
);
options
})
.map(|s| {
Expand All @@ -116,14 +119,17 @@ pub fn find_isobaric_sets(
.iter()
.flat_map(|aa| {
let mut options = vec![SequenceElement::new(*aa, None)];
options.extend(modifications.iter().filter_map(|m| {
can_be_placed(m, *aa, 1, 2).then(|| SequenceElement {
aminoacid: *aa,
ambiguous: None,
modifications: vec![m.clone()],
possible_modifications: Vec::new(),
})
}));
options.extend(
modifications
.iter()
.filter(|&m| can_be_placed(m, *aa, 1, 2))
.map(|m| SequenceElement {
aminoacid: *aa,
ambiguous: None,
modifications: vec![m.clone()],
possible_modifications: Vec::new(),
}),
);
options
})
.map(|s| {
Expand All @@ -138,14 +144,17 @@ pub fn find_isobaric_sets(
.iter()
.flat_map(|aa| {
let mut options = vec![SequenceElement::new(*aa, None)];
options.extend(modifications.iter().filter_map(|m| {
can_be_placed(m, *aa, 1, 1).then(|| SequenceElement {
aminoacid: *aa,
ambiguous: None,
modifications: vec![m.clone()],
possible_modifications: Vec::new(),
})
}));
options.extend(
modifications
.iter()
.filter(|&m| can_be_placed(m, *aa, 1, 1))
.map(|m| SequenceElement {
aminoacid: *aa,
ambiguous: None,
modifications: vec![m.clone()],
possible_modifications: Vec::new(),
}),
);
options
})
.map(|s| {
Expand Down
15 changes: 5 additions & 10 deletions src/linear_peptide.rs
Original file line number Diff line number Diff line change
Expand Up @@ -134,11 +134,8 @@ impl LinearPeptide {
+ self.sequence[index]
.possible_modifications
.iter()
.filter_map(|am| {
ambiguous_local
.contains(&&am.id)
.then(|| am.modification.formula())
})
.filter(|&am| ambiguous_local.contains(&&am.id))
.map(|am| am.modification.formula())
.sum::<MolecularFormula>()
})
.map(|m| {
Expand Down Expand Up @@ -536,11 +533,8 @@ impl SequenceElement {
+ self
.possible_modifications
.iter()
.filter_map(|m| {
selected_ambiguous
.contains(&m.id)
.then(|| m.modification.formula())
})
.filter(|&m| selected_ambiguous.contains(&m.id))
.map(|m| m.modification.formula())
.sum::<MolecularFormula>(),
)
}
Expand All @@ -551,6 +545,7 @@ impl SequenceElement {
if self.aminoacid == AminoAcid::B || self.aminoacid == AminoAcid::Z {
None
} else {
#[allow(clippy::filter_map_bool_then)] // otherwise crashes
Some(
self.aminoacid.formula()
+ self
Expand Down
Loading

0 comments on commit 2312e35

Please sign in to comment.