diff --git a/.vscode/settings.json b/.vscode/settings.json index 0ed6770..9afbdce 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -53,6 +53,7 @@ "Heptopyranose", "Heptose", "heptoses", + "Hexf", "Hexphosphate", "Hydropathy", "hydroxybutyryl", @@ -88,6 +89,7 @@ "Quant", "rawfile", "repr", + "reroot", "Ribf", "Ribofuranose", "Ribopyranose", @@ -113,4 +115,4 @@ "Xylulose" ], "rust-analyzer.showUnlinkedFileNotification": false -} +} \ No newline at end of file diff --git a/rustyms/src/fragment.rs b/rustyms/src/fragment.rs index 0176ba6..8cb8584 100644 --- a/rustyms/src/fragment.rs +++ b/rustyms/src/fragment.rs @@ -240,19 +240,19 @@ pub enum FragmentType { z(Position), /// z· z·(Position), - /// glycan A fragment + /// glycan A fragment (Never generated) A(GlycanPosition), /// glycan B fragment B(GlycanPosition), - /// glycan C fragment + /// glycan C fragment (Never generated) C(GlycanPosition), - /// glycan X fragment + /// glycan X fragment (Never generated) X(GlycanPosition), - /// glycan Y fragment - Y(GlycanPosition), - /// glycan Z fragment + /// glycan Y fragment, generated by one or more branches broken + Y(Vec), + /// glycan Z fragment (Never generated) Z(GlycanPosition), - /// glycan Z fragment + /// Internal glycan fragment, meaning both a B and Y breakages (and potentially multiple of both), resulting in a set of monosaccharides InternalGlycan(Vec), /// precursor precursor, @@ -279,7 +279,7 @@ impl FragmentType { /// Get the glycan position of this ion (or None nor applicable) pub const fn glycan_position(&self) -> Option<&GlycanPosition> { match self { - Self::A(n) | Self::B(n) | Self::C(n) | Self::X(n) | Self::Y(n) | Self::Z(n) => Some(n), + Self::A(n) | Self::B(n) | Self::C(n) | Self::X(n) | Self::Z(n) => Some(n), _ => None, } } @@ -297,9 +297,8 @@ impl FragmentType { | Self::y(n) | Self::z(n) | Self::z·(n) => Some(n.series_number.to_string()), - Self::A(n) | Self::B(n) | Self::C(n) | Self::X(n) | Self::Y(n) | Self::Z(n) => { - Some(n.label()) - } + Self::A(n) | Self::B(n) | Self::C(n) | Self::X(n) | Self::Z(n) => Some(n.label()), + Self::Y(bonds) => Some(bonds.iter().map(GlycanPosition::label).join("")), Self::InternalGlycan(breakages) => Some( breakages .iter() @@ -355,7 +354,7 @@ impl Display for FragmentType { Self::B(pos) => format!("B{}", pos.label()), Self::C(pos) => format!("C{}", pos.label()), Self::X(pos) => format!("X{}", pos.label()), - Self::Y(pos) => format!("Y{}", pos.label()), + Self::Y(pos) => format!("Y{}", pos.iter().map(GlycanPosition::label).join("")), Self::Z(pos) => format!("Z{}", pos.label()), Self::InternalGlycan(positions) => positions .iter() diff --git a/rustyms/src/fragmentation_tests.rs b/rustyms/src/fragmentation_tests.rs index 548dd92..59fe5ae 100644 --- a/rustyms/src/fragmentation_tests.rs +++ b/rustyms/src/fragmentation_tests.rs @@ -44,6 +44,7 @@ fn triple_a() { .unwrap(), &model, 1.0, + true, ); } @@ -97,6 +98,7 @@ fn with_modifications() { .unwrap(), &model, 1.0, + true, ); } @@ -133,6 +135,7 @@ fn with_possible_modifications() { .unwrap(), &model, 1.0, + true, ); } @@ -175,6 +178,7 @@ fn higher_charges() { .unwrap(), &model, 5.0, + false, ); } @@ -340,6 +344,51 @@ fn all_aminoacids() { .unwrap(), &model, 1.0, + false, + ); +} + +#[test] +fn glycan_fragmentation() { + #[allow(clippy::unreadable_literal)] + let theoretical_fragments = &[ + (4593.06932015166, "N4H5S1"), + (4301.97390015166, "N4H5"), + (4139.92108015166, "N4H4"), + (3977.86826015166, "N4H3"), + (3936.84171015166, "N3H4"), + (3774.78889015166, "N3H3"), + (3612.73607015166, "N3H2"), + (3571.70952015166, "N2H3"), + (3409.65670015166, "N2H2"), + (3247.60388015166, "N2H1"), + (3085.55106015166, "N2"), + (2882.47169015166, "N"), + (2679.39232015166, "Base"), + ]; + let model = Model { + a: (Location::None, vec![]), + b: (Location::None, vec![]), + c: (Location::None, vec![]), + d: (Location::None, vec![]), + v: (Location::None, vec![]), + w: (Location::None, vec![]), + x: (Location::None, vec![]), + y: (Location::None, vec![]), + z: (Location::None, vec![]), + precursor: vec![], + ppm: MassOverCharge::new::(20.0), + glycan_fragmentation: Some(vec![ + // NeutralLoss::Loss(molecular_formula!(H 2 O 1).unwrap()), + // NeutralLoss::Loss(molecular_formula!(H 4 O 2).unwrap()), + ]), + }; + test( + theoretical_fragments, + &LinearPeptide::pro_forma("MVSHHN[GNO:G43728NL]LTTGATLINEQWLLTTAK").unwrap(), + &model, + 1.0, + true, ); } @@ -348,6 +397,7 @@ fn test( peptide: &LinearPeptide, model: &Model, charge: f64, + allow_left_over_generated: bool, ) { let mut calculated_fragments = peptide.generate_theoretical_fragments(Charge::new::(charge), model); @@ -383,9 +433,11 @@ fn test( println!("Not found: {mass} {name}"); } assert_eq!(not_found.len(), 0, "Not all needed fragments are found"); - assert_eq!( - calculated_fragments.len(), - 0, - "Not all generated fragments are accounted for" - ); + if !allow_left_over_generated { + assert_eq!( + calculated_fragments.len(), + 0, + "Not all generated fragments are accounted for" + ); + } } diff --git a/rustyms/src/glycan.rs b/rustyms/src/glycan.rs index a89464e..fc47e3a 100644 --- a/rustyms/src/glycan.rs +++ b/rustyms/src/glycan.rs @@ -281,8 +281,33 @@ impl PositionedGlycanStructure { .as_ref() .map_or(vec![], |neutral_losses| { // Get all base fragments from this node and all its children - let base_fragments = - self.base_theoretical_fragments(peptide_index, full_formula, attachment); + let mut base_fragments = self.oxonium_fragments(peptide_index, attachment); + // Generate all Y fragments + base_fragments.extend( + self.internal_break_points(attachment) + .iter() + .filter(|(_, bonds)| { + bonds.iter().all(|b| !matches!(b, GlycanBreakPos::B(_))) + }) + .flat_map(move |(f, bonds)| { + full_formula.iter().map(move |full| { + Fragment::new( + full - self.formula() + f, + Charge::zero(), + peptide_index, + FragmentType::Y( + bonds + .iter() + .filter(|b| !matches!(b, GlycanBreakPos::End(_))) + .map(GlycanBreakPos::position) + .cloned() + .collect(), + ), + String::new(), + ) + }) + }), + ); // Apply all neutral losses and all charge options let charge_options = charge_carriers.all_charge_options(); base_fragments @@ -294,13 +319,12 @@ impl PositionedGlycanStructure { } /// Generate all fragments without charge and neutral loss options - fn base_theoretical_fragments( + fn oxonium_fragments( &self, peptide_index: usize, - full_formula: &Multi, attachment: (AminoAcid, usize), ) -> Vec { - // Generate the basic single breakage fragments + // Generate the basic single breakage B fragments let mut base_fragments = vec![Fragment::new( self.formula(), Charge::zero(), @@ -313,20 +337,6 @@ impl PositionedGlycanStructure { }), String::new(), )]; - base_fragments.extend(full_formula.iter().map(|f| { - Fragment::new( - f - &self.formula(), - Charge::zero(), - peptide_index, - FragmentType::Y(GlycanPosition { - inner_depth: self.inner_depth, - series_number: self.inner_depth, - branch: self.branch.clone(), - attachment, - }), - String::new(), - ) - })); // Extend with all internal fragments, meaning multiple breaking bonds base_fragments.extend( self.internal_break_points(attachment) @@ -364,13 +374,14 @@ impl PositionedGlycanStructure { ); // Extend with the theoretical fragments for all branches of this position base_fragments.extend( - self.branches.iter().flat_map(|b| { - b.base_theoretical_fragments(peptide_index, full_formula, attachment) - }), + self.branches + .iter() + .flat_map(|b| b.oxonium_fragments(peptide_index, attachment)), ); base_fragments } + /// All possible bonds that can be broken and the molecular formula that would be held over if these bonds all broke and the broken off parts are lost. fn internal_break_points( &self, attachment: (AminoAcid, usize), @@ -438,8 +449,6 @@ impl PositionedGlycanStructure { #[cfg(test)] #[allow(clippy::missing_panics_doc)] mod test { - use crate::Modification; - use super::*; #[test] @@ -538,7 +547,18 @@ mod test { for fragment in &fragments { println!("{fragment}"); } - assert_eq!(fragments.len(), 31); + assert_eq!(fragments.len(), 34); + } + + #[test] + fn correct_masses() { + let (sugar, _) = MonoSaccharide::from_short_iupac("Neu5Ac", 0, 0).unwrap(); + dbg!(&sugar); + + assert_eq!( + sugar.formula(), + molecular_formula!(C 11 H 17 N 1 O 8).unwrap() + ); } #[test] @@ -546,14 +566,14 @@ mod test { // Furanoses added for error detection let structure = GlycanStructure::from_short_iupac( "Neu5Ac(?2-?)Galf(?1-?)GlcNAc(?1-?)Man(?1-?)[Galf(?1-?)GlcNAc(?1-?)Man(?1-?)]Man(?1-?)GlcNAc(?1-?)GlcNAc", - 0..101, + 0..101, 0 ) - .unwrap(); + .unwrap(); assert_eq!( structure.to_string(), - "HexNAc(HexNAc(Hex(Hex(HexNAc(Hexf(NonNdAAc))),Hex(HexNAc(Hexf)))))" + "HexNAc(HexNAc(Hex(Hex(HexNAc(Hexf(NonNAAc))),Hex(HexNAc(Hexf)))))" ); } diff --git a/rustyms/src/shared/glycan.rs b/rustyms/src/shared/glycan.rs index f3d6bb0..076d716 100644 --- a/rustyms/src/shared/glycan.rs +++ b/rustyms/src/shared/glycan.rs @@ -528,11 +528,7 @@ const BASE_SUGARS: &[(&str, BaseSugar, &[GlycanSubstituent])] = &[ ( "Neu", BaseSugar::Nonose, - &[ - GlycanSubstituent::Amino, - GlycanSubstituent::Deoxy, - GlycanSubstituent::Acid, - ], + &[GlycanSubstituent::Amino, GlycanSubstituent::Acid], ), ( "Sia",