Skip to content

Commit

Permalink
Generate glycan Y fragments with multiple bonds broken
Browse files Browse the repository at this point in the history
  • Loading branch information
douweschulte committed Mar 25, 2024
1 parent 191ecba commit 50f3176
Show file tree
Hide file tree
Showing 5 changed files with 120 additions and 51 deletions.
4 changes: 3 additions & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
"Heptopyranose",
"Heptose",
"heptoses",
"Hexf",
"Hexphosphate",
"Hydropathy",
"hydroxybutyryl",
Expand Down Expand Up @@ -88,6 +89,7 @@
"Quant",
"rawfile",
"repr",
"reroot",
"Ribf",
"Ribofuranose",
"Ribopyranose",
Expand All @@ -113,4 +115,4 @@
"Xylulose"
],
"rust-analyzer.showUnlinkedFileNotification": false
}
}
23 changes: 11 additions & 12 deletions rustyms/src/fragment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -240,19 +240,19 @@ pub enum FragmentType {
z(Position),
/// z·
(Position),
/// glycan A fragment
/// glycan A fragment (Never generated)
A(GlycanPosition),
/// glycan B fragment
B(GlycanPosition),
/// glycan C fragment
/// glycan C fragment (Never generated)
C(GlycanPosition),
/// glycan X fragment
/// glycan X fragment (Never generated)
X(GlycanPosition),
/// glycan Y fragment
Y(GlycanPosition),
/// glycan Z fragment
/// glycan Y fragment, generated by breaking one or more branches
Y(Vec<GlycanPosition>),
/// glycan Z fragment (Never generated)
Z(GlycanPosition),
/// glycan Z fragment
/// Internal glycan fragment, meaning both B and Y breakages (and potentially multiple of each), resulting in a set of monosaccharides
InternalGlycan(Vec<GlycanBreakPos>),
/// precursor
precursor,
Expand All @@ -279,7 +279,7 @@ impl FragmentType {
/// Get the glycan position of this ion (or None if not applicable)
pub const fn glycan_position(&self) -> Option<&GlycanPosition> {
match self {
Self::A(n) | Self::B(n) | Self::C(n) | Self::X(n) | Self::Y(n) | Self::Z(n) => Some(n),
Self::A(n) | Self::B(n) | Self::C(n) | Self::X(n) | Self::Z(n) => Some(n),
_ => None,
}
}
Expand All @@ -297,9 +297,8 @@ impl FragmentType {
| Self::y(n)
| Self::z(n)
| Self::(n) => Some(n.series_number.to_string()),
Self::A(n) | Self::B(n) | Self::C(n) | Self::X(n) | Self::Y(n) | Self::Z(n) => {
Some(n.label())
}
Self::A(n) | Self::B(n) | Self::C(n) | Self::X(n) | Self::Z(n) => Some(n.label()),
Self::Y(bonds) => Some(bonds.iter().map(GlycanPosition::label).join("")),
Self::InternalGlycan(breakages) => Some(
breakages
.iter()
Expand Down Expand Up @@ -355,7 +354,7 @@ impl Display for FragmentType {
Self::B(pos) => format!("B{}", pos.label()),
Self::C(pos) => format!("C{}", pos.label()),
Self::X(pos) => format!("X{}", pos.label()),
Self::Y(pos) => format!("Y{}", pos.label()),
Self::Y(pos) => format!("Y{}", pos.iter().map(GlycanPosition::label).join("")),
Self::Z(pos) => format!("Z{}", pos.label()),
Self::InternalGlycan(positions) => positions
.iter()
Expand Down
62 changes: 57 additions & 5 deletions rustyms/src/fragmentation_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ fn triple_a() {
.unwrap(),
&model,
1.0,
true,
);
}

Expand Down Expand Up @@ -97,6 +98,7 @@ fn with_modifications() {
.unwrap(),
&model,
1.0,
true,
);
}

Expand Down Expand Up @@ -133,6 +135,7 @@ fn with_possible_modifications() {
.unwrap(),
&model,
1.0,
true,
);
}

Expand Down Expand Up @@ -175,6 +178,7 @@ fn higher_charges() {
.unwrap(),
&model,
5.0,
false,
);
}

Expand Down Expand Up @@ -340,6 +344,51 @@ fn all_aminoacids() {
.unwrap(),
&model,
1.0,
false,
);
}

/// Check the glycan fragments generated for a peptide carrying the `GNO:G43728NL` glycan.
///
/// Every peptide ion series in the model is set to `Location::None` and the precursor
/// neutral loss list is empty. The expected list pairs a mass with a short label; the
/// labels presumably describe the glycan composition still attached to the peptide
/// (from `N4H5S1` down to the bare `Base`) — TODO confirm against the label convention.
///
/// The final `true` handed to `test` is its `allow_left_over_generated` flag, so generated
/// fragments that are not part of the expected list do not fail this test.
#[test]
fn glycan_fragmentation() {
    let model = Model {
        a: (Location::None, Vec::new()),
        b: (Location::None, Vec::new()),
        c: (Location::None, Vec::new()),
        d: (Location::None, Vec::new()),
        v: (Location::None, Vec::new()),
        w: (Location::None, Vec::new()),
        x: (Location::None, Vec::new()),
        y: (Location::None, Vec::new()),
        z: (Location::None, Vec::new()),
        precursor: Vec::new(),
        ppm: MassOverCharge::new::<mz>(20.0),
        // Glycan fragmentation is switched on with an empty neutral loss list; the water
        // losses (H2O and H4O2) are not enabled for this test.
        glycan_fragmentation: Some(Vec::new()),
    };
    let peptide = LinearPeptide::pro_forma("MVSHHN[GNO:G43728NL]LTTGATLINEQWLLTTAK").unwrap();

    // Expected (mass, label) pairs that must all be present in the generated fragments.
    #[allow(clippy::unreadable_literal)]
    let expected = &[
        (4593.06932015166, "N4H5S1"),
        (4301.97390015166, "N4H5"),
        (4139.92108015166, "N4H4"),
        (3977.86826015166, "N4H3"),
        (3936.84171015166, "N3H4"),
        (3774.78889015166, "N3H3"),
        (3612.73607015166, "N3H2"),
        (3571.70952015166, "N2H3"),
        (3409.65670015166, "N2H2"),
        (3247.60388015166, "N2H1"),
        (3085.55106015166, "N2"),
        (2882.47169015166, "N"),
        (2679.39232015166, "Base"),
    ];

    test(expected, &peptide, &model, 1.0, true);
}

Expand All @@ -348,6 +397,7 @@ fn test(
peptide: &LinearPeptide,
model: &Model,
charge: f64,
allow_left_over_generated: bool,
) {
let mut calculated_fragments =
peptide.generate_theoretical_fragments(Charge::new::<e>(charge), model);
Expand Down Expand Up @@ -383,9 +433,11 @@ fn test(
println!("Not found: {mass} {name}");
}
assert_eq!(not_found.len(), 0, "Not all needed fragments are found");
assert_eq!(
calculated_fragments.len(),
0,
"Not all generated fragments are accounted for"
);
if !allow_left_over_generated {
assert_eq!(
calculated_fragments.len(),
0,
"Not all generated fragments are accounted for"
);
}
}
76 changes: 48 additions & 28 deletions rustyms/src/glycan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -281,8 +281,33 @@ impl PositionedGlycanStructure {
.as_ref()
.map_or(vec![], |neutral_losses| {
// Get all base fragments from this node and all its children
let base_fragments =
self.base_theoretical_fragments(peptide_index, full_formula, attachment);
let mut base_fragments = self.oxonium_fragments(peptide_index, attachment);
// Generate all Y fragments
base_fragments.extend(
self.internal_break_points(attachment)
.iter()
.filter(|(_, bonds)| {
bonds.iter().all(|b| !matches!(b, GlycanBreakPos::B(_)))
})
.flat_map(move |(f, bonds)| {
full_formula.iter().map(move |full| {
Fragment::new(
full - self.formula() + f,
Charge::zero(),
peptide_index,
FragmentType::Y(
bonds
.iter()
.filter(|b| !matches!(b, GlycanBreakPos::End(_)))
.map(GlycanBreakPos::position)
.cloned()
.collect(),
),
String::new(),
)
})
}),
);
// Apply all neutral losses and all charge options
let charge_options = charge_carriers.all_charge_options();
base_fragments
Expand All @@ -294,13 +319,12 @@ impl PositionedGlycanStructure {
}

/// Generate all oxonium fragments (single breakage B fragments and internal fragments) without charge and neutral loss options
fn base_theoretical_fragments(
fn oxonium_fragments(
&self,
peptide_index: usize,
full_formula: &Multi<MolecularFormula>,
attachment: (AminoAcid, usize),
) -> Vec<Fragment> {
// Generate the basic single breakage fragments
// Generate the basic single breakage B fragments
let mut base_fragments = vec![Fragment::new(
self.formula(),
Charge::zero(),
Expand All @@ -313,20 +337,6 @@ impl PositionedGlycanStructure {
}),
String::new(),
)];
base_fragments.extend(full_formula.iter().map(|f| {
Fragment::new(
f - &self.formula(),
Charge::zero(),
peptide_index,
FragmentType::Y(GlycanPosition {
inner_depth: self.inner_depth,
series_number: self.inner_depth,
branch: self.branch.clone(),
attachment,
}),
String::new(),
)
}));
// Extend with all internal fragments, meaning multiple breaking bonds
base_fragments.extend(
self.internal_break_points(attachment)
Expand Down Expand Up @@ -364,13 +374,14 @@ impl PositionedGlycanStructure {
);
// Extend with the theoretical fragments for all branches of this position
base_fragments.extend(
self.branches.iter().flat_map(|b| {
b.base_theoretical_fragments(peptide_index, full_formula, attachment)
}),
self.branches
.iter()
.flat_map(|b| b.oxonium_fragments(peptide_index, attachment)),
);
base_fragments
}

/// All possible bonds that can be broken and the molecular formula that would be held over if these bonds all broke and the broken off parts are lost.
fn internal_break_points(
&self,
attachment: (AminoAcid, usize),
Expand Down Expand Up @@ -438,8 +449,6 @@ impl PositionedGlycanStructure {
#[cfg(test)]
#[allow(clippy::missing_panics_doc)]
mod test {
use crate::Modification;

use super::*;

#[test]
Expand Down Expand Up @@ -538,22 +547,33 @@ mod test {
for fragment in &fragments {
println!("{fragment}");
}
assert_eq!(fragments.len(), 31);
assert_eq!(fragments.len(), 34);
}

/// Check that the short IUPAC name `Neu5Ac` parses into a monosaccharide whose
/// molecular formula is C11 H17 N1 O8.
#[test]
fn correct_masses() {
    let (sugar, _) = MonoSaccharide::from_short_iupac("Neu5Ac", 0, 0).unwrap();

    // The parsed sugar is only reported when the assertion fails, instead of being
    // dumped unconditionally with `dbg!` on every test run.
    assert_eq!(
        sugar.formula(),
        molecular_formula!(C 11 H 17 N 1 O 8).unwrap(),
        "unexpected formula for parsed monosaccharide: {sugar:?}"
    );
}

#[test]
fn correct_structure_g43728nl() {
// Furanoses added for error detection
let structure = GlycanStructure::from_short_iupac(
"Neu5Ac(?2-?)Galf(?1-?)GlcNAc(?1-?)Man(?1-?)[Galf(?1-?)GlcNAc(?1-?)Man(?1-?)]Man(?1-?)GlcNAc(?1-?)GlcNAc",
0..101,
0..101,
0
)
.unwrap();
.unwrap();

assert_eq!(
structure.to_string(),
"HexNAc(HexNAc(Hex(Hex(HexNAc(Hexf(NonNdAAc))),Hex(HexNAc(Hexf)))))"
"HexNAc(HexNAc(Hex(Hex(HexNAc(Hexf(NonNAAc))),Hex(HexNAc(Hexf)))))"
);
}

Expand Down
6 changes: 1 addition & 5 deletions rustyms/src/shared/glycan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -528,11 +528,7 @@ const BASE_SUGARS: &[(&str, BaseSugar, &[GlycanSubstituent])] = &[
(
"Neu",
BaseSugar::Nonose,
&[
GlycanSubstituent::Amino,
GlycanSubstituent::Deoxy,
GlycanSubstituent::Acid,
],
&[GlycanSubstituent::Amino, GlycanSubstituent::Acid],
),
(
"Sia",
Expand Down

0 comments on commit 50f3176

Please sign in to comment.