Skip to content

Commit

Permalink
Tested out HUPO-PSI/ProForma#8
Browse files Browse the repository at this point in the history
Signed-off-by: Douwe Schulte <d.schulte@uu.nl>
  • Loading branch information
douweschulte committed Apr 11, 2024
1 parent 1c9b22a commit 35a69d7
Show file tree
Hide file tree
Showing 3 changed files with 172 additions and 47 deletions.
192 changes: 149 additions & 43 deletions rustyms/src/complex_peptide.rs
Original file line number Diff line number Diff line change
Expand Up @@ -644,23 +644,26 @@ fn parse_charge_state(
index: usize,
line: &str,
) -> Result<(usize, MolecularCharge), CustomError> {
let (charge_len, charge) = next_num(chars, index + 1, false).ok_or_else(|| {
let (charge_len, total_charge) = next_num(chars, index + 1, false).ok_or_else(|| {
CustomError::error(
"Invalid peptide charge state",
"There should be a number dictating the total charge of the peptide",
Context::line(0, line, index + 1, 1),
)
})?;
if index + 1 + charge_len < chars.len() && chars[index + 1 + charge_len] == b'[' {
let end_index = next_char(chars, index + 2 + charge_len, b']').ok_or_else(|| {
CustomError::error(
"Invalid adduct ion",
"No valid closing delimiter",
Context::line(0, line, index + 2 + charge_len, 1),
)
})?;
let end_index =
end_of_enclosure(chars, index + 2 + charge_len, b'[', b']').ok_or_else(|| {
CustomError::error(
"Invalid adduct ion",
"No valid closing delimiter",
Context::line(0, line, index + 2 + charge_len, 1),
)
})?;
let mut offset = index + 2 + charge_len;
let mut charge_carriers = Vec::new();
let mut found_charge = 0;

for set in chars[index + 2 + charge_len..end_index].split(|c| *c == b',') {
// num
let (count_len, count) = next_num(chars, offset, true).ok_or_else(|| {
Expand All @@ -670,52 +673,75 @@ fn parse_charge_state(
Context::line(0, line, offset, 1),
)
})?;
// element
let element_len = chars[offset + count_len..]
.iter()
.take_while(|c| c.is_ascii_alphabetic())
.count();
let element: Element =
std::str::from_utf8(&chars[offset + count_len..offset + count_len + element_len])
.unwrap()
.try_into()
.map_err(|()| {

// charge
let charge_len = set.iter().rev().take_while(|c| c.is_ascii_digit()).count();
let charge = if charge_len == 0 {
1
} else {
line[offset + set.len() - charge_len..offset + set.len()]
.parse::<i32>()
.map_err(|err| {
CustomError::error(
"Invalid adduct ion",
"Invalid element symbol",
Context::line(0, line, offset + count_len, element_len),
format!("The adduct ion number {err}"),
Context::line(0, line, offset + set.len() - charge_len, charge_len),
)
})?;
// charge
let (_, ion_charge) = next_num(chars, offset + count_len + element_len, true)
.ok_or_else(|| {
CustomError::error(
})?
};
let (charge_len, charge) = match set[set.len() - charge_len - 1] {
b'+' => (charge_len + 1, charge),
b'-' => (charge_len + 1, -charge),
_ => {
return Err(CustomError::error(
"Invalid adduct ion",
"Invalid adduct ion charge",
Context::line(0, line, offset + count_len + element_len, 1),
)
})?;
"The adduct ion number should be preceded by a sign",
Context::line(0, line, offset + set.len() - charge_len - 1, 1),
))
}
};

let formula = MolecularFormula::new(&[
(element, None, 1),
(Element::Electron, None, -ion_charge as i32),
])
.ok_or_else(|| {
CustomError::error(
"Invalid charge carrier formula",
"The given molecular formula contains a part that does not have a defined mass",
Context::line(0, line, index + 2 + charge_len, offset),
)
})?;
// Check for empty formula
if count_len + charge_len == set.len() {
return Err(CustomError::error(
"Invalid adduct ion",
"The adduct ion should have a formula defined",
Context::line(0, line, offset, set.len()),
));
}

charge_carriers.push((count, formula));
// formula
let mut formula = MolecularFormula::from_pro_forma_inner(
line,
offset + count_len..offset + set.len() - charge_len,
)?;
let _ = formula.add((Element::Electron, None, -charge));

// Deduplicate
if let Some((amount, _)) = charge_carriers.iter_mut().find(|(_, f)| *f == formula) {
*amount += count;
} else {
charge_carriers.push((count, formula));
}

offset += set.len() + 1;
found_charge += count * charge as isize;
}
if total_charge == found_charge {
Ok((end_index + 1, MolecularCharge::new(&charge_carriers)))
} else {
Err(CustomError::error(
"Invalid peptide charge state",
"The peptide charge state number has to be equal to the sum of all separate adduct ions",
Context::line(0, line, index, offset),
))
}
Ok((end_index + 1, MolecularCharge::new(&charge_carriers)))
} else {
// If no adduct ions are provided assume it is just protons
Ok((index + charge_len + 1, MolecularCharge::proton(charge)))
Ok((
index + charge_len + 1,
MolecularCharge::proton(total_charge),
))
}
}

Expand Down Expand Up @@ -1035,4 +1061,84 @@ mod tests {
assert!(parse("<[+5]@D,E,R,Te>").is_err());
assert!(parse("<[+5]@D,E,R,N-term:OO>").is_err());
}

// Exercises `parse_charge_state` on a series of charge state definitions:
// first inputs that must parse to a specific `MolecularCharge`, then inputs
// that must be rejected.
#[test]
fn charge_state() {
// Helper: parse `str` as a charge state starting at index 0. On success it
// additionally asserts that the returned end index equals the length of the
// input (the whole string was consumed) and hands back only the parsed charge.
let parse = |str: &str| {
parse_charge_state(str.as_bytes(), 0, str).map(|(len, res)| {
assert_eq!(
len,
str.len(),
"Not full parsed: '{str}', amount parsed: {len} as '{res}'"
);
res
})
};
// Only a total charge, no adduct ion list: expected to be plain protons.
assert_eq!(parse("/1"), Ok(MolecularCharge::proton(1)));
assert_eq!(parse("/5"), Ok(MolecularCharge::proton(5)));
assert_eq!(parse("/-5"), Ok(MolecularCharge::proton(-5)));
// Explicitly listed proton adducts are expected to compare equal to the
// implicit proton form, also when the same adduct is listed twice.
assert_eq!(parse("/1[+H+]"), Ok(MolecularCharge::proton(1)));
assert_eq!(parse("/2[+H+,+H+]"), Ok(MolecularCharge::proton(2)));
// A single non-proton adduct with an implicit ion charge of 1.
assert_eq!(
parse("/1[+Na+]"),
Ok(MolecularCharge::new(&[(
1,
molecular_formula!(Na 1 Electron -1)
)]))
);
// Several adducts, each with a leading count and an explicit ion charge number.
assert_eq!(
parse("/3[2Na+1,1H1+1]"),
Ok(MolecularCharge::new(&[
(2, molecular_formula!(Na 1 Electron -1)),
(1, molecular_formula!(H 1 Electron -1))
]))
);
// A negative count combined with a negative ion charge: expected to be stored
// as count -1 with one extra electron in the formula.
assert_eq!(
parse("/1[-OH-]"),
Ok(MolecularCharge::new(&[(
-1,
molecular_formula!(O 1 H 1 Electron 1)
),]))
);
// A formula consisting of multiple elements with explicit amounts.
assert_eq!(
parse("/1[+N1H3+]"),
Ok(MolecularCharge::new(&[(
1,
molecular_formula!(N 1 H 3 Electron -1)
),]))
);
// An isotope written in its own square brackets nested inside the adduct list.
assert_eq!(
parse("/1[+[15N1]+]"),
Ok(MolecularCharge::new(&[(
1,
molecular_formula!((15)N 1 Electron -1)
),]))
);
// A single ion carrying a charge of 3.
assert_eq!(
parse("/3[+Fe+3]"),
Ok(MolecularCharge::new(&[(
1,
molecular_formula!(Fe 1 Electron -3)
),]))
);
// Spaces around the formula are expected to be accepted and give the same result.
assert_eq!(
parse("/3[+ Fe +3]"),
Ok(MolecularCharge::new(&[(
1,
molecular_formula!(Fe 1 Electron -3)
),]))
);
// Everything below must be rejected. The exact reason per input is not
// asserted here; presumably these cover mismatching total charge, missing
// ion charge or sign, unbalanced brackets, missing count, and missing formula
// -- TODO confirm against the parser's error paths.
assert!(parse("/3[+Fe+]").is_err());
assert!(parse("/3[+Fe]").is_err());
assert!(parse("/3[+Fe 1]").is_err());
assert!(parse("/3[+[54Fe1+3]").is_err());
assert!(parse("/3[+54Fe1]+3]").is_err());
assert!(parse("/1[1H1-1]").is_err());
assert!(parse("/1[1H1+1").is_err());
assert!(parse("/1[1+1]").is_err());
assert!(parse("/1[H+1]").is_err());
assert!(parse("/1[1H]").is_err());
assert!(parse("/1[1H1]").is_err());
assert!(parse("/ 1 [ 1 H 1]").is_err());
}
}
4 changes: 3 additions & 1 deletion rustyms/src/molecular_charge.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@ use serde::{Deserialize, Serialize};
/// A selection of ions that together define the charge of a peptide
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Serialize, Deserialize, Hash)]
pub struct MolecularCharge {
/// The ions that together define the charge of the peptide
/// The ions that together define the charge of the peptide.
/// The first number is the number of times this adduct ion occurs, the molecular formula is the full formula for the adduct ion.
/// The charge for each ion is saved as the number of electrons missing or gained in the molecular formula.
pub charge_carriers: Vec<(isize, MolecularFormula)>,
}

Expand Down
23 changes: 20 additions & 3 deletions rustyms/src/shared/formula.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use crate::{
use std::{
hash::Hash,
num::NonZeroU16,
ops::{Add, AddAssign, Mul, Neg, Sub},
ops::{Add, AddAssign, Mul, Neg, RangeBounds, Sub},
};

/// A molecular formula, a selection of elements of specified isotopes together forming a structure
Expand Down Expand Up @@ -81,11 +81,28 @@ impl MolecularFormula {
/// It can panic if the string contains not UTF8 symbols.
#[allow(dead_code)]
pub fn from_pro_forma(value: &str) -> Result<Self, CustomError> {
let mut index = 0;
Self::from_pro_forma_inner(value, ..)
}

/// See [`Self::from_pro_forma`]. This is a variant to help in parsing a part of a larger line.
pub(crate) fn from_pro_forma_inner(
value: &str,
range: impl RangeBounds<usize>,
) -> Result<Self, CustomError> {
let mut index = match range.start_bound() {
std::ops::Bound::Unbounded => 0,
std::ops::Bound::Included(s) => *s,
std::ops::Bound::Excluded(s) => s + 1,
};
let end = match range.end_bound() {
std::ops::Bound::Unbounded => value.len() - 1,
std::ops::Bound::Included(s) => *s,
std::ops::Bound::Excluded(s) => s - 1,
};
let mut element = None;
let bytes = value.as_bytes();
let mut result = Self::default();
while index < value.len() {
while index <= end {
match bytes[index] {
b'[' => {
index += 1; // Skip the open square bracket
Expand Down

0 comments on commit 35a69d7

Please sign in to comment.