Skip to content

Commit

Permalink
Standardised more features of identified peptides
Browse files Browse the repository at this point in the history
  • Loading branch information
douweschulte committed Oct 24, 2024
1 parent 9df74c4 commit c26d0e9
Show file tree
Hide file tree
Showing 8 changed files with 130 additions and 58 deletions.
68 changes: 65 additions & 3 deletions rustyms/src/identification/identified_peptide.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ use std::path::Path;
use serde::{Deserialize, Serialize};

use super::{
fasta::FastaData, novor::NovorData, opair::OpairData, peaks::PeaksData, MSFraggerData,
MZTabData, MaxQuantData, SageData,
fasta::FastaData, novor::NovorData, opair::OpairData, peaks::PeaksData, system::MassOverCharge,
MSFraggerData, MZTabData, MaxQuantData, SageData,
};
use crate::{
error::CustomError, ontologies::CustomDatabase, peptide::SemiAmbiguous, system::usize::Charge,
Expand Down Expand Up @@ -162,9 +162,71 @@ impl IdentifiedPeptide {
| MetaData::MaxQuant(MaxQuantData { raw_file, .. })
| MetaData::Sage(SageData { raw_file, .. }) => Some(raw_file),
MetaData::MSFragger(MSFraggerData { spectrum, .. }) => Some(&spectrum.file),
MetaData::Novor(_) | MetaData::Fasta(_) | MetaData::None | MetaData::MZTab(_) => None,
MetaData::MZTab(MZTabData { spectra_ref, .. }) => {
spectra_ref.first().map(|r| r.0.as_path()) // TODO: Could contain multiple files
}
MetaData::Novor(_) | MetaData::Fasta(_) | MetaData::None => None,
}
}

/// Get the mz as experimentally determined.
///
/// Returns `None` when the metadata is Fasta or absent, or when the optional
/// mz field of an mzTab or MaxQuant record is empty.
pub fn experimental_mz(&self) -> Option<MassOverCharge> {
    match &self.metadata {
        // These formats always store the mz.
        MetaData::Peaks(PeaksData { mz, .. })
        | MetaData::Novor(NovorData { mz, .. })
        | MetaData::Opair(OpairData { mz, .. })
        | MetaData::MSFragger(MSFraggerData { mz, .. }) => Some(*mz),
        // These formats store the mz as an `Option`, so pass it through as is.
        MetaData::MZTab(MZTabData { mz, .. }) | MetaData::MaxQuant(MaxQuantData { mz, .. }) => {
            *mz
        }
        // Sage stores the mass and the charge; divide them as typed quantities
        // (the same way `ppm_error` derives its theoretical mz) instead of
        // unwrapping the raw values and rebuilding the unit by hand.
        MetaData::Sage(SageData { mass, z, .. }) => Some(*mass / z.to_float()),
        MetaData::Fasta(_) | MetaData::None => None,
    }
}

/// Get the mass as experimentally determined.
///
/// Returns `None` when the metadata is Fasta or absent, or when the optional
/// field the mass is read from (MaxQuant mass, mzTab mz) is empty.
pub fn experimental_mass(&self) -> Option<crate::system::Mass> {
    match &self.metadata {
        MetaData::Fasta(_) | MetaData::None => None,
        // mzTab only stores the mz, so scale it by the charge.
        MetaData::MZTab(MZTabData { mz, z, .. }) => {
            mz.map(|observed_mz| observed_mz * z.to_float())
        }
        // MaxQuant stores the mass as an `Option`, so pass it through as is.
        MetaData::MaxQuant(MaxQuantData { mass: stored, .. }) => *stored,
        // These formats always store the mass.
        MetaData::Peaks(PeaksData { mass: stored, .. })
        | MetaData::Novor(NovorData { mass: stored, .. })
        | MetaData::Opair(OpairData { mass: stored, .. })
        | MetaData::MSFragger(MSFraggerData { mass: stored, .. })
        | MetaData::Sage(SageData { mass: stored, .. }) => Some(*stored),
    }
}

/// Get the absolute ppm error between the experimental and theoretical precursor mz.
///
/// Returns `None` when the experimental mz, the charge, or the peptide is
/// missing, or when the peptide yields no formula.
pub fn ppm_error(&self) -> Option<crate::system::Ratio> {
    let observed = self.experimental_mz()?;
    let charge = self.charge()?.to_float();
    let peptide = self.peptide()?;
    // When the peptide has several possible formulas the last one is used.
    let formula = peptide.formulas().to_vec().pop()?;
    let expected = formula.monoisotopic_mass() / charge;

    Some(expected.ppm(observed))
}

/// Get the absolute mass error between the experimental and theoretical precursor mass.
///
/// Returns `None` when the experimental mass or the peptide is missing, or
/// when the peptide yields no formula.
pub fn mass_error(&self) -> Option<crate::system::Mass> {
    let observed = self.experimental_mass()?;
    let peptide = self.peptide()?;
    // When the peptide has several possible formulas the last one is used.
    let formula = peptide.formulas().to_vec().pop()?;
    let expected = formula.monoisotopic_mass();

    Some((observed - expected).abs())
}
}

/// The required methods for any source of identified peptides
Expand Down
12 changes: 1 addition & 11 deletions rustyms/src/identification/maxquant.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use crate::{
helper_functions::InvertResult,
ontologies::CustomDatabase,
peptide::{SemiAmbiguous, SloppyParsingParameters},
system::{usize::Charge, Mass, MassOverCharge, Ratio, Time},
system::{usize::Charge, Mass, MassOverCharge, Time},
LinearPeptide,
};
use serde::{Deserialize, Serialize};
Expand Down Expand Up @@ -74,8 +74,6 @@ format_family!(
labeling_state: bool, |location: Location, _| location.or_empty().ignore("-1").parse::<u8>(BOOL_ERROR).map(|n| n.map(|n| n != 0));
localisation_probability: f64, |location: Location, _| location.parse::<f64>(NUMBER_ERROR);
mass_analyser: String, |location: Location, _| Ok(location.get_string());
mass_error_da: Mass, |location: Location, _| location.parse::<f64>(NUMBER_ERROR).map(Mass::new::<crate::system::dalton>);
mass_error_ppm: Ratio, |location: Location, _| location.parse::<f64>(NUMBER_ERROR).map(Ratio::new::<crate::system::ratio::ppm>);
mass: Mass, |location: Location, _| location.parse::<f64>(NUMBER_ERROR).map(Mass::new::<crate::system::dalton>);
missed_cleavages: usize, |location: Location, _| location.parse::<usize>(NUMBER_ERROR);
modified_peptide_id:usize, |location: Location, _| location.parse::<usize>(NUMBER_ERROR);
Expand Down Expand Up @@ -170,8 +168,6 @@ pub const MSMS: MaxQuantFormat = MaxQuantFormat {
labeling_state: None,
localisation_probability: Some("localization prob"),
mass_analyser: Some("mass analyzer"),
mass_error_da: Some("mass error [da]"),
mass_error_ppm: Some("mass error [ppm]"),
mass: Some("mass"),
missed_cleavages: Some("missed cleavages"),
modifications: "modifications",
Expand Down Expand Up @@ -235,8 +231,6 @@ pub const MSMS_SCANS: MaxQuantFormat = MaxQuantFormat {
labeling_state: None,
localisation_probability: None,
mass_analyser: Some("mass analyzer"),
mass_error_da: None,
mass_error_ppm: None,
mass: Some("mass"),
missed_cleavages: None,
modifications: "modifications",
Expand Down Expand Up @@ -300,8 +294,6 @@ pub const NOVO_MSMS_SCANS: MaxQuantFormat = MaxQuantFormat {
labeling_state: None,
localisation_probability: None,
mass_analyser: Some("mass analyzer"),
mass_error_da: None,
mass_error_ppm: None,
mass: Some("mass"),
missed_cleavages: None,
modifications: "modifications",
Expand Down Expand Up @@ -365,8 +357,6 @@ pub const SILAC: MaxQuantFormat = MaxQuantFormat {
labeling_state: Some("labeling state"),
localisation_probability: None,
mass_analyser: None,
mass_error_da: Some("mass error [da]"),
mass_error_ppm: Some("mass error [ppm]"),
mass: Some("mass"),
missed_cleavages: None,
modifications: "modifications",
Expand Down
14 changes: 8 additions & 6 deletions rustyms/src/identification/msfragger.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,11 @@ format_family!(
extended_peptide: String, |location: Location, _| Ok(location.get_string());
z: Charge, |location: Location, _| location.parse::<usize>(NUMBER_ERROR).map(Charge::new::<crate::system::e>);
rt: Time, |location: Location, _| location.parse::<f64>(NUMBER_ERROR).map(Time::new::<crate::system::time::s>);
experimental_mass: Mass, |location: Location, _| location.parse::<f64>(NUMBER_ERROR).map(Mass::new::<crate::system::dalton>);
/// Experimental mass
mass: Mass, |location: Location, _| location.parse::<f64>(NUMBER_ERROR).map(Mass::new::<crate::system::dalton>);
calibrated_experimental_mass: Mass, |location: Location, _| location.parse::<f64>(NUMBER_ERROR).map(Mass::new::<crate::system::dalton>);
experimental_mz: MassOverCharge, |location: Location, _| location.parse::<f64>(NUMBER_ERROR).map(MassOverCharge::new::<crate::system::mz>);
/// Experimental mz
mz: MassOverCharge, |location: Location, _| location.parse::<f64>(NUMBER_ERROR).map(MassOverCharge::new::<crate::system::mz>);
calibrated_experimental_mz: MassOverCharge, |location: Location, _| location.parse::<f64>(NUMBER_ERROR).map(MassOverCharge::new::<crate::system::mz>);
expectation: f64, |location: Location, _| location.parse(NUMBER_ERROR);
hyperscore: f64, |location: Location, _| location.parse(NUMBER_ERROR).map(|s: f64| s / 100.0);
Expand Down Expand Up @@ -117,9 +119,9 @@ pub const V21: MSFraggerFormat = MSFraggerFormat {
extended_peptide: "extended peptide",
z: "charge",
rt: "retention",
experimental_mass: "observed mass",
mass: "observed mass",
calibrated_experimental_mass: "calibrated observed mass",
experimental_mz: "observed m/z",
mz: "observed m/z",
calibrated_experimental_mz: "calibrated observed m/z",
expectation: "expectation",
hyperscore: "hyperscore",
Expand Down Expand Up @@ -153,9 +155,9 @@ pub const V22: MSFraggerFormat = MSFraggerFormat {
extended_peptide: "extended peptide",
z: "charge",
rt: "retention",
experimental_mass: "observed mass",
mass: "observed mass",
calibrated_experimental_mass: "calibrated observed mass",
experimental_mz: "observed m/z",
mz: "observed m/z",
calibrated_experimental_mz: "calibrated observed m/z",
expectation: "expectation",
hyperscore: "hyperscore",
Expand Down
46 changes: 38 additions & 8 deletions rustyms/src/identification/mztab.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use std::{
fs::File,
io::{BufRead, BufReader},
ops::Range,
path::PathBuf,
str::FromStr,
};

Expand Down Expand Up @@ -49,11 +50,11 @@ pub struct MZTabData {
/// The charge for this peptide.
pub z: Charge,
/// The experimental mz
pub exp_mz: Option<MassOverCharge>,
pub mz: Option<MassOverCharge>,
/// A URI pointing to the PSM's entry in the experiment it was identified in (e.g. the peptide’s PRIDE entry).
pub uri: Option<String>,
/// The raw file path, the CV term describing the file format, the spectrum identifier, and the CV term describing the identifier format
pub spectra_ref: Vec<(String, Option<CVTerm>, SpectrumId, Option<CVTerm>)>,
pub spectra_ref: Vec<(PathBuf, Option<CVTerm>, SpectrumId, Option<CVTerm>)>,
/// The amino acid before this peptide
pub preceding_aa: FlankingResidue,
/// The amino acid after this peptide
Expand Down Expand Up @@ -450,7 +451,7 @@ impl MZTabData {
.map(|v| Charge::new::<crate::system::e>(v))?
}
},
exp_mz: line
mz: line
.optional_column("exp_mass_to_charge")
.and_then(|(v, r)| {
(v.to_ascii_lowercase() != "null").then(|| {
Expand Down Expand Up @@ -522,7 +523,7 @@ impl MZTabData {

let id = scan_index.map_or_else(|| SpectrumId::Native(scan_id.to_string()), SpectrumId::Index);

Ok((path, file_format.clone(), id, identifier_type.clone()))
Ok((path.into(), file_format.clone(), id, identifier_type.clone()))
})).collect::<Result<Vec<_>, CustomError>>()?
},
preceding_aa: line.required_column("pre")?.0.parse().map_err(|()| {
Expand Down Expand Up @@ -709,6 +710,15 @@ impl Default for SpectrumId {
}
}

impl std::fmt::Display for SpectrumId {
    /// Write the identifier as plain text: the number for an index, the
    /// stored text for a native id.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Native(native) => f.write_str(native),
            Self::Index(index) => write!(f, "{index}"),
        }
    }
}

impl SpectrumId {
/// Get the index if this is an index
pub const fn index(&self) -> Option<usize> {
Expand Down Expand Up @@ -750,6 +760,16 @@ impl std::str::FromStr for FlankingResidue {
}
}

impl std::fmt::Display for FlankingResidue {
    /// Write `Unknown` or `Terminal` for the two marker variants, and the
    /// display form of the contained value for an amino acid.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::AminoAcid(residue) => write!(f, "{residue}"),
            Self::Terminal => f.write_str("Terminal"),
            Self::Unknown => f.write_str("Unknown"),
        }
    }
}

/// A CV term
#[derive(Clone, PartialEq, Eq, Debug, Default, Serialize, Deserialize)]
pub struct CVTerm {
Expand All @@ -771,10 +791,10 @@ impl std::str::FromStr for CVTerm {
let value = &value[1..value.len() - 1];
let mut split = value.splitn(4, ',');
Ok(Self {
ontology: split.next().unwrap_or_default().to_string(),
id: split.next().unwrap_or_default().to_string(),
term: split.next().unwrap_or_default().to_string(),
comment: split.next().unwrap_or_default().to_string(),
ontology: split.next().unwrap_or_default().trim().to_string(),
id: split.next().unwrap_or_default().trim().to_string(),
term: split.next().unwrap_or_default().trim().to_string(),
comment: split.next().unwrap_or_default().trim().to_string(),
})
} else {
Err(CustomError::error(
Expand All @@ -796,6 +816,16 @@ pub enum PSMReliability {
Poor,
}

impl std::fmt::Display for PSMReliability {
    /// Write the name of the reliability level (`High`, `Medium`, or `Poor`).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match self {
            Self::High => "High",
            Self::Medium => "Medium",
            Self::Poor => "Poor",
        };
        f.write_str(label)
    }
}

/// A basic structure for a mzTab file line
#[allow(clippy::upper_case_acronyms)]
enum MZTabLine {
Expand Down
10 changes: 0 additions & 10 deletions rustyms/src/identification/novor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ format_family!(
mz: MassOverCharge, |location: Location, _| location.parse::<f64>(NUMBER_ERROR).map(MassOverCharge::new::<crate::system::mz>);
z: Charge, |location: Location, _| location.parse::<usize>(NUMBER_ERROR).map(Charge::new::<crate::system::e>);
mass: Mass, |location: Location, _| location.parse::<f64>(NUMBER_ERROR).map(Mass::new::<crate::system::dalton>);
ppm: Ratio, |location: Location, _| location.parse(NUMBER_ERROR).map(Ratio::new::<crate::system::ratio::ppm>);
score: f64, |location: Location, _| location.parse::<f64>(NUMBER_ERROR).map(|f| f / 100.0);
peptide: LinearPeptide<SemiAmbiguous>, |location: Location, custom_database: Option<&CustomDatabase>| LinearPeptide::sloppy_pro_forma(
location.full_line(),
Expand All @@ -48,7 +47,6 @@ format_family!(
}) // Skip the F of the F{num} definition
.parse::<usize>(NUMBER_ERROR);
rt: Time, |location: Location, _| location.parse::<f64>(NUMBER_ERROR).map(Time::new::<crate::system::time::min>);
mass_err: Mass, |location: Location, _| location.parse::<f64>(NUMBER_ERROR).map(Mass::new::<crate::system::dalton>);
length: usize, |location: Location, _| location.parse::<usize>(NUMBER_ERROR);
peptide_no_ptm: String, |location: Location, _| Ok(Some(location.get_string()));
protein: usize, |location: Location, _| location.parse::<usize>(NUMBER_ERROR);
Expand Down Expand Up @@ -118,14 +116,12 @@ pub const OLD_DENOVO: NovorFormat = NovorFormat {
mz: "m/z",
z: "z",
mass: "peptide mass",
ppm: "error (ppm)",
score: "score",
peptide: "de novo peptide",
id: None,
spectra_id: None,
fraction: Some("fraction"),
rt: None,
mass_err: None,
length: Some("length"),
peptide_no_ptm: None,
protein: None,
Expand Down Expand Up @@ -155,14 +151,12 @@ pub const OLD_PSM: NovorFormat = NovorFormat {
mz: "m/z",
z: "z",
mass: "mass",
ppm: "error (ppm)",
score: "score",
peptide: "sequence",
id: Some("id"),
spectra_id: None,
fraction: Some("fraction"),
rt: None,
mass_err: None,
length: None,
peptide_no_ptm: None,
protein: Some("# proteins"),
Expand All @@ -180,14 +174,12 @@ pub const NEW_DENOVO: NovorFormat = NovorFormat {
mz: "mz(data)",
z: "z",
mass: "pepmass(denovo)",
ppm: "ppm(1e6*err/(mz*z))",
score: "score",
peptide: "peptide",
id: Some("# id"),
spectra_id: None,
fraction: Some("fraction"),
rt: None,
mass_err: Some("err(data-denovo)"),
length: None,
peptide_no_ptm: None,
protein: Some("# proteins"),
Expand All @@ -205,14 +197,12 @@ pub const NEW_PSM: NovorFormat = NovorFormat {
mz: "mz",
z: "z",
mass: "pepmass",
ppm: "ppm",
score: "score",
peptide: "peptide",
id: Some("#id"),
spectra_id: None,
fraction: Some("fraction"),
rt: None,
mass_err: Some("err(data-denovo)"),
length: Some("length"),
peptide_no_ptm: Some("noptmpeptide"),
protein: Some("protein"),
Expand Down
Loading

0 comments on commit c26d0e9

Please sign in to comment.