Skip to content

Commit

Permalink
Merge pull request #247 from MannLabs/refactor_readers_V
Browse files Browse the repository at this point in the history
Refactor readers v
  • Loading branch information
mschwoer authored Jan 9, 2025
2 parents 3a137d3 + e056315 commit d120c64
Show file tree
Hide file tree
Showing 12 changed files with 453 additions and 238 deletions.
1 change: 0 additions & 1 deletion alphabase/constants/const_files/psm_reader.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,6 @@ library_reader_base:
reader_type: library_reader_base
rt_unit: irt
fixed_C57: False
csv_sep: "\t"
mod_seq_columns:
- 'ModifiedPeptideSequence'
- 'ModifiedPeptide'
Expand Down
5 changes: 1 addition & 4 deletions alphabase/psm_reader/alphapept_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
from alphabase.psm_reader.psm_reader import (
PSMReaderBase,
psm_reader_provider,
psm_reader_yaml,
)


Expand Down Expand Up @@ -53,6 +52,7 @@ class AlphaPeptReader(PSMReaderBase):
"""Reader for AlphaPept's *.ms_data.hdf files."""

_reader_type = "alphapept"
_modification_type = "alphapept"

def __init__(
self,
Expand All @@ -73,9 +73,6 @@ def __init__(
)
self.hdf_dataset = "identifications"

def _init_modification_mapping(self) -> None:
self.modification_mapping = psm_reader_yaml["alphapept"]["modification_mapping"]

def _load_file(self, filename: str) -> pd.DataFrame:
with h5py.File(filename, "r") as _hdf:
dataset = _hdf[self.hdf_dataset]
Expand Down
9 changes: 1 addition & 8 deletions alphabase/psm_reader/maxquant_reader.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""Reader for MaxQuant data."""

import copy
import warnings
from typing import List, Optional

Expand All @@ -12,7 +11,6 @@
from alphabase.psm_reader.psm_reader import (
PSMReaderBase,
psm_reader_provider,
psm_reader_yaml,
)

# make sure all warnings are shown
Expand Down Expand Up @@ -128,6 +126,7 @@ class MaxQuantReader(PSMReaderBase):

_reader_type = "maxquant"
_add_unimod_to_mod_mapping = True
_modification_type = "maxquant"

def __init__( # noqa: PLR0913 many arguments in function definition
self,
Expand Down Expand Up @@ -196,12 +195,6 @@ def __init__( # noqa: PLR0913 many arguments in function definition
self._mod_seq_columns = mod_seq_columns
self.mod_seq_column = "Modified sequence"

def _init_modification_mapping(self) -> None:
self.modification_mapping = copy.deepcopy(
# otherwise maxquant reader will modify the dict inplace
psm_reader_yaml["maxquant"]["modification_mapping"]
)

def _translate_decoy(self) -> None:
if PsmDfCols.DECOY in self._psm_df.columns:
self._psm_df[PsmDfCols.DECOY] = (
Expand Down
170 changes: 170 additions & 0 deletions alphabase/psm_reader/modification_mapper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
"""Module to handle modification mappings for different search engines."""

import copy
from collections import defaultdict
from typing import Dict, Optional

from alphabase.psm_reader.utils import MOD_TO_UNIMOD_DICT, get_extended_modifications


class ModificationMapper:
    """Class to handle modification mappings for different search engines.

    Holds two dictionaries:

    - ``modification_mapping``: AlphaBase modification name -> list of the
      corresponding names in the search engine's format.
    - ``rev_mod_mapping``: search engine modification name -> AlphaBase
      modification name (derived from ``modification_mapping``).

    The mapper always works on its own private copy of the mapping, so the
    ``reader_yaml`` passed in is never modified.
    """

    def __init__(
        self,
        custom_modification_mapping: Optional[Dict[str, str]],
        reader_yaml: Dict,
        modification_type: Optional[str],
        *,
        add_unimod_to_mod_mapping: bool,
    ):
        """Initialize the ModificationMapper.

        Parameters
        ----------
        custom_modification_mapping:
            A custom mapping that is merged into the default mapping; anything
            that is not a dict (e.g. ``None``) is ignored.
            The key of dict is a modification name in AlphaBase format;
            the value could be a str or a list, see below
            ```
            add_modification_mapping({
                'Dimethyl@K': ['K(Dimethyl)'], # list
                'Dimethyl@Any_N-term': '_(Dimethyl)', # str
            })
            ```
        reader_yaml:
            the yaml (read from file) containing the modification mappings
        modification_type:
            the type of modification mapping ("maxquant" or "alphapept"),
            used as key into `reader_yaml`. If None, the default mapping is empty.
        add_unimod_to_mod_mapping:
            whether unimod modifications should be added to the mapping

        """
        self._psm_reader_yaml = reader_yaml
        self._add_unimod_to_mod_mapping = add_unimod_to_mod_mapping
        self._modification_type = modification_type

        self.modification_mapping = None
        self.rev_mod_mapping = None
        # first load the defaults, then layer the custom mapping on top
        self.set_modification_mapping()
        self.add_modification_mapping(custom_modification_mapping)

    def add_modification_mapping(self, custom_modification_mapping: dict) -> None:
        """Append additional modification mappings for the search engine.

        Also creates a reverse mapping from the modification format used by the search engine to the AlphaBase format.

        Parameters
        ----------
        custom_modification_mapping : dict
            The key of dict is a modification name in AlphaBase format;
            the value could be a str or a list, see below
            ```
            add_modification_mapping({
                'Dimethyl@K': ['K(Dimethyl)'], # list
                'Dimethyl@Any_N-term': '_(Dimethyl)', # str
            })
            ```
            Anything that is not a dict (e.g. ``None``) is silently ignored.

        """
        if not isinstance(custom_modification_mapping, dict):
            return

        # normalize all values to lists
        new_modification_mapping = defaultdict(list)
        for key, val in custom_modification_mapping.items():
            if isinstance(val, str):
                new_modification_mapping[key].append(val)
            else:
                new_modification_mapping[key].extend(val)

        if new_modification_mapping:
            # entries of the custom mapping take precedence over existing ones with the same key
            self.set_modification_mapping(
                self.modification_mapping | new_modification_mapping
            )

    def set_modification_mapping(
        self, modification_mapping: Optional[Dict] = None
    ) -> None:
        """Set the modification mapping for the search engine.

        Also creates a reverse mapping from the modification format used by the search engine to the AlphaBase format.

        Parameters
        ----------
        modification_mapping:
            If None: the mapping is (re-)initialized from the reader yaml according to the modification type
            given at construction (or to an empty dict if that type is None).
            If dictionary: the current modification_mapping will be overwritten by (a copy of) this.
            If str: the parameter will be interpreted as a reader type, and the modification_mapping is read from the
            "modification_mapping" section of the psm_reader_yaml

        Raises
        ------
        ValueError
            If `modification_mapping` is a str that is not a key of the reader yaml.

        """
        if modification_mapping is None:
            self._init_modification_mapping()
        elif isinstance(
            modification_mapping, str
        ):  # TODO: remove this overloading of the parameter by introducing yaml key "modification_mapping_type"
            if modification_mapping not in self._psm_reader_yaml:
                raise ValueError(
                    f"Unknown modification mapping: {modification_mapping}"
                )
            # deepcopy: the steps below modify the mapping in place, which must not leak into the shared yaml
            self.modification_mapping = copy.deepcopy(
                self._psm_reader_yaml[modification_mapping]["modification_mapping"]
            )
        else:
            self.modification_mapping = copy.deepcopy(modification_mapping)

        self._str_mods_to_lists()

        if self._add_unimod_to_mod_mapping:
            self._add_all_unimod()
            self._extend_mod_brackets()

        self.rev_mod_mapping = self._get_reversed_mod_mapping()

    def _init_modification_mapping(self) -> None:
        """Initialize the modification mapping from the psm_reader_yaml or as an empty dictionary."""
        if self._modification_type is not None:
            # deepcopy: otherwise the subsequent in-place modifications would alter the shared yaml dict
            self.modification_mapping = copy.deepcopy(
                self._psm_reader_yaml[self._modification_type]["modification_mapping"]
            )
        else:
            self.modification_mapping = {}

    def _add_all_unimod(self) -> None:
        """Add all unimod modifications to the modification mapping."""
        for mod_name, unimod in MOD_TO_UNIMOD_DICT.items():
            if mod_name in self.modification_mapping:
                self.modification_mapping[mod_name].append(unimod)
            else:
                self.modification_mapping[mod_name] = [unimod]

    def _extend_mod_brackets(self) -> None:
        """Update modification_mapping to include different bracket types."""
        for key, mod_list in list(self.modification_mapping.items()):
            self.modification_mapping[key] = get_extended_modifications(mod_list)

    def _str_mods_to_lists(self) -> None:
        """Convert all single strings to lists containing one item in self.modification_mapping."""
        for mod, val in list(self.modification_mapping.items()):
            if isinstance(val, str):
                self.modification_mapping[mod] = [val]

    def _get_reversed_mod_mapping(self) -> Dict[str, str]:
        """Create a reverse mapping from the modification format used by the search engine to the AlphaBase format.

        If several AlphaBase modifications map to the same search engine name, the one occurring last in
        `modification_mapping` wins, except that a "...Protein_N-term" modification never overwrites an
        entry that is already present.
        """
        rev_mod_mapping = {}
        for mod_alphabase_format, mod_other_format in self.modification_mapping.items():
            if isinstance(mod_other_format, (list, tuple)):
                for mod_other_format_ in mod_other_format:
                    if (
                        mod_other_format_ in rev_mod_mapping
                        and mod_alphabase_format.endswith("Protein_N-term")
                    ):
                        continue

                    rev_mod_mapping[mod_other_format_] = mod_alphabase_format
            else:
                rev_mod_mapping[mod_other_format] = mod_alphabase_format

        return rev_mod_mapping
Loading

0 comments on commit d120c64

Please sign in to comment.