Skip to content

Commit

Permalink
Merge pull request #231 from MannLabs/add_psm_reader_tests
Browse files Browse the repository at this point in the history
Add psm reader tests
  • Loading branch information
mschwoer authored Nov 15, 2024
2 parents 469d6a6 + d7ab5af commit 42646da
Show file tree
Hide file tree
Showing 17 changed files with 246 additions and 34 deletions.
2 changes: 1 addition & 1 deletion alphabase/psm_reader/msfragger_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ def _translate_decoy(self, origin_df=None):
)

self._psm_df.proteins = self._psm_df.proteins.apply(lambda x: ";".join(x))
- if not self.keep_decoy:
+ if not self._keep_decoy:
self._psm_df["to_remove"] += self._psm_df.decoy > 0

def _translate_score(self, origin_df=None):
Expand Down
19 changes: 8 additions & 11 deletions alphabase/psm_reader/psm_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def translate_other_modification(mod_str: str, mod_dict: dict) -> str:
return ";".join(ret_mods), []


- def keep_modifications(mod_str: str, mod_set: set) -> str:
+ def _keep_modifications(mod_str: str, mod_set: set) -> str:
"""
Check if modifications of `mod_str` are in `mod_set`.
Expand Down Expand Up @@ -162,8 +162,8 @@ def __init__(
self._init_column_mapping()

self._psm_df = pd.DataFrame()
- self.keep_fdr = fdr
- self.keep_decoy = keep_decoy
+ self._keep_fdr = fdr
+ self._keep_decoy = keep_decoy
self._min_max_rt_norm = False
self._engine_rt_unit = rt_unit
self._min_irt_value = -100
Expand Down Expand Up @@ -311,7 +311,7 @@ def _translate_score(self, origin_df: pd.DataFrame = None):
def _get_table_delimiter(self, _filename):
return get_delimiter(_filename)

- def normalize_rt(self):
+ def _normalize_rt(self):
if "rt" in self.psm_df.columns:
if self._engine_rt_unit == "second":
# self.psm_df['rt_sec'] = self.psm_df.rt
Expand All @@ -336,14 +336,11 @@ def normalize_rt(self):
(self.psm_df.rt - min_rt) / (max_rt - min_rt)
).clip(0, 1)

- def norm_rt(self):
- self.normalize_rt()
-
def normalize_rt_by_raw_name(self):
if "rt" not in self.psm_df.columns:
return
if "rt_norm" not in self.psm_df.columns:
- self.norm_rt()
+ self._normalize_rt()
if "raw_name" not in self.psm_df.columns:
return
for _, df_group in self.psm_df.groupby("raw_name"):
Expand Down Expand Up @@ -490,8 +487,8 @@ def _post_process(self, origin_df: pd.DataFrame):

keep_rows = np.ones(len(self._psm_df), dtype=bool)
if "fdr" in self._psm_df.columns:
- keep_rows &= self._psm_df.fdr <= self.keep_fdr
- if "decoy" in self._psm_df.columns and not self.keep_decoy:
+ keep_rows &= self._psm_df.fdr <= self._keep_fdr
+ if "decoy" in self._psm_df.columns and not self._keep_decoy:
keep_rows &= self._psm_df.decoy == 0

self._psm_df = self._psm_df[keep_rows]
Expand Down Expand Up @@ -528,7 +525,7 @@ def filter_psm_by_modifications(
]
)
self._psm_df.mods = self._psm_df.mods.apply(
- keep_modifications, mod_set=include_mod_set
+ _keep_modifications, mod_set=include_mod_set
)

self._psm_df.dropna(subset=["mods"], inplace=True)
Expand Down
8 changes: 4 additions & 4 deletions alphabase/psm_reader/sage_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -606,12 +606,12 @@ def _transform_table(self, origin_df):
self.psm_df.drop(columns=["scannr"], inplace=True)

def _translate_decoy(self, origin_df):
- if not self.keep_decoy:
+ if not self._keep_decoy:
self._psm_df = self.psm_df[~self.psm_df["decoy"]]

- self._psm_df = self.psm_df[self.psm_df["fdr"] <= self.keep_fdr]
- self._psm_df = self.psm_df[self.psm_df["peptide_fdr"] <= self.keep_fdr]
- self._psm_df = self.psm_df[self.psm_df["protein_fdr"] <= self.keep_fdr]
+ self._psm_df = self.psm_df[self.psm_df["fdr"] <= self._keep_fdr]
+ self._psm_df = self.psm_df[self.psm_df["peptide_fdr"] <= self._keep_fdr]
+ self._psm_df = self.psm_df[self.psm_df["protein_fdr"] <= self._keep_fdr]

# drop peptide_fdr, protein_fdr
self._psm_df.drop(columns=["peptide_fdr", "protein_fdr"], inplace=True)
Expand Down
2 changes: 1 addition & 1 deletion alphabase/spectral_library/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ def _get_fragment_intensity(self, lib_df: pd.DataFrame):
]

# by default, all non-fragment columns are used to group the library
- non_fragment_columns = list(set(lib_df.columns) - set(fragment_columns))
+ non_fragment_columns = sorted(list(set(lib_df.columns) - set(fragment_columns)))

for keys, df_group in tqdm(lib_df.groupby(non_fragment_columns)):
precursor_columns = dict(zip(non_fragment_columns, keys))
Expand Down
26 changes: 12 additions & 14 deletions nbs_tests/psm_reader/psm_reader.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,7 @@
"execution_count": null,
"metadata": {},
"outputs": [],
- "source": [
- "from alphabase.psm_reader.psm_reader import *"
- ]
+ "source": "from alphabase.psm_reader.psm_reader import translate_other_modification, PSMReaderBase, psm_reader_yaml"
},
{
"cell_type": "code",
Expand All @@ -43,11 +41,11 @@
"outputs": [],
"source": [
"#| hide\n",
- "assert 'a',[] == translate_other_modification('A', {'A':'a','B':'b'})\n",
- "assert 'b',[] == translate_other_modification('B', {'A':'a','B':'b'})\n",
- "assert 'a;a',[] == translate_other_modification('A;A', {'A':'a','B':'b'})\n",
- "assert 'a;b',[] == translate_other_modification('A;B', {'A':'a','B':'b'})\n",
- "assert 'a;b',['X'] == translate_other_modification('A;B;X', {'A':'a','B':'b'})"
+ "assert 'a', [] == translate_other_modification('A', {'A': 'a', 'B': 'b'})\n",
+ "assert 'b', [] == translate_other_modification('B', {'A': 'a', 'B': 'b'})\n",
+ "assert 'a;a', [] == translate_other_modification('A;A', {'A': 'a', 'B': 'b'})\n",
+ "assert 'a;b', [] == translate_other_modification('A;B', {'A': 'a', 'B': 'b'})\n",
+ "assert 'a;b', ['X'] == translate_other_modification('A;B;X', {'A': 'a', 'B': 'b'})"
]
},
{
Expand Down Expand Up @@ -106,34 +104,34 @@
" 'Acetyl@Protein_N-term': [\n",
" '_(Acetyl (Protein_N-term))',\n",
" '_(ac)',\n",
- " ]\n",
+ " ],\n",
" 'Carbamidomethyl@C': [\n",
" 'C(Carbamidomethyl (C))',\n",
- " ]\n",
+ " ],\n",
" 'Oxidation@M': [\n",
" 'M(Oxidation (M))',\n",
" 'M(ox)',\n",
- " ]\n",
+ " ],\n",
" 'Phospho@S': [\n",
" 'S(Phospho (S))',\n",
" 'S(Phospho (ST))',\n",
" 'S(Phospho (STY))',\n",
" 'S(ph)',\n",
" 'pS',\n",
- " ]\n",
+ " ],\n",
" 'Phospho@T': [\n",
" 'T(Phospho (T))',\n",
" 'T(Phospho (ST))',\n",
" 'T(Phospho (STY))',\n",
" 'T(ph)',\n",
" 'pT',\n",
- " ]\n",
+ " ],\n",
" 'Phospho@Y': [\n",
" 'Y(Phospho (Y))',\n",
" 'Y(Phospho (STY))',\n",
" 'Y(ph)',\n",
" 'pY',\n",
- " ]\n",
+ " ],\n",
" 'Deamidated@N': ['N(Deamidation (NQ))','N(de)']\n",
" 'Deamidated@Q': ['Q(Deamidation (NQ))','Q(de)']\n",
" 'GlyGly@K': ['K(GlyGly (K))', 'K(gl)']\n",
Expand Down
Empty file added tests/__init__.py
Empty file.
Empty file added tests/integration/__init__.py
Empty file.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading

0 comments on commit 42646da

Please sign in to comment.