Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add psm reader tests #231

Merged
merged 8 commits on Nov 15, 2024
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion alphabase/psm_reader/msfragger_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ def _translate_decoy(self, origin_df=None):
)

self._psm_df.proteins = self._psm_df.proteins.apply(lambda x: ";".join(x))
if not self.keep_decoy:
if not self._keep_decoy:
self._psm_df["to_remove"] += self._psm_df.decoy > 0

def _translate_score(self, origin_df=None):
Expand Down
19 changes: 8 additions & 11 deletions alphabase/psm_reader/psm_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def translate_other_modification(mod_str: str, mod_dict: dict) -> str:
return ";".join(ret_mods), []


def keep_modifications(mod_str: str, mod_set: set) -> str:
def _keep_modifications(mod_str: str, mod_set: set) -> str:
"""
Check if modifications of `mod_str` are in `mod_set`.

Expand Down Expand Up @@ -162,8 +162,8 @@ def __init__(
self._init_column_mapping()

self._psm_df = pd.DataFrame()
self.keep_fdr = fdr
self.keep_decoy = keep_decoy
self._keep_fdr = fdr
self._keep_decoy = keep_decoy
self._min_max_rt_norm = False
self._engine_rt_unit = rt_unit
self._min_irt_value = -100
Expand Down Expand Up @@ -311,7 +311,7 @@ def _translate_score(self, origin_df: pd.DataFrame = None):
def _get_table_delimiter(self, _filename):
return get_delimiter(_filename)

def normalize_rt(self):
def _normalize_rt(self):
if "rt" in self.psm_df.columns:
if self._engine_rt_unit == "second":
# self.psm_df['rt_sec'] = self.psm_df.rt
Expand All @@ -336,14 +336,11 @@ def normalize_rt(self):
(self.psm_df.rt - min_rt) / (max_rt - min_rt)
).clip(0, 1)

def norm_rt(self):
self.normalize_rt()

def normalize_rt_by_raw_name(self):
if "rt" not in self.psm_df.columns:
return
if "rt_norm" not in self.psm_df.columns:
self.norm_rt()
self._normalize_rt()
if "raw_name" not in self.psm_df.columns:
return
for _, df_group in self.psm_df.groupby("raw_name"):
Expand Down Expand Up @@ -490,8 +487,8 @@ def _post_process(self, origin_df: pd.DataFrame):

keep_rows = np.ones(len(self._psm_df), dtype=bool)
if "fdr" in self._psm_df.columns:
keep_rows &= self._psm_df.fdr <= self.keep_fdr
if "decoy" in self._psm_df.columns and not self.keep_decoy:
keep_rows &= self._psm_df.fdr <= self._keep_fdr
if "decoy" in self._psm_df.columns and not self._keep_decoy:
keep_rows &= self._psm_df.decoy == 0

self._psm_df = self._psm_df[keep_rows]
Expand Down Expand Up @@ -528,7 +525,7 @@ def filter_psm_by_modifications(
]
)
self._psm_df.mods = self._psm_df.mods.apply(
keep_modifications, mod_set=include_mod_set
_keep_modifications, mod_set=include_mod_set
)

self._psm_df.dropna(subset=["mods"], inplace=True)
Expand Down
8 changes: 4 additions & 4 deletions alphabase/psm_reader/sage_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -606,12 +606,12 @@ def _transform_table(self, origin_df):
self.psm_df.drop(columns=["scannr"], inplace=True)

def _translate_decoy(self, origin_df):
if not self.keep_decoy:
if not self._keep_decoy:
self._psm_df = self.psm_df[~self.psm_df["decoy"]]

self._psm_df = self.psm_df[self.psm_df["fdr"] <= self.keep_fdr]
self._psm_df = self.psm_df[self.psm_df["peptide_fdr"] <= self.keep_fdr]
self._psm_df = self.psm_df[self.psm_df["protein_fdr"] <= self.keep_fdr]
self._psm_df = self.psm_df[self.psm_df["fdr"] <= self._keep_fdr]
self._psm_df = self.psm_df[self.psm_df["peptide_fdr"] <= self._keep_fdr]
self._psm_df = self.psm_df[self.psm_df["protein_fdr"] <= self._keep_fdr]

# drop peptide_fdr, protein_fdr
self._psm_df.drop(columns=["peptide_fdr", "protein_fdr"], inplace=True)
Expand Down
2 changes: 1 addition & 1 deletion alphabase/spectral_library/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ def _get_fragment_intensity(self, lib_df: pd.DataFrame):
]

# by default, all non-fragment columns are used to group the library
non_fragment_columns = list(set(lib_df.columns) - set(fragment_columns))
non_fragment_columns = sorted(list(set(lib_df.columns) - set(fragment_columns)))
mschwoer marked this conversation as resolved.
Show resolved Hide resolved

for keys, df_group in tqdm(lib_df.groupby(non_fragment_columns)):
precursor_columns = dict(zip(non_fragment_columns, keys))
Expand Down
26 changes: 12 additions & 14 deletions nbs_tests/psm_reader/psm_reader.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,7 @@
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from alphabase.psm_reader.psm_reader import *"
]
"source": "from alphabase.psm_reader.psm_reader import translate_other_modification, PSMReaderBase, psm_reader_yaml"
},
{
"cell_type": "code",
Expand All @@ -43,11 +41,11 @@
"outputs": [],
"source": [
"#| hide\n",
"assert 'a',[] == translate_other_modification('A', {'A':'a','B':'b'})\n",
"assert 'b',[] == translate_other_modification('B', {'A':'a','B':'b'})\n",
"assert 'a;a',[] == translate_other_modification('A;A', {'A':'a','B':'b'})\n",
"assert 'a;b',[] == translate_other_modification('A;B', {'A':'a','B':'b'})\n",
"assert 'a;b',['X'] == translate_other_modification('A;B;X', {'A':'a','B':'b'})"
"assert 'a', [] == translate_other_modification('A', {'A': 'a', 'B': 'b'})\n",
"assert 'b', [] == translate_other_modification('B', {'A': 'a', 'B': 'b'})\n",
"assert 'a;a', [] == translate_other_modification('A;A', {'A': 'a', 'B': 'b'})\n",
"assert 'a;b', [] == translate_other_modification('A;B', {'A': 'a', 'B': 'b'})\n",
"assert 'a;b', ['X'] == translate_other_modification('A;B;X', {'A': 'a', 'B': 'b'})"
]
},
{
Expand Down Expand Up @@ -106,34 +104,34 @@
" 'Acetyl@Protein_N-term': [\n",
" '_(Acetyl (Protein_N-term))',\n",
" '_(ac)',\n",
" ]\n",
" ],\n",
" 'Carbamidomethyl@C': [\n",
" 'C(Carbamidomethyl (C))',\n",
" ]\n",
" ],\n",
" 'Oxidation@M': [\n",
" 'M(Oxidation (M))',\n",
" 'M(ox)',\n",
" ]\n",
" ],\n",
" 'Phospho@S': [\n",
" 'S(Phospho (S))',\n",
" 'S(Phospho (ST))',\n",
" 'S(Phospho (STY))',\n",
" 'S(ph)',\n",
" 'pS',\n",
" ]\n",
" ],\n",
" 'Phospho@T': [\n",
" 'T(Phospho (T))',\n",
" 'T(Phospho (ST))',\n",
" 'T(Phospho (STY))',\n",
" 'T(ph)',\n",
" 'pT',\n",
" ]\n",
" ],\n",
" 'Phospho@Y': [\n",
" 'Y(Phospho (Y))',\n",
" 'Y(Phospho (STY))',\n",
" 'Y(ph)',\n",
" 'pY',\n",
" ]\n",
" ],\n",
" 'Deamidated@N': ['N(Deamidation (NQ))','N(de)']\n",
" 'Deamidated@Q': ['Q(Deamidation (NQ))','Q(de)']\n",
" 'GlyGly@K': ['K(GlyGly (K))', 'K(gl)']\n",
Expand Down
Empty file added tests/__init__.py
Empty file.
Empty file added tests/integration/__init__.py
Empty file.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading
Loading