Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Fix https://github.com/MannLabs/alphapeptdeep/issues/119; fix adding fixed modifications (fix mods) #127

Merged
merged 1 commit into from
Dec 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 47 additions & 36 deletions alphabase/protein/fasta.py
Original file line number Diff line number Diff line change
Expand Up @@ -488,10 +488,16 @@ def parse_labels(labels:list):
cterm_label_mod = label
return label_aas, label_mod_dict, nterm_label_mod, cterm_label_mod

def create_labeling_peptide_df(peptide_df:pd.DataFrame, labels:list):
def create_labeling_peptide_df(
peptide_df:pd.DataFrame, labels:list,
inplace:bool=False
):
if len(peptide_df) == 0: return peptide_df

df = peptide_df.copy()
if inplace:
df = peptide_df
else:
df = peptide_df.copy()

(
label_aas, label_mod_dict,
Expand Down Expand Up @@ -789,12 +795,12 @@ def __init__(self,
self._parse_fix_and_var_mods()

def _parse_fix_and_var_mods(self):
self.fix_mod_aas = ''
self.fix_mod_prot_nterm_dict = {}
self.fix_mod_prot_cterm_dict = {}
self.fix_mod_pep_nterm_dict = {}
self.fix_mod_pep_cterm_dict = {}
self.fix_mod_dict = {}
# self.fix_mod_aas = ''
# self.fix_mod_prot_nterm_dict = {}
# self.fix_mod_prot_cterm_dict = {}
# self.fix_mod_pep_nterm_dict = {}
# self.fix_mod_pep_cterm_dict = {}
# self.fix_mod_dict = {}

def _set_term_mod(term_mod,
prot_nterm, prot_cterm, pep_nterm, pep_cterm,
Expand Down Expand Up @@ -828,19 +834,19 @@ def _set_dict(term_dict,site,mod,
allow_conflicts
)

for mod in self.fix_mods:
if mod.find('@')+2 == len(mod):
self.fix_mod_aas += mod[-1]
self.fix_mod_dict[mod[-1]] = mod
else:
_set_term_mod(
mod,
self.fix_mod_prot_nterm_dict,
self.fix_mod_prot_cterm_dict,
self.fix_mod_pep_nterm_dict,
self.fix_mod_pep_cterm_dict,
allow_conflicts=False
)
# for mod in self.fix_mods:
# if mod.find('@')+2 == len(mod):
# self.fix_mod_aas += mod[-1]
# self.fix_mod_dict[mod[-1]] = mod
# else:
# _set_term_mod(
# mod,
# self.fix_mod_prot_nterm_dict,
# self.fix_mod_prot_cterm_dict,
# self.fix_mod_pep_nterm_dict,
# self.fix_mod_pep_cterm_dict,
# allow_conflicts=False
# )

self.var_mod_aas = ''
self.var_mod_prot_nterm_dict = {}
Expand All @@ -863,7 +869,7 @@ def _set_dict(term_dict,site,mod,
else:
for mod in self.var_mods:
if mod.find('@')+2 == len(mod):
if mod[-1] in self.fix_mod_dict: continue
# if mod[-1] in self.fix_mod_dict: continue
self.var_mod_aas += mod[-1]
self.var_mod_dict[mod[-1]] = mod
get_var_mods_per_sites = get_var_mods_per_sites_single_mod_on_aa
Expand Down Expand Up @@ -1122,18 +1128,18 @@ def add_mods_for_one_seq(self, sequence:str,
list[str]: list of modification names
list[str]: list of modification sites
"""
fix_mods, fix_mod_sites = get_fix_mods(
sequence, self.fix_mod_aas, self.fix_mod_dict
)
#TODO add prot and pep C-term fix mods
#TODO add prot and pep N-term fix mods

if len(fix_mods) == 0:
fix_mods = ['']
fix_mod_sites = ['']
else:
fix_mods = [fix_mods]
fix_mod_sites = [fix_mod_sites]
# fix_mods, fix_mod_sites = get_fix_mods(
# sequence, self.fix_mod_aas, self.fix_mod_dict
# )
# #TODO add prot and pep C-term fix mods
# #TODO add prot and pep N-term fix mods

# if len(fix_mods) == 0:
# fix_mods = ['']
# fix_mod_sites = ['']
# else:
# fix_mods = [fix_mods]
# fix_mod_sites = [fix_mod_sites]

var_mods_list, var_mod_sites_list = get_var_mods(
sequence, self.var_mod_aas, self.var_mod_dict,
Expand All @@ -1160,12 +1166,12 @@ def add_mods_for_one_seq(self, sequence:str,
return (
list(
';'.join([i for i in items if i]) for items in itertools.product(
fix_mods, nterm_var_mods, var_mods_list
nterm_var_mods, var_mods_list
)
),
list(
';'.join([i for i in items if i]) for items in itertools.product(
fix_mod_sites, nterm_var_mod_sites, var_mod_sites_list
nterm_var_mod_sites, var_mod_sites_list
)
),
)
Expand Down Expand Up @@ -1195,6 +1201,11 @@ def add_modifications(self):
self._precursor_df,
['mods','mod_sites']
)
self._precursor_df.dropna(subset=['mods'], inplace=True)
self._precursor_df = create_labeling_peptide_df(
self._precursor_df, self.fix_mods,
inplace=True
)
self._precursor_df.reset_index(drop=True, inplace=True)

def add_special_modifications(self):
Expand Down
26 changes: 13 additions & 13 deletions nbdev_nbs/constants/aa.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -29,7 +29,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -38,7 +38,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -258,7 +258,7 @@
"90 Z C(1000000) 1.200000e+07"
]
},
"execution_count": 3,
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
Expand Down Expand Up @@ -293,7 +293,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -314,7 +314,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -339,7 +339,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -359,7 +359,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": null,
"metadata": {},
"outputs": [
{
Expand All @@ -382,7 +382,7 @@
" 453.26996726, 396.24850354, 259.18959168, 146.1055277 ]])}"
]
},
"execution_count": 7,
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -404,7 +404,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": null,
"metadata": {},
"outputs": [
{
Expand All @@ -424,7 +424,7 @@
" 1.28094963e+02]])"
]
},
"execution_count": 8,
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -436,7 +436,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -447,7 +447,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand Down
Loading