MannLabs · mschwoer · Jul 17, 2024 · Jun 25, 2024 · Jun 25, 2024 · Jun 25, 2024
diff --git a/alphabase/constants/const_files/__used_mod.yaml b/alphabase/constants/const_files/__used_mod.yaml
diff --git a/alphabase/constants/const_files/modification.tsv b/alphabase/constants/const_files/modification.tsv
diff --git a/alphabase/constants/const_files/psm_reader.yaml b/alphabase/constants/const_files/psm_reader.yaml
@@ -19,7 +19,7 @@ alphapept:
     'Phospho@S': 'pS'
     'Phospho@T': 'pT'
     'Phospho@Y': 'pY'
-    'Acetyl@Protein N-term': 'a'
+    'Acetyl@Protein_N-term': 'a'
 
 maxquant:
   reader_type: maxquant
@@ -52,10 +52,10 @@ maxquant:
       - 'K(Dimethyl)'
     'Dimethyl@R':
       - 'R(Dimethyl)'
-    'Dimethyl@Any N-term':
+    'Dimethyl@Any_N-term':
       - '(Dimethyl)'
-    'Acetyl@Protein N-term':
-      - '_(Acetyl (Protein N-term))'
+    'Acetyl@Protein_N-term':
+      - '_(Acetyl (Protein N-term))' # label as written by MaxQuant, not an alphabase mod name: keep the space
       - '_(ac)'
     'Carbamidomethyl@C':
       - 'C(Carbamidomethyl (C))'
@@ -124,10 +124,10 @@ msfragger_pepxml:
     - 'Phospho@S' #TY are not needed here
     - 'GlyGly@K'
     - 'Cysteinyl@C'
-    - 'Acetyl@Any N-term'
-    - 'Glu->pyro-Glu@E^Any N-term'
-    - 'Gln->pyro-Glu@Q^Any N-term'
-    - 'Dimethyl@K' # Any N-term is not needed here as it will be infered in-the-fly
+    - 'Acetyl@Any_N-term'
+    - 'Glu->pyro-Glu@E^Any_N-term'
+    - 'Gln->pyro-Glu@Q^Any_N-term'
+    - 'Dimethyl@K' # Any_N-term is not needed here as it will be inferred on the fly
     - 'Methyl@E' #an example of a PTM that can be C-term
   mod_mass_tol: 0.1 # Da
 

diff --git a/alphabase/constants/modification.py b/alphabase/constants/modification.py
@@ -90,14 +90,18 @@ def load_mod_df(
 ):
     global MOD_DF
     MOD_DF = pd.read_table(tsv, keep_default_na=False)
-    _df = MOD_DF[MOD_DF.mod_name.str.contains(" ", regex=False)].copy()
-    _df["mod_name"] = MOD_DF.mod_name.str.replace(" ", "_", regex=False)
-    MOD_DF = pd.concat([MOD_DF, _df], ignore_index=True).drop_duplicates("mod_name")
+
+    if any(mask := MOD_DF["mod_name"].str.contains(" ", regex=False)):
+        raise ValueError(
+            f"Modification names must not contain spaces: {MOD_DF[mask]['mod_name'].values}"
+        )
+
+    MOD_DF.drop_duplicates("mod_name", inplace=True)
     MOD_DF.fillna("", inplace=True)
-    MOD_DF["unimod_id"] = MOD_DF.unimod_id.astype(np.int32)
+    MOD_DF["unimod_id"] = MOD_DF["unimod_id"].astype(np.int32)
     MOD_DF.set_index("mod_name", drop=False, inplace=True)
-    MOD_DF["mass"] = MOD_DF.composition.apply(calc_mass_from_formula)
-    MOD_DF["modloss_original"] = MOD_DF.modloss_composition.apply(
+    MOD_DF["mass"] = MOD_DF["composition"].apply(calc_mass_from_formula)
+    MOD_DF["modloss_original"] = MOD_DF["modloss_composition"].apply(
         calc_mass_from_formula
     )
     MOD_DF["modloss"] = MOD_DF["modloss_original"]

diff --git a/alphabase/peptide/precursor.py b/alphabase/peptide/precursor.py
@@ -296,7 +296,7 @@ def hash_precursor_df(precursor_df: pd.DataFrame, *, seed: int = 0) -> pd.DataFr
 
 def get_mod_seq_formula(seq: str, mods: str) -> list:
     """
-    'PEPTIDE','Acetyl@Any N-term' --> [('C',n), ('H',m), ...]
+    'PEPTIDE','Acetyl@Any_N-term' --> [('C',n), ('H',m), ...]
     """
     formula = {}
     for aa in seq:

diff --git a/alphabase/protein/fasta.py b/alphabase/protein/fasta.py
@@ -468,9 +468,9 @@ def parse_labels(labels: list):
         if len(aa) == 1:
             label_aas += aa
             label_mod_dict[aa] = label
-        elif aa == "Any N-term" or aa == "Any_N-term":
+        elif aa == "Any_N-term":
             nterm_label_mod = label
-        elif aa == "Any C-term" or aa == "Any_C-term":
+        elif aa == "Any_C-term":
             cterm_label_mod = label
     return label_aas, label_mod_dict, nterm_label_mod, cterm_label_mod
 
@@ -815,13 +815,13 @@ def _set_dict(term_dict, site, mod, allow_conflicts):
                     term_dict[site] = term_mod
 
             site, term = parse_term_mod(term_mod)
-            if term == "Any N-term" or term == "Any_N-term":
+            if term == "Any_N-term":
                 _set_dict(pep_nterm, site, term_mod, allow_conflicts)
-            elif term == "Protein N-term" or term == "Protein_N-term":
+            elif term == "Protein_N-term":
                 _set_dict(prot_nterm, site, term_mod, allow_conflicts)
-            elif term == "Any C-term" or term == "Any_C-term":
+            elif term == "Any_C-term":
                 _set_dict(pep_cterm, site, term_mod, allow_conflicts)
-            elif term == "Protein C-term" or term == "Protein_C-term":
+            elif term == "Protein_C-term":
                 _set_dict(prot_cterm, site, term_mod, allow_conflicts)
 
         # for mod in self.fix_mods:

diff --git a/alphabase/psm_reader/msfragger_reader.py b/alphabase/psm_reader/msfragger_reader.py
@@ -46,9 +46,9 @@ def _get_mods_from_masses(sequence, msf_aa_mods):
         for mod_name in mass_mapped_mods:
             if abs(mod_mass - MOD_MASS[mod_name]) < mod_mass_tol:
                 if site == 0:
-                    _mod = mod_name.split("@")[0] + "@Any N-term"
+                    _mod = mod_name.split("@")[0] + "@Any_N-term"
                 elif site == 1:
-                    if mod_name.endswith("^Any N-term"):
+                    if mod_name.endswith("^Any_N-term"):
                         _mod = mod_name
                         site_str = "0"
                     else:
@@ -58,7 +58,7 @@ def _get_mods_from_masses(sequence, msf_aa_mods):
                         _mod = mod_name
                     else:
                         _mod = (
-                            mod_name.split("@")[0] + "@Any C-term"
+                            mod_name.split("@")[0] + "@Any_C-term"
                         )  # what if only Protein C-term is listed?
                     site_str = "-1"
                 else:

diff --git a/alphabase/psm_reader/pfind_reader.py b/alphabase/psm_reader/pfind_reader.py
@@ -23,21 +23,21 @@ def convert_one_pFind_mod(mod):
     if len(site) == 1:
         return name + "@" + site
     elif site == "AnyN-term":
-        return name + "@" + "Any N-term"
+        return name + "@" + "Any_N-term"
     elif site == "ProteinN-term":
-        return name + "@" + "Protein N-term"
+        return name + "@" + "Protein_N-term"
     elif site.startswith("AnyN-term"):
-        return name + "@" + site[-1] + "^Any N-term"
+        return name + "@" + site[-1] + "^Any_N-term"
     elif site.startswith("ProteinN-term"):
-        return name + "@" + site[-1] + "^Protein N-term"
+        return name + "@" + site[-1] + "^Protein_N-term"
     elif site == "AnyC-term":
-        return name + "@" + "Any C-term"
+        return name + "@" + "Any_C-term"
     elif site == "ProteinC-term":
-        return name + "@" + "Protein C-term"
+        return name + "@" + "Protein_C-term"
     elif site.startswith("AnyC-term"):
-        return name + "@" + site[-1] + "^Any C-term"
+        return name + "@" + site[-1] + "^Any_C-term"
     elif site.startswith("ProteinC-term"):
-        return name + "@" + site[-1] + "^Protein C-term"
+        return name + "@" + site[-1] + "^Protein_C-term"
     else:
         return None
 

diff --git a/alphabase/psm_reader/psm_reader.py b/alphabase/psm_reader/psm_reader.py
@@ -185,7 +185,7 @@ def add_modification_mapping(self, modification_mapping: dict):
             ```
             add_modification_mapping({
             'Dimethyl@K': ['K(Dimethyl)'], # list
-            'Dimethyl@Any N-term': '_(Dimethyl)', # str
+            'Dimethyl@Any_N-term': '_(Dimethyl)', # str
             })
             ```
         """
@@ -241,7 +241,7 @@ def _reverse_mod_mapping(self):
             if isinstance(other_mod, (list, tuple)):
                 for _mod in other_mod:
                     if _mod in self.rev_mod_mapping:
-                        if this_mod.endswith("Protein N-term"):
+                        if this_mod.endswith("Protein_N-term"):
                             continue
                     self.rev_mod_mapping[_mod] = this_mod
             else:
@@ -516,7 +516,7 @@ def filter_psm_by_modifications(
                 "Phospho@S",
                 "Phospho@T",
                 "Phospho@Y",
-                "Acetyl@Protein N-term",
+                "Acetyl@Protein_N-term",
             ]
         ),
     ):