Skip to content

Commit

Permalink
Merge pull request #148 from MannLabs/development
Browse files Browse the repository at this point in the history
Development
  • Loading branch information
jalew188 authored Mar 22, 2024
2 parents 065449a + 7492330 commit 8989a1c
Show file tree
Hide file tree
Showing 20 changed files with 161 additions and 2,835 deletions.
2 changes: 1 addition & 1 deletion .bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 1.2.2
current_version = 1.2.3
commit = True
tag = False
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<build>\d+))?
Expand Down
2 changes: 1 addition & 1 deletion alphabase/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@


__project__ = "alphabase"
__version__ = "1.2.2"
__version__ = "1.2.3"
__license__ = "Apache"
__description__ = "An infrastructure Python package of the AlphaX ecosystem"
__author__ = "Mann Labs"
Expand Down
12 changes: 6 additions & 6 deletions alphabase/peptide/fragment.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ def init_fragment_by_precursor_dataframe(

def update_sliced_fragment_dataframe(
fragment_df: pd.DataFrame,
fragment_mzs: np.ndarray,
fragment_df_vals: np.ndarray,
values: np.ndarray,
frag_start_end_list: List[Tuple[int,int]],
charged_frag_types: List[str]=None,
Expand All @@ -275,8 +275,8 @@ def update_sliced_fragment_dataframe(
fragment_df : pd.DataFrame
fragment dataframe to set the values
fragment_mzs : np.ndarray
The copy np.ndarry of fragment_df
fragment_df_vals : np.ndarray
The `fragment_df.to_numpy(copy=True)`, to prevent readonly assignment.
values : np.ndarray
values to set
Expand All @@ -294,13 +294,13 @@ def update_sliced_fragment_dataframe(
frag_slice_list = [slice(start,end) for start,end in frag_start_end_list]
frag_slices = np.r_[tuple(frag_slice_list)]
if charged_frag_types is None or len(charged_frag_types)==0:
fragment_mzs[frag_slices, :] = values.astype(fragment_mzs.dtype)
fragment_df_vals[frag_slices, :] = values.astype(fragment_df_vals.dtype)
else:
charged_frag_idxes = [fragment_df.columns.get_loc(c) for c in charged_frag_types]
fragment_df.iloc[
frag_slices, charged_frag_idxes
] = values.astype(fragment_mzs.dtype)
fragment_mzs[frag_slices] = fragment_df.values[frag_slices]
] = values.astype(fragment_df_vals.dtype)
fragment_df_vals[frag_slices] = fragment_df.values[frag_slices]

def get_sliced_fragment_dataframe(
fragment_df: pd.DataFrame,
Expand Down
10 changes: 5 additions & 5 deletions alphabase/peptide/precursor.py
Original file line number Diff line number Diff line change
Expand Up @@ -485,13 +485,13 @@ def _count_batchify_df(df_group, mp_batch_size):
count += 1
return count

# `process_bar` should be replaced by more advanced tqdm wrappers created by Sander
# `progress_bar` should be replaced by more advanced tqdm wrappers created by Sander
# I will leave it to alphabase.utils
def calc_precursor_isotope_info_mp(
precursor_df:pd.DataFrame,
processes:int=8,
mp_batch_size:int=10000,
process_bar=None,
progress_bar=None,
min_right_most_intensity:float=0.2,
min_precursor_num_to_run_mp:int=10000,
)->pd.DataFrame:
Expand All @@ -510,7 +510,7 @@ def calc_precursor_isotope_info_mp(
mp_batch_size : int
Multiprocessing batch size. Optional, by default 100000.
process_bar : Callable
progress_bar : Callable
The tqdm-based callback function
to check multiprocessing. Defaults to None.
Expand Down Expand Up @@ -538,8 +538,8 @@ def calc_precursor_isotope_info_mp(
min_right_most_intensity=min_right_most_intensity
), _batchify_df(df_group, mp_batch_size)
)
if process_bar:
processing = process_bar(
if progress_bar:
processing = progress_bar(
processing, _count_batchify_df(
df_group, mp_batch_size
)
Expand Down
36 changes: 21 additions & 15 deletions alphabase/quantification/quant_reader/config_dict_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,39 +9,45 @@
INTABLE_CONFIG = os.path.join(pathlib.Path(__file__).parent.absolute(), "../../../alphabase/constants/const_files/quant_reader_config.yaml") #the yaml config is located one directory below the python library files

def get_input_type_and_config_dict(input_file, input_type_to_use = None):
config_dict = _load_config(INTABLE_CONFIG)
type2relevant_columns = _get_type2relevant_cols(config_dict)
all_config_dicts = _load_config(INTABLE_CONFIG)
type2relevant_columns = _get_type2relevant_cols(all_config_dicts)

if "aq_reformat.tsv" in input_file:
input_file = _get_original_file_from_aq_reformat(input_file)

filename = str(input_file)
if '.csv' in filename:
sep=','
if '.tsv' in filename:
sep='\t'
if '.txt' in filename:
sep='\t'

if 'sep' not in locals():
raise TypeError(f"neither of the file extensions (.tsv, .csv, .txt) detected for file {input_file}! Your filename has to contain one of these extensions. Please modify your file name accordingly.")
sep = _get_seperator(input_file)

uploaded_data_columns = set(pd.read_csv(input_file, sep=sep, nrows=1, encoding ='latin1').columns)
uploaded_data_columns = set(pd.read_csv(input_file, sep=sep, nrows=1).columns)

for input_type in type2relevant_columns.keys():
if (input_type_to_use is not None) and (input_type!=input_type_to_use):
continue
relevant_columns = type2relevant_columns.get(input_type)
relevant_columns = [x for x in relevant_columns if x] #filter None values
if set(relevant_columns).issubset(uploaded_data_columns):
config_dict_type = config_dict.get(input_type)
return input_type, config_dict_type, sep
config_dict = all_config_dicts.get(input_type)
return input_type, config_dict, sep

raise TypeError("format not specified in intable_config.yaml!")

def _get_original_file_from_aq_reformat(input_file):
matched = re.match("(.*)(\..*\.)(aq_reformat\.tsv)",input_file)
return matched.group(1)

def _get_seperator(input_file):
filename = str(input_file)
if '.csv' in filename:
sep=','
if '.tsv' in filename:
sep='\t'
if '.txt' in filename:
sep='\t'

if 'sep' not in locals():
raise TypeError(f"neither of the file extensions (.tsv, .csv, .txt) detected for file {input_file}! Your filename has to contain one of these extensions. Please modify your file name accordingly.")
return sep



def _load_config(config_yaml):
with open(config_yaml, 'r') as stream:
Expand Down
3 changes: 3 additions & 0 deletions alphabase/quantification/quant_reader/quant_reader_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,5 +44,8 @@ def reformat_and_save_input_file(input_file, input_type_to_use = None, use_alpha
raise Exception('Format not recognized!')
return outfile_name

def set_quanttable_config_location(quanttable_config_file):
    """Point the quant readers at a custom quant-reader config YAML.

    Overrides the module-level path ``config_dict_loader.INTABLE_CONFIG``
    so that subsequent input-type detection reads its table configuration
    from *quanttable_config_file* instead of the packaged default.

    Parameters
    ----------
    quanttable_config_file : str
        Path to a quant-reader config YAML file.
    """
    config_dict_loader.INTABLE_CONFIG = quanttable_config_file



11 changes: 6 additions & 5 deletions alphabase/spectral_library/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import logging
import copy
import warnings
import re

import alphabase.peptide.fragment as fragment
import alphabase.peptide.precursor as precursor
Expand Down Expand Up @@ -135,9 +136,9 @@ def fragment_intensity_df(self)->pd.DataFrame:
return self._fragment_intensity_df


def available_fragment_dfs(self)->list:
def available_dense_fragment_dfs(self)->list:
"""
Return the available fragment dataframes
Return the available dense fragment dataframes
By dynamically checking the attributes of the object.
a fragment dataframe is matched with the pattern '_fragment_[attribute_name]_df'
Expand All @@ -148,7 +149,7 @@ def available_fragment_dfs(self)->list:
"""
return [
attr for attr in dir(self)
if attr.startswith('_fragment') and attr.endswith('_df')
if re.match(r'_fragment_.*_df', attr)
]

def copy(self):
Expand Down Expand Up @@ -197,7 +198,7 @@ def append(
"""
if remove_unused_dfs:
current_frag_dfs = self.available_fragment_dfs()
current_frag_dfs = self.available_dense_fragment_dfs()
for attr in current_frag_dfs:
if attr not in dfs_to_append:
delattr(self, attr)
Expand Down Expand Up @@ -500,7 +501,7 @@ def remove_unused_fragments(self):
Fragment dataframes are updated inplace and overwritten.
"""

available_fragments_df = self.available_fragment_dfs()
available_fragments_df = self.available_dense_fragment_dfs()
non_zero_dfs = [
df for df in available_fragments_df
if len(getattr(self, df)) > 0
Expand Down
32 changes: 25 additions & 7 deletions alphabase/spectral_library/flat.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import pandas as pd
import numpy as np
import warnings

from alphabase.spectral_library.base import (
SpecLibBase
)
Expand Down Expand Up @@ -77,10 +79,22 @@ def fragment_df(self)->pd.DataFrame:
def protein_df(self)->pd.DataFrame:
""" Protein dataframe """
return self._protein_df


def available_dense_fragment_dfs(self):
    """Override of :meth:`SpecLibBase.available_dense_fragment_dfs`.

    A flat library keeps all fragments in a single flat table, so there
    are no dense fragment dataframes to report.

    Returns
    -------
    list
        Always an empty list.
    """
    no_dense_dfs: list = []
    return no_dense_dfs

def remove_unused_fragments(self):
    """Override of :meth:`SpecLibBase.remove_unused_fragments`.

    Not supported for a flat library; calling it always fails.

    Raises
    ------
    NotImplementedError
        Unconditionally.
    """
    message = "remove_unused_fragments is not implemented for a flat library"
    raise NotImplementedError(message)

def parse_base_library(self,
library:SpecLibBase,
keep_original_frag_dfs:bool=True,
keep_original_frag_dfs:bool=False,
copy_precursor_df:bool=False,
**kwargs
):
Expand Down Expand Up @@ -121,12 +135,16 @@ def parse_base_library(self,
self._protein_df = pd.DataFrame()

if keep_original_frag_dfs:

self.charged_frag_types = library.fragment_mz_df.columns.values
self._fragment_mz_df = library.fragment_mz_df
self._fragment_intensity_df = library.fragment_intensity_df
else:
self._fragment_mz_df = pd.DataFrame()
self._fragment_intensity_df = pd.DataFrame()
for dense_frag_df in library.available_dense_fragment_dfs():
setattr(self, dense_frag_df, getattr(library, dense_frag_df))

warnings.warn(
"The SpecLibFlat object will have a strictly flat representation in the future. keep_original_frag_dfs=True will be deprecated.",
DeprecationWarning
)


def save_hdf(self, hdf_file:str):
"""Save library dataframes into hdf_file.
Expand Down
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
copyright = '2022, Mann Labs, MPIB'
author = 'Mann Labs, MPIB'

release = "1.2.2"
release = "1.2.3"

# -- General configuration ---------------------------------------------------

Expand Down
Loading

0 comments on commit 8989a1c

Please sign in to comment.