Skip to content

Commit

Permalink
Merge pull request #148 from MannLabs/development
Browse files Browse the repository at this point in the history
Development
  • Loading branch information
jalew188 authored Mar 22, 2024
2 parents 065449a + 7492330 commit 8989a1c
Show file tree
Hide file tree
Showing 20 changed files with 161 additions and 2,835 deletions.
2 changes: 1 addition & 1 deletion .bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 1.2.2
current_version = 1.2.3
commit = True
tag = False
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<build>\d+))?
Expand Down
2 changes: 1 addition & 1 deletion alphabase/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@


__project__ = "alphabase"
__version__ = "1.2.2"
__version__ = "1.2.3"
__license__ = "Apache"
__description__ = "An infrastructure Python package of the AlphaX ecosystem"
__author__ = "Mann Labs"
Expand Down
12 changes: 6 additions & 6 deletions alphabase/peptide/fragment.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ def init_fragment_by_precursor_dataframe(

def update_sliced_fragment_dataframe(
fragment_df: pd.DataFrame,
fragment_mzs: np.ndarray,
fragment_df_vals: np.ndarray,
values: np.ndarray,
frag_start_end_list: List[Tuple[int,int]],
charged_frag_types: List[str]=None,
Expand All @@ -275,8 +275,8 @@ def update_sliced_fragment_dataframe(
fragment_df : pd.DataFrame
fragment dataframe to set the values
fragment_mzs : np.ndarray
The copy np.ndarry of fragment_df
fragment_df_vals : np.ndarray
The `fragment_df.to_numpy(copy=True)`, to prevent readonly assignment.
values : np.ndarray
values to set
Expand All @@ -294,13 +294,13 @@ def update_sliced_fragment_dataframe(
frag_slice_list = [slice(start,end) for start,end in frag_start_end_list]
frag_slices = np.r_[tuple(frag_slice_list)]
if charged_frag_types is None or len(charged_frag_types)==0:
fragment_mzs[frag_slices, :] = values.astype(fragment_mzs.dtype)
fragment_df_vals[frag_slices, :] = values.astype(fragment_df_vals.dtype)
else:
charged_frag_idxes = [fragment_df.columns.get_loc(c) for c in charged_frag_types]
fragment_df.iloc[
frag_slices, charged_frag_idxes
] = values.astype(fragment_mzs.dtype)
fragment_mzs[frag_slices] = fragment_df.values[frag_slices]
] = values.astype(fragment_df_vals.dtype)
fragment_df_vals[frag_slices] = fragment_df.values[frag_slices]

def get_sliced_fragment_dataframe(
fragment_df: pd.DataFrame,
Expand Down
10 changes: 5 additions & 5 deletions alphabase/peptide/precursor.py
Original file line number Diff line number Diff line change
Expand Up @@ -485,13 +485,13 @@ def _count_batchify_df(df_group, mp_batch_size):
count += 1
return count

# `process_bar` should be replaced by more advanced tqdm wrappers created by Sander
# `progress_bar` should be replaced by more advanced tqdm wrappers created by Sander
# I will leave it to alphabase.utils
def calc_precursor_isotope_info_mp(
precursor_df:pd.DataFrame,
processes:int=8,
mp_batch_size:int=10000,
process_bar=None,
progress_bar=None,
min_right_most_intensity:float=0.2,
min_precursor_num_to_run_mp:int=10000,
)->pd.DataFrame:
Expand All @@ -510,7 +510,7 @@ def calc_precursor_isotope_info_mp(
mp_batch_size : int
Multiprocessing batch size. Optional, by default 100000.
process_bar : Callable
progress_bar : Callable
The tqdm-based callback function
to check multiprocessing. Defaults to None.
Expand Down Expand Up @@ -538,8 +538,8 @@ def calc_precursor_isotope_info_mp(
min_right_most_intensity=min_right_most_intensity
), _batchify_df(df_group, mp_batch_size)
)
if process_bar:
processing = process_bar(
if progress_bar:
processing = progress_bar(
processing, _count_batchify_df(
df_group, mp_batch_size
)
Expand Down
36 changes: 21 additions & 15 deletions alphabase/quantification/quant_reader/config_dict_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,39 +9,45 @@
INTABLE_CONFIG = os.path.join(pathlib.Path(__file__).parent.absolute(), "../../../alphabase/constants/const_files/quant_reader_config.yaml") #the yaml config is located one directory below the python library files

def get_input_type_and_config_dict(input_file, input_type_to_use = None):
config_dict = _load_config(INTABLE_CONFIG)
type2relevant_columns = _get_type2relevant_cols(config_dict)
all_config_dicts = _load_config(INTABLE_CONFIG)
type2relevant_columns = _get_type2relevant_cols(all_config_dicts)

if "aq_reformat.tsv" in input_file:
input_file = _get_original_file_from_aq_reformat(input_file)

filename = str(input_file)
if '.csv' in filename:
sep=','
if '.tsv' in filename:
sep='\t'
if '.txt' in filename:
sep='\t'

if 'sep' not in locals():
raise TypeError(f"neither of the file extensions (.tsv, .csv, .txt) detected for file {input_file}! Your filename has to contain one of these extensions. Please modify your file name accordingly.")
sep = _get_seperator(input_file)

uploaded_data_columns = set(pd.read_csv(input_file, sep=sep, nrows=1, encoding ='latin1').columns)
uploaded_data_columns = set(pd.read_csv(input_file, sep=sep, nrows=1).columns)

for input_type in type2relevant_columns.keys():
if (input_type_to_use is not None) and (input_type!=input_type_to_use):
continue
relevant_columns = type2relevant_columns.get(input_type)
relevant_columns = [x for x in relevant_columns if x] #filter None values
if set(relevant_columns).issubset(uploaded_data_columns):
config_dict_type = config_dict.get(input_type)
return input_type, config_dict_type, sep
config_dict = all_config_dicts.get(input_type)
return input_type, config_dict, sep

raise TypeError("format not specified in intable_config.yaml!")

def _get_original_file_from_aq_reformat(input_file):
matched = re.match("(.*)(\..*\.)(aq_reformat\.tsv)",input_file)
return matched.group(1)

def _get_seperator(input_file):
filename = str(input_file)
if '.csv' in filename:
sep=','
if '.tsv' in filename:
sep='\t'
if '.txt' in filename:
sep='\t'

if 'sep' not in locals():
raise TypeError(f"neither of the file extensions (.tsv, .csv, .txt) detected for file {input_file}! Your filename has to contain one of these extensions. Please modify your file name accordingly.")
return sep



def _load_config(config_yaml):
with open(config_yaml, 'r') as stream:
Expand Down
3 changes: 3 additions & 0 deletions alphabase/quantification/quant_reader/quant_reader_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,5 +44,8 @@ def reformat_and_save_input_file(input_file, input_type_to_use = None, use_alpha
raise Exception('Format not recognized!')
return outfile_name

def set_quanttable_config_location(quanttable_config_file):
    """Point the quant readers at a custom quant-reader config YAML.

    Overrides the module-level path ``config_dict_loader.INTABLE_CONFIG``
    so that subsequent input-type detection reads its table configuration
    from *quanttable_config_file* instead of the packaged default.

    Parameters
    ----------
    quanttable_config_file : str
        Path to a quant-reader config YAML file.
    """
    config_dict_loader.INTABLE_CONFIG = quanttable_config_file



11 changes: 6 additions & 5 deletions alphabase/spectral_library/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import logging
import copy
import warnings
import re

import alphabase.peptide.fragment as fragment
import alphabase.peptide.precursor as precursor
Expand Down Expand Up @@ -135,9 +136,9 @@ def fragment_intensity_df(self)->pd.DataFrame:
return self._fragment_intensity_df


def available_fragment_dfs(self)->list:
def available_dense_fragment_dfs(self)->list:
"""
Return the available fragment dataframes
Return the available dense fragment dataframes
By dynamically checking the attributes of the object.
a fragment dataframe is matched with the pattern '_fragment_[attribute_name]_df'
Expand All @@ -148,7 +149,7 @@ def available_fragment_dfs(self)->list:
"""
return [
attr for attr in dir(self)
if attr.startswith('_fragment') and attr.endswith('_df')
if re.match(r'_fragment_.*_df', attr)
]

def copy(self):
Expand Down Expand Up @@ -197,7 +198,7 @@ def append(
"""
if remove_unused_dfs:
current_frag_dfs = self.available_fragment_dfs()
current_frag_dfs = self.available_dense_fragment_dfs()
for attr in current_frag_dfs:
if attr not in dfs_to_append:
delattr(self, attr)
Expand Down Expand Up @@ -500,7 +501,7 @@ def remove_unused_fragments(self):
Fragment dataframes are updated inplace and overwritten.
"""

available_fragments_df = self.available_fragment_dfs()
available_fragments_df = self.available_dense_fragment_dfs()
non_zero_dfs = [
df for df in available_fragments_df
if len(getattr(self, df)) > 0
Expand Down
32 changes: 25 additions & 7 deletions alphabase/spectral_library/flat.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import pandas as pd
import numpy as np
import warnings

from alphabase.spectral_library.base import (
SpecLibBase
)
Expand Down Expand Up @@ -77,10 +79,22 @@ def fragment_df(self)->pd.DataFrame:
def protein_df(self)->pd.DataFrame:
""" Protein dataframe """
return self._protein_df


def available_dense_fragment_dfs(self):
    """Override of :meth:`SpecLibBase.available_dense_fragment_dfs`.

    A flat library keeps all fragments in a single flat table, so there
    are no dense fragment dataframes to report.

    Returns
    -------
    list
        Always an empty list.
    """
    no_dense_dfs: list = []
    return no_dense_dfs

def remove_unused_fragments(self):
    """Override of :meth:`SpecLibBase.remove_unused_fragments`.

    Not supported for a flat library; calling it always fails.

    Raises
    ------
    NotImplementedError
        Unconditionally.
    """
    message = "remove_unused_fragments is not implemented for a flat library"
    raise NotImplementedError(message)

def parse_base_library(self,
library:SpecLibBase,
keep_original_frag_dfs:bool=True,
keep_original_frag_dfs:bool=False,
copy_precursor_df:bool=False,
**kwargs
):
Expand Down Expand Up @@ -121,12 +135,16 @@ def parse_base_library(self,
self._protein_df = pd.DataFrame()

if keep_original_frag_dfs:

self.charged_frag_types = library.fragment_mz_df.columns.values
self._fragment_mz_df = library.fragment_mz_df
self._fragment_intensity_df = library.fragment_intensity_df
else:
self._fragment_mz_df = pd.DataFrame()
self._fragment_intensity_df = pd.DataFrame()
for dense_frag_df in library.available_dense_fragment_dfs():
setattr(self, dense_frag_df, getattr(library, dense_frag_df))

warnings.warn(
"The SpecLibFlat object will have a strictly flat representation in the future. keep_original_frag_dfs=True will be deprecated.",
DeprecationWarning
)


def save_hdf(self, hdf_file:str):
"""Save library dataframes into hdf_file.
Expand Down
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
copyright = '2022, Mann Labs, MPIB'
author = 'Mann Labs, MPIB'

release = "1.2.2"
release = "1.2.3"

# -- General configuration ---------------------------------------------------

Expand Down
Loading

0 comments on commit 8989a1c

Please sign in to comment.