diff --git a/alphabase/psm_reader/alphapept_reader.py b/alphabase/psm_reader/alphapept_reader.py index 905c7057..c6abbf48 100644 --- a/alphabase/psm_reader/alphapept_reader.py +++ b/alphabase/psm_reader/alphapept_reader.py @@ -1,4 +1,4 @@ -import os +from pathlib import Path from typing import Optional import h5py @@ -18,7 +18,7 @@ def parse_ap(precursor): """Parser to parse peptide strings.""" items = precursor.split("_") - decoy = 1 if len(items) == 3 else 0 + decoy = 1 if len(items) == 3 else 0 # noqa: PLR2004 magic value modseq = items[0] charge = items[-1] @@ -77,7 +77,7 @@ def _load_file(self, filename): with h5py.File(filename, "r") as _hdf: dataset = _hdf[self.hdf_dataset] df = pd.DataFrame({col: dataset[col] for col in dataset}) - df[PsmDfCols.RAW_NAME] = os.path.basename(filename)[: -len(".ms_data.hdf")] + df[PsmDfCols.RAW_NAME] = Path(filename).name[: -len(".ms_data.hdf")] df["precursor"] = df["precursor"].str.decode("utf-8") # df['naked_sequence'] = df['naked_sequence'].str.decode('utf-8') if "scan_no" in df.columns: diff --git a/alphabase/psm_reader/dia_psm_reader.py b/alphabase/psm_reader/dia_psm_reader.py index 024d612e..08fb63a2 100644 --- a/alphabase/psm_reader/dia_psm_reader.py +++ b/alphabase/psm_reader/dia_psm_reader.py @@ -21,7 +21,7 @@ class SpectronautReader(MaxQuantReader): """ - def __init__( + def __init__( # noqa: PLR0913 many arguments in function definition self, *, column_mapping: Optional[dict] = None, @@ -66,7 +66,7 @@ def _load_file(self, filename): class SwathReader(SpectronautReader): - def __init__( + def __init__( # noqa: PLR0913 many arguments in function definition self, *, column_mapping: Optional[dict] = None, @@ -90,7 +90,7 @@ def __init__( class DiannReader(SpectronautReader): - def __init__( + def __init__( # noqa: PLR0913 many arguments in function definition self, *, column_mapping: Optional[dict] = None, @@ -144,7 +144,7 @@ class SpectronautReportReader(MaxQuantReader): """ - def __init__( + def __init__( # noqa: 
PLR0913 many arguments in function definition self, *, column_mapping: Optional[dict] = None, diff --git a/alphabase/psm_reader/maxquant_reader.py b/alphabase/psm_reader/maxquant_reader.py index 3837e8e7..6ed62ea0 100644 --- a/alphabase/psm_reader/maxquant_reader.py +++ b/alphabase/psm_reader/maxquant_reader.py @@ -18,8 +18,8 @@ warnings.filterwarnings("always") mod_to_unimod_dict = {} -for mod_name, unimod_id in MOD_DF[["mod_name", "unimod_id"]].values: - unimod_id = int(unimod_id) +for mod_name, unimod_id_ in MOD_DF[["mod_name", "unimod_id"]].to_numpy(): + unimod_id = int(unimod_id_) if unimod_id in (-1, "-1"): continue if mod_name[-2] == "@": @@ -81,14 +81,14 @@ def parse_mod_seq( 0 for N-term; -1 for C-term; 1 to N for normal modifications. """ - PeptideModSeq = modseq + peptide_mod_seq = modseq underscore_for_ncterm = modseq[0] == "_" mod_list = [] site_list = [] - site = PeptideModSeq.find(mod_sep[0]) + site = peptide_mod_seq.find(mod_sep[0]) while site != -1: - site_end = PeptideModSeq.find(mod_sep[1], site + 1) + 1 - if site_end < len(PeptideModSeq) and PeptideModSeq[site_end] == mod_sep[1]: + site_end = peptide_mod_seq.find(mod_sep[1], site + 1) + 1 + if site_end < len(peptide_mod_seq) and peptide_mod_seq[site_end] == mod_sep[1]: site_end += 1 if underscore_for_ncterm: site_list.append(site - 1) @@ -97,42 +97,42 @@ def parse_mod_seq( start_mod = site if start_mod > 0: start_mod -= 1 - mod_list.append(PeptideModSeq[start_mod:site_end]) - PeptideModSeq = PeptideModSeq[:site] + PeptideModSeq[site_end:] - site = PeptideModSeq.find(mod_sep[0], site) + mod_list.append(peptide_mod_seq[start_mod:site_end]) + peptide_mod_seq = peptide_mod_seq[:site] + peptide_mod_seq[site_end:] + site = peptide_mod_seq.find(mod_sep[0], site) # patch for phos. How many other modification formats does MQ have? 
- site = PeptideModSeq.find("p") + site = peptide_mod_seq.find("p") while site != -1: - mod_list.append(PeptideModSeq[site : site + 2]) + mod_list.append(peptide_mod_seq[site : site + 2]) site_list = [i - 1 if i > site else i for i in site_list] if underscore_for_ncterm: site_list.append(site) else: site_list.append(site + 1) - PeptideModSeq = PeptideModSeq[:site] + PeptideModSeq[site + 1 :] - site = PeptideModSeq.find("p", site) + peptide_mod_seq = peptide_mod_seq[:site] + peptide_mod_seq[site + 1 :] + site = peptide_mod_seq.find("p", site) if fixed_C57: - site = PeptideModSeq.find("C") + site = peptide_mod_seq.find("C") while site != -1: if underscore_for_ncterm: site_list.append(site) else: site_list.append(site + 1) mod_list.append("C" + "Carbamidomethyl (C)".join(mod_sep)) - site = PeptideModSeq.find("C", site + 1) - sequence = PeptideModSeq.strip("_") - nAA = len(sequence) + site = peptide_mod_seq.find("C", site + 1) + sequence = peptide_mod_seq.strip("_") + n_aa = len(sequence) return ( sequence, ";".join(mod_list), - ";".join([str(i) if i <= nAA else "-1" for i in site_list]), + ";".join([str(i) if i <= n_aa else "-1" for i in site_list]), ) class MaxQuantReader(PSMReaderBase): - def __init__( + def __init__( # noqa: PLR0913 many arguments in function definition self, *, column_mapping: Optional[dict] = None, diff --git a/alphabase/psm_reader/msfragger_reader.py b/alphabase/psm_reader/msfragger_reader.py index a2129b88..a191c9dd 100644 --- a/alphabase/psm_reader/msfragger_reader.py +++ b/alphabase/psm_reader/msfragger_reader.py @@ -23,7 +23,7 @@ def _is_fragger_decoy(proteins): mod_mass_tol = psm_reader_yaml["msfragger_pepxml"]["mod_mass_tol"] -def _get_mods_from_masses(sequence, msf_aa_mods): +def _get_mods_from_masses(sequence, msf_aa_mods): # noqa: PLR0912, C901 many branches, too complex TODO: refactor mods = [] mod_sites = [] aa_mass_diffs = [] @@ -78,7 +78,7 @@ def _get_mods_from_masses(sequence, msf_aa_mods): ) -class 
MSFragger_PSM_TSV_Reader(PSMReaderBase): +class MSFragger_PSM_TSV_Reader(PSMReaderBase): # noqa: N801 name should use CapWords convention TODO: refactor def __init__( self, *, @@ -93,7 +93,7 @@ def __init__( class MSFraggerPepXML(PSMReaderBase): - def __init__( + def __init__( # noqa: PLR0913 many arguments in function definition self, *, column_mapping: Optional[dict] = None, @@ -129,7 +129,7 @@ def _load_file(self, filename): msf_df[PsmDfCols.RAW_NAME] = ( msf_df["spectrum"].str.split(".").apply(lambda x: x[0]) ) - msf_df["to_remove"] = 0 # TODO revisit + msf_df["to_remove"] = 0 # TODO: revisit self.column_mapping[PsmDfCols.TO_REMOVE] = "to_remove" return msf_df diff --git a/alphabase/psm_reader/pfind_reader.py b/alphabase/psm_reader/pfind_reader.py index 99f48d23..0230c296 100644 --- a/alphabase/psm_reader/pfind_reader.py +++ b/alphabase/psm_reader/pfind_reader.py @@ -12,7 +12,7 @@ ) -def convert_one_pFind_mod(mod): +def _convert_one_pfind_mod(mod: str) -> Optional[str]: # noqa: C901 too complex (11 > 10) TODO: refactor if mod[-1] == ")": mod = mod[: (mod.find("(") - 1)] idx = mod.rfind("[") @@ -22,40 +22,44 @@ def convert_one_pFind_mod(mod): idx = mod.rfind("[") name = mod[:idx] site = mod[(idx + 1) : -1] + if len(site) == 1: - return name + "@" + site - if site == "AnyN-term": - return name + "@" + "Any_N-term" - if site == "ProteinN-term": - return name + "@" + "Protein_N-term" - if site.startswith("AnyN-term"): - return name + "@" + site[-1] + "^Any_N-term" - if site.startswith("ProteinN-term"): - return name + "@" + site[-1] + "^Protein_N-term" - if site == "AnyC-term": - return name + "@" + "Any_C-term" - if site == "ProteinC-term": - return name + "@" + "Protein_C-term" - if site.startswith("AnyC-term"): - return name + "@" + site[-1] + "^Any_C-term" - if site.startswith("ProteinC-term"): - return name + "@" + site[-1] + "^Protein_C-term" - return None - - -def translate_pFind_mod(mod_str): + return_value = name + "@" + site + elif site == "AnyN-term": + 
return_value = name + "@" + "Any_N-term" + elif site == "ProteinN-term": + return_value = name + "@" + "Protein_N-term" + elif site.startswith("AnyN-term"): + return_value = name + "@" + site[-1] + "^Any_N-term" + elif site.startswith("ProteinN-term"): + return_value = name + "@" + site[-1] + "^Protein_N-term" + elif site == "AnyC-term": + return_value = name + "@" + "Any_C-term" + elif site == "ProteinC-term": + return_value = name + "@" + "Protein_C-term" + elif site.startswith("AnyC-term"): + return_value = name + "@" + site[-1] + "^Any_C-term" + elif site.startswith("ProteinC-term"): + return_value = name + "@" + site[-1] + "^Protein_C-term" + else: + return_value = None + + return return_value + + +def translate_pFind_mod(mod_str): # noqa: N802 name `translate_pFind_mod` should be lowercase TODO: used by peptdeep if not mod_str: return "" ret_mods = [] - for mod in mod_str.split(";"): - mod = convert_one_pFind_mod(mod) + for mod_ in mod_str.split(";"): + mod = _convert_one_pfind_mod(mod_) if not mod or mod not in ap_mod.MOD_INFO_DICT: return pd.NA ret_mods.append(mod) return ";".join(ret_mods) -def get_pFind_mods(pfind_mod_str): +def get_pFind_mods(pfind_mod_str): # noqa: N802 name `get_pFind_mods` should be lowercase TODO: used by peptdeep pfind_mod_str = pfind_mod_str.strip(";") if not pfind_mod_str: return "", "" @@ -84,7 +88,7 @@ def parse_pfind_protein(protein, keep_reverse=True): ) -class pFindReader(PSMReaderBase): +class pFindReader(PSMReaderBase): # noqa: N801 name `pFindReader` should use CapWords convention TODO: used by peptdeep, alpharaw def __init__( self, *, diff --git a/alphabase/psm_reader/psm_reader.py b/alphabase/psm_reader/psm_reader.py index 447f4715..811c6733 100644 --- a/alphabase/psm_reader/psm_reader.py +++ b/alphabase/psm_reader/psm_reader.py @@ -1,6 +1,6 @@ import copy -import os import warnings +from pathlib import Path from typing import NoReturn, Optional import numpy as np @@ -77,7 +77,7 @@ def _keep_modifications(mod_str: str, 
mod_set: set) -> str: #: See `psm_reader.yaml `_ -psm_reader_yaml = load_yaml(os.path.join(CONST_FILE_FOLDER, "psm_reader.yaml")) +psm_reader_yaml = load_yaml(Path(CONST_FILE_FOLDER) / "psm_reader.yaml") class PSMReaderBase: @@ -268,9 +268,7 @@ def load(self, _file) -> pd.DataFrame: return self.import_file(_file) def import_files(self, file_list: list): - df_list = [] - for _file in file_list: - df_list.append(self.import_file(_file)) + df_list = [self.import_file(file) for file in file_list] self._psm_df = pd.concat(df_list, ignore_index=True) return self._psm_df diff --git a/alphabase/psm_reader/sage_reader.py b/alphabase/psm_reader/sage_reader.py index 39001fd6..85b2a222 100644 --- a/alphabase/psm_reader/sage_reader.py +++ b/alphabase/psm_reader/sage_reader.py @@ -1,8 +1,8 @@ import logging import multiprocessing as mp import re -import typing from functools import partial +from typing import Generator, List, NoReturn, Optional, Tuple import numpy as np import pandas as pd @@ -94,7 +94,7 @@ def __call__(self, psm_df: pd.DataFrame) -> pd.DataFrame: translated_psm_df = _apply_translate_modifications_mp(psm_df, translation_df) # 5. Drop PSMs with missing modifications - is_null = translated_psm_df[PsmDfCols.MOD_SITES].isnull() + is_null = translated_psm_df[PsmDfCols.MOD_SITES].isna() translated_psm_df = translated_psm_df[~is_null] if np.sum(is_null) > 0: logging.warning( @@ -105,7 +105,7 @@ def __call__(self, psm_df: pd.DataFrame) -> pd.DataFrame: def _annotate_from_custom_translation( self, discovered_modifications_df: pd.DataFrame, translation_df: pd.DataFrame - ) -> typing.Tuple[pd.DataFrame, pd.DataFrame]: + ) -> Tuple[pd.DataFrame, pd.DataFrame]: """Annotate modifications from custom translation df, if provided. Discovered modifications are first matched using the custom translation dataframe. If no match is found, the modifications are returned for matching using UniMod. 
@@ -129,7 +129,7 @@ def _annotate_from_custom_translation( self.custom_translation_df, on="modification", how="left" ) for _, row in discovered_modifications_df[ - discovered_modifications_df["matched_mod_name"].isnull() + discovered_modifications_df["matched_mod_name"].isna() ].iterrows(): logging.warning( f"No modification found for mass {row['modification']} at position {row['previous_aa']} found in custom_translation_df, will be matched using UniMod" @@ -139,12 +139,12 @@ def _annotate_from_custom_translation( [ translation_df, discovered_modifications_df[ - discovered_modifications_df["matched_mod_name"].notnull() + discovered_modifications_df["matched_mod_name"].notna() ], ] ) discovered_modifications_df = discovered_modifications_df[ - discovered_modifications_df["matched_mod_name"].isnull() + discovered_modifications_df["matched_mod_name"].isna() ] return discovered_modifications_df, translation_df @@ -182,7 +182,7 @@ def _annotate_from_unimod( ) ) for _, row in discovered_modifications_df[ - discovered_modifications_df["matched_mod_name"].isnull() + discovered_modifications_df["matched_mod_name"].isna() ].iterrows(): logging.warning( f"UniMod lookup failed for mass {row['modification']} at position {row['previous_aa']}, will be removed." 
@@ -191,7 +191,7 @@ def _annotate_from_unimod( [ translation_df, discovered_modifications_df[ - discovered_modifications_df["matched_mod_name"].notnull() + discovered_modifications_df["matched_mod_name"].notna() ], ] ) @@ -217,7 +217,7 @@ def _discover_modifications(psm_df: pd.DataFrame) -> pd.DataFrame: .explode() .unique() ) - modifications = modifications[~pd.isnull(modifications)] + modifications = modifications[~pd.isna(modifications)] return pd.DataFrame( list(modifications), columns=["modification", "previous_aa", "is_nterm", "is_cterm", "mass"], @@ -226,7 +226,7 @@ def _discover_modifications(psm_df: pd.DataFrame) -> pd.DataFrame: def _match_modified_sequence( sequence: str, -) -> typing.List[typing.Tuple[str, str, bool, bool, float]]: +) -> List[Tuple[str, str, bool, bool, float]]: """Get all matches with the amino acid location. P[-100.0]EPTIDE -> [('[-100.0]', 'P', False, False, -100.0)] @@ -292,7 +292,7 @@ def _lookup_modification( The name of the matched modification in alphabase format. """ - mass_distance = mod_annotated_df["mass"].values - mass_observed + mass_distance = mod_annotated_df["mass"].to_numpy() - mass_observed ppm_distance = mass_distance / mass_observed * 1e6 ppm_distance = np.abs(ppm_distance) @@ -323,7 +323,7 @@ def _lookup_modification( def _translate_modifications( sequence: str, mod_translation_df: pd.DataFrame -) -> typing.Tuple[typing.Optional[str], typing.Optional[str]]: +) -> Tuple[Optional[str], Optional[str]]: """Translate modifications in the sequence to alphabase style modifications. Parameters @@ -431,7 +431,7 @@ def _apply_translate_modifications( return psm_df -def _batchify_df(df: pd.DataFrame, mp_batch_size: int) -> typing.Generator: +def _batchify_df(df: pd.DataFrame, mp_batch_size: int) -> Generator: """Internal funciton for applying translation modifications in parallel. 
Parameters @@ -548,11 +548,11 @@ def _sage_spec_idx_from_scan_nr(scan_indicator_str: str) -> int: class SageReaderBase(PSMReaderBase): - def __init__( + def __init__( # noqa: PLR0913 many arguments in function definition self, *, - column_mapping: typing.Optional[dict] = None, - modification_mapping: typing.Optional[dict] = None, + column_mapping: Optional[dict] = None, + modification_mapping: Optional[dict] = None, fdr=0.01, keep_decoy=False, rt_unit="second", @@ -575,7 +575,7 @@ def __init__( def _init_column_mapping(self) -> None: self.column_mapping = psm_reader_yaml["sage"]["column_mapping"] - def _load_file(self, filename) -> typing.NoReturn: + def _load_file(self, filename) -> NoReturn: raise NotImplementedError def _transform_table(self, origin_df) -> None: diff --git a/alphabase/spectral_library/reader.py b/alphabase/spectral_library/reader.py index 42b25b28..30d24c52 100644 --- a/alphabase/spectral_library/reader.py +++ b/alphabase/spectral_library/reader.py @@ -1,4 +1,4 @@ -import typing +from typing import List, Optional import numpy as np import pandas as pd @@ -6,7 +6,6 @@ from alphabase.constants._const import PEAK_INTENSITY_DTYPE from alphabase.peptide.mobility import mobility_to_ccs_for_df -from alphabase.psm_reader import psm_reader_provider from alphabase.psm_reader.keys import LibPsmDfCols, PsmDfCols from alphabase.psm_reader.maxquant_reader import MaxQuantReader from alphabase.psm_reader.psm_reader import psm_reader_yaml @@ -14,9 +13,9 @@ class LibraryReaderBase(MaxQuantReader, SpecLibBase): - def __init__( + def __init__( # noqa: PLR0913 many arguments in function definition self, - charged_frag_types: typing.List[str] = [ + charged_frag_types: List[str] = [ "b_z1", "b_z2", "y_z1", @@ -26,15 +25,15 @@ def __init__( "y_modloss_z1", "y_modloss_z2", ], - column_mapping: typing.Optional[dict] = None, - modification_mapping: typing.Optional[dict] = None, - fdr=0.01, - fixed_C57=False, + column_mapping: Optional[dict] = None, + modification_mapping: 
Optional[dict] = None, + fdr: float = 0.01, + fixed_C57: bool = False, mod_seq_columns=psm_reader_yaml["library_reader_base"]["mod_seq_columns"], rt_unit="irt", precursor_mz_min: float = 400, precursor_mz_max: float = 2000, - decoy: typing.Optional[str] = None, + decoy: Optional[str] = None, **kwargs, ): """Base class for reading spectral libraries from long format csv files. @@ -125,7 +124,7 @@ def _find_key_columns(self, lib_df: pd.DataFrame) -> None: if PsmDfCols.MOD_SITES not in lib_df.columns: lib_df[PsmDfCols.MOD_SITES] = "" - def _get_fragment_intensity(self, lib_df: pd.DataFrame): + def _get_fragment_intensity(self, lib_df: pd.DataFrame): # noqa: PLR0912, C901 too many branches, too complex TODO: refactor """Create the self._fragment_intensity dataframe from a given spectral library. In the process, the input dataframe is converted from long format to a precursor dataframe and returned. @@ -152,7 +151,7 @@ def _get_fragment_intensity(self, lib_df: pd.DataFrame): precursor_df_list = [] frag_intens_list = [] - nAA_list = [] + n_aa_list = [] fragment_columns = [ LibPsmDfCols.FRAGMENT_MZ, @@ -169,13 +168,13 @@ def _get_fragment_intensity(self, lib_df: pd.DataFrame): for keys, df_group in tqdm(lib_df.groupby(non_fragment_columns)): precursor_columns = dict(zip(non_fragment_columns, keys)) - nAA = len(precursor_columns[PsmDfCols.SEQUENCE]) + n_aa = len(precursor_columns[PsmDfCols.SEQUENCE]) - intens = np.zeros( - (nAA - 1, len(self.charged_frag_types)), + intensities = np.zeros( + (n_aa - 1, len(self.charged_frag_types)), dtype=PEAK_INTENSITY_DTYPE, ) - for frag_type, frag_num, loss_type, frag_charge, inten in df_group[ + for frag_type_, frag_num_, loss_type, frag_charge, intensity in df_group[ [ LibPsmDfCols.FRAGMENT_TYPE, LibPsmDfCols.FRAGMENT_SERIES, @@ -183,39 +182,39 @@ def _get_fragment_intensity(self, lib_df: pd.DataFrame): LibPsmDfCols.FRAGMENT_CHARGE, LibPsmDfCols.FRAGMENT_INTENSITY, ] - ].values: - if frag_type in "abc": - frag_num -= 1 - elif frag_type 
in "xyz": - frag_num = nAA - frag_num - 1 + ].to_numpy(): + if frag_type_ in "abc": + frag_num = frag_num_ - 1 + elif frag_type_ in "xyz": + frag_num = n_aa - frag_num_ - 1 else: continue if loss_type == "": - frag_type = f"{frag_type}_z{frag_charge}" + frag_type = f"{frag_type_}_z{frag_charge}" elif loss_type == "H3PO4": - frag_type = f"{frag_type}_modloss_z{frag_charge}" + frag_type = f"{frag_type_}_modloss_z{frag_charge}" elif loss_type == "H2O": - frag_type = f"{frag_type}_H2O_z{frag_charge}" + frag_type = f"{frag_type_}_H2O_z{frag_charge}" elif loss_type == "NH3": - frag_type = f"{frag_type}_NH3_z{frag_charge}" + frag_type = f"{frag_type_}_NH3_z{frag_charge}" elif loss_type == "unknown": # DiaNN+fragger - frag_type = f"{frag_type}_z{frag_charge}" + frag_type = f"{frag_type_}_z{frag_charge}" else: continue if frag_type not in frag_col_dict: continue frag_col_idx = frag_col_dict[frag_type] - intens[frag_num, frag_col_idx] = inten - max_inten = np.max(intens) - if max_inten <= 0: + intensities[frag_num, frag_col_idx] = intensity + max_intensity = np.max(intensities) + if max_intensity <= 0: continue - intens /= max_inten + normalized_intensities = intensities / max_intensity precursor_df_list.append(precursor_columns) - frag_intens_list.append(intens) - nAA_list.append(nAA) + frag_intens_list.append(normalized_intensities) + n_aa_list.append(n_aa) df = pd.DataFrame(precursor_df_list) @@ -223,8 +222,8 @@ def _get_fragment_intensity(self, lib_df: pd.DataFrame): np.concatenate(frag_intens_list), columns=self.charged_frag_types ) - indices = np.zeros(len(nAA_list) + 1, dtype=np.int64) - indices[1:] = np.array(nAA_list) - 1 + indices = np.zeros(len(n_aa_list) + 1, dtype=np.int64) + indices[1:] = np.array(n_aa_list) - 1 indices = np.cumsum(indices) df[LibPsmDfCols.FRAG_START_IDX] = indices[:-1] @@ -302,57 +301,3 @@ def _post_process( # legacy SWATHLibraryReader = LibraryReaderBase - - -class LibraryReaderFromRawData(SpecLibBase): - def __init__( - self, - 
charged_frag_types: typing.Optional[typing.List[str]] = None, - precursor_mz_min: float = 400, - precursor_mz_max: float = 2000, - decoy: typing.Optional[str] = None, - **kwargs, - ): - if charged_frag_types is None: - charged_frag_types = [ - "b_z1", - "b_z2", - "y_z1", - "y_z2", - "b_modloss_z1", - "b_modloss_z2", - "y_modloss_z1", - "y_modloss_z2", - ] - super().__init__( - charged_frag_types=charged_frag_types, - precursor_mz_min=precursor_mz_min, - precursor_mz_max=precursor_mz_max, - decoy=decoy, - ) - - def import_psms(self, psm_files: list, psm_type: str) -> None: - psm_reader = psm_reader_provider.get_reader(psm_type) - if isinstance(psm_files, str): - self._precursor_df = psm_reader.import_file(psm_files) - self._psm_df = self._precursor_df - else: - psm_df_list = [] - for psm_file in psm_files: - psm_df_list.append(psm_reader.import_file(psm_file)) - self._precursor_df = pd.concat(psm_df_list, ignore_index=True) - self._psm_df = self._precursor_df - - def extract_fragments(self, raw_files: list) -> None: - """Include two steps: - 1. self.calc_fragment_mz_df() to generate self.fragment_mz_df - 2. Extract self.fragment_intensity_df from RAW files using AlphaRAW. 
- - Parameters - ---------- - raw_files : list - RAW file paths - - """ - self.calc_fragment_mz_df() - # TODO Use AlphaRAW to extract fragment intensities diff --git a/nbs_tests/constants/aa.ipynb b/nbs_tests/constants/aa.ipynb index 02b112cd..e3d1c22e 100644 --- a/nbs_tests/constants/aa.ipynb +++ b/nbs_tests/constants/aa.ipynb @@ -33,7 +33,12 @@ "metadata": {}, "outputs": [], "source": [ - "from alphabase.constants.aa import *" + "import numpy as np\n", + "\n", + "from alphabase.constants.aa import AA_DF, calc_AA_masses, calc_AA_masses_for_same_len_seqs, \\\n", + " calc_sequence_masses_for_same_len_seqs, calc_AA_masses_for_var_len_seqs, update_an_AA, AA_ASCII_MASS, \\\n", + " AA_Composition, replace_atoms, aa_formula\n", + "from alphabase.constants.atom import MASS_H2O" ] }, { @@ -318,9 +323,7 @@ "output_type": "execute_result" } ], - "source": [ - "AA_DF.loc[ord('A'):ord('Z'),:]" - ] + "source": "AA_DF.loc[ord('A'):ord('Z'),:]" }, { "cell_type": "markdown", diff --git a/nbs_tests/constants/atom.ipynb b/nbs_tests/constants/atom.ipynb index 24aedbe3..daa0388f 100644 --- a/nbs_tests/constants/atom.ipynb +++ b/nbs_tests/constants/atom.ipynb @@ -61,9 +61,7 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "from alphabase.constants.atom import *" - ] + "source": "from alphabase.constants.atom import parse_formula, calc_mass_from_formula" }, { "cell_type": "code", diff --git a/nbs_tests/constants/isotope.ipynb b/nbs_tests/constants/isotope.ipynb index d49c0b76..bf85075d 100644 --- a/nbs_tests/constants/isotope.ipynb +++ b/nbs_tests/constants/isotope.ipynb @@ -45,8 +45,11 @@ "metadata": {}, "outputs": [], "source": [ - "from alphabase.constants.isotope import *\n", - "from alphabase.constants.atom import *" + "import numpy as np\n", + "\n", + "from alphabase.constants.isotope import one_element_dist, formula_dist, abundance_convolution, IsotopeDistribution\n", + "from alphabase.constants.atom import CHEM_ISOTOPE_DIST, CHEM_MONO_IDX, 
parse_formula, CHEM_MONO_MASS, MAX_ISOTOPE_LEN, \\\n", + " EMPTY_DIST" ] }, { diff --git a/nbs_tests/constants/modification.ipynb b/nbs_tests/constants/modification.ipynb index 09e41e0e..521d78c8 100644 --- a/nbs_tests/constants/modification.ipynb +++ b/nbs_tests/constants/modification.ipynb @@ -35,8 +35,16 @@ "metadata": {}, "outputs": [], "source": [ - "from alphabase.constants.modification import *\n", - "import alphabase.constants.modification as modification" + "import os\n", + "\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "from alphabase.constants._const import CONST_FILE_FOLDER\n", + "import alphabase.constants.modification as modification\n", + "from alphabase.constants.modification import MOD_DF, calc_modification_mass, load_mod_df, calc_modloss_mass, \\\n", + " add_modifications_for_lower_case_AA, update_all_by_MOD_DF, MOD_INFO_DICT, add_new_modifications, MOD_Composition, \\\n", + " MOD_MASS, MOD_LOSS_IMPORTANCE" ] }, { @@ -330,9 +338,7 @@ "output_type": "execute_result" } ], - "source": [ - "modification.MOD_DF" - ] + "source": "modification.MOD_DF" }, { "cell_type": "markdown", @@ -1448,7 +1454,6 @@ } ], "source": [ - "\n", "modification.MOD_DF = modification.MOD_DF[\n", " (modification.MOD_DF['classification'].isin(['Post-translational','O-linked glycosylation','AA substitution','Multiple','Non-standard residue','Pre-translational']))\n", " & modification.MOD_DF['lower_case_AA']\n", diff --git a/nbs_tests/peptide/fragment.ipynb b/nbs_tests/peptide/fragment.ipynb index 96baf7a0..abdda314 100644 --- a/nbs_tests/peptide/fragment.ipynb +++ b/nbs_tests/peptide/fragment.ipynb @@ -10,10 +10,10 @@ ] }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "cell_type": "code", "outputs": [], + "execution_count": null, "source": [ "#| hide\n", "%reload_ext autoreload\n", @@ -26,8 +26,14 @@ "metadata": {}, "outputs": [], "source": [ - "from alphabase.peptide.fragment import *\n", - "from alphabase.peptide.precursor import 
*" + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "from alphabase.constants.atom import calc_mass_from_formula\n", + "from alphabase.peptide.fragment import get_charged_frag_types, parse_charged_frag_type, \\\n", + " create_fragment_mz_dataframe_by_sort_precursor, get_sliced_fragment_dataframe, update_sliced_fragment_dataframe, \\\n", + " create_fragment_mz_dataframe, flatten_fragments, remove_unused_fragments, join_left, calc_fragment_cardinality\n", + "from alphabase.peptide.precursor import update_precursor_mz " ] }, { diff --git a/nbs_tests/peptide/mass_calc.ipynb b/nbs_tests/peptide/mass_calc.ipynb index 1672ffba..142a141c 100644 --- a/nbs_tests/peptide/mass_calc.ipynb +++ b/nbs_tests/peptide/mass_calc.ipynb @@ -24,10 +24,10 @@ ] }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "cell_type": "code", "outputs": [], + "execution_count": null, "source": [ "#| hide\n", "%reload_ext autoreload\n", @@ -35,12 +35,14 @@ ] }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "cell_type": "code", "outputs": [], + "execution_count": null, "source": [ - "from alphabase.peptide.mass_calc import *" + "import numpy as np\n", + "from alphabase.peptide.mass_calc import calc_b_y_and_peptide_mass, calc_b_y_and_peptide_masses_for_same_len_seqs, \\\n", + " calc_peptide_masses_for_same_len_seqs" ] }, { diff --git a/nbs_tests/peptide/precursor.ipynb b/nbs_tests/peptide/precursor.ipynb index c41f8c52..e7ec8631 100644 --- a/nbs_tests/peptide/precursor.ipynb +++ b/nbs_tests/peptide/precursor.ipynb @@ -23,7 +23,12 @@ "metadata": {}, "outputs": [], "source": [ - "from alphabase.peptide.precursor import *" + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "from alphabase.constants.atom import MASS_ISOTOPE\n", + "from alphabase.peptide.precursor import calc_precursor_isotope_info, hash_precursor_df, get_mod_seq_hash, \\\n", + " get_mod_seq_charge_hash, calc_precursor_isotope_intensity, 
calc_precursor_isotope_intensity_mp" ] }, { diff --git a/nbs_tests/protein/fasta.ipynb b/nbs_tests/protein/fasta.ipynb index 09d84dcd..0fdc7849 100644 --- a/nbs_tests/protein/fasta.ipynb +++ b/nbs_tests/protein/fasta.ipynb @@ -30,8 +30,14 @@ } ], "source": [ - "from alphabase.protein.fasta import *\n", - "import alphabase.protein.fasta as fasta" + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "import alphabase.protein.fasta as fasta\n", + "from alphabase.protein.fasta import get_uniprot_gene_name, protease_dict, Digest, get_fix_mods, get_candidate_sites, \\\n", + " get_var_mod_sites, get_var_mods_per_sites_multi_mods_on_aa, get_var_mods, get_var_mods_per_sites_single_mod_on_aa, \\\n", + " parse_term_mod, parse_labels, add_single_peptide_labeling, create_labeling_peptide_df, protein_idxes_to_names, \\\n", + " append_special_modifications, SpecLibFasta" ] }, { @@ -49,9 +55,7 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "assert 'trypsin_not_p' in protease_dict, 'trypsin_not_p not in protease_dict, why?'" - ] + "source": "assert 'trypsin_not_p' in protease_dict, 'trypsin_not_p not in protease_dict, why?'" }, { "cell_type": "code", @@ -233,7 +237,7 @@ ], "source": [ "#| hide\n", - "fasta.get_var_mods_per_sites = get_var_mods_per_sites_multi_mods_on_aa\n", + "fasta.get_var_mods_per_sites = get_var_mods_per_sites_multi_mods_on_aa # note: fasta module is patched here!\n", "seq = 'AMCMSTYK'\n", "candidate_sites = get_candidate_sites(seq, 'MSTY')\n", "mod_sites_list = get_var_mod_sites(seq, 'MSTY', 0, 3, 20)\n", @@ -305,7 +309,7 @@ ], "source": [ "#| hide\n", - "fasta.get_var_mods_per_sites = get_var_mods_per_sites_single_mod_on_aa\n", + "fasta.get_var_mods_per_sites = get_var_mods_per_sites_single_mod_on_aa # note: fasta module is patched here!\n", "seq = 'AMCMSTYK'\n", "candidate_sites = get_candidate_sites(seq, 'MSTY')\n", "mod_sites_list = get_var_mod_sites(seq, 'MSTY', 1, 3, 20)\n", diff --git 
a/nbs_tests/protein/test_lcp.ipynb b/nbs_tests/protein/test_lcp.ipynb index f5428247..48b1d1c2 100644 --- a/nbs_tests/protein/test_lcp.ipynb +++ b/nbs_tests/protein/test_lcp.ipynb @@ -5,9 +5,7 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "# from alphabase.protein.lcp_digest import *" - ] + "source": "# from alphabase.protein.lcp_digest import get_substring_indices" }, { "cell_type": "code", diff --git a/nbs_tests/psm_reader/alphapept_reader.ipynb b/nbs_tests/psm_reader/alphapept_reader.ipynb index d1af6d84..f905196b 100644 --- a/nbs_tests/psm_reader/alphapept_reader.ipynb +++ b/nbs_tests/psm_reader/alphapept_reader.ipynb @@ -29,7 +29,10 @@ "metadata": {}, "outputs": [], "source": [ - "from alphabase.psm_reader.alphapept_reader import *\n", + "import os\n", + "from alphabase.psm_reader.alphapept_reader import register_readers\n", + "from alphabase.psm_reader import psm_reader_yaml, psm_reader_provider\n", + "\n", "register_readers()" ] }, @@ -59,9 +62,7 @@ "output_type": "execute_result" } ], - "source": [ - "psm_reader_yaml['alphapept']['column_mapping']" - ] + "source": "psm_reader_yaml['alphapept']['column_mapping']" }, { "cell_type": "code", diff --git a/nbs_tests/psm_reader/dia_psm_reader.ipynb b/nbs_tests/psm_reader/dia_psm_reader.ipynb index 2d15c477..49bae49d 100644 --- a/nbs_tests/psm_reader/dia_psm_reader.ipynb +++ b/nbs_tests/psm_reader/dia_psm_reader.ipynb @@ -36,7 +36,10 @@ "metadata": {}, "outputs": [], "source": [ - "from alphabase.psm_reader.dia_psm_reader import *\n", + "import numpy as np\n", + "\n", + "from alphabase.psm_reader import psm_reader_yaml, psm_reader_provider\n", + "from alphabase.psm_reader.dia_psm_reader import register_readers\n", "register_readers()" ] }, @@ -1039,10 +1042,10 @@ "evalue": "", "output_type": "error", "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mAssertionError\u001b[0m Traceback (most recent call 
last)", - "Cell \u001b[0;32mIn[13], line 25\u001b[0m\n\u001b[1;32m 23\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m np\u001b[38;5;241m.\u001b[39msum(\u001b[38;5;241m~\u001b[39mdiann_reader\u001b[38;5;241m.\u001b[39mpsm_df\u001b[38;5;241m.\u001b[39mmods\u001b[38;5;241m.\u001b[39mstr\u001b[38;5;241m.\u001b[39mcontains(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mAcetyl@Any_N-term\u001b[39m\u001b[38;5;124m'\u001b[39m)) \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m4\u001b[39m\n\u001b[1;32m 24\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m np\u001b[38;5;241m.\u001b[39msum(diann_reader\u001b[38;5;241m.\u001b[39mpsm_df\u001b[38;5;241m.\u001b[39mmods\u001b[38;5;241m.\u001b[39mstr\u001b[38;5;241m.\u001b[39mcontains(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mOxidation@M\u001b[39m\u001b[38;5;124m'\u001b[39m)) \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m2\u001b[39m\n\u001b[0;32m---> 25\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mset\u001b[39m(diann_reader\u001b[38;5;241m.\u001b[39mmodification_mapping[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mPhospho@S\u001b[39m\u001b[38;5;124m'\u001b[39m])\u001b[38;5;241m==\u001b[39m\u001b[38;5;28mset\u001b[39m([\n\u001b[1;32m 26\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mpS\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[1;32m 27\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mS(ph)\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[1;32m 28\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mS(UniMod:21)\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[1;32m 29\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mS(Phospho (S))\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[1;32m 30\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mS(Phospho (ST))\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[1;32m 31\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mS(Phospho (STY))\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[1;32m 32\u001b[0m 
\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mS[ph]\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[1;32m 33\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mS[UniMod:21]\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[1;32m 34\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mS[Phospho (S)]\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[1;32m 35\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mS[Phospho (ST)]\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[1;32m 36\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mS[Phospho (STY)]\u001b[39m\u001b[38;5;124m'\u001b[39m])\n", - "\u001b[0;31mAssertionError\u001b[0m: " + "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m", + "\u001B[0;31mAssertionError\u001B[0m Traceback (most recent call last)", + "Cell \u001B[0;32mIn[13], line 25\u001B[0m\n\u001B[1;32m 23\u001B[0m \u001B[38;5;28;01massert\u001B[39;00m np\u001B[38;5;241m.\u001B[39msum(\u001B[38;5;241m~\u001B[39mdiann_reader\u001B[38;5;241m.\u001B[39mpsm_df\u001B[38;5;241m.\u001B[39mmods\u001B[38;5;241m.\u001B[39mstr\u001B[38;5;241m.\u001B[39mcontains(\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mAcetyl@Any_N-term\u001B[39m\u001B[38;5;124m'\u001B[39m)) \u001B[38;5;241m==\u001B[39m \u001B[38;5;241m4\u001B[39m\n\u001B[1;32m 24\u001B[0m \u001B[38;5;28;01massert\u001B[39;00m np\u001B[38;5;241m.\u001B[39msum(diann_reader\u001B[38;5;241m.\u001B[39mpsm_df\u001B[38;5;241m.\u001B[39mmods\u001B[38;5;241m.\u001B[39mstr\u001B[38;5;241m.\u001B[39mcontains(\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mOxidation@M\u001B[39m\u001B[38;5;124m'\u001B[39m)) \u001B[38;5;241m==\u001B[39m \u001B[38;5;241m2\u001B[39m\n\u001B[0;32m---> 25\u001B[0m \u001B[38;5;28;01massert\u001B[39;00m \u001B[38;5;28mset\u001B[39m(diann_reader\u001B[38;5;241m.\u001B[39mmodification_mapping[\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mPhospho@S\u001B[39m\u001B[38;5;124m'\u001B[39m])\u001B[38;5;241m==\u001B[39m\u001B[38;5;28mset\u001B[39m([\n\u001B[1;32m 26\u001B[0m 
\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mpS\u001B[39m\u001B[38;5;124m'\u001B[39m,\n\u001B[1;32m 27\u001B[0m \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mS(ph)\u001B[39m\u001B[38;5;124m'\u001B[39m,\n\u001B[1;32m 28\u001B[0m \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mS(UniMod:21)\u001B[39m\u001B[38;5;124m'\u001B[39m,\n\u001B[1;32m 29\u001B[0m \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mS(Phospho (S))\u001B[39m\u001B[38;5;124m'\u001B[39m,\n\u001B[1;32m 30\u001B[0m \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mS(Phospho (ST))\u001B[39m\u001B[38;5;124m'\u001B[39m,\n\u001B[1;32m 31\u001B[0m \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mS(Phospho (STY))\u001B[39m\u001B[38;5;124m'\u001B[39m,\n\u001B[1;32m 32\u001B[0m \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mS[ph]\u001B[39m\u001B[38;5;124m'\u001B[39m,\n\u001B[1;32m 33\u001B[0m \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mS[UniMod:21]\u001B[39m\u001B[38;5;124m'\u001B[39m,\n\u001B[1;32m 34\u001B[0m \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mS[Phospho (S)]\u001B[39m\u001B[38;5;124m'\u001B[39m,\n\u001B[1;32m 35\u001B[0m \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mS[Phospho (ST)]\u001B[39m\u001B[38;5;124m'\u001B[39m,\n\u001B[1;32m 36\u001B[0m \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mS[Phospho (STY)]\u001B[39m\u001B[38;5;124m'\u001B[39m])\n", + "\u001B[0;31mAssertionError\u001B[0m: " ] } ], diff --git a/nbs_tests/psm_reader/maxquant_reader.ipynb b/nbs_tests/psm_reader/maxquant_reader.ipynb index 16b5f745..63c5f8a7 100644 --- a/nbs_tests/psm_reader/maxquant_reader.ipynb +++ b/nbs_tests/psm_reader/maxquant_reader.ipynb @@ -22,7 +22,9 @@ "metadata": {}, "outputs": [], "source": [ - "from alphabase.psm_reader.maxquant_reader import *\n", + "from alphabase.psm_reader import psm_reader_yaml, psm_reader_provider\n", + "from alphabase.psm_reader.maxquant_reader import register_readers, parse_mod_seq\n", + "\n", "register_readers()" ] }, diff --git a/nbs_tests/psm_reader/msfragger_reader.ipynb b/nbs_tests/psm_reader/msfragger_reader.ipynb index 
b9551103..bd8a931e 100644 --- a/nbs_tests/psm_reader/msfragger_reader.ipynb +++ b/nbs_tests/psm_reader/msfragger_reader.ipynb @@ -22,7 +22,9 @@ "metadata": {}, "outputs": [], "source": [ - "from alphabase.psm_reader.msfragger_reader import *\n", + "from pyteomics import pepxml\n", + "\n", + "from alphabase.psm_reader.msfragger_reader import register_readers, MSFraggerPepXML\n", "from alphabase.peptide.fragment import create_fragment_mz_dataframe\n", "register_readers()" ] diff --git a/nbs_tests/psm_reader/pfind_reader.ipynb b/nbs_tests/psm_reader/pfind_reader.ipynb index fc392e3b..abe907e1 100644 --- a/nbs_tests/psm_reader/pfind_reader.ipynb +++ b/nbs_tests/psm_reader/pfind_reader.ipynb @@ -22,6 +22,7 @@ "metadata": {}, "outputs": [], "source": [ + "from alphabase.psm_reader import psm_reader_yaml, psm_reader_provider\n", "#| hide\n", "%reload_ext autoreload\n", "%autoreload 2" @@ -33,7 +34,7 @@ "metadata": {}, "outputs": [], "source": [ - "from alphabase.psm_reader.pfind_reader import *\n", + "from alphabase.psm_reader.pfind_reader import register_readers, parse_pfind_protein, get_pFind_mods\n", "register_readers()" ] }, @@ -85,9 +86,7 @@ "output_type": "execute_result" } ], - "source": [ - "psm_reader_yaml['pfind']['column_mapping']" - ] + "source": "psm_reader_yaml['pfind']['column_mapping']" }, { "cell_type": "markdown", diff --git a/nbs_tests/spectral_library/decoy_library.ipynb b/nbs_tests/spectral_library/decoy_library.ipynb index 94093772..c853efff 100644 --- a/nbs_tests/spectral_library/decoy_library.ipynb +++ b/nbs_tests/spectral_library/decoy_library.ipynb @@ -22,7 +22,8 @@ "metadata": {}, "outputs": [], "source": [ - "from alphabase.spectral_library.decoy import *" + "from alphabase.spectral_library.base import SpecLibBase\n", + "from alphabase.spectral_library.decoy import decoy_lib_provider " ] }, { diff --git a/nbs_tests/spectral_library/flat_library.ipynb b/nbs_tests/spectral_library/flat_library.ipynb index 6f80b275..9f65d848 100644 --- 
a/nbs_tests/spectral_library/flat_library.ipynb +++ b/nbs_tests/spectral_library/flat_library.ipynb @@ -15,7 +15,11 @@ "metadata": {}, "outputs": [], "source": [ - "from alphabase.spectral_library.flat import *" + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "from alphabase.spectral_library.base import SpecLibBase\n", + "from alphabase.spectral_library.flat import SpecLibFlat " ] }, { diff --git a/nbs_tests/spectral_library/library_reader.ipynb b/nbs_tests/spectral_library/library_reader.ipynb index 67d00bf6..1723b54c 100644 --- a/nbs_tests/spectral_library/library_reader.ipynb +++ b/nbs_tests/spectral_library/library_reader.ipynb @@ -38,7 +38,10 @@ } ], "source": [ - "from alphabase.spectral_library.reader import *" + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "from alphabase.spectral_library.reader import LibraryReaderBase " ] }, { diff --git a/nbs_tests/spectral_library/translate.ipynb b/nbs_tests/spectral_library/translate.ipynb index 89915061..99414857 100644 --- a/nbs_tests/spectral_library/translate.ipynb +++ b/nbs_tests/spectral_library/translate.ipynb @@ -29,7 +29,11 @@ "metadata": {}, "outputs": [], "source": [ - "from alphabase.spectral_library.translate import *" + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "from alphabase.spectral_library.base import SpecLibBase\n", + "from alphabase.spectral_library.translate import create_modified_sequence, speclib_to_single_df, translate_to_tsv" ] }, { diff --git a/ruff-lint-psm-readers.toml b/ruff-lint-psm-readers.toml index 814c380d..b40ce591 100644 --- a/ruff-lint-psm-readers.toml +++ b/ruff-lint-psm-readers.toml @@ -6,8 +6,6 @@ select = [ # TODO excluding explicity is not great but it is a workaround for now exclude = [ - "**/psm_reader/*.py", # TODO remove - "**/spectral_library/reader.py", # TODO remove "**/*.ipynb", "**/tests/*", "setup.py", @@ -34,6 +32,14 @@ ignore = [ # "PD002", #pandas-use-of-inplace-argument # TODO revisit + # these still need to be 
resolved: + "D", "ANN", "ARG002", + "N803", # argument name should be lowercase + "FBT002", "FBT001", # Boolean default positional argument in function definition + + # reader-specific + "FA100", # Add `from __future__ import annotations` to simplify `typing.Optional` + # same as pyproject.toml "E501", # Line too long (ruff wraps code, but not docstrings) "B028", # No explicit `stacklevel` keyword argument found (for warnings)