diff --git a/alphabase/spectral_library/base.py b/alphabase/spectral_library/base.py index 671b333f..5361bd45 100644 --- a/alphabase/spectral_library/base.py +++ b/alphabase/spectral_library/base.py @@ -193,6 +193,7 @@ def append( other: "SpecLibBase", dfs_to_append: typing.List[str] = [ "_precursor_df", + "_fragment_df", "_fragment_intensity_df", "_fragment_mz_df", "_fragment_intensity_predicted_df", @@ -224,6 +225,7 @@ def append( None """ + if remove_unused_dfs: current_frag_dfs = self.available_dense_fragment_dfs() for attr in current_frag_dfs: @@ -263,15 +265,16 @@ def check_matching_columns(df1, df2): else: matching_columns.append([]) - n_fragments = [] + n_dense_fragments = [] + # get subset of dfs_to_append starting with _fragment for attr in dfs_to_append: - if attr.startswith("_fragment") and hasattr(self, attr): + if attr in self.available_dense_fragment_dfs() and hasattr(self, attr): n_current_fragments = len(getattr(self, attr)) if n_current_fragments > 0: - n_fragments += [n_current_fragments] + n_dense_fragments += [n_current_fragments] - if not np.all(np.array(n_fragments) == n_fragments[0]): + if len(set(n_dense_fragments)) > 1: raise ValueError( "The libraries can't be appended as the number of fragments in the current libraries are not the same." ) @@ -284,19 +287,44 @@ def check_matching_columns(df1, df2): other_df = getattr(other, attr)[column].copy() if attr.startswith("_precursor"): - frag_idx_increment = 0 - for fragment_df in ["_fragment_intensity_df", "_fragment_mz_df"]: - if ( - hasattr(self, fragment_df) - and len(getattr(self, fragment_df)) > 0 - ): - frag_idx_increment = len(getattr(self, fragment_df)) - - if "frag_start_idx" in other_df.columns: - other_df["frag_start_idx"] += frag_idx_increment - - if "frag_stop_idx" in other_df.columns: - other_df["frag_stop_idx"] += frag_idx_increment + # we iterate over the types of fragment dataframes + fragment_df_mapping = { + # dense fragment dataframes + "": ["_fragment_intensity_df", "_fragment_mz_df"], + # flat fragment dataframes + "flat_": ["_fragment_df"], + } + + # Update indices for each fragment dataframe type + for prefix, fragment_df_list in fragment_df_mapping.items(): + # obtain frag_idx_increment and check if it is the same for all fragment dataframes + # an increment of 0 is allowed, but if not 0, it must be the same for all dense fragment dataframes + frag_idx_increment = 0 + for fragment_df in fragment_df_list: + if ( + hasattr(self, fragment_df) + and len(getattr(self, fragment_df)) > 0 + ): + if ( + frag_idx_increment != 0 + and len(getattr(self, fragment_df)) != 0 + and frag_idx_increment + != len(getattr(self, fragment_df)) + ): + raise ValueError( + f"The number of fragments in the {fragment_df} dataframe must be the same as in all other dense fragment dataframes" + ) + else: + frag_idx_increment = len(getattr(self, fragment_df)) + + # update the indices + start_col = f"{prefix}frag_start_idx" + stop_col = f"{prefix}frag_stop_idx" + + if start_col in other_df.columns: + other_df[start_col] += frag_idx_increment + if stop_col in other_df.columns: + other_df[stop_col] += frag_idx_increment setattr( self, diff --git a/alphabase/spectral_library/flat.py b/alphabase/spectral_library/flat.py index 62e9db7c..e1944c05 100644 --- a/alphabase/spectral_library/flat.py +++ b/alphabase/spectral_library/flat.py @@ -81,12 +81,6 @@ def protein_df(self) -> pd.DataFrame: """Protein dataframe""" return self._protein_df - def available_dense_fragment_dfs(self): - """Return the available dense fragment dataframes. - This method is inherited from :class:`SpecLibBase` and will return an empty list for a flat library. - """ - return [] - def remove_unused_fragments(self): """Remove unused fragments from fragment_df. This method is inherited from :class:`SpecLibBase` and has not been implemented for a flat library. diff --git a/nbs_tests/spectral_library/flat_library.ipynb b/nbs_tests/spectral_library/flat_library.ipynb index d371863e..6f80b275 100644 --- a/nbs_tests/spectral_library/flat_library.ipynb +++ b/nbs_tests/spectral_library/flat_library.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -11,24 +11,16 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.\n" - ] - } - ], + "outputs": [], "source": [ "from alphabase.spectral_library.flat import *" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -41,341 +33,7 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 6/6 [00:00<00:00, 2663.05it/s]\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
mzintensitytype
0609.3007810.450362121
1511.3238531.000000121
2510.2323300.106543121
3412.2554320.374123121
4411.1639400.069116121
5313.1870420.173858121
6321.6947020.515072121
7545.3293460.745664121
8326.1710510.14370398
9432.2452701.000000121
10397.2081600.09488898
11361.2081600.377585121
12496.2765810.05498098
13686.2756350.103734121
14588.2987671.000000121
15349.1717830.092058121
16661.2956540.198974121
17563.3187870.774316121
18494.2973021.000000121
19256.1291810.64971598
20347.2288820.882733121
21403.1976010.35178198
22490.2296140.40047498
23701.2905880.24435098
24603.3136600.63100098
25762.2583010.084908121
26664.2814330.328738121
27497.2830510.284129121
28496.1915280.276969121
29268.1655580.05755498
30398.2146300.262853121
31267.0740360.08743998
32329.1931761.000000121
33435.1639400.06162798
34698.3120730.602346121
35600.3351441.000000121
36611.2800290.141106121
37513.3031010.705295121
38498.1959530.108914121
39400.2190550.279959121
40331.1976010.492018121
\n", - "
" - ], - "text/plain": [ - " mz intensity type\n", - "0 609.300781 0.450362 121\n", - "1 511.323853 1.000000 121\n", - "2 510.232330 0.106543 121\n", - "3 412.255432 0.374123 121\n", - "4 411.163940 0.069116 121\n", - "5 313.187042 0.173858 121\n", - "6 321.694702 0.515072 121\n", - "7 545.329346 0.745664 121\n", - "8 326.171051 0.143703 98\n", - "9 432.245270 1.000000 121\n", - "10 397.208160 0.094888 98\n", - "11 361.208160 0.377585 121\n", - "12 496.276581 0.054980 98\n", - "13 686.275635 0.103734 121\n", - "14 588.298767 1.000000 121\n", - "15 349.171783 0.092058 121\n", - "16 661.295654 0.198974 121\n", - "17 563.318787 0.774316 121\n", - "18 494.297302 1.000000 121\n", - "19 256.129181 0.649715 98\n", - "20 347.228882 0.882733 121\n", - "21 403.197601 0.351781 98\n", - "22 490.229614 0.400474 98\n", - "23 701.290588 0.244350 98\n", - "24 603.313660 0.631000 98\n", - "25 762.258301 0.084908 121\n", - "26 664.281433 0.328738 121\n", - "27 497.283051 0.284129 121\n", - "28 496.191528 0.276969 121\n", - "29 268.165558 0.057554 98\n", - "30 398.214630 0.262853 121\n", - "31 267.074036 0.087439 98\n", - "32 329.193176 1.000000 121\n", - "33 435.163940 0.061627 98\n", - "34 698.312073 0.602346 121\n", - "35 600.335144 1.000000 121\n", - "36 611.280029 0.141106 121\n", - "37 513.303101 0.705295 121\n", - "38 498.195953 0.108914 121\n", - "39 400.219055 0.279959 121\n", - "40 331.197601 0.492018 121" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "#| hide\n", "tsv_str = \"\"\"PrecursorCharge\tModifiedPeptide\tStrippedPeptide\tiRT\tLabeledPeptide\tPrecursorMz\tFragmentLossType\tFragmentNumber\tFragmentType\tFragmentCharge\tFragmentMz\tRelativeIntensity\tIonMobility\n", @@ -439,182 +97,7 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
nAAsequencechargemobilityprecursor_mzrtmodsmod_sitesfrag_start_idxfrag_stop_idxrt_normccsflat_frag_start_idxflat_frag_stop_idx
07AVVVSPK20.9390.206779-22.849740Phospho@S5060.075327366.85887706
17DPLAVDK20.9379.208161-15.0871006120.199375367.043100613
27MGSLDSK20.9409.161712-27.563500Phospho@S312180.000000366.5644381316
37SVSFSLK10.9847.39611235.014110Phospho@S318241.000000183.1781711625
47VSVSPGR20.9431.167001-23.930850Phospho@S;Phospho@S2;424300.058050366.2548332534
57YSLSPSK20.9431.191326-6.428198Phospho@S430360.337745366.2545093441
\n", - "
" - ], - "text/plain": [ - " nAA sequence charge mobility precursor_mz rt \\\n", - "0 7 AVVVSPK 2 0.9 390.206779 -22.849740 \n", - "1 7 DPLAVDK 2 0.9 379.208161 -15.087100 \n", - "2 7 MGSLDSK 2 0.9 409.161712 -27.563500 \n", - "3 7 SVSFSLK 1 0.9 847.396112 35.014110 \n", - "4 7 VSVSPGR 2 0.9 431.167001 -23.930850 \n", - "5 7 YSLSPSK 2 0.9 431.191326 -6.428198 \n", - "\n", - " mods mod_sites frag_start_idx frag_stop_idx rt_norm \\\n", - "0 Phospho@S 5 0 6 0.075327 \n", - "1 6 12 0.199375 \n", - "2 Phospho@S 3 12 18 0.000000 \n", - "3 Phospho@S 3 18 24 1.000000 \n", - "4 Phospho@S;Phospho@S 2;4 24 30 0.058050 \n", - "5 Phospho@S 4 30 36 0.337745 \n", - "\n", - " ccs flat_frag_start_idx flat_frag_stop_idx \n", - "0 366.858877 0 6 \n", - "1 367.043100 6 13 \n", - "2 366.564438 13 16 \n", - "3 183.178171 16 25 \n", - "4 366.254833 25 34 \n", - "5 366.254509 34 41 " - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "#| hide\n", "flat_lib.precursor_df" @@ -622,7 +105,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -641,7 +124,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -665,529 +148,7 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 6/6 [00:00<00:00, 2457.12it/s]\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
b_z1b_z2y_z1y_z2b_modloss_z1b_modloss_z2y_modloss_z1y_modloss_z2
00.0000000.0000000.0000000.0000000.0000000.00.0000000.0
10.0000000.0000000.4503620.0000000.0000000.01.0000000.0
20.0000000.0000000.1065430.0000000.0000000.00.3741230.0
30.0000000.0000000.0691160.0000000.0000000.00.1738580.0
40.0000000.0000000.0000000.0000000.0000000.00.0000000.0
50.0000000.0000000.0000000.0000000.0000000.00.0000000.0
60.0000000.0000000.0000000.5150720.0000000.00.0000000.0
70.0000000.0000000.7456640.0000000.0000000.00.0000000.0
80.1437030.0000001.0000000.0000000.0000000.00.0000000.0
90.0948880.0000000.3775850.0000000.0000000.00.0000000.0
100.0549800.0000000.0000000.0000000.0000000.00.0000000.0
110.0000000.0000000.0000000.0000000.0000000.00.0000000.0
120.0000000.0000000.1037340.0000000.0000000.01.0000000.0
130.0000000.0000000.0000000.0000000.0000000.00.0000000.0
140.0000000.0000000.0000000.0000000.0000000.00.0000000.0
150.0000000.0000000.0920580.0000000.0000000.00.0000000.0
160.0000000.0000000.0000000.0000000.0000000.00.0000000.0
170.0000000.0000000.0000000.0000000.0000000.00.0000000.0
180.0000000.0000000.0000000.0000000.0000000.00.0000000.0
190.0000000.0000000.1989740.0000000.0000000.00.7743160.0
200.0000000.0000001.0000000.0000000.6497150.00.0000000.0
210.0000000.0000000.8827330.0000000.3517810.00.0000000.0
220.0000000.0000000.0000000.0000000.4004740.00.0000000.0
230.2443500.0000000.0000000.0000000.6310000.00.0000000.0
240.0000000.0000000.0849080.0000000.0000000.00.3287380.0
250.0000000.0000000.0000000.0000000.0000000.00.2841290.0
260.0000000.0000000.2769690.0000000.0575540.00.2628530.0
270.0000000.0874391.0000000.0000000.0616270.00.0000000.0
280.0000000.0000000.0000000.0000000.0000000.00.0000000.0
290.0000000.0000000.0000000.0000000.0000000.00.0000000.0
300.0000000.0000000.6023460.0000000.0000000.01.0000000.0
310.0000000.0000000.1411060.0000000.0000000.00.7052950.0
320.0000000.0000000.1089140.0000000.0000000.00.2799590.0
330.0000000.0000000.4920180.0000000.0000000.00.0000000.0
340.0000000.0000000.0000000.0000000.0000000.00.0000000.0
350.0000000.0000000.0000000.0000000.0000000.00.0000000.0
\n", - "
" - ], - "text/plain": [ - " b_z1 b_z2 y_z1 y_z2 b_modloss_z1 b_modloss_z2 \\\n", - "0 0.000000 0.000000 0.000000 0.000000 0.000000 0.0 \n", - "1 0.000000 0.000000 0.450362 0.000000 0.000000 0.0 \n", - "2 0.000000 0.000000 0.106543 0.000000 0.000000 0.0 \n", - "3 0.000000 0.000000 0.069116 0.000000 0.000000 0.0 \n", - "4 0.000000 0.000000 0.000000 0.000000 0.000000 0.0 \n", - "5 0.000000 0.000000 0.000000 0.000000 0.000000 0.0 \n", - "6 0.000000 0.000000 0.000000 0.515072 0.000000 0.0 \n", - "7 0.000000 0.000000 0.745664 0.000000 0.000000 0.0 \n", - "8 0.143703 0.000000 1.000000 0.000000 0.000000 0.0 \n", - "9 0.094888 0.000000 0.377585 0.000000 0.000000 0.0 \n", - "10 0.054980 0.000000 0.000000 0.000000 0.000000 0.0 \n", - "11 0.000000 0.000000 0.000000 0.000000 0.000000 0.0 \n", - "12 0.000000 0.000000 0.103734 0.000000 0.000000 0.0 \n", - "13 0.000000 0.000000 0.000000 0.000000 0.000000 0.0 \n", - "14 0.000000 0.000000 0.000000 0.000000 0.000000 0.0 \n", - "15 0.000000 0.000000 0.092058 0.000000 0.000000 0.0 \n", - "16 0.000000 0.000000 0.000000 0.000000 0.000000 0.0 \n", - "17 0.000000 0.000000 0.000000 0.000000 0.000000 0.0 \n", - "18 0.000000 0.000000 0.000000 0.000000 0.000000 0.0 \n", - "19 0.000000 0.000000 0.198974 0.000000 0.000000 0.0 \n", - "20 0.000000 0.000000 1.000000 0.000000 0.649715 0.0 \n", - "21 0.000000 0.000000 0.882733 0.000000 0.351781 0.0 \n", - "22 0.000000 0.000000 0.000000 0.000000 0.400474 0.0 \n", - "23 0.244350 0.000000 0.000000 0.000000 0.631000 0.0 \n", - "24 0.000000 0.000000 0.084908 0.000000 0.000000 0.0 \n", - "25 0.000000 0.000000 0.000000 0.000000 0.000000 0.0 \n", - "26 0.000000 0.000000 0.276969 0.000000 0.057554 0.0 \n", - "27 0.000000 0.087439 1.000000 0.000000 0.061627 0.0 \n", - "28 0.000000 0.000000 0.000000 0.000000 0.000000 0.0 \n", - "29 0.000000 0.000000 0.000000 0.000000 0.000000 0.0 \n", - "30 0.000000 0.000000 0.602346 0.000000 0.000000 0.0 \n", - "31 0.000000 0.000000 0.141106 0.000000 0.000000 0.0 \n", - "32 0.000000 0.000000 0.108914 0.000000 0.000000 0.0 \n", - "33 0.000000 0.000000 0.492018 0.000000 0.000000 0.0 \n", - "34 0.000000 0.000000 0.000000 0.000000 0.000000 0.0 \n", - "35 0.000000 0.000000 0.000000 0.000000 0.000000 0.0 \n", - "\n", - " y_modloss_z1 y_modloss_z2 \n", - "0 0.000000 0.0 \n", - "1 1.000000 0.0 \n", - "2 0.374123 0.0 \n", - "3 0.173858 0.0 \n", - "4 0.000000 0.0 \n", - "5 0.000000 0.0 \n", - "6 0.000000 0.0 \n", - "7 0.000000 0.0 \n", - "8 0.000000 0.0 \n", - "9 0.000000 0.0 \n", - "10 0.000000 0.0 \n", - "11 0.000000 0.0 \n", - "12 1.000000 0.0 \n", - "13 0.000000 0.0 \n", - "14 0.000000 0.0 \n", - "15 0.000000 0.0 \n", - "16 0.000000 0.0 \n", - "17 0.000000 0.0 \n", - "18 0.000000 0.0 \n", - "19 0.774316 0.0 \n", - "20 0.000000 0.0 \n", - "21 0.000000 0.0 \n", - "22 0.000000 0.0 \n", - "23 0.000000 0.0 \n", - "24 0.328738 0.0 \n", - "25 0.284129 0.0 \n", - "26 0.262853 0.0 \n", - "27 0.000000 0.0 \n", - "28 0.000000 0.0 \n", - "29 0.000000 0.0 \n", - "30 1.000000 0.0 \n", - "31 0.705295 0.0 \n", - "32 0.279959 0.0 \n", - "33 0.000000 0.0 \n", - "34 0.000000 0.0 \n", - "35 0.000000 0.0 " - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "target = LibraryReaderBase()\n", "target.import_file(StringIO(tsv_str))\n", @@ -1198,100 +159,7 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
mzintensitytypeloss_typechargeposition
0609.3007810.450362121011
1511.3238531.0000001219811
2510.2323300.106543121012
3412.2554320.3741231219812
4411.1639400.069116121013
\n", - "
" - ], - "text/plain": [ - " mz intensity type loss_type charge position\n", - "0 609.300781 0.450362 121 0 1 1\n", - "1 511.323853 1.000000 121 98 1 1\n", - "2 510.232330 0.106543 121 0 1 2\n", - "3 412.255432 0.374123 121 98 1 2\n", - "4 411.163940 0.069116 121 0 1 3" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# Flatten original library to use it as a test input with target the original library\n", "flat_lib = SpecLibFlat(custom_fragment_df_columns=['type','charge','position','loss_type'])\n", @@ -1303,100 +171,7 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
y_z2b_z2y_modloss_z1b_z1y_z1b_modloss_z1
00.00.00.0000000.00.0000000.0
10.00.01.0000000.00.4503620.0
20.00.00.3741230.00.1065430.0
30.00.00.1738580.00.0691160.0
40.00.00.0000000.00.0000000.0
\n", - "
" - ], - "text/plain": [ - " y_z2 b_z2 y_modloss_z1 b_z1 y_z1 b_modloss_z1\n", - "0 0.0 0.0 0.000000 0.0 0.000000 0.0\n", - "1 0.0 0.0 1.000000 0.0 0.450362 0.0\n", - "2 0.0 0.0 0.374123 0.0 0.106543 0.0\n", - "3 0.0 0.0 0.173858 0.0 0.069116 0.0\n", - "4 0.0 0.0 0.000000 0.0 0.000000 0.0" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "back_to_base = flat_lib.to_SpecLibBase()\n", "back_to_base.fragment_intensity_df.head()" @@ -1406,25 +181,14 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['_fragment_intensity_df', '_fragment_mz_df']" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "back_to_base.available_dense_fragment_dfs()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -1439,7 +203,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -1450,7 +214,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -1461,17 +225,62 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "# enable appending of flat libraries while making sure the flat_frag_start_idx is updated\n", + "\n", + "lib1 = SpecLibBase()\n", + "lib1.precursor_df = pd.DataFrame({\n", + " 'sequence': ['PEPTI', 'SEQUE'],\n", + " 'charge': [2, 3],\n", + " 'mod_sites': ['', ''],\n", + " 'mods': ['', ''],\n", + "})\n", + "lib1.calc_precursor_mz()\n", + "lib1.calc_fragment_mz_df()\n", + "lib1._fragment_intensity_df = lib1.fragment_mz_df.copy()\n", + "lib1._fragment_intensity_df.iloc[:] = 0\n", + "lib2 = lib1.copy()\n", + "lib2._fragment_intensity_df.iloc[:] = 1\n", + "\n", + "flatlib_1 = SpecLibFlat(min_fragment_intensity=0)\n", + "flatlib_1.parse_base_library(lib1, keep_original_frag_dfs=True)\n", + "flatlib_2 = SpecLibFlat(min_fragment_intensity=0)\n", + "flatlib_2.parse_base_library(lib2, keep_original_frag_dfs=True)\n", + "\n", + "flatlib_1.append(flatlib_2)\n", + "\n", + "assert flatlib_1.precursor_df.shape[0] == 4\n", + "assert flatlib_1.fragment_mz_df.shape[0] == 16\n", + "assert flatlib_1.fragment_df.shape[0] == np.prod(flatlib_1.fragment_mz_df.shape)\n", + "\n", + "assert np.all(flatlib_1.precursor_df['frag_start_idx'] == [0, 4, 8, 12])\n", + "assert np.all(flatlib_1.precursor_df['flat_frag_start_idx'] == [0, 16, 32, 48])\n", + "\n", + "assert np.all(flatlib_1.fragment_df['intensity'] == np.repeat([0, 1], 32))\n", + "assert np.all(flatlib_1.fragment_intensity_df.values.flatten() == np.repeat([0, 1], 32))" + ] } ], "metadata": { "kernelspec": { - "display_name": "python3", + "display_name": "metaptcm", "language": "python", "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.5" } }, "nbformat": 4,