diff --git a/alphabase/spectral_library/base.py b/alphabase/spectral_library/base.py
index 671b333f..5361bd45 100644
--- a/alphabase/spectral_library/base.py
+++ b/alphabase/spectral_library/base.py
@@ -193,6 +193,7 @@ def append(
other: "SpecLibBase",
dfs_to_append: typing.List[str] = [
"_precursor_df",
+ "_fragment_df",
"_fragment_intensity_df",
"_fragment_mz_df",
"_fragment_intensity_predicted_df",
@@ -224,6 +225,7 @@ def append(
None
"""
+
if remove_unused_dfs:
current_frag_dfs = self.available_dense_fragment_dfs()
for attr in current_frag_dfs:
@@ -263,15 +265,16 @@ def check_matching_columns(df1, df2):
else:
matching_columns.append([])
- n_fragments = []
+ n_dense_fragments = []
+
# get subset of dfs_to_append starting with _fragment
for attr in dfs_to_append:
- if attr.startswith("_fragment") and hasattr(self, attr):
+ if attr in self.available_dense_fragment_dfs() and hasattr(self, attr):
n_current_fragments = len(getattr(self, attr))
if n_current_fragments > 0:
- n_fragments += [n_current_fragments]
+ n_dense_fragments += [n_current_fragments]
- if not np.all(np.array(n_fragments) == n_fragments[0]):
+ if len(set(n_dense_fragments)) > 1:
raise ValueError(
"The libraries can't be appended as the number of fragments in the current libraries are not the same."
)
@@ -284,19 +287,44 @@ def check_matching_columns(df1, df2):
other_df = getattr(other, attr)[column].copy()
if attr.startswith("_precursor"):
- frag_idx_increment = 0
- for fragment_df in ["_fragment_intensity_df", "_fragment_mz_df"]:
- if (
- hasattr(self, fragment_df)
- and len(getattr(self, fragment_df)) > 0
- ):
- frag_idx_increment = len(getattr(self, fragment_df))
-
- if "frag_start_idx" in other_df.columns:
- other_df["frag_start_idx"] += frag_idx_increment
-
- if "frag_stop_idx" in other_df.columns:
- other_df["frag_stop_idx"] += frag_idx_increment
+ # we iterate over the types of fragment dataframes
+ fragment_df_mapping = {
+ # dense fragment dataframes
+ "": ["_fragment_intensity_df", "_fragment_mz_df"],
+ # flat fragment dataframes
+ "flat_": ["_fragment_df"],
+ }
+
+ # Update indices for each fragment dataframe type
+ for prefix, fragment_df_list in fragment_df_mapping.items():
+ # obtain frag_idx_increment and check that it is the same for all fragment dataframes of this type
+ # an increment of 0 is allowed (empty or missing dataframe), but if not 0, it must be the same for every fragment dataframe in the group
+ frag_idx_increment = 0
+ for fragment_df in fragment_df_list:
+ if (
+ hasattr(self, fragment_df)
+ and len(getattr(self, fragment_df)) > 0
+ ):
+ if (
+ frag_idx_increment != 0
+ and len(getattr(self, fragment_df)) != 0
+ and frag_idx_increment
+ != len(getattr(self, fragment_df))
+ ):
+ raise ValueError(
+ f"The number of fragments in the {fragment_df} dataframe must be the same as in all other dense fragment dataframes"
+ )
+ else:
+ frag_idx_increment = len(getattr(self, fragment_df))
+
+ # update the indices
+ start_col = f"{prefix}frag_start_idx"
+ stop_col = f"{prefix}frag_stop_idx"
+
+ if start_col in other_df.columns:
+ other_df[start_col] += frag_idx_increment
+ if stop_col in other_df.columns:
+ other_df[stop_col] += frag_idx_increment
setattr(
self,
diff --git a/alphabase/spectral_library/flat.py b/alphabase/spectral_library/flat.py
index 62e9db7c..e1944c05 100644
--- a/alphabase/spectral_library/flat.py
+++ b/alphabase/spectral_library/flat.py
@@ -81,12 +81,6 @@ def protein_df(self) -> pd.DataFrame:
"""Protein dataframe"""
return self._protein_df
- def available_dense_fragment_dfs(self):
- """Return the available dense fragment dataframes.
- This method is inherited from :class:`SpecLibBase` and will return an empty list for a flat library.
- """
- return []
-
def remove_unused_fragments(self):
"""Remove unused fragments from fragment_df.
This method is inherited from :class:`SpecLibBase` and has not been implemented for a flat library.
diff --git a/nbs_tests/spectral_library/flat_library.ipynb b/nbs_tests/spectral_library/flat_library.ipynb
index d371863e..6f80b275 100644
--- a/nbs_tests/spectral_library/flat_library.ipynb
+++ b/nbs_tests/spectral_library/flat_library.ipynb
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
@@ -11,24 +11,16 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 2,
"metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"from alphabase.spectral_library.flat import *"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
@@ -41,341 +33,7 @@
"cell_type": "code",
"execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "100%|██████████| 6/6 [00:00<00:00, 2663.05it/s]\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " mz | \n",
- " intensity | \n",
- " type | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 609.300781 | \n",
- " 0.450362 | \n",
- " 121 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 511.323853 | \n",
- " 1.000000 | \n",
- " 121 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 510.232330 | \n",
- " 0.106543 | \n",
- " 121 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 412.255432 | \n",
- " 0.374123 | \n",
- " 121 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 411.163940 | \n",
- " 0.069116 | \n",
- " 121 | \n",
- "
\n",
- " \n",
- " 5 | \n",
- " 313.187042 | \n",
- " 0.173858 | \n",
- " 121 | \n",
- "
\n",
- " \n",
- " 6 | \n",
- " 321.694702 | \n",
- " 0.515072 | \n",
- " 121 | \n",
- "
\n",
- " \n",
- " 7 | \n",
- " 545.329346 | \n",
- " 0.745664 | \n",
- " 121 | \n",
- "
\n",
- " \n",
- " 8 | \n",
- " 326.171051 | \n",
- " 0.143703 | \n",
- " 98 | \n",
- "
\n",
- " \n",
- " 9 | \n",
- " 432.245270 | \n",
- " 1.000000 | \n",
- " 121 | \n",
- "
\n",
- " \n",
- " 10 | \n",
- " 397.208160 | \n",
- " 0.094888 | \n",
- " 98 | \n",
- "
\n",
- " \n",
- " 11 | \n",
- " 361.208160 | \n",
- " 0.377585 | \n",
- " 121 | \n",
- "
\n",
- " \n",
- " 12 | \n",
- " 496.276581 | \n",
- " 0.054980 | \n",
- " 98 | \n",
- "
\n",
- " \n",
- " 13 | \n",
- " 686.275635 | \n",
- " 0.103734 | \n",
- " 121 | \n",
- "
\n",
- " \n",
- " 14 | \n",
- " 588.298767 | \n",
- " 1.000000 | \n",
- " 121 | \n",
- "
\n",
- " \n",
- " 15 | \n",
- " 349.171783 | \n",
- " 0.092058 | \n",
- " 121 | \n",
- "
\n",
- " \n",
- " 16 | \n",
- " 661.295654 | \n",
- " 0.198974 | \n",
- " 121 | \n",
- "
\n",
- " \n",
- " 17 | \n",
- " 563.318787 | \n",
- " 0.774316 | \n",
- " 121 | \n",
- "
\n",
- " \n",
- " 18 | \n",
- " 494.297302 | \n",
- " 1.000000 | \n",
- " 121 | \n",
- "
\n",
- " \n",
- " 19 | \n",
- " 256.129181 | \n",
- " 0.649715 | \n",
- " 98 | \n",
- "
\n",
- " \n",
- " 20 | \n",
- " 347.228882 | \n",
- " 0.882733 | \n",
- " 121 | \n",
- "
\n",
- " \n",
- " 21 | \n",
- " 403.197601 | \n",
- " 0.351781 | \n",
- " 98 | \n",
- "
\n",
- " \n",
- " 22 | \n",
- " 490.229614 | \n",
- " 0.400474 | \n",
- " 98 | \n",
- "
\n",
- " \n",
- " 23 | \n",
- " 701.290588 | \n",
- " 0.244350 | \n",
- " 98 | \n",
- "
\n",
- " \n",
- " 24 | \n",
- " 603.313660 | \n",
- " 0.631000 | \n",
- " 98 | \n",
- "
\n",
- " \n",
- " 25 | \n",
- " 762.258301 | \n",
- " 0.084908 | \n",
- " 121 | \n",
- "
\n",
- " \n",
- " 26 | \n",
- " 664.281433 | \n",
- " 0.328738 | \n",
- " 121 | \n",
- "
\n",
- " \n",
- " 27 | \n",
- " 497.283051 | \n",
- " 0.284129 | \n",
- " 121 | \n",
- "
\n",
- " \n",
- " 28 | \n",
- " 496.191528 | \n",
- " 0.276969 | \n",
- " 121 | \n",
- "
\n",
- " \n",
- " 29 | \n",
- " 268.165558 | \n",
- " 0.057554 | \n",
- " 98 | \n",
- "
\n",
- " \n",
- " 30 | \n",
- " 398.214630 | \n",
- " 0.262853 | \n",
- " 121 | \n",
- "
\n",
- " \n",
- " 31 | \n",
- " 267.074036 | \n",
- " 0.087439 | \n",
- " 98 | \n",
- "
\n",
- " \n",
- " 32 | \n",
- " 329.193176 | \n",
- " 1.000000 | \n",
- " 121 | \n",
- "
\n",
- " \n",
- " 33 | \n",
- " 435.163940 | \n",
- " 0.061627 | \n",
- " 98 | \n",
- "
\n",
- " \n",
- " 34 | \n",
- " 698.312073 | \n",
- " 0.602346 | \n",
- " 121 | \n",
- "
\n",
- " \n",
- " 35 | \n",
- " 600.335144 | \n",
- " 1.000000 | \n",
- " 121 | \n",
- "
\n",
- " \n",
- " 36 | \n",
- " 611.280029 | \n",
- " 0.141106 | \n",
- " 121 | \n",
- "
\n",
- " \n",
- " 37 | \n",
- " 513.303101 | \n",
- " 0.705295 | \n",
- " 121 | \n",
- "
\n",
- " \n",
- " 38 | \n",
- " 498.195953 | \n",
- " 0.108914 | \n",
- " 121 | \n",
- "
\n",
- " \n",
- " 39 | \n",
- " 400.219055 | \n",
- " 0.279959 | \n",
- " 121 | \n",
- "
\n",
- " \n",
- " 40 | \n",
- " 331.197601 | \n",
- " 0.492018 | \n",
- " 121 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " mz intensity type\n",
- "0 609.300781 0.450362 121\n",
- "1 511.323853 1.000000 121\n",
- "2 510.232330 0.106543 121\n",
- "3 412.255432 0.374123 121\n",
- "4 411.163940 0.069116 121\n",
- "5 313.187042 0.173858 121\n",
- "6 321.694702 0.515072 121\n",
- "7 545.329346 0.745664 121\n",
- "8 326.171051 0.143703 98\n",
- "9 432.245270 1.000000 121\n",
- "10 397.208160 0.094888 98\n",
- "11 361.208160 0.377585 121\n",
- "12 496.276581 0.054980 98\n",
- "13 686.275635 0.103734 121\n",
- "14 588.298767 1.000000 121\n",
- "15 349.171783 0.092058 121\n",
- "16 661.295654 0.198974 121\n",
- "17 563.318787 0.774316 121\n",
- "18 494.297302 1.000000 121\n",
- "19 256.129181 0.649715 98\n",
- "20 347.228882 0.882733 121\n",
- "21 403.197601 0.351781 98\n",
- "22 490.229614 0.400474 98\n",
- "23 701.290588 0.244350 98\n",
- "24 603.313660 0.631000 98\n",
- "25 762.258301 0.084908 121\n",
- "26 664.281433 0.328738 121\n",
- "27 497.283051 0.284129 121\n",
- "28 496.191528 0.276969 121\n",
- "29 268.165558 0.057554 98\n",
- "30 398.214630 0.262853 121\n",
- "31 267.074036 0.087439 98\n",
- "32 329.193176 1.000000 121\n",
- "33 435.163940 0.061627 98\n",
- "34 698.312073 0.602346 121\n",
- "35 600.335144 1.000000 121\n",
- "36 611.280029 0.141106 121\n",
- "37 513.303101 0.705295 121\n",
- "38 498.195953 0.108914 121\n",
- "39 400.219055 0.279959 121\n",
- "40 331.197601 0.492018 121"
- ]
- },
- "execution_count": null,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"#| hide\n",
"tsv_str = \"\"\"PrecursorCharge\tModifiedPeptide\tStrippedPeptide\tiRT\tLabeledPeptide\tPrecursorMz\tFragmentLossType\tFragmentNumber\tFragmentType\tFragmentCharge\tFragmentMz\tRelativeIntensity\tIonMobility\n",
@@ -439,182 +97,7 @@
"cell_type": "code",
"execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " nAA | \n",
- " sequence | \n",
- " charge | \n",
- " mobility | \n",
- " precursor_mz | \n",
- " rt | \n",
- " mods | \n",
- " mod_sites | \n",
- " frag_start_idx | \n",
- " frag_stop_idx | \n",
- " rt_norm | \n",
- " ccs | \n",
- " flat_frag_start_idx | \n",
- " flat_frag_stop_idx | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 7 | \n",
- " AVVVSPK | \n",
- " 2 | \n",
- " 0.9 | \n",
- " 390.206779 | \n",
- " -22.849740 | \n",
- " Phospho@S | \n",
- " 5 | \n",
- " 0 | \n",
- " 6 | \n",
- " 0.075327 | \n",
- " 366.858877 | \n",
- " 0 | \n",
- " 6 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 7 | \n",
- " DPLAVDK | \n",
- " 2 | \n",
- " 0.9 | \n",
- " 379.208161 | \n",
- " -15.087100 | \n",
- " | \n",
- " | \n",
- " 6 | \n",
- " 12 | \n",
- " 0.199375 | \n",
- " 367.043100 | \n",
- " 6 | \n",
- " 13 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 7 | \n",
- " MGSLDSK | \n",
- " 2 | \n",
- " 0.9 | \n",
- " 409.161712 | \n",
- " -27.563500 | \n",
- " Phospho@S | \n",
- " 3 | \n",
- " 12 | \n",
- " 18 | \n",
- " 0.000000 | \n",
- " 366.564438 | \n",
- " 13 | \n",
- " 16 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 7 | \n",
- " SVSFSLK | \n",
- " 1 | \n",
- " 0.9 | \n",
- " 847.396112 | \n",
- " 35.014110 | \n",
- " Phospho@S | \n",
- " 3 | \n",
- " 18 | \n",
- " 24 | \n",
- " 1.000000 | \n",
- " 183.178171 | \n",
- " 16 | \n",
- " 25 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 7 | \n",
- " VSVSPGR | \n",
- " 2 | \n",
- " 0.9 | \n",
- " 431.167001 | \n",
- " -23.930850 | \n",
- " Phospho@S;Phospho@S | \n",
- " 2;4 | \n",
- " 24 | \n",
- " 30 | \n",
- " 0.058050 | \n",
- " 366.254833 | \n",
- " 25 | \n",
- " 34 | \n",
- "
\n",
- " \n",
- " 5 | \n",
- " 7 | \n",
- " YSLSPSK | \n",
- " 2 | \n",
- " 0.9 | \n",
- " 431.191326 | \n",
- " -6.428198 | \n",
- " Phospho@S | \n",
- " 4 | \n",
- " 30 | \n",
- " 36 | \n",
- " 0.337745 | \n",
- " 366.254509 | \n",
- " 34 | \n",
- " 41 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " nAA sequence charge mobility precursor_mz rt \\\n",
- "0 7 AVVVSPK 2 0.9 390.206779 -22.849740 \n",
- "1 7 DPLAVDK 2 0.9 379.208161 -15.087100 \n",
- "2 7 MGSLDSK 2 0.9 409.161712 -27.563500 \n",
- "3 7 SVSFSLK 1 0.9 847.396112 35.014110 \n",
- "4 7 VSVSPGR 2 0.9 431.167001 -23.930850 \n",
- "5 7 YSLSPSK 2 0.9 431.191326 -6.428198 \n",
- "\n",
- " mods mod_sites frag_start_idx frag_stop_idx rt_norm \\\n",
- "0 Phospho@S 5 0 6 0.075327 \n",
- "1 6 12 0.199375 \n",
- "2 Phospho@S 3 12 18 0.000000 \n",
- "3 Phospho@S 3 18 24 1.000000 \n",
- "4 Phospho@S;Phospho@S 2;4 24 30 0.058050 \n",
- "5 Phospho@S 4 30 36 0.337745 \n",
- "\n",
- " ccs flat_frag_start_idx flat_frag_stop_idx \n",
- "0 366.858877 0 6 \n",
- "1 367.043100 6 13 \n",
- "2 366.564438 13 16 \n",
- "3 183.178171 16 25 \n",
- "4 366.254833 25 34 \n",
- "5 366.254509 34 41 "
- ]
- },
- "execution_count": null,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"#| hide\n",
"flat_lib.precursor_df"
@@ -622,7 +105,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
@@ -641,7 +124,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
@@ -665,529 +148,7 @@
"cell_type": "code",
"execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "100%|██████████| 6/6 [00:00<00:00, 2457.12it/s]\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " b_z1 | \n",
- " b_z2 | \n",
- " y_z1 | \n",
- " y_z2 | \n",
- " b_modloss_z1 | \n",
- " b_modloss_z2 | \n",
- " y_modloss_z1 | \n",
- " y_modloss_z2 | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.450362 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- " 1.000000 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.106543 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- " 0.374123 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.069116 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- " 0.173858 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " 5 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " 6 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.515072 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " 7 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.745664 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " 8 | \n",
- " 0.143703 | \n",
- " 0.000000 | \n",
- " 1.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " 9 | \n",
- " 0.094888 | \n",
- " 0.000000 | \n",
- " 0.377585 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " 10 | \n",
- " 0.054980 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " 11 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " 12 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.103734 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- " 1.000000 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " 13 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " 14 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " 15 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.092058 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " 16 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " 17 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " 18 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " 19 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.198974 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- " 0.774316 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " 20 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 1.000000 | \n",
- " 0.000000 | \n",
- " 0.649715 | \n",
- " 0.0 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " 21 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.882733 | \n",
- " 0.000000 | \n",
- " 0.351781 | \n",
- " 0.0 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " 22 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.400474 | \n",
- " 0.0 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " 23 | \n",
- " 0.244350 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.631000 | \n",
- " 0.0 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " 24 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.084908 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- " 0.328738 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " 25 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- " 0.284129 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " 26 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.276969 | \n",
- " 0.000000 | \n",
- " 0.057554 | \n",
- " 0.0 | \n",
- " 0.262853 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " 27 | \n",
- " 0.000000 | \n",
- " 0.087439 | \n",
- " 1.000000 | \n",
- " 0.000000 | \n",
- " 0.061627 | \n",
- " 0.0 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " 28 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " 29 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " 30 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.602346 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- " 1.000000 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " 31 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.141106 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- " 0.705295 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " 32 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.108914 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- " 0.279959 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " 33 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.492018 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " 34 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " 35 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " b_z1 b_z2 y_z1 y_z2 b_modloss_z1 b_modloss_z2 \\\n",
- "0 0.000000 0.000000 0.000000 0.000000 0.000000 0.0 \n",
- "1 0.000000 0.000000 0.450362 0.000000 0.000000 0.0 \n",
- "2 0.000000 0.000000 0.106543 0.000000 0.000000 0.0 \n",
- "3 0.000000 0.000000 0.069116 0.000000 0.000000 0.0 \n",
- "4 0.000000 0.000000 0.000000 0.000000 0.000000 0.0 \n",
- "5 0.000000 0.000000 0.000000 0.000000 0.000000 0.0 \n",
- "6 0.000000 0.000000 0.000000 0.515072 0.000000 0.0 \n",
- "7 0.000000 0.000000 0.745664 0.000000 0.000000 0.0 \n",
- "8 0.143703 0.000000 1.000000 0.000000 0.000000 0.0 \n",
- "9 0.094888 0.000000 0.377585 0.000000 0.000000 0.0 \n",
- "10 0.054980 0.000000 0.000000 0.000000 0.000000 0.0 \n",
- "11 0.000000 0.000000 0.000000 0.000000 0.000000 0.0 \n",
- "12 0.000000 0.000000 0.103734 0.000000 0.000000 0.0 \n",
- "13 0.000000 0.000000 0.000000 0.000000 0.000000 0.0 \n",
- "14 0.000000 0.000000 0.000000 0.000000 0.000000 0.0 \n",
- "15 0.000000 0.000000 0.092058 0.000000 0.000000 0.0 \n",
- "16 0.000000 0.000000 0.000000 0.000000 0.000000 0.0 \n",
- "17 0.000000 0.000000 0.000000 0.000000 0.000000 0.0 \n",
- "18 0.000000 0.000000 0.000000 0.000000 0.000000 0.0 \n",
- "19 0.000000 0.000000 0.198974 0.000000 0.000000 0.0 \n",
- "20 0.000000 0.000000 1.000000 0.000000 0.649715 0.0 \n",
- "21 0.000000 0.000000 0.882733 0.000000 0.351781 0.0 \n",
- "22 0.000000 0.000000 0.000000 0.000000 0.400474 0.0 \n",
- "23 0.244350 0.000000 0.000000 0.000000 0.631000 0.0 \n",
- "24 0.000000 0.000000 0.084908 0.000000 0.000000 0.0 \n",
- "25 0.000000 0.000000 0.000000 0.000000 0.000000 0.0 \n",
- "26 0.000000 0.000000 0.276969 0.000000 0.057554 0.0 \n",
- "27 0.000000 0.087439 1.000000 0.000000 0.061627 0.0 \n",
- "28 0.000000 0.000000 0.000000 0.000000 0.000000 0.0 \n",
- "29 0.000000 0.000000 0.000000 0.000000 0.000000 0.0 \n",
- "30 0.000000 0.000000 0.602346 0.000000 0.000000 0.0 \n",
- "31 0.000000 0.000000 0.141106 0.000000 0.000000 0.0 \n",
- "32 0.000000 0.000000 0.108914 0.000000 0.000000 0.0 \n",
- "33 0.000000 0.000000 0.492018 0.000000 0.000000 0.0 \n",
- "34 0.000000 0.000000 0.000000 0.000000 0.000000 0.0 \n",
- "35 0.000000 0.000000 0.000000 0.000000 0.000000 0.0 \n",
- "\n",
- " y_modloss_z1 y_modloss_z2 \n",
- "0 0.000000 0.0 \n",
- "1 1.000000 0.0 \n",
- "2 0.374123 0.0 \n",
- "3 0.173858 0.0 \n",
- "4 0.000000 0.0 \n",
- "5 0.000000 0.0 \n",
- "6 0.000000 0.0 \n",
- "7 0.000000 0.0 \n",
- "8 0.000000 0.0 \n",
- "9 0.000000 0.0 \n",
- "10 0.000000 0.0 \n",
- "11 0.000000 0.0 \n",
- "12 1.000000 0.0 \n",
- "13 0.000000 0.0 \n",
- "14 0.000000 0.0 \n",
- "15 0.000000 0.0 \n",
- "16 0.000000 0.0 \n",
- "17 0.000000 0.0 \n",
- "18 0.000000 0.0 \n",
- "19 0.774316 0.0 \n",
- "20 0.000000 0.0 \n",
- "21 0.000000 0.0 \n",
- "22 0.000000 0.0 \n",
- "23 0.000000 0.0 \n",
- "24 0.328738 0.0 \n",
- "25 0.284129 0.0 \n",
- "26 0.262853 0.0 \n",
- "27 0.000000 0.0 \n",
- "28 0.000000 0.0 \n",
- "29 0.000000 0.0 \n",
- "30 1.000000 0.0 \n",
- "31 0.705295 0.0 \n",
- "32 0.279959 0.0 \n",
- "33 0.000000 0.0 \n",
- "34 0.000000 0.0 \n",
- "35 0.000000 0.0 "
- ]
- },
- "execution_count": null,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"target = LibraryReaderBase()\n",
"target.import_file(StringIO(tsv_str))\n",
@@ -1198,100 +159,7 @@
"cell_type": "code",
"execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " mz | \n",
- " intensity | \n",
- " type | \n",
- " loss_type | \n",
- " charge | \n",
- " position | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 609.300781 | \n",
- " 0.450362 | \n",
- " 121 | \n",
- " 0 | \n",
- " 1 | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 511.323853 | \n",
- " 1.000000 | \n",
- " 121 | \n",
- " 98 | \n",
- " 1 | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 510.232330 | \n",
- " 0.106543 | \n",
- " 121 | \n",
- " 0 | \n",
- " 1 | \n",
- " 2 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 412.255432 | \n",
- " 0.374123 | \n",
- " 121 | \n",
- " 98 | \n",
- " 1 | \n",
- " 2 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 411.163940 | \n",
- " 0.069116 | \n",
- " 121 | \n",
- " 0 | \n",
- " 1 | \n",
- " 3 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " mz intensity type loss_type charge position\n",
- "0 609.300781 0.450362 121 0 1 1\n",
- "1 511.323853 1.000000 121 98 1 1\n",
- "2 510.232330 0.106543 121 0 1 2\n",
- "3 412.255432 0.374123 121 98 1 2\n",
- "4 411.163940 0.069116 121 0 1 3"
- ]
- },
- "execution_count": null,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"# Flatten original library to use it as a test input with target the original library\n",
"flat_lib = SpecLibFlat(custom_fragment_df_columns=['type','charge','position','loss_type'])\n",
@@ -1303,100 +171,7 @@
"cell_type": "code",
"execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " y_z2 | \n",
- " b_z2 | \n",
- " y_modloss_z1 | \n",
- " b_z1 | \n",
- " y_z1 | \n",
- " b_modloss_z1 | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " 1.000000 | \n",
- " 0.0 | \n",
- " 0.450362 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " 0.374123 | \n",
- " 0.0 | \n",
- " 0.106543 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " 0.173858 | \n",
- " 0.0 | \n",
- " 0.069116 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- " 0.000000 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " y_z2 b_z2 y_modloss_z1 b_z1 y_z1 b_modloss_z1\n",
- "0 0.0 0.0 0.000000 0.0 0.000000 0.0\n",
- "1 0.0 0.0 1.000000 0.0 0.450362 0.0\n",
- "2 0.0 0.0 0.374123 0.0 0.106543 0.0\n",
- "3 0.0 0.0 0.173858 0.0 0.069116 0.0\n",
- "4 0.0 0.0 0.000000 0.0 0.000000 0.0"
- ]
- },
- "execution_count": null,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"back_to_base = flat_lib.to_SpecLibBase()\n",
"back_to_base.fragment_intensity_df.head()"
@@ -1406,25 +181,14 @@
"cell_type": "code",
"execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "['_fragment_intensity_df', '_fragment_mz_df']"
- ]
- },
- "execution_count": null,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"back_to_base.available_dense_fragment_dfs()"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
@@ -1439,7 +203,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
@@ -1450,7 +214,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
@@ -1461,17 +225,62 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 15,
"metadata": {},
"outputs": [],
- "source": []
+ "source": [
+ "# enable appending of flat libraries while making sure the flat_frag_start_idx is updated\n",
+ "\n",
+ "lib1 = SpecLibBase()\n",
+ "lib1.precursor_df = pd.DataFrame({\n",
+ " 'sequence': ['PEPTI', 'SEQUE'],\n",
+ " 'charge': [2, 3],\n",
+ " 'mod_sites': ['', ''],\n",
+ " 'mods': ['', ''],\n",
+ "})\n",
+ "lib1.calc_precursor_mz()\n",
+ "lib1.calc_fragment_mz_df()\n",
+ "lib1._fragment_intensity_df = lib1.fragment_mz_df.copy()\n",
+ "lib1._fragment_intensity_df.iloc[:] = 0\n",
+ "lib2 = lib1.copy()\n",
+ "lib2._fragment_intensity_df.iloc[:] = 1\n",
+ "\n",
+ "flatlib_1 = SpecLibFlat(min_fragment_intensity=0)\n",
+ "flatlib_1.parse_base_library(lib1, keep_original_frag_dfs=True)\n",
+ "flatlib_2 = SpecLibFlat(min_fragment_intensity=0)\n",
+ "flatlib_2.parse_base_library(lib2, keep_original_frag_dfs=True)\n",
+ "\n",
+ "flatlib_1.append(flatlib_2)\n",
+ "\n",
+ "assert flatlib_1.precursor_df.shape[0] == 4\n",
+ "assert flatlib_1.fragment_mz_df.shape[0] == 16\n",
+ "assert flatlib_1.fragment_df.shape[0] == np.prod(flatlib_1.fragment_mz_df.shape)\n",
+ "\n",
+ "assert np.all(flatlib_1.precursor_df['frag_start_idx'] == [0, 4, 8, 12])\n",
+ "assert np.all(flatlib_1.precursor_df['flat_frag_start_idx'] == [0, 16, 32, 48])\n",
+ "\n",
+ "assert np.all(flatlib_1.fragment_df['intensity'] == np.repeat([0, 1], 32))\n",
+ "assert np.all(flatlib_1.fragment_intensity_df.values.flatten() == np.repeat([0, 1], 32))"
+ ]
}
],
"metadata": {
"kernelspec": {
- "display_name": "python3",
+ "display_name": "metaptcm",
"language": "python",
"name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.5"
}
},
"nbformat": 4,