From 76e02ff6846e39e1eace9cc1ff355646a1a42f05 Mon Sep 17 00:00:00 2001 From: GeorgWa Date: Tue, 27 Sep 2022 15:27:26 +0200 Subject: [PATCH 01/52] added loess regression module --- nbdev_nbs/statistics/regression.ipynb | 333 ++++++++++++++++++++++++++ 1 file changed, 333 insertions(+) create mode 100644 nbdev_nbs/statistics/regression.ipynb diff --git a/nbdev_nbs/statistics/regression.ipynb b/nbdev_nbs/statistics/regression.ipynb new file mode 100644 index 00000000..d5f95b80 --- /dev/null +++ b/nbdev_nbs/statistics/regression.ipynb @@ -0,0 +1,333 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Scikit-learn style implementation of the LOESS LOcally Estimated Scatterplot Smoothing regression." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "import numpy as np\n", + "\n", + "from sklearn.preprocessing import PolynomialFeatures\n", + "from sklearn.base import BaseEstimator, RegressorMixin\n", + "\n", + "EPSILON = 1e-6\n", + "\n", + "class LOESSRegression(BaseEstimator, RegressorMixin):\n", + "\n", + " def __init__(self, \n", + " n_kernels: int = 6, \n", + " kernel_size: float = 2., \n", + " polynomial_degree: int = 2):\n", + " \"\"\"scikit-learn estimator which implements a LOESS style local polynomial regression.\n", + " The number of basis functions or kernels can be explicitly defined which allows for faster and cheaper training and inference.\n", + " \n", + " Parameters\n", + " -----------\n", + "\n", + " n_kernels: int, default = 6\n", + " The number of local polynomial functions used to approximate the data. 
The location and extend of the kernels will be distributed to contain an equal number of datapoints in the training set.\n", + "\n", + " kernel_size: float, default = 2\n", + " A factor increasing the kernel size to overlap with the neighboring kernel.\n", + "\n", + " polynomial_degree: int, default = 2\n", + " Degree of the polynomial functions used for the local approximation.\n", + "\n", + " \"\"\"\n", + " self.n_kernels = n_kernels\n", + " self.kernel_size = kernel_size\n", + " self.polynomial_degree = polynomial_degree\n", + "\n", + " def get_params(self, deep: bool = True):\n", + " return super().get_params(deep)\n", + "\n", + " def set_params(self, **params):\n", + " return super().set_params(**params)\n", + "\n", + " def _more_tags(self):\n", + " return {'X_types': ['1darray']}\n", + "\n", + " def calculate_kernel_indices(self, x: np.ndarray):\n", + " \"\"\"Determine the indices of the datapoints belonging to each kernel.\n", + "\n", + " Parameters\n", + " -----------\n", + " x : numpy.ndarray, float, of shape (n_datapoints)\n", + "\n", + " Returns\n", + " --------\n", + " numpy.ndarray, int, of shape (n_kernels, 2)\n", + " \n", + " \"\"\"\n", + "\n", + " num_datapoints = len(x)\n", + " interval_size = num_datapoints // self.n_kernels\n", + "\n", + " start = np.arange(0,self.n_kernels) * interval_size\n", + " end = start + interval_size\n", + "\n", + " interval_extension = ((interval_size * self.kernel_size - interval_size) //2)\n", + "\n", + " start = start - interval_extension\n", + " start = np.maximum(0,start)\n", + "\n", + " end = end + interval_extension\n", + " end = np.minimum(num_datapoints,end)\n", + "\n", + " return np.column_stack([start,end]).astype(int)\n", + "\n", + " \n", + " def fit(self, x: np.ndarray, y: np.ndarray):\n", + " \"\"\"fit the model passed on provided training data.\n", + " \n", + " Parameters\n", + " -----------\n", + "\n", + " x: numpy.ndarray, float, of shape (n_samples,) or (n_samples, 1)\n", + " Training data. 
Note that only a single feature is supported at the moment.\n", + "\n", + " y: numpy.ndarray, float, of shape (n_samples,) or (n_samples, 1)\n", + " Target values.\n", + "\n", + " Returns\n", + " ---------\n", + "\n", + " self: object\n", + " Returns the fitted estimator.\n", + "\n", + " \"\"\"\n", + " \n", + " # As required by scikit-learn estimator guidelines\n", + " self.n_features_in_ = 1\n", + "\n", + " # Does not yet work with more than one input dimension\n", + " # axis-wise scaling and improved distance function need to be implemented\n", + " if len(x.shape) > 1:\n", + " if x.shape[1] > 1:\n", + " raise ValueError('Input arrays with more than one feature not yet supported. Please provide a matrix of shape (n_datapoints, 1) or (n_datapoints,)')\n", + "\n", + " # create flat version of the array for \n", + " idx_sorted = np.argsort(x.flat)\n", + " x_sorted = x.flat[idx_sorted]\n", + "\n", + " if len(x.shape) == 1:\n", + " x = x[...,np.newaxis]\n", + "\n", + " if len(y.shape) == 1:\n", + " y = y[...,np.newaxis]\n", + "\n", + " # kernel indices will only be calculated during fitting\n", + " kernel_indices = self.calculate_kernel_indices(x_sorted)\n", + "\n", + " # scale max and scale mean will then be used for calculating the weighht matrix\n", + " self.scale_mean = np.zeros((self.n_kernels))\n", + " self.scale_max = np.zeros((self.n_kernels))\n", + "\n", + " # scale mean and max are calculated and contain the scaling before applying the kernel\n", + " for i, area in enumerate(kernel_indices):\n", + " area_slice = slice(*area)\n", + " self.scale_mean[i] = x_sorted[area_slice].mean()\n", + " self.scale_max[i] = np.max(np.abs(x_sorted[area_slice] - self.scale_mean[i]))\n", + "\n", + " # from here on, the original column arrays are used\n", + " w = self.get_weight_matrix(x)\n", + "\n", + " # build design matrix\n", + " polynomial_transform = PolynomialFeatures(self.polynomial_degree)\n", + " x_design = polynomial_transform.fit_transform(x)\n", + " 
number_of_dimensions = len(x_design[0])\n", + "\n", + " self.beta = np.zeros((number_of_dimensions,self.n_kernels))\n", + "\n", + " for i, weights in enumerate(w.T):\n", + "\n", + " loadings = np.linalg.inv(x_design.T * weights @ x_design)@x_design.T\n", + " beta = (loadings*weights)@y\n", + " y_m = np.sum(x_design @ beta, axis=1)\n", + " self.beta[:,i] = np.ravel((loadings*weights)@y)\n", + " \n", + " return self\n", + "\n", + " def predict(self, x: np.ndarray):\n", + " \"\"\"Predict using the LOESS model.\n", + " \n", + " Parameters\n", + " -----------\n", + "\n", + " x: numpy.ndarray, float, of shape (n_samples,) or (n_samples, 1)\n", + " Feature data. Note that only a single feature is supported at the moment.\n", + " \n", + " Returns\n", + " ---------\n", + "\n", + " y: numpy.ndarray, float, of shape (n_samples,)\n", + " Target values.\n", + "\n", + " \"\"\"\n", + "\n", + " if len(x.shape) == 1:\n", + " x = x[...,np.newaxis]\n", + "\n", + " w = self.get_weight_matrix(x)\n", + " polynomial_transform = PolynomialFeatures(self.polynomial_degree)\n", + " x_design = polynomial_transform.fit_transform(x)\n", + "\n", + " return np.sum(x_design @ self.beta * w, axis=1)\n", + " \n", + "\n", + " def get_weight_matrix(self, x: np.ndarray):\n", + " \"\"\"Applies the fitted scaling parameter and the kernel to yield a weight matrix.\n", + "\n", + " The weight matrix is calculated based on the self.scale_mean and self.scale_max parameters which need to be calculated before calling this function.\n", + " They define the center and extend of the tricubic kernels. The first and last column are one-padded at the start and beginning to allow for extrapolation. 
\n", + "\n", + " Parameters\n", + " ----------\n", + "\n", + " x: numpy.ndarray\n", + " Numpy array of shape (n_datapoints, 1) which should be transformed to weights.\n", + "\n", + "\n", + " Returns\n", + " ----------\n", + " \n", + " numpy.ndarray\n", + " Weight matrix with the shape (n_datapoints, n_kernels).\n", + " \n", + " \"\"\"\n", + " w = np.tile(x,(1,self.n_kernels))\n", + "\n", + " w = np.abs(w - self.scale_mean)\n", + " w = w/self.scale_max\n", + " \n", + " # apply weighting kernel\n", + " w = self.tricubic(w)\n", + " \n", + " # perform epsilon padding at the start and end of the weight matrix to allow for extrapolation.\n", + "\n", + " # Does not work well yet\n", + "\n", + " # START\n", + " #idx_values = np.where(w[:,0] > 0)[0]\n", + " #min_idx, max_idx = idx_values[[0, -1]]\n", + " #w[:min_idx,0] = EPSILON\n", + "\n", + " # END\n", + " #idx_values = np.where(w[:,-1] > 0)[0]\n", + " #min_idx, max_idx = idx_values[[0, -1]]\n", + " #w[max_idx:,-1] = EPSILON\n", + "\n", + " # normalize column wise\n", + "\n", + " w = w/np.sum(w, axis=1, keepdims=True)\n", + "\n", + " return w\n", + " \n", + "\n", + " @staticmethod\n", + " def tricubic(x):\n", + " \"\"\"tricubic weight kernel\"\"\"\n", + " epsilon = EPSILON\n", + " mask = np.abs(x) <= 1\n", + " return mask * (np.power(1-np.power(np.abs(x),3),3) + epsilon)\n", + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Unit tests" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/georgwallmann/miniconda3/envs/alphadia/lib/python3.8/site-packages/sklearn/utils/estimator_checks.py:290: SkipTestWarning: Can't test estimator LOESSRegression which requires input of type ['1darray']\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "from sklearn.utils.estimator_checks import check_estimator\n", + "check_estimator(LOESSRegression())" + ] + }, + { + "cell_type": "code", 
+ "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import matplotlib.pyplot as plt\n", + "def noisy_1d(x):\n", + " y = np.sin(x)\n", + " y_err = np.random.normal(y,0.5)\n", + " return y + y_err + 0.5 * x\n", + "\n", + "x_train = np.linspace(0,15,100)\n", + "y_train = noisy_1d(x_train)\n", + "\n", + "x_test = np.linspace(0,15,200)\n", + "y_test = LOESSRegression().fit(x_train, y_train).predict(x_test)\n", + "\n", + "plt.scatter(x_train,y_train)\n", + "plt.plot(x_test,y_test,c='r')\n", + "plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.8.13 ('alphadia')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.13" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "73529403064d3d77076e792311ea0c557580cb84bf922625b9090c035bb21740" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 3d768d9fac0039b111b9f011e6083094d6f25597 Mon Sep 17 00:00:00 2001 From: GeorgWa Date: Tue, 27 Sep 2022 15:38:00 +0200 Subject: [PATCH 02/52] add nbdev export, nbdev_prepare --- alphabase/_modidx.py | 20 +++ alphabase/statistics/__init__.py | 0 alphabase/statistics/regression.py | 236 ++++++++++++++++++++++++++ nbdev_nbs/statistics/regression.ipynb | 48 +++--- 4 files changed, 281 insertions(+), 23 deletions(-) create mode 100644 alphabase/statistics/__init__.py create mode 100644 alphabase/statistics/regression.py diff --git a/alphabase/_modidx.py b/alphabase/_modidx.py index 8fdbfbfe..cc04e3eb 100644 --- a/alphabase/_modidx.py +++ b/alphabase/_modidx.py @@ -467,6 +467,26 @@ 'alphabase/spectral_library/library_base.py'), 'alphabase.spectral_library.library_base.SpecLibBase.update_precursor_mz': ( 
'spectral_library/library_base.html#speclibbase.update_precursor_mz', 'alphabase/spectral_library/library_base.py')}, + 'alphabase.statistics.regression': { 'alphabase.statistics.regression.LOESSRegression': ( 'statistics/regression.html#loessregression', + 'alphabase/statistics/regression.py'), + 'alphabase.statistics.regression.LOESSRegression.__init__': ( 'statistics/regression.html#loessregression.__init__', + 'alphabase/statistics/regression.py'), + 'alphabase.statistics.regression.LOESSRegression._more_tags': ( 'statistics/regression.html#loessregression._more_tags', + 'alphabase/statistics/regression.py'), + 'alphabase.statistics.regression.LOESSRegression.calculate_kernel_indices': ( 'statistics/regression.html#loessregression.calculate_kernel_indices', + 'alphabase/statistics/regression.py'), + 'alphabase.statistics.regression.LOESSRegression.fit': ( 'statistics/regression.html#loessregression.fit', + 'alphabase/statistics/regression.py'), + 'alphabase.statistics.regression.LOESSRegression.get_params': ( 'statistics/regression.html#loessregression.get_params', + 'alphabase/statistics/regression.py'), + 'alphabase.statistics.regression.LOESSRegression.get_weight_matrix': ( 'statistics/regression.html#loessregression.get_weight_matrix', + 'alphabase/statistics/regression.py'), + 'alphabase.statistics.regression.LOESSRegression.predict': ( 'statistics/regression.html#loessregression.predict', + 'alphabase/statistics/regression.py'), + 'alphabase.statistics.regression.LOESSRegression.set_params': ( 'statistics/regression.html#loessregression.set_params', + 'alphabase/statistics/regression.py'), + 'alphabase.statistics.regression.LOESSRegression.tricubic': ( 'statistics/regression.html#loessregression.tricubic', + 'alphabase/statistics/regression.py')}, 'alphabase.utils': { 'alphabase.utils._flatten': ('utils.html#_flatten', 'alphabase/utils.py'), 'alphabase.utils.explode_multiple_columns': ('utils.html#explode_multiple_columns', 'alphabase/utils.py'), 
'alphabase.utils.process_bar': ('utils.html#process_bar', 'alphabase/utils.py')}, diff --git a/alphabase/statistics/__init__.py b/alphabase/statistics/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/alphabase/statistics/regression.py b/alphabase/statistics/regression.py new file mode 100644 index 00000000..6439b461 --- /dev/null +++ b/alphabase/statistics/regression.py @@ -0,0 +1,236 @@ +# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbdev_nbs/statistics/regression.ipynb. + +# %% auto 0 +__all__ = ['EPSILON', 'LOESSRegression'] + +# %% ../../nbdev_nbs/statistics/regression.ipynb 2 +import numpy as np + +from sklearn.preprocessing import PolynomialFeatures +from sklearn.base import BaseEstimator, RegressorMixin + +from sklearn.utils.estimator_checks import check_estimator +import matplotlib.pyplot as plt + +# %% ../../nbdev_nbs/statistics/regression.ipynb 3 +EPSILON = 1e-6 + +class LOESSRegression(BaseEstimator, RegressorMixin): + + def __init__(self, + n_kernels: int = 6, + kernel_size: float = 2., + polynomial_degree: int = 2): + """scikit-learn estimator which implements a LOESS style local polynomial regression. + The number of basis functions or kernels can be explicitly defined which allows for faster and cheaper training and inference. + + Parameters + ----------- + + n_kernels: int, default = 6 + The number of local polynomial functions used to approximate the data. The location and extend of the kernels will be distributed to contain an equal number of datapoints in the training set. + + kernel_size: float, default = 2 + A factor increasing the kernel size to overlap with the neighboring kernel. + + polynomial_degree: int, default = 2 + Degree of the polynomial functions used for the local approximation. 
+ + """ + self.n_kernels = n_kernels + self.kernel_size = kernel_size + self.polynomial_degree = polynomial_degree + + def get_params(self, deep: bool = True): + return super().get_params(deep) + + def set_params(self, **params): + return super().set_params(**params) + + def _more_tags(self): + return {'X_types': ['1darray']} + + def calculate_kernel_indices(self, x: np.ndarray): + """Determine the indices of the datapoints belonging to each kernel. + + Parameters + ----------- + x : numpy.ndarray, float, of shape (n_datapoints) + + Returns + -------- + numpy.ndarray, int, of shape (n_kernels, 2) + + """ + + num_datapoints = len(x) + interval_size = num_datapoints // self.n_kernels + + start = np.arange(0,self.n_kernels) * interval_size + end = start + interval_size + + interval_extension = ((interval_size * self.kernel_size - interval_size) //2) + + start = start - interval_extension + start = np.maximum(0,start) + + end = end + interval_extension + end = np.minimum(num_datapoints,end) + + return np.column_stack([start,end]).astype(int) + + + def fit(self, x: np.ndarray, y: np.ndarray): + """fit the model passed on provided training data. + + Parameters + ----------- + + x: numpy.ndarray, float, of shape (n_samples,) or (n_samples, 1) + Training data. Note that only a single feature is supported at the moment. + + y: numpy.ndarray, float, of shape (n_samples,) or (n_samples, 1) + Target values. + + Returns + --------- + + self: object + Returns the fitted estimator. + + """ + + # As required by scikit-learn estimator guidelines + self.n_features_in_ = 1 + + # Does not yet work with more than one input dimension + # axis-wise scaling and improved distance function need to be implemented + if len(x.shape) > 1: + if x.shape[1] > 1: + raise ValueError('Input arrays with more than one feature not yet supported. 
Please provide a matrix of shape (n_datapoints, 1) or (n_datapoints,)') + + # create flat version of the array for + idx_sorted = np.argsort(x.flat) + x_sorted = x.flat[idx_sorted] + + if len(x.shape) == 1: + x = x[...,np.newaxis] + + if len(y.shape) == 1: + y = y[...,np.newaxis] + + # kernel indices will only be calculated during fitting + kernel_indices = self.calculate_kernel_indices(x_sorted) + + # scale max and scale mean will then be used for calculating the weighht matrix + self.scale_mean = np.zeros((self.n_kernels)) + self.scale_max = np.zeros((self.n_kernels)) + + # scale mean and max are calculated and contain the scaling before applying the kernel + for i, area in enumerate(kernel_indices): + area_slice = slice(*area) + self.scale_mean[i] = x_sorted[area_slice].mean() + self.scale_max[i] = np.max(np.abs(x_sorted[area_slice] - self.scale_mean[i])) + + # from here on, the original column arrays are used + w = self.get_weight_matrix(x) + + # build design matrix + polynomial_transform = PolynomialFeatures(self.polynomial_degree) + x_design = polynomial_transform.fit_transform(x) + number_of_dimensions = len(x_design[0]) + + self.beta = np.zeros((number_of_dimensions,self.n_kernels)) + + for i, weights in enumerate(w.T): + + loadings = np.linalg.inv(x_design.T * weights @ x_design)@x_design.T + beta = (loadings*weights)@y + y_m = np.sum(x_design @ beta, axis=1) + self.beta[:,i] = np.ravel((loadings*weights)@y) + + return self + + def predict(self, x: np.ndarray): + """Predict using the LOESS model. + + Parameters + ----------- + + x: numpy.ndarray, float, of shape (n_samples,) or (n_samples, 1) + Feature data. Note that only a single feature is supported at the moment. + + Returns + --------- + + y: numpy.ndarray, float, of shape (n_samples,) + Target values. 
+ + """ + + if len(x.shape) == 1: + x = x[...,np.newaxis] + + w = self.get_weight_matrix(x) + polynomial_transform = PolynomialFeatures(self.polynomial_degree) + x_design = polynomial_transform.fit_transform(x) + + return np.sum(x_design @ self.beta * w, axis=1) + + + def get_weight_matrix(self, x: np.ndarray): + """Applies the fitted scaling parameter and the kernel to yield a weight matrix. + + The weight matrix is calculated based on the self.scale_mean and self.scale_max parameters which need to be calculated before calling this function. + They define the center and extend of the tricubic kernels. The first and last column are one-padded at the start and beginning to allow for extrapolation. + + Parameters + ---------- + + x: numpy.ndarray + Numpy array of shape (n_datapoints, 1) which should be transformed to weights. + + + Returns + ---------- + + numpy.ndarray + Weight matrix with the shape (n_datapoints, n_kernels). + + """ + w = np.tile(x,(1,self.n_kernels)) + + w = np.abs(w - self.scale_mean) + w = w/self.scale_max + + # apply weighting kernel + w = self.tricubic(w) + + # perform epsilon padding at the start and end of the weight matrix to allow for extrapolation. 
+ + # Does not work well yet + + # START + #idx_values = np.where(w[:,0] > 0)[0] + #min_idx, max_idx = idx_values[[0, -1]] + #w[:min_idx,0] = EPSILON + + # END + #idx_values = np.where(w[:,-1] > 0)[0] + #min_idx, max_idx = idx_values[[0, -1]] + #w[max_idx:,-1] = EPSILON + + # normalize column wise + + w = w/np.sum(w, axis=1, keepdims=True) + + return w + + + @staticmethod + def tricubic(x): + """tricubic weight kernel""" + epsilon = EPSILON + mask = np.abs(x) <= 1 + return mask * (np.power(1-np.power(np.abs(x),3),3) + epsilon) + diff --git a/nbdev_nbs/statistics/regression.ipynb b/nbdev_nbs/statistics/regression.ipynb index d5f95b80..27802451 100644 --- a/nbdev_nbs/statistics/regression.ipynb +++ b/nbdev_nbs/statistics/regression.ipynb @@ -9,7 +9,16 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| default_exp statistics.regression" + ] + }, + { + "cell_type": "code", + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -19,6 +28,18 @@ "from sklearn.preprocessing import PolynomialFeatures\n", "from sklearn.base import BaseEstimator, RegressorMixin\n", "\n", + "from sklearn.utils.estimator_checks import check_estimator\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", "EPSILON = 1e-6\n", "\n", "class LOESSRegression(BaseEstimator, RegressorMixin):\n", @@ -251,7 +272,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -264,13 +285,12 @@ } ], "source": [ - "from sklearn.utils.estimator_checks import check_estimator\n", "check_estimator(LOESSRegression())" ] }, { "cell_type": "code", - "execution_count": 25, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -285,7 +305,7 @@ } ], "source": [ - "import matplotlib.pyplot as plt\n", + "\n", "def noisy_1d(x):\n", " y = 
np.sin(x)\n", " y_err = np.random.normal(y,0.5)\n", @@ -308,24 +328,6 @@ "display_name": "Python 3.8.13 ('alphadia')", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.13" - }, - "orig_nbformat": 4, - "vscode": { - "interpreter": { - "hash": "73529403064d3d77076e792311ea0c557580cb84bf922625b9090c035bb21740" - } } }, "nbformat": 4, From a5ee67a128cb93c2cb1eeb922dfbc42101278c48 Mon Sep 17 00:00:00 2001 From: GeorgWa Date: Tue, 27 Sep 2022 15:52:20 +0200 Subject: [PATCH 03/52] fixed extrapolation bug --- alphabase/statistics/regression.py | 19 +++++++++---------- nbdev_nbs/statistics/regression.ipynb | 19 +++++++++---------- 2 files changed, 18 insertions(+), 20 deletions(-) diff --git a/alphabase/statistics/regression.py b/alphabase/statistics/regression.py index 6439b461..42e7e375 100644 --- a/alphabase/statistics/regression.py +++ b/alphabase/statistics/regression.py @@ -206,19 +206,18 @@ def get_weight_matrix(self, x: np.ndarray): # apply weighting kernel w = self.tricubic(w) - # perform epsilon padding at the start and end of the weight matrix to allow for extrapolation. - - # Does not work well yet - + #perform epsilon padding at the start and end of the weight matrix to allow for extrapolation. 
# START - #idx_values = np.where(w[:,0] > 0)[0] - #min_idx, max_idx = idx_values[[0, -1]] - #w[:min_idx,0] = EPSILON + idx_values = np.where(w[:,0] > 0)[0] + if len(idx_values) > 0: + min_idx, max_idx = idx_values[[0, -1]] + w[:min_idx,0] = EPSILON # END - #idx_values = np.where(w[:,-1] > 0)[0] - #min_idx, max_idx = idx_values[[0, -1]] - #w[max_idx:,-1] = EPSILON + idx_values = np.where(w[:,-1] > 0)[0] + if len(idx_values) > 0: + min_idx, max_idx = idx_values[[0, -1]] + w[max_idx:,-1] = EPSILON # normalize column wise diff --git a/nbdev_nbs/statistics/regression.ipynb b/nbdev_nbs/statistics/regression.ipynb index 27802451..f54dbd57 100644 --- a/nbdev_nbs/statistics/regression.ipynb +++ b/nbdev_nbs/statistics/regression.ipynb @@ -233,19 +233,18 @@ " # apply weighting kernel\n", " w = self.tricubic(w)\n", " \n", - " # perform epsilon padding at the start and end of the weight matrix to allow for extrapolation.\n", - "\n", - " # Does not work well yet\n", - "\n", + " #perform epsilon padding at the start and end of the weight matrix to allow for extrapolation.\n", " # START\n", - " #idx_values = np.where(w[:,0] > 0)[0]\n", - " #min_idx, max_idx = idx_values[[0, -1]]\n", - " #w[:min_idx,0] = EPSILON\n", + " idx_values = np.where(w[:,0] > 0)[0]\n", + " if len(idx_values) > 0:\n", + " min_idx, max_idx = idx_values[[0, -1]]\n", + " w[:min_idx,0] = EPSILON\n", "\n", " # END\n", - " #idx_values = np.where(w[:,-1] > 0)[0]\n", - " #min_idx, max_idx = idx_values[[0, -1]]\n", - " #w[max_idx:,-1] = EPSILON\n", + " idx_values = np.where(w[:,-1] > 0)[0]\n", + " if len(idx_values) > 0:\n", + " min_idx, max_idx = idx_values[[0, -1]]\n", + " w[max_idx:,-1] = EPSILON\n", "\n", " # normalize column wise\n", "\n", From 77449a866c80fb296791b3cf13360716e1ed9d54 Mon Sep 17 00:00:00 2001 From: GeorgWa Date: Tue, 27 Sep 2022 17:50:25 +0200 Subject: [PATCH 04/52] updated docstrings --- alphabase/statistics/regression.py | 69 ++++----- nbdev_nbs/statistics/regression.ipynb | 200 
+++++++++++++++++++++----- 2 files changed, 201 insertions(+), 68 deletions(-) diff --git a/alphabase/statistics/regression.py b/alphabase/statistics/regression.py index 42e7e375..9bb08590 100644 --- a/alphabase/statistics/regression.py +++ b/alphabase/statistics/regression.py @@ -10,33 +10,33 @@ from sklearn.base import BaseEstimator, RegressorMixin from sklearn.utils.estimator_checks import check_estimator -import matplotlib.pyplot as plt -# %% ../../nbdev_nbs/statistics/regression.ipynb 3 + +# %% ../../nbdev_nbs/statistics/regression.ipynb 4 EPSILON = 1e-6 class LOESSRegression(BaseEstimator, RegressorMixin): + """scikit-learn estimator which implements a LOESS style local polynomial regression. The number of basis functions or kernels can be explicitly defined which allows for faster and cheaper training and inference. + + Parameters + ---------- + + n_kernels : int + default = 6, The number of local polynomial functions used to approximate the data. The location and extend of the kernels will be distributed to contain an equal number of datapoints in the training set. + + kernel_size : float + default = 2, A factor increasing the kernel size to overlap with the neighboring kernel. + + polynomial_degree : int + default = 2, Degree of the polynomial functions used for the local approximation. + + """ def __init__(self, n_kernels: int = 6, kernel_size: float = 2., polynomial_degree: int = 2): - """scikit-learn estimator which implements a LOESS style local polynomial regression. - The number of basis functions or kernels can be explicitly defined which allows for faster and cheaper training and inference. - - Parameters - ----------- - - n_kernels: int, default = 6 - The number of local polynomial functions used to approximate the data. The location and extend of the kernels will be distributed to contain an equal number of datapoints in the training set. 
- kernel_size: float, default = 2 - A factor increasing the kernel size to overlap with the neighboring kernel. - - polynomial_degree: int, default = 2 - Degree of the polynomial functions used for the local approximation. - - """ self.n_kernels = n_kernels self.kernel_size = kernel_size self.polynomial_degree = polynomial_degree @@ -54,12 +54,14 @@ def calculate_kernel_indices(self, x: np.ndarray): """Determine the indices of the datapoints belonging to each kernel. Parameters - ----------- - x : numpy.ndarray, float, of shape (n_datapoints) + ---------- + x : numpy.ndarray + float, of shape (n_datapoints) Returns - -------- - numpy.ndarray, int, of shape (n_kernels, 2) + ------- + numpy.ndarray, int + of shape (n_kernels, 2) """ @@ -84,16 +86,16 @@ def fit(self, x: np.ndarray, y: np.ndarray): """fit the model passed on provided training data. Parameters - ----------- + ---------- - x: numpy.ndarray, float, of shape (n_samples,) or (n_samples, 1) - Training data. Note that only a single feature is supported at the moment. + x : numpy.ndarray + float, of shape (n_samples,) or (n_samples, 1), Training data. Note that only a single feature is supported at the moment. - y: numpy.ndarray, float, of shape (n_samples,) or (n_samples, 1) - Target values. + y : numpy.ndarray, float + of shape (n_samples,) or (n_samples, 1) Target values. Returns - --------- + ------- self: object Returns the fitted estimator. @@ -155,15 +157,16 @@ def predict(self, x: np.ndarray): """Predict using the LOESS model. Parameters - ----------- + ---------- - x: numpy.ndarray, float, of shape (n_samples,) or (n_samples, 1) - Feature data. Note that only a single feature is supported at the moment. + x : numpy.ndarray + float, of shape (n_samples,) or (n_samples, 1) Feature data. Note that only a single feature is supported at the moment. Returns - --------- + ------- - y: numpy.ndarray, float, of shape (n_samples,) + y : numpy.ndarray, float + of shape (n_samples,) Target values. 
""" @@ -192,7 +195,7 @@ def get_weight_matrix(self, x: np.ndarray): Returns - ---------- + ------- numpy.ndarray Weight matrix with the shape (n_datapoints, n_kernels). diff --git a/nbdev_nbs/statistics/regression.ipynb b/nbdev_nbs/statistics/regression.ipynb index f54dbd57..035d6fff 100644 --- a/nbdev_nbs/statistics/regression.ipynb +++ b/nbdev_nbs/statistics/regression.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Scikit-learn style implementation of the LOESS LOcally Estimated Scatterplot Smoothing regression." + "### Regression" ] }, { @@ -28,7 +28,16 @@ "from sklearn.preprocessing import PolynomialFeatures\n", "from sklearn.base import BaseEstimator, RegressorMixin\n", "\n", - "from sklearn.utils.estimator_checks import check_estimator\n", + "from sklearn.utils.estimator_checks import check_estimator\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", "import matplotlib.pyplot as plt" ] }, @@ -43,27 +52,27 @@ "EPSILON = 1e-6\n", "\n", "class LOESSRegression(BaseEstimator, RegressorMixin):\n", + " \"\"\"scikit-learn estimator which implements a LOESS style local polynomial regression. The number of basis functions or kernels can be explicitly defined which allows for faster and cheaper training and inference.\n", + " \n", + " Parameters\n", + " ----------\n", + "\n", + " n_kernels : int \n", + " default = 6, The number of local polynomial functions used to approximate the data. 
The location and extend of the kernels will be distributed to contain an equal number of datapoints in the training set.\n", + "\n", + " kernel_size : float\n", + " default = 2, A factor increasing the kernel size to overlap with the neighboring kernel.\n", + "\n", + " polynomial_degree : int\n", + " default = 2, Degree of the polynomial functions used for the local approximation.\n", + "\n", + " \"\"\"\n", "\n", " def __init__(self, \n", " n_kernels: int = 6, \n", " kernel_size: float = 2., \n", " polynomial_degree: int = 2):\n", - " \"\"\"scikit-learn estimator which implements a LOESS style local polynomial regression.\n", - " The number of basis functions or kernels can be explicitly defined which allows for faster and cheaper training and inference.\n", - " \n", - " Parameters\n", - " -----------\n", - "\n", - " n_kernels: int, default = 6\n", - " The number of local polynomial functions used to approximate the data. The location and extend of the kernels will be distributed to contain an equal number of datapoints in the training set.\n", - "\n", - " kernel_size: float, default = 2\n", - " A factor increasing the kernel size to overlap with the neighboring kernel.\n", - "\n", - " polynomial_degree: int, default = 2\n", - " Degree of the polynomial functions used for the local approximation.\n", "\n", - " \"\"\"\n", " self.n_kernels = n_kernels\n", " self.kernel_size = kernel_size\n", " self.polynomial_degree = polynomial_degree\n", @@ -81,12 +90,14 @@ " \"\"\"Determine the indices of the datapoints belonging to each kernel.\n", "\n", " Parameters\n", - " -----------\n", - " x : numpy.ndarray, float, of shape (n_datapoints)\n", + " ----------\n", + " x : numpy.ndarray\n", + " float, of shape (n_datapoints)\n", "\n", " Returns\n", - " --------\n", - " numpy.ndarray, int, of shape (n_kernels, 2)\n", + " -------\n", + " numpy.ndarray, int\n", + " of shape (n_kernels, 2)\n", " \n", " \"\"\"\n", "\n", @@ -111,16 +122,16 @@ " \"\"\"fit the model passed on provided 
training data.\n", " \n", " Parameters\n", - " -----------\n", + " ----------\n", "\n", - " x: numpy.ndarray, float, of shape (n_samples,) or (n_samples, 1)\n", - " Training data. Note that only a single feature is supported at the moment.\n", + " x : numpy.ndarray\n", + " float, of shape (n_samples,) or (n_samples, 1), Training data. Note that only a single feature is supported at the moment.\n", "\n", - " y: numpy.ndarray, float, of shape (n_samples,) or (n_samples, 1)\n", - " Target values.\n", + " y : numpy.ndarray, float\n", + " of shape (n_samples,) or (n_samples, 1) Target values.\n", "\n", " Returns\n", - " ---------\n", + " -------\n", "\n", " self: object\n", " Returns the fitted estimator.\n", @@ -182,15 +193,16 @@ " \"\"\"Predict using the LOESS model.\n", " \n", " Parameters\n", - " -----------\n", + " ----------\n", "\n", - " x: numpy.ndarray, float, of shape (n_samples,) or (n_samples, 1)\n", - " Feature data. Note that only a single feature is supported at the moment.\n", + " x : numpy.ndarray\n", + " float, of shape (n_samples,) or (n_samples, 1) Feature data. 
Note that only a single feature is supported at the moment.\n", " \n", " Returns\n", - " ---------\n", + " -------\n", "\n", - " y: numpy.ndarray, float, of shape (n_samples,)\n", + " y : numpy.ndarray, float\n", + " of shape (n_samples,)\n", " Target values.\n", "\n", " \"\"\"\n", @@ -219,7 +231,7 @@ "\n", "\n", " Returns\n", - " ----------\n", + " -------\n", " \n", " numpy.ndarray\n", " Weight matrix with the shape (n_datapoints, n_kernels).\n", @@ -262,11 +274,121 @@ " " ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#|hide\n", + "from nbdev.showdoc import show_doc" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "[source](https://github.com/MannLabs/alphabase/blob/main/alphabase/statistics/regression.py#L85){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### LOESSRegression.fit\n", + "\n", + "> LOESSRegression.fit (x:numpy.ndarray, y:numpy.ndarray)\n", + "\n", + "fit the model passed on provided training data.\n", + "\n", + "| | **Type** | **Details** |\n", + "| -- | -------- | ----------- |\n", + "| x | ndarray | float, of shape (n_samples,) or (n_samples, 1), Training data. Note that only a single feature is supported at the moment. |\n", + "| y | ndarray | of shape (n_samples,) or (n_samples, 1) Target values. 
|\n", + "| **Returns** | **self: object** | **Returns the fitted estimator.** |" + ], + "text/plain": [ + "---\n", + "\n", + "[source](https://github.com/MannLabs/alphabase/blob/main/alphabase/statistics/regression.py#L85){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### LOESSRegression.fit\n", + "\n", + "> LOESSRegression.fit (x:numpy.ndarray, y:numpy.ndarray)\n", + "\n", + "fit the model passed on provided training data.\n", + "\n", + "| | **Type** | **Details** |\n", + "| -- | -------- | ----------- |\n", + "| x | ndarray | float, of shape (n_samples,) or (n_samples, 1), Training data. Note that only a single feature is supported at the moment. |\n", + "| y | ndarray | of shape (n_samples,) or (n_samples, 1) Target values. |\n", + "| **Returns** | **self: object** | **Returns the fitted estimator.** |" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "show_doc(LOESSRegression.fit)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "[source](https://github.com/MannLabs/alphabase/blob/main/alphabase/statistics/regression.py#L156){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### LOESSRegression.predict\n", + "\n", + "> LOESSRegression.predict (x:numpy.ndarray)\n", + "\n", + "Predict using the LOESS model.\n", + "\n", + "| | **Type** | **Details** |\n", + "| -- | -------- | ----------- |\n", + "| x | ndarray | float, of shape (n_samples,) or (n_samples, 1) Feature data. Note that only a single feature is supported at the moment. 
|\n", + "| **Returns** | **numpy.ndarray, float** | |" + ], + "text/plain": [ + "---\n", + "\n", + "[source](https://github.com/MannLabs/alphabase/blob/main/alphabase/statistics/regression.py#L156){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### LOESSRegression.predict\n", + "\n", + "> LOESSRegression.predict (x:numpy.ndarray)\n", + "\n", + "Predict using the LOESS model.\n", + "\n", + "| | **Type** | **Details** |\n", + "| -- | -------- | ----------- |\n", + "| x | ndarray | float, of shape (n_samples,) or (n_samples, 1) Feature data. Note that only a single feature is supported at the moment. |\n", + "| **Returns** | **numpy.ndarray, float** | |" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "show_doc(LOESSRegression.predict)" + ] + }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Unit tests" + "### Application example" ] }, { @@ -284,6 +406,7 @@ } ], "source": [ + "#| hide\n", "check_estimator(LOESSRegression())" ] }, @@ -294,7 +417,7 @@ "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -320,6 +443,13 @@ "plt.plot(x_test,y_test,c='r')\n", "plt.show()" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { From 157aaa3b7e975bcfe5d5650f1da50c24c4823fb9 Mon Sep 17 00:00:00 2001 From: GeorgWa Date: Wed, 28 Sep 2022 23:33:13 +0200 Subject: [PATCH 05/52] close #42 improved edge case handling and testing --- alphabase/statistics/regression.py | 38 +++++++++-- nbdev_nbs/statistics/regression.ipynb | 91 ++++++++++++++++++++++++--- 2 files changed, 115 insertions(+), 14 deletions(-) diff --git a/alphabase/statistics/regression.py b/alphabase/statistics/regression.py index 9bb08590..ad457386 100644 --- a/alphabase/statistics/regression.py +++ b/alphabase/statistics/regression.py @@ -105,25 +105,50 @@ def fit(self, x: np.ndarray, y: np.ndarray): # As required by scikit-learn estimator guidelines self.n_features_in_ = 1 + # === start === sanity checks === # Does not yet work with more than one input dimension # axis-wise scaling and improved distance function need to be implemented if len(x.shape) > 1: if x.shape[1] > 1: raise ValueError('Input arrays with more than one feature not yet supported. 
Please provide a matrix of shape (n_datapoints, 1) or (n_datapoints,)') - # create flat version of the array for - idx_sorted = np.argsort(x.flat) - x_sorted = x.flat[idx_sorted] + # at least two datapoints required + if len(x.flat) < 2: + raise ValueError('At least two datapoints required for fitting.') + + # sanity check for number of datapoints, reduce n_kernels if needed + degrees_freedom = (1 + self.polynomial_degree) * self.n_kernels + + if len(x.flat) < degrees_freedom: + print(f"Curve fitting with {self.n_kernels} kernels and polynomials of {self.polynomial_degree} degree requires at least {degrees_freedom} datapoints.") + + self.n_kernels = np.max([len(x.flat) // (1 + self.polynomial_degree),1]) + + print(f"Number of kernels will be reduced to {self.n_kernels} kernels.") + # sanity check for number of datapoints, reduce degree of polynomial if necessary + degrees_freedom = (1 + self.polynomial_degree) * self.n_kernels + if len(x.flat) < degrees_freedom: + self.polynomial_degree = len(x.flat) - 1 + + print(f"Polynomial degree will be reduced to {self.polynomial_degree}.") + + # reshape both arrays to column arrays if len(x.shape) == 1: x = x[...,np.newaxis] if len(y.shape) == 1: y = y[...,np.newaxis] + + # === end === sanity checks === + + # create flat version of the array for + idx_sorted = np.argsort(x.flat) + x_sorted = x.flat[idx_sorted] # kernel indices will only be calculated during fitting kernel_indices = self.calculate_kernel_indices(x_sorted) - + # scale max and scale mean will then be used for calculating the weighht matrix self.scale_mean = np.zeros((self.n_kernels)) self.scale_max = np.zeros((self.n_kernels)) @@ -137,11 +162,13 @@ def fit(self, x: np.ndarray, y: np.ndarray): # from here on, the original column arrays are used w = self.get_weight_matrix(x) + # build design matrix polynomial_transform = PolynomialFeatures(self.polynomial_degree) x_design = polynomial_transform.fit_transform(x) number_of_dimensions = len(x_design[0]) + self.beta 
= np.zeros((number_of_dimensions,self.n_kernels)) for i, weights in enumerate(w.T): @@ -150,7 +177,8 @@ def fit(self, x: np.ndarray, y: np.ndarray): beta = (loadings*weights)@y y_m = np.sum(x_design @ beta, axis=1) self.beta[:,i] = np.ravel((loadings*weights)@y) - + + return self def predict(self, x: np.ndarray): diff --git a/nbdev_nbs/statistics/regression.ipynb b/nbdev_nbs/statistics/regression.ipynb index 035d6fff..fbcdcc99 100644 --- a/nbdev_nbs/statistics/regression.ipynb +++ b/nbdev_nbs/statistics/regression.ipynb @@ -141,25 +141,50 @@ " # As required by scikit-learn estimator guidelines\n", " self.n_features_in_ = 1\n", "\n", + " # === start === sanity checks ===\n", " # Does not yet work with more than one input dimension\n", " # axis-wise scaling and improved distance function need to be implemented\n", " if len(x.shape) > 1:\n", " if x.shape[1] > 1:\n", " raise ValueError('Input arrays with more than one feature not yet supported. Please provide a matrix of shape (n_datapoints, 1) or (n_datapoints,)')\n", "\n", - " # create flat version of the array for \n", - " idx_sorted = np.argsort(x.flat)\n", - " x_sorted = x.flat[idx_sorted]\n", + " # at least two datapoints required\n", + " if len(x.flat) < 2:\n", + " raise ValueError('At least two datapoints required for fitting.')\n", + "\n", + " # sanity check for number of datapoints, reduce n_kernels if needed\n", + " degrees_freedom = (1 + self.polynomial_degree) * self.n_kernels\n", + "\n", + " if len(x.flat) < degrees_freedom:\n", + " print(f\"Curve fitting with {self.n_kernels} kernels and polynomials of {self.polynomial_degree} degree requires at least {degrees_freedom} datapoints.\")\n", + " \n", + " self.n_kernels = np.max([len(x.flat) // (1 + self.polynomial_degree),1])\n", + " \n", + " print(f\"Number of kernels will be reduced to {self.n_kernels} kernels.\")\n", + "\n", + " # sanity check for number of datapoints, reduce degree of polynomial if necessary\n", + " degrees_freedom = (1 + 
self.polynomial_degree) * self.n_kernels\n", + " if len(x.flat) < degrees_freedom:\n", + " self.polynomial_degree = len(x.flat) - 1\n", + "\n", + " print(f\"Polynomial degree will be reduced to {self.polynomial_degree}.\")\n", "\n", + " # reshape both arrays to column arrays\n", " if len(x.shape) == 1:\n", " x = x[...,np.newaxis]\n", "\n", " if len(y.shape) == 1:\n", " y = y[...,np.newaxis]\n", + " \n", + " # === end === sanity checks ===\n", + "\n", + " # create flat version of the array for \n", + " idx_sorted = np.argsort(x.flat)\n", + " x_sorted = x.flat[idx_sorted]\n", "\n", " # kernel indices will only be calculated during fitting\n", " kernel_indices = self.calculate_kernel_indices(x_sorted)\n", - "\n", + " \n", " # scale max and scale mean will then be used for calculating the weighht matrix\n", " self.scale_mean = np.zeros((self.n_kernels))\n", " self.scale_max = np.zeros((self.n_kernels))\n", @@ -173,11 +198,13 @@ " # from here on, the original column arrays are used\n", " w = self.get_weight_matrix(x)\n", "\n", + "\n", " # build design matrix\n", " polynomial_transform = PolynomialFeatures(self.polynomial_degree)\n", " x_design = polynomial_transform.fit_transform(x)\n", " number_of_dimensions = len(x_design[0])\n", "\n", + " \n", " self.beta = np.zeros((number_of_dimensions,self.n_kernels))\n", "\n", " for i, weights in enumerate(w.T):\n", @@ -186,7 +213,8 @@ " beta = (loadings*weights)@y\n", " y_m = np.sum(x_design @ beta, axis=1)\n", " self.beta[:,i] = np.ravel((loadings*weights)@y)\n", - " \n", + " \n", + "\n", " return self\n", "\n", " def predict(self, x: np.ndarray):\n", @@ -417,7 +445,7 @@ "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -433,7 +461,7 @@ " y_err = np.random.normal(y,0.5)\n", " return y + y_err + 0.5 * x\n", "\n", - "x_train = np.linspace(0,15,100)\n", + "x_train = np.linspace(0,15,200)\n", "y_train = noisy_1d(x_train)\n", "\n", "x_test = np.linspace(0,15,200)\n", @@ -444,12 +472,57 @@ "plt.show()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#| hide\n", + "### Unit Tests for Edge Cases" + ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Curve fitting with 4 kernels and polynomials of 2 degree requires at least 12 datapoints.\n", + "Number of kernels will be reduced to 2 kernels.\n" + ] + } + ], + "source": [ + "#| hide\n", + "\n", + "def noisy_1d(x):\n", + " y = np.sin(x)\n", + " y_err = np.random.normal(y,0.5)\n", + " return y + y_err + 0.5 * x\n", + "\n", + "# To few datapoints for choosen number of kernels\n", + "\n", + "x_train = np.linspace(0,15,6)\n", + "y_train = noisy_1d(x_train)\n", + "x_test = np.linspace(0,15,10)\n", + "y_test = LOESSRegression(n_kernels=4, polynomial_degree=2).fit(x_train, y_train).predict(x_test)\n", + "\n", + "# Extrapolation\n", + "\n", + "x_train = np.linspace(0,15,60)\n", + "y_train = noisy_1d(x_train)\n", + "x_test = np.linspace(-10,25,10)\n", + "y_test = LOESSRegression(n_kernels=4, polynomial_degree=2).fit(x_train, y_train).predict(x_test)\n", + "\n", + "# single datapoint inference\n", + "\n", + "x_train = np.linspace(0,15,100)\n", + "y_train = noisy_1d(x_train)\n", + "x_test = np.linspace(10,10,1)\n", + "y_test = LOESSRegression(n_kernels=4, polynomial_degree=2).fit(x_train, y_train).predict(x_test)" + ] } ], "metadata": { From 2f91919ba64650f5c803d40ccf4d3259438f1327 Mon Sep 17 00:00:00 2001 From: "Zeng, Wen-Feng" Date: Tue, 4 Oct 2022 11:35:17 +0200 Subject: [PATCH 06/52] sklearn in req --- requirements/requirements.txt | 3 ++- requirements/requirements_development.txt | 1 + 2 
files changed, 3 insertions(+), 1 deletion(-) diff --git a/requirements/requirements.txt b/requirements/requirements.txt index beaeccae..896ad272 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -7,4 +7,5 @@ contextlib2 mmh3 biopython psutil -tqdm \ No newline at end of file +tqdm +scikit-learn \ No newline at end of file diff --git a/requirements/requirements_development.txt b/requirements/requirements_development.txt index 22a41174..09674432 100644 --- a/requirements/requirements_development.txt +++ b/requirements/requirements_development.txt @@ -8,6 +8,7 @@ pipdeptree ipykernel nbdev pyteomics +scikit-learn tqdm psutil From a2621653ee7e959bc95e7f985519d1ceacec8181 Mon Sep 17 00:00:00 2001 From: "Zeng, Wen-Feng" Date: Mon, 10 Oct 2022 16:26:08 +0200 Subject: [PATCH 07/52] percolator --- alphabase/_modidx.py | 43 ++ alphabase/scoring/__init__.py | 0 alphabase/scoring/fdr.py | 169 +++++ alphabase/scoring/feature_extraction_base.py | 49 ++ alphabase/scoring/ml_scoring_base.py | 293 ++++++++ nbdev_nbs/psm_reader/dia_psm_reader.ipynb | 9 + nbdev_nbs/scoring/fdr.ipynb | 625 ++++++++++++++++++ .../scoring/feature_extraction_base.ipynb | 98 +++ nbdev_nbs/scoring/ml_scoring_base.ipynb | 445 +++++++++++++ 9 files changed, 1731 insertions(+) create mode 100644 alphabase/scoring/__init__.py create mode 100644 alphabase/scoring/fdr.py create mode 100644 alphabase/scoring/feature_extraction_base.py create mode 100644 alphabase/scoring/ml_scoring_base.py create mode 100644 nbdev_nbs/scoring/fdr.ipynb create mode 100644 nbdev_nbs/scoring/feature_extraction_base.ipynb create mode 100644 nbdev_nbs/scoring/ml_scoring_base.ipynb diff --git a/alphabase/_modidx.py b/alphabase/_modidx.py index bd864a2f..27d6f5bc 100644 --- a/alphabase/_modidx.py +++ b/alphabase/_modidx.py @@ -387,6 +387,49 @@ 'alphabase/psm_reader/psm_reader.py'), 'alphabase.psm_reader.psm_reader.translate_other_modification': ( 'psm_reader/psm_reader.html#translate_other_modification', 
'alphabase/psm_reader/psm_reader.py')}, + 'alphabase.scoring.fdr': { 'alphabase.scoring.fdr.calculate_fdr': ( 'scoring/fdr.html#calculate_fdr', + 'alphabase/scoring/fdr.py'), + 'alphabase.scoring.fdr.calculate_fdr_from_ref': ( 'scoring/fdr.html#calculate_fdr_from_ref', + 'alphabase/scoring/fdr.py'), + 'alphabase.scoring.fdr.fdr_from_ref': ('scoring/fdr.html#fdr_from_ref', 'alphabase/scoring/fdr.py'), + 'alphabase.scoring.fdr.fdr_to_q_values': ( 'scoring/fdr.html#fdr_to_q_values', + 'alphabase/scoring/fdr.py')}, + 'alphabase.scoring.feature_extraction_base': { 'alphabase.scoring.feature_extraction_base.BaseFeatureExtractor': ( 'scoring/feature_extraction_base.html#basefeatureextractor', + 'alphabase/scoring/feature_extraction_base.py'), + 'alphabase.scoring.feature_extraction_base.BaseFeatureExtractor.__init__': ( 'scoring/feature_extraction_base.html#basefeatureextractor.__init__', + 'alphabase/scoring/feature_extraction_base.py'), + 'alphabase.scoring.feature_extraction_base.BaseFeatureExtractor.extract_features': ( 'scoring/feature_extraction_base.html#basefeatureextractor.extract_features', + 'alphabase/scoring/feature_extraction_base.py'), + 'alphabase.scoring.feature_extraction_base.BaseFeatureExtractor.feature_list': ( 'scoring/feature_extraction_base.html#basefeatureextractor.feature_list', + 'alphabase/scoring/feature_extraction_base.py')}, + 'alphabase.scoring.ml_scoring_base': { 'alphabase.scoring.ml_scoring_base.Percolator': ( 'scoring/ml_scoring_base.html#percolator', + 'alphabase/scoring/ml_scoring_base.py'), + 'alphabase.scoring.ml_scoring_base.Percolator.__init__': ( 'scoring/ml_scoring_base.html#percolator.__init__', + 'alphabase/scoring/ml_scoring_base.py'), + 'alphabase.scoring.ml_scoring_base.Percolator._cv_score': ( 'scoring/ml_scoring_base.html#percolator._cv_score', + 'alphabase/scoring/ml_scoring_base.py'), + 'alphabase.scoring.ml_scoring_base.Percolator._estimate_fdr': ( 'scoring/ml_scoring_base.html#percolator._estimate_fdr', + 
'alphabase/scoring/ml_scoring_base.py'), + 'alphabase.scoring.ml_scoring_base.Percolator._estimate_fdr_per_raw': ( 'scoring/ml_scoring_base.html#percolator._estimate_fdr_per_raw', + 'alphabase/scoring/ml_scoring_base.py'), + 'alphabase.scoring.ml_scoring_base.Percolator._estimate_psm_fdr': ( 'scoring/ml_scoring_base.html#percolator._estimate_psm_fdr', + 'alphabase/scoring/ml_scoring_base.py'), + 'alphabase.scoring.ml_scoring_base.Percolator._predict': ( 'scoring/ml_scoring_base.html#percolator._predict', + 'alphabase/scoring/ml_scoring_base.py'), + 'alphabase.scoring.ml_scoring_base.Percolator._train': ( 'scoring/ml_scoring_base.html#percolator._train', + 'alphabase/scoring/ml_scoring_base.py'), + 'alphabase.scoring.ml_scoring_base.Percolator.extract_features': ( 'scoring/ml_scoring_base.html#percolator.extract_features', + 'alphabase/scoring/ml_scoring_base.py'), + 'alphabase.scoring.ml_scoring_base.Percolator.feature_extractor': ( 'scoring/ml_scoring_base.html#percolator.feature_extractor', + 'alphabase/scoring/ml_scoring_base.py'), + 'alphabase.scoring.ml_scoring_base.Percolator.feature_list': ( 'scoring/ml_scoring_base.html#percolator.feature_list', + 'alphabase/scoring/ml_scoring_base.py'), + 'alphabase.scoring.ml_scoring_base.Percolator.ml_model': ( 'scoring/ml_scoring_base.html#percolator.ml_model', + 'alphabase/scoring/ml_scoring_base.py'), + 'alphabase.scoring.ml_scoring_base.Percolator.rescore': ( 'scoring/ml_scoring_base.html#percolator.rescore', + 'alphabase/scoring/ml_scoring_base.py'), + 'alphabase.scoring.ml_scoring_base.Percolator.run': ( 'scoring/ml_scoring_base.html#percolator.run', + 'alphabase/scoring/ml_scoring_base.py')}, 'alphabase.spectral_library.decoy_library': { 'alphabase.spectral_library.decoy_library.DecoyLib': ( 'spectral_library/decoy_library.html#decoylib', 'alphabase/spectral_library/decoy_library.py'), 'alphabase.spectral_library.decoy_library.DecoyLib.__init__': ( 'spectral_library/decoy_library.html#decoylib.__init__', diff 
--git a/alphabase/scoring/__init__.py b/alphabase/scoring/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/alphabase/scoring/fdr.py b/alphabase/scoring/fdr.py new file mode 100644 index 00000000..caf13781 --- /dev/null +++ b/alphabase/scoring/fdr.py @@ -0,0 +1,169 @@ +# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbdev_nbs/scoring/fdr.ipynb. + +# %% auto 0 +__all__ = ['calc_fdr_for_df', 'calc_fdr_from_ref_for_df', 'fdr_to_q_values', 'calculate_fdr', 'fdr_from_ref', + 'calculate_fdr_from_ref'] + +# %% ../../nbdev_nbs/scoring/fdr.ipynb 3 +import numba +import numpy as np +import pandas as pd + +# %% ../../nbdev_nbs/scoring/fdr.ipynb 4 +@numba.njit +def fdr_to_q_values( + fdr_values:np.ndarray +)->np.ndarray: + """convert FDR values to q_values. + + Parameters + ---------- + fdr_values : np.ndarray + FDR values, they should be + sorted according to the descending order of the `score` + + Returns + ------- + np.ndarray + q_values + + """ + q_values = np.zeros_like(fdr_values) + min_q_value = np.max(fdr_values) + for i in range(len(fdr_values) - 1, -1, -1): + fdr = fdr_values[i] + if fdr < min_q_value: + min_q_value = fdr + q_values[i] = min_q_value + return q_values + +def calculate_fdr( + df:pd.DataFrame, + score_column:str, + decoy_column:str='decoy' +)->pd.DataFrame: + """Calculate FDR values (q_values in fact) for the given dataframe + + Parameters + ---------- + df : pd.DataFrame + PSM dataframe to calculate FDRs + + score_column : str + score column to sort in decending order + + decoy_column : str, optional + decoy column in the dataframe. + 1=target, 0=decoy. Defaults to 'decoy'. 
+ + Returns + ------- + pd.DataFrame + PSM dataframe with 'fdr' column added + + """ + df = df.reset_index(drop=True).sort_values( + [score_column,decoy_column], ascending=False + ) + target_values = 1-df[decoy_column].values + decoy_cumsum = np.cumsum(df[decoy_column].values) + target_cumsum = np.cumsum(target_values) + fdr_values = decoy_cumsum/target_cumsum + df['fdr'] = fdr_to_q_values(fdr_values) + return df + +#wrapper +calc_fdr_for_df = calculate_fdr + +@numba.njit +def fdr_from_ref( + sorted_scores:np.ndarray, + ref_scores:np.ndarray, + ref_fdr_values:np.ndarray +)->np.ndarray: + """ Calculate FDR values from the given reference scores and fdr_values. + It is used to extend peptide-level or sequence-level FDR (reference) + to each PSM, as PSMs are more useful for quantification. + + Parameters + ---------- + sorted_scores : np.array + the scores to calculate FDRs, + they must be sorted in decending order. + + ref_scores : np.array + reference scores that used to + calculate ref_fdr_values, also sorted in decending order. + + ref_fdr_values : np.array + fdr values corresponding to ref_scores + + Returns + ------- + np.array + fdr values corresponding to sorted_scores. + + """ + q_values = np.zeros_like(sorted_scores) + i,j = 0,0 + while i < len(sorted_scores) and j < len(ref_scores): + if sorted_scores[i] >= ref_scores[j]: + q_values[i] = ref_fdr_values[j] + i += 1 + else: + j += 1 + while i < len(sorted_scores): + q_values[i] = ref_fdr_values[-1] + i += 1 + return q_values + +def calculate_fdr_from_ref( + df: pd.DataFrame, + ref_scores:np.ndarray, + ref_fdr_values:np.ndarray, + score_column:str, + decoy_column:str='decoy' +)->pd.DataFrame: + """ Calculate FDR values for a PSM dataframe from the given reference + scores and fdr_values. It is used to extend peptide-level or + sequence-level FDR (reference) to each PSM, as PSMs are more useful + for quantification. 
+ `` + + Parameters + ---------- + df : pd.DataFrame + PSM dataframe + + ref_scores : np.array + reference scores that used to + calculate ref_fdr_values, also sorted in decending order. + + ref_fdr_values : np.array + fdr values corresponding to ref_scores + + score_column : str + score column in the dataframe + + decoy_column : str, optional + decoy column in the dataframe. + 1=target, 0=decoy. Defaults to 'decoy'. + + Returns + ------- + pd.DataFrame + dataframe with 'fdr' column added + + """ + df = df.reset_index(drop=True).sort_values( + [score_column,decoy_column], ascending=False + ) + sorted_idxes = np.argsort(ref_fdr_values) + ref_scores = ref_scores[sorted_idxes] + ref_q_values = ref_fdr_values[sorted_idxes] + df['fdr'] = fdr_from_ref( + df.score.values, ref_scores, ref_q_values + ) + return df + +calc_fdr_from_ref_for_df = calculate_fdr_from_ref diff --git a/alphabase/scoring/feature_extraction_base.py b/alphabase/scoring/feature_extraction_base.py new file mode 100644 index 00000000..02778b8f --- /dev/null +++ b/alphabase/scoring/feature_extraction_base.py @@ -0,0 +1,49 @@ +# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbdev_nbs/scoring/feature_extraction_base.ipynb. + +# %% auto 0 +__all__ = ['BaseFeatureExtractor'] + +# %% ../../nbdev_nbs/scoring/feature_extraction_base.ipynb 2 +import pandas as pd + +# %% ../../nbdev_nbs/scoring/feature_extraction_base.ipynb 3 +class BaseFeatureExtractor: + def __init__(self): + self._feature_list = ['score','nAA','charge'] + + @property + def feature_list(self)->list: + """ + This is a property. It tells ML scoring modules + what features (columns) are extracted by + this FeatureExtractor for scoring. 
+ + Returns + ------- + list + feature names (columns) in the PSM dataframe + """ + + self._feature_list = list(set(self._feature_list)) + return self._feature_list + + def extract_features(self, + psm_df:pd.DataFrame, + *args, **kwargs + )->pd.DataFrame: + """ + Extract the scoring features (self._feature_list) + and append them inplace into candidate PSMs (psm_df). + + Parameters + ---------- + psm_df : pd.DataFrame + PSMs to be rescore. + + Returns + ------- + pd.DataFrame + psm_df with appended the feature list extracted by this extractor. + """ + return psm_df + diff --git a/alphabase/scoring/ml_scoring_base.py b/alphabase/scoring/ml_scoring_base.py new file mode 100644 index 00000000..7f146a56 --- /dev/null +++ b/alphabase/scoring/ml_scoring_base.py @@ -0,0 +1,293 @@ +# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbdev_nbs/scoring/ml_scoring_base.ipynb. + +# %% auto 0 +__all__ = ['Percolator'] + +# %% ../../nbdev_nbs/scoring/ml_scoring_base.ipynb 2 +import pandas as pd +import numpy as np + +from sklearn.linear_model import LogisticRegression +from sklearn.base import BaseEstimator + +from .feature_extraction_base import BaseFeatureExtractor +from alphabase.scoring.fdr import ( + calculate_fdr, + calculate_fdr_from_ref, + fdr_to_q_values, + fdr_from_ref, +) + +# %% ../../nbdev_nbs/scoring/ml_scoring_base.ipynb 6 +class Percolator: + def __init__(self): + self._feature_extractor:BaseFeatureExtractor = BaseFeatureExtractor() + self._ml_model = LogisticRegression() + + self.fdr_level = 'psm' # psm, precursor, peptide, or sequence + self.fdr = 0.01 + self.per_raw_fdr = False + + self.max_training_sample = 200000 + self.min_training_sample = 100 + self.cv_fold = 1 + self.iter_num = 1 + + @property + def feature_list(self)->list: + """ The read-only property to get extracted feature_list """ + return self.feature_extractor.feature_list + + @property + def ml_model(self): + return self._ml_model + + @ml_model.setter + def ml_model(self, model): + """ + `model` 
must be sklearn models or other models but implement + the same methods `fit()` and `decision_function()`/`predict_proba()` + as sklearn models + """ + self._ml_model = model + + @property + def feature_extractor(self)->BaseFeatureExtractor: + return self._feature_extractor + + @feature_extractor.setter + def feature_extractor(self, fe:BaseFeatureExtractor): + self._feature_extractor = fe + + def extract_features(self, + psm_df:pd.DataFrame, + *args, **kwargs + )->pd.DataFrame: + """ + Extract features for rescoring. + + *args and **kwargs are used for + `self.feature_extractor.extract_features`. + + Parameters + ---------- + psm_df : pd.DataFrame + PSM DataFrame + + Returns + ------- + pd.DataFrame + psm_df with feature columns appended inplace. + """ + psm_df['ml_score'] = psm_df.score + psm_df = self._estimate_psm_fdr(psm_df) + return self._feature_extractor.extract_features( + psm_df, *args, **kwargs + ) + + def rescore(self, + df:pd.DataFrame + )->pd.DataFrame: + """Rescore + + Parameters + ---------- + df : pd.DataFrame + psm_df + + Returns + ------- + pd.DataFrame + psm_df with `ml_score` and `fdr` columns updated inplace + """ + for i in range(self.iter_num): + df = self._cv_score(df) + df = self._estimate_fdr(df, 'psm', False) + df = self._estimate_fdr(df) + return df + + def run(self, + psm_df:pd.DataFrame, + *args, **kwargs + )->pd.DataFrame: + """ + Run percolator workflow: + + - self.extract_features() + - self.re_score() + + *args and **kwargs are used for + `self.feature_extractor.extract_features`. + + Parameters + ---------- + psm_df : pd.DataFrame + PSM DataFrame + + Returns + ------- + pd.DataFrame + psm_df with feature columns appended inplace. 
+ """ + df = self.extract_features( + psm_df, *args, **kwargs + ) + return self.rescore(df) + + def _estimate_fdr_per_raw(self, + df:pd.DataFrame, + fdr_level:str + )->pd.DataFrame: + df_list = [] + for raw_name, df_raw in df.groupby('raw_name'): + df_list.append(self._estimate_fdr(df_raw, + fdr_level = fdr_level, + per_raw_fdr = False + )) + return pd.concat(df_list, ignore_index=True) + + def _estimate_psm_fdr(self, + df:pd.DataFrame, + )->pd.DataFrame: + df = df.sort_values( + ['ml_score','decoy'], ascending=False + ).reset_index(drop=True) + target_values = 1-df['decoy'].values + decoy_cumsum = np.cumsum(df['decoy'].values) + target_cumsum = np.cumsum(target_values) + fdr_values = decoy_cumsum/target_cumsum + df['fdr'] = fdr_to_q_values(fdr_values) + return df + + def _estimate_fdr(self, + df:pd.DataFrame, + fdr_level:str=None, + per_raw_fdr:bool=None, + )->pd.DataFrame: + if fdr_level is None: + fdr_level = self.fdr_level + if per_raw_fdr is None: + per_raw_fdr = self.per_raw_fdr + + if per_raw_fdr: + return self._estimate_fdr_per_raw( + df, fdr_level=fdr_level + ) + + if fdr_level == 'psm': + return self._estimate_psm_fdr(df) + else: + if fdr_level == 'precursor': + _df = df.groupby([ + 'sequence','mods','mod_sites','charge','decoy' + ])['ml_score'].max() + elif fdr_level == 'peptide': + _df = df.groupby([ + 'sequence','mods','mod_sites','decoy' + ])['ml_score'].max() + else: + _df = df.groupby(['sequence','decoy'])['ml_score'].max() + _df = self._estimate_psm_fdr(_df) + df['fdr'] = fdr_from_ref( + df['ml_score'].values, _df['ml_score'].values, + _df['fdr'].values + ) + return df + + def _train(self, + train_t_df:pd.DataFrame, + train_d_df:pd.DataFrame + ): + train_t_df = train_t_df[train_t_df.fdr<=self.fdr] + + if len(train_t_df) > self.max_train_sample: + train_t_df = train_t_df.sample( + n=self.max_training_sample, + random_state=1337 + ) + if len(train_d_df) > self.max_train_sample: + train_d_df = train_d_df.sample( + n=self.max_training_sample, + 
random_state=1337 + ) + + train_df = pd.concat((train_t_df, train_d_df)) + train_label = np.ones(len(train_df),dtype=np.int32) + train_label[len(train_t_df):] = 0 + + self._ml_model.fit( + train_df[self.feature_list].values, + train_label + ) + + def _predict(self, test_df): + try: + test_df['ml_score'] = self._ml_model.decision_function( + test_df[self.feature_list].values + ) + except AttributeError: + test_df['ml_score'] = self._ml_model.predict_proba( + test_df[self.feature_list].values + ) + return test_df + + def _cv_score(self, df:pd.DataFrame)->pd.DataFrame: + """ + Apply cross-validation for rescoring. + + It will split `df` into K folds. For each fold, + its ML scores are predicted by a model which + is trained by other K-1 folds . + + Parameters + ---------- + df : pd.DataFrame + PSMs to be rescored + + Returns + ------- + pd.DataFrame + PSMs after rescoring + """ + df = df.sample( + frac=1, random_state=1337 + ).reset_index(drop=True) + df_target = df[df.decoy == 0] + df_decoy = df[df.decoy != 0] + + if ( + np.sum(df_target.fdr 1: + test_df_list = [] + for i in range(self.cv_fold): + t_mask = np.ones(len(df_target), dtype=bool) + _slice = slice(i, len(df_target), self.cv_fold) + t_mask[_slice] = False + train_t_df = df_target[t_mask] + test_t_df = df_target[_slice] + + d_mask = np.ones(len(df_decoy), dtype=bool) + _slice = slice(i, len(df_decoy), self.cv_fold) + d_mask[_slice] = False + train_d_df = df_decoy[d_mask] + test_d_df = df_decoy[_slice] + + self._train(train_t_df, train_d_df) + + test_df = pd.concat((test_t_df, test_d_df)) + test_df_list.append(self._predict(test_df)) + + return pd.concat(test_df_list, ignore_index=True) + else: + + self._train(df_target, df_decoy) + test_df = pd.concat((df_target, df_decoy),ignore_index=True) + + return self._predict(test_df) + diff --git a/nbdev_nbs/psm_reader/dia_psm_reader.ipynb b/nbdev_nbs/psm_reader/dia_psm_reader.ipynb index bca7cc76..94a6003e 100644 --- a/nbdev_nbs/psm_reader/dia_psm_reader.ipynb +++ 
b/nbdev_nbs/psm_reader/dia_psm_reader.ipynb @@ -1219,6 +1219,15 @@ "display_name": "Python 3.8.3 ('base')", "language": "python", "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.8.3" + }, + "vscode": { + "interpreter": { + "hash": "8a3b27e141e49c996c9b863f8707e97aabd49c4a7e8445b9b783b34e4a21a9b2" + } } }, "nbformat": 4, diff --git a/nbdev_nbs/scoring/fdr.ipynb b/nbdev_nbs/scoring/fdr.ipynb new file mode 100644 index 00000000..993a76ba --- /dev/null +++ b/nbdev_nbs/scoring/fdr.ipynb @@ -0,0 +1,625 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "#| default_exp scoring.fdr" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# FDR" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Functionalities to calculate FDR.\n", + "\n", + "> In alphabase dataframes, we refer to FDR values as q_values without loss of generality." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "import numba\n", + "import numpy as np\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "@numba.njit\n", + "def fdr_to_q_values(\n", + " fdr_values:np.ndarray\n", + ")->np.ndarray:\n", + " \"\"\"convert FDR values to q_values.\n", + "\n", + " Parameters\n", + " ----------\n", + " fdr_values : np.ndarray\n", + " FDR values, they should be \n", + " sorted according to the descending order of the `score`\n", + "\n", + " Returns\n", + " -------\n", + " np.ndarray\n", + " q_values\n", + "\n", + " \"\"\"\n", + " q_values = np.zeros_like(fdr_values)\n", + " min_q_value = np.max(fdr_values)\n", + " for i in range(len(fdr_values) - 1, -1, -1):\n", + " fdr = fdr_values[i]\n", + " if fdr < min_q_value:\n", + " min_q_value = fdr\n", + " q_values[i] = min_q_value\n", + " return 
q_values\n", + "\n", + "def calculate_fdr(\n", + " df:pd.DataFrame, \n", + " score_column:str, \n", + " decoy_column:str='decoy'\n", + ")->pd.DataFrame:\n", + " \"\"\"Calculate FDR values (q_values in fact) for the given dataframe\n", + "\n", + " Parameters\n", + " ----------\n", + " df : pd.DataFrame\n", + " PSM dataframe to calculate FDRs\n", + "\n", + " score_column : str\n", + " score column to sort in descending order\n", + "\n", + " decoy_column : str, optional\n", + " decoy column in the dataframe. \n", + " 1=decoy, 0=target. Defaults to 'decoy'.\n", + "\n", + " Returns\n", + " -------\n", + " pd.DataFrame\n", + " PSM dataframe with 'fdr' column added\n", + "\n", + " \"\"\"\n", + " df = df.reset_index(drop=True).sort_values(\n", + " [score_column,decoy_column], ascending=False\n", + " )\n", + " target_values = 1-df[decoy_column].values\n", + " decoy_cumsum = np.cumsum(df[decoy_column].values)\n", + " target_cumsum = np.cumsum(target_values)\n", + " fdr_values = decoy_cumsum/target_cumsum\n", + " df['fdr'] = fdr_to_q_values(fdr_values)\n", + " return df\n", + "\n", + "#wrapper\n", + "calc_fdr_for_df = calculate_fdr\n", + "\n", + "@numba.njit\n", + "def fdr_from_ref(\n", + " sorted_scores:np.ndarray, \n", + " ref_scores:np.ndarray, \n", + " ref_fdr_values:np.ndarray\n", + ")->np.ndarray:\n", + " \"\"\" Calculate FDR values from the given reference scores and fdr_values. 
\n", + " It is used to extend peptide-level or sequence-level FDR (reference) \n", + " to each PSM, as PSMs are more useful for quantification.\n", + "\n", + " Parameters\n", + " ----------\n", + " sorted_scores : np.array\n", + " the scores to calculate FDRs, \n", + " they must be sorted in descending order.\n", + "\n", + " ref_scores : np.array\n", + " reference scores that are used to \n", + " calculate ref_fdr_values, also sorted in descending order.\n", + "\n", + " ref_fdr_values : np.array\n", + " fdr values corresponding to ref_scores\n", + "\n", + " Returns\n", + " -------\n", + " np.array\n", + " fdr values corresponding to sorted_scores.\n", + "\n", + " \"\"\"\n", + " q_values = np.zeros_like(sorted_scores)\n", + " i,j = 0,0\n", + " while i < len(sorted_scores) and j < len(ref_scores):\n", + " if sorted_scores[i] >= ref_scores[j]:\n", + " q_values[i] = ref_fdr_values[j]\n", + " i += 1\n", + " else:\n", + " j += 1\n", + " while i < len(sorted_scores):\n", + " q_values[i] = ref_fdr_values[-1]\n", + " i += 1\n", + " return q_values\n", + "\n", + "def calculate_fdr_from_ref(\n", + " df: pd.DataFrame,\n", + " ref_scores:np.ndarray, \n", + " ref_fdr_values:np.ndarray,\n", + " score_column:str, \n", + " decoy_column:str='decoy'\n", + ")->pd.DataFrame:\n", + " \"\"\" Calculate FDR values for a PSM dataframe from the given reference\n", + " scores and fdr_values. 
It is used to extend peptide-level or \n", + " sequence-level FDR (reference) to each PSM, as PSMs are more useful \n", + " for quantification.\n", + "\n", + "\n", + " Parameters\n", + " ----------\n", + " df : pd.DataFrame\n", + " PSM dataframe\n", + "\n", + " ref_scores : np.array\n", + " reference scores that are used to \n", + " calculate ref_fdr_values, also sorted in descending order.\n", + "\n", + " ref_fdr_values : np.array\n", + " fdr values corresponding to ref_scores\n", + "\n", + " score_column : str\n", + " score column in the dataframe\n", + "\n", + " decoy_column : str, optional\n", + " decoy column in the dataframe. \n", + " 1=decoy, 0=target. Defaults to 'decoy'.\n", + "\n", + " Returns\n", + " -------\n", + " pd.DataFrame\n", + " dataframe with 'fdr' column added\n", + "\n", + " \"\"\"\n", + " df = df.reset_index(drop=True).sort_values(\n", + " [score_column,decoy_column], ascending=False\n", + " )\n", + " sorted_idxes = np.argsort(ref_fdr_values)\n", + " ref_scores = ref_scores[sorted_idxes]\n", + " ref_q_values = ref_fdr_values[sorted_idxes]\n", + " df['fdr'] = fdr_from_ref(\n", + " df.score.values, ref_scores, ref_q_values\n", + " )\n", + " return df\n", + "\n", + "calc_fdr_from_ref_for_df = calculate_fdr_from_ref" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
scoredecoykindfdr
41720.9867510True0.000000
48620.9588030True0.000000
4620.9542440True0.000000
13120.8324400True0.000000
23620.8095950True0.000000
...............
11110.0463660False0.504008
7090.0408411False0.504505
12090.0308410False0.504505
9390.0137041False0.505000
14390.0037040False0.505000
\n", + "

1505 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " score decoy kind fdr\n", + "417 20.986751 0 True 0.000000\n", + "486 20.958803 0 True 0.000000\n", + "46 20.954244 0 True 0.000000\n", + "131 20.832440 0 True 0.000000\n", + "236 20.809595 0 True 0.000000\n", + "... ... ... ... ...\n", + "1111 0.046366 0 False 0.504008\n", + "709 0.040841 1 False 0.504505\n", + "1209 0.030841 0 False 0.504505\n", + "939 0.013704 1 False 0.505000\n", + "1439 0.003704 0 False 0.505000\n", + "\n", + "[1505 rows x 4 columns]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.DataFrame(\n", + " {\n", + " 'score': np.random.random(500)*10+11,\n", + " 'decoy': 0,\n", + " 'kind': True,\n", + " }\n", + ")\n", + "f_score = np.random.random(500)*9.9\n", + "df = df.append(\n", + " pd.DataFrame(\n", + " {\n", + " 'score': f_score+0.01,\n", + " 'decoy': 1,\n", + " 'kind': False\n", + " }\n", + " )\n", + ")\n", + "df = df.append(\n", + " pd.DataFrame(\n", + " {\n", + " 'score': f_score,\n", + " 'decoy': 0,\n", + " 'kind': False\n", + " }\n", + " )\n", + ")\n", + "df = df.append(\n", + " pd.DataFrame(\n", + " {\n", + " 'score': np.random.random(5)+10,\n", + " 'decoy': 1,\n", + " 'kind': False\n", + " }\n", + " )\n", + ")\n", + "\n", + "df = calculate_fdr(df, 'score', 'decoy')\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
scoredecoykindfdr
41720.9867510True0.0
48620.9588030True0.0
4620.9542440True0.0
13120.8324400True0.0
23620.8095950True0.0
...............
31311.0706950True0.0
22711.0284310True0.0
15311.0143300True0.0
11311.0139780True0.0
4811.0106290True0.0
\n", + "

500 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " score decoy kind fdr\n", + "417 20.986751 0 True 0.0\n", + "486 20.958803 0 True 0.0\n", + "46 20.954244 0 True 0.0\n", + "131 20.832440 0 True 0.0\n", + "236 20.809595 0 True 0.0\n", + ".. ... ... ... ...\n", + "313 11.070695 0 True 0.0\n", + "227 11.028431 0 True 0.0\n", + "153 11.014330 0 True 0.0\n", + "113 11.013978 0 True 0.0\n", + "48 11.010629 0 True 0.0\n", + "\n", + "[500 rows x 4 columns]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[(df.fdr < 0.01)&(df.decoy==0)]" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "assert len(df[(df.fdr < 0.01)&(df.decoy==0)]) == 500" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "dff = pd.DataFrame(\n", + " {\n", + " 'score': np.random.random(500)*10+11,\n", + " 'decoy': 0\n", + " }\n", + ")\n", + "f_score = np.random.random(500)*9.9\n", + "dff = dff.append(\n", + " pd.DataFrame(\n", + " {\n", + " 'score': f_score+0.01,\n", + " 'decoy': 1\n", + " }\n", + " )\n", + ")\n", + "dff = dff.append(\n", + " pd.DataFrame(\n", + " {\n", + " 'score': f_score,\n", + " 'decoy': 0\n", + " }\n", + " )\n", + ")\n", + "dff = dff.append(\n", + " pd.DataFrame(\n", + " {\n", + " 'score': np.random.random(5)+10,\n", + " 'decoy': 1\n", + " }\n", + " )\n", + ")\n", + "\n", + "dff['fdr'] = fdr_from_ref(dff.score.values, df.score.values, df.fdr.values)\n", + "\n", + "assert len(dff[(dff.fdr < 0.01)&(dff.decoy==0)]) == 500" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "dff = calculate_fdr_from_ref(dff, df.score.values, df.fdr.values, 'score')\n", + "assert len(dff[(dff.fdr < 0.01)&(dff.decoy==0)]) == 500" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] 
+ } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.8.3 ('base')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.3" + }, + "vscode": { + "interpreter": { + "hash": "8a3b27e141e49c996c9b863f8707e97aabd49c4a7e8445b9b783b34e4a21a9b2" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/nbdev_nbs/scoring/feature_extraction_base.ipynb b/nbdev_nbs/scoring/feature_extraction_base.ipynb new file mode 100644 index 00000000..258ce3f1 --- /dev/null +++ b/nbdev_nbs/scoring/feature_extraction_base.ipynb @@ -0,0 +1,98 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| default_exp scoring.feature_extraction_base" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Base Class of Feature Extractors" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "class BaseFeatureExtractor:\n", + " def __init__(self):\n", + " self._feature_list = ['score','nAA','charge']\n", + "\n", + " @property\n", + " def feature_list(self)->list:\n", + " \"\"\"\n", + " This is a property. 
It tells ML scoring modules \n", + " what features (columns) are extracted by \n", + " this FeatureExtractor for scoring.\n", + "\n", + " Returns\n", + " -------\n", + " list\n", + " feature names (columns) in the PSM dataframe\n", + " \"\"\"\n", + "\n", + " self._feature_list = list(set(self._feature_list))\n", + " return self._feature_list\n", + "\n", + " def extract_features(self, \n", + " psm_df:pd.DataFrame, \n", + " *args, **kwargs\n", + " )->pd.DataFrame:\n", + " \"\"\"\n", + " Extract the scoring features (self._feature_list) \n", + " and append them inplace into candidate PSMs (psm_df).\n", + "\n", + " Parameters\n", + " ----------\n", + " psm_df : pd.DataFrame\n", + " PSMs to be rescore.\n", + "\n", + " Returns\n", + " -------\n", + " pd.DataFrame\n", + " psm_df with appended the feature list extracted by this extractor.\n", + " \"\"\"\n", + " return psm_df\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.8.3 ('base')", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.8.3" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "8a3b27e141e49c996c9b863f8707e97aabd49c4a7e8445b9b783b34e4a21a9b2" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/nbdev_nbs/scoring/ml_scoring_base.ipynb b/nbdev_nbs/scoring/ml_scoring_base.ipynb new file mode 100644 index 00000000..c0238437 --- /dev/null +++ b/nbdev_nbs/scoring/ml_scoring_base.ipynb @@ -0,0 +1,445 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| default_exp scoring.ml_scoring_base" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Base Class of ML Scoring Methods" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "from sklearn.linear_model import 
LogisticRegression\n", + "from sklearn.base import BaseEstimator\n", + "\n", + "from alphabase.scoring.feature_extraction_base import BaseFeatureExtractor\n", + "from alphabase.scoring.fdr import (\n", + " calculate_fdr,\n", + " calculate_fdr_from_ref,\n", + " fdr_to_q_values,\n", + " fdr_from_ref,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "There are two key modules in ML-based rescoring: feature extraction and rescoring algorithm. Here we designed these two modules as flexible as possible for future extensions." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Feature extraction\n", + "\n", + "The feature extractor is more important than the ML methods, so we designed a flexible architecture for feature extraction. As shown in `BaseFeatureExtractor`, a feature extractor inherited from `BaseFeatureExtractor` must re-implement `BaseFeatureExtractor.extract_features`, and tells the ML methods what are the extracted features by providing `BaseFeatureExtractor.feature_list`. 
\n", + "\n", + "For example, if we have two feature extractors, `AlphaPeptFE` and `AlphaPeptDeepFE`:\n", + "\n", + "```python\n", + "class AlphaPeptFE(BaseFeatureExtractor):\n", + " def extract_features(self, psm_df):\n", + " psm_df['ap_f1'] = ...\n", + " self._feature_list.append('ap_f1')\n", + " psm_df['ap_f2'] = ...\n", + " self._feature_list.append('ap_f2')\n", + "\n", + "class AlphaPeptDeepFE(BaseFeatureExtractor):\n", + " def extract_features(self, psm_df):\n", + " psm_df['ad_f1'] = ...\n", + " self._feature_list.append('ad_f1')\n", + " psm_df['ad_f2'] = ...\n", + " self._feature_list.append('ad_f2')\n", + "```\n", + "\n", + "We can easily design a new feature extractor which combines these two and more feature extractors:\n", + "\n", + "```python\n", + "class CombFE(BaseFeatureExtractor):\n", + " def __init__(self):\n", + " self.fe_list = [AlphaPeptFE(),AlphaPeptDeepFE()]\n", + "\n", + " def extract_features(self, psm_df):\n", + " for fe in self.fe_list:\n", + " fe.extract_features(psm_df)\n", + "\n", + " @property\n", + " def feature_list(self):\n", + " f_set = set()\n", + " for fe in self.fe_list:\n", + " f_set.update(fe.feature_list)\n", + " return list(f_set)\n", + "```\n", + "\n", + "This will be useful for rescoring with DL features, for instance, when AlphaPeptDeep is or is not installed." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Rescoring Algorithm\n", + "\n", + "The rescoring algorithm called `Percolator` (Kall et al. 2007) based on the semi-supervised learning algorithm is still the most widely used in MS-based proteomics. Therefore, we used `Percolator` as the base rescoring class, and others can re-implement its methods to support different rescoring algorithms as well as different ML models:\n", + "\n", + "1. Rescoring algorithm. We have provided the base rescoring code structure in `Percolator`. 
If we are going to support DiaNN's brute-force supervised learning methods, we can define the class like this:\n", + "\n", + "```python\n", + "class DiaNNRescoring(Percolator):\n", + " def _train(self, train_t_df, train_d_df):\n", + " # No target filtration on FDR, which is the same as DiaNN but different in Percolator\n", + " #train_t_df = train_t_df[train_t_df.fdr<=self.fdr]\n", + " train_df = pd.concat((train_t_df, train_d_df))\n", + " train_label = np.ones(len(train_df),dtype=np.int32)\n", + " train_label[len(train_t_df):] = 0\n", + "\n", + " self._ml_model.fit(\n", + " train_df[self.feature_list].values, \n", + " train_label\n", + " )\n", + " def rescore(self, psm_df):\n", + " # We don't need iteration anymore, but cross validation may still be necessary\n", + " df = self._cv_score(psm_df)\n", + " return self._estimate_fdr(df)\n", + "```\n", + "\n", + "2. ML models. Personally, `Percolator` with a linear classifier (SVM or LogisticRegression) is preferred. But as a framework, we should support different ML models. We can easily switch to the random forest by `self.ml_model = RandomForestClassifier()`. We can also use a DL model which provides sklearn-like `fit()` and `decision_function()` APIs for rescoring." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "class Percolator:\n", + " def __init__(self):\n", + " self._feature_extractor:BaseFeatureExtractor = BaseFeatureExtractor()\n", + " self._ml_model = LogisticRegression()\n", + " \n", + " self.fdr_level = 'psm' # psm, precursor, peptide, or sequence\n", + " self.fdr = 0.01\n", + " self.per_raw_fdr = False\n", + "\n", + " self.max_training_sample = 200000\n", + " self.min_training_sample = 100\n", + " self.cv_fold = 1\n", + " self.iter_num = 1\n", + "\n", + " @property\n", + " def feature_list(self)->list:\n", + " \"\"\" The read-only property to get extracted feature_list \"\"\"\n", + " return self.feature_extractor.feature_list\n", + "\n", + " @property\n", + " def ml_model(self):\n", + " return self._ml_model\n", + " \n", + " @ml_model.setter\n", + " def ml_model(self, model):\n", + " \"\"\" \n", + " `model` must be sklearn models or other models but implement \n", + " the same methods `fit()` and `decision_function()`/`predict_proba()` \n", + " as sklearn models\n", + " \"\"\"\n", + " self._ml_model = model\n", + "\n", + " @property\n", + " def feature_extractor(self)->BaseFeatureExtractor:\n", + " return self._feature_extractor\n", + " \n", + " @feature_extractor.setter\n", + " def feature_extractor(self, fe:BaseFeatureExtractor):\n", + " self._feature_extractor = fe\n", + "\n", + " def extract_features(self,\n", + " psm_df:pd.DataFrame,\n", + " *args, **kwargs\n", + " )->pd.DataFrame:\n", + " \"\"\"\n", + " Extract features for rescoring.\n", + "\n", + " *args and **kwargs are used for \n", + " `self.feature_extractor.extract_features`.\n", + "\n", + " Parameters\n", + " ----------\n", + " psm_df : pd.DataFrame\n", + " PSM DataFrame\n", + "\n", + " Returns\n", + " -------\n", + " pd.DataFrame\n", + " psm_df with feature columns appended inplace.\n", + " \"\"\"\n", + " psm_df['ml_score'] = psm_df.score\n", + " psm_df = 
self._estimate_psm_fdr(psm_df)\n", + " return self._feature_extractor.extract_features(\n", + " psm_df, *args, **kwargs\n", + " )\n", + "\n", + " def rescore(self, \n", + " df:pd.DataFrame\n", + " )->pd.DataFrame:\n", + " \"\"\"Rescore\n", + "\n", + " Parameters\n", + " ----------\n", + " df : pd.DataFrame\n", + " psm_df\n", + "\n", + " Returns\n", + " -------\n", + " pd.DataFrame\n", + " psm_df with `ml_score` and `fdr` columns updated inplace\n", + " \"\"\"\n", + " for i in range(self.iter_num):\n", + " df = self._cv_score(df)\n", + " df = self._estimate_fdr(df, 'psm', False)\n", + " df = self._estimate_fdr(df)\n", + " return df\n", + "\n", + " def run(self,\n", + " psm_df:pd.DataFrame,\n", + " *args, **kwargs\n", + " )->pd.DataFrame:\n", + " \"\"\"\n", + " Run percolator workflow:\n", + "\n", + " - self.extract_features()\n", + " - self.re_score()\n", + "\n", + " *args and **kwargs are used for \n", + " `self.feature_extractor.extract_features`.\n", + "\n", + " Parameters\n", + " ----------\n", + " psm_df : pd.DataFrame\n", + " PSM DataFrame\n", + "\n", + " Returns\n", + " -------\n", + " pd.DataFrame\n", + " psm_df with feature columns appended inplace.\n", + " \"\"\"\n", + " df = self.extract_features(\n", + " psm_df, *args, **kwargs\n", + " )\n", + " return self.rescore(df)\n", + "\n", + " def _estimate_fdr_per_raw(self,\n", + " df:pd.DataFrame,\n", + " fdr_level:str\n", + " )->pd.DataFrame:\n", + " df_list = []\n", + " for raw_name, df_raw in df.groupby('raw_name'):\n", + " df_list.append(self._estimate_fdr(df_raw, \n", + " fdr_level = fdr_level,\n", + " per_raw_fdr = False\n", + " ))\n", + " return pd.concat(df_list, ignore_index=True)\n", + "\n", + " def _estimate_psm_fdr(self,\n", + " df:pd.DataFrame,\n", + " )->pd.DataFrame:\n", + " df = df.sort_values(\n", + " ['ml_score','decoy'], ascending=False\n", + " ).reset_index(drop=True)\n", + " target_values = 1-df['decoy'].values\n", + " decoy_cumsum = np.cumsum(df['decoy'].values)\n", + " target_cumsum = 
np.cumsum(target_values)\n", + " fdr_values = decoy_cumsum/target_cumsum\n", + " df['fdr'] = fdr_to_q_values(fdr_values)\n", + " return df\n", + " \n", + " def _estimate_fdr(self, \n", + " df:pd.DataFrame,\n", + " fdr_level:str=None,\n", + " per_raw_fdr:bool=None,\n", + " )->pd.DataFrame:\n", + " if fdr_level is None: \n", + " fdr_level = self.fdr_level\n", + " if per_raw_fdr is None: \n", + " per_raw_fdr = self.per_raw_fdr\n", + "\n", + " if per_raw_fdr:\n", + " return self._estimate_fdr_per_raw(\n", + " df, fdr_level=fdr_level\n", + " )\n", + "\n", + " if fdr_level == 'psm':\n", + " return self._estimate_psm_fdr(df)\n", + " else:\n", + " if fdr_level == 'precursor':\n", + " _df = df.groupby([\n", + " 'sequence','mods','mod_sites','charge','decoy'\n", + " ])['ml_score'].max()\n", + " elif fdr_level == 'peptide':\n", + " _df = df.groupby([\n", + " 'sequence','mods','mod_sites','decoy'\n", + " ])['ml_score'].max()\n", + " else:\n", + " _df = df.groupby(['sequence','decoy'])['ml_score'].max()\n", + " _df = self._estimate_psm_fdr(_df)\n", + " df['fdr'] = fdr_from_ref(\n", + " df['ml_score'].values, _df['ml_score'].values, \n", + " _df['fdr'].values\n", + " )\n", + " return df\n", + "\n", + " def _train(self, \n", + " train_t_df:pd.DataFrame, \n", + " train_d_df:pd.DataFrame\n", + " ):\n", + " train_t_df = train_t_df[train_t_df.fdr<=self.fdr]\n", + "\n", + " if len(train_t_df) > self.max_train_sample:\n", + " train_t_df = train_t_df.sample(\n", + " n=self.max_training_sample, \n", + " random_state=1337\n", + " )\n", + " if len(train_d_df) > self.max_train_sample:\n", + " train_d_df = train_d_df.sample(\n", + " n=self.max_training_sample,\n", + " random_state=1337\n", + " )\n", + "\n", + " train_df = pd.concat((train_t_df, train_d_df))\n", + " train_label = np.ones(len(train_df),dtype=np.int32)\n", + " train_label[len(train_t_df):] = 0\n", + "\n", + " self._ml_model.fit(\n", + " train_df[self.feature_list].values, \n", + " train_label\n", + " )\n", + "\n", + " def 
_predict(self, test_df):\n", + " try:\n", + " test_df['ml_score'] = self._ml_model.decision_function(\n", + " test_df[self.feature_list].values\n", + " )\n", + " except AttributeError:\n", + " test_df['ml_score'] = self._ml_model.predict_proba(\n", + " test_df[self.feature_list].values\n", + " )\n", + " return test_df\n", + "\n", + " def _cv_score(self, df:pd.DataFrame)->pd.DataFrame:\n", + " \"\"\"\n", + " Apply cross-validation for rescoring.\n", + "\n", + " It will split `df` into K folds. For each fold, \n", + " its ML scores are predicted by a model which \n", + " is trained by other K-1 folds .\n", + "\n", + " Parameters\n", + " ----------\n", + " df : pd.DataFrame\n", + " PSMs to be rescored\n", + "\n", + " Returns\n", + " -------\n", + " pd.DataFrame\n", + " PSMs after rescoring\n", + " \"\"\"\n", + " df = df.sample(\n", + " frac=1, random_state=1337\n", + " ).reset_index(drop=True)\n", + " df_target = df[df.decoy == 0]\n", + " df_decoy = df[df.decoy != 0]\n", + "\n", + " if (\n", + " np.sum(df_target.fdr 1:\n", + " test_df_list = []\n", + " for i in range(self.cv_fold):\n", + " t_mask = np.ones(len(df_target), dtype=bool)\n", + " _slice = slice(i, len(df_target), self.cv_fold)\n", + " t_mask[_slice] = False\n", + " train_t_df = df_target[t_mask]\n", + " test_t_df = df_target[_slice]\n", + " \n", + " d_mask = np.ones(len(df_decoy), dtype=bool)\n", + " _slice = slice(i, len(df_decoy), self.cv_fold)\n", + " d_mask[_slice] = False\n", + " train_d_df = df_decoy[d_mask]\n", + " test_d_df = df_decoy[_slice]\n", + "\n", + " self._train(train_t_df, train_d_df)\n", + "\n", + " test_df = pd.concat((test_t_df, test_d_df))\n", + " test_df_list.append(self._predict(test_df))\n", + " \n", + " return pd.concat(test_df_list, ignore_index=True)\n", + " else:\n", + "\n", + " self._train(df_target, df_decoy)\n", + " test_df = pd.concat((df_target, df_decoy),ignore_index=True)\n", + " \n", + " return self._predict(test_df)\n", + " " + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.8.3 ('base')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.3" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "8a3b27e141e49c996c9b863f8707e97aabd49c4a7e8445b9b783b34e4a21a9b2" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From d4fa4b32e9865bf6c2e4a29c9ffe88d4cd4bf341 Mon Sep 17 00:00:00 2001 From: "Zeng, Wen-Feng" Date: Mon, 10 Oct 2022 22:05:50 +0200 Subject: [PATCH 08/52] percolator --- alphabase/_modidx.py | 8 +- alphabase/scoring/feature_extraction_base.py | 6 +- alphabase/scoring/ml_scoring_base.py | 162 ++- docs/constants/aa.html | 42 +- docs/constants/element.html | 42 +- docs/constants/isotope.html | 42 +- docs/constants/modification.html | 42 +- docs/index.html | 42 +- docs/io/hdf.html | 42 +- docs/peptide/fragment.html | 42 +- docs/peptide/mass_calc.html | 42 +- docs/peptide/mobility.html | 42 +- docs/peptide/precursor.html | 42 +- docs/protein/fasta.html | 42 +- docs/protein/test_fasta.html | 42 +- docs/psm_reader/alphapept_reader.html | 42 +- docs/psm_reader/dia_psm_reader.html | 42 +- docs/psm_reader/maxquant_reader.html | 42 +- docs/psm_reader/msfragger_reader.html | 42 +- docs/psm_reader/pfind_reader.html | 42 +- docs/psm_reader/psm_reader.html | 42 +- docs/scoring/fdr.html | 971 ++++++++++++++ docs/scoring/feature_extraction_base.html | 520 ++++++++ docs/scoring/ml_scoring_base.html | 1151 +++++++++++++++++ docs/search.json | 49 + docs/sitemap.xml | 60 +- docs/spectral_library/decoy_library.html | 42 +- docs/spectral_library/library_base.html | 42 +- docs/statistics/regression.html | 655 ++++++++++ 
.../figure-html/cell-5-output-1.png | Bin 0 -> 28935 bytes docs/utils.html | 42 +- docs/yaml_utils.html | 42 +- nbdev_nbs/psm_reader/dia_psm_reader.ipynb | 9 - nbdev_nbs/scoring/fdr.ipynb | 30 +- .../scoring/feature_extraction_base.ipynb | 89 +- nbdev_nbs/scoring/ml_scoring_base.ipynb | 979 +++++++++++++- nbdev_nbs/sidebar.yml | 10 +- 37 files changed, 5437 insertions(+), 186 deletions(-) create mode 100644 docs/scoring/fdr.html create mode 100644 docs/scoring/feature_extraction_base.html create mode 100644 docs/scoring/ml_scoring_base.html create mode 100644 docs/statistics/regression.html create mode 100644 docs/statistics/regression_files/figure-html/cell-5-output-1.png diff --git a/alphabase/_modidx.py b/alphabase/_modidx.py index 27d6f5bc..8f4978ef 100644 --- a/alphabase/_modidx.py +++ b/alphabase/_modidx.py @@ -418,6 +418,8 @@ 'alphabase/scoring/ml_scoring_base.py'), 'alphabase.scoring.ml_scoring_base.Percolator._train': ( 'scoring/ml_scoring_base.html#percolator._train', 'alphabase/scoring/ml_scoring_base.py'), + 'alphabase.scoring.ml_scoring_base.Percolator._train_and_score': ( 'scoring/ml_scoring_base.html#percolator._train_and_score', + 'alphabase/scoring/ml_scoring_base.py'), 'alphabase.scoring.ml_scoring_base.Percolator.extract_features': ( 'scoring/ml_scoring_base.html#percolator.extract_features', 'alphabase/scoring/ml_scoring_base.py'), 'alphabase.scoring.ml_scoring_base.Percolator.feature_extractor': ( 'scoring/ml_scoring_base.html#percolator.feature_extractor', @@ -428,8 +430,10 @@ 'alphabase/scoring/ml_scoring_base.py'), 'alphabase.scoring.ml_scoring_base.Percolator.rescore': ( 'scoring/ml_scoring_base.html#percolator.rescore', 'alphabase/scoring/ml_scoring_base.py'), - 'alphabase.scoring.ml_scoring_base.Percolator.run': ( 'scoring/ml_scoring_base.html#percolator.run', - 'alphabase/scoring/ml_scoring_base.py')}, + 'alphabase.scoring.ml_scoring_base.Percolator.run_rerank_workflow': ( 'scoring/ml_scoring_base.html#percolator.run_rerank_workflow', + 
'alphabase/scoring/ml_scoring_base.py'), + 'alphabase.scoring.ml_scoring_base.Percolator.run_rescore_workflow': ( 'scoring/ml_scoring_base.html#percolator.run_rescore_workflow', + 'alphabase/scoring/ml_scoring_base.py')}, 'alphabase.spectral_library.decoy_library': { 'alphabase.spectral_library.decoy_library.DecoyLib': ( 'spectral_library/decoy_library.html#decoylib', 'alphabase/spectral_library/decoy_library.py'), 'alphabase.spectral_library.decoy_library.DecoyLib.__init__': ( 'spectral_library/decoy_library.html#decoylib.__init__', diff --git a/alphabase/scoring/feature_extraction_base.py b/alphabase/scoring/feature_extraction_base.py index 02778b8f..5d624c5f 100644 --- a/alphabase/scoring/feature_extraction_base.py +++ b/alphabase/scoring/feature_extraction_base.py @@ -35,15 +35,17 @@ def extract_features(self, Extract the scoring features (self._feature_list) and append them inplace into candidate PSMs (psm_df). + **All sub-classes must re-implement this method.** + Parameters ---------- psm_df : pd.DataFrame - PSMs to be rescore. + PSMs to be rescored Returns ------- pd.DataFrame - psm_df with appended the feature list extracted by this extractor. + psm_df with appended feature columns extracted by this extractor """ return psm_df diff --git a/alphabase/scoring/ml_scoring_base.py b/alphabase/scoring/ml_scoring_base.py index 7f146a56..5bcbe809 100644 --- a/alphabase/scoring/ml_scoring_base.py +++ b/alphabase/scoring/ml_scoring_base.py @@ -33,26 +33,35 @@ def __init__(self): self.cv_fold = 1 self.iter_num = 1 + self._base_features = ['score','nAA','charge'] + @property def feature_list(self)->list: - """ The read-only property to get extracted feature_list """ - return self.feature_extractor.feature_list + """ Get extracted feature_list. Property, read-only """ + return list(set( + self._base_features+ + self.feature_extractor.feature_list + )) @property def ml_model(self): + """ + ML model in Percolator. 
+ It can be sklearn models or other models but implement + the methods `fit()` and `decision_function()` (or `predict_proba()`) + which are the same as sklearn models. + """ return self._ml_model @ml_model.setter def ml_model(self, model): - """ - `model` must be sklearn models or other models but implement - the same methods `fit()` and `decision_function()`/`predict_proba()` - as sklearn models - """ self._ml_model = model @property def feature_extractor(self)->BaseFeatureExtractor: + """ + The feature extractor inherited from `BaseFeatureExtractor` + """ return self._feature_extractor @feature_extractor.setter @@ -88,7 +97,8 @@ def extract_features(self, def rescore(self, df:pd.DataFrame )->pd.DataFrame: - """Rescore + """ + Estimate ML scores and then FDRs (q-values) Parameters ---------- @@ -106,7 +116,61 @@ def rescore(self, df = self._estimate_fdr(df) return df - def run(self, + def run_rerank_workflow(self, + top_k_psm_df:pd.DataFrame, + rerank_column:str='spec_idx', + *args, **kwargs + )->pd.DataFrame: + """ + Run percolator workflow with reranking + the peptides for each spectrum. + + - self.extract_features() + - self.rescore() + + *args and **kwargs are used for + `self.feature_extractor.extract_features`. + + Parameters + ---------- + top_k_psm_df : pd.DataFrame + PSM DataFrame + + rerank_column : str + The column use to rerank PSMs. + + For example, use the following code to select + the top-ranked peptide for each spectrum. + ``` + rerank_column = 'spec_idx' # scan_num + idx = top_k_psm_df.groupby( + ['raw_name',rerank_column] + )['ml_score'].idxmax() + psm_df = top_k_psm_df.loc[idx].copy() + ``` + Returns + ------- + pd.DataFrame + Only top-scored PSM is returned for + each group of the `rerank_column`. 
+ """ + top_k_psm_df = self.extract_features( + top_k_psm_df, *args, **kwargs + ) + idxmax = top_k_psm_df.groupby( + ['raw_name',rerank_column] + )['ml_score'].idxmax() + + df = top_k_psm_df.loc[idxmax].copy() + self._train_and_score(df) + + top_k_psm_df = self._predict(top_k_psm_df) + idxmax = top_k_psm_df.groupby( + ['raw_name',rerank_column] + )['ml_score'].idxmax() + return top_k_psm_df.loc[idxmax].copy() + + def run_rescore_workflow(self, psm_df:pd.DataFrame, *args, **kwargs )->pd.DataFrame: @@ -114,7 +178,7 @@ def run(self, Run percolator workflow: - self.extract_features() - - self.re_score() + - self.rescore() *args and **kwargs are used for `self.feature_extractor.extract_features`. @@ -200,12 +264,12 @@ def _train(self, ): train_t_df = train_t_df[train_t_df.fdr<=self.fdr] - if len(train_t_df) > self.max_train_sample: + if len(train_t_df) > self.max_training_sample: train_t_df = train_t_df.sample( n=self.max_training_sample, random_state=1337 ) - if len(train_d_df) > self.max_train_sample: + if len(train_d_df) > self.max_training_sample: train_d_df = train_d_df.sample( n=self.max_training_sample, random_state=1337 @@ -231,6 +295,28 @@ def _predict(self, test_df): ) return test_df + def _train_and_score(self, + df:pd.DataFrame + )->pd.DataFrame: + + df_target = df[df.decoy == 0] + df_decoy = df[df.decoy != 0] + + if ( + np.sum(df_target.fdr<=self.fdr) < + self.min_training_sample or + len(df_decoy) < self.min_training_sample + ): + return df + + self._train(df_target, df_decoy) + test_df = pd.concat( + [df_target, df_decoy], + ignore_index=True + ) + + return self._predict(test_df) + def _cv_score(self, df:pd.DataFrame)->pd.DataFrame: """ Apply cross-validation for rescoring. 
@@ -249,9 +335,14 @@ def _cv_score(self, df:pd.DataFrame)->pd.DataFrame: pd.DataFrame PSMs after rescoring """ + + if self.cv_fold <= 1: + return self._train_and_score(df) + df = df.sample( frac=1, random_state=1337 ).reset_index(drop=True) + df_target = df[df.decoy == 0] df_decoy = df[df.decoy != 0] @@ -263,31 +354,24 @@ def _cv_score(self, df:pd.DataFrame)->pd.DataFrame: ): return df - if self.cv_fold > 1: - test_df_list = [] - for i in range(self.cv_fold): - t_mask = np.ones(len(df_target), dtype=bool) - _slice = slice(i, len(df_target), self.cv_fold) - t_mask[_slice] = False - train_t_df = df_target[t_mask] - test_t_df = df_target[_slice] - - d_mask = np.ones(len(df_decoy), dtype=bool) - _slice = slice(i, len(df_decoy), self.cv_fold) - d_mask[_slice] = False - train_d_df = df_decoy[d_mask] - test_d_df = df_decoy[_slice] - - self._train(train_t_df, train_d_df) - - test_df = pd.concat((test_t_df, test_d_df)) - test_df_list.append(self._predict(test_df)) - - return pd.concat(test_df_list, ignore_index=True) - else: - - self._train(df_target, df_decoy) - test_df = pd.concat((df_target, df_decoy),ignore_index=True) - - return self._predict(test_df) + test_df_list = [] + for i in range(self.cv_fold): + t_mask = np.ones(len(df_target), dtype=bool) + _slice = slice(i, len(df_target), self.cv_fold) + t_mask[_slice] = False + train_t_df = df_target[t_mask] + test_t_df = df_target[_slice] + + d_mask = np.ones(len(df_decoy), dtype=bool) + _slice = slice(i, len(df_decoy), self.cv_fold) + d_mask[_slice] = False + train_d_df = df_decoy[d_mask] + test_d_df = df_decoy[_slice] + + self._train(train_t_df, train_d_df) + + test_df = pd.concat((test_t_df, test_d_df)) + test_df_list.append(self._predict(test_df)) + + return pd.concat(test_df_list, ignore_index=True) diff --git a/docs/constants/aa.html b/docs/constants/aa.html index 75ea6bca..74e8c097 100644 --- a/docs/constants/aa.html +++ b/docs/constants/aa.html @@ -328,7 +328,7 @@

Amino acid information

+ + diff --git a/docs/constants/element.html b/docs/constants/element.html index 8a54c8c3..bdae7455 100644 --- a/docs/constants/element.html +++ b/docs/constants/element.html @@ -262,7 +262,7 @@

Atom element information

+ + diff --git a/docs/constants/isotope.html b/docs/constants/isotope.html index fb0e47f9..1defb1d6 100644 --- a/docs/constants/isotope.html +++ b/docs/constants/isotope.html @@ -325,7 +325,7 @@

Isotope distribution

+ + diff --git a/docs/constants/modification.html b/docs/constants/modification.html index 86463909..d11df70c 100644 --- a/docs/constants/modification.html +++ b/docs/constants/modification.html @@ -328,7 +328,7 @@

Modification information

+ + diff --git a/docs/index.html b/docs/index.html index b3e6f02e..67bb8e94 100644 --- a/docs/index.html +++ b/docs/index.html @@ -325,7 +325,7 @@

AlphaBase

+ + diff --git a/docs/io/hdf.html b/docs/io/hdf.html index e23020e9..64ac49d0 100644 --- a/docs/io/hdf.html +++ b/docs/io/hdf.html @@ -325,7 +325,7 @@

HDF functionalities

+ + diff --git a/docs/peptide/fragment.html b/docs/peptide/fragment.html index ca773653..81b43b52 100644 --- a/docs/peptide/fragment.html +++ b/docs/peptide/fragment.html @@ -328,7 +328,7 @@

Fragment Functionalities

+ + diff --git a/docs/peptide/mass_calc.html b/docs/peptide/mass_calc.html index a581be70..159a4624 100644 --- a/docs/peptide/mass_calc.html +++ b/docs/peptide/mass_calc.html @@ -325,7 +325,7 @@

Mass Calculation

+ + diff --git a/docs/peptide/mobility.html b/docs/peptide/mobility.html index f20f1cc7..db3e1094 100644 --- a/docs/peptide/mobility.html +++ b/docs/peptide/mobility.html @@ -262,7 +262,7 @@

CCS/Mobility Functionalities

+ + diff --git a/docs/peptide/precursor.html b/docs/peptide/precursor.html index 10aa23d7..08a3d147 100644 --- a/docs/peptide/precursor.html +++ b/docs/peptide/precursor.html @@ -325,7 +325,7 @@

Precursor Functionalities

+ + diff --git a/docs/protein/fasta.html b/docs/protein/fasta.html index 4892f583..8682adc6 100644 --- a/docs/protein/fasta.html +++ b/docs/protein/fasta.html @@ -328,7 +328,7 @@

Protein and Peptide Processing

+ + diff --git a/docs/protein/test_fasta.html b/docs/protein/test_fasta.html index 70b86f90..305b2626 100644 --- a/docs/protein/test_fasta.html +++ b/docs/protein/test_fasta.html @@ -328,7 +328,7 @@

Testing fasta

+ + diff --git a/docs/psm_reader/alphapept_reader.html b/docs/psm_reader/alphapept_reader.html index 92db61b2..b3ef4fa6 100644 --- a/docs/psm_reader/alphapept_reader.html +++ b/docs/psm_reader/alphapept_reader.html @@ -325,7 +325,7 @@

AlphaPept PSM Reader

+ + diff --git a/docs/psm_reader/dia_psm_reader.html b/docs/psm_reader/dia_psm_reader.html index 58729b76..ec78d4c5 100644 --- a/docs/psm_reader/dia_psm_reader.html +++ b/docs/psm_reader/dia_psm_reader.html @@ -328,7 +328,7 @@

DIA PSM reader

+ + diff --git a/docs/psm_reader/maxquant_reader.html b/docs/psm_reader/maxquant_reader.html index b68a0465..ef50232a 100644 --- a/docs/psm_reader/maxquant_reader.html +++ b/docs/psm_reader/maxquant_reader.html @@ -328,7 +328,7 @@

MaxQuant PSM reader

+ + diff --git a/docs/psm_reader/msfragger_reader.html b/docs/psm_reader/msfragger_reader.html index b7e94443..9b8765ee 100644 --- a/docs/psm_reader/msfragger_reader.html +++ b/docs/psm_reader/msfragger_reader.html @@ -262,7 +262,7 @@

MSFragger Reader

+ + diff --git a/docs/psm_reader/pfind_reader.html b/docs/psm_reader/pfind_reader.html index 011d3a92..86ee4938 100644 --- a/docs/psm_reader/pfind_reader.html +++ b/docs/psm_reader/pfind_reader.html @@ -325,7 +325,7 @@

pFind PSM Reader

+ + diff --git a/docs/psm_reader/psm_reader.html b/docs/psm_reader/psm_reader.html index f6075814..7c658c64 100644 --- a/docs/psm_reader/psm_reader.html +++ b/docs/psm_reader/psm_reader.html @@ -325,7 +325,7 @@

Base Class for PSM Readers

+ + diff --git a/docs/scoring/fdr.html b/docs/scoring/fdr.html new file mode 100644 index 00000000..769d16c4 --- /dev/null +++ b/docs/scoring/fdr.html @@ -0,0 +1,971 @@ + + + + + + + + + +alphabase - FDR functionalities + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + + + + +
+ +
+
+

FDR functionalities

+
+ + + +
+ + + +
+ + +
+ + +

Functionalities to calculate FDR.

+
+

In alphabase dataframes, we refer to FDR values as q_values without loss of generality.

+
+
+

source

+
+

calculate_fdr_from_ref

+
+
 calculate_fdr_from_ref (df:pandas.core.frame.DataFrame,
+                         ref_scores:numpy.ndarray,
+                         ref_fdr_values:numpy.ndarray, score_column:str,
+                         decoy_column:str='decoy')
+
+

Calculate FDR values for a PSM dataframe from the given reference scores and fdr_values. It is used to extend peptide-level or sequence-level FDR (reference) to each PSM, as PSMs are more useful for quantification.

+ ++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
TypeDefaultDetails
dfDataFramePSM dataframe
ref_scoresndarrayreference scores that were used to
calculate ref_fdr_values, also sorted in descending order.
ref_fdr_valuesndarrayfdr values corresponding to ref_scores
score_columnstrscore column in the dataframe
decoy_columnstrdecoydecoy column in the dataframe.
1=decoy, 0=target. Defaults to ‘decoy’.
ReturnsDataFramedataframe with ‘fdr’ column added
+
+

source

+
+
+

fdr_from_ref

+
+
 fdr_from_ref (sorted_scores:numpy.ndarray, ref_scores:numpy.ndarray,
+               ref_fdr_values:numpy.ndarray)
+
+

Calculate FDR values from the given reference scores and fdr_values. It is used to extend peptide-level or sequence-level FDR (reference) to each PSM, as PSMs are more useful for quantification.

+ +++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
TypeDetails
sorted_scoresndarraythe scores to calculate FDRs,
they must be sorted in descending order.
ref_scoresndarrayreference scores that were used to
calculate ref_fdr_values, also sorted in descending order.
ref_fdr_valuesndarrayfdr values corresponding to ref_scores
Returnsndarrayfdr values corresponding to sorted_scores.
+
+

source

+
+
+

calculate_fdr

+
+
 calculate_fdr (df:pandas.core.frame.DataFrame, score_column:str,
+                decoy_column:str='decoy')
+
+

Calculate FDR values (q_values in fact) for the given dataframe

+ ++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
TypeDefaultDetails
dfDataFramePSM dataframe to calculate FDRs
score_columnstrscore column to sort in descending order
decoy_columnstrdecoydecoy column in the dataframe.
1=decoy, 0=target. Defaults to ‘decoy’.
ReturnsDataFramePSM dataframe with ‘fdr’ column added
+
+

source

+
+
+

fdr_to_q_values

+
+
 fdr_to_q_values (fdr_values:numpy.ndarray)
+
+

convert FDR values to q_values.

+ +++++ + + + + + + + + + + + + + + + + + + + +
TypeDetails
fdr_valuesndarrayFDR values, they should be
sorted according to the descending order of the score
Returnsndarrayq_values
+
+
df = pd.DataFrame(
+    {
+        'score': np.random.random(500)*10+11,
+        'decoy': 0,
+        'kind': True,
+    }
+)
+f_score = np.random.random(500)*9.9
+df = df.append(
+    pd.DataFrame(
+        {
+            'score': f_score+0.01,
+            'decoy': 1,
+            'kind': False
+        }
+    )
+)
+df = df.append(
+    pd.DataFrame(
+        {
+            'score': f_score,
+            'decoy': 0,
+            'kind': False
+        }
+    )
+)
+df = df.append(
+    pd.DataFrame(
+        {
+            'score': np.random.random(5)+10,
+            'decoy': 1,
+            'kind': False
+        }
+    )
+)
+
+df = calculate_fdr(df, 'score', 'decoy')
+df
+
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
scoredecoykindfdr
41720.9867510True0.000000
48620.9588030True0.000000
4620.9542440True0.000000
13120.8324400True0.000000
23620.8095950True0.000000
...............
11110.0463660False0.504008
7090.0408411False0.504505
12090.0308410False0.504505
9390.0137041False0.505000
14390.0037040False0.505000
+

1505 rows × 4 columns

+
+
+
+
+
df[(df.fdr < 0.01)&(df.decoy==0)]
+
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
scoredecoykindfdr
41720.9867510True0.0
48620.9588030True0.0
4620.9542440True0.0
13120.8324400True0.0
23620.8095950True0.0
...............
31311.0706950True0.0
22711.0284310True0.0
15311.0143300True0.0
11311.0139780True0.0
4811.0106290True0.0
+

500 rows × 4 columns

+
+
+
+ + +
+ +
+ +
+ + + + \ No newline at end of file diff --git a/docs/scoring/feature_extraction_base.html b/docs/scoring/feature_extraction_base.html new file mode 100644 index 00000000..e23a8359 --- /dev/null +++ b/docs/scoring/feature_extraction_base.html @@ -0,0 +1,520 @@ + + + + + + + + + +alphabase - Base Class of Feature Extractors + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + + + + +
+ +
+
+

Base Class of Feature Extractors

+
+ + + +
+ + + +
+ + +
+ + +
+

source

+
+

BaseFeatureExtractor

+
+
 BaseFeatureExtractor ()
+
+

Initialize self. See help(type(self)) for accurate signature.

+
+

source

+
+
+

BaseFeatureExtractor.extract_features

+
+
 BaseFeatureExtractor.extract_features
+                                        (psm_df:pandas.core.frame.DataFram
+                                        e, *args, **kwargs)
+
+

Extract the scoring features (self._feature_list) and append them inplace into candidate PSMs (psm_df).

+

All sub-classes must re-implement this method.

+ +++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
TypeDetails
psm_dfDataFramePSMs to be rescored
args
kwargs
ReturnsDataFramepsm_df with appended feature columns extracted by this extractor
+ + +
+ +
+ +
+ + + + \ No newline at end of file diff --git a/docs/scoring/ml_scoring_base.html b/docs/scoring/ml_scoring_base.html new file mode 100644 index 00000000..2a47c7bf --- /dev/null +++ b/docs/scoring/ml_scoring_base.html @@ -0,0 +1,1151 @@ + + + + + + + + + +alphabase - Base Class of ML Scoring Methods + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + + + + +
+ +
+
+

Base Class of ML Scoring Methods

+
+ + + +
+ + + +
+ + +
+ + +

There are two key modules in ML-based rescoring: feature extraction and the rescoring algorithm. We designed these two modules to be as flexible as possible for future extensions.

+
+

Feature extraction

+

The feature extractor is more important than the ML methods, so we designed a flexible architecture for feature extraction. As shown in BaseFeatureExtractor, a feature extractor inherited from BaseFeatureExtractor must re-implement BaseFeatureExtractor.extract_features, and tell the ML methods which features were extracted by providing BaseFeatureExtractor.feature_list.

+

For example, if we have two feature extractors, AlphaPeptFE and AlphaPeptDeepFE:

+
class AlphaPeptFE(BaseFeatureExtractor):
+    def extract_features(self, psm_df):
+        psm_df['ap_f1'] = ...
+        self._feature_list.append('ap_f1')
+        psm_df['ap_f2'] = ...
+        self._feature_list.append('ap_f2')
+
+class AlphaPeptDeepFE(BaseFeatureExtractor):
+    def extract_features(self, psm_df):
+        psm_df['ad_f1'] = ...
+        self._feature_list.append('ad_f1')
+        psm_df['ad_f2'] = ...
+        self._feature_list.append('ad_f2')
+

We can easily design a new feature extractor which combines these two and more feature extractors:

+
class CombFE(BaseFeatureExtractor):
+    def __init__(self):
+        self.fe_list = [AlphaPeptFE(),AlphaPeptDeepFE()]
+
+    def extract_features(self, psm_df):
+        for fe in self.fe_list:
+            fe.extract_features(psm_df)
+
+    @property
+    def feature_list(self):
+        f_set = set()
+        for fe in self.fe_list:
+            f_set.update(fe.feature_list)
+        return list(f_set)
+

This will be useful for rescoring with DL features, for instance, when AlphaPeptDeep is or is not installed.

+
+
+

Rescoring Algorithm

+

The rescoring algorithm called Percolator (Kall et al. 2007), which is based on semi-supervised learning, is still the most widely used in MS-based proteomics. Therefore, we use Percolator as the base rescoring class, and others can re-implement its methods to support different rescoring algorithms as well as different ML models:

+
    +
  1. Rescoring algorithm. We have provided the base rescoring code structure in Percolator. If we are going to support DiaNN’s brute-force supervised learning methods, we can define the class like this:
  2. +
+
class DiaNNRescoring(Percolator):
+    def _train(self, train_t_df, train_d_df):
+        # No target filtration on FDR, which is the same as DiaNN but different from Percolator
+        #train_t_df = train_t_df[train_t_df.fdr<=self.fdr]
+        train_df = pd.concat((train_t_df, train_d_df))
+        train_label = np.ones(len(train_df),dtype=np.int32)
+        train_label[len(train_t_df):] = 0
+
+        self._ml_model.fit(
+            train_df[self.feature_list].values, 
+            train_label
+        )
+    def rescore(self, psm_df):
+        # We don't need iteration anymore, but cross validation is still necessary
+        df = self._cv_score(df)
+        return self._estimate_fdr(df)
+
    +
  1. ML models. Personally, Percolator with a linear classifier (SVM or LogisticRegression) is preferred. But as a framework, we should support different ML models. We can easily switch to the random forest by self.ml_model = RandomForestClassifier(). We can also use a DL model which provides sklearn-like fit() and decision_function() APIs for rescoring.
  2. +
+
+

source

+
+

Percolator

+
+
 Percolator ()
+
+

Initialize self. See help(type(self)) for accurate signature.

+
+
+

Properties of Percolator

+
+

source

+
+
+

Percolator.ml_model

+
+
 Percolator.ml_model ()
+
+

ML model in Percolator. It can be sklearn models or other models but implement the methods fit() and decision_function() (or predict_proba()) which are the same as sklearn models.

+
+

source

+
+
+

Percolator.feature_extractor

+
+
 Percolator.feature_extractor ()
+
+

The feature extractor inherited from BaseFeatureExtractor

+
+

source

+
+
+

Percolator.feature_list

+
+
 Percolator.feature_list ()
+
+

Get extracted feature_list. Property, read-only

+
+
+

Methods of Percolator

+
+

source

+
+
+

Percolator.extract_features

+
+
 Percolator.extract_features (psm_df:pandas.core.frame.DataFrame, *args,
+                              **kwargs)
+
+

Extract features for rescoring.

+

*args and **kwargs are used for self.feature_extractor.extract_features.

+ +++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
TypeDetails
psm_dfDataFramePSM DataFrame
args
kwargs
ReturnsDataFramepsm_df with feature columns appended inplace.
+
+

source

+
+
+

Percolator.rescore

+
+
 Percolator.rescore (df:pandas.core.frame.DataFrame)
+
+

Estimate ML scores and then FDRs (q-values)

+ +++++ + + + + + + + + + + + + + + + + + + + +
TypeDetails
dfDataFramepsm_df
ReturnsDataFramepsm_df with ml_score and fdr columns updated inplace
+
+

source

+
+
+

Percolator.run_rescore_workflow

+
+
 Percolator.run_rescore_workflow (psm_df:pandas.core.frame.DataFrame,
+                                  *args, **kwargs)
+
+

Run percolator workflow:

+
    +
  • self.extract_features()
  • +
  • self.rescore()
  • +
+

*args and **kwargs are used for self.feature_extractor.extract_features.

+ +++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
TypeDetails
psm_dfDataFramePSM DataFrame
args
kwargs
ReturnsDataFramepsm_df with feature columns appended inplace.
+
+

source

+
+
+

Percolator.run_rerank_workflow

+
+
 Percolator.run_rerank_workflow (top_k_psm_df:pandas.core.frame.DataFrame,
+                                 rerank_column:str='spec_idx', *args,
+                                 **kwargs)
+
+

Run percolator workflow with reranking the peptides for each spectrum.

+
    +
  • self.extract_features()
  • +
  • self.rescore()
  • +
+

*args and **kwargs are used for self.feature_extractor.extract_features.

+ ++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
TypeDefaultDetails
top_k_psm_dfDataFramePSM DataFrame
rerank_columnstrspec_idxThe column used to rerank PSMs.

For example, use the following code to select
the top-ranked peptide for each spectrum.
<br>rerank_column = 'spec_idx' # scan_num<br>idx = top_k_psm_df.groupby(<br> ['raw_name',rerank_column]<br>)['ml_score'].idxmax()<br>psm_df = top_k_psm_df.loc[idx].copy()<br>
args
kwargs
ReturnsDataFrame
+
+
+
+

Simple Examples

+
+
df = pd.DataFrame({
+    'score': list(np.random.uniform(0,100,100))+list(np.random.uniform(0,10,100)),
+    'nAA': list(np.random.randint(7,30,200)),
+    'charge': list(np.random.randint(2,4,200)),
+    'decoy': [0]*100+[1]*100,
+    'spec_idx': np.repeat(np.arange(100),2),
+    'raw_name': 'raw',
+})
+perc = Percolator()
+perc.min_training_sample = 10
+perc.run_rescore_workflow(df)
+
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
scorenAAchargedecoyspec_idxraw_nameml_scorefdr
099.851979263018raw138.1427660.000000
198.74605273012raw133.8677790.000000
297.415167162016raw133.4477610.000000
396.857314143015raw131.8773180.000000
494.606208173048raw128.7857130.000000
...........................
1950.346523182189raw-17.0086490.979798
1960.703782153182raw-17.2927480.989899
1970.058571223177raw-17.3522931.000000
1980.90198392164raw-17.3577041.000000
1990.32037882031raw-18.3954211.000000
+

200 rows × 8 columns

+
+
+
+
+
perc.run_rerank_workflow(df, rerank_column='spec_idx')
+
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
scorenAAchargedecoyspec_idxraw_nameml_scorefdr
5444.98600025200raw23.2398710.000000
694.0206587301raw61.7629730.000000
7323.02806814202raw5.3460260.000000
1779.16353728303raw50.5846930.000000
3661.67392323204raw36.5007280.000000
...........................
1702.30608673195raw-11.4759200.744898
1058.10719282196raw-6.7650490.191011
929.717331103197raw-5.4596660.044944
1434.381494293198raw-9.1000270.565217
1305.831152263199raw-8.0403860.423913
+

100 rows × 8 columns

+
+
+
+ + +
+ +
+ +
+ + + + \ No newline at end of file diff --git a/docs/search.json b/docs/search.json index 26705de7..9f935bfb 100644 --- a/docs/search.json +++ b/docs/search.json @@ -6,6 +6,13 @@ "section": "", "text": "source\n\nexplode_multiple_columns\n\n explode_multiple_columns (df:pandas.core.frame.DataFrame, columns:list)\n\n\nsource\n\n\nprocess_bar\n\n process_bar (iterator, len_iter)" }, + { + "objectID": "statistics/regression.html", + "href": "statistics/regression.html", + "title": "alphabase", + "section": "", + "text": "LOESSRegression\n\n LOESSRegression (n_kernels:int=6, kernel_size:float=2.0,\n polynomial_degree:int=2)\n\nscikit-learn estimator which implements a LOESS style local polynomial regression. The number of basis functions or kernels can be explicitly defined which allows for faster and cheaper training and inference.\n\n\n\n\n\n\n\n\n\n\nType\nDefault\nDetails\n\n\n\n\nn_kernels\nint\n6\ndefault = 6, The number of local polynomial functions used to approximate the data. The location and extend of the kernels will be distributed to contain an equal number of datapoints in the training set.\n\n\nkernel_size\nfloat\n2.0\ndefault = 2, A factor increasing the kernel size to overlap with the neighboring kernel.\n\n\npolynomial_degree\nint\n2\ndefault = 2, Degree of the polynomial functions used for the local approximation.\n\n\n\n\nsource\n\n\nLOESSRegression.fit\n\n LOESSRegression.fit (x:numpy.ndarray, y:numpy.ndarray)\n\nfit the model passed on provided training data.\n\n\n\n\n\n\n\n\n\nType\nDetails\n\n\n\n\nx\nndarray\nfloat, of shape (n_samples,) or (n_samples, 1), Training data. 
Note that only a single feature is supported at the moment.\n\n\ny\nndarray\nof shape (n_samples,) or (n_samples, 1) Target values.\n\n\nReturns\nself: object\nReturns the fitted estimator.\n\n\n\n\nsource\n\n\nLOESSRegression.predict\n\n LOESSRegression.predict (x:numpy.ndarray)\n\nPredict using the LOESS model.\n\n\n\n\n\n\n\n\n\nType\nDetails\n\n\n\n\nx\nndarray\nfloat, of shape (n_samples,) or (n_samples, 1) Feature data. Note that only a single feature is supported at the moment.\n\n\nReturns\nnumpy.ndarray, float\n\n\n\n\n\n\nApplication example\n\ndef noisy_1d(x):\n y = np.sin(x)\n y_err = np.random.normal(y,0.5)\n return y + y_err + 0.5 * x\n\nx_train = np.linspace(0,15,200)\ny_train = noisy_1d(x_train)\n\nx_test = np.linspace(0,15,200)\ny_test = LOESSRegression().fit(x_train, y_train).predict(x_test)\n\nplt.scatter(x_train,y_train)\nplt.plot(x_test,y_test,c='r')\nplt.show()" + }, { "objectID": "psm_reader/msfragger_reader.html", "href": "psm_reader/msfragger_reader.html", @@ -160,6 +167,48 @@ "section": "", "text": "import alphabase.io.hdf\n\n# Other packages used to demonstrate functionality\nimport numpy as np\nimport pandas as pd\nimport os\n\nInstead of relying directly on the h5py interface, we will use an HDF wrapper file to provide consistent access to only those specific HDF features we want. Since components of an HDF file come in three shapes datasets, groups and attributes, we will first define a generic HDF wrapper object to handle these components. Once this is done, the HDF wrapper file can be treated as such an object with additional features to open and close the initial connection.\n\n#| hide\nfrom nbdev.showdoc import show_doc\n\n\n\nHDF_File\n\n HDF_File (file_name:str, read_only:bool=True, truncate:bool=False,\n delete_existing:bool=False)\n\nA generic class to access HDF components." 
}, + { + "objectID": "scoring/feature_extraction_base.html", + "href": "scoring/feature_extraction_base.html", + "title": "Base Class of Feature Extractors", + "section": "", + "text": "source\n\nBaseFeatureExtractor\n\n BaseFeatureExtractor ()\n\nInitialize self. See help(type(self)) for accurate signature.\n\nsource\n\n\nBaseFeatureExtractor.extract_features\n\n BaseFeatureExtractor.extract_features\n (psm_df:pandas.core.frame.DataFram\n e, *args, **kwargs)\n\nExtract the scoring features (self._feature_list) and append them inplace into candidate PSMs (psm_df).\nAll sub-classes must re-implement this method.\n\n\n\n\n\n\n\n\n\nType\nDetails\n\n\n\n\npsm_df\nDataFrame\nPSMs to be rescored\n\n\nargs\n\n\n\n\nkwargs\n\n\n\n\nReturns\nDataFrame\npsm_df with appended feature columns extracted by this extractor" + }, + { + "objectID": "scoring/ml_scoring_base.html", + "href": "scoring/ml_scoring_base.html", + "title": "Base Class of ML Scoring Methods", + "section": "", + "text": "There are two key modules in ML-based rescoring: feature extraction and rescoring algorithm. Here we designed these two modules as flexible as possible for future extensions." + }, + { + "objectID": "scoring/ml_scoring_base.html#feature-extraction", + "href": "scoring/ml_scoring_base.html#feature-extraction", + "title": "Base Class of ML Scoring Methods", + "section": "Feature extraction", + "text": "Feature extraction\nThe feature extractor is more important than the ML methods, so we designed a flexible architecture for feature extraction. 
As shown in BaseFeatureExtractor, a feature extractor inherited from BaseFeatureExtractor must re-implement BaseFeatureExtractor.extract_features, and tells the ML methods what are the extracted features by providing BaseFeatureExtractor.feature_list.\nFor example, if we have two feature extractors, AlphaPeptFE and AlphaPeptDeepFE:\nclass AlphaPeptFE(BaseFeatureExtractor):\n def extract_features(self, psm_df):\n psm_df['ap_f1'] = ...\n self._feature_list.append('ap_f1')\n psm_df['ap_f2'] = ...\n self._feature_list.append('ap_f2')\n\nclass AlphaPeptDeepFE(BaseFeatureExtractor):\n def extract_features(self, psm_df):\n psm_df['ad_f1'] = ...\n self._feature_list.append('ad_f1')\n psm_df['ad_f2'] = ...\n self._feature_list.append('ad_f2')\nWe can easily design a new feature extractor which combines these two and more feature extractors:\nclass CombFE(BaseFeatureExtractor):\n def __init__(self):\n self.fe_list = [AlphaPeptFE(),AlphaPeptDeepFE()]\n\n def extract_features(self, psm_df):\n for fe in self.fe_list:\n fe.extract_features(psm_df)\n\n @property\n def feature_list(self):\n f_set = set()\n for fe in self.fe_list:\n f_set.update(fe.feature_list)\n return list(f_set)\nThis will be useful for rescoring with DL features, for instance, when AlphaPeptDeep is or is not installed." + }, + { + "objectID": "scoring/ml_scoring_base.html#rescoring-algorithm", + "href": "scoring/ml_scoring_base.html#rescoring-algorithm", + "title": "Base Class of ML Scoring Methods", + "section": "Rescoring Algorithm", + "text": "Rescoring Algorithm\nThe rescoring algorithm called Percolator (Kall et al. 2007) based on the semi-supervised learning algorithm is still the most widely used in MS-based proteomics. Therefore, we used Percolator as the base rescoring class and others can re-implement its methods for different algorithms. as well as different\n\nRescoring algorithm. We have provided the base rescoring code structure in Percolator. 
If we are going to support DiaNN’s brute-force supervised learning methods, we can define the class like this:\n\nclass DiaNNRescoring(Percolator):\n def _train(self, train_t_df, train_d_df):\n # No target filtration on FDR, which is the same as DiaNN but different from Percolator\n #train_t_df = train_t_df[train_t_df.fdr<=self.fdr]\n train_df = pd.concat((train_t_df, train_d_df))\n train_label = np.ones(len(train_df),dtype=np.int32)\n train_label[len(train_t_df):] = 0\n\n self._ml_model.fit(\n train_df[self.feature_list].values, \n train_label\n )\n def rescore(self, psm_df):\n # We don't need iteration anymore, but cross validation is still necessary\n df = self._cv_score(df)\n return self._estimate_fdr(df)\n\nML models. Personally, Percolator with a linear classifier (SVM or LogisticRegression) is prefered. But as a framework, we should support different ML models. We can easily switch to the random forest by self.ml_model = RandomForestClassifier(). We can also use a DL model which provides sklearn-like fit() and decision_function() APIs for rescoring.\n\n\nsource\n\nPercolator\n\n Percolator ()\n\nInitialize self. See help(type(self)) for accurate signature.\n\n\nProperties of Percolator\n\nsource\n\n\nPercolator.ml_model\n\n Percolator.ml_model ()\n\nML model in Percolator. It can be sklearn models or other models but implement the methods fit() and decision_function() (or predict_proba()) which are the same as sklearn models.\n\nsource\n\n\nPercolator.feature_extractor\n\n Percolator.feature_extractor ()\n\nThe feature extractor inherited from BaseFeatureExtractor\n\nsource\n\n\nPercolator.feature_list\n\n Percolator.feature_list ()\n\nGet extracted feature_list. 
Property, read-only\n\n\nMethods of Percolator\n\nsource\n\n\nPercolator.extract_features\n\n Percolator.extract_features (psm_df:pandas.core.frame.DataFrame, *args,\n **kwargs)\n\nExtract features for rescoring.\n*args and **kwargs are used for self.feature_extractor.extract_features.\n\n\n\n\n\n\n\n\n\nType\nDetails\n\n\n\n\npsm_df\nDataFrame\nPSM DataFrame\n\n\nargs\n\n\n\n\nkwargs\n\n\n\n\nReturns\nDataFrame\npsm_df with feature columns appended inplace.\n\n\n\n\nsource\n\n\nPercolator.rescore\n\n Percolator.rescore (df:pandas.core.frame.DataFrame)\n\nEstimate ML scores and then FDRs (q-values)\n\n\n\n\n\n\n\n\n\nType\nDetails\n\n\n\n\ndf\nDataFrame\npsm_df\n\n\nReturns\nDataFrame\npsm_df with ml_score and fdr columns updated inplace\n\n\n\n\nsource\n\n\nPercolator.run_rescore_workflow\n\n Percolator.run_rescore_workflow (psm_df:pandas.core.frame.DataFrame,\n *args, **kwargs)\n\nRun percolator workflow:\n\nself.extract_features()\nself.rescore()\n\n*args and **kwargs are used for self.feature_extractor.extract_features.\n\n\n\n\n\n\n\n\n\nType\nDetails\n\n\n\n\npsm_df\nDataFrame\nPSM DataFrame\n\n\nargs\n\n\n\n\nkwargs\n\n\n\n\nReturns\nDataFrame\npsm_df with feature columns appended inplace.\n\n\n\n\nsource\n\n\nPercolator.run_rerank_workflow\n\n Percolator.run_rerank_workflow (top_k_psm_df:pandas.core.frame.DataFrame,\n rerank_column:str='spec_idx', *args,\n **kwargs)\n\nRun percolator workflow with reranking the peptides for each spectrum.\n\nself.extract_features()\nself.rescore()\n\n*args and **kwargs are used for self.feature_extractor.extract_features.\n\n\n\n\n\n\n\n\n\n\nType\nDefault\nDetails\n\n\n\n\ntop_k_psm_df\nDataFrame\n\nPSM DataFrame\n\n\nrerank_column\nstr\nspec_idx\nThe column use to rerank PSMs. For example, use the following code to select the top-ranked peptide for each spectrum.
rerank_column = 'spec_idx' # scan_num
idx = top_k_psm_df.groupby(
['raw_name',rerank_column]
)['ml_score'].idxmax()
psm_df = top_k_psm_df.loc[idx].copy()
\n\n\nargs\n\n\n\n\n\nkwargs\n\n\n\n\n\nReturns\nDataFrame" + }, + { + "objectID": "scoring/ml_scoring_base.html#simple-examples", + "href": "scoring/ml_scoring_base.html#simple-examples", + "title": "Base Class of ML Scoring Methods", + "section": "Simple Examples", + "text": "Simple Examples\n\ndf = pd.DataFrame({\n 'score': list(np.random.uniform(0,100,100))+list(np.random.uniform(0,10,100)),\n 'nAA': list(np.random.randint(7,30,200)),\n 'charge': list(np.random.randint(2,4,200)),\n 'decoy': [0]*100+[1]*100,\n 'spec_idx': np.repeat(np.arange(100),2),\n 'raw_name': 'raw',\n})\nperc = Percolator()\nperc.min_training_sample = 10\nperc.run_rescore_workflow(df)\n\n\n\n\n\n \n \n \n score\n nAA\n charge\n decoy\n spec_idx\n raw_name\n ml_score\n fdr\n \n \n \n \n 0\n 99.851979\n 26\n 3\n 0\n 18\n raw\n 138.142766\n 0.000000\n \n \n 1\n 98.746052\n 7\n 3\n 0\n 12\n raw\n 133.867779\n 0.000000\n \n \n 2\n 97.415167\n 16\n 2\n 0\n 16\n raw\n 133.447761\n 0.000000\n \n \n 3\n 96.857314\n 14\n 3\n 0\n 15\n raw\n 131.877318\n 0.000000\n \n \n 4\n 94.606208\n 17\n 3\n 0\n 48\n raw\n 128.785713\n 0.000000\n \n \n ...\n ...\n ...\n ...\n ...\n ...\n ...\n ...\n ...\n \n \n 195\n 0.346523\n 18\n 2\n 1\n 89\n raw\n -17.008649\n 0.979798\n \n \n 196\n 0.703782\n 15\n 3\n 1\n 82\n raw\n -17.292748\n 0.989899\n \n \n 197\n 0.058571\n 22\n 3\n 1\n 77\n raw\n -17.352293\n 1.000000\n \n \n 198\n 0.901983\n 9\n 2\n 1\n 64\n raw\n -17.357704\n 1.000000\n \n \n 199\n 0.320378\n 8\n 2\n 0\n 31\n raw\n -18.395421\n 1.000000\n \n \n\n200 rows × 8 columns\n\n\n\n\nperc.run_rerank_workflow(df, rerank_column='spec_idx')\n\n\n\n\n\n \n \n \n score\n nAA\n charge\n decoy\n spec_idx\n raw_name\n ml_score\n fdr\n \n \n \n \n 54\n 44.986000\n 25\n 2\n 0\n 0\n raw\n 23.239871\n 0.000000\n \n \n 6\n 94.020658\n 7\n 3\n 0\n 1\n raw\n 61.762973\n 0.000000\n \n \n 73\n 23.028068\n 14\n 2\n 0\n 2\n raw\n 5.346026\n 0.000000\n \n \n 17\n 79.163537\n 28\n 3\n 0\n 3\n raw\n 50.584693\n 0.000000\n \n \n 36\n 
61.673923\n 23\n 2\n 0\n 4\n raw\n 36.500728\n 0.000000\n \n \n ...\n ...\n ...\n ...\n ...\n ...\n ...\n ...\n ...\n \n \n 170\n 2.306086\n 7\n 3\n 1\n 95\n raw\n -11.475920\n 0.744898\n \n \n 105\n 8.107192\n 8\n 2\n 1\n 96\n raw\n -6.765049\n 0.191011\n \n \n 92\n 9.717331\n 10\n 3\n 1\n 97\n raw\n -5.459666\n 0.044944\n \n \n 143\n 4.381494\n 29\n 3\n 1\n 98\n raw\n -9.100027\n 0.565217\n \n \n 130\n 5.831152\n 26\n 3\n 1\n 99\n raw\n -8.040386\n 0.423913\n \n \n\n100 rows × 8 columns" + }, + { + "objectID": "scoring/fdr.html", + "href": "scoring/fdr.html", + "title": "FDR functionalities", + "section": "", + "text": "In alphabase dataframes, we refer fdr values as q_values without loss of generacity.\n\n\nsource\n\ncalculate_fdr_from_ref\n\n calculate_fdr_from_ref (df:pandas.core.frame.DataFrame,\n ref_scores:numpy.ndarray,\n ref_fdr_values:numpy.ndarray, score_column:str,\n decoy_column:str='decoy')\n\nCalculate FDR values for a PSM dataframe from the given reference scores and fdr_values. It is used to extend peptide-level or sequence-level FDR (reference) to each PSM, as PSMs are more useful for quantification. ``\n\n\n\n\n\n\n\n\n\n\nType\nDefault\nDetails\n\n\n\n\ndf\nDataFrame\n\nPSM dataframe\n\n\nref_scores\nndarray\n\nreference scores that used to calculate ref_fdr_values, also sorted in decending order.\n\n\nref_fdr_values\nndarray\n\nfdr values corresponding to ref_scores\n\n\nscore_column\nstr\n\nscore column in the dataframe\n\n\ndecoy_column\nstr\ndecoy\ndecoy column in the dataframe. 1=target, 0=decoy. Defaults to ‘decoy’.\n\n\nReturns\nDataFrame\n\ndataframe with ‘fdr’ column added\n\n\n\n\nsource\n\n\nfdr_from_ref\n\n fdr_from_ref (sorted_scores:numpy.ndarray, ref_scores:numpy.ndarray,\n ref_fdr_values:numpy.ndarray)\n\nCalculate FDR values from the given reference scores and fdr_values. 
It is used to extend peptide-level or sequence-level FDR (reference) to each PSM, as PSMs are more useful for quantification.\n\n\n\n\n\n\n\n\n\nType\nDetails\n\n\n\n\nsorted_scores\nndarray\nthe scores to calculate FDRs, they must be sorted in decending order.\n\n\nref_scores\nndarray\nreference scores that used to calculate ref_fdr_values, also sorted in decending order.\n\n\nref_fdr_values\nndarray\nfdr values corresponding to ref_scores\n\n\nReturns\nndarray\nfdr values corresponding to sorted_scores.\n\n\n\n\nsource\n\n\ncalculate_fdr\n\n calculate_fdr (df:pandas.core.frame.DataFrame, score_column:str,\n decoy_column:str='decoy')\n\nCalculate FDR values (q_values in fact) for the given dataframe\n\n\n\n\n\n\n\n\n\n\nType\nDefault\nDetails\n\n\n\n\ndf\nDataFrame\n\nPSM dataframe to calculate FDRs\n\n\nscore_column\nstr\n\nscore column to sort in decending order\n\n\ndecoy_column\nstr\ndecoy\ndecoy column in the dataframe. 1=target, 0=decoy. Defaults to ‘decoy’.\n\n\nReturns\nDataFrame\n\nPSM dataframe with ‘fdr’ column added\n\n\n\n\nsource\n\n\nfdr_to_q_values\n\n fdr_to_q_values (fdr_values:numpy.ndarray)\n\nconvert FDR values to q_values.\n\n\n\n\n\n\n\n\n\nType\nDetails\n\n\n\n\nfdr_values\nndarray\nFDR values, they should be sorted according to the descending order of the score\n\n\nReturns\nndarray\nq_values\n\n\n\n\ndf = pd.DataFrame(\n {\n 'score': np.random.random(500)*10+11,\n 'decoy': 0,\n 'kind': True,\n }\n)\nf_score = np.random.random(500)*9.9\ndf = df.append(\n pd.DataFrame(\n {\n 'score': f_score+0.01,\n 'decoy': 1,\n 'kind': False\n }\n )\n)\ndf = df.append(\n pd.DataFrame(\n {\n 'score': f_score,\n 'decoy': 0,\n 'kind': False\n }\n )\n)\ndf = df.append(\n pd.DataFrame(\n {\n 'score': np.random.random(5)+10,\n 'decoy': 1,\n 'kind': False\n }\n )\n)\n\ndf = calculate_fdr(df, 'score', 'decoy')\ndf\n\n\n\n\n\n \n \n \n score\n decoy\n kind\n fdr\n \n \n \n \n 417\n 20.986751\n 0\n True\n 0.000000\n \n \n 486\n 20.958803\n 0\n True\n 0.000000\n \n 
\n 46\n 20.954244\n 0\n True\n 0.000000\n \n \n 131\n 20.832440\n 0\n True\n 0.000000\n \n \n 236\n 20.809595\n 0\n True\n 0.000000\n \n \n ...\n ...\n ...\n ...\n ...\n \n \n 1111\n 0.046366\n 0\n False\n 0.504008\n \n \n 709\n 0.040841\n 1\n False\n 0.504505\n \n \n 1209\n 0.030841\n 0\n False\n 0.504505\n \n \n 939\n 0.013704\n 1\n False\n 0.505000\n \n \n 1439\n 0.003704\n 0\n False\n 0.505000\n \n \n\n1505 rows × 4 columns\n\n\n\n\ndf[(df.fdr < 0.01)&(df.decoy==0)]\n\n\n\n\n\n \n \n \n score\n decoy\n kind\n fdr\n \n \n \n \n 417\n 20.986751\n 0\n True\n 0.0\n \n \n 486\n 20.958803\n 0\n True\n 0.0\n \n \n 46\n 20.954244\n 0\n True\n 0.0\n \n \n 131\n 20.832440\n 0\n True\n 0.0\n \n \n 236\n 20.809595\n 0\n True\n 0.0\n \n \n ...\n ...\n ...\n ...\n ...\n \n \n 313\n 11.070695\n 0\n True\n 0.0\n \n \n 227\n 11.028431\n 0\n True\n 0.0\n \n \n 153\n 11.014330\n 0\n True\n 0.0\n \n \n 113\n 11.013978\n 0\n True\n 0.0\n \n \n 48\n 11.010629\n 0\n True\n 0.0\n \n \n\n500 rows × 4 columns" + }, { "objectID": "yaml_utils.html", "href": "yaml_utils.html", diff --git a/docs/sitemap.xml b/docs/sitemap.xml index 675d7bc5..9c0e61a8 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -2,90 +2,106 @@ https://MannLabs.github.io/alphabase/utils.html - 2022-09-29T14:47:40.051Z + 2022-10-10T20:03:27.395Z + + + https://MannLabs.github.io/alphabase/statistics/regression.html + 2022-10-10T20:03:27.923Z https://MannLabs.github.io/alphabase/psm_reader/msfragger_reader.html - 2022-09-29T14:47:40.381Z + 2022-10-10T20:03:28.408Z https://MannLabs.github.io/alphabase/psm_reader/maxquant_reader.html - 2022-09-29T14:47:40.833Z + 2022-10-10T20:03:29.080Z https://MannLabs.github.io/alphabase/psm_reader/psm_reader.html - 2022-09-29T14:47:41.211Z + 2022-10-10T20:03:29.557Z https://MannLabs.github.io/alphabase/psm_reader/alphapept_reader.html - 2022-09-29T14:47:41.591Z + 2022-10-10T20:03:29.877Z https://MannLabs.github.io/alphabase/psm_reader/pfind_reader.html - 2022-09-29T14:47:41.874Z + 
2022-10-10T20:03:30.257Z https://MannLabs.github.io/alphabase/psm_reader/dia_psm_reader.html - 2022-09-29T14:47:42.481Z + 2022-10-10T20:03:31.002Z https://MannLabs.github.io/alphabase/peptide/precursor.html - 2022-09-29T14:47:42.825Z + 2022-10-10T20:03:31.427Z https://MannLabs.github.io/alphabase/peptide/fragment.html - 2022-09-29T14:47:43.314Z + 2022-10-10T20:03:32.002Z https://MannLabs.github.io/alphabase/peptide/mass_calc.html - 2022-09-29T14:47:43.612Z + 2022-10-10T20:03:32.413Z https://MannLabs.github.io/alphabase/peptide/mobility.html - 2022-09-29T14:47:43.904Z + 2022-10-10T20:03:32.719Z https://MannLabs.github.io/alphabase/spectral_library/decoy_library.html - 2022-09-29T14:47:44.204Z + 2022-10-10T20:03:33.058Z https://MannLabs.github.io/alphabase/spectral_library/library_base.html - 2022-09-29T14:47:44.564Z + 2022-10-10T20:03:33.503Z https://MannLabs.github.io/alphabase/constants/modification.html - 2022-09-29T14:47:44.929Z + 2022-10-10T20:03:33.952Z https://MannLabs.github.io/alphabase/constants/element.html - 2022-09-29T14:47:45.267Z + 2022-10-10T20:03:34.291Z https://MannLabs.github.io/alphabase/constants/isotope.html - 2022-09-29T14:47:45.583Z + 2022-10-10T20:03:34.685Z https://MannLabs.github.io/alphabase/constants/aa.html - 2022-09-29T14:47:45.887Z + 2022-10-10T20:03:35.049Z https://MannLabs.github.io/alphabase/protein/fasta.html - 2022-09-29T14:47:46.453Z + 2022-10-10T20:03:35.816Z https://MannLabs.github.io/alphabase/protein/test_fasta.html - 2022-09-29T14:47:46.726Z + 2022-10-10T20:03:36.158Z https://MannLabs.github.io/alphabase/io/hdf.html - 2022-09-29T14:47:46.986Z + 2022-10-10T20:03:36.506Z + + + https://MannLabs.github.io/alphabase/scoring/feature_extraction_base.html + 2022-10-10T20:03:36.811Z + + + https://MannLabs.github.io/alphabase/scoring/ml_scoring_base.html + 2022-10-10T20:03:37.270Z + + + https://MannLabs.github.io/alphabase/scoring/fdr.html + 2022-10-10T20:03:37.687Z https://MannLabs.github.io/alphabase/yaml_utils.html - 
2022-09-29T14:47:47.257Z + 2022-10-10T20:03:38.068Z https://MannLabs.github.io/alphabase/index.html - 2022-09-29T14:47:47.582Z + 2022-10-10T20:03:38.510Z diff --git a/docs/spectral_library/decoy_library.html b/docs/spectral_library/decoy_library.html index 79f74a46..3c730338 100644 --- a/docs/spectral_library/decoy_library.html +++ b/docs/spectral_library/decoy_library.html @@ -325,7 +325,7 @@

Decoy Libraries

+ + diff --git a/docs/spectral_library/library_base.html b/docs/spectral_library/library_base.html index f3120900..75e8327a 100644 --- a/docs/spectral_library/library_base.html +++ b/docs/spectral_library/library_base.html @@ -328,7 +328,7 @@

Base Class for Spectral Libraries

+ + diff --git a/docs/statistics/regression.html b/docs/statistics/regression.html new file mode 100644 index 00000000..c243268b --- /dev/null +++ b/docs/statistics/regression.html @@ -0,0 +1,655 @@ + + + + + + + + + +alphabase – regression + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+ +
+ + + + + +
+ + + +
+

Regression

+ +
+

source

+
+
+

LOESSRegression

+
+
 LOESSRegression (n_kernels:int=6, kernel_size:float=2.0,
+                  polynomial_degree:int=2)
+
+

scikit-learn estimator which implements a LOESS style local polynomial regression. The number of basis functions or kernels can be explicitly defined which allows for faster and cheaper training and inference.

+ ++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
TypeDefaultDetails
n_kernelsint6default = 6, The number of local polynomial functions used to approximate the data. The location and extent of the kernels will be distributed to contain an equal number of datapoints in the training set.
kernel_sizefloat2.0default = 2, A factor increasing the kernel size to overlap with the neighboring kernel.
polynomial_degreeint2default = 2, Degree of the polynomial functions used for the local approximation.
+
+

source

+
+
+

LOESSRegression.fit

+
+
 LOESSRegression.fit (x:numpy.ndarray, y:numpy.ndarray)
+
+

Fit the model based on the provided training data.

+ +++++ + + + + + + + + + + + + + + + + + + + + + + + + +
TypeDetails
xndarrayfloat, of shape (n_samples,) or (n_samples, 1), Training data. Note that only a single feature is supported at the moment.
yndarrayof shape (n_samples,) or (n_samples, 1) Target values.
Returnsself: objectReturns the fitted estimator.
+
+

source

+
+
+

LOESSRegression.predict

+
+
 LOESSRegression.predict (x:numpy.ndarray)
+
+

Predict using the LOESS model.

+ +++++ + + + + + + + + + + + + + + + + + + + +
TypeDetails
xndarrayfloat, of shape (n_samples,) or (n_samples, 1) Feature data. Note that only a single feature is supported at the moment.
Returnsnumpy.ndarray, float
+
+
+

Application example

+
+
def noisy_1d(x):
+    y = np.sin(x)
+    y_err = np.random.normal(y,0.5)
+    return y + y_err + 0.5 * x
+
+x_train = np.linspace(0,15,200)
+y_train = noisy_1d(x_train)
+
+x_test = np.linspace(0,15,200)
+y_test = LOESSRegression().fit(x_train, y_train).predict(x_test)
+
+plt.scatter(x_train,y_train)
+plt.plot(x_test,y_test,c='r')
+plt.show()
+
+

+
+
+ + +
+ +
+ +
+ + + + \ No newline at end of file diff --git a/docs/statistics/regression_files/figure-html/cell-5-output-1.png b/docs/statistics/regression_files/figure-html/cell-5-output-1.png new file mode 100644 index 0000000000000000000000000000000000000000..9eca090ac3af87b332499959cb2e15936e93a428 GIT binary patch literal 28935 zcmZ^LbyQVR_w7X#losif?vidv>28qjmTpj_ySuwY8l*wGyW>hpcV6P1i{JO&?~OO! z9}bjrcAUM}UTe;|);2^z?h`Ts9s&piLY9;eRRV!t5Q9K48L#1h-&Bvh%>sUKJBevH zDchMkxf(c_fMgAv?5*vbtSt=RyO=mQTG-jLGO{zW(!V!%aIN=gWiZ5Z&lupsg0Pv+-I1q+MI4t^(3M*w>H|Idq} zGYHzvvef_Y!*u72Wq^57mtMdH*z_?vJ}v^i!cDMopl8fpU$MkJf1kbo_e~4|jh3kR z+O&lRu^@PsQf&sFUNKL5Y=;K{oUCL{H?(Rk-UkY`kgOoAeds{*6UR=9-h?o zb^XOUt3GgdPzJj$FxlL3NeMZFj&jkO4%qkXhm^3U6tU6ie{y2n1G_sviHpNs?#&pT zZ4O`^KV3}8-|m%FCi6JcYB$(YkduE$A>^??-HOnOjdjGjas~Y5 zFLGiebt-|b%kFf^y9^%ZYPYOhYMEr_udZv}Nkse}pp%U#L8W?YO)PqikCnQu2pn3_ z2}Lis95`g+Mx|rCr#Pfsiu_F3<{){EJnWyYb?9&~&!VKPY%%xa^OX%43}&;M4IfG4 zmAepqgDT)5uiF#P>`a&oe|T`q%XYnbE>@FheAQ%RwV z&QuRD6;Z`W7RFQ>Z+vu2TVIPF>_I!i(c7ErHD7LrO^K`PzDPnc8k&&)ehFHY;*Mw% zk(zyv{hAA;RM{V4qJm0HD>Z=k=jtJQUQ!)s!a_8x~A-haCu2bI}W`Se0?-H zW)e_)x~j_!HHKFS@{c0qbr~;|O*=RaLdJ*ds2LL&LKVF4NV98`1T2E7?S@pnQn$cl zD3P3;91UZWH-rs(3@Laj0&fN>7Q8NEdpf#*Yu(x?T8tgppG^F6w!-M~_z*6h2ku4{ z+yws6k9R;fESFYrt3j-UzqzmMGjzfAHq3!WR%49M=LBqeHE73gAb@1=h z+nSc$&t5zf+t!U+u%pzOG133?0(>37fGTci%6A;YqZ`zrhYJ~tx}Twkes7Duy-uxD z31g{7`q@#qMVH*q;wk$`ST4$jxfA1s*HFkBk$vV6H;XgD1{!pQamNxXX-NEgnY-?m zBp`sk8QN8VU+R5ym+*Bl)ZY)b9NZkWT~cnLE&>6Uy@_CTIq~zq zq0ki5h9-&)-{f}tpE-%KPw1!9oQi6@{Kxf(+A#KwzEdx+H{m4Ytd^FIx`0HdLtSUr z2*TvWkXB^iZ1&FTkXdl6Ln4B@GL1XQ*PB0DvjsW^Qw#g)>i3e!3_ZIiAJS2nhN}8K zaax@OwJLfK9|Zm_>~e9oYci!yd}imSw$zkQM1_V0n1>(LF`NHhP(1oW8{ zr*+Y?1%19EXZ9kx`S@mx6%H zFIxxKF_=%LIaq%&Ye)STk2n=Pzhu>JzmA(F5DNOb>PvOf`zz(U z8QwV`hiduI=C=HadXGnTRf}l#>4{hnEULZTe_K*js}20=@8Q2X!|i2AlbD)L+6tHTeJ;!f5$0CAd27*Mx6(suERuwvRL(1!<5`hTP z@&RJkyu;+O50^#mC42p_4>PY(MbyoUye;E~+Z@Fnfc&7PVt7B;ZK;Q@h!TIp6ZdZ- zguq0o1ZVsvq_21A=3ZobEpit=38Vs>8&@!M+VdcPE?e_Sfh`d~W+zjK<<;9mD=`*V 
z2#wN|trnrQaj+m+^DHIpaMiZyDEaegH}FqIUvR~ybmZ=?(n0i#1`*CYNt)gf=h3*6 zWgwEgZCO6IaY?K+IsX!uWf`niR@HdV3VagWu#_mQsS!s2juFw1jjNQ%<$qRdt)NVr zmIcH#pJP24Ew;$)rG|Mz)qo@-pP=}klWI|%6hR!7I+J_hVh}AwBn$Nj+_Xr|biz}o zb(nuf#?e&z)UyGst7O;QUkq3}I!*(HDDd({!{@ho`n=rJO1e^_KLw#{qp(y;q-j_s z>QaXZoLp-KZBl$(bbV*Y-LeM-#mPc-2+(|o*fW$NOTGc$4aBEVfdTG>`HSq1{Z+=D zrpe4=($<*8V9P-5Kw6cXXYOB+|CwW9O+En@QiDZA?k!D<^7w7yYd54UIdo%uOAgxZ z5tJEd55^0Aj=RH?1{|=(RW|gHyLtPoE-xrDkeq>?dzw0aZ6XW6UkXm?DnIiy|9Uk!FviHEPsH;m!mmcG(=Y)_I-%62r{@Wtm0> zTid>Mfa9|K7PWz0$!f4g(k#t>wEEw{g1xGn)}$jQPkz6%zhPD>e;tM`A`!gjm=>#0 zQ71JAGUhUKBneU_X_BXjwAk-7pGP4uE~^wz2&2MvEDD+5RS#5rjQNCcp;KRK%|YCn zzeljDv}H*8FO z`!;&M;YPdhIzdoPNT;ciw}Tds%Kk^6+2WYeXeo3dHT#d3e3>oit>Q!L zwofG4I`P`31P5CIZpsN^A^+kg3Uk7o?r39zw>IG01=?!NhBj}g=CyJdm6y3Tvg;wG z4gc{=11h>{tnI>93Pt-0ol5RDJ#=R7Mh{USujMOumsbe}J0Wxn*Zar_ zX;Hd{29vnJLVaId+@=!{H+|+7ya4c){lu8MQmBaJb+;o51eWS5Lw?h_{nyJ3-E|8d zD~NF)S0w+vjE^3xY|d@V4mf_6=7*Hu1#G;nNk~ucSNAUP%)UHHMAS3PT4JS8EHXJh!JXLywnoFJz_^|8S3-s_Qf7DjpvHlpc zkcbEhF0SOk1Ugoa3v-Mf*EM4(42i0HsT^UrK4tQbCNW`P8^QUeNrU+06@1~En1EG@IbKw zMfu^{Bo)oqn>f$LBjWy($jTh^TGJ*1#JJI7p7sWtt8FLlJbQDGisK-DSh~)n7tN zPV7F`=J(Mnxl$H2a=C>gtL2wi>|23uOQv>5n-}XF6%DJo04$v@R)qES^~GZ|hvMPC zQ${d?l{NaicPZSClv%!y+^lA!wQl#pZ^j zbDkpSRZ=VVPa;u_-jUU1$vmzmBnnE(z=lolB(nxl0zjCH9zaa-bF0~FsacXVy0I=yG9=|P`K6`?9{x(^9G-O4pMI># z$#b>(Jn(2$+4F&&w5otQ4C`ar@IY@S&ScJDHAG%Fq@6Q9#YC4@xbk|2r8_*sa@tWK4TDw%ge^ zr{_BYn4|kvf;yM)BOkymHwuz;{{W=&KMfg8lKJ5e7weW$5Dc$<1@bdr@PzOq4DWIi zzoOuV()X6mCzpGXjH^>oJ^rzs>uZxq?^a_YE_{L}pP^N#-w|beGeAS$`Uo?jKITYS z%Qlt9ht%I|Kl;yu#rOm4a^^s`rFu&pB;dV{`{4l=W&vkZGd=Shn71$n-0Yx!17Y*9 z@=#k5fJ@26)J+sC;D?El9E}>&6X@Gi)U*|z!@vnQFpk$xx2%QUFNIzxE2l<$O%PvD zxWzeNXzv-R&}gfkb%y+z$XV<&84#cV_5!8`{ro_~YH){I@KpI8*<-ec!&qA;edav* z%BRXzU;uSM`Z>}Hh}bFI=${JEn^;kQ1Uz#|o8?=O-ttpzkK^Vt&gkmx$1@T7m}dw! 
zHvdd>L(>DT?{O<~?eoPvuSC`Bnpx6Y&$otA-?~1F8eL|a!&ChggFSIh+>LMi;zR<= z7I8*rZ>I$+nPR^y179xC9baS+Qc~-pSqBL=SIDYQke+N)iyuM2(%w;qw6sb5n^@jI zIFVnt!ZtJ&+>|PH?Y$35o@--T>k)r(#Ti)RBJd8mcOkaWb(AXHctH&CpJM>Q3g1&h z?4Zxu*s5p;7vX4NFGcGzU8}5QbuZpJ_}C5p?`ak}g@c}XG-vZf?W+9=C5cf;2FdB@;ASIWN&>p}!NO%mI&1*aFIft>lFOxefcVSpK%bgp zh@F;-U;baNt|*8MPOd7h^z~P>-f+LCJ!* zi*^-a((~Sn#<<+J_pTC!SqgkC)ccqTv8gx2W+Z_kq~KgSh6wa0ZG;XxXQEp7;>EcyJl^|4@rP`e9dvuBhbBkKCMxNQ1Zei}ACj5<<@fB+)E z@XytZyg-dcAMTQ{Qvg%~bRhozy!}goa>wWv*mYqNE*MmR*7dIbYDaH ztfXl@{JQ}3pqU=3&g?_`2soj|xzJeo9$m)kgM`^^v-8^B_*b!uSNa`|Go~=6|A)Jr z)m(z0_9d2DL6LB>?#IVU#Jxq|84j=`ucuNZ+)A3MM0}QFZk?+)N;)6beYN&WGDsv1<|fZYTN)J>Xc0#a zR(TC}cqO|xJhs8`Io?6ZZwOaYH1MVVA!{X^ksld!oXK((2$?pG&psSg_fV{xe1unq zNMj<@rjx2UVPWd8XZnvfW^@%iW|`olsX#HnB6k99##*gF$$HJm<_Av)z)%m*lZ#Kq zeXR|#EcU*?!3+ZSLCFOz_v#zqS}_2HiJW%%-i@Y{o&b=h&(rnp=OI4>d)+H>R8ccBb6aHl2(z-79%kSe9?f)7L0;Q%6?e5@*@$D%z%HNk=MKc zbRo%KnI0_9RILt&neHv}xbu_8JqB!ZZt0o)z?lm%6RUP6)LOd=rbj<&@QzN!IZaIAc?(wLl=q5u+Y|M(p^uSYaa|4kaljrbEh5?+#XK zD(Z2p;k>jtK5g0>h8;ML4RzvY@B$RVn$s8@ynKS+*)vtZex>!{V^F`1ZFs5P7z8yn z9op;K#yp7SyhjT=_qhb{zP651)s@%eW$nr2NCs?2!1lExOaAwjje@}*N8@b~VABdM zPM2BHD1`5!Ceh^&Tdvj&H0Li23$u_c`1Ifz449hfxWf2LO)yF5iyBjy=*Rwdr9`Bv zjG1Jtz~(jdbHi2{>d_*`jyiB}V6EBkjJl>pE{zoR-`)UV9avYz=CPElFlnK8#sNXr zOK_heKUNtoFSRpG#5I&iabRVVJa;o;!Jfa;*b^?>*OdNc=sW33zH-|^&jK8Vu*{i7Twh*L)4#tu?3+L7mB`t3+M@3-V;&PlS(F#neW6CJ55s5O5} z$m6tr+DB}*^2>SIag4v^Y>@8acx~+&lpC9xn)+sCW*!{?;`VZfGBBsM#^gjVna*G# z#WVIL(yUa(5UV*L-LTPqOyBhScn84b0A@BAjHf}u=Lm0D^GQ2gYB*oCs(g!v2Gx|) zc5bSS`p!DxiMc#+gkN(y8e=ce;Wp?f1?XE0P?65-%@{F^)b|%{3+q36HGY9DRrT+U zsIi;H$9O}~nha1R|IUv9ZNOzUMFJt?vAyT!PAO3?(n1mt569{JY0zEWF8`da>0RhX z7j8@@YwjPNcUs=P#h6X>`mx%tbm>;2OXCF)KC3C;_b<3+b`1t;vZo}h?z=hwsO;p# zR%bc$p}E3zB=!Cga*P#)iiAYY%sjr(4S2c~pf`?)2$dxHFIx!odMJQf-<|3$4g6q| z?d9J1O=%Pt6p!`eOt}YW)ODvIM#LC>b zSkDs+!y@2w8$YNQ2|`hacRTG8UIooi>8Dd-U1zuS*RT0%mF< zvFScPS(|zIq@+?ed`G8;fweL}HcH*{u!SHvg?|0*FvG1$iZ^L{Hgac#MNC$4+8Q?H zAC)5#_~tGKWoSO_tPauowY@VqLO)l_M1rAp$;!@tRYs 
zCGt0|-n@vAuYRCL`>%++F7K+nS$lrykX|)p1wR~8(Tb^Wr4>e1w`lRfRgC_@A^Y|) zl4x+CTFIr9OSeq6L$40?uuxyF8s(fHuicmkOoLK3mD%^v3%XZ9#6A+%*7WZvWwPb7 z1fHp6(tvA7X(uSl7v;DGw!toLcV{dL`PYf#gB(FrOP_Ti2{cj)h1~R*OL*y6q#SMyh1@;OS#nWv@`mFf#~{|AXewVy}BwLt!q~W7`_pd!P^~4 zNZr{Pet3Z}Ray%du>~^d)uWf( zeaWL_GR%uIqR*%gFg0_$6CKEOhOc(9_|$#S}MDraK&m%?HZ5MH2jB z_oBk87^fD8?`ON%5~$R@rGv8cL{#{L?wn9OapZbQ>vZrM9#vQd$$GiJN{T(haG@ZnHzgjABMolxP^ ziW+h%uO3rfXae$9OS-tCiq}i~yo(E`uI_G0X=yTg`r)bWx5CB7W!U1QklcIB)V4|Z zk-JMYOq2AG#%o+N_OuUN$Xupt7X%52RlB}Ymz~Jq=yVlru94k=oFepqGr}^W%g^nra^d@ z=p{Giz28qu6#CSQ{C9uaWT|QhAX@qtlMiazl}mX%?;AZZ3+02}yjwre+^|WY?WD@W zd8g;d0FlT-Wjco(pUfL6wDH{;^)0ykDSe5f`xtj%CWqzeWwr?XMI2>~>^lP&A=pjj+MvzMPV>h* zL~|@gKX7*hOx_6|6p=0q+bIAMsjhTAXSc-ICliA52**#%H=l(Y;fZl`3uHQR7wR|^ z4PL!|{X>uQ3I~@J?gQ(<1)}~R@7Cni3xjH>$5Ma1lx^3xnTyO7D(Z4QbUg>JoV=7oRN}i^}DE`;K_G$(5n|7m5*+>VXSuLILhh4O`yqR);t+ z)`Ir7uAE|QBoTfFH8oUm-ApSb!Vc}~8=`u5d2A9Xo@gc-q^_DYrG>x;A`QLeWnWOX zQ^0AXiW89c8IJx@=~$(~1<0GmkMfo0XNh_fco$HtLW)(6O*VDXQg5W|;u78ZJRr5!sg z1DTx@<{Apk$cMVgU0E|n-D}xqH@{MbZ!W!m6zS>%7GfGTg^#hSXWaUV_dWK2*H5-Q zEJyYPO0%n+Ide^o^REX-)w%Z2iiQ{_5C3nS*=TPM>@JgwFTz^d|KNitYu!ROi)rQD zkESnVigeu^#@2O^Q8p4X$=vd$^lfimP43;S(PwghPvb=&iA<}#V=md<)B1^cNk3AY z!V$BiQbn{Q60zc_ejx3<-}wC;%HO&S5*-N66fgvIxD$SA&)xF=;!#c$+@5CPDywK( z&9*;H4KOvrLvEk78OtoD^`k1SlzeZInoq11tDpQt(7 zz{f9l{9KOr@>QL-+1mX>;YM#{QLMW1UucHVKDcgjahZo;aabg%A-Z6OrDdw@ucM9Q zTAeMMw9AelaTl)5$jnPc_`%YP3a7xD|*3Pw_rp-` zIx_H@KGD0G-U?F}dX!X@m9;$irN^QpYG@$GCXY0Vk&@T^7N?CLa)g|IXh#A`fl?oz zPn__;P6JNpcUL|u0P|^hPnBiSObi#_ZFa&XxLtgmOV++70Rr|LX@R1{7#jfS?{CWMjk#(-*OZ4eRTNP6apw!U;eb zO52jt@|93%SF!5U3Flu;#R6P>qY*Rq@(~??gA{sK6!T#l_UmaoTDj8{uHrwr>WMY) zfT%I?k{`SZ<>%lkBA=Xyq^inght&}XHda5TECE6hOLTcm z0u+uoX+`ubO-IgX9V5TG0aw0r8H3%4%WJ#%`$jcQTu@>9)tu$t`dC)4#Sn!Ty?0j=f3S|?`(=suTUAyzy#bcOp zB>Z03j6VzWoex6@mc5q8A8dEuIZw??0Qq_e+2)WJ%NMA*Ph`i{Cw&Gg%}%jQNnVmq zpH?Y&Nawc?0$SH4U|TNxh*tG#$I62w6w@#V9(=SW9gpz<@)xpI>X@(css=)!`R(26 
zC#abB*|M-dYrf{&VA7ar7^ql$9^>*nWr!;Bf)xH{RPBG3>kMuP1A-nQ?c{~wbeM8L`6>>y*G68GA$qTO?lQ5U7M$C zxoh-Uxa*au{$?PwKO(exbRMms;aQ%ZzIqesa8+l29UfbzLW^m5dPU`e|4#O)+=uuH z4r{|OFo3Z(cRV%)x|D@94vo$92IxK;cucksSiiAj-&8T~2*j_Y))hmB~u}(xWrzUaO&XW=+g$Kj{Wd;l*Mn~Ah(bO?BsZ5^x9O=>gE$G0u7u9k1}hBfi>^MS&(@ftv(aqAw_#7GAetYE%_ z&*X`_4T>9ks>Qm5D`cqeDle_?k~!&>H+{w?{X<#rIv>62Zdhq$GCH3A5Ggok=BkLN z#iiDt-3>gkuby-vqM=3aaM|5(Bkn{)GY7by*dcyPn(^rz3vS(McHgjH5?ygvII>$E zFXX0HXy}!|e7&Pj^}1s$mZ+kWGO6WWu%h!lE>LEBq7 z$D|WR$9PgjdZabhY!|y5y)j!RObRcqYsnh!GA-CzrVQS5KD;pZpbsgED!)$ou(;fO zw!KU~7AxS(1pB^j%U_|>>m9etE}YUqh6ogqqFC0D~n?nl>& zSvB@VQ2Jw9ZaZ8^jnv4xfBMNwKC?o3f^1_*(#Gd|**W!rjj5~l&X_vC?p7IYcL|#r z9KLJYVEv!#`HX>|NjLj8ttx;lagD91OEe(ay|RT~Atc{U7zVnzE>Iu8-8~~XW)L(3 z^Fb9xW+R*97yvOVa$dgXynj)0vYMvLa%g84$KkrAIKNogbK$*XAm5!A^-VYGs7^hk zopW;g>Lh?=3_xq$E{?n)sqf1b#_lJ7U1&ecg4Hx$)_rO*J$3uH4^Z$KR1mSCyLiV7 za@^0LJoL8^qj z)zTve{H_9mHE@1@Fib)c!95$TyF2vh&jnawgd4fI3q3dR?Lc;`mgkWZj?HVXF-Sdu zv-H>B<}dV3oHa{aEr%;CD#f7V7Nh%X-$&TFADaB?27`{wBa_9Lm_*jgjyo<0U~SH? za`66e3tr)z59wl^&G4>qqmn&^hy0+lb$38JgfJNkBslvFiaF;M~^9asAah8hRD zvsfvN)CQbSTNAt{(Ez<4Yg=0y#7C6nJ|N$I~YsJG7U&S^he zwn5+lc#<;5u;)1eP}Sh}kVM|xCl>JPL$M9!1}8j92U9!f=YGo9h0hADB@Uk&6QOk- z^p<$|14cBGzwC33P5s zF%++V;m-s*nXF$caoZ~I*=YGdv=cQ0o4wUIbCFq}Vt5=5T2JCRgz zvZQ*1YJ-eL(G6AjQ5K`AFBsWVow95^o__m}U%lLY^}_l*wJ{Uyk)c-SK6^xFn}2?? zS_f)T`$aK!D4n2*a^_oAf5in&864H!uFp%;LwwLsiB{y6$d)ud7nFSI*4xbdKlqYo@D-t$rK* zlCrsfv$=iGcaBE;oJ3t+%{~$*d3rRjMT&ujf2@Vbw1c2SesW0~NqL`A*~foNKFGP2t=XJd zGlshUyH<1Imf)553zVy;G$5m7jA`li0mY}Kqp-dDrb83bE@nF7OeR6FXNx*(@S;zCGtqh3IU7b)hE{0T{2Mkh#0sLaDw9cxTZVTIpXafNZKd;kIU670>qw0yxO-uNFHKeJa_ zW>PM97j*Vr9WLE&r`Z*2&)wl=p>>@DS*mlJozRQnG@wxePhE;&`6&up&7^2c^@%OrVn03 z?un5<<8av=$=E4b^Yq2;8lB!w_4>w|?^6MwV9R6)WS~<>HSSq_M?)FZtcrGIt%T>K zCqHq)dYUa6tLgS&De`p;!^Cc*+!>BeT``em#VOfMQ5#Evd};Czka_wj?&O8(Cm{KT zO|U8q(04r8L#ni9Q_sCDlf0&fS&fbNR5R#2x6Q7s<8@od_b%`8<{}jj^{Q`LfSUd! 
z#zKK%lg_@5udut51&BaRPK~>csVOA@sZI5SU(ZQM$g$KnWwr%$I>!Cw_eKPARi?LV zRM1L??3=X8sXQU~6u7Y*Ad&ws(Rp=L9eLzx=#gHaZ(tR`bFb4*2FiNi{yIL4c0Rp6!r6G*9S>NCKw02ffg*0N-RYRq!1HQnyNM1_wnIX?>t)M*)F zgbT;{Vlzq$Yubod0=DOT3{g~yrLhgK?DoOCo}pG#*5CskWFS#twE8XQNg!GB#(PYIkVRL zMyQ5Y=YYH>i@utn%bU`H(X+an!i7Af`8)h9#T3rcQd)?QmAANzv{;jGVxosd z@;BPJkxxt|9)vb1HU&G$^3(tkEb1OSxndym5)s4e6A z^5^@4&BNVgo_GWv60tyfV242rkbFslLMhxKw@yq zk{_5%;Y3UWxG(=vE$bLROV29`1nKNu5Vk06tK(pFQJspnEvR1d2IR`dc z=Mw9}@&&q8W!`)RzE_(QUj`~}lAn+Hd_vS>TZc`CUglhlp?tyCVjMktA@ECpx6^dp z;P~=2v_?WKyj1WmwCj8c8w8-Rr+S|rQ14;gnSSRp?(igZ>-8o=0b9b z-X|UjHto^C5g_>R;k^Kn%YmQkn%K_qPA$-V(to?J5PV=-PPx1(u=T958Lfb(F2!v; zU4p)U@0ctY|5e8V+MRzZ06{9pU|vjnOPBH`AqU@dqSV{SP|zbNJ8XxWP%UsGs!gbC zS}(`Pd#;TLth}sbx&NA`4zr89{PMQg1lDGE@QKaFwBk39rLS)k@RI87|NSj zd@80Qmt5~cG!SUhA3Q|C+s@ts1kk;>O@bJX|GQx$Y`KWcXtw<4reJ^C-HsZNbq6N* z7;xqtzxSN;&E=vWJ1fr_YP$t$+LusYJV4}EwI3K@?&yttdX?pc*4oDF8E$S(1Wh~S zc}rYQUou1<=RzLqrscWQy+`x^T6#@gyiHUO2o!CYp5J-fCP{MM?iXey_Dy(lYOud`F3iZq@^ zq;DvR+wF`EW>>vtECIOpl;go#$o6;i>a0#EjY*N?=Kok z&hoy@`sax3YE<$a?k|%iAOu7&F2H{>rE#dYPr_vZqo8{Q^W}T}uvc`9*=_SBWU<(3 zBtRE(Yu_0s@wCNI`;l7kvz{cg0GH(!p9BH80&5_C7@5@Nnan6Yo2UyoYjut%gpP|| zeskv|%F`Z^Wo}Wh=#TUQg&M?)`v|nr_k?m{9LhbSM`6h&hNL9iJtAc9>Nl;|Y$W^q zeHFLPEGA!w+>{7mmuOJG&{;5;W+R);d*uJE&X_`V65al`3)gr%FWPbTqNp|W4RS9b z(-Ke~x2wuD4cxB;Z2yDLmccnMM!;9?@ERgPw!Pl=uRZyOIl&67(qNf0PGK(K+EYL> zYmp$~!l}~P%a*C^dR5@^2S&-Rt8Zk*(}K`)Q=ah+{He&sQT3Af+l|o{Lf|vgB@;g* z&}dI(0o2I&&!7ms3{N+3hjbkOz;)cU*h}7x0^n|tl?ePn3lK(tP?|4{zRQCgkjRS* zeHCN(JnXO+hOlL-)Khc z0_nI{Ga(B4=$P_}ew85m0G6pfP0hPA5$jqjg!pD%kj6mROjc8y7i5-tajZI006#jQ)3^eEH9e!zI+7k%cB zJEP;-UyDAG@rhpPyAZ<8ekX9c6oj zWVrJ2Vu4wFkL1863%+7*+$p!zo7LxnK8uq6HSiYh_jaUq`R@RIFd!P8JgH)s%%7;p z2BO7QszEwdL}CRfF05BEJZCf|%(wTB{e$0TE5L0KK7KqfjPH@a5WsD^M=_RjeuW@IU9p%v_K8j zf&15suN0)gO)f--qZvPq;OR9$h*;U47m6>Q&T~ef*TngiA3tKEd-x0Qc^WnD^dkS5 zv686bu$gf8Q0S_V(Pk_zC{)a;aT&(yLVqK$JbWQ|0kBuE_FcD3i0pnf2mTcf(#a!) 
z*`xLpQcR7*i!~A_uSCm*qx=a4Q0>mEhL$B3Wh>Uql&)c1X=9iv`)wAb~gTeiu)Y{(>daw#oixxgAKc4p@F9c~jOk1K?3N)dy7% z4<6vIOMXGYTP&>YT`*Td!Bb8Jzzocc=N3f+w(fq%#K5$AcK_+=J%&eNyK;VwTnEvq z_#k(FSvv6jcn!7#3VQ%J5Ztj0+qSK!~NM-;gfJ=U3 z58EmP#FWIRAT=_8ZB>pe58k4z=x<3;>k_C{1{P)2-{{moy8W%=5wN}{wR6Skw~0vX zi&a1EnLKrWfksnxXfWlEa z*4lFC-Lm)~OUXdcN9H+aU#0QVXi-4EK&j`9Ul;5FHL-?WuBZb&`RVo~Nd}^U`$i6| zsD#-Yv9!F9!un)^{$gNHbF+;XdIAvcOGFiK+-{AD7McU;E1s!FLo7SPwqx?FE#YT~ zZ1L(h;A>v3_Z*(P^Lz;r@ztO|Bqs5cYLvS=j?W$UP5UxyWSVDdygPh-6VpPtwmyve zdJ3tuB68r&lOkQrR6wWrW0n*~fMQwU>GY4SzwBCpP7kPMrEFv$cFSkHP>`9;mLzk# zzHue_C_6_EbS>IYXSrl@a@_xh&=qxlwN+@US?SX(Ee@eJZF#NR>eCPv9RHU)YS*=N z>y+G5mrD+~K_@tO9KXZ$YGCQ~m8BpxP+4tHv60x*{)YS7ez3`?9&R`?i2?gNE1;ku za-WstuX$sJAiZIJMEybC(&R(=;DR?~vG$iQSU_AM~w?TjRJ8)2{{6 zXkLDgAD)1Iz9;ZqjwO443I6f6Z`-Xxby}x2(*(=S?|=sU+OtNyaIFNp;Q^+D$$1GX zERDHmS}96?<0sB`Dg2X1^krdfr;T_R7GWxKK@q(DRaV6vLR6Qs@t4@BW?GW+PXePGvtGKUo6>-QjJLsAXr^=~WZs_CAG6pl$$8j|NfJOcqcAQMxaarF-0IDzfe=}MF;|-vBDB9IdDik zb_I0jui&2+m=XO@m#)l!kS`hpKhdhowpK1!xaU6f9qz4OQ~rn`k=iljkkfn~EjCtT z_D&J5&XXO|Qiy2aX7ATMBw1bb-DVb#ck9S)iH##x-4)mDccN-@)YRK;A@NZfCosMY zE9TV;#80pY8ERxtl?X{OC`F8>zl!~~F&rGxAHKRK{x-ava^ngDr)~d zij;yVU4qg`C|xt4gbGNfv~(lgp&&?!bhmWpFrF~17Yk#OwrbhH?OitQzdfA z$i}W^W5=wF`8GB+RaS)W6)%s+qqg0@7o+yei6y}sp;vocMlj)8TUi+xX8n=QB>1A& z7(Q`@HqKOUu%z`EJj$`{)2d^lU>?(X$H{y}2=c5}V=cSBW3tA28<8S@A zBXLhKKeud3KZl>MqJ>JuOPp6ZOmAYvr#Uh%ZpINbHc(mY9G^^@*_ztH-zzIwPH+p$ zO{{q(ndfBwLsG7E8-IBm%uYNjSr;1sT?o)pdu}tW4pz~%SbB4d{Fmi-JJhx(m zRAaH{2(cKEG=XPrQ1!{^?DoVV@T?AH0+H%h&oLxMA!XyVAxrnr@H~qwsaWG*mDu~~ zP<7OL?FQz6v&aumnww&)xr5Wac|;F?N6X+|lg|-%<#b=W@vdsY3z$s><1>HeDsTxH zZ`UIcgz|{_0H639Sd57l*8K;fJ8rj6U{ZM`gI!4UB24OO<6068%PYfTstL6%%Yl2# zTVI$vYTWMo@=x5n`ONAPt{3b1^}yhdKPk7Z4Y6XMX#Heh5F6H%NvSpgn&cDv30yR! zC=&PAQH>HQ5S@#eh$Q!oy!8v34sBcS9m>AvZLU5|s2r7{qX(X4x7{zJgCKopy zhBxAE6vZ@clnf;(m!j*O+AcLk)t}QZTV5PrV6P9v?wU_lD)|0md-(=ACYtUp_#$%6 zDo!SmN2oH5f>i!P+p}*>wkoUVF@a=bH6Svp13S)3aQ?2Dw|-K=<{j@R&3`%MMyO{? 
zC#Rj<3zeE7OH`O4sW?ormVY!>|U|q5Ig0h?E?`9pwa*p2L~44?E?qD`re^8% z@bf$G>$40+^*XAl7g?29WR;!-*VG=1Y@+QEYDecf1VWre7E0N* zF8v01f;x$S)WI@6lB0>D{RgQ`r}D*xDiFp^1i)=-2MqH*Fp=IXmpfRBGSDHUa+5A`z4JCY4h>1%ONe zPDdtAo##bv&YJ|(0cL?h~MXb6gP(tY=6tgONKjB zsYn7^}62?QkE_AA-mUV6Zr60Hj!r;R5=>1k+YEFA@QAASopVZ4@^ZM!(!x@LYU*pf;e@Ekns6XiNn%-lsc~8*5T|?+8ou0@J)d zHF*_NTNFpcXB97c1?%FUE?9b|`CLT|Kg3hrrDv znvfs`>?g=mP{SypR%#MdqNM;?wqzlUMK2uRy~;uCLPcVgJ2Xf>>yW$mj?W_fn>=xI zv2e3iNNMaWr?!99IZCS2(hk}oZ{u20?j=Cuo;kmiee)cXIwAsSud9STbW{(P3*ERt z;A_vu$;I^h_(VJaWyx7tSz)8*cHYqdQuE4Y0|NtX!SA>2Y9$;G^t-xzN!XXN<5V_< zJjdubERQGUs-R&46mIypNCyy>uuTL#q#%E$d=hWmv_5@dx-s_VQWX1s zEQI`tM8wL-lHt;ikMvAT+%9|Cg`J^foS&+ys&-0&!3b{u24h7b=+{d7b03kj^Ok6q z^zi}?AKq%E!AA2WshJbDaHpC+a)r4g#OAedcW(G~wePde!9nV%#&@)3X^*!mbrk+u zRKsq(4<&!NWsd^78lE z@Bp^%-C|&DmHb#^o#B46la!(;#P0uK;RkSm2WK7tb{H3Md`y(+$XFHavR${*pMEWc zCpf2DA6=ey$d__*9xQmceN~RyF58_lRvI1GX_$|3wO~&_%j-u}3D)_=0-1};B}L66 zONFIZ24C+ylPsvCW9=pLL~FRRPpu!{=mr*7eh2ph&JRm3Oa@c?!4r#vhsVG>9-s7p znp*Bd*$Jc6=Z}d5!{xHA!_GH9&-!!IKE)gvc&wK$jA4xNM3wjTaCL$gXOsB_b}3jr z)>}5wZHbBYL~~^Po|`aG1mEt}Ru$lgMUKYl|m1A)!mlg$6fZWeG3iKbd#$aILj#i#NJhi#qqFw%xj&$i0jJm6#%G{_OMBWIT4s zw=;sj`!5Gsyu?rtsJ=KA==iIf{FTC{-8R6k!d{BUXD!E0ma<&f0b z%imXe@-h9^^+x}555C}6ekLmtffTmuInF_yG2?g>?ui=qbdSGqTBjP0WXcDfo6sg^ zpcf4gBxnzG8T@LXe{Ac6Z|i!!XYlm(eTZ{(wH80n;bp-r5lZ&KYj(g@N!H{g501#)$D5zC`bCA! 
zoS!DVO%XLrJhAWj+!i&Cha?bj2)T=I&@&qLn#Ao$j#In2DUeIj{OgsD(ej&z}7RM^!k=Bd03qt zUs2UN*ytxup;HwnzZr=E(-qbc77n4#_)Nj`vr(0J{wQ@)@BX8kq%XRbY~*tFZnGP3 zHq|ns_1|BUZ|QdWC7}ev2}2`Aa2Th8KdU=1?#h1(_v~?7{sp&X=jBL!%>LZ}rbG0W zz-jAp&~MD@pyZU5%>EAGEk7=uNpXAGtc+;2b7x{e<(&glJw_w$x!n%S{BYPm&nRNb za>5LkjREaps|1}uzdnGEKO)hS+{L#F3VJ+PBY=9jC$tJeoNfh z1Ba$gMIz07Ela{QI~n}Bl#{c{SNAxGnrCSwk(Lj|h*SwY*OYxqlPy^ zXx>&I_+8nhQ9m&viLHJO_=4VBOxX0~rfqt@!7&*JJ61Bk+vuiDT#{O@tlU(xXMJ$( z%4{*6SQT5TNk2;RYI}-C_i)Lq;Rm{XX^%xy`jLl%#2Y_lJXFQycXVG4?P`I%+}jl> zZ-s66Q`%lxFcd*|Xr8XikyvSpM9*+Dp+9H%V+U9h#Uc~rs?So_-n5$TksKS&LOT6k z!$z-ExODKXjLiElQL0vxTm8!pH(ciZ59>EMLeIHtI*NFIZC#$Gq4il!Ven93R_BX_ zJk1V%D;fSkOapK&?#4!9q={C1jbM_uFH|8n(RxF`!_t>FgXpc&MgW3hat5w)60~PX zw$jaI{`A;&6JuuaoX6H(6vg*>ddB?s9CH_GDJpM-J7dxnNX9+Ud<%?2F8lU_9<&02 z#QVT(l2su760tNvFKe>Yb9!iX}Y`d+2H^UxOE!TL5&QZ4cIlJ>62&l+256owlvqC9+8(|5^r96?5R6(=r!cU z$6D6$mWmC*Mnn0%Pl&DN6E|x!XQZK)enJY!A^XOn85>#juNEr-3WSBZj?Zg(t!Em{#5e&mPe}dPNCrCOnuqyr!@=!2uF~62dy2$cF1M38ac=lGDW7G7 zc3hG91c1md;}(g^v*qoY+Q8%^a>(fMZL-VBUv$1exSU z21yBbzV2m0yxanTxg09Aj#Z!o9sF$YurJ&)Y14buqh|dVLzQOO;GGHp;Q;seQI15{ zA%YL_=7<8V$#PCyNFlSy^US`41Oa6DhHoY(6NF9gK#`?%=1#md(CPGFST@083Fc8~ z@zZ&Q{RF!oD+%H5bgY+`nH4Rm55#=eJo8X6OyifAsAcUqg&pqs0S3^-ZzD)QeTv54 zXVVJ_pM~K65H4&MZ+0$@dbQrJR?28_Lk-+esM2^+;07+_#nDTEIsg8K$@nWu{7*qoO)Xm{KeM)% z2Y{4P@5&a|;ITKEK5UsV4YEI~R=WoK>(^P#!Rr0XIk~xnU}Xf>E$`odm6fgkS?4rV z$9o5(C0|*gAUgN;TFlYTMN*+qn+;us9~P)iEFBq}O%uej`>nJ?50!Q*suuCT%^TU6 z)fWJ(g&4r80vg=PX}-_FeDuq?2{ZHS6`o^aRzRpNhGb7;N@fhgwAdk8aKY;&((5&C zu%lY-I?*c7*-0=el3PkJr9UWZ(M>3y-^(5^Y&%*XDtTlzhdp1{63Os@U?oRA{cGa_ zBigDuHDA4#N$V*ZTi$%Y=tD)bJ^Jtsn-{tK%#zN2&?o*~pzLNfl>6O4qYTYgXFfGj zqWN~kY-5$3p&=Rp#a+wER&jiCQ?Z6G;An7OQFs#8SB6O7Km&U6AMQtHiVOd|wQa#5 zXOM%W3_>EU|9mZx1GOwool#P`v7!V{w4g7(&3`_r6@y%DuE=NcpiHJeq#hR*8l3%S@iWD~YTi>LXgm zL;63sO@G!EEBvIQJ!oE~AyL}>1!wG7EzP$^{<)_d_kmrPqmfF)lOLzrZq+k|130H) zf2P`iKg?~sWmU|s*NtN06SnZQjq%|>r zfc*CycCE+9Z@TyPvPu;qb}LEjs!eoiT_3K0#9ETO|CJ6U9ClO7Z2y5yWw~U``iw5O 
z$>&B2KHQ#Ux(}Y*>y-pzW)56scX{@UuOJXHdo2P&@#KPUGW0JR`rr*jtE8g zja@X!qXlEHY)mt-V+^DTkQd=Z>M6DyBdR+i(nmCP1Ehks%~9rRv^1D|;_+id8ujfZu#CbU?92x6{ltmZU z&_;NNghy=IPnPp5A1b$TToJnAbH{lFEN=7LKY(QTXpXTG;)+YDvbHw z7Lad7gq|xT+Bbx+_w5Bh6|o(hFQMR1itk^3&!K$g+UUZ7_Pp8I(&l2b@dKW@x@=Cb z&YS+X?K#pX@Aw@a;y0atDksR1ycZ9hog(HXtsZBgu)>OQo^3<-JiEG-KZ%)r>*8DZvQ2{2W zTbd**j6mR;QfMr^+dDL0h~czP@_&#w)7;b(6cO(XeqSDS?*}=at;_Pfd)J}k=5g}$ zj1o|&B;yA$X<`W|X{PHbd>2<|HUxDEWary>lu@ENBu6=;po|JI&i3D@T68me94M{2 z)~GtV)nrKCo4V6}1h3p0N;g0njyvL2kALZ-z;LzuGiiq0^~+;E+Y=so^A&g}r&cO^ zIpBnph1w9Q`7t^pb0@Zv>7E^pt==Vf!8kzxNr}ITKDm|!GIlO}Dqac`zTX|a_|398 z1L1Oke`u>F34d{yBtaJ@I-Gs>YUaQ6CGK5Lph`o`6nK4!7{)zoo9kfYI=Mq^v- znwm8RVLvAwtC97UhVHxM> z(thQ}&gVYEfoHa}nAS67F75r}p>M6XPv&~G>wn@mhh`q16z;GR_Qr-ct%N+5o`uTap{e2E0*ua) zttCb@?^0{+N`3~uo?Nv_rP+%s`wsT;O`gxeK)*5idk|6Ob4Di@7{horrN}X9-W5wj zDsXkh$UAr=-g@N{)EL>?e?jk`K9P=FU|wk#+t2yU|U~K^-@z%vgUA z)N@U{LkLj^Gj9Ukl`C$sY4p}C-+E4rs7(^a=PStfm>`l9ky?6N> z3zRwJadJL9QWod5+^@m>f_td$RPxfg;CYn}FA+xU4xirdH#n@agSpK{FN?7blyS$- z40xnhJ9watlN$*1F^Es76RYTQ+j&}6Zv#$>P|HNILdK`x352grH2_2`4-vG~HX~WG zQyl^-IQM+R4j*NIj8B@j@Upr11FDA3=y!@Sxoo!+%8njvmKbY7HeS7WAYS(;5GI7X zVxeHiQ}3K%d5wl6inl(U5|W=}9}mRMtu}vT)@5>C?I-O`{v1*J{5hZ?cCqpIMI(b! 
z#Ik*i?0{;G4J5x&2iN<2lRLWxr2>hW-e8D%W^o0?^E9hAX}>pmnmPHciyal(}fI%fYlrRkcDE(?iVPi>G&qEJgM<=%dC z7L|bS#+4hIO<+Jqp)Ov38-AN%_{(YwlkmZF;h{n)48|geQ6+K1l{d(m!D(qcH7>`r z4}0GIP8JS+nVW^Q94r)xo3|d9Y>L-R(^96TA7c3!I86=J#A`rOG$0c>HENyluC5O5 z-LJWDm1#zQ*dFS}BB)gtk~dg1H5VR^f@KeVtxqR%rHrH;od=BBzi^&7d>hdQ zV>*&EK8-DGJym>tAK0VBZUNJ|bhgqHE;_LJaVFaGSJt|Q)=^KP{h*1_nqvQ4H5o+s zJ|O#8c((e)yJYvHOjViJF_?5dcBmY|+2#U7$7_ zqD4pur76#%Dza(z)61Dos$`4^T}L4whbm+QDjYZdKQ@xu6Tnca}v%^RQTn zq)2@0Z{GJ8{;9t2i#kZ)h9mmF@~W!S2}ZtPYeoktTnNV&Vz}rXcOs8@>vyRrwC7~F zKY#v2rB{z8v}~t6iz+WY<0J312=MBjudt|j@%w6NiY7J)h1fl-st_iAc1_g~!rhZn z%n+V95Gl+r;f)324-3QMU0#K!DT@Pu2Y8_Cm66bY6 zs|V1$0YN4DANllTC_Rcf213xrvF!kYMHuaikEO48cOcx4jBxrE?4aBBDDUs#98gUf z0D2V+r^eq(00+(s;3+xeS6)sNfAGL}Bnt~beeKPDFX%GKDGCtLyQ!eAEORND+B5p8 z>fIE%PPoHnpN*)?9+je^qTAPh^PI|d;`Bld;ibIL+`T^aTqTbbfzPL`W=DC6i?__% zp7}qR5QLs#V`HZ)X1yxXtPW0vdfe{tA5|?><1`z342mx9!I6(VL)mqi2CM2FpR&(U z!KbteKXGwdwxr^Z1n2=MxI=eEFF;bcBBxEw=W01+llY8V{c((pjD||}MAacFLLNxq z*D$&`udI-+gt#Qtsd$6RI3Ou@=Z=Z}TxefV(&zdI?cDJq;KScz?l?#t9Ii#l+b_3b zEO_ig!O-UGHH+Zj;J8I9%s*udtsKPDA75lT<1dSE7q@4m=oyh-*55a{0Ze?7UFmYN z_^EC36=q)cDTZ3`aw9!CXT~-?J?}CH$rsE)*$R)jpSmYoiZ|4`BCro4%KLa)F|4s` zJL}7RYs0_{fa+yY-`NiY?w_+0}85J$jv|AtW9Ql@n{JpFgNQe#|}v(3B~( zjpf=lNIs@f-va0DR$-zw+P&bgC3@6(ojjkOedCCc4B_Tj8A$&8ZAlIESvkyKLjNvf ztZ1-rP#VB(`s82VT;^dizpcu%wjiLuu5NRO$qN|cnz}%>2&~sKf0a3x@Bag;bt~4x zt=2wA2K+St(g+LEWo4^f*hsn9DL1yLvERCGF5P+MQ0Dc|zWE5Zl*falP^S|>9 z%r9-A|9|gC9}arCAkMY(iyv4dJT))c^-ccU7X{~u#4;lgHIq!u(%%o=oj$=Vl};c4 z=t~r9ovjRJ8_GIU365WiGJV}MiV^^+vqwNjTlRu42y6opcMg=|APUL^#>Hy|BpU6|Fz#t>Gp>L=DTQMu|`cD1V-S=7?%!A zcG6C(f#a;!d6xo>lauqGpHMmN&hUcjmo;^k;+Q|PjnRL!)ft#?V1+_c;^WCc*Aq#A zFIFDh4O=@Qu|9GS;nVYIT=FP|=QZj|Dx!{w>PJ&poie`c#7(1F9DB6hTYoGF%1*&Q z@ip1q8YPn&7Bc%Mb)TIrSaL|0PA-H72K}G?bk0={9+HuDfv-F7DpM}$#6^=-Cv4EEi- znIJw}&DQjfirXguF5Sp?DMB%EaX`(+g?b>@mRkH$sbjS~kGh#aJD~tv@|CTO+OY&~ z(_gc&XP}ry4=CM$2VG{^d0!sCm*LnQO85FJJs)2zXt?uKy`=BgaF*&WjUri*UURC= ze4S#6wjd7$-aQhw=kOs*4#l*x= 
zZHopW2wc;_)RQ^idtD2I{JC*@-sEAg+Wc_{=lDeA4}+ErLEQ<(+XJyGB0m02qx;FP zp$zoR!L-xkA#DRd_QRzT_{OePQ)bY9x1yrL^;bqdU5ynv_;5AkSyQ@15J4(+qRhXD;=qf9++Q55wOc(y^#{7xE`>sQa={VP zbsOE^rb4DyuG73PSoFM3h{07DgB~~j{{BN5;#f8d4O<%#LV?AT9@iI6fU!+3Xipy$ z6m$gugn)zhZEwEbL;w50`~6KzRMoJ?YFZ3#zdpyqg|F=SVl+oK28Gi0DV^Mo0ZndL z3_HJ`EalB@UY#TErWa|!2EYX*SWcF(fFP}^R-};uIrxFz18yP|N*_lq;#oUVVJst$ z-)3FsxRnVxopS)%h@&n#=`;~92oLS#t!Uu6GWD^2P8OY)$Gi?;g}qI?zsC!u_4U)a zOb4j&LxUehfO)qeJRc>N$p>+)@eUFhcw`aLZhXlgF*-<$w6^nr_Pb{QjkKc*n$}cpW2^|{)w#My) zgAa+kmK(}G&EQFU`0!yWb+8N>E@rCth53T(DjopA?`|#EhD}tpu(e1|=Z-tVL)<;d8j6|)D?v;OM4!QAcXb!wK;0%rx#Twn2DB7m9iwe_g|y8( zN(Cw`jzW+A-SbI8Qqs*#We>Fih-G`%3Pze)uc-n23<9uzS_97^kJZ!zQBhHF6&f&` zN2m$l?d|>R#9bbLcni$+eAU9(X=`4~i8ZhWrKX6mY&`6sY&eb| zzZ5#YKHA>OfG2O+Mweb+b74?3$tCNz2NO#=PZ+iZ;3bRtq~0ZdYIJigWVdpswE2%v z4A9yiA7~eJVbR>Wb&JPh>E_+qp!zn?F=uDx5Sl{2!v9=Y;vpv55XEsiUyt_8&v%KVeSOig!BzLf zvTH#=pt{B8-5<}{HY)BAlBsJ^$Q)eZdT7AF!6Ao(i>qQWmY4B1iSO_G3a}8350+Z~ z9wR;d@j7^)s4lo|K4ReJ9tHn97(8YgS2riqrfI4~?HV@1pm|uf)pP|aoEhk7ADR;~1J3>zv5c-wz)t{MhI$nE y1-w}Q-FEN4E;ms80Utils + + diff --git a/docs/yaml_utils.html b/docs/yaml_utils.html index 57c285c2..185e5eae 100644 --- a/docs/yaml_utils.html +++ b/docs/yaml_utils.html @@ -262,7 +262,7 @@

YAML Utils

+ + diff --git a/nbdev_nbs/psm_reader/dia_psm_reader.ipynb b/nbdev_nbs/psm_reader/dia_psm_reader.ipynb index 94a6003e..bca7cc76 100644 --- a/nbdev_nbs/psm_reader/dia_psm_reader.ipynb +++ b/nbdev_nbs/psm_reader/dia_psm_reader.ipynb @@ -1219,15 +1219,6 @@ "display_name": "Python 3.8.3 ('base')", "language": "python", "name": "python3" - }, - "language_info": { - "name": "python", - "version": "3.8.3" - }, - "vscode": { - "interpreter": { - "hash": "8a3b27e141e49c996c9b863f8707e97aabd49c4a7e8445b9b783b34e4a21a9b2" - } } }, "nbformat": 4, diff --git a/nbdev_nbs/scoring/fdr.ipynb b/nbdev_nbs/scoring/fdr.ipynb index 993a76ba..0adb3800 100644 --- a/nbdev_nbs/scoring/fdr.ipynb +++ b/nbdev_nbs/scoring/fdr.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -13,7 +13,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# FDR" + "# FDR functionalities" ] }, { @@ -27,7 +27,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -39,7 +39,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -206,7 +206,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -336,7 +336,7 @@ "[1505 rows x 4 columns]" ] }, - "execution_count": 4, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -384,7 +384,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -514,7 +514,7 @@ "[500 rows x 4 columns]" ] }, - "execution_count": 5, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -525,7 +525,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -535,7 +535,7 @@ }, { "cell_type": "code", - 
"execution_count": 7, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -579,7 +579,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -603,15 +603,7 @@ "name": "python3" }, "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", "version": "3.8.3" }, "vscode": { diff --git a/nbdev_nbs/scoring/feature_extraction_base.ipynb b/nbdev_nbs/scoring/feature_extraction_base.ipynb index 258ce3f1..edff5132 100644 --- a/nbdev_nbs/scoring/feature_extraction_base.ipynb +++ b/nbdev_nbs/scoring/feature_extraction_base.ipynb @@ -62,18 +62,93 @@ " Extract the scoring features (self._feature_list) \n", " and append them inplace into candidate PSMs (psm_df).\n", "\n", + " **All sub-classes must re-implement this method.**\n", + "\n", " Parameters\n", " ----------\n", " psm_df : pd.DataFrame\n", - " PSMs to be rescore.\n", + " PSMs to be rescored\n", "\n", " Returns\n", " -------\n", " pd.DataFrame\n", - " psm_df with appended the feature list extracted by this extractor.\n", + " psm_df with appended feature columns extracted by this extractor\n", " \"\"\"\n", " return psm_df\n" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "from nbdev.showdoc import show_doc" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "[source](https://github.com/MannLabs/alphabase/blob/main/alphabase/scoring/feature_extraction_base.py#L30){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### BaseFeatureExtractor.extract_features\n", + "\n", + "> BaseFeatureExtractor.extract_features\n", + "> (psm_df:pandas.core.frame.DataFram\n", + "> e, 
*args, **kwargs)\n", + "\n", + "Extract the scoring features (self._feature_list) \n", + "and append them inplace into candidate PSMs (psm_df).\n", + "\n", + "**All sub-classes must re-implement this method.**\n", + "\n", + "| | **Type** | **Details** |\n", + "| -- | -------- | ----------- |\n", + "| psm_df | DataFrame | PSMs to be rescored |\n", + "| args | | |\n", + "| kwargs | | |\n", + "| **Returns** | **DataFrame** | **psm_df with appended feature columns extracted by this extractor** |" + ], + "text/plain": [ + "---\n", + "\n", + "[source](https://github.com/MannLabs/alphabase/blob/main/alphabase/scoring/feature_extraction_base.py#L30){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### BaseFeatureExtractor.extract_features\n", + "\n", + "> BaseFeatureExtractor.extract_features\n", + "> (psm_df:pandas.core.frame.DataFram\n", + "> e, *args, **kwargs)\n", + "\n", + "Extract the scoring features (self._feature_list) \n", + "and append them inplace into candidate PSMs (psm_df).\n", + "\n", + "**All sub-classes must re-implement this method.**\n", + "\n", + "| | **Type** | **Details** |\n", + "| -- | -------- | ----------- |\n", + "| psm_df | DataFrame | PSMs to be rescored |\n", + "| args | | |\n", + "| kwargs | | |\n", + "| **Returns** | **DataFrame** | **psm_df with appended feature columns extracted by this extractor** |" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "show_doc(BaseFeatureExtractor.extract_features)" + ] } ], "metadata": { @@ -81,16 +156,6 @@ "display_name": "Python 3.8.3 ('base')", "language": "python", "name": "python3" - }, - "language_info": { - "name": "python", - "version": "3.8.3" - }, - "orig_nbformat": 4, - "vscode": { - "interpreter": { - "hash": "8a3b27e141e49c996c9b863f8707e97aabd49c4a7e8445b9b783b34e4a21a9b2" - } } }, "nbformat": 4, diff --git a/nbdev_nbs/scoring/ml_scoring_base.ipynb b/nbdev_nbs/scoring/ml_scoring_base.ipynb index 
c0238437..080d44c4 100644 --- a/nbdev_nbs/scoring/ml_scoring_base.ipynb +++ b/nbdev_nbs/scoring/ml_scoring_base.ipynb @@ -106,7 +106,7 @@ "```python\n", "class DiaNNRescoring(Percolator):\n", " def _train(self, train_t_df, train_d_df):\n", - " # No target filtration on FDR, which is the same as DiaNN but different in Percolator\n", + " # No target filtration on FDR, which is the same as DiaNN but different from Percolator\n", " #train_t_df = train_t_df[train_t_df.fdr<=self.fdr]\n", " train_df = pd.concat((train_t_df, train_d_df))\n", " train_label = np.ones(len(train_df),dtype=np.int32)\n", @@ -117,7 +117,7 @@ " train_label\n", " )\n", " def rescore(self, psm_df):\n", - " # We don't need iteration anymore, but cross validation may be still necessary\n", + " # We don't need iteration anymore, but cross validation is still necessary\n", " df = self._cv_score(df)\n", " return self._estimate_fdr(df)\n", "```\n", @@ -147,26 +147,35 @@ " self.cv_fold = 1\n", " self.iter_num = 1\n", "\n", + " self._base_features = ['score','nAA','charge']\n", + "\n", " @property\n", " def feature_list(self)->list:\n", - " \"\"\" The read-only property to get extracted feature_list \"\"\"\n", - " return self.feature_extractor.feature_list\n", + " \"\"\" Get extracted feature_list. 
Property, read-only \"\"\"\n", + " return list(set(\n", + " self._base_features+\n", + " self.feature_extractor.feature_list\n", + " ))\n", "\n", " @property\n", " def ml_model(self):\n", + " \"\"\" \n", + " ML model in Percolator.\n", + " It can be sklearn models or other models but implement \n", + " the methods `fit()` and `decision_function()` (or `predict_proba()`) \n", + " which are the same as sklearn models.\n", + " \"\"\"\n", " return self._ml_model\n", " \n", " @ml_model.setter\n", " def ml_model(self, model):\n", - " \"\"\" \n", - " `model` must be sklearn models or other models but implement \n", - " the same methods `fit()` and `decision_function()`/`predict_proba()` \n", - " as sklearn models\n", - " \"\"\"\n", " self._ml_model = model\n", "\n", " @property\n", " def feature_extractor(self)->BaseFeatureExtractor:\n", + " \"\"\"\n", + " The feature extractor inherited from `BaseFeatureExtractor`\n", + " \"\"\"\n", " return self._feature_extractor\n", " \n", " @feature_extractor.setter\n", @@ -202,7 +211,8 @@ " def rescore(self, \n", " df:pd.DataFrame\n", " )->pd.DataFrame:\n", - " \"\"\"Rescore\n", + " \"\"\"\n", + " Estimate ML scores and then FDRs (q-values)\n", "\n", " Parameters\n", " ----------\n", @@ -220,7 +230,61 @@ " df = self._estimate_fdr(df)\n", " return df\n", "\n", - " def run(self,\n", + " def run_rerank_workflow(self,\n", + " top_k_psm_df:pd.DataFrame,\n", + " rerank_column:str='spec_idx',\n", + " *args, **kwargs\n", + " )->pd.DataFrame:\n", + " \"\"\"\n", + " Run percolator workflow with reranking \n", + " the peptides for each spectrum.\n", + "\n", + " - self.extract_features()\n", + " - self.rescore()\n", + "\n", + " *args and **kwargs are used for \n", + " `self.feature_extractor.extract_features`.\n", + "\n", + " Parameters\n", + " ----------\n", + " top_k_psm_df : pd.DataFrame\n", + " PSM DataFrame\n", + "\n", + " rerank_column : str\n", + " The column use to rerank PSMs. 
\n", + " \n", + " For example, use the following code to select \n", + " the top-ranked peptide for each spectrum.\n", + " ```\n", + " rerank_column = 'spec_idx' # scan_num\n", + " idx = top_k_psm_df.groupby(\n", + " ['raw_name',rerank_column]\n", + " )['ml_score'].idxmax()\n", + " psm_df = top_k_psm_df.loc[idx].copy()\n", + " ```\n", + " Returns\n", + " -------\n", + " pd.DataFrame\n", + " Only top-scored PSM is returned for \n", + " each group of the `rerank_column`.\n", + " \"\"\"\n", + " top_k_psm_df = self.extract_features(\n", + " top_k_psm_df, *args, **kwargs\n", + " )\n", + " idxmax = top_k_psm_df.groupby(\n", + " ['raw_name',rerank_column]\n", + " )['ml_score'].idxmax()\n", + "\n", + " df = top_k_psm_df.loc[idxmax].copy()\n", + " self._train_and_score(df)\n", + "\n", + " top_k_psm_df = self._predict(top_k_psm_df)\n", + " idxmax = top_k_psm_df.groupby(\n", + " ['raw_name',rerank_column]\n", + " )['ml_score'].idxmax()\n", + " return top_k_psm_df.loc[idxmax].copy()\n", + "\n", + " def run_rescore_workflow(self,\n", " psm_df:pd.DataFrame,\n", " *args, **kwargs\n", " )->pd.DataFrame:\n", @@ -228,7 +292,7 @@ " Run percolator workflow:\n", "\n", " - self.extract_features()\n", - " - self.re_score()\n", + " - self.rescore()\n", "\n", " *args and **kwargs are used for \n", " `self.feature_extractor.extract_features`.\n", @@ -314,12 +378,12 @@ " ):\n", " train_t_df = train_t_df[train_t_df.fdr<=self.fdr]\n", "\n", - " if len(train_t_df) > self.max_train_sample:\n", + " if len(train_t_df) > self.max_training_sample:\n", " train_t_df = train_t_df.sample(\n", " n=self.max_training_sample, \n", " random_state=1337\n", " )\n", - " if len(train_d_df) > self.max_train_sample:\n", + " if len(train_d_df) > self.max_training_sample:\n", " train_d_df = train_d_df.sample(\n", " n=self.max_training_sample,\n", " random_state=1337\n", @@ -345,6 +409,28 @@ " )\n", " return test_df\n", "\n", + " def _train_and_score(self,\n", + " df:pd.DataFrame\n", + " )->pd.DataFrame:\n", + "\n", 
+ " df_target = df[df.decoy == 0]\n", + " df_decoy = df[df.decoy != 0]\n", + "\n", + " if (\n", + " np.sum(df_target.fdr<=self.fdr) < \n", + " self.min_training_sample or\n", + " len(df_decoy) < self.min_training_sample\n", + " ):\n", + " return df\n", + " \n", + " self._train(df_target, df_decoy)\n", + " test_df = pd.concat(\n", + " [df_target, df_decoy],\n", + " ignore_index=True\n", + " )\n", + " \n", + " return self._predict(test_df)\n", + "\n", " def _cv_score(self, df:pd.DataFrame)->pd.DataFrame:\n", " \"\"\"\n", " Apply cross-validation for rescoring.\n", @@ -363,9 +449,14 @@ " pd.DataFrame\n", " PSMs after rescoring\n", " \"\"\"\n", + "\n", + " if self.cv_fold <= 1:\n", + " return self._train_and_score(df)\n", + "\n", " df = df.sample(\n", " frac=1, random_state=1337\n", " ).reset_index(drop=True)\n", + "\n", " df_target = df[df.decoy == 0]\n", " df_decoy = df[df.decoy != 0]\n", "\n", @@ -377,36 +468,826 @@ " ):\n", " return df\n", " \n", - " if self.cv_fold > 1:\n", - " test_df_list = []\n", - " for i in range(self.cv_fold):\n", - " t_mask = np.ones(len(df_target), dtype=bool)\n", - " _slice = slice(i, len(df_target), self.cv_fold)\n", - " t_mask[_slice] = False\n", - " train_t_df = df_target[t_mask]\n", - " test_t_df = df_target[_slice]\n", - " \n", - " d_mask = np.ones(len(df_decoy), dtype=bool)\n", - " _slice = slice(i, len(df_decoy), self.cv_fold)\n", - " d_mask[_slice] = False\n", - " train_d_df = df_decoy[d_mask]\n", - " test_d_df = df_decoy[_slice]\n", - "\n", - " self._train(train_t_df, train_d_df)\n", - "\n", - " test_df = pd.concat((test_t_df, test_d_df))\n", - " test_df_list.append(self._predict(test_df))\n", - " \n", - " return pd.concat(test_df_list, ignore_index=True)\n", - " else:\n", + " test_df_list = []\n", + " for i in range(self.cv_fold):\n", + " t_mask = np.ones(len(df_target), dtype=bool)\n", + " _slice = slice(i, len(df_target), self.cv_fold)\n", + " t_mask[_slice] = False\n", + " train_t_df = df_target[t_mask]\n", + " test_t_df = 
df_target[_slice]\n", + " \n", + " d_mask = np.ones(len(df_decoy), dtype=bool)\n", + " _slice = slice(i, len(df_decoy), self.cv_fold)\n", + " d_mask[_slice] = False\n", + " train_d_df = df_decoy[d_mask]\n", + " test_d_df = df_decoy[_slice]\n", "\n", - " self._train(df_target, df_decoy)\n", - " test_df = pd.concat((df_target, df_decoy),ignore_index=True)\n", - " \n", - " return self._predict(test_df)\n", + " self._train(train_t_df, train_d_df)\n", + "\n", + " test_df = pd.concat((test_t_df, test_d_df))\n", + " test_df_list.append(self._predict(test_df))\n", + " \n", + " return pd.concat(test_df_list, ignore_index=True)\n", " " ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "from nbdev.showdoc import show_doc" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Properties of `Percolator`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "[source](https://github.com/MannLabs/alphabase/blob/main/alphabase/scoring/ml_scoring_base.py#L46){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### Percolator.ml_model\n", + "\n", + "> Percolator.ml_model ()\n", + "\n", + "ML model in Percolator.\n", + "It can be sklearn models or other models but implement \n", + "the methods `fit()` and `decision_function()` (or `predict_proba()`) \n", + "which are the same as sklearn models." 
+ ], + "text/plain": [ + "---\n", + "\n", + "[source](https://github.com/MannLabs/alphabase/blob/main/alphabase/scoring/ml_scoring_base.py#L46){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### Percolator.ml_model\n", + "\n", + "> Percolator.ml_model ()\n", + "\n", + "ML model in Percolator.\n", + "It can be sklearn models or other models but implement \n", + "the methods `fit()` and `decision_function()` (or `predict_proba()`) \n", + "which are the same as sklearn models." + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "show_doc(Percolator.ml_model)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "[source](https://github.com/MannLabs/alphabase/blob/main/alphabase/scoring/ml_scoring_base.py#L66){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### Percolator.feature_extractor\n", + "\n", + "> Percolator.feature_extractor ()\n", + "\n", + "The feature extractor inherited from `BaseFeatureExtractor`" + ], + "text/plain": [ + "---\n", + "\n", + "[source](https://github.com/MannLabs/alphabase/blob/main/alphabase/scoring/ml_scoring_base.py#L66){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### Percolator.feature_extractor\n", + "\n", + "> Percolator.feature_extractor ()\n", + "\n", + "The feature extractor inherited from `BaseFeatureExtractor`" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "show_doc(Percolator.feature_extractor)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "[source](https://github.com/MannLabs/alphabase/blob/main/alphabase/scoring/ml_scoring_base.py#L37){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + 
"### Percolator.feature_list\n", + "\n", + "> Percolator.feature_list ()\n", + "\n", + "Get extracted feature_list. Property, read-only" + ], + "text/plain": [ + "---\n", + "\n", + "[source](https://github.com/MannLabs/alphabase/blob/main/alphabase/scoring/ml_scoring_base.py#L37){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### Percolator.feature_list\n", + "\n", + "> Percolator.feature_list ()\n", + "\n", + "Get extracted feature_list. Property, read-only" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "show_doc(Percolator.feature_list)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Methods of `Percolator`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "[source](https://github.com/MannLabs/alphabase/blob/main/alphabase/scoring/ml_scoring_base.py#L69){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### Percolator.extract_features\n", + "\n", + "> Percolator.extract_features (psm_df:pandas.core.frame.DataFrame, *args,\n", + "> **kwargs)\n", + "\n", + "Extract features for rescoring.\n", + "\n", + "*args and **kwargs are used for \n", + "`self.feature_extractor.extract_features`.\n", + "\n", + "| | **Type** | **Details** |\n", + "| -- | -------- | ----------- |\n", + "| psm_df | DataFrame | PSM DataFrame |\n", + "| args | | |\n", + "| kwargs | | |\n", + "| **Returns** | **DataFrame** | **psm_df with feature columns appended inplace.** |" + ], + "text/plain": [ + "---\n", + "\n", + "[source](https://github.com/MannLabs/alphabase/blob/main/alphabase/scoring/ml_scoring_base.py#L69){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### Percolator.extract_features\n", + "\n", + "> Percolator.extract_features (psm_df:pandas.core.frame.DataFrame, *args,\n", + "> **kwargs)\n", + "\n", + 
"Extract features for rescoring.\n", + "\n", + "*args and **kwargs are used for \n", + "`self.feature_extractor.extract_features`.\n", + "\n", + "| | **Type** | **Details** |\n", + "| -- | -------- | ----------- |\n", + "| psm_df | DataFrame | PSM DataFrame |\n", + "| args | | |\n", + "| kwargs | | |\n", + "| **Returns** | **DataFrame** | **psm_df with feature columns appended inplace.** |" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "show_doc(Percolator.extract_features)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "[source](https://github.com/MannLabs/alphabase/blob/main/alphabase/scoring/ml_scoring_base.py#L95){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### Percolator.rescore\n", + "\n", + "> Percolator.rescore (df:pandas.core.frame.DataFrame)\n", + "\n", + "Estimate ML scores and then FDRs (q-values)\n", + "\n", + "| | **Type** | **Details** |\n", + "| -- | -------- | ----------- |\n", + "| df | DataFrame | psm_df |\n", + "| **Returns** | **DataFrame** | **psm_df with `ml_score` and `fdr` columns updated inplace** |" + ], + "text/plain": [ + "---\n", + "\n", + "[source](https://github.com/MannLabs/alphabase/blob/main/alphabase/scoring/ml_scoring_base.py#L95){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### Percolator.rescore\n", + "\n", + "> Percolator.rescore (df:pandas.core.frame.DataFrame)\n", + "\n", + "Estimate ML scores and then FDRs (q-values)\n", + "\n", + "| | **Type** | **Details** |\n", + "| -- | -------- | ----------- |\n", + "| df | DataFrame | psm_df |\n", + "| **Returns** | **DataFrame** | **psm_df with `ml_score` and `fdr` columns updated inplace** |" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "show_doc(Percolator.rescore)" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "[source](https://github.com/MannLabs/alphabase/blob/main/alphabase/scoring/ml_scoring_base.py#L171){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### Percolator.run_rescore_workflow\n", + "\n", + "> Percolator.run_rescore_workflow (psm_df:pandas.core.frame.DataFrame,\n", + "> *args, **kwargs)\n", + "\n", + "Run percolator workflow:\n", + "\n", + "- self.extract_features()\n", + "- self.rescore()\n", + "\n", + "*args and **kwargs are used for \n", + "`self.feature_extractor.extract_features`.\n", + "\n", + "| | **Type** | **Details** |\n", + "| -- | -------- | ----------- |\n", + "| psm_df | DataFrame | PSM DataFrame |\n", + "| args | | |\n", + "| kwargs | | |\n", + "| **Returns** | **DataFrame** | **psm_df with feature columns appended inplace.** |" + ], + "text/plain": [ + "---\n", + "\n", + "[source](https://github.com/MannLabs/alphabase/blob/main/alphabase/scoring/ml_scoring_base.py#L171){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### Percolator.run_rescore_workflow\n", + "\n", + "> Percolator.run_rescore_workflow (psm_df:pandas.core.frame.DataFrame,\n", + "> *args, **kwargs)\n", + "\n", + "Run percolator workflow:\n", + "\n", + "- self.extract_features()\n", + "- self.rescore()\n", + "\n", + "*args and **kwargs are used for \n", + "`self.feature_extractor.extract_features`.\n", + "\n", + "| | **Type** | **Details** |\n", + "| -- | -------- | ----------- |\n", + "| psm_df | DataFrame | PSM DataFrame |\n", + "| args | | |\n", + "| kwargs | | |\n", + "| **Returns** | **DataFrame** | **psm_df with feature columns appended inplace.** |" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "show_doc(Percolator.run_rescore_workflow)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": 
{}, + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "[source](https://github.com/MannLabs/alphabase/blob/main/alphabase/scoring/ml_scoring_base.py#L117){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### Percolator.run_rerank_workflow\n", + "\n", + "> Percolator.run_rerank_workflow (top_k_psm_df:pandas.core.frame.DataFrame,\n", + "> rerank_column:str='spec_idx', *args,\n", + "> **kwargs)\n", + "\n", + "Run percolator workflow with reranking \n", + "the peptides for each spectrum.\n", + "\n", + "- self.extract_features()\n", + "- self.rescore()\n", + "\n", + "*args and **kwargs are used for \n", + "`self.feature_extractor.extract_features`.\n", + "\n", + "| | **Type** | **Default** | **Details** |\n", + "| -- | -------- | ----------- | ----------- |\n", + "| top_k_psm_df | DataFrame | | PSM DataFrame |\n", + "| rerank_column | str | spec_idx | The column use to rerank PSMs.

For example, use the following code to select
the top-ranked peptide for each spectrum.
```
rerank_column = 'spec_idx' # scan_num
idx = top_k_psm_df.groupby(
['raw_name',rerank_column]
)['ml_score'].idxmax()
psm_df = top_k_psm_df.loc[idx].copy()
``` |\n", + "| args | | | |\n", + "| kwargs | | | |\n", + "| **Returns** | **DataFrame** | | |" + ], + "text/plain": [ + "---\n", + "\n", + "[source](https://github.com/MannLabs/alphabase/blob/main/alphabase/scoring/ml_scoring_base.py#L117){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### Percolator.run_rerank_workflow\n", + "\n", + "> Percolator.run_rerank_workflow (top_k_psm_df:pandas.core.frame.DataFrame,\n", + "> rerank_column:str='spec_idx', *args,\n", + "> **kwargs)\n", + "\n", + "Run percolator workflow with reranking \n", + "the peptides for each spectrum.\n", + "\n", + "- self.extract_features()\n", + "- self.rescore()\n", + "\n", + "*args and **kwargs are used for \n", + "`self.feature_extractor.extract_features`.\n", + "\n", + "| | **Type** | **Default** | **Details** |\n", + "| -- | -------- | ----------- | ----------- |\n", + "| top_k_psm_df | DataFrame | | PSM DataFrame |\n", + "| rerank_column | str | spec_idx | The column use to rerank PSMs.

For example, use the following code to select
the top-ranked peptide for each spectrum.
```
rerank_column = 'spec_idx' # scan_num
idx = top_k_psm_df.groupby(
['raw_name',rerank_column]
)['ml_score'].idxmax()
psm_df = top_k_psm_df.loc[idx].copy()
``` |\n", + "| args | | | |\n", + "| kwargs | | | |\n", + "| **Returns** | **DataFrame** | | |" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "show_doc(Percolator.run_rerank_workflow)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Simple Examples" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
scorenAAchargedecoyspec_idxraw_nameml_scorefdr
099.851979263018raw138.1427660.000000
198.74605273012raw133.8677790.000000
297.415167162016raw133.4477610.000000
396.857314143015raw131.8773180.000000
494.606208173048raw128.7857130.000000
...........................
1950.346523182189raw-17.0086490.979798
1960.703782153182raw-17.2927480.989899
1970.058571223177raw-17.3522931.000000
1980.90198392164raw-17.3577041.000000
1990.32037882031raw-18.3954211.000000
\n", + "

200 rows × 8 columns

\n", + "
" + ], + "text/plain": [ + " score nAA charge decoy spec_idx raw_name ml_score fdr\n", + "0 99.851979 26 3 0 18 raw 138.142766 0.000000\n", + "1 98.746052 7 3 0 12 raw 133.867779 0.000000\n", + "2 97.415167 16 2 0 16 raw 133.447761 0.000000\n", + "3 96.857314 14 3 0 15 raw 131.877318 0.000000\n", + "4 94.606208 17 3 0 48 raw 128.785713 0.000000\n", + ".. ... ... ... ... ... ... ... ...\n", + "195 0.346523 18 2 1 89 raw -17.008649 0.979798\n", + "196 0.703782 15 3 1 82 raw -17.292748 0.989899\n", + "197 0.058571 22 3 1 77 raw -17.352293 1.000000\n", + "198 0.901983 9 2 1 64 raw -17.357704 1.000000\n", + "199 0.320378 8 2 0 31 raw -18.395421 1.000000\n", + "\n", + "[200 rows x 8 columns]" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.DataFrame({\n", + " 'score': list(np.random.uniform(0,100,100))+list(np.random.uniform(0,10,100)),\n", + " 'nAA': list(np.random.randint(7,30,200)),\n", + " 'charge': list(np.random.randint(2,4,200)),\n", + " 'decoy': [0]*100+[1]*100,\n", + " 'spec_idx': np.repeat(np.arange(100),2),\n", + " 'raw_name': 'raw',\n", + "})\n", + "perc = Percolator()\n", + "perc.min_training_sample = 10\n", + "perc.run_rescore_workflow(df)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
scorenAAchargedecoyspec_idxraw_nameml_scorefdr
5444.98600025200raw23.2398710.000000
694.0206587301raw61.7629730.000000
7323.02806814202raw5.3460260.000000
1779.16353728303raw50.5846930.000000
3661.67392323204raw36.5007280.000000
...........................
1702.30608673195raw-11.4759200.744898
1058.10719282196raw-6.7650490.191011
929.717331103197raw-5.4596660.044944
1434.381494293198raw-9.1000270.565217
1305.831152263199raw-8.0403860.423913
\n", + "

100 rows × 8 columns

\n", + "
" + ], + "text/plain": [ + " score nAA charge decoy spec_idx raw_name ml_score fdr\n", + "54 44.986000 25 2 0 0 raw 23.239871 0.000000\n", + "6 94.020658 7 3 0 1 raw 61.762973 0.000000\n", + "73 23.028068 14 2 0 2 raw 5.346026 0.000000\n", + "17 79.163537 28 3 0 3 raw 50.584693 0.000000\n", + "36 61.673923 23 2 0 4 raw 36.500728 0.000000\n", + ".. ... ... ... ... ... ... ... ...\n", + "170 2.306086 7 3 1 95 raw -11.475920 0.744898\n", + "105 8.107192 8 2 1 96 raw -6.765049 0.191011\n", + "92 9.717331 10 3 1 97 raw -5.459666 0.044944\n", + "143 4.381494 29 3 1 98 raw -9.100027 0.565217\n", + "130 5.831152 26 3 1 99 raw -8.040386 0.423913\n", + "\n", + "[100 rows x 8 columns]" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "perc.run_rerank_workflow(df, rerank_column='spec_idx')" + ] + }, { "cell_type": "code", "execution_count": null, @@ -420,24 +1301,6 @@ "display_name": "Python 3.8.3 ('base')", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.3" - }, - "orig_nbformat": 4, - "vscode": { - "interpreter": { - "hash": "8a3b27e141e49c996c9b863f8707e97aabd49c4a7e8445b9b783b34e4a21a9b2" - } } }, "nbformat": 4, diff --git a/nbdev_nbs/sidebar.yml b/nbdev_nbs/sidebar.yml index b4c2cebb..53028637 100644 --- a/nbdev_nbs/sidebar.yml +++ b/nbdev_nbs/sidebar.yml @@ -31,7 +31,15 @@ website: - psm_reader/msfragger_reader.ipynb - psm_reader/pfind_reader.ipynb - psm_reader/psm_reader.ipynb + - section: scoring + contents: + - scoring/fdr.ipynb + - scoring/feature_extraction_base.ipynb + - scoring/ml_scoring_base.ipynb - section: spectral_library contents: - spectral_library/decoy_library.ipynb - - spectral_library/library_base.ipynb \ No newline at end of file + - 
spectral_library/library_base.ipynb + - section: statistics + contents: + - statistics/regression.ipynb \ No newline at end of file From 5f06b3ce5b3c3f81db2ab4bcb010edb6185c03ca Mon Sep 17 00:00:00 2001 From: GeorgWa Date: Wed, 12 Oct 2022 11:45:33 +0200 Subject: [PATCH 09/52] updated weight function --- alphabase/_modidx.py | 10 +- alphabase/statistics/regression.py | 66 +++++++----- nbdev_nbs/_quarto.yml | 3 - nbdev_nbs/sidebar.yml | 5 +- nbdev_nbs/statistics/regression.ipynb | 140 ++++++++++++++++++++------ 5 files changed, 165 insertions(+), 59 deletions(-) diff --git a/alphabase/_modidx.py b/alphabase/_modidx.py index bd864a2f..34016410 100644 --- a/alphabase/_modidx.py +++ b/alphabase/_modidx.py @@ -493,8 +493,14 @@ 'alphabase/statistics/regression.py'), 'alphabase.statistics.regression.LOESSRegression.set_params': ( 'statistics/regression.html#loessregression.set_params', 'alphabase/statistics/regression.py'), - 'alphabase.statistics.regression.LOESSRegression.tricubic': ( 'statistics/regression.html#loessregression.tricubic', - 'alphabase/statistics/regression.py')}, + 'alphabase.statistics.regression.apply_kernel': ( 'statistics/regression.html#apply_kernel', + 'alphabase/statistics/regression.py'), + 'alphabase.statistics.regression.left_open_tricubic': ( 'statistics/regression.html#left_open_tricubic', + 'alphabase/statistics/regression.py'), + 'alphabase.statistics.regression.right_open_tricubic': ( 'statistics/regression.html#right_open_tricubic', + 'alphabase/statistics/regression.py'), + 'alphabase.statistics.regression.tricubic': ( 'statistics/regression.html#tricubic', + 'alphabase/statistics/regression.py')}, 'alphabase.utils': { 'alphabase.utils._flatten': ('utils.html#_flatten', 'alphabase/utils.py'), 'alphabase.utils.explode_multiple_columns': ('utils.html#explode_multiple_columns', 'alphabase/utils.py'), 'alphabase.utils.process_bar': ('utils.html#process_bar', 'alphabase/utils.py')}, diff --git a/alphabase/statistics/regression.py 
b/alphabase/statistics/regression.py index ad457386..dcbe61cf 100644 --- a/alphabase/statistics/regression.py +++ b/alphabase/statistics/regression.py @@ -1,7 +1,7 @@ # AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbdev_nbs/statistics/regression.ipynb. # %% auto 0 -__all__ = ['EPSILON', 'LOESSRegression'] +__all__ = ['EPSILON', 'LOESSRegression', 'apply_kernel', 'tricubic', 'left_open_tricubic', 'right_open_tricubic'] # %% ../../nbdev_nbs/statistics/regression.ipynb 2 import numpy as np @@ -231,36 +231,54 @@ def get_weight_matrix(self, x: np.ndarray): """ w = np.tile(x,(1,self.n_kernels)) - w = np.abs(w - self.scale_mean) + w = w - self.scale_mean w = w/self.scale_max - + # apply weighting kernel - w = self.tricubic(w) + w = apply_kernel(w) + + w = w/np.sum(w, axis=1, keepdims=True) + + return w - #perform epsilon padding at the start and end of the weight matrix to allow for extrapolation. - # START - idx_values = np.where(w[:,0] > 0)[0] - if len(idx_values) > 0: - min_idx, max_idx = idx_values[[0, -1]] - w[:min_idx,0] = EPSILON - # END - idx_values = np.where(w[:,-1] > 0)[0] - if len(idx_values) > 0: - min_idx, max_idx = idx_values[[0, -1]] - w[max_idx:,-1] = EPSILON +def apply_kernel(w): - # normalize column wise + num_cols = w.shape[1] - w = w/np.sum(w, axis=1, keepdims=True) + if num_cols == 1: + return np.ones(w.shape) + + if num_cols == 2: + w[:,0] = left_open_tricubic(w[:,0]) + w[:,1] = right_open_tricubic(w[:,1]) return w - - @staticmethod - def tricubic(x): - """tricubic weight kernel""" - epsilon = EPSILON - mask = np.abs(x) <= 1 - return mask * (np.power(1-np.power(np.abs(x),3),3) + epsilon) + if num_cols > 2 : + w[:,0] = left_open_tricubic(w[:,0]) + w[:,1:-1] = tricubic(w[:,1:-1]) + w[:,-1] = right_open_tricubic(w[:,-1]) + + return w + +def tricubic(x): + """tricubic weight kernel""" + epsilon = EPSILON + mask = np.abs(x) <= 1 + return mask * (np.power(1-np.power(np.abs(x),3),3) + epsilon) + + +def left_open_tricubic(x): + """tricubic weight kernel 
which weights assigns 1 to values x < 0""" + y = tricubic(x) + y[x < 0] = 1 + return y + + +def right_open_tricubic(x): + """tricubic weight kernel which weights assigns 1 to values x > 0""" + y = tricubic(x) + y[x > 0] = 1 + return y diff --git a/nbdev_nbs/_quarto.yml b/nbdev_nbs/_quarto.yml index 7ffb44c1..0a6dfcb2 100644 --- a/nbdev_nbs/_quarto.yml +++ b/nbdev_nbs/_quarto.yml @@ -14,9 +14,6 @@ website: navbar: background: primary search: true - right: - - icon: github - href: "https://github.com/MannLabs/alphabase" sidebar: style: floating diff --git a/nbdev_nbs/sidebar.yml b/nbdev_nbs/sidebar.yml index b4c2cebb..32d574b8 100644 --- a/nbdev_nbs/sidebar.yml +++ b/nbdev_nbs/sidebar.yml @@ -34,4 +34,7 @@ website: - section: spectral_library contents: - spectral_library/decoy_library.ipynb - - spectral_library/library_base.ipynb \ No newline at end of file + - spectral_library/library_base.ipynb + - section: statistics + contents: + - statistics/regression.ipynb \ No newline at end of file diff --git a/nbdev_nbs/statistics/regression.ipynb b/nbdev_nbs/statistics/regression.ipynb index fbcdcc99..07c97a85 100644 --- a/nbdev_nbs/statistics/regression.ipynb +++ b/nbdev_nbs/statistics/regression.ipynb @@ -20,7 +20,16 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/georgwallmann/miniconda3/envs/alphadia/lib/python3.8/site-packages/scipy/__init__.py:146: UserWarning: A NumPy version >=1.16.5 and <1.23.0 is required for this version of SciPy (detected version 1.23.1\n", + " warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n" + ] + } + ], "source": [ "#| export\n", "import numpy as np\n", @@ -267,38 +276,56 @@ " \"\"\"\n", " w = np.tile(x,(1,self.n_kernels))\n", "\n", - " w = np.abs(w - self.scale_mean)\n", + " w = w - self.scale_mean\n", " w = w/self.scale_max\n", - " \n", + "\n", " # apply weighting kernel\n", - " w = 
self.tricubic(w)\n", + " w = apply_kernel(w)\n", + "\n", + " w = w/np.sum(w, axis=1, keepdims=True)\n", + "\n", + " return w\n", " \n", - " #perform epsilon padding at the start and end of the weight matrix to allow for extrapolation.\n", - " # START\n", - " idx_values = np.where(w[:,0] > 0)[0]\n", - " if len(idx_values) > 0:\n", - " min_idx, max_idx = idx_values[[0, -1]]\n", - " w[:min_idx,0] = EPSILON\n", "\n", - " # END\n", - " idx_values = np.where(w[:,-1] > 0)[0]\n", - " if len(idx_values) > 0:\n", - " min_idx, max_idx = idx_values[[0, -1]]\n", - " w[max_idx:,-1] = EPSILON\n", + "def apply_kernel(w):\n", "\n", - " # normalize column wise\n", + " num_cols = w.shape[1]\n", "\n", - " w = w/np.sum(w, axis=1, keepdims=True)\n", + " if num_cols == 1:\n", + " return np.ones(w.shape)\n", + "\n", + " if num_cols == 2:\n", + " w[:,0] = left_open_tricubic(w[:,0])\n", + " w[:,1] = right_open_tricubic(w[:,1])\n", "\n", " return w\n", - " \n", "\n", - " @staticmethod\n", - " def tricubic(x):\n", - " \"\"\"tricubic weight kernel\"\"\"\n", - " epsilon = EPSILON\n", - " mask = np.abs(x) <= 1\n", - " return mask * (np.power(1-np.power(np.abs(x),3),3) + epsilon)\n", + " if num_cols > 2 :\n", + " w[:,0] = left_open_tricubic(w[:,0])\n", + " w[:,1:-1] = tricubic(w[:,1:-1])\n", + " w[:,-1] = right_open_tricubic(w[:,-1])\n", + "\n", + " return w\n", + " \n", + "def tricubic(x):\n", + " \"\"\"tricubic weight kernel\"\"\"\n", + " epsilon = EPSILON\n", + " mask = np.abs(x) <= 1\n", + " return mask * (np.power(1-np.power(np.abs(x),3),3) + epsilon)\n", + "\n", + "\n", + "def left_open_tricubic(x):\n", + " \"\"\"tricubic weight kernel which weights assigns 1 to values x < 0\"\"\"\n", + " y = tricubic(x)\n", + " y[x < 0] = 1\n", + " return y\n", + "\n", + "\n", + "def right_open_tricubic(x):\n", + " \"\"\"tricubic weight kernel which weights assigns 1 to values x > 0\"\"\"\n", + " y = tricubic(x)\n", + " y[x > 0] = 1\n", + " return y\n", " " ] }, @@ -373,7 +400,7 @@ "text/markdown": [ 
"---\n", "\n", - "[source](https://github.com/MannLabs/alphabase/blob/main/alphabase/statistics/regression.py#L156){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "[source](https://github.com/MannLabs/alphabase/blob/main/alphabase/statistics/regression.py#L184){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", "\n", "### LOESSRegression.predict\n", "\n", @@ -389,7 +416,7 @@ "text/plain": [ "---\n", "\n", - "[source](https://github.com/MannLabs/alphabase/blob/main/alphabase/statistics/regression.py#L156){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "[source](https://github.com/MannLabs/alphabase/blob/main/alphabase/statistics/regression.py#L184){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", "\n", "### LOESSRegression.predict\n", "\n", @@ -445,7 +472,7 @@ "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -455,7 +482,7 @@ } ], "source": [ - "\n", + "np.set_printoptions(formatter={'float': lambda x: \"{0:0.3f}\".format(x)})\n", "def noisy_1d(x):\n", " y = np.sin(x)\n", " y_err = np.random.normal(y,0.5)\n", @@ -472,6 +499,51 @@ "plt.show()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Weight function" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig, axs = plt.subplots(nrows=3,sharex=True)\n", + "\n", + "x = np.linspace(-2,2,200)\n", + "y = left_open_tricubic(x)\n", + "axs[0].plot(x,y)\n", + "axs[0].set_title('left_open_tricubic')\n", + "\n", + "x = np.linspace(-2,2,200)\n", + "y = tricubic(x)\n", + "axs[1].plot(x,y)\n", + "axs[1].set_title('tricubic')\n", + "\n", + "x = np.linspace(-2,2,200)\n", + "y = right_open_tricubic(x)\n", + "axs[2].plot(x,y)\n", + "axs[2].set_title('right_open_tricubic')\n", + "\n", + "fig.tight_layout()\n", + "plt.show()" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -516,12 +588,22 @@ "x_test = np.linspace(-10,25,10)\n", "y_test = LOESSRegression(n_kernels=4, polynomial_degree=2).fit(x_train, y_train).predict(x_test)\n", "\n", + "\n", + "\n", "# single datapoint inference\n", "\n", "x_train = np.linspace(0,15,100)\n", "y_train = noisy_1d(x_train)\n", "x_test = np.linspace(10,10,1)\n", - "y_test = LOESSRegression(n_kernels=4, polynomial_degree=2).fit(x_train, y_train).predict(x_test)" + "y_test = LOESSRegression(n_kernels=4, polynomial_degree=2).fit(x_train, y_train).predict(x_test)\n", + "\n", + "# extrapolation when no other points are interpolated\n", + "\n", + "x_train = np.linspace(0,15,60)\n", + "y_train = y = np.sin(x_train)\n", + "x_test = np.linspace(-2,-1,1)\n", + "y_test = LOESSRegression(n_kernels=4, polynomial_degree=2).fit(x_train, y_train).predict(x_test)\n", + "assert (-2.604 - y_test) < 0.01" ] } ], From b653ecaa8009e9187c3358f22d81d1d56e85a529 Mon Sep 17 00:00:00 2001 From: "Zeng, Wen-Feng" Date: Wed, 12 Oct 2022 13:56:54 +0200 Subject: [PATCH 10/52] FE:update_features --- alphabase/_modidx.py | 4 +- alphabase/scoring/feature_extraction_base.py | 17 +++++ alphabase/scoring/ml_scoring_base.py | 9 +-- nbdev_nbs/scoring/fdr.ipynb | 9 --- .../scoring/feature_extraction_base.ipynb | 68 +++++++++++++++++++ nbdev_nbs/scoring/ml_scoring_base.ipynb | 18 +++-- 6 files changed, 
101 insertions(+), 24 deletions(-) diff --git a/alphabase/_modidx.py b/alphabase/_modidx.py index 8f4978ef..66b0f76b 100644 --- a/alphabase/_modidx.py +++ b/alphabase/_modidx.py @@ -401,7 +401,9 @@ 'alphabase.scoring.feature_extraction_base.BaseFeatureExtractor.extract_features': ( 'scoring/feature_extraction_base.html#basefeatureextractor.extract_features', 'alphabase/scoring/feature_extraction_base.py'), 'alphabase.scoring.feature_extraction_base.BaseFeatureExtractor.feature_list': ( 'scoring/feature_extraction_base.html#basefeatureextractor.feature_list', - 'alphabase/scoring/feature_extraction_base.py')}, + 'alphabase/scoring/feature_extraction_base.py'), + 'alphabase.scoring.feature_extraction_base.BaseFeatureExtractor.update_features': ( 'scoring/feature_extraction_base.html#basefeatureextractor.update_features', + 'alphabase/scoring/feature_extraction_base.py')}, 'alphabase.scoring.ml_scoring_base': { 'alphabase.scoring.ml_scoring_base.Percolator': ( 'scoring/ml_scoring_base.html#percolator', 'alphabase/scoring/ml_scoring_base.py'), 'alphabase.scoring.ml_scoring_base.Percolator.__init__': ( 'scoring/ml_scoring_base.html#percolator.__init__', diff --git a/alphabase/scoring/feature_extraction_base.py b/alphabase/scoring/feature_extraction_base.py index 5d624c5f..1916e81f 100644 --- a/alphabase/scoring/feature_extraction_base.py +++ b/alphabase/scoring/feature_extraction_base.py @@ -49,3 +49,20 @@ def extract_features(self, """ return psm_df + def update_features(self,psm_df:pd.DataFrame)->pd.DataFrame: + """ + This method allow us to update adaptive features + during the iteration of Percolator algorithm + + Parameters + ---------- + psm_df : pd.DataFrame + psm_df + + Returns + ------- + pd.DataFrame + psm_df with updated feature values + """ + return psm_df + diff --git a/alphabase/scoring/ml_scoring_base.py b/alphabase/scoring/ml_scoring_base.py index 5bcbe809..b04ad74e 100644 --- a/alphabase/scoring/ml_scoring_base.py +++ 
b/alphabase/scoring/ml_scoring_base.py @@ -25,7 +25,7 @@ def __init__(self): self._ml_model = LogisticRegression() self.fdr_level = 'psm' # psm, precursor, peptide, or sequence - self.fdr = 0.01 + self.training_fdr = 0.01 self.per_raw_fdr = False self.max_training_sample = 200000 @@ -113,6 +113,7 @@ def rescore(self, for i in range(self.iter_num): df = self._cv_score(df) df = self._estimate_fdr(df, 'psm', False) + df = self.feature_extractor.update_features(df) df = self._estimate_fdr(df) return df @@ -262,7 +263,7 @@ def _train(self, train_t_df:pd.DataFrame, train_d_df:pd.DataFrame ): - train_t_df = train_t_df[train_t_df.fdr<=self.fdr] + train_t_df = train_t_df[train_t_df.fdr<=self.training_fdr] if len(train_t_df) > self.max_training_sample: train_t_df = train_t_df.sample( @@ -303,7 +304,7 @@ def _train_and_score(self, df_decoy = df[df.decoy != 0] if ( - np.sum(df_target.fdr<=self.fdr) < + np.sum(df_target.fdr<=self.training_fdr) < self.min_training_sample or len(df_decoy) < self.min_training_sample ): @@ -347,7 +348,7 @@ def _cv_score(self, df:pd.DataFrame)->pd.DataFrame: df_decoy = df[df.decoy != 0] if ( - np.sum(df_target.fdrpd.DataFrame:\n", + " \"\"\"\n", + " This method allow us to update adaptive features\n", + " during the iteration of Percolator algorithm\n", + "\n", + " Parameters\n", + " ----------\n", + " psm_df : pd.DataFrame\n", + " psm_df\n", + " \n", + " Returns\n", + " -------\n", + " pd.DataFrame\n", + " psm_df with updated feature values\n", + " \"\"\"\n", " return psm_df\n" ] }, @@ -149,6 +166,57 @@ "source": [ "show_doc(BaseFeatureExtractor.extract_features)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "[source](https://github.com/MannLabs/alphabase/blob/main/alphabase/scoring/feature_extraction_base.py#L52){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### BaseFeatureExtractor.update_features\n", + "\n", + 
"> BaseFeatureExtractor.update_features (psm_df:pandas.core.frame.DataFrame)\n", + "\n", + "This method allow us to update adaptive features\n", + "during the iteration of Percolator algorithm\n", + "\n", + "| | **Type** | **Details** |\n", + "| -- | -------- | ----------- |\n", + "| psm_df | DataFrame | psm_df |\n", + "| **Returns** | **DataFrame** | **psm_df with updated feature values** |" + ], + "text/plain": [ + "---\n", + "\n", + "[source](https://github.com/MannLabs/alphabase/blob/main/alphabase/scoring/feature_extraction_base.py#L52){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### BaseFeatureExtractor.update_features\n", + "\n", + "> BaseFeatureExtractor.update_features (psm_df:pandas.core.frame.DataFrame)\n", + "\n", + "This method allow us to update adaptive features\n", + "during the iteration of Percolator algorithm\n", + "\n", + "| | **Type** | **Details** |\n", + "| -- | -------- | ----------- |\n", + "| psm_df | DataFrame | psm_df |\n", + "| **Returns** | **DataFrame** | **psm_df with updated feature values** |" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "show_doc(BaseFeatureExtractor.update_features)" + ] } ], "metadata": { diff --git a/nbdev_nbs/scoring/ml_scoring_base.ipynb b/nbdev_nbs/scoring/ml_scoring_base.ipynb index 080d44c4..439db435 100644 --- a/nbdev_nbs/scoring/ml_scoring_base.ipynb +++ b/nbdev_nbs/scoring/ml_scoring_base.ipynb @@ -105,12 +105,9 @@ "\n", "```python\n", "class DiaNNRescoring(Percolator):\n", - " def _train(self, train_t_df, train_d_df):\n", - " # No target filtration on FDR, which is the same as DiaNN but different from Percolator\n", - " #train_t_df = train_t_df[train_t_df.fdr<=self.fdr]\n", - " train_df = pd.concat((train_t_df, train_d_df))\n", - " train_label = np.ones(len(train_df),dtype=np.int32)\n", - " train_label[len(train_t_df):] = 0\n", + " def __init__(self):\n", + " super().__init__()\n", + " self.training_fdr = 
100000 # disable target filtration on FDR, which is the same as DiaNN but different from Percolator\n", "\n", " self._ml_model.fit(\n", " train_df[self.feature_list].values, \n", @@ -139,7 +136,7 @@ " self._ml_model = LogisticRegression()\n", " \n", " self.fdr_level = 'psm' # psm, precursor, peptide, or sequence\n", - " self.fdr = 0.01\n", + " self.training_fdr = 0.01\n", " self.per_raw_fdr = False\n", "\n", " self.max_training_sample = 200000\n", @@ -227,6 +224,7 @@ " for i in range(self.iter_num):\n", " df = self._cv_score(df)\n", " df = self._estimate_fdr(df, 'psm', False)\n", + " df = self.feature_extractor.update_features(df)\n", " df = self._estimate_fdr(df)\n", " return df\n", "\n", @@ -376,7 +374,7 @@ " train_t_df:pd.DataFrame, \n", " train_d_df:pd.DataFrame\n", " ):\n", - " train_t_df = train_t_df[train_t_df.fdr<=self.fdr]\n", + " train_t_df = train_t_df[train_t_df.fdr<=self.training_fdr]\n", "\n", " if len(train_t_df) > self.max_training_sample:\n", " train_t_df = train_t_df.sample(\n", @@ -417,7 +415,7 @@ " df_decoy = df[df.decoy != 0]\n", "\n", " if (\n", - " np.sum(df_target.fdr<=self.fdr) < \n", + " np.sum(df_target.fdr<=self.training_fdr) < \n", " self.min_training_sample or\n", " len(df_decoy) < self.min_training_sample\n", " ):\n", @@ -461,7 +459,7 @@ " df_decoy = df[df.decoy != 0]\n", "\n", " if (\n", - " np.sum(df_target.fdr Date: Wed, 12 Oct 2022 20:31:56 +0200 Subject: [PATCH 11/52] add min_precursor_num_to_run_mp --- alphabase/peptide/precursor.py | 6 ++++++ alphabase/spectral_library/library_base.py | 4 ++-- nbdev_nbs/peptide/precursor.ipynb | 6 ++++++ nbdev_nbs/spectral_library/library_base.ipynb | 4 ++-- 4 files changed, 16 insertions(+), 4 deletions(-) diff --git a/alphabase/peptide/precursor.py b/alphabase/peptide/precursor.py index 15d20a6e..7b5a88aa 100644 --- a/alphabase/peptide/precursor.py +++ b/alphabase/peptide/precursor.py @@ -490,6 +490,7 @@ def calc_precursor_isotope_mp( mp_batch_size:int=100000, process_bar=None, 
min_right_most_intensity:float=0.2, + min_precursor_num_to_run_mp:int=1000, )->pd.DataFrame: """`calc_precursor_isotope` is not that fast for large dataframes, so here we use multiprocessing for faster isotope pattern calculation. @@ -527,6 +528,11 @@ def calc_precursor_isotope_mp( - isotope_right_most_mz - isotope_right_most_index """ + if len(precursor_df) < min_precursor_num_to_run_mp: + return calc_precursor_isotope( + precursor_df=precursor_df, + min_right_most_intensity=min_right_most_intensity, + ) df_list = [] df_group = precursor_df.groupby('nAA') with mp.Pool(processes) as p: diff --git a/alphabase/spectral_library/library_base.py b/alphabase/spectral_library/library_base.py index 5858bd87..0e48b375 100644 --- a/alphabase/spectral_library/library_base.py +++ b/alphabase/spectral_library/library_base.py @@ -234,7 +234,7 @@ def calc_precursor_isotope(self, multiprocessing:bool=True, mp_process_num:int=8, mp_process_bar=None, - min_num_for_mp:int=1000, + min_precursor_num_to_run_mp:int=1000, ): """ Append isotope columns into self.precursor_df. @@ -243,7 +243,7 @@ def calc_precursor_isotope(self, if 'precursor_mz' not in self._precursor_df.columns: self.calc_precursor_mz() self.clip_by_precursor_mz_() - if multiprocessing and len(self.precursor_df)>min_num_for_mp: + if multiprocessing and len(self.precursor_df)>min_precursor_num_to_run_mp: ( self._precursor_df ) = precursor.calc_precursor_isotope_mp( diff --git a/nbdev_nbs/peptide/precursor.ipynb b/nbdev_nbs/peptide/precursor.ipynb index 961a0106..59436bce 100644 --- a/nbdev_nbs/peptide/precursor.ipynb +++ b/nbdev_nbs/peptide/precursor.ipynb @@ -529,6 +529,7 @@ " mp_batch_size:int=100000,\n", " process_bar=None,\n", " min_right_most_intensity:float=0.2,\n", + " min_precursor_num_to_run_mp:int=1000,\n", ")->pd.DataFrame:\n", " \"\"\"`calc_precursor_isotope` is not that fast for large dataframes, \n", " so here we use multiprocessing for faster isotope pattern calculation. 
\n", @@ -566,6 +567,11 @@ " - isotope_right_most_mz\n", " - isotope_right_most_index\n", " \"\"\"\n", + " if len(precursor_df) < min_precursor_num_to_run_mp:\n", + " return calc_precursor_isotope(\n", + " precursor_df=precursor_df,\n", + " min_right_most_intensity=min_right_most_intensity,\n", + " )\n", " df_list = []\n", " df_group = precursor_df.groupby('nAA')\n", " with mp.Pool(processes) as p:\n", diff --git a/nbdev_nbs/spectral_library/library_base.ipynb b/nbdev_nbs/spectral_library/library_base.ipynb index 93d4986f..c8bcffc7 100644 --- a/nbdev_nbs/spectral_library/library_base.ipynb +++ b/nbdev_nbs/spectral_library/library_base.ipynb @@ -261,7 +261,7 @@ " multiprocessing:bool=True,\n", " mp_process_num:int=8,\n", " mp_process_bar=None,\n", - " min_num_for_mp:int=1000,\n", + " min_precursor_num_to_run_mp:int=1000,\n", " ):\n", " \"\"\"\n", " Append isotope columns into self.precursor_df.\n", @@ -270,7 +270,7 @@ " if 'precursor_mz' not in self._precursor_df.columns:\n", " self.calc_precursor_mz()\n", " self.clip_by_precursor_mz_()\n", - " if multiprocessing and len(self.precursor_df)>min_num_for_mp:\n", + " if multiprocessing and len(self.precursor_df)>min_precursor_num_to_run_mp:\n", " (\n", " self._precursor_df\n", " ) = precursor.calc_precursor_isotope_mp(\n", From 93aa2fad2d367b61d356164b4582a9882eecacdc Mon Sep 17 00:00:00 2001 From: "Zeng, Wen-Feng" Date: Thu, 13 Oct 2022 10:18:07 +0200 Subject: [PATCH 12/52] move translate.py from AlphaPeptDeep --- alphabase/_modidx.py | 26 + alphabase/spectral_library/translate.py | 440 +++++++ nbdev_nbs/spectral_library/translate.ipynb | 1321 ++++++++++++++++++++ 3 files changed, 1787 insertions(+) create mode 100644 alphabase/spectral_library/translate.py create mode 100644 nbdev_nbs/spectral_library/translate.ipynb diff --git a/alphabase/_modidx.py b/alphabase/_modidx.py index 66b0f76b..4eb6b3da 100644 --- a/alphabase/_modidx.py +++ b/alphabase/_modidx.py @@ -524,6 +524,32 @@ 
'alphabase/spectral_library/library_base.py'), 'alphabase.spectral_library.library_base.SpecLibBase.update_precursor_mz': ( 'spectral_library/library_base.html#speclibbase.update_precursor_mz', 'alphabase/spectral_library/library_base.py')}, + 'alphabase.spectral_library.translate': { 'alphabase.spectral_library.translate.WritingProcess': ( 'spectral_library/translate.html#writingprocess', + 'alphabase/spectral_library/translate.py'), + 'alphabase.spectral_library.translate.WritingProcess.__init__': ( 'spectral_library/translate.html#writingprocess.__init__', + 'alphabase/spectral_library/translate.py'), + 'alphabase.spectral_library.translate.WritingProcess.run': ( 'spectral_library/translate.html#writingprocess.run', + 'alphabase/spectral_library/translate.py'), + 'alphabase.spectral_library.translate._get_frag_info_from_column_name': ( 'spectral_library/translate.html#_get_frag_info_from_column_name', + 'alphabase/spectral_library/translate.py'), + 'alphabase.spectral_library.translate._get_frag_num': ( 'spectral_library/translate.html#_get_frag_num', + 'alphabase/spectral_library/translate.py'), + 'alphabase.spectral_library.translate.create_modified_sequence': ( 'spectral_library/translate.html#create_modified_sequence', + 'alphabase/spectral_library/translate.py'), + 'alphabase.spectral_library.translate.is_nterm_frag': ( 'spectral_library/translate.html#is_nterm_frag', + 'alphabase/spectral_library/translate.py'), + 'alphabase.spectral_library.translate.mask_fragment_intensity_by_frag_nAA': ( 'spectral_library/translate.html#mask_fragment_intensity_by_frag_naa', + 'alphabase/spectral_library/translate.py'), + 'alphabase.spectral_library.translate.mask_fragment_intensity_by_mz_': ( 'spectral_library/translate.html#mask_fragment_intensity_by_mz_', + 'alphabase/spectral_library/translate.py'), + 'alphabase.spectral_library.translate.merge_precursor_fragment_df': ( 'spectral_library/translate.html#merge_precursor_fragment_df', + 
'alphabase/spectral_library/translate.py'), + 'alphabase.spectral_library.translate.speclib_to_single_df': ( 'spectral_library/translate.html#speclib_to_single_df', + 'alphabase/spectral_library/translate.py'), + 'alphabase.spectral_library.translate.speclib_to_swath_df': ( 'spectral_library/translate.html#speclib_to_swath_df', + 'alphabase/spectral_library/translate.py'), + 'alphabase.spectral_library.translate.translate_to_tsv': ( 'spectral_library/translate.html#translate_to_tsv', + 'alphabase/spectral_library/translate.py')}, 'alphabase.statistics.regression': { 'alphabase.statistics.regression.LOESSRegression': ( 'statistics/regression.html#loessregression', 'alphabase/statistics/regression.py'), 'alphabase.statistics.regression.LOESSRegression.__init__': ( 'statistics/regression.html#loessregression.__init__', diff --git a/alphabase/spectral_library/translate.py b/alphabase/spectral_library/translate.py new file mode 100644 index 00000000..55f0f280 --- /dev/null +++ b/alphabase/spectral_library/translate.py @@ -0,0 +1,440 @@ +# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbdev_nbs/spectral_library/translate.ipynb. 
+ +# %% auto 0 +__all__ = ['mod_to_unimod_dict', 'mod_to_modname_dict', 'create_modified_sequence', 'merge_precursor_fragment_df', + 'is_nterm_frag', 'mask_fragment_intensity_by_mz_', 'mask_fragment_intensity_by_frag_nAA', + 'speclib_to_single_df', 'speclib_to_swath_df', 'WritingProcess', 'translate_to_tsv'] + +# %% ../../nbdev_nbs/spectral_library/translate.ipynb 3 +import pandas as pd +import numpy as np +import tqdm +import typing +import numba +import multiprocessing as mp + +from ..constants.modification import MOD_DF + +from .library_base import SpecLibBase + +from ..utils import explode_multiple_columns + +# %% ../../nbdev_nbs/spectral_library/translate.ipynb 4 +#@numba.njit #(cannot use numba for pd.Series) +def create_modified_sequence( + df_items:typing.Tuple, # must be ('sequence','mods','mod_sites') + translate_mod_dict:dict=None, + mod_sep='()', + nterm = '_', + cterm = '_' +): + ''' + Translate `(sequence, mods, mod_sites)` into a modified sequence. Used by `df.apply()`. + For example, `('ABCDEFG','Mod1@A;Mod2@E','1;5')`->`_A[Mod1@A]BCDE[Mod2@E]FG_`. 
+ + Parameters + ---------- + df_items : List + must be `(sequence, mods, mod_sites)` + + translate_mod_dict : dict + A dict to map alpha modification names to other software + + mod_sep : str + '[]' or '()', default '()' + + ''' + mod_seq = df_items[0] + if df_items[1]: + mods = df_items[1].split(';') + mod_sites = [int(i) for i in df_items[2].split(';')] + rev_order = np.argsort(mod_sites)[::-1] + mod_sites = [mod_sites[rev_order[i]] for i in range(len(mod_sites))] + mods = [mods[rev_order[i]] for i in range(len(mods))] + if translate_mod_dict is not None: + mods = [translate_mod_dict[mod] for mod in mods] + for _site, mod in zip(mod_sites, mods): + if _site > 0: + mod_seq = mod_seq[:_site] + mod_sep[0]+mod+mod_sep[1] + mod_seq[_site:] + elif _site == -1: + cterm += mod_sep[0]+mod+mod_sep[1] + elif _site == 0: + nterm += mod_sep[0]+mod+mod_sep[1] + else: + mod_seq = mod_seq[:_site] + mod_sep[0]+mod+mod_sep[1] + mod_seq[_site:] + return nterm + mod_seq + cterm + +# %% ../../nbdev_nbs/spectral_library/translate.ipynb 9 +@numba.njit +def _get_frag_info_from_column_name(column:str): + ''' + Only used when converting alphabase libraries into other libraries + ''' + idx = column.rfind('_') + frag_type = column[:idx] + charge = column[idx+2:] + if len(frag_type)==1: + loss_type = 'noloss' + else: + idx = frag_type.find('_') + loss_type = frag_type[idx+1:] + frag_type = frag_type[0] + return frag_type, loss_type, charge + +def _get_frag_num(columns, rows, frag_len): + frag_nums = [] + for r,c in zip(rows, columns): + if is_nterm_frag(c): + frag_nums.append(r+1) + else: + frag_nums.append(frag_len-r) + return frag_nums + +def merge_precursor_fragment_df( + precursor_df:pd.DataFrame, + fragment_mz_df:pd.DataFrame, + fragment_inten_df:pd.DataFrame, + top_n_inten:int, + frag_type_head:str='FragmentType', + frag_mass_head:str='FragmentMz', + frag_inten_head:str='RelativeIntensity', + frag_charge_head:str='FragmentCharge', + frag_loss_head:str='FragmentLossType', + 
frag_num_head:str='FragmentNumber', + verbose=True, +): + ''' + Convert alphabase library into a single dataframe. + This method is not important, as it will be only + used by DiaNN, or spectronaut, or others + ''' + df = precursor_df.copy() + frag_columns = fragment_mz_df.columns.values.astype('U') + frag_type_list = [] + frag_loss_list = [] + frag_charge_list = [] + frag_mass_list = [] + frag_inten_list = [] + frag_num_list = [] + iters = enumerate(df[['frag_start_idx','frag_end_idx']].values) + if verbose: + iters = tqdm.tqdm(iters) + for i,(start, end) in iters: + intens = fragment_inten_df.iloc[start:end,:].values # is loc[start:end-1,:] faster? + max_inten = np.amax(intens) + if max_inten > 0: + intens /= max_inten + masses = fragment_mz_df.iloc[start:end,:].values + sorted_idx = np.argsort(intens.reshape(-1))[-top_n_inten:][::-1] + idx_in_df = np.unravel_index(sorted_idx, masses.shape) + + frag_len = end-start + rows = np.arange(frag_len, dtype=np.int32)[idx_in_df[0]] + columns = frag_columns[idx_in_df[1]] + + frag_types, loss_types, charges = zip( + *[_get_frag_info_from_column_name(_) for _ in columns] + ) + + frag_nums = _get_frag_num(columns, rows, frag_len) + + frag_type_list.append(frag_types) + frag_loss_list.append(loss_types) + frag_charge_list.append(charges) + frag_mass_list.append(masses[idx_in_df]) + frag_inten_list.append(intens[idx_in_df]) + frag_num_list.append(frag_nums) + + df[frag_type_head] = frag_type_list + df[frag_mass_head] = frag_mass_list + df[frag_inten_head] = frag_inten_list + df[frag_charge_head] = frag_charge_list + df[frag_loss_head] = frag_loss_list + df[frag_num_head] = frag_num_list + + return explode_multiple_columns(df, + [ + frag_type_head, + frag_mass_head, + frag_inten_head, + frag_charge_head, + frag_loss_head, + frag_num_head + ] + ) + + # try: + # return df.explode([ + # frag_type_head, + # frag_mass_head, + # frag_inten_head, + # frag_charge_head, + # frag_loss_head, + # frag_num_head + # ]) + # except ValueError: 
+ # # df.explode does not allow mulitple columns before pandas version 1.x.x. + # df = df.explode(frag_type_head) + + # df[frag_mass_head] = _flatten(frag_mass_list) + # df[frag_inten_head] = _flatten(frag_inten_list) + # df[frag_charge_head] = _flatten(frag_charge_list) + # df[frag_loss_head] = _flatten(frag_loss_list) + # df[frag_num_head] = _flatten(frag_num_list) + # return df + +mod_to_unimod_dict = {} +mod_to_modname_dict = {} +for mod_name,unimod_id in MOD_DF[['mod_name','unimod_id']].values: + if unimod_id==-1 or unimod_id=='-1': continue + mod_to_unimod_dict[mod_name] = f"UniMod:{unimod_id}" + mod_to_modname_dict[mod_name] = mod_name[:mod_name.find('@')] + +def is_nterm_frag(frag_type:str): + return frag_type[0] in 'abc' + +def mask_fragment_intensity_by_mz_( + fragment_mz_df:pd.DataFrame, + fragment_intensity_df:pd.DataFrame, + min_frag_mz, max_frag_mz +): + fragment_intensity_df.mask( + (fragment_mz_df>max_frag_mz)|(fragment_mz_dfpd.DataFrame: + ''' + Convert alphabase library to diann (or Spectronaut) library dataframe + This method is not important, as it will be only + used by DiaNN, or spectronaut, or others + + Parameters + ---------- + translate_mod_dict : dict + a dict map modifications from alphabase to other software. Default: build-in `alpha_to_other_mod_dict` + + keep_k_highest_peaks : int + only keep highest fragments for each precursor. 
Default: 12 + + Returns + ------- + pd.DataFrame + a single dataframe in the SWATH-like format + + ''' + df = pd.DataFrame() + df['ModifiedPeptide'] = speclib._precursor_df[ + ['sequence','mods','mod_sites'] + ].apply( + create_modified_sequence, + axis=1, + translate_mod_dict=translate_mod_dict, + mod_sep='()' + ) + + df['frag_start_idx'] = speclib._precursor_df['frag_start_idx'] + df['frag_end_idx'] = speclib._precursor_df['frag_end_idx'] + + df['PrecursorCharge'] = speclib._precursor_df['charge'] + if 'irt_pred' in speclib._precursor_df.columns: + df['Tr_recalibrated'] = speclib._precursor_df['irt_pred'] + elif 'rt_pred' in speclib._precursor_df.columns: + df['Tr_recalibrated'] = speclib._precursor_df['rt_pred'] + elif 'rt_norm' in speclib._precursor_df.columns: + df['Tr_recalibrated'] = speclib._precursor_df['rt_norm'] + else: + raise ValueError('precursor_df must contain the "rt_pred" or "rt_norm" column') + + if 'mobility_pred' in speclib._precursor_df.columns: + df['IonMobility'] = speclib._precursor_df.mobility_pred + elif 'mobility' in speclib._precursor_df.columns: + df['IonMobility'] = speclib._precursor_df.mobility + + # df['LabelModifiedSequence'] = df['ModifiedPeptide'] + df['StrippedPeptide'] = speclib._precursor_df['sequence'] + + if 'precursor_mz' not in speclib._precursor_df.columns: + speclib.calc_precursor_mz() + df['PrecursorMz'] = speclib._precursor_df['precursor_mz'] + + if 'uniprot_ids' in speclib._precursor_df.columns: + df['ProteinID'] = speclib._precursor_df.uniprot_ids + elif 'proteins' in speclib._precursor_df.columns: + df['ProteinID'] = speclib._precursor_df.proteins + + if 'genes' in speclib._precursor_df.columns: + df['Genes'] = speclib._precursor_df['genes'] + + # if 'protein_group' in speclib._precursor_df.columns: + # df['ProteinGroups'] = speclib._precursor_df['protein_group'] + + if min_frag_mz > 0 or max_frag_mz > 0: + mask_fragment_intensity_by_mz_( + speclib._fragment_mz_df, + speclib._fragment_intensity_df, + min_frag_mz, 
max_frag_mz + ) + + if min_frag_nAA > 0: + mask_fragment_intensity_by_frag_nAA( + speclib._fragment_intensity_df, + speclib._precursor_df, + max_mask_frag_nAA=min_frag_nAA-1 + ) + + df = merge_precursor_fragment_df( + df, + speclib._fragment_mz_df, + speclib._fragment_intensity_df, + top_n_inten=keep_k_highest_fragments, + frag_type_head=frag_type_head, + frag_mass_head=frag_mass_head, + frag_inten_head=frag_inten_head, + frag_charge_head=frag_charge_head, + frag_loss_head=frag_loss_head, + frag_num_head=frag_num_head, + verbose=verbose + ) + df = df[df['RelativeIntensity']>min_frag_intensity] + df.loc[df[frag_loss_head]=='modloss',frag_loss_head] = modloss + + return df.drop(['frag_start_idx','frag_end_idx'], axis=1) + +def speclib_to_swath_df( + speclib:SpecLibBase, + *, + keep_k_highest_fragments:int=12, + min_frag_mz = 200, + max_frag_mz = 2000, + min_frag_intensity = 0.01, +)->pd.DataFrame: + speclib_to_single_df( + speclib, + translate_mod_dict=mod_to_modname_dict, + keep_k_highest_fragments=keep_k_highest_fragments, + min_frag_mz = min_frag_mz, + max_frag_mz = max_frag_mz, + min_frag_intensity = min_frag_intensity, + ) + +class WritingProcess(mp.Process): + def __init__(self, task_queue, tsv, *args, **kwargs): + self.task_queue:mp.Queue = task_queue + self.tsv = tsv + super().__init__(*args, **kwargs) + + def run(self): + while True: + df, batch = self.task_queue.get() + if df is None: break + df.to_csv(self.tsv, header=(batch==0), sep="\t", mode="a", index=False) + + +def translate_to_tsv( + speclib:SpecLibBase, + tsv:str, + *, + keep_k_highest_fragments:int=12, + min_frag_mz:float = 200, + max_frag_mz:float = 2000, + min_frag_intensity:float = 0.01, + min_frag_nAA:int = 0, + batch_size:int = 100000, + translate_mod_dict:dict = mod_to_modname_dict, + multiprocessing:bool=True +): + if multiprocessing: + queue_size = 1000000//batch_size + if queue_size < 2: + queue_size = 2 + elif queue_size > 10: + queue_size = 10 + df_head_queue = 
mp.Queue(maxsize=queue_size) + writing_process = WritingProcess(df_head_queue, tsv) + writing_process.start() + mask_fragment_intensity_by_mz_( + speclib._fragment_mz_df, + speclib._fragment_intensity_df, + min_frag_mz, max_frag_mz + ) + if min_frag_nAA > 0: + mask_fragment_intensity_by_frag_nAA( + speclib._fragment_intensity_df, + speclib._precursor_df, + max_mask_frag_nAA=min_frag_nAA-1 + ) + if isinstance(tsv, str): + with open(tsv, "w"): pass + _speclib = SpecLibBase() + _speclib._fragment_intensity_df = speclib._fragment_intensity_df + _speclib._fragment_mz_df = speclib._fragment_mz_df + precursor_df = speclib._precursor_df + for i in tqdm.tqdm(range(0, len(precursor_df), batch_size)): + _speclib._precursor_df = precursor_df.iloc[i:i+batch_size] + df = speclib_to_single_df( + _speclib, translate_mod_dict=translate_mod_dict, + keep_k_highest_fragments=keep_k_highest_fragments, + min_frag_mz=0, + max_frag_mz=0, + min_frag_intensity=min_frag_intensity, + min_frag_nAA=0, + verbose=False + ) + if multiprocessing: + df_head_queue.put((df, i)) + else: + df.to_csv(tsv, header=(i==0), sep="\t", mode='a', index=False) + if multiprocessing: + df_head_queue.put((None, None)) + print("Translation finished, it will take several minutes to export the rest precursors to the tsv file...") + writing_process.join() + diff --git a/nbdev_nbs/spectral_library/translate.ipynb b/nbdev_nbs/spectral_library/translate.ipynb new file mode 100644 index 00000000..a697e59a --- /dev/null +++ b/nbdev_nbs/spectral_library/translate.ipynb @@ -0,0 +1,1321 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| default_exp spectral_library.translate" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Translate Spectral Libraries" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Translate peptdeep spectral libraries into other formats (e.g. 
TSV)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "import pandas as pd\n", + "import numpy as np\n", + "import tqdm\n", + "import typing\n", + "import numba\n", + "import multiprocessing as mp\n", + "\n", + "from alphabase.constants.modification import MOD_DF\n", + "\n", + "from alphabase.spectral_library.library_base import SpecLibBase\n", + "\n", + "from alphabase.utils import explode_multiple_columns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "#@numba.njit #(cannot use numba for pd.Series)\n", + "def create_modified_sequence(\n", + " df_items:typing.Tuple, # must be ('sequence','mods','mod_sites')\n", + " translate_mod_dict:dict=None,\n", + " mod_sep='()',\n", + " nterm = '_',\n", + " cterm = '_'\n", + "):\n", + " '''\n", + " Translate `(sequence, mods, mod_sites)` into a modified sequence. Used by `df.apply()`.\n", + " For example, `('ABCDEFG','Mod1@A;Mod2@E','1;5')`->`_A[Mod1@A]BCDE[Mod2@E]FG_`.\n", + "\n", + " Parameters\n", + " ----------\n", + " df_items : List\n", + " must be `(sequence, mods, mod_sites)`\n", + "\n", + " translate_mod_dict : dict\n", + " A dict to map alpha modification names to other software\n", + "\n", + " mod_sep : str\n", + " '[]' or '()', default '()'\n", + "\n", + " '''\n", + " mod_seq = df_items[0]\n", + " if df_items[1]:\n", + " mods = df_items[1].split(';')\n", + " mod_sites = [int(i) for i in df_items[2].split(';')]\n", + " rev_order = np.argsort(mod_sites)[::-1]\n", + " mod_sites = [mod_sites[rev_order[i]] for i in range(len(mod_sites))]\n", + " mods = [mods[rev_order[i]] for i in range(len(mods))]\n", + " if translate_mod_dict is not None:\n", + " mods = [translate_mod_dict[mod] for mod in mods]\n", + " for _site, mod in zip(mod_sites, mods):\n", + " if _site > 0:\n", + " mod_seq = mod_seq[:_site] + mod_sep[0]+mod+mod_sep[1] + mod_seq[_site:]\n", + " 
elif _site == -1:\n", + " cterm += mod_sep[0]+mod+mod_sep[1]\n", + " elif _site == 0:\n", + " nterm += mod_sep[0]+mod+mod_sep[1]\n", + " else:\n", + " mod_seq = mod_seq[:_site] + mod_sep[0]+mod+mod_sep[1] + mod_seq[_site:]\n", + " return nterm + mod_seq + cterm" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 _(ModNterm)AC(ModA@C)DEFG(ModB@G)HIK_(ModCterm)\n", + "1 _(ModNterm)AC(ModA@C)DEFG(ModB@G)HIK_(ModCterm)\n", + "2 _(ModNterm)AC(ModA@C)DEFG(ModB@G)HIK_(ModCterm)\n", + "3 _(ModNterm)AC(ModA@C)DEFG(ModB@G)HIK_(ModCterm)\n", + "4 _(ModNterm)AC(ModA@C)DEFG(ModB@G)HIK_(ModCterm)\n", + "5 _(ModNterm)AC(ModA@C)DEFG(ModB@G)HIK_(ModCterm)\n", + "6 _(ModNterm)AC(ModA@C)DEFG(ModB@G)HIK_(ModCterm)\n", + "7 _(ModNterm)AC(ModA@C)DEFG(ModB@G)HIK_(ModCterm)\n", + "8 _(ModNterm)AC(ModA@C)DEFG(ModB@G)HIK_(ModCterm)\n", + "9 _(ModNterm)AC(ModA@C)DEFG(ModB@G)HIK_(ModCterm)\n", + "dtype: object" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.DataFrame()\n", + "df['sequence'] = ['ACDEFGHIK']*10\n", + "df['mods'] = ['ModNterm;ModB@G;ModCterm;ModA@C']*10\n", + "df['mod_sites'] = ['0;6;-1;2']*10\n", + "df[['sequence','mods','mod_sites']].apply(create_modified_sequence, axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "assert create_modified_sequence(('ACDEFGHIK','ModNterm;ModB@G;ModCterm;ModA@C','0;6;-1;2'), mod_sep='[]')=='_[ModNterm]AC[ModA@C]DEFG[ModB@G]HIK_[ModCterm]'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "assert create_modified_sequence(\n", + " ('ACDEFGHIK','ModNterm;ModB@G;ModCterm;ModA@C','0;6;-1;2'),\n", + " {'ModNterm':'Mod(Nterm)', 'ModCterm':'Mod(Cterm)', 'ModA@C':'ModA(C)', 'ModB@G':'ModB(G)'},\n", + " mod_sep='()'\n", + ") == 
'_(Mod(Nterm))AC(ModA(C))DEFG(ModB(G))HIK_(Mod(Cterm))'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "assert create_modified_sequence(\n", + " ('ACDEFGHIK','ModNterm;ModB@G;ModCterm;ModA@C','0;6;-1;2'),\n", + " {'ModNterm':'Mod(Nterm)', 'ModCterm':'Mod(Cterm)', 'ModA@C':'ModA(C)', 'ModB@G':'ModB(G)'},\n", + " mod_sep='()', nterm='', cterm=''\n", + ") == '(Mod(Nterm))AC(ModA(C))DEFG(ModB(G))HIK(Mod(Cterm))'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "@numba.njit\n", + "def _get_frag_info_from_column_name(column:str):\n", + " '''\n", + " Only used when converting alphabase libraries into other libraries\n", + " '''\n", + " idx = column.rfind('_')\n", + " frag_type = column[:idx]\n", + " charge = column[idx+2:]\n", + " if len(frag_type)==1:\n", + " loss_type = 'noloss'\n", + " else:\n", + " idx = frag_type.find('_')\n", + " loss_type = frag_type[idx+1:]\n", + " frag_type = frag_type[0]\n", + " return frag_type, loss_type, charge\n", + "\n", + "def _get_frag_num(columns, rows, frag_len):\n", + " frag_nums = []\n", + " for r,c in zip(rows, columns):\n", + " if is_nterm_frag(c):\n", + " frag_nums.append(r+1)\n", + " else:\n", + " frag_nums.append(frag_len-r)\n", + " return frag_nums\n", + "\n", + "def merge_precursor_fragment_df(\n", + " precursor_df:pd.DataFrame, \n", + " fragment_mz_df:pd.DataFrame, \n", + " fragment_inten_df:pd.DataFrame, \n", + " top_n_inten:int,\n", + " frag_type_head:str='FragmentType',\n", + " frag_mass_head:str='FragmentMz',\n", + " frag_inten_head:str='RelativeIntensity',\n", + " frag_charge_head:str='FragmentCharge',\n", + " frag_loss_head:str='FragmentLossType',\n", + " frag_num_head:str='FragmentNumber',\n", + " verbose=True,\n", + "):\n", + " '''\n", + " Convert alphabase library into a single dataframe. 
\n", + " This method is not important, as it will be only \n", + " used by DiaNN, or spectronaut, or others\n", + " '''\n", + " df = precursor_df.copy()\n", + " frag_columns = fragment_mz_df.columns.values.astype('U')\n", + " frag_type_list = []\n", + " frag_loss_list = []\n", + " frag_charge_list = []\n", + " frag_mass_list = []\n", + " frag_inten_list = []\n", + " frag_num_list = []\n", + " iters = enumerate(df[['frag_start_idx','frag_end_idx']].values)\n", + " if verbose:\n", + " iters = tqdm.tqdm(iters)\n", + " for i,(start, end) in iters:\n", + " intens = fragment_inten_df.iloc[start:end,:].values # is loc[start:end-1,:] faster?\n", + " max_inten = np.amax(intens)\n", + " if max_inten > 0:\n", + " intens /= max_inten\n", + " masses = fragment_mz_df.iloc[start:end,:].values\n", + " sorted_idx = np.argsort(intens.reshape(-1))[-top_n_inten:][::-1]\n", + " idx_in_df = np.unravel_index(sorted_idx, masses.shape)\n", + "\n", + " frag_len = end-start\n", + " rows = np.arange(frag_len, dtype=np.int32)[idx_in_df[0]]\n", + " columns = frag_columns[idx_in_df[1]]\n", + "\n", + " frag_types, loss_types, charges = zip(\n", + " *[_get_frag_info_from_column_name(_) for _ in columns]\n", + " )\n", + "\n", + " frag_nums = _get_frag_num(columns, rows, frag_len)\n", + "\n", + " frag_type_list.append(frag_types)\n", + " frag_loss_list.append(loss_types)\n", + " frag_charge_list.append(charges)\n", + " frag_mass_list.append(masses[idx_in_df])\n", + " frag_inten_list.append(intens[idx_in_df])\n", + " frag_num_list.append(frag_nums)\n", + " \n", + " df[frag_type_head] = frag_type_list\n", + " df[frag_mass_head] = frag_mass_list\n", + " df[frag_inten_head] = frag_inten_list\n", + " df[frag_charge_head] = frag_charge_list\n", + " df[frag_loss_head] = frag_loss_list\n", + " df[frag_num_head] = frag_num_list\n", + "\n", + " return explode_multiple_columns(df, \n", + " [\n", + " frag_type_head,\n", + " frag_mass_head,\n", + " frag_inten_head,\n", + " frag_charge_head,\n", + " 
frag_loss_head,\n", + " frag_num_head\n", + " ]\n", + " )\n", + "\n", + " # try:\n", + " # return df.explode([\n", + " # frag_type_head,\n", + " # frag_mass_head,\n", + " # frag_inten_head,\n", + " # frag_charge_head,\n", + " # frag_loss_head,\n", + " # frag_num_head\n", + " # ])\n", + " # except ValueError:\n", + " # # df.explode does not allow mulitple columns before pandas version 1.x.x.\n", + " # df = df.explode(frag_type_head)\n", + "\n", + " # df[frag_mass_head] = _flatten(frag_mass_list)\n", + " # df[frag_inten_head] = _flatten(frag_inten_list)\n", + " # df[frag_charge_head] = _flatten(frag_charge_list)\n", + " # df[frag_loss_head] = _flatten(frag_loss_list)\n", + " # df[frag_num_head] = _flatten(frag_num_list)\n", + " # return df\n", + "\n", + "mod_to_unimod_dict = {}\n", + "mod_to_modname_dict = {}\n", + "for mod_name,unimod_id in MOD_DF[['mod_name','unimod_id']].values:\n", + " if unimod_id==-1 or unimod_id=='-1': continue\n", + " mod_to_unimod_dict[mod_name] = f\"UniMod:{unimod_id}\"\n", + " mod_to_modname_dict[mod_name] = mod_name[:mod_name.find('@')]\n", + "\n", + "def is_nterm_frag(frag_type:str):\n", + " return frag_type[0] in 'abc'\n", + "\n", + "def mask_fragment_intensity_by_mz_(\n", + " fragment_mz_df:pd.DataFrame, \n", + " fragment_intensity_df:pd.DataFrame,\n", + " min_frag_mz, max_frag_mz\n", + "):\n", + " fragment_intensity_df.mask(\n", + " (fragment_mz_df>max_frag_mz)|(fragment_mz_dfpd.DataFrame:\n", + " '''\n", + " Convert alphabase library to diann (or Spectronaut) library dataframe\n", + " This method is not important, as it will be only \n", + " used by DiaNN, or spectronaut, or others\n", + "\n", + " Parameters\n", + " ----------\n", + " translate_mod_dict : dict\n", + " a dict map modifications from alphabase to other software. Default: build-in `alpha_to_other_mod_dict`\n", + " \n", + " keep_k_highest_peaks : int\n", + " only keep highest fragments for each precursor. 
Default: 12\n", + "\n", + " Returns\n", + " -------\n", + " pd.DataFrame\n", + " a single dataframe in the SWATH-like format\n", + "\n", + " '''\n", + " df = pd.DataFrame()\n", + " df['ModifiedPeptide'] = speclib._precursor_df[\n", + " ['sequence','mods','mod_sites']\n", + " ].apply(\n", + " create_modified_sequence, \n", + " axis=1,\n", + " translate_mod_dict=translate_mod_dict,\n", + " mod_sep='()'\n", + " )\n", + "\n", + " df['frag_start_idx'] = speclib._precursor_df['frag_start_idx']\n", + " df['frag_end_idx'] = speclib._precursor_df['frag_end_idx']\n", + " \n", + " df['PrecursorCharge'] = speclib._precursor_df['charge']\n", + " if 'irt_pred' in speclib._precursor_df.columns:\n", + " df['Tr_recalibrated'] = speclib._precursor_df['irt_pred']\n", + " elif 'rt_pred' in speclib._precursor_df.columns:\n", + " df['Tr_recalibrated'] = speclib._precursor_df['rt_pred']\n", + " elif 'rt_norm' in speclib._precursor_df.columns:\n", + " df['Tr_recalibrated'] = speclib._precursor_df['rt_norm']\n", + " else:\n", + " raise ValueError('precursor_df must contain the \"rt_pred\" or \"rt_norm\" column')\n", + "\n", + " if 'mobility_pred' in speclib._precursor_df.columns:\n", + " df['IonMobility'] = speclib._precursor_df.mobility_pred\n", + " elif 'mobility' in speclib._precursor_df.columns:\n", + " df['IonMobility'] = speclib._precursor_df.mobility\n", + " \n", + " # df['LabelModifiedSequence'] = df['ModifiedPeptide']\n", + " df['StrippedPeptide'] = speclib._precursor_df['sequence']\n", + "\n", + " if 'precursor_mz' not in speclib._precursor_df.columns:\n", + " speclib.calc_precursor_mz()\n", + " df['PrecursorMz'] = speclib._precursor_df['precursor_mz']\n", + "\n", + " if 'uniprot_ids' in speclib._precursor_df.columns:\n", + " df['ProteinID'] = speclib._precursor_df.uniprot_ids\n", + " elif 'proteins' in speclib._precursor_df.columns:\n", + " df['ProteinID'] = speclib._precursor_df.proteins\n", + "\n", + " if 'genes' in speclib._precursor_df.columns:\n", + " df['Genes'] = 
speclib._precursor_df['genes']\n", + "\n", + " # if 'protein_group' in speclib._precursor_df.columns:\n", + " # df['ProteinGroups'] = speclib._precursor_df['protein_group']\n", + "\n", + " if min_frag_mz > 0 or max_frag_mz > 0:\n", + " mask_fragment_intensity_by_mz_(\n", + " speclib._fragment_mz_df,\n", + " speclib._fragment_intensity_df,\n", + " min_frag_mz, max_frag_mz\n", + " )\n", + "\n", + " if min_frag_nAA > 0:\n", + " mask_fragment_intensity_by_frag_nAA(\n", + " speclib._fragment_intensity_df,\n", + " speclib._precursor_df,\n", + " max_mask_frag_nAA=min_frag_nAA-1\n", + " )\n", + "\n", + " df = merge_precursor_fragment_df(\n", + " df,\n", + " speclib._fragment_mz_df,\n", + " speclib._fragment_intensity_df,\n", + " top_n_inten=keep_k_highest_fragments,\n", + " frag_type_head=frag_type_head,\n", + " frag_mass_head=frag_mass_head,\n", + " frag_inten_head=frag_inten_head,\n", + " frag_charge_head=frag_charge_head,\n", + " frag_loss_head=frag_loss_head,\n", + " frag_num_head=frag_num_head,\n", + " verbose=verbose\n", + " )\n", + " df = df[df['RelativeIntensity']>min_frag_intensity]\n", + " df.loc[df[frag_loss_head]=='modloss',frag_loss_head] = modloss\n", + "\n", + " return df.drop(['frag_start_idx','frag_end_idx'], axis=1)\n", + "\n", + "def speclib_to_swath_df(\n", + " speclib:SpecLibBase,\n", + " *,\n", + " keep_k_highest_fragments:int=12,\n", + " min_frag_mz = 200,\n", + " max_frag_mz = 2000,\n", + " min_frag_intensity = 0.01,\n", + ")->pd.DataFrame:\n", + " speclib_to_single_df(\n", + " speclib, \n", + " translate_mod_dict=mod_to_modname_dict,\n", + " keep_k_highest_fragments=keep_k_highest_fragments,\n", + " min_frag_mz = min_frag_mz,\n", + " max_frag_mz = max_frag_mz,\n", + " min_frag_intensity = min_frag_intensity,\n", + " )\n", + "\n", + "class WritingProcess(mp.Process):\n", + " def __init__(self, task_queue, tsv, *args, **kwargs):\n", + " self.task_queue:mp.Queue = task_queue\n", + " self.tsv = tsv\n", + " super().__init__(*args, **kwargs)\n", + "\n", 
+ " def run(self):\n", + " while True:\n", + " df, batch = self.task_queue.get()\n", + " if df is None: break\n", + " df.to_csv(self.tsv, header=(batch==0), sep=\"\\t\", mode=\"a\", index=False)\n", + "\n", + "\n", + "def translate_to_tsv(\n", + " speclib:SpecLibBase,\n", + " tsv:str,\n", + " *,\n", + " keep_k_highest_fragments:int=12,\n", + " min_frag_mz:float = 200,\n", + " max_frag_mz:float = 2000,\n", + " min_frag_intensity:float = 0.01,\n", + " min_frag_nAA:int = 0,\n", + " batch_size:int = 100000,\n", + " translate_mod_dict:dict = mod_to_modname_dict,\n", + " multiprocessing:bool=True\n", + "):\n", + " if multiprocessing:\n", + " queue_size = 1000000//batch_size\n", + " if queue_size < 2:\n", + " queue_size = 2\n", + " elif queue_size > 10:\n", + " queue_size = 10\n", + " df_head_queue = mp.Queue(maxsize=queue_size)\n", + " writing_process = WritingProcess(df_head_queue, tsv)\n", + " writing_process.start()\n", + " mask_fragment_intensity_by_mz_(\n", + " speclib._fragment_mz_df,\n", + " speclib._fragment_intensity_df,\n", + " min_frag_mz, max_frag_mz\n", + " )\n", + " if min_frag_nAA > 0:\n", + " mask_fragment_intensity_by_frag_nAA(\n", + " speclib._fragment_intensity_df,\n", + " speclib._precursor_df,\n", + " max_mask_frag_nAA=min_frag_nAA-1\n", + " )\n", + " if isinstance(tsv, str):\n", + " with open(tsv, \"w\"): pass\n", + " _speclib = SpecLibBase()\n", + " _speclib._fragment_intensity_df = speclib._fragment_intensity_df\n", + " _speclib._fragment_mz_df = speclib._fragment_mz_df\n", + " precursor_df = speclib._precursor_df\n", + " for i in tqdm.tqdm(range(0, len(precursor_df), batch_size)):\n", + " _speclib._precursor_df = precursor_df.iloc[i:i+batch_size]\n", + " df = speclib_to_single_df(\n", + " _speclib, translate_mod_dict=translate_mod_dict,\n", + " keep_k_highest_fragments=keep_k_highest_fragments,\n", + " min_frag_mz=0,\n", + " max_frag_mz=0,\n", + " min_frag_intensity=min_frag_intensity,\n", + " min_frag_nAA=0,\n", + " verbose=False\n", + " )\n", + 
" if multiprocessing:\n", + " df_head_queue.put((df, i))\n", + " else:\n", + " df.to_csv(tsv, header=(i==0), sep=\"\\t\", mode='a', index=False)\n", + " if multiprocessing:\n", + " df_head_queue.put((None, None))\n", + " print(\"Translation finished, it will take several minutes to export the rest precursors to the tsv file...\")\n", + " writing_process.join()\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from alphabase.peptide.fragment import create_fragment_mz_dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
b_z1y_z1y_modloss_z1
072.0443901376.5275551278.550659
1239.0427501209.5291950.000000
2296.0642131152.5077320.000000
3433.1231251015.4488200.000000
4536.132310912.4396350.000000
............
105585.208572634.3129780.000000
106771.287885448.2336650.000000
107902.328370317.1931800.000000
108973.365484246.1560660.000000
1091044.402598175.1189520.000000
\n", + "

110 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " b_z1 y_z1 y_modloss_z1\n", + "0 72.044390 1376.527555 1278.550659\n", + "1 239.042750 1209.529195 0.000000\n", + "2 296.064213 1152.507732 0.000000\n", + "3 433.123125 1015.448820 0.000000\n", + "4 536.132310 912.439635 0.000000\n", + ".. ... ... ...\n", + "105 585.208572 634.312978 0.000000\n", + "106 771.287885 448.233665 0.000000\n", + "107 902.328370 317.193180 0.000000\n", + "108 973.365484 246.156066 0.000000\n", + "109 1044.402598 175.118952 0.000000\n", + "\n", + "[110 rows x 3 columns]" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "repeat = 10\n", + "charged_frag_types = ['b_z1','y_z1','y_modloss_z1']\n", + "precursor_df = pd.DataFrame({\n", + " 'sequence': ['ASGHCEWMKYR']*repeat+['ASGHCEWMAAR'],\n", + " 'mods': ['Acetyl@Protein N-term;Carbamidomethyl@C;Oxidation@M']*repeat+[''],\n", + " 'mod_sites': ['0;4;8']*repeat+[''],\n", + " 'nAA': 11,\n", + " 'NCE': 20,\n", + " 'instrument': 'QE',\n", + " 'rt_pred': 10,\n", + " 'charge': 2,\n", + " 'protein_name': 'unknown',\n", + " 'mobility_pred': 1,\n", + "})\n", + "precursor_df.loc[0,['mods','mod_sites']] = ['Phospho@S','2']\n", + "frag_mass_df = create_fragment_mz_dataframe(precursor_df, charged_frag_types)\n", + "frag_mass_df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sequencemodsmod_sitesnAANCEinstrumentrt_predchargeprotein_namemobility_predfrag_start_idxfrag_end_idx
0ASGHCEWMKYRPhospho@S21120QE102unknown1010
1ASGHCEWMKYRAcetyl@Protein N-term;Carbamidomethyl@C;Oxidat...0;4;81120QE102unknown11020
2ASGHCEWMKYRAcetyl@Protein N-term;Carbamidomethyl@C;Oxidat...0;4;81120QE102unknown12030
3ASGHCEWMKYRAcetyl@Protein N-term;Carbamidomethyl@C;Oxidat...0;4;81120QE102unknown13040
4ASGHCEWMKYRAcetyl@Protein N-term;Carbamidomethyl@C;Oxidat...0;4;81120QE102unknown14050
5ASGHCEWMKYRAcetyl@Protein N-term;Carbamidomethyl@C;Oxidat...0;4;81120QE102unknown15060
6ASGHCEWMKYRAcetyl@Protein N-term;Carbamidomethyl@C;Oxidat...0;4;81120QE102unknown16070
7ASGHCEWMKYRAcetyl@Protein N-term;Carbamidomethyl@C;Oxidat...0;4;81120QE102unknown17080
8ASGHCEWMKYRAcetyl@Protein N-term;Carbamidomethyl@C;Oxidat...0;4;81120QE102unknown18090
9ASGHCEWMKYRAcetyl@Protein N-term;Carbamidomethyl@C;Oxidat...0;4;81120QE102unknown190100
10ASGHCEWMAAR1120QE102unknown1100110
\n", + "
" + ], + "text/plain": [ + " sequence mods mod_sites \\\n", + "0 ASGHCEWMKYR Phospho@S 2 \n", + "1 ASGHCEWMKYR Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", + "2 ASGHCEWMKYR Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", + "3 ASGHCEWMKYR Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", + "4 ASGHCEWMKYR Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", + "5 ASGHCEWMKYR Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", + "6 ASGHCEWMKYR Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", + "7 ASGHCEWMKYR Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", + "8 ASGHCEWMKYR Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", + "9 ASGHCEWMKYR Acetyl@Protein N-term;Carbamidomethyl@C;Oxidat... 0;4;8 \n", + "10 ASGHCEWMAAR \n", + "\n", + " nAA NCE instrument rt_pred charge protein_name mobility_pred \\\n", + "0 11 20 QE 10 2 unknown 1 \n", + "1 11 20 QE 10 2 unknown 1 \n", + "2 11 20 QE 10 2 unknown 1 \n", + "3 11 20 QE 10 2 unknown 1 \n", + "4 11 20 QE 10 2 unknown 1 \n", + "5 11 20 QE 10 2 unknown 1 \n", + "6 11 20 QE 10 2 unknown 1 \n", + "7 11 20 QE 10 2 unknown 1 \n", + "8 11 20 QE 10 2 unknown 1 \n", + "9 11 20 QE 10 2 unknown 1 \n", + "10 11 20 QE 10 2 unknown 1 \n", + "\n", + " frag_start_idx frag_end_idx \n", + "0 0 10 \n", + "1 10 20 \n", + "2 20 30 \n", + "3 30 40 \n", + "4 40 50 \n", + "5 50 60 \n", + "6 60 70 \n", + "7 70 80 \n", + "8 80 90 \n", + "9 90 100 \n", + "10 100 110 " + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "precursor_df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "11it [00:01, 6.85it/s]\n", + "11it [00:00, 2684.12it/s]\n" + ] + } + ], + "source": [ + "spec_lib = SpecLibBase(charged_frag_types)\n", + "spec_lib._precursor_df = precursor_df\n", + 
"spec_lib._fragment_intensity_df = frag_mass_df.copy()\n", + "spec_lib._fragment_mz_df = frag_mass_df.copy()\n", + "df = speclib_to_single_df(spec_lib, min_frag_mz=300, max_frag_mz=1800)\n", + "assert (df.FragmentMz>=300).all()\n", + "assert (df.FragmentMz<=1800).all()\n", + "df = speclib_to_single_df(spec_lib, min_frag_mz=200, min_frag_nAA=3)\n", + "assert (df.FragmentNumber>=3).all()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "\n", + "import tempfile\n", + "from alphabase.peptide.fragment import create_fragment_mz_dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "11it [00:00, 3152.75it/s]\n", + "100%|██████████| 6/6 [00:00<00:00, 67.25it/s]\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ModifiedPeptidePrecursorChargeTr_recalibratedIonMobilityStrippedPeptidePrecursorMzFragmentTypeFragmentMzRelativeIntensityFragmentChargeFragmentLossTypeFragmentNumber
0_AS(Phospho)GHCEWMKYR_2101ASGHCEWMKYR724.285972y1376.5275551.0000001noloss10
1_AS(Phospho)GHCEWMKYR_2101ASGHCEWMKYR724.285972y1278.5506590.9288231H3PO410
2_AS(Phospho)GHCEWMKYR_2101ASGHCEWMKYR724.285972b1273.4529930.9251201noloss10
3_AS(Phospho)GHCEWMKYR_2101ASGHCEWMKYR724.285972y1209.5291950.8786811noloss9
4_AS(Phospho)GHCEWMKYR_2101ASGHCEWMKYR724.285972y1152.5077320.8372571noloss8
.......................................
127_ASGHCEWMAAR_2101ASGHCEWMAAR609.760775b771.2878850.6721601noloss7
128_ASGHCEWMAAR_2101ASGHCEWMAAR609.760775y763.3555710.6652471noloss6
129_ASGHCEWMAAR_2101ASGHCEWMAAR609.760775y634.3129780.5527891noloss5
130_ASGHCEWMAAR_2101ASGHCEWMAAR609.760775b585.2085720.5099961noloss6
131_ASGHCEWMAAR_2101ASGHCEWMAAR609.760775b456.1659790.3975381noloss5
\n", + "

132 rows × 12 columns

\n", + "
" + ], + "text/plain": [ + " ModifiedPeptide PrecursorCharge Tr_recalibrated IonMobility \\\n", + "0 _AS(Phospho)GHCEWMKYR_ 2 10 1 \n", + "1 _AS(Phospho)GHCEWMKYR_ 2 10 1 \n", + "2 _AS(Phospho)GHCEWMKYR_ 2 10 1 \n", + "3 _AS(Phospho)GHCEWMKYR_ 2 10 1 \n", + "4 _AS(Phospho)GHCEWMKYR_ 2 10 1 \n", + ".. ... ... ... ... \n", + "127 _ASGHCEWMAAR_ 2 10 1 \n", + "128 _ASGHCEWMAAR_ 2 10 1 \n", + "129 _ASGHCEWMAAR_ 2 10 1 \n", + "130 _ASGHCEWMAAR_ 2 10 1 \n", + "131 _ASGHCEWMAAR_ 2 10 1 \n", + "\n", + " StrippedPeptide PrecursorMz FragmentType FragmentMz RelativeIntensity \\\n", + "0 ASGHCEWMKYR 724.285972 y 1376.527555 1.000000 \n", + "1 ASGHCEWMKYR 724.285972 y 1278.550659 0.928823 \n", + "2 ASGHCEWMKYR 724.285972 b 1273.452993 0.925120 \n", + "3 ASGHCEWMKYR 724.285972 y 1209.529195 0.878681 \n", + "4 ASGHCEWMKYR 724.285972 y 1152.507732 0.837257 \n", + ".. ... ... ... ... ... \n", + "127 ASGHCEWMAAR 609.760775 b 771.287885 0.672160 \n", + "128 ASGHCEWMAAR 609.760775 y 763.355571 0.665247 \n", + "129 ASGHCEWMAAR 609.760775 y 634.312978 0.552789 \n", + "130 ASGHCEWMAAR 609.760775 b 585.208572 0.509996 \n", + "131 ASGHCEWMAAR 609.760775 b 456.165979 0.397538 \n", + "\n", + " FragmentCharge FragmentLossType FragmentNumber \n", + "0 1 noloss 10 \n", + "1 1 H3PO4 10 \n", + "2 1 noloss 10 \n", + "3 1 noloss 9 \n", + "4 1 noloss 8 \n", + ".. ... ... ... 
\n", + "127 1 noloss 7 \n", + "128 1 noloss 6 \n", + "129 1 noloss 5 \n", + "130 1 noloss 6 \n", + "131 1 noloss 5 \n", + "\n", + "[132 rows x 12 columns]" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#| hide\n", + "repeat = 10\n", + "charged_frag_types = ['b_z1','y_z1','y_modloss_z1']\n", + "precursor_df = pd.DataFrame({\n", + " 'sequence': ['ASGHCEWMKYR']*repeat+['ASGHCEWMAAR'],\n", + " 'mods': ['Acetyl@Protein N-term;Carbamidomethyl@C;Oxidation@M']*repeat+[''],\n", + " 'mod_sites': ['0;4;8']*repeat+[''],\n", + " 'nAA': 11,\n", + " 'NCE': 20,\n", + " 'instrument': 'QE',\n", + " 'rt_pred': 10,\n", + " 'charge': 2,\n", + " 'protein_name': 'unknown',\n", + " 'mobility_pred': 1,\n", + "})\n", + "precursor_df.loc[0,['mods','mod_sites']] = ['Phospho@S','2']\n", + "frag_mass_df = create_fragment_mz_dataframe(precursor_df, charged_frag_types)\n", + "spec_lib = SpecLibBase(charged_frag_types)\n", + "spec_lib._precursor_df = precursor_df\n", + "spec_lib._fragment_intensity_df = frag_mass_df.copy()\n", + "spec_lib._fragment_mz_df = frag_mass_df.copy()\n", + "speclib_sdf = speclib_to_single_df(spec_lib)\n", + "with tempfile.TemporaryFile('w+') as f:\n", + " translate_to_tsv(spec_lib, f, batch_size=2, multiprocessing=False)\n", + " f.seek(0)\n", + " ddf = pd.read_csv(f, sep=\"\\t\")\n", + "assert len(ddf) == len(speclib_sdf)\n", + "assert ddf.StrippedPeptide.values[0] == speclib_sdf.StrippedPeptide.values[0]\n", + "assert ddf.StrippedPeptide.values[-1] == speclib_sdf.StrippedPeptide.values[-1]\n", + "assert ddf.PrecursorCharge.dtype==np.int\n", + "ddf" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.8.3 ('base')", + "language": "python", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 0c3e662c8b337110328749401397e392407246c1 Mon Sep 17 00:00:00 
2001 From: "Zeng, Wen-Feng" Date: Thu, 13 Oct 2022 10:54:27 +0200 Subject: [PATCH 13/52] docs from scoring --- docs/constants/aa.html | 19 +- docs/constants/element.html | 19 +- docs/constants/isotope.html | 19 +- docs/constants/modification.html | 19 +- docs/index.html | 19 +- docs/io/hdf.html | 19 +- docs/peptide/fragment.html | 19 +- docs/peptide/mass_calc.html | 19 +- docs/peptide/mobility.html | 19 +- docs/peptide/precursor.html | 28 +- docs/protein/fasta.html | 19 +- docs/protein/test_fasta.html | 19 +- docs/psm_reader/alphapept_reader.html | 19 +- docs/psm_reader/dia_psm_reader.html | 19 +- docs/psm_reader/maxquant_reader.html | 19 +- docs/psm_reader/msfragger_reader.html | 19 +- docs/psm_reader/pfind_reader.html | 19 +- docs/psm_reader/psm_reader.html | 19 +- docs/scoring/fdr.html | 19 +- docs/scoring/feature_extraction_base.html | 50 +- docs/scoring/ml_scoring_base.html | 50 +- docs/search.json | 17 +- docs/sitemap.xml | 56 +- docs/spectral_library/decoy_library.html | 19 +- docs/spectral_library/library_base.html | 23 +- docs/spectral_library/translate.html | 3548 +++++++++++++++++ docs/statistics/regression.html | 117 +- .../figure-html/cell-10-output-1.png | Bin 0 -> 27481 bytes .../figure-html/cell-5-output-1.png | Bin 28935 -> 0 bytes .../figure-html/cell-9-output-1.png | Bin 0 -> 28568 bytes docs/utils.html | 19 +- docs/yaml_utils.html | 19 +- nbdev_nbs/sidebar.yml | 1 + 33 files changed, 3885 insertions(+), 404 deletions(-) create mode 100644 docs/spectral_library/translate.html create mode 100644 docs/statistics/regression_files/figure-html/cell-10-output-1.png delete mode 100644 docs/statistics/regression_files/figure-html/cell-5-output-1.png create mode 100644 docs/statistics/regression_files/figure-html/cell-9-output-1.png diff --git a/docs/constants/aa.html b/docs/constants/aa.html index 74e8c097..55f434e2 100644 --- a/docs/constants/aa.html +++ b/docs/constants/aa.html @@ -146,19 +146,7 @@ alphabase - - + @@ -403,6 +401,41 @@

On this page

Regression


+

source

+ +
+

right_open_tricubic

+
+
 right_open_tricubic (x)
+
+

tricubic weight kernel which weights assigns 1 to values x > 0

+
+

source

+
+
+

left_open_tricubic

+
+
 left_open_tricubic (x)
+
+

tricubic weight kernel which weights assigns 1 to values x < 0

+
+

source

+
+
+

tricubic

+
+
 tricubic (x)
+
+

tricubic weight kernel

+
+

source

+
+
+

apply_kernel

+
+
 apply_kernel (w)
+
+

source

@@ -527,22 +560,50 @@

LOESSRegression.pr

Application example

-
def noisy_1d(x):
-    y = np.sin(x)
-    y_err = np.random.normal(y,0.5)
-    return y + y_err + 0.5 * x
-
-x_train = np.linspace(0,15,200)
-y_train = noisy_1d(x_train)
-
-x_test = np.linspace(0,15,200)
-y_test = LOESSRegression().fit(x_train, y_train).predict(x_test)
-
-plt.scatter(x_train,y_train)
-plt.plot(x_test,y_test,c='r')
-plt.show()
+
np.set_printoptions(formatter={'float': lambda x: "{0:0.3f}".format(x)})
+def noisy_1d(x):
+    y = np.sin(x)
+    y_err = np.random.normal(y,0.5)
+    return y + y_err + 0.5 * x
+
+x_train = np.linspace(0,15,200)
+y_train = noisy_1d(x_train)
+
+x_test = np.linspace(0,15,200)
+y_test = LOESSRegression().fit(x_train, y_train).predict(x_test)
+
+plt.scatter(x_train,y_train)
+plt.plot(x_test,y_test,c='r')
+plt.show()
+
+

+
+
+
+
+

Weight function

+
+
fig, axs = plt.subplots(nrows=3,sharex=True)
+
+x = np.linspace(-2,2,200)
+y = left_open_tricubic(x)
+axs[0].plot(x,y)
+axs[0].set_title('left_open_tricubic')
+
+x = np.linspace(-2,2,200)
+y = tricubic(x)
+axs[1].plot(x,y)
+axs[1].set_title('tricubic')
+
+x = np.linspace(-2,2,200)
+y = right_open_tricubic(x)
+axs[2].plot(x,y)
+axs[2].set_title('right_open_tricubic')
+
+fig.tight_layout()
+plt.show()
-

+

diff --git a/docs/statistics/regression_files/figure-html/cell-10-output-1.png b/docs/statistics/regression_files/figure-html/cell-10-output-1.png new file mode 100644 index 0000000000000000000000000000000000000000..f210bbaf1f4fe632619894085ae4a068520fdacb GIT binary patch literal 27481 zcmb@u2RxSV|2KS5c6LUD>_SKxA+jUcE1OW+GRw}o>?ol^NXX9KdlZq(?7e4XkNb1_ z{_fxZ{y)$Ee(w8z-A^yyZ@rw?d0xkP9G~Me-tYJ42v$*&BgCh{N1;%J^7o|GP$*0n z6bhpc7aM-#ajS#{{<`fXqvND*XW`^(^2{9dz{JVk+Rn-Psp%CL^Jk7v?QCyy^K;+i zy7I)y$=*?zhsWmMAKTI+I7xIf?F*twsGLv7pn=dwI5 z<6UX^W1-CGih=w?Ly&p}4S%@K`_Yk-l12p>=QA@gF?}Vu;DTJHd|4j>SKB0c2jME| zLtJbTxXMIOZ!8B_%^2ZkTvWc-@r!%W{+~Xvq{faGJ|=$o^5sXZEJk^`K|Ce|Oi%84^TXYpg2a!*I*r<$(dc>(Uy#@SKIjkqT#mRJq8&oSf@gx%$C8 zwMS$wE-r5K39k8`hc4yquYbhfe4KAw|56vroGE4KYjh=wiO zm!kM-g6mj4xw5FQNn_wQ?GIs{A8z{v5KyIfpPp>|{hPD-Y`!ZgNRsh%d_+u4>?#+R ze{pf~NE%5L>CEZ&6{bR$t+@v%pTfezs(`o~jOXzQ2ry6_G2dAl-mu84Wc8_V#5{|Y zl$M2QO0irWtEMFu-9W9ZtV~Qy_|*I1Ff%eroO+#}9N#c$IFGv9m3RlHflcWHu6*>X zq@>6$@zcxEEQ|948eU#vhF<&TZtm_<*{3;w{`}eW&&)S_SJxngG|6Bda8OiLRq=WqxtUp5ER|~a{f(kHR4*8<4Gj;MNfPsf z+X%$P#j|ybzfT-5}ZtiRgCAuq6xLsmmu^}Z!vJw{aWkG7Jb(V&ZNeAFerM&OVxs7!(Mnf23k$ZI(-U|7 zN*BW1++6-Ii|e-IHQ^F|*e2Tx-PD4%mj!Hw{O1$hi39`$;zXZ6D{epDpHD)bVr*j* z4)5OxdnuO3sD)Vk$ftDNYj{Bzok5Sw#KhQC;k~^*uuQ5Q8oSy8~63&TlHjzt4vp6jNVm%f`VfqMp z9hG?*K6~~REZ%f)WAfm+`e#Mb>$*iG@$vE4%a=u5vhANeBg4YNlEh^sx-4`Bg(jjC zMo&$ddRD^7R8S;fo?kY-tjDSzqDXLvE@x~fMn*=WuE5Nx&$@fBmkN%#NufyeN^Ns` zwbFa=7%$YicxxAkd*>yC#bWyCd^HR9GIVe#z{STW`|)0oij|cWzKV>T91B%oJ1Xp1 z3G0B5gM$)gQ-g_xeKelH}#(g-c9aYgKt&ujC3ZFG*t% zao%h5(E&~185B>P9`ZdabgwWpQIJ#MO%Vy}}-TiAF6;t!`+Nmyx@w7!T zFI!ilCKea*L_|dVp05rSpHuO&F<2j~=De;>YH7uXO;7qQ2wXo$_o5T(dtTwz#zw|= zO-#(bhm*!idylesPHp#eQiaQ^WwXJ@WfBqi;wCnQC> z*R`^BOnAM2e>H!_f)^=vkWqgW7f0=9HKV4cmj9@gNc3o<5qv)UP~U#WQeypE&HQTS zx0+sAnI{zkty$rhZKxrWst5HqK04%f0SR-IfE5~4obU2pxBc7GDq51gdGzyFByFj#8655_rOyD?D~ zR#rwZLGj~la+j4sUx?odjh~&?MlMZGPGUDEpz0a|2>#lPdF~oq7P3b%GBSdzpb>Ui zL1$-kH8nSzAdB<;`xhCW(T6H3BySX0DR~UTK76>Gp}`@#H{thWtg2jf8A34;3`r!6 
zh94iXU@*3_LwF&R`OXA%WTc>FEjlBy z?fHFf()frkfgM8j6F#u-sFakHBy+Sv!F!@1n9y9{Q5&^qhg--!4|gcQE-`&nrIKdm z=0ai(U&yrCJT7p6UH&z>OcOx7C7K$V=F3@=8&z`#&o+8jc&x&<+yd1JB>@$lfBArARCS%3F#O#YGO&vWo9+Ek@XY z!}mEkJ`_}xke2ptv|2%DNL-|#m>e+ju730$w%~H>{hOMCwj*~XB^&65&*R`Mx4va& zvb^`Nl(KpsSijbbj);iJWAGM4U49NpTv(olo*r`i=7e>3{!nAr;{#_%kJ=58 zs2pVt`*C7+*11){f^>x=kt#kq8dFYAEe8gW z(~q#&Mm6v6?}HULfqS>=6}xeR)DMTa1vVH714ASljjNa-OaTcLze^92COhHCG8DLP zcgt-;TF_s1{6#ZsVtzh|_5NEFO4w<^$H@Db0v2D&&CM;Q{UQ}rYFU}iscvdFwch9M z5QSp5C%=@zvE*1kLFWbva+|5Zd;|DcaPz}+^dzZ{jlE921~A(U-W~#HC-BpU9cT#hR60Un(yh9?F7|yTeiB zQY9b%u`ZQ zNG@H<^fqwlHun6g%8|diFUD`F*W-H{M$4FQ5gz{Vv1jkY9uwm5-_FxJnStT6YwQne z6ZwsvhU83jpn2K6Vm=lTO)9!_T&fm)%x%cOe1aAn*M%Zdh=%SRg_*> z*of)n2OYn7vc9SzW7{ouZuYc!&-k_!k5y@bN}<9X`1+ zeG!+$HJgpS!e2=eYdbgT@q)Wke_+B)LEx5*xzM|~fYjiJIu*qB1%(niL33_Jjs&6; z>)&6=N%O0%*LOBHo?Bd8#NGm*FuAaxGuti9#B`rkp6uQ4te+i*0l9{q2m8Vacc*eZ zj=W|my(-@Xk8_VoYI)%a(A5S$Xw9qdGYps<6}|B}U2sUC_RtQDy3JaGy!+ zn3a@xuhR6-vDdEn=tw3>shHuk&Cl19= zwoxrT6n)ZaYGm;HC;1)sd8+-v%ht8G6XeyNRH&Z2coS+$3~f7#iLE#mYathkeO6u6 z-TAi>ae^7llEb%NmkTp};rUCmGE)cD=C*k0*8Pp&dD{6Wr7WIx1nV;q&;5i`Gz;1I zwB@9o!(sDgkhXp2ssD3Kwehz*>*vyW4J~|Wv0ftewcTlhB56whvyYU&N({vsXP;=B z)kFCxB}VA*36ree*?TT;h8Olt;^=h*x~-$w(Y0D++fnoD%^jWMzm&1F#GU& zcLqjQaNE%h82Ce!d^B%Gkl>EYPO}e65X>@vBj{#a(FyTLCr?zRD3j$C#NSbVD)*zY zV&7?MXceh%QSkMfzsYOXoF|mb6sef3(;H~mYbeK<(jPLQs0&+Tvbj?S$Vf;aehdedq*Rzs(+9$DExV)}X+ z8Ekk{nPE=8i>dOUE~mqSBz8;Q9sCEXnIq<;s&uA+3s_1Y6gxI*tGm}r*aV12+ddk3 zHy&kBH>rZ=!UFg}m6?2H`XynHdun_FAz7}~M(<;}DD^sKSLwGtXLG+av*>SkO1YMJ zvaULFk#B@FUIYe~BKsww@?1@PKP|V5fKbdkzu@JFq3-xOTWo`o*% z2kRV%DN6whk4x>zD(DXCv+o=jYb0G}iy6Ou*xsyIRdkJBmk#F!8=oBVLjG$x?@aq{ z*5_*}4DQbpR#k6hkFb)eUOz)KeEuY>h*4AEqI3@RnEH?3UUig_HseBO>K83dFIk=_ z84BgL)p~R98smex5Ofhwa_B7%-nNma`M8p4IWM{D_t`1tby=RZyjRrPcz zRH#c*bEZ|*SVY?!HE@>^^2oNU(JYhIQNB+WtwU$@ zu;NNrZ{?LT^O2rS=Pkr+pXif-L5?R?MLcL#N9GgR@Y2N>!J1+jY;jO=r=FPHpXR^o zlaeB~u9|aaxRj87!&OE#lZeL_E$0xg1v5s&kF4Dn{s75_=x$%|;`P62X}E@6RsVgh zg8c_wFJk^ES_>qZtO(NgNq?;EBuX71#PC5WnB7fR$=ep 
z05ULUs3Th==(0*yq3U3ikFNium1SkXOG+C5SuoJ)WohVjn9G8R!<)}yrFyS|GZe5L zLASNt>qru3aBy%y@?keOAzP@2SNWhMDRY<(^ioh#s!2}?Mz?JbW@YDFJCR%@%V%Aq zaY?FRbcGlUnT+k?uQVh%hpNawf=G}8tAR^MYGg5HcE+?S5}UzcY|fV zd$CtB@!QJ=g>o!SXn61-3VAthS**(FXtJi37RWPKyg5~_Lhuiai;J_2ZEtV)i;N_$ zs;<5PWiJa0%ZP(Wb8|C9b8K>WuOM{3%fieedKfD$Zfi8jg9A(T93RJ^xrK#fR-cye z)ZE%yL{U)@M=TyLZrxC^RRL5>9)J6^e=a&L^rRC*Ow(F*p}ekpo}HcjcxH9gMloJm znuVE`6YFgx-plY9%ph*FiQz}cbEPtOD@~=Oq;3G(bRG)}fdB?8oH<7Tn*rFzGB!9Q zL{dp9!b?GZG6<72>h`9qYaQn#2Fk~KUDPkD6yGcto#DXCF<4$vAqge_ojaq(UN2q< zi;9X`77;lZs& zED>5-TF6#<>)FF%s-1=ESuYEK?KTYcPFv6d0QWL_g8DJ z7H(dP4r((qvx#uQvAepuGzj+fxQqUnkU~4pFhFv%w#w~r$42qJ0O8Z8PoE4GU3=;0 z=P=Vo>a;Rgu(tCOjaKX*OdK10|0)n-5lQ(XONrPxPQQZK9y&wpUc-eosevcA0NjBI zCxUIt#3!}*j!Cp?h4&f$eNxi7NU(}$vC?J&XTdTUjb2KqOr^0ST0M`_+1z6NQ1O-- z-00h2#2?Z$P5UNNkECIJcfyQ%Kc%YiC8m;vDYOyU+=Ku(RqZ`b#~Au3P)K4RIz8b&16ifUiTYz$TN{S89Iv$pYDu&J6DMP#>j4WsbAb-PPny{`3n&;C-GIW z^p-(2t0O8JUmC;mBA7XArNNK?Ghy+fX_5_ckNhT9O+(*gONt(vbA_}QJ&Tnwdv^9% zE+N5!`XRhKlOMk}6nx_97Ac3P%y8NlkHI;jHt_1`6f9hC~!Txhi z3(0U^wu)e&#P$|31uj=|fH{BmLte6@b5F%cbi9@jWxQ6RKIkZnWCe<6W)z=xdlG}X zr;%0|qhrh-6Q^tZ4b1lMGn052&4On~ydEYe{2Y>+phcsmsk+;NB95ij7+*q zBYN$6avz?w&7FYV-Nb1j*WQJ*rIg9n;>b7CxMD%Pjf2V-wZGI;gO*W}$n1zA<-d&B z)>rCi7REoxE^h0=x%p74omvZHdu|gb=~rKeyp!;Z`r+zapdTAN@L<@xW%aUmMN+hY z1ABc%IP_&S>^$Z#+Gi6QC0y#%K+H1|$>^I1d3XQinwLRu>03I*?e3;e7rUCpvzTCO zzqPzYBXs*VBRe||1V9Lrz(TPdSPPF0M)|;lQFldL-8)|VWQcUx@hOgO6>2NG^xDtz z?`YNkGX_^aIs`;G6_6bOKo)+b5xA@lX}I-<2fs^3(w)fy$t0|*wM#~rWY zZF{lP5P09aJJL6NwP$ID`}IQNCC?^XAP1<$y+pNgbOToNNxbfyxGdeU>3VmwY((X>?c|@7MWFidl_3R5uGCo$xA;3;rNU6e`)WJiGkz zL04e^vOpM~i&jf^*{4tE3knL*AtArI)URH|B_NPeR8)*2EA;BOec6;vYRNgG+lh0)7$!=~rthz8lpomh}!M|=- za^vNL54JCF67#IQM zBo0fxnKX7cSy^RJ6HS0wDt!=guq;wnQ21_e^L6*YVECn**q1I{a+?kpeD(Hi;Kz?w zR(==CdHXd1l$@+`w0^-3NI1kOCyECQ8%N3=0nUsB3}L|Sf(UA3ds|vT0sr#l%UAlQ zWkM(fpMLuE>DH2o<{*=!Rc;7crmC7a`#ni^yWuMMpM zC<`z@Fs9k1x{xwHf-I{B#IzkegluGzEf8Ezk__T(Zi*8@;~!(mEVNJM1d zNqj?tq|U={rw#+9c19N|DH~c^WY^Sy8s9|Ikv?QePzp*(VM6)n<_H8Er>k5A20U41 
zBQ+Migd=W%ZnH+d!f~zx1GTrmFA!ot-V)i#^>np6;JMf1avbuaT0F zkjPPrF$BWuQA=2TVV77aaJ0ErC{GPQ_QXhL z43s3Zp~t37>hhJ876iiSkR8Z1=&p8m^*CHD1<+O}ZNlZ`==ld>XMA|Ws_7TtEs*U1 zZpTbFeb>XI zs-EAkIL8l7BM6KkQ)qXdQEg?QFsvTzokwl27_pOcB{kEHtPm&~NW6hs7bmACNZK8y zn($z6YnFGwW_=YGhuE}&;LF|;BBrxY-ew95J6ileRu&t;Ghv{H(}!&Vc~0YnYE1-{ z@#lpos?1}b57qnpIv{_L8bmec7Z>LgI%~Q%DQ5Nc{}zB`ng(`3e2n`YnZ>y|Y{0kA zNUzm*0yg-l7;H$%m2$rxr`?a`FrfN}^V(sH0L}w-#(90Y20@*V6C@KT4xHR~YF979 z1X6qLP2yY@_p17JNHLkst{o>H{cFwm1gd0c5uxE_r}}(G##RcY*K$Z}~<4 zGl1J0TU(7lP;jmi+a+E}8JaLCkT%nTY61hH_8SKa9+?0q%_tQ}T^!*$_8bS{o1i!j z0)`?L$ag4lu?6hL!Vv7cv$N9;EU^2FCMUwQ!#?m;%WQ(8adK`hFiSg6OZRc5%POz= z_lrlHtqh=WXoYK0iHQ-;%l(>qL6XUJ-X~iO7cTLg2YZXV^^{Dv=!q}Dm%v?NASisU zBRZq|Y&ZWZe5_pcw_i`E*pdRw?;I>_QMLhO+f!OkdqJOF-oOZCphBlCp)}_@P0cZh}fKtD1c$hY?dWQz2DLT_O z!a_o*tK*DdZ;$T7w=wbQ{lljmh&r*$;#3|Td7Q__Mzkl?H%yxhhS}NJOgAR#-FALI z-b8dNP$8?QE6IdXUSAVtat{dU$%6zBNercXulCJ6yOruUni?q&o=DGl>ZtOnZ)HpJ_d$f8wdy&jI zO`}eM@tFVG$FEv<&C@1|>!exUzo>2DW@AHCBG_BN(!*S+uJS=Lbd{UCzqbF$t-qS| zqz^wrp~zw}H?3eH7R|Q>d0Lp>K5N@|P6>eLZJ{2^E4$cyiu=sDUiVtPp*fa&F~y2z zzrBWDeWQFhrBca;6ZdIpz-t<>T$ggbT)yz5WZ`h6%KVwEi;zCBycX#oo*M-`eEfeQsO$dymXGRpV|n3NP9# z@sqdFuTuuHk3D#Aj`^QDpWY8j)Aa?|rOZGD|Kmo)@V2F8=ng&6!qRM*ZO#YMnbiw8 z#l_vl|ATS4LaM5oNm;Ak(UeRexH+7rcdw%(?`uM60-Uo)NjC%$ir+Vwx7#ciD2GPbIhfQPZK z{CM%{?SQgG1AkihD@xtJcQ~Ld#^|M0DFKr3?Icy>p;S0noc=y*IJPm^G(Mce-F_A* zS>ID3t>O5Weg!2uDZVid4q6g0=vNTcEbrJkI36f9sCqX10k zBUpg9G9v~6pG?5)_!@RTS+G3}*;1A^kOt3Ek+XPc5yaZe?Bf1!bcOm^;^H zi-pmOcrQas7e<8Sk)q;5s1X1O_L$f%88xw_+Bmw0tuBUl->Lrb%sV*4!SVt zEiGf^p`9(8>ihaN`Lk!wghudGneDf~Uq=8E>7>fc+VbRbKm5kZ;-*Iice=~dQkxm7 zIkPM977d08^6)&c(agF{n|pIwD9p#U$~jf`IKx}e)o|x`?kbC z_|3OIqV20eXIrC)NEg$A{{DmW{R+s(9(_Od0s@T3(fKpzbbv@}_#zd%xtykmnsxo7 zRMACMNN?RPVj0YJ5eb+9u)P83#aNDHNmgVxPJ6{$kuGRK^d$6cr{=i6%9)`d3b+4yaIvlC8VLFtKdO?&vJ7p$!#$o7pTytjS5ldF$Xx?2jI=#=MD_ z!%i@MnCEtQ#8^55e9rWCtnr40wL%P@a zPiQ;u@_TXsYi>Vr#+1UmQ?T5OL%p;V{0|!?-ciATu}0L^!CBvPdMBjIs~q#@o=75k 
z4Pg=b-oxJKJ$gN!ztu~qa}A)pB62Yr+VkvsMD42Mn4*{SJ?A{BDQECVDA#!5^-3^jnQ`b|1muYG_y)=`g%h`hxZe6_C>Oi zgnP|5Pwxb-yd(p(A>{=EUpdS9>CS^2>$&{y;j(8^3 zWeqK%w{ZcaNtY!Qw0uX>NLBA3dQIWt)h7OqJr2r%G~I2h`M@7tfhhsxIJ9qvk56*< z*2={YhJrJG-_5j-R9~?xrjm>4%Oi+m8jowj;b*gsjM3)uR5Hixv2pzQ4^te{Pepv)qB=W<(DL-~hle8JRu*|1x87(cB zs!11C!@$S8{pm?XSH92#vMVEELp>I~_It>Pd~-eFF$Ss-yQukykq*iTHazv&ktS>- zWkQMdes1v0jsBZtj2fqgbl;r!S%E2#8CX+lfo36Y?zT49x?I$=y?8 zlL(%Lwr_NJNB$||io!x}&>bdoC)}##5kEfgI2Ay9pp!d4+$tz&s0hB(-9CU-E@K{- zzeCMEIzvbyJr@S=V`)Af?^N}5RvA_I=J1%8UDPMJ^TO}U?0gqB{d9^-b;!`wcvYm#K1*jcw$u2c(Jn>o-QebG}99 z84Jl>6-+I}K6_sZ1^T~1xa`qp8+Vmz1Lm#b!unYC7s+7cfH2(n3!Ej#>~t%-1KGQM zIxZ4;pR-TT-3~~@{o~RS!d~fckDTCF!#Bz#nX!Q*S%}z4kZ%?7PKa=~v6mAa95a{; z$w-#=FKz^Xq^kD0F^uO?R8+f&d9WfKj7D=A02d~K4?ZMy>Wn$Oi(*FQ_fxPx0VN?I zfSWVAV$Yn8d$TrJ3PEWff<6T?<1_&6M0Yj0{@gFZ-E^!6l+z)*LbgrSCY--(+o$?F zek)nw#(FJ1gPind1i@aj*FK3JZevr?b7>*^fo;b zQq9n$UTKYQrLAiDf2BEq%;Rivo~Fz94-&3m2G|(2F;vonaNYo>YEU3cZNH>TMCp-) zLdB8O`@HJ^0Thm6^9s%}X@5N(t1g{|CwWlS8MvR~4c5w3{o$pW=;;G4g`(L=g7@8} z#Urdyc&b-fJb1rw0_4xMsD&|SBZ9@aXNJ$Dj(7lK*7Ec4i6h2E6feB7v3y*hh1a-Kun!J=b%<9>4&9a;}~qkiXtvP zhpO5vE{K(#1Najt3O?tqzgB)YM+QK2G-lf=eax5U1cdxN zwn&O>UWP{K@A1UF<%)_qw~Eq882%-dt1Mh}bmz6*DLT{Vr+zSy|BLzTU8pWx=HedX zV#u@IY|E|6jj-(aArS!Zx>mxFF7mq{)04R$ihYTO`MVE+YHF!Z&;;a-fulGVUtsr7t6Q#ToECM z`E`+p|2$xp-GWPX{B&SAQR@c7M# z4<5=gZ|-r$JBzQY^Q1JR%!O=e+V%nowCM43(4P9XYi#!ukJzlO1_}r5y^Z`fuOl#h<&o z)2g$PYBEl~AQB6!0o}r$8+(!cSFp*i8j()6zEPvqAIa~|e0H0mi2Y_jrkeV1^fkw+ z>Bs0O9T0+=+~g!AKE+l8rod+JEoJ2)O#-o9*tOT_ljJhR z(6(@P)h`r~L`O`H>V7vMdsv1Q=_-ByvEC;$im3#H$Ftl)8QmHG-vq8|riMLWta0d! 
z9hhMB)?)b+i^qPYL9Ub6biTL}Sz1+zf8j(;yie73*zou}U_!1sdgFgd=ki(fNNe0L z`1Vl8bAb;vu?mUV_w@TJ=MV-9A}%KC#I8N*A4)Gpwl2f?G3c)+CXA)!|9C(e!01_ixE7S=;gk+Q#bbV_GH<%e%WBw)N{$N zd=Nji+)IeioKODm$zP8BRW&xT$ZHHY|5USAG<@sVg(y?7)c@sL;`%rxCy{eBt+?OvZJCXy{pVF9U0%Ti4(eRXcXGOt z9YeO0^q|$XppW>ujC66Uv7DL74%;V_`y>4K$PfZ+MMKueK|+HE@k95|obi6+rB4=A zpa+SX1u4weEAh{p<0}7NL>a#r?_Zc)W`ZyiXt8bMx0dq(p(}5{SC&k_xu4~W?!fn> ziwYo-GzNAPsbEpAIMOH1X8vt&u2PUekU&@(Iqva>*jn~>1F|R?4NFA?XXngEpnxvo%VR9t-IzBl2xlynQM z5#LQaYw_?;dRQAyYLB|n+^<*?U$VAO^fuA7L$G4R7;iRg1nm^^IwG(}7H0QX_^t0m z;Rl@e5Xz1N?UcZfuh8DTQcyn#*URka$;51^FhY1xRY*2Rg{H9zFU%^N1ZO2Mq7V=g{))ctr zsNi95HaaWs`{z+e?lVHy2W)yTFa6q#FJAKOIlEJ<9{EpJA&^eBZ6lMO51@v$Z3qrZ zv8IcdxWqNK^hydgC4a^_WHaR)k%^uwZ6k%OC+uT*c`dfUqXid6o$@r57##%Cmk^ikPCbK(QqBq*=&uWVK>qi z;bK5iM8HJ!O!L0!kwoE5+u<{j7aeR+te|U&fWZIGUb$HzY`Kl?V&A&rJ3BorVma%k6MuZXwnN9vo zm>-9fc8c-*hN2yV;=(kNda+3*zuK4_w0*KZ=>w`+#fPUj0B6l`hqiq}mKFUb8&h%r zi1^}LOF(%z=ZF`tA~&@vnFpLIt>7VvS=#I$rH5xJC?5tm8S2^{YCw9P%-cp%D7ys3 zM2ja%Yd(bvqKj?Y2V6%GkC}0&D5pCBBZP9YqwkN<-+N~Dim+Yq#^ZM1p9bk>JWaC@=`#*?GB=d&-!cvyYm*Bd&9EC)Z*8R?l<^PWBuiZ1Y<^=1&@3`+Z0~J?2*Yw# z7&BoYd|#sZY^-rhY=PTy>&>fu_!pL@e8YRBx<`CnYsvb8#&X`9EN^-KZ23X7_|7!! 
zaNE`6vL`91FwGR-DwA2*AHMo`n44HwxaxXanxDzPZf<&DqjRmWjr(dqxSH<({jv|B zTz8{c%G;U0y6rccyLlcyMO+DL;b#Ns`h|6XXy1L;zZB&F0x8%b&WgCcaqXq~X5&~* zY~$FdqFwvCL*?^-*zi)Y+5GGJ%QUKrw=R4oP+*j=Yg;pPo2)O{cxu7!`dpgdRFUXQ zU*;g_!y2IJ4>&$xJD`1WkZ#y+tm+cbRY8zIgRCvzY9M#W`fb}MirA+G^V@`BhID@H z;vyk~GsF4CTBd6sclIU&4lgaC&92Ck$zr=R3V&sgb+W@_^jSB}o2+LIA}PN{@1Skx zI3n`(D)cc7mpL%EL^4EbW@#~k>ajhRrxjY(9Ds(q1z)Uo$SfD}(Qdp(r|1bOz5B`y zkF6hqI#q5rAoc!P?l@0l^IC4DO#@b9LTBpw<(jzQ1n;rCN+#D(b)a%`xKq7peJTK* z37n2G(i)6wOEw-CJxKv&TbSQ3A2PW&K#6NJ9Re^nQjqH~s+#|O!65*nxZ}Rl6hgrX z?Gy<_(COhe89=oG*{hbNIH=)f=I5td8N7SDmPacE5j~t`-xnI2LdKT~TI~7jitoDH z`*w({hq$y3Z7$UsH?CL1k>1l^(uzhU05kf{{}zBy}m z_0{ZC@INHA7H(y)MGlG_0@Ui2-I&y=08lc2^m1d}#2mCqgowIwm$TIVW+^p@l>;{>66FBL_T=_xXp^@+C>>Qw$DgEL_Mg7tCwiC46Xq1NmpQpNtaTnOnWS`dIZ5d~izIY(;Xi+V>Ea0r45TT-@d1UA=5QiH^jfwqs23n5Ca($Ri|qwp z!|E3%`+sKtQ>G97Jq6H7gF^UiwAb;0$+u`$YEd^{=sfa)O5hPRjq#6JG#k7obRY+P zCu!8R%r1Zy=_` zW_N@JwOd{vuN4R4-ebT;US1w_BEZ1rAv%#A7+Quz^;W0IV2x+xhURu zy}RSy-Z2A%6qJ-mL%Kbfy`SGXdha6#&}M+T76d0Be^o%6;J*|sjo-fEy=HqLDIwu| z;`u)wM*wP+)Lhzjs9M{3YA--z9IV~>^XH-cSj>l{;bQ{O0sK~!2x$(#y$e!!=s*o3 zX2>n-?CsS+8ry*b1tHWzZzefN9jt!N{iCvZFJRLE>Jma89-fRo%L|uU=Xmk$!vBo1qFVpRFS6Rp%JbBl7&rAF!JWp7yXI3$+08 z1RC4@VUXIDo!zN&ja^;Dz;jDN6)$$UauWg~a+m=&%42^v_WylK!HWPAHxM*9Ow?f@ zvIo#E3|6}Gfc5yn%yH?JbbLHcmI^|I1RaTYXhG%_`Dh ze8YXm+5=h=<3wD!p$8F#I`H=Wqn-<@$HuTrH+gM_ZozVDl;>7eiHdI2ofC53;X*pW z)y;{@NP>fdvDdX%y2h;*2J(_nSKe|xOi9w#*XM#PL>g2wGV=1lB$8;5 zpkUlZ?ikO7%&DQiUIOqR?T0uZ#03?S<)=i?o!=rgwTSquZmUb&j`uWx$}nS7n89rV z1349iCj9eT&66%zkdGQ1$in*V!|5n|{v==(rS(TYf2tr|wnn9vefXffX&$R~Pwum1 z^sjq+U60qSjRcqRReXGA(tmM?0rnvd3Q(!?`Qx*O)s2mf%93G_QDL4t2lKe0u-@k~ zNCWCc&`(L74z;vk-?Zqc1NAMm*6Pjjfj$wACgDZQ2DV(EJe{-#qL2@Nlaw@0H?FLt z6eyXT(VJ^CT++wA)n&Bu;JpBridV?GAuBt3Bj~H3{TF+SQ|S>KJ3DR1?(S}0Q3Pmt zlRth$!IR-ggMXc8a1PEC@q7B~lRQdgaofpr%_yMK(||+2#-m(zy7Ln9AkaKvZ$XH} zLG_I_>ycV6S5Pv2p)-6W*L&~x&`|%ra8{+d+R^kJwmQI_C=_&S-(pjYQ!#%JGfGos zi$2b3&%5s|*HNTvlVIbARwO_emT#@CIFM~3ODtC#R!&gFQGDmlJb3Zr!(C=U!GVd4 
zQOI|<*H1^AL#a4(ra^&*H0MCq3JL{p2CyP1YO^#S6su~dvkwW|FZ-+~htJy=M^-g;zedLcLJzPx-rSpCLEBe7Jb1~kQ(&a_2=KJI=@G7B?v z;{uIU3e&B=&%`IT@Oz=>e^@~Kq_tQb!i=VHz5LG!2BBxCNOb=19%7I&xdA#zP2_k2 zC=i-JH|DV1fBo2=okGsu(tYNCa%@34$j6N9d@$k!?dTzzBG^3wl!~~lAjcG?nW#fN z4g)mbZ&C?`gwz9p^_oL&qyZfy+enWKr~;+6v?!tY$oD?=GBY>tvt(Fj0Zk+v^?@8S z16?d|`U}0sh6MQPk&M5{5i#SQyL50SPQbU<>_{wo`0(K#oI+Dz*~bAgrxfr6h}%bm zhA}Z%R8&;fgZUWd=H{BYJz#0bGohswY%M69R**?dj2@1wDW9HbWANN2U8_CyLQZag zg${x+1TU2DIIq;y+Dcn&B<&PwR>d5PHdDEAOI}u1HfHOTTU^|zos$z#TU(2C zx^og{VcbO?g*0F_jE@^Z2=4}G#Sug;udGak9Bu$bQ^U}ZE~LX~-*Y%2u(%7u8W)Ja zKHswNGtZ(qNM#c{-d69faJEX;;2;d7+uYnlTHbS?#d{OEGWnB~+K!SHTKph`^Gz5N>h@K_MH{nBcV3E01_)|G+@}V%_V=sR&TQ zSr&DrD<@Hcwvq~DfzVnP>f?jjL}Vp>Jkn0H`z9ugpsZONYymdM*yj;Q`vK#W?puOh zHbf;DaxWdRd8DRqz$lpip(7kZhr3-z!CY0%> z&cbxLMzut6!i|{5JIIS^DE_-h8!uq0bGQ1POnxOCyOm&&@c_x5BmR9n0wkRIh5T&%HVXif( z;4B1a;YL(>fYU+ekBmdS(y1MweL&_^q$?i%HQMYN@Xm?cO(cy9dQB7Q{P#`eh3fw; z3E-;+vg^^dv(pN)VgSY2vobTtm;Q;0(hke6BAbbQGRqx>=2GM_ify4-f#!sXwKZGb zE}+by?hFVBK!mzQ9Vs%Q`Sz0y2i)Q4jyMub?LvI}nLpQ?SjS~O6a=`Cr_?IST^T{)Ws8*kT7Tc z0eRG=b{_7=w`Ep)NB79MAI~h$R|4#>BIhdFTX0+Q=v}>%c{Dpc8f|8>&2IXS^zD;( z0;s0<_T0eTeFQUrwi8K9OOBeoDFRd-VoK2TG7+O~SU>aa^(G`ik9s~o7%X>ujC4aj z49h+uL$q!lf8D>HEa;QdvSK?wMF`KI?Oj+3Sdnd{q?*kO#;!+u4i~6-@19mn>6Rfv zbSM)L9WtDu^Tm4{><&&TYeEhpfJ3TCKrXwtQrIQbi${8??HX6}5-7|-L~}fv1v<|- zrz9R(r`?x#B0G?lX_gM-D{SGi=b)E$=D)Re=FwQLdmn$IOl5eCA@h`!XrL&=Lr5}| zB$+l!DKw#-c`F`c8yOnRhlE0fcBVoiyO1GeC@M(^nIgR3OZ%MjuJxYtzUPm3ueH}| zE!@v@U-x}qzw7#azu(XO;lrr_2g#NDv)Xz)9S;_sd%L1baqSAqx$$!~;fzZL1~$&7scnjoCLtIkVTwJm!E{#25s;^`fJt;YWryfMXi2a~ zFdEVbg;5XjqmiC-@aF!rO79yi(VwYmw+e+wTT36YCEgjQXtELR8QSj4_%n6QQStnm zJ>vDpzCM-+hkKnYo7%R+nKmaw{W_$vQX*hsncK{%ld8!VWF&K(u{1Qlc}^`5o$jDX z{TK+OI=-W|jw0J5SgB@=*y5(EkwK07x=_>3bL)3$`<_&e#50v*%)+uiX$t0Yi1{T> zK4%?Lk0lBMJP)heZ)c*Jj;#fbey$NeKR=SY?T62Z?8!XdnGf|NEMEo`(I3{0B(cKo zSXhgW`q_vM!ljUx)WHL*1hIKxmOi+d$Lw|-+yT9v+lDj`iw-WR2c|{Q4UEaRR!~cN zwCK7W&*JBT!hP>r#&({n)YK87M&H1Q)E%R-l$C#3UtD7oglK)NTxo{8y2iISx&bj= 
zINfgx$$wW_Da%0V5q=$~YRE;sN+^n(o+Y=aj*g6+r3AcU@_JNfuQ44*uQP`(VHTP6 zFg}PLIYtQx_4QGn3~V3g$E3_e24C;xnJ}fva81mOI+dPo=Uq=RLyM7IQYg|(pW&Ij z(zr<6Xv1B@V`H)1GeZX@C3$L0j!Hh1N-`FAaNGedL(`uxm==?oc%ocRK&FsPUxyEE z7AY^8-Z7OVFP7`}e8&EK=@ya~mCPXl+5;yVKMPxW5RrXe?xAGIQ=)&-2aL%VG7P=? zJ@MXmn8GG1ZYyue&P3>(+T2Y}LuOZ|6|oz^dDhfYBHw?J87dpf9EBWme0Hj3TSELw z_%ZX&-&N21m3cii`p~reO*hivVmd;R!04T~Ye%bz0h-=_v zPDOMV;woBs_%td-yd;~q(O)E_Z<7cUyr`dMiKv9iM>4ODl>~HIR zetg?Sc~lwV6bX8Iwf+c*F~zl+G>K&m zwacrtT{l49(lytQ;P52KEhi`Ge&C(Jw8OGj5;P!H_ntgYL8Jh>9-P+E#2$@s12?;MXs}Y5(flB$tB_Udx6|}6``tI)e zs7`qCgY`0$6X^pzED}oYoyWUSM3StSIuAj50G?(d7WoI|Biv?u#ZPZKn9zh!d`T|*nW6mB zi9U^B^8^ME+oZUt2f!pqZP;-8*%1Z?1_I7R8lDlrFFvF!k=zS{hH{`Z3B6`a?*lil zT;X+mCY|mNuRC4U^U9+3WS^ElCU_D=m(1a`m@E&(o*V1hLA&YCEzBZn=N=8V2%9V5qCNry}mMmuJ!n3UNEbZ@F>ls-;9|@CgeF5)Nl7;!^`{xdQPU?V=FyK+1)c zLTH~P6@v~J1EuVH|8=qvdQ#I?#jai3ELqdI zB6fBLDmNbP%?NuWDm(rj9UTb7BO@F~AUIYf@ARHfH3#mFkL}4K$8tm~g;CvgKrcYF zDGa8ZHQUcD$5t+{v|K#hP-E^R8xb;-BDuvP*e=o}U}hqR2E6uzVxQYT*hGxDRY@xU zSAmyj$2#;Y({*#JOQjy#pUk&ddb*38j5zuZwCeI_A$JW6Xa=Gsixky}&-JIpL+L_9 z@?Lqb?^8I@-cfYRRPQQChu*aR`;WL0D2QX;$VllCdL#IJNoWVRfLIe~yz$4V5Y4QT z9{sLmvWj(rq`f6w>@h9yJB%fteGn6Uxu0VlOwqAjAGxT}t1wfpe-ypvDlr*S3T8s@ zAj=@-4t$z4`4XFN%CCD;sgi<%EclX*k>OecKqu7tOT+|G4y%iGqdZL^K^9n~pryk_ z;BbF-n6Mt7QJfNzW+en8&vhAMvPe`D$Wfiqk$yfYAO@D^ z2w>PUcog9yl7Ef>p#SgGqt2Ftk*Y&h75JWAFZ!AzZI0RZhRb@G9T5I zzG-6gco<(9V?3Qc5*XLDf@`YrgqoJjVQ!NwXyq*jXCN51!&d*x2g(3mLpgY$lg>e-CEY(4Xa zh_8nbF<)exKabgAV~M@{mlqW=xNGzXW_VwKPXG&5an$AuF?Zyvs>HYpzL*P9AjgVm zz_tre)TV=oZ^wg>kR5g81Ha?*-iDdM>0AJrXt#b>`mSMDHess1rLgxh;CF2)R*@GU z{zrU)T!&DjIHcVXd0Cw9PI=%z)<`ZStRv00<2L&xdT92ukJOn$%9?yJ@OH~z_4U88 z1;E}U!NF^)?zXq)qowqf+`6`rpxEV|Q>!b!_Y1EQR#^p5&;Uf(q@m?zItnd=r0DnU zzdJS;cbFJ!0T`P6%KWSP`zL`517h!Lp9Z0F21Q@ws!CizqtTj>e9W^fWT2qMNvfrQ z?fGT-6|*jxQ%L4&Bk}3?V@!X(4836~w8v1abIuUs>fdv6qf=9)Sq8&wq#dY^tP4TZSCpYcwf6**JUt2kZlu8L~w$p8aY3Z^^W6X@)#pN z27q;{v5xVn_zD}?Ck zQt}s)uqpB7$W>#OlRJ`Mrglny%PO`If7HE9;`Vh3lpNnT55}YIF@-ApA4>Lpml7U6 
zz9U>r3Q-+xa@aQ97!9!6aJZ9kGk1ht!Bl6?lTGXo?56LQzj1AtiG2NfEtqfYpJY{t zQxSo=C>@-K>=Wtyev4b?(7D(j?u7|@GxL4dCRdatYMV^PNSMdJ6`e6d(X%}x)Exe4 zKXpVmhVslk8Ki+)U_mheTek3`iGts3YV0g=7o+LuoG9xK3;(bc(*H;DAy>=8e(cY< z&U7gnasBs%Os(eMTQM_kkSX zu<+wM>Hi@~om1Osw0k#oZROSIpV)!Enpt%f>JDUZaR9$pI?cLok;~55Nsyr~~j<%qn~**VFJ2d{T% zC<3~b_{NQ2qZ&y6@a1$h8Y7)re6J$^BNq@;&V4_@7EYpDPk0>6ZDY4|?IvO1;8lVQ zCMG6e4!||^*5fMry6K2b<7PhAS-hb=-_Gu(L`E9OFHlOb3cJ&wOA(FggT-P1qYGCXax0Fq*@vr#^Z# zbofyySXu_MCcHC?3oqVwFExmzgz>sU9a4LL`96 zZ+P1RG;;2@cniQz3rl|_)ukwO5EgVa-KR&o;RuMncFk(|3|eA58!KiU7|ks;L06Lj zD)^TF?KYQ7z@@;`WfnJ#oxuj2;z^~A($cz6{xWR8Q1u3Tp_Ok>aPTncfSvLMm zfk-7x-)bWSrJyW+DZCqeAl8onnJMCc+INd@t?$KX111l;T#>6$@4YlLvBF$H?>MZj zt#f=evDwSa%!EeP(s<%+#ZDMtE`duIytdhC&5rY2xZm`(v?$5mBio^X?iq;@GRMfH zHM^=p0DC_!Zf=L-w=0#El`;F_y#qTZzExFKrJAlK0=T;39CGQ^ZEbuo0O)~w%Erbv zSlXp#1`>Bihh|E>TgpVmq%?&v^<3P5#lbvA~iNRg7~O;M&1! zUpZO!O;?p;Cqu*5XtYU(vls0B*wPznaj~(C6t6#7vPS3a+w2c-VuZ(m*m)v2HD*%Z zs9;tuC@j2njy)dA$RWEu_IdiEzb=>3iwMB{qG(qU0RgeESzm0Ik1`)usy}EA7rTB2;ofUBE)PUC-O6V5VxY-J!7|tqwPyUO{ z_uXR$;1h8TfK?o7eI=4@qY0hg9*FmWH$@7#p-~XTQsI>_Ffa&6NI0l-Wkcy;g#b2H zc-6%(Cr&8rKf<1iT_kz-h}8*LXdeXq2jGptqZkHzK!Z)s&X&*TP^}*MPIrgEjdZZ{ z1+Rwi`f++UkT+Hzcir$$S@^CU2ZjkbUGl7>P2iMMHqx5K8KLYsWVvHu<~<=dk*7|~ z{|A-4!vk;vOD}aZ=D||2KX>^~SHng?6%~yF%l6%^o5@uIc0NSO=!M$j@j$@I1vT3!{;jgEi0qPtQ&qhpwkRvTx*>F>Qq!jCi z=UCw!$h$Y$lRG;U5|-};`v%cOQc?i2NKH+(x_z&r^}ZMmwa#X(orOg(aO)Wrl{z#U z<%-w}_m-^4=1CQ49BFObGefW9x1XKg0AG|n)t{;|KWft6-cE?V7!GqWnQMQ;B}2s! 
ze%}0WaAL4+C1jM`t|MLdK$H3GE?wFgeQf{C$$~ojPr!(%s+I^9?cKZgNdV~8Yt}^I zcF_hzLi>&K6kkKt`n4pLxr906)_P zBR)MNgXo-;k2h_|KOvq1Ac;3HARUCNK0yNc&ihi9)#mKg{1N+;xTtijDfL2L>wSEP zh0u$tu1bMHLvUOU`7oL5vjmc5E-)+Au;}!D`sDTMh*96Yw8w5OO4Cj&~Wfw6+1Z&OGRHop_=lmB#Rdq5nWg30r;4qicuI7^sN4%ci8!=sbPgzimP40u%cz|3q^n~9IF#L=qoGnQPJ`7;NtXM zbxx*Y6OdCek2Q)cgwPshF(2_4$vYL$`r8NvnIJ7cyt$ zopz9qsi|qIyJJ`id!$+igluC<_L&?|ci_~)aG&g=n`Lb z?K&D;=C7Ik-#SwqP%^P1#>`)XWypHCGRTX5Ftbz9d1~>Wxvtd9nvq90lOmw0%22?Q~*b7Ld8$=t?kUGVVCC-cI$U@~x_Z0FuLGBy1j1%-+c zQJINQI#@eZ-S;GX7wlLU@yZfZ3G$YsJ`7C!%^9_QHX1W?uU_$L3QJ2|%x;wW4P6gJ zMb?ksvBaa6Uc1CB{$F41gzxkpz88wV$Q-#V;r*jbhCICHin4o`vF;t3{cryTD28qjmTpj_ySuwY8l*wGyW>hpcV6P1i{JO&?~OO! z9}bjrcAUM}UTe;|);2^z?h`Ts9s&piLY9;eRRV!t5Q9K48L#1h-&Bvh%>sUKJBevH zDchMkxf(c_fMgAv?5*vbtSt=RyO=mQTG-jLGO{zW(!V!%aIN=gWiZ5Z&lupsg0Pv+-I1q+MI4t^(3M*w>H|Idq} zGYHzvvef_Y!*u72Wq^57mtMdH*z_?vJ}v^i!cDMopl8fpU$MkJf1kbo_e~4|jh3kR z+O&lRu^@PsQf&sFUNKL5Y=;K{oUCL{H?(Rk-UkY`kgOoAeds{*6UR=9-h?o zb^XOUt3GgdPzJj$FxlL3NeMZFj&jkO4%qkXhm^3U6tU6ie{y2n1G_sviHpNs?#&pT zZ4O`^KV3}8-|m%FCi6JcYB$(YkduE$A>^??-HOnOjdjGjas~Y5 zFLGiebt-|b%kFf^y9^%ZYPYOhYMEr_udZv}Nkse}pp%U#L8W?YO)PqikCnQu2pn3_ z2}Lis95`g+Mx|rCr#Pfsiu_F3<{){EJnWyYb?9&~&!VKPY%%xa^OX%43}&;M4IfG4 zmAepqgDT)5uiF#P>`a&oe|T`q%XYnbE>@FheAQ%RwV z&QuRD6;Z`W7RFQ>Z+vu2TVIPF>_I!i(c7ErHD7LrO^K`PzDPnc8k&&)ehFHY;*Mw% zk(zyv{hAA;RM{V4qJm0HD>Z=k=jtJQUQ!)s!a_8x~A-haCu2bI}W`Se0?-H zW)e_)x~j_!HHKFS@{c0qbr~;|O*=RaLdJ*ds2LL&LKVF4NV98`1T2E7?S@pnQn$cl zD3P3;91UZWH-rs(3@Laj0&fN>7Q8NEdpf#*Yu(x?T8tgppG^F6w!-M~_z*6h2ku4{ z+yws6k9R;fESFYrt3j-UzqzmMGjzfAHq3!WR%49M=LBqeHE73gAb@1=h z+nSc$&t5zf+t!U+u%pzOG133?0(>37fGTci%6A;YqZ`zrhYJ~tx}Twkes7Duy-uxD z31g{7`q@#qMVH*q;wk$`ST4$jxfA1s*HFkBk$vV6H;XgD1{!pQamNxXX-NEgnY-?m zBp`sk8QN8VU+R5ym+*Bl)ZY)b9NZkWT~cnLE&>6Uy@_CTIq~zq zq0ki5h9-&)-{f}tpE-%KPw1!9oQi6@{Kxf(+A#KwzEdx+H{m4Ytd^FIx`0HdLtSUr z2*TvWkXB^iZ1&FTkXdl6Ln4B@GL1XQ*PB0DvjsW^Qw#g)>i3e!3_ZIiAJS2nhN}8K zaax@OwJLfK9|Zm_>~e9oYci!yd}imSw$zkQM1_V0n1>(LF`NHhP(1oW8{ zr*+Y?1%19EXZ9kx`S@mx6%H 
zFIxxKF_=%LIaq%&Ye)STk2n=Pzhu>JzmA(F5DNOb>PvOf`zz(U z8QwV`hiduI=C=HadXGnTRf}l#>4{hnEULZTe_K*js}20=@8Q2X!|i2AlbD)L+6tHTeJ;!f5$0CAd27*Mx6(suERuwvRL(1!<5`hTP z@&RJkyu;+O50^#mC42p_4>PY(MbyoUye;E~+Z@Fnfc&7PVt7B;ZK;Q@h!TIp6ZdZ- zguq0o1ZVsvq_21A=3ZobEpit=38Vs>8&@!M+VdcPE?e_Sfh`d~W+zjK<<;9mD=`*V z2#wN|trnrQaj+m+^DHIpaMiZyDEaegH}FqIUvR~ybmZ=?(n0i#1`*CYNt)gf=h3*6 zWgwEgZCO6IaY?K+IsX!uWf`niR@HdV3VagWu#_mQsS!s2juFw1jjNQ%<$qRdt)NVr zmIcH#pJP24Ew;$)rG|Mz)qo@-pP=}klWI|%6hR!7I+J_hVh}AwBn$Nj+_Xr|biz}o zb(nuf#?e&z)UyGst7O;QUkq3}I!*(HDDd({!{@ho`n=rJO1e^_KLw#{qp(y;q-j_s z>QaXZoLp-KZBl$(bbV*Y-LeM-#mPc-2+(|o*fW$NOTGc$4aBEVfdTG>`HSq1{Z+=D zrpe4=($<*8V9P-5Kw6cXXYOB+|CwW9O+En@QiDZA?k!D<^7w7yYd54UIdo%uOAgxZ z5tJEd55^0Aj=RH?1{|=(RW|gHyLtPoE-xrDkeq>?dzw0aZ6XW6UkXm?DnIiy|9Uk!FviHEPsH;m!mmcG(=Y)_I-%62r{@Wtm0> zTid>Mfa9|K7PWz0$!f4g(k#t>wEEw{g1xGn)}$jQPkz6%zhPD>e;tM`A`!gjm=>#0 zQ71JAGUhUKBneU_X_BXjwAk-7pGP4uE~^wz2&2MvEDD+5RS#5rjQNCcp;KRK%|YCn zzeljDv}H*8FO z`!;&M;YPdhIzdoPNT;ciw}Tds%Kk^6+2WYeXeo3dHT#d3e3>oit>Q!L zwofG4I`P`31P5CIZpsN^A^+kg3Uk7o?r39zw>IG01=?!NhBj}g=CyJdm6y3Tvg;wG z4gc{=11h>{tnI>93Pt-0ol5RDJ#=R7Mh{USujMOumsbe}J0Wxn*Zar_ zX;Hd{29vnJLVaId+@=!{H+|+7ya4c){lu8MQmBaJb+;o51eWS5Lw?h_{nyJ3-E|8d zD~NF)S0w+vjE^3xY|d@V4mf_6=7*Hu1#G;nNk~ucSNAUP%)UHHMAS3PT4JS8EHXJh!JXLywnoFJz_^|8S3-s_Qf7DjpvHlpc zkcbEhF0SOk1Ugoa3v-Mf*EM4(42i0HsT^UrK4tQbCNW`P8^QUeNrU+06@1~En1EG@IbKw zMfu^{Bo)oqn>f$LBjWy($jTh^TGJ*1#JJI7p7sWtt8FLlJbQDGisK-DSh~)n7tN zPV7F`=J(Mnxl$H2a=C>gtL2wi>|23uOQv>5n-}XF6%DJo04$v@R)qES^~GZ|hvMPC zQ${d?l{NaicPZSClv%!y+^lA!wQl#pZ^j zbDkpSRZ=VVPa;u_-jUU1$vmzmBnnE(z=lolB(nxl0zjCH9zaa-bF0~FsacXVy0I=yG9=|P`K6`?9{x(^9G-O4pMI># z$#b>(Jn(2$+4F&&w5otQ4C`ar@IY@S&ScJDHAG%Fq@6Q9#YC4@xbk|2r8_*sa@tWK4TDw%ge^ zr{_BYn4|kvf;yM)BOkymHwuz;{{W=&KMfg8lKJ5e7weW$5Dc$<1@bdr@PzOq4DWIi zzoOuV()X6mCzpGXjH^>oJ^rzs>uZxq?^a_YE_{L}pP^N#-w|beGeAS$`Uo?jKITYS z%Qlt9ht%I|Kl;yu#rOm4a^^s`rFu&pB;dV{`{4l=W&vkZGd=Shn71$n-0Yx!17Y*9 z@=#k5fJ@26)J+sC;D?El9E}>&6X@Gi)U*|z!@vnQFpk$xx2%QUFNIzxE2l<$O%PvD 
zxWzeNXzv-R&}gfkb%y+z$XV<&84#cV_5!8`{ro_~YH){I@KpI8*<-ec!&qA;edav* z%BRXzU;uSM`Z>}Hh}bFI=${JEn^;kQ1Uz#|o8?=O-ttpzkK^Vt&gkmx$1@T7m}dw! zHvdd>L(>DT?{O<~?eoPvuSC`Bnpx6Y&$otA-?~1F8eL|a!&ChggFSIh+>LMi;zR<= z7I8*rZ>I$+nPR^y179xC9baS+Qc~-pSqBL=SIDYQke+N)iyuM2(%w;qw6sb5n^@jI zIFVnt!ZtJ&+>|PH?Y$35o@--T>k)r(#Ti)RBJd8mcOkaWb(AXHctH&CpJM>Q3g1&h z?4Zxu*s5p;7vX4NFGcGzU8}5QbuZpJ_}C5p?`ak}g@c}XG-vZf?W+9=C5cf;2FdB@;ASIWN&>p}!NO%mI&1*aFIft>lFOxefcVSpK%bgp zh@F;-U;baNt|*8MPOd7h^z~P>-f+LCJ!* zi*^-a((~Sn#<<+J_pTC!SqgkC)ccqTv8gx2W+Z_kq~KgSh6wa0ZG;XxXQEp7;>EcyJl^|4@rP`e9dvuBhbBkKCMxNQ1Zei}ACj5<<@fB+)E z@XytZyg-dcAMTQ{Qvg%~bRhozy!}goa>wWv*mYqNE*MmR*7dIbYDaH ztfXl@{JQ}3pqU=3&g?_`2soj|xzJeo9$m)kgM`^^v-8^B_*b!uSNa`|Go~=6|A)Jr z)m(z0_9d2DL6LB>?#IVU#Jxq|84j=`ucuNZ+)A3MM0}QFZk?+)N;)6beYN&WGDsv1<|fZYTN)J>Xc0#a zR(TC}cqO|xJhs8`Io?6ZZwOaYH1MVVA!{X^ksld!oXK((2$?pG&psSg_fV{xe1unq zNMj<@rjx2UVPWd8XZnvfW^@%iW|`olsX#HnB6k99##*gF$$HJm<_Av)z)%m*lZ#Kq zeXR|#EcU*?!3+ZSLCFOz_v#zqS}_2HiJW%%-i@Y{o&b=h&(rnp=OI4>d)+H>R8ccBb6aHl2(z-79%kSe9?f)7L0;Q%6?e5@*@$D%z%HNk=MKc zbRo%KnI0_9RILt&neHv}xbu_8JqB!ZZt0o)z?lm%6RUP6)LOd=rbj<&@QzN!IZaIAc?(wLl=q5u+Y|M(p^uSYaa|4kaljrbEh5?+#XK zD(Z2p;k>jtK5g0>h8;ML4RzvY@B$RVn$s8@ynKS+*)vtZex>!{V^F`1ZFs5P7z8yn z9op;K#yp7SyhjT=_qhb{zP651)s@%eW$nr2NCs?2!1lExOaAwjje@}*N8@b~VABdM zPM2BHD1`5!Ceh^&Tdvj&H0Li23$u_c`1Ifz449hfxWf2LO)yF5iyBjy=*Rwdr9`Bv zjG1Jtz~(jdbHi2{>d_*`jyiB}V6EBkjJl>pE{zoR-`)UV9avYz=CPElFlnK8#sNXr zOK_heKUNtoFSRpG#5I&iabRVVJa;o;!Jfa;*b^?>*OdNc=sW33zH-|^&jK8Vu*{i7Twh*L)4#tu?3+L7mB`t3+M@3-V;&PlS(F#neW6CJ55s5O5} z$m6tr+DB}*^2>SIag4v^Y>@8acx~+&lpC9xn)+sCW*!{?;`VZfGBBsM#^gjVna*G# z#WVIL(yUa(5UV*L-LTPqOyBhScn84b0A@BAjHf}u=Lm0D^GQ2gYB*oCs(g!v2Gx|) zc5bSS`p!DxiMc#+gkN(y8e=ce;Wp?f1?XE0P?65-%@{F^)b|%{3+q36HGY9DRrT+U zsIi;H$9O}~nha1R|IUv9ZNOzUMFJt?vAyT!PAO3?(n1mt569{JY0zEWF8`da>0RhX z7j8@@YwjPNcUs=P#h6X>`mx%tbm>;2OXCF)KC3C;_b<3+b`1t;vZo}h?z=hwsO;p# zR%bc$p}E3zB=!Cga*P#)iiAYY%sjr(4S2c~pf`?)2$dxHFIx!odMJQf-<|3$4g6q| z?d9J1O=%Pt6p!`eOt}YW)ODvIM#LC>b 
zSkDs+!y@2w8$YNQ2|`hacRTG8UIooi>8Dd-U1zuS*RT0%mF< zvFScPS(|zIq@+?ed`G8;fweL}HcH*{u!SHvg?|0*FvG1$iZ^L{Hgac#MNC$4+8Q?H zAC)5#_~tGKWoSO_tPauowY@VqLO)l_M1rAp$;!@tRYs zCGt0|-n@vAuYRCL`>%++F7K+nS$lrykX|)p1wR~8(Tb^Wr4>e1w`lRfRgC_@A^Y|) zl4x+CTFIr9OSeq6L$40?uuxyF8s(fHuicmkOoLK3mD%^v3%XZ9#6A+%*7WZvWwPb7 z1fHp6(tvA7X(uSl7v;DGw!toLcV{dL`PYf#gB(FrOP_Ti2{cj)h1~R*OL*y6q#SMyh1@;OS#nWv@`mFf#~{|AXewVy}BwLt!q~W7`_pd!P^~4 zNZr{Pet3Z}Ray%du>~^d)uWf( zeaWL_GR%uIqR*%gFg0_$6CKEOhOc(9_|$#S}MDraK&m%?HZ5MH2jB z_oBk87^fD8?`ON%5~$R@rGv8cL{#{L?wn9OapZbQ>vZrM9#vQd$$GiJN{T(haG@ZnHzgjABMolxP^ ziW+h%uO3rfXae$9OS-tCiq}i~yo(E`uI_G0X=yTg`r)bWx5CB7W!U1QklcIB)V4|Z zk-JMYOq2AG#%o+N_OuUN$Xupt7X%52RlB}Ymz~Jq=yVlru94k=oFepqGr}^W%g^nra^d@ z=p{Giz28qu6#CSQ{C9uaWT|QhAX@qtlMiazl}mX%?;AZZ3+02}yjwre+^|WY?WD@W zd8g;d0FlT-Wjco(pUfL6wDH{;^)0ykDSe5f`xtj%CWqzeWwr?XMI2>~>^lP&A=pjj+MvzMPV>h* zL~|@gKX7*hOx_6|6p=0q+bIAMsjhTAXSc-ICliA52**#%H=l(Y;fZl`3uHQR7wR|^ z4PL!|{X>uQ3I~@J?gQ(<1)}~R@7Cni3xjH>$5Ma1lx^3xnTyO7D(Z4QbUg>JoV=7oRN}i^}DE`;K_G$(5n|7m5*+>VXSuLILhh4O`yqR);t+ z)`Ir7uAE|QBoTfFH8oUm-ApSb!Vc}~8=`u5d2A9Xo@gc-q^_DYrG>x;A`QLeWnWOX zQ^0AXiW89c8IJx@=~$(~1<0GmkMfo0XNh_fco$HtLW)(6O*VDXQg5W|;u78ZJRr5!sg z1DTx@<{Apk$cMVgU0E|n-D}xqH@{MbZ!W!m6zS>%7GfGTg^#hSXWaUV_dWK2*H5-Q zEJyYPO0%n+Ide^o^REX-)w%Z2iiQ{_5C3nS*=TPM>@JgwFTz^d|KNitYu!ROi)rQD zkESnVigeu^#@2O^Q8p4X$=vd$^lfimP43;S(PwghPvb=&iA<}#V=md<)B1^cNk3AY z!V$BiQbn{Q60zc_ejx3<-}wC;%HO&S5*-N66fgvIxD$SA&)xF=;!#c$+@5CPDywK( z&9*;H4KOvrLvEk78OtoD^`k1SlzeZInoq11tDpQt(7 zz{f9l{9KOr@>QL-+1mX>;YM#{QLMW1UucHVKDcgjahZo;aabg%A-Z6OrDdw@ucM9Q zTAeMMw9AelaTl)5$jnPc_`%YP3a7xD|*3Pw_rp-` zIx_H@KGD0G-U?F}dX!X@m9;$irN^QpYG@$GCXY0Vk&@T^7N?CLa)g|IXh#A`fl?oz zPn__;P6JNpcUL|u0P|^hPnBiSObi#_ZFa&XxLtgmOV++70Rr|LX@R1{7#jfS?{CWMjk#(-*OZ4eRTNP6apw!U;eb zO52jt@|93%SF!5U3Flu;#R6P>qY*Rq@(~??gA{sK6!T#l_UmaoTDj8{uHrwr>WMY) zfT%I?k{`SZ<>%lkBA=Xyq^inght&}XHda5TECE6hOLTcm z0u+uoX+`ubO-IgX9V5TG0aw0r8H3%4%WJ#%`$jcQTu@>9)tu$t`dC)4#Sn!Ty?0j=f3S|?`(=suTUAyzy#bcOp 
zB>Z03j6VzWoex6@mc5q8A8dEuIZw??0Qq_e+2)WJ%NMA*Ph`i{Cw&Gg%}%jQNnVmq zpH?Y&Nawc?0$SH4U|TNxh*tG#$I62w6w@#V9(=SW9gpz<@)xpI>X@(css=)!`R(26 zC#abB*|M-dYrf{&VA7ar7^ql$9^>*nWr!;Bf)xH{RPBG3>kMuP1A-nQ?c{~wbeM8L`6>>y*G68GA$qTO?lQ5U7M$C zxoh-Uxa*au{$?PwKO(exbRMms;aQ%ZzIqesa8+l29UfbzLW^m5dPU`e|4#O)+=uuH z4r{|OFo3Z(cRV%)x|D@94vo$92IxK;cucksSiiAj-&8T~2*j_Y))hmB~u}(xWrzUaO&XW=+g$Kj{Wd;l*Mn~Ah(bO?BsZ5^x9O=>gE$G0u7u9k1}hBfi>^MS&(@ftv(aqAw_#7GAetYE%_ z&*X`_4T>9ks>Qm5D`cqeDle_?k~!&>H+{w?{X<#rIv>62Zdhq$GCH3A5Ggok=BkLN z#iiDt-3>gkuby-vqM=3aaM|5(Bkn{)GY7by*dcyPn(^rz3vS(McHgjH5?ygvII>$E zFXX0HXy}!|e7&Pj^}1s$mZ+kWGO6WWu%h!lE>LEBq7 z$D|WR$9PgjdZabhY!|y5y)j!RObRcqYsnh!GA-CzrVQS5KD;pZpbsgED!)$ou(;fO zw!KU~7AxS(1pB^j%U_|>>m9etE}YUqh6ogqqFC0D~n?nl>& zSvB@VQ2Jw9ZaZ8^jnv4xfBMNwKC?o3f^1_*(#Gd|**W!rjj5~l&X_vC?p7IYcL|#r z9KLJYVEv!#`HX>|NjLj8ttx;lagD91OEe(ay|RT~Atc{U7zVnzE>Iu8-8~~XW)L(3 z^Fb9xW+R*97yvOVa$dgXynj)0vYMvLa%g84$KkrAIKNogbK$*XAm5!A^-VYGs7^hk zopW;g>Lh?=3_xq$E{?n)sqf1b#_lJ7U1&ecg4Hx$)_rO*J$3uH4^Z$KR1mSCyLiV7 za@^0LJoL8^qj z)zTve{H_9mHE@1@Fib)c!95$TyF2vh&jnawgd4fI3q3dR?Lc;`mgkWZj?HVXF-Sdu zv-H>B<}dV3oHa{aEr%;CD#f7V7Nh%X-$&TFADaB?27`{wBa_9Lm_*jgjyo<0U~SH? za`66e3tr)z59wl^&G4>qqmn&^hy0+lb$38JgfJNkBslvFiaF;M~^9asAah8hRD zvsfvN)CQbSTNAt{(Ez<4Yg=0y#7C6nJ|N$I~YsJG7U&S^he zwn5+lc#<;5u;)1eP}Sh}kVM|xCl>JPL$M9!1}8j92U9!f=YGo9h0hADB@Uk&6QOk- z^p<$|14cBGzwC33P5s zF%++V;m-s*nXF$caoZ~I*=YGdv=cQ0o4wUIbCFq}Vt5=5T2JCRgz zvZQ*1YJ-eL(G6AjQ5K`AFBsWVow95^o__m}U%lLY^}_l*wJ{Uyk)c-SK6^xFn}2?? 
zS_f)T`$aK!D4n2*a^_oAf5in&864H!uFp%;LwwLsiB{y6$d)ud7nFSI*4xbdKlqYo@D-t$rK* zlCrsfv$=iGcaBE;oJ3t+%{~$*d3rRjMT&ujf2@Vbw1c2SesW0~NqL`A*~foNKFGP2t=XJd zGlshUyH<1Imf)553zVy;G$5m7jA`li0mY}Kqp-dDrb83bE@nF7OeR6FXNx*(@S;zCGtqh3IU7b)hE{0T{2Mkh#0sLaDw9cxTZVTIpXafNZKd;kIU670>qw0yxO-uNFHKeJa_ zW>PM97j*Vr9WLE&r`Z*2&)wl=p>>@DS*mlJozRQnG@wxePhE;&`6&up&7^2c^@%OrVn03 z?un5<<8av=$=E4b^Yq2;8lB!w_4>w|?^6MwV9R6)WS~<>HSSq_M?)FZtcrGIt%T>K zCqHq)dYUa6tLgS&De`p;!^Cc*+!>BeT``em#VOfMQ5#Evd};Czka_wj?&O8(Cm{KT zO|U8q(04r8L#ni9Q_sCDlf0&fS&fbNR5R#2x6Q7s<8@od_b%`8<{}jj^{Q`LfSUd! z#zKK%lg_@5udut51&BaRPK~>csVOA@sZI5SU(ZQM$g$KnWwr%$I>!Cw_eKPARi?LV zRM1L??3=X8sXQU~6u7Y*Ad&ws(Rp=L9eLzx=#gHaZ(tR`bFb4*2FiNi{yIL4c0Rp6!r6G*9S>NCKw02ffg*0N-RYRq!1HQnyNM1_wnIX?>t)M*)F zgbT;{Vlzq$Yubod0=DOT3{g~yrLhgK?DoOCo}pG#*5CskWFS#twE8XQNg!GB#(PYIkVRL zMyQ5Y=YYH>i@utn%bU`H(X+an!i7Af`8)h9#T3rcQd)?QmAANzv{;jGVxosd z@;BPJkxxt|9)vb1HU&G$^3(tkEb1OSxndym5)s4e6A z^5^@4&BNVgo_GWv60tyfV242rkbFslLMhxKw@yq zk{_5%;Y3UWxG(=vE$bLROV29`1nKNu5Vk06tK(pFQJspnEvR1d2IR`dc z=Mw9}@&&q8W!`)RzE_(QUj`~}lAn+Hd_vS>TZc`CUglhlp?tyCVjMktA@ECpx6^dp z;P~=2v_?WKyj1WmwCj8c8w8-Rr+S|rQ14;gnSSRp?(igZ>-8o=0b9b z-X|UjHto^C5g_>R;k^Kn%YmQkn%K_qPA$-V(to?J5PV=-PPx1(u=T958Lfb(F2!v; zU4p)U@0ctY|5e8V+MRzZ06{9pU|vjnOPBH`AqU@dqSV{SP|zbNJ8XxWP%UsGs!gbC zS}(`Pd#;TLth}sbx&NA`4zr89{PMQg1lDGE@QKaFwBk39rLS)k@RI87|NSj zd@80Qmt5~cG!SUhA3Q|C+s@ts1kk;>O@bJX|GQx$Y`KWcXtw<4reJ^C-HsZNbq6N* z7;xqtzxSN;&E=vWJ1fr_YP$t$+LusYJV4}EwI3K@?&yttdX?pc*4oDF8E$S(1Wh~S zc}rYQUou1<=RzLqrscWQy+`x^T6#@gyiHUO2o!CYp5J-fCP{MM?iXey_Dy(lYOud`F3iZq@^ zq;DvR+wF`EW>>vtECIOpl;go#$o6;i>a0#EjY*N?=Kok z&hoy@`sax3YE<$a?k|%iAOu7&F2H{>rE#dYPr_vZqo8{Q^W}T}uvc`9*=_SBWU<(3 zBtRE(Yu_0s@wCNI`;l7kvz{cg0GH(!p9BH80&5_C7@5@Nnan6Yo2UyoYjut%gpP|| zeskv|%F`Z^Wo}Wh=#TUQg&M?)`v|nr_k?m{9LhbSM`6h&hNL9iJtAc9>Nl;|Y$W^q zeHFLPEGA!w+>{7mmuOJG&{;5;W+R);d*uJE&X_`V65al`3)gr%FWPbTqNp|W4RS9b z(-Ke~x2wuD4cxB;Z2yDLmccnMM!;9?@ERgPw!Pl=uRZyOIl&67(qNf0PGK(K+EYL> zYmp$~!l}~P%a*C^dR5@^2S&-Rt8Zk*(}K`)Q=ah+{He&sQT3Af+l|o{Lf|vgB@;g* 
z&}dI(0o2I&&!7ms3{N+3hjbkOz;)cU*h}7x0^n|tl?ePn3lK(tP?|4{zRQCgkjRS* zeHCN(JnXO+hOlL-)Khc z0_nI{Ga(B4=$P_}ew85m0G6pfP0hPA5$jqjg!pD%kj6mROjc8y7i5-tajZI006#jQ)3^eEH9e!zI+7k%cB zJEP;-UyDAG@rhpPyAZ<8ekX9c6oj zWVrJ2Vu4wFkL1863%+7*+$p!zo7LxnK8uq6HSiYh_jaUq`R@RIFd!P8JgH)s%%7;p z2BO7QszEwdL}CRfF05BEJZCf|%(wTB{e$0TE5L0KK7KqfjPH@a5WsD^M=_RjeuW@IU9p%v_K8j zf&15suN0)gO)f--qZvPq;OR9$h*;U47m6>Q&T~ef*TngiA3tKEd-x0Qc^WnD^dkS5 zv686bu$gf8Q0S_V(Pk_zC{)a;aT&(yLVqK$JbWQ|0kBuE_FcD3i0pnf2mTcf(#a!) z*`xLpQcR7*i!~A_uSCm*qx=a4Q0>mEhL$B3Wh>Uql&)c1X=9iv`)wAb~gTeiu)Y{(>daw#oixxgAKc4p@F9c~jOk1K?3N)dy7% z4<6vIOMXGYTP&>YT`*Td!Bb8Jzzocc=N3f+w(fq%#K5$AcK_+=J%&eNyK;VwTnEvq z_#k(FSvv6jcn!7#3VQ%J5Ztj0+qSK!~NM-;gfJ=U3 z58EmP#FWIRAT=_8ZB>pe58k4z=x<3;>k_C{1{P)2-{{moy8W%=5wN}{wR6Skw~0vX zi&a1EnLKrWfksnxXfWlEa z*4lFC-Lm)~OUXdcN9H+aU#0QVXi-4EK&j`9Ul;5FHL-?WuBZb&`RVo~Nd}^U`$i6| zsD#-Yv9!F9!un)^{$gNHbF+;XdIAvcOGFiK+-{AD7McU;E1s!FLo7SPwqx?FE#YT~ zZ1L(h;A>v3_Z*(P^Lz;r@ztO|Bqs5cYLvS=j?W$UP5UxyWSVDdygPh-6VpPtwmyve zdJ3tuB68r&lOkQrR6wWrW0n*~fMQwU>GY4SzwBCpP7kPMrEFv$cFSkHP>`9;mLzk# zzHue_C_6_EbS>IYXSrl@a@_xh&=qxlwN+@US?SX(Ee@eJZF#NR>eCPv9RHU)YS*=N z>y+G5mrD+~K_@tO9KXZ$YGCQ~m8BpxP+4tHv60x*{)YS7ez3`?9&R`?i2?gNE1;ku za-WstuX$sJAiZIJMEybC(&R(=;DR?~vG$iQSU_AM~w?TjRJ8)2{{6 zXkLDgAD)1Iz9;ZqjwO443I6f6Z`-Xxby}x2(*(=S?|=sU+OtNyaIFNp;Q^+D$$1GX zERDHmS}96?<0sB`Dg2X1^krdfr;T_R7GWxKK@q(DRaV6vLR6Qs@t4@BW?GW+PXePGvtGKUo6>-QjJLsAXr^=~WZs_CAG6pl$$8j|NfJOcqcAQMxaarF-0IDzfe=}MF;|-vBDB9IdDik zb_I0jui&2+m=XO@m#)l!kS`hpKhdhowpK1!xaU6f9qz4OQ~rn`k=iljkkfn~EjCtT z_D&J5&XXO|Qiy2aX7ATMBw1bb-DVb#ck9S)iH##x-4)mDccN-@)YRK;A@NZfCosMY zE9TV;#80pY8ERxtl?X{OC`F8>zl!~~F&rGxAHKRK{x-ava^ngDr)~d zij;yVU4qg`C|xt4gbGNfv~(lgp&&?!bhmWpFrF~17Yk#OwrbhH?OitQzdfA z$i}W^W5=wF`8GB+RaS)W6)%s+qqg0@7o+yei6y}sp;vocMlj)8TUi+xX8n=QB>1A& z7(Q`@HqKOUu%z`EJj$`{)2d^lU>?(X$H{y}2=c5}V=cSBW3tA28<8S@A zBXLhKKeud3KZl>MqJ>JuOPp6ZOmAYvr#Uh%ZpINbHc(mY9G^^@*_ztH-zzIwPH+p$ zO{{q(ndfBwLsG7E8-IBm%uYNjSr;1sT?o)pdu}tW4pz~%SbB4d{Fmi-JJhx(m 
zRAaH{2(cKEG=XPrQ1!{^?DoVV@T?AH0+H%h&oLxMA!XyVAxrnr@H~qwsaWG*mDu~~ zP<7OL?FQz6v&aumnww&)xr5Wac|;F?N6X+|lg|-%<#b=W@vdsY3z$s><1>HeDsTxH zZ`UIcgz|{_0H639Sd57l*8K;fJ8rj6U{ZM`gI!4UB24OO<6068%PYfTstL6%%Yl2# zTVI$vYTWMo@=x5n`ONAPt{3b1^}yhdKPk7Z4Y6XMX#Heh5F6H%NvSpgn&cDv30yR! zC=&PAQH>HQ5S@#eh$Q!oy!8v34sBcS9m>AvZLU5|s2r7{qX(X4x7{zJgCKopy zhBxAE6vZ@clnf;(m!j*O+AcLk)t}QZTV5PrV6P9v?wU_lD)|0md-(=ACYtUp_#$%6 zDo!SmN2oH5f>i!P+p}*>wkoUVF@a=bH6Svp13S)3aQ?2Dw|-K=<{j@R&3`%MMyO{? zC#Rj<3zeE7OH`O4sW?ormVY!>|U|q5Ig0h?E?`9pwa*p2L~44?E?qD`re^8% z@bf$G>$40+^*XAl7g?29WR;!-*VG=1Y@+QEYDecf1VWre7E0N* zF8v01f;x$S)WI@6lB0>D{RgQ`r}D*xDiFp^1i)=-2MqH*Fp=IXmpfRBGSDHUa+5A`z4JCY4h>1%ONe zPDdtAo##bv&YJ|(0cL?h~MXb6gP(tY=6tgONKjB zsYn7^}62?QkE_AA-mUV6Zr60Hj!r;R5=>1k+YEFA@QAASopVZ4@^ZM!(!x@LYU*pf;e@Ekns6XiNn%-lsc~8*5T|?+8ou0@J)d zHF*_NTNFpcXB97c1?%FUE?9b|`CLT|Kg3hrrDv znvfs`>?g=mP{SypR%#MdqNM;?wqzlUMK2uRy~;uCLPcVgJ2Xf>>yW$mj?W_fn>=xI zv2e3iNNMaWr?!99IZCS2(hk}oZ{u20?j=Cuo;kmiee)cXIwAsSud9STbW{(P3*ERt z;A_vu$;I^h_(VJaWyx7tSz)8*cHYqdQuE4Y0|NtX!SA>2Y9$;G^t-xzN!XXN<5V_< zJjdubERQGUs-R&46mIypNCyy>uuTL#q#%E$d=hWmv_5@dx-s_VQWX1s zEQI`tM8wL-lHt;ikMvAT+%9|Cg`J^foS&+ys&-0&!3b{u24h7b=+{d7b03kj^Ok6q z^zi}?AKq%E!AA2WshJbDaHpC+a)r4g#OAedcW(G~wePde!9nV%#&@)3X^*!mbrk+u zRKsq(4<&!NWsd^78lE z@Bp^%-C|&DmHb#^o#B46la!(;#P0uK;RkSm2WK7tb{H3Md`y(+$XFHavR${*pMEWc zCpf2DA6=ey$d__*9xQmceN~RyF58_lRvI1GX_$|3wO~&_%j-u}3D)_=0-1};B}L66 zONFIZ24C+ylPsvCW9=pLL~FRRPpu!{=mr*7eh2ph&JRm3Oa@c?!4r#vhsVG>9-s7p znp*Bd*$Jc6=Z}d5!{xHA!_GH9&-!!IKE)gvc&wK$jA4xNM3wjTaCL$gXOsB_b}3jr z)>}5wZHbBYL~~^Po|`aG1mEt}Ru$lgMUKYl|m1A)!mlg$6fZWeG3iKbd#$aILj#i#NJhi#qqFw%xj&$i0jJm6#%G{_OMBWIT4s zw=;sj`!5Gsyu?rtsJ=KA==iIf{FTC{-8R6k!d{BUXD!E0ma<&f0b z%imXe@-h9^^+x}555C}6ekLmtffTmuInF_yG2?g>?ui=qbdSGqTBjP0WXcDfo6sg^ zpcf4gBxnzG8T@LXe{Ac6Z|i!!XYlm(eTZ{(wH80n;bp-r5lZ&KYj(g@N!H{g501#)$D5zC`bCA! 
zoS!DVO%XLrJhAWj+!i&Cha?bj2)T=I&@&qLn#Ao$j#In2DUeIj{OgsD(ej&z}7RM^!k=Bd03qt zUs2UN*ytxup;HwnzZr=E(-qbc77n4#_)Nj`vr(0J{wQ@)@BX8kq%XRbY~*tFZnGP3 zHq|ns_1|BUZ|QdWC7}ev2}2`Aa2Th8KdU=1?#h1(_v~?7{sp&X=jBL!%>LZ}rbG0W zz-jAp&~MD@pyZU5%>EAGEk7=uNpXAGtc+;2b7x{e<(&glJw_w$x!n%S{BYPm&nRNb za>5LkjREaps|1}uzdnGEKO)hS+{L#F3VJ+PBY=9jC$tJeoNfh z1Ba$gMIz07Ela{QI~n}Bl#{c{SNAxGnrCSwk(Lj|h*SwY*OYxqlPy^ zXx>&I_+8nhQ9m&viLHJO_=4VBOxX0~rfqt@!7&*JJ61Bk+vuiDT#{O@tlU(xXMJ$( z%4{*6SQT5TNk2;RYI}-C_i)Lq;Rm{XX^%xy`jLl%#2Y_lJXFQycXVG4?P`I%+}jl> zZ-s66Q`%lxFcd*|Xr8XikyvSpM9*+Dp+9H%V+U9h#Uc~rs?So_-n5$TksKS&LOT6k z!$z-ExODKXjLiElQL0vxTm8!pH(ciZ59>EMLeIHtI*NFIZC#$Gq4il!Ven93R_BX_ zJk1V%D;fSkOapK&?#4!9q={C1jbM_uFH|8n(RxF`!_t>FgXpc&MgW3hat5w)60~PX zw$jaI{`A;&6JuuaoX6H(6vg*>ddB?s9CH_GDJpM-J7dxnNX9+Ud<%?2F8lU_9<&02 z#QVT(l2su760tNvFKe>Yb9!iX}Y`d+2H^UxOE!TL5&QZ4cIlJ>62&l+256owlvqC9+8(|5^r96?5R6(=r!cU z$6D6$mWmC*Mnn0%Pl&DN6E|x!XQZK)enJY!A^XOn85>#juNEr-3WSBZj?Zg(t!Em{#5e&mPe}dPNCrCOnuqyr!@=!2uF~62dy2$cF1M38ac=lGDW7G7 zc3hG91c1md;}(g^v*qoY+Q8%^a>(fMZL-VBUv$1exSU z21yBbzV2m0yxanTxg09Aj#Z!o9sF$YurJ&)Y14buqh|dVLzQOO;GGHp;Q;seQI15{ zA%YL_=7<8V$#PCyNFlSy^US`41Oa6DhHoY(6NF9gK#`?%=1#md(CPGFST@083Fc8~ z@zZ&Q{RF!oD+%H5bgY+`nH4Rm55#=eJo8X6OyifAsAcUqg&pqs0S3^-ZzD)QeTv54 zXVVJ_pM~K65H4&MZ+0$@dbQrJR?28_Lk-+esM2^+;07+_#nDTEIsg8K$@nWu{7*qoO)Xm{KeM)% z2Y{4P@5&a|;ITKEK5UsV4YEI~R=WoK>(^P#!Rr0XIk~xnU}Xf>E$`odm6fgkS?4rV z$9o5(C0|*gAUgN;TFlYTMN*+qn+;us9~P)iEFBq}O%uej`>nJ?50!Q*suuCT%^TU6 z)fWJ(g&4r80vg=PX}-_FeDuq?2{ZHS6`o^aRzRpNhGb7;N@fhgwAdk8aKY;&((5&C zu%lY-I?*c7*-0=el3PkJr9UWZ(M>3y-^(5^Y&%*XDtTlzhdp1{63Os@U?oRA{cGa_ zBigDuHDA4#N$V*ZTi$%Y=tD)bJ^Jtsn-{tK%#zN2&?o*~pzLNfl>6O4qYTYgXFfGj zqWN~kY-5$3p&=Rp#a+wER&jiCQ?Z6G;An7OQFs#8SB6O7Km&U6AMQtHiVOd|wQa#5 zXOM%W3_>EU|9mZx1GOwool#P`v7!V{w4g7(&3`_r6@y%DuE=NcpiHJeq#hR*8l3%S@iWD~YTi>LXgm zL;63sO@G!EEBvIQJ!oE~AyL}>1!wG7EzP$^{<)_d_kmrPqmfF)lOLzrZq+k|130H) zf2P`iKg?~sWmU|s*NtN06SnZQjq%|>r zfc*CycCE+9Z@TyPvPu;qb}LEjs!eoiT_3K0#9ETO|CJ6U9ClO7Z2y5yWw~U``iw5O 
z$>&B2KHQ#Ux(}Y*>y-pzW)56scX{@UuOJXHdo2P&@#KPUGW0JR`rr*jtE8g zja@X!qXlEHY)mt-V+^DTkQd=Z>M6DyBdR+i(nmCP1Ehks%~9rRv^1D|;_+id8ujfZu#CbU?92x6{ltmZU z&_;NNghy=IPnPp5A1b$TToJnAbH{lFEN=7LKY(QTXpXTG;)+YDvbHw z7Lad7gq|xT+Bbx+_w5Bh6|o(hFQMR1itk^3&!K$g+UUZ7_Pp8I(&l2b@dKW@x@=Cb z&YS+X?K#pX@Aw@a;y0atDksR1ycZ9hog(HXtsZBgu)>OQo^3<-JiEG-KZ%)r>*8DZvQ2{2W zTbd**j6mR;QfMr^+dDL0h~czP@_&#w)7;b(6cO(XeqSDS?*}=at;_Pfd)J}k=5g}$ zj1o|&B;yA$X<`W|X{PHbd>2<|HUxDEWary>lu@ENBu6=;po|JI&i3D@T68me94M{2 z)~GtV)nrKCo4V6}1h3p0N;g0njyvL2kALZ-z;LzuGiiq0^~+;E+Y=so^A&g}r&cO^ zIpBnph1w9Q`7t^pb0@Zv>7E^pt==Vf!8kzxNr}ITKDm|!GIlO}Dqac`zTX|a_|398 z1L1Oke`u>F34d{yBtaJ@I-Gs>YUaQ6CGK5Lph`o`6nK4!7{)zoo9kfYI=Mq^v- znwm8RVLvAwtC97UhVHxM> z(thQ}&gVYEfoHa}nAS67F75r}p>M6XPv&~G>wn@mhh`q16z;GR_Qr-ct%N+5o`uTap{e2E0*ua) zttCb@?^0{+N`3~uo?Nv_rP+%s`wsT;O`gxeK)*5idk|6Ob4Di@7{horrN}X9-W5wj zDsXkh$UAr=-g@N{)EL>?e?jk`K9P=FU|wk#+t2yU|U~K^-@z%vgUA z)N@U{LkLj^Gj9Ukl`C$sY4p}C-+E4rs7(^a=PStfm>`l9ky?6N> z3zRwJadJL9QWod5+^@m>f_td$RPxfg;CYn}FA+xU4xirdH#n@agSpK{FN?7blyS$- z40xnhJ9watlN$*1F^Es76RYTQ+j&}6Zv#$>P|HNILdK`x352grH2_2`4-vG~HX~WG zQyl^-IQM+R4j*NIj8B@j@Upr11FDA3=y!@Sxoo!+%8njvmKbY7HeS7WAYS(;5GI7X zVxeHiQ}3K%d5wl6inl(U5|W=}9}mRMtu}vT)@5>C?I-O`{v1*J{5hZ?cCqpIMI(b! 
z#Ik*i?0{;G4J5x&2iN<2lRLWxr2>hW-e8D%W^o0?^E9hAX}>pmnmPHciyal(}fI%fYlrRkcDE(?iVPi>G&qEJgM<=%dC z7L|bS#+4hIO<+Jqp)Ov38-AN%_{(YwlkmZF;h{n)48|geQ6+K1l{d(m!D(qcH7>`r z4}0GIP8JS+nVW^Q94r)xo3|d9Y>L-R(^96TA7c3!I86=J#A`rOG$0c>HENyluC5O5 z-LJWDm1#zQ*dFS}BB)gtk~dg1H5VR^f@KeVtxqR%rHrH;od=BBzi^&7d>hdQ zV>*&EK8-DGJym>tAK0VBZUNJ|bhgqHE;_LJaVFaGSJt|Q)=^KP{h*1_nqvQ4H5o+s zJ|O#8c((e)yJYvHOjViJF_?5dcBmY|+2#U7$7_ zqD4pur76#%Dza(z)61Dos$`4^T}L4whbm+QDjYZdKQ@xu6Tnca}v%^RQTn zq)2@0Z{GJ8{;9t2i#kZ)h9mmF@~W!S2}ZtPYeoktTnNV&Vz}rXcOs8@>vyRrwC7~F zKY#v2rB{z8v}~t6iz+WY<0J312=MBjudt|j@%w6NiY7J)h1fl-st_iAc1_g~!rhZn z%n+V95Gl+r;f)324-3QMU0#K!DT@Pu2Y8_Cm66bY6 zs|V1$0YN4DANllTC_Rcf213xrvF!kYMHuaikEO48cOcx4jBxrE?4aBBDDUs#98gUf z0D2V+r^eq(00+(s;3+xeS6)sNfAGL}Bnt~beeKPDFX%GKDGCtLyQ!eAEORND+B5p8 z>fIE%PPoHnpN*)?9+je^qTAPh^PI|d;`Bld;ibIL+`T^aTqTbbfzPL`W=DC6i?__% zp7}qR5QLs#V`HZ)X1yxXtPW0vdfe{tA5|?><1`z342mx9!I6(VL)mqi2CM2FpR&(U z!KbteKXGwdwxr^Z1n2=MxI=eEFF;bcBBxEw=W01+llY8V{c((pjD||}MAacFLLNxq z*D$&`udI-+gt#Qtsd$6RI3Ou@=Z=Z}TxefV(&zdI?cDJq;KScz?l?#t9Ii#l+b_3b zEO_ig!O-UGHH+Zj;J8I9%s*udtsKPDA75lT<1dSE7q@4m=oyh-*55a{0Ze?7UFmYN z_^EC36=q)cDTZ3`aw9!CXT~-?J?}CH$rsE)*$R)jpSmYoiZ|4`BCro4%KLa)F|4s` zJL}7RYs0_{fa+yY-`NiY?w_+0}85J$jv|AtW9Ql@n{JpFgNQe#|}v(3B~( zjpf=lNIs@f-va0DR$-zw+P&bgC3@6(ojjkOedCCc4B_Tj8A$&8ZAlIESvkyKLjNvf ztZ1-rP#VB(`s82VT;^dizpcu%wjiLuu5NRO$qN|cnz}%>2&~sKf0a3x@Bag;bt~4x zt=2wA2K+St(g+LEWo4^f*hsn9DL1yLvERCGF5P+MQ0Dc|zWE5Zl*falP^S|>9 z%r9-A|9|gC9}arCAkMY(iyv4dJT))c^-ccU7X{~u#4;lgHIq!u(%%o=oj$=Vl};c4 z=t~r9ovjRJ8_GIU365WiGJV}MiV^^+vqwNjTlRu42y6opcMg=|APUL^#>Hy|BpU6|Fz#t>Gp>L=DTQMu|`cD1V-S=7?%!A zcG6C(f#a;!d6xo>lauqGpHMmN&hUcjmo;^k;+Q|PjnRL!)ft#?V1+_c;^WCc*Aq#A zFIFDh4O=@Qu|9GS;nVYIT=FP|=QZj|Dx!{w>PJ&poie`c#7(1F9DB6hTYoGF%1*&Q z@ip1q8YPn&7Bc%Mb)TIrSaL|0PA-H72K}G?bk0={9+HuDfv-F7DpM}$#6^=-Cv4EEi- znIJw}&DQjfirXguF5Sp?DMB%EaX`(+g?b>@mRkH$sbjS~kGh#aJD~tv@|CTO+OY&~ z(_gc&XP}ry4=CM$2VG{^d0!sCm*LnQO85FJJs)2zXt?uKy`=BgaF*&WjUri*UURC= ze4S#6wjd7$-aQhw=kOs*4#l*x= 
zZHopW2wc;_)RQ^idtD2I{JC*@-sEAg+Wc_{=lDeA4}+ErLEQ<(+XJyGB0m02qx;FP zp$zoR!L-xkA#DRd_QRzT_{OePQ)bY9x1yrL^;bqdU5ynv_;5AkSyQ@15J4(+qRhXD;=qf9++Q55wOc(y^#{7xE`>sQa={VP zbsOE^rb4DyuG73PSoFM3h{07DgB~~j{{BN5;#f8d4O<%#LV?AT9@iI6fU!+3Xipy$ z6m$gugn)zhZEwEbL;w50`~6KzRMoJ?YFZ3#zdpyqg|F=SVl+oK28Gi0DV^Mo0ZndL z3_HJ`EalB@UY#TErWa|!2EYX*SWcF(fFP}^R-};uIrxFz18yP|N*_lq;#oUVVJst$ z-)3FsxRnVxopS)%h@&n#=`;~92oLS#t!Uu6GWD^2P8OY)$Gi?;g}qI?zsC!u_4U)a zOb4j&LxUehfO)qeJRc>N$p>+)@eUFhcw`aLZhXlgF*-<$w6^nr_Pb{QjkKc*n$}cpW2^|{)w#My) zgAa+kmK(}G&EQFU`0!yWb+8N>E@rCth53T(DjopA?`|#EhD}tpu(e1|=Z-tVL)<;d8j6|)D?v;OM4!QAcXb!wK;0%rx#Twn2DB7m9iwe_g|y8( zN(Cw`jzW+A-SbI8Qqs*#We>Fih-G`%3Pze)uc-n23<9uzS_97^kJZ!zQBhHF6&f&` zN2m$l?d|>R#9bbLcni$+eAU9(X=`4~i8ZhWrKX6mY&`6sY&eb| zzZ5#YKHA>OfG2O+Mweb+b74?3$tCNz2NO#=PZ+iZ;3bRtq~0ZdYIJigWVdpswE2%v z4A9yiA7~eJVbR>Wb&JPh>E_+qp!zn?F=uDx5Sl{2!v9=Y;vpv55XEsiUyt_8&v%KVeSOig!BzLf zvTH#=pt{B8-5<}{HY)BAlBsJ^$Q)eZdT7AF!6Ao(i>qQWmY4B1iSO_G3a}8350+Z~ z9wR;d@j7^)s4lo|K4ReJ9tHn97(8YgS2riqrfI4~?HV@1pm|uf)pP|aoEhk7ADR;~1J3>zv5c-wz)t{MhI$nE y1-w}Q-FEN4E;ms807D#Xn?(XjH65QQM2o~I(#R(o<7I$}tz&)Gy)>rrE zrD}_1cV^B^Pfz#LPxlb6ASaHBM1TYWflwtSz9@n~uW&&inCv(3z$bF^yehyi9%oT? zXC*sRXEy^!6OgQdv%R&Qv$cicdsh=jCks1UHbxFcHv0GG&d&Buyi80s|7*Z#=V-<> zO4mFI41#Dcq2UAqp&LM7FvUVe79fxvoa7f_W%sP(WjF7{?T3ML`Nc_L8)TFEBc!k+ zn$HT@!U~-X5o?{J=-7QQ2kRdZa3-9x-qX_lkU{v38CUQo@I6jdAU%u*oCZTuN(vQN zNJMJNl*>X4d^%UkWPU->oEfw0dGgp&Ijql`E)xd)xC^I&5#E!NlYjpl5CZ)>`V}$; z2F5u794_$T1_u)TmoHy>t+3UhjpsgAy#>Ck{Hjj|e5s4|b&<)Ooq)&v?7f4713t>P zNna0WU;jbljp-64hA9D|91amYD!gaIYY}G3sE`<2r;?;ASkE{ot#9RLrQ<%mhAuU{PH_RfXTOs z{kJgL61?A-7#Q)$7tvI+$AV+=N<7NZk%c-^5W&q35w`Wv6d_{YL}uUF|(MAg6vRoQsp zTj?;BC}p2`ywjLOe>#8Y@W)NnBjcrrY?xZ5UVT;`$kB#QdU`qq1w~wZys)>ow?#)C zASE0#xC(m7)El`Zc~`5>ywZbOgHy!T0LhMtFT2&llHQP$&p%li0U0)AE{oVdsUWzR;&C zuFbVEP8HalcZn)Jt)A`IE&mP>AARkc8@XBdJVR@q7_;q#w$<75F;gUKxHsX|+fB_T z(Sh%fyt@2NnoY0A{O}tMQ>V^l{ECC-qyt(#pH*XkZBF8;z;1HnD6Xnfv8b)ekxkrR z;T|*)6#puEDRiI`?PNj!yhQVFH-?Dpa@}oI7V9YaNA!JON|dU61dPOIWEsc}5z&TaFm?Jse-1dg5C@gL! 
zpPZaY_elELj9Qf;+}19{yJF~5u1GE+OBGKuj1z&I!E=Gy#kgo+5Q=n+Xlv3JJnsF{F9h-;e9~>#}#%yoZ+hb?)+(%a4i` zi}XkAzx#dzL(NeeT}C!k7f=2EoqWp0+(g2K1f86&|}vu*6R=#y;Yi#{Dc*x*9k zvM7vmH>z{Z?Bx$$kw@=>xK&OuH%N%yG_D|_5~vHzwdf3_wU8CgTPX{;MCCyIc6Nk( z^Jc!=EM+ugX-%cDl@89d&29^JTg@sEBOfCHR>;p$b)cIZIMhoQ*=_aCUe&uFHHCiY z4-5`k+s?Nnq^9^v7`jq{N)PJRdTtu4Yef-MqnrA7M!K`25%779@_QV(gzB;p5mFS5 z7Tk8ulVkhNa=!)(+*1VK7Zx|TLeDz)`hi^Z88htXR*pP_Nl`gEn+k|e*3s36Hn2I- zoZZVif16~h%Yzy8v&}x65%eK`E3S#!3(>D~1MNKbn9t9*0@K{jcqqhDcNrXKS5wS> zg5Edprb{;0`4k=h?UxK)gHjKS+N=0laGBAwnY;qqL{K!vPoWcRYjccR(&3D%S1o-+jXl5CcG?heZWW9EcI-63oG5V8-=>ErY_c6 z1@2<}=BLfTJi}X`*aVJCIyq6jt-6AikXorc+?V!p(fGWL_n!;(=Fu`qamTvue(e}! zgKehU7z!2@b^h72NWhhdjY1p^ynYS#Mh**Z1nBL8m|G5mmb7n8H-^Mno-1Z4DAk$V zD0-7N!);ObfD?S+szw(;GTKrlQ}glo?F-j27f~Nd2wh+bE~Opd8=HPxnyx>|^mU?M zG8^jOxx`+oT%N>bi@(JBk@1ja#J3YvZ%W`Ms-Er`yOL1XUb}U zc1QsYs1z*lePM&TZ$?`0K;NU?cW`VtA3Oz3(P)tXv$y_I__tq6y3vW1uX5fesPHE) zg_*`Xw=GX=@#+i))~Y zUXs!$e}YN9U|mWNkYL9I^MfTwYr7KJE@mxnx=EHq3ofxFOi&WK8I{^CiLfrxTAhm zDu{LVN^EM;hjS5!nW?^?S)siYcX({XOj*1B-u9$0#AV6!gFu>{&Pize9=R)4d+)nS*aClz(x&ESo?1`SdfnOv0JCzCsrGM|BO(& z3AiV~8)*A3cO#~cHMPM06t0ilQ?E?W3qtw>_4G*wW2Ue_E*#w*7Kic2aza+x*Tbo; zN#<%TSQk9RaqZpK}WtmM9LLMXU*O?pe8N;;nn{Z zZ^DEh#+TVgxY~}|nVNcC`0THhC78+t3sQ?PgF^}$G^TCW{nfNGt^?SAc6HZ~&XC~I zRJ>TT&i;_2lDH8U*wuWIWF$trb>|kB`qx>ri)UnN0TUY!L_0WD{KgggLj@5oC)q8IJsL0F7ABX4lte!tSJ_!0r?p z0C0x~71LT}oSrTTii2KR&wB+z)JE14jQpJHykAx7`haKtie`QbxA~`~{$+Ix{Lhyt zATc+exTrMl%z@^qnl^exD6lnTby%<($4br}7-}g`o4TWI4TYB?`}L^+YJTF!g{TdV zX&r||&nroSY;e0B>n+qZc4wnReSHN#h{sS+Qii0Z;UCPD4(v_j(`-jm0<-7bCX_8( zaEam}M6WWPV{sgRS1vftwaBe!V%*6#4ytlHQ#vY*!^RFNewD1nhHEqsW6~XrD3imV zP9>Xhc325!wO{*$iHYg{bUxt2AqE)1BI}vhH*BBQ)w@jv!qs#G{GJ28p<3KMNXyK- z*3Dm1+&ANRFgujSh@P&rm#CH#_gi}X@LlaRruPU1rNXn_ zT)#!{sPcD_K&(PD0HD7%C|_}(C#q%FlG9TO#Z=O0IgUDBA$;-EVz&3xHIyu zySt2u4oF~UlxuZsC)4g`<_-YfL`djtXNI}`puPinY!gt$!=YNn zF$9T)dSlz2XzM>4pXp0T(H3B&5Zi96y9XUWy<#PHxzI$RoG9m!fWvo^ zz7F>8$_}I-KlE>HB~-iU)b1&uBB6-W0On(QeIGYB^0MNV8;C^u)lCTl1JUDN3@dY| 
z!e2C!IV2?hWqRtFMDh2(6Gdk8b&(9}$a66AL1~aPBwoMvQ~*&t@z?zM2{l3@DFvZn z4llkHs|`s0v5TuBo?>W#XW~U=jBu5%Um#W~`v`rsh~hd4Ijpai1dov*c-p#~bmjnN zr@SbC$L71&CPilZCu^7gTATMnTPGBRN=!FvpDZ@L^_R4<``ENP$!2_9Vbn)rmA?OR zF{pPYJ*Gwm>dAvCq|xIE8=gKD9Ld}as2NBGq4mWO@{jT=*iSRmtGg-F{0kz3fBtfq z&}U__@8Jh^m@h{@Rh=0QWRHy1W`x+!GR&|}RomV=;Ug4G%k2%=c^jC-{87Zr`g3yH*ZE4BLg@@Nu z{*ampyK=JIj(q+Af7U~Nm?fw;)3g_8TLK8x5L8MbCtY<8XclKmM|as z;N{xcR=j&Y#QyoNYtm>+cH|;0D-~u6_`F|6>;C^5?r}pf7s-7P9QYaxBVzjPZ(i|L$YgV@ev!vh*<(UaefzzIOEeJh$Omq4UERX4++ zLZk$>wZa1vs^gCL9!pg&lpsFOC7ErJTMOLo+kbxNk3-!|?XX@6g{he%MFY9>q3)6I zsK{MP0+;9)iaqW{Oi$@WRuFDuQ^pnB?vX2{%C6$-Kfn2!*lRHDoTsSSG}S|EzwVn_ zwaboJp1Yc&3*$906#q#m)YMbf-R25mB>Tmbs~ptSAN0XB;+;tS8Y}21cn26pMB00? zM3sx}*m3RAZ|&3)*?lcC%}pI+9jnk zRXD0z)LsXaPxCs2mHoy~Vc#>q#D`U25JY0O=Vlu)jWg`j4p+*S7AVpoBj3QI934?Z z4Xo)~nCvazm3BXE`hl2`F6g)ajjGN0vr#D|f+T;efHEHt0*#AYQN2S}dL?j7 z1|}xIe0k=nfHV;J|MogD6|vg=`)TR;9eD+G9}C77>KgJ#yv-23EiH&M=#x38*dmw= zv^(6eaGc~E_#0lKo*sSP{Ou_B7*K2$b_ytUyyC89zt6;`6z>#S*YH=!<%-|)>(2cZ zt^|CIiZypLi0BMkFBjCA7mUqBJ^mPIW}71WKA>7K3aHD~QZ=ssVi5d;mAPUVpqM|W zc`|03=YdPwP$&MHB#P5AI!45{l z1nip4f4nzs8in1P{1B-8fek#z(wbV-&nc-&_1`a6;YAo`3v9UGV{`y?P5nBW}t&5Z0;h1h0We zddJTWOx(>5L;4Z;R^pR`>J#m%IXM|sf|(clmb)rrIG$)cb|t}Uld*I*X19v_7fpD? zkH9ul=yTKgUY{k?j4V6u2)t9pxHabiu@Er-@Eks4zU}J#@;AbzrS}NsowL#^G8^FEAYNrPVY_yAZ5(R6o zPHT55A!c=reyfM?EFOPuk#GrlHiG?k=aUKmcMNzE0%%{~OF&!0Kc(iBm>2M%34rbC15 zbPW)gr2??e+B86j&P|X5KA}wyxcz!z|PLA8cCS9GNuX`*R2@Y_Ts|w_5m9m3sPu<|Kj{7Y*oepz8;(TBI$lNC`drH$9~&A!^{6fs$XO$Bz!L3$!k)aF3B z&ckINNgjaTW|GQtv!`d)1Jb>U?y|kq4%;QJI>D*e| zWv9EBL7g)im^1ZnY(tzZf3=_CfX>~Pq+(O*pZHlFpfTeHrnr`rkY;mcXr&BIgDg8s zLzeXTu`Nj74t1s5pJ#1?!21jto$U`x0Xbgf6N!Hn#P;$stoI$%s47)h(`2{qzZDX6 zs-H=jOsS{+V{vq07@mongWKS!EHjL0ZH_7!T&9B@iYDyMuk9q;HA-1>McV=8X|o(T zG#qqSXi8;BMrwN>Rr(pwD;=) zlmVCKyWy(uu+|Cl4V`qXI|P}>X$R%{aL#PAJe81Bf$8_%#i~3eld$9aQm30Ejj11dC(QqRw+TdM z3nxbXQSUyBh=_EFmuvn?qr%)H!s#L4KjvbsacZtMLFJt-HO5hfrW4jIz!Q!ys)8>U zqO@CCRP~;pZJQs#0JBey%ia>0a~r!kM-Uw*l-akd_nZmw)%!*F=l#y!?x4Ykmdk*? 
zAJV+dSG>OrF!52S5(l@D+T!Em0X<8}^?Pn!YZxH)%L0_I)^wC0Gq#$;=xeDe11l?Q zSZpj)*B{YdJb<7AaJOxvZOC?n{mfZ~s$UyJY~;NawVu3BO7D@3uY^TUK9`WV9M5e~ zV6jtk+xar&9~SGcHbV5e(TMG!IwzI)%b9OLWo*Y*`tn*J=P`n~c{?j5QjHjSb!cRS zf{d(d9|#oRMQ&{w6L8sDyJ+Q$Mc+N1f{z8MXlWzv?%Ye!F%Y$LT+wtIAGwu!b~@9yA<78iDu3m1t65o(87IzKgStoq^%( zzS3>u|~~yq$SRf((UH0{s{ghDiGbo3c!mQh3BYA#SUbfkK3_rH$Wm> zcrhW8aFrv8~ez7Pp3tNtWKm_ju8 z)9%FhRE5qN#O^H#8rRC3;vq|Bhr`B;nqfwxo1+E(+a;%`U7;5;fLCZ-_04?#`v?u} z6o=?rd81raiL6bRE49S4Upi1ai>*jZy^>o<+FfI*25%aZ& zK5J)*S5g-AWN%o)jWGpfS+9nZ-sToq_E!hK7txA=&4sM{0(8Ota%&?~KRLOt>B7u9 zePkEyp&%C?JX){)2~nu$m4R3!NneCH&*lYg*Yn@efL>y^&OkW(J{*8qo-W(JbJ*a@ zasDB2-{(vVveTYUDq-V%B{hOV1q7L45_^ubo3QUkK%1wuin@>2a|Xk>#hS@m7P&0z zC!Cyf-K5Q|aIk$?XG<&O?mehyJ?IMcwAPESZ6Mt>CS#=3zD7*qdiN#9eVO1 zb#NsNCvst9TP zr8B{2GX`l4)$?URcn5w#TW-yCi)y?3p|TPJNhawt(;kff&6P2KHCChzS5`U`oL`N% zC|ca5ex?AR9skFL$NPIRIdt!^;3aK2=Y6rKV;@8zzo_{nQ~;Sxo#}^&ff5| zi{Z1sw`YBL_Jfo;DxTQ6HBZ{=uR88C(KY`rTEF#08kTga+y5KfYvB>d zEmj|0$9foz$^#MOWYJfl3q8H{Nl8fJYyMU?^zGpuaZ=&Aik+=7m%}_0>1TY-1YwU+ z=67YVxGvJyLgFyF>uWHcMDvz{W5QD`$!_S@L)C9F)HIb z-XE3Xm3SVc%{XaXhy!3!q@&%2X@hM{%1eAS8@BU-Aesl$Oz2M89R75(d_iD%o=*msp zXpsd8r9#16S^~4S1>)y4;)7i&kPW;v*vJsh6yY@y`%-wL09hb-2kj(Y?!jqUub2(4 zRN@o%m3$cRFuZmKt(`S5gDG+M;ja2+Jx z8AYC`o3WPH_j4Dq@2VWg28&`Y=26ETsaOlwngCLYBs)PW3_&?$~E@jKlx$^7{wD*LZ-wm?Y6rwlVxlVRM-1MF-(8G@;uutV+tQ@MI2n zIw5vx#D*Vcs=48NiFexvr29?3nMpK-2>s<5Tf?2!$6l=6 zTZ@8fS=Yt_V}Af+i}pFnufCius4-3YzL0vKZ-aqNDz&auoICK)vE60Nd3>+}`C6_N zsB5_(`;Z1krigqQ`e-&755gHv%{N-KJRlbOB&JNyrm1~6Yn8M!T~B$}^dGcRwl4g+ zS0m+W%#OZvLk^xZhbCH;Ok;kU_OAzSs>GFG6Y-ha5+ShrTEEv9$`xA;UHc9wRg_~2 zkp;-sLL~@cY_2?Q^1!q70lnf)UCCkh;iKKPO;=)Loxy2RB~Qj=+%FC4w5u8E-rX#C z<+wo{N=Dk7>p8#))>-!9aB1%||2JsC+j!Oy4PTLqoa4Ir7X%=4>b=sEIy_GJTWN0H z03HB?9zN#{ft5TZyG|q75A3Kh$3_bWZu8goT-vi$TlZw&N9b(!i)7heAeE$R-jdRR zL0!hI$p`&wBO@{Zk^LLcOF_X7c0J86M^$qvOLuEQ!9Ev=mrU(}WL#)&`_1%4{-4^1 z>XMk49zrY6F5C{y3p=<+r{L4nb2wP9)9+fVk3Nq8Vc>+G3V!*fS?|rFd`=Fe zRy2VmHsl_0AQsE@Il|E~bH#P8gDwfZEeGwUaF3XS6QX!+NCIBhh=PZeNdx8jsQxlS 
z!>!;9b!wq!bt>d3Zn}_y!yyxsCL=e56y=Jza@JW7!uB9k`8}&<4pRnjy4YAn2+{~f z_#Rs(rrJ5Hw;GAI54-PP`0_V<67WBKWVb^r*GlcL3i?($x}J6NCd2k5P8#g%YBoZ_ z4GqUPqWY1gVLpHRQ>)?9!X?@8IIv2nCDq=wg6-qEqme3|3I;5~LkKCE#kOM8rJRIp zNF?LMvwl!gDW4A!$?b0L&MgOdpouK+Wo7lSbV?~1j3_Wy#ihxDDlabOVud*MU2DD~ zFi*x0x6r}2{i}99AP%eU7-DAUVzqueTJ7ci&9kP)P!kQkD^$?9_lo|J`0UX z`>7l0JSx7uCW3mB<1>T18?DP1Qs4Q?&kmuD5f0l~Y#Cb4Zu@}_y>CE{q~2kAb5CFG z6QGuG%qVrrZbS3i*t`Td+-jfS)g!&B8x5U}xkk;X6^FG;lBE=tP=5gJBH!rl*SEh< zI2L1%+^6k(=x zo%NX870lT$clO)1H@SiXY;!`ww(76vPSl4@tiZcH$*sdjSM8KMvnvTx030Afz`4KQ zu_^4_*Rpln=YTEuEaiq$;f(6dn64+mP25eJXnwK%3^7tkslB->O89Qg_wDW$`=Jy$ zgH>MEALUp1q+>Wh0Ge-5XJAoa!wo+`cTb5rWgHw(uQSd^p_V~ z9@~a}hvC$%DH~vV6VkE=!$_%k1O%~*PM1sc4Cd~0D}Cm!4FAk&*>hIs_ZxLd0-z!- zThPbgw#Vo8-sL0Z*QSYiKDUOgLEF?gY&&9eivE<^OLNFri1=A|eGtsv#-%4SD6dl&NyYI8gmxbK;jHTIh zpP`oF3>SwvS9y0K&F%E!P5Ov)e1f~RSu2%ogiX@kjvu^CQq z-eK%~73`m9ymch!jggK$+3rBSEIX;?9|yJsztNjI_S^gzhT5T8U5V1f4gDy^CFZ1F zyPezqE**Ai;t-Mx7Y>F~@@OOz=S$aWErR9T&25-eNVx`_*~xreT6N^g2Rhbo73gwn z_Sz<$-iqztMOmFVNhky^%a(zh2Zt)^DS`7nY>Ez<|BOuRcB%W_5cZBOud;QDsdeqg ziX$hu;E?k}`OGGp6WyXp4#2&^DADSUOfM^c(dIIVxL>W%8J1CinZ3scthBJ9_0Oqy zQB0X`)Icz8=v4cz{X&UkPdRlT-9_KHvfsPTv9TxbRiM{ySo_xe&tP+s4%U!Z)sqGu z>BO5MzTMnB>T+Io8jWo^M0Wrm(;OlttM8n&{^i3J`j{1qf%Ohr2!`KjdlCW|1#kuN zNZX*iz0IOjp-j4HCLW3YRbaX8H4Wly$um%u73oq}zg0H>@bKZ{a!54xzg!4r9&~hU+XTR$pnlAOU@=$dTJF z!(tTjE0(>6WcoC3R3(yP$C5V`YOTTifOi`cyHZ}*l zRq7V-Pj;Hp7##mr8*OFL-*jV_UmZjik~dMmB}2QU4{VMJ`H)gxEP5DtwF+eEC{cOO zSEP$xEy`E**0aYu~8Pxk%q-1u4807>pEo@X_3(} zbz&Pz-O8a4P#k1<%Kk37xRG~RQ$Xbwkv~m&#ejKW*HmEbx6meW0J?jDC18`9xcC7KG%1St=pWnE;45W6p#C5VT&g1c zn)f13g+=qG_^EKpSAGW_!^={JmC zq#uynn^X*w2&+m>*PA{s)Al@iNWNQQgM!Wkr0Y9hDa%eADx~0(yc7{R(SdklD->BBIbxGBrvX5swsWq~hm1KjN=t5?v|E+9c?+d~tsXI-=s9>+C|f7% zD?v<`cvC$`@vHcJHRk(V!;JTz6I|Wahi8AQD3g2M2w&8c_+;0&TNi^X#(}oWBlmQT zONO?hr~R7(h;O~`b({Y#d3YGlvZjxX%s~ z3vAGs1$0T^2J@}5>Nrs!N>`On>cLi>LIXt4U z?exYx1gsuT~pI#a;%5W@b#A2yH zpD<4p`T&BZ*CKC?=Ze3kVPb_t?y3KdspZm_?csp=sL#9qlT&3o{QmyV>)B9J)#ejc 
zAU`hgPG9n<%$X`^%*z*6-}bwWKm3T*eL$S@uBAQJ8{s&e@bDvF@7Eytzw9AM3+a7m zG(`071n=)I+8qy@rzRPXf;~1-!=juYuPK~?%pVOby%u#Pq#@%?V*IG&^+KXLY*U;v z-pA`cGDv+~^|hvAcI^YkTW;aF4=S{}y~VaWg8ym;di-wY`NtCc z)`le808guWO##YnS7Y=e?T-xt*dVSNLZOcNi;N@zAd9Z3dIUkJw=!bIm(Xbr|BFWn z@2sfve!BW&xOaXy0L9oGFCw+UHXK58wqrVO9g6lF3@v?gE2;*=ZaeFAN@=b8aih1e zXY$06QQ)8@*Bu>Y5SfM z`4DrS@;Sd8-T_w&Q5o{1%L_8w_CCt>CR@KUtU2*`>c`$wk0DoU1IdTkYr_7;w6tQKPGR zd;Y-;2UF-2dmIRx5HT=z7y^=CRXu->b10%Uc`ljCYOTZgig4?+wmu+{aTYVo#$@G6 zoDXnNad}Q&b1lxyvI7tAis9tA7xJ%==c+5Uy~PUt5gxtf>ttDl9cSZM4qfNX@h3 zsOV4jlp^p1v9!83CpIe)_oG~AXFXe$nxidXwyd2`P5(WcucInS*2;qO!R7Z@u;q;Y z0;~Dt>o?C|w9XkD&FD=>(u6H7X@SIp!~PwMQGZ-qoWdED@oz++QB-wzoY_5!Xc%1A zfii4IVINbb?ey$h^GUEb(gtvtR(uCn%OEcBrmRQ1POl7t1?As^}Uul-XHM_nf(4^KvA&f$gKardkbe z?nj(&FWl+&iSbOCE}P%@25Mqnzh@@w4}WRKQMaD23IOWv{V?f_skPuUP)7S#=((@pz1RqU zxqG@D`?hr}|K{apoL z9`DM3eYx{R>sQ&id9Gy3HyID)#Ystzi8IHi*4)14(Y44lH88GzAl@-n7?yb>_y?p`{`zpf3@cc)%xZgmsU#{5n19@S62bp?D0~e zs&1Iqk=bE3G~)M_Wfz+;k!ii(YsJ{D$(bj@>a@ZCt@F--R|lwG+95+Xj_3zEf1s zh`fU4PXkw8l?a?4hFs7r6LA=M5(mj_Y9Ts1H810L?} z9`GhB1!`wk*Yfra+DS7`csb`)^aVf7w&t@msE zKIfDt`0u$&i4$w~9Bw-T$uu4PgOh=VVmA2xJ#Z<=Z1O+=s79#gEu62Jt2WXW+6V~D z+c=i{>wfgjtUNHnn2?R-`y{hmn=~7)@6Lp;J0eX-(}5O{Yy(I@__1--riC71rMV6R z9tJ|X+p&F?DQ2oa)?%yQr6=U} zI@Y{nr^JD6;L~dHnn|sGv}*xK%K24P!^px4pYen#X5$AteAzh7d<-TRuVA$fzg#ol zr!1S~!>S*?e3(P2J?3VGle6O?wUbMtqFcKYb}M+n(N6>ym!mlBX5&<49!|WTXB^G7 z6cpdfd!@+~S*3h2&*(A!#7nEuzh5$@0K&o>idp-8mW&plR*tK3tjQP8bzjEDMLm@1 z;(CE?tio-tFz@Md@2(#BH?dpH`uHyzPEsxROpA_*Ub9lSR^<%7?K>D<^=Fo4*K&k4 zbs2u~fez80fs@-6JSr32@Wnzek!9IPCljQpPv;fEuN++Ryl(vM!%)`?N?%m6V{TxMo0|;=70=KMr(NN=2m!qxp z`7Y9ETnOE$|5L6Yz3263j!;_45ZK^^*Oth>iz3+&yS)8%9pmK4ood1y6OX^HeKDCi(vMF z969FEbyK)iIHb#Xt5C@IpDi-o5duAFG@}_+bmRiYRO#prs8s&b?vUWi*h`14{`$=^ z8ipFTKmdalyx$8dBI2JSbbGQAIqx0_+zD`-4O6R}71oZlub7h5ey&iIzr&QOy`g-g zz7)RZ$BIesfGA|?8$n`b4}%~5F2JXl+m9@ro&OO{cE5~(@t&%q>#y3d!E>G8q})i!#PAsAIe^aW zFgr}tQjF=t0qq%<&MZVvRB$veG)h_w7XfZ0{q%ct!9T=gwF-$pPc+c{=f~>-mek3#U 
zyeL<_(H#{>j)?NZ19?oUyuQ)}<&(RERfDQzmp*e`(%Vs=>=B4cwv9!jf*<)gJRB}e zZ}Mc*$`F6*+QwC{F{{Hs;pym$-yYd2x&&yI$zX9Lb{~>j)#1aF?i%L1D;>nw`nK9_ zLNTdmn2S7wrs@rIOwC<;x(8!d-$HoFb%0Rw*8J+$KoWJE9$1kexe2n{mx%{|amN;la!#SkKu2vQ&3#*xv+Ee+I8IS5{7wc0xzjm*>Xvv`>HP_fDaPRFwjwN%m>^= zbbeS`44}$Utc-h~H+!&AEZ~w{hNbdhr?bn>P_j17b8Z9X!vef{C{3q&L{xBZLbrks;R=!Q= zVnjir+dr*!%?q!zprLq^L~*&4s#LW>3aw_lnLN%K8QaXrJ}h=PAdTtkWviD%@R5g& zE5(D~f4A`e+*iny&Rlu$aW%OY>s89AeghgSXovM!67k+uDOEE8niphKd68cDX0l`mp!7l5OCR=e|Br0}68etFJr ztFjlgTSoqK%l?+9mAUHU;$!|^43plIU@JsitXXpdi6JEEQ^&-X^T8yB8}Q)t7GRWtd+y^juU){6S;Fm}%g3r2RDQ`sfbHgw3SAw{`#17h*m zq@(RZBSl#(xq6ly+qYZra03Tk1WndD_&@&({^dx$wv^G@Gu3W~Piu>siD?hiTxqK8 z6C3>ovuFnE3LBE)gfi8<*LRz&Hbp>!9Y=+MkZS={Z&4nbb!HJl>>_+QUdQS-piU~WSqOr#-+>(%?z^Hb8_K~#lPQ6hr88g?G zWEymrt-bx)ha7ksc~i2sYT4;*Fm=wB>mA>O@)K}_I@9}JT2*~CabG5kNBAzRk4Og6 zCI8OAI%VuNPu8?|a#@QJ2`x}59Y|80 zW4rr;yLsANwiX+ZP#_Z8u7A~aT?>=8c)?rqA>XABk6fx$J(=3?1An^8*A4fDLLy`? zp2ftIqtuajP3o!it7~cRhf%)~zSu8E_Ww99CH(r2v7C2y4y^6 zd9!W5EA(d1_0E?hR`?uMu3rZL-Z}_4WU1;6RpW`xEjOYZu!8F^lFwpInUM8CK;?)5 zmGf^^N)7qos|B8Jw0q8(kQRLZpv~Ct7$1!4L-c(5qHYznyAW-P$k@RhysW`NEDj}nVdbW}kwQ-05%3wCoutB^~TVhWql2UK>}GxgR0ym2=D ztAzb-8QZdblNv*&5VJy0T1Rxy=@mCA0~eV-@n!LdQ>N2#1b zF`*oo_$pEnm&7#BXhg%kjP&W5kAgxUNPVEp>1rL#12i^8UuNm$f-&=%ZjtX%xq9{s zgL|UZi@LB}^GDq#S7!XO$~S<{rDZV~{JW`;vCY!n^4h;Qe!SXJUkxzEv?3I?UquR% zaN%$ZYzEMpZZrY3n~mWfV`tNTi0HcN2%U7f8?t9dW|qFFKY<7uU* zOig-&iUET&lu>8nR9*W@_WK3w1oL=fe&{pps!QLc+7vbG=z0f`+-@NJW;{UrJR&Um z_u=ev+%*Vk82fgR?S>E=hZ8)Ji~>Gk0g-5MMFKGqQj{b?H z&qEM~{?bAI*-qvT;mhgEd#e5eZnsC7F!j3@({D+@wYxTJ=q;f3hN#9ATZFaA53Tox zI}#gC<7d+4g(z*ulyc(9y8s|Rq&XFlVs)`$4R+9a3>~zd$B!WjfPsC#>XGv;i9`(y z_9#{D$>4bV&H{}-LIHyu1Mv$m_U{_ooE#5af6Jm6*8S2{3GU3kN)7^tm+}`XzKia< zIz&kFi4?C(JJ&$xs2cxRbJ_(^UUqg+O6e(h&Ik+|ZRTtFJnj#pP@XYrRAcZZauptRIVOP8=n zcQ4(wG)gxt`OJFX@B2HRw~piegFhCQxn|~?IcMfP&+lha1g~$jS!hlkkI%$NXYH&d z*|;v^2KDD~IS}GL1@?d_-^n35gonIVI{Jyvo&pMQ#^qebP(sJ>q*OeoJw?v$;m5S` z5f@%+;$5>k7Svk%bdzbo`L=%n1P+oi??U4^=J={9-*^i-Zs8#ke;p>Pzaanm`g>!u 
z4Bo9<1Vu2pf87yp;1PyC*wZz_5KFnw{7H-Qx%$HIm@H?NGQJzjO;p^G2nXjRu+6A% zpqX=LVk*zU+?9c0g5_Tpu_X4?)Rd`J2GS%led)uo7}SqJ;e}|iESMYEHc0+Z?eMa# z4(J)<+hbMhm_7G#4}W7WwKpMnl8XJ>b~e3yu08+U#b=N&fr}Dd-cxDjwJUV@36>fF z(aHFqGSZnM%dh5A&T$CopEyuD>AmgQ*reVuAFHopvNqAUiy5_;(JRtZG2Z+nGQr04 zRIjto`5>QIhw1E6YrliGyH1$;V^`rh$;((46~hxcB6n$9%hv+dZZ*dsrCHZ zn)^MbvEq!9DB~@D^&EKTD-*-6IL8Bq;)Z*p`AP9&+8&6kaYjup<>9^hc;NV%)ljMe zTf^jt@P+)X{$4iyy++0e;uDkS7?n4@E5-#o3lMMZBq1Km^4An$I@n19uK$x|~e`xEPv;tp>7HHJ|?uLB;*-pnU?GwAHF>OpeMP zFT>VW(itekY$ynt5NDNxR>g17EbcPHEug>039=P<*h2v5fC8WGsMJ<@L`DGE?=NvU zARlwqWw=}v8{NC#fo_c8@?gY4)A1M|Bvo{bBZ`=ueau$rTxS;SH&tbyKQFO=wsFN% z`aLm6-7dZiB$ga*+#+Sh_}eYOd1KwplX!Z0&h;`$=)p&5wa~{dU11@XLQO0MK25gn z;Q7}b)2_NG@~3qdHq|*$+EPUUK&L^AJHtjxrR+;1#Ont$^R1wd1kH=sZ|wfgxOxQS zujGs+w*FbBBfk|=p8!G0BF-aKjhbIA9Wlmiek;u5q;cw`gRIt>)!`F|c8&S>*`=Fq7P`RQA>)9kU9`=Al>BwAMj4@2oVw5#cKGrMG9Ya z&Nun1)i|@t#6f7;*&{8A>*5LuAUdDfACs_A9vv#cZtgmjTW?k>wK5Jk5Vo@-^*$(X zOw&V4pQDJ_x)H0uO}Wb`EEZTHP~w>T%SA@?#*ACeYzs+Ah!lq%ONr`FQ^J#37aF<5 zmucc*WS+;9CKs0{NZ@L2f-c>;9hd(8(TR2X@El_Kllbo3%CQ0#?(-Gbbd8(9`O4h( zzS5Zy!TS_NVZy50E=>zIah6{*4xlhv2&q7?KJY)QB2qcu>p%mpjeAXs%bf0&~$#p3EBEoh56|D&9k z-{%hBz(LdH5QbE=w&bPN)X26S2r1Cg(x%O4s1&NF)@LGt{&aP}F3HqkSHYf+?g6s2 z7_rp1ysF_vZK^cdC$DHwTpbT`-1#Ob;J;w&J)h**LmX(7VsgHP7M zMT@hK1iDj8_IOJuK+wd@%&fO$urse@E!86;JUrYl62ukU&Z4}BfmAkU>>C_Q-e%C> zhs~aq_QgPUUP_ARy>_V4q7l!{-I;v_L=CAetKC2b-gl$$XVAsY5Xkn-Pl}JZti=u# zADPiZbLqYAbHmR2tNpP4P*Rp3YUSB21{vw;Kn?*vyUo2|F+N~?LT}$fmoTwS2B&@~ zr8HWU>`Cuf_4!FDX|Am;`;xVn-*cpu9h2RFA|{ElYW@$u` z7rYck1x8Q0Vf@XH0InAXp5}`K19prL=wZ`dctg$q7J>29PqW@{FM`v65 zBR7qIum1^OKMtN<>+O8i!Gg2`0j+y1W&*$YAl~sEIP1KAF(rb<~8c9-Qsrrf=5C7eH;NZv& zkjfyhdU+P$|G}xpGlS_zqVpubG^sKE9TUg#Ti=A1u(##48o{BiJU_}p8Xm-HPXUxQ zAP|B0%xX1`V!X3yMO2EFOmDJ7|VZ(2kV=&V#hHxSSdgo%CWUb{zqGmhdXEklFgKfMS*s7@>u~lxT5Edwn z#s$c0^w8B1TkMMGYtJ<3v0bZQJ+IsLxMiSP)zdHNs?JeI_vac2m_^DWbKx<9wDMpj|)y!M&5?lQPBu~ zilIHR-y^5lL-DxmZqm5jJCER1I;>;i8++H)0x&=!S`Ql-n1v(dr0-57cuF^U(f5`= 
zF!H+fIXEB=G>K%EO`Y|Can_{(d;UPBPSZW&^^ZKYn8K~{O4Yb$kQyizeHz;pS#tMXs8G}J@a=i!uZ~-^aizf<$TY>7fb&pqsO$#BfDSk)!*U?T-Tzm z6$CIDFhsvEYB>3kazEhW(NOjCXH7Mpqu^!R%9kkIS|b%T|clyEQR zMwnj#4UP%0hd}O8akxCkcAdw}ukp*x%Vtnox8`|Ub7Akg z`P}tH%k@-mqV@q1(QSek_sNVIO$n?c2eC*Ew%G5Gkec>j6=Cs__8Y!)8*{|St2?D4 zX68|LQ6yv{CQ((DO~jITqUrYRrKRfTwr*~onZJF8A>FP+%&~n*ppGI=W|RS@$_^k-$9lude+G z&RihRmaRFnI%N1tOF674#l#wc!o3lH+=Lu>Ci$r>9^lsuh3w@_}sD%pF0%gb4`1`d71vh_AG1!}~o&y10ap4tf3@+XdTV zf!;%dR`S?ftZjmE$4d}oC-n>70z2<&r)3^;Q-q_?m(~@qZediC&IS9Q%xbs!`fDbr+8% z_Isyk0JWg*AH@Ub5gl*6!376rh3b!p2T{8C^Ol#MBf%^kCo}|f&iy(diH&qK8NeQ# z@gT`1$t3bu4pB!-JF6fqklJn8%9?s89>ln}$iCS-+C$1C9{C?{>5a4f9>I-cH5iOo z5_uH!E6?=xi1#c}^Y;3!kdP7+g4Q;aM=*eDF07;YGoyI4>egAQUe45Bnt=VPu}hm_ zy~2-<6D7q5K2R13_6c*iyolQNj_!eAk#bjZ=VaPVKxDf-A9gr%tmfIKbS%HWUG{dJ zi*jKVinylXQwFv&Mw*sbwW`|kt4CkwLHRJA-zNaI!{o@9hRWjH#F^J|7~)Me^64>D z+uE5w6|5%#l4SQIKbv~rt4mxMAJ^kKn4*U9H(?s@>shLI0>=p_Fdea{w*$m<^=u~7 zO&b6*EXdXScn4eLjs2=HAV0joC{J?6?iKOR_?}+!nq_8LhOMq+wz{c6&9;XXa0S1> z2$Zr_pS8qQD)vM_|2Dr84{FMZ#jJ8T=MWZ*_4PZ_dfnV@WrsUSAw1CklaL0K=u=2V zMPiw7TH@MW-qh3b|5!K?92zIMRZL+iK{&U~w;`dhTP zJA{0M3UqAd9vv)4&r#F^{s-RKgFDb3mTL=n^-^Mk9bumvy4Fie{t}4cAB% zt@AV(_UpfMXSrFfeo>waaYL)RF_Fu5uMLP495-((WlyAt^L|U|(2#K%SY3HW-T#zi zRi0y+*~7r*yoYGf*>c6wk_veQGK=wL&3fknHtZ94;;!SX^b?f>%eHzMq-BU5Og|&Ao{v z%#cpuj_CYsj9u%EB#H)G_LA{Jfe78=cmPsU9q3o(Whu7AATkJpwL*FxA5dzk*=l4> zNaYhJW*MvjrS5KD3hOtJ#Rc-e)mN`3YWAt8tu9j%O#6gSPZ}_*thT6(Wj!K*({9R3 zkT}quTVUCa!!W4s!54lWR{E7zTA=9CRh)i^XYIlj*#dO~pxP^OJW5|&%uj6K7MX6U zS^~#+E@|8`C@c;~17s9fJLzvact0=%l5v0y48U|fS~V7v2xy&NAnuzpfEwG$*0~R_ z&B4)mj34@v4ZFjA_U5bARtpokV+{F=;SZB16DKLE-T6W$zggyMCjDXcAge*DQ3tW_ zzZMg_VEyd?fw*(aIZwn*+3Qnroe<<3eaSrd{EJAR?pyiZ>rR#6LmZQx(iZ@-+LfQw zjBvS*hmuFG#R;qI7L&q;_UD~H>74MYy>OSD@~3*Fyb)mhTj!3)zD2I~ae4-Pi-lzZ~0-DD{7Ngth~6X|8? 
zugVQ?fNe4)7a8$^sgO!&O#9>VmHq0z9_&$95>`TkId}w6Z!^dv?1ORVzKfuvtSbzFdmR?1{GnkTLs0;Qyy3glBSxvju zFLQKeL!3C;_l@Rw%Bt5T*h|G-|Swv9DH;-9Yl$G zQHxAv-Qow61r@cW<_*JZU}^+?G|ePs4&}zOP|Bfuc$_*9Tgov#UwEi|z3g@1E0mZw2SBn|{+yHCv(0ndLNY_EWoKInRSP9D{aLNAcI+H))Gd@%+fKSxRqy~fn$Lwu z4S&(n70b@TaF2DXVO5DabZG1-qS8%$IrT-9N1v&aMi7iVs@C>i`>t)X(AjOe-UcCn z-rECE;n#bO&9SBKEdTVkVMHLp%99?gC%xNvAyW+WhZc`yV|mvev61^#Z}Uymep}M= zxc?2-(Mby9K=qJIcM2IDRYh60IaVTu<3KBYJ&!mDoaQl+}TFr9zCVVuBeSj+PPvsU&>cJS*A7i*(1?%DJpj_ zV0iI%TWB>aiw13L3?<^$x`Y(a(~R_~>GY4_PHfr-s;ZTHq+?zk>=+QUfwXztim~xnWLiR8jO}(;W4i4tlaG`SBu(v?+4t}2LGoSHnzrbB zXJ1~sc!Y5xO?%_Pv)C^L1+;~HFUJVS8g2*YDZb~Lbf3dyN_Zd_ol5rxp0YX_ zs$=nzd5rGSyy=*lwWV6d&mdJ}64oHCn7X$~;LP^|(3J5(PNmzzitEBu(}h|V8z?$B z34ZZ|DIgZ?NyQ-w(<})B3Om>h6su6+kZO`@wmFuK|pt>3$Qnxt6`9KBK{;7ba zF6Ka?9T9lC(iff5Oc9Ty4l@9RDCpKRsJS0g$8r{0`kvgF93KZz+mf_GxZHgDEPnF+ z{QN2!ww$IRS>_DA)7*?t^(wYy+o#OcV=_X&KQ`5EENicFI=;OFc@YGXL4oy3H7S0G zhvPr|Pe+Ril$gdxN%?m=V3R(&vj+Wm$zrr8q)`0ZYQ^M26aI{6?tmWVc}cTQgY1VY z1s&**>D3pE=e91@?nRQ;*6%(_-|sIivu2SvJ|}m97s?<)SEOqhLjw||0JWp0NmSY$ z9Ml5*zTAU=807_zvoJ8Bmc`0Rb)@;^GdUGILu>1#UJL8Vk_wZlMiUyPd_2elVGvN^ zgI-tojyF)hnKqt1?dKwJsFsRXhQ{>t8vyqXzS_Yvj9WFwPuEi-PvOtgRimq$jT(;1 zpU~;E8p^nn1MwXQt5FB5sftz_6@?f;*!00VERmrSgZI*1p%Z0$IotD{qf`|E-hnyMu zNhzoE+U@Q}9{0eM3x>m3L(A8mG z;C}z-FvPFJ8+l<)H|G^pq?L$?rgommX>JVhMvY=a#kxOo&IaK3GUWLP!K8_7t7Kf5 z4Ia-R{fTO3{vhAeW>YPlC-sz_T%X4KP4|nn?*i6w!qhq+I{z&RKn4LeN3+7?CUk4= z-KHB5Y)&ATaOCkRySZlhRk3llV~~}~cUcpEs{M@4H|`S4gn$W+o`({MOMjcyK5X67 zUnG^EwThBMF;@FRE`UEq566G@j6%|YWq&o+mceiZsk@mki?t6KlOQ5t`Q?m(9X9OHr&_}Pbc#3u z%0nUxY>8Sfg|~E#-dglYh)r#$Ghw!<_>fCN80=-d!k^xXl>3LS@@uZ``;E}}KzNR6 zhz*?oS7>lR!k5HS##(vg3j0CgS!mca*^EwASav7{Hd#-|#UNAey~y$pBL${j;oYV0 zbw6h0^v5y6(|~V5h+-g-u-qHOk8AZ$rt<<9I?%uMP^+JAVeB5`Ve^&fybMnx_ytGtDds0H&XSAfMs@WaO}nf`5E?y zsN>7k2o?UeEcDCUIeawGb4S8)(n>q$x^#du*xetC1~lyNp}1J_ohK5H!Heusp*N0r zCS;efghJR`z-eRb86RKb@z!*?^(>!FWRHQMoFpJx^G>d$qbiJ}{idUd(kl=OZv*R6 zn9m-Nd@XC-%KGx{g$bwS1cwj^G)%#5`hoOAE|OYsqSET&hkTWzHj3GDQ$mcvac6`5 
z?<_ZrDYd%Epnq#g3HJ#6X@gAdVXFzZTWZJDeMy=7A)hQ#@R$MsTy4)>VJCRM&dh;i zg@WfE4#Dx&0SGb0h!mwZ%&OycArM9_pYA$tjBPa&2LuilYVeE|s2LnQ2hxaVFJ26< zx0nD*-iQ3&>z&DAmtw$2rQ-(vVk$xRn2QTfz?C(2@&H&?DFp>M3Iv$`qi^V!+~@_G zx^1eFVuGV})V9-8#{W*#{m?){xQ7X|;qlu3wMM~hsoJy#Vzu>m*!tF>rj??hib@=W zUxyX|rFW$}7MFlvZFiwX1x_R6nUIblcZJK0_N}C`-kH?%uMb+I6246p> zW$V(>W%X?Fk%8VYSZobJlY9#`;GCudkITx+D0pqZv$wAS31?d@RMfJ-r-Kf&B?bLI ziuTrPsxx*Sk1h`Sg)h#L*YtvdDRHbCWu}Aa`nV}s@;#6B{S*B{f@9bsqj-YwA2ffV zeN!t))^B8zWC8tnBa9Qk=tXSq@K)Yga3~ryZD+Yz!uFtR{>Txfh?z_Le3^)sZpZNKC(0mv)71Z|) zK;QvYBud|uj?O+cPV}j1Rk_2P3c$&`0C$dH-R-$Y0L|qN1hoRndXbD>@bSn0{FntM z_gq6hX@ou7-GmMG|NLR?Y<2X(cyR^dTscWpthIqeRq9=Jy&ETxY-&N)ME@2qGcEVb z^l#Dgfb}6FhyLXaG-U$ZU|}Ju-`+e*HU$RZqJ{{KYSpl)2Kc=v%a3@`&58k_yVM(q zQ3j%@vI8jqI^GNXeiuSjF;MyMr5+K9mSE%u^I%))E))b-;&P>fyG59+L>{VEP3gY< z`~wvAR$%}K&=L|9pD+V%^bqk+-@vTE$~w6$b1|SzQ9SeGE@7MztueJtlTZe=xFY=N zi=E&hE(=WA)4#>x-WJh$Ec3Ul855t(B>TP-FjIb84MF;=LtwnK|*{Xd7ZkuA(z#dXJ&IR3s2 zsjkh)H+$c1-nz%3+lT>3d}6Y)_=rTVrEwjfqmvR(I$>eh*C&!uQBk7c4@c{x1^{O# z??H$a7Rv%1%2A-ngBC|L=>mvoLq#>xkaKUx=Q;@x)AaTAe~%SDf%z0cZM;&ie;KYO~sFR4i zygXCnhzFGy!lW;mmWwME-Dio3-34mp=*5X=#@3%kV*OEHeFqsjnz}Vr0eYY=Mq*Vm z`~E4E1h8-6h&)HCSN5$~3Emfbl0F1&Ir1rdM&<+Gea%2? 
zv)Qz&6M*<}9sQO;x5}mPyBwczyp;o`%AO#78b+<)GxlM*KG#>?lwWr+vM0+;$UUuJ zQp!n7OK0=Uxn|5)Ml&cRf?+2+oTG5ka=Whh_qZh*{wDj;4A9cDUNF*>og8f=8<5Hw z5_gr<)f00;1+oEZJ%6M_CDG5josNem{_aEW0k8u2O@qOYe%}^j1$#@KZ-Imk2O}gT z#G5+A<}S5Bf_9y|niuTN;LI-vGawNIz&C!dUV{Wz71?CoL1fGRxBm-`9q>Dm({&y^ zfByVY5gx|H#RZFaz3jzS8Q{k>>TM- z49UnS7)${yz6I9B02l9bh01|ReBcA$R}k34O5t-#Hyh5rqK@eAO?d7c){R2i1g6B))w6)|C~n{npi$7llIA?su~s z?jVuKHwV8C4;?X*d}<&kfz`>$Q~;62Ub?JfDdT-W{_`~xPXCS6eFph<;&RhWY@z4_c{!78Sb8xdnli z;^X)?1Sy&Oc{63&uXF>op2O*`Uj8%zjgYLOViZhb#pBH-;Yzr|Vp@^6mmhjc-lb!l=Hei^R*VfKGg0*9llVySSuh80J@!+BT6HM8tAV7_MHGi_v8t9ad*L9g{2B%`m)S&l9SJQ^LNKr)F;r| z&M&mRse(x>J@L^|QHx-TjBt%>w(K-q_qBh1ct>ce(n{v~>TAU)G}V=T3NE@~bzh2}iIr0QC=-$bd;GyDKUxO1*tsb~YUg-aC841yK}u!w`|S zh)4XY)s9TD%pdkozIX!Q#*Xp9DniAl0hq?V7ySY)i)~?-f&wtykgP9yEuZXm>Q2zF zbE?9k1HFG$Q6U4?_wZz()_QloNj8c5Ga#?tCntxNkR^Tj@>*6l945Ku1ui0M+n4-D z&8R4~8jJ2hO>fi+Ab{l`ZcWF7?eyNDBn|op?NT@MgE zzpk&&FM;jk^*VdW@3IX!^*q_0wVbK?+S)1xzKS4L5NHB3QPv;lu9UR2-g4(xw4qM2 zuI}>jXQP$Waj>`NgwHxZ%+1a1g$cthE-w5U^uRn9laM&Gn4oqz7G$I31q(w2*nU!i z?dsGUR>7;exJ6db=G0v0t*PwJPAR1t=Pg-awU0n=-6%ml2YFaN1cf63`_4+yU51G~ zHmSMb-DZP@aTv@l^D_XVybyc}>+0=&dG`_Dh;ogym6D_5&p(R*+ETaMd?y=N)H4rL z?Na?VyrhO=V6}_83^{FHJ$hxInG5F2-;%%pFaYm8$>)}qXhi+$Y>mtG<-n#j}6?)Hcw(hudc+0fH4f>YP@LP_5XG|-2caZ>iYc`C&>q| literal 0 HcmV?d00001 diff --git a/docs/utils.html b/docs/utils.html index dc548ee9..ea26a341 100644 --- a/docs/utils.html +++ b/docs/utils.html @@ -80,19 +80,7 @@ alphabase - - +