From 934f83d043a71b7b09cd5b1d824074a765c73f0d Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Mon, 8 Feb 2021 19:08:49 +0000 Subject: [PATCH 1/3] Added persistent ts MAB --- .../routers/thompson-sampling/.gitignore | 1 + .../.s2i/environment_persistent | 3 + components/routers/thompson-sampling/Makefile | 5 + .../ThompsonSamplingPersistent.py | 118 ++++++++++++++++++ .../thompson-sampling/requirements.txt | 1 + .../thompson-sampling/requirements_ts.txt | 1 + .../requirements_ts_persistent.txt | 2 + 7 files changed, 131 insertions(+) create mode 100644 components/routers/thompson-sampling/.gitignore create mode 100644 components/routers/thompson-sampling/.s2i/environment_persistent create mode 100644 components/routers/thompson-sampling/ThompsonSamplingPersistent.py create mode 100644 components/routers/thompson-sampling/requirements_ts.txt create mode 100644 components/routers/thompson-sampling/requirements_ts_persistent.txt diff --git a/components/routers/thompson-sampling/.gitignore b/components/routers/thompson-sampling/.gitignore new file mode 100644 index 0000000000..4414fc1e28 --- /dev/null +++ b/components/routers/thompson-sampling/.gitignore @@ -0,0 +1 @@ +requirements.txt diff --git a/components/routers/thompson-sampling/.s2i/environment_persistent b/components/routers/thompson-sampling/.s2i/environment_persistent new file mode 100644 index 0000000000..5629d9d852 --- /dev/null +++ b/components/routers/thompson-sampling/.s2i/environment_persistent @@ -0,0 +1,3 @@ +MODEL_NAME=ThompsonSamplingPersistent +SERVICE_TYPE=ROUTER +PERSISTENCE=0 diff --git a/components/routers/thompson-sampling/Makefile b/components/routers/thompson-sampling/Makefile index b5d888d1b0..cf65189969 100644 --- a/components/routers/thompson-sampling/Makefile +++ b/components/routers/thompson-sampling/Makefile @@ -3,8 +3,13 @@ IMAGE_NAME=seldonio/mab_thompson_sampling KIND_NAME ?= kind build: + cp requirements_ts.txt requirements.txt s2i build . seldonio/seldon-core-s2i-python37-ubi8:${VERSION} $(IMAGE_NAME):$(VERSION) +build_persistent: + cp requirements_ts_persistent.txt requirements.txt + s2i build . -E .s2i/environment_persistent seldonio/seldon-core-s2i-python37-ubi8:$(VERSION) $(IMAGE_NAME)_persistent:$(VERSION) + push: docker push $(IMAGE_NAME):$(VERSION) diff --git a/components/routers/thompson-sampling/ThompsonSamplingPersistent.py b/components/routers/thompson-sampling/ThompsonSamplingPersistent.py new file mode 100644 index 0000000000..b1641e3ce8 --- /dev/null +++ b/components/routers/thompson-sampling/ThompsonSamplingPersistent.py @@ -0,0 +1,118 @@ +import random +import logging +import numpy as np +import redis +import os + +PRED_UNIT_ID = os.environ.get("PREDICTIVE_UNIT_ID", "0") +PREDICTOR_ID = os.environ.get("PREDICTOR_ID", "0") +DEPLOYMENT_ID = os.environ.get("SELDON_DEPLOYMENT_ID", "0") + +REDIS_HOST = os.environ.get("REDIS_SERVICE_HOST", "localhost") +REDIS_PORT = os.environ.get("REDIS_SERVICE_PORT", 6379) + +KEY_PREFIX = f"seldon_{DEPLOYMENT_ID}_{PREDICTOR_ID}_{PRED_UNIT_ID}" +KEY_BETA_PARAMS = "-beta-params" + +logger = logging.getLogger(__name__) +__version__ = "0.1" + + +class ThompsonSamplingPersistent(object): + """ Multi-armed bandit routing using Thompson Sampling strategy. + + This class implements Thompson Sampling for the Beta-Binomial model, i.e. + rewards are assumed to come from a Bernoulli distribution for which the + conjugate prior is a Beta distribution. + + The reward is assumed to be a single float between 0 and 1 indicating the + mean reward for a batch of samples. The prior is a Beta(1,1) distribution + (Uniform over the child components). +# + Parameters + ---------- + n_branches : int + Number of child components/models the router will route requests to + verbose : bool + Set the logger level + seed : int, optional + Set the random seed + history : bool + Set storing router history + branch_names: str, optional + A string specifying branch names separated by `:` + + """ + + def __init__( + self, + n_branches=None, + verbose=False, + seed=None, + history=False, + branch_names=None, + ): + + if verbose: + logger.setLevel(10) + logger.info("Enabling debug mode") + + logger.info(f"Starting {__name__} Microservice") + + # for reproducibility + if seed: + logger.info("Setting random seed to %s", seed) + random.seed(seed) + np.random.seed(seed) + + try: + n_branches = int(n_branches) + except (TypeError, ValueError) as e: + logger.exception("n_branches parameter must be given") + raise + + self.rc = redis.Redis(host=REDIS_HOST, port=REDIS_PORT) + + self.key = self.key + __name__ + __version__ + self.n_branches = n_branches + self.verbose = verbose + + if not self.rc.exists(self.key): + models_beta_params = [1 for _ in range(n_branches) * 2] + self.rc.lpush(self.key, *models_beta_params) + + if branch_names is not None: + self.branch_names = branch_names.split(":") + logger.info("Branch names: %s", self.branch_names) + + logger.info("Router initialised, n_branches: %s", self.n_branches) + + def route(self, features, feature_names): + logger.debug("Routing features %s", features) + + models_beta_params = [int(i) for i in self.rc.lrange(self.key, 0, -1)] + + # Use zip iter to iterate across each pair of numbers in the list + branch_values = [np.random.beta(a, b) for a, b in zip(*[iter(models_beta_params)] * 2)] + + selected_branch = np.argmax(branch_values) + logger.debug("Sampled branch values: %s", branch_values) + + logger.info("Routing to branch %s", selected_branch) + return int(selected_branch) + + def send_feedback(self, features, feature_names, reward, truth, routing=None): + logger.debug(f"Sending feedback with reward {reward} and truth {truth}") + + n_success, n_failures = self.n_success_failures(features, reward) + logger.debug(f"n_success: {n_success}, n_failures: {n_failures}") + + # TODO: Non atomic / non-thread-safe operation which will get overriden by other replicas/threads + self.rc.lset(self.key, routing*2, self.rc.lindex(self.key, routing*2) + n_success) + self.rc.lset(self.key, routing*2 + 1, self.rc.lindex(self.key, routing*2 + 1) + n_failures) + + def n_success_failures(self, features, reward): + n_predictions = features.shape[0] + n_success = int(reward * n_predictions) + n_failures = n_predictions - n_success + return n_success, n_failures diff --git a/components/routers/thompson-sampling/requirements.txt b/components/routers/thompson-sampling/requirements.txt index 0dea8a6ed5..0499890e69 100644 --- a/components/routers/thompson-sampling/requirements.txt +++ b/components/routers/thompson-sampling/requirements.txt @@ -1 +1,2 @@ numpy>=1.15.1 +redis==3.5.3 diff --git a/components/routers/thompson-sampling/requirements_ts.txt b/components/routers/thompson-sampling/requirements_ts.txt new file mode 100644 index 0000000000..0dea8a6ed5 --- /dev/null +++ b/components/routers/thompson-sampling/requirements_ts.txt @@ -0,0 +1 @@ +numpy>=1.15.1 diff --git a/components/routers/thompson-sampling/requirements_ts_persistent.txt b/components/routers/thompson-sampling/requirements_ts_persistent.txt new file mode 100644 index 0000000000..0499890e69 --- /dev/null +++ b/components/routers/thompson-sampling/requirements_ts_persistent.txt @@ -0,0 +1,2 @@ +numpy>=1.15.1 +redis==3.5.3 From b66789041a38d394918ecde9d2c44238bc3c3f02 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Mon, 8 Feb 2021 19:09:19 +0000 Subject: [PATCH 2/3] Extended case study to include persistent MAB --- .../case_study/credit_card_default.ipynb | 540 ++++++------------ 1 file changed, 175 insertions(+), 365 deletions(-) diff --git a/components/routers/case_study/credit_card_default.ipynb b/components/routers/case_study/credit_card_default.ipynb index 97b942e10e..0d911b9529 100644 --- a/components/routers/case_study/credit_card_default.ipynb +++ b/components/routers/case_study/credit_card_default.ipynb @@ -67,7 +67,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -77,52 +77,25 @@ }, { "cell_type": "code", - "execution_count": 37, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(30000, 25)" - ] - }, - "execution_count": 37, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "data.shape" ] }, { "cell_type": "code", - "execution_count": 38, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['ID', 'LIMIT_BAL', 'SEX', 'EDUCATION', 'MARRIAGE', 'AGE', 'PAY_0',\n", - " 'PAY_2', 'PAY_3', 'PAY_4', 'PAY_5', 'PAY_6', 'BILL_AMT1', 'BILL_AMT2',\n", - " 'BILL_AMT3', 'BILL_AMT4', 'BILL_AMT5', 'BILL_AMT6', 'PAY_AMT1',\n", - " 'PAY_AMT2', 'PAY_AMT3', 'PAY_AMT4', 'PAY_AMT5', 'PAY_AMT6',\n", - " 'default.payment.next.month'],\n", - " dtype='object')" - ] - }, - "execution_count": 38, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "data.columns" ] }, { "cell_type": "code", - "execution_count": 39, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -131,22 +104,9 @@ }, { "cell_type": "code", - "execution_count": 40, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 23364\n", - "1 6636\n", - "Name: default.payment.next.month, dtype: int64" - ] - }, - "execution_count": 40, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "data[target].value_counts()" ] @@ -160,20 +120,9 @@ }, { "cell_type": "code", - "execution_count": 41, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0.7788" - ] - }, - "execution_count": 41, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "data[target].value_counts().max()/data.shape[0]" ] @@ -229,7 +178,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -285,20 +234,9 @@ }, { "cell_type": "code", - "execution_count": 43, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "RandomForestClassifier(random_state=1)" - ] - }, - "execution_count": 43, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "from sklearn.ensemble import RandomForestClassifier\n", "rf = RandomForestClassifier(random_state=1)\n", @@ -314,7 +252,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -324,7 +262,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -333,27 +271,11 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": null, "metadata": { "scrolled": true }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " precision recall f1-score support\n", - "\n", - " No default 0.84 0.95 0.89 3927\n", - " Default 0.64 0.36 0.46 1073\n", - "\n", - " accuracy 0.82 5000\n", - " macro avg 0.74 0.65 0.68 5000\n", - "weighted avg 0.80 0.82 0.80 5000\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "print(classification_report(y_test1, y_preds1,\n", " target_names=['No default','Default']))" @@ -361,27 +283,9 @@ }, { "cell_type": "code", - "execution_count": 47, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "accuracy_score:\n", - " 0.82\n", - "precision_score:\n", - " 0.6444073455759599\n", - "recall_score:\n", - " 0.3597390493942218\n", - "f1_score:\n", - " 0.4617224880382775\n", - "confusion_matrix:\n", - " [[3714 213]\n", - " [ 687 386]]\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "for score in [accuracy_score, precision_score, recall_score, f1_score, confusion_matrix]:\n", " print(score.__name__ + ':\\n', score(y_test1, y_preds1))" @@ -389,31 +293,9 @@ }, { "cell_type": "code", - "execution_count": 48, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Normalized confusion matrix\n", - "[[0.94576012 0.05423988]\n", - " [0.64026095 0.35973905]]\n" - ] - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAUQAAAEYCAYAAAAkpo9KAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Il7ecAAAACXBIWXMAAAsTAAALEwEAmpwYAAAqLklEQVR4nO3dd5xV1dX/8c93ZqhSpCqCDTs2BHxUbGgsGBGN0aCiCWqixhqNiTWK5vGJxq7Ye4wF1BhbokQTVPxZQMQCohLAAihIExSRGdbvj73veBmHe8/M3LllZr193Zf33nPOPmvmpWv2PmefvWRmOOecg7JCB+Ccc8XCE6JzzkWeEJ1zLvKE6JxzkSdE55yLPCE651zkCdE1mKQ2kp6StETSIw1oZ7iksbmMrVAk7S7pg0LH4epGPg+x+ZB0FHAWsCWwFJgMXGZm4xvY7jHAacBAM6tsaJzFTpIBm5nZ9ELH4nLLe4jNhKSzgOuA/wPWATYAbgYOzkHzGwIfNodkmISkikLH4OrJzPzVxF9AR2AZcHiGfVoREuac+LoOaBW3DQI+A34LzAPmAsfGbZcA3wEr4zmOB0YCf01reyPAgIr4eQQwg9BLnQkMT/t+fNpxA4EJwJL474Fp28YBfwReie2MBbqu4WdLxf/7tPgPAX4MfAgsBM5P2/9/gFeBxXHfUUDLuO2l+LN8HX/eYWntnwN8Dtyf+i4es0k8R7/4eT1gPjCo0P9t+Gv1l/cQm4ddgNbA4xn2uQDYGegLbE9IChembV+XkFh7EpLeTZI6mdnFhF7naDNrZ2Z3ZQpE0lrADcABZtaekPQm17JfZ+CZuG8X4BrgGUld0nY7CjgW6A60BM7OcOp1Cb+DnsBFwB3A0UB/YHfgD5I2jvtWAWcCXQm/ux8BJwOY2R5xn+3jzzs6rf3OhN7yCeknNrP/EpLlXyW1Be4B7jOzcRnidQXgCbF56AJ8aZmHtMOBS81snpnNJ/T8jknbvjJuX2lm/yD0jraoZzyrgG0ktTGzuWY2pZZ9DgQ+MrP7zazSzB4CpgEHpe1zj5l9aGbLgTGEZL4mKwnXS1cCDxOS3fVmtjSefyrhDwFm9qaZvRbPOwu4Ddgzwc90sZmtiPGsxszuAKYDrwM9CH+AXJHxhNg8LAC6Zrm2tR7wcdrnj+N31W3USKjfAO3qGoiZfU0YZp4EzJX0jKQtE8STiqln2ufP6xDPAjOriu9TCeuLtO3LU8dL2lzS05I+l/QVoQfcNUPbAPPN7Nss+9wBbAPcaGYrsuzrCsATYvPwKrCCcN1sTeYQhnspG8Tv6uNroG3a53XTN5rZc2a2L6GnNI2QKLLFk4ppdj1jqotbCHFtZmYdgPMBZTkm43QNSe0I12XvAkbGSwKuyHhCbAbMbAnhutlNkg6R1FZSC0kHSPpz3O0h4EJJ3SR1jfv/tZ6nnAzsIWkDSR2B81IbJK0j6eB4LXEFYei9qpY2/gFsLukoSRWShgF9gKfrGVNdtAe+ApbF3uuva2z/AuhdxzavByaa2S8J10ZvbXCULuc8ITYTZnY1YQ7ihYQ7nJ8CpwJ/j7v8LzAReAd4F5gUv6vPuf4FjI5tvcnqSawsxjGHcOd1T36YcDCzBcAQwp3tBYQ7xEPM7Mv6xFRHZxNu2Cwl9F5H19g+ErhP0mJJP8vWmKSDgcF8/3OeBfSTNDxnEbuc8InZzjkXeQ/ROeciT4jOORd5QnTOucgTonPORf4QegOpoo2pZftCh9Gs7LDVBoUOodmZNOnNL82sWy7aKu+woVnlDx7mWY0tn/+cmQ3OxfnqwhNiA6lle1ptkXXmhcuhV14fVegQmp02LVTzqaF6s8rlWf+f+XbyTdmeDGoUnhCdc/klQVl5oaOolSdE51z+qThvX3hCdM7ln7I9Gl4YnhCdc3nmQ2bnnAtE0Q6ZizMq51wTFnuImV5JWpEGS/pA0nRJ59ayfUNJL0h6R9I4Sb2ytekJ0TmXf1LmV9bDVQ7cBBxAWBbuSEl9aux2FfAXM9sOuBT4U7Z2PSE65/JMYcic6ZXd/wDTzWyGmX1HKAtRs4JkH+Df8f1/atn+A54QnXP5JZIMmbtKmpj2OqFGKz0Ja3qmfMbq5SUA3gYOje9/ArSvUaTsB/yminMuz5SkF/ilmQ1o4InOBkZJGkEoHzubUFFxjTwhOufyS0B5g6fdzAbWT/vcixr1dsxsDrGHGGva/NTMFmdq1IfMzrn8a+BNFWACsJmkjSW1BI4Anlz9FOoqVXdFzwPuztaoJ0TnXJ41/KZKLIl7KvAc8D4wxsymSLpU0tC42yDgA0kfAusAl2Vr14fMzrn8y8GTKmb2D0J1xvTvLkp7/yjwaF3a9ITonMuv5MPivPOE6JzLvyJ9dM8TonMuz3xxB+ec+54PmZ1zjrhidnGmnuKMyjnXtHkP0TnnIr+p4pxzeJEp55xbjQ+ZnXMukCdE55yLD6qUeUJ0zjlARdtDLM5bPc65Jq2srCzjK4kERaY2kPQfSW/FQlM/zhpXPX4W55xrEEkZXwmOT1Jk6kLCsmA7ENZLvDlbu54QnXP5pQSv7JIUmTKgQ3zfEZiTrVG/huicyyuhJMPirpImpn2+3cxuT/tcW5GpnWq0MRIYK+k0YC1gn2wn9YTonMu7BMPiXBSZOhK418yulrQLcL+kbcxs1ZoO8ITonMu7HNxlzlpkCjgeGAxgZq9Kag10BeatqVG/huicy684DzHTK4GsRaaAT4AfAUjaCmgNzM/UqPcQnXN5pRzMQzSzSkmpIlPlwN2pIlPARDN7EvgtcIekMwk3WEaYmWVq1xOicy7vcvGkSoIiU1OBXevSpidE51x+yZ9lds65ap4QnXOOxPMQC6I4o3INsu/ArXj78T/w3hMXc/ax+/5g+wY9OvGPW0/jjdHn8dwdZ9Cz+9rV25ZNvIHXHj6X1x4+l0euOzGPUZe2sc89y3Zbb8HWW27KlX++/AfbV6xYwdFHDWPrLTdl94E78fGsWQB8PGsWndq3Yaf+fdmpf19OO/mkPEdeIA1/UqVReA+xiSkrE9ed+zMO/PUoZn+xmPEP/I6nX3yXaTM+r97nT2f+hAeeeYMHnnqdPXfcnEtPG8rxf/gLAMtXrGTnI374P7Rbs6qqKn5z+ik8889/0bNXL3bbeUeGDBnKVn2+f7T23rvvotPanZgybTpjRj/MBeefw18fHA1A70024fU3Jxco+gIQ3kN0+bHjNhvx30+/ZNbsBaysrOKR5yYxZNB2q+2zZe8evPjGBwC8OOFDhgzathChNhkT3niDTTbZlI1796Zly5YcPuwInn7qidX2efqpJxh+zC8AOPSnhzHu3y+QZQZIk9bQxR0aiyfEJma97h357ItF1Z9nf7GInt06rrbPux/O5uC9+wJw8N7b06FdGzp3XAuA1i0rGP/A73nxvt9yUI1E6mo3Z85sevX6/qGJnj17MXv27B/us37Yp6Kigg4dO7JgwQIAZs2cyc4DdmDfvfdk/PiX8xd4IRXpkLnREqIkk3R12uezJY1sQHuzJHXNss/hkt6X9J96nmOcpAHx/fn1aaMUnHft4+zef1Nefegcdu+/KbO/WERVVXi8c4sfX8Ruw//ML86/lyt/91M27pXxV+4aaN0ePfhwxie8NvEtrrjyGkYccxRfffVVocNqVJJysh5iY2jMM68ADs2WxHLseOBXZrZXDtoqyYQ4Z94Seq3Tqfpzz3U6MXv+ktX2mTt/CUecfSe7HHkFF496CoAly5aH4+O+s2Yv4KWJH9F3y155irx0rbdeTz777PuFV2bP/oyePXv+cJ9Pwz6VlZV8tWQJXbp0oVWrVnTp0gWAfv3707v3Jnz04Yf5C75AmuOQuRK4HTiz5gZJG0n6d1zF9gVJG9SyTxdJYyVNkXQnaR1pSUdLekPSZEm3SSqXdBGwG3CXpCvjOV6WNCm+BsZjB0l6Oq2tUZJG1Dj35UCb2P4DOfp95MXEKR+z6Qbd2HC9LrSoKOfw/fvxzLh3Vtuny9prVf9H97vj9ue+J14DYO32bWjZoqJ6n1369ub9tJsxrnYDdtyR6dM/YtbMmXz33Xc8MvphDhwydLV9DhwylAfuvw+Avz32KHvutTeSmD9/PlVVVQDMnDGD6dM/YuPevfP+M+RbsSbExr7LfBPwjqQ/1/j+RuA+M7tP0nHADcAhNfa5GBhvZpdKOpDQ+0s9pD0M2NXMVkq6GRge99sbONvMJkpqC+xrZt9K2gx4CEi0nJCZnSvpVDPrW9t2SScAJwDQol2SJvOmqmoVZ14xhqduPoXyMnHfE6/x/ozP+cOvD2TS1E945sV32WPAZlx62lDMYPyk6fzmT2MA2LL3utx4wZGsslWUqYyr7vnXanenXe0qKiq49vpRHHTg/lRVVfGLEcfRZ+utuXTkRfTrP4AhBw1lxHHHc9yIY9h6y03p1Kkz9z/wMADjX36JP15yES0qWlBWVsaNN91K586dC/wTNb5iLTKlxrrTJWmZmbWLD1uvBJYD7cxspKQvgR4xobUA5ppZ1xrHTwYONbMZ8fNCYHPCqhbn8/0SPm2Ah2K74/g+IXYERgF9gSpgczNrK2lQ3GdIbHcU4WHwe2scv8zMsma7srbdrdUWP6vvr8nVw6IJowodQrPTpoXezMH6hAC0Wncz6zX8hoz7zLjmx1nPJ2kwcD1hcYc7zezyGtuvBVKXz9oC3c1s7Uxt5mMe4nXAJOCeHLUnQu/yvCz7nQl8AWxPuDTwbfy+ktUvFbTOUVzOuQTCkyoN6yGm1VTZl7Ba9gRJT8YFHQAwszPT9j8N2CFbu41+O8fMFgJjiEPe6P8RenoAw4Ha5hq8BBwFIOkAIHWn4AXgMEnd47bOkjas5fiOhJ7nKuAYwl8RgI+BPpJaSVqbuF5aLVK9V+dcjkmZXwkkqamS7kjCZbOM8nV/+2rCSrUppwHHSnqHkKzOqOWYS4A9JE0BDiUs9pha0udCQq2Ed4B/AT1qOf5m4BeS3ga2BL6Ox39KSNDvxX+/tYaYbydc/yypmyrOlYIc3FSpraZKz9p2jB2mjYF/Z2u00YbM6dffzOwLwhg+9fljYO8sxy8A9lvDttHA6Fq+H5T2/iMgfWbxOWnbfg/8Psvx56Qf45zLDQnKy7MmvWxFpuriCOBRM6vKtqM/y+ycy7sEncBsRaaS1FRJOQI4JUlc/uiecy7vcjBkTlJTBUlbEu4/vJqkUe8hOufySqLBd5kT1lSBkCgfzlZLJcUTonMuz3LzNEq2mirx88i6tOkJ0TmXdw3tITYWT4jOufxKPtcw7zwhOufySniRKeecq+ZDZueci4q0g+gJ0TmXZ16o3jnnglysdtNYPCE65/KuSDuInhCdc3mWgydVGosnROdcXvm0G+ecS+MJ0TnnomIdMvvyX865/MpSPiBp51HSYEkfSJou6dw17PMzSVNjOeMHs7XpPUTnXF7lq8hULD98HqFk8aJUHaZM1pgQJd0IrHENMTM7vQ7xO+dctbKGX0OsLjIFIClVZGpq2j6/Am4ys0UAZjbvB63UkKmHODHDNuecq7cE+TBbTZXaikztVKONzcO59AphEdmRZvZsppOuMSGa2X3pnyW1NbNvMjXmnHPZSFCefcicraZKEhXAZsAgQs2VlyRta2aL13RA1psqknaRNBWYFj9vL+nmBgbqnGvGclBTJUmRqc+AJ81spZnNBD4kJMg1SnKX+Tpgf2ABgJm9DeyRJGLnnKtNDu4yJyky9XdC7xBJXQlD6BmZGk10l9nMPq2RtbPWN3XOudoIKG/gTZWERaaeA/aLI9wq4Hex3vsaJUmIn0oaCJikFsAZwPsN+WGcc81Y8mFxRtmKTMVKe2fFVyJJEuJJwPWEuzpzCFk3UdFn55yrSSS6qVIQWROimX0JDM9DLM65ZqJIH2VOdJe5t6SnJM2XNE/SE5J65yM451zTlIO7zI0iyV3mB4ExQA9gPeAR4KHGDMo513Sl5iFmehVKkoTY1szuN7PK+Por0LqxA3PONV3K8iqUTM8yd45v/xlXkniY8GzzMGrc2XHOubooxfUQ3yQkwFTkJ6ZtM8IqEs45VydSYYfFmWR6lnnjfAbinGs+irSDmOxJFUnbAH1Iu3ZoZn9prKCcc01XSc9DlHQx4XnAPoRrhwcA4wFPiM65einWa4hJ7jIfBvwI+NzMjgW2Bzo2alTOuSat5O4yp1luZqskVUrqAMxj9WV3nHMusYTrIRZEkh7iRElrA3cQ7jxPAl5tzKCcc01bLp5UyVZkStKI+ITd5Pj6ZbY2kzzLfHJ8e6ukZ4EOZvZOooidc64WDb2EmKTIVDTazE5N2m6midn9Mm0zs0lJT+Kccyk5moeYpMhUnWXqIV6dYZsBezfkxE1F286d2e7InxU6jGbliXdrrhTvSk2CYXEuikwB/FTSHoTyAWea2ae17FMt08TsvbJF7JxzdZVwxexcFJl6CnjIzFZIOhG4jywduSQ3VZxzLqfKlPmVQNYiU2a2wMxWxI93Av2zxpUsfOecy50cJMSsRaYk9Uj7OJQEpU8SPbrnnHO5kot5iAmLTJ0uaShQCSwERmRrN8mjeyKUEOhtZpdK2gBY18zeqP+P45xrznLx5F6CIlPnUcdVuZIMmW8GdgGOjJ+XEub/OOdcnQmokDK+CiXJkHknM+sn6S0AM1sUx+zOOVcvRbq2Q6KEuDLOCjcASd2AVY0alXOuyZJEWZFmxCQJ8QbgcaC7pMsIq99c2KhROeeatPIind+S5FnmByS9SVgCTMAhZpb19rVzztVGULo9xHhX+RvCrO/q78zsk8YMzDnXdBVpPkw0ZH6G74tNtQY2Bj4Atm7EuJxzTZUSPbpXEEmGzNumf46r4Jy8ht2dcy6jMGQudBS1q/OTKmY2SVJtq0o451wixbpidpJriGelfSwD+gFzGi0i51yTVuo9xPZp7ysJ1xQfa5xwnHNNnkr0pkqckN3ezM7OUzzOuSZOQEWRdhHXOD1SUoWZVQG75jEe51wzIGV+JWsjc5GptP1+KskkZV1wNlMP8Q3C9cLJkp4EHgG+Tm00s78lC9s559KJsgZWX05aZEpSe+AM4PUk7Sa5htgaWEBYejs1H9EAT4jOuToL6yE2uJmkRab+CFwB/C5Jo5kSYvd4h/k9vk+EKZYwaOec+4EEj+41uMhUnDO9vpk9I6nBCbEcaAe19m09ITrn6kUkmofYoCJTksqAa0iwSna6TAlxrpldWt+AnHNuTXIw7SZbkan2wDbAuFjydF3gSUlDzSy957maTAmxOO+LO+dKmshJdbvqIlOERHgEcFRqo5ktAbpWn1MaB5ydKRmSJa4fNSRa55yrlcI1xEyvbMysEkgVmXofGJMqMhULS9VLpkL1C+vbqHPOrUmu1kPMVmSqxveDkrTpZUidc3lXpA+qeEJ0zuWbUJE+zOwJ0TmXVzm6qdIoPCE65/KuZGuqOOdcTgkfMjvnHPiQ2TnnVuNDZueci4o0H3pCdM7llyjhMqTOOZdbQkW6VIInROdc3hVpB9ETonMuvyQfMrs82mnjTvzmR5tQLvHUO59z/+uf/mCfvbfoyvG7bogB0+d9zcinp1Vva9uynAePH8BLH33JNc//N4+Rl67Jr/yH+666mFVVVez9kyM5+NhTV9v+r0fvZ+yYeykrK6d127X41YVX0Kv35gB8/OFU7rzsXJZ/vQyVicvuf4aWrVoX4sfIm1zkQ0mDgesJi1nfaWaX19h+EnAKUAUsA06oWXOlJk+ITUyZ4Ox9NuWMMe8yb+kK7vr5Drw8fQGzFnxTvU+vTq35+c4bcNIDb7N0RSWd2rZYrY0TdtuIyZ8uyXfoJWtVVRV3X3EhF9z8IF3W6cH5Rx9I/z33q054ALsOPoR9DzsGgIkvjuX+qy/hvJseoKqykpsuPJ1T/vcGNty8D0sXL6KiosWaTtVkNPQaYsIiUw+a2a1x/6GEFbQHZ2q3WOdHunrq06M9ny1ezpwl31K5ynj+/fnsvmmX1fYZul0PHntrDktXVAKw6JuV1du2WKcdnddqwRuzFuU17lI2/b3JrNtrI9bptSEVLVoycP+DmThu7Gr7tG3Xvvr9iuXfVHeR3nntRTbYbCs23LwPAO3X7kRZeXn+gi+A1F3mTK8EqotMmdl3QKrIVDUz+yrt41okKH3iPcQmplu7VnyxdEX15/lLV9Bnvfar7bNB5zYA3HrU9pSVibte+ZjXZy5CwGl79eaSp6ex40ad8hl2SVs4fy5d1u1R/blz93WZ/t5bP9jvudH38swDd1C58jv+cNtoAOZ+PBMk/u/k4Xy1eAED9xvK0BEn5y32QkmQ8xpcZCqcR6cAZwEtCZVDMyrKHqKkKkmTJU2R9Lak38aiMdmOuzIec2U9z7ss/nsjSUdl279UlZeJ9Tu14ZSH3+Hip6Zx7v6b065VOYfusB6vzljI/GXfFTrEJmn/YSO44clXOOr083n8zhsAqKqq5IPJEzj1shu55K7HmfCfZ3n39fEFjrRxJewhfmlmA9Jet2dptlZmdpOZbQKcA1yYbf9i7SEuN7O+AJK6Aw8CHYCLsxx3AtDZzKoaeP6NCPUZHmxgO3k3f9kK1mnfqvpzt/atmL909QQ3b+kKps5ZStUqY+6Sb/l00Tes36kN2/TswPa9OnDoDuvRpkU5LcrF8u+quOWlWXn+KUpL5249WPD53OrPC+d9TufuPda4/8D9D+auP50PQJd1erBVv53o0KkzAH1325tZ095l2512a9ygCyon8xCzFZmq6WHglmyNFmUPMZ2ZzSMkulMVlMee4ARJ70g6EUDSk4SyqW9KGibpIEmvS3pL0vOS1on7jZR0dqp9Se9J2qjGaS8Hdo+91DPz8oPmyPtzl9KrUxt6dGxNRZnYZ6tujJ++YLV9XvpoATtssDYAHdtUsH6ntsxe/C2XPD2NQ299g5/e9gajxs3gn1O+8GSYwCZbb8/nn85k3uxPqFz5Hf/vuSfov+e+q+0z95MZ1e/fevkFeqy/MQDb7bInn0yfxorly6mqrOT9N1+jZ9rNmCZJYcic6ZVAdZEpSS0JRaaeXO000mZpHw8EPsrWaLH2EFdjZjPiXaXuhAunS8xsR0mtgFckjTWzoZKWpfUsOwE7m5lJ+iXwe+C3CU95LqFC15Dc/zSNq8rgmuenc+3h21Au8fS7nzNzwTf8crcNmfb5UsZPX8jrMxex00adeOC4/qwyuGncDL76trLQoZes8ooKjj3nj/zfKcNZtWoVew0dxvqbbMGYW66kd5/tGbDnfjw3+l7ee3085RUVrNWhI7++9FoA2nVYmwOH/4oLjjkQJHbYdS/67d6067vl4tE9M6uUlCoyVQ7cnSoyBUw0sycJnah9gJXAIuAXWWMzK76a8zGxtavx3WJgC8Kt9u2A1DySjsCJZjY2/ThJ2wJXAz0IF1RnmtlgSSOBZWZ2VdzvPWCImc1KHS9pEBkSoqQTCL1WWq69Tv/+543J3Q/vsjpj300KHUKzc0S/Xm82pHB8uq223cHuefw/GffZZbNOOTtfXRT9kBlAUm/C5Mp5hD8wp5lZ3/ja2MzG1nLYjcAoM9sWOBFIzXStZPWfu84zYM3s9tTF3hZrrV3Xw51zyvIqkKJPiJK6AbcSkpsRusi/ltQibt9c0lq1HNqR7y+ypneVZwH94rH9gI1rOXYp0L6W751zOdDQusyNFlfBzpxZm9S0G+B5YCxwSdx2JzAVmBSHu7dR+7XQkcAjkt4Evkz7/jGgc2z7VODDWo59B6iKU35K6qaKc6WgSDuIxXlTxczWOFXfzFYB58dXzW3t0t4/ATxRyz7Lgf3W0Ha7+O+VJJjE6ZyrO+E1VZxzLkg+tSbvPCE65/KuSPOhJ0TnXL7Jh8zOOZdSpPnQE6JzLr/CTZVCR1E7T4jOubzzIlPOORcVaw+xWCdmO+eaqtysdoOkwZI+kDRd0rm1bD9L0tS4KtYLkjbM1qYnROdc3inLP1mP/76mygFAH+BISX1q7PYWMMDMtgMeBf6crV1PiM65vErdVGlgDzFJTZX/mFlqVazXCIvIZuQJ0TmXdzlIiLXVVOmZYf/jgX9ma9Rvqjjn8i7BsDhbkank55KOBgYAe2bb1xOicy7vyrL3Ar/MskBsopoqccXsC4A9zWxFze0/iCtrWM45l2sNX/8rSU2VHQjLAw6NtZmy8h6icy6vQs7LS02VKwmF5x6Jz05/YmZDM7XrCdE5l19KNGTOysz+AfyjxncXpb3fp65tekJ0zuVfkT6p4gnROZdnOSlU3yg8ITrn8krkZsjcGDwhOufyzxOic84FhSw1moknROdc3hVnOvSE6JzLN6+655xzgddlds65NMWZDj0hOucKoEg7iJ4QnXP550Nm55yLijMd+vJfzrk8k8I8xEyvZO1kLTK1h6RJkiolHZakTU+Izrn8a+B6iAmLTH0CjAAeTBqWD5mdc3mXgyFzdZEpAEmpIlNTUzuY2ay4bVXSRj0hOufyLNGwOFtNldqKTO3U0Mg8ITrn8ipVhjSLbDVVGoUnROdc3uVg1k2iIlN15TdVnHN5pyz/JJC1yFR9eEJ0zuVXliL1SXqPZlYJpIpMvQ+MSRWZkjQUQNKOkj4DDgdukzQlW7s+ZHbO5VXCa4hZJSgyNYEwlE7ME6JzLu+8popzzkVF+iizJ0TnXP55QnTOuahYh8wys0LHUNIkzQc+LnQc9dQV+LLQQTQjpfz73tDMuuWiIUnPEn4XmXxpZoNzcb668ITYjEmaWIinAZor/30XP5+H6JxzkSdE55yLPCE2b7dn38XlkP++i5xfQ3TOuch7iM45F3lCdM65yBOic85FnhBdIpJ6SNoivh8iqUuhY2pKJLVLe5+TCdCu7vzRPZfUWsDTkl4CNiAs0OkaSKFieytgqKRKoCWwoaSrzGxFYaNrfjwhuowkDQC6mdk/Y2Wz3wOnmNkXklqa2XcFDrHU9TSzzyS9BzxGSI7bmdkKSeVmVlXg+JoVHzK7bPoCIyXtA7wIHAtcLenQVDKU1LqA8ZUkBe2BKZLOAN4D5gPTgEMBPBnmn/cQXa0kyYI7JX1H6Blea2YPSloKPCBpAdAB2EvS7/x/4Loxs6XxD80zwBdmNlDSjsAFktqb2fWxh77YzKYXNtrmwROiq5XFGfuSNjGzv8TrW+fERPmUpKOBUcAS4NeeDOsm9fs1swmSfgz8S1InM7tF0ijgLEl7A52BYYWMtTnxhOjWSFIf4EJJT8eeIcDvYufxSUmvASvNbFFhIy1Nko4AKs3s0dhTfF7SKjO7TdInwAjgL2Y2p6CBNiOeEN0amdlUSeOBPSVVxaRowGWSVprZ84WOscS1Bs6TtCL2uvcB/hmHy1cB5xc4vmbHE6L7gTiE29DMbjGzmyX9CjhAUqWZPSRpJfBRgcMsWZI2BWaa2b2SVhD+wJSb2d8lHQQ8LOluwrXDVYWNtnnxhOiqb6DE9xVAW+AQSd+a2T1mdoekbYCL45Du0YIGXMIkbUeoJzxO0sPxD0w5cK+kE81stKQtfQ5iYfi0m2ZOUllaMmwHVMSEdzMhKf4y7joemAK8UphIS1OceJ1uBvAusBNweOwZ/pXw+z1JUlvA53YWiPcQm7nUkEzSWcAuQIf4lMQTklYBf4pD6C2An5jZvAKGW3LS/tgMA5YDC83sRkknAbsC3SUtI9RaOcXMvilctM57iM2UpL3Snk0+BTgIOI7wP+3TceL1U8BQ4CngIDP7sGABlxhJ3dPenwmcBvQELpd0nJndCowDtgZOBK4ys1ItVtZkeA+xGYqTfR8k3D1uS+idHAH8ipAQjwIeknSMmY0hDPNcQpKGAMNjL3BjYE9gD2Ak8C0wTFIrM7sF+JukDmb2VcECdtV8xexmJl7TGkwYrr1L6KH8mTAB+B7gaDObK+kFYHNgK+Br8/9QEpG0NuGPzcnASmBDYBbh9306sDdwJnAKcL2ZXZN+U8sVlvcQmxkzszi38FrCUG0vM1sW73TOAnaU1AN4G/i5mS0rXLSlx8wWS/oceBaYBwwys1WSOgL3mtlKSd8AdwOPxmM8GRYJT4jNRKoXIqkM+IZw/WoLYB9Jc4GFhLmF+xN6MT8zs9mFircUpfX0/k5YoGFhjXmEZ8frtkcQEuUnBQjTZeBD5magxjzDgcDHhGuFK4FHCFNpLo+9l3KgvZktLlS8pabmkFdSV8LUmQcIv8tB8fvDgDbABDObVohYXWaeEJsRSb8l3E2eQrhmeDrQEbiOcD3xCk+E9SfpRMJ11wXADfFSxL8I/5/tU9joXBI+7aaZkNQP2Df2VsoJl0tSy0r9BtgMv4RSb5JGAEcDtxGWSjsZwMz2BTpKeqJw0bmkPCE2UZIGSHow7asy4ENJFxBKABwTh8j7xKR4pJl9WZBgS1ycurQtIQnuDLwBXBMfg8TMdiT0xl2R8yFzE6ZQ/2SqmZ2ksKr1Y0APYKCZfRvnyQ0jPIGyuIChlhRJmwFdCM98TzazhZJOBw4BvjOzwXG/84G5ZnZPwYJ1deJDpCYmzjNUvLt5GXCXpEcJie9+YDfgNknvAD8HhnsyTE7SgcAfCTem2gFbSRoMvA8MB66U1JLwhM/PCHeUXYnwHmITpVCnYz9gDHABYRj3K8LjY8MIK10/73c7k4uJbyRwjpm9GL+7GPgFsC/QHziA0AtvCZxhZu8WJlpXH54Qm5jYQ2xJmPQ7ysyei9+/Csw2s8MKGV+pktSZ8IjjUDN7WlJrM/s2bruU0BvcjrDoa2vCStgLCxawqxe/qdLEWLACmE6YUpNyHHCopKsKE1lpi8ntIMLqP13iNdhWcdtFwBfAtmb2lZnN82RYmvwaYtP1LuHJiBnAW4T5cTcBtxQ0qhJmZs/EJdHekDTAzBZJamFmK4HFgC/qWuI8IZaw2hYFkFRhZpVmdrekDsAlwNeE4dzBZvbfQsTaVJjZPyWdCkxMS4o/B9YlPLvsSphfQyxRNR7H24xwk2RRnFvYKrUEvaTe8ZDvzOyzAoXb5Eg6gLBK0M3AMcAJZvZeYaNyDeUJscRJOplwfXAasD5wYHxkLDWUc40krnv4N2AHM5tS6Hhcw3lCLDEKJSqXxve7AzcSJgTPAa4E9gJ2MrPlBQuyGZHU1pf9bzr8LnMJkbQJ8AdJO8avFgOvmtksQsH4Mwg3Uw4pSIDNkCfDpsUTYmnpCKwCfiKpL2FVlf0kDUm7ufIF0KJA8TlX0nzIXAIkrZ16vE7S1oTHwdoAVwGbAo8DVxNWsfkpcIQXhHKu7ryHWOQk7UOY93Z9HCovJMwnXAacQZiAvS+h59ie8GyyJ0Pn6sF7iEUuDo1fI6zAfD4hCV4BbAnMB7oD15nZp4WK0bmmwidmFzkzmxwXd30R+IqwYMNehIUEOgJ9gTJJ5xBurPhfOOfqyXuIJSIOl58nrKByb6x9sj0hQT5hZu8XNEDnmgBPiCUkJsWxwAVmdnOh43GuqfEhcwkxswnxJssESd+a2d2Fjsm5psR7iCVI0g7AN2b2QaFjca4p8YTonHORz0N0zrnIE6JzzkWeEJ1zLvKE6JxzkSdE55yLPCG6rCRVSZos6T1Jj0hq24C27pV0WHx/p6Q+GfYdJGlgPc4xS1LXpN/X2GdZHc81UtLZdY3RFSdPiC6J5WbW18y2ISwycVL6Rkn1muBvZr80s6kZdhkE1DkhOldfnhBdXb0MbBp7by9LehKYKqlc0pWSJkh6R9KJEIphSRol6QNJzxNW5yFuGydpQHw/WNIkSW9LekHSRoTEe2bsne4uqZukx+I5JkjaNR7bRdJYSVMk3Qko2w8h6e+S3ozHnFBj27Xx+xckdYvfbSLp2XjMy5K2zMlv0xUVf3TPJRZ7ggcAz8av+gHbmNnMmFSWmNmOsYD7K5LGAjsAWwB9gHWAqcDdNdrtBtwB7BHb6mxmCyXdCiwzs6vifg8C15rZeEkbAM8BWwEXA+PN7FJJBwLHJ/hxjovnaEN4FPIxM1sArAVMNLMzJV0U2z4VuB04ycw+krQTodre3vX4Nboi5gnRJdFG0uT4/mXgLsJQ9g0zmxm/3w/YLnV9kLA02WbAHsBDZlYFzJH071ra3xl4KdWWmS1cQxz7AH2k6g5gB0nt4jkOjcc+I2lRgp/pdEk/ie/Xj7EuICy0Ozp+/1fgb/EcA4FH0s7dKsE5XInxhOiSWG5mfdO/iInh6/SvgNPM7Lka+/04h3GUATub2be1xJKYpEGE5LqLmX0jaRzQeg27Wzzv4pq/A9f0+DVElyvPAb+W1AJA0uaS1gJeAobFa4w9CIvb1vQasIekjeOxneP3SwllEVLGAqelPsTVxInnOCp+dwDQKUusHYFFMRluSeihppQBqV7uUYSh+FfATEmHx3NI0vZZzuFKkCdElyt3Eq4PTpL0HnAbYQTyOPBR3PYX4NWaB5rZfOAEwvD0bb4fsj5FqDA4WaEG9enAgHjTZirf3+2+hJBQpxCGzp9kifVZoELS+8DlhISc8jXwP/Fn2Bu4NH4/HDg+xjcFODjB78SVGF/txjnnIu8hOudc5AnROeciT4jOORd5QnTOucgTonPORZ4QnXMu8oTonHPR/wdt0g5LV2H8dwAAAABJRU5ErkJggg==\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "%matplotlib inline\n", "from utils import plot_confusion_matrix\n", @@ -438,27 +320,9 @@ }, { "cell_type": "code", - "execution_count": 50, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,\n", - " colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,\n", - " importance_type='gain', interaction_constraints='',\n", - " learning_rate=0.300000012, max_delta_step=0, max_depth=6,\n", - " min_child_weight=1, missing=nan, monotone_constraints='()',\n", - " n_estimators=100, n_jobs=0, num_parallel_tree=1, random_state=1,\n", - " reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,\n", - " tree_method='exact', validate_parameters=1, verbosity=None)" - ] - }, - "execution_count": 50, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "from xgboost import XGBClassifier\n", "xgb = XGBClassifier(random_state=1)\n", @@ -467,7 +331,7 @@ }, { "cell_type": "code", - "execution_count": 51, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -476,27 +340,11 @@ }, { "cell_type": "code", - "execution_count": 52, + "execution_count": null, "metadata": { "scrolled": true }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " precision recall f1-score support\n", - "\n", - " No default 0.91 0.99 0.95 3927\n", - " Default 0.95 0.64 0.76 1073\n", - "\n", - " accuracy 0.92 5000\n", - " macro avg 0.93 0.82 0.86 5000\n", - "weighted avg 0.92 0.92 0.91 5000\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "print(classification_report(y_test1, y_preds1,\n", " target_names=['No default','Default']))" @@ -504,27 +352,9 @@ }, { "cell_type": "code", - "execution_count": 53, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "accuracy_score:\n", - " 0.9154\n", - "precision_score:\n", - " 0.9476584022038568\n", - "recall_score:\n", - " 0.641192917054986\n", - "f1_score:\n", - " 0.7648693718732629\n", - "confusion_matrix:\n", - " [[3889 38]\n", - " [ 385 688]]\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "for score in [accuracy_score, precision_score, recall_score, f1_score, confusion_matrix]:\n", " print(score.__name__ + ':\\n', score(y_test1, y_preds1))" @@ -532,31 +362,9 @@ }, { "cell_type": "code", - "execution_count": 54, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Normalized confusion matrix\n", - "[[0.9903234 0.0096766 ]\n", - " [0.35880708 0.64119292]]\n" - ] - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAUQAAAEYCAYAAAAkpo9KAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Il7ecAAAACXBIWXMAAAsTAAALEwEAmpwYAAAnnElEQVR4nO3dd5hV1fn28e89Q5WOiCKIFVRsSLBhN2o0IhpTMKKJJdEktmhMYjRRQpqJmmgssfdumjVC9BdN8LWAxgJYgogFC11BqcPz/rHXwGEcZs4MM6fM3J9c5/Kcvdde+zkn+sxae621tyICMzODimIHYGZWKpwQzcwSJ0Qzs8QJ0cwscUI0M0ucEM3MEidEW2uSOkp6QNJHku5di3pGSRrXlLEVi6Q9Jb1W7DisYeR5iK2HpKOAM4GtgAXAC8CvImL8WtZ7DHAqMCwilq9tnKVOUgADImJqsWOxpuUWYish6UzgEuDXwPpAf+BK4LAmqH5j4PXWkAzzIalNsWOwRooIv1r4C+gGLAS+WkeZ9mQJ8730ugRon/btA7wL/ACYCbwPHJf2/RxYCixL5zgBGA3cllP3JkAAbdLnY4FpZK3UN4FROdvH5xw3DJgAfJT+OSxn3+PAL4AnUz3jgF5r+G7V8f8oJ/7DgS8CrwNzgXNyyu8MPAXMT2UvB9qlff9O3+WT9H1H5tT/Y+AD4NbqbemYzdM5hqTPGwKzgH2K/e+GX6u/3EJsHXYDOgB/q6PMucCuwGBgB7Kk8NOc/RuQJda+ZEnvCkk9IuJ8slbn3RHROSKurysQSZ2APwIHR0QXsqT3Qi3legIPpbLrAr8HHpK0bk6xo4DjgN5AO+CsOk69Adlv0Bc4D7gWOBr4HLAn8DNJm6ayVcAZQC+y3+7zwPcAImKvVGaH9H3vzqm/J1lr+cTcE0fEG2TJ8jZJ6wA3AjdHxON1xGtF4ITYOqwLzI66u7SjgDERMTMiZpG1/I7J2b8s7V8WEQ+TtY62bGQ8K4BtJXWMiPcjYnItZQ4B/hcRt0bE8oi4E3gVODSnzI0R8XpELALuIUvma7KM7HrpMuAusmR3aUQsSOefQvaHgIh4LiKeTuedDlwN7J3Hdzo/IpakeFYTEdcCU4FngD5kf4CsxDghtg5zgF71XNvaEHgr5/NbadvKOmok1E+Bzg0NJCI+Ietmfgd4X9JDkrbKI57qmPrmfP6gAfHMiYiq9L46YX2Ys39R9fGSBkp6UNIHkj4mawH3qqNugFkRsbieMtcC2wKXRcSSespaETghtg5PAUvIrputyXtk3b1q/dO2xvgEWCfn8wa5OyNibEQcQNZSepUsUdQXT3VMMxoZU0P8iSyuARHRFTgHUD3H1DldQ1Jnsuuy1wOj0yUBKzFOiK1ARHxEdt3sCkmHS1pHUltJB0v6XSp2J/BTSetJ6pXK39bIU74A7CWpv6RuwE+qd0haX9Jh6VriErKu94pa6ngYGCjpKEltJI0EBgEPNjKmhugCfAwsTK3X79bY/yGwWQPrvBSYGBHfIrs2etVaR2lNzgmxlYiIi8nmIP6UbITzHeAU4O+pyC+BicBLwMvA82lbY871T+DuVNdzrJ7EKlIc75GNvO7NZxMOETEHGE42sj2HbIR4eETMbkxMDXQW2YDNArLW69019o8GbpY0X9LX6qtM0mHAQaz6nmcCQySNarKIrUl4YraZWeIWoplZ4oRoZpY4IZqZJU6IZmaJF6GvJbXpGGrXpdhhtCo7bt2/2CG0Os8//9zsiFivKeqq7LpxxPLPLOZZTSyaNTYiDmqK8zWEE+JaUrsutN+y3pkX1oSefObyYofQ6nRsq5qrhhotli+q97+ZxS9cUd/KoGbhhGhmhSVBRWWxo6iVE6KZFZ5Kc/jCCdHMCk/1LQ0vDidEMyswd5nNzDLCXWYzs4xbiGZmq/gaopkZgNxlNjMDsmuI7jKbmYFbiGZm1QRUuoVoZpbxoIqZGbjLbGaWy4MqZmZk3WV3mc3MEneZzczAS/fMzHK5y2xmRrpjdmmmntKMysxaNrcQzcwSD6qYmeGHTJmZrcZdZjOzjJwQzczSQpUKJ0QzM0BuIZqZVauo8CizmRnga4hmZhmlVwlyQjSzghJyl9nMrFqpdplLM02bWYsmqc5XnnUcJOk1SVMlnV3L/v6S/iXpv5JekvTF+up0QjSzwkrzEOt61VuFVAlcARwMDAK+LmlQjWI/Be6JiB2BI4Er66vXCdHMCkrU3TrMs4W4MzA1IqZFxFLgLuCwGmUC6JredwPeq69SX0M0s4LLoxXYS9LEnM/XRMQ1OZ/7Au/kfH4X2KVGHaOBcZJOBToB+9d3UidEMyss5TWoMjsihq7lmb4O3BQRF0vaDbhV0rYRsWJNBzghmlnBNcEo8wxgo5zP/dK2XCcABwFExFOSOgC9gJlrqtTXEM2soKrnIdb1ysMEYICkTSW1Ixs0ub9GmbeBzwNI2hroAMyqq1InxBbogGFb8+Lffsak+87nrOMO+Mz+/n168PBVp/Ls3T9h7LWn07d395X7fnnaYUy89xwm3nsOXzlwSAGjLm/jxj7C9ttsyTZbbcGFv7vgM/uXLFnC0UeNZJuttmDPYbvw1vTpAMyZM4cv7L8vvbp35vunnVLgqItI9bzqERHLgVOAscArZKPJkyWNkTQiFfsB8G1JLwJ3AsdGRNRVr7vMLUxFhbjk7K9xyHcvZ8aH8xl/+w958ImXeXXaByvL/OaML3H7Q89y+wPPsPdOAxlz6ghO+NktHLTHNgzeeiN2OfIC2rdtw7jrTmfsk1NY8MniIn6j0ldVVcX3TzuZh/7xT/r268ceu+7E8OEj2HrQqlkgN91wPT2692Dyq1O55+67OPecH3PbHXfToUMHzhv9C6ZMnsTkyZOK+C0KSE1zc4eIeBh4uMa283LeTwF2b0idbiG2MDttuwlvvDOb6TPmsGx5FfeOfZ7h+2y/WpmtNuvDE8++BsATE15n+D7bAbD1Zhsw/vmpVFWt4NPFS3n5fzM4cNjWBf8O5WbCs8+y+eZbsOlmm9GuXTu+OvJIHnzgvtXKPPjAfYw65psAHPHlr/D4/z1GRNCpUyd232MPOnToUIzQi6YpJmY3ByfEFmbD3t1498N5Kz/P+HAefdfrtlqZl1+fwWH7DQbgsP12oGvnjvTs1omXXs8SYMcObVm3eyf2HjqQfhv0KGT4Zem992bQr9+q6/t9+/ZjxowZny2zUVamTZs2dO3WjTlz5hQ0zpKyll3m5tJsCVFSSLo45/NZkkavRX3TJfWqp8xXJb0i6V+NPMfjkoam9+c0po5y8JM//I09P7cFT935Y/b83BbM+HAeVVUreOzpV3lk/BT+ddMPuPk3x/HMS29SVbXGGQpmjSI1yaBKs2jOa4hLgCMk/SYiZjfjeXKdAHw7IsY3QV3nAL9ugnoK6r2ZH9Fv/VWtur7r92DGrI9WK/P+rI848qzrAOjUsR2Hf34wHy1cBMDvrh/L764fC8BNvz6W/729xhkKlmy4YV/efXfVHOEZM96lb9++ny3zzjv069eP5cuX8/FHH7HuuusWOtSS0Rpv7rAcuAY4o+YOSZtI+r+04PoxSf1rKbOupHGSJku6jpyGtKSjJT0r6QVJV0uqlHQesAdwvaQL0zn+I+n59BqWjt1H0oM5dV0u6dga574A6Jjqv72Jfo+CmDj5Lbbovx4bb7gubdtU8tUvDOGhx19arcy63Tut/Bfyh8d/gZvvexrIBmR6dusEwLYDNmTbARvy6FOvFvYLlKGhO+3E1Kn/Y/qbb7J06VLuvfsuDhk+YrUyhwwfwe233gzAX//yZ/bed7+STQqFUKrXEJt7lPkK4CVJv6ux/TLg5oi4WdLxwB+Bw2uUOR8YHxFjJB1C1vqrnk80Etg9IpZJuhIYlcrtB5wVERMlrQMcEBGLJQ0gG3bPa+Z7RJwt6ZSIGFzbfkknAicC0LZzPlUWTFXVCs747T08cOXJVFaIm+97mlemfcDPvnsIz095m4eeeJm9hg5gzKkjiIDxz0/l+7+5B4C2bSp59IbvA7Bg4WKOP/dmd5nz0KZNG/5w6eUcesgXqKqq4pvHHs+gbbZhzOjzGPK5oQw/dATHHn8Cxx97DNtstQU9evTk1tvvWnn8lltswoKPP2bp0qU8cP/fefDhcauNULdEpfqQKdUzLafxFUsLI6KzpDHAMmAR0DkiRkuaDfRJCa0t8H5E9Kpx/AvAERExLX2eCwwkm4B5Dqtmm3cE7kz1Ps6qhNgNuBwYDFQBAyNiHUn7pDLDU72XAxMj4qYaxy+MiHqzXcU6vaP9ll9r7M9kjTBvwuXFDqHV6dhWzzXBUjoA2m8wIPqN+mOdZab9/otNdr6GKMQ8xEuA54Ebm6g+kbUuf1JPuTOAD4EdyC4NVE+mW87qlwpa13wHsyLLVqqUZgux2YdzImIucA+py5v8P7KWHsAo4D+1HPpv4CgASQcD1SMFjwFfkdQ77espaeNaju9G1vJcARwDVKbtbwGDJLWX1J20tKcW1a1XM2tiUt2vYinU+PbFZIuqq50KHCfpJbJkdXotx/wc2EvSZOAIsnWJ1bPPf0p2W5+XgH8CfWo5/krgm2nZzlbAJ+n4d8gS9KT0z/+uIeZryK5/ltWgilk5KNVBlWa7htha+Bpi4fkaYuE15TXEjn0GxqbH1f3/4Su/+UKLvYZoZraaUp1x5IRoZgVXqnMwnRDNrKAkSnaU2QnRzAqsuAMndXFCNLOCcwvRzAzSQ6aKHUTtnBDNrKCEB1XMzFZyl9nMLCnRBqITopkVWH4Pqi8KJ0QzK6hSvtuNE6KZFVyJNhCdEM2swLxSxcws42k3ZmY5nBDNzBJ3mc3MwEv3zMyqleW0G0mXAWt8vkBEnNYsEZlZi1fRBE1ESQcBl5I9QO66iLigljJfA0aT5bIXI+Kouuqsq4U4sfGhmpmt2drmQ0mVwBXAAcC7wARJ96eH0FWXGQD8BNg9IuZVP6mzLmtMiBFxc40A1omITxv7BczMIEuGlWvfZd4ZmBoR07I6dRdwGDAlp8y3gSsiYh5ARMysr9J6H0MqaTdJU4BX0+cdJF3Z8PjNzDJ5PIa0l6SJOa8Ta1TRF3gn5/O7aVuugcBASU9Kejp1seuUz6DKJcAXgPsBIuJFSXvlcZyZWa3y6DLPboLHkLYBBgD7AP2Af0vaLiLm13VAvSLinRoTKasaH6OZtWYCKtd+UGUGsFHO535pW653gWciYhnwpqTXyRLkhDVVWm+XGXhH0jAgJLWVdBbwSoNCNzOrVk93Oc9VLBOAAZI2ldQOOJLUi83xd7LWIZJ6kXWhp9VVaT4J8TvAyWT98/eAwemzmVmDiWxQpa5XfSJiOXAKMJasgXZPREyWNEbSiFRsLDAnjYH8C/hhRMypq956u8wRMRsYVW+EZmZ5aoqVKhHxMPBwjW3n5bwP4Mz0yks+o8ybSXpA0ixJMyXdJ2mzBsRtZraaJugyN4t8usx3APcAfYANgXuBO5szKDNruarnIa5Nl7m55JMQ14mIWyNieXrdBnRo7sDMrOVSPa9iqWstc8/09h+SzgbuIlsPOJIa/XYzs4Yox/shPkeWAKsjPylnX5CtETQzaxCpuN3iutS1lnnTQgZiZq1HiTYQ81upImlbYBA51w4j4pbmCsrMWq7qeYilqN6EKOl8stneg8iuHR4MjAecEM2sUUr1GmI+o8xfAT4PfBARxwE7AN2aNSoza9HKbpQ5x6KIWCFpuaSuwExWX1RtZpa3JrofYrPIJyFOlNQduJZs5Hkh8FRzBmVmLVupdpnzWcv8vfT2KkmPAF0j4qXmDcvMWrISzYd1TsweUte+iHi+eUIys5asLOchAhfXsS+A/Zo4lrK0ySYbMOa6s4sdRqty4B/HFzsEW0tl12WOiH0LGYiZtQ5NdMfsZuEH1ZtZwZVoj9kJ0cwKzwnRzIzSnoeYzx2zJeloSeelz/0l7dz8oZlZSyXV/SqWfJbuXQnsBnw9fV4AXNFsEZlZiyagjVTnq1jy6TLvEhFDJP0XICLmpcf+mZk1SokOMueVEJdJqiSbe4ik9YAVzRqVmbVYkqgo0YyYT0L8I/A3oLekX5Hd/eanzRqVmbVolflcrCuCfNYy3y7pObJbgAk4PCJeafbIzKxFEpRvC1FSf+BT4IHcbRHxdnMGZmYtV4nmw7y6zA+x6mFTHYBNgdeAbZoxLjNrqVTGS/ciYrvcz+kuON9bQ3EzszplXeZiR1G7Bq9UiYjnJe3SHMGYWetQqitV8rmGeGbOxwpgCPBes0VkZi1aKbcQ8xn87pLzak92TfGw5gzKzFqwepbt5Xt5UdJBkl6TNFXSGm9KKunLkkLS0PrqrLOFmCZkd4mIs/IL0cysbgLarGUTMeWmK4ADgHeBCZLuj4gpNcp1AU4Hnsmn3jW2ECW1iYgqYPdGR21mVosmaCHuDEyNiGkRsRS4i9p7rr8AfgsszqfSulqIz5JdL3xB0v3AvcAn1Tsj4q95hW1mthpRUf/Tl3tJmpjz+ZqIuCbnc1/gnZzP7wKrDfamGTEbRcRDkn6YT2T5jDJ3AOaQPUOlej5iAE6IZtZg2f0Q6y02OyLqvea35nOoAvg9cGxDjqsrIfZOI8yTWJUIq0VDAzQzq9YES/dmABvlfO6XtlXrAmwLPJ4eaLUBcL+kERGR2/JcTV0JsRLoDLW2bZ0QzaxRRJPMQ5wADJC0KVkiPBI4qnpnRHwE9Fp5Tulx4Ky6kiHUnRDfj4gxaxOxmVlt1raBGBHLJZ0CjCVrvN0QEZMljQEmRsT9jam3roRYolMnzaycifwmQNcnIh4GHq6x7bw1lN0nnzrrSoifzzsyM7N8qQxv/xURcwsZiJm1DmV9P0Qzs6ZWqmuZnRDNrMCE3EI0M2u6QZXm4IRoZgXna4hmZpBu/+WEaGbmLrOZWS53mc3MkhLNh06IZlZYoowfQ2pm1rSESvRWCU6IZlZwJdpAdEI0s8KS3GW2Anrp//2LWy8azYoVVexz+Nc59NiTV9v/2J9v5dF7b6aispIOHTtx/LkX0HezgQC8/b9XuPHXZ7Pok4VI4ue3PEi79h2K8TXKys6bdOe0fTajokI89PKH3D7h3c+U2XdgL47brT8RwdTZn/CLh19fuW+ddpXc8s0hjH9jDpf837RChl4UJZoPnRBbmhVVVdz825/y4yvuoOf6fTjvG8MZstcBKxMewLCDDufzXzkGgOefGMftfxjDjy67jarly7nqZ6dx0phL2XjgIBbMn0ebNm2L9VXKRoXgjP0258y/TGLWgqVcM2ow49+Yw1tzF60s0697B0bt3I/v3fUiC5dU0b3j6r/rt4ZtzIszPip06EVTqtcQS3V+pDXSG5NfYP2NNqF3v41p07Ydux44gueeGLdamY6du6x8v2TRpytXDbz89L/ZaMDWbDxwEABduvegorKycMGXqa036MKM+Yt5/6MlLF8RPPbqLPbYfN3VygzfbgP+9sL7LFxSBcD8RctW7hvYuxM91mnLhOnzCxl20VSPMtf1Kha3EFuYeTM/oOf6G6783LN3H96Y9N/PlPvnPTfxyO3Xsnz5Mn7yp7sB+ODtaQjxu1NG8fG8uex64AiGf/O7BYu9XPXq3I6ZC5as/Dxr4RIG9emyWpmNenQE4IqR21NRATc+9TbPTp+PgJP33oxf/uM1Pte/ewGjLq5S7TKXZAtRUpWkFyRNlvSipB+kxwrWd9yF6ZgLG3nehemfm0g6qr7y5eyArx3Lxfc9ychTf8J91/8RgKqq5bz24gS++8vL+Nn1f+W5xx9h8rPjixxpy1BZIfr16Mhp977MmIde40cHDKBz+0q+NLgPT785l1kLlxY7xIJxC7HhFkXEYABJvYE7gK7A+fUcdyLQMyKq1vL8m5A9weuOtayn4Hr03oC5H7638vPcme/To/cGayy/64GHcdNvzgWy1uRWO+5Cl+49Adhh932Z/uokttl5j+YNuszNXriU3l3ar/y8Xuf2zFqweoKbtWAJUz5YQNWK4P2Pl/DOvEX0696Rbfp0Yfu+XTl8hz50bFdJ2wqxaGkVV49/q9Bfo4BKdx5iSbYQc0XETLJEd4oylaklOEHSS5JOApB0P9ljU5+TNFLSoZKekfRfSY9KWj+VGy3prOr6JU2StEmN014A7JlaqWcU5Is2kc0G7cAH70xn5oy3Wb5sKU+Pu58hex2wWpkP3n5z5fsXxj/GBv03AWD73fbmnamvsmTxIqqWL+fV55+h72YDChl+WXr1gwX0696RPl3b06ZCfH6r9Xhy2upP4PjPG3PYsV83ALp1aMNGPTry3keL+cU/Xuer101k5PUTufKJNxn7yswWngxJd7up+1UspdpCXE1ETJNUCfQGDgM+ioidJLUHnpQ0LiJGSFqY07LsAewaESHpW8CPgB/kecqzyZ7hOrzpv03zqmzThm/88BdceOrRrKiqYq8RI+m3+Zb85aqL2HTr7Rmy94H8856bmPzseCrbtKFTl26cOPoPAHTq2p2DR32b87+Rfe0ddt+PwXv4WWP1qQq45F9vcNGXt6VC8PCkD5k+51OOH9af1z5YyJPT5vLs9PnstHEPbvnmEFZEcOW/3+TjxcuLHXpRlPLSPUWU3jPnU2LrXGPbfGBL4Apge+DTtKsbcFJEjMs9TtJ2wMVAH6Ad8GZEHCRpNLAwIi5K5SYBwyNievXxkvahjoQo6USyVivrbtD3c5c8+HTTfXmr1zVPTC92CK3Of36w53MRMbQp6tp6ux3jxr/9q84yuw3o0WTna4iS7zIDSNoMqAJmkv2BOTUiBqfXphExrpbDLgMuj4jtgJOA6tnFy1n9ezd41nFEXBMRQyNiaNcePRt6uJmpnleRlHxClLQecBVZcgtgLPBdSW3T/oGSOtVyaDdgRnr/zZzt04Eh6dghwKa1HLsA6FLLdjNrAhVSna+ixVW0M9etY/W0G+BRYBzw87TvOmAK8Hzq7l5N7ddCRwP3SnoOmJ2z/S9Az1T3KcDrtRz7ElCVpvyU1aCKWTko0QZiaQ6qRMQal0dExArgnPSqua9zzvv7gPtqKbMIOHANdXdO/1wG7NfgwM2sXsLPVDEzyxR5ak1dnBDNrOBKNB+W7DVEM2uxhFT3K69apIMkvSZpqqSza9l/pqQpaQHHY5I2rq9OJ0QzK7i1XamSFmpcARwMDAK+LmlQjWL/BYZGxPbAn4Hf1VevE6KZFVQ2qLLWS/d2BqZGxLSIWArcRbaKbaWI+FdEVC/geBroV1+lTohmVnCq539AL0kTc14n1qiiL/BOzud307Y1OQH4R31xeVDFzAouj1bg7KZauifpaGAosHd9ZZ0QzaywmmbazQxgo5zP/Vi1Mm3VqaT9gXOBvSNiSc39NbnLbGYFl0eXuT4TgAGSNpXUDjgSuH+1c0g7kq1kG5FuI1gvtxDNrKCqB1XWRkQsl3QK2b0NKoEbImKypDHAxIi4H7iQ7B6p96apPG9HxIi66nVCNLOCa4qVKhHxMPBwjW3n5bzfv6F1OiGaWcGV6iMEnBDNrOAqSjMfOiGaWRE4IZqZVd/zsDQzohOimRWW3GU2M1vFCdHMDEr5QfVOiGZWUMJdZjOzVZwQzcwyxXzUaF2cEM2s4EozHTohmlmh+al7ZmYZP5fZzCxHaaZDJ0QzK4ISbSA6IZpZ4bnLbGaWlGY6dEI0swKTPA/RzGyV0syHTohmVnglmg+dEM2s0OQus5kZNM1jSJuLE6KZFZwToplZ4hvEmpmBb+5gZlbN1xDNzHK4y2xmlriFaGaWOCGamSWl2mVWRBQ7hrImaRbwVrHjaKRewOxiB9GKlPPvvXFErNcUFUl6hOy3qMvsiDioKc7XEE6IrZikiRExtNhxtBb+vUtfRbEDMDMrFU6IZmaJE2Lrdk2xA2hl/HuXOF9DNDNL3EI0M0ucEM3MEidEM7PECdHyIqmPpC3T++GS1i12TC2JpM4575tkArQ1nJfuWb46AQ9K+jfQH5hQ5HhaBGVPbG8PjJC0HGgHbCzpoohYUtzoWh8nRKuTpKHAehHxD0l3AT8CTo6IDyW1i4ilRQ6x3PWNiHclTQL+QpYct4+IJZIqI6KqyPG1Ku4yW30GA6Ml7Q88ARwHXCzpiOpkKKlDEeMrS8p0ASZLOh2YBMwCXgWOAHAyLDy3EK1WkhSZ6yQtJWsZ/iEi7pC0ALhd0hygK7CvpB/6P+CGiYgF6Q/NQ8CHETFM0k7AuZK6RMSlqYU+PyKmFjfa1sEJ0WoVaca+pM0j4pZ0fevHKVE+IOlo4HLgI+C7ToYNU/37RsQESV8E/impR0T8SdLlwJmS9gN6AiOLGWtr4oRoayRpEPBTSQ+mliHAD1Pj8X5JTwPLImJecSMtT5KOBJZHxJ9TS/FRSSsi4mpJbwPHArdExHtFDbQVcUK0NYqIKZLGA3tLqkpJMYBfSVoWEY8WO8Yy1wH4iaQlqdW9P/CP1F2+CDinyPG1Ok6I9hmpC7dxRPwpIq6U9G3gYEnLI+JOScuA/xU5zLIlaQvgzYi4SdISsj8wlRHxd0mHAndJuoHs2uGK4kbbujgh2soBlPS+DbAOcLikxRFxY0RcK2lb4PzUpftzUQMuY5K2B04BHpd0V/oDUwncJOmkiLhb0laeg1gcnnbTykmqyEmGnYE2KeFdSZYUv5WKjgcmA08WJ9LylCZe55oGvAzsAnw1tQxvI/t9vyNpHcBzO4vELcRWrrpLJulMYDega1olcZ+kFcBvUhd6S+BLETGziOGWnZw/NiOBRcDciLhM0neA3YHekhaSPWvl5Ij4tHjRmluIrZSkfXPWJp8MHAocT/Yf7YNp4vUDwAjgAeDQiHi9aAGXGUm9c96fAZwK9AUukHR8RFwFPA5sA5wEXBQR5fqwshbDLcRWKE32vYNs9HgdstbJkcC3yRLiUcCdko6JiHvIunmWJ0nDgVGpFbgpsDewFzAaWAyMlNQ+Iv4E/FVS14j4uGgB20q+Y3Yrk65pHUTWXXuZrIXyO7IJwDcCR0fE+5IeAwYCWwOfhP9FyYuk7mR/bL4HLAM2BqaT/d6nAfsBZwAnA5dGxO9zB7WsuNxCbGUiItLcwj+QddX2jYiFaaRzOrCTpD7Ai8A3ImJh8aItPxExX9IHwCPATGCfiFghqRtwU0Qsk/QpcAPw53SMk2GJcEJsJapbIZIqgE/Jrl9tCewv6X1gLtncwi+QtWK+FhEzihVvOcpp6f2d7AYNc2vMIzwrXbc9kixRvl2EMK0O7jK3AjXmGQ4D3iK7VrgMuJdsKs0FqfVSCXSJiPnFirfc1OzySupFNnXmdrLfcp+0/StAR2BCRLxajFitbk6IrYikH5CNJk8mu2Z4GtANuITseuJvnQgbT9JJZNdd5wB/TJci/kn239n+xY3O8uFpN62EpCHAAam1Ukl2uaT6tlLfBwbgSyiNJulY4GjgarJbpX0PICIOALpJuq940Vm+nBBbKElDJd2Rs6kCeF3SuWSPADgmdZH3T0nx6xExuyjBlrk0dWk7siS4K/As8Pu0DJKI2ImsNW4lzl3mFkzZ80+mRMR3lN3V+i9AH2BYRCxO8+RGkq1AmV/EUMuKpAHAumRrvl+IiLmSTgMOB5ZGxEGp3DnA+xFxY9GCtQZxF6mFSfMMlUY3fwVcL+nPZInvVmAP4GpJLwHfAEY5GeZP0iHAL8gGpjoDW0s6CHgFGAVcKKkd2Qqfr5GNKFuZcAuxhVL2nI4DgXuAc8m6cd8mWz42kuxO1496tDN/KfGNBn4cEU+kbecD3wQOAD4HHEzWCm8HnB4RLxcnWmsMJ8QWJrUQ25FN+r08Isam7U8BMyLiK8WMr1xJ6km2xHFERDwoqUNELE77xpC1Brcnu+lrB7I7Yc8tWsDWKB5UaWEiswSYSjalptrxwBGSLipOZOUtJbdDye7+s266Bts+7TsP+BDYLiI+joiZToblydcQW66XyVZGTAP+SzY/7grgT0WNqoxFxEPplmjPShoaEfMktY2IZcB8wDd1LXNOiGWstpsCSGoTEcsj4gZJXYGfA5+QdecOi4g3ihFrSxER/5B0CjAxJyl+A9iAbO2ylTFfQyxTNZbjDSAbJJmX5ha2r74FvaTN0iFLI+LdIoXb4kg6mOwuQVcCxwAnRsSk4kZla8sJscxJ+h7Z9cFXgY2AQ9KSsequnDWTdN/DvwI7RsTkYsdja88Jscwoe0TlgvR+T+AysgnB7wEXAvsCu0TEoqIF2YpIWse3/W85PMpcRiRtDvxM0k5p03zgqYiYTvbA+NPJBlMOL0qArZCTYcvihFheugErgC9JGkx2V5UDJQ3PGVz5EGhbpPjMypq7zGVAUvfq5XWStiFbDtYRuAjYAvgbcDHZXWy+DBzpB0KZNZxbiCVO0v5k894uTV3luWTzCRcCp5NNwD6ArOXYhWxtspOhWSO4hVjiUtf4abI7MJ9DlgR/C2wFzAJ6A5dExDvFitGspfDE7BIXES+km7s+AXxMdsOGfcluJNANGAxUSPox2cCK/8KZNZJbiGUidZcfJbuDyk3p2Sc7kCXI+yLilaIGaNYCOCGWkZQUxwHnRsSVxY7HrKVxl7mMRMSENMgyQdLiiLih2DGZtSRuIZYhSTsCn0bEa8WOxawlcUI0M0s8D9HMLHFCNDNLnBDNzBInRDOzxAnRzCxxQrR6SaqS9IKkSZLulbTOWtR1k6SvpPfXSRpUR9l9JA1rxDmmS+qV7/YaZRY28FyjJZ3V0BitNDkhWj4WRcTgiNiW7CYT38ndKalRE/wj4lsRMaWOIvsADU6IZo3lhGgN9R9gi9R6+4+k+4EpkiolXShpgqSXJJ0E2cOwJF0u6TVJj5LdnYe073FJQ9P7gyQ9L+lFSY9J2oQs8Z6RWqd7SlpP0l/SOSZI2j0du66kcZImS7oOUH1fQtLfJT2Xjjmxxr4/pO2PSVovbdtc0iPpmP9I2qpJfk0rKV66Z3lLLcGDgUfSpiHAthHxZkoqH0XETukB7k9KGgfsCGwJDALWB6YAN9Sodz3gWmCvVFfPiJgr6SpgYURclMrdAfwhIsZL6g+MBbYGzgfGR8QYSYcAJ+TxdY5P5+hIthTyLxExB+gETIyIMySdl+o+BbgG+E5E/E/SLmRP29uvET+jlTAnRMtHR0kvpPf/Aa4n68o+GxFvpu0HAttXXx8kuzXZAGAv4M6IqALek/R/tdS/K/Dv6roiYu4a4tgfGCStbAB2ldQ5neOIdOxDkubl8Z1Ok/Sl9H6jFOscshvt3p223wb8NZ1jGHBvzrnb53EOKzNOiJaPRRExOHdDSgyf5G4CTo2IsTXKfbEJ46gAdo2IxbXEkjdJ+5Al190i4lNJjwMd1lA80nnn1/wNrOXxNURrKmOB70pqCyBpoKROwL+BkekaYx+ym9vW9DSwl6RN07E90/YFZI9FqDYOOLX6Q7qbOOkcR6VtBwM96om1GzAvJcOtyFqo1SqA6lbuUWRd8Y+BNyV9NZ1Dknao5xxWhpwQralcR3Z98HlJk4CryXogfwP+l/bdAjxV88CImAWcSNY9fZFVXdYHyJ4w+IKyZ1CfBgxNgzZTWDXa/XOyhDqZrOv8dj2xPgK0kfQKcAFZQq72CbBz+g77AWPS9lHACSm+ycBhefwmVmZ8txszs8QtRDOzxAnRzCxxQjQzS5wQzcwSJ0Qzs8QJ0cwscUI0M0v+P+RxXoR5PDK/AAAAAElFTkSuQmCC\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "%matplotlib inline\n", "from utils import plot_confusion_matrix\n", @@ -574,20 +382,9 @@ }, { "cell_type": "code", - "execution_count": 55, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['models/xgb_model/XGBModel.sav']" - ] - }, - "execution_count": 55, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "import joblib\n", "joblib.dump(rf, 'models/rf_model/RFModel.sav')\n", @@ -704,7 +501,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -774,7 +571,7 @@ }, { "cell_type": "code", - "execution_count": 56, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -783,27 +580,11 @@ }, { "cell_type": "code", - "execution_count": 57, + "execution_count": null, "metadata": { "scrolled": true }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " precision recall f1-score support\n", - "\n", - " No default 0.83 0.94 0.88 3896\n", - " Default 0.60 0.33 0.42 1104\n", - "\n", - " accuracy 0.80 5000\n", - " macro avg 0.72 0.63 0.65 5000\n", - "weighted avg 0.78 0.80 0.78 5000\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "print(classification_report(y_test2, y_preds2,\n", " target_names=['No default','Default']))" @@ -811,27 +592,9 @@ }, { "cell_type": "code", - "execution_count": 58, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "accuracy_score:\n", - " 0.8036\n", - "precision_score:\n", - " 0.6013289036544851\n", - "recall_score:\n", - " 0.3278985507246377\n", - "f1_score:\n", - " 0.4243845252051583\n", - "confusion_matrix:\n", - " [[3656 240]\n", - " [ 742 362]]\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "for score in [accuracy_score, precision_score, recall_score, f1_score, confusion_matrix]:\n", " print(score.__name__ + ':\\n', score(y_test2, y_preds2))" @@ -879,17 +642,9 @@ }, { "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Overwriting eg.yaml\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "%%writefile eg.yaml\n", "apiVersion: machinelearning.seldon.io/v1\n", @@ -941,17 +696,9 @@ }, { "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Overwriting ts.yaml\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "%%writefile ts.yaml\n", "apiVersion: machinelearning.seldon.io/v1\n", @@ -1001,18 +748,9 @@ }, { "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "seldondeployment.machinelearning.seldon.io/eg-experiment created\n", - "seldondeployment.machinelearning.seldon.io/ts-experiment created\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "!kubectl apply -f eg.yaml -n seldon\n", "!kubectl apply -f ts.yaml -n seldon" @@ -1020,35 +758,18 @@ }, { "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Waiting for deployment \"eg-experiment-eg-2-0-rf-model-xgb-model-eg-router\" rollout to finish: 0 of 1 updated replicas are available...\n", - "deployment \"eg-experiment-eg-2-0-rf-model-xgb-model-eg-router\" successfully rolled out\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "!kubectl rollout status deploy/$(kubectl get deploy -n seldon -l seldon-deployment-id=eg-experiment -o jsonpath='{.items[0].metadata.name}') -n seldon" ] }, { "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "deployment \"ts-experiment-ts-2-0-rf-model-xgb-model-ts-router\" successfully rolled out\r\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "!kubectl rollout status deploy/$(kubectl get deploy -n seldon -l seldon-deployment-id=ts-experiment -o jsonpath='{.items[0].metadata.name}') -n seldon" ] @@ -1069,26 +790,9 @@ }, { "cell_type": "code", - "execution_count": 60, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Processed 0/10000 samples\n", - "Processed 1000/10000 samples\n", - "Processed 2000/10000 samples\n", - "Processed 3000/10000 samples\n", - "Processed 4000/10000 samples\n", - "Processed 5000/10000 samples\n", - "Processed 6000/10000 samples\n", - "Processed 7000/10000 samples\n", - "Processed 8000/10000 samples\n", - "Processed 9000/10000 samples\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "\n", "for i in range(X_route.shape[0]):\n", @@ -1119,6 +823,112 @@ " _ = send_feedback_rest('ts-experiment', 'seldon', request, ts_response, ts_reward, truth)" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile ts-persistent.yaml\n", + "apiVersion: machinelearning.seldon.io/v1\n", + "kind: SeldonDeployment\n", + "metadata:\n", + " name: ts-experiment-persistent\n", + "spec:\n", + " predictors:\n", + " - componentSpecs:\n", + " - spec:\n", + " containers:\n", + " - image: seldonio/credit_default_rf_model:0.2\n", + " name: rf-model\n", + " env:\n", + " - name: REDIS_SERVICE_HOST\n", + " value: redis-master-0\n", + " - image: seldonio/credit_default_xgb_model:0.2\n", + " name: xgb-model\n", + " env:\n", + " - name: REDIS_SERVICE_HOST\n", + " value: redis-master-0\n", + " - image: seldonio/mab_thompson_sampling_persistent:1.6.0-dev\n", + " name: ts-router\n", + " env:\n", + " - name: REDIS_SERVICE_HOST\n", + " value: redis-master-0\n", + " graph:\n", + " children:\n", + " - name: rf-model\n", + " type: MODEL\n", + " - name: xgb-model\n", + " type: MODEL\n", + " name: ts-router\n", + " parameters:\n", + " - name: n_branches\n", + " type: INT\n", + " value: '2'\n", + " - name: verbose\n", + " type: BOOL\n", + " value: '1'\n", + " - name: branch_names\n", + " type: STRING\n", + " value: rf:xgb\n", + " - name: seed\n", + " type: INT\n", + " value: '1'\n", + " type: ROUTER\n", + " name: ts-2\n", + " replicas: 3\n", + " svcOrchSpec:\n", + " env:\n", + " - name: SELDON_ENABLE_ROUTING_INJECTION\n", + " value: 'true'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!kubectl apply -n seldon -f ts-persistent.yaml" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "for i in range(X_route.shape[0]):\n", + " if i%1000 == 0:\n", + " print(f'Processed {i}/{X_route.shape[0]} samples', flush=True)\n", + " \n", + " # fetch sample and make a request payload\n", + " x = X_route[i].reshape(1,-1).tolist()\n", + " request = {'data':{'ndarray':x}}\n", + "\n", + " # send request to both deployments\n", + " ts_response = rest_request_ambassador('ts-experiment-persistent', 'seldon', request, endpoint=\"localhost:80\")\n", + " \n", + " # extract predictions\n", + " ts_probs = ts_response.get('data').get('ndarray')[0]\n", + " ts_pred = np.argmax(ts_probs)\n", + "\n", + " # send feedback to the model informing it if it made the right decision\n", + " truth_val = int(y_route[i])\n", + " ts_reward = int(ts_pred==truth_val)\n", + " truth = [truth_val]\n", + " \n", + " _ = send_feedback_rest('ts-experiment-persistent', 'seldon', request, ts_response, ts_reward, truth, endpoint=\"localhost:80\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "markdown", "metadata": {}, @@ -1183,7 +993,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.8" + "version": "3.7.4" }, "varInspector": { "cols": { From 48a43bc9c2937bf8432dba867191985ffd4f9810 Mon Sep 17 00:00:00 2001 From: Alejandro Saucedo Date: Mon, 8 Feb 2021 19:10:49 +0000 Subject: [PATCH 3/3] Cleanup --- .../case_study/credit_card_default.ipynb | 50 ++++++++++--------- 1 file changed, 26 insertions(+), 24 deletions(-) diff --git a/components/routers/case_study/credit_card_default.ipynb b/components/routers/case_study/credit_card_default.ipynb index 0d911b9529..c9d6c816ca 100644 --- a/components/routers/case_study/credit_card_default.ipynb +++ b/components/routers/case_study/credit_card_default.ipynb @@ -823,6 +823,30 @@ " _ = send_feedback_rest('ts-experiment', 'seldon', request, ts_response, ts_reward, truth)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can see the model performance on the Grafana dashboard:\n", + "http://localhost:3000/dashboard/db/mab?refresh=5s&orgId=1 (refresh to update)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We note that both the Epsilon greedy and Thompson sampling allocate more traffic to the better performing model (XGBoost) over time, but Thompson Sampling does so at a quicker rate as evidenced by the superior metrics (F1 score in particular)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Persistent MAB\n", + "\n", + "We also show an example of a TS Router which uses Redis for persistence to ensure that the state is shared consistently across multiple replicas." + ] + }, { "cell_type": "code", "execution_count": null, @@ -908,7 +932,7 @@ " request = {'data':{'ndarray':x}}\n", "\n", " # send request to both deployments\n", - " ts_response = rest_request_ambassador('ts-experiment-persistent', 'seldon', request, endpoint=\"localhost:80\")\n", + " ts_response = rest_request_ambassador('ts-experiment-persistent', 'seldon', request)\n", " \n", " # extract predictions\n", " ts_probs = ts_response.get('data').get('ndarray')[0]\n", @@ -919,29 +943,7 @@ " ts_reward = int(ts_pred==truth_val)\n", " truth = [truth_val]\n", " \n", - " _ = send_feedback_rest('ts-experiment-persistent', 'seldon', request, ts_response, ts_reward, truth, endpoint=\"localhost:80\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We can see the model performance on the Grafana dashboard:\n", - "http://localhost:3000/dashboard/db/mab?refresh=5s&orgId=1 (refresh to update)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We note that both the Epsilon greedy and Thompson sampling allocate more traffic to the better performing model (XGBoost) over time, but Thompson Sampling does so at a quicker rate as evidenced by the superior metrics (F1 score in particular)." + " _ = send_feedback_rest('ts-experiment-persistent', 'seldon', request, ts_response, ts_reward, truth)" ] }, {