diff --git a/examples/models/sklearn_spacy_text/Dockerfile b/examples/models/sklearn_spacy_text/Dockerfile index 84e9b0f9c6..b25a0ae2af 100644 --- a/examples/models/sklearn_spacy_text/Dockerfile +++ b/examples/models/sklearn_spacy_text/Dockerfile @@ -1,4 +1,4 @@ -FROM seldonio/seldon-core-s2i-python3:1.1.1-rc +FROM seldonio/seldon-core-s2i-python37-ubi8:1.7.0-dev -RUN pip install spacy +RUN pip install spacy==2.3.2 RUN python -m spacy download en_core_web_sm diff --git a/examples/models/sklearn_spacy_text/reddit_clf.json b/examples/models/sklearn_spacy_text/reddit_clf.json index 940fb6ddf9..5d5a2c53cf 100644 --- a/examples/models/sklearn_spacy_text/reddit_clf.json +++ b/examples/models/sklearn_spacy_text/reddit_clf.json @@ -1,53 +1,51 @@ { - "apiVersion": "machinelearning.seldon.io/v1alpha2", - "kind": "SeldonDeployment", - "metadata": { - "labels": { - "app": "seldon" - }, - "name": "reddit-classifier" - }, - "spec": { - "annotations": { - "project_name": "Reddit classifier", - "deployment_version": "v1" + "apiVersion": "machinelearning.seldon.io/v1alpha2", + "kind": "SeldonDeployment", + "metadata": { + "labels": { + "app": "seldon" + }, + "name": "reddit-classifier" }, - "name": "reddit-classifier", - "predictors": [ - { - "componentSpecs": [ - { - "spec": { - "containers": [ - { - "image": "reddit-classifier:0.1", - "imagePullPolicy": "IfNotPresent", - "name": "classifier", - "resources": { - "requests": { - "memory": "1Mi" - } - } - } - ], - "terminationGracePeriodSeconds": 20 - } - } - ], - "graph": { - "children": [], - "name": "classifier", - "endpoint": { - "type": "REST" - }, - "type": "MODEL" + "spec": { + "annotations": { + "project_name": "Reddit classifier", + "deployment_version": "v1" }, - "name": "single-model", - "replicas": 1, + "name": "reddit-classifier", + "predictors": [ + { + "componentSpecs": [{ + "spec": { + "containers": [ + { + "image": "seldonio/reddit-classifier:0.1", + "imagePullPolicy": "IfNotPresent", + "name": "classifier", + "resources": { + "requests": { + "memory": "1Mi" + } + } + } + ], + "terminationGracePeriodSeconds": 20 + } + }], + "graph": { + "children": [], + "name": "classifier", + "endpoint": { + "type" : "REST" + }, + "type": "MODEL" + }, + "name": "single-model", + "replicas": 1, "annotations": { - "predictor_version": "v1" + "predictor_version" : "v1" } - } - ] - } + } + ] + } } diff --git a/examples/models/sklearn_spacy_text/sklearn_spacy_text_classifier_example.ipynb b/examples/models/sklearn_spacy_text/sklearn_spacy_text_classifier_example.ipynb index 0cc986734a..5038f95456 100644 --- a/examples/models/sklearn_spacy_text/sklearn_spacy_text_classifier_example.ipynb +++ b/examples/models/sklearn_spacy_text/sklearn_spacy_text_classifier_example.ipynb @@ -11,6 +11,7 @@ "For this, we will be using SpaCy for the word tokenization and lemmatization. \n", "\n", "The classification will be done with a Logistic Regression binary classifier.\n", + "For more information please visit: https://towardsdatascience.com/real-time-stream-processing-for-machine-learning-at-scale-with-spacy-kafka-seldon-core-6360f2fedbe\n", "\n", "The steps in this tutorial include:\n", "\n", @@ -45,7 +46,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 1, "metadata": { "scrolled": true }, @@ -63,14 +64,83 @@ "scikit-learn>=0.23.2\n", "spacy==2.3.2\n", "dill==0.3.2\n", - "pandas==1.1.1" + "pandas==1.1.1\n" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: scikit-learn>=0.23.2 in /home/nadine/miniconda3/envs/core/lib/python3.7/site-packages (from -r requirements.txt (line 1)) (0.24.1)\n", + "Collecting spacy==2.3.2\n", + " Downloading spacy-2.3.2-cp37-cp37m-manylinux1_x86_64.whl (9.9 MB)\n", + "\u001b[K |████████████████████████████████| 9.9 MB 4.2 MB/s eta 0:00:01\n", + "\u001b[?25hCollecting dill==0.3.2\n", + " Downloading dill-0.3.2.zip (177 kB)\n", + "\u001b[K |████████████████████████████████| 177 kB 15.2 MB/s eta 0:00:01\n", + "\u001b[?25hCollecting pandas==1.1.1\n", + " Downloading pandas-1.1.1-cp37-cp37m-manylinux1_x86_64.whl (10.5 MB)\n", + "\u001b[K |████████████████████████████████| 10.5 MB 7.9 MB/s eta 0:00:01\n", + "\u001b[?25hRequirement already satisfied: numpy>=1.15.4 in /home/nadine/miniconda3/envs/core/lib/python3.7/site-packages (from pandas==1.1.1->-r requirements.txt (line 4)) (1.20.1)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /home/nadine/miniconda3/envs/core/lib/python3.7/site-packages (from pandas==1.1.1->-r requirements.txt (line 4)) (2.8.1)\n", + "Requirement already satisfied: pytz>=2017.2 in /home/nadine/miniconda3/envs/core/lib/python3.7/site-packages (from pandas==1.1.1->-r requirements.txt (line 4)) (2021.1)\n", + "Requirement already satisfied: requests<3.0.0,>=2.13.0 in /home/nadine/miniconda3/envs/core/lib/python3.7/site-packages (from spacy==2.3.2->-r requirements.txt (line 2)) (2.25.1)\n", + "Collecting srsly<1.1.0,>=1.0.2\n", + " Downloading srsly-1.0.5-cp37-cp37m-manylinux2014_x86_64.whl (184 kB)\n", + "\u001b[K |████████████████████████████████| 184 kB 19.2 MB/s eta 0:00:01\n", + "\u001b[?25hCollecting wasabi<1.1.0,>=0.4.0\n", + " Downloading wasabi-0.8.2-py3-none-any.whl (23 kB)\n", + "Collecting plac<1.2.0,>=0.9.6\n", + " Downloading plac-1.1.3-py2.py3-none-any.whl (20 kB)\n", + "Collecting cymem<2.1.0,>=2.0.2\n", + " Downloading cymem-2.0.5-cp37-cp37m-manylinux2014_x86_64.whl (35 kB)\n", + "Collecting preshed<3.1.0,>=3.0.2\n", + " Downloading preshed-3.0.5-cp37-cp37m-manylinux2014_x86_64.whl (126 kB)\n", + "\u001b[K |████████████████████████████████| 126 kB 25.4 MB/s eta 0:00:01\n", + "\u001b[?25hRequirement already satisfied: setuptools in /home/nadine/miniconda3/envs/core/lib/python3.7/site-packages (from spacy==2.3.2->-r requirements.txt (line 2)) (52.0.0.post20210125)\n", + "Collecting murmurhash<1.1.0,>=0.28.0\n", + " Downloading murmurhash-1.0.5-cp37-cp37m-manylinux2014_x86_64.whl (20 kB)\n", + "Collecting tqdm<5.0.0,>=4.38.0\n", + " Downloading tqdm-4.59.0-py2.py3-none-any.whl (74 kB)\n", + "\u001b[K |████████████████████████████████| 74 kB 1.6 MB/s eta 0:00:01\n", + "\u001b[?25hCollecting thinc==7.4.1\n", + " Downloading thinc-7.4.1-cp37-cp37m-manylinux1_x86_64.whl (2.1 MB)\n", + "\u001b[K |████████████████████████████████| 2.1 MB 22.5 MB/s eta 0:00:01\n", + "\u001b[?25hCollecting catalogue<1.1.0,>=0.0.7\n", + " Downloading catalogue-1.0.0-py2.py3-none-any.whl (7.7 kB)\n", + "Collecting blis<0.5.0,>=0.4.0\n", + " Downloading blis-0.4.1-cp37-cp37m-manylinux1_x86_64.whl (3.7 MB)\n", + "\u001b[K |████████████████████████████████| 3.7 MB 11.8 MB/s eta 0:00:01\n", + "\u001b[?25hRequirement already satisfied: threadpoolctl>=2.0.0 in /home/nadine/miniconda3/envs/core/lib/python3.7/site-packages (from scikit-learn>=0.23.2->-r requirements.txt (line 1)) (2.1.0)\n", + "Requirement already satisfied: joblib>=0.11 in /home/nadine/miniconda3/envs/core/lib/python3.7/site-packages (from scikit-learn>=0.23.2->-r requirements.txt (line 1)) (1.0.1)\n", + "Requirement already satisfied: scipy>=0.19.1 in /home/nadine/miniconda3/envs/core/lib/python3.7/site-packages (from scikit-learn>=0.23.2->-r requirements.txt (line 1)) (1.6.1)\n", + "Requirement already satisfied: importlib-metadata>=0.20 in /home/nadine/miniconda3/envs/core/lib/python3.7/site-packages (from catalogue<1.1.0,>=0.0.7->spacy==2.3.2->-r requirements.txt (line 2)) (3.7.2)\n", + "Requirement already satisfied: zipp>=0.5 in /home/nadine/miniconda3/envs/core/lib/python3.7/site-packages (from importlib-metadata>=0.20->catalogue<1.1.0,>=0.0.7->spacy==2.3.2->-r requirements.txt (line 2)) (3.4.1)\n", + "Requirement already satisfied: typing-extensions>=3.6.4 in /home/nadine/miniconda3/envs/core/lib/python3.7/site-packages (from importlib-metadata>=0.20->catalogue<1.1.0,>=0.0.7->spacy==2.3.2->-r requirements.txt (line 2)) (3.7.4.3)\n", + "Requirement already satisfied: six>=1.5 in /home/nadine/miniconda3/envs/core/lib/python3.7/site-packages (from python-dateutil>=2.7.3->pandas==1.1.1->-r requirements.txt (line 4)) (1.15.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /home/nadine/miniconda3/envs/core/lib/python3.7/site-packages (from requests<3.0.0,>=2.13.0->spacy==2.3.2->-r requirements.txt (line 2)) (2020.12.5)\n", + "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /home/nadine/miniconda3/envs/core/lib/python3.7/site-packages (from requests<3.0.0,>=2.13.0->spacy==2.3.2->-r requirements.txt (line 2)) (1.25.9)\n", + "Requirement already satisfied: idna<3,>=2.5 in /home/nadine/miniconda3/envs/core/lib/python3.7/site-packages (from requests<3.0.0,>=2.13.0->spacy==2.3.2->-r requirements.txt (line 2)) (2.10)\n", + "Requirement already satisfied: chardet<5,>=3.0.2 in /home/nadine/miniconda3/envs/core/lib/python3.7/site-packages (from requests<3.0.0,>=2.13.0->spacy==2.3.2->-r requirements.txt (line 2)) (4.0.0)\n", + "Building wheels for collected packages: dill\n", + " Building wheel for dill (setup.py) ... \u001b[?25ldone\n", + "\u001b[?25h Created wheel for dill: filename=dill-0.3.2-py3-none-any.whl size=78911 sha256=bf7905b0c7ccd22cf2a306704d5f408f62c547aa74f023367fee8a77634c4f53\n", + " Stored in directory: /home/nadine/.cache/pip/wheels/72/6b/d5/5548aa1b73b8c3d176ea13f9f92066b02e82141549d90e2100\n", + "Successfully built dill\n", + "Installing collected packages: murmurhash, cymem, wasabi, tqdm, srsly, preshed, plac, catalogue, blis, thinc, spacy, pandas, dill\n", + " Attempting uninstall: pandas\n", + " Found existing installation: pandas 1.1.0\n", + " Uninstalling pandas-1.1.0:\n", + " Successfully uninstalled pandas-1.1.0\n", + "Successfully installed blis-0.4.1 catalogue-1.0.0 cymem-2.0.5 dill-0.3.2 murmurhash-1.0.5 pandas-1.1.1 plac-1.1.3 preshed-3.0.5 spacy-2.3.2 srsly-1.0.5 thinc-7.4.1 tqdm-4.59.0 wasabi-0.8.2\n" + ] + } + ], "source": [ "!pip install -r requirements.txt" ] @@ -79,6 +149,48 @@ "cell_type": "code", "execution_count": 1, "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting en_core_web_sm==2.3.1\n", + " Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.3.1/en_core_web_sm-2.3.1.tar.gz (12.0 MB)\n", + "\u001b[K |████████████████████████████████| 12.0 MB 2.4 MB/s eta 0:00:01\n", + "\u001b[?25hRequirement already satisfied: spacy<2.4.0,>=2.3.0 in /home/nadine/miniconda3/envs/core/lib/python3.7/site-packages (from en_core_web_sm==2.3.1) (2.3.2)\n", + "Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /home/nadine/miniconda3/envs/core/lib/python3.7/site-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (1.0.5)\n", + "Requirement already satisfied: blis<0.5.0,>=0.4.0 in /home/nadine/miniconda3/envs/core/lib/python3.7/site-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (0.4.1)\n", + "Requirement already satisfied: setuptools in /home/nadine/miniconda3/envs/core/lib/python3.7/site-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (52.0.0.post20210125)\n", + "Requirement already satisfied: srsly<1.1.0,>=1.0.2 in /home/nadine/miniconda3/envs/core/lib/python3.7/site-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (1.0.5)\n", + "Requirement already satisfied: plac<1.2.0,>=0.9.6 in /home/nadine/miniconda3/envs/core/lib/python3.7/site-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (1.1.3)\n", + "Requirement already satisfied: catalogue<1.1.0,>=0.0.7 in /home/nadine/miniconda3/envs/core/lib/python3.7/site-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (1.0.0)\n", + "Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in /home/nadine/miniconda3/envs/core/lib/python3.7/site-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (4.59.0)\n", + "Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /home/nadine/miniconda3/envs/core/lib/python3.7/site-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (2.0.5)\n", + "Requirement already satisfied: thinc==7.4.1 in /home/nadine/miniconda3/envs/core/lib/python3.7/site-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (7.4.1)\n", + "Requirement already satisfied: requests<3.0.0,>=2.13.0 in /home/nadine/miniconda3/envs/core/lib/python3.7/site-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (2.25.1)\n", + "Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /home/nadine/miniconda3/envs/core/lib/python3.7/site-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (3.0.5)\n", + "Requirement already satisfied: wasabi<1.1.0,>=0.4.0 in /home/nadine/miniconda3/envs/core/lib/python3.7/site-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (0.8.2)\n", + "Requirement already satisfied: numpy>=1.15.0 in /home/nadine/miniconda3/envs/core/lib/python3.7/site-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (1.20.1)\n", + "Requirement already satisfied: importlib-metadata>=0.20 in /home/nadine/miniconda3/envs/core/lib/python3.7/site-packages (from catalogue<1.1.0,>=0.0.7->spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (3.7.2)\n", + "Requirement already satisfied: typing-extensions>=3.6.4 in /home/nadine/miniconda3/envs/core/lib/python3.7/site-packages (from importlib-metadata>=0.20->catalogue<1.1.0,>=0.0.7->spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (3.7.4.3)\n", + "Requirement already satisfied: zipp>=0.5 in /home/nadine/miniconda3/envs/core/lib/python3.7/site-packages (from importlib-metadata>=0.20->catalogue<1.1.0,>=0.0.7->spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (3.4.1)\n", + "Requirement already satisfied: idna<3,>=2.5 in /home/nadine/miniconda3/envs/core/lib/python3.7/site-packages (from requests<3.0.0,>=2.13.0->spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (2.10)\n", + "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /home/nadine/miniconda3/envs/core/lib/python3.7/site-packages (from requests<3.0.0,>=2.13.0->spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (1.25.9)\n", + "Requirement already satisfied: chardet<5,>=3.0.2 in /home/nadine/miniconda3/envs/core/lib/python3.7/site-packages (from requests<3.0.0,>=2.13.0->spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (4.0.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /home/nadine/miniconda3/envs/core/lib/python3.7/site-packages (from requests<3.0.0,>=2.13.0->spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (2020.12.5)\n", + "\u001b[38;5;2m✔ Download and installation successful\u001b[0m\n", + "You can now load the model via spacy.load('en_core_web_sm')\n" + ] + } + ], + "source": [ + "!python -m spacy download en_core_web_sm" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, "outputs": [], "source": [ "import pandas as pd \n", @@ -96,7 +208,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -182,7 +294,7 @@ "4 0 " ] }, - "execution_count": 2, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -203,22 +315,22 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 3, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" }, { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYMAAAD1CAYAAACyaJl6AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAQZklEQVR4nO3dW4xdZ3nG8f9Tu6EcCnbI1A2207EaF+RErQgjxxVSVZHKdgDhXABKhBo3tfAFpoUWCRJ6YSkhUqJWTYkKqVzi4iAUY6VUsSDgWiEIVSWHCQk5meBpTh4rIQPjhLYRB4e3F/O5bCYzHs/ek9mO5/+Ttmat9/vWWu+SLD9ehz1OVSFJWth+rd8NSJL6zzCQJBkGkiTDQJKEYSBJwjCQJAGL+91At84444waHBzsdxuS9Ipy7733/rCqBibXX7FhMDg4yPDwcL/bkKRXlCRPTlX3NpEkyTCQJBkGkiQMA0kShoEkCcNAkoRhIEnCMJAk8Qr+0tkrxeDlX+13C6eMJ655V79bkE5ZXhlIkgwDSdIJhEGSnUmeTfLQFGMfS1JJzmjrSXJ9kpEkDyQ5r2Pu5iQH22dzR/1tSR5s21yfJHN1cpKkE3MiVwafBzZOLiZZCawHnuooXwisbp+twA1t7unAduB8YC2wPcnSts0NwAc7tnvJsSRJL68Zw6CqvgWMTzF0HfBxoDpqm4CbasKdwJIkZwIbgP1VNV5VR4D9wMY29vqqurOqCrgJuKi3U5IkzVZXzwySbAIOV9V3Jw0tBw51rI+22vHqo1PUJUnzaNavliZ5DfBJJm4RzaskW5m4/cRZZ50134eXpFNWN1cGvwusAr6b5AlgBfCdJL8NHAZWdsxd0WrHq6+Yoj6lqtpRVUNVNTQw8JL/qEeS1KVZh0FVPVhVv1VVg1U1yMStnfOq6hlgL3Bpe6toHfB8VT0N7APWJ1naHhyvB/a1sR8nWdfeIroUuHWOzk2SdIJO5NXSm4FvA29OMppky3Gm3wY8BowA/wx8CKCqxoGrgHva58pWo835XNvmv4CvdXcqkqRuzfjMoKoumWF8sGO5gG3TzNsJ7JyiPgycO1MfkqSXj99AliQZBpIkw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJ4gTCIMnOJM8meaij9rdJvpfkgST/lmRJx9gVSUaSPJpkQ0d9Y6uNJLm8o74qyV2t/qUkp83lCUqSZnYiVwafBzZOqu0Hzq2q3we+D1wBkGQNcDFwTtvms0kWJVkEfAa4EFgDXNLmAlwLXFdVZwNHgC09nZEkadZmDIOq+hYwPqn271V1tK3eCaxoy5uA3VX106p6HBgB1rbPSFU9VlU/A3YDm5IEeAdwS9t+F3BRj+ckSZqluXhm8OfA19rycuBQx9hoq01XfyPwXEewHKtLkuZRT2GQ5G+Ao8AX56adGY+3NclwkuGxsbH5OKQkLQhdh0GSPwPeDXygqqqVDwMrO6ataLXp6j8CliRZPKk+paraUVVDVTU0MDDQbeuSpEm6CoMkG4GPA++pqhc6hvYCFyd5VZJVwGrgbuAeYHV7c+g0Jh4y720hcgfw3rb9ZuDW7k5FktStE3m19Gbg28Cbk4wm2QL8I/CbwP4k9yf5J4CqehjYAzwCfB3YVlUvtmcCHwb2AQeAPW0uwCeAv04ywsQzhBvn9AwlSTNaPNOEqrpkivK0f2FX1dXA1VPUbwNum6L+GBNvG0mS+sRvIEuSDANJkmEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJHECYZBkZ5JnkzzUUTs9yf4kB9vPpa2eJNcnGUnyQJLzOrbZ3OYfTLK5o/62JA+2ba5Pkrk+SUnS8Z3IlcHngY2TapcDt1fVauD2tg5wIbC6fbYCN8BEeADbgfOBtcD2YwHS5nywY7vJx5IkvcxmDIOq+hYwPqm8CdjVlncBF3XUb6oJdwJLkpwJbAD2V9V4VR0B9gMb29jrq+rOqirgpo59SZLmSbfPDJZV1dNt+RlgWVteDhzqmDfaaserj05Rn1KSrUmGkwyPjY112bokabKeHyC3f9HXHPRyIsfaUVVDVTU0MDAwH4eUpAWh2zD4QbvFQ/v5bKsfBlZ2zFvRaserr5iiLkmaR92GwV7g2BtBm4FbO+qXtreK1gHPt9tJ+4D1SZa2B8frgX1t7MdJ1rW3iC7t2JckaZ4snmlCkpuBPwbOSDLKxFtB1wB7kmwBngTe36bfBrwTGAFeAC4DqKrxJFcB97R5V1bVsYfSH2LijaVXA19rH0nSPJoxDKrqkmmGLphibgHbptnPTmDnFPVh4NyZ+pAkvXz8BrIkyTCQJBkGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJHoMgyR/leThJA8luTnJbyRZleSuJCNJvpTktDb3VW19pI0PduznilZ/NMmG3k5JkjRbXYdBkuXAXwJDVXUusAi4GLgWuK6qzgaOAFvaJluAI61+XZtHkjVtu3OAjcBnkyzqti9J0uz1eptoMfDqJIuB1wBPA+8Abmnju4CL2vKmtk4bvyBJWn13Vf20qh4HRoC1PfYlSZqFrsOgqg4Dfwc8xUQIPA/cCzxXVUfbtFFgeVteDhxq2x5t89/YWZ9iG0nSPOjlNtFSJv5Vvwp4E/BaJm7zvGySbE0ynGR4bGzs5TyUJC0ovdwm+hPg8aoaq6qfA18G3g4sabeNAFYAh9vyYWAlQBt/A/CjzvoU2/yKqtpRVUNVNTQwMNBD65KkTr2EwVPAuiSvaff+LwAeAe4A3tvmbAZubct72zpt/BtVVa1+cXvbaBWwGri7h74kSbO0eOYpU6uqu5LcAnwHOArcB+wAvgrsTvKpVruxbXIj8IUkI8A4E28QUVUPJ9nDRJAcBbZV1Yvd9iVJmr2uwwCgqrYD2yeVH2OKt4Gq6ifA+6bZz9XA1b30Iknqnt9AliQZBpIkw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkkSPv5tI0ivX4OVf7XcLp5QnrnlXv1voiVcGkiTDQJJkGEiSMAwkSRgGkiQMA0kShoEkCcNAkkSPYZBkSZJbknwvyYEkf5jk9CT7kxxsP5e2uUlyfZKRJA8kOa9jP5vb/INJNvd6UpKk2en1yuDTwNer6i3AHwAHgMuB26tqNXB7Wwe4EFjdPluBGwCSnA5sB84H1gLbjwWIJGl+dB0GSd4A/BFwI0BV/ayqngM2AbvatF3ARW15E3BTTbgTWJLkTGADsL+qxqvqCLAf2NhtX5Kk2evlymAVMAb8S5L7knwuyWuBZVX1dJvzDLCsLS8HDnVsP9pq09UlSfOklzBYDJwH3FBVbwX+l1/eEgKgqgqoHo7xK5JsTTKcZHhsbGyuditJC14vYTAKjFbVXW39FibC4Qft9g/t57Nt/DCwsmP7Fa02Xf0lqmpHVQ1V1dDAwEAPrUuSOnUdBlX1DHAoyZtb6QLgEWAvcOyNoM3ArW15L3Bpe6toHfB8u520D1ifZGl7cLy+1SRJ86TX/8/gL4AvJjkNeAy4jImA2ZNkC/Ak8P429zbgncAI8EKbS1WNJ7kKuKfNu7KqxnvsS5I0Cz2FQVXdDwxNMXTBFHML2DbNfnYCO3vpRZLUPb+BLEkyDCRJhoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSSJOQiDJIuS3JfkK219VZK7kowk+VKS01r9VW19pI0PduzjilZ/NMmGXnuSJM3OXFwZfAQ40LF+LXBdVZ0NHAG2tPoW4EirX9fmkWQNcDFwDrAR+GySRXPQlyTpBPUUBklWAO8CPtfWA7wDuKVN2QVc1JY3tXXa+AVt/iZgd1X9tKoeB0aAtb30JUmanV6vDP4B+Djwi7b+RuC5qjra1keB5W15OXAIoI0/3+b/f32KbSRJ86DrMEjybuDZqrp3DvuZ6ZhbkwwnGR4bG5uvw0rSKa+XK4O3A+9J8gSwm4nbQ58GliRZ3OasAA635cPASoA2/gbgR531Kbb5FVW1o6qGqmpoYGCgh9YlSZ26DoOquqKqVlTVIBMPgL9RVR8A7gDe26ZtBm5ty3vbOm38G1VVrX5xe9toFbAauLvbviRJs7d45imz9glgd5JPAfcBN7b6jcAXkowA40wECFX1cJI9wCPAUWBbVb34MvQlSZrGnIRBVX0T+GZbfowp3gaqqp8A75tm+6uBq+eiF0nS7PkNZEmSYSBJMgwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJHoIgyQrk9yR5JEkDyf5SKufnmR/koPt59JWT5Lrk4wkeSDJeR372tzmH0yyuffTkiTNRi9XBkeBj1XVGmAdsC3JGuBy4PaqWg3c3tYBLgRWt89W4AaYCA9gO3A+sBbYfixAJEnzo+swqKqnq+o7bfm/gQPAcmATsKtN2wVc1JY3ATfVhDuBJUnOBDYA+6tqvKqOAPuBjd32JUmavTl5ZpBkEHgrcBewrKqebkPPAMva8nLgUMdmo602XV2SNE96DoMkrwP+FfhoVf24c6yqCqhej9FxrK1JhpMMj42NzdVuJWnB6ykMkvw6E0Hwxar6civ/oN3+of18ttUPAys7Nl/RatPVX6KqdlTVUFUNDQwM9NK6JKlDL28TBbgROFBVf98xtBc49kbQZuDWjvql7a2idcDz7XbSPmB9kqXtwfH6VpMkzZPFPWz7duBPgQeT3N9qnwSuAfYk2QI8Cby/jd0GvBMYAV4ALgOoqvEkVwH3tHlXVtV4D31Jkmap6zCoqv8AMs3wBVPML2DbNPvaCezsthdJUm/8BrIkyTCQJBkGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRInURgk2Zjk0SQjSS7vdz+StJCcFGGQZBHwGeBCYA1wSZI1/e1KkhaOkyIMgLXASFU9VlU/A3YDm/rckyQtGIv73UCzHDjUsT4KnD95UpKtwNa2+j9JHp2H3haCM4Af9ruJmeTafnegPvHP59z6namKJ0sYnJCq2gHs6Hcfp5okw1U11O8+pKn453N+nCy3iQ4DKzvWV7SaJGkenCxhcA+wOsmqJKcBFwN7+9yTJC0YJ8Vtoqo6muTDwD5gEbCzqh7uc1sLibfedDLzz+c8SFX1uwdJUp+dLLeJJEl9ZBhIkgwDSdJJ8gBZ8yvJW5j4hvfyVjoM7K2qA/3rSlI/eWWwwCT5BBO/7iPA3e0T4GZ/QaBOZkku63cPpzLfJlpgknwfOKeqfj6pfhrwcFWt7k9n0vEleaqqzup3H6cqbxMtPL8A3gQ8Oal+ZhuT+ibJA9MNAcvms5eFxjBYeD4K3J7kIL/85YBnAWcDH+5bV9KEZcAG4MikeoD/nP92Fg7DYIGpqq8n+T0mfm145wPke6rqxf51JgHwFeB1VXX/5IEk35z/dhYOnxlIknybSJJkGEiSMAwkSRgGkiQMA0kS8H/RvqHadZ6EEAAAAABJRU5ErkJggg==\n", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYMAAAD1CAYAAACyaJl6AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAQZklEQVR4nO3dW4xdZ3nG8f9Tu6EcCnbI1A2207EaF+RErQgjxxVSVZHKdgDhXABKhBo3tfAFpoUWCRJ6YSkhUqJWTYkKqVzi4iAUY6VUsSDgWiEIVSWHCQk5meBpTh4rIQPjhLYRB4e3F/O5bCYzHs/ek9mO5/+Ttmat9/vWWu+SLD9ehz1OVSFJWth+rd8NSJL6zzCQJBkGkiTDQJKEYSBJwjCQJAGL+91At84444waHBzsdxuS9Ipy7733/rCqBibXX7FhMDg4yPDwcL/bkKRXlCRPTlX3NpEkyTCQJBkGkiQMA0kShoEkCcNAkoRhIEnCMJAk8Qr+0tkrxeDlX+13C6eMJ655V79bkE5ZXhlIkgwDSdIJhEGSnUmeTfLQFGMfS1JJzmjrSXJ9kpEkDyQ5r2Pu5iQH22dzR/1tSR5s21yfJHN1cpKkE3MiVwafBzZOLiZZCawHnuooXwisbp+twA1t7unAduB8YC2wPcnSts0NwAc7tnvJsSRJL68Zw6CqvgWMTzF0HfBxoDpqm4CbasKdwJIkZwIbgP1VNV5VR4D9wMY29vqqurOqCrgJuKinM5IkzVpXzwySbAIOV9V3Jw0tBw51rI+22vHqo1PUJUnzaNavliZ5DfBJJm4RzaskW5m4/cRZZ50134eXpFNWN1cGvwusAr6b5AlgBfCdJL8NHAZWdsxd0WrHq6+Yoj6lqtpRVUNVNTQw8JL/qEeS1KVZh0FVPVhVv1VVg1U1yMStnfOq6hlgL3Bpe6toHfB8VT0N7APWJ1naHhyvB/a1sR8nWdfeIroUuHWOzk2SdIJO5NXSm4FvA29OMppky3Gm3wY8BowA/wx8CKCqxoGrgHva58pWo835XNvmv4CvdXcqkqRuzfjMoKoumWF8sGO5gG3TzNsJ7JyiPgycO1MfkqSXj99AliQZBpIkw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJ4gTCIMnOJM8meaij9rdJvpfkgST/lmRJx9gVSUaSPJpkQ0d9Y6uNJLm8o74qyV2t/qUkp83h+UmSTsCJXBl8Htg4qbYfOLeqfh/4PnAFQJI1wMXAOW2bzyZZlGQR8BngQmANcEmbC3AtcF1VnQ0cAbb0dEaSpFmbMQyq6lvA+KTav1fV0bZ6J7CiLW8CdlfVT6vqcWAEWNs+I1X1WFX9DNgNbEoS4B3ALW37XcBFvZ2SJGm25uKZwZ8DX2vLy4FDHWOjrTZd/Y3Acx3BcqwuSZpHPYVBkr8BjgJfnJt2Zjze1iTDSYbHxsbm45CStCB0HQZJ/gx4N/CBqqpWPgys7Ji2otWmq/8IWJJk8aT6lKpqR1UNVdXQwMBAt61LkibpKgySbAQ+Drynql7oGNoLXJzkVUlWAauBu4F7gNXtzaHTmHjIvLeFyB3Ae9v2m4FbuzsVSVK3TuTV0puBbwNvTjKaZAvwj8BvAvuT3J/knwCq6mFgD/AI8HVgW1W92J4JfBjYBxwA9rS5AJ8A/jrJCBPPEG6c0zOUJM1o8UwTquqSKcrT/oVdVVcDV09Rvw24bYr6Y0y8bSRJ6hO/gSxJMgwkSYaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAksQJhEGSnUmeTfJQR+30JPuTHGw/l7Z6klyfZCTJA0nO69hmc5t/MMnmjvrbkjzYtrk+Seb6JCVJx3ciVwafBzZOql0O3F5Vq4Hb2zrAhcDq9tkK3AAT4QFsB84H1gLbjwVIm/PBju0mH0uS9DKbMQyq6lvA+KTyJmBXW94FXNRRv6km3AksSXImsAHYX1XjVXUE2A9sbGOvr6o7q6qAmzr2JUmaJ90+M1hWVU+35WeAZW15OXCoY95oqx2vPjpFfUpJtiYZTjI8NjbWZeuSpMl6foDc/kVfc9DLiRxrR1UNVdXQwMDAfBxSkhaEbsPgB+0WD+3ns61+GFjZMW9Fqx2vvmKKuiRpHnUbBnuBY28EbQZu7ahf2t4qWgc8324n7QPWJ1naHhyvB/a1sR8nWdfeIrq0Y1+SpHmyeKYJSW4G/hg4I8koE28FXQPsSbIFeBJ4f5t+G/BOYAR4AbgMoKrGk1wF3NPmXVlVxx5Kf4iJN5ZeDXytfSRJ82jGMKiqS6YZumCKuQVsm2Y/O4GdU9SHgXNn6kOS9PLxG8iSJMNAkmQYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkugxDJL8VZKHkzyU5OYkv5FkVZK7kowk+VKS09rcV7X1kTY+2LGfK1r90SQbejwnSdIsdR0GSZYDfwkMVdW5wCLgYuBa4LqqOhs4Amxpm2wBjrT6dW0eSda07c4BNgKfTbKo274kSbPX622ixcCrkywGXgM8DbwDuKWN7wIuasub2jpt/IIkafXdVfXTqnocGAHW9tiXJGkWug6DqjoM/B3wFBMh8DxwL/BcVR1t00aB5W15OXCobXu0zX9jZ32KbSRJ86CX20RLmfhX/SrgTcBrmbjN87JJsjXJcJLhsbGxl/NQkrSg9HKb6E+Ax6tqrKp+DnwZeDuwpN02AlgBHG7Lh4GVAG38DcCPOutTbPMrqmpHVQ1V1dDAwEAPrUuSOvUSBk8B65K8pt37vwB4BLgDeG+bsxm4tS3vbeu08W9UVbX6xe1to1XAauDuHvqSJM3S4pmnTK2q7kpyC/Ad4ChwH7AD+CqwO8mnWu3GtsmNwBeSjADjTLxBRFU9nGQPE0FyFNhWVS9225ckafa6DgOAqtoObJ9Ufowp3gaqqp8A75tmP1cDV/fSiySpe34DWZJkGEiSDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRI9/m4iSa9cg5d/td8tnFKeuOZd/W6hJ14ZSJIMA0mSYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRI9hkGSJUluSfK9JAeS/GGS05PsT3Kw/Vza5ibJ9UlGkjyQ5LyO/Wxu8w8m2dzrSUmSZqfXK4NPA1+vqrcAfwAcAC4Hbq+q1cDtbR3gQmB1+2wFbgBIcjqwHTgfWAtsPxYgkqT50XUYJHkD8EfAjQBV9bOqeg7YBOxq03YBF7XlTcBNNeFOYEmSM4ENwP6qGq+qI8B+YGO3fUmSZq+XK4NVwBjwL0nuS/K5JK8FllXV023OM8CytrwcONSx/WirTVeXJM2TXsJgMXAecENVvRX4X355SwiAqiqgejjGr0iyNclwkuGxsbG52q0kLXi9hMEoMFpVd7X1W5gIhx+02z+0n8+28cPAyo7tV7TadPWXqKodVTVUVUMDAwM9tC5J6tR1GFTVM8ChJG9upQuAR4C9wLE3gjYDt7blvcCl7a2idcDz7XbSPmB9kqXtwfH6VpMkzZNe/z+DvwC+mOQ04DHgMiYCZk+SLcCTwPvb3NuAdwIjwAttLlU1nuQq4J4278qqGu+xL0nSLPQUBlV1PzA0xdAFU8wtYNs0+9kJ7OylF0lS9/wGsiTJMJAkGQaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIk5iAMkixKcl+Sr7T1VUnuSjKS5EtJTmv1V7X1kTY+2LGPK1r90SQbeu1JkjQ7c3Fl8BHgQMf6tcB1VXU2cATY0upbgCOtfl2bR5I1wMXAOcBG4LNJFs1BX5KkE9RTGCRZAbwL+FxbD/AO4JY2ZRdwUVve1NZp4xe0+ZuA3VX106p6HBgB1vbSlyRpdnq9MvgH4OPAL9r6G4HnqupoWx8Flrfl5cAhgDb+fJv///UptpEkzYOuwyDJu4Fnq+reOexnpmNuTTKcZHhsbGy+DitJp7xergzeDrwnyRPAbiZuD30aWJJkcZuzAjjclg8DKwHa+BuAH3XWp9jmV1TVjqoaqqqhgYGBHlqXJHXqOgyq6oqqWlFVg0w8AP5GVX0AuAN4b5u2Gbi1Le9t67Txb1RVtfrF7W2jVcBq4O5u+5Ikzd7imafM2ieA3Uk+BdwH3NjqNwJfSDICjDMRIFTVw0n2AI8AR4FtVfXiy9CXJGkacxIGVfVN4Jtt+TGmeBuoqn4CvG+a7a8Grp6LXiRJs+c3kCVJhoEkyTCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkughDJKsTHJHkkeSPJzkI61+epL9SQ62n0tbPUmuTzKS5IEk53Xsa3ObfzDJ5t5PS5I0G71cGRwFPlZVa4B1wLYka4DLgdurajVwe1sHuBBY3T5bgRtgIjyA7cD5wFpg+7EAkSTNj67DoKqerqrvtOX/Bg4Ay4FNwK42bRdwUVveBNxUE+4EliQ5E9gA7K+q8ao6AuwHNnbblyRp9ubkmUGSQeCtwF3Asqp6ug09Ayxry8uBQx2bjbbadHVJ0jzpOQySvA74V+CjVfXjzrGqKqB6PUbHsbYmGU4yPDY2Nle7laQFr6cwSPLrTATBF6vqy638g3b7h/bz2VY/DKzs2HxFq01Xf4mq2lFVQ1U1NDAw0EvrkqQOvbxNFOBG4EBV/X3H0F7g2BtBm4FbO+qXtreK1gHPt9tJ+4D1SZa2B8frW02SNE8W97Dt24E/BR5Mcn+rfRK4BtiTZAvwJPD+NnYb8E5gBHgBuAygqsaTXAXc0+ZdWVXjPfQlSZqlrsOgqv4DyDTDF0wxv4Bt0+xrJ7Cz214kSb3xG8iSJMNAkmQYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEmcRGGQZGOSR5OMJLm83/1I0kJyUoRBkkXAZ4ALgTXAJUnW9LcrSVo4ToowANYCI1X1WFX9DNgNbOpzT5K0YCzudwPNcuBQx/oocP7kSUm2Alvb6v8keXQeelsIzgB+2O8mZpJr+92B+sQ/n3Prd6YqnixhcEKqagewo999nGqSDFfVUL/7kKbin8/5cbLcJjoMrOxYX9FqkqR5cLKEwT3A6iSrkpwGXAzs7XNPkrRgnBS3iarqaJIPA/uARcDOqnq4z20tJN5608nMP5/zIFXV7x4kSX12stwmkiT1kWEgSTIMJEknyQNkza8kb2HiG97LW+kwsLeqDvSvK0n95JXBApPkE0z8uo8Ad7dPgJv9BYE6mSW5rN89nMp8m2iBSfJ94Jyq+vmk+mnAw1W1uj+dSceX5KmqOqvffZyqvE208PwCeBPw5KT6mW1M6pskD0w3BCybz14WGsNg4fkocHuSg/zylwOeBZwNfLhfTUnNMmADcGRSPcB/zn87C4dhsMBU1deT/B4Tvza88wHyPVX1Yv86kwD4CvC6qrp/8kCSb857NwuIzwwkSb5NJEkyDCRJGAaSJAwDSRKGgSQJ+D/RvqHaeD1MhAAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] @@ -236,7 +348,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ @@ -251,7 +363,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ @@ -262,7 +374,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -273,19 +385,19 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "TfidfVectorizer(max_features=10000, ngram_range=(1, 3),\n", - " preprocessor= at 0x7f9db31f58c0>,\n", + " preprocessor= at 0x7fe61e82f680>,\n", " token_pattern=None,\n", - " tokenizer= at 0x7f9db31f5830>)" + " tokenizer= at 0x7fe61e82f5f0>)" ] }, - "execution_count": 7, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -304,7 +416,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -314,7 +426,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -323,7 +435,7 @@ "LogisticRegression(C=0.1, solver='sag')" ] }, - "execution_count": 9, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -336,7 +448,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ @@ -356,7 +468,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -396,7 +508,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 18, "metadata": {}, "outputs": [ { @@ -404,7 +516,7 @@ "output_type": "stream", "text": [ "['This is the study that the article is based on:\\r\\n\\r\\nhttps://www.nature.com/articles/nature25778.epdf']\n", - "[[0.82791777 0.17208223]]\n" + "[[0.82791732 0.17208268]]\n" ] } ], @@ -427,7 +539,162 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Overwriting Dockerfile\n" + ] + } + ], + "source": [ + "%%writefile Dockerfile\n", + "FROM seldonio/seldon-core-s2i-python37-ubi8:1.7.0-dev\n", + "\n", + "RUN pip install spacy==2.3.2\n", + "RUN python -m spacy download en_core_web_sm" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "#1 [internal] load build definition from Dockerfile\n", + "#1 sha256:456ba4fb7975f8c66cb47f5783f3ff34ac4c535a63912c1d27ce72bbe0d63b83\n", + "#1 transferring dockerfile: 173B done\n", + "#1 DONE 0.0s\n", + "\n", + "#2 [internal] load .dockerignore\n", + "#2 sha256:8f400c1990a77c47cbbf40ed397dd023f5b85426e8239c6c07bef77255e0861a\n", + "#2 transferring context: 2B done\n", + "#2 DONE 0.0s\n", + "\n", + "#3 [internal] load metadata for docker.io/seldonio/seldon-core-s2i-python37-ubi8:1.7.0-dev\n", + "#3 sha256:0ea020e7a4239d8c0dfab255297ed4513a937f10c4eb4a0da2ede2aa401cf138\n", + "#3 DONE 0.0s\n", + "\n", + "#4 [1/3] FROM docker.io/seldonio/seldon-core-s2i-python37-ubi8:1.7.0-dev\n", + "#4 sha256:1c0985604a56cce763c19b93669933d75dbdf8b7fc62a5cc9aa635630df6aa28\n", + "#4 CACHED\n", + "\n", + "#5 [2/3] RUN pip install spacy==2.3.2\n", + "#5 sha256:d050cddbacda288e4faf815fd216ac3fee288c8e13762b13970f0493db32a1ba\n", + "#5 0.863 Collecting spacy==2.3.2\n", + "#5 0.967 Downloading spacy-2.3.2-cp37-cp37m-manylinux1_x86_64.whl (9.9 MB)\n", + "#5 1.612 Collecting blis<0.5.0,>=0.4.0\n", + "#5 1.640 Downloading blis-0.4.1-cp37-cp37m-manylinux1_x86_64.whl (3.7 MB)\n", + "#5 2.153 Collecting thinc==7.4.1\n", + "#5 2.182 Downloading thinc-7.4.1-cp37-cp37m-manylinux1_x86_64.whl (2.1 MB)\n", + "#5 2.363 Collecting plac<1.2.0,>=0.9.6\n", + "#5 2.385 Downloading plac-1.1.3-py2.py3-none-any.whl (20 kB)\n", + "#5 2.480 Collecting murmurhash<1.1.0,>=0.28.0\n", + "#5 2.496 Downloading murmurhash-1.0.5-cp37-cp37m-manylinux2014_x86_64.whl (20 kB)\n", + "#5 2.601 Collecting preshed<3.1.0,>=3.0.2\n", + "#5 2.618 Downloading preshed-3.0.5-cp37-cp37m-manylinux2014_x86_64.whl (126 kB)\n", + "#5 2.694 Collecting catalogue<1.1.0,>=0.0.7\n", + "#5 2.718 Downloading catalogue-1.0.0-py2.py3-none-any.whl (7.7 kB)\n", + "#5 2.741 Requirement already satisfied: numpy>=1.15.0 in /opt/conda/lib/python3.7/site-packages (from spacy==2.3.2) (1.20.1)\n", + "#5 2.898 Collecting tqdm<5.0.0,>=4.38.0\n", + "#5 2.914 Downloading tqdm-4.59.0-py2.py3-none-any.whl (74 kB)\n", + "#5 2.941 Requirement already satisfied: setuptools in /opt/conda/lib/python3.7/site-packages (from spacy==2.3.2) (46.1.0)\n", + "#5 2.963 Requirement already satisfied: requests<3.0.0,>=2.13.0 in /opt/conda/lib/python3.7/site-packages (from spacy==2.3.2) (2.22.0)\n", + "#5 3.034 Collecting cymem<2.1.0,>=2.0.2\n", + "#5 3.059 Downloading cymem-2.0.5-cp37-cp37m-manylinux2014_x86_64.whl (35 kB)\n", + "#5 3.136 Collecting wasabi<1.1.0,>=0.4.0\n", + "#5 3.155 Downloading wasabi-0.8.2-py3-none-any.whl (23 kB)\n", + "#5 3.295 Collecting srsly<1.1.0,>=1.0.2\n", + "#5 3.318 Downloading srsly-1.0.5-cp37-cp37m-manylinux2014_x86_64.whl (184 kB)\n", + "#5 3.348 Requirement already satisfied: importlib-metadata>=0.20; python_version < \"3.8\" in /opt/conda/lib/python3.7/site-packages (from catalogue<1.1.0,>=0.0.7->spacy==2.3.2) (3.7.3)\n", + "#5 3.370 Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.7/site-packages (from requests<3.0.0,>=2.13.0->spacy==2.3.2) (2020.12.5)\n", + "#5 3.371 Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /opt/conda/lib/python3.7/site-packages (from requests<3.0.0,>=2.13.0->spacy==2.3.2) (3.0.4)\n", + "#5 3.373 Requirement already satisfied: idna<2.9,>=2.5 in /opt/conda/lib/python3.7/site-packages (from requests<3.0.0,>=2.13.0->spacy==2.3.2) (2.8)\n", + "#5 3.376 Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /opt/conda/lib/python3.7/site-packages (from requests<3.0.0,>=2.13.0->spacy==2.3.2) (1.25.9)\n", + "#5 3.389 Requirement already satisfied: typing-extensions>=3.6.4; python_version < \"3.8\" in /opt/conda/lib/python3.7/site-packages (from importlib-metadata>=0.20; python_version < \"3.8\"->catalogue<1.1.0,>=0.0.7->spacy==2.3.2) (3.7.4.3)\n", + "#5 3.392 Requirement already satisfied: zipp>=0.5 in /opt/conda/lib/python3.7/site-packages (from importlib-metadata>=0.20; python_version < \"3.8\"->catalogue<1.1.0,>=0.0.7->spacy==2.3.2) (3.4.1)\n", + "#5 3.658 Installing collected packages: blis, tqdm, murmurhash, cymem, preshed, plac, catalogue, srsly, wasabi, thinc, spacy\n", + "#5 3.751 Attempting uninstall: tqdm\n", + "#5 3.757 Found existing installation: tqdm 4.36.1\n", + "#5 3.763 Uninstalling tqdm-4.36.1:\n", + "#5 3.776 Successfully uninstalled tqdm-4.36.1\n", + "#5 5.165 Successfully installed blis-0.4.1 catalogue-1.0.0 cymem-2.0.5 murmurhash-1.0.5 plac-1.1.3 preshed-3.0.5 spacy-2.3.2 srsly-1.0.5 thinc-7.4.1 tqdm-4.59.0 wasabi-0.8.2\n", + "#5 5.190 WARNING: You are using pip version 20.2; however, version 21.0.1 is available.\n", + "#5 5.190 You should consider upgrading via the '/opt/conda/bin/python -m pip install --upgrade pip' command.\n", + "#5 DONE 5.4s\n", + "\n", + "#6 [3/3] RUN python -m spacy download en_core_web_sm\n", + "#6 sha256:768291585909f254516f0462cd67c4d393536b55f38a15c94164394b5761e6a8\n", + "#6 1.542 Collecting en_core_web_sm==2.3.1\n", + "#6 1.867 Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.3.1/en_core_web_sm-2.3.1.tar.gz (12.0 MB)\n", + "#6 2.868 Requirement already satisfied: spacy<2.4.0,>=2.3.0 in /opt/conda/lib/python3.7/site-packages (from en_core_web_sm==2.3.1) (2.3.2)\n", + "#6 2.904 Requirement already satisfied: wasabi<1.1.0,>=0.4.0 in /opt/conda/lib/python3.7/site-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (0.8.2)\n", + "#6 2.906 Requirement already satisfied: numpy>=1.15.0 in /opt/conda/lib/python3.7/site-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (1.20.1)\n", + "#6 2.908 Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /opt/conda/lib/python3.7/site-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (1.0.5)\n", + "#6 2.909 Requirement already satisfied: thinc==7.4.1 in /opt/conda/lib/python3.7/site-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (7.4.1)\n", + "#6 2.930 Requirement already satisfied: srsly<1.1.0,>=1.0.2 in /opt/conda/lib/python3.7/site-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (1.0.5)\n", + "#6 2.933 Requirement already satisfied: blis<0.5.0,>=0.4.0 in /opt/conda/lib/python3.7/site-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (0.4.1)\n", + "#6 2.936 Requirement already satisfied: catalogue<1.1.0,>=0.0.7 in /opt/conda/lib/python3.7/site-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (1.0.0)\n", + "#6 2.941 Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /opt/conda/lib/python3.7/site-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (2.0.5)\n", + "#6 2.943 Requirement already satisfied: plac<1.2.0,>=0.9.6 in /opt/conda/lib/python3.7/site-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (1.1.3)\n", + "#6 2.945 Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /opt/conda/lib/python3.7/site-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (3.0.5)\n", + "#6 2.957 Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in /opt/conda/lib/python3.7/site-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (4.59.0)\n", + "#6 2.967 Requirement already satisfied: requests<3.0.0,>=2.13.0 in /opt/conda/lib/python3.7/site-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (2.22.0)\n", + "#6 2.979 Requirement already satisfied: setuptools in /opt/conda/lib/python3.7/site-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (46.1.0)\n", + "#6 3.003 Requirement already satisfied: importlib-metadata>=0.20; python_version < \"3.8\" in /opt/conda/lib/python3.7/site-packages (from catalogue<1.1.0,>=0.0.7->spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (3.7.3)\n", + "#6 3.024 Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.7/site-packages (from requests<3.0.0,>=2.13.0->spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (2020.12.5)\n", + "#6 3.025 Requirement already satisfied: idna<2.9,>=2.5 in /opt/conda/lib/python3.7/site-packages (from requests<3.0.0,>=2.13.0->spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (2.8)\n", + "#6 3.028 Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /opt/conda/lib/python3.7/site-packages (from requests<3.0.0,>=2.13.0->spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (1.25.9)\n", + "#6 3.042 Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /opt/conda/lib/python3.7/site-packages (from requests<3.0.0,>=2.13.0->spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (3.0.4)\n", + "#6 3.043 Requirement already satisfied: zipp>=0.5 in /opt/conda/lib/python3.7/site-packages (from importlib-metadata>=0.20; python_version < \"3.8\"->catalogue<1.1.0,>=0.0.7->spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (3.4.1)\n", + "#6 3.060 Requirement already satisfied: typing-extensions>=3.6.4; python_version < \"3.8\" in /opt/conda/lib/python3.7/site-packages (from importlib-metadata>=0.20; python_version < \"3.8\"->catalogue<1.1.0,>=0.0.7->spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (3.7.4.3)\n", + "#6 3.061 Building wheels for collected packages: en-core-web-sm\n", + "#6 3.061 Building wheel for en-core-web-sm (setup.py): started\n", + "#6 3.925 Building wheel for en-core-web-sm (setup.py): finished with status 'done'\n", + "#6 3.951 Created wheel for en-core-web-sm: filename=en_core_web_sm-2.3.1-py3-none-any.whl size=12047106 sha256=b44642fe250e62c7dae48a8d4ccff708e700132c61d25b1d018256493578bab7\n", + "#6 3.951 Stored in directory: /tmp/pip-ephem-wheel-cache-wmlcm06k/wheels/b7/0d/f0/7ecae8427c515065d75410989e15e5785dd3975fe06e795cd9\n", + "#6 3.952 Successfully built en-core-web-sm\n", + "#6 4.120 Installing collected packages: en-core-web-sm\n", + "#6 4.249 Successfully installed en-core-web-sm-2.3.1\n", + "#6 4.450 WARNING: You are using pip version 20.2; however, version 21.0.1 is available.\n", + "#6 4.450 You should consider upgrading via the '/opt/conda/bin/python -m pip install --upgrade pip' command.\n", + "#6 4.511 \u001b[38;5;2m✔ Download and installation successful\u001b[0m\n", + "#6 4.511 You can now load the model via spacy.load('en_core_web_sm')\n", + "#6 DONE 4.6s\n", + "\n", + "#7 exporting to image\n", + "#7 sha256:e8c613e07b0b7ff33893b694f7759a10d42e180f2b4dc349fb57dc6b71dcab00\n", + "#7 exporting layers\n", + "#7 exporting layers 0.6s done\n", + "#7 writing image sha256:8ca14f237d13fb2ad14dcb2621a69a246b9dca87bffcbb280af81485e4af9758 done\n", + "#7 naming to docker.io/seldonio/seldon-core-spacy-base:0.1 done\n", + "#7 DONE 0.6s\n" + ] + } + ], + "source": [ + "%%bash\n", + "docker build . -f Dockerfile -t seldonio/seldon-core-spacy-base:0.1" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### To create a docker image we need to create the .s2i folder configuration as below:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -442,13 +709,12 @@ } ], "source": [ - "# To create a docker image we need to create the .s2i folder configuration as below:\n", "!cat .s2i/environment" ] }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -469,35 +735,95 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "UsageError: the following arguments are required: filename\n" + "---> Installing application source...\n", + "---> Installing dependencies ...\n", + "Looking in links: /whl\n", + "WARNING: Url '/whl' is ignored. It is either a non-existing path or lacks a specific scheme.\n", + "Collecting scikit-learn>=0.23.2\n", + "Downloading scikit_learn-0.24.1-cp37-cp37m-manylinux2010_x86_64.whl (22.3 MB)\n", + "Requirement already satisfied: spacy==2.3.2 in /opt/conda/lib/python3.7/site-packages (from -r requirements.txt (line 2)) (2.3.2)\n", + "WARNING: Url '/whl' is ignored. It is either a non-existing path or lacks a specific scheme.\n", + "Collecting dill==0.3.2\n", + "Downloading dill-0.3.2.zip (177 kB)\n", + "WARNING: Url '/whl' is ignored. It is either a non-existing path or lacks a specific scheme.\n", + "Collecting pandas==1.1.1\n", + "Downloading pandas-1.1.1-cp37-cp37m-manylinux1_x86_64.whl (10.5 MB)\n", + "WARNING: Url '/whl' is ignored. It is either a non-existing path or lacks a specific scheme.\n", + "Collecting scipy>=0.19.1\n", + "Downloading scipy-1.6.1-cp37-cp37m-manylinux1_x86_64.whl (27.4 MB)\n", + "WARNING: Url '/whl' is ignored. It is either a non-existing path or lacks a specific scheme.\n", + "Collecting threadpoolctl>=2.0.0\n", + "Downloading threadpoolctl-2.1.0-py3-none-any.whl (12 kB)\n", + "Requirement already satisfied: numpy>=1.13.3 in /opt/conda/lib/python3.7/site-packages (from scikit-learn>=0.23.2->-r requirements.txt (line 1)) (1.20.1)\n", + "WARNING: Url '/whl' is ignored. It is either a non-existing path or lacks a specific scheme.\n", + "Collecting joblib>=0.11\n", + "Downloading joblib-1.0.1-py3-none-any.whl (303 kB)\n", + "Requirement already satisfied: plac<1.2.0,>=0.9.6 in /opt/conda/lib/python3.7/site-packages (from spacy==2.3.2->-r requirements.txt (line 2)) (1.1.3)\n", + "Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /opt/conda/lib/python3.7/site-packages (from spacy==2.3.2->-r requirements.txt (line 2)) (1.0.5)\n", + "Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /opt/conda/lib/python3.7/site-packages (from spacy==2.3.2->-r requirements.txt (line 2)) (3.0.5)\n", + "Requirement already satisfied: srsly<1.1.0,>=1.0.2 in /opt/conda/lib/python3.7/site-packages (from spacy==2.3.2->-r requirements.txt (line 2)) (1.0.5)\n", + "Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /opt/conda/lib/python3.7/site-packages (from spacy==2.3.2->-r requirements.txt (line 2)) (2.0.5)\n", + "Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in /opt/conda/lib/python3.7/site-packages (from spacy==2.3.2->-r requirements.txt (line 2)) (4.59.0)\n", + "Requirement already satisfied: blis<0.5.0,>=0.4.0 in /opt/conda/lib/python3.7/site-packages (from spacy==2.3.2->-r requirements.txt (line 2)) (0.4.1)\n", + "Requirement already satisfied: thinc==7.4.1 in /opt/conda/lib/python3.7/site-packages (from spacy==2.3.2->-r requirements.txt (line 2)) (7.4.1)\n", + "Requirement already satisfied: requests<3.0.0,>=2.13.0 in /opt/conda/lib/python3.7/site-packages (from spacy==2.3.2->-r requirements.txt (line 2)) (2.22.0)\n", + "Requirement already satisfied: setuptools in /opt/conda/lib/python3.7/site-packages (from spacy==2.3.2->-r requirements.txt (line 2)) (46.1.0)\n", + "Requirement already satisfied: wasabi<1.1.0,>=0.4.0 in /opt/conda/lib/python3.7/site-packages (from spacy==2.3.2->-r requirements.txt (line 2)) (0.8.2)\n", + "Requirement already satisfied: catalogue<1.1.0,>=0.0.7 in /opt/conda/lib/python3.7/site-packages (from spacy==2.3.2->-r requirements.txt (line 2)) (1.0.0)\n", + "WARNING: Url '/whl' is ignored. It is either a non-existing path or lacks a specific scheme.\n", + "Collecting python-dateutil>=2.7.3\n", + "Downloading python_dateutil-2.8.1-py2.py3-none-any.whl (227 kB)\n", + "WARNING: Url '/whl' is ignored. It is either a non-existing path or lacks a specific scheme.\n", + "Collecting pytz>=2017.2\n", + "Downloading pytz-2021.1-py2.py3-none-any.whl (510 kB)\n", + "Requirement already satisfied: idna<2.9,>=2.5 in /opt/conda/lib/python3.7/site-packages (from requests<3.0.0,>=2.13.0->spacy==2.3.2->-r requirements.txt (line 2)) (2.8)\n", + "Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /opt/conda/lib/python3.7/site-packages (from requests<3.0.0,>=2.13.0->spacy==2.3.2->-r requirements.txt (line 2)) (3.0.4)\n", + "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /opt/conda/lib/python3.7/site-packages (from requests<3.0.0,>=2.13.0->spacy==2.3.2->-r requirements.txt (line 2)) (1.25.9)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.7/site-packages (from requests<3.0.0,>=2.13.0->spacy==2.3.2->-r requirements.txt (line 2)) (2020.12.5)\n", + "Requirement already satisfied: importlib-metadata>=0.20; python_version < \"3.8\" in /opt/conda/lib/python3.7/site-packages (from catalogue<1.1.0,>=0.0.7->spacy==2.3.2->-r requirements.txt (line 2)) (3.7.3)\n", + "Requirement already satisfied: six>=1.5 in /opt/conda/lib/python3.7/site-packages (from python-dateutil>=2.7.3->pandas==1.1.1->-r requirements.txt (line 4)) (1.12.0)\n", + "Requirement already satisfied: typing-extensions>=3.6.4; python_version < \"3.8\" in /opt/conda/lib/python3.7/site-packages (from importlib-metadata>=0.20; python_version < \"3.8\"->catalogue<1.1.0,>=0.0.7->spacy==2.3.2->-r requirements.txt (line 2)) (3.7.4.3)\n", + "Requirement already satisfied: zipp>=0.5 in /opt/conda/lib/python3.7/site-packages (from importlib-metadata>=0.20; python_version < \"3.8\"->catalogue<1.1.0,>=0.0.7->spacy==2.3.2->-r requirements.txt (line 2)) (3.4.1)\n", + "Building wheels for collected packages: dill\n", + "Building wheel for dill (setup.py): started\n", + "Building wheel for dill (setup.py): finished with status 'done'\n", + "Created wheel for dill: filename=dill-0.3.2-py3-none-any.whl size=78912 sha256=484ab95ec424c23e0b5d0e92279e0dd99d7b5c337ab85d7a926596cc0028b947\n", + "Stored in directory: /tmp/pip-ephem-wheel-cache-3emqefhy/wheels/72/6b/d5/5548aa1b73b8c3d176ea13f9f92066b02e82141549d90e2100\n", + "Successfully built dill\n", + "Installing collected packages: scipy, threadpoolctl, joblib, scikit-learn, dill, python-dateutil, pytz, pandas\n", + "Successfully installed dill-0.3.2 joblib-1.0.1 pandas-1.1.1 python-dateutil-2.8.1 pytz-2021.1 scikit-learn-0.24.1 scipy-1.6.1 threadpoolctl-2.1.0\n", + "WARNING: Url '/whl' is ignored. It is either a non-existing path or lacks a specific scheme.\n", + "WARNING: You are using pip version 20.2; however, version 21.0.1 is available.\n", + "You should consider upgrading via the '/opt/conda/bin/python -m pip install --upgrade pip' command.\n", + "Collecting pip-licenses\n", + "Downloading pip_licenses-3.3.1-py3-none-any.whl (16 kB)\n", + "Collecting PTable\n", + "Downloading PTable-0.9.2.tar.gz (31 kB)\n", + "Building wheels for collected packages: PTable\n", + "Building wheel for PTable (setup.py): started\n", + "Building wheel for PTable (setup.py): finished with status 'done'\n", + "Created wheel for PTable: filename=PTable-0.9.2-py3-none-any.whl size=22907 sha256=8475610d2b71640b1d3866997bd82d7de2ccb5ed12de195ddc11a8f1dc05a75f\n", + "Stored in directory: /root/.cache/pip/wheels/33/df/2f/674985b3f8a2de3f96357d1eadef5110f74fa91b3785e52a54\n", + "Successfully built PTable\n", + "Installing collected packages: PTable, pip-licenses\n", + "Successfully installed PTable-0.9.2 pip-licenses-3.3.1\n", + "WARNING: You are using pip version 20.2; however, version 21.0.1 is available.\n", + "You should consider upgrading via the '/opt/conda/bin/python -m pip install --upgrade pip' command.\n", + "created path: ./licenses/license_info.csv\n", + "created path: ./licenses/license.txt\n", + "Build completed successfully\n" ] } ], - "source": [ - "%%writefile\n", - "FROM seldonio/seldon-core-s2i-python37-ubi8:1.7.0-dev\n", - "\n", - "RUN pip install spacy\n", - "RUN python -m spacy download en_core_web_sm" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], "source": [ "%%bash\n", - "docker build . -t seldonio/seldon-core-spacy-base:0.1\n", "s2i build . seldonio/seldon-core-spacy-base:0.1 seldonio/reddit-classifier:0.1" ] }, @@ -510,14 +836,14 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Error: No such container: reddit_predictor\n" + "Error: No such container: reddit_predictor\r\n" ] } ], @@ -528,24 +854,82 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 21, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "d594705a8a21e8d8994af0b24e28b3c1510e9050fa5c24146eeaac55f496f73e\r\n" + ] + } + ], + "source": [ + "!docker run --name \"reddit_predictor\" -d --rm -p 9001:9000 seldonio/reddit-classifier:0.1" + ] + }, + { + "cell_type": "code", + "execution_count": 24, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "2743a0561c99be7371dbbbb6c87a036c8fe22690bcba6da03d8041c1011546cc\n" + "Note: Unnecessary use of -X or --request, POST is already inferred.\r\n", + "* Trying 127.0.0.1:9001...\r\n", + "* TCP_NODELAY set\r\n", + "* Connected to localhost (127.0.0.1) port 9001 (#0)\r\n", + "> POST /api/v1.0/predictions HTTP/1.1\r", + "\r\n", + "> Host: localhost:9001\r", + "\r\n", + "> User-Agent: curl/7.68.0\r", + "\r\n", + "> Accept: */*\r", + "\r\n", + "> Content-Type: application/json\r", + "\r\n", + "> Content-Length: 76\r", + "\r\n", + "> \r", + "\r\n", + "* upload completely sent off: 76 out of 76 bytes\r\n", + "* Mark bundle as not supporting multiuse\r\n", + "< HTTP/1.1 200 OK\r", + "\r\n", + "< Server: gunicorn/20.0.4\r", + "\r\n", + "< Date: Thu, 18 Mar 2021 14:23:16 GMT\r", + "\r\n", + "< Connection: keep-alive\r", + "\r\n", + "< Content-Type: application/json\r", + "\r\n", + "< Content-Length: 95\r", + "\r\n", + "< Access-Control-Allow-Origin: *\r", + "\r\n", + "< \r", + "\r\n", + "{\"data\":{\"names\":[\"t:0\",\"t:1\"],\"ndarray\":[[0.535398662890584,0.46460133710941603]]},\"meta\":{}}\r\n", + "* Connection #0 to host localhost left intact\r\n" ] } ], "source": [ - "!docker run --name \"reddit_predictor\" -d --rm -p 5001:5000 seldonio/reddit-classifier:0.1" + "!curl -v -X POST -H 'Content-Type: application/json' \\\n", + " -d '{\"data\": { \"ndarray\": [\"this is a terrible comment\"], \"names\": [\"tfidf\"] } }' \\\n", + " http://localhost:9001/api/v1.0/predictions" ] }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 23, "metadata": { "scrolled": true }, @@ -575,10 +959,10 @@ " values {\n", " list_value {\n", " values {\n", - " number_value: 0.8285423647440985\n", + " number_value: 0.8279173241631921\n", " }\n", " values {\n", - " number_value: 0.17145763525590152\n", + " number_value: 0.1720826758368079\n", " }\n", " }\n", " }\n", @@ -590,7 +974,7 @@ ], "source": [ "# We now test the REST endpoint expecting the same result\n", - "endpoint = \"0.0.0.0:5001\"\n", + "endpoint = \"0.0.0.0:9001\"\n", "batch = sample\n", "payload_type = \"ndarray\"\n", "\n", @@ -606,14 +990,14 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 25, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "reddit_predictor\n" + "reddit_predictor\r\n" ] } ], @@ -635,7 +1019,7 @@ "source": [ "## Setup Seldon Core\n", "\n", - "Use the setup notebook to [Setup Cluster](https://docs.seldon.io/projects/seldon-core/en/latest/examples/seldon_core_setup.html#Setup-Cluster) with [Ambassador Ingress](https://docs.seldon.io/projects/seldon-core/en/latest/examples/seldon_core_setup.html#Ambassador) and [Install Seldon Core](https://docs.seldon.io/projects/seldon-core/en/latest/examples/seldon_core_setup.html#Install-Seldon-Core). Instructions [also online](https://docs.seldon.io/projects/seldon-core/en/latest/examples/seldon_core_setup.html)." + "Use the setup notebook to [Setup Cluster](https://docs.seldon.io/projects/seldon-core/en/latest/examples/seldon_core_setup.html#Setup-Cluster) with [Ambassador Ingress](https://docs.seldon.io/projects/seldon-core/en/latest/examples/seldon_core_setup.html#Ambassador) or Istio and [Install Seldon Core](https://docs.seldon.io/projects/seldon-core/en/latest/examples/seldon_core_setup.html#Install-Seldon-Core). Instructions [also online](https://docs.seldon.io/projects/seldon-core/en/latest/examples/seldon_core_setup.html)." ] }, { @@ -648,7 +1032,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 26, "metadata": { "scrolled": true }, @@ -728,14 +1112,14 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 40, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "seldondeployment.machinelearning.seldon.io/reddit-classifier created\n" + "seldondeployment.machinelearning.seldon.io/reddit-classifier unchanged\r\n" ] } ], @@ -745,15 +1129,16 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 28, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "NAME READY STATUS RESTARTS AGE\n", - "reddit-classifier-single-model-0-classifier-6fb8dbfd87-w8stj 2/2 Running 0 27s\n" + "NAME READY STATUS RESTARTS AGE\r\n", + "reddit-classifier-single-model-0-classifier-78d5cf779d-btxqg 2/2 Running 0 27s\r\n", + "seldon-92a927e5e90d7602e08ba9b9304f70e8-5bcf96696f-6pwbt 1/2 Running 2 2d23h\r\n" ] } ], @@ -779,22 +1164,60 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 41, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "{\"data\":{\"names\":[\"t:0\",\"t:1\"],\"ndarray\":[[0.6821638979867455,0.3178361020132546]]},\"meta\":{}}\n" + "{\"data\":{\"names\":[\"t:0\",\"t:1\"],\"ndarray\":[[0.6811752552555037,0.3188247447444963]]},\"meta\":{\"requestPath\":{\"classifier\":\"seldonio/reddit-classifier:0.1\"}}}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " % Total % Received % Xferd Average Speed Time Time Time Current\n", + " Dload Upload Total Spent Left Speed\n", + "\r", + " 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0* Trying 127.0.0.1:80...\n", + "* TCP_NODELAY set\n", + "* Connected to localhost (127.0.0.1) port 80 (#0)\n", + "> POST /seldon/default/reddit-classifier/api/v1.0/predictions HTTP/1.1\r\n", + "> Host: localhost\r\n", + "> User-Agent: curl/7.68.0\r\n", + "> Accept: */*\r\n", + "> Content-Type: application/json\r\n", + "> Content-Length: 72\r\n", + "> \r\n", + "} [72 bytes data]\n", + "* upload completely sent off: 72 out of 72 bytes\n", + "* Mark bundle as not supporting multiuse\n", + "< HTTP/1.1 200 OK\r\n", + "< access-control-allow-headers: Accept, Accept-Encoding, Authorization, Content-Length, Content-Type, X-CSRF-Token\r\n", + "< access-control-allow-methods: OPTIONS,POST\r\n", + "< access-control-allow-origin: *\r\n", + "< content-type: application/json\r\n", + "< seldon-puid: 7b586b15-2a7a-4d36-aa39-210b24cb5841\r\n", + "< x-content-type-options: nosniff\r\n", + "< date: Fri, 19 Mar 2021 09:47:09 GMT\r\n", + "< content-length: 156\r\n", + "< x-envoy-upstream-service-time: 19\r\n", + "< server: istio-envoy\r\n", + "< \r\n", + "{ [156 bytes data]\n", + "\r", + "100 228 100 156 100 72 4000 1846 --:--:-- --:--:-- --:--:-- 5846\n", + "* Connection #0 to host localhost left intact\n" ] } ], "source": [ "%%bash\n", - "curl -s -H 'Content-Type: application/json' \\\n", + "curl -v -H 'Content-Type: application/json' \\\n", " -d '{\"data\": {\"names\": [\"text\"], \"ndarray\": [\"Hello world this is a test\"]}}' \\\n", - " http://localhost:8003/seldon/seldon/reddit-classifier/api/v1.0/predictions" + " http://localhost:80/seldon/default/reddit-classifier/api/v1.0/predictions" ] }, { @@ -806,7 +1229,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 42, "metadata": {}, "outputs": [ { @@ -827,7 +1250,7 @@ "}\n", "\n", "Response:\n", - "{'data': {'names': ['t:0', 't:1'], 'ndarray': [[0.6821638979867455, 0.3178361020132546]]}, 'meta': {}}\n" + "{'data': {'names': ['t:0', 't:1'], 'ndarray': [[0.6811752552555037, 0.3188247447444963]]}, 'meta': {'requestPath': {'classifier': 'seldonio/reddit-classifier:0.1'}}}\n" ] } ], @@ -838,8 +1261,8 @@ "sc = SeldonClient(\n", " gateway=\"ambassador\", \n", " transport=\"rest\",\n", - " gateway_endpoint=\"localhost:8003\", # Make sure you use the port above\n", - " namespace=\"seldon\"\n", + " gateway_endpoint=\"localhost:80\", # Make sure you use the port above\n", + " namespace=\"default\"\n", ")\n", "\n", "client_prediction = sc.predict(\n",