From 0dbb97b5fb1bda6680ed2a8243b7bf550fc86fda Mon Sep 17 00:00:00 2001
From: nadinet <nadine.toledano@gmail.com>
Date: Fri, 16 Apr 2021 16:23:32 +0100
Subject: [PATCH 1/5] add Triton GPT2 Example

---
 examples/triton_gpt2/README.ipynb | 367 ++++++++++++++++++++++++++++++
 1 file changed, 367 insertions(+)
 create mode 100644 examples/triton_gpt2/README.ipynb

diff --git a/examples/triton_gpt2/README.ipynb b/examples/triton_gpt2/README.ipynb
new file mode 100644
index 0000000000..966af103b7
--- /dev/null
+++ b/examples/triton_gpt2/README.ipynb
@@ -0,0 +1,367 @@
+{
+ "cells": [
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "incorporate-present",
+   "metadata": {},
+   "source": [
+    "# Pretrainned GPT2  Model Deployment Example\n",
+    "\n",
+    "In this notebook we will run an example of text generation using GPT2 pretrained model exported from Hugging Face transformers library and deployed with Seldon-Core Triton ..... We will run a greedy algorithem implemantation for next token prediction.\n",
+    "more info: https://huggingface.co/transformers/model_doc/gpt2.html?highlight=gpt2\n",
+    "\n",
+    "## Steps:\n",
+    "1. Download pretrained GPT2 model from hugging face\n",
+    "2. Convert the model to ONNX\n",
+    "3. Store it in MinIo bucket\n",
+    "4. Setup seldon in your kubernetes cluster\n",
+    "5. Deploy the ONNX model with Seldon’s prepackaged Triton server.\n",
+    "6. Interact with the model, run a greedy alg example (generate sentance complition)\n",
+    "\n",
+    "## Basic requierments \n",
+    "* Helm v3.0.0+\n",
+    "* A Kubernetes cluster running v1.13 or above (minkube / docker-for-windows work well if enough RAM)\n",
+    "* kubectl v1.14+\n",
+    "* Python 3.6+ "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "plain-purple",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%writefile requirements.txt\n",
+    "transformers==4.5.1\n",
+    "torch==1.8.1\n",
+    "tokenizers<0.11,>=0.10.1\n",
+    "tensorflow==2.4.1\n",
+    "tf2onnx"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "dressed-paint",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!pip install --trusted-host=pypi.python.org --trusted-host=pypi.org --trusted-host=files.pythonhosted.org -r requirements.txt\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "willing-greene",
+   "metadata": {},
+   "source": [
+    "### Export   🤗 TFGPT2LMHeadModel pre-trained model and save it locally"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "moving-monitoring",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from transformers import TFGPT2LMHeadModel, GPT2Tokenizer\n",
+    "tokenizer = GPT2Tokenizer.from_pretrained(\"gpt2\")\n",
+    "model = TFGPT2LMHeadModel.from_pretrained(\"gpt2\", from_pt=True, pad_token_id=tokenizer.eos_token_id)\n",
+    "model.save_pretrained(\"./tfgpt2model\", saved_model=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "expired-burns",
+   "metadata": {},
+   "source": [
+    "### Convert the TensorFlow saved model to ONNX"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "institutional-observation",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!python -m tf2onnx.convert --saved-model ./tfgpt2model/saved_model/1 --opset 11  --output model.onnx"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "demographic-bottom",
+   "metadata": {},
+   "source": [
+    "### Copy your model to a local MinIo\n",
+    "#### Setup MinIo\n",
+    "Use the provided [notebook](https://docs.seldon.io/projects/seldon-core/en/latest/examples/minio_setup.html) to install MinIo in your cluster and configure `mc` CLI tool. Instructions also [online](https://docs.min.io/docs/minio-client-quickstart-guide.html).\n",
+    "\n",
+    "-- Note: You can use your prefer remote storage server (google/ AWS and etc.)\n",
+    "\n",
+    "#### Create a Bucket and store your model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "offensive-astronomy",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!mc mb minio-seldon/onnx-gpt2 -p\n",
+    "!mc cp ./model.onnx minio-seldon/onnx-gpt2/gpt2/1/"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "three-recommendation",
+   "metadata": {},
+   "source": [
+    "### Run Seldon in your kubernetes cluster\n",
+    "\n",
+    "Follow the [Seldon-Core Setup notebook](https://docs.seldon.io/projects/seldon-core/en/latest/examples/seldon_core_setup.html) to set up a cluster with Ambassador Ingress or Istio and install Seldon Core."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "aware-advocate",
+   "metadata": {},
+   "source": [
+    "### Deploy your model with Seldon pre-packaged Triton server"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "young-apparel",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%writefile secret.yaml\n",
+    "\n",
+    "apiVersion: v1\n",
+    "kind: Secret\n",
+    "metadata:\n",
+    "  name: seldon-init-container-secret\n",
+    "  namespace: seldon\n",
+    "type: Opaque\n",
+    "stringData:\n",
+    "  AWS_ACCESS_KEY_ID: minioadmin\n",
+    "  AWS_SECRET_ACCESS_KEY: minioadmin\n",
+    "  AWS_ENDPOINT_URL: http://minio.minio-system.svc.cluster.local:9000\n",
+    "  USE_SSL: \"false\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "technological-treasure",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%writefile gpt2-deploy.yaml\n",
+    "apiVersion: machinelearning.seldon.io/v1alpha2\n",
+    "kind: SeldonDeployment\n",
+    "metadata:\n",
+    "  name: gpt2\n",
+    "  namespace: seldon\n",
+    "spec:\n",
+    "  predictors:\n",
+    "  - graph:\n",
+    "      implementation: TRITON_SERVER\n",
+    "      logger:\n",
+    "        mode: all\n",
+    "      modelUri: s3://onnx-gpt2\n",
+    "      envSecretRefName: seldon-init-container-secret\n",
+    "      name: gpt2\n",
+    "      type: MODEL\n",
+    "    name: default\n",
+    "    replicas: 1\n",
+    "  protocol: kfserving"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "stupid-pattern",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!kubectl apply -f secret.yaml\n",
+    "!kubectl apply -f gpt2-deploy.yaml"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "sonic-peace",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!kubectl rollout status deploy/$(kubectl get deploy -l seldon-deployment-id=gpt2 -o jsonpath='{.items[0].metadata.name}')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cellular-still",
+   "metadata": {},
+   "source": [
+    "#### Interact with the model: get model metadata ( a \"test\" request to make sure model is avilable)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 43,
+   "id": "complete-solomon",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "*   Trying 127.0.0.1:80...\r\n",
+      "* TCP_NODELAY set\r\n",
+      "* Connected to localhost (127.0.0.1) port 80 (#0)\r\n",
+      "> GET /seldon/seldon/gpt2/v2/models/gpt2 HTTP/1.1\r",
+      "\r\n",
+      "> Host: localhost\r",
+      "\r\n",
+      "> User-Agent: curl/7.68.0\r",
+      "\r\n",
+      "> Accept: */*\r",
+      "\r\n",
+      "> \r",
+      "\r\n",
+      "* Mark bundle as not supporting multiuse\r\n",
+      "< HTTP/1.1 200 OK\r",
+      "\r\n",
+      "< access-control-allow-headers: Accept, Accept-Encoding, Authorization, Content-Length, Content-Type, X-CSRF-Token\r",
+      "\r\n",
+      "< access-control-allow-methods: GET,OPTIONS\r",
+      "\r\n",
+      "< access-control-allow-origin: *\r",
+      "\r\n",
+      "< content-type: application/json\r",
+      "\r\n",
+      "< seldon-puid: 7e24a20b-3130-4f50-a86b-bda5a9c4c917\r",
+      "\r\n",
+      "< x-content-type-options: nosniff\r",
+      "\r\n",
+      "< date: Fri, 16 Apr 2021 15:19:28 GMT\r",
+      "\r\n",
+      "< content-length: 336\r",
+      "\r\n",
+      "< x-envoy-upstream-service-time: 1\r",
+      "\r\n",
+      "< server: istio-envoy\r",
+      "\r\n",
+      "< \r",
+      "\r\n",
+      "* Connection #0 to host localhost left intact\r\n",
+      "{\"name\":\"gpt2\",\"versions\":[\"1\"],\"platform\":\"onnxruntime_onnx\",\"inputs\":[{\"name\":\"input_ids:0\",\"datatype\":\"INT32\",\"shape\":[-1,-1]},{\"name\":\"attention_mask:0\",\"datatype\":\"INT32\",\"shape\":[-1,-1]}],\"outputs\":[{\"name\":\"past_key_values\",\"datatype\":\"FP32\",\"shape\":[12,2,-1,12,-1,64]},{\"name\":\"logits\",\"datatype\":\"FP32\",\"shape\":[-1,-1,50257]}]}"
+     ]
+    }
+   ],
+   "source": [
+    "!curl -v http://localhost:80/seldon/seldon/gpt2/v2/models/gpt2"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "worldwide-tuition",
+   "metadata": {},
+   "source": [
+    "### Run prediction test: generate a sentense complition using GPT2 model  - Greedy approach\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 44,
+   "id": "express-czech",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Input: I enjoy spring in London, especially walking in the parks\n",
+      "Output: I enjoy spring in London, especially walking in the parks . I love the smell of the roses and the\n"
+     ]
+    }
+   ],
+   "source": [
+    "import requests\n",
+    "import json\n",
+    "import numpy as np\n",
+    "from transformers import GPT2Tokenizer\n",
+    "\n",
+    "tokenizer = GPT2Tokenizer.from_pretrained(\"gpt2\")\n",
+    "input_text = 'I enjoy spring in London, especially walking in the parks'\n",
+    "count = 0\n",
+    "max_gen_len = 10\n",
+    "gen_sentence = input_text\n",
+    "while count < max_gen_len:\n",
+    "    input_ids = tokenizer.encode(gen_sentence, return_tensors='tf')\n",
+    "    shape = input_ids.shape.as_list()\n",
+    "    payload = {\n",
+    "            \"inputs\": [\n",
+    "                {\"name\": \"input_ids:0\",\n",
+    "                 \"datatype\": \"INT32\",\n",
+    "                 \"shape\": shape,\n",
+    "                 \"data\": input_ids.numpy().tolist()\n",
+    "                 },\n",
+    "                {\"name\": \"attention_mask:0\",\n",
+    "                 \"datatype\": \"INT32\",\n",
+    "                 \"shape\": shape,\n",
+    "                 \"data\": np.ones(shape, dtype=np.int32).tolist()\n",
+    "                 }\n",
+    "                ]\n",
+    "            }\n",
+    "\n",
+    "    ret = requests.post('http://localhost:80/seldon/seldon/gpt2/v2/models/gpt2/infer', json=payload)\n",
+    "\n",
+    "    # Fail fast on HTTP or JSON-decoding errors: silently retrying here would\n",
+    "    # loop forever, because count only advances after a successful response.\n",
+    "    ret.raise_for_status()\n",
+    "    res = ret.json()\n",
+    "\n",
+    "    # extract logits (looked up by output name, not by position in the response)\n",
+    "    logits_out = next(o for o in res[\"outputs\"] if o[\"name\"] == \"logits\")\n",
+    "    logits = np.array(logits_out[\"data\"]).reshape(logits_out[\"shape\"])\n",
+    "\n",
+    "    # take the best next token probability of the last token of input ( greedy approach)\n",
+    "    next_token = logits.argmax(axis=2)[0]\n",
+    "    next_token_str = tokenizer.decode(next_token[-1:], skip_special_tokens=True,\n",
+    "                                      clean_up_tokenization_spaces=True).strip()\n",
+    "    gen_sentence += ' ' + next_token_str\n",
+    "    count += 1\n",
+    "\n",
+    "print(f'Input: {input_text}\\nOutput: {gen_sentence}')"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

From a71dfaf43f08b14b80117d69acd22aa789885b77 Mon Sep 17 00:00:00 2001
From: nadinet <nadine.toledano@gmail.com>
Date: Sat, 17 Apr 2021 16:53:41 +0100
Subject: [PATCH 2/5] Triton GPT2 example

 add to docs link and update the notebook
---
 doc/source/examples/notebooks.rst             |   1 +
 .../examples/triton_gpt2_example.nblink       |   3 +
 examples/triton_gpt2/README.ipynb             | 123 +++++++++++++-----
 3 files changed, 91 insertions(+), 36 deletions(-)
 create mode 100644 doc/source/examples/triton_gpt2_example.nblink

diff --git a/doc/source/examples/notebooks.rst b/doc/source/examples/notebooks.rst
index 77696ab96a..d52c4c1771 100644
--- a/doc/source/examples/notebooks.rst
+++ b/doc/source/examples/notebooks.rst
@@ -37,6 +37,7 @@ Python Language Wrapper Examples
    TFserving MNIST <tfserving_mnist>
    Statsmodels Holt-Winter's time-series model <statsmodels>
    Runtime Metrics & Tags <runtime_metrics_tags>
+   Triton GPT2 Example <triton_gpt2_example>
 
 Specialised Framework Examples
 ------------------------------
diff --git a/doc/source/examples/triton_gpt2_example.nblink b/doc/source/examples/triton_gpt2_example.nblink
new file mode 100644
index 0000000000..6d83f6551b
--- /dev/null
+++ b/doc/source/examples/triton_gpt2_example.nblink
@@ -0,0 +1,3 @@
+{
+  "path": "../../../examples/triton_gpt2/README.ipynb"
+}
diff --git a/examples/triton_gpt2/README.ipynb b/examples/triton_gpt2/README.ipynb
index 966af103b7..965a027c16 100644
--- a/examples/triton_gpt2/README.ipynb
+++ b/examples/triton_gpt2/README.ipynb
@@ -1,14 +1,15 @@
 {
  "cells": [
   {
-   "attachments": {},
    "cell_type": "markdown",
-   "id": "incorporate-present",
+   "id": "liked-toronto",
    "metadata": {},
    "source": [
     "# Pretrainned GPT2  Model Deployment Example\n",
     "\n",
-    "In this notebook we will run an example of text generation using GPT2 pretrained model exported from Hugging Face transformers library and deployed with Seldon-Core Triton ..... We will run a greedy algorithem implemantation for next token prediction.\n",
+    "In this notebook we will run an example of text generation using GPT2 pretrained model, which is exported from HuggingFace transformers library and deployed with Seldon's Triton pre-packed server.\n",
+    "the implemented example bellow is of the Greedy approach, implemantation for next token prediction.\n",
+    "\n",
     "more info: https://huggingface.co/transformers/model_doc/gpt2.html?highlight=gpt2\n",
     "\n",
     "## Steps:\n",
@@ -18,6 +19,7 @@
     "4. Setup seldon in your kubernetes cluster\n",
     "5. Deploy the ONNX model with Seldon’s prepackaged Triton server.\n",
     "6. Interact with the model, run a greedy alg example (generate sentance complition)\n",
+    "7. Cleanup\n",
     "\n",
     "## Basic requierments \n",
     "* Helm v3.0.0+\n",
@@ -29,7 +31,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "plain-purple",
+   "id": "korean-reporter",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -44,7 +46,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "dressed-paint",
+   "id": "assigned-diesel",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -53,7 +55,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "willing-greene",
+   "id": "completed-evaluation",
    "metadata": {},
    "source": [
     "### Export   🤗 TFGPT2LMHeadModel pre-trained model and save it locally"
@@ -62,7 +64,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "moving-monitoring",
+   "id": "iraqi-million",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -74,7 +76,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "expired-burns",
+   "id": "further-tribute",
    "metadata": {},
    "source": [
     "### Convert the TensorFlow saved model to ONNX"
@@ -83,7 +85,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "institutional-observation",
+   "id": "irish-mountain",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -92,7 +94,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "demographic-bottom",
+   "id": "sunset-pantyhose",
    "metadata": {},
    "source": [
     "### Copy your model to a local MinIo\n",
@@ -107,7 +109,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "offensive-astronomy",
+   "id": "lasting-performance",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -117,7 +119,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "three-recommendation",
+   "id": "convinced-syracuse",
    "metadata": {},
    "source": [
     "### Run Seldon in your kubernetes cluster\n",
@@ -127,7 +129,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "aware-advocate",
+   "id": "backed-outreach",
    "metadata": {},
    "source": [
     "### Deploy your model with Seldon pre-packaged Triton server"
@@ -135,10 +137,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "id": "young-apparel",
+   "execution_count": 3,
+   "id": "declared-crown",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Overwriting secret.yaml\n"
+     ]
+    }
+   ],
    "source": [
     "%%writefile secret.yaml\n",
     "\n",
@@ -146,7 +156,6 @@
     "kind: Secret\n",
     "metadata:\n",
     "  name: seldon-init-container-secret\n",
-    "  namespace: seldon\n",
     "type: Opaque\n",
     "stringData:\n",
     "  AWS_ACCESS_KEY_ID: minioadmin\n",
@@ -157,17 +166,24 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "id": "technological-treasure",
+   "execution_count": 4,
+   "id": "beneficial-anime",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Overwriting gpt2-deploy.yaml\n"
+     ]
+    }
+   ],
    "source": [
     "%%writefile gpt2-deploy.yaml\n",
     "apiVersion: machinelearning.seldon.io/v1alpha2\n",
     "kind: SeldonDeployment\n",
     "metadata:\n",
     "  name: gpt2\n",
-    "  namespace: seldon\n",
     "spec:\n",
     "  predictors:\n",
     "  - graph:\n",
@@ -176,7 +192,7 @@
     "        mode: all\n",
     "      modelUri: s3://onnx-gpt2\n",
     "      envSecretRefName: seldon-init-container-secret\n",
-    "      name: gpt2\n",
+    "      name: gpt23\n",
     "      type: MODEL\n",
     "    name: default\n",
     "    replicas: 1\n",
@@ -185,10 +201,19 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "id": "stupid-pattern",
+   "execution_count": 5,
+   "id": "subjective-involvement",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "secret/seldon-init-container-secret configured\n",
+      "seldondeployment.machinelearning.seldon.io/gpt2 configured\n"
+     ]
+    }
+   ],
    "source": [
     "!kubectl apply -f secret.yaml\n",
     "!kubectl apply -f gpt2-deploy.yaml"
@@ -196,17 +221,25 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "id": "sonic-peace",
+   "execution_count": 6,
+   "id": "demanding-thesaurus",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "deployment \"gpt2-default-0-gpt2\" successfully rolled out\r\n"
+     ]
+    }
+   ],
    "source": [
     "!kubectl rollout status deploy/$(kubectl get deploy -l seldon-deployment-id=gpt2 -o jsonpath='{.items[0].metadata.name}')"
    ]
   },
   {
    "cell_type": "markdown",
-   "id": "cellular-still",
+   "id": "digital-supervisor",
    "metadata": {},
    "source": [
     "#### Interact with the model: get model metadata ( a \"test\" request to make sure model is avilable)"
@@ -215,7 +248,7 @@
   {
    "cell_type": "code",
    "execution_count": 43,
-   "id": "complete-solomon",
+   "id": "married-roller",
    "metadata": {},
    "outputs": [
     {
@@ -271,7 +304,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "worldwide-tuition",
+   "id": "anonymous-resource",
    "metadata": {},
    "source": [
     "### Run prediction test: generate a sentense complition using GPT2 model  - Greedy approach\n"
@@ -279,16 +312,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 44,
-   "id": "express-czech",
+   "execution_count": 7,
+   "id": "modified-termination",
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Input: I enjoy spring in London, especially walking in the parks\n",
-      "Output: I enjoy spring in London, especially walking in the parks . I love the smell of the roses and the\n"
+      "Input: I enjoy working in Seldon\n",
+      "Output: I enjoy working in Seldon 's office , and I 'm glad to see that\n"
      ]
     }
    ],
@@ -299,7 +332,7 @@
     "from transformers import GPT2Tokenizer\n",
     "\n",
     "tokenizer = GPT2Tokenizer.from_pretrained(\"gpt2\")\n",
-    "input_text = 'I enjoy spring in London, especially walking in the parks'\n",
+    "input_text = 'I enjoy working in Seldon'\n",
     "count = 0\n",
     "max_gen_len = 10\n",
     "gen_sentence = input_text\n",
@@ -341,6 +374,24 @@
     "\n",
     "print(f'Input: {input_text}\\nOutput: {gen_sentence}')"
    ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "patient-suite",
+   "metadata": {},
+   "source": [
+    "### Cleanup"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "pacific-collectible",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!kubectl delete -f gpt2-deploy.yaml"
+   ]
   }
  ],
  "metadata": {

From 0fc37873c49f147617685031ac862230d2e79dc8 Mon Sep 17 00:00:00 2001
From: nadinet <nadine.toledano@gmail.com>
Date: Sat, 17 Apr 2021 19:29:26 +0100
Subject: [PATCH 3/5] Remove HF logo from subtitle

---
 examples/triton_gpt2/README.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/triton_gpt2/README.ipynb b/examples/triton_gpt2/README.ipynb
index 965a027c16..7307597c56 100644
--- a/examples/triton_gpt2/README.ipynb
+++ b/examples/triton_gpt2/README.ipynb
@@ -58,7 +58,7 @@
    "id": "completed-evaluation",
    "metadata": {},
    "source": [
-    "### Export   🤗 TFGPT2LMHeadModel pre-trained model and save it locally"
+    "### Export HuggingFace TFGPT2LMHeadModel pre-trained model and save it locally"
    ]
   },
   {

From da988d1feb029b5b5dbf9e80d154e86dfe8785ec Mon Sep 17 00:00:00 2001
From: nadinet <nadine.toledano@gmail.com>
Date: Mon, 19 Apr 2021 15:44:55 +0100
Subject: [PATCH 4/5] fix typo

---
 examples/triton_gpt2/README.ipynb | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/examples/triton_gpt2/README.ipynb b/examples/triton_gpt2/README.ipynb
index 7307597c56..a33e8d8a77 100644
--- a/examples/triton_gpt2/README.ipynb
+++ b/examples/triton_gpt2/README.ipynb
@@ -5,10 +5,9 @@
    "id": "liked-toronto",
    "metadata": {},
    "source": [
-    "# Pretrainned GPT2  Model Deployment Example\n",
+    "# Pretrained  GPT2  Model Deployment Example\n",
     "\n",
-    "In this notebook we will run an example of text generation using GPT2 pretrained model, which is exported from HuggingFace transformers library and deployed with Seldon's Triton pre-packed server.\n",
-    "the implemented example bellow is of the Greedy approach, implemantation for next token prediction.\n",
+    "In this notebook we will run an example of text generation using GPT2 pretrained model, which is exported from HuggingFace transformers library and deployed with Seldon's Triton pre-packed server. The implemented example below is of the Greedy approach, an implementation for next token prediction.\n",
     "\n",
     "more info: https://huggingface.co/transformers/model_doc/gpt2.html?highlight=gpt2\n",
     "\n",
@@ -16,10 +15,10 @@
     "1. Download pretrained GPT2 model from hugging face\n",
     "2. Convert the model to ONNX\n",
     "3. Store it in MinIo bucket\n",
-    "4. Setup seldon in your kubernetes cluster\n",
+    "4. Set up Seldon-Core in your Kubernetes cluster\n",
     "5. Deploy the ONNX model with Seldon’s prepackaged Triton server.\n",
-    "6. Interact with the model, run a greedy alg example (generate sentance complition)\n",
-    "7. Cleanup\n",
+    "6. Interact with the model, run a greedy alg example (generate sentence completion)\n",
+    "7. Clean-up\n",
     "\n",
     "## Basic requierments \n",
     "* Helm v3.0.0+\n",
@@ -101,7 +100,7 @@
     "#### Setup MinIo\n",
     "Use the provided [notebook](https://docs.seldon.io/projects/seldon-core/en/latest/examples/minio_setup.html) to install MinIo in your cluster and configure `mc` CLI tool. Instructions also [online](https://docs.min.io/docs/minio-client-quickstart-guide.html).\n",
     "\n",
-    "-- Note: You can use your prefer remote storage server (google/ AWS and etc.)\n",
+    "-- Note: You can use your preferred remote storage server (Google / AWS etc.)\n",
     "\n",
     "#### Create a Bucket and store your model"
    ]
@@ -192,7 +191,7 @@
     "        mode: all\n",
     "      modelUri: s3://onnx-gpt2\n",
     "      envSecretRefName: seldon-init-container-secret\n",
-    "      name: gpt23\n",
+    "      name: gpt2\n",
     "      type: MODEL\n",
     "    name: default\n",
     "    replicas: 1\n",
@@ -242,7 +241,7 @@
    "id": "digital-supervisor",
    "metadata": {},
    "source": [
-    "#### Interact with the model: get model metadata ( a \"test\" request to make sure model is avilable)"
+    "#### Interact with the model: get model metadata (a \"test\" request to make sure our model is available and loaded correctly)"
    ]
   },
   {
@@ -307,7 +306,7 @@
    "id": "anonymous-resource",
    "metadata": {},
    "source": [
-    "### Run prediction test: generate a sentense complition using GPT2 model  - Greedy approach\n"
+    "### Run prediction test: generate a sentence completion using the GPT2 model - greedy approach\n"
    ]
   },
   {
@@ -380,7 +379,7 @@
    "id": "patient-suite",
    "metadata": {},
    "source": [
-    "### Cleanup"
+    "### Clean-up"
    ]
   },
   {

From 598c9e48fe143b510497adfcaf6c35f9b7333672 Mon Sep 17 00:00:00 2001
From: nadinet <nadine.toledano@gmail.com>
Date: Tue, 20 Apr 2021 14:34:16 +0100
Subject: [PATCH 5/5] fix typo #2

---
 examples/triton_gpt2/README.ipynb | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/examples/triton_gpt2/README.ipynb b/examples/triton_gpt2/README.ipynb
index a33e8d8a77..f0437429cb 100644
--- a/examples/triton_gpt2/README.ipynb
+++ b/examples/triton_gpt2/README.ipynb
@@ -7,7 +7,8 @@
    "source": [
     "# Pretrained  GPT2  Model Deployment Example\n",
     "\n",
-    "In this notebook we will run an example of text generation using GPT2 pretrained model, which is exported from HuggingFace transformers library and deployed with Seldon's Triton pre-packed server. The implemented example below is of the Greedy approach, an implementation for next token prediction.\n",
+    "In this notebook, we will run an example of text generation using a GPT2 model exported from HuggingFace and deployed with Seldon's pre-packaged Triton server. The example also covers converting the model to ONNX format.\n",
+    "The example below implements the greedy approach to next-token prediction.\n",
     "\n",
     "more info: https://huggingface.co/transformers/model_doc/gpt2.html?highlight=gpt2\n",
     "\n",
@@ -20,7 +21,7 @@
     "6. Interact with the model, run a greedy alg example (generate sentence completion)\n",
     "7. Clean-up\n",
     "\n",
-    "## Basic requierments \n",
+    "## Basic requirements\n",
     "* Helm v3.0.0+\n",
     "* A Kubernetes cluster running v1.13 or above (minkube / docker-for-windows work well if enough RAM)\n",
     "* kubectl v1.14+\n",