From 0dbb97b5fb1bda6680ed2a8243b7bf550fc86fda Mon Sep 17 00:00:00 2001 From: nadinet Date: Fri, 16 Apr 2021 16:23:32 +0100 Subject: [PATCH 1/5] add Triton GPT2 Example --- examples/triton_gpt2/README.ipynb | 367 ++++++++++++++++++++++++++++++ 1 file changed, 367 insertions(+) create mode 100644 examples/triton_gpt2/README.ipynb diff --git a/examples/triton_gpt2/README.ipynb b/examples/triton_gpt2/README.ipynb new file mode 100644 index 0000000000..966af103b7 --- /dev/null +++ b/examples/triton_gpt2/README.ipynb @@ -0,0 +1,367 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "incorporate-present", + "metadata": {}, + "source": [ + "# Pretrainned GPT2 Model Deployment Example\n", + "\n", + "In this notebook we will run an example of text generation using GPT2 pretrained model exported from Hugging Face transformers library and deployed with Seldon-Core Triton ..... We will run a greedy algorithem implemantation for next token prediction.\n", + "more info: https://huggingface.co/transformers/model_doc/gpt2.html?highlight=gpt2\n", + "\n", + "## Steps:\n", + "1. Download pretrained GPT2 model from hugging face\n", + "2. Convert the model to ONNX\n", + "3. Store it in MinIo bucket\n", + "4. Setup seldon in your kubernetes cluster\n", + "5. Deploy the ONNX model with Seldon’s prepackaged Triton server.\n", + "6. 
Interact with the model, run a greedy alg example (generate sentance complition)\n", + "\n", + "## Basic requierments \n", + "* Helm v3.0.0+\n", + "* A Kubernetes cluster running v1.13 or above (minkube / docker-for-windows work well if enough RAM)\n", + "* kubectl v1.14+\n", + "* Python 3.6+ " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "plain-purple", + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile requirements.txt\n", + "transformers==4.5.1\n", + "torch==1.8.1\n", + "tokenizers<0.11,>=0.10.1\n", + "tensorflow==2.4.1\n", + "tf2onnx" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dressed-paint", + "metadata": {}, + "outputs": [], + "source": [ + "!pip install --trusted-host=pypi.python.org --trusted-host=pypi.org --trusted-host=files.pythonhosted.org -r requirements.txt\n" + ] + }, + { + "cell_type": "markdown", + "id": "willing-greene", + "metadata": {}, + "source": [ + "### Export 🤗 TFGPT2LMHeadModel pre-trained model and save it locally" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "moving-monitoring", + "metadata": {}, + "outputs": [], + "source": [ + "from transformers import TFGPT2LMHeadModel, GPT2Tokenizer\n", + "tokenizer = GPT2Tokenizer.from_pretrained(\"gpt2\")\n", + "model = TFGPT2LMHeadModel.from_pretrained(\"gpt2\", from_pt=True, pad_token_id=tokenizer.eos_token_id)\n", + "model.save_pretrained(\"./tfgpt2model\", saved_model=True)" + ] + }, + { + "cell_type": "markdown", + "id": "expired-burns", + "metadata": {}, + "source": [ + "### Convert the TensorFlow saved model to ONNX" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "institutional-observation", + "metadata": {}, + "outputs": [], + "source": [ + "!python -m tf2onnx.convert --saved-model ./tfgpt2model/saved_model/1 --opset 11 --output model.onnx" + ] + }, + { + "cell_type": "markdown", + "id": "demographic-bottom", + "metadata": {}, + "source": [ + "### Copy your model to a local 
MinIo\n", + "#### Setup MinIo\n", + "Use the provided [notebook](https://docs.seldon.io/projects/seldon-core/en/latest/examples/minio_setup.html) to install MinIo in your cluster and configure `mc` CLI tool. Instructions also [online](https://docs.min.io/docs/minio-client-quickstart-guide.html).\n", + "\n", + "-- Note: You can use your prefer remote storage server (google/ AWS and etc.)\n", + "\n", + "#### Create a Bucket and store your model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "offensive-astronomy", + "metadata": {}, + "outputs": [], + "source": [ + "!mc mb minio-seldon/onnx-gpt2 -p\n", + "!mc cp ./model.onnx minio-seldon/onnx-gpt2/gpt2/1/" + ] + }, + { + "cell_type": "markdown", + "id": "three-recommendation", + "metadata": {}, + "source": [ + "### Run Seldon in your kubernetes cluster\n", + "\n", + "Follow the [Seldon-Core Setup notebook](https://docs.seldon.io/projects/seldon-core/en/latest/examples/seldon_core_setup.html) to Setup a cluster with Ambassador Ingress or Istio and install Seldon Core" + ] + }, + { + "cell_type": "markdown", + "id": "aware-advocate", + "metadata": {}, + "source": [ + "### Deploy your model with Seldon pre-packaged Triton server" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "young-apparel", + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile secret.yaml\n", + "\n", + "apiVersion: v1\n", + "kind: Secret\n", + "metadata:\n", + " name: seldon-init-container-secret\n", + " namespace: seldon\n", + "type: Opaque\n", + "stringData:\n", + " AWS_ACCESS_KEY_ID: minioadmin\n", + " AWS_SECRET_ACCESS_KEY: minioadmin\n", + " AWS_ENDPOINT_URL: http://minio.minio-system.svc.cluster.local:9000\n", + " USE_SSL: \"false\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "technological-treasure", + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile gpt2-deploy.yaml\n", + "apiVersion: machinelearning.seldon.io/v1alpha2\n", + "kind: 
SeldonDeployment\n", + "metadata:\n", + " name: gpt2\n", + " namespace: seldon\n", + "spec:\n", + " predictors:\n", + " - graph:\n", + " implementation: TRITON_SERVER\n", + " logger:\n", + " mode: all\n", + " modelUri: s3://onnx-gpt2\n", + " envSecretRefName: seldon-init-container-secret\n", + " name: gpt2\n", + " type: MODEL\n", + " name: default\n", + " replicas: 1\n", + " protocol: kfserving" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "stupid-pattern", + "metadata": {}, + "outputs": [], + "source": [ + "!kubectl apply -f secret.yaml\n", + "!kubectl apply -f gpt2-deploy.yaml" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "sonic-peace", + "metadata": {}, + "outputs": [], + "source": [ + "!kubectl rollout status deploy/$(kubectl get deploy -l seldon-deployment-id=gpt2 -o jsonpath='{.items[0].metadata.name}')" + ] + }, + { + "cell_type": "markdown", + "id": "cellular-still", + "metadata": {}, + "source": [ + "#### Interact with the model: get model metadata ( a \"test\" request to make sure model is avilable)" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "complete-solomon", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "* Trying 127.0.0.1:80...\r\n", + "* TCP_NODELAY set\r\n", + "* Connected to localhost (127.0.0.1) port 80 (#0)\r\n", + "> GET /seldon/seldon/gpt2/v2/models/gpt2 HTTP/1.1\r", + "\r\n", + "> Host: localhost\r", + "\r\n", + "> User-Agent: curl/7.68.0\r", + "\r\n", + "> Accept: */*\r", + "\r\n", + "> \r", + "\r\n", + "* Mark bundle as not supporting multiuse\r\n", + "< HTTP/1.1 200 OK\r", + "\r\n", + "< access-control-allow-headers: Accept, Accept-Encoding, Authorization, Content-Length, Content-Type, X-CSRF-Token\r", + "\r\n", + "< access-control-allow-methods: GET,OPTIONS\r", + "\r\n", + "< access-control-allow-origin: *\r", + "\r\n", + "< content-type: application/json\r", + "\r\n", + "< seldon-puid: 
7e24a20b-3130-4f50-a86b-bda5a9c4c917\r", + "\r\n", + "< x-content-type-options: nosniff\r", + "\r\n", + "< date: Fri, 16 Apr 2021 15:19:28 GMT\r", + "\r\n", + "< content-length: 336\r", + "\r\n", + "< x-envoy-upstream-service-time: 1\r", + "\r\n", + "< server: istio-envoy\r", + "\r\n", + "< \r", + "\r\n", + "* Connection #0 to host localhost left intact\r\n", + "{\"name\":\"gpt2\",\"versions\":[\"1\"],\"platform\":\"onnxruntime_onnx\",\"inputs\":[{\"name\":\"input_ids:0\",\"datatype\":\"INT32\",\"shape\":[-1,-1]},{\"name\":\"attention_mask:0\",\"datatype\":\"INT32\",\"shape\":[-1,-1]}],\"outputs\":[{\"name\":\"past_key_values\",\"datatype\":\"FP32\",\"shape\":[12,2,-1,12,-1,64]},{\"name\":\"logits\",\"datatype\":\"FP32\",\"shape\":[-1,-1,50257]}]}" + ] + } + ], + "source": [ + "!curl -v http://localhost:80/seldon/seldon/gpt2/v2/models/gpt2" + ] + }, + { + "cell_type": "markdown", + "id": "worldwide-tuition", + "metadata": {}, + "source": [ + "### Run prediction test: generate a sentense complition using GPT2 model - Greedy approach\n" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "express-czech", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Input: I enjoy spring in London, especially walking in the parks\n", + "Output: I enjoy spring in London, especially walking in the parks . 
I love the smell of the roses and the\n" + ] + } + ], + "source": [ + "import requests\n", + "import json\n", + "import numpy as np\n", + "from transformers import GPT2Tokenizer\n", + "\n", + "tokenizer = GPT2Tokenizer.from_pretrained(\"gpt2\")\n", + "input_text = 'I enjoy spring in London, especially walking in the parks'\n", + "count = 0\n", + "max_gen_len = 10\n", + "gen_sentence = input_text\n", + "while count < max_gen_len:\n", + " input_ids = tokenizer.encode(gen_sentence, return_tensors='tf')\n", + " shape = input_ids.shape.as_list()\n", + " payload = {\n", + " \"inputs\": [\n", + " {\"name\": \"input_ids:0\",\n", + " \"datatype\": \"INT32\",\n", + " \"shape\": shape,\n", + " \"data\": input_ids.numpy().tolist()\n", + " },\n", + " {\"name\": \"attention_mask:0\",\n", + " \"datatype\": \"INT32\",\n", + " \"shape\": shape,\n", + " \"data\": np.ones(shape, dtype=np.int32).tolist()\n", + " }\n", + " ]\n", + " }\n", + "\n", + " ret = requests.post('http://localhost:80/seldon/seldon/gpt2/v2/models/gpt2/infer', json=payload)\n", + "\n", + " try:\n", + " res = ret.json()\n", + " except:\n", + " continue\n", + "\n", + " # extract logits\n", + " logits = np.array(res[\"outputs\"][1][\"data\"])\n", + " logits = logits.reshape(res[\"outputs\"][1][\"shape\"])\n", + "\n", + " # take the best next token probability of the last token of input ( greedy approach)\n", + " next_token = logits.argmax(axis=2)[0]\n", + " next_token_str = tokenizer.decode(next_token[-1:], skip_special_tokens=True,\n", + " clean_up_tokenization_spaces=True).strip()\n", + " gen_sentence += ' ' + next_token_str\n", + " count += 1\n", + "\n", + "print(f'Input: {input_text}\\nOutput: {gen_sentence}')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + 
"nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From a71dfaf43f08b14b80117d69acd22aa789885b77 Mon Sep 17 00:00:00 2001 From: nadinet Date: Sat, 17 Apr 2021 16:53:41 +0100 Subject: [PATCH 2/5] Triton GPT2 example add to docs link and update the notebook --- doc/source/examples/notebooks.rst | 1 + .../examples/triton_gpt2_example.nblink | 3 + examples/triton_gpt2/README.ipynb | 123 +++++++++++++----- 3 files changed, 91 insertions(+), 36 deletions(-) create mode 100644 doc/source/examples/triton_gpt2_example.nblink diff --git a/doc/source/examples/notebooks.rst b/doc/source/examples/notebooks.rst index 77696ab96a..d52c4c1771 100644 --- a/doc/source/examples/notebooks.rst +++ b/doc/source/examples/notebooks.rst @@ -37,6 +37,7 @@ Python Language Wrapper Examples TFserving MNIST Statsmodels Holt-Winter's time-series model Runtime Metrics & Tags + Triton GPT2 Example Specialised Framework Examples ------------------------------ diff --git a/doc/source/examples/triton_gpt2_example.nblink b/doc/source/examples/triton_gpt2_example.nblink new file mode 100644 index 0000000000..6d83f6551b --- /dev/null +++ b/doc/source/examples/triton_gpt2_example.nblink @@ -0,0 +1,3 @@ +{ + "path": "../../../examples/triton_gpt2/README.ipynb" +} diff --git a/examples/triton_gpt2/README.ipynb b/examples/triton_gpt2/README.ipynb index 966af103b7..965a027c16 100644 --- a/examples/triton_gpt2/README.ipynb +++ b/examples/triton_gpt2/README.ipynb @@ -1,14 +1,15 @@ { "cells": [ { - "attachments": {}, "cell_type": "markdown", - "id": "incorporate-present", + "id": "liked-toronto", "metadata": {}, "source": [ "# Pretrainned GPT2 Model Deployment Example\n", "\n", - "In this notebook we will run an example of text generation using GPT2 pretrained model exported from Hugging Face transformers library and deployed with Seldon-Core Triton ..... 
We will run a greedy algorithem implemantation for next token prediction.\n", + "In this notebook we will run an example of text generation using GPT2 pretrained model, which is exported from HuggingFace transformers library and deployed with Seldon's Triton pre-packed server.\n", + "the implemented example bellow is of the Greedy approach, implemantation for next token prediction.\n", + "\n", "more info: https://huggingface.co/transformers/model_doc/gpt2.html?highlight=gpt2\n", "\n", "## Steps:\n", @@ -18,6 +19,7 @@ "4. Setup seldon in your kubernetes cluster\n", "5. Deploy the ONNX model with Seldon’s prepackaged Triton server.\n", "6. Interact with the model, run a greedy alg example (generate sentance complition)\n", + "7. Cleanup\n", "\n", "## Basic requierments \n", "* Helm v3.0.0+\n", @@ -29,7 +31,7 @@ { "cell_type": "code", "execution_count": null, - "id": "plain-purple", + "id": "korean-reporter", "metadata": {}, "outputs": [], "source": [ @@ -44,7 +46,7 @@ { "cell_type": "code", "execution_count": null, - "id": "dressed-paint", + "id": "assigned-diesel", "metadata": {}, "outputs": [], "source": [ @@ -53,7 +55,7 @@ }, { "cell_type": "markdown", - "id": "willing-greene", + "id": "completed-evaluation", "metadata": {}, "source": [ "### Export 🤗 TFGPT2LMHeadModel pre-trained model and save it locally" @@ -62,7 +64,7 @@ { "cell_type": "code", "execution_count": null, - "id": "moving-monitoring", + "id": "iraqi-million", "metadata": {}, "outputs": [], "source": [ @@ -74,7 +76,7 @@ }, { "cell_type": "markdown", - "id": "expired-burns", + "id": "further-tribute", "metadata": {}, "source": [ "### Convert the TensorFlow saved model to ONNX" @@ -83,7 +85,7 @@ { "cell_type": "code", "execution_count": null, - "id": "institutional-observation", + "id": "irish-mountain", "metadata": {}, "outputs": [], "source": [ @@ -92,7 +94,7 @@ }, { "cell_type": "markdown", - "id": "demographic-bottom", + "id": "sunset-pantyhose", "metadata": {}, "source": [ "### Copy your model to 
a local MinIo\n", @@ -107,7 +109,7 @@ { "cell_type": "code", "execution_count": null, - "id": "offensive-astronomy", + "id": "lasting-performance", "metadata": {}, "outputs": [], "source": [ @@ -117,7 +119,7 @@ }, { "cell_type": "markdown", - "id": "three-recommendation", + "id": "convinced-syracuse", "metadata": {}, "source": [ "### Run Seldon in your kubernetes cluster\n", @@ -127,7 +129,7 @@ }, { "cell_type": "markdown", - "id": "aware-advocate", + "id": "backed-outreach", "metadata": {}, "source": [ "### Deploy your model with Seldon pre-packaged Triton server" @@ -135,10 +137,18 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "young-apparel", + "execution_count": 3, + "id": "declared-crown", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Overwriting secret.yaml\n" + ] + } + ], "source": [ "%%writefile secret.yaml\n", "\n", @@ -146,7 +156,6 @@ "kind: Secret\n", "metadata:\n", " name: seldon-init-container-secret\n", - " namespace: seldon\n", "type: Opaque\n", "stringData:\n", " AWS_ACCESS_KEY_ID: minioadmin\n", @@ -157,17 +166,24 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "technological-treasure", + "execution_count": 4, + "id": "beneficial-anime", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Overwriting gpt2-deploy.yaml\n" + ] + } + ], "source": [ "%%writefile gpt2-deploy.yaml\n", "apiVersion: machinelearning.seldon.io/v1alpha2\n", "kind: SeldonDeployment\n", "metadata:\n", " name: gpt2\n", - " namespace: seldon\n", "spec:\n", " predictors:\n", " - graph:\n", @@ -176,7 +192,7 @@ " mode: all\n", " modelUri: s3://onnx-gpt2\n", " envSecretRefName: seldon-init-container-secret\n", - " name: gpt2\n", + " name: gpt23\n", " type: MODEL\n", " name: default\n", " replicas: 1\n", @@ -185,10 +201,19 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "stupid-pattern", + "execution_count": 
5, + "id": "subjective-involvement", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "secret/seldon-init-container-secret configured\n", + "seldondeployment.machinelearning.seldon.io/gpt2 configured\n" + ] + } + ], "source": [ "!kubectl apply -f secret.yaml\n", "!kubectl apply -f gpt2-deploy.yaml" @@ -196,17 +221,25 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "sonic-peace", + "execution_count": 6, + "id": "demanding-thesaurus", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "deployment \"gpt2-default-0-gpt2\" successfully rolled out\r\n" + ] + } + ], "source": [ "!kubectl rollout status deploy/$(kubectl get deploy -l seldon-deployment-id=gpt2 -o jsonpath='{.items[0].metadata.name}')" ] }, { "cell_type": "markdown", - "id": "cellular-still", + "id": "digital-supervisor", "metadata": {}, "source": [ "#### Interact with the model: get model metadata ( a \"test\" request to make sure model is avilable)" @@ -215,7 +248,7 @@ { "cell_type": "code", "execution_count": 43, - "id": "complete-solomon", + "id": "married-roller", "metadata": {}, "outputs": [ { @@ -271,7 +304,7 @@ }, { "cell_type": "markdown", - "id": "worldwide-tuition", + "id": "anonymous-resource", "metadata": {}, "source": [ "### Run prediction test: generate a sentense complition using GPT2 model - Greedy approach\n" @@ -279,16 +312,16 @@ }, { "cell_type": "code", - "execution_count": 44, - "id": "express-czech", + "execution_count": 7, + "id": "modified-termination", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Input: I enjoy spring in London, especially walking in the parks\n", - "Output: I enjoy spring in London, especially walking in the parks . 
I love the smell of the roses and the\n" + "Input: I enjoy working in Seldon\n", + "Output: I enjoy working in Seldon 's office , and I 'm glad to see that\n" ] } ], @@ -299,7 +332,7 @@ "from transformers import GPT2Tokenizer\n", "\n", "tokenizer = GPT2Tokenizer.from_pretrained(\"gpt2\")\n", - "input_text = 'I enjoy spring in London, especially walking in the parks'\n", + "input_text = 'I enjoy working in Seldon'\n", "count = 0\n", "max_gen_len = 10\n", "gen_sentence = input_text\n", @@ -341,6 +374,24 @@ "\n", "print(f'Input: {input_text}\\nOutput: {gen_sentence}')" ] + }, + { + "cell_type": "markdown", + "id": "patient-suite", + "metadata": {}, + "source": [ + "### Cleanup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "pacific-collectible", + "metadata": {}, + "outputs": [], + "source": [ + "!kubectl delete -f gpt2-deploy.yaml" + ] } ], "metadata": { From 0fc37873c49f147617685031ac862230d2e79dc8 Mon Sep 17 00:00:00 2001 From: nadinet Date: Sat, 17 Apr 2021 19:29:26 +0100 Subject: [PATCH 3/5] Remove HF logo from subtitle --- examples/triton_gpt2/README.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/triton_gpt2/README.ipynb b/examples/triton_gpt2/README.ipynb index 965a027c16..7307597c56 100644 --- a/examples/triton_gpt2/README.ipynb +++ b/examples/triton_gpt2/README.ipynb @@ -58,7 +58,7 @@ "id": "completed-evaluation", "metadata": {}, "source": [ - "### Export 🤗 TFGPT2LMHeadModel pre-trained model and save it locally" + "### Export HuggingFace TFGPT2LMHeadModel pre-trained model and save it locally" ] }, { From da988d1feb029b5b5dbf9e80d154e86dfe8785ec Mon Sep 17 00:00:00 2001 From: nadinet Date: Mon, 19 Apr 2021 15:44:55 +0100 Subject: [PATCH 4/5] fix typo --- examples/triton_gpt2/README.ipynb | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/examples/triton_gpt2/README.ipynb b/examples/triton_gpt2/README.ipynb index 7307597c56..a33e8d8a77 100644 --- 
a/examples/triton_gpt2/README.ipynb +++ b/examples/triton_gpt2/README.ipynb @@ -5,10 +5,9 @@ "id": "liked-toronto", "metadata": {}, "source": [ - "# Pretrainned GPT2 Model Deployment Example\n", + "# Pretrained GPT2 Model Deployment Example\n", "\n", - "In this notebook we will run an example of text generation using GPT2 pretrained model, which is exported from HuggingFace transformers library and deployed with Seldon's Triton pre-packed server.\n", - "the implemented example bellow is of the Greedy approach, implemantation for next token prediction.\n", + "In this notebook we will run an example of text generation using GPT2 pretrained model, which is exported from HuggingFace transformers library and deployed with Seldon's Triton pre-packed server. The implemented example below is of the Greedy approach, an implementation for next token prediction.\n", "\n", "more info: https://huggingface.co/transformers/model_doc/gpt2.html?highlight=gpt2\n", "\n", @@ -16,10 +15,10 @@ "1. Download pretrained GPT2 model from hugging face\n", "2. Convert the model to ONNX\n", "3. Store it in MinIo bucket\n", - "4. Setup seldon in your kubernetes cluster\n", + "4. Setup Seldon-Core in your kubernetes cluster\n", "5. Deploy the ONNX model with Seldon’s prepackaged Triton server.\n", - "6. Interact with the model, run a greedy alg example (generate sentance complition)\n", - "7. Cleanup\n", + "6. Interact with the model, run a greedy alg example (generate sentence completion)\n", + "7. Clean-up\n", "\n", "## Basic requierments \n", "* Helm v3.0.0+\n", @@ -101,7 +100,7 @@ "#### Setup MinIo\n", "Use the provided [notebook](https://docs.seldon.io/projects/seldon-core/en/latest/examples/minio_setup.html) to install MinIo in your cluster and configure `mc` CLI tool. 
Instructions also [online](https://docs.min.io/docs/minio-client-quickstart-guide.html).\n", "\n", - "-- Note: You can use your prefer remote storage server (google/ AWS and etc.)\n", + "-- Note: You can use your preferred remote storage server (Google, AWS, etc.)\n", "\n", "#### Create a Bucket and store your model" ] @@ -192,7 +191,7 @@ " mode: all\n", " modelUri: s3://onnx-gpt2\n", " envSecretRefName: seldon-init-container-secret\n", - " name: gpt23\n", + " name: gpt2\n", " type: MODEL\n", " name: default\n", " replicas: 1\n", @@ -242,7 +241,7 @@ "id": "digital-supervisor", "metadata": {}, "source": [ - "#### Interact with the model: get model metadata ( a \"test\" request to make sure model is avilable)" + "#### Interact with the model: get model metadata (a \"test\" request to make sure our model is available and loaded correctly)" ] }, { @@ -307,7 +306,7 @@ "id": "anonymous-resource", "metadata": {}, "source": [ - "### Run prediction test: generate a sentense complition using GPT2 model - Greedy approach\n" + "### Run prediction test: generate a sentence completion using GPT2 model - Greedy approach\n" ] }, { @@ -380,7 +379,7 @@ "id": "patient-suite", "metadata": {}, "source": [ - "### Cleanup" + "### Clean-up" ] }, { From 598c9e48fe143b510497adfcaf6c35f9b7333672 Mon Sep 17 00:00:00 2001 From: nadinet Date: Tue, 20 Apr 2021 14:34:16 +0100 Subject: [PATCH 5/5] fix typo #2 --- examples/triton_gpt2/README.ipynb | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/examples/triton_gpt2/README.ipynb b/examples/triton_gpt2/README.ipynb index a33e8d8a77..f0437429cb 100644 --- a/examples/triton_gpt2/README.ipynb +++ b/examples/triton_gpt2/README.ipynb @@ -7,7 +7,8 @@ "source": [ "# Pretrained GPT2 Model Deployment Example\n", "\n", - "In this notebook we will run an example of text generation using GPT2 pretrained model, which is exported from HuggingFace transformers library and deployed with Seldon's Triton pre-packed server. 
The implemented example below is of the Greedy approach, an implementation for next token prediction.\n", + "In this notebook, we will run an example of text generation using a GPT2 model exported from HuggingFace and deployed with Seldon's Triton pre-packed server. The example also covers converting the model to ONNX format.\n", + "The example below implements the greedy approach to next-token prediction.\n", "\n", "more info: https://huggingface.co/transformers/model_doc/gpt2.html?highlight=gpt2\n", "\n", @@ -20,7 +21,7 @@ "6. Interact with the model, run a greedy alg example (generate sentence completion)\n", "7. Clean-up\n", "\n", - "## Basic requierments \n", + "## Basic requirements\n", "* Helm v3.0.0+\n", "* A Kubernetes cluster running v1.13 or above (minkube / docker-for-windows work well if enough RAM)\n", "* kubectl v1.14+\n",