Gopalv/refactor #1

Open · wants to merge 2 commits into base: yifyu/triton
@@ -28,7 +28,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"import azureml.core\n",
@@ -46,7 +48,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from azureml.core.workspace import Workspace\n",
@@ -93,9 +97,7 @@
"\n",
"if not os.path.exists(target_file):\n",
" response = requests.get(model_url)\n",
" open(target_file, 'wb').write(response.content)\n",
"\n",
"config_file = os.path.join('models', 'triton', 'densenet_onnx', 'config.pbtxt')"
" open(target_file, 'wb').write(response.content)\n"
]
},
{
@@ -104,39 +106,7 @@
"source": [
"# Add Model Configuration file\n",
"\n",
"Each model needs a [Model Configuration](https://docs.nvidia.com/deeplearning/triton-inference-server/user-guide/docs/model_configuration.html) that provides required and optional information about the model.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"%%writefile $config_file\n",
"name: \"densenet_onnx\"\n",
"platform: \"onnxruntime_onnx\"\n",
"max_batch_size: 0\n",
"input [\n",
" {\n",
" name: \"data_0\"\n",
" data_type: TYPE_FP32\n",
" format: FORMAT_NCHW\n",
" dims: [ 3, 224, 224 ]\n",
" reshape { shape: [ 1, 3, 224, 224 ] }\n",
" }\n",
"]\n",
"output [\n",
" {\n",
" name: \"fc6_1\"\n",
" data_type: TYPE_FP32\n",
" dims: [ 1000 ]\n",
" reshape { shape: [ 1, 1000, 1, 1 ] }\n",
" label_filename: \"densenet_labels.txt\"\n",
" }\n",
"]"
"Each Triton model needs a [Model Configuration](https://docs.nvidia.com/deeplearning/triton-inference-server/user-guide/docs/model_configuration.html) that provides required and optional information about the model. We've provided one in this directory already.\n"
]
},
{
@@ -198,9 +168,11 @@
" print(\"Creating new gpu-cluster\")\n",
" \n",
" # Specify the configuration for the new cluster\n",
" compute_config = AksCompute.provisioning_configuration(cluster_purpose=AksCompute.ClusterPurpose.DEV_TEST,\n",
" agent_count=1,\n",
" vm_size=\"Standard_NC6s_v3\")\n",
" compute_config = AksCompute.provisioning_configuration(\n",
" cluster_purpose=AksCompute.ClusterPurpose.DEV_TEST,\n",
" agent_count=1,\n",
" vm_size=\"Standard_NC6\",\n",
" location=\"westus2\")\n",
" \n",
" # Create the cluster with the specified name and configuration\n",
" gpu_cluster = ComputeTarget.create(ws, gpu_cluster_name, compute_config)\n",
@@ -215,78 +187,11 @@
"source": [
"# Deploy the model as a web service to AKS\n",
"\n",
"First create a scoring script\n",
"First create a scoring script. You can see the one we created for you in the `scripts` directory.\n",
"\n",
"** Note: ** Triton server listens to a fixed local port. You may choose to use the Triton Python [client library](https://docs.nvidia.com/deeplearning/triton-inference-server/master-user-guide/docs/client_library.html) to talk to it, while keeping the flexibility of pre-/post- processing."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"%%writefile score.py\n",
"import numpy as np\n",
"from PIL import Image\n",
"import sys\n",
"from functools import partial\n",
"import os\n",
"import io\n",
"\n",
"import tritonhttpclient\n",
"from tritonclientutils import InferenceServerException\n",
"\n",
"from azureml.contrib.services.aml_request import AMLRequest, rawhttp\n",
"from azureml.contrib.services.aml_response import AMLResponse\n",
"\n",
"sys.path.append(os.path.join(os.getenv('AZUREML_MODEL_DIR'), 'models'))\n",
"from utils import preprocess, postprocess\n",
"\n",
"\n",
"trition_client = None\n",
"_url = \"localhost:8000\"\n",
"_model = \"densenet_onnx\"\n",
"_scaling = \"INCEPTION\"\n",
"\n",
"def init():\n",
" global triton_client, max_batch_size, input_name, output_name, dtype\n",
"\n",
" triton_client = tritonhttpclient.InferenceServerClient(_url)\n",
"\n",
" max_batch_size = 0\n",
" input_name = \"data_0\"\n",
" output_name = \"fc6_1\"\n",
" dtype = \"FP32\"\n",
"\n",
"\n",
"@rawhttp\n",
"def run(request):\n",
" if request.method == 'POST':\n",
" \n",
" reqBody = request.get_data(False)\n",
" img = Image.open(io.BytesIO(reqBody))\n",
" \n",
" image_data = preprocess(img, _scaling, dtype)\n",
" \n",
" input = tritonhttpclient.InferInput(input_name, image_data.shape, dtype)\n",
" input.set_data_from_numpy(image_data, binary_data=True)\n",
" output = tritonhttpclient.InferRequestedOutput(output_name, binary_data=True, class_count=1)\n",
" \n",
" res = triton_client.infer(_model,\n",
" [input],\n",
" request_id=\"0\",\n",
" outputs=[output])\n",
"\n",
" result = postprocess(res, output_name, 1, max_batch_size > 0)\n",
"\n",
" return AMLResponse(result, 200)\n",
" else:\n",
" return AMLResponse(\"bad request\", 500)\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
@@ -355,8 +260,8 @@
"# Optionally specify a worker count to leverage the capability of concurrency and server-side batching from Triton\n",
"# env.environment_variables = {\"WORKER_COUNT\":\"128\"}\n",
"\n",
"inference_config = InferenceConfig(entry_script=\"score.py\", environment=env)\n",
"aks_config = AksWebservice.deploy_configuration(cpu_cores = 1, memory_gb = 4, gpu_cores = 1)\n",
"inference_config = InferenceConfig(entry_script=\"score.py\", source_directory=\"scripts\", environment=env)\n",
"aks_config = AksWebservice.deploy_configuration(cpu_cores=1, memory_gb=4, gpu_cores=1)\n",
"\n",
"# # Enable token auth and disable (key) auth on the webservice\n",
"# aks_config = AksWebservice.deploy_configuration(cpu_cores = 1, memory_gb = 4, gpu_cores = 1, token_auth_enabled=True, auth_enabled=False)"
@@ -438,6 +343,17 @@
"model.delete()\n",
"gpu_cluster.delete()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"aks_service.wait_for_deployment(True)"
]
}
],
"metadata": {
@@ -447,9 +363,9 @@
}
],
"kernelspec": {
"display_name": "Python 3.6.9 64-bit",
"display_name": "Python 3.7.7 64-bit",
"language": "python",
"name": "python36964bit49f9d8f83b294f2eb4e5a3f7c26b67fb"
"name": "python_defaultSpec_1598590942828"
},
"language_info": {
"codemirror_mode": {
@@ -461,7 +377,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.9-final"
"version": "3.7.7-final"
}
},
"nbformat": 4,
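For orientation before the new entry script below: it still accepts raw image bytes over POST, so once the notebook's deployment finishes, the service can be smoke-tested over plain HTTP. A minimal sketch, not part of this diff, assuming the `aks_service` object from the notebook, key-based auth, and a hypothetical local file `test_image.jpg`:

import requests

# Primary key for the webservice (key auth is the notebook's default).
key = aks_service.get_keys()[0]
headers = {
    "Content-Type": "application/octet-stream",
    "Authorization": "Bearer " + key,
}

# POST raw image bytes, as the run() function in score.py expects.
with open("test_image.jpg", "rb") as f:
    resp = requests.post(aks_service.scoring_uri, data=f.read(), headers=headers)

print(resp.status_code, resp.text)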
@@ -0,0 +1,25 @@
import io

from PIL import Image

from azureml.contrib.services.aml_request import AMLRequest, rawhttp
from azureml.contrib.services.aml_response import AMLResponse
from utils import preprocess, postprocess, triton_init, triton_infer


def init():
triton_init()

@rawhttp
def run(request):
if request.method == 'POST':
reqBody = request.get_data(False)
img = Image.open(io.BytesIO(reqBody))
result = triton_infer(model_name="densenet_onnx", img=img)

return AMLResponse(result, 200)
else:
return AMLResponse("bad request", 500)
@@ -1,6 +1,7 @@
import numpy as np
import tritonhttpclient
from PIL import Image
from tritonclientutils import triton_to_np_dtype
from tritonclientutils import InferenceServerException, triton_to_np_dtype


def preprocess(img, scaling, dtype):
@@ -71,4 +72,27 @@ def postprocess(results, output_name, batch_size, batching):
cls = result.split(':')
output += " {} ({}) = {}".format(cls[0], cls[1], cls[2])

    return output

def triton_init(url="localhost:8000"):
global triton_client
triton_client = tritonhttpclient.InferenceServerClient(url)
_url = "localhost:8000"
_model = "densenet_onnx"
_scaling = "INCEPTION"

# TODO: set input_name, output_name, dtype from model metadata, as in
# https://github.com/triton-inference-server/server/blob/master/src/clients/python/examples/image_client.py#L402
def triton_infer(model_name, img, input_name="data_0", output_name="fc6_1",
dtype="FP32"):
image_data = preprocess(img, scaling="INCEPTION", dtype=dtype)
input = tritonhttpclient.InferInput(input_name, image_data.shape, dtype)
input.set_data_from_numpy(image_data, binary_data=True)
output = tritonhttpclient.InferRequestedOutput(output_name, binary_data=True, class_count=1)

res = triton_client.infer(model_name,
[input],
request_id="0",
outputs=[output])

return postprocess(res, output_name=output_name, batch_size=1, batching=False)
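The TODO above could be resolved roughly as follows, in the spirit of the linked image_client.py example. This is a sketch, assuming the v2 HTTP metadata schema returned by tritonhttpclient and a single-input, single-output model; triton_io_from_metadata is a hypothetical helper name, not part of this PR:

def triton_io_from_metadata(model_name):
    # Ask the server for the model's metadata instead of hard-coding
    # "data_0" / "fc6_1" / "FP32"; assumes triton_init() was called first.
    meta = triton_client.get_model_metadata(model_name)
    input_meta = meta["inputs"][0]
    output_meta = meta["outputs"][0]
    return input_meta["name"], output_meta["name"], input_meta["datatype"]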