diff --git a/examples/triton_gpt2/GPT2-ONNX-Azure.ipynb b/examples/triton_gpt2/GPT2-ONNX-Azure.ipynb
index 500c289296..2c3237d432 100644
--- a/examples/triton_gpt2/GPT2-ONNX-Azure.ipynb
+++ b/examples/triton_gpt2/GPT2-ONNX-Azure.ipynb
@@ -15,14 +15,16 @@
"\n",
"\n",
"## Steps:\n",
- "1. Download pretrained GPT2 model from hugging face\n",
- "2. Convert the model to ONNX\n",
- "3. Store it in MinIo bucket\n",
- "4. Setup Seldon-Core in your kubernetes cluster\n",
- "5. Deploy the ONNX model with Seldon’s prepackaged Triton server.\n",
- "6. Interact with the model, run a greedy alg example (generate sentence completion)\n",
- "7. Run load test using vegeta\n",
- "8. Clean-up\n",
+ "- [Download pretrained GPT2 model from hugging face](#hf)\n",
+ "- [Convert the model to ONNX](#onnx)\n",
+ "- [Store model in Azure Storage Blob](#blob)\n",
+ "- [Create PersistentVolume and PVC](#pv) mounting Azure Storage Blob\n",
+ "- [Setup Seldon-Core](#seldon) in your kubernetes cluster\n",
+ "- [Deploy the ONNX model](#sd) with Seldon’s prepackaged Triton server.\n",
+ "- [Run model inference](#infer), run a greedy alg example (generate sentence completion)\n",
+ "- [Monitor model with Azure Monitor](#azuremonitor)\n",
+ "- [Run load test using vegeta](#vegeta)\n",
+ "- [Clean-up](#cleanup)\n",
"\n",
"## Basic requirements\n",
"* Helm v3.0.0+\n",
@@ -63,7 +65,7 @@
"id": "completed-evaluation",
"metadata": {},
"source": [
- "### Export HuggingFace TFGPT2LMHeadModel pre-trained model and save it locally"
+ "### Export HuggingFace TFGPT2LMHeadModel pre-trained model and save it locally "
]
},
{
@@ -84,7 +86,7 @@
"id": "further-tribute",
"metadata": {},
"source": [
- "### Convert the TensorFlow saved model to ONNX"
+ "### Convert the TensorFlow saved model to ONNX "
]
},
{
@@ -100,7 +102,8 @@
{
"source": [
"## Azure Setup\n",
- "We have provided Azure Setup Notebook that deploys AKS cluster, Azure storage account and installs Azure Blob CSI driver. If AKS cluster already exists skip to creation of Blob Storage and CSI driver installtion steps."
+    "We have provided [Azure Setup Notebook](./AzureSetup.ipynb) that deploys AKS cluster, Azure storage account and installs Azure Blob CSI driver. If AKS cluster already exists skip to creation of Blob Storage and CSI driver installation steps. Upon completion of Azure setup the following infrastructure will be created:\n",
+ "![Azure](./azure.jpg)"
],
"cell_type": "markdown",
"metadata": {}
@@ -123,7 +126,7 @@
"id": "sunset-pantyhose",
"metadata": {},
"source": [
- "### Copy your model to Azure Blob\n"
+ "### Copy your model to Azure Blob \n"
]
},
{
@@ -171,7 +174,7 @@
},
{
"source": [
- "## Add Azure PersistentVolume and Claim \n",
+ "## Add Azure PersistentVolume and Claim \n",
"For more details on creating PersistentVolume using CSI driver refer to https://github.com/kubernetes-sigs/blob-csi-driver/blob/master/deploy/example/e2e_usage.md\n",
" - Create secret\n",
" - Create PersistentVolume pointing to secret and Blob Container Name and `mountOptions` specifying user id for non-root containers \n",
@@ -292,7 +295,7 @@
"id": "convinced-syracuse",
"metadata": {},
"source": [
- "### Run Seldon in your kubernetes cluster\n",
+ "### Run Seldon in your kubernetes cluster \n",
"\n",
"Follow the [Seldon-Core Setup notebook](https://docs.seldon.io/projects/seldon-core/en/latest/examples/seldon_core_setup.html) to Setup a cluster with Istio Ingress and install Seldon Core"
]
@@ -302,7 +305,7 @@
"id": "backed-outreach",
"metadata": {},
"source": [
- "### Deploy your model with Seldon pre-packaged Triton server"
+ "### Deploy your model with Seldon pre-packaged Triton server "
]
},
{
@@ -324,8 +327,11 @@
"apiVersion: machinelearning.seldon.io/v1alpha2\n",
"kind: SeldonDeployment\n",
"metadata:\n",
- " name: gpt2\n",
+ " name: gpt2gpu\n",
"spec:\n",
+ " annotations:\n",
+ " prometheus.io/port: \"8002\" # we will explain below in Monitoring section\n",
+ " prometheus.io/path: \"/metrics\"\n",
" predictors:\n",
" - componentSpecs:\n",
" - spec:\n",
@@ -333,13 +339,18 @@
" - name: gpt2\n",
" resources:\n",
" requests:\n",
- " memory: 750Mi\n",
- " cpu: 2\n",
- " #nvidia.com/gpu: 1 \n",
- " limits:\n",
" memory: 2Gi\n",
" cpu: 2\n",
- " #nvidia.com/gpu: 1 \n",
+ " nvidia.com/gpu: 1 \n",
+ " limits:\n",
+ " memory: 4Gi\n",
+ " cpu: 4\n",
+ " nvidia.com/gpu: 1 \n",
+ " tolerations:\n",
+ " - key: \"nvidia.com\" # to be able to run in GPU Nodepool\n",
+ " operator: \"Equal\"\n",
+ " value: \"gpu\"\n",
+ " effect: \"NoSchedule\" \n",
" graph:\n",
" implementation: TRITON_SERVER\n",
" logger:\n",
@@ -373,7 +384,7 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": 3,
"id": "demanding-thesaurus",
"metadata": {},
"outputs": [
@@ -381,13 +392,12 @@
"output_type": "stream",
"name": "stdout",
"text": [
- "Waiting for deployment \"gpt2-default-0-gpt2\" rollout to finish: 0 of 1 updated replicas are available...\n",
- "error: deployment \"gpt2-default-0-gpt2\" exceeded its progress deadline\n"
+ "deployment \"gpt2gpu-default-0-gpt2\" successfully rolled out\n"
]
}
],
"source": [
- "!kubectl rollout status deploy/$(kubectl get deploy -l seldon-deployment-id=gpt2 -o jsonpath='{.items[0].metadata.name}')"
+ "!kubectl rollout status deploy/$(kubectl get deploy -l seldon-deployment-id=gpt2gpu -o jsonpath='{.items[0].metadata.name}')"
]
},
{
@@ -438,8 +448,6 @@
"ingress_ip=!(kubectl get svc --namespace istio-system istio-ingressgateway -o jsonpath='{.status.loadBalancer.ingress[0].ip}')\n",
"ingress_ip = ingress_ip[0]\n",
"\n",
- "#!curl -v http://{ingress_ip}:80/seldon/default/gpt2/v2/models/gpt2\n",
- "\n",
"!curl -v http://{ingress_ip}:80/seldon/default/gpt2gpu/v2/models/gpt2"
]
},
@@ -448,12 +456,12 @@
"id": "anonymous-resource",
"metadata": {},
"source": [
- "### Run prediction test: generate a sentence completion using GPT2 model - Greedy approach\n"
+ "### Run prediction test: generate a sentence completion using GPT2 model - Greedy approach \n"
]
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 11,
"id": "modified-termination",
"metadata": {},
"outputs": [
@@ -537,12 +545,110 @@
"print(f'Input: {input_text}\\nOutput: {gen_sentence}')"
]
},
+ {
+ "source": [
+ "## Configure Model Monitoring with Azure Monitor \n",
+    "The Azure Monitor Containers Insights provides functionality to allow collecting data from any Prometheus endpoints. It removes the need to install and operate Prometheus server and manage the monitoring data as Azure Monitor provides a centralized point for collecting, displaying and alerting on monitoring data. To turn on Azure Monitor Container Insights follow steps described [here](https://docs.microsoft.com/en-us/azure/azure-monitor/containers/container-insights-onboard) and you should see that you have an “omsagent” pod running."
+ ],
+ "cell_type": "markdown",
+ "metadata": {}
+ },
+ {
+ "source": [
+ "!kubectl get pods -n kube-system | grep omsagent"
+ ],
+ "cell_type": "code",
+ "metadata": {},
+ "execution_count": 5,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "omsagent-27lk7 1/1 Running 3 12d\nomsagent-7q49d 1/1 Running 3 12d\nomsagent-9slf6 1/1 Running 3 12d\nomsagent-kzbkr 1/1 Running 3 12d\nomsagent-q85hk 1/1 Running 3 12d\nomsagent-rs-5976fbdc8b-rgxs4 1/1 Running 0 8d\nomsagent-tpkq2 1/1 Running 3 12d\n"
+ ]
+ }
+ ]
+ },
+ {
+ "source": [
+ "### Configure Prometheus Metrics scraping\n",
+    "Once `omsagent` is running we need to configure it to collect metrics from Prometheus endpoints. Azure Monitor Containers Insights allows configuration to be applied on a cluster or node-wide scope and configure endpoints for monitoring in one of the following ways:\n",
+ "- Provide an array of URLs \n",
+ "- Provide an Array of Kubernetes services\n",
+ "- Enable monitoring of any pods with Prometheus annotations\n",
+ "For more details on how to configure the scraping endpoints and query collected data refer to [MS Docs on Configure scraping of Prometheus metrics with Container insights](https://docs.microsoft.com/en-us/azure/azure-monitor/containers/container-insights-prometheus-integration)\n",
+ "\n",
+    "Our deployed model metrics are available from a couple of infrastructure layers - [Seldon model orchestrator metrics](https://docs.seldon.io/projects/seldon-core/en/latest/analytics/analytics.html) and [Nvidia Triton Server Metrics](https://github.com/triton-inference-server/server/blob/main/docs/metrics.md). To enable scraping for both endpoints we updated Microsoft provided default `ConfigMap` that configures `omsagent` [azure-metrics-cm.yaml](./azure-metrics-cm.yaml):\n",
+    "- **Triton Server:** update `monitor_kubernetes_pods = true` to enable scraping for Pods with `prometheus.io` annotations\n",
+ " In SeldonDeployment shown above `prometheus.io/path` and `prometheus.io/port` point to default Triton metrics endpoint\n",
+ "- **Seldon Orchestrator:** add our deployed model seldon service endpoint to list of Kubernetes services to be scraped: \n",
+ " ```yaml\n",
+ " kubernetes_services = [\"http://gpt2gpu-default.default:8000/prometheus\"]\n",
+ " ``` "
+ ],
+ "cell_type": "markdown",
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!kubectl apply -f azure-metrics-cm.yaml"
+ ]
+ },
+ {
+ "source": [
+ "## Query and Visualize collected data\n",
+    "Collected metrics are available in the Logs blade of Azure Monitor in a table **InsightsMetrics**; you can see all gathered metrics by running the query\n",
+ "\n",
+ "```yaml\n",
+ "InsightsMetrics\n",
+ "| where Namespace == \"prometheus\" \n",
+ "```\n",
+ "\n",
+ "To get Model Inference Requests per minute from Seldon Metrics run the following query and pin it to Dashboard or add to Azure Monitor Workbook:\n",
+ "\n",
+ "```yaml\n",
+ "InsightsMetrics \n",
+ "| where Namespace == \"prometheus\"\n",
+ "| where Name == \"seldon_api_executor_server_requests_seconds_count\"\n",
+ "| extend Model = parse_json(Tags).deployment_name\n",
+ "| where parse_json(Tags).service == \"predictions\" \n",
+ "| order by TimeGenerated asc \n",
+ "| extend RequestsPerMin = Val - prev(Val,1)\n",
+ "| project TimeGenerated, RequestsPerMin\n",
+ "| render areachart \n",
+ "```\n",
+ "\n",
+ "\n",
+ "To get Inference Duration from Triton Metrics:\n",
+ "\n",
+ "```yaml\n",
+ "InsightsMetrics \n",
+ "| where Namespace == \"prometheus\"\n",
+ "| where Name in (\"nv_inference_request_duration_us\")\n",
+ "| order by TimeGenerated asc\n",
+ "| extend QueueDurationSec = (Val - prev(Val, 1)) / 1000\n",
+ "| project TimeGenerated, Name, QueueDurationSec\n",
+ "| render areachart \n",
+ "```\n",
+ "\n",
+    "Here is an example dashboard we created using the queries above\n",
+ "\n",
+ "![dashboard](./azuredashboard.jpg) \n"
+ ],
+ "cell_type": "markdown",
+ "metadata": {}
+ },
{
"cell_type": "markdown",
"id": "colored-status",
"metadata": {},
"source": [
- "### Run Load Test / Performance Test using vegeta"
+ "### Run Load Test / Performance Test using vegeta "
]
},
{
@@ -675,7 +781,7 @@
"id": "patient-suite",
"metadata": {},
"source": [
- "### Clean-up"
+ "### Clean-up "
]
},
{
@@ -712,4 +818,4 @@
},
"nbformat": 4,
"nbformat_minor": 5
-}
+}
\ No newline at end of file
diff --git a/examples/triton_gpt2/SeldonMetricsDashboard.json b/examples/triton_gpt2/SeldonMetricsDashboard.json
new file mode 100644
index 0000000000..6c56292699
--- /dev/null
+++ b/examples/triton_gpt2/SeldonMetricsDashboard.json
@@ -0,0 +1,550 @@
+{
+ "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#",
+ "contentVersion": "1.0.0.0",
+ "parameters": {
+ "clusterId": {
+ "type": "string",
+ "defaultValue":"/subscriptions/xxxx/resourcegroups/seldon/providers/Microsoft.ContainerService/managedClusters/modeltests"
+
+ }
+ },
+ "resources": [
+ {
+ "properties": {
+ "lenses": {
+ "0": {
+ "order": 0,
+ "parts": {
+ "0": {
+ "position": {
+ "x": 0,
+ "y": 0,
+ "colSpan": 5,
+ "rowSpan": 4
+ },
+ "metadata": {
+ "inputs": [
+ {
+ "name": "resourceTypeMode",
+ "isOptional": true
+ },
+ {
+ "name": "ComponentId",
+ "isOptional": true
+ },
+ {
+ "name": "Scope",
+ "value": {
+ "resourceIds": [
+ "[parameters('clusterId')]"
+ ]
+ },
+ "isOptional": true
+ },
+ {
+ "name": "PartId",
+ "value": "00294c68-30d2-47a8-a14e-154cbf66219a",
+ "isOptional": true
+ },
+ {
+ "name": "Version",
+ "value": "2.0",
+ "isOptional": true
+ },
+ {
+ "name": "TimeRange",
+ "value": "PT1H",
+ "isOptional": true
+ },
+ {
+ "name": "DashboardId",
+ "isOptional": true
+ },
+ {
+ "name": "DraftRequestParameters",
+ "value": {
+ "scope": "hierarchy"
+ },
+ "isOptional": true
+ },
+ {
+ "name": "Query",
+ "value": "InsightsMetrics \n| where Namespace == \"prometheus\"\n| where Name == \"seldon_api_executor_server_requests_seconds_count\"\n| extend Model = parse_json(Tags).deployment_name\n| where parse_json(Tags).service == \"predictions\" \n| order by TimeGenerated asc \n| extend RequestsPerMin = Val - prev(Val,1)\n| project TimeGenerated, RequestsPerMin\n| render areachart \n\n",
+ "isOptional": true
+ },
+ {
+ "name": "ControlType",
+ "value": "FrameControlChart",
+ "isOptional": true
+ },
+ {
+ "name": "SpecificChart",
+ "value": "StackedArea",
+ "isOptional": true
+ },
+ {
+ "name": "PartTitle",
+ "value": "Analytics",
+ "isOptional": true
+ },
+ {
+ "name": "PartSubTitle",
+ "value": "modeltests",
+ "isOptional": true
+ },
+ {
+ "name": "Dimensions",
+ "value": {
+ "xAxis": {
+ "name": "TimeGenerated",
+ "type": "datetime"
+ },
+ "yAxis": [
+ {
+ "name": "RequestsPerMin",
+ "type": "real"
+ }
+ ],
+ "splitBy": [],
+ "aggregation": "Sum"
+ },
+ "isOptional": true
+ },
+ {
+ "name": "LegendOptions",
+ "value": {
+ "isEnabled": true,
+ "position": "Bottom"
+ },
+ "isOptional": true
+ },
+ {
+ "name": "IsQueryContainTimeRange",
+ "value": false,
+ "isOptional": true
+ }
+ ],
+ "type": "Extension/Microsoft_OperationsManagementSuite_Workspace/PartType/LogsDashboardPart",
+ "settings": {},
+ "savedContainerState": {
+ "partTitle": "Analytics",
+ "assetName": "modeltests"
+ }
+ }
+ },
+ "1": {
+ "position": {
+ "x": 5,
+ "y": 0,
+ "colSpan": 6,
+ "rowSpan": 4
+ },
+ "metadata": {
+ "inputs": [
+ {
+ "name": "resourceTypeMode",
+ "isOptional": true
+ },
+ {
+ "name": "ComponentId",
+ "isOptional": true
+ },
+ {
+ "name": "Scope",
+ "value": {
+ "resourceIds": [
+ "/subscriptions/f869415f-5cff-46a3-b728-20659d14d62d/resourcegroups/seldon/providers/Microsoft.ContainerService/managedClusters/modeltests"
+ ]
+ },
+ "isOptional": true
+ },
+ {
+ "name": "PartId",
+ "value": "10181a5a-1191-44ac-a933-57043a27f74a",
+ "isOptional": true
+ },
+ {
+ "name": "Version",
+ "value": "2.0",
+ "isOptional": true
+ },
+ {
+ "name": "TimeRange",
+ "value": "PT1H",
+ "isOptional": true
+ },
+ {
+ "name": "DashboardId",
+ "isOptional": true
+ },
+ {
+ "name": "DraftRequestParameters",
+ "value": {
+ "scope": "hierarchy"
+ },
+ "isOptional": true
+ },
+ {
+ "name": "Query",
+ "value": "InsightsMetrics \n| where Namespace == \"prometheus\"\n| where Name in (\"nv_inference_request_duration_us\")\n| order by TimeGenerated asc\n| extend InferDurationSec = (Val - prev(Val, 1)) / 1000\n| project TimeGenerated, Name, InferDurationSec\n| render areachart \n\n",
+ "isOptional": true
+ },
+ {
+ "name": "ControlType",
+ "value": "FrameControlChart",
+ "isOptional": true
+ },
+ {
+ "name": "SpecificChart",
+ "value": "StackedArea",
+ "isOptional": true
+ },
+ {
+ "name": "PartTitle",
+ "value": "Analytics",
+ "isOptional": true
+ },
+ {
+ "name": "PartSubTitle",
+ "value": "modeltests",
+ "isOptional": true
+ },
+ {
+ "name": "Dimensions",
+ "value": {
+ "xAxis": {
+ "name": "TimeGenerated",
+ "type": "datetime"
+ },
+ "yAxis": [
+ {
+ "name": "InferDurationSec",
+ "type": "real"
+ }
+ ],
+ "splitBy": [
+ {
+ "name": "Name",
+ "type": "string"
+ }
+ ],
+ "aggregation": "Sum"
+ },
+ "isOptional": true
+ },
+ {
+ "name": "LegendOptions",
+ "value": {
+ "isEnabled": true,
+ "position": "Bottom"
+ },
+ "isOptional": true
+ },
+ {
+ "name": "IsQueryContainTimeRange",
+ "value": false,
+ "isOptional": true
+ }
+ ],
+ "type": "Extension/Microsoft_OperationsManagementSuite_Workspace/PartType/LogsDashboardPart",
+ "settings": {},
+ "savedContainerState": {
+ "partTitle": "Analytics",
+ "assetName": "modeltests"
+ }
+ }
+ },
+ "2": {
+ "position": {
+ "x": 0,
+ "y": 4,
+ "colSpan": 5,
+ "rowSpan": 4
+ },
+ "metadata": {
+ "inputs": [
+ {
+ "name": "resourceTypeMode",
+ "isOptional": true
+ },
+ {
+ "name": "ComponentId",
+ "isOptional": true
+ },
+ {
+ "name": "Scope",
+ "value": {
+ "resourceIds": [
+ "[parameters('clusterId')]"
+ ]
+ },
+ "isOptional": true
+ },
+ {
+ "name": "PartId",
+ "value": "61a274fb-2040-4aa2-812f-23dcbe10e4c0",
+ "isOptional": true
+ },
+ {
+ "name": "Version",
+ "value": "2.0",
+ "isOptional": true
+ },
+ {
+ "name": "TimeRange",
+ "value": "PT1H",
+ "isOptional": true
+ },
+ {
+ "name": "DashboardId",
+ "isOptional": true
+ },
+ {
+ "name": "DraftRequestParameters",
+ "value": {
+ "scope": "hierarchy"
+ },
+ "isOptional": true
+ },
+ {
+ "name": "Query",
+ "value": "let Success =\n InsightsMetrics \n | where Namespace == \"prometheus\"\n | where Name in (\"nv_inference_request_success\")\n | order by TimeGenerated asc\n | project TimeGenerated, Name, Successes = Val;\nlet Failure =\n InsightsMetrics \n | where Namespace == \"prometheus\"\n | where Name in (\"nv_inference_request_failure\")\n | order by TimeGenerated asc\n | project TimeGenerated, Name, Failures = Val;\nSuccess\n| join (Failure) on TimeGenerated\n| project TimeGenerated, Successes, Failures\n| render timechart \n\n",
+ "isOptional": true
+ },
+ {
+ "name": "ControlType",
+ "value": "FrameControlChart",
+ "isOptional": true
+ },
+ {
+ "name": "SpecificChart",
+ "value": "Line",
+ "isOptional": true
+ },
+ {
+ "name": "PartTitle",
+ "value": "Analytics",
+ "isOptional": true
+ },
+ {
+ "name": "PartSubTitle",
+ "value": "modeltests",
+ "isOptional": true
+ },
+ {
+ "name": "Dimensions",
+ "value": {
+ "xAxis": {
+ "name": "TimeGenerated",
+ "type": "datetime"
+ },
+ "yAxis": [
+ {
+ "name": "Successes",
+ "type": "real"
+ },
+ {
+ "name": "Failures",
+ "type": "real"
+ }
+ ],
+ "splitBy": [],
+ "aggregation": "Sum"
+ },
+ "isOptional": true
+ },
+ {
+ "name": "LegendOptions",
+ "value": {
+ "isEnabled": true,
+ "position": "Bottom"
+ },
+ "isOptional": true
+ },
+ {
+ "name": "IsQueryContainTimeRange",
+ "value": false,
+ "isOptional": true
+ }
+ ],
+ "type": "Extension/Microsoft_OperationsManagementSuite_Workspace/PartType/LogsDashboardPart",
+ "settings": {},
+ "savedContainerState": {
+ "partTitle": "Analytics",
+ "assetName": "modeltests"
+ }
+ }
+ },
+ "3": {
+ "position": {
+ "x": 5,
+ "y": 4,
+ "colSpan": 6,
+ "rowSpan": 4
+ },
+ "metadata": {
+ "inputs": [
+ {
+ "name": "resourceTypeMode",
+ "isOptional": true
+ },
+ {
+ "name": "ComponentId",
+ "isOptional": true
+ },
+ {
+ "name": "Scope",
+ "value": {
+ "resourceIds": [
+ "[parameters('clusterId')]"
+ ]
+ },
+ "isOptional": true
+ },
+ {
+ "name": "PartId",
+ "value": "07987b60-ca0f-44f0-870e-835a01390d89",
+ "isOptional": true
+ },
+ {
+ "name": "Version",
+ "value": "2.0",
+ "isOptional": true
+ },
+ {
+ "name": "TimeRange",
+ "value": "PT1H",
+ "isOptional": true
+ },
+ {
+ "name": "DashboardId",
+ "isOptional": true
+ },
+ {
+ "name": "DraftRequestParameters",
+ "value": {
+ "scope": "hierarchy"
+ },
+ "isOptional": true
+ },
+ {
+ "name": "Query",
+ "value": "InsightsMetrics \n| where Namespace == \"prometheus\"\n| where Name in (\"nv_inference_queue_duration_us\")\n| order by TimeGenerated asc\n| extend QueueDurationSec = (Val - prev(Val,1))/1000\n| project TimeGenerated, Name, QueueDurationSec\n| render areachart \n\n",
+ "isOptional": true
+ },
+ {
+ "name": "ControlType",
+ "value": "FrameControlChart",
+ "isOptional": true
+ },
+ {
+ "name": "SpecificChart",
+ "value": "StackedArea",
+ "isOptional": true
+ },
+ {
+ "name": "PartTitle",
+ "value": "Analytics",
+ "isOptional": true
+ },
+ {
+ "name": "PartSubTitle",
+ "value": "modeltests",
+ "isOptional": true
+ },
+ {
+ "name": "Dimensions",
+ "value": {
+ "xAxis": {
+ "name": "TimeGenerated",
+ "type": "datetime"
+ },
+ "yAxis": [
+ {
+ "name": "QueueDurationSec",
+ "type": "real"
+ }
+ ],
+ "splitBy": [
+ {
+ "name": "Name",
+ "type": "string"
+ }
+ ],
+ "aggregation": "Sum"
+ },
+ "isOptional": true
+ },
+ {
+ "name": "LegendOptions",
+ "value": {
+ "isEnabled": true,
+ "position": "Bottom"
+ },
+ "isOptional": true
+ },
+ {
+ "name": "IsQueryContainTimeRange",
+ "value": false,
+ "isOptional": true
+ }
+ ],
+ "type": "Extension/Microsoft_OperationsManagementSuite_Workspace/PartType/LogsDashboardPart",
+ "settings": {},
+ "savedContainerState": {
+ "partTitle": "Analytics",
+ "assetName": "modeltests"
+ }
+ }
+ }
+ }
+ }
+ },
+ "metadata": {
+ "model": {
+ "timeRange": {
+ "value": {
+ "relative": {
+ "duration": 24,
+ "timeUnit": 1
+ }
+ },
+ "type": "MsPortalFx.Composition.Configuration.ValueTypes.TimeRange"
+ },
+ "filterLocale": {
+ "value": "en-us"
+ },
+ "filters": {
+ "value": {
+ "MsPortalFx_TimeRange": {
+ "model": {
+ "format": "utc",
+ "granularity": "auto",
+ "relative": "1h"
+ },
+ "displayCache": {
+ "name": "UTC Time",
+ "value": "Past hour"
+ },
+ "filteredPartIds": [
+ "StartboardPart-LogsDashboardPart-6364554f-1af7-4e36-913c-004cf554050f",
+ "StartboardPart-LogsDashboardPart-6364554f-1af7-4e36-913c-004cf5540511",
+ "StartboardPart-LogsDashboardPart-6364554f-1af7-4e36-913c-004cf5540513",
+ "StartboardPart-LogsDashboardPart-6364554f-1af7-4e36-913c-004cf5540515"
+ ]
+ }
+ }
+ }
+ }
+ }
+ },
+ "name": "SeldonMetrics",
+ "type": "Microsoft.Portal/dashboards",
+ "location": "INSERT LOCATION",
+ "tags": {
+ "hidden-title": "SeldonMetrics"
+ },
+ "apiVersion": "2015-08-01-preview"
+ }
+ ]
+}
\ No newline at end of file
diff --git a/examples/triton_gpt2/azure-metrics-cm.yaml b/examples/triton_gpt2/azure-metrics-cm.yaml
new file mode 100644
index 0000000000..ee14ec4869
--- /dev/null
+++ b/examples/triton_gpt2/azure-metrics-cm.yaml
@@ -0,0 +1,140 @@
+kind: ConfigMap
+apiVersion: v1
+data:
+ schema-version:
+ #string.used by agent to parse config. supported versions are {v1}. Configs with other schema versions will be rejected by the agent.
+ v1
+ config-version:
+ #string.used by customer to keep track of this config file's version in their source control/repository (max allowed 10 chars, other chars will be truncated)
+ ver1
+ log-data-collection-settings: |-
+ # Log data collection settings
+ # Any errors related to config map settings can be found in the KubeMonAgentEvents table in the Log Analytics workspace that the cluster is sending data to.
+
+ [log_collection_settings]
+ [log_collection_settings.stdout]
+ # In the absense of this configmap, default value for enabled is true
+ enabled = true
+ # exclude_namespaces setting holds good only if enabled is set to true
+ # kube-system log collection is disabled by default in the absence of 'log_collection_settings.stdout' setting. If you want to enable kube-system, remove it from the following setting.
+ # If you want to continue to disable kube-system log collection keep this namespace in the following setting and add any other namespace you want to disable log collection to the array.
+ # In the absense of this configmap, default value for exclude_namespaces = ["kube-system"]
+ exclude_namespaces = ["kube-system"]
+
+ [log_collection_settings.stderr]
+ # Default value for enabled is true
+ enabled = true
+ # exclude_namespaces setting holds good only if enabled is set to true
+ # kube-system log collection is disabled by default in the absence of 'log_collection_settings.stderr' setting. If you want to enable kube-system, remove it from the following setting.
+ # If you want to continue to disable kube-system log collection keep this namespace in the following setting and add any other namespace you want to disable log collection to the array.
+ # In the absense of this cofigmap, default value for exclude_namespaces = ["kube-system"]
+ exclude_namespaces = ["kube-system"]
+
+ [log_collection_settings.env_var]
+ # In the absense of this configmap, default value for enabled is true
+ enabled = true
+ [log_collection_settings.enrich_container_logs]
+ # In the absense of this configmap, default value for enrich_container_logs is false
+ enabled = false
+ # When this is enabled (enabled = true), every container log entry (both stdout & stderr) will be enriched with container Name & container Image
+ [log_collection_settings.collect_all_kube_events]
+ # In the absense of this configmap, default value for collect_all_kube_events is false
+ # When the setting is set to false, only the kube events with !normal event type will be collected
+ enabled = false
+ # When this is enabled (enabled = true), all kube events including normal events will be collected
+
+ prometheus-data-collection-settings: |-
+ # Custom Prometheus metrics data collection settings
+ [prometheus_data_collection_settings.cluster]
+ # Cluster level scrape endpoint(s). These metrics will be scraped from agent's Replicaset (singleton)
+ # Any errors related to prometheus scraping can be found in the KubeMonAgentEvents table in the Log Analytics workspace that the cluster is sending data to.
+
+ #Interval specifying how often to scrape for metrics. This is duration of time and can be specified for supporting settings by combining an integer value and time unit as a string value. Valid time units are ns, us (or µs), ms, s, m, h.
+ interval = "1m"
+
+ ## Uncomment the following settings with valid string arrays for prometheus scraping
+ #fieldpass = ["metric_to_pass1", "metric_to_pass12"]
+
+ #fielddrop = ["metric_to_drop"]
+
+ # An array of urls to scrape metrics from.
+ # urls = ["http://myurl:9101/metrics"]
+
+ # An array of Kubernetes services to scrape metrics from.
+ kubernetes_services = ["http://gpt2gpu-default.default:8000/prometheus"]
+
+ # When monitor_kubernetes_pods = true, replicaset will scrape Kubernetes pods for the following prometheus annotations:
+ # - prometheus.io/scrape: Enable scraping for this pod
+ # - prometheus.io/scheme: If the metrics endpoint is secured then you will need to
+ # set this to `https` & most likely set the tls config.
+ # - prometheus.io/path: If the metrics path is not /metrics, define it with this annotation.
+ # - prometheus.io/port: If port is not 9102 use this annotation
+ monitor_kubernetes_pods = true
+
+ ## Restricts Kubernetes monitoring to namespaces for pods that have annotations set and are scraped using the monitor_kubernetes_pods setting.
+ ## This will take effect when monitor_kubernetes_pods is set to true
+ ## ex: monitor_kubernetes_pods_namespaces = ["default1", "default2", "default3"]
+ monitor_kubernetes_pods_namespaces = ["default","seldon-system", "istio-system"]
+
+ ## Label selector to target pods which have the specified label
+ ## This will take effect when monitor_kubernetes_pods is set to true
+ ## Reference the docs at https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#label-selectors
+ # kubernetes_label_selector = "env=dev,app=nginx"
+
+ ## Field selector to target pods which have the specified field
+ ## This will take effect when monitor_kubernetes_pods is set to true
+ ## Reference the docs at https://kubernetes.io/docs/concepts/overview/working-with-objects/field-selectors/
+ ## eg. To scrape pods on a specific node
+ # kubernetes_field_selector = "spec.nodeName=$HOSTNAME"
+
+ [prometheus_data_collection_settings.node]
+ # Node level scrape endpoint(s). These metrics will be scraped from agent's DaemonSet running in every node in the cluster
+ # Any errors related to prometheus scraping can be found in the KubeMonAgentEvents table in the Log Analytics workspace that the cluster is sending data to.
+
+ #Interval specifying how often to scrape for metrics. This is duration of time and can be specified for supporting settings by combining an integer value and time unit as a string value. Valid time units are ns, us (or µs), ms, s, m, h.
+ interval = "1m"
+
+ ## Uncomment the following settings with valid string arrays for prometheus scraping
+
+ # An array of urls to scrape metrics from. $NODE_IP (all upper case) will substitute of running Node's IP address
+ # urls = ["http://$NODE_IP:9103/metrics"]
+
+ #fieldpass = ["metric_to_pass1", "metric_to_pass12"]
+
+ #fielddrop = ["metric_to_drop"]
+
+ metric_collection_settings: |-
+ # Metrics collection settings for metrics sent to Log Analytics and MDM
+ [metric_collection_settings.collect_kube_system_pv_metrics]
+ # In the absense of this configmap, default value for collect_kube_system_pv_metrics is false
+ # When the setting is set to false, only the persistent volume metrics outside the kube-system namespace will be collected
+ enabled = false
+ # When this is enabled (enabled = true), persistent volume metrics including those in the kube-system namespace will be collected
+
+ alertable-metrics-configuration-settings: |-
+ # Alertable metrics configuration settings for container resource utilization
+ [alertable_metrics_configuration_settings.container_resource_utilization_thresholds]
+ # The threshold(Type Float) will be rounded off to 2 decimal points
+ # Threshold for container cpu, metric will be sent only when cpu utilization exceeds or becomes equal to the following percentage
+ container_cpu_threshold_percentage = 95.0
+ # Threshold for container memoryRss, metric will be sent only when memory rss exceeds or becomes equal to the following percentage
+ container_memory_rss_threshold_percentage = 95.0
+ # Threshold for container memoryWorkingSet, metric will be sent only when memory working set exceeds or becomes equal to the following percentage
+ container_memory_working_set_threshold_percentage = 95.0
+
+ # Alertable metrics configuration settings for persistent volume utilization
+ [alertable_metrics_configuration_settings.pv_utilization_thresholds]
+ # Threshold for persistent volume usage bytes, metric will be sent only when persistent volume utilization exceeds or becomes equal to the following percentage
+ pv_usage_threshold_percentage = 60.0
+
+ # Alertable metrics configuration settings for completed jobs count
+ [alertable_metrics_configuration_settings.job_completion_threshold]
+ # Threshold for completed job count , metric will be sent only for those jobs which were completed earlier than the following threshold
+ job_completion_threshold_time_minutes = 360
+ integrations: |-
+ [integrations.azure_network_policy_manager]
+ collect_basic_metrics = false
+ collect_advanced_metrics = false
+metadata:
+ name: container-azm-ms-agentconfig
+ namespace: kube-system
\ No newline at end of file
diff --git a/examples/triton_gpt2/azure.jpg b/examples/triton_gpt2/azure.jpg
new file mode 100644
index 0000000000..dd68d60323
Binary files /dev/null and b/examples/triton_gpt2/azure.jpg differ
diff --git a/examples/triton_gpt2/azuredashboard.jpg b/examples/triton_gpt2/azuredashboard.jpg
new file mode 100644
index 0000000000..4144716c39
Binary files /dev/null and b/examples/triton_gpt2/azuredashboard.jpg differ