From e06c963408b56b115b3dcd3d1a195ca9ff188b69 Mon Sep 17 00:00:00 2001
From: Gerard Ryan <git@grdryn.xyz>
Date: Wed, 30 Oct 2024 15:42:28 +0000
Subject: [PATCH] Attempt to work around weird otel metric renaming

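For some reason otel appends a _total suffix to the end of the
ovms_requests_success and ovms_requests_fail series names, which isn't
there in the original metrics.

Work around this by adding the suffixed names to both uwl metrics lists
(keeping the original names as well) and by querying the suffixed names
in the edge-inference-health Grafana dashboard.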
---
 acm/odh-core/acm-observability/files/uwl_metrics_list.yaml    | 2 ++
 .../grafana-dashboards/edge-inference-health.json             | 4 ++--
 acm/odh-edge/base/files/uwl_metrics_list.yaml                 | 2 ++
 3 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/acm/odh-core/acm-observability/files/uwl_metrics_list.yaml b/acm/odh-core/acm-observability/files/uwl_metrics_list.yaml
index 49bf7d52..1097e4f0 100644
--- a/acm/odh-core/acm-observability/files/uwl_metrics_list.yaml
+++ b/acm/odh-core/acm-observability/files/uwl_metrics_list.yaml
@@ -18,6 +18,8 @@ names:
   - ovms_request_time_us_sum
   - ovms_requests_fail
   - ovms_requests_success
+  - ovms_requests_fail_total
+  - ovms_requests_success_total
   - ovms_streams
   - ovms_wait_for_infer_req_time_us_bucket
   - ovms_wait_for_infer_req_time_us_count
diff --git a/acm/odh-core/acm-observability/grafana-dashboards/edge-inference-health.json b/acm/odh-core/acm-observability/grafana-dashboards/edge-inference-health.json
index 14225ed8..05ee7fb9 100644
--- a/acm/odh-core/acm-observability/grafana-dashboards/edge-inference-health.json
+++ b/acm/odh-core/acm-observability/grafana-dashboards/edge-inference-health.json
@@ -242,7 +242,7 @@
       "targets": [
         {
           "exemplar": true,
-          "expr": "sum(ovms_requests_success{cluster=~\"$cluster\"}) by(namespace)",
+          "expr": "sum(ovms_requests_success_total{cluster=~\"$cluster\"}) by(namespace)",
           "hide": false,
           "interval": "",
           "legendFormat": "{{namespace}} success",
@@ -250,7 +250,7 @@
         },
         {
           "exemplar": true,
-          "expr": "sum(ovms_requests_fail{cluster=~\"$cluster\"}) by(namespace)",
+          "expr": "sum(ovms_requests_fail_total{cluster=~\"$cluster\"}) by(namespace)",
           "hide": false,
           "interval": "",
           "legendFormat": "{{namespace}} fail",
diff --git a/acm/odh-edge/base/files/uwl_metrics_list.yaml b/acm/odh-edge/base/files/uwl_metrics_list.yaml
index 49bf7d52..1097e4f0 100644
--- a/acm/odh-edge/base/files/uwl_metrics_list.yaml
+++ b/acm/odh-edge/base/files/uwl_metrics_list.yaml
@@ -18,6 +18,8 @@ names:
   - ovms_request_time_us_sum
   - ovms_requests_fail
   - ovms_requests_success
+  - ovms_requests_fail_total
+  - ovms_requests_success_total
   - ovms_streams
   - ovms_wait_for_infer_req_time_us_bucket
   - ovms_wait_for_infer_req_time_us_count