Skip to content

Commit 8d52f2b

Browse files
[ray][metrics] Replace ':' with '_' for OpenTelemetry compatibility in Ray (#25439)
Signed-off-by: Seiji Eicher <seiji@anyscale.com> Signed-off-by: Seiji Eicher <58963096+eicherseiji@users.noreply.github.com> Co-authored-by: Rui Qiao <161574667+ruisearch42@users.noreply.github.com>
1 parent 984d184 commit 8d52f2b

File tree

2 files changed

+57
-1
lines changed

2 files changed

+57
-1
lines changed

tests/v1/metrics/test_ray_metrics.py

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@
88
from vllm.config import ModelDType
99
from vllm.sampling_params import SamplingParams
1010
from vllm.v1.engine.async_llm import AsyncEngineArgs, AsyncLLM
11-
from vllm.v1.metrics.ray_wrappers import RayPrometheusStatLogger
11+
from vllm.v1.metrics.ray_wrappers import (RayPrometheusMetric,
12+
RayPrometheusStatLogger)
1213

1314

1415
@pytest.fixture(scope="function", autouse=True)
@@ -65,3 +66,39 @@ async def run(self):
6566
# Create the actor and call the async method
6667
actor = EngineTestActor.remote() # type: ignore[attr-defined]
6768
ray.get(actor.run.remote())
69+
70+
71+
def test_sanitized_opentelemetry_name():
    """Check the metric-name sanitization that Ray applies for OpenTelemetry.

    Each case pairs a raw metric name with the name expected back from
    ``RayPrometheusMetric._get_sanitized_opentelemetry_name``.
    """
    sanitize = RayPrometheusMetric._get_sanitized_opentelemetry_name

    # (raw name, expected sanitized name)
    cases = [
        # Valid characters (a-z, A-Z, 0-9, _) are preserved as-is.
        ("valid_metric_123_abcDEF", "valid_metric_123_abcDEF"),
        # Dash and dot are replaced.
        ("metric-name.test", "metric_name_test"),
        # Colon is replaced with an underscore.
        ("metric:name", "metric_name"),
        # Several different invalid characters are all replaced.
        ("metric:name@with#special%chars", "metric_name_with_special_chars"),
        # Mixed valid and invalid characters.
        ("vllm:engine_stats/time.latency_ms-99p",
         "vllm_engine_stats_time_latency_ms_99p"),
        # The empty string stays empty.
        ("", ""),
    ]

    for raw_name, expected in cases:
        assert sanitize(raw_name) == expected

vllm/v1/metrics/ray_wrappers.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from ray.util.metrics import Metric
1212
except ImportError:
1313
ray_metrics = None
14+
import regex as re
1415

1516

1617
class RayPrometheusMetric:
@@ -42,6 +43,21 @@ def labels(self, *labels, **labelskwargs):
4243

4344
return self
4445

46+
@staticmethod
47+
def _get_sanitized_opentelemetry_name(name: str) -> str:
48+
"""
49+
For compatibility with Ray + OpenTelemetry, the metric name must be
50+
sanitized. In particular, this replaces disallowed character (e.g., ':')
51+
with '_' in the metric name.
52+
Allowed characters: a-z, A-Z, 0-9, _
53+
54+
# ruff: noqa: E501
55+
Ref: https://github.com/open-telemetry/opentelemetry-cpp/blob/main/sdk/src/metrics/instrument_metadata_validator.cc#L22-L23
56+
Ref: https://github.com/ray-project/ray/blob/master/src/ray/stats/metric.cc#L107
57+
"""
58+
59+
return re.sub(r"[^a-zA-Z0-9_]", "_", name)
60+
4561

4662
class RayGaugeWrapper(RayPrometheusMetric):
4763
"""Wraps around ray.util.metrics.Gauge to provide same API as
@@ -58,6 +74,7 @@ def __init__(self,
5874
# implemented at the observability layer (Prometheus/Grafana).
5975
del multiprocess_mode
6076
labelnames_tuple = tuple(labelnames) if labelnames else None
77+
name = self._get_sanitized_opentelemetry_name(name)
6178
self.metric = ray_metrics.Gauge(name=name,
6279
description=documentation,
6380
tag_keys=labelnames_tuple)
@@ -79,6 +96,7 @@ def __init__(self,
7996
documentation: Optional[str] = "",
8097
labelnames: Optional[list[str]] = None):
8198
labelnames_tuple = tuple(labelnames) if labelnames else None
99+
name = self._get_sanitized_opentelemetry_name(name)
82100
self.metric = ray_metrics.Counter(name=name,
83101
description=documentation,
84102
tag_keys=labelnames_tuple)
@@ -99,6 +117,7 @@ def __init__(self,
99117
labelnames: Optional[list[str]] = None,
100118
buckets: Optional[list[float]] = None):
101119
labelnames_tuple = tuple(labelnames) if labelnames else None
120+
name = self._get_sanitized_opentelemetry_name(name)
102121
boundaries = buckets if buckets else []
103122
self.metric = ray_metrics.Histogram(name=name,
104123
description=documentation,

0 commit comments

Comments
 (0)