Skip to content

Commit fe7373c

Browse files
keivenchangJason Zhou
authored andcommitted
refactor: centralize Prometheus metrics naming and sanitization DIS-554 (#2733)
Co-authored-by: Keiven Chang <keivenchang@users.noreply.github.com> Signed-off-by: Jason Zhou <jasonzho@jasonzho-mlt.client.nvidia.com>
1 parent 83b4e06 commit fe7373c

File tree

12 files changed

+736
-330
lines changed

12 files changed

+736
-330
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

components/planner/src/dynamo/planner/utils/prometheus.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ def _get_average_metric(
4343
Average metric value or 0 if no data/error
4444
"""
4545
try:
46+
# TODO: use prometheus_names.rs
4647
full_metric_name = f"dynamo_frontend_{metric_name}"
4748
query = f"increase({full_metric_name}_sum[{interval}])/increase({full_metric_name}_count[{interval}])"
4849
result = self.prom.custom_query(query=query)

lib/llm/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,7 @@ reqwest = { workspace = true }
155155
rstest = "0.18.2"
156156
rstest_reuse = "0.7.0"
157157
serial_test = "3"
158+
temp-env = { version = "0.3.6", features = ["async_closure"] }
158159
tempfile = "3.17.1"
159160
insta = { version = "1.41", features = [
160161
"glob",

lib/llm/src/http/service/metrics.rs

Lines changed: 21 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22
// SPDX-License-Identifier: Apache-2.0
33

44
use axum::{Router, extract::State, http::StatusCode, response::IntoResponse, routing::get};
5+
use dynamo_runtime::metrics::prometheus_names::{
6+
frontend_service, name_prefix, sanitize_frontend_prometheus_prefix,
7+
};
58
use prometheus::{Encoder, HistogramOpts, HistogramVec, IntCounterVec, IntGaugeVec, Opts};
69
use std::{
710
sync::Arc,
@@ -12,49 +15,6 @@ pub use prometheus::Registry;
1215

1316
use super::RouteDoc;
1417

15-
// Default metric prefix
16-
pub const FRONTEND_METRIC_PREFIX: &str = "dynamo_frontend";
17-
18-
// Environment variable that overrides the default metric prefix if provided
19-
pub const METRICS_PREFIX_ENV: &str = "DYN_METRICS_PREFIX";
20-
21-
/// Value for the `status` label in the request counter for successful requests
22-
pub const REQUEST_STATUS_SUCCESS: &str = "success";
23-
24-
/// Value for the `status` label in the request counter if the request failed
25-
pub const REQUEST_STATUS_ERROR: &str = "error";
26-
27-
/// Partial value for the `type` label in the request counter for streaming requests
28-
pub const REQUEST_TYPE_STREAM: &str = "stream";
29-
30-
/// Partial value for the `type` label in the request counter for unary requests
31-
pub const REQUEST_TYPE_UNARY: &str = "unary";
32-
33-
fn sanitize_prometheus_prefix(raw: &str) -> String {
34-
// Prometheus metric name pattern: [a-zA-Z_:][a-zA-Z0-9_:]*
35-
let mut s: String = raw
36-
.chars()
37-
.map(|c| {
38-
if c.is_ascii_alphanumeric() || c == '_' || c == ':' {
39-
c
40-
} else {
41-
'_'
42-
}
43-
})
44-
.collect();
45-
46-
if s.is_empty() {
47-
return FRONTEND_METRIC_PREFIX.to_string();
48-
}
49-
50-
let first = s.as_bytes()[0];
51-
let valid_first = first.is_ascii_alphabetic() || first == b'_' || first == b':';
52-
if !valid_first {
53-
s.insert(0, '_');
54-
}
55-
s
56-
}
57-
5818
pub struct Metrics {
5919
request_counter: IntCounterVec,
6020
inflight_gauge: IntGaugeVec,
@@ -67,8 +27,8 @@ pub struct Metrics {
6727

6828
/// RAII object for inflight gauge and request counters
6929
/// If this object is dropped without calling `mark_ok`, then the request will increment
70-
/// the request counter with the `status` label with [`REQUEST_STATUS_ERROR`]; otherwise, it will increment
71-
/// the counter with `status` label [`REQUEST_STATUS_SUCCESS`]
30+
/// the request counter with the `status` label with [`frontend_service::status::ERROR`]; otherwise, it will increment
31+
/// the counter with `status` label [`frontend_service::status::SUCCESS`]
7232
pub struct InflightGuard {
7333
metrics: Arc<Metrics>,
7434
model: String,
@@ -130,7 +90,7 @@ impl Default for Metrics {
13090
}
13191

13292
impl Metrics {
133-
/// Create Metrics with the standard prefix defined by [`FRONTEND_METRIC_PREFIX`] or specify custom prefix via the following environment variable:
93+
/// Create Metrics with the standard prefix defined by [`name_prefix::FRONTEND`] or specify custom prefix via the following environment variable:
13494
/// - `DYN_METRICS_PREFIX`: Override the default metrics prefix
13595
///
13696
/// The following metrics will be created with the configured prefix:
@@ -142,22 +102,22 @@ impl Metrics {
142102
/// - `{prefix}_time_to_first_token_seconds` - HistogramVec for time to first token in seconds
143103
/// - `{prefix}_inter_token_latency_seconds` - HistogramVec for inter-token latency in seconds
144104
pub fn new() -> Self {
145-
let raw_prefix = std::env::var(METRICS_PREFIX_ENV)
146-
.unwrap_or_else(|_| FRONTEND_METRIC_PREFIX.to_string());
147-
let prefix = sanitize_prometheus_prefix(&raw_prefix);
105+
let raw_prefix = std::env::var(frontend_service::METRICS_PREFIX_ENV)
106+
.unwrap_or_else(|_| name_prefix::FRONTEND.to_string());
107+
let prefix = sanitize_frontend_prometheus_prefix(&raw_prefix);
148108
if prefix != raw_prefix {
149109
tracing::warn!(
150110
raw=%raw_prefix,
151111
sanitized=%prefix,
152-
env=%METRICS_PREFIX_ENV,
112+
env=%frontend_service::METRICS_PREFIX_ENV,
153113
"Sanitized HTTP metrics prefix"
154114
);
155115
}
156116
let frontend_metric_name = |suffix: &str| format!("{}_{}", &prefix, suffix);
157117

158118
let request_counter = IntCounterVec::new(
159119
Opts::new(
160-
frontend_metric_name("requests_total"),
120+
frontend_metric_name(frontend_service::REQUESTS_TOTAL),
161121
"Total number of LLM requests processed",
162122
),
163123
&["model", "endpoint", "request_type", "status"],
@@ -166,7 +126,7 @@ impl Metrics {
166126

167127
let inflight_gauge = IntGaugeVec::new(
168128
Opts::new(
169-
frontend_metric_name("inflight_requests"),
129+
frontend_metric_name(frontend_service::INFLIGHT_REQUESTS),
170130
"Number of inflight requests",
171131
),
172132
&["model"],
@@ -177,7 +137,7 @@ impl Metrics {
177137

178138
let request_duration = HistogramVec::new(
179139
HistogramOpts::new(
180-
frontend_metric_name("request_duration_seconds"),
140+
frontend_metric_name(frontend_service::REQUEST_DURATION_SECONDS),
181141
"Duration of LLM requests",
182142
)
183143
.buckets(buckets),
@@ -187,7 +147,7 @@ impl Metrics {
187147

188148
let input_sequence_length = HistogramVec::new(
189149
HistogramOpts::new(
190-
frontend_metric_name("input_sequence_tokens"),
150+
frontend_metric_name(frontend_service::INPUT_SEQUENCE_TOKENS),
191151
"Input sequence length in tokens",
192152
)
193153
.buckets(vec![
@@ -200,7 +160,7 @@ impl Metrics {
200160

201161
let output_sequence_length = HistogramVec::new(
202162
HistogramOpts::new(
203-
frontend_metric_name("output_sequence_tokens"),
163+
frontend_metric_name(frontend_service::OUTPUT_SEQUENCE_TOKENS),
204164
"Output sequence length in tokens",
205165
)
206166
.buckets(vec![
@@ -212,7 +172,7 @@ impl Metrics {
212172

213173
let time_to_first_token = HistogramVec::new(
214174
HistogramOpts::new(
215-
frontend_metric_name("time_to_first_token_seconds"),
175+
frontend_metric_name(frontend_service::TIME_TO_FIRST_TOKEN_SECONDS),
216176
"Time to first token in seconds",
217177
)
218178
.buckets(vec![
@@ -225,7 +185,7 @@ impl Metrics {
225185

226186
let inter_token_latency = HistogramVec::new(
227187
HistogramOpts::new(
228-
frontend_metric_name("inter_token_latency_seconds"),
188+
frontend_metric_name(frontend_service::INTER_TOKEN_LATENCY_SECONDS),
229189
"Inter-token latency in seconds",
230190
)
231191
.buckets(vec![
@@ -422,17 +382,17 @@ impl Endpoint {
422382
impl RequestType {
423383
pub fn as_str(&self) -> &'static str {
424384
match self {
425-
RequestType::Unary => REQUEST_TYPE_UNARY,
426-
RequestType::Stream => REQUEST_TYPE_STREAM,
385+
RequestType::Unary => frontend_service::request_type::UNARY,
386+
RequestType::Stream => frontend_service::request_type::STREAM,
427387
}
428388
}
429389
}
430390

431391
impl Status {
432392
pub fn as_str(&self) -> &'static str {
433393
match self {
434-
Status::Success => REQUEST_STATUS_SUCCESS,
435-
Status::Error => REQUEST_STATUS_ERROR,
394+
Status::Success => frontend_service::status::SUCCESS,
395+
Status::Error => frontend_service::status::ERROR,
436396
}
437397
}
438398
}

lib/llm/tests/http-service.rs

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ use dynamo_llm::http::{
1111
service::{
1212
Metrics,
1313
error::HttpError,
14-
metrics::{Endpoint, FRONTEND_METRIC_PREFIX, RequestType, Status},
14+
metrics::{Endpoint, RequestType, Status},
1515
service_v2::HttpService,
1616
},
1717
};
@@ -24,6 +24,7 @@ use dynamo_llm::protocols::{
2424
completions::{NvCreateCompletionRequest, NvCreateCompletionResponse},
2525
},
2626
};
27+
use dynamo_runtime::metrics::prometheus_names::{frontend_service, name_prefix};
2728
use dynamo_runtime::{
2829
CancellationToken,
2930
engine::AsyncEngineContext,
@@ -350,7 +351,14 @@ async fn test_http_service() {
350351
let families = registry.gather();
351352
let histogram_metric_family = families
352353
.into_iter()
353-
.find(|m| m.get_name() == format!("{}_request_duration_seconds", FRONTEND_METRIC_PREFIX))
354+
.find(|m| {
355+
m.get_name()
356+
== format!(
357+
"{}_{}",
358+
name_prefix::FRONTEND,
359+
frontend_service::REQUEST_DURATION_SECONDS
360+
)
361+
})
354362
.expect("Histogram metric not found");
355363

356364
assert_eq!(

0 commit comments

Comments
 (0)