Skip to content

Commit 072f489

Browse files
committed
Update frontend metric names, and update metrics.rs to compile
1 parent cff8120 commit 072f489

File tree

5 files changed

+50
-45
lines changed

5 files changed

+50
-45
lines changed

components/metrics/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -38,4 +38,4 @@ tracing = { workspace = true }
3838
# TODO: Update axum to 0.8
3939
axum = { version = "0.6" }
4040
clap = { version = "4.5", features = ["derive", "env"] }
41-
reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls"] }
41+
reqwest = { version = "0.12.22", default-features = false, features = ["json", "rustls-tls"] }

lib/llm/src/http/service/metrics.rs

Lines changed: 16 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,9 @@ pub use prometheus::Registry;
1212

1313
use super::RouteDoc;
1414

15+
/// Metric prefix for all HTTP service metrics
16+
pub const FRONTEND_METRIC_PREFIX: &str = "dynamo_frontend";
17+
1518
/// Value for the `status` label in the request counter for successful requests
1619
pub const REQUEST_STATUS_SUCCESS: &str = "success";
1720

@@ -24,6 +27,11 @@ pub const REQUEST_TYPE_STREAM: &str = "stream";
2427
/// Partial value for the `type` label in the request counter for unary requests
2528
pub const REQUEST_TYPE_UNARY: &str = "unary";
2629

30+
/// Helper function to construct metric names with the standard prefix
31+
fn frontend_metric_name(suffix: &str) -> String {
32+
format!("{}_{}", FRONTEND_METRIC_PREFIX, suffix)
33+
}
34+
2735
pub struct Metrics {
2836
request_counter: IntCounterVec,
2937
inflight_gauge: IntGaugeVec,
@@ -99,7 +107,7 @@ impl Default for Metrics {
99107
}
100108

101109
impl Metrics {
102-
/// Create Metrics with hardcoded "dynamo" prefix
110+
/// Create Metrics with the standard prefix defined by [`FRONTEND_METRIC_PREFIX`]
103111
/// The following metrics will be created:
104112
/// - `dynamo_frontend_requests_total` - IntCounterVec for the total number of requests processed
105113
/// - `dynamo_frontend_inflight_requests` - IntGaugeVec for the number of inflight requests
@@ -111,7 +119,7 @@ impl Metrics {
111119
pub fn new() -> Self {
112120
let request_counter = IntCounterVec::new(
113121
Opts::new(
114-
"dynamo_frontend_requests_total",
122+
frontend_metric_name("requests_total"),
115123
"Total number of LLM requests processed",
116124
),
117125
&["model", "endpoint", "request_type", "status"],
@@ -120,7 +128,7 @@ impl Metrics {
120128

121129
let inflight_gauge = IntGaugeVec::new(
122130
Opts::new(
123-
"dynamo_frontend_inflight_requests",
131+
frontend_metric_name("inflight_requests"),
124132
"Number of inflight requests",
125133
),
126134
&["model"],
@@ -131,7 +139,7 @@ impl Metrics {
131139

132140
let request_duration = HistogramVec::new(
133141
HistogramOpts::new(
134-
"dynamo_frontend_request_duration_seconds",
142+
frontend_metric_name("request_duration_seconds"),
135143
"Duration of LLM requests",
136144
)
137145
.buckets(buckets),
@@ -141,7 +149,7 @@ impl Metrics {
141149

142150
let input_sequence_length = HistogramVec::new(
143151
HistogramOpts::new(
144-
"dynamo_frontend_input_sequence_tokens",
152+
frontend_metric_name("input_sequence_tokens"),
145153
"Input sequence length in tokens",
146154
)
147155
.buckets(vec![
@@ -154,7 +162,7 @@ impl Metrics {
154162

155163
let output_sequence_length = HistogramVec::new(
156164
HistogramOpts::new(
157-
"dynamo_frontend_output_sequence_tokens",
165+
frontend_metric_name("output_sequence_tokens"),
158166
"Output sequence length in tokens",
159167
)
160168
.buckets(vec![
@@ -166,7 +174,7 @@ impl Metrics {
166174

167175
let time_to_first_token = HistogramVec::new(
168176
HistogramOpts::new(
169-
"dynamo_frontend_time_to_first_token_seconds",
177+
frontend_metric_name("time_to_first_token_seconds"),
170178
"Time to first token in seconds",
171179
)
172180
.buckets(vec![
@@ -179,7 +187,7 @@ impl Metrics {
179187

180188
let inter_token_latency = HistogramVec::new(
181189
HistogramOpts::new(
182-
"dynamo_frontend_inter_token_latency_seconds",
190+
frontend_metric_name("inter_token_latency_seconds"),
183191
"Inter-token latency in seconds",
184192
)
185193
.buckets(vec![

lib/llm/tests/http-service.rs

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,7 @@ use dynamo_llm::http::{
2222
},
2323
service::{
2424
error::HttpError,
25-
metrics::{Endpoint, RequestType, Status},
25+
metrics::{Endpoint, FRONTEND_METRIC_PREFIX, RequestType, Status},
2626
service_v2::HttpService,
2727
Metrics,
2828
},
@@ -357,7 +357,7 @@ async fn test_http_service() {
357357
let families = registry.gather();
358358
let histogram_metric_family = families
359359
.into_iter()
360-
.find(|m| m.get_name() == "dynamo_frontend_request_duration_seconds")
360+
.find(|m| m.get_name() == &format!("{}_request_duration_seconds", FRONTEND_METRIC_PREFIX))
361361
.expect("Histogram metric not found");
362362

363363
assert_eq!(

lib/runtime/src/http_server.rs

Lines changed: 4 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -65,25 +65,7 @@ impl Clone for HttpServerInfo {
6565
}
6666
}
6767

68-
pub struct HttpMetricsRegistry {
69-
pub drt: Arc<crate::DistributedRuntime>,
70-
}
7168

72-
impl crate::traits::DistributedRuntimeProvider for HttpMetricsRegistry {
73-
fn drt(&self) -> &crate::DistributedRuntime {
74-
&self.drt
75-
}
76-
}
77-
78-
impl MetricsRegistry for HttpMetricsRegistry {
79-
fn basename(&self) -> String {
80-
"http_server".to_string()
81-
}
82-
83-
fn parent_hierarchy(&self) -> Vec<String> {
84-
[self.drt().parent_hierarchy(), vec![self.drt().basename()]].concat()
85-
}
86-
}
8769

8870
/// HTTP server state containing metrics and uptime tracking
8971
pub struct HttpServerState {
@@ -96,10 +78,9 @@ pub struct HttpServerState {
9678
impl HttpServerState {
9779
/// Create new HTTP server state with the provided metrics registry
9880
pub fn new(drt: Arc<crate::DistributedRuntime>) -> anyhow::Result<Self> {
99-
let http_metrics_registry = Arc::new(HttpMetricsRegistry { drt: drt.clone() });
10081
// Note: This metric is created at the DRT level (no namespace), so we manually add "dynamo_" prefix
10182
// to maintain consistency with the project's metric naming convention
102-
let uptime_gauge = http_metrics_registry.as_ref().create_gauge(
83+
let uptime_gauge = drt.as_ref().create_gauge(
10384
"dynamo_uptime_seconds",
10485
"Total uptime of the DistributedRuntime in seconds",
10586
&[],
@@ -370,7 +351,7 @@ mod tests {
370351
let expected = "\
371352
# HELP dynamo_uptime_seconds Total uptime of the DistributedRuntime in seconds
372353
# TYPE dynamo_uptime_seconds gauge
373-
dynamo_uptime_seconds{namespace=\"http_server\"} 42
354+
dynamo_uptime_seconds 42
374355
";
375356
assert_eq!(response, expected);
376357
}
@@ -445,8 +426,8 @@ dynamo_uptime_seconds{namespace=\"http_server\"} 42
445426
let tracestate_value = "vendor1=opaqueValue1,vendor2=opaqueValue2";
446427
let mut headers = reqwest::header::HeaderMap::new();
447428
headers.insert(
448-
reqwest::header::HeaderName.from_static("traceparent"),
449-
reqwest::header::HeaderValue.from_str(traceparent_value)?,
429+
reqwest::header::HeaderName::from_static("traceparent"),
430+
reqwest::header::HeaderValue::from_str(traceparent_value).unwrap(),
450431
);
451432
let url = format!("http://{}{}", addr, path);
452433
let response = client.get(&url).send().await.unwrap();

lib/runtime/src/metrics.rs

Lines changed: 27 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -645,6 +645,7 @@ mod tests {
645645
mod test_prefixes {
646646
use super::create_test_drt;
647647
use super::*;
648+
use prometheus::core::Collector;
648649

649650
#[test]
650651
fn test_hierarchical_prefixes_and_parent_hierarchies() {
@@ -810,17 +811,32 @@ mod test_prefixes {
810811
);
811812
println!("Invalid namespace prefix: '{}'", invalid_namespace.prefix());
812813

813-
// Try to create a metric - this should fail because the namespace name will be used in the metric name
814+
// Try to create a metric - this should succeed because the namespace name will be sanitized
814815
let result = invalid_namespace.create_counter("test_counter", "A test counter", &[]);
815816
println!("Result with invalid namespace 'test-namespace':");
816817
println!("{:?}", result);
817818

818-
// The result should be an error from Prometheus
819+
// The result should succeed because invalid names are sanitized
819820
assert!(
820-
result.is_err(),
821-
"Creating metric with invalid namespace should fail"
821+
result.is_ok(),
822+
"Creating metric with invalid namespace should succeed after sanitization"
822823
);
823824

825+
// Verify the metric name was sanitized (hyphen removed)
826+
if let Ok(counter) = &result {
827+
let metric_name = counter.desc()[0].fq_name.as_str();
828+
assert!(
829+
metric_name.contains("testnamespace"),
830+
"Metric name should contain sanitized namespace 'testnamespace', got: {}",
831+
metric_name
832+
);
833+
assert!(
834+
!metric_name.contains("test-namespace"),
835+
"Metric name should not contain unsanitized namespace 'test-namespace', got: {}",
836+
metric_name
837+
);
838+
}
839+
824840
// For comparison, show a valid namespace works
825841
let valid_namespace = drt.namespace("test_namespace").unwrap();
826842
let valid_result = valid_namespace.create_counter("test_counter", "A test counter", &[]);
@@ -926,15 +942,15 @@ testnamespace_testgauge{{component="testcomponent",namespace="testnamespace"}} 5
926942
println!("{}", namespace_output);
927943

928944
let expected_namespace_output = format!(
929-
r#"# HELP testintcounter A test int counter
930-
# TYPE testintcounter counter
931-
testintcounter{{namespace="testnamespace"}} 12345
932-
# HELP testnamespace_testcounter A test counter
945+
r#"# HELP testnamespace_testcounter A test counter
933946
# TYPE testnamespace_testcounter counter
934947
testnamespace_testcounter{{component="testcomponent",endpoint="testendpoint",namespace="testnamespace"}} 123.456789
935948
# HELP testnamespace_testgauge A test gauge
936949
# TYPE testnamespace_testgauge gauge
937950
testnamespace_testgauge{{component="testcomponent",namespace="testnamespace"}} 50000
951+
# HELP testnamespace_testintcounter A test int counter
952+
# TYPE testnamespace_testintcounter counter
953+
testnamespace_testintcounter{{namespace="testnamespace"}} 12345
938954
"#
939955
);
940956

@@ -1015,9 +1031,6 @@ testhistogram_bucket{{le="10"}} 3
10151031
testhistogram_bucket{{le="+Inf"}} 3
10161032
testhistogram_sum 7.5
10171033
testhistogram_count 3
1018-
# HELP testintcounter A test int counter
1019-
# TYPE testintcounter counter
1020-
testintcounter{{namespace="testnamespace"}} 12345
10211034
# HELP testintgauge A test int gauge
10221035
# TYPE testintgauge gauge
10231036
testintgauge 42
@@ -1031,6 +1044,9 @@ testnamespace_testcounter{{component="testcomponent",endpoint="testendpoint",nam
10311044
# HELP testnamespace_testgauge A test gauge
10321045
# TYPE testnamespace_testgauge gauge
10331046
testnamespace_testgauge{{component="testcomponent",namespace="testnamespace"}} 50000
1047+
# HELP testnamespace_testintcounter A test int counter
1048+
# TYPE testnamespace_testintcounter counter
1049+
testnamespace_testintcounter{{namespace="testnamespace"}} 12345
10341050
"#
10351051
);
10361052

0 commit comments

Comments
 (0)