From d1d1884307658620cdd0374d4abcb82eb0c08eb3 Mon Sep 17 00:00:00 2001 From: Ryan Lempka Date: Wed, 13 Aug 2025 00:12:37 +0000 Subject: [PATCH 1/4] feat: enable custom metrics prefix --- .../frontend/src/dynamo/frontend/main.py | 10 +++++ lib/llm/src/http/service/metrics.rs | 43 ++++++++++++------- lib/llm/tests/http_metrics_prefix_default.rs | 43 +++++++++++++++++++ lib/llm/tests/http_metrics_prefix_env.rs | 42 ++++++++++++++++++ 4 files changed, 123 insertions(+), 15 deletions(-) create mode 100644 lib/llm/tests/http_metrics_prefix_default.rs create mode 100644 lib/llm/tests/http_metrics_prefix_env.rs diff --git a/components/frontend/src/dynamo/frontend/main.py b/components/frontend/src/dynamo/frontend/main.py index 06594443bc..8698148ea2 100644 --- a/components/frontend/src/dynamo/frontend/main.py +++ b/components/frontend/src/dynamo/frontend/main.py @@ -133,6 +133,12 @@ def parse_args(): type=validate_model_path, help="Path to model directory on disk (e.g., /tmp/model_cache/lama3.2_1B/)", ) + parser.add_argument( + "--metrics-prefix", + type=str, + default=None, + help="Prefix for Dynamo frontend metrics. If unset, uses DYN_METRICS_PREFIX env var or 'dynamo_frontend'.", + ) flags = parser.parse_args() @@ -146,6 +152,10 @@ async def async_main(): flags = parse_args() is_static = bool(flags.static_endpoint) # true if the string has a value + # Configure Dynamo frontend HTTP service metrics prefix (consumed by Rust via DYN_METRICS_PREFIX) + if flags.metrics_prefix is not None: + os.environ["DYN_METRICS_PREFIX"] = flags.metrics_prefix + runtime = DistributedRuntime(asyncio.get_running_loop(), is_static) if flags.router_mode == "kv": diff --git a/lib/llm/src/http/service/metrics.rs b/lib/llm/src/http/service/metrics.rs index b1e138d778..dd9d7fbf09 100644 --- a/lib/llm/src/http/service/metrics.rs +++ b/lib/llm/src/http/service/metrics.rs @@ -7,14 +7,30 @@ use std::{ sync::Arc, time::{Duration, Instant}, }; +use std::sync::OnceLock; pub use prometheus::Registry; use super::RouteDoc; -/// Metric prefix for all HTTP service metrics +// Default metric prefix pub const FRONTEND_METRIC_PREFIX: &str = "dynamo_frontend"; +// Environment variable that overrides the default metric prefix if provided +pub const METRICS_PREFIX_ENV: &str = "DYN_METRICS_PREFIX"; + +static METRICS_PREFIX: OnceLock = OnceLock::new(); + +fn metrics_prefix() -> &'static str { + METRICS_PREFIX + .get_or_init(|| std::env::var(METRICS_PREFIX_ENV).unwrap_or_else(|_| FRONTEND_METRIC_PREFIX.to_string())) + .as_str() +} + +fn frontend_metric_name(suffix: &str) -> String { + format!("{}_{}", metrics_prefix(), suffix) +} + /// Value for the `status` label in the request counter for successful requests pub const REQUEST_STATUS_SUCCESS: &str = "success"; @@ -27,11 +43,6 @@ pub const REQUEST_TYPE_STREAM: &str = "stream"; /// Partial value for the `type` label in the request counter for unary requests pub const REQUEST_TYPE_UNARY: &str = "unary"; -/// Helper function to construct metric names with the standard prefix -fn frontend_metric_name(suffix: &str) -> String { - format!("{}_{}", FRONTEND_METRIC_PREFIX, suffix) -} - pub struct Metrics { request_counter: IntCounterVec, inflight_gauge: IntGaugeVec, @@ -107,15 +118,17 @@ impl Default for Metrics { } impl Metrics { - /// Create Metrics with the standard prefix defined by [`FRONTEND_METRIC_PREFIX`] - /// The following metrics will be created: - /// - `dynamo_frontend_requests_total` - IntCounterVec for the total number of requests processed - /// - `dynamo_frontend_inflight_requests` - IntGaugeVec for the number of inflight requests - /// - `dynamo_frontend_request_duration_seconds` - HistogramVec for the duration of requests - /// - `dynamo_frontend_input_sequence_tokens` - HistogramVec for input sequence length in tokens - /// - `dynamo_frontend_output_sequence_tokens` - HistogramVec for output sequence length in tokens - /// - `dynamo_frontend_time_to_first_token_seconds` - HistogramVec for time to first token in seconds - /// - `dynamo_frontend_inter_token_latency_seconds` - HistogramVec for inter-token latency in seconds + /// Create Metrics with the standard prefix defined by [`FRONTEND_METRIC_PREFIX`] or specify custom prefix via the following environment variable: + /// - `DYN_METRICS_PREFIX`: Override the default metrics prefix + /// + /// The following metrics will be created with the configured prefix: + /// - `{prefix}_requests_total` - IntCounterVec for the total number of requests processed + /// - `{prefix}_inflight_requests` - IntGaugeVec for the number of inflight requests + /// - `{prefix}_request_duration_seconds` - HistogramVec for the duration of requests + /// - `{prefix}_input_sequence_tokens` - HistogramVec for input sequence length in tokens + /// - `{prefix}_output_sequence_tokens` - HistogramVec for output sequence length in tokens + /// - `{prefix}_time_to_first_token_seconds` - HistogramVec for time to first token in seconds + /// - `{prefix}_inter_token_latency_seconds` - HistogramVec for inter-token latency in seconds pub fn new() -> Self { let request_counter = IntCounterVec::new( Opts::new( diff --git a/lib/llm/tests/http_metrics_prefix_default.rs b/lib/llm/tests/http_metrics_prefix_default.rs new file mode 100644 index 0000000000..741e7ba0ea --- /dev/null +++ b/lib/llm/tests/http_metrics_prefix_default.rs @@ -0,0 +1,43 @@ +// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use dynamo_llm::http::service::service_v2::HttpService; +use dynamo_llm::http::service::metrics::{self, Endpoint}; +use dynamo_runtime::CancellationToken; +use std::{env, time::Duration}; + +#[tokio::test] +async fn metrics_prefix_default() { + env::remove_var(metrics::METRICS_PREFIX_ENV); + let svc = HttpService::builder().port(9101).build().unwrap(); + let token = CancellationToken::new(); + let _handle = svc.spawn(token.clone()).await; + tokio::time::sleep(Duration::from_millis(100)).await; + + // Force-create a labeled metric without needing a model/engine + let state = svc.state_clone(); + { + let _guard = state + .metrics_clone() + .create_inflight_guard("test-model", Endpoint::ChatCompletions, false); + // guard drops here -> counter/gauge updated + } + + let body = reqwest::get("http://localhost:9101/metrics") + .await.unwrap() + .text().await.unwrap(); + assert!(body.contains("dynamo_frontend_requests_total")); + token.cancel(); +} diff --git a/lib/llm/tests/http_metrics_prefix_env.rs b/lib/llm/tests/http_metrics_prefix_env.rs new file mode 100644 index 0000000000..a0e1508f01 --- /dev/null +++ b/lib/llm/tests/http_metrics_prefix_env.rs @@ -0,0 +1,42 @@ +// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use dynamo_llm::http::service::service_v2::HttpService; +use dynamo_llm::http::service::metrics::{self, Endpoint}; +use dynamo_runtime::CancellationToken; +use std::{env, time::Duration}; + +#[tokio::test] +async fn metrics_prefix_env_override() { + env::set_var(metrics::METRICS_PREFIX_ENV, "nv_llm_http_service"); + let svc = HttpService::builder().port(9102).build().unwrap(); + let token = CancellationToken::new(); + let _handle = svc.spawn(token.clone()).await; + tokio::time::sleep(Duration::from_millis(100)).await; + + // Force-create a labeled metric + let state = svc.state_clone(); + { + let _guard = state + .metrics_clone() + .create_inflight_guard("test-model", Endpoint::ChatCompletions, true); + } + + let body = reqwest::get("http://localhost:9102/metrics") + .await.unwrap() + .text().await.unwrap(); + assert!(body.contains("nv_llm_http_service_requests_total")); + token.cancel(); +} From 116386551c4b72b50a19cfe71a3accedecb782ea Mon Sep 17 00:00:00 2001 From: Ryan Lempka Date: Wed, 13 Aug 2025 19:06:44 +0000 Subject: [PATCH 2/4] chore: simplify env var approach --- lib/llm/src/http/service/metrics.rs | 17 ++---- lib/llm/tests/http_metrics.rs | 58 ++++++++++++++++++++ lib/llm/tests/http_metrics_prefix_default.rs | 43 --------------- lib/llm/tests/http_metrics_prefix_env.rs | 42 -------------- 4 files changed, 62 insertions(+), 98 deletions(-) create mode 100644 lib/llm/tests/http_metrics.rs delete mode 100644 lib/llm/tests/http_metrics_prefix_default.rs delete mode 100644 lib/llm/tests/http_metrics_prefix_env.rs diff --git a/lib/llm/src/http/service/metrics.rs b/lib/llm/src/http/service/metrics.rs index dd9d7fbf09..551b832114 100644 --- a/lib/llm/src/http/service/metrics.rs +++ b/lib/llm/src/http/service/metrics.rs @@ -7,7 +7,6 @@ use std::{ sync::Arc, time::{Duration, Instant}, }; -use std::sync::OnceLock; pub use prometheus::Registry; @@ -19,18 +18,6 @@ pub const FRONTEND_METRIC_PREFIX: &str = "dynamo_frontend"; // Environment variable that overrides the default metric prefix if provided pub const METRICS_PREFIX_ENV: &str = "DYN_METRICS_PREFIX"; -static METRICS_PREFIX: OnceLock = OnceLock::new(); - -fn metrics_prefix() -> &'static str { - METRICS_PREFIX - .get_or_init(|| std::env::var(METRICS_PREFIX_ENV).unwrap_or_else(|_| FRONTEND_METRIC_PREFIX.to_string())) - .as_str() -} - -fn frontend_metric_name(suffix: &str) -> String { - format!("{}_{}", metrics_prefix(), suffix) -} - /// Value for the `status` label in the request counter for successful requests pub const REQUEST_STATUS_SUCCESS: &str = "success"; @@ -130,6 +117,10 @@ impl Metrics { /// - `{prefix}_time_to_first_token_seconds` - HistogramVec for time to first token in seconds /// - `{prefix}_inter_token_latency_seconds` - HistogramVec for inter-token latency in seconds pub fn new() -> Self { + let prefix = std::env::var(METRICS_PREFIX_ENV) + .unwrap_or_else(|_| FRONTEND_METRIC_PREFIX.to_string()); + let frontend_metric_name = |suffix: &str| format!("{}_{}", &prefix, suffix); + let request_counter = IntCounterVec::new( Opts::new( frontend_metric_name("requests_total"), diff --git a/lib/llm/tests/http_metrics.rs b/lib/llm/tests/http_metrics.rs new file mode 100644 index 0000000000..be52d4a93e --- /dev/null +++ b/lib/llm/tests/http_metrics.rs @@ -0,0 +1,58 @@ + +// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Block Manager Dynamo Integration Tests +//! +//! This module both the integration components in the `llm_kvbm` module +//! and the tests for the `llm_kvbm` module. +//! +//! The intent is to move [llm_kvbm] to a separate crate in the future. + +use dynamo_llm::http::service::metrics::{self, Endpoint}; +use dynamo_llm::http::service::service_v2::HttpService; +use dynamo_runtime::CancellationToken; +use std::{env, time::Duration}; + +#[tokio::test] +async fn metrics_prefix_default_then_env_override() { + // Case 1: default prefix + env::remove_var(metrics::METRICS_PREFIX_ENV); + let svc1 = HttpService::builder().port(9101).build().unwrap(); + let token1 = CancellationToken::new(); + let _h1 = svc1.spawn(token1.clone()).await; + tokio::time::sleep(Duration::from_millis(100)).await; + + // Populate labeled metrics + let s1 = svc1.state_clone(); + { let _g = s1.metrics_clone().create_inflight_guard("test-model", Endpoint::ChatCompletions, false); } + let body1 = reqwest::get("http://localhost:9101/metrics").await.unwrap().text().await.unwrap(); + assert!(body1.contains("dynamo_frontend_requests_total")); + token1.cancel(); + + // Case 2: env override to NIM prefix + env::set_var(metrics::METRICS_PREFIX_ENV, "nv_llm_http_service"); + let svc2 = HttpService::builder().port(9102).build().unwrap(); + let token2 = CancellationToken::new(); + let _h2 = svc2.spawn(token2.clone()).await; + tokio::time::sleep(Duration::from_millis(100)).await; + + // Populate labeled metrics + let s2 = svc2.state_clone(); + { let _g = s2.metrics_clone().create_inflight_guard("test-model", Endpoint::ChatCompletions, true); } + let body2 = reqwest::get("http://localhost:9102/metrics").await.unwrap().text().await.unwrap(); + assert!(body2.contains("nv_llm_http_service_requests_total")); + token2.cancel(); +} diff --git a/lib/llm/tests/http_metrics_prefix_default.rs b/lib/llm/tests/http_metrics_prefix_default.rs deleted file mode 100644 index 741e7ba0ea..0000000000 --- a/lib/llm/tests/http_metrics_prefix_default.rs +++ /dev/null @@ -1,43 +0,0 @@ -// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// SPDX-License-Identifier: Apache-2.0 -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use dynamo_llm::http::service::service_v2::HttpService; -use dynamo_llm::http::service::metrics::{self, Endpoint}; -use dynamo_runtime::CancellationToken; -use std::{env, time::Duration}; - -#[tokio::test] -async fn metrics_prefix_default() { - env::remove_var(metrics::METRICS_PREFIX_ENV); - let svc = HttpService::builder().port(9101).build().unwrap(); - let token = CancellationToken::new(); - let _handle = svc.spawn(token.clone()).await; - tokio::time::sleep(Duration::from_millis(100)).await; - - // Force-create a labeled metric without needing a model/engine - let state = svc.state_clone(); - { - let _guard = state - .metrics_clone() - .create_inflight_guard("test-model", Endpoint::ChatCompletions, false); - // guard drops here -> counter/gauge updated - } - - let body = reqwest::get("http://localhost:9101/metrics") - .await.unwrap() - .text().await.unwrap(); - assert!(body.contains("dynamo_frontend_requests_total")); - token.cancel(); -} diff --git a/lib/llm/tests/http_metrics_prefix_env.rs b/lib/llm/tests/http_metrics_prefix_env.rs deleted file mode 100644 index a0e1508f01..0000000000 --- a/lib/llm/tests/http_metrics_prefix_env.rs +++ /dev/null @@ -1,42 +0,0 @@ -// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// SPDX-License-Identifier: Apache-2.0 -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use dynamo_llm::http::service::service_v2::HttpService; -use dynamo_llm::http::service::metrics::{self, Endpoint}; -use dynamo_runtime::CancellationToken; -use std::{env, time::Duration}; - -#[tokio::test] -async fn metrics_prefix_env_override() { - env::set_var(metrics::METRICS_PREFIX_ENV, "nv_llm_http_service"); - let svc = HttpService::builder().port(9102).build().unwrap(); - let token = CancellationToken::new(); - let _handle = svc.spawn(token.clone()).await; - tokio::time::sleep(Duration::from_millis(100)).await; - - // Force-create a labeled metric - let state = svc.state_clone(); - { - let _guard = state - .metrics_clone() - .create_inflight_guard("test-model", Endpoint::ChatCompletions, true); - } - - let body = reqwest::get("http://localhost:9102/metrics") - .await.unwrap() - .text().await.unwrap(); - assert!(body.contains("nv_llm_http_service_requests_total")); - token.cancel(); -} From d917534b8577faef4373e41012117b3b6ec6341f Mon Sep 17 00:00:00 2001 From: Ryan Lempka Date: Wed, 13 Aug 2025 19:12:47 +0000 Subject: [PATCH 3/4] chore: clean up licensing comment --- components/frontend/src/dynamo/frontend/main.py | 2 +- lib/llm/tests/http_metrics.rs | 14 +++----------- 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/components/frontend/src/dynamo/frontend/main.py b/components/frontend/src/dynamo/frontend/main.py index 8698148ea2..46e4f24304 100644 --- a/components/frontend/src/dynamo/frontend/main.py +++ b/components/frontend/src/dynamo/frontend/main.py @@ -152,7 +152,7 @@ async def async_main(): flags = parse_args() is_static = bool(flags.static_endpoint) # true if the string has a value - # Configure Dynamo frontend HTTP service metrics prefix (consumed by Rust via DYN_METRICS_PREFIX) + # Configure Dynamo frontend HTTP service metrics prefix if flags.metrics_prefix is not None: os.environ["DYN_METRICS_PREFIX"] = flags.metrics_prefix diff --git a/lib/llm/tests/http_metrics.rs b/lib/llm/tests/http_metrics.rs index be52d4a93e..464bd72e26 100644 --- a/lib/llm/tests/http_metrics.rs +++ b/lib/llm/tests/http_metrics.rs @@ -1,4 +1,3 @@ - // SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. // SPDX-License-Identifier: Apache-2.0 // @@ -14,13 +13,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -//! Block Manager Dynamo Integration Tests -//! -//! This module both the integration components in the `llm_kvbm` module -//! and the tests for the `llm_kvbm` module. -//! -//! The intent is to move [llm_kvbm] to a separate crate in the future. - use dynamo_llm::http::service::metrics::{self, Endpoint}; use dynamo_llm::http::service::service_v2::HttpService; use dynamo_runtime::CancellationToken; @@ -42,8 +34,8 @@ async fn metrics_prefix_default_then_env_override() { assert!(body1.contains("dynamo_frontend_requests_total")); token1.cancel(); - // Case 2: env override to NIM prefix - env::set_var(metrics::METRICS_PREFIX_ENV, "nv_llm_http_service"); + // Case 2: env override to prefix + env::set_var(metrics::METRICS_PREFIX_ENV, "custom_prefix"); let svc2 = HttpService::builder().port(9102).build().unwrap(); let token2 = CancellationToken::new(); let _h2 = svc2.spawn(token2.clone()).await; @@ -53,6 +45,6 @@ async fn metrics_prefix_default_then_env_override() { let s2 = svc2.state_clone(); { let _g = s2.metrics_clone().create_inflight_guard("test-model", Endpoint::ChatCompletions, true); } let body2 = reqwest::get("http://localhost:9102/metrics").await.unwrap().text().await.unwrap(); - assert!(body2.contains("nv_llm_http_service_requests_total")); + assert!(body2.contains("custom_prefix")); token2.cancel(); } From cd3b8e2c631484eb46f234db1a5abf905964c203 Mon Sep 17 00:00:00 2001 From: Ryan Lempka Date: Wed, 13 Aug 2025 20:49:51 +0000 Subject: [PATCH 4/4] fix: sanitize metric prefix and improve test reliability --- Cargo.lock | 41 +++++++++ .../frontend/src/dynamo/frontend/main.py | 4 +- lib/llm/Cargo.toml | 1 + lib/llm/src/http/service/metrics.rs | 36 +++++++- lib/llm/tests/http_metrics.rs | 92 +++++++++++++++---- 5 files changed, 153 insertions(+), 21 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 420a3a74e1..b88bb494f6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1902,6 +1902,7 @@ dependencies = [ "sentencepiece", "serde", "serde_json", + "serial_test", "strum", "tempfile", "thiserror 2.0.12", @@ -6255,6 +6256,15 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "scc" +version = "2.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22b2d775fb28f245817589471dd49c5edf64237f4a19d10ce9a92ff4651a27f4" +dependencies = [ + "sdd", +] + [[package]] name = "schannel" version = "0.1.27" @@ -6309,6 +6319,12 @@ dependencies = [ "tendril", ] +[[package]] +name = "sdd" +version = "3.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "490dcfcbfef26be6800d11870ff2df8774fa6e86d047e3e8c8a76b25655e41ca" + [[package]] name = "secrecy" version = "0.10.3" @@ -6554,6 +6570,31 @@ dependencies = [ "unsafe-libyaml", ] +[[package]] +name = "serial_test" +version = "3.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b258109f244e1d6891bf1053a55d63a5cd4f8f4c30cf9a1280989f80e7a1fa9" +dependencies = [ + "futures", + "log", + "once_cell", + "parking_lot", + "scc", + "serial_test_derive", +] + +[[package]] +name = "serial_test_derive" +version = "3.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d69265a08751de7844521fd15003ae0a888e035773ba05695c5c759a6f89eef" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.100", +] + [[package]] name = "servo_arc" version = "0.4.0" diff --git a/components/frontend/src/dynamo/frontend/main.py b/components/frontend/src/dynamo/frontend/main.py index 46e4f24304..28da1c6fd0 100644 --- a/components/frontend/src/dynamo/frontend/main.py +++ b/components/frontend/src/dynamo/frontend/main.py @@ -154,7 +154,9 @@ async def async_main(): # Configure Dynamo frontend HTTP service metrics prefix if flags.metrics_prefix is not None: - os.environ["DYN_METRICS_PREFIX"] = flags.metrics_prefix + prefix = flags.metrics_prefix.strip() + if prefix: + os.environ["DYN_METRICS_PREFIX"] = flags.metrics_prefix runtime = DistributedRuntime(asyncio.get_running_loop(), is_static) diff --git a/lib/llm/Cargo.toml b/lib/llm/Cargo.toml index fc12ba1bb6..52e16c5af6 100644 --- a/lib/llm/Cargo.toml +++ b/lib/llm/Cargo.toml @@ -143,6 +143,7 @@ proptest = "1.5.0" reqwest = { workspace = true } rstest = "0.18.2" rstest_reuse = "0.7.0" +serial_test = "3" tempfile = "3.17.1" insta = { version = "1.41", features = [ "glob", diff --git a/lib/llm/src/http/service/metrics.rs b/lib/llm/src/http/service/metrics.rs index 551b832114..2b21e789f1 100644 --- a/lib/llm/src/http/service/metrics.rs +++ b/lib/llm/src/http/service/metrics.rs @@ -30,6 +30,31 @@ pub const REQUEST_TYPE_STREAM: &str = "stream"; /// Partial value for the `type` label in the request counter for unary requests pub const REQUEST_TYPE_UNARY: &str = "unary"; +fn sanitize_prometheus_prefix(raw: &str) -> String { + // Prometheus metric name pattern: [a-zA-Z_:][a-zA-Z0-9_:]* + let mut s: String = raw + .chars() + .map(|c| { + if c.is_ascii_alphanumeric() || c == '_' || c == ':' { + c + } else { + '_' + } + }) + .collect(); + + if s.is_empty() { + return FRONTEND_METRIC_PREFIX.to_string(); + } + + let first = s.as_bytes()[0]; + let valid_first = first.is_ascii_alphabetic() || first == b'_' || first == b':'; + if !valid_first { + s.insert(0, '_'); + } + s +} + pub struct Metrics { request_counter: IntCounterVec, inflight_gauge: IntGaugeVec, @@ -117,8 +142,17 @@ impl Metrics { /// - `{prefix}_time_to_first_token_seconds` - HistogramVec for time to first token in seconds /// - `{prefix}_inter_token_latency_seconds` - HistogramVec for inter-token latency in seconds pub fn new() -> Self { - let prefix = std::env::var(METRICS_PREFIX_ENV) + let raw_prefix = std::env::var(METRICS_PREFIX_ENV) .unwrap_or_else(|_| FRONTEND_METRIC_PREFIX.to_string()); + let prefix = sanitize_prometheus_prefix(&raw_prefix); + if prefix != raw_prefix { + tracing::warn!( + raw=%raw_prefix, + sanitized=%prefix, + env=%METRICS_PREFIX_ENV, + "Sanitized HTTP metrics prefix" + ); + } let frontend_metric_name = |suffix: &str| format!("{}_{}", &prefix, suffix); let request_counter = IntCounterVec::new( diff --git a/lib/llm/tests/http_metrics.rs b/lib/llm/tests/http_metrics.rs index 464bd72e26..92001df458 100644 --- a/lib/llm/tests/http_metrics.rs +++ b/lib/llm/tests/http_metrics.rs @@ -1,36 +1,37 @@ // SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. // SPDX-License-Identifier: Apache-2.0 -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. use dynamo_llm::http::service::metrics::{self, Endpoint}; use dynamo_llm::http::service::service_v2::HttpService; use dynamo_runtime::CancellationToken; +use serial_test::serial; use std::{env, time::Duration}; #[tokio::test] +#[serial] async fn metrics_prefix_default_then_env_override() { // Case 1: default prefix env::remove_var(metrics::METRICS_PREFIX_ENV); let svc1 = HttpService::builder().port(9101).build().unwrap(); let token1 = CancellationToken::new(); let _h1 = svc1.spawn(token1.clone()).await; - tokio::time::sleep(Duration::from_millis(100)).await; + wait_for_metrics_ready(9101).await; // Populate labeled metrics let s1 = svc1.state_clone(); - { let _g = s1.metrics_clone().create_inflight_guard("test-model", Endpoint::ChatCompletions, false); } - let body1 = reqwest::get("http://localhost:9101/metrics").await.unwrap().text().await.unwrap(); + { + let _g = s1.metrics_clone().create_inflight_guard( + "test-model", + Endpoint::ChatCompletions, + false, + ); + } + let body1 = reqwest::get("http://localhost:9101/metrics") + .await + .unwrap() + .text() + .await + .unwrap(); assert!(body1.contains("dynamo_frontend_requests_total")); token1.cancel(); @@ -39,12 +40,65 @@ async fn metrics_prefix_default_then_env_override() { let svc2 = HttpService::builder().port(9102).build().unwrap(); let token2 = CancellationToken::new(); let _h2 = svc2.spawn(token2.clone()).await; - tokio::time::sleep(Duration::from_millis(100)).await; + wait_for_metrics_ready(9102).await; // Populate labeled metrics let s2 = svc2.state_clone(); - { let _g = s2.metrics_clone().create_inflight_guard("test-model", Endpoint::ChatCompletions, true); } - let body2 = reqwest::get("http://localhost:9102/metrics").await.unwrap().text().await.unwrap(); - assert!(body2.contains("custom_prefix")); + { + let _g = + s2.metrics_clone() + .create_inflight_guard("test-model", Endpoint::ChatCompletions, true); + } + // Single fetch and assert + let body2 = reqwest::get("http://localhost:9102/metrics") + .await + .unwrap() + .text() + .await + .unwrap(); + assert!(body2.contains("custom_prefix_requests_total")); + assert!(!body2.contains("dynamo_frontend_requests_total")); token2.cancel(); + + // Case 3: invalid env prefix is sanitized + env::set_var(metrics::METRICS_PREFIX_ENV, "nv-llm/http service"); + let svc3 = HttpService::builder().port(9103).build().unwrap(); + let token3 = CancellationToken::new(); + let _h3 = svc3.spawn(token3.clone()).await; + wait_for_metrics_ready(9103).await; + + let s3 = svc3.state_clone(); + { + let _g = + s3.metrics_clone() + .create_inflight_guard("test-model", Endpoint::ChatCompletions, true); + } + let body3 = reqwest::get("http://localhost:9103/metrics") + .await + .unwrap() + .text() + .await + .unwrap(); + assert!(body3.contains("nv_llm_http_service_requests_total")); + assert!(!body3.contains("dynamo_frontend_requests_total")); + token3.cancel(); + + // Cleanup env to avoid leaking state + env::remove_var(metrics::METRICS_PREFIX_ENV); +} + +// Poll /metrics until ready or timeout +async fn wait_for_metrics_ready(port: u16) { + let url = format!("http://localhost:{}/metrics", port); + let start = tokio::time::Instant::now(); + let timeout = Duration::from_secs(5); + loop { + if start.elapsed() > timeout { + panic!("Timed out waiting for metrics endpoint at {}", url); + } + match reqwest::get(&url).await { + Ok(resp) if resp.status().is_success() => break, + _ => tokio::time::sleep(Duration::from_millis(50)).await, + } + } }