22// SPDX-License-Identifier: Apache-2.0
33
44use axum:: { Router , extract:: State , http:: StatusCode , response:: IntoResponse , routing:: get} ;
5+ use dynamo_runtime:: metrics:: prometheus_names:: {
6+ frontend_service, name_prefix, sanitize_frontend_prometheus_prefix,
7+ } ;
58use prometheus:: { Encoder , HistogramOpts , HistogramVec , IntCounterVec , IntGaugeVec , Opts } ;
69use std:: {
710 sync:: Arc ,
@@ -12,49 +15,6 @@ pub use prometheus::Registry;
1215
1316use super :: RouteDoc ;
1417
15- // Default metric prefix
16- pub const FRONTEND_METRIC_PREFIX : & str = "dynamo_frontend" ;
17-
18- // Environment variable that overrides the default metric prefix if provided
19- pub const METRICS_PREFIX_ENV : & str = "DYN_METRICS_PREFIX" ;
20-
21- /// Value for the `status` label in the request counter for successful requests
22- pub const REQUEST_STATUS_SUCCESS : & str = "success" ;
23-
24- /// Value for the `status` label in the request counter if the request failed
25- pub const REQUEST_STATUS_ERROR : & str = "error" ;
26-
27- /// Partial value for the `type` label in the request counter for streaming requests
28- pub const REQUEST_TYPE_STREAM : & str = "stream" ;
29-
30- /// Partial value for the `type` label in the request counter for unary requests
31- pub const REQUEST_TYPE_UNARY : & str = "unary" ;
32-
33- fn sanitize_prometheus_prefix ( raw : & str ) -> String {
34- // Prometheus metric name pattern: [a-zA-Z_:][a-zA-Z0-9_:]*
35- let mut s: String = raw
36- . chars ( )
37- . map ( |c| {
38- if c. is_ascii_alphanumeric ( ) || c == '_' || c == ':' {
39- c
40- } else {
41- '_'
42- }
43- } )
44- . collect ( ) ;
45-
46- if s. is_empty ( ) {
47- return FRONTEND_METRIC_PREFIX . to_string ( ) ;
48- }
49-
50- let first = s. as_bytes ( ) [ 0 ] ;
51- let valid_first = first. is_ascii_alphabetic ( ) || first == b'_' || first == b':' ;
52- if !valid_first {
53- s. insert ( 0 , '_' ) ;
54- }
55- s
56- }
57-
5818pub struct Metrics {
5919 request_counter : IntCounterVec ,
6020 inflight_gauge : IntGaugeVec ,
@@ -67,8 +27,8 @@ pub struct Metrics {
6727
6828/// RAII object for inflight gauge and request counters
6929/// If this object is dropped without calling `mark_ok`, then the request will increment
70- /// the request counter with the `status` label with [`REQUEST_STATUS_ERROR `]; otherwise, it will increment
71- /// the counter with `status` label [`REQUEST_STATUS_SUCCESS `]
30+ /// the request counter with the `status` label with [`frontend_service::status::ERROR `]; otherwise, it will increment
31+ /// the counter with `status` label [`frontend_service::status::SUCCESS `]
7232pub struct InflightGuard {
7333 metrics : Arc < Metrics > ,
7434 model : String ,
@@ -130,7 +90,7 @@ impl Default for Metrics {
13090}
13191
13292impl Metrics {
133- /// Create Metrics with the standard prefix defined by [`FRONTEND_METRIC_PREFIX `] or specify custom prefix via the following environment variable:
93+ /// Create Metrics with the standard prefix defined by [`name_prefix::FRONTEND `] or specify custom prefix via the following environment variable:
13494 /// - `DYN_METRICS_PREFIX`: Override the default metrics prefix
13595 ///
13696 /// The following metrics will be created with the configured prefix:
@@ -142,22 +102,22 @@ impl Metrics {
142102 /// - `{prefix}_time_to_first_token_seconds` - HistogramVec for time to first token in seconds
143103 /// - `{prefix}_inter_token_latency_seconds` - HistogramVec for inter-token latency in seconds
144104 pub fn new ( ) -> Self {
145- let raw_prefix = std:: env:: var ( METRICS_PREFIX_ENV )
146- . unwrap_or_else ( |_| FRONTEND_METRIC_PREFIX . to_string ( ) ) ;
147- let prefix = sanitize_prometheus_prefix ( & raw_prefix) ;
105+ let raw_prefix = std:: env:: var ( frontend_service :: METRICS_PREFIX_ENV )
106+ . unwrap_or_else ( |_| name_prefix :: FRONTEND . to_string ( ) ) ;
107+ let prefix = sanitize_frontend_prometheus_prefix ( & raw_prefix) ;
148108 if prefix != raw_prefix {
149109 tracing:: warn!(
150110 raw=%raw_prefix,
151111 sanitized=%prefix,
152- env=%METRICS_PREFIX_ENV ,
112+ env=%frontend_service :: METRICS_PREFIX_ENV ,
153113 "Sanitized HTTP metrics prefix"
154114 ) ;
155115 }
156116 let frontend_metric_name = |suffix : & str | format ! ( "{}_{}" , & prefix, suffix) ;
157117
158118 let request_counter = IntCounterVec :: new (
159119 Opts :: new (
160- frontend_metric_name ( "requests_total" ) ,
120+ frontend_metric_name ( frontend_service :: REQUESTS_TOTAL ) ,
161121 "Total number of LLM requests processed" ,
162122 ) ,
163123 & [ "model" , "endpoint" , "request_type" , "status" ] ,
@@ -166,7 +126,7 @@ impl Metrics {
166126
167127 let inflight_gauge = IntGaugeVec :: new (
168128 Opts :: new (
169- frontend_metric_name ( "inflight_requests" ) ,
129+ frontend_metric_name ( frontend_service :: INFLIGHT_REQUESTS ) ,
170130 "Number of inflight requests" ,
171131 ) ,
172132 & [ "model" ] ,
@@ -177,7 +137,7 @@ impl Metrics {
177137
178138 let request_duration = HistogramVec :: new (
179139 HistogramOpts :: new (
180- frontend_metric_name ( "request_duration_seconds" ) ,
140+ frontend_metric_name ( frontend_service :: REQUEST_DURATION_SECONDS ) ,
181141 "Duration of LLM requests" ,
182142 )
183143 . buckets ( buckets) ,
@@ -187,7 +147,7 @@ impl Metrics {
187147
188148 let input_sequence_length = HistogramVec :: new (
189149 HistogramOpts :: new (
190- frontend_metric_name ( "input_sequence_tokens" ) ,
150+ frontend_metric_name ( frontend_service :: INPUT_SEQUENCE_TOKENS ) ,
191151 "Input sequence length in tokens" ,
192152 )
193153 . buckets ( vec ! [
@@ -200,7 +160,7 @@ impl Metrics {
200160
201161 let output_sequence_length = HistogramVec :: new (
202162 HistogramOpts :: new (
203- frontend_metric_name ( "output_sequence_tokens" ) ,
163+ frontend_metric_name ( frontend_service :: OUTPUT_SEQUENCE_TOKENS ) ,
204164 "Output sequence length in tokens" ,
205165 )
206166 . buckets ( vec ! [
@@ -212,7 +172,7 @@ impl Metrics {
212172
213173 let time_to_first_token = HistogramVec :: new (
214174 HistogramOpts :: new (
215- frontend_metric_name ( "time_to_first_token_seconds" ) ,
175+ frontend_metric_name ( frontend_service :: TIME_TO_FIRST_TOKEN_SECONDS ) ,
216176 "Time to first token in seconds" ,
217177 )
218178 . buckets ( vec ! [
@@ -225,7 +185,7 @@ impl Metrics {
225185
226186 let inter_token_latency = HistogramVec :: new (
227187 HistogramOpts :: new (
228- frontend_metric_name ( "inter_token_latency_seconds" ) ,
188+ frontend_metric_name ( frontend_service :: INTER_TOKEN_LATENCY_SECONDS ) ,
229189 "Inter-token latency in seconds" ,
230190 )
231191 . buckets ( vec ! [
@@ -422,17 +382,17 @@ impl Endpoint {
422382impl RequestType {
423383 pub fn as_str ( & self ) -> & ' static str {
424384 match self {
425- RequestType :: Unary => REQUEST_TYPE_UNARY ,
426- RequestType :: Stream => REQUEST_TYPE_STREAM ,
385+ RequestType :: Unary => frontend_service :: request_type :: UNARY ,
386+ RequestType :: Stream => frontend_service :: request_type :: STREAM ,
427387 }
428388 }
429389}
430390
431391impl Status {
432392 pub fn as_str ( & self ) -> & ' static str {
433393 match self {
434- Status :: Success => REQUEST_STATUS_SUCCESS ,
435- Status :: Error => REQUEST_STATUS_ERROR ,
394+ Status :: Success => frontend_service :: status :: SUCCESS ,
395+ Status :: Error => frontend_service :: status :: ERROR ,
436396 }
437397 }
438398}
0 commit comments