Skip to content

Commit 205b397

Browse files
committed
docs: improve metrics documentation and fix naming
- Fix metric name from model_workers_total to model_workers
- Document model name deduplication behavior in README.md
- Add comments explaining gauge vs counter usage for runtime config metrics
- Clarify that some metrics use gauges because they're synchronized from upstream

Signed-off-by: Keiven Chang <keivenchang@users.noreply.github.com>
1 parent a6884b2 commit 205b397

File tree

2 files changed

+18
-11
lines changed

2 files changed

+18
-11
lines changed

deploy/metrics/README.md

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -98,9 +98,11 @@ These metrics come from the Model Deployment Card information provided by worker
9898
- `dynamo_frontend_model_migration_limit`: Request migration limit for a worker serving the model (gauge)
9999

100100
**Worker Management Metrics:**
101-
- `dynamo_frontend_model_workers_total`: Number of worker instances currently serving the model (gauge)
101+
- `dynamo_frontend_model_workers`: Number of worker instances currently serving the model (gauge)
102102

103-
**Note**: The `dynamo_frontend_inflight_requests_total` metric tracks requests from HTTP handler start until the complete response is finished, while `dynamo_frontend_queued_requests_total` tracks requests from HTTP handler start until first token generation begins (including prefill time). HTTP queue time is a subset of inflight time.
103+
**Important Notes:**
104+
- The `dynamo_frontend_inflight_requests_total` metric tracks requests from HTTP handler start until the complete response is finished, while `dynamo_frontend_queued_requests_total` tracks requests from HTTP handler start until first token generation begins (including prefill time). HTTP queue time is a subset of inflight time.
105+
- **Model Name Deduplication**: When multiple worker instances register with the same model name, only the first instance's configuration metrics (runtime config and MDC metrics) will be populated. Subsequent instances with duplicate model names will be skipped for configuration metric updates, though the worker count metric will reflect all instances.
104106

105107
#### Request Processing Flow
106108

lib/llm/src/http/service/metrics.rs

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -40,14 +40,16 @@ pub struct Metrics {
4040
time_to_first_token: HistogramVec,
4141
inter_token_latency: HistogramVec,
4242

43-
// Runtime configuration metrics
43+
// Runtime configuration metrics. Note: Some of these metrics represent counter-like values from
44+
// source systems, but are implemented as gauges because they are copied/synchronized from upstream
45+
// counter values rather than being directly incremented.
4446
model_total_kv_blocks: IntGaugeVec,
4547
model_max_num_seqs: IntGaugeVec,
4648
model_max_num_batched_tokens: IntGaugeVec,
4749
model_context_length: IntGaugeVec,
4850
model_kv_cache_block_size: IntGaugeVec,
4951
model_migration_limit: IntGaugeVec,
50-
model_workers_total: IntGaugeVec,
52+
model_workers: IntGaugeVec, // this is an actual gauge, not a counter
5153
}
5254

5355
// Inflight tracks requests from HTTP handler start until complete response is finished.
@@ -154,7 +156,7 @@ impl Metrics {
154156
/// - `{prefix}_model_context_length` - IntGaugeVec for maximum context length for a worker serving the model
155157
/// - `{prefix}_model_kv_cache_block_size` - IntGaugeVec for KV cache block size for a worker serving the model
156158
/// - `{prefix}_model_migration_limit` - IntGaugeVec for request migration limit for a worker serving the model
157-
/// - `{prefix}_model_workers_total` - IntGaugeVec for number of worker instances serving each model
159+
/// - `{prefix}_model_workers` - IntGaugeVec for number of worker instances serving each model
158160
///
159161
/// ## Runtime Config Polling Configuration
160162
///
@@ -273,6 +275,9 @@ impl Metrics {
273275
.unwrap();
274276

275277
// Runtime configuration metrics
278+
// Note: Some of these metrics represent counter-like values from source systems,
279+
// but are implemented as gauges because they are copied/synchronized from upstream
280+
// counter values rather than being directly incremented.
276281
let model_total_kv_blocks = IntGaugeVec::new(
277282
Opts::new(
278283
frontend_metric_name(frontend_service::MODEL_TOTAL_KV_BLOCKS),
@@ -327,9 +332,9 @@ impl Metrics {
327332
)
328333
.unwrap();
329334

330-
let model_workers_total = IntGaugeVec::new(
335+
let model_workers = IntGaugeVec::new(
331336
Opts::new(
332-
frontend_metric_name(frontend_service::MODEL_WORKERS_TOTAL),
337+
frontend_metric_name(frontend_service::MODEL_WORKERS),
333338
"Number of worker instances currently serving the model",
334339
),
335340
&["model"],
@@ -352,7 +357,7 @@ impl Metrics {
352357
model_context_length,
353358
model_kv_cache_block_size,
354359
model_migration_limit,
355-
model_workers_total,
360+
model_workers,
356361
}
357362
}
358363

@@ -449,7 +454,7 @@ impl Metrics {
449454
registry.register(Box::new(self.model_context_length.clone()))?;
450455
registry.register(Box::new(self.model_kv_cache_block_size.clone()))?;
451456
registry.register(Box::new(self.model_migration_limit.clone()))?;
452-
registry.register(Box::new(self.model_workers_total.clone()))?;
457+
registry.register(Box::new(self.model_workers.clone()))?;
453458

454459
Ok(())
455460
}
@@ -607,7 +612,7 @@ impl Metrics {
607612

608613
// Update worker count metrics for all models
609614
for (model_name, count) in &model_worker_counts {
610-
metrics.model_workers_total
615+
metrics.model_workers
611616
.with_label_values(&[model_name])
612617
.set(*count);
613618
}
@@ -616,7 +621,7 @@ impl Metrics {
616621
let current_models_with_workers: std::collections::HashSet<String> =
617622
model_worker_counts.keys().cloned().collect();
618623
for model_name in known_models.difference(&current_models_with_workers) {
619-
metrics.model_workers_total
624+
metrics.model_workers
620625
.with_label_values(&[model_name])
621626
.set(0);
622627
}

0 commit comments

Comments
 (0)