@@ -22,8 +22,8 @@ use crate::discovery::ModelEntry;
2222use crate :: local_model:: runtime_config:: ModelRuntimeConfig ;
2323use crate :: model_card:: { ModelDeploymentCard , ROOT_PATH as MDC_ROOT_PATH } ;
2424use dynamo_runtime:: metrics:: prometheus_names:: clamp_u64_to_i64;
25- use dynamo_runtime:: storage:: key_value_store:: { EtcdStorage , KeyValueStore , KeyValueStoreManager } ;
2625use dynamo_runtime:: slug:: Slug ;
26+ use dynamo_runtime:: storage:: key_value_store:: { EtcdStorage , KeyValueStore , KeyValueStoreManager } ;
2727
2828pub use prometheus:: Registry ;
2929
@@ -49,7 +49,6 @@ pub struct Metrics {
4949 model_context_length : IntGaugeVec ,
5050 model_kv_cache_block_size : IntGaugeVec ,
5151 model_migration_limit : IntGaugeVec ,
52- model_workers : IntGaugeVec , // this is an actual gauge, not a counter
5352}
5453
5554// Inflight tracks requests from HTTP handler start until complete response is finished.
@@ -153,12 +152,11 @@ impl Metrics {
153152 /// - `{prefix}_model_context_length` - IntGaugeVec for maximum context length for a worker serving the model
154153 /// - `{prefix}_model_kv_cache_block_size` - IntGaugeVec for KV cache block size for a worker serving the model
155154 /// - `{prefix}_model_migration_limit` - IntGaugeVec for request migration limit for a worker serving the model
156- /// - `{prefix}_model_workers` - IntGaugeVec for number of worker instances serving each model
157155 ///
158156 /// ## Runtime Config Polling Configuration
159157 ///
160158 /// The polling behavior can be configured via environment variables:
161- /// - `DYN_HTTP_SVC_CONFIG_METRICS_POLL_INTERVAL_SECS`: Poll interval in seconds (must be > 0, defaults to 8)
159+ /// - `DYN_HTTP_SVC_CONFIG_METRICS_POLL_INTERVAL_SECS`: Poll interval in seconds (must be > 0, supports fractional seconds, defaults to 8)
162160 ///
163161 /// Metrics are never removed to preserve historical data. Runtime config and MDC
164162 /// metrics are updated when models are discovered and their configurations are available.
@@ -329,15 +327,6 @@ impl Metrics {
329327 )
330328 . unwrap ( ) ;
331329
332- let model_workers = IntGaugeVec :: new (
333- Opts :: new (
334- frontend_metric_name ( frontend_service:: MODEL_WORKERS ) ,
335- "Number of worker instances currently serving the model" ,
336- ) ,
337- & [ "model" ] ,
338- )
339- . unwrap ( ) ;
340-
341330 Metrics {
342331 request_counter,
343332 inflight_gauge,
@@ -354,7 +343,6 @@ impl Metrics {
354343 model_context_length,
355344 model_kv_cache_block_size,
356345 model_migration_limit,
357- model_workers,
358346 }
359347 }
360348
@@ -451,7 +439,6 @@ impl Metrics {
451439 registry. register ( Box :: new ( self . model_context_length . clone ( ) ) ) ?;
452440 registry. register ( Box :: new ( self . model_kv_cache_block_size . clone ( ) ) ) ?;
453441 registry. register ( Box :: new ( self . model_migration_limit . clone ( ) ) ) ?;
454- registry. register ( Box :: new ( self . model_workers . clone ( ) ) ) ?;
455442
456443 Ok ( ( ) )
457444 }
@@ -504,7 +491,6 @@ impl Metrics {
504491 . set ( migration_limit as i64 ) ;
505492 }
506493
507-
508494 /// Update metrics from a ModelEntry
509495 /// This is a convenience method that extracts runtime config from a ModelEntry
510496 /// and updates the appropriate metrics
@@ -531,7 +517,10 @@ impl Metrics {
531517 let store: Box < dyn KeyValueStore > = Box :: new ( EtcdStorage :: new ( etcd_client. clone ( ) ) ) ;
532518 let card_store = Arc :: new ( KeyValueStoreManager :: new ( store) ) ;
533519
534- match card_store. load :: < ModelDeploymentCard > ( MDC_ROOT_PATH , & model_slug) . await {
520+ match card_store
521+ . load :: < ModelDeploymentCard > ( MDC_ROOT_PATH , & model_slug)
522+ . await
523+ {
535524 Ok ( Some ( mdc) ) => {
536525 self . update_mdc_metrics (
537526 & model_entry. name ,
@@ -563,11 +552,27 @@ impl Metrics {
563552 }
564553
565554 /// Start a background task that periodically updates runtime config metrics
566- /// This polls the ModelManager for current models and updates metrics accordingly
567- /// Models are never removed - only marked as healthy/unhealthy to preserve historical data
568555 ///
569- /// Note: If multiple model instances have the same name, only the first instance's metrics are used.
570- /// Subsequent instances with duplicate names will be skipped.
556+ /// ## Why Polling is Required
557+ ///
558+ /// Polling is necessary because new models may come online at any time through the distributed
559+ /// discovery system. The ModelManager is continuously updated as workers register/deregister
560+ /// with etcd, and we need to periodically check for these changes to expose their metrics.
561+ ///
562+ /// ## Behavior
563+ ///
564+ /// - Polls the ModelManager for current models and updates metrics accordingly
565+ /// - Models are never removed from metrics to preserve historical data
566+ /// - If multiple model instances have the same name, only the first instance's metrics are used
567+ /// - Subsequent instances with duplicate names will be skipped
568+ ///
569+ /// ## MDC (Model Deployment Card) Behavior
570+ ///
571+ /// Currently, we don't overwrite an MDC. The first worker to start wins, and we assume
572+ /// that all other workers claiming to serve that model really are using the same configuration.
573+ /// Later, every worker will have its own MDC, and the frontend will validate that they
574+ /// checksum the same. For right now, you can assume they have the same MDC, because
575+ /// they aren't allowed to change it.
571576 ///
572577 /// The task will run until the provided cancellation token is cancelled.
573578 pub fn start_runtime_config_polling_task (
@@ -600,31 +605,12 @@ impl Metrics {
600605 // Get current model entries from the manager
601606 let current_entries = manager. get_model_entries ( ) ;
602607 let mut current_models = std:: collections:: HashSet :: new ( ) ;
603- let mut model_worker_counts = std:: collections:: HashMap :: new ( ) ;
604-
605- // Count worker instances per model
606- for entry in & current_entries {
607- * model_worker_counts. entry ( entry. name . clone ( ) ) . or_insert ( 0 ) += 1 ;
608- }
609-
610- // Update worker count metrics for all models
611- for ( model_name, count) in & model_worker_counts {
612- metrics. model_workers
613- . with_label_values ( & [ model_name] )
614- . set ( * count) ;
615- }
616-
617- // Reset worker count to 0 for models that no longer have any workers
618- let current_models_with_workers: std:: collections:: HashSet < String > =
619- model_worker_counts. keys ( ) . cloned ( ) . collect ( ) ;
620- for model_name in known_models. difference ( & current_models_with_workers) {
621- metrics. model_workers
622- . with_label_values ( & [ model_name] )
623- . set ( 0 ) ;
624- }
625608
626609 // Note: If multiple model instances have the same name, only the first instance's config metrics are recorded.
627610 // Subsequent instances with duplicate names will be skipped for config updates.
611+ // This is based on the assumption that all workers serving the same model have identical
612+ // configuration values (MDC content, runtime config, etc.). This assumption holds because
613+ // workers are not allowed to change their configuration after registration.
628614
629615 // Update configuration metrics for current models
630616 for entry in current_entries {
@@ -657,14 +643,6 @@ impl Metrics {
657643 }
658644 }
659645
660- // Log models that are no longer active (worker count reset to 0, other metrics preserved)
661- for model_name in known_models. difference ( & current_models_with_workers) {
662- tracing:: debug!(
663- model = %model_name,
664- "Model no longer active (worker count reset to 0, other metrics preserved)"
665- ) ;
666- }
667-
668646 // Update our known models set
669647 known_models. extend ( current_models. iter ( ) . cloned ( ) ) ;
670648
0 commit comments