@@ -22,8 +22,8 @@ use crate::discovery::ModelEntry;
2222use crate :: local_model:: runtime_config:: ModelRuntimeConfig ;
2323use crate :: model_card:: { ModelDeploymentCard , ROOT_PATH as MDC_ROOT_PATH } ;
2424use dynamo_runtime:: metrics:: prometheus_names:: clamp_u64_to_i64;
25- use dynamo_runtime:: storage:: key_value_store:: { EtcdStorage , KeyValueStore , KeyValueStoreManager } ;
2625use dynamo_runtime:: slug:: Slug ;
26+ use dynamo_runtime:: storage:: key_value_store:: { EtcdStorage , KeyValueStore , KeyValueStoreManager } ;
2727
2828pub use prometheus:: Registry ;
2929
@@ -49,7 +49,6 @@ pub struct Metrics {
4949 model_context_length : IntGaugeVec ,
5050 model_kv_cache_block_size : IntGaugeVec ,
5151 model_migration_limit : IntGaugeVec ,
52- model_workers : IntGaugeVec , // this is an actual gauge, not a counter
5352}
5453
5554// Inflight tracks requests from HTTP handler start until complete response is finished.
@@ -156,12 +155,11 @@ impl Metrics {
156155 /// - `{prefix}_model_context_length` - IntGaugeVec for maximum context length for a worker serving the model
157156 /// - `{prefix}_model_kv_cache_block_size` - IntGaugeVec for KV cache block size for a worker serving the model
158157 /// - `{prefix}_model_migration_limit` - IntGaugeVec for request migration limit for a worker serving the model
159- /// - `{prefix}_model_workers` - IntGaugeVec for number of worker instances serving each model
160158 ///
161159 /// ## Runtime Config Polling Configuration
162160 ///
163161 /// The polling behavior can be configured via environment variables:
164- /// - `DYN_HTTP_SVC_CONFIG_METRICS_POLL_INTERVAL_SECS`: Poll interval in seconds (must be > 0, defaults to 8)
162+ /// - `DYN_HTTP_SVC_CONFIG_METRICS_POLL_INTERVAL_SECS`: Poll interval in seconds (must be > 0, supports fractional seconds, defaults to 8)
165163 ///
166164 /// Metrics are never removed to preserve historical data. Runtime config and MDC
167165 /// metrics are updated when models are discovered and their configurations are available.
@@ -332,15 +330,6 @@ impl Metrics {
332330 )
333331 . unwrap ( ) ;
334332
335- let model_workers = IntGaugeVec :: new (
336- Opts :: new (
337- frontend_metric_name ( frontend_service:: MODEL_WORKERS ) ,
338- "Number of worker instances currently serving the model" ,
339- ) ,
340- & [ "model" ] ,
341- )
342- . unwrap ( ) ;
343-
344333 Metrics {
345334 request_counter,
346335 inflight_gauge,
@@ -357,7 +346,6 @@ impl Metrics {
357346 model_context_length,
358347 model_kv_cache_block_size,
359348 model_migration_limit,
360- model_workers,
361349 }
362350 }
363351
@@ -454,7 +442,6 @@ impl Metrics {
454442 registry. register ( Box :: new ( self . model_context_length . clone ( ) ) ) ?;
455443 registry. register ( Box :: new ( self . model_kv_cache_block_size . clone ( ) ) ) ?;
456444 registry. register ( Box :: new ( self . model_migration_limit . clone ( ) ) ) ?;
457- registry. register ( Box :: new ( self . model_workers . clone ( ) ) ) ?;
458445
459446 Ok ( ( ) )
460447 }
@@ -507,7 +494,6 @@ impl Metrics {
507494 . set ( migration_limit as i64 ) ;
508495 }
509496
510-
511497 /// Update metrics from a ModelEntry
512498 /// This is a convenience method that extracts runtime config from a ModelEntry
513499 /// and updates the appropriate metrics
@@ -534,7 +520,10 @@ impl Metrics {
534520 let store: Box < dyn KeyValueStore > = Box :: new ( EtcdStorage :: new ( etcd_client. clone ( ) ) ) ;
535521 let card_store = Arc :: new ( KeyValueStoreManager :: new ( store) ) ;
536522
537- match card_store. load :: < ModelDeploymentCard > ( MDC_ROOT_PATH , & model_slug) . await {
523+ match card_store
524+ . load :: < ModelDeploymentCard > ( MDC_ROOT_PATH , & model_slug)
525+ . await
526+ {
538527 Ok ( Some ( mdc) ) => {
539528 self . update_mdc_metrics (
540529 & model_entry. name ,
@@ -566,11 +555,27 @@ impl Metrics {
566555 }
567556
568557 /// Start a background task that periodically updates runtime config metrics
569- /// This polls the ModelManager for current models and updates metrics accordingly
570- /// Models are never removed - only marked as healthy/unhealthy to preserve historical data
571558 ///
572- /// Note: If multiple model instances have the same name, only the first instance's metrics are used.
573- /// Subsequent instances with duplicate names will be skipped.
559+ /// ## Why Polling is Required
560+ ///
561+ /// Polling is necessary because new models may come online at any time through the distributed
562+ /// discovery system. The ModelManager is continuously updated as workers register/deregister
563+ /// with etcd, and we need to periodically check for these changes to expose their metrics.
564+ ///
565+ /// ## Behavior
566+ ///
567+ /// - Polls the ModelManager for current models and updates metrics accordingly
568+ /// - Models are never removed from metrics to preserve historical data
569+ /// - If multiple model instances have the same name, only the first instance's metrics are used
570+ /// - Subsequent instances with duplicate names will be skipped
571+ ///
572+ /// ## MDC (Model Deployment Card) Behavior
573+ ///
574+ /// Currently, we don't overwrite an MDC. The first worker to start wins, and we assume
575+ /// that all other workers claiming to serve that model really are using the same configuration.
576+ /// Later, every worker will have its own MDC, and the frontend will validate that they
577+ /// checksum the same. For right now, you can assume they have the same MDC, because
578+ /// they aren't allowed to change it.
574579 ///
575580 /// The task will run until the provided cancellation token is cancelled.
576581 pub fn start_runtime_config_polling_task (
@@ -603,31 +608,12 @@ impl Metrics {
603608 // Get current model entries from the manager
604609 let current_entries = manager. get_model_entries ( ) ;
605610 let mut current_models = std:: collections:: HashSet :: new ( ) ;
606- let mut model_worker_counts = std:: collections:: HashMap :: new ( ) ;
607-
608- // Count worker instances per model
609- for entry in & current_entries {
610- * model_worker_counts. entry ( entry. name . clone ( ) ) . or_insert ( 0 ) += 1 ;
611- }
612-
613- // Update worker count metrics for all models
614- for ( model_name, count) in & model_worker_counts {
615- metrics. model_workers
616- . with_label_values ( & [ model_name] )
617- . set ( * count) ;
618- }
619-
620- // Reset worker count to 0 for models that no longer have any workers
621- let current_models_with_workers: std:: collections:: HashSet < String > =
622- model_worker_counts. keys ( ) . cloned ( ) . collect ( ) ;
623- for model_name in known_models. difference ( & current_models_with_workers) {
624- metrics. model_workers
625- . with_label_values ( & [ model_name] )
626- . set ( 0 ) ;
627- }
628611
629612 // Note: If multiple model instances have the same name, only the first instance's config metrics are recorded.
630613 // Subsequent instances with duplicate names will be skipped for config updates.
614+ // This is based on the assumption that all workers serving the same model have identical
615+ // configuration values (MDC content, runtime config, etc.). This assumption holds because
616+ // workers are not allowed to change their configuration after registration.
631617
632618 // Update configuration metrics for current models
633619 for entry in current_entries {
@@ -660,14 +646,6 @@ impl Metrics {
660646 }
661647 }
662648
663- // Log models that are no longer active (worker count reset to 0, other metrics preserved)
664- for model_name in known_models. difference ( & current_models_with_workers) {
665- tracing:: debug!(
666- model = %model_name,
667- "Model no longer active (worker count reset to 0, other metrics preserved)"
668- ) ;
669- }
670-
671649 // Update our known models set
672650 known_models. extend ( current_models. iter ( ) . cloned ( ) ) ;
673651
0 commit comments