@@ -40,6 +40,8 @@ def __init__(self, engine_index: int = 0):
4040 # TODO: Make the interval configurable.
4141 self .prefix_caching_metrics = PrefixCachingMetrics ()
4242 self .spec_decoding_metrics = SpecDecodingMetrics ()
43+ self .last_prompt_throughput : float = 0.0
44+ self .last_generation_throughput : float = 0.0
4345
4446 def _reset (self , now ):
4547 self .last_log_time = now
@@ -83,8 +85,17 @@ def log(self):
8385
8486 scheduler_stats = self .last_scheduler_stats
8587
88+ log_fn = logger .info
89+ if not any (
90+ (prompt_throughput , generation_throughput ,
91+ self .last_prompt_throughput , self .last_generation_throughput )):
92+ # Avoid log noise on an idle production system: demote to debug only when both the current and the previously logged throughputs are zero.
93+ log_fn = logger .debug
94+ self .last_generation_throughput = generation_throughput
95+ self .last_prompt_throughput = prompt_throughput
96+
8697 # Format and print output.
87- logger . info (
98+ log_fn (
8899 "Engine %03d: "
89100 "Avg prompt throughput: %.1f tokens/s, "
90101 "Avg generation throughput: %.1f tokens/s, "
@@ -101,7 +112,7 @@ def log(self):
101112 )
102113
103114 if scheduler_stats .spec_decoding_stats is not None :
104- self .spec_decoding_metrics .log ()
115+ self .spec_decoding_metrics .log (log_fn = log_fn )
105116
106117
107118class PrometheusStatLogger (StatLoggerBase ):
0 commit comments