diff --git a/vllm/engine/metrics.py b/vllm/engine/metrics.py index 7c4265fac20b..0f79b7e79d38 100644 --- a/vllm/engine/metrics.py +++ b/vllm/engine/metrics.py @@ -140,16 +140,13 @@ def __init__(self, labelnames: List[str], vllm_config: VllmConfig): name="vllm:generation_tokens_total", documentation="Number of generation tokens processed.", labelnames=labelnames) - buckets = [1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8096] - if not vllm_config.model_config.enforce_eager: - buckets = vllm_config.compilation_config.\ - cudagraph_capture_sizes.copy() - buckets.sort() self.histogram_iteration_tokens = self._histogram_cls( name="vllm:iteration_tokens_total", documentation="Histogram of number of tokens per engine_step.", labelnames=labelnames, - buckets=buckets) + buckets=[ + 1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8096, 16192 + ]) self.histogram_time_to_first_token = self._histogram_cls( name="vllm:time_to_first_token_seconds", documentation="Histogram of time to first token in seconds.", diff --git a/vllm/v1/metrics/loggers.py b/vllm/v1/metrics/loggers.py index 22d1d9724c8c..e2e0b305e81f 100644 --- a/vllm/v1/metrics/loggers.py +++ b/vllm/v1/metrics/loggers.py @@ -232,7 +232,10 @@ def __init__(self, vllm_config: VllmConfig, engine_index: int = 0): prometheus_client.Histogram( name="vllm:iteration_tokens_total", documentation="Histogram of number of tokens per engine_step.", - buckets=build_cudagraph_buckets(vllm_config), + buckets=[ + 1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8096, + 16192 + ], labelnames=labelnames).labels(*labelvalues) self.histogram_max_num_generation_tokens_request = \ @@ -467,16 +470,6 @@ def build_1_2_5_buckets(max_value: int) -> list[int]: return build_buckets([1, 2, 5], max_value) -def build_cudagraph_buckets(vllm_config: VllmConfig) -> list[int]: - if not vllm_config.model_config.enforce_eager: - buckets = vllm_config.compilation_config.\ - cudagraph_capture_sizes.copy() - buckets.sort() - return buckets - else: - return [1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8096] - - def setup_default_loggers( vllm_config: VllmConfig, log_stats: bool,