diff --git a/vllm/benchmarks/sweep/serve.py b/vllm/benchmarks/sweep/serve.py index 6e408dac0b49..a06d4d6d6098 100644 --- a/vllm/benchmarks/sweep/serve.py +++ b/vllm/benchmarks/sweep/serve.py @@ -66,6 +66,8 @@ def run_benchmark( ): benchmark_cmd = [ *bench_overrides.apply_to_cmd(bench_cmd), + "--percentile-metrics", + "ttft,tpot,itl,e2el", "--save-result", "--result-dir", str(output_path.parent), diff --git a/vllm/benchmarks/sweep/serve_sla.py b/vllm/benchmarks/sweep/serve_sla.py index 62e2917dc22b..6159aba4bbb5 100644 --- a/vllm/benchmarks/sweep/serve_sla.py +++ b/vllm/benchmarks/sweep/serve_sla.py @@ -401,7 +401,8 @@ class SweepServeSLAArgs(SweepServeArgs): @classmethod def from_cli_args(cls, args: argparse.Namespace): - base_args = super().from_cli_args(args) + # NOTE: Don't use super() as `from_cli_args` calls `cls()` + base_args = SweepServeArgs.from_cli_args(args) if args.sla_params: sla_params = SLASweep.read_json(args.sla_params)