@@ -56,15 +56,20 @@ def auto_mock(module, attr, max_mocks=50):
     )
 
 
-latency = auto_mock("vllm.benchmarks", "latency")
-serve = auto_mock("vllm.benchmarks", "serve")
-throughput = auto_mock("vllm.benchmarks", "throughput")
+bench_latency = auto_mock("vllm.benchmarks", "latency")
+bench_serve = auto_mock("vllm.benchmarks", "serve")
+bench_sweep_plot = auto_mock("vllm.benchmarks.sweep.plot", "SweepPlotArgs")
+bench_sweep_serve = auto_mock("vllm.benchmarks.sweep.serve", "SweepServeArgs")
+bench_sweep_serve_sla = auto_mock(
+    "vllm.benchmarks.sweep.serve_sla", "SweepServeSLAArgs"
+)
+bench_throughput = auto_mock("vllm.benchmarks", "throughput")
 AsyncEngineArgs = auto_mock("vllm.engine.arg_utils", "AsyncEngineArgs")
 EngineArgs = auto_mock("vllm.engine.arg_utils", "EngineArgs")
 ChatCommand = auto_mock("vllm.entrypoints.cli.openai", "ChatCommand")
 CompleteCommand = auto_mock("vllm.entrypoints.cli.openai", "CompleteCommand")
-cli_args = auto_mock("vllm.entrypoints.openai", "cli_args")
-run_batch = auto_mock("vllm.entrypoints.openai", "run_batch")
+openai_cli_args = auto_mock("vllm.entrypoints.openai", "cli_args")
+openai_run_batch = auto_mock("vllm.entrypoints.openai", "run_batch")
 FlexibleArgumentParser = auto_mock(
     "vllm.utils.argparse_utils", "FlexibleArgumentParser"
 )
@@ -114,6 +119,9 @@ def add_arguments(self, actions):
             self._markdown_output.append(f"{action.help}\n\n")
 
         if (default := action.default) != SUPPRESS:
+            # Make empty string defaults visible
+            if default == "":
+                default = '""'
             self._markdown_output.append(f"Default: `{default}`\n\n")
 
     def format_help(self):
@@ -150,17 +158,23 @@ def on_startup(command: Literal["build", "gh-deploy", "serve"], dirty: bool):
 
     # Create parsers to document
     parsers = {
+        # Engine args
         "engine_args": create_parser(EngineArgs.add_cli_args),
         "async_engine_args": create_parser(
             AsyncEngineArgs.add_cli_args, async_args_only=True
         ),
-        "serve": create_parser(cli_args.make_arg_parser),
+        # CLI
+        "serve": create_parser(openai_cli_args.make_arg_parser),
         "chat": create_parser(ChatCommand.add_cli_args),
         "complete": create_parser(CompleteCommand.add_cli_args),
-        "bench_latency": create_parser(latency.add_cli_args),
-        "bench_throughput": create_parser(throughput.add_cli_args),
-        "bench_serve": create_parser(serve.add_cli_args),
-        "run-batch": create_parser(run_batch.make_arg_parser),
+        "run-batch": create_parser(openai_run_batch.make_arg_parser),
+        # Benchmark CLI
+        "bench_latency": create_parser(bench_latency.add_cli_args),
+        "bench_serve": create_parser(bench_serve.add_cli_args),
+        "bench_sweep_plot": create_parser(bench_sweep_plot.add_cli_args),
+        "bench_sweep_serve": create_parser(bench_sweep_serve.add_cli_args),
+        "bench_sweep_serve_sla": create_parser(bench_sweep_serve_sla.add_cli_args),
+        "bench_throughput": create_parser(bench_throughput.add_cli_args),
     }
165179
166180 # Generate documentation for each parser
0 commit comments