Skip to content

Commit 26df46e

Browse files
authored
[Misc] cli auto show default value (#15582)
Signed-off-by: reidliu41 <reid201711@gmail.com>
1 parent c3f687a commit 26df46e

File tree

4 files changed

+11
-22
lines changed

4 files changed

+11
-22
lines changed

vllm/benchmarks/serve.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -726,15 +726,13 @@ def add_cli_args(parser: argparse.ArgumentParser):
726726
default="ttft,tpot,itl",
727727
help="Comma-separated list of selected metrics to report percentiles. "
728728
"This argument specifies the metrics to report percentiles. "
729-
"Allowed metric names are \"ttft\", \"tpot\", \"itl\", \"e2el\". "
730-
"Default value is \"ttft,tpot,itl\".")
729+
"Allowed metric names are \"ttft\", \"tpot\", \"itl\", \"e2el\". ")
731730
parser.add_argument(
732731
"--metric-percentiles",
733732
type=str,
734733
default="99",
735734
help="Comma-separated list of percentiles for selected metrics. "
736735
"To report 25-th, 50-th, and 75-th percentiles, use \"25,50,75\". "
737-
"Default value is \"99\". "
738736
"Use \"--percentile-metrics\" to select metrics.",
739737
)
740738
parser.add_argument(

vllm/engine/arg_utils.py

Lines changed: 8 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -322,9 +322,7 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
322322
parser.add_argument('--download-dir',
323323
type=nullable_str,
324324
default=EngineArgs.download_dir,
325-
help='Directory to download and load the weights, '
326-
'default to the default cache dir of '
327-
'huggingface.')
325+
help='Directory to download and load the weights.')
328326
parser.add_argument(
329327
'--load-format',
330328
type=str,
@@ -399,8 +397,7 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
399397
'Valid backend values are "xgrammar", "guidance", and "auto". '
400398
'With "auto", we will make opinionated choices based on request'
401399
'contents and what the backend libraries currently support, so '
402-
'the behavior is subject to change in each release. '
403-
'The default is xgrammar.')
400+
'the behavior is subject to change in each release.')
404401
parser.add_argument(
405402
'--logits-processor-pattern',
406403
type=nullable_str,
@@ -493,8 +490,7 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
493490
default=EngineArgs.prefix_caching_hash_algo,
494491
help="Set the hash algorithm for prefix caching. "
495492
"Options are 'builtin' (Python's built-in hash) or 'sha256' "
496-
"(collision resistant but with certain overheads). Defaults "
497-
"to 'builtin'.",
493+
"(collision resistant but with certain overheads).",
498494
)
499495
parser.add_argument('--disable-sliding-window',
500496
action='store_true',
@@ -568,9 +564,7 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
568564
type=int,
569565
default=EngineArgs.max_num_partial_prefills,
570566
help="For chunked prefill, the max number of concurrent \
571-
partial prefills."
572-
"Defaults to 1",
573-
)
567+
partial prefills.")
574568
parser.add_argument(
575569
"--max-long-partial-prefills",
576570
type=int,
@@ -579,15 +573,13 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
579573
"than --long-prefill-token-threshold that will be prefilled "
580574
"concurrently. Setting this less than --max-num-partial-prefills "
581575
"will allow shorter prompts to jump the queue in front of longer "
582-
"prompts in some cases, improving latency. Defaults to 1.")
576+
"prompts in some cases, improving latency.")
583577
parser.add_argument(
584578
"--long-prefill-token-threshold",
585579
type=float,
586580
default=EngineArgs.long_prefill_token_threshold,
587581
help="For chunked prefill, a request is considered long if the "
588-
"prompt is longer than this number of tokens. Defaults to 4%% of "
589-
"the model's context length.",
590-
)
582+
"prompt is longer than this number of tokens.")
591583
parser.add_argument('--max-num-seqs',
592584
type=int,
593585
default=EngineArgs.max_num_seqs,
@@ -739,8 +731,7 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
739731
type=int,
740732
default=EngineArgs.max_cpu_loras,
741733
help=('Maximum number of LoRAs to store in CPU memory. '
742-
'Must be >= than max_loras. '
743-
'Defaults to max_loras.'))
734+
'Must be >= than max_loras.'))
744735
parser.add_argument(
745736
'--fully-sharded-loras',
746737
action='store_true',
@@ -894,7 +885,7 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
894885
help='Set the lower bound threshold for the posterior '
895886
'probability of a token to be accepted. This threshold is '
896887
'used by the TypicalAcceptanceSampler to make sampling decisions '
897-
'during speculative decoding. Defaults to 0.09')
888+
'during speculative decoding.')
898889

899890
parser.add_argument(
900891
'--typical-acceptance-sampler-posterior-alpha',

vllm/entrypoints/openai/cli_args.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,7 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
247247
default=None,
248248
help='Max number of prompt characters or prompt '
249249
'ID numbers being printed in log.'
250-
'\n\nDefault: Unlimited')
250+
' The default of None means unlimited.')
251251

252252
parser.add_argument(
253253
"--disable-fastapi-docs",

vllm/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1212,7 +1212,7 @@ def __call__(self, parser, namespace, values, option_string=None):
12121212
"Expected 'true' or 'false'.")
12131213

12141214

1215-
class SortedHelpFormatter(argparse.HelpFormatter):
1215+
class SortedHelpFormatter(argparse.ArgumentDefaultsHelpFormatter):
12161216
"""SortedHelpFormatter that sorts arguments by their option strings."""
12171217

12181218
def add_arguments(self, actions):

0 commit comments

Comments
 (0)