@@ -101,8 +101,8 @@ class EngineArgs:
101101 tokenizer_mode : str = 'auto'
102102 trust_remote_code : bool = False
103103 allowed_local_media_path : str = ""
104- download_dir : Optional [str ] = None
105- load_format : str = 'auto'
104+ download_dir : Optional [str ] = LoadConfig . download_dir
105+ load_format : str = LoadConfig . load_format
106106 config_format : ConfigFormat = ConfigFormat .AUTO
107107 dtype : str = 'auto'
108108 kv_cache_dtype : str = 'auto'
@@ -174,8 +174,10 @@ class EngineArgs:
174174 ray_workers_use_nsight : bool = ParallelConfig .ray_workers_use_nsight
175175 num_gpu_blocks_override : Optional [int ] = None
176176 num_lookahead_slots : int = 0
177- model_loader_extra_config : Optional [dict ] = None
178- ignore_patterns : Optional [Union [str , List [str ]]] = None
177+ model_loader_extra_config : Optional [
178+ dict ] = LoadConfig .model_loader_extra_config
179+ ignore_patterns : Optional [Union [str ,
180+ List [str ]]] = LoadConfig .ignore_patterns
179181 preemption_mode : Optional [str ] = None
180182
181183 scheduler_delay_factor : float = 0.0
@@ -213,7 +215,7 @@ class EngineArgs:
213215 additional_config : Optional [Dict [str , Any ]] = None
214216 enable_reasoning : Optional [bool ] = None
215217 reasoning_parser : Optional [str ] = None
216- use_tqdm_on_load : bool = True
218+ use_tqdm_on_load : bool = LoadConfig . use_tqdm_on_load
217219
218220 def __post_init__ (self ):
219221 if not self .tokenizer :
@@ -234,9 +236,13 @@ def __post_init__(self):
234236 def add_cli_args (parser : FlexibleArgumentParser ) -> FlexibleArgumentParser :
235237 """Shared CLI arguments for vLLM engine."""
236238
def is_type_in_union(cls: type[Any], type: type[Any]) -> bool:
    """Check if the class is a type in a union type.

    Both spellings of a union are recognised: ``typing.Union[X, Y]``
    (which includes ``Optional[X]``) and the PEP 604 form ``X | Y``.

    Args:
        cls: The annotation to inspect.
        type: The member to look for among the union's arguments. The
            name shadows the builtin inside this function; it is kept so
            existing callers are unaffected.

    Returns:
        ``True`` if ``cls`` is a union and ``type`` is one of its
        members, ``False`` otherwise (including when ``cls`` is not a
        union at all).
    """
    # Imported locally so the helper does not depend on a module-level import.
    import types

    origin = get_origin(cls)
    # On Python 3.10-3.13 ``get_origin(X | Y)`` is ``types.UnionType`` rather
    # than ``typing.Union``; the attribute does not exist before 3.10.
    pep604_union = getattr(types, "UnionType", None)
    is_union = origin is Union or (pep604_union is not None
                                   and origin is pep604_union)
    return is_union and type in get_args(cls)
242+
def is_optional(cls: type[Any]) -> bool:
    """Check if the class is an optional type.

    An annotation counts as optional when it is a union that lists
    ``NoneType`` among its members, e.g. ``Optional[str]``.
    """
    none_type = type(None)
    return is_type_in_union(cls, none_type)
240246
241247 def get_kwargs (cls : type [Any ]) -> Dict [str , Any ]:
242248 cls_docs = get_attr_docs (cls )
@@ -255,6 +261,10 @@ def get_kwargs(cls: type[Any]) -> Dict[str, Any]:
255261 if is_optional (field .type ):
256262 kwargs [name ]["type" ] = nullable_str
257263 continue
264+ # Handle str in union fields
265+ if is_type_in_union (field .type , str ):
266+ kwargs [name ]["type" ] = str
267+ continue
258268 kwargs [name ]["type" ] = field .type
259269 return kwargs
260270
@@ -333,38 +343,23 @@ def get_kwargs(cls: type[Any]) -> Dict[str, Any]:
333343 "from directories specified by the server file system. "
334344 "This is a security risk. "
335345 "Should only be enabled in trusted environments." )
336- parser .add_argument ('--download-dir' ,
337- type = nullable_str ,
338- default = EngineArgs .download_dir ,
339- help = 'Directory to download and load the weights.' )
340- parser .add_argument (
341- '--load-format' ,
342- type = str ,
343- default = EngineArgs .load_format ,
344- choices = [f .value for f in LoadFormat ],
345- help = 'The format of the model weights to load.\n \n '
346- '* "auto" will try to load the weights in the safetensors format '
347- 'and fall back to the pytorch bin format if safetensors format '
348- 'is not available.\n '
349- '* "pt" will load the weights in the pytorch bin format.\n '
350- '* "safetensors" will load the weights in the safetensors format.\n '
351- '* "npcache" will load the weights in pytorch format and store '
352- 'a numpy cache to speed up the loading.\n '
353- '* "dummy" will initialize the weights with random values, '
354- 'which is mainly for profiling.\n '
355- '* "tensorizer" will load the weights using tensorizer from '
356- 'CoreWeave. See the Tensorize vLLM Model script in the Examples '
357- 'section for more information.\n '
358- '* "runai_streamer" will load the Safetensors weights using Run:ai'
359- 'Model Streamer.\n '
360- '* "bitsandbytes" will load the weights using bitsandbytes '
361- 'quantization.\n '
362- '* "sharded_state" will load weights from pre-sharded checkpoint '
363- 'files, supporting efficient loading of tensor-parallel models\n '
364- '* "gguf" will load weights from GGUF format files (details '
365- 'specified in https://github.com/ggml-org/ggml/blob/master/docs/gguf.md).\n '
366- '* "mistral" will load weights from consolidated safetensors files '
367- 'used by Mistral models.\n ' )
346+ # Model loading arguments
347+ load_kwargs = get_kwargs (LoadConfig )
348+ load_group = parser .add_argument_group (
349+ title = "LoadConfig" ,
350+ description = LoadConfig .__doc__ ,
351+ )
352+ load_group .add_argument ('--load-format' ,
353+ choices = [f .value for f in LoadFormat ],
354+ ** load_kwargs ["load_format" ])
355+ load_group .add_argument ('--download-dir' ,
356+ ** load_kwargs ["download_dir" ])
357+ load_group .add_argument ('--model-loader-extra-config' ,
358+ ** load_kwargs ["model_loader_extra_config" ])
359+ load_group .add_argument ('--use-tqdm-on-load' ,
360+ action = argparse .BooleanOptionalAction ,
361+ ** load_kwargs ["use_tqdm_on_load" ])
362+
368363 parser .add_argument (
369364 '--config-format' ,
370365 default = EngineArgs .config_format ,
@@ -770,14 +765,6 @@ def get_kwargs(cls: type[Any]) -> Dict[str, Any]:
770765 default = 1 ,
771766 help = ('Maximum number of forward steps per '
772767 'scheduler call.' ))
773- parser .add_argument (
774- '--use-tqdm-on-load' ,
775- dest = 'use_tqdm_on_load' ,
776- action = argparse .BooleanOptionalAction ,
777- default = EngineArgs .use_tqdm_on_load ,
778- help = 'Whether to enable/disable progress bar '
779- 'when loading model weights.' ,
780- )
781768
782769 parser .add_argument (
783770 '--multi-step-stream-outputs' ,
@@ -806,15 +793,6 @@ def get_kwargs(cls: type[Any]) -> Dict[str, Any]:
806793 default = None ,
807794 help = 'The configurations for speculative decoding.'
808795 ' Should be a JSON string.' )
809-
810- parser .add_argument ('--model-loader-extra-config' ,
811- type = nullable_str ,
812- default = EngineArgs .model_loader_extra_config ,
813- help = 'Extra config for model loader. '
814- 'This will be passed to the model loader '
815- 'corresponding to the chosen load_format. '
816- 'This should be a JSON string that will be '
817- 'parsed into a dictionary.' )
818796 parser .add_argument (
819797 '--ignore-patterns' ,
820798 action = "append" ,
0 commit comments