@@ -1585,7 +1585,7 @@ def try_get_generation_config(self) -> dict[str, Any]:
15851585 """
15861586 This method attempts to retrieve the non-default values of the
15871587 generation config for this model.
1588-
1588+
15891589 The generation config can contain information about special tokens, as
15901590 well as sampling parameters, which is why this method exists separately
15911591 to `get_diff_sampling_param`.
@@ -2066,7 +2066,7 @@ class ParallelConfig:
20662066 and when data_parallel_size > 0. Enables running an AsyncLLM
20672067 and API server on a "per-node" basis where vLLM load balances
20682068 between local data parallel ranks, but an external LB balances
2069- between vLLM nodes/replicas. Set explicitly in conjunction with
2069+ between vLLM nodes/replicas. Set explicitly in conjunction with
20702070 --data-parallel-start-rank."""
20712071 enable_expert_parallel : bool = False
20722072 """Use expert parallelism instead of tensor parallelism for MoE layers."""
@@ -4358,12 +4358,20 @@ def __repr__(self) -> str:
43584358 "disabled_custom_ops" : True ,
43594359 "compilation_time" : True ,
43604360 "bs_to_padded_graph_size" : True ,
4361- "pass_config" : True ,
43624361 "traced_files" : True ,
43634362 "inductor_compile_config" : {
43644363 "post_grad_custom_post_pass" : True ,
43654364 },
43664365 }
4366+
4367+ # Exclude pass_config attributes that are still at their default values
4368+ pass_config_exclude = {}
4369+ for attr , default_val in vars (PassConfig ()).items ():
4370+ if getattr (self .pass_config , attr ) == default_val :
4371+ pass_config_exclude [attr ] = True
4372+ if pass_config_exclude :
4373+ exclude ["pass_config" ] = pass_config_exclude
4374+
43674375 # The cast to string is necessary because Pydantic is mocked in docs
43684376 # builds and sphinx-argparse doesn't know the return type of decode()
43694377 return str (
0 commit comments