@@ -130,6 +130,27 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
         enforce_eager = getattr(vllm_config.model_config, "enforce_eager",
                                 False)
 
+        additional_config = vllm_config.additional_config
+        parallel_config = vllm_config.parallel_config
+        cache_config = vllm_config.cache_config
+
+        if parallel_config:
+            # Default value for expert tensor parallel size
+            parallel_config.expert_tensor_parallel_size = 1
+
+            # NOTE: When enable_expert_parallel is True, we follow vLLM convention:
+            # ep_size = world_size, which means expert_tensor_parallel_size must be 1
+            if (additional_config
+                    and "expert_tensor_parallel_size" in additional_config
+                    and not parallel_config.enable_expert_parallel):
+                parallel_config.expert_tensor_parallel_size = int(
+                    additional_config["expert_tensor_parallel_size"])
+
+            # Calculate expert parallel size based on world size
+            parallel_config.expert_parallel_size = (
+                parallel_config.world_size //
+                parallel_config.expert_tensor_parallel_size)
+
         # TODO(Yizhou): Override the value of enforce_eager to True before
         # the CANN and torch_npu support NPU compilation.
         enforce_eager = True
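
A minimal standalone sketch of the expert-parallel sizing introduced above, with
plain ints and a dict standing in for the real parallel_config /
additional_config objects (world_size = 8 and the override of 2 are
illustrative values, not defaults):

    # Stand-ins for the vllm_config fields; values are illustrative only.
    world_size = 8
    enable_expert_parallel = False
    additional_config = {"expert_tensor_parallel_size": "2"}

    # Default to 1, then honor the user override when EP is not forced on.
    expert_tensor_parallel_size = 1
    if ("expert_tensor_parallel_size" in additional_config
            and not enable_expert_parallel):
        expert_tensor_parallel_size = int(
            additional_config["expert_tensor_parallel_size"])

    # ep_size is derived from world size: 8 // 2 == 4.
    expert_parallel_size = world_size // expert_tensor_parallel_size
    assert expert_parallel_size == 4
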
@@ -157,21 +178,20 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
157178 ["vllm.unified_ascend_attention_with_output" ])
158179 update_aclgraph_sizes (vllm_config )
159180
160- if vllm_config . additional_config is not None :
161- enable_graph_mode = vllm_config . additional_config .get (
162- "enable_graph_mode" , False )
181+ if additional_config is not None :
182+ enable_graph_mode = additional_config .get ("enable_graph_mode" ,
183+ False )
163184 if enable_graph_mode and not supports_dynamo ():
164185 logger .warning (
165186 "enable_graph_mode is not supported because the version of torch is too low, forcing close enable_graph_mode"
166187 )
167- vllm_config . additional_config ["enable_graph_mode" ] = False
188+ additional_config ["enable_graph_mode" ] = False
168189 if enable_graph_mode and envs .VLLM_USE_V1 and envs .VLLM_MLA_DISABLE :
169190 logger .warning (
170191 "NPU graph mode is still experimental and not supported for V1 without mla currently, "
171192 "it has been disabled automatically." )
172- vllm_config . additional_config ["enable_graph_mode" ] = False
193+ additional_config ["enable_graph_mode" ] = False
173194
174- parallel_config = vllm_config .parallel_config
175195 if parallel_config and parallel_config .worker_cls == "auto" :
176196 if envs .VLLM_USE_V1 :
177197 parallel_config .worker_cls = "vllm_ascend.worker.worker_v1.NPUWorker"
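
The graph-mode gating in this hunk reads as a small pure function. A hedged
sketch, where dynamo_ok, use_v1, and mla_disabled are hypothetical stand-ins
for supports_dynamo() and the envs flags:

    def resolve_graph_mode(additional_config, dynamo_ok, use_v1, mla_disabled):
        # Mirrors the two checks above: start from the user's request,
        # then force graph mode off when either precondition fails.
        enable = additional_config.get("enable_graph_mode", False)
        if enable and not dynamo_ok:
            enable = False  # torch too old for graph capture
        if enable and use_v1 and mla_disabled:
            enable = False  # V1 without MLA is not supported yet
        additional_config["enable_graph_mode"] = enable
        return enable

    cfg = {"enable_graph_mode": True}
    assert not resolve_graph_mode(cfg, dynamo_ok=True, use_v1=True,
                                  mla_disabled=True)
    assert cfg["enable_graph_mode"] is False
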
@@ -183,7 +203,6 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
             else:
                 parallel_config.worker_cls = "vllm_ascend.worker.worker.NPUWorker"
 
-        cache_config = vllm_config.cache_config
         if cache_config:
             if cache_config.block_size is None:
                 cache_config.block_size = 128
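
The worker selection and block-size defaulting can be exercised in isolation;
in the sketch below, StubCacheConfig is a hypothetical stand-in for vllm's
CacheConfig, not the real class:

    from dataclasses import dataclass
    from typing import Optional

    @dataclass
    class StubCacheConfig:  # stand-in, not the real vllm CacheConfig
        block_size: Optional[int] = None

    use_v1 = True
    worker_cls = ("vllm_ascend.worker.worker_v1.NPUWorker"
                  if use_v1 else "vllm_ascend.worker.worker.NPUWorker")

    cache_config = StubCacheConfig()
    if cache_config.block_size is None:
        cache_config.block_size = 128  # NPU default, as in the hunk above
    assert cache_config.block_size == 128
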
@@ -199,7 +218,6 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
         # If ascend_scheduler_config exists in additional_config,
         # extend the original scheduler_config to use AscendScheduler.
 
-        additional_config = vllm_config.additional_config
         if additional_config and additional_config.get(
                 "ascend_scheduler_config", None) is not None:
             additional_scheduler_config = additional_config.get(
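
Finally, the scheduler hook pulls a nested dict out of additional_config. The
hunk is cut off before that config is applied, so this sketch covers only the
lookup (the "enabled" key is illustrative):

    additional_config = {"ascend_scheduler_config": {"enabled": True}}
    ascend_scheduler_config = (additional_config or {}).get(
        "ascend_scheduler_config", None)
    if ascend_scheduler_config is not None:
        # The real code extends scheduler_config to use AscendScheduler here.
        print("AscendScheduler config:", ascend_scheduler_config)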