@@ -119,6 +119,27 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
         from vllm.config import CompilationLevel  # noqa: E402
         compilation_config = vllm_config.compilation_config
         model_config = vllm_config.model_config
+        additional_config = vllm_config.additional_config
+        parallel_config = vllm_config.parallel_config
+        cache_config = vllm_config.cache_config
+
+        if parallel_config:
+            # Default value for expert tensor parallel size
+            parallel_config.expert_tensor_parallel_size = 1
+
+            # NOTE: When enable_expert_parallel is True, we follow vLLM convention:
+            # ep_size = world_size, which means expert_tensor_parallel_size must be 1
+            if (additional_config
+                    and "expert_tensor_parallel_size" in additional_config
+                    and not parallel_config.enable_expert_parallel):
+                parallel_config.expert_tensor_parallel_size = int(
+                    additional_config["expert_tensor_parallel_size"])
+
+            # Calculate expert parallel size based on world size
+            parallel_config.expert_parallel_size = (
+                parallel_config.world_size //
+                parallel_config.expert_tensor_parallel_size)
+

         if model_config is None:
             logger.warning("Model config is missing. This may indicate "
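
For reference, the sizing rule introduced by the hunk above can be exercised in isolation. The sketch below mirrors the new logic only; the helper name and standalone signature are illustrative and not part of the patch, and it assumes world_size is divisible by the configured expert tensor parallel size.

def resolve_expert_parallel_sizes(world_size: int,
                                  enable_expert_parallel: bool,
                                  additional_config: dict | None) -> tuple[int, int]:
    # Default expert tensor parallel size, as in the patch.
    etp_size = 1
    # The explicit override is honored only when expert parallelism is NOT
    # enabled; with enable_expert_parallel=True, ep_size must equal world_size.
    if (additional_config
            and "expert_tensor_parallel_size" in additional_config
            and not enable_expert_parallel):
        etp_size = int(additional_config["expert_tensor_parallel_size"])
    ep_size = world_size // etp_size
    return etp_size, ep_size

# 16 NPUs with an etp override of 4 and expert parallelism off -> etp=4, ep=4.
assert resolve_expert_parallel_sizes(16, False, {"expert_tensor_parallel_size": 4}) == (4, 4)
# With enable_expert_parallel=True the override is ignored -> ep == world_size.
assert resolve_expert_parallel_sizes(16, True, {"expert_tensor_parallel_size": 4}) == (1, 16)
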
@@ -127,8 +148,8 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
         else:
             enforce_eager = getattr(model_config, "enforce_eager", False)

-        if vllm_config.additional_config is not None:
-            enable_graph_mode = vllm_config.additional_config.get(
+        if additional_config is not None:
+            enable_graph_mode = additional_config.get(
                 "enable_graph_mode", False)
             if enable_graph_mode:
                 if enforce_eager:
@@ -139,7 +160,7 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
                     logger.warning(
                         "NPU graph mode is still experimental and not supported for V1 without mla currently, "
                         "it has been disabled automatically.")
-                    vllm_config.additional_config["enable_graph_mode"] = False
+                    additional_config["enable_graph_mode"] = False
                 if model_config:
                     model_type = model_config.hf_config.model_type
                     if "deepseek" not in model_type:
@@ -178,7 +199,6 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
178199 ["vllm.unified_ascend_attention_with_output" ])
179200 update_aclgraph_sizes (vllm_config )
180201
181- parallel_config = vllm_config .parallel_config
182202 if parallel_config and parallel_config .worker_cls == "auto" :
183203 if envs .VLLM_USE_V1 :
184204 parallel_config .worker_cls = "vllm_ascend.worker.worker_v1.NPUWorker"
@@ -190,7 +210,6 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
             else:
                 parallel_config.worker_cls = "vllm_ascend.worker.worker.NPUWorker"

-        cache_config = vllm_config.cache_config
         if cache_config:
             if cache_config.block_size is None:
                 cache_config.block_size = 128
@@ -202,11 +221,10 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:

         if envs.VLLM_USE_V1:
             # Activate custom ops for v1.
-            vllm_config.compilation_config.custom_ops = ["all"]
+            compilation_config.custom_ops = ["all"]
             # If ascend_scheduler_config exists in additional_config,
             # extents original scheduler_config to use AscendScheduler.

-            additional_config = vllm_config.additional_config
             if additional_config and additional_config.get(
                     "ascend_scheduler_config", None) is not None:
                 additional_scheduler_config = additional_config.get(
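
For readers wiring this up end to end, a plausible additional_config payload touching the options referenced in this patch is sketched below. The values are illustrative and only the keys that appear in the diff are assumed to exist; how the dict reaches VllmConfig (CLI flag versus engine keyword argument) is outside the scope of this change.

additional_config = {
    "expert_tensor_parallel_size": 4,  # ignored when enable_expert_parallel is set
    "enable_graph_mode": True,         # effective only for DeepSeek-family models
    "ascend_scheduler_config": {},     # opt in to AscendScheduler; fields not shown in this hunk
}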