@@ -25,7 +25,8 @@
                                                 _get_default_timeout,
                                                 is_nccl_available)
 from torch.distributed.rendezvous import rendezvous
-from vllm.config import ParallelConfig
+from vllm.config import ParallelConfig, VllmConfig
+from vllm.v1.engine.core import DPEngineCoreProc


 def ascend_destroy_model_parallel():
@@ -171,7 +172,7 @@ def parallel_config_get_dp_port(self) -> int:
     return port


-def ascend_stateless_init_dp_group(self) -> "ProcessGroup":
+def stateless_init_dp_group(self) -> "ProcessGroup":
     # TODO(Yizhou): Currently we have to set the backend to gloo
     # because in vllm.config.ParallelConfig.has_unfinished_dp the
     # device is set to cpu. We need to fix this in the future.
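The TODO in the hunk above explains why the DP coordination group uses the
gloo backend: vllm.config.ParallelConfig.has_unfinished_dp polls on the cpu
device, so the group must support CPU-side collectives. A standalone
illustration of gloo operating on CPU tensors (toy single-rank group with an
arbitrary port, not vLLM code):

import torch
import torch.distributed as dist

# Single-rank gloo group purely for illustration.
dist.init_process_group(backend="gloo",
                        init_method="tcp://127.0.0.1:29555",
                        rank=0,
                        world_size=1)

# gloo reduces CPU tensors directly; hccl/nccl would require
# device-resident tensors instead.
t = torch.ones(1)  # CPU tensor
dist.all_reduce(t)
assert t.item() == 1.0
dist.destroy_process_group()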
@@ -187,6 +188,21 @@ def ascend_stateless_init_dp_group(self) -> "ProcessGroup":
     return dp_group


+def _init_data_parallel(self, vllm_config: VllmConfig):
+    # Configure NPUs and stateless process group for data parallel.
+    dp_rank = vllm_config.parallel_config.data_parallel_rank
+    dp_size = vllm_config.parallel_config.data_parallel_size
+    local_dp_rank = vllm_config.parallel_config.data_parallel_rank_local
+
+    assert dp_size > 1
+    assert 0 <= local_dp_rank <= dp_rank < dp_size
+
+    self.local_dp_rank = local_dp_rank
+    self.dp_group = vllm_config.parallel_config.stateless_init_dp_group()
+    self.current_wave = 0
+
+
 vllm.distributed.parallel_state.destroy_model_parallel = ascend_destroy_model_parallel
+DPEngineCoreProc._init_data_parallel = _init_data_parallel
 ParallelConfig.get_next_dp_init_port = parallel_config_get_dp_port
-ParallelConfig.stateless_init_dp_group = ascend_stateless_init_dp_group
+ParallelConfig.stateless_init_dp_group = stateless_init_dp_group
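The assertion chain in _init_data_parallel above encodes the expected rank
layout: the node-local DP rank can never exceed the global DP rank, and both
must fall inside the DP world size. A minimal standalone sketch of that
invariant (hypothetical helper, not part of the patch):

# Mirrors the asserts in _init_data_parallel.
def check_dp_ranks(dp_rank: int, local_dp_rank: int, dp_size: int) -> None:
    assert dp_size > 1
    assert 0 <= local_dp_rank <= dp_rank < dp_size

# Two nodes with two DP ranks each: node 0 holds global ranks 0-1,
# node 1 holds global ranks 2-3 with local ranks 0-1.
check_dp_ranks(dp_rank=2, local_dp_rank=0, dp_size=4)
check_dp_ranks(dp_rank=3, local_dp_rank=1, dp_size=4)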
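The module-level assignments at the end of the diff are plain monkey patches:
Python classes are mutable, so assigning a function to a class attribute
rebinds the method for all existing and future instances, which is how this
file swaps in the Ascend variants of vLLM's DP initialization at import time.
A generic sketch of the pattern (toy names, unrelated to vLLM):

class Engine:
    def init_dp(self) -> str:
        return "default"

def patched_init_dp(self) -> str:
    return "patched"

# Rebinding the class attribute replaces the method everywhere.
Engine.init_dp = patched_init_dp
assert Engine().init_dp() == "patched"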