Skip to content

Commit 868aa2f

Browse files
author
weijinqian_v1
committed
[v0.9.1][Feature] add moe alltoallv.
Signed-off-by: weijinqian_v1 <weijinqian@huawei.com>
1 parent 55c2138 commit 868aa2f

File tree

4 files changed

+686
-4
lines changed

4 files changed

+686
-4
lines changed

vllm_ascend/attention/attention_v1.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
from vllm.utils import direct_register_custom_op
2929
from vllm.v1.core.sched.output import SchedulerOutput
3030

31+
from vllm_ascend.multistream.base import MSAttentionMetadataSplitConfig
3132
from vllm_ascend.ops.attention import vanilla_chunked_prefill
3233
from vllm_ascend.utils import (ACL_FORMAT_FRACTAL_NZ, aligned_16, is_310p,
3334
nd_to_nz_2d, nd_to_nz_spec)
@@ -150,6 +151,18 @@ class AscendMetadata:
150151
# (num_tokens,)
151152
slot_mapping: torch.Tensor = None
152153

154+
def split_metadata_for_multistream(
    self,
    ms_split_config: MSAttentionMetadataSplitConfig,
) -> list["AscendMetadata"]:
    """Split this attention metadata for multi-stream execution.

    Thin wrapper around ``model_input_split_v1_attn``: this instance is
    forwarded as ``attn_metadata`` and ``AscendMetadata`` is forwarded as
    ``_metadata_cls``.

    Args:
        ms_split_config: Split configuration, passed through unchanged.

    Returns:
        The value produced by ``model_input_split_v1_attn`` (annotated
        here as a list of ``AscendMetadata``).
    """
    # Imported at call time rather than at module import time
    # (presumably to avoid a circular import -- TODO confirm).
    from vllm_ascend.multistream.ms_split import model_input_split_v1_attn

    split_parts = model_input_split_v1_attn(
        ms_split_config=ms_split_config,
        attn_metadata=self,
        _metadata_cls=AscendMetadata,
    )
    return split_parts
165+
153166

154167
class AscendAttentionMetadataBuilder:
155168

vllm_ascend/models/__init__.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,10 @@ def register_model():
4141
"DeepseekV3ForCausalLM",
4242
"vllm_ascend.models.deepseek_dbo:CustomDeepseekDBOForCausalLM")
4343

44+
ModelRegistry.register_model(
45+
"Qwen3MoeForCausalLM",
46+
"vllm_ascend.models.qwen3_dbo:CustomQwen3MoeForCausalLMDBO")
47+
4448
else:
4549
ModelRegistry.register_model(
4650
"DeepseekV2ForCausalLM",
@@ -50,9 +54,9 @@ def register_model():
5054
"DeepseekV3ForCausalLM",
5155
"vllm_ascend.models.deepseek_v3:CustomDeepseekV3ForCausalLM")
5256

53-
ModelRegistry.register_model(
54-
"Qwen3MoeForCausalLM",
55-
"vllm_ascend.models.qwen3_moe:CustomQwen3MoeForCausalLM")
57+
ModelRegistry.register_model(
58+
"Qwen3MoeForCausalLM",
59+
"vllm_ascend.models.qwen3_moe:CustomQwen3MoeForCausalLM")
5660

5761
ModelRegistry.register_model(
5862
"Qwen3ForCausalLM", "vllm_ascend.models.qwen3:CustomQwen3ForCausalLM")

0 commit comments

Comments
 (0)