Skip to content

Commit 1e3db7a

Browse files
authored
Merge pull request vllm-project#7 from wangxiyuan/upodate_model
Update qwen3 moe
2 parents 87f27ef + 22a69fc commit 1e3db7a

File tree

5 files changed

+1591
-746
lines changed

5 files changed

+1591
-746
lines changed

vllm_ascend/attention/attention_v1.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
from dataclasses import dataclass
1919
from enum import Enum
20-
from typing import List, Optional, Tuple, Type, ClassVar
20+
from typing import ClassVar, List, Optional, Tuple, Type
2121

2222
import torch
2323
import torch.nn as nn
@@ -32,13 +32,12 @@
3232
from vllm.forward_context import ForwardContext, get_forward_context
3333
from vllm.utils import cdiv, direct_register_custom_op
3434
from vllm.v1.core.sched.output import SchedulerOutput
35+
from vllm.v1.kv_cache_interface import AttentionSpec
3536

3637
from vllm_ascend.attention.utils import AscendCommonAttentionMetadata
3738
from vllm_ascend.ops.attention import vanilla_chunked_prefill
3839
from vllm_ascend.utils import (ACL_FORMAT_FRACTAL_NZ, aligned_16, is_310p,
3940
nd_to_nz_2d, nd_to_nz_spec)
40-
from vllm_ascend.worker.npu_input_batch import InputBatch
41-
from vllm.v1.kv_cache_interface import AttentionSpec
4241

4342

4443
def wait_for_kv_layer_from_connector(layer_name: str):
@@ -582,7 +581,7 @@ def unified_ascend_attention_with_output(
582581
attn_metadata = attn_metadata[layer_name]
583582
self = forward_context.no_compile_layers[layer_name]
584583
kv_cache = self.kv_cache[forward_context.virtual_engine]
585-
print(100*"^", f"layer_name: {layer_name}")
584+
print(100 * "^", f"layer_name: {layer_name}")
586585
self.impl.forward(self,
587586
query,
588587
key,

vllm_ascend/models/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,3 +53,6 @@ def register_model():
5353
"PanguProMoEForCausalLM",
5454
"vllm_ascend.torchair.models.torchair_pangu_moe:PanguProMoEForCausalLM"
5555
)
56+
ModelRegistry.register_model(
57+
"Qwen3NextForCausalLM",
58+
"vllm_ascend.torchair.models.qwen3_next:Qwen3NextForCausalLM")

0 commit comments

Comments (0)