Skip to content

Commit b6a9207

Browse files
committed
fix torchair mtp
Signed-off-by: MengqingCao <cmq0113@163.com>
1 parent 9607aae commit b6a9207

File tree

2 files changed: 2 additions (+2) and 1 deletion (-1).

tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -92,7 +92,6 @@ def mtp_torchair_correctness(
 92  92       assert matches > int(0.66 * len(ref_outputs))
 93  93
 94  94
 95     - @pytest.mark.skip("TODO: revert this skip")
 96  95   def test_mtp_torchair_correctness_piecewise(
 97  96       sampling_config: SamplingParams,
 98  97       model_name: str,

vllm_ascend/torchair/models/torchair_deepseek_mtp.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@
 23  23   import torch.nn as nn
 24  24   from transformers import PretrainedConfig
 25  25   from vllm.attention.backends.abstract import AttentionMetadata
     26 + from vllm.compilation.decorators import support_torch_compile
 26  27   from vllm.config import CacheConfig, ModelConfig, VllmConfig
 27  28   from vllm.distributed import get_tensor_model_parallel_world_size
 28  29   from vllm.model_executor.layers.layernorm import RMSNorm
@@ -187,6 +188,7 @@ def compute_logits(
187 188           return logits
188 189
189 190
    191 + @support_torch_compile
190 192   class TorchairDeepSeekMTP(DeepSeekMTP):
191 193       # NOTE 1.The quantized MTP layer of deepseek on the NPU is not quantized;
192 194       # NOTE 2.The description file generated by the current msmodelslim tool does not have

0 commit comments

Comments
 (0)