|
15 | 15 | # This file is a part of the vllm-ascend project. |
16 | 16 | # |
17 | 17 |
|
18 | | -import torch |
19 | | -import torch_npu |
20 | | - |
21 | 18 | import vllm_ascend.ops.activation # noqa |
22 | 19 | import vllm_ascend.ops.fused_moe # noqa |
23 | 20 | import vllm_ascend.ops.layernorm # noqa |
24 | 21 | import vllm_ascend.ops.rotary_embedding # noqa |
25 | 22 | import vllm_ascend.ops.vocab_parallel_embedding # noqa |
26 | | - |
27 | | - |
28 | | -class dummyFusionOp: |
29 | | - default = None |
30 | | - |
31 | | - def __init__(self, name=""): |
32 | | - self.name = name |
33 | | - |
34 | | - |
35 | | -def register_dummy_fusion_op() -> None: |
36 | | - torch.cuda.CUDAGraph = torch_npu.npu.NPUGraph |
37 | | - torch.ops._C.rms_norm = dummyFusionOp(name="rms_norm") |
38 | | - torch.ops._C.fused_add_rms_norm = dummyFusionOp(name="fused_add_rms_norm") |
39 | | - torch.ops._C.static_scaled_fp8_quant = dummyFusionOp( |
40 | | - name="static_scaled_fp8_quant") |
41 | | - torch.ops._C.dynamic_scaled_fp8_quant = dummyFusionOp( |
42 | | - name="dynamic_scaled_fp8_quant") |
43 | | - torch.ops._C.dynamic_per_token_scaled_fp8_quant = dummyFusionOp( |
44 | | - name="dynamic_per_token_scaled_fp8_quant") |
45 | | - torch.ops._C.rms_norm_static_fp8_quant = dummyFusionOp( |
46 | | - name="rms_norm_static_fp8_quant") |
47 | | - torch.ops._C.fused_add_rms_norm_static_fp8_quant = dummyFusionOp( |
48 | | - name="fused_add_rms_norm_static_fp8_quant") |
49 | | - torch.ops._C.rms_norm_dynamic_per_token_quant = dummyFusionOp( |
50 | | - name="rms_norm_dynamic_per_token_quant") |
51 | | - torch.ops._C.rms_norm_dynamic_per_token_quant = dummyFusionOp( |
52 | | - name="rms_norm_dynamic_per_token_quant") |
0 commit comments