Commit 93b9570

switch to xpu kernel for w8a16 gemm (vllm-project#323)
Signed-off-by: Kunshang Ji <kunshang.ji@intel.com>
1 parent: 87b1769

2 files changed (+4, -3 lines)
vllm/model_executor/layers/quantization/ipex_quant.py (3 additions, 3 deletions)

@@ -8,7 +8,7 @@
 from torch.nn import Module
 from torch.nn.parameter import Parameter

-from vllm._ipex_ops import ipex_ops as ops
+from vllm import _custom_ops as ops
 from vllm.model_executor.layers.fused_moe import (FusedMoEMethodBase,
                                                   FusedMoeWeightScaleSupported)
 from vllm.model_executor.layers.fused_moe.config import FusedMoEQuantConfig

@@ -283,8 +283,8 @@ def apply(self,
               bias: Optional[torch.Tensor] = None) -> torch.Tensor:
         weight = layer.weight.data
         weight_scale = layer.weight_scale.data
-        output = torch.ops.torch_ipex.fp8_gemm_w8a16(x, weight, True,
-                                                     weight_scale, bias)
+        output = torch.ops._xpu_C.fp8_gemm_w8a16(x, weight, True, weight_scale,
+                                                 bias)
         return output

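The apply() change swaps the IPEX-provided op (torch.ops.torch_ipex.fp8_gemm_w8a16) for the op that vllm_xpu_kernels registers under the _xpu_C namespace, keeping the same call signature. Below is a minimal sketch of that calling convention using a pure-PyTorch reference implementation registered under a hypothetical "demo" namespace; the reading of the boolean flag as "weight is transposed" and the per-row scale layout are illustrative assumptions, not taken from the kernel source.

from typing import Optional

import torch


# Illustrative stand-in for the compiled XPU kernel: a pure-PyTorch
# reference registered under a hypothetical "demo" namespace
# (torch.library.custom_op requires torch >= 2.4).
@torch.library.custom_op("demo::fp8_gemm_w8a16", mutates_args=())
def fp8_gemm_w8a16(x: torch.Tensor, weight: torch.Tensor, trans_b: bool,
                   weight_scale: torch.Tensor,
                   bias: Optional[torch.Tensor] = None) -> torch.Tensor:
    # Dequantize the fp8 weight to the activation dtype, then a plain GEMM.
    w = weight.to(x.dtype) * weight_scale
    out = x @ (w.t() if trans_b else w)
    return out if bias is None else out + bias


x = torch.randn(4, 8)
w = torch.randn(16, 8).to(torch.float8_e4m3fn)  # fp8 weight, row-major
scale = torch.full((16, 1), 0.5)                # assumed per-row scale
# Resolved through torch.ops just like torch.ops._xpu_C.fp8_gemm_w8a16:
y = torch.ops.demo.fp8_gemm_w8a16(x, w, True, scale, None)
print(y.shape)  # torch.Size([4, 16])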
vllm/platforms/xpu.py (1 addition, 0 deletions)

@@ -7,6 +7,7 @@
 import torch
 # import custom ops, trigger op registration
 import vllm_xpu_kernels._C  # noqa
+import vllm_xpu_kernels._xpu_C  # noqa

 import vllm.envs as envs
 from vllm.logger import init_logger
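The new import matters because op resolution is lazy: a compiled extension registers its ops (via TORCH_LIBRARY in C++) as a side effect of being imported, and torch.ops._xpu_C.fp8_gemm_w8a16 only resolves after that registration has run. A small sketch of the same mechanism at the Python level, using a hypothetical demo_ns namespace:

import torch

# Before anything registers the namespace, the op lookup fails.
try:
    torch.ops.demo_ns.double_op
except AttributeError:
    print("op not found until a module registers it")

# A compiled extension does this in C++ via TORCH_LIBRARY at import time;
# torch.library.define/impl is the Python-level equivalent.
torch.library.define("demo_ns::double_op", "(Tensor x) -> Tensor")
torch.library.impl("demo_ns::double_op", "cpu", lambda x: x * 2)

print(torch.ops.demo_ns.double_op(torch.ones(2)))  # now resolves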
