Skip to content

Commit 03da02f

Browse files
committed
fix custom op register
Signed-off-by: jiang1.li <jiang1.li@intel.com>
1 parent 2e25bb1 commit 03da02f

File tree

1 file changed

+7
-6
lines changed

1 file changed

+7
-6
lines changed

vllm/model_executor/layers/quantization/utils/fp8_utils.py

Lines changed: 7 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -201,12 +201,13 @@ def apply_w8a8_block_fp8_linear_fake(
201 201
return torch.empty(output_shape, dtype=input.dtype, device=input.device)
202 202

203 203

204-
direct_register_custom_op(
205-
op_name="apply_w8a8_block_fp8_linear",
206-
op_func=apply_w8a8_block_fp8_linear,
207-
mutates_args=[],
208-
fake_impl=apply_w8a8_block_fp8_linear_fake,
209-
)
204+
if not current_platform.is_cpu():
205+
direct_register_custom_op(
206+
op_name="apply_w8a8_block_fp8_linear",
207+
op_func=apply_w8a8_block_fp8_linear,
208+
mutates_args=[],
209+
fake_impl=apply_w8a8_block_fp8_linear_fake,
210+
)
210 211

211 212

212 213
def input_to_float8(

0 commit comments

Comments
 (0)