We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 9fb3ae4 commit 6273fe8
benchmarks/kernels/benchmark_w8a8_block_fp8.py
@@ -14,7 +14,7 @@
14
from tqdm import tqdm
15
16
from vllm.model_executor.layers.quantization.utils.fp8_utils import (
17
- _w8a8_block_fp8_matmul,
+ _w8a8_triton_block_scaled_mm,
18
)
19
from vllm.platforms import current_platform
20
from vllm.triton_utils import triton
@@ -83,7 +83,7 @@ def grid(META):
83
84
85
if A.dtype == torch.float8_e4m3fn:
86
- kernel = _w8a8_block_fp8_matmul
+ kernel = _w8a8_triton_block_scaled_mm
87
else:
88
raise RuntimeError("Currently, only support tune w8a8 block fp8 kernel.")
89
0 commit comments