Skip to content
4 changes: 2 additions & 2 deletions vllm/utils/deep_gemm.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ def _align(x: int, y: int) -> int:


# Taken from https://github.com/deepseek-ai/DeepGEMM/blob/dd6ed14acbc7445dcef224248a77ab4d22b5f240/deep_gemm/utils/math.py#L38
# TODO(wentao): optimize this function using a Triton or CUDA kernel
@torch.compile(dynamic=True, backend=current_platform.simple_compile_backend)
def per_block_cast_to_fp8(
x: torch.Tensor,
block_size: list[int] = DEFAULT_BLOCK_SIZE,
Expand Down Expand Up @@ -187,4 +187,4 @@ def should_use_deepgemm_for_fp8_linear(output_dtype: torch.dtype,
"is_deep_gemm_e8m0_used",
"is_deep_gemm_supported",
"should_use_deepgemm_for_fp8_linear",
]
]