
Commit 151ea66

Remove invalid weight_dtype check
The config doesn't have a weight_dtype attribute; error checking is properly handled at the kernel level in scaled_mm_helper.hpp.

Signed-off-by: padg9912 <phone.and.desktop@gmail.com>
1 parent daaec73 · commit 151ea66
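Context for the fix: the deleted branch dereferenced `c.weight_dtype`, but the config object passed to `can_implement` carries no such attribute, so on SM100+ GPUs the check would raise `AttributeError` instead of returning the intended `(False, reason)` tuple. A minimal sketch of that failure mode, using a hypothetical stand-in config class (the real config class and its fields are not shown in this commit):

```python
import torch
from dataclasses import dataclass


# Hypothetical stand-in for the scaled-mm linear-layer config; like the
# real config passed to can_implement, it defines no weight_dtype field.
@dataclass
class FakeScaledMMConfig:
    out_dtype: torch.dtype = torch.float16


c = FakeScaledMMConfig()

try:
    # Equivalent of the deleted comparison on SM100+ hardware.
    _ = (c.weight_dtype == torch.int8)
except AttributeError as exc:
    print(f"Deleted check would have crashed: {exc}")
    # -> 'FakeScaledMMConfig' object has no attribute 'weight_dtype'
```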

1 file changed, 0 insertions(+), 9 deletions(-)

vllm/model_executor/layers/quantization/kernels/scaled_mm/cutlass.py

```diff
@@ -28,15 +28,6 @@ def can_implement(
         if not current_platform.is_cuda():
             return False, "CutlassScaledMM requires running on CUDA."
 
-        # Blackwell doesn't support INT8
-        capability = current_platform.get_device_capability()
-        if capability is not None:
-            compute_cap = capability.to_int()
-            if compute_cap >= 100 and c.weight_dtype == torch.int8:
-                return False, (
-                    f"INT8 not supported on SM{compute_cap}. "
-                    f"Use FP8 quantization or older GPU architecture.")
-
         return True, None
 
     def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
```
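After the deletion, the method body reduces to the hunk's surviving context lines. A sketch of the result, reconstructed from the diff; the class name, decorator, full signature, and the `current_platform` import path are assumptions, since they fall outside the hunk:

```python
from vllm.platforms import current_platform  # assumed import path


class CutlassScaledMMLinearKernel:  # class name assumed
    @classmethod
    def can_implement(cls, c) -> tuple[bool, str | None]:
        # Only the platform gate remains in Python; dtype validation
        # now happens at the kernel level (scaled_mm_helper.hpp).
        if not current_platform.is_cuda():
            return False, "CutlassScaledMM requires running on CUDA."
        return True, None
```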
