
Commit 151ea66

Remove invalid weight_dtype check
The config doesn't have a weight_dtype attribute; error checking is properly handled at the kernel level in scaled_mm_helper.hpp.

Signed-off-by: padg9912 <phone.and.desktop@gmail.com>
1 parent daaec73 · commit 151ea66
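Context for the fix: the deleted branch dereferenced `c.weight_dtype`, but the config object passed to `can_implement` carries no such attribute, so on SM100+ GPUs the check would raise `AttributeError` instead of returning the intended `(False, reason)` tuple. A minimal sketch of that failure mode, using a hypothetical stand-in config class (the real config class and its fields are not shown in this commit):

```python
import torch
from dataclasses import dataclass


# Hypothetical stand-in for the scaled-mm linear-layer config; like the
# real config passed to can_implement, it defines no weight_dtype field.
@dataclass
class FakeScaledMMConfig:
    out_dtype: torch.dtype = torch.float16


c = FakeScaledMMConfig()

try:
    # Equivalent of the deleted comparison on SM100+ hardware.
    _ = (c.weight_dtype == torch.int8)
except AttributeError as exc:
    print(f"Deleted check would have crashed: {exc}")
    # -> 'FakeScaledMMConfig' object has no attribute 'weight_dtype'
```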

1 file changed, 0 insertions(+), 9 deletions(-)

vllm/model_executor/layers/quantization/kernels/scaled_mm/cutlass.py

```diff
@@ -28,15 +28,6 @@ def can_implement(
         if not current_platform.is_cuda():
             return False, "CutlassScaledMM requires running on CUDA."
 
-        # Blackwell doesn't support INT8
-        capability = current_platform.get_device_capability()
-        if capability is not None:
-            compute_cap = capability.to_int()
-            if compute_cap >= 100 and c.weight_dtype == torch.int8:
-                return False, (
-                    f"INT8 not supported on SM{compute_cap}. "
-                    f"Use FP8 quantization or older GPU architecture.")
-
         return True, None
 
     def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
```
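After the deletion, the method body reduces to the hunk's surviving context lines. A sketch of the result, reconstructed from the diff; the class name, decorator, full signature, and the `current_platform` import path are assumptions, since they fall outside the hunk:

```python
from vllm.platforms import current_platform  # assumed import path


class CutlassScaledMMLinearKernel:  # class name assumed
    @classmethod
    def can_implement(cls, c) -> tuple[bool, str | None]:
        # Only the platform gate remains in Python; dtype validation
        # now happens at the kernel level (scaled_mm_helper.hpp).
        if not current_platform.is_cuda():
            return False, "CutlassScaledMM requires running on CUDA."
        return True, None
```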
