aten/src/ATen/native/cuda/RowwiseScaledMM.cu (3 additions & 0 deletions)
@@ -790,6 +790,9 @@ void check_inputs(
const at::Tensor& scale_b,
const std::optional<at::Tensor>& bias,
const at::Tensor& out) {
+  auto dprops = at::cuda::getCurrentDeviceProperties();
+  TORCH_CHECK(dprops->major == 9, "f8f8bf16_rowwise is sm_90 specific.");
Collaborator:
Shouldn't there be another change to not call into this at all?
We should fall back to another implementation for sm10+ right?

Contributor:
I also think that, at the very least, we should have a tracker for the functionality/features we skip on new hardware, so that full support can be added later.

Collaborator:
What about SM_90 minor version? Not relevant at all here?

Collaborator Author:
> What about SM_90 minor version? Not relevant at all here?

It is not relevant. But I missed SM_89; I will include it as well.

Collaborator Author:
> We should fall back to another implementation for sm10+ right?

Correct, the current approach/kernel is not compatible with SM_100+. Since there are no kernels for the Blackwell machines yet, I propose simply throwing an exception. Otherwise it fails with a CUTLASS error, which is not elegant behavior.
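A minimal sketch of how the combined guard could look (hypothetical code, not the exact diff; the sm_100+ message mirrors what the updated test below expects, and the sm_89 case follows the discussion above):

    auto dprops = at::cuda::getCurrentDeviceProperties();
    // Fail fast on Blackwell (sm_100) and newer with a clear error,
    // instead of letting the kernel abort inside CUTLASS.
    TORCH_CHECK(
        dprops->major < 10,
        "f8f8bf16_rowwise is not implemented on sm_100 or later.");
    // Accept Hopper (sm_90) and Ada (sm_89).
    TORCH_CHECK(
        dprops->major == 9 || (dprops->major == 8 && dprops->minor == 9),
        "f8f8bf16_rowwise is sm_89/sm_90 specific.");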

Collaborator Author:
> @Aidyn-A do they still fail with CUTLASS 3.7 btw? Or do we need to wait for 3.8?

That is a good question. I will need to check it. Thanks for reminding me about the CUTLASS update!

Collaborator:
@Aidyn-A We also need a cuDNN update (only for the versions we started the ManyLinux upgrade on, so 2.6/2.8).

Collaborator Author:
Thanks @Skylion007 for the PR! I just ran the tests with the latest CUTLASS 3.8 on SM_100 and got these errors:

FAILED [0.3029s] test/test_matmul_cuda.py::TestFP8MatmulCudaCUDA::test_float8_rowwise_scaling_sanity_use_fast_accum_False_cuda - AssertionError: Tensor-likes are not close!
FAILED [2.5473s] test/test_matmul_cuda.py::TestFP8MatmulCudaCUDA::test_float8_rowwise_scaling_sanity_use_fast_accum_True_cuda - AssertionError: Tensor-likes are not close!
FAILED [0.0025s] test/test_matmul_cuda.py::TestFP8MatmulCudaCUDA::test_scaled_mm_vs_emulated_row_wise_bfloat16_cuda - AssertionError: Tensor-likes are not close!

The reason these fail with numerical mismatches is that the kernel simply aborts with the following message:

ERROR : Arch conditional MMA instruction used without targeting sm90a compute capability. Aborting.

Collaborator:
Seems like __CUDA_ARCH_FEAT_SM90_ALL is not defined; not sure if this is a CMake or CUTLASS bug.
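For context, a sketch of the mechanism (my reading of the usual CUTLASS guards, not code from this PR): nvcc defines __CUDA_ARCH_FEAT_SM90_ALL only when a translation unit is compiled for the sm_90a target (e.g. -gencode arch=compute_90a,code=sm_90a); building for plain sm_90 leaves it undefined, so the arch-conditional WGMMA path is compiled out and the kernel hits the runtime abort quoted above.

    // Simplified form of the feature gate (hypothetical illustration):
    #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ == 900 && \
        defined(__CUDA_ARCH_FEAT_SM90_ALL)
      // Hopper arch-conditional MMA (WGMMA) instructions may be emitted here.
    #else
      // Built for plain sm_90 (or another arch): the conditional path is
      // unavailable, which surfaces as the "Aborting" error at runtime.
    #endif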


TORCH_CHECK(a.is_cuda());
TORCH_CHECK(a.device() == b.device());
TORCH_CHECK(scale_a.device() == a.device());
test/test_matmul_cuda.py (29 additions & 12 deletions)
@@ -43,9 +43,11 @@

_IS_SM8X = False
_IS_SM9X = False
+_IS_SM10X = False
if TEST_CUDA:
    _IS_SM8X = torch.cuda.get_device_capability(0)[0] == 8
    _IS_SM9X = torch.cuda.get_device_capability(0)[0] == 9
+    _IS_SM10X = torch.cuda.get_device_capability(0)[0] == 10

# Protects against includes accidentally setting the default dtype
assert torch.get_default_dtype() is torch.float32
@@ -659,18 +661,33 @@ def test_float8_error_messages(self, device) -> None:
                out_dtype=torch.bfloat16,
            )

-        # Note re.compile is used, not re.escape. This is to accommodate the fn vs fnuz type message.
-        with self.assertRaisesRegex(
-            RuntimeError,
-            r"Expected b\.dtype\(\) == at::kFloat8_e4m3fnu?z? to be true, but got false\.",
-        ):
-            torch._scaled_mm(
-                x_fp8,
-                y_fp8.to(e5m2_type),
-                scale_a=torch.ones((M, 1), device="cuda"),
-                scale_b=torch.ones((1, N), device="cuda"),
-                out_dtype=torch.bfloat16,
-            )
+        if _IS_SM10X:
+            with self.assertRaisesRegex(
+                RuntimeError,
+                re.escape(
+                    "f8f8bf16_rowwise is not implemented on sm_100 or later.",
+                ),
+            ):
+                torch._scaled_mm(
+                    x_fp8,
+                    y_fp8.to(e5m2_type),
+                    scale_a=torch.ones((M, 1), device="cuda"),
+                    scale_b=torch.ones((1, N), device="cuda"),
+                    out_dtype=torch.bfloat16,
+                )
+        else:
+            # Note re.compile is used, not re.escape. This is to accommodate the fn vs fnuz type message.
+            with self.assertRaisesRegex(
+                RuntimeError,
+                r"Expected b\.dtype\(\) == at::kFloat8_e4m3fnu?z? to be true, but got false\.",
+            ):
+                torch._scaled_mm(
+                    x_fp8,
+                    y_fp8.to(e5m2_type),
+                    scale_a=torch.ones((M, 1), device="cuda"),
+                    scale_b=torch.ones((1, N), device="cuda"),
+                    out_dtype=torch.bfloat16,
+                )

@unittest.skipIf(not PLATFORM_SUPPORTS_FP8 or IS_WINDOWS, f8_msg)
@unittest.skipIf(not _IS_SM9X, "rowwise implementation is currently sm90 specific")