triton-lang · ptillet · Apr 24, 2023 · Apr 11, 2023 · Apr 24, 2023 · Apr 24, 2023
@@ -87,6 +87,8 @@ class BlockedToMMA : public mlir::RewritePattern {
   mlir::LogicalResult
   matchAndRewrite(mlir::Operation *op,
                   mlir::PatternRewriter &rewriter) const override {
+    if (computeCapability < 70)
+      return failure();
     auto dotOp = cast<triton::DotOp>(op);
     // TODO: Check data-types and SM compatibility
     auto oldRetType = dotOp.getResult().getType().cast<RankedTensorType>();

@@ -3,6 +3,9 @@
 from functools import wraps
 from typing import List, Optional, Sequence, Tuple, TypeVar
 
+import torch
+
+import triton
 from . import core as tl
 from triton._C.libtriton.triton import ir
 
@@ -1180,6 +1183,14 @@ def dot(lhs: tl.tensor,
         allow_tf32: bool,
         out_dtype: tl.dtype,
         builder: ir.builder) -> tl.tensor:
+    if torch.version.hip is None:
+        device = triton.runtime.jit.get_current_device()
+        capability = triton.runtime.jit.get_device_capability(device)
+        capability = capability[0] * 10 + capability[1]
+        if capability < 70:
+            assert (
+                not rhs.dtype.is_fp16() and not rhs.dtype.is_fp8()
+            ), "Float8 and Float16 types are not supported for compute capability < 70 (use Float32 or above)"
     assert lhs.type.is_block() and rhs.type.is_block()
     assert lhs.dtype == rhs.dtype, "lhs and rhs must have the same dtype!"
     assert len(lhs.shape) == 2 and len(rhs.shape) == 2