Revert "[MLIR][XeGPU] Adding XeGPU 2d block operators (#84692)" #85653

bviyer · 2024-03-18T15:48:21Z

This reverts commit daebe5c.

This commit causes the following asan issue:

<snip>/llvm-project/build/bin/mlir-opt <snip>/llvm-project/mlir/test/Dialect/XeGPU/XeGPUOps.mlir | <snip>/llvm-project/build/bin/FileCheck <snip>/llvm-project/mlir/test/Dialect/XeGPU/XeGPUOps.mlir
# executed command: <snip>/llvm-project/build/bin/mlir-opt <snip>/llvm-project/mlir/test/Dialect/XeGPU/XeGPUOps.mlir
# .---command stderr------------
# | =================================================================
# | ==2772558==ERROR: AddressSanitizer: stack-use-after-return on address 0x7fd2c2c42b90 at pc 0x55e406d54614 bp 0x7ffc810e4070 sp 0x7ffc810e4068
# | READ of size 8 at 0x7fd2c2c42b90 thread T0
# |     #0 0x55e406d54613 in operator()<long int const*> /usr/include/c++/13/bits/predefined_ops.h:318
# |     #1 0x55e406d54613 in __count_if<long int const*, __gnu_cxx::__ops::_Iter_pred<mlir::verifyListOfOperandsOrIntegers(Operation*, llvm::StringRef, unsigned int, llvm::ArrayRef<long int>, ValueRange)::<lambda(int64_t)> > > /usr/include/c++/13/bits/stl_algobase.h:2125
# |     #2 0x55e406d54613 in count_if<long int const*, mlir::verifyListOfOperandsOrIntegers(Operation*, 
...

This reverts commit daebe5c.

llvmbot · 2024-03-18T15:48:51Z

@llvm/pr-subscribers-mlir-gpu

@llvm/pr-subscribers-mlir

Author: Balaji V. Iyer. (bviyer)

Changes

This reverts commit daebe5c.

This commit causes the following asan issue:

&lt;snip&gt;/llvm-project/build/bin/mlir-opt &lt;snip&gt;/llvm-project/mlir/test/Dialect/XeGPU/XeGPUOps.mlir | &lt;snip&gt;/llvm-project/build/bin/FileCheck &lt;snip&gt;/llvm-project/mlir/test/Dialect/XeGPU/XeGPUOps.mlir
# executed command: &lt;snip&gt;/llvm-project/build/bin/mlir-opt &lt;snip&gt;/llvm-project/mlir/test/Dialect/XeGPU/XeGPUOps.mlir
# .---command stderr------------
# | =================================================================
# | ==2772558==ERROR: AddressSanitizer: stack-use-after-return on address 0x7fd2c2c42b90 at pc 0x55e406d54614 bp 0x7ffc810e4070 sp 0x7ffc810e4068
# | READ of size 8 at 0x7fd2c2c42b90 thread T0
# |     #<!-- -->0 0x55e406d54613 in operator()&lt;long int const*&gt; /usr/include/c++/13/bits/predefined_ops.h:318
# |     #<!-- -->1 0x55e406d54613 in __count_if&lt;long int const*, __gnu_cxx::__ops::_Iter_pred&lt;mlir::verifyListOfOperandsOrIntegers(Operation*, llvm::StringRef, unsigned int, llvm::ArrayRef&lt;long int&gt;, ValueRange)::&lt;lambda(int64_t)&gt; &gt; &gt; /usr/include/c++/13/bits/stl_algobase.h:2125
# |     #<!-- -->2 0x55e406d54613 in count_if&lt;long int const*, mlir::verifyListOfOperandsOrIntegers(Operation*, 
...

Patch is 36.31 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/85653.diff

8 Files Affected:

(modified) mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h (+1-6)
(modified) mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td (-61)
(modified) mlir/include/mlir/Dialect/XeGPU/IR/XeGPUDialect.td (+2-2)
(modified) mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td (+2-289)
(modified) mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td (+1-103)
(modified) mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp (+3-70)
(modified) mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp (+3-174)
(removed) mlir/test/Dialect/XeGPU/XeGPUOps.mlir (-62)

diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h
index 87aabdc015fea5..7aaa4ecc7ee77a 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h
@@ -9,12 +9,7 @@
 #ifndef MLIR_DIALECT_XEGPU_IR_XEGPU_H
 #define MLIR_DIALECT_XEGPU_IR_XEGPU_H
 
-#include "mlir/Bytecode/BytecodeOpInterface.h"
-#include "mlir/IR/BuiltinTypes.h"
-#include "mlir/IR/Dialect.h"
-#include "mlir/Interfaces/ShapedOpInterfaces.h"
-#include "mlir/Interfaces/SideEffectInterfaces.h"
-#include "mlir/Interfaces/ViewLikeInterface.h"
+#include <mlir/IR/Dialect.h>
 
 namespace mlir {
 namespace xegpu {
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
index cd38549f1ccf43..bb325c272e3324 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
@@ -10,7 +10,6 @@
 #define MLIR_DIALECT_XEGPU_IR_XEGPUATTRS_TD
 
 include "mlir/Dialect/XeGPU/IR/XeGPUDialect.td"
-include "mlir/IR/EnumAttr.td"
 
 class XeGPUAttr<string name, string attrMnemonic, list<Trait> traits = [],
                 string baseCppClass = "::mlir::Attribute">
@@ -18,64 +17,4 @@ class XeGPUAttr<string name, string attrMnemonic, list<Trait> traits = [],
   let mnemonic = attrMnemonic;
 }
 
-def XeGPU_TensorDescAttr: XeGPUAttr<"TensorDesc", "tdesc_attr"> {
-  let parameters = (ins
-    OptionalParameter<"MemoryScopeAttr">: $memory_scope,
-    OptionalParameter<"IntegerAttr", "1">: $array_length,
-    OptionalParameter<"BoolAttr", "true">: $boundary_check
-  );
-
-  let builders = [
-    AttrBuilder<(ins
-      CArg<"xegpu::MemoryScope", "xegpu::MemoryScope::Global">:$memory_scope,
-      CArg<"int", "1">:$array_length,
-      CArg<"bool", "true">: $boundary_check
-    )>
-  ];
-
-  let assemblyFormat = "`<` struct(params) `>`";
-}
-
-//===----------------------------------------------------------------------===//
-// XeGPU Memory Scope Enums.
-//===----------------------------------------------------------------------===//
-def XeGPU_MemoryScopeGlobal: I32EnumAttrCase<"Global", 0, "global">;
-def XeGPU_MemoryScopeShared: I32EnumAttrCase<"SLM", 1, "slm">;
-def XeGPU_MemoryScope: I32EnumAttr<"MemoryScope", 
-      "The address space of the memory the tensor descritor is created for", 
-      [XeGPU_MemoryScopeGlobal, XeGPU_MemoryScopeShared]> {
-  let genSpecializedAttr = 0;
-  let cppNamespace = "::mlir::xegpu";
-}
-
-def XeGPU_MemoryScopeAttr: 
-  EnumAttr<XeGPU_Dialect, XeGPU_MemoryScope, "memory_scope"> {
-    let assemblyFormat = "$value";
-}
-
-//===----------------------------------------------------------------------===//
-// XeGPU Cache Enums.
-//===----------------------------------------------------------------------===//
-def XeGPU_CachePolicyCached:        I32EnumAttrCase<"CACHED", 0, "cached">;                    // valid for read and write
-def XeGPU_CachePolicyUncached:      I32EnumAttrCase<"UNCACHED", 1, "uncached">;                // valid for read and write
-def XeGPU_CachePolicyStreaming:     I32EnumAttrCase<"STREAMING", 2, "streaming">;              // valid for read only
-def XeGPU_CachePolicyInvalid:       I32EnumAttrCase<"READ_INVALIDATE", 3, "read_invalidate">;  // valid for read only
-def XeGPU_CachePolicyWriteBack:     I32EnumAttrCase<"WRITE_BACK", 4, "write_back">;            // valid for write only
-def XeGPU_CachePolicyWriteThrough:  I32EnumAttrCase<"WRITE_THROUGH", 5, "write_through">;      // valid for write only
-
-def XeGPU_CachePolicyEnums : I32EnumAttr<"CachePolicy", "Cache policy", 
-  [XeGPU_CachePolicyCached, XeGPU_CachePolicyUncached, 
-   XeGPU_CachePolicyStreaming, XeGPU_CachePolicyInvalid,
-   XeGPU_CachePolicyWriteBack, XeGPU_CachePolicyWriteThrough]> {
-  let genSpecializedAttr = 0;
-  let cppNamespace = "::mlir::xegpu";
-}
-
-def XeGPU_CacheHintAttr 
-  : EnumAttr<XeGPU_Dialect, XeGPU_CachePolicyEnums, "cache_hint"> {
-    let assemblyFormat = "`<` $value `>`";
-}
-
-
-
 #endif // MLIR_DIALECT_XEGPU_IR_XEGPUATTRS_TD
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUDialect.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUDialect.td
index c2f09319c790e0..3851275ad30a0a 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUDialect.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUDialect.td
@@ -23,8 +23,8 @@ def XeGPU_Dialect : Dialect {
       the lower-level GPU compiler.
     }];
 
-    let useDefaultTypePrinterParser = true;
-    let useDefaultAttributePrinterParser = true;
+    // let useDefaultTypePrinterParser = true;
+    // let useDefaultAttributePrinterParser = true;
 }
 
 #endif // MLIR_DIALECT_XEGPU_IR_XEGPUDIALECT_TD
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
index 1f90dcb4bf55ad..5825ef9195b03f 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
@@ -9,13 +9,10 @@
 #ifndef MLIR_DIALECT_XEGPU_IR_XEGPUOPS_TD
 #define MLIR_DIALECT_XEGPU_IR_XEGPUOPS_TD
 
-include "mlir/IR/AttrTypeBase.td"
 include "mlir/Dialect/XeGPU/IR/XeGPUAttrs.td"
 include "mlir/Dialect/XeGPU/IR/XeGPUDialect.td"
 include "mlir/Dialect/XeGPU/IR/XeGPUTypes.td"
-include "mlir/Interfaces/ShapedOpInterfaces.td"
-include "mlir/Interfaces/SideEffectInterfaces.td"
-include "mlir/Interfaces/ViewLikeInterface.td"
+
 
 // Base class for dialect operations. This operation inherits from the base
 // `Op` class in OpBase.td, and provides:
@@ -23,291 +20,7 @@ include "mlir/Interfaces/ViewLikeInterface.td"
 //   * The mnemonic for the operation, or the name without the dialect prefix.
 //   * A list of traits for the operation.
 class XeGPU_Op<string mnemonic, list<Trait> traits = []>:
-          Op<XeGPU_Dialect, mnemonic, traits> {
-
-  code extraBaseClassDeclaration = [{
-    void printProperties(::mlir::MLIRContext *ctx,
-            ::mlir::OpAsmPrinter &p, const Properties &prop) {
-      Attribute propAttr = getPropertiesAsAttr(ctx, prop);
-      if (propAttr)
-        p << "<" << propAttr << ">";
-    }
-
-    static ::mlir::ParseResult parseProperties(::mlir::OpAsmParser &parser,
-                                     ::mlir::OperationState &result) {
-      if (mlir::succeeded(parser.parseLess())) {
-        if (parser.parseAttribute(result.propertiesAttr) || parser.parseGreater())
-          return failure();
-      }
-      return success();
-    }
-
-  }];
-}
-
-
-def XeGPU_CreateNdDescOp: XeGPU_Op<"create_nd_tdesc", [Pure, ViewLikeOpInterface, 
-                        AttrSizedOperandSegments, OffsetSizeAndStrideOpInterface]> {
-
-  let summary = "Create nd-tensor descriptor operation";
-  let description = [{
-    The "create_nd_tdesc" operation creates a TensorDescType which represents
-    a sub-view of a 2D memory region (It can be extended to support n-D memory
-    region if needed in future). Elements in the subview continuous in each 
-    dimention. It encodes the following important information for supporting 
-    Intel hardware features:
-
-    * source: an object representing (starting address/pointer of) a 2D memory region. 
-        It can be either a 2D memref object, or simply a pointer represented by uint64_t type.
-        for the later case, the shape and layout information of the 2D memory region should 
-        be explicitly passed via `dynamic_shape` and `dynamic_strides` parameters.
-    * offsets: two index values represents offsets from the "source" at the each dimension 
-        at which the subview of the target memory will be created. It is encoded via two
-        variables, including "dynamic_offsets" and "static_offsets", such that it can
-        accept various forms, such as, operands (e.g., [%c0, %c]) and attributes (e.g., [2, 4])).
-    * shape: the shape information of the memory region pointed by the "source".  It is 
-        typically encoded via the MemRefType of the source, e.g., memref<4096x4096xf16>. 
-        But if "source" is simply a pointer represented as uint64_t type, or a memref 
-        type without shape information e.g., memref<?x?xf16>, the shape information has 
-        to be explicitly passed via the "dynamic_shape" argument. Currently "dynamic_shape" 
-        only accepts operands(e.g., [%c4096, %c4096]), not attributes(e.g., [4096, 4096]).
-    * strides: the strides of the memory region pointed by the "source". Similar to shape, 
-        it is typically encoded via the MemRefType of the source too. But if "source" is 
-        simply a pointer represented as uint64_t type, or a memref type without shape 
-        information e.g., memref<?x?xf16>, the strides information has to be explicitly 
-        passed via the "dynamic_strides" argument. And it currently only accepts operands two.
-
-    Example 1 (suppose the tensor shape inferred by the compiler is 8x16):
-    %0 = memref.alloc() : memref<1024x1024xf32>
-    %c0 = arith.constant 0 : index
-    %c1 = arith.constant 1 : index
-    %1 = xegpu.create_nd_tdesc %0[%c0, %c0]: memref<1024x1024xf32> -> TensorDesc<8x16xf32>
-
-    Example 2 (suppose the tensor shape inferred by the compiler is 8x16):
-    %0 = memref.alloc(%h, %w) : memref<?x?xf32>
-    %c0 = arith.constant 0 : index
-    %c1 = arith.constant 1 : index
-    %1 = xegpu.create_nd_tdesc %0[%c0, %c0], [%h, %w], [%w, %c1]: memref<?x?xf32> -> TensorDesc<8x16xf32>
-
-    Example 3 (suppose the tensor shape inferred by the compiler is 8x16):
-    %0 = ... : ui64
-    %c0 = arith.constant 0 : index
-    %c1 = arith.constant 1 : index
-    %1 = xegpu.create_nd_tdesc %0[%c0, %c0], [%h, %w], [%w, %c1]: ui64 -> TensorDesc<8x16xf32>
-  }];
-
-  let arguments = (ins 
-    XeGPU_BaseAddrType: $source, 
-    Variadic<Index>: $offsets, 
-    Variadic<Index>: $shape, 
-    Variadic<Index>: $strides,
-    DenseI64ArrayAttr: $static_offsets
-  );
-  let results = (outs XeGPU_TensorDesc: $TensorDesc);
-
-  let assemblyFormat = [{
-    $source ``
-    custom<DynamicIndexList>($offsets, $static_offsets)
-    (`,` `[` $shape^ `]` `,` `[` $strides `]`)?
-    attr-dict `:` type($source) `->` qualified(type($TensorDesc))
-  }];
-
-  let hasVerifier = 1;
-
-  let builders = [
-    OpBuilder<(ins "Type": $tdesc, "TypedValue<MemRefType>": $source, 
-                   "llvm::ArrayRef<OpFoldResult>": $offsets)>,
-
-    OpBuilder<(ins "Type": $tdesc, "TypedValue<IntegerType> ": $source, 
-                   "llvm::ArrayRef<OpFoldResult>": $offsets,
-                   "ValueRange": $shape, "ValueRange": $stride)>
-  ];
-
-  let extraClassDeclaration = extraBaseClassDeclaration # [{
-    /// Returns the type of the source memref operand.
-    Type getSourceType() {
-      return getSource().getType();
-    }
-
-    /// Returns the type of the result TensorDesc.
-    xegpu::TensorDescType getType() {
-      return getTensorDesc().getType();
-    }
-
-    /// Return the element type of the TensorDesc
-    Type getElementType() {
-      return getType().getElementType();
-    }
-
-    /// Return the shape of the TensorDesc
-    llvm::ArrayRef<int64_t> getTensorDescShape() {
-      return getType().getShape();
-    }
-
-    /// wrapper for matching with OffsetSizeAndStrideOpInterface
-    OperandRange getSizes() {
-      return getShape();
-    }
-
-    /// wrapper for matching with OffsetSizeAndStrideOpInterface
-    /// If source is IntegerType and `shape` is filled, it will 
-    /// return an array of ShapedType::kDynamic representing dynamic 
-    /// shape encoded in the `shape` argument will be used. Presence
-    /// of `shape` overides static shape from source memref type.
-    SmallVector<int64_t> getStaticSizes() {
-      if (getSourceType().isa<IntegerType>() || getShape().size()) {
-        auto dims = getMixedOffsets().size();
-        return SmallVector<int64_t>(dims, ShapedType::kDynamic);
-      }
-      auto memrefType = getSourceType().dyn_cast<MemRefType>();
-      return SmallVector<int64_t>(memrefType.getShape());
-    }
-
-    /// wrapper for matching with OffsetSizeAndStrideOpInterface
-    /// If source is IntegerType or `strides` is filled, it will 
-    /// return an array of ShapedType::kDynamic representing dynamic 
-    /// strides encoded in the `strides` argument will be used. Presence
-    /// of `strides` overides static strides from source memref type.
-    SmallVector<int64_t> getStaticStrides() {
-      if (getSourceType().isa<IntegerType>() || getStrides().size()) {
-        auto dims = getMixedOffsets().size();
-        return SmallVector<int64_t>(dims, ShapedType::kDynamic);
-      }
-      auto memrefType = getSourceType().dyn_cast<MemRefType>();
-      auto [strides, offset] = getStridesAndOffset(memrefType);
-      return strides;
-    }
-
-    /// Return the expected rank of each of the`static_offsets`, 
-    /// `static_shape` and `static_strides` attributes.
-    std::array<unsigned, 3> getArrayAttrMaxRanks() {
-      unsigned rank;
-      if (auto ty = getSourceType().dyn_cast<MemRefType>()) {
-        rank = ty.getRank();
-      } else {
-        rank = (unsigned)getMixedOffsets().size();
-      }
-      return {rank, rank, rank};
-    }
-    
-    /// Return the number of leading operands before the `offsets`, 
-    /// `shape` and `strides` operands.
-    static unsigned getOffsetSizeAndStrideStartOperandIndex() { return 1; }
-
-    mlir::Value getViewSource() { return getSource(); }
-  }];
-}
-
-def XeGPU_PrefetchNdOp : XeGPU_Op<"prefetch_nd", []> {
-  let summary = "prefetches a nD block to cache";
-  let description = [{
-    It issues an instruction to prefetch the data from memory to each 
-    level of the cache based on their cache policy.
-
-    Example:
-    ```
-      xegpu.prefetch_nd %tdesc {l1_hint = #xegpu.cache_hint<cached>, 
-                                l2_hint = #xegpu.cache_hint<cached>, 
-                                l3_hint = #xegpu.cache_hint<cached>}
-        : !xegpu.tensor_desc<8x16xf16>
-    ```
-
-  }];
-
-  let arguments = (ins XeGPU_TensorDesc: $TensorDesc,
-                       OptionalAttr<XeGPU_CacheHintAttr>: $l1_hint,
-                       OptionalAttr<XeGPU_CacheHintAttr>: $l2_hint,
-                       OptionalAttr<XeGPU_CacheHintAttr>: $l3_hint);
-                       
-  let extraClassDeclaration = extraBaseClassDeclaration;
-
-  let assemblyFormat = "$TensorDesc prop-dict attr-dict `:` qualified(type($TensorDesc))";
-}
-
-
-def XeGPU_LoadNdOp : XeGPU_Op<"load_nd"> {
-  let summary = "loads a n-D block from memory (represented by TensorDesc)" 
-                "to registers (represented by vector)";
-  let description = [{
-    LoadNdOp essentially mimics the hardware block read instruction to read 
-    a block of data from memory to register. It takes a set of optional cache 
-    hints for each level of cache, L1, L2 and L3. If hardware does not have a 
-    correspoding cache, Corresponding cache hint attribute will be masked.
-    vnni transform is an hardware feature for Intel GPU, which is used to 
-    do data packing during the load for B operand of matrix operation, if 
-    the bit width of the data type is less then 32 bits, e.g., fp16. And 
-    transpose is another Intel hardware feature, which will do transpose
-    operation when loading the data if the bit width of the data type is 
-    fp32 or fp64. It implies that vnni and transpose cannot exit at the 
-    same time.
-
-    Example:
-    ```
-      xegpu.load_nd %1 {transpose = [1, 0],
-                        l1_hint = #xegpu.cache_hint<cached>, 
-                        l2_hint = #xegpu.cache_hint<uncached>, 
-                        l3_hint = #xegpu.cache_hint<streaming>}
-              : !xegpu.tensor_desc<8x16xf32> -> vector<16x8xf32>
-    ```
-
-
-  }];
-
-  let arguments = (ins XeGPU_TensorDesc: $TensorDesc,
-                       OptionalAttr<I64Attr>: $vnni_axis,
-                       OptionalAttr<DenseI64ArrayAttr>: $transpose,
-                       OptionalAttr<XeGPU_CacheHintAttr>: $l1_hint,
-                       OptionalAttr<XeGPU_CacheHintAttr>: $l2_hint,
-                       OptionalAttr<XeGPU_CacheHintAttr>: $l3_hint);
-
-  let results = (outs XeGPU_ValueType: $value);
-
-  let extraClassDeclaration = extraBaseClassDeclaration # [{
-    VectorType getType() {
-      return llvm::dyn_cast<VectorType>(getValue().getType());
-    }
-
-    xegpu::TensorDescType getTensorDescType() {
-      return getTensorDesc().getType();
-    }
-  }];
-
-  let assemblyFormat = "$TensorDesc prop-dict attr-dict `:` qualified(type($TensorDesc)) `->` type($value)";
-  let hasVerifier = 1;
-}
-
-def XeGPU_StoreNdOp : XeGPU_Op<"store_nd", []> {
-  let summary = "stores a n-D block register region back to memory, currently only supports 2D";
-
-  let description = [{
-    StoreNdOp essentially mimics the hardware block write instruction io
-    write a block of data from register into the memory region as described 
-    by the TensorDesc. It takes a set of optional cache hints for each level 
-    of cache, L1, L2 and L3. If hardware does not have a correspoding cache, 
-    Corresponding cache hint attribute will be masked.
-
-    Example:
-    ```
-      xegpu.store_nd %3, %2 {l1_hint = #xegpu.cache_hint<uncached>,
-                             l2_hint = #xegpu.cache_hint<write_back>, 
-                             l3_hint = #xegpu.cache_hint<write_through>}
-                             : vector<8x16xf16>, !xegpu.tensor_desc<8x16xf16>
-    ```
-
-
-  }];
-
-  let arguments = (ins XeGPU_ValueType: $value,
-                       XeGPU_TensorDesc: $TensorDesc,
-                       OptionalAttr<XeGPU_CacheHintAttr>: $l1_hint,
-                       OptionalAttr<XeGPU_CacheHintAttr>: $l2_hint,
-                       OptionalAttr<XeGPU_CacheHintAttr>: $l3_hint);
-
-  let extraClassDeclaration = extraBaseClassDeclaration;
+          Op<XeGPU_Dialect, mnemonic, traits>;
 
-  let assemblyFormat = [{$value `,` $TensorDesc prop-dict attr-dict 
-                        `:` type($value) `,` qualified(type($TensorDesc))}];
-  let hasVerifier = 1;
-}
 
 #endif // MLIR_DIALECT_XEGPU_IR_XEGPUOPS_TD
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td
index 19ac1693712dd8..1d75bb4e2906fe 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td
@@ -9,9 +9,9 @@
 #ifndef MLIR_DIALECT_XEGPU_IR_XEGPUTYPES_TD
 #define MLIR_DIALECT_XEGPU_IR_XEGPUTYPES_TD
 
+include "mlir/IR/BuiltinTypes.td"
 include "mlir/Dialect/XeGPU/IR/XeGPUAttrs.td"
 include "mlir/Dialect/XeGPU/IR/XeGPUDialect.td"
-include "mlir/IR/BuiltinTypes.td"
 
 def XeGPU_IntType: AnyTypeOf<[I1, I8, I16, I32, I64, SI1, SI8, SI16, SI32, SI64, UI1, UI8, UI16, UI32, UI64]>;
 def XeGPU_FloatType: AnyTypeOf<[F16, F32, F64, BF16, TF32]>;
@@ -30,106 +30,4 @@ class XeGPUTypeDef<string name, string typeMnemonic, list<Trait> traits = [],
   let mnemonic = typeMnemonic;
 }
 
-def XeGPU_TensorDesc: XeGPUTypeDef<"TensorDesc", "tensor_desc",
-        [ShapedTypeInterface], "::mlir::TensorType"> {
-  let summary = "TensorDesc describing regions of interested data.";
-  let description = [{
-    TensorDesc is a type designed to describe regions of the interested data as well as some 
-    features that are unique to Intel hardware. Different with the builtin tensor type in MLIR, 
-    it essentially only contains the meta data, and doesn't hold the data by itself. It is designed 
-    to mainly support 2D block load/store and DPAS (matrix multiplication instruction) on Intel GPU. 
-    It encodes the following information:
-
-    * shape:  the sizes/shape of the intereted data block, e.g., 8x16 means 8 rows
-              and each row contains 16 contiguous data element. The rows could be
-              either contiguous or not, depends on whether the encoding attribute
-              is set or not.
-    * element_type: the data type of the data element, e.g., f16, f32.
-
-    Similar to the builtin tensor, it also provides an optinal attribute to encoding 
-    the following information via the TensorDescAttr object:
-    * memory_scope (xegpu::MemoryScope): [optional] where the data is located, 
-                global memory or shared memory. It is default to Global.
-    * array_length (int): [optional] The number of contiguous blocks with size as `shape`,
-               that will be loaded by block load at a time. It is default to 1.
-  ...
[truncated]

chencha3 · 2024-03-18T20:18:44Z

Hi @bviyer, do you mind sharing me instructions to reproduce this bug?

joker-eph · 2024-03-18T20:21:04Z

This seems like a regular ASAN failure, i would think a bot should have covered this? You didn’t get any email @chencha3 ?

chencha3 · 2024-03-18T21:06:42Z

no, I didn't get emails about this. I just double checked my mailbox. It looks like the bug is around here (https://github.com/llvm/llvm-project/blob/main/mlir/lib/Interfaces/ViewLikeInterface.cpp#L30), but I am not clear how it will generate stack-use-after-return error, since it only returns logic result, as well as its caller. @joker-eph

bviyer · 2024-03-18T21:06:52Z

I build LLVM (with mlir and clang) with the following cmake flags: cmake -DLLVM_USE_SANITIZER=Address -DLLVM_ENABLE_PROJECTS="mlir;clang" -DLLVM_TARGETS_TO_BUILD=X86 ../llvm -DCMAKE_BUILD_TYPE=Debug -G "Ninja" . After that, (with your patch already applied) I just did ninja check-mlir and you can see the failure.

Note: my build directory is in llvm-project/build

chencha3 · 2024-03-18T21:24:40Z

Thanks, let me have it a try. @bviyer

joker-eph · 2024-03-18T21:34:37Z

I am not clear how it will generate stack-use-after-return error, since it only returns logic result

Well ASAN here likely means that the staticVals array is out-of-scope, unfortunately we'd need to see more of the backtrace to figure where it comes from.

chencha3 · 2024-03-18T21:45:05Z

Hi @bviyer, I didn't reproduce it on the branch I submitted the PR. I am going to switch to the main branch and test it again. Here is the output using your instructions:

Total Discovered Tests: 2704
  Unsupported      :  479 (17.71%)
  Passed           : 2224 (82.25%)
  Expectedly Failed:    1 (0.04%)

chencha3 · 2024-03-18T22:21:06Z

Hi @bviyer, I switched to this commit ca04b56a8b26, the one just before your revert. I still cannot reproduce it with this output. could it be different compiler version? I am using gcc-11.4.0 on Ubuntu-22.04.

Total Discovered Tests: 2716
  Unsupported      :  480 (17.67%)
  Passed           : 2235 (82.29%)
  Expectedly Failed:    1 (0.04%)

bviyer · 2024-03-18T23:46:49Z

Here is what my sytem has:

c++ (Debian 13.2.0-10) 13.2.0
Copyright (C) 2023 Free Software Foundation, Inc.
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

bviyer · 2024-03-19T00:36:37Z

@chencha3 and @joker-eph , Here is the full call-stack for the crash (hope this helps):

FAIL: MLIR :: Dialect/XeGPU/XeGPUOps.mlir (95 of 2716)
******************** TEST 'MLIR :: Dialect/XeGPU/XeGPUOps.mlir' FAILED ********************
Exit Code: 2

Command Output (stdout):
--
# RUN: at line 1
/<snip>/llvm-project/build/bin/mlir-opt /<snip>/llvm-project/mlir/test/Dialect/XeGPU/XeGPUOps.mlir | /<snip>/llvm-project/build/bin/FileCheck /<snip>/llvm-project/mlir/test/Dialect/XeGPU/XeGPUOps.mlir
# executed command: /<snip>/llvm-project/build/bin/mlir-opt /<snip>/llvm-project/mlir/test/Dialect/XeGPU/XeGPUOps.mlir
# .---command stderr------------
# | =================================================================
# | ==2980947==ERROR: AddressSanitizer: stack-use-after-return on address 0x7fa11e242b90 at pc 0x555f76a1d614 bp 0x7fffa2f443f0 sp 0x7fffa2f443e8
# | READ of size 8 at 0x7fa11e242b90 thread T0
# |     #0 0x555f76a1d613 in operator()<long int const*> /usr/include/c++/13/bits/predefined_ops.h:318
# |     #1 0x555f76a1d613 in __count_if<long int const*, __gnu_cxx::__ops::_Iter_pred<mlir::verifyListOfOperandsOrIntegers(Operation*, llvm::StringRef, unsigned int, llvm::ArrayRef<long int>, ValueRange)::<lambda(int64_t)> > > /usr/include/c++/13/bits/stl_algobase.h:2125
# |     #2 0x555f76a1d613 in count_if<long int const*, mlir::verifyListOfOperandsOrIntegers(Operation*, llvm::StringRef, unsigned int, llvm::ArrayRef<long int>, ValueRange)::<lambda(int64_t)> > /usr/include/c++/13/bits/stl_algo.h:4104
# |     #3 0x555f76a1d613 in count_if<llvm::ArrayRef<long int>&, mlir::verifyListOfOperandsOrIntegers(Operation*, llvm::StringRef, unsigned int, llvm::ArrayRef<long int>, ValueRange)::<lambda(int64_t)> > /<snip>/llvm-project/llvm/include/llvm/ADT/STLExtras.h:1931
# |     #4 0x555f76a1d613 in mlir::verifyListOfOperandsOrIntegers(mlir::Operation*, llvm::StringRef, unsigned int, llvm::ArrayRef<long>, mlir::ValueRange) /<snip>/llvm-project/mlir/lib/Interfaces/ViewLikeInterface.cpp:30
# |     #5 0x555f76a2100e in mlir::detail::verifyOffsetSizeAndStrideOp(mlir::OffsetSizeAndStrideOpInterface) /<snip>/llvm-project/mlir/lib/Interfaces/ViewLikeInterface.cpp:63
# |     #6 0x555f69f7f570 in mlir::detail::OffsetSizeAndStrideOpInterfaceTrait<mlir::xegpu::CreateNdDescOp>::verifyTrait(mlir::Operation*) /<snip>/llvm-project/build/tools/mlir/include/mlir/Interfaces/ViewLikeInterface.h.inc:384
# |     #7 0x555f69f7f570 in std::enable_if<llvm::detail::detector<void, mlir::op_definition_impl::has_verify_trait, mlir::OffsetSizeAndStrideOpInterface::Trait<mlir::xegpu::CreateNdDescOp> >::value_t::value, mlir::LogicalResult>::type mlir::op_definition_impl::verifyTrait<mlir::OffsetSizeAndStrideOpInterface::Trait<mlir::xegpu::CreateNdDescOp> >(mlir::Operation*) /<snip>/llvm-project/mlir/include/mlir/IR/OpDefinition.h:1620
# |     #8 0x555f69f7f933 in mlir::LogicalResult mlir::op_definition_impl::verifyTraits<mlir::OpTrait::ZeroRegions<mlir::xegpu::CreateNdDescOp>, mlir::OpTrait::OneResult<mlir::xegpu::CreateNdDescOp>, mlir::OpTrait::OneTypedResult<mlir::xegpu::TensorDescType>::Impl<mlir::xegpu::CreateNdDescOp>, mlir::OpTrait::ZeroSuccessors<mlir::xegpu::CreateNdDescOp>, mlir::OpTrait::AtLeastNOperands<1u>::Impl<mlir::xegpu::CreateNdDescOp>, mlir::OpTrait::AttrSizedOperandSegments<mlir::xegpu::CreateNdDescOp>, mlir::OpTrait::OpInvariants<mlir::xegpu::CreateNdDescOp>, mlir::BytecodeOpInterface::Trait<mlir::xegpu::CreateNdDescOp>, mlir::ConditionallySpeculatable::Trait<mlir::xegpu::CreateNdDescOp>, mlir::OpTrait::AlwaysSpeculatableImplTrait<mlir::xegpu::CreateNdDescOp>, mlir::MemoryEffectOpInterface::Trait<mlir::xegpu::CreateNdDescOp>, mlir::ViewLikeOpInterface::Trait<mlir::xegpu::CreateNdDescOp>, mlir::OffsetSizeAndStrideOpInterface::Trait<mlir::xegpu::CreateNdDescOp> >(mlir::Operation*) /<snip>/llvm-project/mlir/include/mlir/IR/OpDefinition.h:1631
# |     #9 0x555f69f7fa2d in mlir::Op<mlir::xegpu::CreateNdDescOp, mlir::OpTrait::ZeroRegions, mlir::OpTrait::OneResult, mlir::OpTrait::OneTypedResult<mlir::xegpu::TensorDescType>::Impl, mlir::OpTrait::ZeroSuccessors, mlir::OpTrait::AtLeastNOperands<1u>::Impl, mlir::OpTrait::AttrSizedOperandSegments, mlir::OpTrait::OpInvariants, mlir::BytecodeOpInterface::Trait, mlir::ConditionallySpeculatable::Trait, mlir::OpTrait::AlwaysSpeculatableImplTrait, mlir::MemoryEffectOpInterface::Trait, mlir::ViewLikeOpInterface::Trait, mlir::OffsetSizeAndStrideOpInterface::Trait>::verifyInvariants(mlir::Operation*) /<snip>/llvm-project/mlir/include/mlir/IR/OpDefinition.h:2012
# |     #10 0x555f5e58bd44 in mlir::LogicalResult llvm::detail::UniqueFunctionBase<mlir::LogicalResult, mlir::Operation*>::CallImpl<mlir::LogicalResult (* const)(mlir::Operation*)>(void*, mlir::Operation*) /<snip>/llvm-project/llvm/include/llvm/ADT/FunctionExtras.h:221
# |     #11 0x555f5e64fe9a in llvm::unique_function<mlir::LogicalResult (mlir::Operation*) const>::operator()(mlir::Operation*) const /<snip>/llvm-project/llvm/include/llvm/ADT/FunctionExtras.h:411
# |     #12 0x555f69f7cb8d in mlir::RegisteredOperationName::Model<mlir::xegpu::CreateNdDescOp>::verifyInvariants(mlir::Operation*) /<snip>/llvm-project/mlir/include/mlir/IR/OperationSupport.h:558
# |     #13 0x555f6e15f782 in mlir::OperationName::verifyInvariants(mlir::Operation*) const /<snip>/llvm-project/mlir/include/mlir/IR/OperationSupport.h:317
# |     #14 0x555f6e15f782 in verifyOnEntrance /<snip>/llvm-project/mlir/lib/IR/Verifier.cpp:179
# |     #15 0x555f6e162d52 in operator()<mlir::Operation> /<snip>/llvm-project/mlir/lib/IR/Verifier.cpp:293
# |     #16 0x555f6e162d52 in operator()<(anonymous namespace)::OperationVerifier::verifyOperation(mlir::Operation&)::<lambda(auto:46*)> > /<snip>/llvm-project/mlir/lib/IR/Verifier.cpp:277
# |     #17 0x555f6e162d52 in verifyOperation /<snip>/llvm-project/mlir/lib/IR/Verifier.cpp:292
# |     #18 0x555f6e162d52 in verifyOpAndDominance /<snip>/llvm-project/mlir/lib/IR/Verifier.cpp:85
# |     #19 0x555f6e16d6fb in operator() /<snip>/llvm-project/mlir/lib/IR/Verifier.cpp:229
# |     #20 0x555f6e16d6fb in failableParallelForEach<mlir::Operation**, (anonymous namespace)::OperationVerifier::verifyOnExit(mlir::Operation&)::<lambda(mlir::Operation*)> > /<snip>/llvm-project/mlir/include/mlir/IR/Threading.h:46
# |     #21 0x555f6e16d6fb in failableParallelForEach<llvm::SmallVector<mlir::Operation*>&, (anonymous namespace)::OperationVerifier::verifyOnExit(mlir::Operation&)::<lambda(mlir::Operation*)> > /<snip>/llvm-project/mlir/include/mlir/IR/Threading.h:92
# |     #22 0x555f6e16d6fb in verifyOnExit /<snip>/llvm-project/mlir/lib/IR/Verifier.cpp:227
# |     #23 0x555f6e16330d in operator()<mlir::Operation> /<snip>/llvm-project/mlir/lib/IR/Verifier.cpp:286
# |     #24 0x555f6e16330d in operator()<(anonymous namespace)::OperationVerifier::verifyOperation(mlir::Operation&)::<lambda(auto:45*)> > /<snip>/llvm-project/mlir/lib/IR/Verifier.cpp:277
# |     #25 0x555f6e16330d in verifyOperation /<snip>/llvm-project/mlir/lib/IR/Verifier.cpp:285
# |     #26 0x555f6e16330d in verifyOpAndDominance /<snip>/llvm-project/mlir/lib/IR/Verifier.cpp:85
# |     #27 0x555f6e16d6fb in operator() /<snip>/llvm-project/mlir/lib/IR/Verifier.cpp:229
# |     #28 0x555f6e16d6fb in failableParallelForEach<mlir::Operation**, (anonymous namespace)::OperationVerifier::verifyOnExit(mlir::Operation&)::<lambda(mlir::Operation*)> > /<snip>/llvm-project/mlir/include/mlir/IR/Threading.h:46
# |     #29 0x555f6e16d6fb in failableParallelForEach<llvm::SmallVector<mlir::Operation*>&, (anonymous namespace)::OperationVerifier::verifyOnExit(mlir::Operation&)::<lambda(mlir::Operation*)> > /<snip>/llvm-project/mlir/include/mlir/IR/Threading.h:92
# |     #30 0x555f6e16d6fb in verifyOnExit /<snip>/llvm-project/mlir/lib/IR/Verifier.cpp:227
# |     #31 0x555f6e16330d in operator()<mlir::Operation> /<snip>/llvm-project/mlir/lib/IR/Verifier.cpp:286
# |     #32 0x555f6e16330d in operator()<(anonymous namespace)::OperationVerifier::verifyOperation(mlir::Operation&)::<lambda(auto:45*)> > /<snip>/llvm-project/mlir/lib/IR/Verifier.cpp:277
# |     #33 0x555f6e16330d in verifyOperation /<snip>/llvm-project/mlir/lib/IR/Verifier.cpp:285
# |     #34 0x555f6e16330d in verifyOpAndDominance /<snip>/llvm-project/mlir/lib/IR/Verifier.cpp:85
# |     #35 0x555f6e16c2a3 in mlir::verify(mlir::Operation*, bool) /<snip>/llvm-project/mlir/lib/IR/Verifier.cpp:423
# |     #36 0x555f766d5ea5 in finalize /<snip>/llvm-project/mlir/lib/AsmParser/Parser.cpp:842
# |     #37 0x555f766d5ea5 in parse /<snip>/llvm-project/mlir/lib/AsmParser/Parser.cpp:2736
# |     #38 0x555f766d5ea5 in mlir::parseAsmSourceFile(llvm::SourceMgr const&, mlir::Block*, mlir::ParserConfig const&, mlir::AsmParserState*, mlir::AsmParserCodeCompleteContext*) /<snip>/llvm-project/mlir/lib/AsmParser/Parser.cpp:2790
# |     #39 0x555f6d85ca8f in mlir::parseSourceFile(std::shared_ptr<llvm::SourceMgr> const&, mlir::Block*, mlir::ParserConfig const&, mlir::LocationAttr*) /<snip>/llvm-project/mlir/lib/Parser/Parser.cpp:46
# |     #40 0x555f6d85284b in mlir::OwningOpRef<mlir::ModuleOp> mlir::detail::parseSourceFile<mlir::ModuleOp, std::shared_ptr<llvm::SourceMgr> const&>(mlir::ParserConfig const&, std::shared_ptr<llvm::SourceMgr> const&) /<snip>/llvm-project/mlir/include/mlir/Parser/Parser.h:159
# |     #41 0x555f6d85284b in mlir::OwningOpRef<mlir::ModuleOp> mlir::parseSourceFile<mlir::ModuleOp>(std::shared_ptr<llvm::SourceMgr> const&, mlir::ParserConfig const&) /<snip>/llvm-project/mlir/include/mlir/Parser/Parser.h:189
# |     #42 0x555f6d85284b in mlir::parseSourceFileForTool(std::shared_ptr<llvm::SourceMgr> const&, mlir::ParserConfig const&, bool) /<snip>/llvm-project/mlir/include/mlir/Tools/ParseUtilities.h:31
# |     #43 0x555f6d85284b in performActions /<snip>/llvm-project/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp:383
# |     #44 0x555f6d855f0a in processBuffer /<snip>/llvm-project/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp:472
# |     #45 0x555f6d85647d in operator() /<snip>/llvm-project/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp:547
# |     #46 0x555f6d85647d in callback_fn<mlir::MlirOptMain(llvm::raw_ostream&, std::unique_ptr<llvm::MemoryBuffer>, DialectRegistry&, const MlirOptMainConfig&)::<lambda(std::unique_ptr<llvm::MemoryBuffer>, llvm::raw_ostream&)> > /<snip>/llvm-project/llvm/include/llvm/ADT/STLFunctionalExtras.h:45
# |     #47 0x555f6dc1ae36 in llvm::function_ref<mlir::LogicalResult (std::unique_ptr<llvm::MemoryBuffer, std::default_delete<llvm::MemoryBuffer> >, llvm::raw_ostream&)>::operator()(std::unique_ptr<llvm::MemoryBuffer, std::default_delete<llvm::MemoryBuffer> >, llvm::raw_ostream&) const /<snip>/llvm-project/llvm/include/llvm/ADT/STLFunctionalExtras.h:68
# |     #48 0x555f6dc1ae36 in mlir::splitAndProcessBuffer(std::unique_ptr<llvm::MemoryBuffer, std::default_delete<llvm::MemoryBuffer> >, llvm::function_ref<mlir::LogicalResult (std::unique_ptr<llvm::MemoryBuffer, std::default_delete<llvm::MemoryBuffer> >, llvm::raw_ostream&)>, llvm::raw_ostream&, llvm::StringRef, llvm::StringRef) /<snip>/llvm-project/mlir/lib/Support/ToolUtilities.cpp:28
# |     #49 0x555f6d83de07 in mlir::MlirOptMain(llvm::raw_ostream&, std::unique_ptr<llvm::MemoryBuffer, std::default_delete<llvm::MemoryBuffer> >, mlir::DialectRegistry&, mlir::MlirOptMainConfig const&) /<snip>/llvm-project/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp:550
# |     #50 0x555f6d856b5a in mlir::MlirOptMain(int, char**, llvm::StringRef, llvm::StringRef, mlir::DialectRegistry&) /<snip>/llvm-project/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp:588
# |     #51 0x555f6d857f94 in mlir::MlirOptMain(int, char**, llvm::StringRef, mlir::DialectRegistry&) /<snip>/llvm-project/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp:604
# |     #52 0x555f5e1889d2 in main /<snip>/llvm-project/mlir/tools/mlir-opt/mlir-opt.cpp:305
# |     #53 0x7fa1200456c9 in __libc_start_call_main ../sysdeps/nptl/libc_start_call_main.h:58
# |     #54 0x7fa120045784 in __libc_start_main_impl ../csu/libc-start.c:360
# |     #55 0x555f5e188520 in _start (/<snip>/llvm-project/build/bin/mlir-opt+0x2f46520) (BuildId: b7e6261e7c5460d2b9d33d0c9288640d4941bca8)
# |
# | Address 0x7fa11e242b90 is located in stack of thread T0 at offset 144 in frame
# |     #0 0x555f69f6e835 in mlir::detail::OffsetSizeAndStrideOpInterfaceInterfaceTraits::Model<mlir::xegpu::CreateNdDescOp>::getStaticSizes(mlir::detail::OffsetSizeAndStrideOpInterfaceInterfaceTraits::Concept const*, mlir::Operation*) /<snip>/llvm-project/build/tools/mlir/include/mlir/Interfaces/ViewLikeInterface.h.inc:426
# |
# |   This frame has 4 object(s):
# |     [32, 40) '<unknown>'
# |     [64, 72) '<unknown>'
# |     [96, 112) '<unknown>'
# |     [128, 192) '<unknown>' <== Memory access at offset 144 is inside this variable
# | HINT: this may be a false positive if your program uses some custom stack unwind mechanism, swapcontext or vfork
# |       (longjmp and C++ exceptions *are* supported)
# | SUMMARY: AddressSanitizer: stack-use-after-return /usr/include/c++/13/bits/predefined_ops.h:318 in operator()<long int const*>
# | Shadow bytes around the buggy address:
# |   0x7fa11e242900: f5 f5 f5 f5 f5 f5 f5 f5 f5 f5 f5 f5 f5 f5 f5 f5
# |   0x7fa11e242980: f5 f5 f5 f5 f5 f5 f5 f5 00 00 00 00 00 00 00 00
# |   0x7fa11e242a00: f5 f5 f5 f5 f5 f5 f5 f5 f5 f5 f5 f5 f5 f5 f5 f5
# |   0x7fa11e242a80: f5 f5 f5 f5 f5 f5 f5 f5 00 00 00 00 00 00 00 00
# |   0x7fa11e242b00: f5 f5 f5 f5 f5 f5 f5 f5 f5 f5 f5 f5 f5 f5 f5 f5
# | =>0x7fa11e242b80: f5 f5[f5]f5 f5 f5 f5 f5 f5 f5 f5 f5 00 00 00 00
# |   0x7fa11e242c00: f5 f5 f5 f5 f5 f5 f5 f5 f5 f5 f5 f5 f5 f5 f5 f5
# |   0x7fa11e242c80: f5 f5 f5 f5 f5 f5 f5 f5 00 00 00 00 00 00 00 00
# |   0x7fa11e242d00: f5 f5 f5 f5 f5 f5 f5 f5 f5 f5 f5 f5 f5 f5 f5 f5
# |   0x7fa11e242d80: f5 f5 f5 f5 f5 f5 f5 f5 00 00 00 00 00 00 00 00
# |   0x7fa11e242e00: f5 f5 f5 f5 f5 f5 f5 f5 f5 f5 f5 f5 f5 f5 f5 f5
# | Shadow byte legend (one shadow byte represents 8 application bytes):
# |   Addressable:           00
# |   Partially addressable: 01 02 03 04 05 06 07
# |   Heap left redzone:       fa
# |   Freed heap region:       fd
# |   Stack left redzone:      f1
# |   Stack mid redzone:       f2
# |   Stack right redzone:     f3
# |   Stack after return:      f5
# |   Stack use after scope:   f8
# |   Global redzone:          f9
# |   Global init order:       f6
# |   Poisoned by user:        f7
# |   Container overflow:      fc
# |   Array cookie:            ac
# |   Intra object redzone:    bb
# |   ASan internal:           fe
# |   Left alloca redzone:     ca
# |   Right alloca redzone:    cb
# | ==2980947==ABORTING
# `-----------------------------
# error: command failed with exit status: 1
# executed command: /<snip>/llvm-project/build/bin/FileCheck /<snip>/llvm-project/mlir/test/Dialect/XeGPU/XeGPUOps.mlir
# .---command stderr------------
# | FileCheck error: '<stdin>' is empty.
# | FileCheck command line:  /<snip>/llvm-project/build/bin/FileCheck /<snip>/llvm-project/mlir/test/Dialect/XeGPU/XeGPUOps.mlir
# `-----------------------------
# error: command failed with exit status: 2

--

********************
********************
Failed Tests (1):
  MLIR :: Dialect/XeGPU/XeGPUOps.mlir


Testing Time: 20.12s

Total Discovered Tests: 2716
  Unsupported      :  480 (17.67%)
  Passed           : 2234 (82.25%)
  Expectedly Failed:    1 (0.04%)
  Failed           :    1 (0.04%)
FAILED: tools/mlir/test/CMakeFiles/check-mlir /<snip>/llvm-project/build/tools/mlir/test/CMakeFiles/check-mlir
cd /<snip>/llvm-project/build/tools/mlir/test && /usr/bin/python3 /<snip>/llvm-project/build/./bin/llvm-lit -sv /<snip>/llvm-project/build/tools/mlir/test
ninja: build stopped: subcommand failed.

rengolin · 2024-03-19T14:40:22Z

I don't see how this is an error in this patch. That's a genuine usage of llvm::count_if, no captures, just return true/false, which just uses std::count_if.

I believe this is a bug (or false positive) in the (GNU) STL implementation, not in the code in this patch.

rengolin · 2024-03-19T14:44:45Z

Has this issue been seen in other ASAN buildbots or just this one? Can we have a bit more context here, please?

I'm a strong believer of keeping the bots green, but this issue is not a trivial thing to fix.

Meanwhile, @chencha3, you can replace that count_if with a simple loop and get the patch merged, so that we can progress on the XeGPU side and let the Clang/C++/GNU people fix the other bug. When you do, please add a comment that you're not using count_if because of this issue (reference the PR so that people get the reference).

d0k · 2024-03-19T14:49:09Z

Has this issue been seen in other ASAN buildbots or just this one? Can we have a bit more context here, please?

I'm a strong believer of keeping the bots green, but this issue is not a trivial thing to fix.

Meanwhile, @chencha3, you can replace that count_if with a simple loop and get the patch merged, so that we can progress on the XeGPU side and let the Clang/C++/GNU people fix the other bug. When you do, please add a comment that you're not using count_if because of this issue (reference the PR so that people get the reference).

I know that asan reports can be hard to read, but this is really a mismatch between ArrayRef and SmallVector on the definition of getStaticSizes. It has nothing to do with count_if.

chencha3 · 2024-03-19T15:20:32Z

Thanks @bviyer, @d0k, @joker-eph, @rengolin for your help. Yes, it is a bug as described by @d0k. It is my bad. I missed the implementation details about OffsetSizeAndStrideOpInterface::getStaticSizes, it returns ArrayRef, so value is dead there. So, I refined my implementation.

chencha3 · 2024-03-19T15:43:20Z

I created another PR (#85804) with the fix. @d0k, @rengolin, @joker-eph, @bviyer

joker-eph · 2024-03-20T06:08:26Z

If you fixed the ASAN failure, you can land this, you don't need a new review to re-land a reverted patch.

…lvm#85653) This reverts commit daebe5c. This commit causes the following asan issue: ``` <snip>/llvm-project/build/bin/mlir-opt <snip>/llvm-project/mlir/test/Dialect/XeGPU/XeGPUOps.mlir | <snip>/llvm-project/build/bin/FileCheck <snip>/llvm-project/mlir/test/Dialect/XeGPU/XeGPUOps.mlir # executed command: <snip>/llvm-project/build/bin/mlir-opt <snip>/llvm-project/mlir/test/Dialect/XeGPU/XeGPUOps.mlir # .---command stderr------------ # | ================================================================= # | ==2772558==ERROR: AddressSanitizer: stack-use-after-return on address 0x7fd2c2c42b90 at pc 0x55e406d54614 bp 0x7ffc810e4070 sp 0x7ffc810e4068 # | READ of size 8 at 0x7fd2c2c42b90 thread T0 # | #0 0x55e406d54613 in operator()<long int const*> /usr/include/c++/13/bits/predefined_ops.h:318 # | #1 0x55e406d54613 in __count_if<long int const*, __gnu_cxx::__ops::_Iter_pred<mlir::verifyListOfOperandsOrIntegers(Operation*, llvm::StringRef, unsigned int, llvm::ArrayRef<long int>, ValueRange)::<lambda(int64_t)> > > /usr/include/c++/13/bits/stl_algobase.h:2125 # | #2 0x55e406d54613 in count_if<long int const*, mlir::verifyListOfOperandsOrIntegers(Operation*, ... ```

Revert "[MLIR][XeGPU] Adding XeGPU 2d block operators (llvm#84692)"

7721656

This reverts commit daebe5c.

bviyer requested review from joker-eph and akuegel March 18, 2024 15:48

llvmbot added mlir:gpu mlir labels Mar 18, 2024

d0k approved these changes Mar 18, 2024

View reviewed changes

bviyer merged commit 8d14204 into llvm:main Mar 18, 2024

Revert "[MLIR][XeGPU] Adding XeGPU 2d block operators (#84692)" #85653

Revert "[MLIR][XeGPU] Adding XeGPU 2d block operators (#84692)" #85653

Uh oh!

Conversation

bviyer commented Mar 18, 2024

Uh oh!

llvmbot commented Mar 18, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

chencha3 commented Mar 18, 2024

Uh oh!

joker-eph commented Mar 18, 2024

Uh oh!

chencha3 commented Mar 18, 2024

Uh oh!

bviyer commented Mar 18, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

chencha3 commented Mar 18, 2024

Uh oh!

joker-eph commented Mar 18, 2024

Uh oh!

chencha3 commented Mar 18, 2024

Uh oh!

chencha3 commented Mar 18, 2024

Uh oh!

bviyer commented Mar 18, 2024

Uh oh!

bviyer commented Mar 19, 2024

Uh oh!

rengolin commented Mar 19, 2024

Uh oh!

rengolin commented Mar 19, 2024

Uh oh!

d0k commented Mar 19, 2024

Uh oh!

chencha3 commented Mar 19, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

chencha3 commented Mar 19, 2024

Uh oh!

joker-eph commented Mar 20, 2024

Uh oh!

Uh oh!

llvmbot commented Mar 18, 2024 •

edited

Loading

bviyer commented Mar 18, 2024 •

edited

Loading

chencha3 commented Mar 19, 2024 •

edited

Loading