-
Notifications
You must be signed in to change notification settings - Fork 13.3k
[MLIR] XeGPU dialect for Intel GPU - core definitions and base classes #78483
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
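Thank you for submitting a Pull Request (PR) to the LLVM Project! This PR will be automatically labeled and the relevant teams will be notified. If you wish to, you can add reviewers by using the "Reviewers" section on this page. If this is not working for you, it is probably because you do not have write permissions for the repository. In which case you can instead tag reviewers by name in a comment by using @ followed by their GitHub username. If you have received no comments on your PR for a week, you can request a review by "ping"ing the PR by adding a comment "Ping". The common courtesy "ping" rate is once a week. Please remember that you are asking for valuable time from other developers. If you have further questions, they may be answered by the LLVM GitHub User Guide. You can also ask questions in a comment on this PR, on the LLVM Discord or on the forums. |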
@llvm/pr-subscribers-mlir-gpu @llvm/pr-subscribers-mlir Author: None (chencha3) Changes: This PR follows our previous RFC to add the XeGPU dialect definition for Intel GPUs. It contains dialect, type, attribute and operator definitions, as well as test cases for semantic checks. The lowering and optimization passes will be submitted separately. Patch is 194.62 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/78483.diff 35 Files Affected:
diff --git a/mlir/include/mlir/Dialect/CMakeLists.txt b/mlir/include/mlir/Dialect/CMakeLists.txt
index 1c4569ecfa5848..e0eb421291ded7 100644
--- a/mlir/include/mlir/Dialect/CMakeLists.txt
+++ b/mlir/include/mlir/Dialect/CMakeLists.txt
@@ -39,3 +39,4 @@ add_subdirectory(UB)
add_subdirectory(Utils)
add_subdirectory(Vector)
add_subdirectory(X86Vector)
+add_subdirectory(XeGPU)
diff --git a/mlir/include/mlir/Dialect/XeGPU/CMakeLists.txt b/mlir/include/mlir/Dialect/XeGPU/CMakeLists.txt
new file mode 100644
index 00000000000000..f33061b2d87cff
--- /dev/null
+++ b/mlir/include/mlir/Dialect/XeGPU/CMakeLists.txt
@@ -0,0 +1 @@
+add_subdirectory(IR)
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/CMakeLists.txt b/mlir/include/mlir/Dialect/XeGPU/IR/CMakeLists.txt
new file mode 100644
index 00000000000000..f1740e9ed929a6
--- /dev/null
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/CMakeLists.txt
@@ -0,0 +1,14 @@
+add_mlir_dialect(XeGPU xegpu)
+add_mlir_doc(XeGPU XeGPU Dialects/ -gen-dialect-doc -dialect=xegpu)
+
+set(LLVM_TARGET_DEFINITIONS XeGPU.td)
+mlir_tablegen(XeGPUAttrs.h.inc -gen-attrdef-decls)
+mlir_tablegen(XeGPUAttrs.cpp.inc -gen-attrdef-defs)
+add_public_tablegen_target(MLIRXeGPUAttrsIncGen)
+add_dependencies(mlir-headers MLIRXeGPUAttrsIncGen)
+
+set(LLVM_TARGET_DEFINITIONS XeGPU.td)
+mlir_tablegen(XeGPUEnums.h.inc -gen-enum-decls)
+mlir_tablegen(XeGPUEnums.cpp.inc -gen-enum-defs)
+add_public_tablegen_target(MLIRXeGPUEnumsIncGen)
+add_dependencies(mlir-headers MLIRXeGPUEnumsIncGen)
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h
new file mode 100644
index 00000000000000..a05e046a0e0c0b
--- /dev/null
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h
@@ -0,0 +1,52 @@
+//===- XeGPU.h - MLIR dialect for XeGPU -------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_DIALECT_XEGPU_IR_XEGPU_H
+#define MLIR_DIALECT_XEGPU_IR_XEGPU_H
+
+#include <mlir/IR/BuiltinTypes.h>
+#include <mlir/IR/Dialect.h>
+#include <mlir/IR/OpDefinition.h>
+#include <mlir/IR/Region.h>
+#include <mlir/IR/Types.h>
+#include <mlir/Interfaces/CastInterfaces.h>
+#include <mlir/Interfaces/ControlFlowInterfaces.h>
+#include <mlir/Interfaces/CopyOpInterface.h>
+#include <mlir/Interfaces/InferTypeOpInterface.h>
+#include <mlir/Interfaces/ShapedOpInterfaces.h>
+#include <mlir/Interfaces/SideEffectInterfaces.h>
+#include <mlir/Interfaces/ViewLikeInterface.h>
+
+namespace mlir {
+
+/// Return the list of Range (i.e. offset, size, stride). Each Range
+/// entry contains either the dynamic value or a ConstantIndexOp constructed
+/// with `b` at location `loc`.
+SmallVector<Range, 8> getOrCreateRanges(OffsetSizeAndStrideOpInterface op,
+ OpBuilder &b, Location loc);
+
+} // namespace mlir
+
+namespace mlir {
+namespace xegpu {
+
+class TensorDescType;
+
+} // namespace xegpu
+} // namespace mlir
+
+#include <mlir/Dialect/XeGPU/IR/XeGPUDialect.h.inc>
+#include <mlir/Dialect/XeGPU/IR/XeGPUEnums.h.inc>
+#define GET_ATTRDEF_CLASSES
+#include <mlir/Dialect/XeGPU/IR/XeGPUAttrs.h.inc>
+#define GET_TYPEDEF_CLASSES
+#include <mlir/Dialect/XeGPU/IR/XeGPUTypes.h.inc>
+#define GET_OP_CLASSES
+#include <mlir/Dialect/XeGPU/IR/XeGPU.h.inc>
+
+#endif // MLIR_DIALECT_XEGPU_IR_XEGPU_H
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.td
new file mode 100644
index 00000000000000..232e962870716c
--- /dev/null
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.td
@@ -0,0 +1,14 @@
+//===- XeGPU.td - XeGPU dialect definition ------------------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_DIALECT_XEGPU_IR_XEGPU_TD
+#define MLIR_DIALECT_XEGPU_IR_XEGPU_TD
+
+include "mlir/Dialect/XeGPU/IR/XeGPUOps.td"
+
+#endif // MLIR_DIALECT_XEGPU_IR_XEGPU_TD
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
new file mode 100644
index 00000000000000..ed3d9bbc772567
--- /dev/null
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
@@ -0,0 +1,150 @@
+//===- XeGPUAttrs.td - XeGPU dialect attributes definition --*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_DIALECT_XEGPU_IR_XEGPUATTRS_TD
+#define MLIR_DIALECT_XEGPU_IR_XEGPUATTRS_TD
+
+include "mlir/Dialect/XeGPU/IR/XeGPUDialect.td"
+include "mlir/IR/EnumAttr.td"
+
+class XeGPUAttr<string name, string attrMnemonic, list<Trait> traits = [],
+ string baseCppClass = "::mlir::Attribute">
+ : AttrDef<XeGPU_Dialect, name, traits, baseCppClass> {
+ let mnemonic = attrMnemonic;
+}
+
+def XeGPU_ScatteredAttr : XeGPUAttr<"Scattered", "scattered"> {
+ let summary = "Scattered attribute for scattered read and write operation.";
+ let description = [{An attribute represent scattered read and write operation.
+ It does not (need to) have meaningful input values. The existence of itself
+ implies scattered read/write.}];
+
+ let assemblyFormat = "";
+}
+
+def XeGPU_SgMapAttr: XeGPUAttr<"SubGroupMap", "sg_map"> {
+ let parameters = (ins
+ "mlir::DenseI32ArrayAttr":$wi_layout,
+ "mlir::DenseI32ArrayAttr":$wi_data
+ );
+
+ // Struct form over the two parameters above: #xegpu.sg_map<wi_layout = ..., wi_data = ...> (no mma_block_size field; exact array syntax not shown here).
+ let assemblyFormat = "`<` struct(params) `>`";
+
+ let genVerifyDecl = true;
+
+ let builders = [
+ AttrBuilder<(ins
+ "llvm::ArrayRef<int32_t>":$wiLayout,
+ "llvm::ArrayRef<int32_t>":$wiData
+ )>
+ ];
+}
+
+def XeGPU_TensorDescAttr: XeGPUAttr<"TensorDesc", "tdesc_attr"> {
+ let parameters = (ins
+ DefaultValuedParameter<"xegpu::MemoryScopeKind", "xegpu::MemoryScopeKind::GLOBAL">: $memory_scope,
+ DefaultValuedParameter<"int", "1">: $array_length,
+ DefaultValuedParameter<"bool", "true">: $boundary_check,
+ OptionalParameter<"xegpu::ScatteredAttr">: $scattered,
+ OptionalParameter<"xegpu::SubGroupMapAttr"> : $map
+ );
+
+ let builders = [
+ AttrBuilder<(ins
+ CArg<"xegpu::MemoryScopeKind", "xegpu::MemoryScopeKind::GLOBAL">:$memory_scope,
+ CArg<"int", "1">:$array_length,
+ CArg<"xegpu::ScatteredAttr", "{}">:$scattered,
+ CArg<"xegpu::SubGroupMapAttr", "{}">:$map
+ )>
+ ];
+
+ let extraClassDeclaration = [{
+ bool hasNonDefaultAttrs();
+ }];
+
+ let hasCustomAssemblyFormat = true;
+}
+
+def ARG_TYPE_VECTOR : I32EnumAttrCase<"VECTOR", 0, "vector">;
+def ARG_TYPE_SCALAR : I32EnumAttrCase<"SCALAR", 1, "scalar">;
+def XeGPU_ArgTypeKind : I32EnumAttr<"ArgTypeKind",
+ "Argument type for Invoke_SIMD op",
+ [ARG_TYPE_VECTOR, ARG_TYPE_SCALAR]> {
+ let genSpecializedAttr = 0;
+ let cppNamespace = "::mlir::xegpu";
+}
+
+def MODE_SIMT : I32EnumAttrCase<"SIMT", 0, "simt">;
+def MODE_VC : I32EnumAttrCase<"VC", 1, "vc">;
+def XeGPU_ModeKind : I32EnumAttr<"ModeKind",
+ "The Mode an operator runs on",
+ [MODE_SIMT, MODE_VC]> {
+ let genSpecializedAttr = 0;
+ let cppNamespace = "::mlir::xegpu";
+}
+
+def MEMORY_SCOPE_GLOBAL: I32EnumAttrCase<"GLOBAL", 0, "global">;
+def MEMORY_SCOPE_SHARED: I32EnumAttrCase<"SLM", 1, "slm">;
+def XeGPU_MemoryScopeKind: I32EnumAttr<"MemoryScopeKind",
+ "The scope of the memory the tensor descritor is created for",
+ [MEMORY_SCOPE_GLOBAL, MEMORY_SCOPE_SHARED]> {
+ let genSpecializedAttr = 0;
+ let cppNamespace = "::mlir::xegpu";
+}
+
+def CACHE_KIND_CACHED: I32EnumAttrCase<"CACHED", 0, "cached">; // valid for read and write
+def CACHE_KIND_UNCACHED: I32EnumAttrCase<"UNCACHED", 1, "uncached">; // valid for read and write
+def CACHE_KIND_STREAMING: I32EnumAttrCase<"STREAMING", 2, "streaming">; // valid for read only
+def CACHE_KIND_INVALIDATE: I32EnumAttrCase<"READ_INVALIDATE", 3, "read_invalidate">; // valid for read only
+def CACHE_KIND_WRITE_BACK: I32EnumAttrCase<"WRITE_BACK", 4, "write_back">; // valid for write only
+def CACHE_KIND_WRITE_THROUGH: I32EnumAttrCase<"WRITE_THROUGH", 5, "write_through">; // valid for write only
+
+
+
+def XeGPU_CacheKind : I32EnumAttr<"CacheKind", "Cache kind",
+ [CACHE_KIND_CACHED, CACHE_KIND_UNCACHED,
+ CACHE_KIND_STREAMING, CACHE_KIND_INVALIDATE,
+ CACHE_KIND_WRITE_BACK, CACHE_KIND_WRITE_THROUGH]> {
+ let genSpecializedAttr = 0;
+ let cppNamespace = "::mlir::xegpu";
+}
+
+def XeGPU_ArgTypeAttr : EnumAttr<XeGPU_Dialect, XeGPU_ArgTypeKind, "arg_type_kind">;
+def XeGPU_ModeAttr : EnumAttr<XeGPU_Dialect, XeGPU_ModeKind, "mode_kind">;
+def XeGPU_MemoryScopeAttr : EnumAttr<XeGPU_Dialect, XeGPU_MemoryScopeKind, "memory_scope_kind">;
+def XeGPU_CacheAttr : EnumAttr<XeGPU_Dialect, XeGPU_CacheKind, "cache_kind">;
+
+// RMW kind attribute
+def ATOMIC_RMW_KIND_ADDF : I32EnumAttrCase<"addf", 0>;
+def ATOMIC_RMW_KIND_ADDI : I32EnumAttrCase<"addi", 1>;
+def ATOMIC_RMW_KIND_ASSIGN : I32EnumAttrCase<"assign", 2>;
+def ATOMIC_RMW_KIND_MAXF : I32EnumAttrCase<"maxf", 3>;
+def ATOMIC_RMW_KIND_MAXS : I32EnumAttrCase<"maxs", 4>;
+def ATOMIC_RMW_KIND_MAXU : I32EnumAttrCase<"maxu", 5>;
+def ATOMIC_RMW_KIND_MINF : I32EnumAttrCase<"minf", 6>;
+def ATOMIC_RMW_KIND_MINS : I32EnumAttrCase<"mins", 7>;
+def ATOMIC_RMW_KIND_MINU : I32EnumAttrCase<"minu", 8>;
+def ATOMIC_RMW_KIND_MULF : I32EnumAttrCase<"mulf", 9>;
+def ATOMIC_RMW_KIND_MULI : I32EnumAttrCase<"muli", 10>;
+def ATOMIC_RMW_KIND_ORI : I32EnumAttrCase<"ori", 11>;
+def ATOMIC_RMW_KIND_ANDI : I32EnumAttrCase<"andi", 12>;
+
+def XeGPU_AtomicRMWKind : I32EnumAttr<"AtomicRMWKind",
+ "Operation type for AtomicRMW",
+ [ATOMIC_RMW_KIND_ADDF, ATOMIC_RMW_KIND_ADDI, ATOMIC_RMW_KIND_ASSIGN,
+ ATOMIC_RMW_KIND_MAXF, ATOMIC_RMW_KIND_MAXS, ATOMIC_RMW_KIND_MAXU,
+ ATOMIC_RMW_KIND_MINF, ATOMIC_RMW_KIND_MINS, ATOMIC_RMW_KIND_MINU,
+ ATOMIC_RMW_KIND_MULF, ATOMIC_RMW_KIND_MULI, ATOMIC_RMW_KIND_ORI,
+ ATOMIC_RMW_KIND_ANDI]> {
+ let genSpecializedAttr = 0;
+ let cppNamespace = "::mlir::xegpu";
+}
+def XeGPU_AtomicRMWKindAttr : EnumAttr<XeGPU_Dialect, XeGPU_AtomicRMWKind, "atomic_rmw_kind">;
+
+#endif // MLIR_DIALECT_XEGPU_IR_XEGPUATTRS_TD
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUDialect.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUDialect.td
new file mode 100644
index 00000000000000..f85ccb32cc43b0
--- /dev/null
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUDialect.td
@@ -0,0 +1,46 @@
+//===- XeGPUDialect.td - XeGPU dialect definition -----------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_DIALECT_XEGPU_IR_XEGPUDIALECT_TD
+#define MLIR_DIALECT_XEGPU_IR_XEGPUDIALECT_TD
+
+include "mlir/IR/OpBase.td"
+include "mlir/IR/OpAsmInterface.td"
+include "mlir/IR/AttrTypeBase.td"
+include "mlir/IR/BuiltinTypes.td"
+include "mlir/IR/BuiltinTypeInterfaces.td"
+include "mlir/Interfaces/SideEffectInterfaces.td"
+include "mlir/Interfaces/ViewLikeInterface.td"
+include "mlir/Interfaces/CastInterfaces.td"
+include "mlir/Interfaces/ControlFlowInterfaces.td"
+include "mlir/Interfaces/CopyOpInterface.td"
+include "mlir/Interfaces/InferTypeOpInterface.td"
+include "mlir/Interfaces/ShapedOpInterfaces.td"
+
+def XeGPU_Dialect : Dialect {
+ let name = "xegpu";
+ let cppNamespace = "::mlir::xegpu";
+ let summary = "The XeGPU dialect that models Intel GPU's ISA";
+ let description = [{
+ The XeGPU dialect models Intel Xe ISA semantics but works at vector and
+ TensorDesc data type. It provides 1:1 mappings to match Xe instructions
+ like DPAS and 2D block load. The matrix size being processed at this level
+ exactly matches the hardware instructions or the intrinsic supported by
+ the lower-level GPU compiler.
+ }];
+
+ let dependentDialects = [
+ "arith::ArithDialect",
+ "memref::MemRefDialect"
+ ];
+
+ let useDefaultTypePrinterParser = true;
+ let useDefaultAttributePrinterParser = true;
+}
+
+#endif // MLIR_DIALECT_XEGPU_IR_XEGPUDIALECT_TD
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
new file mode 100644
index 00000000000000..766590f6a3f878
--- /dev/null
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
@@ -0,0 +1,505 @@
+//===- XeGPUOps.td - XeGPU dialect operations definition ----*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_DIALECT_XEGPU_IR_XEGPUOPS_TD
+#define MLIR_DIALECT_XEGPU_IR_XEGPUOPS_TD
+
+include "mlir/Dialect/XeGPU/IR/XeGPUAttrs.td"
+include "mlir/Dialect/XeGPU/IR/XeGPUDialect.td"
+include "mlir/Dialect/XeGPU/IR/XeGPUTypes.td"
+
+
+// Base class for XeGPU dialect operations. This class inherits from the base
+// `Op` class in OpBase.td, and supplies:
+// * The parent dialect of the operation, fixed to XeGPU_Dialect.
+// * The mnemonic for the operation, i.e. the name without the dialect prefix.
+// * A list of traits for the operation (empty by default).
+class XeGPU_Op<string mnemonic, list<Trait> traits = []>:
+ Op<XeGPU_Dialect, mnemonic, traits>;
+
+def XeGPU_CreateNdDescOp: XeGPU_Op<"create_nd_tdesc", [Pure, AttrSizedOperandSegments]> {
+
+ let summary = "create nd tensor descriptor operation";
+ let description = [{
+ The "create_nd_tdesc" operation creates a TensorDescType which represents
+ a sub-view of a 2D memory region (It can be extended to support N-D memory
+ region if needed in future). Elements in the subview continuous in each
+ dimention. It encodes the following important information for supporting
+ Intel hardware features:
+
+ * source: an object representing (starting address/pointer of) a 2D memory reagion.
+ It can be either a 2D memref object, or simply a pointer represented by uint64_t type.
+ * offsets: two index values represents offsets from the "source" at the each dimension
+ at which the subview of the target memory will be created. It is encoded via two
+ variables, including "dynamic_offsets" and "static_offsets", such that it can
+ accept various forms, such as, operands (e.g., [%c0, %c]) and attributes (e.g., [2, 4])).
+ * shape: the shape information of the memory region pointed by the "source". It is
+ typically encoded via the MemRefType of the source, e.g., memref<4096x4096xf16>.
+ But if "source" is simply a pointer represented as uint64_t type, or a memref
+ type without shape information e.g., memref<?x?xf16>, the shape information has
+ to be explicitly passed via the "dynamic_shape" argument. Currently "dynamic_shape"
+ only accepts operands(e.g., [%c4096, %c4096]), not attributes(e.g., [4096, 4096]).
+ * strides: the strides of the memory region pointed by the "source". Similar to shape,
+ it is typically encoded via the MemRefType of the source too. But if "source" is
+ simply a pointer represented as uint64_t type, or a memref type without shape
+ information e.g., memref<?x?xf16>, the strides information has to be explicitly
+ passed via the "dynamic_strides" argument. And it currently only accepts operands two.
+
+ Example 1 (suppose the tensor shape inferred by the compiler is 8x16):
+ %0 = memref.alloc() : memref<32x24xf32>
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %1 = xegpu.create_nd_tdesc %0[%c0, %c1]: memref<32x24xf32> -> TensorDesc<8x16xf32>
+
+ Example 2 (suppose the tensor shape inferred by the compiler is 8x16):
+ %0 = memref.alloc(%h, %w) : memref<?x?xf32>
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %1 = xegpu.create_nd_tdesc %0[%c0, %c1], [%h, %w], [%w, %c1]: memref<?x?xf32> -> TensorDesc<8x16xf32>
+
+ Example 3 (suppose the tensor shape inferred by the compiler is 8x16):
+ %0 = ... : ui64
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %1 = xegpu.create_nd_tdesc %0[%c0, %c1], [%h, %w], [%w, %c1]: ui64 -> TensorDesc<8x16xf32>
+ }];
+
+ let arguments = (ins XeGPU_BaseAddrType: $source,
+ Variadic<Index>: $dynamic_offsets,
+ Variadic<Index>: $dynamic_shape,
+ Variadic<Index>: $dynamic_strides,
+ DenseI64ArrayAttr: $static_offsets,
+ DefaultValuedAttr<XeGPU_ModeAttr, "xegpu::ModeKind::SIMT">: $mode);
+ let results = (outs XeGPU_TensorDesc:$TensorDesc);
+
+ let hasCustomAssemblyFormat = 1;
+ let skipDefaultBuilders = 1;
+ let hasVerifier = 1;
+
+ let builders = [
+ OpBuilder<(ins "Type": $TensorDesc, "Value": $source, "ValueRange": $offsets,
+ "ValueRange": $shape, "ValueRange": $strides,
+ "llvm::ArrayRef<int64_t>": $static_offsets,
+ CArg<"xegpu::ModeKind", "xegpu::ModeKind::SIMT">: $mode)>,
+
+ OpBuilder<(ins "Type": $tdesc, "Value": $source,
+ "llvm::ArrayRef<OpFoldResult>": $offsets,
+ CArg<"xegpu::ModeKind", "xegpu::ModeKind::SIMT">: $mode)>,
+
+ OpBuilder<(ins "Type": $tdesc, "Value": $source,
+ "llvm::ArrayRef<OpFoldResult>": $offsets,
+ "ValueRange": $shape, "ValueRange": $stride,
+ CArg<"xegpu::ModeKind", "xegpu::ModeKind::SIMT">: $mode)>
+ ];
+
+ let extraClassDeclaration = [{
+ /// Returns the type of the source memref operand.
+ Type getSourceType() {
+ return getSource().getType();
+ }
+
+ /// Returns the type of the result TensorDesc.
+ xegpu::TensorDescType getTensorDescType();
+
+ /// Returns the offsets info to the source. It consolidates
+ /// information from both dynamic_offsets and static_offsets
+ /// parameters. static_offsets parameter always has the expected
+ /// ranks with some dim could have ShapeType::kDynamic value
+ /// indicating the corresponding value should be from dynamic_offsets.
+ llvm::SmallVector<OpFoldResult> getOffsets();
+
+ /// returns the shape info of the source. It is either from the
+ /// memref type, if source is a memref with static shape
+ /// information or from the dynamic_shape parameter. If both
+ /// exists, the dynamic_shape parameter will be used and the
+ /// shape information from memref type will be ignored.
+ llvm::SmallVector<OpFoldResult> getShape();
+
+ /// returns the strides info of the source. It is either from the
+ /// memref type, if source is a memref with static shape
+ /// information or from the dynamic_stride parameter. If both
+ /// exists, the dynamic_strides parameter will be used and the
+ /// strides information from memref type will be ignored.
+ llvm::SmallVector<OpFoldResult> getStrides();
+
+ /// return the shape embeded in the memref type of the source.
+ /// If source is not memref type. array of kDynamic will be returned.
+ llvm::ArrayRef<int64_t> getStaticShape();
+
+ /// return the strides embeded in the memref type of the source.
+ /// If source is not memref type. array of kDynamic will be returned.
+ llvm::ArrayRef<int64_t> getStaticStrides();
+
+ /// Return the element type of the TensorDesc
+ Type getElementType();
+
+ /// Return the shape of the TensorDesc
+ llvm::ArrayRef<int64_t> ge...
[truncated]
|
@rengolin @joker-eph This is the XeGPU dialect definition. Could you please take a look and give us some guidance? We would also appreciate it if you could invite someone else to review it. |
The code looks good to me. It's a GPU dialect like the others, it has load/store, MMA/SIMD ops, sub-group descriptors, etc. |
@joker-eph what are the next steps here? |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for the patch. I don't have significant concerns with anything right now.
I would wonder if you could split this though and introduce this in a few steps?
I could imagine for example:
- The basic dialect structure
- Basic operations
- The TensorDesc type and the operations interacting with it.
- The barrier type and the operations interacting with it.
But you may see other independent blocks?
/// entry contains either the dynamic value or a ConstantIndexOp constructed | ||
/// with `b` at location `loc`. | ||
SmallVector<Range, 8> getOrCreateRanges(OffsetSizeAndStrideOpInterface op, | ||
OpBuilder &b, Location loc); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Seems like a general utility, not what I expect in the top-level dialect include file.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, you are right. And I am going to remove it.
|
||
let dependentDialects = [ | ||
"arith::ArithDialect", | ||
"memref::MemRefDialect" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Where are these dependencies coming from?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I initially thought dependency exists when the dialect can be mixed with others. It looks like I was wrong. What does dependency among dialects mean? I failed to find a good example to help me understand it from the doc and in tree code. I saw SCF dialect depends on arith dialect. But how?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Dependencies are used when you must load the other dialect to use this one.
For example that can come from folding, constant materialization, or canonicalization patterns.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks very much for your explanation. I removed them now.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
still there right now?
def XeGPU_CompileHintOp: XeGPU_Op<"compile_hint", []> { | ||
let summary = "prevents the compiler from scheduling."; | ||
let assemblyFormat = [{ attr-dict }]; | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I have concerns with the (undefined) semantics of this op right now, can you remove it and introduce it separately at a later time?
|
||
|
||
def XeGPU_Nbarrier: XeGPUTypeDef<"Nbarrier", "nbarrier", [], "mlir::Type"> { | ||
let summary = "!xegpu.nbarrier a custom XeGPU type representing a barrier."; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Seems like maybe where everything about barriers should be documented, a bit like the tensordesc above.
} | ||
|
||
bool printDefaultValues() { | ||
auto *env = getenv("MLIR_XEGPU_PRINT_DEFAULTS"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
MLIR dialect's behavior isn't sensitive to the environment right now, please remove and replace with regular setter/getter that a client can setup how they wish.
seen_scattered = true; | ||
} | ||
return mlir::success(); | ||
}; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This looks like ODS generated code that has been slightly tweaked?
Co-authored-by: Mehdi Amini <joker.eph@gmail.com>
Co-authored-by: Mehdi Amini <joker.eph@gmail.com>
} | ||
|
||
def XeGPU_TensorDescAttr: XeGPUAttr<"TensorDesc", "tdesc_attr"> { | ||
let parameters = (ins |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We need some more comprehensive documentation for all types/attr (ideally with example syntax, etc.)
✅ With the latest revision this PR passed the C/C++ code formatter. |
Thanks for your feedback. We plan to split it into the following 5 steps according to your suggestions:
Does this plan sound good to you? |
Looks fine, thanks! |
Thanks Mehdi, I have trimmed this PR to only have core definitions and base classes. Does it look good to you? I will submit the PR for the second part when this one is approved. |
@joker-eph Hi Mehdi, what is my next step? merge this one first after getting approvals or submit other PRs first so they can be reviewed together? |
You can pipeline and stack multiple PRs. However this requires you to push your branches to the LLVM repo, not open PRs from your fork (otherwise you can't cascade them). |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM too, with @joker-eph comments addressed. Thanks!
|
Hi @joker-eph, I am not sure whether this is the right way to contact you, but here is the PR for the second part (#84692). I don't have permission to push branches to the llvm-project repo, so I submitted this PR from my fork again. How can I apply for commit access for future PRs? |
Here is the info: https://llvm.org/docs/DeveloperPolicy.html#obtaining-commit-access |
This PR follows our previous RFC to add the XeGPU dialect definition for Intel GPUs. It contains dialect, type, attribute and operator definitions, as well as test cases for semantic checks. The lowering and optimization passes will be submitted separately.