diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td index e394bae64e0918..76ac386057ef2b 100644 --- a/mlir/include/mlir/Conversion/Passes.td +++ b/mlir/include/mlir/Conversion/Passes.td @@ -551,10 +551,7 @@ def LowerHostCodeToLLVMPass : Pass<"lower-host-to-llvm", "ModuleOp"> { def ConvertGpuOpsToLLVMSPVOps : Pass<"convert-gpu-to-llvm-spv", "gpu::GPUModuleOp"> { let summary = "Generate LLVM operations to be ingested by a SPIR-V backend for gpu operations"; - let dependentDialects = [ - "LLVM::LLVMDialect", - "spirv::SPIRVDialect", - ]; + let dependentDialects = ["LLVM::LLVMDialect"]; let options = [ Option<"indexBitwidth", "index-bitwidth", "unsigned", /*default=kDeriveIndexBitwidthFromDataLayout*/"0", diff --git a/mlir/lib/Conversion/GPUToLLVMSPV/CMakeLists.txt b/mlir/lib/Conversion/GPUToLLVMSPV/CMakeLists.txt index d47c5e679d86e8..f2381c623e2014 100644 --- a/mlir/lib/Conversion/GPUToLLVMSPV/CMakeLists.txt +++ b/mlir/lib/Conversion/GPUToLLVMSPV/CMakeLists.txt @@ -10,5 +10,4 @@ add_mlir_conversion_library(MLIRGPUToLLVMSPV MLIRLLVMCommonConversion MLIRLLVMDialect MLIRSPIRVAttrToLLVMConversion - MLIRSPIRVDialect ) diff --git a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp index bb6a38c0e76edf..03745f4537e99e 100644 --- a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp +++ b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp @@ -20,9 +20,7 @@ #include "mlir/Dialect/LLVMIR/LLVMAttrs.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/LLVMIR/LLVMTypes.h" -#include "mlir/Dialect/SPIRV/IR/SPIRVDialect.h" #include "mlir/Dialect/SPIRV/IR/SPIRVEnums.h" -#include "mlir/Dialect/SPIRV/IR/TargetAndABI.h" #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/Matchers.h" #include "mlir/IR/PatternMatch.h" @@ -274,10 +272,11 @@ struct GPUShuffleConversion final : ConvertOpToLLVMPattern { } /// Get the subgroup size from the target or return a default. - static int getSubgroupSize(Operation *op) { - return spirv::lookupTargetEnvOrDefault(op) - .getResourceLimits() - .getSubgroupSize(); + static std::optional getSubgroupSize(Operation *op) { + auto parentFunc = op->getParentOfType(); + if (!parentFunc) + return std::nullopt; + return parentFunc.getIntelReqdSubGroupSize(); } static bool hasValidWidth(gpu::ShuffleOp op) { diff --git a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir index c143d030ed362b..16b692b9689398 100644 --- a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir +++ b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir @@ -227,84 +227,9 @@ gpu.module @barriers { // ----- -// Check `gpu.shuffle` conversion with default subgroup size. - -gpu.module @shuffles { - // CHECK: llvm.func spir_funccc @_Z22sub_group_shuffle_downdj(f64, i32) -> f64 attributes { - // CHECK-SAME-DAG: no_unwind - // CHECK-SAME-DAG: convergent - // CHECK-SAME-DAG: will_return - // CHECK-NOT: memory_effects = #llvm.memory_effects - // CHECK-SAME: } - // CHECK: llvm.func spir_funccc @_Z20sub_group_shuffle_upfj(f32, i32) -> f32 attributes { - // CHECK-SAME-DAG: no_unwind - // CHECK-SAME-DAG: convergent - // CHECK-SAME-DAG: will_return - // CHECK-NOT: memory_effects = #llvm.memory_effects - // CHECK-SAME: } - // CHECK: llvm.func spir_funccc @_Z21sub_group_shuffle_xorlj(i64, i32) -> i64 attributes { - // CHECK-SAME-DAG: no_unwind - // CHECK-SAME-DAG: convergent - // CHECK-SAME-DAG: will_return - // CHECK-NOT: memory_effects = #llvm.memory_effects - // CHECK-SAME: } - // CHECK: llvm.func spir_funccc @_Z17sub_group_shuffleij(i32, i32) -> i32 attributes { - // CHECK-SAME-DAG: no_unwind - // CHECK-SAME-DAG: convergent - // CHECK-SAME-DAG: will_return - // CHECK-NOT: memory_effects = #llvm.memory_effects - // CHECK-SAME: } - - // CHECK-LABEL: gpu_shuffles - // CHECK-SAME: (%[[VAL_0:.*]]: i32, %[[VAL_1:.*]]: i32, %[[VAL_2:.*]]: i64, %[[VAL_3:.*]]: i32, %[[VAL_4:.*]]: f32, %[[VAL_5:.*]]: i32, %[[VAL_6:.*]]: f64, %[[VAL_7:.*]]: i32) - func.func @gpu_shuffles(%val0: i32, %id: i32, - %val1: i64, %mask: i32, - %val2: f32, %delta_up: i32, - %val3: f64, %delta_down: i32) { - %width = arith.constant 32 : i32 - // CHECK: llvm.call spir_funccc @_Z17sub_group_shuffleij(%[[VAL_0]], %[[VAL_1]]) { - // CHECK-SAME-DAG: no_unwind - // CHECK-SAME-DAG: convergent - // CHECK-SAME-DAG: will_return - // CHECK-NOT: memory_effects = #llvm.memory_effects - // CHECK-SAME: } : (i32, i32) -> i32 - // CHECK: llvm.mlir.constant(true) : i1 - // CHECK: llvm.call spir_funccc @_Z21sub_group_shuffle_xorlj(%[[VAL_2]], %[[VAL_3]]) { - // CHECK-SAME-DAG: no_unwind - // CHECK-SAME-DAG: convergent - // CHECK-SAME-DAG: will_return - // CHECK-NOT: memory_effects = #llvm.memory_effects - // CHECK-SAME: } : (i64, i32) -> i64 - // CHECK: llvm.mlir.constant(true) : i1 - // CHECK: llvm.call spir_funccc @_Z20sub_group_shuffle_upfj(%[[VAL_4]], %[[VAL_5]]) { - // CHECK-SAME-DAG: no_unwind - // CHECK-SAME-DAG: convergent - // CHECK-SAME-DAG: will_return - // CHECK-NOT: memory_effects= #llvm.memory_effects - // CHECK-SAME: } : (f32, i32) -> f32 - // CHECK: llvm.mlir.constant(true) : i1 - // CHECK: llvm.call spir_funccc @_Z22sub_group_shuffle_downdj(%[[VAL_6]], %[[VAL_7]]) { - // CHECK-SAME-DAG: no_unwind - // CHECK-SAME-DAG: convergent - // CHECK-SAME-DAG: will_return - // CHECK-NOT: memory_effects= #llvm.memory_effects - // CHECK-SAME: } : (f64, i32) -> f64 - // CHECK: llvm.mlir.constant(true) : i1 - %shuffleResult0, %valid0 = gpu.shuffle idx %val0, %id, %width : i32 - %shuffleResult1, %valid1 = gpu.shuffle xor %val1, %mask, %width : i64 - %shuffleResult2, %valid2 = gpu.shuffle up %val2, %delta_up, %width : f32 - %shuffleResult3, %valid3 = gpu.shuffle down %val3, %delta_down, %width : f64 - return - } -} - -// ----- - // Check `gpu.shuffle` conversion with explicit subgroup size. -gpu.module @shuffles attributes { - spirv.target_env = #spirv.target_env<#spirv.vce, #spirv.resource_limits> -} { +gpu.module @shuffles { // CHECK: llvm.func spir_funccc @_Z22sub_group_shuffle_downdj(f64, i32) -> f64 attributes { // CHECK-SAME-DAG: no_unwind // CHECK-SAME-DAG: convergent @@ -352,15 +277,15 @@ gpu.module @shuffles attributes { // CHECK-SAME: (%[[I8_VAL:.*]]: i8, %[[I16_VAL:.*]]: i16, // CHECK-SAME: %[[I32_VAL:.*]]: i32, %[[I64_VAL:.*]]: i64, // CHECK-SAME: %[[F16_VAL:.*]]: f16, %[[F32_VAL:.*]]: f32, - // CHECK-SAME: %[[F64_VAL:.*]]: f64, %[[OFFSET:.*]]: i32) { - func.func @gpu_shuffles(%i8_val: i8, + // CHECK-SAME: %[[F64_VAL:.*]]: f64, %[[OFFSET:.*]]: i32) + llvm.func @gpu_shuffles(%i8_val: i8, %i16_val: i16, %i32_val: i32, %i64_val: i64, %f16_val: f16, %f32_val: f32, %f64_val: f64, - %offset: i32) { + %offset: i32) attributes {intel_reqd_sub_group_size = 16 : i32} { %width = arith.constant 16 : i32 // CHECK: llvm.call spir_funccc @_Z17sub_group_shufflecj(%[[I8_VAL]], %[[OFFSET]]) // CHECK: llvm.mlir.constant(true) : i1 @@ -383,7 +308,7 @@ gpu.module @shuffles attributes { %shuffleResult4, %valid4 = gpu.shuffle up %f16_val, %offset, %width : f16 %shuffleResult5, %valid5 = gpu.shuffle up %f32_val, %offset, %width : f32 %shuffleResult6, %valid6 = gpu.shuffle down %f64_val, %offset, %width : f64 - return + llvm.return } } @@ -392,11 +317,11 @@ gpu.module @shuffles attributes { // Cannot convert due to shuffle width and target subgroup size mismatch gpu.module @shuffles_mismatch { - func.func @gpu_shuffles(%val: i32, %id: i32) { + llvm.func @gpu_shuffles(%val: i32, %id: i32) attributes {intel_reqd_sub_group_size = 32 : i32} { %width = arith.constant 16 : i32 // expected-error@below {{failed to legalize operation 'gpu.shuffle' that was explicitly marked illegal}} %shuffleResult, %valid = gpu.shuffle idx %val, %id, %width : i32 - return + llvm.return } } @@ -405,10 +330,10 @@ gpu.module @shuffles_mismatch { // Cannot convert due to variable shuffle width gpu.module @shuffles_mismatch { - func.func @gpu_shuffles(%val: i32, %id: i32, %width: i32) { + llvm.func @gpu_shuffles(%val: i32, %id: i32, %width: i32) attributes {intel_reqd_sub_group_size = 32 : i32} { // expected-error@below {{failed to legalize operation 'gpu.shuffle' that was explicitly marked illegal}} %shuffleResult, %valid = gpu.shuffle idx %val, %id, %width : i32 - return + llvm.return } } @@ -417,11 +342,11 @@ gpu.module @shuffles_mismatch { // Cannot convert due to value type not being supported by the conversion gpu.module @not_supported_lowering { - func.func @gpu_shuffles(%val: i1, %id: i32) { + llvm.func @gpu_shuffles(%val: i1, %id: i32) attributes {intel_reqd_sub_group_size = 32 : i32} { %width = arith.constant 32 : i32 // expected-error@below {{failed to legalize operation 'gpu.shuffle' that was explicitly marked illegal}} %shuffleResult, %valid = gpu.shuffle xor %val, %id, %width : i1 - return + llvm.return } }