diff --git a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp index b1e2f914db4cb9..7432a58f18b442 100644 --- a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp +++ b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp @@ -349,13 +349,13 @@ class GpuKernelOutliningPass void runOnOperation() override { SymbolTable symbolTable(getOperation()); bool modified = false; - for (auto func : getOperation().getOps()) { + for (auto func : getOperation().getOps()) { // Insert just after the function. Block::iterator insertPt(func->getNextNode()); auto funcWalkResult = func.walk([&](gpu::LaunchOp op) { SetVector operands; std::string kernelFnName = - Twine(op->getParentOfType().getName(), "_kernel") + Twine(op->getParentOfType().getName(), "_kernel") .str(); gpu::GPUFuncOp outlinedFunc = diff --git a/mlir/test/Dialect/GPU/outlining.mlir b/mlir/test/Dialect/GPU/outlining.mlir index 28c121a550100c..8020f6dfa65b74 100644 --- a/mlir/test/Dialect/GPU/outlining.mlir +++ b/mlir/test/Dialect/GPU/outlining.mlir @@ -37,7 +37,6 @@ func.func @launch() { } // CHECK-DL-LABEL: gpu.module @launch_kernel attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry>} - // CHECK-LABEL: gpu.module @launch_kernel // CHECK-NEXT: gpu.func @launch_kernel // CHECK-SAME: (%[[KERNEL_ARG0:.*]]: f32, %[[KERNEL_ARG1:.*]]: memref) @@ -63,6 +62,42 @@ func.func @launch() { // ----- +// This test checks that gpu-kernel-outlining can handle a gpu.launch kernel from an llvm.func. +// CHECK-LABEL: @launch_from_llvm_func +llvm.func @launch_from_llvm_func() { + // CHECK: %[[ARG0:.*]] = "op"() : () -> f32 + %0 = "op"() : () -> (f32) + // CHECK: %[[ARG1:.*]] = "op"() : () -> memref + %1 = "op"() : () -> (memref) + + // CHECK: %[[DIM:.*]] = arith.constant 1 + %dim = arith.constant 1 : index + + // CHECK: gpu.launch_func @launch_from_llvm_func_kernel::@launch_from_llvm_func_kernel + // CHECK-SAME: (%[[DIM]], %[[DIM]], %[[DIM]]) + // CHECK-SAME: (%[[DIM]], %[[DIM]], %[[DIM]]) 
args(%[[ARG0]] : f32, %[[ARG1]] : memref) + // CHECK-NEXT: llvm.return + + // CHECK: gpu.func {{.*}} kernel attributes + // CHECK-SAME: gpu.known_block_size = array + // CHECK-SAME: gpu.known_grid_size = array + // CHECK: gpu.return + gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %dim, %grid_y = %dim, + %grid_z = %dim) + threads(%tx, %ty, %tz) in (%block_x = %dim, %block_y = %dim, + %block_z = %dim) { + "use"(%0): (f32) -> () + "some_op"(%bx, %block_x) : (index, index) -> () + %2 = memref.load %1[%tx] : memref + gpu.terminator + } + llvm.return +} + +// CHECK-DL-LABEL: gpu.module @launch_from_llvm_func_kernel attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry>} + +// ----- + // CHECK: module attributes {gpu.container_module} // CHECK-LABEL: @multiple_launches func.func @multiple_launches() {