diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 9d5483275a4ee92..063055f8015b816 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -268,6 +268,7 @@ static LogicalResult checkImplementationStatus(Operation &op) {
         checkAllocate(op, result);
         checkLinear(op, result);
         checkOrder(op, result);
+        checkPrivate(op, result);
       })
       .Case([&](omp::ParallelOp op) { checkAllocate(op, result); })
       .Case([&](omp::SimdOp op) {
@@ -1301,7 +1302,6 @@ allocatePrivateVars(llvm::IRBuilderBase &builder,
                     MutableArrayRef<mlir::Value> mlirPrivateVars,
                     llvm::SmallVectorImpl<llvm::Value *> &llvmPrivateVars,
                     const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP) {
-  llvm::IRBuilderBase::InsertPointGuard guard(builder);
   // Allocate private vars
   llvm::BranchInst *allocaTerminator =
       llvm::cast<llvm::BranchInst>(allocaIP.getBlock()->getTerminator());
@@ -1363,86 +1363,6 @@ allocatePrivateVars(llvm::IRBuilderBase &builder,
   return afterAllocas;
 }
 
-static LogicalResult
-initFirstPrivateVars(llvm::IRBuilderBase &builder,
-                     LLVM::ModuleTranslation &moduleTranslation,
-                     SmallVectorImpl<mlir::Value> &mlirPrivateVars,
-                     SmallVectorImpl<llvm::Value *> &llvmPrivateVars,
-                     SmallVectorImpl<omp::PrivateClauseOp> &privateDecls,
-                     llvm::BasicBlock *afterAllocas) {
-  llvm::IRBuilderBase::InsertPointGuard guard(builder);
-  // Apply copy region for firstprivate.
-  bool needsFirstprivate =
-      llvm::any_of(privateDecls, [](omp::PrivateClauseOp &privOp) {
-        return privOp.getDataSharingType() ==
-               omp::DataSharingClauseType::FirstPrivate;
-      });
-
-  if (!needsFirstprivate)
-    return success();
-
-  assert(afterAllocas->getSinglePredecessor());
-
-  // Find the end of the allocation blocks
-  builder.SetInsertPoint(afterAllocas->getSinglePredecessor()->getTerminator());
-  llvm::BasicBlock *copyBlock =
-      splitBB(builder, /*CreateBranch=*/true, "omp.private.copy");
-  builder.SetInsertPoint(copyBlock->getFirstNonPHIOrDbgOrAlloca());
-
-  for (auto [decl, mlirVar, llvmVar] :
-       llvm::zip_equal(privateDecls, mlirPrivateVars, llvmPrivateVars)) {
-    if (decl.getDataSharingType() != omp::DataSharingClauseType::FirstPrivate)
-      continue;
-
-    // copyRegion implements `lhs = rhs`
-    Region &copyRegion = decl.getCopyRegion();
-
-    // map copyRegion rhs arg
-    llvm::Value *nonPrivateVar = moduleTranslation.lookupValue(mlirVar);
-    assert(nonPrivateVar);
-    moduleTranslation.mapValue(decl.getCopyMoldArg(), nonPrivateVar);
-
-    // map copyRegion lhs arg
-    moduleTranslation.mapValue(decl.getCopyPrivateArg(), llvmVar);
-
-    // in-place convert copy region
-    builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
-    if (failed(inlineConvertOmpRegions(copyRegion, "omp.private.copy", builder,
-                                       moduleTranslation)))
-      return decl.emitError("failed to inline `copy` region of `omp.private`");
-
-    // ignore unused value yielded from copy region
-
-    // clear copy region block argument mapping in case it needs to be
-    // re-created with different sources for reuse of the same reduction
-    // decl
-    moduleTranslation.forgetMapping(copyRegion);
-  }
-
-  return success();
-}
-
-static LogicalResult
-cleanupPrivateVars(llvm::IRBuilderBase &builder,
-                   LLVM::ModuleTranslation &moduleTranslation, Location loc,
-                   SmallVectorImpl<llvm::Value *> &llvmPrivateVars,
-                   SmallVectorImpl<omp::PrivateClauseOp> &privateDecls) {
-  // private variable deallocation
-  SmallVector<Region *> privateCleanupRegions;
-  llvm::transform(privateDecls, std::back_inserter(privateCleanupRegions),
-                  [](omp::PrivateClauseOp privatizer) {
-                    return &privatizer.getDeallocRegion();
-                  });
-
-  if (failed(inlineOmpRegionCleanup(
-          privateCleanupRegions, llvmPrivateVars, moduleTranslation, builder,
-          "omp.private.dealloc", /*shouldLoadCleanupRegionArg=*/false)))
-    return mlir::emitError(loc, "failed to inline `dealloc` region of an "
-                                "`omp.private` op in");
-
-  return success();
-}
-
 static LogicalResult
 convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder,
                    LLVM::ModuleTranslation &moduleTranslation) {
@@ -1702,10 +1622,50 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
     if (handleError(afterAllocas, *taskOp).failed())
       return llvm::make_error<PreviouslyReportedError>();
 
-    if (failed(initFirstPrivateVars(builder, moduleTranslation, mlirPrivateVars,
-                                    llvmPrivateVars, privateDecls,
-                                    afterAllocas.get())))
-      return llvm::make_error<PreviouslyReportedError>();
+    // Apply copy region for firstprivate
+    bool needsFirstPrivate =
+        llvm::any_of(privateDecls, [](omp::PrivateClauseOp &privOp) {
+          return privOp.getDataSharingType() ==
+                 omp::DataSharingClauseType::FirstPrivate;
+        });
+    if (needsFirstPrivate) {
+      // Find the end of the allocation blocks
+      assert(afterAllocas.get()->getSinglePredecessor());
+      builder.SetInsertPoint(
+          afterAllocas.get()->getSinglePredecessor()->getTerminator());
+      llvm::BasicBlock *copyBlock =
+          splitBB(builder, /*CreateBranch=*/true, "omp.private.copy");
+      builder.SetInsertPoint(copyBlock->getFirstNonPHIOrDbgOrAlloca());
+    }
+    for (auto [decl, mlirVar, llvmVar] :
+         llvm::zip_equal(privateDecls, mlirPrivateVars, llvmPrivateVars)) {
+      if (decl.getDataSharingType() != omp::DataSharingClauseType::FirstPrivate)
+        continue;
+
+      // copyRegion implements `lhs = rhs`
+      Region &copyRegion = decl.getCopyRegion();
+
+      // map copyRegion rhs arg
+      llvm::Value *nonPrivateVar = moduleTranslation.lookupValue(mlirVar);
+      assert(nonPrivateVar);
+      moduleTranslation.mapValue(decl.getCopyMoldArg(), nonPrivateVar);
+
+      // map copyRegion lhs arg
+      moduleTranslation.mapValue(decl.getCopyPrivateArg(), llvmVar);
+
+      // in-place convert copy region
+      builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
+      if (failed(inlineConvertOmpRegions(copyRegion, "omp.private.copy",
+                                         builder, moduleTranslation)))
+        return llvm::createStringError(
+            "failed to inline `copy` region of an `omp.private` op in taskOp");
+
+      // ignore unused value yielded from copy region
+
+      // clear copy region block argument mapping in case it needs to be
+      // re-created with different source for reuse of the same reduction decl
+      moduleTranslation.forgetMapping(copyRegion);
+    }
 
     // translate the body of the task:
     builder.restoreIP(codegenIP);
@@ -1714,11 +1674,19 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
     if (failed(handleError(continuationBlockOrError, *taskOp)))
       return llvm::make_error<PreviouslyReportedError>();
 
-    builder.SetInsertPoint(continuationBlockOrError.get()->getTerminator());
+    // private variable deallocation
+    SmallVector<Region *> privateCleanupRegions;
+    llvm::transform(privateDecls, std::back_inserter(privateCleanupRegions),
+                    [](omp::PrivateClauseOp privatizer) {
+                      return &privatizer.getDeallocRegion();
+                    });
 
-    if (failed(cleanupPrivateVars(builder, moduleTranslation, taskOp.getLoc(),
-                                  llvmPrivateVars, privateDecls)))
-      return llvm::make_error<PreviouslyReportedError>();
+    builder.SetInsertPoint(continuationBlockOrError.get()->getTerminator());
+    if (failed(inlineOmpRegionCleanup(
+            privateCleanupRegions, llvmPrivateVars, moduleTranslation, builder,
+            "omp.private.dealloc", /*shouldLoadCleanupRegionArg=*/false)))
+      return llvm::createStringError("failed to inline `dealloc` region of an "
+                                     "`omp.private` op in an omp.task");
 
     return llvm::Error::success();
   };
@@ -1809,18 +1777,6 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
     chunk = builder.CreateSExtOrTrunc(chunkVar, ivType);
   }
 
-  MutableArrayRef<BlockArgument> privateBlockArgs =
-      cast<omp::BlockArgOpenMPOpInterface>(*wsloopOp).getPrivateBlockArgs();
-  SmallVector<mlir::Value> mlirPrivateVars;
-  SmallVector<llvm::Value *> llvmPrivateVars;
-  SmallVector<omp::PrivateClauseOp> privateDecls;
-  mlirPrivateVars.reserve(privateBlockArgs.size());
-  llvmPrivateVars.reserve(privateBlockArgs.size());
-  collectPrivatizationDecls(wsloopOp, privateDecls);
-
-  for (mlir::Value privateVar : wsloopOp.getPrivateVars())
-    mlirPrivateVars.push_back(privateVar);
-
   SmallVector<omp::DeclareReductionOp> reductionDecls;
   collectReductionDecls(wsloopOp, reductionDecls);
   llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
@@ -1828,37 +1784,15 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
 
   SmallVector<llvm::Value *> privateReductionVariables(
       wsloopOp.getNumReductionVars());
-
-  llvm::Expected<llvm::BasicBlock *> afterAllocas = allocatePrivateVars(
-      builder, moduleTranslation, privateBlockArgs, privateDecls,
-      mlirPrivateVars, llvmPrivateVars, allocaIP);
-  if (handleError(afterAllocas, opInst).failed())
-    return failure();
-
   DenseMap<Value, llvm::Value *> reductionVariableMap;
 
   MutableArrayRef<BlockArgument> reductionArgs =
       cast<omp::BlockArgOpenMPOpInterface>(opInst).getReductionBlockArgs();
 
-  SmallVector<DeferredStore> deferredStores;
-
-  if (failed(allocReductionVars(wsloopOp, reductionArgs, builder,
-                                moduleTranslation, allocaIP, reductionDecls,
-                                privateReductionVariables, reductionVariableMap,
-                                deferredStores, isByRef)))
-    return failure();
-
-  if (failed(initFirstPrivateVars(builder, moduleTranslation, mlirPrivateVars,
-                                  llvmPrivateVars, privateDecls,
-                                  afterAllocas.get())))
-    return failure();
-
-  assert(afterAllocas.get()->getSinglePredecessor());
-  if (failed(initReductionVars(wsloopOp, reductionArgs, builder,
-                               moduleTranslation,
-                               afterAllocas.get()->getSinglePredecessor(),
-                               reductionDecls, privateReductionVariables,
-                               reductionVariableMap, isByRef, deferredStores)))
+  if (failed(allocAndInitializeReductionVars(
+          wsloopOp, reductionArgs, builder, moduleTranslation, allocaIP,
+          reductionDecls, privateReductionVariables, reductionVariableMap,
+          isByRef)))
     return failure();
 
   // TODO: Replace this with proper composite translation support.
@@ -1965,13 +1899,9 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
   builder.restoreIP(afterIP);
 
   // Process the reductions if required.
-  if (failed(createReductionsAndCleanup(wsloopOp, builder, moduleTranslation,
-                                        allocaIP, reductionDecls,
-                                        privateReductionVariables, isByRef)))
-    return failure();
-
-  return cleanupPrivateVars(builder, moduleTranslation, wsloopOp.getLoc(),
-                            llvmPrivateVars, privateDecls);
+  return createReductionsAndCleanup(wsloopOp, builder, moduleTranslation,
+                                    allocaIP, reductionDecls,
+                                    privateReductionVariables, isByRef);
 }
 
 /// Converts the OpenMP parallel operation to LLVM IR.
@@ -2029,12 +1959,52 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
                                   deferredStores, isByRef)))
       return llvm::make_error<PreviouslyReportedError>();
 
-    if (failed(initFirstPrivateVars(builder, moduleTranslation, mlirPrivateVars,
-                                    llvmPrivateVars, privateDecls,
-                                    afterAllocas.get())))
-      return llvm::make_error<PreviouslyReportedError>();
+    // Apply copy region for firstprivate.
+    bool needsFirstprivate =
+        llvm::any_of(privateDecls, [](omp::PrivateClauseOp &privOp) {
+          return privOp.getDataSharingType() ==
+                 omp::DataSharingClauseType::FirstPrivate;
+        });
+    if (needsFirstprivate) {
+      // Find the end of the allocation blocks
+      assert(afterAllocas.get()->getSinglePredecessor());
+      builder.SetInsertPoint(
+          afterAllocas.get()->getSinglePredecessor()->getTerminator());
+      llvm::BasicBlock *copyBlock =
+          splitBB(builder, /*CreateBranch=*/true, "omp.private.copy");
+      builder.SetInsertPoint(copyBlock->getFirstNonPHIOrDbgOrAlloca());
+    }
+    for (auto [decl, mlirVar, llvmVar] :
+         llvm::zip_equal(privateDecls, mlirPrivateVars, llvmPrivateVars)) {
+      if (decl.getDataSharingType() != omp::DataSharingClauseType::FirstPrivate)
+        continue;
+
+      // copyRegion implements `lhs = rhs`
+      Region &copyRegion = decl.getCopyRegion();
+
+      // map copyRegion rhs arg
+      llvm::Value *nonPrivateVar = moduleTranslation.lookupValue(mlirVar);
+      assert(nonPrivateVar);
+      moduleTranslation.mapValue(decl.getCopyMoldArg(), nonPrivateVar);
+
+      // map copyRegion lhs arg
+      moduleTranslation.mapValue(decl.getCopyPrivateArg(), llvmVar);
+
+      // in-place convert copy region
+      builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
+      if (failed(inlineConvertOmpRegions(copyRegion, "omp.private.copy",
+                                         builder, moduleTranslation)))
+        return llvm::createStringError(
+            "failed to inline `copy` region of `omp.private`");
+
+      // ignore unused value yielded from copy region
+
+      // clear copy region block argument mapping in case it needs to be
+      // re-created with different sources for reuse of the same reduction
+      // decl
+      moduleTranslation.forgetMapping(copyRegion);
+    }
 
-    assert(afterAllocas.get()->getSinglePredecessor());
     if (failed(
             initReductionVars(opInst, reductionArgs, builder, moduleTranslation,
                               afterAllocas.get()->getSinglePredecessor(),
@@ -2119,9 +2089,17 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
       return llvm::createStringError(
          "failed to inline `cleanup` region of `omp.declare_reduction`");
 
-    if (failed(cleanupPrivateVars(builder, moduleTranslation, opInst.getLoc(),
-                                  llvmPrivateVars, privateDecls)))
-      return llvm::make_error<PreviouslyReportedError>();
+    SmallVector<Region *> privateCleanupRegions;
+    llvm::transform(privateDecls, std::back_inserter(privateCleanupRegions),
+                    [](omp::PrivateClauseOp privatizer) {
+                      return &privatizer.getDeallocRegion();
+                    });
+
+    if (failed(inlineOmpRegionCleanup(
+            privateCleanupRegions, llvmPrivateVars, moduleTranslation, builder,
+            "omp.private.dealloc", /*shouldLoadCleanupRegionArg=*/false)))
+      return llvm::createStringError(
+          "failed to inline `dealloc` region of `omp.private`");
 
     builder.restoreIP(oldIP);
     return llvm::Error::success();
diff --git a/mlir/test/Target/LLVMIR/openmp-todo.mlir b/mlir/test/Target/LLVMIR/openmp-todo.mlir
index c1e0014d1f571f5..de797ea2aa3649b 100644
--- a/mlir/test/Target/LLVMIR/openmp-todo.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-todo.mlir
@@ -635,3 +635,22 @@ llvm.func @wsloop_order(%lb : i32, %ub : i32, %step : i32) {
   }
   llvm.return
 }
+
+// -----
+
+omp.private {type = private} @x.privatizer : !llvm.ptr alloc {
+^bb0(%arg0: !llvm.ptr):
+  %0 = llvm.mlir.constant(1 : i32) : i32
+  %1 = llvm.alloca %0 x i32 : (i32) -> !llvm.ptr
+  omp.yield(%1 : !llvm.ptr)
+}
+llvm.func @wsloop_private(%lb : i32, %ub : i32, %step : i32, %x : !llvm.ptr) {
+  // expected-error@below {{not yet implemented: Unhandled clause privatization in omp.wsloop operation}}
+  // expected-error@below {{LLVM Translation failed for operation: omp.wsloop}}
+  omp.wsloop private(@x.privatizer %x -> %arg0 : !llvm.ptr) {
+    omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
+      omp.yield
+    }
+  }
+  llvm.return
+}
diff --git a/mlir/test/Target/LLVMIR/openmp-wsloop-private.mlir b/mlir/test/Target/LLVMIR/openmp-wsloop-private.mlir
deleted file mode 100644
index db67bb5fae58b0f..000000000000000
--- a/mlir/test/Target/LLVMIR/openmp-wsloop-private.mlir
+++ /dev/null
@@ -1,88 +0,0 @@
-// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s
-
-// tests a wsloop private + firstprivate + reduction to make sure block structure
-// is handled properly.
-
-omp.private {type = private} @_QFwsloop_privateEi_private_ref_i32 : !llvm.ptr alloc {
-^bb0(%arg0: !llvm.ptr):
-  %0 = llvm.mlir.constant(1 : i64) : i64
-  %1 = llvm.alloca %0 x i32 {bindc_name = "i", pinned} : (i64) -> !llvm.ptr
-  omp.yield(%1 : !llvm.ptr)
-}
-
-llvm.func @foo_free(!llvm.ptr)
-
-omp.private {type = firstprivate} @_QFwsloop_privateEc_firstprivate_ref_c8 : !llvm.ptr alloc {
-^bb0(%arg0: !llvm.ptr):
-  %0 = llvm.mlir.constant(1 : i64) : i64
-  %1 = llvm.alloca %0 x !llvm.array<1 x i8> {bindc_name = "c", pinned} : (i64) -> !llvm.ptr
-  omp.yield(%1 : !llvm.ptr)
-} copy {
-^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
-  %0 = llvm.load %arg0 : !llvm.ptr -> !llvm.array<1 x i8>
-  llvm.store %0, %arg1 : !llvm.array<1 x i8>, !llvm.ptr
-  omp.yield(%arg1 : !llvm.ptr)
-} dealloc {
-^bb0(%arg0: !llvm.ptr):
-  llvm.call @foo_free(%arg0) : (!llvm.ptr) -> ()
-  omp.yield
-}
-
-omp.declare_reduction @max_f32 : f32 init {
-^bb0(%arg0: f32):
-  %0 = llvm.mlir.constant(-3.40282347E+38 : f32) : f32
-  omp.yield(%0 : f32)
-} combiner {
-^bb0(%arg0: f32, %arg1: f32):
-  %0 = llvm.intr.maxnum(%arg0, %arg1) {fastmathFlags = #llvm.fastmath} : (f32, f32) -> f32
-  omp.yield(%0 : f32)
-}
-
-llvm.func @wsloop_private_(%arg0: !llvm.ptr {fir.bindc_name = "y"}) attributes {fir.internal_name = "_QPwsloop_private", frame_pointer = #llvm.framePointerKind, target_cpu = "x86-64"} {
-  %0 = llvm.mlir.constant(1 : i64) : i64
-  %1 = llvm.alloca %0 x f32 {bindc_name = "x"} : (i64) -> !llvm.ptr
-  %3 = llvm.alloca %0 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr
-  %5 = llvm.alloca %0 x !llvm.array<1 x i8> {bindc_name = "c"} : (i64) -> !llvm.ptr
-  %6 = llvm.mlir.constant(1 : i32) : i32
-  %7 = llvm.mlir.constant(10 : i32) : i32
-  %8 = llvm.mlir.constant(0 : i32) : i32
-  omp.parallel {
-    omp.wsloop private(@_QFwsloop_privateEc_firstprivate_ref_c8 %5 -> %arg1, @_QFwsloop_privateEi_private_ref_i32 %3 -> %arg2 : !llvm.ptr, !llvm.ptr) reduction(@max_f32 %1 -> %arg3 : !llvm.ptr) {
-      omp.loop_nest (%arg4) : i32 = (%8) to (%7) inclusive step (%6) {
-        omp.yield
-      }
-    }
-    omp.terminator
-  }
-  llvm.return
-}
-
-// CHECK: call void {{.*}} @__kmpc_fork_call(ptr @1, i32 1, ptr @[[OUTLINED:.*]], ptr %{{.*}})
-
-// CHECK: define internal void @[[OUTLINED:.*]]{{.*}} {
-
-// First, check that all memory for privates and reductions is allocated.
-// CHECK: omp.par.entry:
-// CHECK: %[[CHR:.*]] = alloca [1 x i8], i64 1, align 1
-// CHECK: %[[INT:.*]] = alloca i32, i64 1, align 4
-// CHECK: %[[FLT:.*]] = alloca float, align 4
-// CHECK: %[[RED_ARR:.*]] = alloca [1 x ptr], align 8
-// CHECK: br label %[[LATE_ALLOC_BB:.*]]
-
-// CHECK: [[LATE_ALLOC_BB]]:
-// CHECK: br label %[[PRIVATE_CPY_BB:.*]]
-
-// Second, check that first private was properly copied.
-// CHECK: [[PRIVATE_CPY_BB:.*]]:
-// CHECK: %[[CHR_VAL:.*]] = load [1 x i8], ptr %{{.*}}, align 1
-// CHECK: store [1 x i8] %[[CHR_VAL]], ptr %[[CHR]], align 1
-// CHECK: br label %[[RED_INIT_BB:.*]]
-
-// Third, check that reduction init took place.
-// CHECK: [[RED_INIT_BB]]:
-// CHECK: store float 0x{{.*}}, ptr %[[FLT]], align 4
-
-// Finally, check for the private dealloc region
-// CHECK: call void @foo_free(ptr %[[CHR]])
-
-// CHECK: }