-
Notifications
You must be signed in to change notification settings - Fork 12.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[OpenMP][OMPIRBuilder] Add delayed privatization support for wsloop
#118463
Conversation
@llvm/pr-subscribers-mlir @llvm/pr-subscribers-mlir-llvm — Author: Kareem Ergawy (ergawy). Changes: Extend MLIR to LLVM lowering by adding delayed privatization support for `omp.wsloop`. Parent PR: #118447. Only the latest commit is relevant for this PR. Patch is 29.77 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/118463.diff — 6 files affected:
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 35b0633a04a352..e189480e6b07d3 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -268,7 +268,6 @@ static LogicalResult checkImplementationStatus(Operation &op) {
checkAllocate(op, result);
checkLinear(op, result);
checkOrder(op, result);
- checkPrivate(op, result);
})
.Case([&](omp::ParallelOp op) { checkAllocate(op, result); })
.Case([&](omp::SimdOp op) {
@@ -1030,6 +1029,99 @@ mapInitializationArgs(T loop, LLVM::ModuleTranslation &moduleTranslation,
}
}
+template <typename OP>
+static LogicalResult
+initReductionVars(OP op, ArrayRef<BlockArgument> reductionArgs,
+ llvm::IRBuilderBase &builder,
+ LLVM::ModuleTranslation &moduleTranslation,
+ llvm::BasicBlock *latestAllocaBlock,
+ SmallVectorImpl<omp::DeclareReductionOp> &reductionDecls,
+ SmallVectorImpl<llvm::Value *> &privateReductionVariables,
+ DenseMap<Value, llvm::Value *> &reductionVariableMap,
+ llvm::ArrayRef<bool> isByRef,
+ SmallVectorImpl<DeferredStore> &deferredStores) {
+ if (op.getNumReductionVars() == 0)
+ return success();
+
+ llvm::IRBuilderBase::InsertPointGuard guard(builder);
+
+ builder.SetInsertPoint(latestAllocaBlock->getTerminator());
+ llvm::BasicBlock *initBlock = splitBB(builder, true, "omp.reduction.init");
+ auto allocaIP = llvm::IRBuilderBase::InsertPoint(
+ latestAllocaBlock, latestAllocaBlock->getTerminator()->getIterator());
+ builder.restoreIP(allocaIP);
+ SmallVector<llvm::Value *> byRefVars(op.getNumReductionVars());
+
+ for (unsigned i = 0; i < op.getNumReductionVars(); ++i) {
+ if (isByRef[i]) {
+ if (!reductionDecls[i].getAllocRegion().empty())
+ continue;
+
+ // TODO: remove after all users of by-ref are updated to use the alloc
+ // region: Allocate reduction variable (which is a pointer to the real
+ // reduction variable allocated in the inlined region)
+ byRefVars[i] = builder.CreateAlloca(
+ moduleTranslation.convertType(reductionDecls[i].getType()));
+ }
+ }
+
+ builder.SetInsertPoint(&*initBlock->getFirstNonPHIOrDbgOrAlloca());
+
+ // store result of the alloc region to the allocated pointer to the real
+ // reduction variable
+ for (auto [data, addr] : deferredStores)
+ builder.CreateStore(data, addr);
+
+ // Before the loop, store the initial values of reductions into reduction
+ // variables. Although this could be done after allocas, we don't want to mess
+ // up with the alloca insertion point.
+ for (unsigned i = 0; i < op.getNumReductionVars(); ++i) {
+ SmallVector<llvm::Value *, 1> phis;
+
+ // map block argument to initializer region
+ mapInitializationArgs(op, moduleTranslation, reductionDecls,
+ reductionVariableMap, i);
+
+ if (failed(inlineConvertOmpRegions(reductionDecls[i].getInitializerRegion(),
+ "omp.reduction.neutral", builder,
+ moduleTranslation, &phis)))
+ return failure();
+
+ assert(phis.size() == 1 && "expected one value to be yielded from the "
+ "reduction neutral element declaration region");
+
+ builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
+
+ if (isByRef[i]) {
+ if (!reductionDecls[i].getAllocRegion().empty())
+ // done in allocReductionVars
+ continue;
+
+ // TODO: this path can be removed once all users of by-ref are updated to
+ // use an alloc region
+
+ // Store the result of the inlined region to the allocated reduction var
+ // ptr
+ builder.CreateStore(phis[0], byRefVars[i]);
+
+ privateReductionVariables[i] = byRefVars[i];
+ moduleTranslation.mapValue(reductionArgs[i], phis[0]);
+ reductionVariableMap.try_emplace(op.getReductionVars()[i], phis[0]);
+ } else {
+ // for by-ref case the store is inside of the reduction region
+ builder.CreateStore(phis[0], privateReductionVariables[i]);
+ // the rest was handled in allocByValReductionVars
+ }
+
+ // forget the mapping for the initializer region because we might need a
+ // different mapping if this reduction declaration is re-used for a
+ // different variable
+ moduleTranslation.forgetMapping(reductionDecls[i].getInitializerRegion());
+ }
+
+ return success();
+}
+
/// Collect reduction info
template <typename T>
static void collectReductionInfo(
@@ -1183,6 +1275,7 @@ static LogicalResult allocAndInitializeReductionVars(
if (op.getNumReductionVars() == 0)
return success();
+ llvm::IRBuilderBase::InsertPointGuard guard(builder);
SmallVector<DeferredStore> deferredStores;
if (failed(allocReductionVars(op, reductionArgs, builder, moduleTranslation,
@@ -1191,59 +1284,10 @@ static LogicalResult allocAndInitializeReductionVars(
deferredStores, isByRef)))
return failure();
- // store result of the alloc region to the allocated pointer to the real
- // reduction variable
- for (auto [data, addr] : deferredStores)
- builder.CreateStore(data, addr);
-
- // Before the loop, store the initial values of reductions into reduction
- // variables. Although this could be done after allocas, we don't want to mess
- // up with the alloca insertion point.
- for (unsigned i = 0; i < op.getNumReductionVars(); ++i) {
- SmallVector<llvm::Value *, 1> phis;
-
- // map block argument to initializer region
- mapInitializationArgs(op, moduleTranslation, reductionDecls,
- reductionVariableMap, i);
-
- if (failed(inlineConvertOmpRegions(reductionDecls[i].getInitializerRegion(),
- "omp.reduction.neutral", builder,
- moduleTranslation, &phis)))
- return failure();
- assert(phis.size() == 1 && "expected one value to be yielded from the "
- "reduction neutral element declaration region");
- if (isByRef[i]) {
- if (!reductionDecls[i].getAllocRegion().empty())
- // done in allocReductionVars
- continue;
-
- // TODO: this path can be removed once all users of by-ref are updated to
- // use an alloc region
-
- // Allocate reduction variable (which is a pointer to the real reduction
- // variable allocated in the inlined region)
- llvm::Value *var = builder.CreateAlloca(
- moduleTranslation.convertType(reductionDecls[i].getType()));
- // Store the result of the inlined region to the allocated reduction var
- // ptr
- builder.CreateStore(phis[0], var);
-
- privateReductionVariables[i] = var;
- moduleTranslation.mapValue(reductionArgs[i], phis[0]);
- reductionVariableMap.try_emplace(op.getReductionVars()[i], phis[0]);
- } else {
- // for by-ref case the store is inside of the reduction region
- builder.CreateStore(phis[0], privateReductionVariables[i]);
- // the rest was handled in allocByValReductionVars
- }
-
- // forget the mapping for the initializer region because we might need a
- // different mapping if this reduction declaration is re-used for a
- // different variable
- moduleTranslation.forgetMapping(reductionDecls[i].getInitializerRegion());
- }
-
- return success();
+ return initReductionVars(op, reductionArgs, builder, moduleTranslation,
+ allocaIP.getBlock(), reductionDecls,
+ privateReductionVariables, reductionVariableMap,
+ isByRef, deferredStores);
}
/// Allocate delayed private variables. Returns the basic block which comes
@@ -1257,6 +1301,7 @@ allocatePrivateVars(llvm::IRBuilderBase &builder,
MutableArrayRef<mlir::Value> mlirPrivateVars,
llvm::SmallVectorImpl<llvm::Value *> &llvmPrivateVars,
const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP) {
+ llvm::IRBuilderBase::InsertPointGuard guard(builder);
// Allocate private vars
llvm::BranchInst *allocaTerminator =
llvm::cast<llvm::BranchInst>(allocaIP.getBlock()->getTerminator());
@@ -1318,6 +1363,63 @@ allocatePrivateVars(llvm::IRBuilderBase &builder,
return afterAllocas;
}
+static LogicalResult
+initFirstPrivateVars(llvm::IRBuilderBase &builder,
+ LLVM::ModuleTranslation &moduleTranslation,
+ SmallVectorImpl<mlir::Value> &mlirPrivateVars,
+ SmallVectorImpl<llvm::Value *> &llvmPrivateVars,
+ SmallVectorImpl<omp::PrivateClauseOp> &privateDecls,
+ llvm::BasicBlock *afterAllocas) {
+ llvm::IRBuilderBase::InsertPointGuard guard(builder);
+ // Apply copy region for firstprivate.
+ bool needsFirstprivate =
+ llvm::any_of(privateDecls, [](omp::PrivateClauseOp &privOp) {
+ return privOp.getDataSharingType() ==
+ omp::DataSharingClauseType::FirstPrivate;
+ });
+
+ if (needsFirstprivate) {
+ // Find the end of the allocation blocks
+ builder.SetInsertPoint(
+ afterAllocas->getSinglePredecessor()->getTerminator());
+ llvm::BasicBlock *copyBlock =
+ splitBB(builder, /*CreateBranch=*/true, "omp.private.copy");
+ builder.SetInsertPoint(copyBlock->getFirstNonPHIOrDbgOrAlloca());
+ }
+
+ for (auto [decl, mlirVar, llvmVar] :
+ llvm::zip_equal(privateDecls, mlirPrivateVars, llvmPrivateVars)) {
+ if (decl.getDataSharingType() != omp::DataSharingClauseType::FirstPrivate)
+ continue;
+
+ // copyRegion implements `lhs = rhs`
+ Region &copyRegion = decl.getCopyRegion();
+
+ // map copyRegion rhs arg
+ llvm::Value *nonPrivateVar = moduleTranslation.lookupValue(mlirVar);
+ assert(nonPrivateVar);
+ moduleTranslation.mapValue(decl.getCopyMoldArg(), nonPrivateVar);
+
+ // map copyRegion lhs arg
+ moduleTranslation.mapValue(decl.getCopyPrivateArg(), llvmVar);
+
+ // in-place convert copy region
+ builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
+ if (failed(inlineConvertOmpRegions(copyRegion, "omp.private.copy", builder,
+ moduleTranslation)))
+ return decl.emitError("failed to inline `copy` region of `omp.private`");
+
+ // ignore unused value yielded from copy region
+
+ // clear copy region block argument mapping in case it needs to be
+ // re-created with different sources for reuse of the same `omp.private`
+ // decl
+ moduleTranslation.forgetMapping(copyRegion);
+ }
+
+ return success();
+}
+
static LogicalResult
convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder,
LLVM::ModuleTranslation &moduleTranslation) {
@@ -1577,50 +1679,10 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
if (handleError(afterAllocas, *taskOp).failed())
return llvm::make_error<PreviouslyReportedError>();
- // Apply copy region for firstprivate
- bool needsFirstPrivate =
- llvm::any_of(privateDecls, [](omp::PrivateClauseOp &privOp) {
- return privOp.getDataSharingType() ==
- omp::DataSharingClauseType::FirstPrivate;
- });
- if (needsFirstPrivate) {
- // Find the end of the allocation blocks
- assert(afterAllocas.get()->getSinglePredecessor());
- builder.SetInsertPoint(
- afterAllocas.get()->getSinglePredecessor()->getTerminator());
- llvm::BasicBlock *copyBlock =
- splitBB(builder, /*CreateBranch=*/true, "omp.private.copy");
- builder.SetInsertPoint(copyBlock->getFirstNonPHIOrDbgOrAlloca());
- }
- for (auto [decl, mlirVar, llvmVar] :
- llvm::zip_equal(privateDecls, mlirPrivateVars, llvmPrivateVars)) {
- if (decl.getDataSharingType() != omp::DataSharingClauseType::FirstPrivate)
- continue;
-
- // copyRegion implements `lhs = rhs`
- Region &copyRegion = decl.getCopyRegion();
-
- // map copyRegion rhs arg
- llvm::Value *nonPrivateVar = moduleTranslation.lookupValue(mlirVar);
- assert(nonPrivateVar);
- moduleTranslation.mapValue(decl.getCopyMoldArg(), nonPrivateVar);
-
- // map copyRegion lhs arg
- moduleTranslation.mapValue(decl.getCopyPrivateArg(), llvmVar);
-
- // in-place convert copy region
- builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
- if (failed(inlineConvertOmpRegions(copyRegion, "omp.private.copy",
- builder, moduleTranslation)))
- return llvm::createStringError(
- "failed to inline `copy` region of an `omp.private` op in taskOp");
-
- // ignore unused value yielded from copy region
-
- // clear copy region block argument mapping in case it needs to be
- // re-created with different source for reuse of the same reduction decl
- moduleTranslation.forgetMapping(copyRegion);
- }
+ if (failed(initFirstPrivateVars(builder, moduleTranslation, mlirPrivateVars,
+ llvmPrivateVars, privateDecls,
+ afterAllocas.get())))
+ return llvm::make_error<PreviouslyReportedError>();
// translate the body of the task:
builder.restoreIP(codegenIP);
@@ -1732,6 +1794,18 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
chunk = builder.CreateSExtOrTrunc(chunkVar, ivType);
}
+ MutableArrayRef<BlockArgument> privateBlockArgs =
+ cast<omp::BlockArgOpenMPOpInterface>(*wsloopOp).getPrivateBlockArgs();
+ SmallVector<mlir::Value> mlirPrivateVars;
+ SmallVector<llvm::Value *> llvmPrivateVars;
+ SmallVector<omp::PrivateClauseOp> privateDecls;
+ mlirPrivateVars.reserve(privateBlockArgs.size());
+ llvmPrivateVars.reserve(privateBlockArgs.size());
+ collectPrivatizationDecls(wsloopOp, privateDecls);
+
+ for (mlir::Value privateVar : wsloopOp.getPrivateVars())
+ mlirPrivateVars.push_back(privateVar);
+
SmallVector<omp::DeclareReductionOp> reductionDecls;
collectReductionDecls(wsloopOp, reductionDecls);
llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
@@ -1739,15 +1813,37 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
SmallVector<llvm::Value *> privateReductionVariables(
wsloopOp.getNumReductionVars());
+
+ llvm::Expected<llvm::BasicBlock *> afterAllocas = allocatePrivateVars(
+ builder, moduleTranslation, privateBlockArgs, privateDecls,
+ mlirPrivateVars, llvmPrivateVars, allocaIP);
+ if (handleError(afterAllocas, opInst).failed())
+ return failure();
+
DenseMap<Value, llvm::Value *> reductionVariableMap;
MutableArrayRef<BlockArgument> reductionArgs =
cast<omp::BlockArgOpenMPOpInterface>(opInst).getReductionBlockArgs();
- if (failed(allocAndInitializeReductionVars(
- wsloopOp, reductionArgs, builder, moduleTranslation, allocaIP,
- reductionDecls, privateReductionVariables, reductionVariableMap,
- isByRef)))
+ SmallVector<DeferredStore> deferredStores;
+
+ if (failed(allocReductionVars(wsloopOp, reductionArgs, builder,
+ moduleTranslation, allocaIP, reductionDecls,
+ privateReductionVariables, reductionVariableMap,
+ deferredStores, isByRef)))
+ return failure();
+
+ if (failed(initFirstPrivateVars(builder, moduleTranslation, mlirPrivateVars,
+ llvmPrivateVars, privateDecls,
+ afterAllocas.get())))
+ return failure();
+
+ assert(afterAllocas.get()->getSinglePredecessor());
+ if (failed(initReductionVars(wsloopOp, reductionArgs, builder,
+ moduleTranslation,
+ afterAllocas.get()->getSinglePredecessor(),
+ reductionDecls, privateReductionVariables,
+ reductionVariableMap, isByRef, deferredStores)))
return failure();
// TODO: Replace this with proper composite translation support.
@@ -1914,122 +2010,18 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
deferredStores, isByRef)))
return llvm::make_error<PreviouslyReportedError>();
- // Apply copy region for firstprivate.
- bool needsFirstprivate =
- llvm::any_of(privateDecls, [](omp::PrivateClauseOp &privOp) {
- return privOp.getDataSharingType() ==
- omp::DataSharingClauseType::FirstPrivate;
- });
- if (needsFirstprivate) {
- // Find the end of the allocation blocks
- assert(afterAllocas.get()->getSinglePredecessor());
- builder.SetInsertPoint(
- afterAllocas.get()->getSinglePredecessor()->getTerminator());
- llvm::BasicBlock *copyBlock =
- splitBB(builder, /*CreateBranch=*/true, "omp.private.copy");
- builder.SetInsertPoint(copyBlock->getFirstNonPHIOrDbgOrAlloca());
- }
- for (auto [decl, mlirVar, llvmVar] :
- llvm::zip_equal(privateDecls, mlirPrivateVars, llvmPrivateVars)) {
- if (decl.getDataSharingType() != omp::DataSharingClauseType::FirstPrivate)
- continue;
-
- // copyRegion implements `lhs = rhs`
- Region &copyRegion = decl.getCopyRegion();
-
- // map copyRegion rhs arg
- llvm::Value *nonPrivateVar = moduleTranslation.lookupValue(mlirVar);
- assert(nonPrivateVar);
- moduleTranslation.mapValue(decl.getCopyMoldArg(), nonPrivateVar);
-
- // map copyRegion lhs arg
- moduleTranslation.mapValue(decl.getCopyPrivateArg(), llvmVar);
-
- // in-place convert copy region
- builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
- if (failed(inlineConvertOmpRegions(copyRegion, "omp.private.copy",
- builder, moduleTranslation)))
- return llvm::createStringError(
- "failed to inline `copy` region of `omp.private`");
-
- // ignore unused value yielded from copy region
-
- // clear copy region block argument mapping in case it needs to be
- // re-created with different sources for reuse of the same reduction
- // decl
- moduleTranslation.forgetMapping(copyRegion);
- }
-
- // Initialize reduction vars
- builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
- llvm::BasicBlock *initBlock = splitBB(builder, true, "omp.reduction.init");
- allocaIP =
- InsertPointTy(allocaIP.getBlock(),
- allocaIP.getBlock()->getTerminator()->getIterator());
-
- builder.restoreIP(allocaIP);
- SmallVector<llvm::Value *> byRefVars(opInst.getNumReductionVars());
- for (unsigned i = 0; i < opInst.getNumReductionVars(); ++i) {
- if (isByRef[i]) {
- if (!reductionDecls[i].getAllocRegion().empty())
- continue;
-
- // TODO: remove after all users of by-ref are updated to use the alloc
- // region: Allocate reduction variable (which is a pointer to the real
- // reduciton variable allocated in the inlined region)
- byRefVars[i] = builder.CreateAlloca(
- moduleTranslation.convertType(reductionDecls[i].getType()));
- }
- }
-
- builder.SetInsertPoint(initBlock->getFirstNonPHIOrDbgOrAlloca());
-
- // insert stores deferred until after all allocas
- // these store the results of the alloc region into the allocation for the
- // pointer to the reduction variable
- for (auto [data, addr] : deferredStores)
- builder.CreateStore(data, addr);
-
- for (unsigned i = 0; i < opInst.getNumReductionVars(); ++i) {
- SmallVector<llvm::Value *> phis;
-
- // map the block argument
- mapInitializationArgs(opInst, moduleTranslation, reductionDecls,
- reductionVariableMap, i);
- if (failed(inlineConvertOmpRegions(
- reductionDecls[i].getInitializerRegion(), "omp.reduction.neutral",
- builder, moduleTranslation, &phis)))
- return llvm::createStringError(
- "failed to inline `init` ...
[truncated]
|
@llvm/pr-subscribers-flang-openmp — Author: Kareem Ergawy (ergawy). Changes: Extend MLIR to LLVM lowering by adding delayed privatization support for `omp.wsloop`. Parent PR: #118447. Only the latest commit is relevant for this PR. Patch is 29.77 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/118463.diff — 6 files affected:
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 35b0633a04a352..e189480e6b07d3 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -268,7 +268,6 @@ static LogicalResult checkImplementationStatus(Operation &op) {
checkAllocate(op, result);
checkLinear(op, result);
checkOrder(op, result);
- checkPrivate(op, result);
})
.Case([&](omp::ParallelOp op) { checkAllocate(op, result); })
.Case([&](omp::SimdOp op) {
@@ -1030,6 +1029,99 @@ mapInitializationArgs(T loop, LLVM::ModuleTranslation &moduleTranslation,
}
}
+template <typename OP>
+static LogicalResult
+initReductionVars(OP op, ArrayRef<BlockArgument> reductionArgs,
+ llvm::IRBuilderBase &builder,
+ LLVM::ModuleTranslation &moduleTranslation,
+ llvm::BasicBlock *latestAllocaBlock,
+ SmallVectorImpl<omp::DeclareReductionOp> &reductionDecls,
+ SmallVectorImpl<llvm::Value *> &privateReductionVariables,
+ DenseMap<Value, llvm::Value *> &reductionVariableMap,
+ llvm::ArrayRef<bool> isByRef,
+ SmallVectorImpl<DeferredStore> &deferredStores) {
+ if (op.getNumReductionVars() == 0)
+ return success();
+
+ llvm::IRBuilderBase::InsertPointGuard guard(builder);
+
+ builder.SetInsertPoint(latestAllocaBlock->getTerminator());
+ llvm::BasicBlock *initBlock = splitBB(builder, true, "omp.reduction.init");
+ auto allocaIP = llvm::IRBuilderBase::InsertPoint(
+ latestAllocaBlock, latestAllocaBlock->getTerminator()->getIterator());
+ builder.restoreIP(allocaIP);
+ SmallVector<llvm::Value *> byRefVars(op.getNumReductionVars());
+
+ for (unsigned i = 0; i < op.getNumReductionVars(); ++i) {
+ if (isByRef[i]) {
+ if (!reductionDecls[i].getAllocRegion().empty())
+ continue;
+
+ // TODO: remove after all users of by-ref are updated to use the alloc
+ // region: Allocate reduction variable (which is a pointer to the real
+ // reduction variable allocated in the inlined region)
+ byRefVars[i] = builder.CreateAlloca(
+ moduleTranslation.convertType(reductionDecls[i].getType()));
+ }
+ }
+
+ builder.SetInsertPoint(&*initBlock->getFirstNonPHIOrDbgOrAlloca());
+
+ // store result of the alloc region to the allocated pointer to the real
+ // reduction variable
+ for (auto [data, addr] : deferredStores)
+ builder.CreateStore(data, addr);
+
+ // Before the loop, store the initial values of reductions into reduction
+ // variables. Although this could be done after allocas, we don't want to mess
+ // up with the alloca insertion point.
+ for (unsigned i = 0; i < op.getNumReductionVars(); ++i) {
+ SmallVector<llvm::Value *, 1> phis;
+
+ // map block argument to initializer region
+ mapInitializationArgs(op, moduleTranslation, reductionDecls,
+ reductionVariableMap, i);
+
+ if (failed(inlineConvertOmpRegions(reductionDecls[i].getInitializerRegion(),
+ "omp.reduction.neutral", builder,
+ moduleTranslation, &phis)))
+ return failure();
+
+ assert(phis.size() == 1 && "expected one value to be yielded from the "
+ "reduction neutral element declaration region");
+
+ builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
+
+ if (isByRef[i]) {
+ if (!reductionDecls[i].getAllocRegion().empty())
+ // done in allocReductionVars
+ continue;
+
+ // TODO: this path can be removed once all users of by-ref are updated to
+ // use an alloc region
+
+ // Store the result of the inlined region to the allocated reduction var
+ // ptr
+ builder.CreateStore(phis[0], byRefVars[i]);
+
+ privateReductionVariables[i] = byRefVars[i];
+ moduleTranslation.mapValue(reductionArgs[i], phis[0]);
+ reductionVariableMap.try_emplace(op.getReductionVars()[i], phis[0]);
+ } else {
+ // for by-ref case the store is inside of the reduction region
+ builder.CreateStore(phis[0], privateReductionVariables[i]);
+ // the rest was handled in allocByValReductionVars
+ }
+
+ // forget the mapping for the initializer region because we might need a
+ // different mapping if this reduction declaration is re-used for a
+ // different variable
+ moduleTranslation.forgetMapping(reductionDecls[i].getInitializerRegion());
+ }
+
+ return success();
+}
+
/// Collect reduction info
template <typename T>
static void collectReductionInfo(
@@ -1183,6 +1275,7 @@ static LogicalResult allocAndInitializeReductionVars(
if (op.getNumReductionVars() == 0)
return success();
+ llvm::IRBuilderBase::InsertPointGuard guard(builder);
SmallVector<DeferredStore> deferredStores;
if (failed(allocReductionVars(op, reductionArgs, builder, moduleTranslation,
@@ -1191,59 +1284,10 @@ static LogicalResult allocAndInitializeReductionVars(
deferredStores, isByRef)))
return failure();
- // store result of the alloc region to the allocated pointer to the real
- // reduction variable
- for (auto [data, addr] : deferredStores)
- builder.CreateStore(data, addr);
-
- // Before the loop, store the initial values of reductions into reduction
- // variables. Although this could be done after allocas, we don't want to mess
- // up with the alloca insertion point.
- for (unsigned i = 0; i < op.getNumReductionVars(); ++i) {
- SmallVector<llvm::Value *, 1> phis;
-
- // map block argument to initializer region
- mapInitializationArgs(op, moduleTranslation, reductionDecls,
- reductionVariableMap, i);
-
- if (failed(inlineConvertOmpRegions(reductionDecls[i].getInitializerRegion(),
- "omp.reduction.neutral", builder,
- moduleTranslation, &phis)))
- return failure();
- assert(phis.size() == 1 && "expected one value to be yielded from the "
- "reduction neutral element declaration region");
- if (isByRef[i]) {
- if (!reductionDecls[i].getAllocRegion().empty())
- // done in allocReductionVars
- continue;
-
- // TODO: this path can be removed once all users of by-ref are updated to
- // use an alloc region
-
- // Allocate reduction variable (which is a pointer to the real reduction
- // variable allocated in the inlined region)
- llvm::Value *var = builder.CreateAlloca(
- moduleTranslation.convertType(reductionDecls[i].getType()));
- // Store the result of the inlined region to the allocated reduction var
- // ptr
- builder.CreateStore(phis[0], var);
-
- privateReductionVariables[i] = var;
- moduleTranslation.mapValue(reductionArgs[i], phis[0]);
- reductionVariableMap.try_emplace(op.getReductionVars()[i], phis[0]);
- } else {
- // for by-ref case the store is inside of the reduction region
- builder.CreateStore(phis[0], privateReductionVariables[i]);
- // the rest was handled in allocByValReductionVars
- }
-
- // forget the mapping for the initializer region because we might need a
- // different mapping if this reduction declaration is re-used for a
- // different variable
- moduleTranslation.forgetMapping(reductionDecls[i].getInitializerRegion());
- }
-
- return success();
+ return initReductionVars(op, reductionArgs, builder, moduleTranslation,
+ allocaIP.getBlock(), reductionDecls,
+ privateReductionVariables, reductionVariableMap,
+ isByRef, deferredStores);
}
/// Allocate delayed private variables. Returns the basic block which comes
@@ -1257,6 +1301,7 @@ allocatePrivateVars(llvm::IRBuilderBase &builder,
MutableArrayRef<mlir::Value> mlirPrivateVars,
llvm::SmallVectorImpl<llvm::Value *> &llvmPrivateVars,
const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP) {
+ llvm::IRBuilderBase::InsertPointGuard guard(builder);
// Allocate private vars
llvm::BranchInst *allocaTerminator =
llvm::cast<llvm::BranchInst>(allocaIP.getBlock()->getTerminator());
@@ -1318,6 +1363,63 @@ allocatePrivateVars(llvm::IRBuilderBase &builder,
return afterAllocas;
}
+static LogicalResult
+initFirstPrivateVars(llvm::IRBuilderBase &builder,
+ LLVM::ModuleTranslation &moduleTranslation,
+ SmallVectorImpl<mlir::Value> &mlirPrivateVars,
+ SmallVectorImpl<llvm::Value *> &llvmPrivateVars,
+ SmallVectorImpl<omp::PrivateClauseOp> &privateDecls,
+ llvm::BasicBlock *afterAllocas) {
+ llvm::IRBuilderBase::InsertPointGuard guard(builder);
+ // Apply copy region for firstprivate.
+ bool needsFirstprivate =
+ llvm::any_of(privateDecls, [](omp::PrivateClauseOp &privOp) {
+ return privOp.getDataSharingType() ==
+ omp::DataSharingClauseType::FirstPrivate;
+ });
+
+ if (needsFirstprivate) {
+ // Find the end of the allocation blocks
+ builder.SetInsertPoint(
+ afterAllocas->getSinglePredecessor()->getTerminator());
+ llvm::BasicBlock *copyBlock =
+ splitBB(builder, /*CreateBranch=*/true, "omp.private.copy");
+ builder.SetInsertPoint(copyBlock->getFirstNonPHIOrDbgOrAlloca());
+ }
+
+ for (auto [decl, mlirVar, llvmVar] :
+ llvm::zip_equal(privateDecls, mlirPrivateVars, llvmPrivateVars)) {
+ if (decl.getDataSharingType() != omp::DataSharingClauseType::FirstPrivate)
+ continue;
+
+ // copyRegion implements `lhs = rhs`
+ Region &copyRegion = decl.getCopyRegion();
+
+ // map copyRegion rhs arg
+ llvm::Value *nonPrivateVar = moduleTranslation.lookupValue(mlirVar);
+ assert(nonPrivateVar);
+ moduleTranslation.mapValue(decl.getCopyMoldArg(), nonPrivateVar);
+
+ // map copyRegion lhs arg
+ moduleTranslation.mapValue(decl.getCopyPrivateArg(), llvmVar);
+
+ // in-place convert copy region
+ builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
+ if (failed(inlineConvertOmpRegions(copyRegion, "omp.private.copy", builder,
+ moduleTranslation)))
+ return decl.emitError("failed to inline `copy` region of `omp.private`");
+
+ // ignore unused value yielded from copy region
+
+ // clear copy region block argument mapping in case it needs to be
+ // re-created with different sources for reuse of the same `omp.private`
+ // decl
+ moduleTranslation.forgetMapping(copyRegion);
+ }
+
+ return success();
+}
+
static LogicalResult
convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder,
LLVM::ModuleTranslation &moduleTranslation) {
@@ -1577,50 +1679,10 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
if (handleError(afterAllocas, *taskOp).failed())
return llvm::make_error<PreviouslyReportedError>();
- // Apply copy region for firstprivate
- bool needsFirstPrivate =
- llvm::any_of(privateDecls, [](omp::PrivateClauseOp &privOp) {
- return privOp.getDataSharingType() ==
- omp::DataSharingClauseType::FirstPrivate;
- });
- if (needsFirstPrivate) {
- // Find the end of the allocation blocks
- assert(afterAllocas.get()->getSinglePredecessor());
- builder.SetInsertPoint(
- afterAllocas.get()->getSinglePredecessor()->getTerminator());
- llvm::BasicBlock *copyBlock =
- splitBB(builder, /*CreateBranch=*/true, "omp.private.copy");
- builder.SetInsertPoint(copyBlock->getFirstNonPHIOrDbgOrAlloca());
- }
- for (auto [decl, mlirVar, llvmVar] :
- llvm::zip_equal(privateDecls, mlirPrivateVars, llvmPrivateVars)) {
- if (decl.getDataSharingType() != omp::DataSharingClauseType::FirstPrivate)
- continue;
-
- // copyRegion implements `lhs = rhs`
- Region &copyRegion = decl.getCopyRegion();
-
- // map copyRegion rhs arg
- llvm::Value *nonPrivateVar = moduleTranslation.lookupValue(mlirVar);
- assert(nonPrivateVar);
- moduleTranslation.mapValue(decl.getCopyMoldArg(), nonPrivateVar);
-
- // map copyRegion lhs arg
- moduleTranslation.mapValue(decl.getCopyPrivateArg(), llvmVar);
-
- // in-place convert copy region
- builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
- if (failed(inlineConvertOmpRegions(copyRegion, "omp.private.copy",
- builder, moduleTranslation)))
- return llvm::createStringError(
- "failed to inline `copy` region of an `omp.private` op in taskOp");
-
- // ignore unused value yielded from copy region
-
- // clear copy region block argument mapping in case it needs to be
- // re-created with different source for reuse of the same reduction decl
- moduleTranslation.forgetMapping(copyRegion);
- }
+ if (failed(initFirstPrivateVars(builder, moduleTranslation, mlirPrivateVars,
+ llvmPrivateVars, privateDecls,
+ afterAllocas.get())))
+ return llvm::make_error<PreviouslyReportedError>();
// translate the body of the task:
builder.restoreIP(codegenIP);
@@ -1732,6 +1794,18 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
chunk = builder.CreateSExtOrTrunc(chunkVar, ivType);
}
+ MutableArrayRef<BlockArgument> privateBlockArgs =
+ cast<omp::BlockArgOpenMPOpInterface>(*wsloopOp).getPrivateBlockArgs();
+ SmallVector<mlir::Value> mlirPrivateVars;
+ SmallVector<llvm::Value *> llvmPrivateVars;
+ SmallVector<omp::PrivateClauseOp> privateDecls;
+ mlirPrivateVars.reserve(privateBlockArgs.size());
+ llvmPrivateVars.reserve(privateBlockArgs.size());
+ collectPrivatizationDecls(wsloopOp, privateDecls);
+
+ for (mlir::Value privateVar : wsloopOp.getPrivateVars())
+ mlirPrivateVars.push_back(privateVar);
+
SmallVector<omp::DeclareReductionOp> reductionDecls;
collectReductionDecls(wsloopOp, reductionDecls);
llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
@@ -1739,15 +1813,37 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
SmallVector<llvm::Value *> privateReductionVariables(
wsloopOp.getNumReductionVars());
+
+ llvm::Expected<llvm::BasicBlock *> afterAllocas = allocatePrivateVars(
+ builder, moduleTranslation, privateBlockArgs, privateDecls,
+ mlirPrivateVars, llvmPrivateVars, allocaIP);
+ if (handleError(afterAllocas, opInst).failed())
+ return failure();
+
DenseMap<Value, llvm::Value *> reductionVariableMap;
MutableArrayRef<BlockArgument> reductionArgs =
cast<omp::BlockArgOpenMPOpInterface>(opInst).getReductionBlockArgs();
- if (failed(allocAndInitializeReductionVars(
- wsloopOp, reductionArgs, builder, moduleTranslation, allocaIP,
- reductionDecls, privateReductionVariables, reductionVariableMap,
- isByRef)))
+ SmallVector<DeferredStore> deferredStores;
+
+ if (failed(allocReductionVars(wsloopOp, reductionArgs, builder,
+ moduleTranslation, allocaIP, reductionDecls,
+ privateReductionVariables, reductionVariableMap,
+ deferredStores, isByRef)))
+ return failure();
+
+ if (failed(initFirstPrivateVars(builder, moduleTranslation, mlirPrivateVars,
+ llvmPrivateVars, privateDecls,
+ afterAllocas.get())))
+ return failure();
+
+ assert(afterAllocas.get()->getSinglePredecessor());
+ if (failed(initReductionVars(wsloopOp, reductionArgs, builder,
+ moduleTranslation,
+ afterAllocas.get()->getSinglePredecessor(),
+ reductionDecls, privateReductionVariables,
+ reductionVariableMap, isByRef, deferredStores)))
return failure();
// TODO: Replace this with proper composite translation support.
@@ -1914,122 +2010,18 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
deferredStores, isByRef)))
return llvm::make_error<PreviouslyReportedError>();
- // Apply copy region for firstprivate.
- bool needsFirstprivate =
- llvm::any_of(privateDecls, [](omp::PrivateClauseOp &privOp) {
- return privOp.getDataSharingType() ==
- omp::DataSharingClauseType::FirstPrivate;
- });
- if (needsFirstprivate) {
- // Find the end of the allocation blocks
- assert(afterAllocas.get()->getSinglePredecessor());
- builder.SetInsertPoint(
- afterAllocas.get()->getSinglePredecessor()->getTerminator());
- llvm::BasicBlock *copyBlock =
- splitBB(builder, /*CreateBranch=*/true, "omp.private.copy");
- builder.SetInsertPoint(copyBlock->getFirstNonPHIOrDbgOrAlloca());
- }
- for (auto [decl, mlirVar, llvmVar] :
- llvm::zip_equal(privateDecls, mlirPrivateVars, llvmPrivateVars)) {
- if (decl.getDataSharingType() != omp::DataSharingClauseType::FirstPrivate)
- continue;
-
- // copyRegion implements `lhs = rhs`
- Region &copyRegion = decl.getCopyRegion();
-
- // map copyRegion rhs arg
- llvm::Value *nonPrivateVar = moduleTranslation.lookupValue(mlirVar);
- assert(nonPrivateVar);
- moduleTranslation.mapValue(decl.getCopyMoldArg(), nonPrivateVar);
-
- // map copyRegion lhs arg
- moduleTranslation.mapValue(decl.getCopyPrivateArg(), llvmVar);
-
- // in-place convert copy region
- builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
- if (failed(inlineConvertOmpRegions(copyRegion, "omp.private.copy",
- builder, moduleTranslation)))
- return llvm::createStringError(
- "failed to inline `copy` region of `omp.private`");
-
- // ignore unused value yielded from copy region
-
- // clear copy region block argument mapping in case it needs to be
- // re-created with different sources for reuse of the same reduction
- // decl
- moduleTranslation.forgetMapping(copyRegion);
- }
-
- // Initialize reduction vars
- builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
- llvm::BasicBlock *initBlock = splitBB(builder, true, "omp.reduction.init");
- allocaIP =
- InsertPointTy(allocaIP.getBlock(),
- allocaIP.getBlock()->getTerminator()->getIterator());
-
- builder.restoreIP(allocaIP);
- SmallVector<llvm::Value *> byRefVars(opInst.getNumReductionVars());
- for (unsigned i = 0; i < opInst.getNumReductionVars(); ++i) {
- if (isByRef[i]) {
- if (!reductionDecls[i].getAllocRegion().empty())
- continue;
-
- // TODO: remove after all users of by-ref are updated to use the alloc
- // region: Allocate reduction variable (which is a pointer to the real
- // reduciton variable allocated in the inlined region)
- byRefVars[i] = builder.CreateAlloca(
- moduleTranslation.convertType(reductionDecls[i].getType()));
- }
- }
-
- builder.SetInsertPoint(initBlock->getFirstNonPHIOrDbgOrAlloca());
-
- // insert stores deferred until after all allocas
- // these store the results of the alloc region into the allocation for the
- // pointer to the reduction variable
- for (auto [data, addr] : deferredStores)
- builder.CreateStore(data, addr);
-
- for (unsigned i = 0; i < opInst.getNumReductionVars(); ++i) {
- SmallVector<llvm::Value *> phis;
-
- // map the block argument
- mapInitializationArgs(opInst, moduleTranslation, reductionDecls,
- reductionVariableMap, i);
- if (failed(inlineConvertOmpRegions(
- reductionDecls[i].getInitializerRegion(), "omp.reduction.neutral",
- builder, moduleTranslation, &phis)))
- return llvm::createStringError(
- "failed to inline `init` ...
[truncated]
|
@llvm/pr-subscribers-mlir-openmp Author: Kareem Ergawy (ergawy) Changes: Extend MLIR to LLVM lowering by adding support for `omp.wsloop` for delayed privatization. Parent PR: #118447. Only latest commit is relevant for this PR. Patch is 29.77 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/118463.diff 6 Files Affected:
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 35b0633a04a352..e189480e6b07d3 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -268,7 +268,6 @@ static LogicalResult checkImplementationStatus(Operation &op) {
checkAllocate(op, result);
checkLinear(op, result);
checkOrder(op, result);
- checkPrivate(op, result);
})
.Case([&](omp::ParallelOp op) { checkAllocate(op, result); })
.Case([&](omp::SimdOp op) {
@@ -1030,6 +1029,99 @@ mapInitializationArgs(T loop, LLVM::ModuleTranslation &moduleTranslation,
}
}
+template <typename OP>
+static LogicalResult
+initReductionVars(OP op, ArrayRef<BlockArgument> reductionArgs,
+ llvm::IRBuilderBase &builder,
+ LLVM::ModuleTranslation &moduleTranslation,
+ llvm::BasicBlock *latestAllocaBlock,
+ SmallVectorImpl<omp::DeclareReductionOp> &reductionDecls,
+ SmallVectorImpl<llvm::Value *> &privateReductionVariables,
+ DenseMap<Value, llvm::Value *> &reductionVariableMap,
+ llvm::ArrayRef<bool> isByRef,
+ SmallVectorImpl<DeferredStore> &deferredStores) {
+ if (op.getNumReductionVars() == 0)
+ return success();
+
+ llvm::IRBuilderBase::InsertPointGuard guard(builder);
+
+ builder.SetInsertPoint(latestAllocaBlock->getTerminator());
+ llvm::BasicBlock *initBlock = splitBB(builder, true, "omp.reduction.init");
+ auto allocaIP = llvm::IRBuilderBase::InsertPoint(
+ latestAllocaBlock, latestAllocaBlock->getTerminator()->getIterator());
+ builder.restoreIP(allocaIP);
+ SmallVector<llvm::Value *> byRefVars(op.getNumReductionVars());
+
+ for (unsigned i = 0; i < op.getNumReductionVars(); ++i) {
+ if (isByRef[i]) {
+ if (!reductionDecls[i].getAllocRegion().empty())
+ continue;
+
+ // TODO: remove after all users of by-ref are updated to use the alloc
+ // region: Allocate reduction variable (which is a pointer to the real
+ // reduciton variable allocated in the inlined region)
+ byRefVars[i] = builder.CreateAlloca(
+ moduleTranslation.convertType(reductionDecls[i].getType()));
+ }
+ }
+
+ builder.SetInsertPoint(&*initBlock->getFirstNonPHIOrDbgOrAlloca());
+
+ // store result of the alloc region to the allocated pointer to the real
+ // reduction variable
+ for (auto [data, addr] : deferredStores)
+ builder.CreateStore(data, addr);
+
+ // Before the loop, store the initial values of reductions into reduction
+ // variables. Although this could be done after allocas, we don't want to mess
+ // up with the alloca insertion point.
+ for (unsigned i = 0; i < op.getNumReductionVars(); ++i) {
+ SmallVector<llvm::Value *, 1> phis;
+
+ // map block argument to initializer region
+ mapInitializationArgs(op, moduleTranslation, reductionDecls,
+ reductionVariableMap, i);
+
+ if (failed(inlineConvertOmpRegions(reductionDecls[i].getInitializerRegion(),
+ "omp.reduction.neutral", builder,
+ moduleTranslation, &phis)))
+ return failure();
+
+ assert(phis.size() == 1 && "expected one value to be yielded from the "
+ "reduction neutral element declaration region");
+
+ builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
+
+ if (isByRef[i]) {
+ if (!reductionDecls[i].getAllocRegion().empty())
+ // done in allocReductionVars
+ continue;
+
+ // TODO: this path can be removed once all users of by-ref are updated to
+ // use an alloc region
+
+ // Store the result of the inlined region to the allocated reduction var
+ // ptr
+ builder.CreateStore(phis[0], byRefVars[i]);
+
+ privateReductionVariables[i] = byRefVars[i];
+ moduleTranslation.mapValue(reductionArgs[i], phis[0]);
+ reductionVariableMap.try_emplace(op.getReductionVars()[i], phis[0]);
+ } else {
+ // for by-ref case the store is inside of the reduction region
+ builder.CreateStore(phis[0], privateReductionVariables[i]);
+ // the rest was handled in allocByValReductionVars
+ }
+
+ // forget the mapping for the initializer region because we might need a
+ // different mapping if this reduction declaration is re-used for a
+ // different variable
+ moduleTranslation.forgetMapping(reductionDecls[i].getInitializerRegion());
+ }
+
+ return success();
+}
+
/// Collect reduction info
template <typename T>
static void collectReductionInfo(
@@ -1183,6 +1275,7 @@ static LogicalResult allocAndInitializeReductionVars(
if (op.getNumReductionVars() == 0)
return success();
+ llvm::IRBuilderBase::InsertPointGuard guard(builder);
SmallVector<DeferredStore> deferredStores;
if (failed(allocReductionVars(op, reductionArgs, builder, moduleTranslation,
@@ -1191,59 +1284,10 @@ static LogicalResult allocAndInitializeReductionVars(
deferredStores, isByRef)))
return failure();
- // store result of the alloc region to the allocated pointer to the real
- // reduction variable
- for (auto [data, addr] : deferredStores)
- builder.CreateStore(data, addr);
-
- // Before the loop, store the initial values of reductions into reduction
- // variables. Although this could be done after allocas, we don't want to mess
- // up with the alloca insertion point.
- for (unsigned i = 0; i < op.getNumReductionVars(); ++i) {
- SmallVector<llvm::Value *, 1> phis;
-
- // map block argument to initializer region
- mapInitializationArgs(op, moduleTranslation, reductionDecls,
- reductionVariableMap, i);
-
- if (failed(inlineConvertOmpRegions(reductionDecls[i].getInitializerRegion(),
- "omp.reduction.neutral", builder,
- moduleTranslation, &phis)))
- return failure();
- assert(phis.size() == 1 && "expected one value to be yielded from the "
- "reduction neutral element declaration region");
- if (isByRef[i]) {
- if (!reductionDecls[i].getAllocRegion().empty())
- // done in allocReductionVars
- continue;
-
- // TODO: this path can be removed once all users of by-ref are updated to
- // use an alloc region
-
- // Allocate reduction variable (which is a pointer to the real reduction
- // variable allocated in the inlined region)
- llvm::Value *var = builder.CreateAlloca(
- moduleTranslation.convertType(reductionDecls[i].getType()));
- // Store the result of the inlined region to the allocated reduction var
- // ptr
- builder.CreateStore(phis[0], var);
-
- privateReductionVariables[i] = var;
- moduleTranslation.mapValue(reductionArgs[i], phis[0]);
- reductionVariableMap.try_emplace(op.getReductionVars()[i], phis[0]);
- } else {
- // for by-ref case the store is inside of the reduction region
- builder.CreateStore(phis[0], privateReductionVariables[i]);
- // the rest was handled in allocByValReductionVars
- }
-
- // forget the mapping for the initializer region because we might need a
- // different mapping if this reduction declaration is re-used for a
- // different variable
- moduleTranslation.forgetMapping(reductionDecls[i].getInitializerRegion());
- }
-
- return success();
+ return initReductionVars(op, reductionArgs, builder, moduleTranslation,
+ allocaIP.getBlock(), reductionDecls,
+ privateReductionVariables, reductionVariableMap,
+ isByRef, deferredStores);
}
/// Allocate delayed private variables. Returns the basic block which comes
@@ -1257,6 +1301,7 @@ allocatePrivateVars(llvm::IRBuilderBase &builder,
MutableArrayRef<mlir::Value> mlirPrivateVars,
llvm::SmallVectorImpl<llvm::Value *> &llvmPrivateVars,
const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP) {
+ llvm::IRBuilderBase::InsertPointGuard guard(builder);
// Allocate private vars
llvm::BranchInst *allocaTerminator =
llvm::cast<llvm::BranchInst>(allocaIP.getBlock()->getTerminator());
@@ -1318,6 +1363,63 @@ allocatePrivateVars(llvm::IRBuilderBase &builder,
return afterAllocas;
}
+static LogicalResult
+initFirstPrivateVars(llvm::IRBuilderBase &builder,
+ LLVM::ModuleTranslation &moduleTranslation,
+ SmallVectorImpl<mlir::Value> &mlirPrivateVars,
+ SmallVectorImpl<llvm::Value *> &llvmPrivateVars,
+ SmallVectorImpl<omp::PrivateClauseOp> &privateDecls,
+ llvm::BasicBlock *afterAllocas) {
+ llvm::IRBuilderBase::InsertPointGuard guard(builder);
+ // Apply copy region for firstprivate.
+ bool needsFirstprivate =
+ llvm::any_of(privateDecls, [](omp::PrivateClauseOp &privOp) {
+ return privOp.getDataSharingType() ==
+ omp::DataSharingClauseType::FirstPrivate;
+ });
+
+ if (needsFirstprivate) {
+ // Find the end of the allocation blocks
+ builder.SetInsertPoint(
+ afterAllocas->getSinglePredecessor()->getTerminator());
+ llvm::BasicBlock *copyBlock =
+ splitBB(builder, /*CreateBranch=*/true, "omp.private.copy");
+ builder.SetInsertPoint(copyBlock->getFirstNonPHIOrDbgOrAlloca());
+ }
+
+ for (auto [decl, mlirVar, llvmVar] :
+ llvm::zip_equal(privateDecls, mlirPrivateVars, llvmPrivateVars)) {
+ if (decl.getDataSharingType() != omp::DataSharingClauseType::FirstPrivate)
+ continue;
+
+ // copyRegion implements `lhs = rhs`
+ Region &copyRegion = decl.getCopyRegion();
+
+ // map copyRegion rhs arg
+ llvm::Value *nonPrivateVar = moduleTranslation.lookupValue(mlirVar);
+ assert(nonPrivateVar);
+ moduleTranslation.mapValue(decl.getCopyMoldArg(), nonPrivateVar);
+
+ // map copyRegion lhs arg
+ moduleTranslation.mapValue(decl.getCopyPrivateArg(), llvmVar);
+
+ // in-place convert copy region
+ builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
+ if (failed(inlineConvertOmpRegions(copyRegion, "omp.private.copy", builder,
+ moduleTranslation)))
+ return decl.emitError("failed to inline `copy` region of `omp.private`");
+
+ // ignore unused value yielded from copy region
+
+ // clear copy region block argument mapping in case it needs to be
+ // re-created with different sources for reuse of the same reduction
+ // decl
+ moduleTranslation.forgetMapping(copyRegion);
+ }
+
+ return success();
+}
+
static LogicalResult
convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder,
LLVM::ModuleTranslation &moduleTranslation) {
@@ -1577,50 +1679,10 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
if (handleError(afterAllocas, *taskOp).failed())
return llvm::make_error<PreviouslyReportedError>();
- // Apply copy region for firstprivate
- bool needsFirstPrivate =
- llvm::any_of(privateDecls, [](omp::PrivateClauseOp &privOp) {
- return privOp.getDataSharingType() ==
- omp::DataSharingClauseType::FirstPrivate;
- });
- if (needsFirstPrivate) {
- // Find the end of the allocation blocks
- assert(afterAllocas.get()->getSinglePredecessor());
- builder.SetInsertPoint(
- afterAllocas.get()->getSinglePredecessor()->getTerminator());
- llvm::BasicBlock *copyBlock =
- splitBB(builder, /*CreateBranch=*/true, "omp.private.copy");
- builder.SetInsertPoint(copyBlock->getFirstNonPHIOrDbgOrAlloca());
- }
- for (auto [decl, mlirVar, llvmVar] :
- llvm::zip_equal(privateDecls, mlirPrivateVars, llvmPrivateVars)) {
- if (decl.getDataSharingType() != omp::DataSharingClauseType::FirstPrivate)
- continue;
-
- // copyRegion implements `lhs = rhs`
- Region &copyRegion = decl.getCopyRegion();
-
- // map copyRegion rhs arg
- llvm::Value *nonPrivateVar = moduleTranslation.lookupValue(mlirVar);
- assert(nonPrivateVar);
- moduleTranslation.mapValue(decl.getCopyMoldArg(), nonPrivateVar);
-
- // map copyRegion lhs arg
- moduleTranslation.mapValue(decl.getCopyPrivateArg(), llvmVar);
-
- // in-place convert copy region
- builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
- if (failed(inlineConvertOmpRegions(copyRegion, "omp.private.copy",
- builder, moduleTranslation)))
- return llvm::createStringError(
- "failed to inline `copy` region of an `omp.private` op in taskOp");
-
- // ignore unused value yielded from copy region
-
- // clear copy region block argument mapping in case it needs to be
- // re-created with different source for reuse of the same reduction decl
- moduleTranslation.forgetMapping(copyRegion);
- }
+ if (failed(initFirstPrivateVars(builder, moduleTranslation, mlirPrivateVars,
+ llvmPrivateVars, privateDecls,
+ afterAllocas.get())))
+ return llvm::make_error<PreviouslyReportedError>();
// translate the body of the task:
builder.restoreIP(codegenIP);
@@ -1732,6 +1794,18 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
chunk = builder.CreateSExtOrTrunc(chunkVar, ivType);
}
+ MutableArrayRef<BlockArgument> privateBlockArgs =
+ cast<omp::BlockArgOpenMPOpInterface>(*wsloopOp).getPrivateBlockArgs();
+ SmallVector<mlir::Value> mlirPrivateVars;
+ SmallVector<llvm::Value *> llvmPrivateVars;
+ SmallVector<omp::PrivateClauseOp> privateDecls;
+ mlirPrivateVars.reserve(privateBlockArgs.size());
+ llvmPrivateVars.reserve(privateBlockArgs.size());
+ collectPrivatizationDecls(wsloopOp, privateDecls);
+
+ for (mlir::Value privateVar : wsloopOp.getPrivateVars())
+ mlirPrivateVars.push_back(privateVar);
+
SmallVector<omp::DeclareReductionOp> reductionDecls;
collectReductionDecls(wsloopOp, reductionDecls);
llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
@@ -1739,15 +1813,37 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
SmallVector<llvm::Value *> privateReductionVariables(
wsloopOp.getNumReductionVars());
+
+ llvm::Expected<llvm::BasicBlock *> afterAllocas = allocatePrivateVars(
+ builder, moduleTranslation, privateBlockArgs, privateDecls,
+ mlirPrivateVars, llvmPrivateVars, allocaIP);
+ if (handleError(afterAllocas, opInst).failed())
+ return failure();
+
DenseMap<Value, llvm::Value *> reductionVariableMap;
MutableArrayRef<BlockArgument> reductionArgs =
cast<omp::BlockArgOpenMPOpInterface>(opInst).getReductionBlockArgs();
- if (failed(allocAndInitializeReductionVars(
- wsloopOp, reductionArgs, builder, moduleTranslation, allocaIP,
- reductionDecls, privateReductionVariables, reductionVariableMap,
- isByRef)))
+ SmallVector<DeferredStore> deferredStores;
+
+ if (failed(allocReductionVars(wsloopOp, reductionArgs, builder,
+ moduleTranslation, allocaIP, reductionDecls,
+ privateReductionVariables, reductionVariableMap,
+ deferredStores, isByRef)))
+ return failure();
+
+ if (failed(initFirstPrivateVars(builder, moduleTranslation, mlirPrivateVars,
+ llvmPrivateVars, privateDecls,
+ afterAllocas.get())))
+ return failure();
+
+ assert(afterAllocas.get()->getSinglePredecessor());
+ if (failed(initReductionVars(wsloopOp, reductionArgs, builder,
+ moduleTranslation,
+ afterAllocas.get()->getSinglePredecessor(),
+ reductionDecls, privateReductionVariables,
+ reductionVariableMap, isByRef, deferredStores)))
return failure();
// TODO: Replace this with proper composite translation support.
@@ -1914,122 +2010,18 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
deferredStores, isByRef)))
return llvm::make_error<PreviouslyReportedError>();
- // Apply copy region for firstprivate.
- bool needsFirstprivate =
- llvm::any_of(privateDecls, [](omp::PrivateClauseOp &privOp) {
- return privOp.getDataSharingType() ==
- omp::DataSharingClauseType::FirstPrivate;
- });
- if (needsFirstprivate) {
- // Find the end of the allocation blocks
- assert(afterAllocas.get()->getSinglePredecessor());
- builder.SetInsertPoint(
- afterAllocas.get()->getSinglePredecessor()->getTerminator());
- llvm::BasicBlock *copyBlock =
- splitBB(builder, /*CreateBranch=*/true, "omp.private.copy");
- builder.SetInsertPoint(copyBlock->getFirstNonPHIOrDbgOrAlloca());
- }
- for (auto [decl, mlirVar, llvmVar] :
- llvm::zip_equal(privateDecls, mlirPrivateVars, llvmPrivateVars)) {
- if (decl.getDataSharingType() != omp::DataSharingClauseType::FirstPrivate)
- continue;
-
- // copyRegion implements `lhs = rhs`
- Region &copyRegion = decl.getCopyRegion();
-
- // map copyRegion rhs arg
- llvm::Value *nonPrivateVar = moduleTranslation.lookupValue(mlirVar);
- assert(nonPrivateVar);
- moduleTranslation.mapValue(decl.getCopyMoldArg(), nonPrivateVar);
-
- // map copyRegion lhs arg
- moduleTranslation.mapValue(decl.getCopyPrivateArg(), llvmVar);
-
- // in-place convert copy region
- builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
- if (failed(inlineConvertOmpRegions(copyRegion, "omp.private.copy",
- builder, moduleTranslation)))
- return llvm::createStringError(
- "failed to inline `copy` region of `omp.private`");
-
- // ignore unused value yielded from copy region
-
- // clear copy region block argument mapping in case it needs to be
- // re-created with different sources for reuse of the same reduction
- // decl
- moduleTranslation.forgetMapping(copyRegion);
- }
-
- // Initialize reduction vars
- builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
- llvm::BasicBlock *initBlock = splitBB(builder, true, "omp.reduction.init");
- allocaIP =
- InsertPointTy(allocaIP.getBlock(),
- allocaIP.getBlock()->getTerminator()->getIterator());
-
- builder.restoreIP(allocaIP);
- SmallVector<llvm::Value *> byRefVars(opInst.getNumReductionVars());
- for (unsigned i = 0; i < opInst.getNumReductionVars(); ++i) {
- if (isByRef[i]) {
- if (!reductionDecls[i].getAllocRegion().empty())
- continue;
-
- // TODO: remove after all users of by-ref are updated to use the alloc
- // region: Allocate reduction variable (which is a pointer to the real
- // reduciton variable allocated in the inlined region)
- byRefVars[i] = builder.CreateAlloca(
- moduleTranslation.convertType(reductionDecls[i].getType()));
- }
- }
-
- builder.SetInsertPoint(initBlock->getFirstNonPHIOrDbgOrAlloca());
-
- // insert stores deferred until after all allocas
- // these store the results of the alloc region into the allocation for the
- // pointer to the reduction variable
- for (auto [data, addr] : deferredStores)
- builder.CreateStore(data, addr);
-
- for (unsigned i = 0; i < opInst.getNumReductionVars(); ++i) {
- SmallVector<llvm::Value *> phis;
-
- // map the block argument
- mapInitializationArgs(opInst, moduleTranslation, reductionDecls,
- reductionVariableMap, i);
- if (failed(inlineConvertOmpRegions(
- reductionDecls[i].getInitializerRegion(), "omp.reduction.neutral",
- builder, moduleTranslation, &phis)))
- return llvm::createStringError(
- "failed to inline `init` ...
[truncated]
|
9c427d7
to
9e68c62
Compare
✅ With the latest revision this PR passed the C/C++ code formatter. |
9e68c62
to
eac2adc
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nice work! Thanks for the cleanups
mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
Outdated
Show resolved
Hide resolved
mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
Outdated
Show resolved
Hide resolved
mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
Outdated
Show resolved
Hide resolved
Ahh I realise now most of these comments belong on the parent PR. Apologies. |
eac2adc
to
4077ad5
Compare
No problem, now your mission for #118447 is easier :). |
Small side note: It's possible to open stacked PRs in GitHub, but only when the branches are part of the main repository. Considering that you guys are all members, you can create branches in the upstream repo. While this is somewhat discouraged, it is for sure acceptable when the alternative is to butcher diff views 😅 |
mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
Outdated
Show resolved
Hide resolved
4077ad5
to
5073120
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks!
Extend MLIR to LLVM lowering by adding support for `omp.wsloop` for delayed privatization. This also refactors a few bits of code to isolate the logic needed for `firstprivate` initialization in a shared util that can be used across constructs that need it.
5073120
to
ff39a25
Compare
…`wsloop` (llvm#118463)" This reverts commit 0993335.
…on support for `wsloop` This reapplies PR llvm#118463 after introducing a fix for a bug uncovered by the test suite. The problem is that when the alloca block is terminated with a conditional branch, this violates a pre-condition of `allocatePrivateVars` (which assumes the alloca block has a single successor). Extend MLIR to LLVM lowering by adding support for `omp.wsloop` for delayed privatization. This also refactors a few bits of code to isolate the logic needed for `firstprivate` initialization in a shared util that can be used across constructs that need it. The same is done for `dealloc` regions.
…on support for `wsloop` This reapplies PR llvm#118463 after introducing a fix for a bug uncovered by the test suite. The problem is that when the alloca block is terminated with a conditional branch, this violates a pre-condition of `allocatePrivateVars` (which assumes the alloca block has a single successor). Extend MLIR to LLVM lowering by adding support for `omp.wsloop` for delayed privatization. This also refactors a few bits of code to isolate the logic needed for `firstprivate` initialization in a shared util that can be used across constructs that need it. The same is done for `dealloc` regions.
…upport for `wsloop` (#119170) This reapplies PR #118463 after introducing a fix for a bug uncovered by the test suite. The problem is that when the alloca block is terminated with a conditional branch, this violates a pre-condition of `allocatePrivateVars` (which assumes the alloca block has a single successor). This new PR includes a test that reproduces the issue. Extend MLIR to LLVM lowering by adding support for `omp.wsloop` for delayed privatization. This also refactors a few bits of code to isolate the logic needed for `firstprivate` initialization in a shared util that can be used across constructs that need it. The same is done for `dealloc` regions.
…on support for `wsloop` (llvm#119170) This reapplies PR llvm#118463 after introducing a fix for a bug uncovered by the test suite. The problem is that when the alloca block is terminated with a conditional branch, this violates a pre-condition of `allocatePrivateVars` (which assumes the alloca block has a single successor). This new PR includes a test that reproduces the issue. Extend MLIR to LLVM lowering by adding support for `omp.wsloop` for delayed privatization. This also refactors a few bits of code to isolate the logic needed for `firstprivate` initialization in a shared util that can be used across constructs that need it. The same is done for `dealloc` regions.
…llvm#118463) Extend MLIR to LLVM lowering by adding support for `omp.wsloop` for delayed privatization. This also refactors a few bits of code to isolate the logic needed for `firstprivate` initialization in a shared util that can be used across constructs that need it. The same is done for `dealloc` regions. Parent PR: llvm#118447. Only latest commit is relevant for this PR.
…vatization support for `wsloop` (llvm#119170)" This reverts commit 0e70e0e.
Extend MLIR to LLVM lowering by adding support for `omp.wsloop` for delayed privatization. This also refactors a few bits of code to isolate the logic needed for `firstprivate` initialization in a shared util that can be used across constructs that need it. The same is done for `dealloc` regions.
Parent PR: #118447. Only latest commit is relevant for this PR.