Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[reapply (#118463)][OpenMP][OMPIRBuilder] Add delayed privatization support for wsloop #119170

Merged
merged 1 commit into from
Dec 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
268 changes: 147 additions & 121 deletions mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,6 @@ static LogicalResult checkImplementationStatus(Operation &op) {
checkAllocate(op, result);
checkLinear(op, result);
checkOrder(op, result);
checkPrivate(op, result);
})
.Case([&](omp::ParallelOp op) { checkAllocate(op, result); })
.Case([&](omp::SimdOp op) {
Expand Down Expand Up @@ -1302,6 +1301,7 @@ allocatePrivateVars(llvm::IRBuilderBase &builder,
MutableArrayRef<mlir::Value> mlirPrivateVars,
llvm::SmallVectorImpl<llvm::Value *> &llvmPrivateVars,
const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP) {
llvm::IRBuilderBase::InsertPointGuard guard(builder);
// Allocate private vars
llvm::BranchInst *allocaTerminator =
llvm::cast<llvm::BranchInst>(allocaIP.getBlock()->getTerminator());
Expand Down Expand Up @@ -1363,6 +1363,86 @@ allocatePrivateVars(llvm::IRBuilderBase &builder,
return afterAllocas;
}

/// Inlines the `copy` region of each firstprivate privatizer in
/// `privateDecls`.
///
/// The three vectors are walked in lock-step (they must have equal length):
/// for every firstprivate entry, the LLVM value already mapped to the
/// corresponding element of `mlirPrivateVars` is bound to the copy region's
/// mold argument and the matching element of `llvmPrivateVars` is bound to the
/// copy region's private argument before the region is inlined.
///
/// When at least one firstprivate privatizer exists, the single predecessor of
/// `afterAllocas` is split at its terminator into a new "omp.private.copy"
/// block which receives the inlined code. If there are none, the function
/// returns success without touching the IR.
///
/// The builder's insertion point is saved on entry and restored on return.
///
/// \returns success(), or the failure produced by emitting an error on the
///          privatizer whose copy region could not be inlined.
static LogicalResult
initFirstPrivateVars(llvm::IRBuilderBase &builder,
                     LLVM::ModuleTranslation &moduleTranslation,
                     SmallVectorImpl<mlir::Value> &mlirPrivateVars,
                     SmallVectorImpl<llvm::Value *> &llvmPrivateVars,
                     SmallVectorImpl<omp::PrivateClauseOp> &privateDecls,
                     llvm::BasicBlock *afterAllocas) {
  // Puts the builder back where the caller left it on every exit path.
  llvm::IRBuilderBase::InsertPointGuard guard(builder);

  // Shared predicate: does this privatizer carry firstprivate semantics?
  auto isFirstPrivate = [](omp::PrivateClauseOp &privatizer) {
    return privatizer.getDataSharingType() ==
           omp::DataSharingClauseType::FirstPrivate;
  };

  // Without any firstprivate privatizer there is nothing to copy.
  if (!llvm::any_of(privateDecls, isFirstPrivate))
    return success();

  assert(afterAllocas->getSinglePredecessor());
  llvm::BasicBlock *lastAllocBlock = afterAllocas->getSinglePredecessor();

  // Carve a dedicated block for the copies out of the end of the allocation
  // blocks and start emitting into it.
  builder.SetInsertPoint(lastAllocBlock->getTerminator());
  llvm::BasicBlock *privateCopyBlock =
      splitBB(builder, /*CreateBranch=*/true, "omp.private.copy");
  builder.SetInsertPoint(privateCopyBlock->getFirstNonPHIOrDbgOrAlloca());

  for (auto [privatizer, hostMlirVar, privateLlvmVar] :
       llvm::zip_equal(privateDecls, mlirPrivateVars, llvmPrivateVars)) {
    if (!isFirstPrivate(privatizer))
      continue;

    // The copy region implements `lhs = rhs`.
    Region &copyRegion = privatizer.getCopyRegion();

    // rhs: the value the original (non-private) variable was translated to.
    llvm::Value *nonPrivateVar = moduleTranslation.lookupValue(hostMlirVar);
    assert(nonPrivateVar);
    moduleTranslation.mapValue(privatizer.getCopyMoldArg(), nonPrivateVar);

    // lhs: the private copy.
    moduleTranslation.mapValue(privatizer.getCopyPrivateArg(), privateLlvmVar);

    // Convert the copy region in place, just before the terminator of the
    // block the builder currently sits in.
    builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
    LogicalResult inlined = inlineConvertOmpRegions(
        copyRegion, "omp.private.copy", builder, moduleTranslation);
    if (failed(inlined))
      return privatizer.emitError(
          "failed to inline `copy` region of `omp.private`");

    // The value yielded by the copy region is unused and deliberately ignored.

    // Drop the block-argument mappings of the copy region so that it can be
    // mapped afresh if the same declaration is translated again with
    // different sources.
    moduleTranslation.forgetMapping(copyRegion);
  }

  return success();
}

/// Inlines the `dealloc` regions of the privatizers in `privateDecls` for the
/// private variables in `llvmPrivateVars`.
///
/// One dealloc region is collected per privatizer, in declaration order, and
/// handed together with `llvmPrivateVars` to `inlineOmpRegionCleanup` under
/// the region name "omp.private.dealloc", with
/// `shouldLoadCleanupRegionArg` set to false.
///
/// The insertion point of `builder` is not changed here before the cleanup
/// helper is invoked, so callers are expected to position the builder first.
///
/// \param loc location used for the diagnostic emitted on failure.
/// \returns success(), or failure after emitting an error at `loc` when the
///          dealloc regions could not be inlined.
static LogicalResult
cleanupPrivateVars(llvm::IRBuilderBase &builder,
                   LLVM::ModuleTranslation &moduleTranslation, Location loc,
                   SmallVectorImpl<llvm::Value *> &llvmPrivateVars,
                   SmallVectorImpl<omp::PrivateClauseOp> &privateDecls) {
  // Gather the dealloc region of every privatizer. The count is known up
  // front, so size the vector once instead of growing it per element.
  SmallVector<Region *> privateCleanupRegions;
  privateCleanupRegions.reserve(privateDecls.size());
  for (omp::PrivateClauseOp privatizer : privateDecls)
    privateCleanupRegions.push_back(&privatizer.getDeallocRegion());

  if (failed(inlineOmpRegionCleanup(
          privateCleanupRegions, llvmPrivateVars, moduleTranslation, builder,
          "omp.private.dealloc", /*shouldLoadCleanupRegionArg=*/false)))
    // The diagnostic is attached to `loc`, which already identifies the
    // enclosing operation; the message therefore must not end in a dangling
    // "in".
    return mlir::emitError(
        loc, "failed to inline `dealloc` region of an `omp.private` op");

  return success();
}

static LogicalResult
convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder,
LLVM::ModuleTranslation &moduleTranslation) {
Expand Down Expand Up @@ -1622,50 +1702,10 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
if (handleError(afterAllocas, *taskOp).failed())
return llvm::make_error<PreviouslyReportedError>();

// Apply copy region for firstprivate
bool needsFirstPrivate =
llvm::any_of(privateDecls, [](omp::PrivateClauseOp &privOp) {
return privOp.getDataSharingType() ==
omp::DataSharingClauseType::FirstPrivate;
});
if (needsFirstPrivate) {
// Find the end of the allocation blocks
assert(afterAllocas.get()->getSinglePredecessor());
builder.SetInsertPoint(
afterAllocas.get()->getSinglePredecessor()->getTerminator());
llvm::BasicBlock *copyBlock =
splitBB(builder, /*CreateBranch=*/true, "omp.private.copy");
builder.SetInsertPoint(copyBlock->getFirstNonPHIOrDbgOrAlloca());
}
for (auto [decl, mlirVar, llvmVar] :
llvm::zip_equal(privateDecls, mlirPrivateVars, llvmPrivateVars)) {
if (decl.getDataSharingType() != omp::DataSharingClauseType::FirstPrivate)
continue;

// copyRegion implements `lhs = rhs`
Region &copyRegion = decl.getCopyRegion();

// map copyRegion rhs arg
llvm::Value *nonPrivateVar = moduleTranslation.lookupValue(mlirVar);
assert(nonPrivateVar);
moduleTranslation.mapValue(decl.getCopyMoldArg(), nonPrivateVar);

// map copyRegion lhs arg
moduleTranslation.mapValue(decl.getCopyPrivateArg(), llvmVar);

// in-place convert copy region
builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
if (failed(inlineConvertOmpRegions(copyRegion, "omp.private.copy",
builder, moduleTranslation)))
return llvm::createStringError(
"failed to inline `copy` region of an `omp.private` op in taskOp");

// ignore unused value yielded from copy region

// clear copy region block argument mapping in case it needs to be
// re-created with different source for reuse of the same reduction decl
moduleTranslation.forgetMapping(copyRegion);
}
if (failed(initFirstPrivateVars(builder, moduleTranslation, mlirPrivateVars,
llvmPrivateVars, privateDecls,
afterAllocas.get())))
return llvm::make_error<PreviouslyReportedError>();

// translate the body of the task:
builder.restoreIP(codegenIP);
Expand All @@ -1674,19 +1714,11 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
if (failed(handleError(continuationBlockOrError, *taskOp)))
return llvm::make_error<PreviouslyReportedError>();

// private variable deallocation
SmallVector<Region *> privateCleanupRegions;
llvm::transform(privateDecls, std::back_inserter(privateCleanupRegions),
[](omp::PrivateClauseOp privatizer) {
return &privatizer.getDeallocRegion();
});

builder.SetInsertPoint(continuationBlockOrError.get()->getTerminator());
if (failed(inlineOmpRegionCleanup(
privateCleanupRegions, llvmPrivateVars, moduleTranslation, builder,
"omp.private.dealloc", /*shouldLoadCleanupRegionArg=*/false)))
return llvm::createStringError("failed to inline `dealloc` region of an "
"`omp.private` op in an omp.task");

if (failed(cleanupPrivateVars(builder, moduleTranslation, taskOp.getLoc(),
llvmPrivateVars, privateDecls)))
return llvm::make_error<PreviouslyReportedError>();

return llvm::Error::success();
};
Expand Down Expand Up @@ -1760,7 +1792,6 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
return failure();

auto loopOp = cast<omp::LoopNestOp>(wsloopOp.getWrappedLoop());

llvm::ArrayRef<bool> isByRef = getIsByRef(wsloopOp.getReductionByref());
assert(isByRef.size() == wsloopOp.getNumReductionVars());

Expand All @@ -1778,22 +1809,61 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
chunk = builder.CreateSExtOrTrunc(chunkVar, ivType);
}

MutableArrayRef<BlockArgument> privateBlockArgs =
cast<omp::BlockArgOpenMPOpInterface>(*wsloopOp).getPrivateBlockArgs();
SmallVector<mlir::Value> mlirPrivateVars;
SmallVector<llvm::Value *> llvmPrivateVars;
SmallVector<omp::PrivateClauseOp> privateDecls;
mlirPrivateVars.reserve(privateBlockArgs.size());
llvmPrivateVars.reserve(privateBlockArgs.size());
collectPrivatizationDecls(wsloopOp, privateDecls);

for (mlir::Value privateVar : wsloopOp.getPrivateVars())
mlirPrivateVars.push_back(privateVar);

SmallVector<omp::DeclareReductionOp> reductionDecls;
collectReductionDecls(wsloopOp, reductionDecls);
llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
findAllocaInsertPoint(builder, moduleTranslation);

SmallVector<llvm::Value *> privateReductionVariables(
wsloopOp.getNumReductionVars());

splitBB(llvm::OpenMPIRBuilder::InsertPointTy(
allocaIP.getBlock(),
allocaIP.getBlock()->getTerminator()->getIterator()),
true, "omp.region.after_alloca");

llvm::Expected<llvm::BasicBlock *> afterAllocas = allocatePrivateVars(
builder, moduleTranslation, privateBlockArgs, privateDecls,
mlirPrivateVars, llvmPrivateVars, allocaIP);
if (handleError(afterAllocas, opInst).failed())
return failure();

DenseMap<Value, llvm::Value *> reductionVariableMap;

MutableArrayRef<BlockArgument> reductionArgs =
cast<omp::BlockArgOpenMPOpInterface>(opInst).getReductionBlockArgs();

if (failed(allocAndInitializeReductionVars(
wsloopOp, reductionArgs, builder, moduleTranslation, allocaIP,
reductionDecls, privateReductionVariables, reductionVariableMap,
isByRef)))
SmallVector<DeferredStore> deferredStores;

if (failed(allocReductionVars(wsloopOp, reductionArgs, builder,
moduleTranslation, allocaIP, reductionDecls,
privateReductionVariables, reductionVariableMap,
deferredStores, isByRef)))
return failure();

if (failed(initFirstPrivateVars(builder, moduleTranslation, mlirPrivateVars,
llvmPrivateVars, privateDecls,
afterAllocas.get())))
return failure();

assert(afterAllocas.get()->getSinglePredecessor());
if (failed(initReductionVars(wsloopOp, reductionArgs, builder,
moduleTranslation,
afterAllocas.get()->getSinglePredecessor(),
reductionDecls, privateReductionVariables,
reductionVariableMap, isByRef, deferredStores)))
return failure();

// TODO: Replace this with proper composite translation support.
Expand Down Expand Up @@ -1900,9 +1970,13 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
builder.restoreIP(afterIP);

// Process the reductions if required.
return createReductionsAndCleanup(wsloopOp, builder, moduleTranslation,
allocaIP, reductionDecls,
privateReductionVariables, isByRef);
if (failed(createReductionsAndCleanup(wsloopOp, builder, moduleTranslation,
allocaIP, reductionDecls,
privateReductionVariables, isByRef)))
return failure();

return cleanupPrivateVars(builder, moduleTranslation, wsloopOp.getLoc(),
llvmPrivateVars, privateDecls);
}

/// Converts the OpenMP parallel operation to LLVM IR.
Expand Down Expand Up @@ -1960,52 +2034,12 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
deferredStores, isByRef)))
return llvm::make_error<PreviouslyReportedError>();

// Apply copy region for firstprivate.
bool needsFirstprivate =
llvm::any_of(privateDecls, [](omp::PrivateClauseOp &privOp) {
return privOp.getDataSharingType() ==
omp::DataSharingClauseType::FirstPrivate;
});
if (needsFirstprivate) {
// Find the end of the allocation blocks
assert(afterAllocas.get()->getSinglePredecessor());
builder.SetInsertPoint(
afterAllocas.get()->getSinglePredecessor()->getTerminator());
llvm::BasicBlock *copyBlock =
splitBB(builder, /*CreateBranch=*/true, "omp.private.copy");
builder.SetInsertPoint(copyBlock->getFirstNonPHIOrDbgOrAlloca());
}
for (auto [decl, mlirVar, llvmVar] :
llvm::zip_equal(privateDecls, mlirPrivateVars, llvmPrivateVars)) {
if (decl.getDataSharingType() != omp::DataSharingClauseType::FirstPrivate)
continue;

// copyRegion implements `lhs = rhs`
Region &copyRegion = decl.getCopyRegion();

// map copyRegion rhs arg
llvm::Value *nonPrivateVar = moduleTranslation.lookupValue(mlirVar);
assert(nonPrivateVar);
moduleTranslation.mapValue(decl.getCopyMoldArg(), nonPrivateVar);

// map copyRegion lhs arg
moduleTranslation.mapValue(decl.getCopyPrivateArg(), llvmVar);

// in-place convert copy region
builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
if (failed(inlineConvertOmpRegions(copyRegion, "omp.private.copy",
builder, moduleTranslation)))
return llvm::createStringError(
"failed to inline `copy` region of `omp.private`");

// ignore unused value yielded from copy region

// clear copy region block argument mapping in case it needs to be
// re-created with different sources for reuse of the same reduction
// decl
moduleTranslation.forgetMapping(copyRegion);
}
if (failed(initFirstPrivateVars(builder, moduleTranslation, mlirPrivateVars,
llvmPrivateVars, privateDecls,
afterAllocas.get())))
return llvm::make_error<PreviouslyReportedError>();

assert(afterAllocas.get()->getSinglePredecessor());
if (failed(
initReductionVars(opInst, reductionArgs, builder, moduleTranslation,
afterAllocas.get()->getSinglePredecessor(),
Expand Down Expand Up @@ -2090,17 +2124,9 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
return llvm::createStringError(
"failed to inline `cleanup` region of `omp.declare_reduction`");

SmallVector<Region *> privateCleanupRegions;
llvm::transform(privateDecls, std::back_inserter(privateCleanupRegions),
[](omp::PrivateClauseOp privatizer) {
return &privatizer.getDeallocRegion();
});

if (failed(inlineOmpRegionCleanup(
privateCleanupRegions, llvmPrivateVars, moduleTranslation, builder,
"omp.private.dealloc", /*shouldLoadCleanupRegionArg=*/false)))
return llvm::createStringError(
"failed to inline `dealloc` region of `omp.private`");
if (failed(cleanupPrivateVars(builder, moduleTranslation, opInst.getLoc(),
llvmPrivateVars, privateDecls)))
return llvm::make_error<PreviouslyReportedError>();

builder.restoreIP(oldIP);
return llvm::Error::success();
Expand Down
19 changes: 0 additions & 19 deletions mlir/test/Target/LLVMIR/openmp-todo.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -635,22 +635,3 @@ llvm.func @wsloop_order(%lb : i32, %ub : i32, %step : i32) {
}
llvm.return
}

// -----

omp.private {type = private} @x.privatizer : !llvm.ptr alloc {
^bb0(%arg0: !llvm.ptr):
%0 = llvm.mlir.constant(1 : i32) : i32
%1 = llvm.alloca %0 x i32 : (i32) -> !llvm.ptr
omp.yield(%1 : !llvm.ptr)
}
llvm.func @wsloop_private(%lb : i32, %ub : i32, %step : i32, %x : !llvm.ptr) {
// expected-error@below {{not yet implemented: Unhandled clause privatization in omp.wsloop operation}}
// expected-error@below {{LLVM Translation failed for operation: omp.wsloop}}
omp.wsloop private(@x.privatizer %x -> %arg0 : !llvm.ptr) {
omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
omp.yield
}
}
llvm.return
}
Loading
Loading