Skip to content

Commit

Permalink
[OpenMP][OMPIRBuilder] Add delayed privatization support for wsloop
Browse files Browse the repository at this point in the history
Extend the MLIR to LLVM IR lowering by adding delayed privatization
support for `omp.wsloop`. This also refactors a few bits of code to isolate
the logic needed for `firstprivate` initialization in a shared utility that
can be used across the constructs that need it.
  • Loading branch information
ergawy committed Dec 3, 2024
1 parent a308530 commit 9c427d7
Show file tree
Hide file tree
Showing 3 changed files with 184 additions and 114 deletions.
200 changes: 105 additions & 95 deletions mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,6 @@ static LogicalResult checkImplementationStatus(Operation &op) {
checkAllocate(op, result);
checkLinear(op, result);
checkOrder(op, result);
checkPrivate(op, result);
})
.Case([&](omp::ParallelOp op) { checkAllocate(op, result); })
.Case([&](omp::SimdOp op) {
Expand Down Expand Up @@ -1302,6 +1301,7 @@ allocatePrivateVars(llvm::IRBuilderBase &builder,
MutableArrayRef<mlir::Value> mlirPrivateVars,
llvm::SmallVectorImpl<llvm::Value *> &llvmPrivateVars,
const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP) {
llvm::IRBuilderBase::InsertPointGuard guard(builder);
// Allocate private vars
llvm::BranchInst *allocaTerminator =
llvm::cast<llvm::BranchInst>(allocaIP.getBlock()->getTerminator());
Expand Down Expand Up @@ -1363,6 +1363,63 @@ allocatePrivateVars(llvm::IRBuilderBase &builder,
return afterAllocas;
}

/// Initializes every `firstprivate` variable by inlining the `copy` region of
/// its privatizer.
///
/// If at least one privatizer in `privateDecls` is `firstprivate`, the single
/// predecessor of `afterAllocas` is split at its terminator to create an
/// "omp.private.copy" block, and the copy regions are inlined starting there.
/// Entries whose data-sharing type is not `FirstPrivate` are skipped.
///
/// \param builder            IR builder used to emit the copies; its insertion
///                           point is saved on entry and restored on return.
/// \param moduleTranslation  Provides the MLIR -> LLVM value mapping.
/// \param mlirPrivateVars    Original (non-private) MLIR values, one per decl.
/// \param llvmPrivateVars    LLVM values of the private copies, one per decl.
/// \param privateDecls       Privatizer ops, parallel to the two lists above.
/// \param afterAllocas       Block following the allocation blocks; it must
///                           have a single predecessor when a copy is needed.
/// \returns failure (after emitting an error on the offending privatizer) if a
///          copy region cannot be inlined, success otherwise.
static LogicalResult
initFirstPrivateVars(llvm::IRBuilderBase &builder,
                     LLVM::ModuleTranslation &moduleTranslation,
                     SmallVectorImpl<mlir::Value> &mlirPrivateVars,
                     SmallVectorImpl<llvm::Value *> &llvmPrivateVars,
                     SmallVectorImpl<omp::PrivateClauseOp> &privateDecls,
                     llvm::BasicBlock *afterAllocas) {
  llvm::IRBuilderBase::InsertPointGuard guard(builder);

  // Nothing to emit (and no block to split) unless some privatizer is
  // `firstprivate`.
  bool needsFirstprivate =
      llvm::any_of(privateDecls, [](omp::PrivateClauseOp &privOp) {
        return privOp.getDataSharingType() ==
               omp::DataSharingClauseType::FirstPrivate;
      });
  if (!needsFirstprivate)
    return success();

  // Find the end of the allocation blocks. The callers used to assert this
  // precondition themselves before the logic was moved into this helper; check
  // it here so that a malformed CFG is reported instead of dereferencing null.
  assert(afterAllocas && "expected a valid block following the allocas");
  llvm::BasicBlock *allocaEndBlock = afterAllocas->getSinglePredecessor();
  assert(allocaEndBlock &&
         "expected the block following the allocas to have a single "
         "predecessor");

  // Split right before the terminator so the copies get a dedicated block.
  builder.SetInsertPoint(allocaEndBlock->getTerminator());
  llvm::BasicBlock *copyBlock =
      splitBB(builder, /*CreateBranch=*/true, "omp.private.copy");
  builder.SetInsertPoint(copyBlock->getFirstNonPHIOrDbgOrAlloca());

  for (auto [decl, mlirVar, llvmVar] :
       llvm::zip_equal(privateDecls, mlirPrivateVars, llvmPrivateVars)) {
    if (decl.getDataSharingType() != omp::DataSharingClauseType::FirstPrivate)
      continue;

    // copyRegion implements `lhs = rhs`
    Region &copyRegion = decl.getCopyRegion();

    // map copyRegion rhs arg
    llvm::Value *nonPrivateVar = moduleTranslation.lookupValue(mlirVar);
    assert(nonPrivateVar &&
           "original variable must be translated before its private copy is "
           "initialized");
    moduleTranslation.mapValue(decl.getCopyMoldArg(), nonPrivateVar);

    // map copyRegion lhs arg
    moduleTranslation.mapValue(decl.getCopyPrivateArg(), llvmVar);

    // in-place convert copy region; always continue emitting at the terminator
    // of whichever block the previous iteration left the builder in
    builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
    if (failed(inlineConvertOmpRegions(copyRegion, "omp.private.copy", builder,
                                       moduleTranslation)))
      return decl.emitError("failed to inline `copy` region of `omp.private`");

    // ignore unused value yielded from copy region

    // clear copy region block argument mapping in case it needs to be
    // re-created with different sources for reuse of the same privatizer
    moduleTranslation.forgetMapping(copyRegion);
  }

  return success();
}

static LogicalResult
convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder,
LLVM::ModuleTranslation &moduleTranslation) {
Expand Down Expand Up @@ -1622,50 +1679,10 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
if (handleError(afterAllocas, *taskOp).failed())
return llvm::make_error<PreviouslyReportedError>();

// Apply copy region for firstprivate
bool needsFirstPrivate =
llvm::any_of(privateDecls, [](omp::PrivateClauseOp &privOp) {
return privOp.getDataSharingType() ==
omp::DataSharingClauseType::FirstPrivate;
});
if (needsFirstPrivate) {
// Find the end of the allocation blocks
assert(afterAllocas.get()->getSinglePredecessor());
builder.SetInsertPoint(
afterAllocas.get()->getSinglePredecessor()->getTerminator());
llvm::BasicBlock *copyBlock =
splitBB(builder, /*CreateBranch=*/true, "omp.private.copy");
builder.SetInsertPoint(copyBlock->getFirstNonPHIOrDbgOrAlloca());
}
for (auto [decl, mlirVar, llvmVar] :
llvm::zip_equal(privateDecls, mlirPrivateVars, llvmPrivateVars)) {
if (decl.getDataSharingType() != omp::DataSharingClauseType::FirstPrivate)
continue;

// copyRegion implements `lhs = rhs`
Region &copyRegion = decl.getCopyRegion();

// map copyRegion rhs arg
llvm::Value *nonPrivateVar = moduleTranslation.lookupValue(mlirVar);
assert(nonPrivateVar);
moduleTranslation.mapValue(decl.getCopyMoldArg(), nonPrivateVar);

// map copyRegion lhs arg
moduleTranslation.mapValue(decl.getCopyPrivateArg(), llvmVar);

// in-place convert copy region
builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
if (failed(inlineConvertOmpRegions(copyRegion, "omp.private.copy",
builder, moduleTranslation)))
return llvm::createStringError(
"failed to inline `copy` region of an `omp.private` op in taskOp");

// ignore unused value yielded from copy region

// clear copy region block argument mapping in case it needs to be
// re-created with different source for reuse of the same reduction decl
moduleTranslation.forgetMapping(copyRegion);
}
if (failed(initFirstPrivateVars(builder, moduleTranslation, mlirPrivateVars,
llvmPrivateVars, privateDecls,
afterAllocas.get())))
return llvm::make_error<PreviouslyReportedError>();

// translate the body of the task:
builder.restoreIP(codegenIP);
Expand Down Expand Up @@ -1777,22 +1794,56 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
chunk = builder.CreateSExtOrTrunc(chunkVar, ivType);
}

MutableArrayRef<BlockArgument> privateBlockArgs =
cast<omp::BlockArgOpenMPOpInterface>(*wsloopOp).getPrivateBlockArgs();
SmallVector<mlir::Value> mlirPrivateVars;
SmallVector<llvm::Value *> llvmPrivateVars;
SmallVector<omp::PrivateClauseOp> privateDecls;
mlirPrivateVars.reserve(privateBlockArgs.size());
llvmPrivateVars.reserve(privateBlockArgs.size());
collectPrivatizationDecls(wsloopOp, privateDecls);

for (mlir::Value privateVar : wsloopOp.getPrivateVars())
mlirPrivateVars.push_back(privateVar);

SmallVector<omp::DeclareReductionOp> reductionDecls;
collectReductionDecls(wsloopOp, reductionDecls);
llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
findAllocaInsertPoint(builder, moduleTranslation);

SmallVector<llvm::Value *> privateReductionVariables(
wsloopOp.getNumReductionVars());

llvm::Expected<llvm::BasicBlock *> afterAllocas = allocatePrivateVars(
builder, moduleTranslation, privateBlockArgs, privateDecls,
mlirPrivateVars, llvmPrivateVars, allocaIP);
if (handleError(afterAllocas, opInst).failed())
return failure();

DenseMap<Value, llvm::Value *> reductionVariableMap;

MutableArrayRef<BlockArgument> reductionArgs =
cast<omp::BlockArgOpenMPOpInterface>(opInst).getReductionBlockArgs();

if (failed(allocAndInitializeReductionVars(
wsloopOp, reductionArgs, builder, moduleTranslation, allocaIP,
reductionDecls, privateReductionVariables, reductionVariableMap,
isByRef)))
SmallVector<DeferredStore> deferredStores;

if (failed(allocReductionVars(wsloopOp, reductionArgs, builder,
moduleTranslation, allocaIP, reductionDecls,
privateReductionVariables, reductionVariableMap,
deferredStores, isByRef)))
return failure();

if (failed(initFirstPrivateVars(builder, moduleTranslation, mlirPrivateVars,
llvmPrivateVars, privateDecls,
afterAllocas.get())))
return failure();

assert(afterAllocas.get()->getSinglePredecessor());
if (failed(initReductionVars(wsloopOp, reductionArgs, builder,
moduleTranslation,
afterAllocas.get()->getSinglePredecessor(),
reductionDecls, privateReductionVariables,
reductionVariableMap, isByRef, deferredStores)))
return failure();

// TODO: Replace this with proper composite translation support.
Expand Down Expand Up @@ -1959,53 +2010,12 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
deferredStores, isByRef)))
return llvm::make_error<PreviouslyReportedError>();

// Apply copy region for firstprivate.
bool needsFirstprivate =
llvm::any_of(privateDecls, [](omp::PrivateClauseOp &privOp) {
return privOp.getDataSharingType() ==
omp::DataSharingClauseType::FirstPrivate;
});
if (needsFirstprivate) {
// Find the end of the allocation blocks
assert(afterAllocas.get()->getSinglePredecessor());
builder.SetInsertPoint(
afterAllocas.get()->getSinglePredecessor()->getTerminator());
llvm::BasicBlock *copyBlock =
splitBB(builder, /*CreateBranch=*/true, "omp.private.copy");
builder.SetInsertPoint(copyBlock->getFirstNonPHIOrDbgOrAlloca());
}

for (auto [decl, mlirVar, llvmVar] :
llvm::zip_equal(privateDecls, mlirPrivateVars, llvmPrivateVars)) {
if (decl.getDataSharingType() != omp::DataSharingClauseType::FirstPrivate)
continue;

// copyRegion implements `lhs = rhs`
Region &copyRegion = decl.getCopyRegion();

// map copyRegion rhs arg
llvm::Value *nonPrivateVar = moduleTranslation.lookupValue(mlirVar);
assert(nonPrivateVar);
moduleTranslation.mapValue(decl.getCopyMoldArg(), nonPrivateVar);

// map copyRegion lhs arg
moduleTranslation.mapValue(decl.getCopyPrivateArg(), llvmVar);

// in-place convert copy region
builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
if (failed(inlineConvertOmpRegions(copyRegion, "omp.private.copy",
builder, moduleTranslation)))
return llvm::createStringError(
"failed to inline `copy` region of `omp.private`");

// ignore unused value yielded from copy region

// clear copy region block argument mapping in case it needs to be
// re-created with different sources for reuse of the same reduction
// decl
moduleTranslation.forgetMapping(copyRegion);
}
if (failed(initFirstPrivateVars(builder, moduleTranslation, mlirPrivateVars,
llvmPrivateVars, privateDecls,
afterAllocas.get())))
return llvm::make_error<PreviouslyReportedError>();

assert(afterAllocas.get()->getSinglePredecessor());
if (failed(
initReductionVars(opInst, reductionArgs, builder, moduleTranslation,
afterAllocas.get()->getSinglePredecessor(),
Expand Down
19 changes: 0 additions & 19 deletions mlir/test/Target/LLVMIR/openmp-todo.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -635,22 +635,3 @@ llvm.func @wsloop_order(%lb : i32, %ub : i32, %step : i32) {
}
llvm.return
}

// -----

omp.private {type = private} @x.privatizer : !llvm.ptr alloc {
^bb0(%arg0: !llvm.ptr):
%0 = llvm.mlir.constant(1 : i32) : i32
%1 = llvm.alloca %0 x i32 : (i32) -> !llvm.ptr
omp.yield(%1 : !llvm.ptr)
}
llvm.func @wsloop_private(%lb : i32, %ub : i32, %step : i32, %x : !llvm.ptr) {
// expected-error@below {{not yet implemented: Unhandled clause privatization in omp.wsloop operation}}
// expected-error@below {{LLVM Translation failed for operation: omp.wsloop}}
omp.wsloop private(@x.privatizer %x -> %arg0 : !llvm.ptr) {
omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
omp.yield
}
}
llvm.return
}
79 changes: 79 additions & 0 deletions mlir/test/Target/LLVMIR/openmp-wsloop-private.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s

// tests a wsloop private + firstprivate + reduction to make sure block structure
// is handled properly.

// Privatizer of kind `private` for a pointer-typed variable. Its alloc region
// allocates storage for a single i32 and yields the resulting pointer; the
// region argument %arg0 is not referenced inside the region.
omp.private {type = private} @_QFwsloop_privateEi_private_ref_i32 : !llvm.ptr alloc {
^bb0(%arg0: !llvm.ptr):
%0 = llvm.mlir.constant(1 : i64) : i64
%1 = llvm.alloca %0 x i32 {bindc_name = "i", pinned} : (i64) -> !llvm.ptr
omp.yield(%1 : !llvm.ptr)
}

// Privatizer of kind `firstprivate` for a pointer-typed variable.
//  - alloc region: allocates storage for one !llvm.array<1 x i8> and yields
//    the resulting pointer.
//  - copy region: loads the array value through %arg0, stores it through
//    %arg1, and yields %arg1.
omp.private {type = firstprivate} @_QFwsloop_privateEc_firstprivate_ref_c8 : !llvm.ptr alloc {
^bb0(%arg0: !llvm.ptr):
%0 = llvm.mlir.constant(1 : i64) : i64
%1 = llvm.alloca %0 x !llvm.array<1 x i8> {bindc_name = "c", pinned} : (i64) -> !llvm.ptr
omp.yield(%1 : !llvm.ptr)
} copy {
^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
%0 = llvm.load %arg0 : !llvm.ptr -> !llvm.array<1 x i8>
llvm.store %0, %arg1 : !llvm.array<1 x i8>, !llvm.ptr
omp.yield(%arg1 : !llvm.ptr)
}

// Reduction declaration over f32.
//  - init region: yields the constant -3.40282347E+38 as the initial value.
//  - combiner region: yields llvm.intr.maxnum of its two arguments.
omp.declare_reduction @max_f32 : f32 init {
^bb0(%arg0: f32):
%0 = llvm.mlir.constant(-3.40282347E+38 : f32) : f32
omp.yield(%0 : f32)
} combiner {
^bb0(%arg0: f32, %arg1: f32):
%0 = llvm.intr.maxnum(%arg0, %arg1) {fastmathFlags = #llvm.fastmath<contract>} : (f32, f32) -> f32
omp.yield(%0 : f32)
}

// Function under test. It allocates three locals ("x": f32, "i": i32,
// "c": [1 x i8]) and runs an omp.wsloop nested in an omp.parallel. The wsloop
// combines three clauses on those locals:
//  - "c" (%5) is privatized with the firstprivate privatizer,
//  - "i" (%3) is privatized with the private privatizer,
//  - "x" (%1) is reduced with @max_f32.
// The loop nest runs from 0 to 10 (inclusive) with step 1 and has an empty
// body, so only the privatization/reduction scaffolding is emitted.
llvm.func @wsloop_private_(%arg0: !llvm.ptr {fir.bindc_name = "y"}) attributes {fir.internal_name = "_QPwsloop_private", frame_pointer = #llvm.framePointerKind<all>, target_cpu = "x86-64"} {
%0 = llvm.mlir.constant(1 : i64) : i64
%1 = llvm.alloca %0 x f32 {bindc_name = "x"} : (i64) -> !llvm.ptr
%3 = llvm.alloca %0 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr
%5 = llvm.alloca %0 x !llvm.array<1 x i8> {bindc_name = "c"} : (i64) -> !llvm.ptr
%6 = llvm.mlir.constant(1 : i32) : i32
%7 = llvm.mlir.constant(10 : i32) : i32
%8 = llvm.mlir.constant(0 : i32) : i32
omp.parallel {
omp.wsloop private(@_QFwsloop_privateEc_firstprivate_ref_c8 %5 -> %arg1, @_QFwsloop_privateEi_private_ref_i32 %3 -> %arg2 : !llvm.ptr, !llvm.ptr) reduction(@max_f32 %1 -> %arg3 : !llvm.ptr) {
omp.loop_nest (%arg4) : i32 = (%8) to (%7) inclusive step (%6) {
omp.yield
}
}
omp.terminator
}
llvm.return
}

// CHECK: call void {{.*}} @__kmpc_fork_call(ptr @1, i32 1, ptr @[[OUTLINED:.*]], ptr %{{.*}})

// CHECK: define internal void @[[OUTLINED:.*]]{{.*}} {

// First, check that all memory for privates and reductions is allocated.
// CHECK: omp.par.entry:
// CHECK: %[[CHR:.*]] = alloca [1 x i8], i64 1, align 1
// CHECK: %[[INT:.*]] = alloca i32, i64 1, align 4
// CHECK: %[[FLT:.*]] = alloca float, align 4
// CHECK: %[[RED_ARR:.*]] = alloca [1 x ptr], align 8
// CHECK: br label %[[LATE_ALLOC_BB:.*]]

// CHECK: [[LATE_ALLOC_BB]]:
// CHECK: br label %[[PRIVATE_CPY:.*]]

// Second, check that the firstprivate variable was properly copied.
// CHECK: [[PRIVATE_CPY:.*]]:
// CHECK: %[[CHR_VAL:.*]] = load [1 x i8], ptr %{{.*}}, align 1
// CHECK: store [1 x i8] %[[CHR_VAL]], ptr %[[CHR]], align 1
// CHECK: br label %[[RED_INIT:.*]]

// Third, check that reduction init took place.
// CHECK: [[RED_INIT]]:
// CHECK: store float 0x{{.*}}, ptr %[[FLT]], align 4

// CHECK: }

0 comments on commit 9c427d7

Please sign in to comment.