Skip to content

Commit

Permalink
[mlir][OpenMP] - MLIR to LLVMIR translation support for delayed priva…
Browse files Browse the repository at this point in the history
…tization of allocatables in `omp.target` ops

This PR adds support to translate the `private` clause from MLIR to
LLVMIR when used on allocatables in the context of an `omp.target` op.
  • Loading branch information
ergawy committed Dec 3, 2024
1 parent 6ef4990 commit 6549daf
Show file tree
Hide file tree
Showing 7 changed files with 368 additions and 35 deletions.
9 changes: 1 addition & 8 deletions flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,6 @@ class MapsForPrivatizedSymbolsPass
: public flangomp::impl::MapsForPrivatizedSymbolsPassBase<
MapsForPrivatizedSymbolsPass> {

/// Tells whether `privatizer` requires its host variable to be mapped: this is
/// the case exactly when the first block argument of the privatizer's `alloc`
/// region has at least one use.
bool privatizerNeedsMap(omp::PrivateClauseOp &privatizer) {
  Value moldArg = privatizer.getAllocRegion().getArgument(0);
  return !moldArg.use_empty();
}
omp::MapInfoOp createMapInfo(Location loc, Value var,
fir::FirOpBuilder &builder) {
uint64_t mapTypeTo = static_cast<
Expand Down Expand Up @@ -134,7 +127,7 @@ class MapsForPrivatizedSymbolsPass
omp::PrivateClauseOp privatizer =
SymbolTable::lookupNearestSymbolFrom<omp::PrivateClauseOp>(
targetOp, privatizerName);
if (!privatizerNeedsMap(privatizer)) {
if (!privatizer.needsMap()) {
privVarMapIdx.push_back(-1);
continue;
}
Expand Down
9 changes: 9 additions & 0 deletions mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,15 @@ def PrivateClauseOp : OpenMP_Op<"private", [IsolatedFromAbove, RecipeInterface]>
auto &region = getDeallocRegion();
return region.empty() ? nullptr : region.getArgument(0);
}

/// needsMap returns true if the value being privatized should additionally
/// be mapped to the target region using a MapInfoOp. This is most common
/// when an allocatable is privatized. In such cases, the descriptor is used
/// in privatization and needs to be mapped on to the device.
bool needsMap() {
Value blockArg0 = getAllocRegion().getArgument(0);
return !blockArg0.use_empty();
}
}];

let hasRegionVerifier = 1;
Expand Down
134 changes: 125 additions & 9 deletions mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -300,10 +300,6 @@ static LogicalResult checkImplementationStatus(Operation &op) {
if (privatizer.getDataSharingType() ==
omp::DataSharingClauseType::FirstPrivate)
result = todo("firstprivate");

if (!privatizer.getDeallocRegion().empty())
result = op.emitError("not yet implemented: privatization of "
"structures in omp.target operation");
}
}
checkThreadLimit(op, result);
Expand Down Expand Up @@ -3810,6 +3806,43 @@ createDeviceArgumentAccessor(MapInfoData &mapData, llvm::Argument &arg,
return builder.saveIP();
}

/// Return the llvm::Value * corresponding to the `privateVar` that is being
/// privatized.
///
/// It isn't always as simple as looking up moduleTranslation with
/// `privateVar`. For instance, in the case of an allocatable, the descriptor
/// for the allocatable is privatized. This descriptor is mapped using a
/// MapInfoOp, so this function returns the llvm::Value corresponding to the
/// block argument for the mapped descriptor instead.
///
/// \param privateVar the MLIR value being privatized.
/// \param targetOp the `omp.target` op whose region block arguments are
///        indexed by the entries of `mappedPrivateVars`.
/// \param mappedPrivateVars for every private variable that has an associated
///        map entry, the index of the corresponding block argument of the
///        region of `targetOp`. Only read here, never modified.
/// \param builder used to emit a load when the private variable is not of
///        pointer type and therefore has to be read through the mapped
///        pointer.
/// \param moduleTranslation provides the MLIR -> LLVM value and type lookups.
/// \returns the LLVM value already recorded for the mapped block argument, a
///          freshly created load from it, or the LLVM value recorded for
///          `privateVar` itself when no mapped block argument applies.
static llvm::Value *
findHostAssociatedValue(Value privateVar, omp::TargetOp targetOp,
                        llvm::DenseMap<Value, int> &mappedPrivateVars,
                        llvm::IRBuilderBase &builder,
                        LLVM::ModuleTranslation &moduleTranslation) {
  // A single `find` replaces the `contains` + `operator[]` pair: one hash
  // lookup instead of two, and no risk of accidentally inserting into the map.
  auto mappedVarIt = mappedPrivateVars.find(privateVar);
  if (mappedVarIt == mappedPrivateVars.end())
    return moduleTranslation.lookupValue(privateVar);

  Value blockArg = targetOp.getRegion().getArgument(mappedVarIt->second);
  Type privVarType = privateVar.getType();
  // `blockArgType` is also used in the comparison below, so it stays
  // referenced when asserts are compiled out (no unused-variable warning).
  Type blockArgType = blockArg.getType();
  assert(isa<LLVM::LLVMPointerType>(blockArgType) &&
         "A block argument corresponding to a mapped var should have "
         "!llvm.ptr type");

  if (privVarType == blockArgType)
    return moduleTranslation.lookupValue(blockArg);

  if (!isa<LLVM::LLVMPointerType>(privVarType)) {
    // This typically happens when the privatized type is lowered from
    // boxchar<KIND> and gets lowered to !llvm.struct<(ptr, i64)>. That is the
    // struct/pair is passed by value. But, mapped values are passed only as
    // pointers, so before we privatize, we must load the pointer.
    return builder.CreateLoad(moduleTranslation.convertType(privVarType),
                              moduleTranslation.lookupValue(blockArg));
  }

  // The private variable is a pointer whose type differs from that of the
  // mapped block argument: fall back to the value recorded for `privateVar`.
  return moduleTranslation.lookupValue(privateVar);
}
static LogicalResult
convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
LLVM::ModuleTranslation &moduleTranslation) {
Expand All @@ -3821,6 +3854,19 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
bool isTargetDevice = ompBuilder->Config.isTargetDevice();
auto parentFn = opInst.getParentOfType<LLVM::LLVMFuncOp>();
auto &targetRegion = targetOp.getRegion();
// Holds the private vars that have been mapped along with
// the block argument that corresponds to the MapInfoOp
// corresponding to the private var in question.
// So, for instance
//
// %10 = omp.map.info var_ptr(%6#0 : !fir.ref<!fir.box<!fir.heap<i32>>>, ..)
// omp.target map_entries(%10 -> %arg0) private(@box.privatizer %6#0-> %arg1)
//
// Then, %10 has been created so that the descriptor can be used by the
// privatizer
// @box.privatizer on the device side. Here we'd record {%6#0, 0} in the
// mappedPrivateVars map.
llvm::DenseMap<Value, int> mappedPrivateVars;
DataLayout dl = DataLayout(opInst.getParentOfType<ModuleOp>());
SmallVector<Value> mapVars = targetOp.getMapVars();
ArrayRef<BlockArgument> mapBlockArgs =
Expand All @@ -3832,6 +3878,55 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
bool isOffloadEntry =
isTargetDevice || !ompBuilder->Config.TargetTriples.empty();

// For some private variables, the MapsForPrivatizedSymbolsPass
// creates MapInfoOp instances. Go through the private variables and
// the mapped variables so that during code generation we are able
// to quickly look up the corresponding map variable, if any, for each
// private variable.
if (!targetOp.getPrivateVars().empty() && !targetOp.getMapVars().empty()) {
auto argIface = llvm::cast<omp::BlockArgOpenMPOpInterface>(*targetOp);
OperandRange privateVars = targetOp.getPrivateVars();
std::optional<ArrayAttr> privateSyms = targetOp.getPrivateSyms();
std::optional<DenseI64ArrayAttr> privateMapIndices =
targetOp.getPrivateMapsAttr();

for (auto [privVarIdx, privVarSymPair] :
llvm::enumerate(llvm::zip_equal(privateVars, *privateSyms))) {
auto privVar = std::get<0>(privVarSymPair);
auto privSym = std::get<1>(privVarSymPair);

SymbolRefAttr privatizerName = llvm::cast<SymbolRefAttr>(privSym);
omp::PrivateClauseOp privatizer =
findPrivatizer(targetOp, privatizerName);

if (!privatizer.needsMap())
continue;

mlir::Value mappedValue =
targetOp.getMappedValueForPrivateVar(privVarIdx);
assert(mappedValue);

// The MapInfoOp defining the map var isn't really needed later.
// So, we don't store it in any datastructure. Instead, we just
// do some sanity checks on it right now.
auto mapInfoOp = mappedValue.getDefiningOp<omp::MapInfoOp>();
Type varType = mapInfoOp.getVarType();

// Check #1: Check that the type of the private variable matches
// the type of the variable being mapped.
if (!isa<LLVM::LLVMPointerType>(privVar.getType()))
assert(
varType == privVar.getType() &&
"Type of private var doesn't match the type of the mapped value");

// Ok, only 1 sanity check for now.
// Record the index of the block argument corresponding to this
// mapvar.
mappedPrivateVars.insert({privVar, argIface.getMapBlockArgsStart() +
(*privateMapIndices)[privVarIdx]});
}
}

using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP)
-> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
Expand All @@ -3858,9 +3953,10 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
moduleTranslation.lookupValue(mapInfoOp.getVarPtr());
moduleTranslation.mapValue(arg, mapOpValue);
}

// Do privatization after moduleTranslation has already recorded
// mapped values.
SmallVector<llvm::Value *> llvmPrivateVars;
SmallVector<Region *> privateCleanupRegions;
if (!targetOp.getPrivateVars().empty()) {
builder.restoreIP(allocaIP);

Expand All @@ -3876,11 +3972,13 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
omp::PrivateClauseOp privatizer = findPrivatizer(&opInst, privSym);
assert(privatizer.getDataSharingType() !=
omp::DataSharingClauseType::FirstPrivate &&
privatizer.getDeallocRegion().empty() &&
"unsupported privatizer");
moduleTranslation.mapValue(privatizer.getAllocMoldArg(),
moduleTranslation.lookupValue(privVar));
Region &allocRegion = privatizer.getAllocRegion();
BlockArgument allocRegionArg = allocRegion.getArgument(0);
moduleTranslation.mapValue(
allocRegionArg,
findHostAssociatedValue(privVar, targetOp, mappedPrivateVars,
builder, moduleTranslation));
SmallVector<llvm::Value *, 1> yieldedValues;
if (failed(inlineConvertOmpRegions(
allocRegion, "omp.targetop.privatizer", builder,
Expand All @@ -3889,7 +3987,12 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
"failed to inline `alloc` region of `omp.private`");
}
assert(yieldedValues.size() == 1);
moduleTranslation.mapValue(privBlockArg, yieldedValues.front());
llvm::Value *llvmReplacementValue = yieldedValues.front();
moduleTranslation.mapValue(privBlockArg, llvmReplacementValue);
if (!privatizer.getDeallocRegion().empty()) {
llvmPrivateVars.push_back(llvmReplacementValue);
privateCleanupRegions.push_back(&privatizer.getDeallocRegion());
}
moduleTranslation.forgetMapping(allocRegion);
builder.restoreIP(builder.saveIP());
}
Expand All @@ -3901,6 +4004,19 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
return exitBlock.takeError();

builder.SetInsertPoint(*exitBlock);
if (!llvmPrivateVars.empty()) {
assert(llvmPrivateVars.size() == privateCleanupRegions.size() &&
"Number of private variables needing cleanup not equal to number"
"of privatizers with dealloc regions");
if (failed(inlineOmpRegionCleanup(
privateCleanupRegions, llvmPrivateVars, moduleTranslation,
builder, "omp.targetop.private.cleanup",
/*shouldLoadCleanupRegionArg=*/false))) {
return llvm::createStringError(
"failed to inline `dealloc` region of `omp.private` "
"op in the target region");
}
}
return builder.saveIP();
};

Expand Down
80 changes: 80 additions & 0 deletions mlir/test/Target/LLVMIR/openmp-target-multiple-private.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s

llvm.func @dealloc_foo_0(!llvm.ptr)

// Privatizer with both an `alloc` and a `dealloc` region.
// The `alloc` region never reads its block argument %arg0: it only
// stack-allocates a fresh struct and yields the resulting pointer.
omp.private {type = private} @box.heap_privatizer0 : !llvm.ptr alloc {
^bb0(%arg0: !llvm.ptr):
%0 = llvm.mlir.constant(1 : i32) : i32
%7 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> : (i32) -> !llvm.ptr
omp.yield(%7 : !llvm.ptr)
} dealloc {
^bb0(%arg0: !llvm.ptr):
// The `dealloc` region forwards its block argument to @dealloc_foo_0.
llvm.call @dealloc_foo_0(%arg0) : (!llvm.ptr) -> ()
omp.yield
}

llvm.func @alloc_foo_1(!llvm.ptr)
llvm.func @dealloc_foo_1(!llvm.ptr)

// Privatizer with both an `alloc` and a `dealloc` region.
// Unlike @box.heap_privatizer0, the `alloc` region here uses its block
// argument %arg0 (it is passed to @alloc_foo_1) before yielding the freshly
// stack-allocated struct.
omp.private {type = private} @box.heap_privatizer1 : !llvm.ptr alloc {
^bb0(%arg0: !llvm.ptr):
%0 = llvm.mlir.constant(1 : i32) : i32
%7 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> : (i32) -> !llvm.ptr
llvm.call @alloc_foo_1(%arg0) : (!llvm.ptr) -> ()
omp.yield(%7 : !llvm.ptr)
} dealloc {
^bb0(%arg0: !llvm.ptr):
// The `dealloc` region forwards its block argument to @dealloc_foo_1.
llvm.call @dealloc_foo_1(%arg0) : (!llvm.ptr) -> ()
omp.yield
}

// Host function containing an `omp.target` op with three map entries and two
// private variables, each private variable carrying its own privatizer.
llvm.func @target_allocatable_(%arg0: !llvm.ptr {fir.bindc_name = "lb"}, %arg1: !llvm.ptr {fir.bindc_name = "ub"}, %arg2: !llvm.ptr {fir.bindc_name = "l"}) attributes {fir.internal_name = "_QPtarget_allocatable"} {
%6 = llvm.mlir.constant(1 : i64) : i64
%7 = llvm.alloca %6 x i32 {bindc_name = "mapped_var"} : (i64) -> !llvm.ptr
// %13 and %14 are the two struct allocas that get privatized below.
%13 = llvm.alloca %6 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {bindc_name = "alloc_var0"} : (i64) -> !llvm.ptr
%14 = llvm.alloca %6 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {bindc_name = "alloc_var1"} : (i64) -> !llvm.ptr
%53 = omp.map.info var_ptr(%7 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "mapped_var"}
// Map entries for the two privatized structs; they are referenced from the
// `private` clause through map_idx=1 and map_idx=2 respectively.
%54 = omp.map.info var_ptr(%13 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(to) capture(ByRef) -> !llvm.ptr
%55 = omp.map.info var_ptr(%14 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(to) capture(ByRef) -> !llvm.ptr
omp.target map_entries(%53 -> %arg3, %54 -> %arg4, %55 ->%arg5 : !llvm.ptr, !llvm.ptr, !llvm.ptr) private(@box.heap_privatizer0 %13 -> %arg6 [map_idx=1], @box.heap_privatizer1 %14 -> %arg7 [map_idx=2]: !llvm.ptr, !llvm.ptr) {
// The body only touches the private block arguments %arg6 and %arg7.
llvm.call @use_private_var0(%arg6) : (!llvm.ptr) -> ()
llvm.call @use_private_var1(%arg7) : (!llvm.ptr) -> ()
omp.terminator
}
llvm.return
}


llvm.func @use_private_var0(!llvm.ptr) -> ()
llvm.func @use_private_var1(!llvm.ptr) -> ()

// The first set of checks ensure that we are calling the offloaded function
// with the right arguments, especially the second argument which needs to
// be a memory reference to the descriptor for the privatized allocatable
// CHECK: define void @target_allocatable_
// CHECK-NOT: define internal void
// CHECK: %[[DESC_ALLOC0:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8 }, i64 1
// CHECK: %[[DESC_ALLOC1:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8 }, i64 1
// CHECK: call void @__omp_offloading_[[OFFLOADED_FUNCTION:.*]](ptr {{[^,]+}},
// CHECK-SAME: ptr %[[DESC_ALLOC0]], ptr %[[DESC_ALLOC1]])

// CHECK: define internal void @__omp_offloading_[[OFFLOADED_FUNCTION]]
// CHECK-SAME: (ptr {{[^,]+}}, ptr %[[DESCRIPTOR_ARG0:[^,]+]],
// CHECK-SAME: ptr %[[DESCRIPTOR_ARG1:.*]]) {

// `var0` privatizer `alloc`
// CHECK: %[[PRIV_DESC0:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8 }

// `var1` privatizer `alloc`
// CHECK: %[[PRIV_DESC1:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8 }
// CHECK: call void @alloc_foo_1(ptr %[[DESCRIPTOR_ARG1]])

// target op body
// CHECK: call void @use_private_var0(ptr %[[PRIV_DESC0]]
// CHECK: call void @use_private_var1(ptr %[[PRIV_DESC1]]

// `var0` privatizer `dealloc`
// CHECK: call void @dealloc_foo_0(ptr %[[PRIV_DESC0]])

// `var1` privatizer `dealloc`
// CHECK: call void @dealloc_foo_1(ptr %[[PRIV_DESC1]])
64 changes: 64 additions & 0 deletions mlir/test/Target/LLVMIR/openmp-target-private-allocatable.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s

llvm.func @alloc_foo_1(!llvm.ptr)
llvm.func @dealloc_foo_1(!llvm.ptr)

// Privatizer with both an `alloc` and a `dealloc` region.
// The `alloc` region uses its block argument %arg0 (it is passed to
// @alloc_foo_1) and yields a freshly stack-allocated struct.
omp.private {type = private} @box.heap_privatizer : !llvm.ptr alloc {
^bb0(%arg0: !llvm.ptr):
%0 = llvm.mlir.constant(1 : i32) : i32
%7 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> : (i32) -> !llvm.ptr
llvm.call @alloc_foo_1(%arg0) : (!llvm.ptr) -> ()
omp.yield(%7 : !llvm.ptr)
} dealloc {
^bb0(%arg0: !llvm.ptr):
// The `dealloc` region forwards its block argument to @dealloc_foo_1.
llvm.call @dealloc_foo_1(%arg0) : (!llvm.ptr) -> ()
omp.yield
}

// Host function containing an `omp.target` op with two map entries and one
// private variable (%13) that is privatized through @box.heap_privatizer.
llvm.func @target_allocatable_(%arg0: !llvm.ptr {fir.bindc_name = "lb"}, %arg1: !llvm.ptr {fir.bindc_name = "ub"}, %arg2: !llvm.ptr {fir.bindc_name = "l"}) attributes {fir.internal_name = "_QPtarget_allocatable"} {
%0 = llvm.mlir.constant(1 : i32) : i32
// Of the allocas below, only %7 and %13 are referenced by the map entries and
// the target op; the others are not used again in this function.
%1 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
%3 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
%4 = llvm.mlir.constant(1 : i64) : i64
%5 = llvm.alloca %4 x f32 {bindc_name = "real_var"} : (i64) -> !llvm.ptr
%7 = llvm.alloca %4 x i32 {bindc_name = "mapped_var"} : (i64) -> !llvm.ptr
%9 = llvm.alloca %4 x !llvm.struct<(f32, f32)> {bindc_name = "comp_var"} : (i64) -> !llvm.ptr
%11 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
%13 = llvm.alloca %4 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {bindc_name = "alloc_var"} : (i64) -> !llvm.ptr
%39 = llvm.load %arg2 : !llvm.ptr -> i64
%52 = llvm.alloca %39 x f32 {bindc_name = "real_arr"} : (i64) -> !llvm.ptr
%53 = omp.map.info var_ptr(%7 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "mapped_var"}
// Map entry for the privatized struct %13; the `private` clause refers to it
// through map_idx=1.
%54 = omp.map.info var_ptr(%13 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(to) capture(ByRef) -> !llvm.ptr
omp.target map_entries(%53 -> %arg3, %54 -> %arg4 : !llvm.ptr, !llvm.ptr) private(@box.heap_privatizer %13 -> %arg5 [map_idx=1] : !llvm.ptr) {
// The body only touches the private block argument %arg5.
llvm.call @use_private_var(%arg5) : (!llvm.ptr) -> ()
omp.terminator
}
llvm.return
}

llvm.func @use_private_var(!llvm.ptr) -> ()

llvm.func @_FortranAAssign(!llvm.ptr, !llvm.ptr, !llvm.ptr, i32) -> !llvm.struct<()> attributes {fir.runtime, sym_visibility = "private"}

// The first set of checks ensure that we are calling the offloaded function
// with the right arguments, especially the second argument which needs to
// be a memory reference to the descriptor for the privatized allocatable
// CHECK: define void @target_allocatable_
// CHECK-NOT: define internal void
// CHECK: %[[DESC_ALLOC:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8 }, i64 1
// CHECK: call void @__omp_offloading_[[OFFLOADED_FUNCTION:.*]](ptr {{[^,]+}},
// CHECK-SAME: ptr %[[DESC_ALLOC]])

// The second set of checks ensure that to allocate memory for the
// allocatable, we are, in fact, using the memory reference of the descriptor
// passed as the second argument to the offloaded function.
// CHECK: define internal void @__omp_offloading_[[OFFLOADED_FUNCTION]]
// CHECK-SAME: (ptr {{[^,]+}}, ptr %[[DESCRIPTOR_ARG:.*]]) {
// CHECK: %[[DESC_TO_DEALLOC:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8 }
// CHECK: call void @alloc_foo_1(ptr %[[DESCRIPTOR_ARG]])


// CHECK: call void @use_private_var(ptr %[[DESC_TO_DEALLOC]]

// Now, check the deallocation of the private var.
// CHECK: call void @dealloc_foo_1(ptr %[[DESC_TO_DEALLOC]])
Loading

0 comments on commit 6549daf

Please sign in to comment.