Skip to content

[mlir][OpenMP] - MLIR to LLVMIR translation support for delayed privatization of allocatables in omp.target ops #116576

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 1 addition & 8 deletions flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,6 @@ class MapsForPrivatizedSymbolsPass
: public flangomp::impl::MapsForPrivatizedSymbolsPassBase<
MapsForPrivatizedSymbolsPass> {

bool privatizerNeedsMap(omp::PrivateClauseOp &privatizer) {
Region &allocRegion = privatizer.getAllocRegion();
Value blockArg0 = allocRegion.getArgument(0);
if (blockArg0.use_empty())
return false;
return true;
}
omp::MapInfoOp createMapInfo(Location loc, Value var,
fir::FirOpBuilder &builder) {
uint64_t mapTypeTo = static_cast<
Expand Down Expand Up @@ -134,7 +127,7 @@ class MapsForPrivatizedSymbolsPass
omp::PrivateClauseOp privatizer =
SymbolTable::lookupNearestSymbolFrom<omp::PrivateClauseOp>(
targetOp, privatizerName);
if (!privatizerNeedsMap(privatizer)) {
if (!privatizer.needsMap()) {
privVarMapIdx.push_back(-1);
continue;
}
Expand Down
7 changes: 5 additions & 2 deletions llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6821,8 +6821,11 @@ static Expected<Function *> createOutlinedFunction(
OMPBuilder.ConstantAllocaRaiseCandidates.emplace_back(Func);

// Insert target deinit call in the device compilation pass.
llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
CBFunc(Builder.saveIP(), Builder.saveIP());
BasicBlock *OutlinedBodyBB =
splitBB(Builder, /*CreateBranch=*/true, "outlined.body");
llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = CBFunc(
Builder.saveIP(),
OpenMPIRBuilder::InsertPointTy(OutlinedBodyBB, OutlinedBodyBB->begin()));
if (!AfterIP)
return AfterIP.takeError();
Builder.restoreIP(*AfterIP);
Expand Down
17 changes: 15 additions & 2 deletions llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6358,7 +6358,13 @@ TEST_F(OpenMPIRBuilderTest, TargetRegionDevice) {
auto *Load2 = Load1->getNextNode();
EXPECT_TRUE(isa<LoadInst>(Load2));

auto *Value1 = Load2->getNextNode();
auto *OutlinedBlockBr = Load2->getNextNode();
EXPECT_TRUE(isa<BranchInst>(OutlinedBlockBr));

auto *OutlinedBlock = OutlinedBlockBr->getSuccessor(0);
EXPECT_EQ(OutlinedBlock->getName(), "outlined.body");

auto *Value1 = OutlinedBlock->getFirstNonPHI();
EXPECT_EQ(Value1, Value);
EXPECT_EQ(Value1->getNextNode(), TargetStore);
auto *Deinit = TargetStore->getNextNode();
Expand Down Expand Up @@ -6510,7 +6516,14 @@ TEST_F(OpenMPIRBuilderTest, ConstantAllocaRaise) {
EXPECT_EQ(UserCodeBlock->getName(), "user_code.entry");
auto *Load1 = UserCodeBlock->getFirstNonPHI();
EXPECT_TRUE(isa<LoadInst>(Load1));
auto *Load2 = Load1->getNextNode();

auto *OutlinedBlockBr = Load1->getNextNode();
EXPECT_TRUE(isa<BranchInst>(OutlinedBlockBr));

auto *OutlinedBlock = OutlinedBlockBr->getSuccessor(0);
EXPECT_EQ(OutlinedBlock->getName(), "outlined.body");

auto *Load2 = OutlinedBlock->getFirstNonPHI();
EXPECT_TRUE(isa<LoadInst>(Load2));
EXPECT_EQ(Load2, Value);
EXPECT_EQ(Load2->getNextNode(), TargetStore);
Expand Down
8 changes: 8 additions & 0 deletions mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,14 @@ def PrivateClauseOp : OpenMP_Op<"private", [IsolatedFromAbove, RecipeInterface]>
auto &region = getDeallocRegion();
return region.empty() ? nullptr : region.getArgument(0);
}

/// needsMap returns true if the value being privatized should additionally
/// be mapped to the target region using a MapInfoOp. This is most common
/// when an allocatable is privatized. In such cases, the descriptor is used
/// in privatization and needs to be mapped on to the device.
bool needsMap() {
return !getAllocMoldArg().use_empty();
}
}];

let hasRegionVerifier = 1;
Expand Down
188 changes: 143 additions & 45 deletions mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -299,10 +299,6 @@ static LogicalResult checkImplementationStatus(Operation &op) {
if (privatizer.getDataSharingType() ==
omp::DataSharingClauseType::FirstPrivate)
result = todo("firstprivate");

if (!privatizer.getDeallocRegion().empty())
result = op.emitError("not yet implemented: privatization of "
"structures in omp.target operation");
}
}
checkThreadLimit(op, result);
Expand Down Expand Up @@ -1290,6 +1286,41 @@ static LogicalResult allocAndInitializeReductionVars(
isByRef, deferredStores);
}

/// Return the llvm::Value * corresponding to the `privateVar` that
/// is being privatized. It isn't always as simple as looking up
/// moduleTranslation with privateVar. For instance, in case of
/// an allocatable, the descriptor for the allocatable is privatized.
/// This descriptor is mapped using an MapInfoOp. So, this function
/// will return a pointer to the llvm::Value corresponding to the
/// block argument for the mapped descriptor.
static llvm::Value *
findAssociatedValue(Value privateVar, llvm::IRBuilderBase &builder,
LLVM::ModuleTranslation &moduleTranslation,
llvm::DenseMap<Value, Value> *mappedPrivateVars = nullptr) {
if (mappedPrivateVars == nullptr || !mappedPrivateVars->contains(privateVar))
return moduleTranslation.lookupValue(privateVar);

Value blockArg = (*mappedPrivateVars)[privateVar];
Type privVarType = privateVar.getType();
Type blockArgType = blockArg.getType();
assert(isa<LLVM::LLVMPointerType>(blockArgType) &&
"A block argument corresponding to a mapped var should have "
"!llvm.ptr type");

if (privVarType == blockArgType)
return moduleTranslation.lookupValue(blockArg);

// This typically happens when the privatized type is lowered from
// boxchar<KIND> and gets lowered to !llvm.struct<(ptr, i64)>. That is the
// struct/pair is passed by value. But, mapped values are passed only as
// pointers, so before we privatize, we must load the pointer.
if (!isa<LLVM::LLVMPointerType>(privVarType))
return builder.CreateLoad(moduleTranslation.convertType(privVarType),
moduleTranslation.lookupValue(blockArg));

return moduleTranslation.lookupValue(privateVar);
}

/// Allocate delayed private variables. Returns the basic block which comes
/// after all of these allocations. llvm::Value * for each of these private
/// variables are populated in llvmPrivateVars.
Expand All @@ -1300,7 +1331,8 @@ allocatePrivateVars(llvm::IRBuilderBase &builder,
MutableArrayRef<omp::PrivateClauseOp> privateDecls,
MutableArrayRef<mlir::Value> mlirPrivateVars,
llvm::SmallVectorImpl<llvm::Value *> &llvmPrivateVars,
const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP) {
const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,
llvm::DenseMap<Value, Value> *mappedPrivateVars = nullptr) {
llvm::IRBuilderBase::InsertPointGuard guard(builder);
// Allocate private vars
llvm::BranchInst *allocaTerminator =
Expand Down Expand Up @@ -1330,7 +1362,8 @@ allocatePrivateVars(llvm::IRBuilderBase &builder,
Region &allocRegion = privDecl.getAllocRegion();

// map allocation region block argument
llvm::Value *nonPrivateVar = moduleTranslation.lookupValue(mlirPrivVar);
llvm::Value *nonPrivateVar = findAssociatedValue(
mlirPrivVar, builder, moduleTranslation, mappedPrivateVars);
assert(nonPrivateVar);
moduleTranslation.mapValue(privDecl.getAllocMoldArg(), nonPrivateVar);

Expand All @@ -1345,6 +1378,7 @@ allocatePrivateVars(llvm::IRBuilderBase &builder,
} else {
builder.SetInsertPoint(privAllocBlock->getTerminator());
}

if (failed(inlineConvertOmpRegions(allocRegion, "omp.private.alloc",
builder, moduleTranslation, &phis)))
return llvm::createStringError(
Expand Down Expand Up @@ -3829,6 +3863,17 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
bool isTargetDevice = ompBuilder->Config.isTargetDevice();
auto parentFn = opInst.getParentOfType<LLVM::LLVMFuncOp>();
auto &targetRegion = targetOp.getRegion();
// Holds the private vars that have been mapped along with the block argument
// that corresponds to the MapInfoOp corresponding to the private var in
// question. So, for instance:
//
// %10 = omp.map.info var_ptr(%6#0 : !fir.ref<!fir.box<!fir.heap<i32>>>, ..)
// omp.target map_entries(%10 -> %arg0) private(@box.privatizer %6#0-> %arg1)
//
// Then, %10 has been created so that the descriptor can be used by the
// privatizer @box.privatizer on the device side. Here we'd record {%6#0,
// %arg0} in the mappedPrivateVars map.
llvm::DenseMap<Value, Value> mappedPrivateVars;
DataLayout dl = DataLayout(opInst.getParentOfType<ModuleOp>());
SmallVector<Value> mapVars = targetOp.getMapVars();
ArrayRef<BlockArgument> mapBlockArgs =
Expand All @@ -3840,6 +3885,57 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
bool isOffloadEntry =
isTargetDevice || !ompBuilder->Config.TargetTriples.empty();

// For some private variables, the MapsForPrivatizedVariablesPass
// creates MapInfoOp instances. Go through the private variables and
// the mapped variables so that during codegeneration we are able
// to quickly look up the corresponding map variable, if any for each
// private variable.
if (!targetOp.getPrivateVars().empty() && !targetOp.getMapVars().empty()) {
auto argIface = llvm::cast<omp::BlockArgOpenMPOpInterface>(*targetOp);
OperandRange privateVars = targetOp.getPrivateVars();
std::optional<ArrayAttr> privateSyms = targetOp.getPrivateSyms();
std::optional<DenseI64ArrayAttr> privateMapIndices =
targetOp.getPrivateMapsAttr();

for (auto [privVarIdx, privVarSymPair] :
llvm::enumerate(llvm::zip_equal(privateVars, *privateSyms))) {
auto privVar = std::get<0>(privVarSymPair);
auto privSym = std::get<1>(privVarSymPair);

SymbolRefAttr privatizerName = llvm::cast<SymbolRefAttr>(privSym);
omp::PrivateClauseOp privatizer =
findPrivatizer(targetOp, privatizerName);

if (!privatizer.needsMap())
continue;

mlir::Value mappedValue =
targetOp.getMappedValueForPrivateVar(privVarIdx);
assert(mappedValue && "Expected to find mapped value for a privatized "
"variable that needs mapping");

// The MapInfoOp defining the map var isn't really needed later.
// So, we don't store it in any datastructure. Instead, we just
// do some sanity checks on it right now.
auto mapInfoOp = mappedValue.getDefiningOp<omp::MapInfoOp>();
Type varType = mapInfoOp.getVarType();

// Check #1: Check that the type of the private variable matches
// the type of the variable being mapped.
if (!isa<LLVM::LLVMPointerType>(privVar.getType()))
assert(
varType == privVar.getType() &&
"Type of private var doesn't match the type of the mapped value");

// Ok, only 1 sanity check for now.
// Record the block argument corresponding to this mapvar.
mappedPrivateVars.insert(
{privVar,
targetRegion.getArgument(argIface.getMapBlockArgsStart() +
(*privateMapIndices)[privVarIdx])});
}
}

using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP)
-> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
Expand All @@ -3859,7 +3955,6 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
attr.isStringAttribute())
llvmOutlinedFn->addFnAttr(attr);

builder.restoreIP(codeGenIP);
for (auto [arg, mapOp] : llvm::zip_equal(mapBlockArgs, mapVars)) {
auto mapInfoOp = cast<omp::MapInfoOp>(mapOp.getDefiningOp());
llvm::Value *mapOpValue =
Expand All @@ -3869,50 +3964,52 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,

// Do privatization after moduleTranslation has already recorded
// mapped values.
if (!targetOp.getPrivateVars().empty()) {
builder.restoreIP(allocaIP);

OperandRange privateVars = targetOp.getPrivateVars();
std::optional<ArrayAttr> privateSyms = targetOp.getPrivateSyms();
MutableArrayRef<BlockArgument> privateBlockArgs =
cast<omp::BlockArgOpenMPOpInterface>(opInst).getPrivateBlockArgs();

for (auto [privVar, privatizerNameAttr, privBlockArg] :
llvm::zip_equal(privateVars, *privateSyms, privateBlockArgs)) {

SymbolRefAttr privSym = cast<SymbolRefAttr>(privatizerNameAttr);
omp::PrivateClauseOp privatizer = findPrivatizer(&opInst, privSym);
assert(privatizer.getDataSharingType() !=
omp::DataSharingClauseType::FirstPrivate &&
privatizer.getDeallocRegion().empty() &&
"unsupported privatizer");
moduleTranslation.mapValue(privatizer.getAllocMoldArg(),
moduleTranslation.lookupValue(privVar));
Region &allocRegion = privatizer.getAllocRegion();
SmallVector<llvm::Value *, 1> yieldedValues;
if (failed(inlineConvertOmpRegions(
allocRegion, "omp.targetop.privatizer", builder,
moduleTranslation, &yieldedValues))) {
return llvm::createStringError(
"failed to inline `alloc` region of `omp.private`");
}
assert(yieldedValues.size() == 1);
moduleTranslation.mapValue(privBlockArg, yieldedValues.front());
moduleTranslation.forgetMapping(allocRegion);
builder.restoreIP(builder.saveIP());
}
}
MutableArrayRef<BlockArgument> privateBlockArgs =
cast<omp::BlockArgOpenMPOpInterface>(opInst).getPrivateBlockArgs();
SmallVector<mlir::Value> mlirPrivateVars;
SmallVector<llvm::Value *> llvmPrivateVars;
SmallVector<omp::PrivateClauseOp> privateDecls;
mlirPrivateVars.reserve(privateBlockArgs.size());
llvmPrivateVars.reserve(privateBlockArgs.size());
collectPrivatizationDecls(targetOp, privateDecls);
for (mlir::Value privateVar : targetOp.getPrivateVars())
mlirPrivateVars.push_back(privateVar);

llvm::Expected<llvm::BasicBlock *> afterAllocas = allocatePrivateVars(
builder, moduleTranslation, privateBlockArgs, privateDecls,
mlirPrivateVars, llvmPrivateVars, allocaIP, &mappedPrivateVars);

if (failed(handleError(afterAllocas, *targetOp)))
return llvm::make_error<PreviouslyReportedError>();

SmallVector<Region *> privateCleanupRegions;
llvm::transform(privateDecls, std::back_inserter(privateCleanupRegions),
[](omp::PrivateClauseOp privatizer) {
return &privatizer.getDeallocRegion();
});

builder.restoreIP(codeGenIP);
llvm::Expected<llvm::BasicBlock *> exitBlock = convertOmpOpRegions(
targetRegion, "omp.target", builder, moduleTranslation);

if (!exitBlock)
return exitBlock.takeError();

builder.SetInsertPoint(*exitBlock);
return builder.saveIP();
if (!privateCleanupRegions.empty()) {
if (failed(inlineOmpRegionCleanup(
privateCleanupRegions, llvmPrivateVars, moduleTranslation,
builder, "omp.targetop.private.cleanup",
/*shouldLoadCleanupRegionArg=*/false))) {
return llvm::createStringError(
"failed to inline `dealloc` region of `omp.private` "
"op in the target region");
}
}

return InsertPointTy(exitBlock.get(), exitBlock.get()->end());
};

llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
StringRef parentName = parentFn.getName();

llvm::TargetRegionEntryInfo entryInfo;
Expand All @@ -3923,9 +4020,6 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
int32_t defaultValTeams = -1;
int32_t defaultValThreads = 0;

llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
findAllocaInsertPoint(builder, moduleTranslation);

MapInfoData mapData;
collectMapDataFromMapOperands(mapData, mapVars, moduleTranslation, dl,
builder);
Expand Down Expand Up @@ -3973,6 +4067,10 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
buildDependData(targetOp.getDependKinds(), targetOp.getDependVars(),
moduleTranslation, dds);

llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
findAllocaInsertPoint(builder, moduleTranslation);
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);

llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
moduleTranslation.getOpenMPBuilder()->createTarget(
ompLoc, isOffloadEntry, allocaIP, builder.saveIP(), entryInfo,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,12 @@ module attributes {omp.is_target_device = true} {

// CHECK: user_code.entry: ; preds = %entry
// CHECK: %[[LOAD_BYREF:.*]] = load ptr, ptr %[[ALLOCA_BYREF]], align 8
// CHECK: br label %outlined.body

// CHECK: outlined.body:
// CHECK: br label %omp.target

// CHECK: omp.target: ; preds = %user_code.entry
// CHECK: omp.target:
// CHECK: %[[VAL_LOAD_BYCOPY:.*]] = load i32, ptr %[[ALLOCA_BYCOPY]], align 4
// CHECK: store i32 %[[VAL_LOAD_BYCOPY]], ptr %[[LOAD_BYREF]], align 4
// CHECK: br label %omp.region.cont
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ module attributes {omp.is_target_device = true} {
llvm.func @_QQmain() attributes {} {
%0 = llvm.mlir.addressof @_QMtest_0Esp : !llvm.ptr

// CHECK-DAG: omp.target: ; preds = %user_code.entry
// CHECK-DAG: omp.target: ; preds = %outlined.body
// CHECK-DAG: %[[V:.*]] = load ptr, ptr @_QMtest_0Esp_decl_tgt_ref_ptr, align 8
// CHECK-DAG: store i32 1, ptr %[[V]], align 4
// CHECK-DAG: br label %omp.region.cont
Expand Down
Loading
Loading