Skip to content

[flang][MLIR][OpenMP] Extend delayed privatization for scalar allocatables and pointers #84740

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 25 additions & 11 deletions flang/lib/Lower/Bridge.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -748,7 +748,8 @@ class FirConverter : public Fortran::lower::AbstractConverter {

void copyVar(mlir::Location loc, mlir::Value dst,
mlir::Value src) override final {
copyVarHLFIR(loc, dst, src);
copyVarHLFIR(loc, Fortran::lower::SymbolBox::Intrinsic{dst},
Fortran::lower::SymbolBox::Intrinsic{src});
}

void copyHostAssociateVar(
Expand Down Expand Up @@ -1009,10 +1010,7 @@ class FirConverter : public Fortran::lower::AbstractConverter {
// `omp.private`'s `alloc` block. If this is the case, we return this
// `SymbolBox::Intrinsic` value.
if (Fortran::lower::SymbolBox v = symMap->lookupSymbol(sym))
return v.match(
[&](const Fortran::lower::SymbolBox::Intrinsic &)
-> Fortran::lower::SymbolBox { return v; },
[](const auto &) -> Fortran::lower::SymbolBox { return {}; });
return v;

return {};
}
Expand Down Expand Up @@ -1060,15 +1058,16 @@ class FirConverter : public Fortran::lower::AbstractConverter {
const Fortran::lower::SymbolBox &rhs_sb) {
mlir::Location loc = genLocation(sym.name());
if (lowerToHighLevelFIR())
copyVarHLFIR(loc, lhs_sb.getAddr(), rhs_sb.getAddr());
copyVarHLFIR(loc, lhs_sb, rhs_sb);
else
copyVarFIR(loc, sym, lhs_sb, rhs_sb);
}

void copyVarHLFIR(mlir::Location loc, mlir::Value dst, mlir::Value src) {
void copyVarHLFIR(mlir::Location loc, Fortran::lower::SymbolBox dst,
Fortran::lower::SymbolBox src) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why did you need to change the argument here and the way allocatable were tested for?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With delayed privatization, the address that the dst box is not anymore a FortranVariableOpInterface value. It is rather a block argument to the copy region of the delayed privatizer:

omp.private {type = firstprivate} @_QF... : !fir.ref<!fir.box<!fir.heap<i32>>> alloc {
  ^bb0(%arg0: !fir.ref<!fir.box<!fir.heap<i32>>>):
  ...
  } copy {
  ^bb0(%arg0: !fir.ref<!fir.box<!fir.heap<i32>>>, %arg1: !fir.ref<!fir.box<!fir.heap<i32>>>):
  ...
  }

%arg1 in the above snippet is the dst value. So we don't have access to the fortran_attrs attributes attached to the original value anymore. However, the SymbolBox still has that information since it directly checks the type of the address.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

However, I just realized that for eager privatization, we also have to check for the FortranVariableOpInterface of the SymbolBox variant. I didn't catch the issue since all tests were green so I will add more tests for eager privatization for both allocatables and pointers in a follow-up PR. Seems like this was an oversight, I think.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done in latest commit.

assert(lowerToHighLevelFIR());
hlfir::Entity lhs{dst};
hlfir::Entity rhs{src};
hlfir::Entity lhs{dst.getAddr()};
hlfir::Entity rhs{src.getAddr()};
// Temporary_lhs is set to true in hlfir.assign below to avoid user
// assignment to be used and finalization to be called on the LHS.
// This may or may not be correct but mimics the current behaviour
Expand All @@ -1082,7 +1081,22 @@ class FirConverter : public Fortran::lower::AbstractConverter {
/*keepLhsLengthInAllocatableAssignment=*/false,
/*temporary_lhs=*/true);
};
if (lhs.isAllocatable()) {

bool isBoxAllocatable = dst.match(
[](const fir::MutableBoxValue &box) { return box.isAllocatable(); },
[](const fir::FortranVariableOpInterface &box) {
return fir::FortranVariableOpInterface(box).isAllocatable();
},
[](const auto &box) { return false; });

bool isBoxPointer = dst.match(
[](const fir::MutableBoxValue &box) { return box.isPointer(); },
[](const fir::FortranVariableOpInterface &box) {
return fir::FortranVariableOpInterface(box).isPointer();
},
[](const auto &box) { return false; });

if (isBoxAllocatable) {
// Deep copy allocatable if it is allocated.
// Note that when allocated, the RHS is already allocated with the LHS
// shape for copy on entry in createHostAssociateVarClone.
Expand All @@ -1097,7 +1111,7 @@ class FirConverter : public Fortran::lower::AbstractConverter {
copyData(lhs, rhs);
})
.end();
} else if (lhs.isPointer()) {
} else if (isBoxPointer) {
// Set LHS target to the target of RHS (do not copy the RHS
// target data into the LHS target storage).
auto loadVal = builder->create<fir::LoadOp>(loc, rhs);
Expand Down
11 changes: 8 additions & 3 deletions flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ void DataSharingProcessor::processStep2(mlir::Operation *op, bool isLoop) {
}

void DataSharingProcessor::insertDeallocs() {
// TODO Extend delayed privatization to include a `dealloc` region.
for (const Fortran::semantics::Symbol *sym : privatizedSymbols)
if (Fortran::semantics::IsAllocatable(sym->GetUltimate())) {
converter.createHostAssociateVarCloneDealloc(*sym);
Expand Down Expand Up @@ -367,6 +368,7 @@ void DataSharingProcessor::doPrivatize(const Fortran::semantics::Symbol *sym) {
symLoc, uniquePrivatizerName, symType,
isFirstPrivate ? mlir::omp::DataSharingClauseType::FirstPrivate
: mlir::omp::DataSharingClauseType::Private);
fir::ExtendedValue symExV = converter.getSymbolExtendedValue(*sym);

symTable->pushScope();

Expand All @@ -377,7 +379,8 @@ void DataSharingProcessor::doPrivatize(const Fortran::semantics::Symbol *sym) {
&allocRegion, /*insertPt=*/{}, symType, symLoc);

firOpBuilder.setInsertionPointToEnd(allocEntryBlock);
symTable->addSymbol(*sym, allocRegion.getArgument(0));
symTable->addSymbol(*sym,
fir::substBase(symExV, allocRegion.getArgument(0)));
symTable->pushScope();
cloneSymbol(sym);
firOpBuilder.create<mlir::omp::YieldOp>(
Expand All @@ -394,10 +397,12 @@ void DataSharingProcessor::doPrivatize(const Fortran::semantics::Symbol *sym) {
mlir::Block *copyEntryBlock = firOpBuilder.createBlock(
&copyRegion, /*insertPt=*/{}, {symType, symType}, {symLoc, symLoc});
firOpBuilder.setInsertionPointToEnd(copyEntryBlock);
symTable->addSymbol(*sym, copyRegion.getArgument(0),
symTable->addSymbol(*sym,
fir::substBase(symExV, copyRegion.getArgument(0)),
/*force=*/true);
symTable->pushScope();
symTable->addSymbol(*sym, copyRegion.getArgument(1));
symTable->addSymbol(*sym,
fir::substBase(symExV, copyRegion.getArgument(1)));
auto ip = firOpBuilder.saveInsertionPoint();
copyFirstPrivateSymbol(sym, &ip);

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
! Test delayed privatization for allocatables: `firstprivate`.

! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --openmp-enable-delayed-privatization \
! RUN: -o - %s 2>&1 | FileCheck %s
! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 |\
! RUN: FileCheck %s

subroutine delayed_privatization_allocatable
implicit none
integer, allocatable :: var1

!$omp parallel firstprivate(var1)
var1 = 10
!$omp end parallel
end subroutine

! CHECK-LABEL: omp.private {type = firstprivate}
! CHECK-SAME: @[[PRIVATIZER_SYM:.*]] : [[TYPE:!fir.ref<!fir.box<!fir.heap<i32>>>]] alloc {

! CHECK-NEXT: ^bb0(%[[PRIV_ARG:.*]]: [[TYPE]]):

Comment on lines +18 to +21
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is allocated is not visible in the test here.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is that deferred to the private test?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Indeed, instead of just copying the check lines from the private test again, I skipped most of it here. Let me know if you prefer to repeat the checks here as well.

! CHECK: } copy {
! CHECK: ^bb0(%[[PRIV_ORIG_ARG:.*]]: [[TYPE]], %[[PRIV_PRIV_ARG:.*]]: [[TYPE]]):

! CHECK-NEXT: %[[PRIV_BASE_VAL:.*]] = fir.load %[[PRIV_PRIV_ARG]]
! CHECK-NEXT: %[[PRIV_BASE_BOX:.*]] = fir.box_addr %[[PRIV_BASE_VAL]]
! CHECK-NEXT: %[[PRIV_BASE_ADDR:.*]] = fir.convert %[[PRIV_BASE_BOX]]
! CHECK-NEXT: %[[C0:.*]] = arith.constant 0 : i64
! CHECK-NEXT: %[[COPY_COND:.*]] = arith.cmpi ne, %[[PRIV_BASE_ADDR]], %[[C0]] : i64

! CHECK-NEXT: fir.if %[[COPY_COND]] {
! CHECK-NEXT: %[[ORIG_BASE_VAL:.*]] = fir.load %[[PRIV_ORIG_ARG]]
! CHECK-NEXT: %[[ORIG_BASE_ADDR:.*]] = fir.box_addr %[[ORIG_BASE_VAL]]
! CHECK-NEXT: %[[ORIG_BASE_LD:.*]] = fir.load %[[ORIG_BASE_ADDR]]
! CHECK-NEXT: hlfir.assign %[[ORIG_BASE_LD]] to %[[PRIV_BASE_BOX]] temporary_lhs
! CHECK-NEXT: }
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
! Test delayed privatization for allocatables: `private`.

! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --openmp-enable-delayed-privatization \
! RUN: -o - %s 2>&1 | FileCheck %s
! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 |\
! RUN: FileCheck %s

subroutine delayed_privatization_allocatable
implicit none
integer, allocatable :: var1

!$omp parallel private(var1)
var1 = 10
!$omp end parallel
end subroutine

! CHECK-LABEL: omp.private {type = private}
! CHECK-SAME: @[[PRIVATIZER_SYM:.*]] : [[TYPE:!fir.ref<!fir.box<!fir.heap<i32>>>]] alloc {

! CHECK-NEXT: ^bb0(%[[PRIV_ARG:.*]]: [[TYPE]]):

! CHECK-NEXT: %[[PRIV_ALLOC:.*]] = fir.alloca !fir.box<!fir.heap<i32>> {bindc_name = "var1", pinned, uniq_name = "_QFdelayed_privatization_allocatableEvar1"}

! CHECK-NEXT: %[[PRIV_ARG_VAL:.*]] = fir.load %[[PRIV_ARG]] : !fir.ref<!fir.box<!fir.heap<i32>>>
! CHECK-NEXT: %[[PRIV_ARG_BOX:.*]] = fir.box_addr %[[PRIV_ARG_VAL]] : (!fir.box<!fir.heap<i32>>) -> !fir.heap<i32>
! CHECK-NEXT: %[[PRIV_ARG_ADDR:.*]] = fir.convert %[[PRIV_ARG_BOX]] : (!fir.heap<i32>) -> i64
! CHECK-NEXT: %[[C0:.*]] = arith.constant 0 : i64
! CHECK-NEXT: %[[ALLOC_COND:.*]] = arith.cmpi ne, %[[PRIV_ARG_ADDR]], %[[C0]] : i64

! CHECK-NEXT: fir.if %[[ALLOC_COND]] {
! CHECK-NEXT: %[[PRIV_ALLOCMEM:.*]] = fir.allocmem i32 {fir.must_be_heap = true, uniq_name = "_QFdelayed_privatization_allocatableEvar1.alloc"}
! CHECK-NEXT: %[[PRIV_ALLOCMEM_BOX:.*]] = fir.embox %[[PRIV_ALLOCMEM]] : (!fir.heap<i32>) -> !fir.box<!fir.heap<i32>>
! CHECK-NEXT: fir.store %[[PRIV_ALLOCMEM_BOX]] to %[[PRIV_ALLOC]] : !fir.ref<!fir.box<!fir.heap<i32>>>
! CHECK-NEXT: } else {
! CHECK-NEXT: %[[ZERO_BITS:.*]] = fir.zero_bits !fir.heap<i32>
! CHECK-NEXT: %[[ZERO_BOX:.*]] = fir.embox %[[ZERO_BITS]] : (!fir.heap<i32>) -> !fir.box<!fir.heap<i32>>
! CHECK-NEXT: fir.store %[[ZERO_BOX]] to %[[PRIV_ALLOC]] : !fir.ref<!fir.box<!fir.heap<i32>>>
! CHECK-NEXT: }

! CHECK-NEXT: %[[PRIV_DECL:.*]]:2 = hlfir.declare %[[PRIV_ALLOC]]
! CHECK-NEXT: omp.yield(%[[PRIV_DECL]]#0 : [[TYPE]])

! CHECK-NEXT: }
31 changes: 31 additions & 0 deletions flang/test/Lower/OpenMP/delayed-privatization-pointer.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
! Test delayed privatization for pointers: `private`.

! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --openmp-enable-delayed-privatization \
! RUN: -o - %s 2>&1 | FileCheck %s
! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 |\
! RUN: FileCheck %s

subroutine delayed_privatization_pointer
implicit none
integer, pointer :: var1

!$omp parallel firstprivate(var1)
var1 = 10
!$omp end parallel
end subroutine

! CHECK-LABEL: omp.private {type = firstprivate}
! CHECK-SAME: @[[PRIVATIZER_SYM:.*]] : [[TYPE:!fir.ref<!fir.box<!fir.ptr<i32>>>]] alloc {

! CHECK-NEXT: ^bb0(%[[PRIV_ARG:.*]]: [[TYPE]]):

! CHECK-NEXT: %[[PRIV_ALLOC:.*]] = fir.alloca !fir.box<!fir.ptr<i32>> {bindc_name = "var1", pinned, uniq_name = "_QFdelayed_privatization_pointerEvar1"}
! CHECK-NEXT: %[[PRIV_DECL:.*]]:2 = hlfir.declare %[[PRIV_ALLOC]]
! CHECK-NEXT: omp.yield(%[[PRIV_DECL]]#0 : [[TYPE]])

! CHECK-NEXT: } copy {
! CHECK: ^bb0(%[[PRIV_ORIG_ARG:.*]]: [[TYPE]], %[[PRIV_PRIV_ARG:.*]]: [[TYPE]]):
! CHECK-NEXT: %[[ORIG_BASE_VAL:.*]] = fir.load %[[PRIV_ORIG_ARG]]
! CHECK-NEXT: fir.store %[[ORIG_BASE_VAL]] to %[[PRIV_PRIV_ARG]] : !fir.ref<!fir.box<!fir.ptr<i32>>>
! CHECK-NEXT: omp.yield(%[[PRIV_PRIV_ARG]] : [[TYPE]])
! CHECK-NEXT: }