Skip to content

Commit

Permalink
[MLIR][OpenMP] Make omp.distribute into a loop wrapper (#87239)
Browse files Browse the repository at this point in the history
This patch updates the definition of `omp.distribute` to enforce the
restrictions of a wrapper operation.
  • Loading branch information
skatrak authored Apr 16, 2024
1 parent 58b49ce commit 70fe6ad
Show file tree
Hide file tree
Showing 4 changed files with 93 additions and 12 deletions.
22 changes: 18 additions & 4 deletions mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -840,7 +840,8 @@ def YieldOp : OpenMP_Op<"yield",
//===----------------------------------------------------------------------===//
def DistributeOp : OpenMP_Op<"distribute", [AttrSizedOperandSegments,
DeclareOpInterfaceMethods<LoopWrapperInterface>,
RecursiveMemoryEffects]> {
RecursiveMemoryEffects,
SingleBlockImplicitTerminator<"TerminatorOp">]> {
let summary = "distribute construct";
let description = [{
The distribute construct specifies that the iterations of one or more loops
Expand All @@ -855,15 +856,28 @@ def DistributeOp : OpenMP_Op<"distribute", [AttrSizedOperandSegments,
The distribute loop construct specifies that the iterations of the loop(s)
will be executed in parallel by threads in the current context. These
iterations are spread across threads that already exist in the enclosing
region. The lower and upper bounds specify a half-open range: the
range includes the lower bound but does not include the upper bound. If the
`inclusive` attribute is specified then the upper bound is also included.
region.

The body region may only contain a single block, which in turn must contain
a single operation and a terminator. That operation must be another
compatible loop wrapper or an `omp.loop_nest`.

The `dist_schedule_static` attribute specifies the schedule for this
loop, determining how the loop is distributed across the parallel threads.
The optional `schedule_chunk` associated with this attribute further
controls this distribution.

```mlir
omp.distribute <clauses> {
omp.loop_nest (%i1, %i2) : index = (%c0, %c0) to (%c10, %c10) step (%c1, %c1) {
%a = load %arrA[%i1, %i2] : memref<?x?xf32>
%b = load %arrB[%i1, %i2] : memref<?x?xf32>
%sum = arith.addf %a, %b : f32
store %sum, %arrC[%i1, %i2] : memref<?x?xf32>
omp.yield
}
}
```
// TODO: private_var, firstprivate_var, lastprivate_var, collapse
}];
let arguments = (ins
Expand Down
11 changes: 11 additions & 0 deletions mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1656,6 +1656,17 @@ LogicalResult DistributeOp::verify() {
return emitError(
"expected equal sizes for allocate and allocator variables");

if (!isWrapper())
return emitOpError() << "must be a loop wrapper";

if (LoopWrapperInterface nested = getNestedWrapper()) {
// Check for the allowed leaf constructs that may appear in a composite
// construct directly after DISTRIBUTE.
if (!isa<ParallelOp, SimdLoopOp>(nested))
return emitError() << "only supported nested wrappers are 'omp.parallel' "
"and 'omp.simdloop'";
}

return success();
}

Expand Down
34 changes: 33 additions & 1 deletion mlir/test/Dialect/OpenMP/invalid.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -1866,7 +1866,16 @@ func.func @omp_target_depend(%data_var: memref<i32>) {

// -----

func.func @omp_distribute(%data_var : memref<i32>) -> () {
func.func @omp_distribute_schedule(%chunk_size : i32) -> () {
// expected-error @below {{op chunk size set without dist_schedule_static being present}}
"omp.distribute"(%chunk_size) <{operandSegmentSizes = array<i32: 1, 0, 0>}> ({
"omp.terminator"() : () -> ()
}) : (i32) -> ()
}

// -----

func.func @omp_distribute_allocate(%data_var : memref<i32>) -> () {
// expected-error @below {{expected equal sizes for allocate and allocator variables}}
"omp.distribute"(%data_var) <{operandSegmentSizes = array<i32: 0, 1, 0>}> ({
"omp.terminator"() : () -> ()
Expand All @@ -1875,6 +1884,29 @@ func.func @omp_distribute(%data_var : memref<i32>) -> () {

// -----

func.func @omp_distribute_wrapper() -> () {
// expected-error @below {{op must be a loop wrapper}}
"omp.distribute"() ({
%0 = arith.constant 0 : i32
"omp.terminator"() : () -> ()
}) : () -> ()
}

// -----

func.func @omp_distribute_nested_wrapper(%data_var : memref<i32>) -> () {
// expected-error @below {{only supported nested wrappers are 'omp.parallel' and 'omp.simdloop'}}
"omp.distribute"() ({
"omp.wsloop"() ({
%0 = arith.constant 0 : i32
"omp.terminator"() : () -> ()
}) : () -> ()
"omp.terminator"() : () -> ()
}) : () -> ()
}

// -----

omp.private {type = private} @x.privatizer : i32 alloc {
^bb0(%arg0: i32):
%0 = arith.constant 0.0 : f32
Expand Down
38 changes: 31 additions & 7 deletions mlir/test/Dialect/OpenMP/ops.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -559,30 +559,54 @@ func.func @omp_simdloop_pretty_multiple(%lb1 : index, %ub1 : index, %step1 : ind
}

// CHECK-LABEL: omp_distribute
func.func @omp_distribute(%chunk_size : i32, %data_var : memref<i32>) -> () {
func.func @omp_distribute(%chunk_size : i32, %data_var : memref<i32>, %arg0 : i32) -> () {
// CHECK: omp.distribute
"omp.distribute" () ({
omp.terminator
"omp.loop_nest" (%arg0, %arg0, %arg0) ({
^bb0(%iv: i32):
"omp.yield"() : () -> ()
}) : (i32, i32, i32) -> ()
"omp.terminator"() : () -> ()
}) {} : () -> ()
// CHECK: omp.distribute
omp.distribute {
omp.terminator
omp.loop_nest (%iv) : i32 = (%arg0) to (%arg0) step (%arg0) {
omp.yield
}
}
// CHECK: omp.distribute dist_schedule_static
omp.distribute dist_schedule_static {
omp.terminator
omp.loop_nest (%iv) : i32 = (%arg0) to (%arg0) step (%arg0) {
omp.yield
}
}
// CHECK: omp.distribute dist_schedule_static chunk_size(%{{.+}} : i32)
omp.distribute dist_schedule_static chunk_size(%chunk_size : i32) {
omp.terminator
omp.loop_nest (%iv) : i32 = (%arg0) to (%arg0) step (%arg0) {
omp.yield
}
}
// CHECK: omp.distribute order(concurrent)
omp.distribute order(concurrent) {
omp.terminator
omp.loop_nest (%iv) : i32 = (%arg0) to (%arg0) step (%arg0) {
omp.yield
}
}
// CHECK: omp.distribute allocate(%{{.+}} : memref<i32> -> %{{.+}} : memref<i32>)
omp.distribute allocate(%data_var : memref<i32> -> %data_var : memref<i32>) {
omp.terminator
omp.loop_nest (%iv) : i32 = (%arg0) to (%arg0) step (%arg0) {
omp.yield
}
}
// CHECK: omp.distribute
omp.distribute {
// TODO Remove induction variables from omp.simdloop.
omp.simdloop for (%iv) : i32 = (%arg0) to (%arg0) step (%arg0) {
omp.loop_nest (%iv2) : i32 = (%arg0) to (%arg0) step (%arg0) {
omp.yield
}
omp.yield
}
}
return
}
Expand Down

0 comments on commit 70fe6ad

Please sign in to comment.