From 99c76085fe77cbb286237e980ab7a642ca31a7c9 Mon Sep 17 00:00:00 2001 From: ergawy Date: Mon, 10 Mar 2025 23:47:04 -0500 Subject: [PATCH 1/4] [flang][hlfir] Add MLIR op for `do concurrent` Adds new MLIR ops to model `do concurrent`. In order to make `do concurrent` representation self-contained, a loop is modeled using 2 ops, one wrapper and one that contains the actual body of the loop. For example, a 2D `do concurrent` loop is modeled as follows: ```mlir hlfir.do_concurrent { %i = fir.alloca i32 %j = fir.alloca i32 hlfir.do_concurrent.loop (%i_iv, %j_iv) = (%i_lb, %j_lb) to (%i_ub, %j_ub) step (%i_st, %j_st) { %0 = fir.convert %i_iv : (index) -> i32 fir.store %0 to %i : !fir.ref<i32> %1 = fir.convert %j_iv : (index) -> i32 fir.store %1 to %j : !fir.ref<i32> } } ``` The `hlfir.do_concurrent` wrapper op encapsulates both the actual loop and the allocations required for the iteration variables. The `hlfir.do_concurrent.loop` op is a multi-dimensional op that contains the loop control and body. See the ops' docs for more info. --- .../include/flang/Optimizer/HLFIR/HLFIROps.td | 116 +++++++++++++ flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp | 163 ++++++++++++++++++ flang/test/HLFIR/do_concurrent.fir | 92 ++++++++++ flang/test/HLFIR/invalid.fir | 95 ++++++++++ 4 files changed, 466 insertions(+) create mode 100644 flang/test/HLFIR/do_concurrent.fir diff --git a/flang/include/flang/Optimizer/HLFIR/HLFIROps.td b/flang/include/flang/Optimizer/HLFIR/HLFIROps.td index f69930d5b53b3..089c67af5d313 100644 --- a/flang/include/flang/Optimizer/HLFIR/HLFIROps.td +++ b/flang/include/flang/Optimizer/HLFIR/HLFIROps.td @@ -21,6 +21,7 @@ include "flang/Optimizer/Dialect/FIRAttr.td" include "flang/Optimizer/Dialect/FortranVariableInterface.td" include "mlir/Dialect/Arith/IR/ArithBase.td" include "mlir/Dialect/Arith/IR/ArithOpsInterfaces.td" +include "mlir/Dialect/LLVMIR/LLVMAttrDefs.td" include "mlir/IR/BuiltinAttributes.td" // Base class for FIR operations. 
@@ -1863,5 +1864,120 @@ def hlfir_EvaluateInMemoryOp : hlfir_Op<"eval_in_mem", [AttrSizedOperandSegments let hasVerifier = 1; } +def hlfir_DoConcurrentOp : hlfir_Op<"do_concurrent", [SingleBlock]> { + let summary = "do concurrent loop wrapper"; + + let description = [{ + A wrapper operation for the actual op modeling `do concurrent` loops: + `hlfir.do_concurrent.loop` (see op declaration below for more info about it). + + The `hlfir.do_concurrent` wrapper op consists of one single-block region with + the following properties: + - The first ops in the region are responsible for allocating storage for the + loop's iteration variables. This is property is **not** enforced by the op + verifier, but expected to be respected when building the op. + - The terminator of the region is an instance of `hlfir.do_concurrent.loop`. + + For example, a 2D loop nest would be represented as follows: + ``` + hlfir.do_concurrent { + %i = fir.alloca i32 + %j = fir.alloca i32 + hlfir.do_concurrent.loop ... + } + ``` + }]; + + let regions = (region SizedRegion<1>:$region); + + let assemblyFormat = "$region attr-dict"; + let hasVerifier = 1; +} + +def hlfir_DoConcurrentLoopOp : hlfir_Op<"do_concurrent.loop", + [AttrSizedOperandSegments, DeclareOpInterfaceMethods, + Terminator, NoTerminator, SingleBlock, ParentOneOf<["DoConcurrentOp"]>]> { + let summary = "do concurrent loop"; + + let description = [{ + An operation that models a Fortran `do concurrent` loop's header and block. + This is a single-region single-block terminator op that is expected to + terminate the region of a `omp.do_concurrent` wrapper op. + + This op borrows from both `scf.parallel` and `fir.do_loop` ops. Similar to + `scf.parallel`, a loop nest takes 3 groups of SSA values as operands that + represent the lower bounds, upper bounds, and steps. Similar to `fir.do_loop` + the op takes one additional group of SSA values to represent reductions. + + The body region **does not** have a terminator. 
+ + For example, a 2D loop nest with 2 reductions (sum and max) would be + represented as follows: + ``` + // The wrapper of the loop + hlfir.do_concurrent { + %i = fir.alloca i32 + %j = fir.alloca i32 + + // The actual `do concurrent` loop + hlfir.do_concurrent.loop + (%i_iv, %j_iv) = (%i_lb, %j_lb) to (%i_ub, %j_ub) step (%i_st, %j_st) + reduce(#fir.reduce_attr -> %sum : !fir.ref, + #fir.reduce_attr -> %max : !fir.ref) { + + %0 = fir.convert %i_iv : (index) -> i32 + fir.store %0 to %i : !fir.ref + + %1 = fir.convert %j_iv : (index) -> i32 + fir.store %1 to %j : !fir.ref + + // ... loop body goes here ... + } + } + ``` + + Description of arguments: + - `lowerBound`: The group of SSA values for the nest's lower bounds. + - `upperBound`: The group of SSA values for the nest's upper bounds. + - `step`: The group of SSA values for the nest's steps. + - `reduceOperands`: The reduction SSA values, if any. + - `reduceAttrs`: Attributes to store reduction operations, if any. + - `loopAnnotation`: Loop metadata to be passed down the compiler pipeline to + LLVM. 
+ }]; + + let arguments = (ins + Variadic:$lowerBound, + Variadic:$upperBound, + Variadic:$step, + Variadic:$reduceOperands, + OptionalAttr:$reduceAttrs, + OptionalAttr:$loopAnnotation + ); + + let regions = (region SizedRegion<1>:$region); + + let hasCustomAssemblyFormat = 1; + let hasVerifier = 1; + + let extraClassDeclaration = [{ + /// Get Number of variadic operands + unsigned getNumOperands(unsigned segmentIdx) { + auto segments = (*this)->getAttrOfType( + getOperandSegmentSizeAttr()); + return static_cast(segments[segmentIdx]); + } + + // Get Number of reduction operands + unsigned getNumReduceOperands() { + return getNumOperands(3); + } + + /// Does the operation hold operands for reduction variables + bool hasReduceOperands() { + return getNumReduceOperands() > 0; + } + }]; +} #endif // FORTRAN_DIALECT_HLFIR_OPS diff --git a/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp b/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp index 8851a3a7187b9..c4e62df655c31 100644 --- a/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp +++ b/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp @@ -12,6 +12,7 @@ #include "flang/Optimizer/HLFIR/HLFIROps.h" +#include "flang/Optimizer/Dialect/FIRAttr.h" #include "flang/Optimizer/Dialect/FIROpsSupport.h" #include "flang/Optimizer/Dialect/FIRType.h" #include "flang/Optimizer/Dialect/Support/FIRContext.h" @@ -2246,6 +2247,168 @@ llvm::LogicalResult hlfir::EvaluateInMemoryOp::verify() { return mlir::success(); } +//===----------------------------------------------------------------------===// +// DoConcurrentOp +//===----------------------------------------------------------------------===// + +llvm::LogicalResult hlfir::DoConcurrentOp::verify() { + mlir::Block *body = getBody(); + + if (body->empty()) + return emitOpError("body cannot be empty"); + + if (!body->mightHaveTerminator() || + !mlir::isa(body->getTerminator())) + return emitOpError("must be terminated by 'hlfir.do_concurrent.loop'"); + + return mlir::success(); +} + 
+//===----------------------------------------------------------------------===// +// DoConcurrentLoopOp +//===----------------------------------------------------------------------===// + +mlir::ParseResult +hlfir::DoConcurrentLoopOp::parse(mlir::OpAsmParser &parser, + mlir::OperationState &result) { + auto &builder = parser.getBuilder(); + // Parse an opening `(` followed by induction variables followed by `)` + llvm::SmallVector ivs; + if (parser.parseArgumentList(ivs, mlir::OpAsmParser::Delimiter::Paren)) + return mlir::failure(); + + // Parse loop bounds. + llvm::SmallVector lower; + if (parser.parseEqual() || + parser.parseOperandList(lower, ivs.size(), + mlir::OpAsmParser::Delimiter::Paren) || + parser.resolveOperands(lower, builder.getIndexType(), result.operands)) + return mlir::failure(); + + llvm::SmallVector upper; + if (parser.parseKeyword("to") || + parser.parseOperandList(upper, ivs.size(), + mlir::OpAsmParser::Delimiter::Paren) || + parser.resolveOperands(upper, builder.getIndexType(), result.operands)) + return mlir::failure(); + + // Parse step values. + llvm::SmallVector steps; + if (parser.parseKeyword("step") || + parser.parseOperandList(steps, ivs.size(), + mlir::OpAsmParser::Delimiter::Paren) || + parser.resolveOperands(steps, builder.getIndexType(), result.operands)) + return mlir::failure(); + + llvm::SmallVector reduceOperands; + llvm::SmallVector reduceArgTypes; + if (succeeded(parser.parseOptionalKeyword("reduce"))) { + // Parse reduction attributes and variables. + llvm::SmallVector attributes; + if (failed(parser.parseCommaSeparatedList( + mlir::AsmParser::Delimiter::Paren, [&]() { + if (parser.parseAttribute(attributes.emplace_back()) || + parser.parseArrow() || + parser.parseOperand(reduceOperands.emplace_back()) || + parser.parseColonType(reduceArgTypes.emplace_back())) + return mlir::failure(); + return mlir::success(); + }))) + return mlir::failure(); + // Resolve input operands. 
+ for (auto operand_type : llvm::zip(reduceOperands, reduceArgTypes)) + if (parser.resolveOperand(std::get<0>(operand_type), + std::get<1>(operand_type), result.operands)) + return mlir::failure(); + llvm::SmallVector arrayAttr(attributes.begin(), + attributes.end()); + result.addAttribute(getReduceAttrsAttrName(result.name), + builder.getArrayAttr(arrayAttr)); + } + + // Now parse the body. + mlir::Region *body = result.addRegion(); + for (auto &iv : ivs) + iv.type = builder.getIndexType(); + if (parser.parseRegion(*body, ivs)) + return mlir::failure(); + + // Set `operandSegmentSizes` attribute. + result.addAttribute(DoConcurrentLoopOp::getOperandSegmentSizeAttr(), + builder.getDenseI32ArrayAttr( + {static_cast(lower.size()), + static_cast(upper.size()), + static_cast(steps.size()), + static_cast(reduceOperands.size())})); + + // Parse attributes. + if (parser.parseOptionalAttrDict(result.attributes)) + return mlir::failure(); + + return mlir::success(); +} + +void hlfir::DoConcurrentLoopOp::print(mlir::OpAsmPrinter &p) { + p << " (" << getBody()->getArguments() << ") = (" << getLowerBound() + << ") to (" << getUpperBound() << ") step (" << getStep() << ")"; + + if (hasReduceOperands()) { + p << " reduce("; + auto attrs = getReduceAttrsAttr(); + auto operands = getReduceOperands(); + llvm::interleaveComma(llvm::zip(attrs, operands), p, [&](auto it) { + p << std::get<0>(it) << " -> " << std::get<1>(it) << " : " + << std::get<1>(it).getType(); + }); + p << ')'; + } + + p << ' '; + p.printRegion(getRegion(), /*printEntryBlockArgs=*/false); + p.printOptionalAttrDict( + (*this)->getAttrs(), + /*elidedAttrs=*/{DoConcurrentLoopOp::getOperandSegmentSizeAttr(), + DoConcurrentLoopOp::getReduceAttrsAttrName()}); +} + +llvm::SmallVector hlfir::DoConcurrentLoopOp::getLoopRegions() { + return {&getRegion()}; +} + +llvm::LogicalResult hlfir::DoConcurrentLoopOp::verify() { + mlir::Operation::operand_range lbValues = getLowerBound(); + mlir::Operation::operand_range ubValues = 
getUpperBound(); + mlir::Operation::operand_range stepValues = getStep(); + + if (lbValues.empty()) + return emitOpError( + "needs at least one tuple element for lowerBound, upperBound and step"); + + if (lbValues.size() != ubValues.size() || + ubValues.size() != stepValues.size()) + return emitOpError("different number of tuple elements for lowerBound, " + "upperBound or step"); + + // Check that the body defines the same number of block arguments as the + // number of tuple elements in step. + mlir::Block *body = getBody(); + if (body->getNumArguments() != stepValues.size()) + return emitOpError() << "expects the same number of induction variables: " + << body->getNumArguments() + << " as bound and step values: " << stepValues.size(); + for (auto arg : body->getArguments()) + if (!arg.getType().isIndex()) + return emitOpError( + "expects arguments for the induction variable to be of index type"); + + auto reduceAttrs = getReduceAttrsAttr(); + if (getNumReduceOperands() != (reduceAttrs ? 
reduceAttrs.size() : 0)) + return emitOpError( + "mismatch in number of reduction variables and reduction attributes"); + + return mlir::success(); +} + #include "flang/Optimizer/HLFIR/HLFIROpInterfaces.cpp.inc" #define GET_OP_CLASSES #include "flang/Optimizer/HLFIR/HLFIREnums.cpp.inc" diff --git a/flang/test/HLFIR/do_concurrent.fir b/flang/test/HLFIR/do_concurrent.fir new file mode 100644 index 0000000000000..aef9db2236a57 --- /dev/null +++ b/flang/test/HLFIR/do_concurrent.fir @@ -0,0 +1,92 @@ +// Test hlfir.do_concurrent operation parse, verify (no errors), and unparse + +// RUN: fir-opt %s | fir-opt | FileCheck %s + +func.func @dc_1d(%i_lb: index, %i_ub: index, %i_st: index) { + hlfir.do_concurrent { + %i = fir.alloca i32 + hlfir.do_concurrent.loop (%i_iv) = (%i_lb) to (%i_ub) step (%i_st) { + %0 = fir.convert %i_iv : (index) -> i32 + fir.store %0 to %i : !fir.ref + } + } + return +} + +// CHECK-LABEL: func.func @dc_1d +// CHECK-SAME: (%[[I_LB:.*]]: index, %[[I_UB:.*]]: index, %[[I_ST:.*]]: index) +// CHECK: hlfir.do_concurrent { +// CHECK: %[[I:.*]] = fir.alloca i32 +// CHECK: hlfir.do_concurrent.loop (%[[I_IV:.*]]) = (%[[I_LB]]) to (%[[I_UB]]) step (%[[I_ST]]) { +// CHECK: %[[I_IV_CVT:.*]] = fir.convert %[[I_IV]] : (index) -> i32 +// CHECK: fir.store %[[I_IV_CVT]] to %[[I]] : !fir.ref +// CHECK: } +// CHECK: } + +func.func @dc_2d(%i_lb: index, %i_ub: index, %i_st: index, + %j_lb: index, %j_ub: index, %j_st: index) { + hlfir.do_concurrent { + %i = fir.alloca i32 + %j = fir.alloca i32 + hlfir.do_concurrent.loop + (%i_iv, %j_iv) = (%i_lb, %j_lb) to (%i_ub, %j_ub) step (%i_st, %j_st) { + %0 = fir.convert %i_iv : (index) -> i32 + fir.store %0 to %i : !fir.ref + + %1 = fir.convert %j_iv : (index) -> i32 + fir.store %1 to %j : !fir.ref + } + } + return +} + +// CHECK-LABEL: func.func @dc_2d +// CHECK-SAME: (%[[I_LB:.*]]: index, %[[I_UB:.*]]: index, %[[I_ST:.*]]: index, %[[J_LB:.*]]: index, %[[J_UB:.*]]: index, %[[J_ST:.*]]: index) +// CHECK: hlfir.do_concurrent { +// 
CHECK: %[[I:.*]] = fir.alloca i32 +// CHECK: %[[J:.*]] = fir.alloca i32 +// CHECK: hlfir.do_concurrent.loop +// CHECK-SAME: (%[[I_IV:.*]], %[[J_IV:.*]]) = (%[[I_LB]], %[[J_LB]]) to (%[[I_UB]], %[[J_UB]]) step (%[[I_ST]], %[[J_ST]]) { +// CHECK: %[[I_IV_CVT:.*]] = fir.convert %[[I_IV]] : (index) -> i32 +// CHECK: fir.store %[[I_IV_CVT]] to %[[I]] : !fir.ref +// CHECK: %[[J_IV_CVT:.*]] = fir.convert %[[J_IV]] : (index) -> i32 +// CHECK: fir.store %[[J_IV_CVT]] to %[[J]] : !fir.ref +// CHECK: } +// CHECK: } + +func.func @dc_2d_reduction(%i_lb: index, %i_ub: index, %i_st: index, + %j_lb: index, %j_ub: index, %j_st: index) { + %sum = fir.alloca i32 + + hlfir.do_concurrent { + %i = fir.alloca i32 + %j = fir.alloca i32 + hlfir.do_concurrent.loop + (%i_iv, %j_iv) = (%i_lb, %j_lb) to (%i_ub, %j_ub) step (%i_st, %j_st) + reduce(#fir.reduce_attr -> %sum : !fir.ref) { + %0 = fir.convert %i_iv : (index) -> i32 + fir.store %0 to %i : !fir.ref + + %1 = fir.convert %j_iv : (index) -> i32 + fir.store %1 to %j : !fir.ref + } + } + return +} + +// CHECK-LABEL: func.func @dc_2d_reduction +// CHECK-SAME: (%[[I_LB:.*]]: index, %[[I_UB:.*]]: index, %[[I_ST:.*]]: index, %[[J_LB:.*]]: index, %[[J_UB:.*]]: index, %[[J_ST:.*]]: index) + +// CHECK: %[[SUM:.*]] = fir.alloca i32 + +// CHECK: hlfir.do_concurrent { +// CHECK: %[[I:.*]] = fir.alloca i32 +// CHECK: %[[J:.*]] = fir.alloca i32 +// CHECK: hlfir.do_concurrent.loop +// CHECK-SAME: (%[[I_IV:.*]], %[[J_IV:.*]]) = (%[[I_LB]], %[[J_LB]]) to (%[[I_UB]], %[[J_UB]]) step (%[[I_ST]], %[[J_ST]]) reduce(#fir.reduce_attr -> %[[SUM]] : !fir.ref) { +// CHECK: %[[I_IV_CVT:.*]] = fir.convert %[[I_IV]] : (index) -> i32 +// CHECK: fir.store %[[I_IV_CVT]] to %[[I]] : !fir.ref +// CHECK: %[[J_IV_CVT:.*]] = fir.convert %[[J_IV]] : (index) -> i32 +// CHECK: fir.store %[[J_IV_CVT]] to %[[J]] : !fir.ref +// CHECK: } +// CHECK: } diff --git a/flang/test/HLFIR/invalid.fir b/flang/test/HLFIR/invalid.fir index d61efe0062e69..e14284f916bd9 100644 --- 
a/flang/test/HLFIR/invalid.fir +++ b/flang/test/HLFIR/invalid.fir @@ -1555,3 +1555,98 @@ func.func @bad_reshape(%arg0: !hlfir.expr<1x!fir.char<1,2>>, %arg1: !hlfir.expr< %0 = hlfir.reshape %arg0 %arg1 pad %arg2 : (!hlfir.expr<1x!fir.char<1,2>>, !hlfir.expr<1xi32>, !hlfir.expr<1x!fir.char<2,?>>) -> !hlfir.expr> return } + +// ----- + +func.func @empty_dc_wrapper_body() { + // expected-error@+1 {{'hlfir.do_concurrent' op expects a non-empty block}} + hlfir.do_concurrent { + } + return +} + +// ----- + +func.func @dc_wrong_terminator() { + // expected-error@+1 {{'hlfir.do_concurrent' op must be terminated by 'hlfir.do_concurrent.loop'}} + hlfir.do_concurrent { + llvm.return + } + return +} + +// ----- + +func.func @dc_0d() { + // expected-error@+2 {{'hlfir.do_concurrent.loop' op needs at least one tuple element for lowerBound, upperBound and step}} + hlfir.do_concurrent { + hlfir.do_concurrent.loop () = () to () step () { + %tmp = fir.alloca i32 + } + } + return +} + +// ----- + +func.func @dc_invalid_parent(%arg0: index, %arg1: index) { + // expected-error@+1 {{'hlfir.do_concurrent.loop' op expects parent op 'hlfir.do_concurrent'}} + "hlfir.do_concurrent.loop"(%arg0, %arg1) <{operandSegmentSizes = array}> ({ + ^bb0(%arg2: index): + %tmp = "fir.alloca"() <{in_type = i32, operandSegmentSizes = array}> : () -> !fir.ref + }) : (index, index) -> () + return +} + +// ----- + +func.func @dc_invalid_control(%arg0: index, %arg1: index) { + // expected-error@+2 {{'hlfir.do_concurrent.loop' op different number of tuple elements for lowerBound, upperBound or step}} + hlfir.do_concurrent { + "hlfir.do_concurrent.loop"(%arg0, %arg1) <{operandSegmentSizes = array}> ({ + ^bb0(%arg2: index): + %tmp = "fir.alloca"() <{in_type = i32, operandSegmentSizes = array}> : () -> !fir.ref + }) : (index, index) -> () + } + return +} + +// ----- + +func.func @dc_invalid_ind_var(%arg0: index, %arg1: index) { + // expected-error@+2 {{'hlfir.do_concurrent.loop' op expects the same number of 
induction variables: 2 as bound and step values: 1}} + hlfir.do_concurrent { + "hlfir.do_concurrent.loop"(%arg0, %arg1, %arg0) <{operandSegmentSizes = array}> ({ + ^bb0(%arg3: index, %arg4: index): + %tmp = "fir.alloca"() <{in_type = i32, operandSegmentSizes = array}> : () -> !fir.ref + }) : (index, index, index) -> () + } + return +} + +// ----- + +func.func @dc_invalid_ind_var_type(%arg0: index, %arg1: index) { + // expected-error@+2 {{'hlfir.do_concurrent.loop' op expects arguments for the induction variable to be of index type}} + hlfir.do_concurrent { + "hlfir.do_concurrent.loop"(%arg0, %arg1, %arg0) <{operandSegmentSizes = array}> ({ + ^bb0(%arg3: i32): + %tmp = "fir.alloca"() <{in_type = i32, operandSegmentSizes = array}> : () -> !fir.ref + }) : (index, index, index) -> () + } + return +} + +// ----- + +func.func @dc_invalid_reduction(%arg0: index, %arg1: index) { + %sum = fir.alloca i32 + // expected-error@+2 {{'hlfir.do_concurrent.loop' op mismatch in number of reduction variables and reduction attributes}} + hlfir.do_concurrent { + "hlfir.do_concurrent.loop"(%arg0, %arg1, %arg0, %sum) <{operandSegmentSizes = array}> ({ + ^bb0(%arg3: index): + %tmp = "fir.alloca"() <{in_type = i32, operandSegmentSizes = array}> : () -> !fir.ref + }) : (index, index, index, !fir.ref) -> () + } + return +} From e3db9f02fe47e7c96313636f2ea25e0d2bfdaec9 Mon Sep 17 00:00:00 2001 From: ergawy Date: Wed, 12 Mar 2025 05:49:36 -0500 Subject: [PATCH 2/4] handle review comments --- flang/include/flang/Optimizer/HLFIR/HLFIROps.td | 14 +------------- flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp | 2 +- 2 files changed, 2 insertions(+), 14 deletions(-) diff --git a/flang/include/flang/Optimizer/HLFIR/HLFIROps.td b/flang/include/flang/Optimizer/HLFIR/HLFIROps.td index 089c67af5d313..50a054a765faf 100644 --- a/flang/include/flang/Optimizer/HLFIR/HLFIROps.td +++ b/flang/include/flang/Optimizer/HLFIR/HLFIROps.td @@ -1961,21 +1961,9 @@ def hlfir_DoConcurrentLoopOp : 
hlfir_Op<"do_concurrent.loop", let hasVerifier = 1; let extraClassDeclaration = [{ - /// Get Number of variadic operands - unsigned getNumOperands(unsigned segmentIdx) { - auto segments = (*this)->getAttrOfType( - getOperandSegmentSizeAttr()); - return static_cast(segments[segmentIdx]); - } - // Get Number of reduction operands unsigned getNumReduceOperands() { - return getNumOperands(3); - } - - /// Does the operation hold operands for reduction variables - bool hasReduceOperands() { - return getNumReduceOperands() > 0; + return getReduceOperands().size(); } }]; } diff --git a/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp b/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp index c4e62df655c31..4c19ea414aabb 100644 --- a/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp +++ b/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp @@ -2352,7 +2352,7 @@ void hlfir::DoConcurrentLoopOp::print(mlir::OpAsmPrinter &p) { p << " (" << getBody()->getArguments() << ") = (" << getLowerBound() << ") to (" << getUpperBound() << ") step (" << getStep() << ")"; - if (hasReduceOperands()) { + if (!getReduceOperands().empty()) { p << " reduce("; auto attrs = getReduceAttrsAttr(); auto operands = getReduceOperands(); From 1c7a3e953b3b752b03179c5e9f095f52b9e8b3b5 Mon Sep 17 00:00:00 2001 From: ergawy Date: Thu, 13 Mar 2025 00:00:50 -0500 Subject: [PATCH 3/4] handle review comments --- flang/include/flang/Optimizer/HLFIR/HLFIROps.td | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/flang/include/flang/Optimizer/HLFIR/HLFIROps.td b/flang/include/flang/Optimizer/HLFIR/HLFIROps.td index 50a054a765faf..8897961316abc 100644 --- a/flang/include/flang/Optimizer/HLFIR/HLFIROps.td +++ b/flang/include/flang/Optimizer/HLFIR/HLFIROps.td @@ -1864,7 +1864,8 @@ def hlfir_EvaluateInMemoryOp : hlfir_Op<"eval_in_mem", [AttrSizedOperandSegments let hasVerifier = 1; } -def hlfir_DoConcurrentOp : hlfir_Op<"do_concurrent", [SingleBlock]> { +def hlfir_DoConcurrentOp : hlfir_Op<"do_concurrent", + [SingleBlock, 
AutomaticAllocationScope]> { let summary = "do concurrent loop wrapper"; let description = [{ From 1dc58b5bf5437f5856b1bb6512cfd878d90b2f01 Mon Sep 17 00:00:00 2001 From: ergawy Date: Mon, 17 Mar 2025 01:16:06 -0500 Subject: [PATCH 4/4] Move ops to fir --- .../include/flang/Optimizer/Dialect/FIROps.td | 105 +++++++++++ .../include/flang/Optimizer/HLFIR/HLFIROps.td | 105 ----------- flang/lib/Optimizer/Dialect/FIROps.cpp | 161 +++++++++++++++++ flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp | 163 ------------------ flang/test/{HLFIR => Fir}/do_concurrent.fir | 26 +-- flang/test/Fir/invalid.fir | 95 ++++++++++ flang/test/HLFIR/invalid.fir | 95 ---------- 7 files changed, 374 insertions(+), 376 deletions(-) rename flang/test/{HLFIR => Fir}/do_concurrent.fir (84%) diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td index ee9b959ba570f..c8d8ab41552c2 100644 --- a/flang/include/flang/Optimizer/Dialect/FIROps.td +++ b/flang/include/flang/Optimizer/Dialect/FIROps.td @@ -3446,4 +3446,109 @@ def fir_BoxTotalElementsOp let hasCanonicalizer = 1; } +def fir_DoConcurrentOp : fir_Op<"do_concurrent", + [SingleBlock, AutomaticAllocationScope]> { + let summary = "do concurrent loop wrapper"; + + let description = [{ + A wrapper operation for the actual op modeling `do concurrent` loops: + `fir.do_concurrent.loop` (see op declaration below for more info about it). + + The `fir.do_concurrent` wrapper op consists of one single-block region with + the following properties: + - The first ops in the region are responsible for allocating storage for the + loop's iteration variables. This property is **not** enforced by the op + verifier, but expected to be respected when building the op. + - The terminator of the region is an instance of `fir.do_concurrent.loop`. + + For example, a 2D loop nest would be represented as follows: + ``` + fir.do_concurrent { + %i = fir.alloca i32 + %j = fir.alloca i32 + fir.do_concurrent.loop ... 
+ } + ``` + }]; + + let regions = (region SizedRegion<1>:$region); + + let assemblyFormat = "$region attr-dict"; + let hasVerifier = 1; +} + +def fir_DoConcurrentLoopOp : fir_Op<"do_concurrent.loop", + [AttrSizedOperandSegments, DeclareOpInterfaceMethods, + Terminator, NoTerminator, SingleBlock, ParentOneOf<["DoConcurrentOp"]>]> { + let summary = "do concurrent loop"; + + let description = [{ + An operation that models a Fortran `do concurrent` loop's header and block. + This is a single-region single-block terminator op that is expected to + terminate the region of a `fir.do_concurrent` wrapper op. + + This op borrows from both `scf.parallel` and `fir.do_loop` ops. Similar to + `scf.parallel`, a loop nest takes 3 groups of SSA values as operands that + represent the lower bounds, upper bounds, and steps. Similar to `fir.do_loop` + the op takes one additional group of SSA values to represent reductions. + + The body region **does not** have a terminator. + + For example, a 2D loop nest with 2 reductions (sum and max) would be + represented as follows: + ``` + // The wrapper of the loop + fir.do_concurrent { + %i = fir.alloca i32 + %j = fir.alloca i32 + + // The actual `do concurrent` loop + fir.do_concurrent.loop + (%i_iv, %j_iv) = (%i_lb, %j_lb) to (%i_ub, %j_ub) step (%i_st, %j_st) + reduce(#fir.reduce_attr -> %sum : !fir.ref, + #fir.reduce_attr -> %max : !fir.ref) { + + %0 = fir.convert %i_iv : (index) -> i32 + fir.store %0 to %i : !fir.ref + + %1 = fir.convert %j_iv : (index) -> i32 + fir.store %1 to %j : !fir.ref + + // ... loop body goes here ... + } + } + ``` + + Description of arguments: + - `lowerBound`: The group of SSA values for the nest's lower bounds. + - `upperBound`: The group of SSA values for the nest's upper bounds. + - `step`: The group of SSA values for the nest's steps. + - `reduceOperands`: The reduction SSA values, if any. + - `reduceAttrs`: Attributes to store reduction operations, if any. 
+ - `loopAnnotation`: Loop metadata to be passed down the compiler pipeline to + LLVM. + }]; + + let arguments = (ins + Variadic:$lowerBound, + Variadic:$upperBound, + Variadic:$step, + Variadic:$reduceOperands, + OptionalAttr:$reduceAttrs, + OptionalAttr:$loopAnnotation + ); + + let regions = (region SizedRegion<1>:$region); + + let hasCustomAssemblyFormat = 1; + let hasVerifier = 1; + + let extraClassDeclaration = [{ + // Get Number of reduction operands + unsigned getNumReduceOperands() { + return getReduceOperands().size(); + } + }]; +} + #endif diff --git a/flang/include/flang/Optimizer/HLFIR/HLFIROps.td b/flang/include/flang/Optimizer/HLFIR/HLFIROps.td index 8897961316abc..f69930d5b53b3 100644 --- a/flang/include/flang/Optimizer/HLFIR/HLFIROps.td +++ b/flang/include/flang/Optimizer/HLFIR/HLFIROps.td @@ -21,7 +21,6 @@ include "flang/Optimizer/Dialect/FIRAttr.td" include "flang/Optimizer/Dialect/FortranVariableInterface.td" include "mlir/Dialect/Arith/IR/ArithBase.td" include "mlir/Dialect/Arith/IR/ArithOpsInterfaces.td" -include "mlir/Dialect/LLVMIR/LLVMAttrDefs.td" include "mlir/IR/BuiltinAttributes.td" // Base class for FIR operations. @@ -1864,109 +1863,5 @@ def hlfir_EvaluateInMemoryOp : hlfir_Op<"eval_in_mem", [AttrSizedOperandSegments let hasVerifier = 1; } -def hlfir_DoConcurrentOp : hlfir_Op<"do_concurrent", - [SingleBlock, AutomaticAllocationScope]> { - let summary = "do concurrent loop wrapper"; - - let description = [{ - A wrapper operation for the actual op modeling `do concurrent` loops: - `hlfir.do_concurrent.loop` (see op declaration below for more info about it). - - The `hlfir.do_concurrent` wrapper op consists of one single-block region with - the following properties: - - The first ops in the region are responsible for allocating storage for the - loop's iteration variables. This is property is **not** enforced by the op - verifier, but expected to be respected when building the op. 
- - The terminator of the region is an instance of `hlfir.do_concurrent.loop`. - - For example, a 2D loop nest would be represented as follows: - ``` - hlfir.do_concurrent { - %i = fir.alloca i32 - %j = fir.alloca i32 - hlfir.do_concurrent.loop ... - } - ``` - }]; - - let regions = (region SizedRegion<1>:$region); - - let assemblyFormat = "$region attr-dict"; - let hasVerifier = 1; -} - -def hlfir_DoConcurrentLoopOp : hlfir_Op<"do_concurrent.loop", - [AttrSizedOperandSegments, DeclareOpInterfaceMethods, - Terminator, NoTerminator, SingleBlock, ParentOneOf<["DoConcurrentOp"]>]> { - let summary = "do concurrent loop"; - - let description = [{ - An operation that models a Fortran `do concurrent` loop's header and block. - This is a single-region single-block terminator op that is expected to - terminate the region of a `omp.do_concurrent` wrapper op. - - This op borrows from both `scf.parallel` and `fir.do_loop` ops. Similar to - `scf.parallel`, a loop nest takes 3 groups of SSA values as operands that - represent the lower bounds, upper bounds, and steps. Similar to `fir.do_loop` - the op takes one additional group of SSA values to represent reductions. - - The body region **does not** have a terminator. - - For example, a 2D loop nest with 2 reductions (sum and max) would be - represented as follows: - ``` - // The wrapper of the loop - hlfir.do_concurrent { - %i = fir.alloca i32 - %j = fir.alloca i32 - - // The actual `do concurrent` loop - hlfir.do_concurrent.loop - (%i_iv, %j_iv) = (%i_lb, %j_lb) to (%i_ub, %j_ub) step (%i_st, %j_st) - reduce(#fir.reduce_attr -> %sum : !fir.ref, - #fir.reduce_attr -> %max : !fir.ref) { - - %0 = fir.convert %i_iv : (index) -> i32 - fir.store %0 to %i : !fir.ref - - %1 = fir.convert %j_iv : (index) -> i32 - fir.store %1 to %j : !fir.ref - - // ... loop body goes here ... - } - } - ``` - - Description of arguments: - - `lowerBound`: The group of SSA values for the nest's lower bounds. 
- - `upperBound`: The group of SSA values for the nest's upper bounds. - - `step`: The group of SSA values for the nest's steps. - - `reduceOperands`: The reduction SSA values, if any. - - `reduceAttrs`: Attributes to store reduction operations, if any. - - `loopAnnotation`: Loop metadata to be passed down the compiler pipeline to - LLVM. - }]; - - let arguments = (ins - Variadic:$lowerBound, - Variadic:$upperBound, - Variadic:$step, - Variadic:$reduceOperands, - OptionalAttr:$reduceAttrs, - OptionalAttr:$loopAnnotation - ); - - let regions = (region SizedRegion<1>:$region); - - let hasCustomAssemblyFormat = 1; - let hasVerifier = 1; - - let extraClassDeclaration = [{ - // Get Number of reduction operands - unsigned getNumReduceOperands() { - return getReduceOperands().size(); - } - }]; -} #endif // FORTRAN_DIALECT_HLFIR_OPS diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp index 90202f3cee588..474577b986372 100644 --- a/flang/lib/Optimizer/Dialect/FIROps.cpp +++ b/flang/lib/Optimizer/Dialect/FIROps.cpp @@ -4748,6 +4748,167 @@ void fir::BoxTotalElementsOp::getCanonicalizationPatterns( patterns.add(context); } +//===----------------------------------------------------------------------===// +// DoConcurrentOp +//===----------------------------------------------------------------------===// + +llvm::LogicalResult fir::DoConcurrentOp::verify() { + mlir::Block *body = getBody(); + + if (body->empty()) + return emitOpError("body cannot be empty"); + + if (!body->mightHaveTerminator() || + !mlir::isa(body->getTerminator())) + return emitOpError("must be terminated by 'fir.do_concurrent.loop'"); + + return mlir::success(); +} + +//===----------------------------------------------------------------------===// +// DoConcurrentLoopOp +//===----------------------------------------------------------------------===// + +mlir::ParseResult fir::DoConcurrentLoopOp::parse(mlir::OpAsmParser &parser, + mlir::OperationState &result) { + auto 
&builder = parser.getBuilder(); + // Parse an opening `(` followed by induction variables followed by `)` + llvm::SmallVector ivs; + if (parser.parseArgumentList(ivs, mlir::OpAsmParser::Delimiter::Paren)) + return mlir::failure(); + + // Parse loop bounds. + llvm::SmallVector lower; + if (parser.parseEqual() || + parser.parseOperandList(lower, ivs.size(), + mlir::OpAsmParser::Delimiter::Paren) || + parser.resolveOperands(lower, builder.getIndexType(), result.operands)) + return mlir::failure(); + + llvm::SmallVector upper; + if (parser.parseKeyword("to") || + parser.parseOperandList(upper, ivs.size(), + mlir::OpAsmParser::Delimiter::Paren) || + parser.resolveOperands(upper, builder.getIndexType(), result.operands)) + return mlir::failure(); + + // Parse step values. + llvm::SmallVector steps; + if (parser.parseKeyword("step") || + parser.parseOperandList(steps, ivs.size(), + mlir::OpAsmParser::Delimiter::Paren) || + parser.resolveOperands(steps, builder.getIndexType(), result.operands)) + return mlir::failure(); + + llvm::SmallVector reduceOperands; + llvm::SmallVector reduceArgTypes; + if (succeeded(parser.parseOptionalKeyword("reduce"))) { + // Parse reduction attributes and variables. + llvm::SmallVector attributes; + if (failed(parser.parseCommaSeparatedList( + mlir::AsmParser::Delimiter::Paren, [&]() { + if (parser.parseAttribute(attributes.emplace_back()) || + parser.parseArrow() || + parser.parseOperand(reduceOperands.emplace_back()) || + parser.parseColonType(reduceArgTypes.emplace_back())) + return mlir::failure(); + return mlir::success(); + }))) + return mlir::failure(); + // Resolve input operands. 
+ for (auto operand_type : llvm::zip(reduceOperands, reduceArgTypes)) + if (parser.resolveOperand(std::get<0>(operand_type), + std::get<1>(operand_type), result.operands)) + return mlir::failure(); + llvm::SmallVector arrayAttr(attributes.begin(), + attributes.end()); + result.addAttribute(getReduceAttrsAttrName(result.name), + builder.getArrayAttr(arrayAttr)); + } + + // Now parse the body. + mlir::Region *body = result.addRegion(); + for (auto &iv : ivs) + iv.type = builder.getIndexType(); + if (parser.parseRegion(*body, ivs)) + return mlir::failure(); + + // Set `operandSegmentSizes` attribute. + result.addAttribute(DoConcurrentLoopOp::getOperandSegmentSizeAttr(), + builder.getDenseI32ArrayAttr( + {static_cast(lower.size()), + static_cast(upper.size()), + static_cast(steps.size()), + static_cast(reduceOperands.size())})); + + // Parse attributes. + if (parser.parseOptionalAttrDict(result.attributes)) + return mlir::failure(); + + return mlir::success(); +} + +void fir::DoConcurrentLoopOp::print(mlir::OpAsmPrinter &p) { + p << " (" << getBody()->getArguments() << ") = (" << getLowerBound() + << ") to (" << getUpperBound() << ") step (" << getStep() << ")"; + + if (!getReduceOperands().empty()) { + p << " reduce("; + auto attrs = getReduceAttrsAttr(); + auto operands = getReduceOperands(); + llvm::interleaveComma(llvm::zip(attrs, operands), p, [&](auto it) { + p << std::get<0>(it) << " -> " << std::get<1>(it) << " : " + << std::get<1>(it).getType(); + }); + p << ')'; + } + + p << ' '; + p.printRegion(getRegion(), /*printEntryBlockArgs=*/false); + p.printOptionalAttrDict( + (*this)->getAttrs(), + /*elidedAttrs=*/{DoConcurrentLoopOp::getOperandSegmentSizeAttr(), + DoConcurrentLoopOp::getReduceAttrsAttrName()}); +} + +llvm::SmallVector fir::DoConcurrentLoopOp::getLoopRegions() { + return {&getRegion()}; +} + +llvm::LogicalResult fir::DoConcurrentLoopOp::verify() { + mlir::Operation::operand_range lbValues = getLowerBound(); + mlir::Operation::operand_range ubValues = 
getUpperBound(); + mlir::Operation::operand_range stepValues = getStep(); + + if (lbValues.empty()) + return emitOpError( + "needs at least one tuple element for lowerBound, upperBound and step"); + + if (lbValues.size() != ubValues.size() || + ubValues.size() != stepValues.size()) + return emitOpError("different number of tuple elements for lowerBound, " + "upperBound or step"); + + // Check that the body defines the same number of block arguments as the + // number of tuple elements in step. + mlir::Block *body = getBody(); + if (body->getNumArguments() != stepValues.size()) + return emitOpError() << "expects the same number of induction variables: " + << body->getNumArguments() + << " as bound and step values: " << stepValues.size(); + for (auto arg : body->getArguments()) + if (!arg.getType().isIndex()) + return emitOpError( + "expects arguments for the induction variable to be of index type"); + + auto reduceAttrs = getReduceAttrsAttr(); + if (getNumReduceOperands() != (reduceAttrs ? 
reduceAttrs.size() : 0)) + return emitOpError( + "mismatch in number of reduction variables and reduction attributes"); + + return mlir::success(); +} + //===----------------------------------------------------------------------===// // FIROpsDialect //===----------------------------------------------------------------------===// diff --git a/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp b/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp index 4c19ea414aabb..8851a3a7187b9 100644 --- a/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp +++ b/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp @@ -12,7 +12,6 @@ #include "flang/Optimizer/HLFIR/HLFIROps.h" -#include "flang/Optimizer/Dialect/FIRAttr.h" #include "flang/Optimizer/Dialect/FIROpsSupport.h" #include "flang/Optimizer/Dialect/FIRType.h" #include "flang/Optimizer/Dialect/Support/FIRContext.h" @@ -2247,168 +2246,6 @@ llvm::LogicalResult hlfir::EvaluateInMemoryOp::verify() { return mlir::success(); } -//===----------------------------------------------------------------------===// -// DoConcurrentOp -//===----------------------------------------------------------------------===// - -llvm::LogicalResult hlfir::DoConcurrentOp::verify() { - mlir::Block *body = getBody(); - - if (body->empty()) - return emitOpError("body cannot be empty"); - - if (!body->mightHaveTerminator() || - !mlir::isa(body->getTerminator())) - return emitOpError("must be terminated by 'hlfir.do_concurrent.loop'"); - - return mlir::success(); -} - -//===----------------------------------------------------------------------===// -// DoConcurrentLoopOp -//===----------------------------------------------------------------------===// - -mlir::ParseResult -hlfir::DoConcurrentLoopOp::parse(mlir::OpAsmParser &parser, - mlir::OperationState &result) { - auto &builder = parser.getBuilder(); - // Parse an opening `(` followed by induction variables followed by `)` - llvm::SmallVector ivs; - if (parser.parseArgumentList(ivs, mlir::OpAsmParser::Delimiter::Paren)) - return 
mlir::failure(); - - // Parse loop bounds. - llvm::SmallVector lower; - if (parser.parseEqual() || - parser.parseOperandList(lower, ivs.size(), - mlir::OpAsmParser::Delimiter::Paren) || - parser.resolveOperands(lower, builder.getIndexType(), result.operands)) - return mlir::failure(); - - llvm::SmallVector upper; - if (parser.parseKeyword("to") || - parser.parseOperandList(upper, ivs.size(), - mlir::OpAsmParser::Delimiter::Paren) || - parser.resolveOperands(upper, builder.getIndexType(), result.operands)) - return mlir::failure(); - - // Parse step values. - llvm::SmallVector steps; - if (parser.parseKeyword("step") || - parser.parseOperandList(steps, ivs.size(), - mlir::OpAsmParser::Delimiter::Paren) || - parser.resolveOperands(steps, builder.getIndexType(), result.operands)) - return mlir::failure(); - - llvm::SmallVector reduceOperands; - llvm::SmallVector reduceArgTypes; - if (succeeded(parser.parseOptionalKeyword("reduce"))) { - // Parse reduction attributes and variables. - llvm::SmallVector attributes; - if (failed(parser.parseCommaSeparatedList( - mlir::AsmParser::Delimiter::Paren, [&]() { - if (parser.parseAttribute(attributes.emplace_back()) || - parser.parseArrow() || - parser.parseOperand(reduceOperands.emplace_back()) || - parser.parseColonType(reduceArgTypes.emplace_back())) - return mlir::failure(); - return mlir::success(); - }))) - return mlir::failure(); - // Resolve input operands. - for (auto operand_type : llvm::zip(reduceOperands, reduceArgTypes)) - if (parser.resolveOperand(std::get<0>(operand_type), - std::get<1>(operand_type), result.operands)) - return mlir::failure(); - llvm::SmallVector arrayAttr(attributes.begin(), - attributes.end()); - result.addAttribute(getReduceAttrsAttrName(result.name), - builder.getArrayAttr(arrayAttr)); - } - - // Now parse the body. 
- mlir::Region *body = result.addRegion(); - for (auto &iv : ivs) - iv.type = builder.getIndexType(); - if (parser.parseRegion(*body, ivs)) - return mlir::failure(); - - // Set `operandSegmentSizes` attribute. - result.addAttribute(DoConcurrentLoopOp::getOperandSegmentSizeAttr(), - builder.getDenseI32ArrayAttr( - {static_cast(lower.size()), - static_cast(upper.size()), - static_cast(steps.size()), - static_cast(reduceOperands.size())})); - - // Parse attributes. - if (parser.parseOptionalAttrDict(result.attributes)) - return mlir::failure(); - - return mlir::success(); -} - -void hlfir::DoConcurrentLoopOp::print(mlir::OpAsmPrinter &p) { - p << " (" << getBody()->getArguments() << ") = (" << getLowerBound() - << ") to (" << getUpperBound() << ") step (" << getStep() << ")"; - - if (!getReduceOperands().empty()) { - p << " reduce("; - auto attrs = getReduceAttrsAttr(); - auto operands = getReduceOperands(); - llvm::interleaveComma(llvm::zip(attrs, operands), p, [&](auto it) { - p << std::get<0>(it) << " -> " << std::get<1>(it) << " : " - << std::get<1>(it).getType(); - }); - p << ')'; - } - - p << ' '; - p.printRegion(getRegion(), /*printEntryBlockArgs=*/false); - p.printOptionalAttrDict( - (*this)->getAttrs(), - /*elidedAttrs=*/{DoConcurrentLoopOp::getOperandSegmentSizeAttr(), - DoConcurrentLoopOp::getReduceAttrsAttrName()}); -} - -llvm::SmallVector hlfir::DoConcurrentLoopOp::getLoopRegions() { - return {&getRegion()}; -} - -llvm::LogicalResult hlfir::DoConcurrentLoopOp::verify() { - mlir::Operation::operand_range lbValues = getLowerBound(); - mlir::Operation::operand_range ubValues = getUpperBound(); - mlir::Operation::operand_range stepValues = getStep(); - - if (lbValues.empty()) - return emitOpError( - "needs at least one tuple element for lowerBound, upperBound and step"); - - if (lbValues.size() != ubValues.size() || - ubValues.size() != stepValues.size()) - return emitOpError("different number of tuple elements for lowerBound, " - "upperBound or step"); - - 
// Check that the body defines the same number of block arguments as the - // number of tuple elements in step. - mlir::Block *body = getBody(); - if (body->getNumArguments() != stepValues.size()) - return emitOpError() << "expects the same number of induction variables: " - << body->getNumArguments() - << " as bound and step values: " << stepValues.size(); - for (auto arg : body->getArguments()) - if (!arg.getType().isIndex()) - return emitOpError( - "expects arguments for the induction variable to be of index type"); - - auto reduceAttrs = getReduceAttrsAttr(); - if (getNumReduceOperands() != (reduceAttrs ? reduceAttrs.size() : 0)) - return emitOpError( - "mismatch in number of reduction variables and reduction attributes"); - - return mlir::success(); -} - #include "flang/Optimizer/HLFIR/HLFIROpInterfaces.cpp.inc" #define GET_OP_CLASSES #include "flang/Optimizer/HLFIR/HLFIREnums.cpp.inc" diff --git a/flang/test/HLFIR/do_concurrent.fir b/flang/test/Fir/do_concurrent.fir similarity index 84% rename from flang/test/HLFIR/do_concurrent.fir rename to flang/test/Fir/do_concurrent.fir index aef9db2236a57..8e80ffb9c7b0b 100644 --- a/flang/test/HLFIR/do_concurrent.fir +++ b/flang/test/Fir/do_concurrent.fir @@ -1,11 +1,11 @@ -// Test hlfir.do_concurrent operation parse, verify (no errors), and unparse +// Test fir.do_concurrent operation parse, verify (no errors), and unparse // RUN: fir-opt %s | fir-opt | FileCheck %s func.func @dc_1d(%i_lb: index, %i_ub: index, %i_st: index) { - hlfir.do_concurrent { + fir.do_concurrent { %i = fir.alloca i32 - hlfir.do_concurrent.loop (%i_iv) = (%i_lb) to (%i_ub) step (%i_st) { + fir.do_concurrent.loop (%i_iv) = (%i_lb) to (%i_ub) step (%i_st) { %0 = fir.convert %i_iv : (index) -> i32 fir.store %0 to %i : !fir.ref } @@ -15,9 +15,9 @@ func.func @dc_1d(%i_lb: index, %i_ub: index, %i_st: index) { // CHECK-LABEL: func.func @dc_1d // CHECK-SAME: (%[[I_LB:.*]]: index, %[[I_UB:.*]]: index, %[[I_ST:.*]]: index) -// CHECK: hlfir.do_concurrent { 
+// CHECK: fir.do_concurrent { // CHECK: %[[I:.*]] = fir.alloca i32 -// CHECK: hlfir.do_concurrent.loop (%[[I_IV:.*]]) = (%[[I_LB]]) to (%[[I_UB]]) step (%[[I_ST]]) { +// CHECK: fir.do_concurrent.loop (%[[I_IV:.*]]) = (%[[I_LB]]) to (%[[I_UB]]) step (%[[I_ST]]) { // CHECK: %[[I_IV_CVT:.*]] = fir.convert %[[I_IV]] : (index) -> i32 // CHECK: fir.store %[[I_IV_CVT]] to %[[I]] : !fir.ref // CHECK: } @@ -25,10 +25,10 @@ func.func @dc_1d(%i_lb: index, %i_ub: index, %i_st: index) { func.func @dc_2d(%i_lb: index, %i_ub: index, %i_st: index, %j_lb: index, %j_ub: index, %j_st: index) { - hlfir.do_concurrent { + fir.do_concurrent { %i = fir.alloca i32 %j = fir.alloca i32 - hlfir.do_concurrent.loop + fir.do_concurrent.loop (%i_iv, %j_iv) = (%i_lb, %j_lb) to (%i_ub, %j_ub) step (%i_st, %j_st) { %0 = fir.convert %i_iv : (index) -> i32 fir.store %0 to %i : !fir.ref @@ -42,10 +42,10 @@ func.func @dc_2d(%i_lb: index, %i_ub: index, %i_st: index, // CHECK-LABEL: func.func @dc_2d // CHECK-SAME: (%[[I_LB:.*]]: index, %[[I_UB:.*]]: index, %[[I_ST:.*]]: index, %[[J_LB:.*]]: index, %[[J_UB:.*]]: index, %[[J_ST:.*]]: index) -// CHECK: hlfir.do_concurrent { +// CHECK: fir.do_concurrent { // CHECK: %[[I:.*]] = fir.alloca i32 // CHECK: %[[J:.*]] = fir.alloca i32 -// CHECK: hlfir.do_concurrent.loop +// CHECK: fir.do_concurrent.loop // CHECK-SAME: (%[[I_IV:.*]], %[[J_IV:.*]]) = (%[[I_LB]], %[[J_LB]]) to (%[[I_UB]], %[[J_UB]]) step (%[[I_ST]], %[[J_ST]]) { // CHECK: %[[I_IV_CVT:.*]] = fir.convert %[[I_IV]] : (index) -> i32 // CHECK: fir.store %[[I_IV_CVT]] to %[[I]] : !fir.ref @@ -58,10 +58,10 @@ func.func @dc_2d_reduction(%i_lb: index, %i_ub: index, %i_st: index, %j_lb: index, %j_ub: index, %j_st: index) { %sum = fir.alloca i32 - hlfir.do_concurrent { + fir.do_concurrent { %i = fir.alloca i32 %j = fir.alloca i32 - hlfir.do_concurrent.loop + fir.do_concurrent.loop (%i_iv, %j_iv) = (%i_lb, %j_lb) to (%i_ub, %j_ub) step (%i_st, %j_st) reduce(#fir.reduce_attr -> %sum : !fir.ref) { %0 = fir.convert 
%i_iv : (index) -> i32 @@ -79,10 +79,10 @@ func.func @dc_2d_reduction(%i_lb: index, %i_ub: index, %i_st: index, // CHECK: %[[SUM:.*]] = fir.alloca i32 -// CHECK: hlfir.do_concurrent { +// CHECK: fir.do_concurrent { // CHECK: %[[I:.*]] = fir.alloca i32 // CHECK: %[[J:.*]] = fir.alloca i32 -// CHECK: hlfir.do_concurrent.loop +// CHECK: fir.do_concurrent.loop // CHECK-SAME: (%[[I_IV:.*]], %[[J_IV:.*]]) = (%[[I_LB]], %[[J_LB]]) to (%[[I_UB]], %[[J_UB]]) step (%[[I_ST]], %[[J_ST]]) reduce(#fir.reduce_attr -> %[[SUM]] : !fir.ref) { // CHECK: %[[I_IV_CVT:.*]] = fir.convert %[[I_IV]] : (index) -> i32 // CHECK: fir.store %[[I_IV_CVT]] to %[[I]] : !fir.ref diff --git a/flang/test/Fir/invalid.fir b/flang/test/Fir/invalid.fir index d5db644eeddb2..88906890a9237 100644 --- a/flang/test/Fir/invalid.fir +++ b/flang/test/Fir/invalid.fir @@ -1162,3 +1162,98 @@ func.func @bad_box_total_elements(%arg0: !fir.ref>>) %0 = fir.box_total_elements %arg0 : (!fir.ref>>) -> i32 return %0 : i32 } + +// ----- + +func.func @empty_dc_wrapper_body() { + // expected-error@+1 {{'fir.do_concurrent' op expects a non-empty block}} + fir.do_concurrent { + } + return +} + +// ----- + +func.func @dc_wrong_terminator() { + // expected-error@+1 {{'fir.do_concurrent' op must be terminated by 'fir.do_concurrent.loop'}} + fir.do_concurrent { + llvm.return + } + return +} + +// ----- + +func.func @dc_0d() { + // expected-error@+2 {{'fir.do_concurrent.loop' op needs at least one tuple element for lowerBound, upperBound and step}} + fir.do_concurrent { + fir.do_concurrent.loop () = () to () step () { + %tmp = fir.alloca i32 + } + } + return +} + +// ----- + +func.func @dc_invalid_parent(%arg0: index, %arg1: index) { + // expected-error@+1 {{'fir.do_concurrent.loop' op expects parent op 'fir.do_concurrent'}} + "fir.do_concurrent.loop"(%arg0, %arg1) <{operandSegmentSizes = array}> ({ + ^bb0(%arg2: index): + %tmp = "fir.alloca"() <{in_type = i32, operandSegmentSizes = array}> : () -> !fir.ref + }) : (index, index) -> 
() + return +} + +// ----- + +func.func @dc_invalid_control(%arg0: index, %arg1: index) { + // expected-error@+2 {{'fir.do_concurrent.loop' op different number of tuple elements for lowerBound, upperBound or step}} + fir.do_concurrent { + "fir.do_concurrent.loop"(%arg0, %arg1) <{operandSegmentSizes = array}> ({ + ^bb0(%arg2: index): + %tmp = "fir.alloca"() <{in_type = i32, operandSegmentSizes = array}> : () -> !fir.ref + }) : (index, index) -> () + } + return +} + +// ----- + +func.func @dc_invalid_ind_var(%arg0: index, %arg1: index) { + // expected-error@+2 {{'fir.do_concurrent.loop' op expects the same number of induction variables: 2 as bound and step values: 1}} + fir.do_concurrent { + "fir.do_concurrent.loop"(%arg0, %arg1, %arg0) <{operandSegmentSizes = array}> ({ + ^bb0(%arg3: index, %arg4: index): + %tmp = "fir.alloca"() <{in_type = i32, operandSegmentSizes = array}> : () -> !fir.ref + }) : (index, index, index) -> () + } + return +} + +// ----- + +func.func @dc_invalid_ind_var_type(%arg0: index, %arg1: index) { + // expected-error@+2 {{'fir.do_concurrent.loop' op expects arguments for the induction variable to be of index type}} + fir.do_concurrent { + "fir.do_concurrent.loop"(%arg0, %arg1, %arg0) <{operandSegmentSizes = array}> ({ + ^bb0(%arg3: i32): + %tmp = "fir.alloca"() <{in_type = i32, operandSegmentSizes = array}> : () -> !fir.ref + }) : (index, index, index) -> () + } + return +} + +// ----- + +func.func @dc_invalid_reduction(%arg0: index, %arg1: index) { + %sum = fir.alloca i32 + // expected-error@+2 {{'fir.do_concurrent.loop' op mismatch in number of reduction variables and reduction attributes}} + fir.do_concurrent { + "fir.do_concurrent.loop"(%arg0, %arg1, %arg0, %sum) <{operandSegmentSizes = array}> ({ + ^bb0(%arg3: index): + %tmp = "fir.alloca"() <{in_type = i32, operandSegmentSizes = array}> : () -> !fir.ref + }) : (index, index, index, !fir.ref) -> () + } + return +} diff --git a/flang/test/HLFIR/invalid.fir b/flang/test/HLFIR/invalid.fir 
index e14284f916bd9..d61efe0062e69 100644 --- a/flang/test/HLFIR/invalid.fir +++ b/flang/test/HLFIR/invalid.fir @@ -1555,98 +1555,3 @@ func.func @bad_reshape(%arg0: !hlfir.expr<1x!fir.char<1,2>>, %arg1: !hlfir.expr< %0 = hlfir.reshape %arg0 %arg1 pad %arg2 : (!hlfir.expr<1x!fir.char<1,2>>, !hlfir.expr<1xi32>, !hlfir.expr<1x!fir.char<2,?>>) -> !hlfir.expr> return } - -// ----- - -func.func @empty_dc_wrapper_body() { - // expected-error@+1 {{'hlfir.do_concurrent' op expects a non-empty block}} - hlfir.do_concurrent { - } - return -} - -// ----- - -func.func @dc_wrong_terminator() { - // expected-error@+1 {{'hlfir.do_concurrent' op must be terminated by 'hlfir.do_concurrent.loop'}} - hlfir.do_concurrent { - llvm.return - } - return -} - -// ----- - -func.func @dc_0d() { - // expected-error@+2 {{'hlfir.do_concurrent.loop' op needs at least one tuple element for lowerBound, upperBound and step}} - hlfir.do_concurrent { - hlfir.do_concurrent.loop () = () to () step () { - %tmp = fir.alloca i32 - } - } - return -} - -// ----- - -func.func @dc_invalid_parent(%arg0: index, %arg1: index) { - // expected-error@+1 {{'hlfir.do_concurrent.loop' op expects parent op 'hlfir.do_concurrent'}} - "hlfir.do_concurrent.loop"(%arg0, %arg1) <{operandSegmentSizes = array}> ({ - ^bb0(%arg2: index): - %tmp = "fir.alloca"() <{in_type = i32, operandSegmentSizes = array}> : () -> !fir.ref - }) : (index, index) -> () - return -} - -// ----- - -func.func @dc_invalid_control(%arg0: index, %arg1: index) { - // expected-error@+2 {{'hlfir.do_concurrent.loop' op different number of tuple elements for lowerBound, upperBound or step}} - hlfir.do_concurrent { - "hlfir.do_concurrent.loop"(%arg0, %arg1) <{operandSegmentSizes = array}> ({ - ^bb0(%arg2: index): - %tmp = "fir.alloca"() <{in_type = i32, operandSegmentSizes = array}> : () -> !fir.ref - }) : (index, index) -> () - } - return -} - -// ----- - -func.func @dc_invalid_ind_var(%arg0: index, %arg1: index) { - // expected-error@+2 
{{'hlfir.do_concurrent.loop' op expects the same number of induction variables: 2 as bound and step values: 1}} - hlfir.do_concurrent { - "hlfir.do_concurrent.loop"(%arg0, %arg1, %arg0) <{operandSegmentSizes = array}> ({ - ^bb0(%arg3: index, %arg4: index): - %tmp = "fir.alloca"() <{in_type = i32, operandSegmentSizes = array}> : () -> !fir.ref - }) : (index, index, index) -> () - } - return -} - -// ----- - -func.func @dc_invalid_ind_var_type(%arg0: index, %arg1: index) { - // expected-error@+2 {{'hlfir.do_concurrent.loop' op expects arguments for the induction variable to be of index type}} - hlfir.do_concurrent { - "hlfir.do_concurrent.loop"(%arg0, %arg1, %arg0) <{operandSegmentSizes = array}> ({ - ^bb0(%arg3: i32): - %tmp = "fir.alloca"() <{in_type = i32, operandSegmentSizes = array}> : () -> !fir.ref - }) : (index, index, index) -> () - } - return -} - -// ----- - -func.func @dc_invalid_reduction(%arg0: index, %arg1: index) { - %sum = fir.alloca i32 - // expected-error@+2 {{'hlfir.do_concurrent.loop' op mismatch in number of reduction variables and reduction attributes}} - hlfir.do_concurrent { - "hlfir.do_concurrent.loop"(%arg0, %arg1, %arg0, %sum) <{operandSegmentSizes = array}> ({ - ^bb0(%arg3: index): - %tmp = "fir.alloca"() <{in_type = i32, operandSegmentSizes = array}> : () -> !fir.ref - }) : (index, index, index, !fir.ref) -> () - } - return -}