From b3919715ebe223b39dd789dcd471a864666d7008 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Fri, 19 Sep 2025 14:43:48 +0200 Subject: [PATCH 01/12] Improve canonloop/iv naming --- mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 237 +++++++++++++----- .../Dialect/OpenMP/cli-canonical_loop.mlir | 127 ++++++++-- .../Dialect/OpenMP/cli-unroll-heuristic.mlir | 28 +-- 3 files changed, 292 insertions(+), 100 deletions(-) diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index 3d70e28ed23ab..cf549a6bb50b4 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -77,6 +77,178 @@ struct LLVMPointerPointerLikeModel }; } // namespace +/// Generate a name of a canonical loop nest of the format +/// `<prefix>(_s<num>_r<num>)*` that describes its nesting inside parent +/// operations (`_r<num>`) and that operation's region (`_s<num>`). The region +/// number is omitted if the parent operation has just one region. If a loop +/// nest just consists of canonical loops nested inside each other, also uses +/// `d<num>` where <num> is the nesting depth of the loop. 
+static std::string generateLoopNestingName(StringRef prefix, + CanonicalLoopOp op) { + struct Component { + // A region argument of an operation + Operation *parentOp; + size_t regionInOpIdx; + bool isOnlyRegionInOp; + bool skipRegion; + + // An operation somewhere in a parent region + Operation *thisOp; + Region *parentRegion; + size_t opInRegionIdx; + bool isOnlyOpInRegion; + bool skipOp; + int depth = -1; + }; + SmallVector components; + + // Gather a list of parent regions and operations, and the position within + // their parent + Operation *o = op.getOperation(); + while (o) { + if (o->hasTrait()) + break; + + // Operation within a region + Region *r = o->getParentRegion(); + if (!r) + break; + + llvm::ReversePostOrderTraversal traversal(&r->getBlocks().front()); + size_t idx = 0; + bool found = false; + size_t sequentialIdx = -1; + bool isOnlyLoop = true; + for (Block *b : traversal) { + for (Operation &op : *b) { + if (&op == o && !found) { + sequentialIdx = idx; + found = true; + } + if (op.getNumRegions()) { + idx += 1; + if (idx > 1) + isOnlyLoop = false; + } + if (found && !isOnlyLoop) + break; + } + } + + Component &comp = components.emplace_back(); + comp.thisOp = o; + comp.parentRegion = r; + comp.opInRegionIdx = sequentialIdx; + comp.isOnlyOpInRegion = isOnlyLoop; + + // Region argument of an operation + Operation *parent = r->getParentOp(); + + comp.parentOp = parent; + comp.regionInOpIdx = 0; + comp.isOnlyRegionInOp = true; + if (parent && parent->getRegions().size() > 1) { + auto getRegionIndex = [](Operation *o, Region *r) { + for (auto [idx, region] : llvm::enumerate(o->getRegions())) { + if (&region == r) + return idx; + } + llvm_unreachable("Region not child of its parent operation"); + }; + comp.regionInOpIdx = getRegionIndex(parent, r); + comp.isOnlyRegionInOp = false; + } + + if (!parent) + break; + + // next parent + o = parent; + } + + // Reorder components from outermost to innermost + std::reverse(components.begin(), components.end()); 
+ + // Determine whether a component is not needed + for (auto &c : components) { + c.skipRegion = c.isOnlyRegionInOp; + c.skipOp = c.isOnlyOpInRegion && !isa(c.thisOp); + } + + // Find runs of perfect nests and merge them into a single component + int curNestRoot = 0; + int curNestDepth = 1; + auto mergeLoopNest = [&](int innermost) { + auto outermost = curNestRoot; + + // Don't do enything if it does not consist of at least 2 loops + if (outermost < innermost) { + for (auto i : llvm::seq(outermost + 1, innermost)) { + components[i].skipOp = true; + } + components[innermost].depth = curNestDepth; + } + + // Start new root + curNestRoot = innermost + 1; + curNestDepth = 1; + }; + for (auto &&[i, c] : llvm::enumerate(components)) { + if (i <= curNestRoot) + continue; + + // Check whether this region can be included + if (!c.skipRegion) { + mergeLoopNest(i); + continue; + } + + if (c.skipOp) + continue; + + if (!c.isOnlyOpInRegion) { + mergeLoopNest(i); + continue; + } + + curNestDepth += 1; + } + + // Finalize innermost loop nest + mergeLoopNest(components.size() - 1); + + // Outermost loop does not need a suffix if it has no sibling + for (auto &c : components) { + if (c.skipOp) + continue; + if (c.isOnlyOpInRegion) + c.skipOp = true; + break; + } + + // Compile name + SmallString<64> Name{prefix}; + for (auto &c : components) { + auto addComponent = [&Name](char letter, int64_t idx) { + Name += '_'; + Name += letter; + Name += idx; + }; + + if (!c.skipRegion) + addComponent('r', c.regionInOpIdx); + + if (!c.skipOp) { + if (c.depth >= 0) + addComponent('d', c.depth - 1); + else + addComponent('s', c.opInRegionIdx); + } + } + + return Name.str().str(); +} + void OpenMPDialect::initialize() { addOperations< #define GET_OP_LIST @@ -3141,67 +3313,7 @@ void NewCliOp::getAsmResultNames(OpAsmSetValueNameFn setNameFn) { cliName = TypeSwitch(gen->getOwner()) .Case([&](CanonicalLoopOp op) { - // Find the canonical loop nesting: For each ancestor add a - // "+_r" suffix (in 
reverse order) - SmallVector components; - Operation *o = op.getOperation(); - while (o) { - if (o->hasTrait()) - break; - - Region *r = o->getParentRegion(); - if (!r) - break; - - auto getSequentialIndex = [](Region *r, Operation *o) { - llvm::ReversePostOrderTraversal traversal( - &r->getBlocks().front()); - size_t idx = 0; - for (Block *b : traversal) { - for (Operation &op : *b) { - if (&op == o) - return idx; - // Only consider operations that are containers as - // possible children - if (!op.getRegions().empty()) - idx += 1; - } - } - llvm_unreachable("Operation not part of the region"); - }; - size_t sequentialIdx = getSequentialIndex(r, o); - components.push_back(("s" + Twine(sequentialIdx)).str()); - - Operation *parent = r->getParentOp(); - if (!parent) - break; - - // If the operation has more than one region, also count in - // which of the regions - if (parent->getRegions().size() > 1) { - auto getRegionIndex = [](Operation *o, Region *r) { - for (auto [idx, region] : - llvm::enumerate(o->getRegions())) { - if (&region == r) - return idx; - } - llvm_unreachable("Region not child its parent operation"); - }; - size_t regionIdx = getRegionIndex(parent, r); - components.push_back(("r" + Twine(regionIdx)).str()); - } - - // next parent - o = parent; - } - - SmallString<64> Name("canonloop"); - for (const std::string &s : reverse(components)) { - Name += '_'; - Name += s; - } - - return Name; + return generateLoopNestingName("canonloop", op); }) .Case([&](UnrollHeuristicOp op) -> std::string { llvm_unreachable("heuristic unrolling does not generate a loop"); @@ -3292,7 +3404,8 @@ void CanonicalLoopOp::getAsmBlockNames(OpAsmSetBlockNameFn setNameFn) { void CanonicalLoopOp::getAsmBlockArgumentNames(Region &region, OpAsmSetValueNameFn setNameFn) { - setNameFn(region.getArgument(0), "iv"); + std::string ivName = generateLoopNestingName("iv", *this); + setNameFn(region.getArgument(0), ivName); } void CanonicalLoopOp::print(OpAsmPrinter &p) { diff --git 
a/mlir/test/Dialect/OpenMP/cli-canonical_loop.mlir b/mlir/test/Dialect/OpenMP/cli-canonical_loop.mlir index adadb8bbac49d..874e3922805ec 100644 --- a/mlir/test/Dialect/OpenMP/cli-canonical_loop.mlir +++ b/mlir/test/Dialect/OpenMP/cli-canonical_loop.mlir @@ -1,5 +1,5 @@ -// RUN: mlir-opt %s | FileCheck %s -// RUN: mlir-opt %s | mlir-opt | FileCheck %s +// RUN: mlir-opt %s | FileCheck %s --enable-var-scope +// RUN: mlir-opt %s | mlir-opt | FileCheck %s --enable-var-scope // CHECK-LABEL: @omp_canonloop_raw( @@ -24,10 +24,10 @@ func.func @omp_canonloop_raw(%tc : i32) -> () { func.func @omp_canonloop_sequential_raw(%tc : i32) -> () { // CHECK-NEXT: %canonloop_s0 = omp.new_cli %canonloop_s0 = "omp.new_cli" () : () -> (!omp.cli) - // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc]]) { + // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv_s0 : i32 in range(%[[tc]]) { "omp.canonical_loop" (%tc, %canonloop_s0) ({ ^bb_first(%iv_first: i32): - // CHECK-NEXT: = llvm.add %iv, %iv : i32 + // CHECK-NEXT: = llvm.add %iv_s0, %iv_s0 : i32 %newval = llvm.add %iv_first, %iv_first : i32 // CHECK-NEXT: omp.terminator omp.terminator @@ -36,7 +36,7 @@ func.func @omp_canonloop_sequential_raw(%tc : i32) -> () { // CHECK-NEXT: %canonloop_s1 = omp.new_cli %canonloop_s1 = "omp.new_cli" () : () -> (!omp.cli) - // CHECK-NEXT: omp.canonical_loop(%canonloop_s1) %iv : i32 in range(%[[tc]]) { + // CHECK-NEXT: omp.canonical_loop(%canonloop_s1) %iv_s1 : i32 in range(%[[tc]]) { "omp.canonical_loop" (%tc, %canonloop_s1) ({ ^bb_second(%iv_second: i32): // CHECK: omp.terminator @@ -52,17 +52,17 @@ func.func @omp_canonloop_sequential_raw(%tc : i32) -> () { // CHECK-LABEL: @omp_nested_canonloop_raw( // CHECK-SAME: %[[tc_outer:.+]]: i32, %[[tc_inner:.+]]: i32) func.func @omp_nested_canonloop_raw(%tc_outer : i32, %tc_inner : i32) -> () { - // CHECK-NEXT: %canonloop_s0 = omp.new_cli + // CHECK-NEXT: %canonloop = omp.new_cli %outer = "omp.new_cli" () : () -> (!omp.cli) - // CHECK-NEXT: 
%canonloop_s0_s0 = omp.new_cli + // CHECK-NEXT: %canonloop_d1 = omp.new_cli %inner = "omp.new_cli" () : () -> (!omp.cli) - // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc_outer]]) { + // CHECK-NEXT: omp.canonical_loop(%canonloop) %iv : i32 in range(%[[tc_outer]]) { "omp.canonical_loop" (%tc_outer, %outer) ({ ^bb_outer(%iv_outer: i32): - // CHECK-NEXT: omp.canonical_loop(%canonloop_s0_s0) %iv_0 : i32 in range(%[[tc_inner]]) { + // CHECK-NEXT: omp.canonical_loop(%canonloop_d1) %iv_d1 : i32 in range(%[[tc_inner]]) { "omp.canonical_loop" (%tc_inner, %inner) ({ ^bb_inner(%iv_inner: i32): - // CHECK-NEXT: = llvm.add %iv, %iv_0 : i32 + // CHECK-NEXT: = llvm.add %iv, %iv_d1 : i32 %newval = llvm.add %iv_outer, %iv_inner: i32 // CHECK-NEXT: omp.terminator omp.terminator @@ -108,16 +108,24 @@ func.func @omp_canonloop_constant_pretty() -> () { func.func @omp_canonloop_sequential_pretty(%tc : i32) -> () { // CHECK-NEXT: %canonloop_s0 = omp.new_cli %canonloop_s0 = omp.new_cli - // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc]]) { - omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%tc) { + // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv_s0 : i32 in range(%[[tc]]) { + omp.canonical_loop(%canonloop_s0) %iv_s0 : i32 in range(%tc) { // CHECK-NEXT: omp.terminator omp.terminator } // CHECK: %canonloop_s1 = omp.new_cli %canonloop_s1 = omp.new_cli - // CHECK-NEXT: omp.canonical_loop(%canonloop_s1) %iv : i32 in range(%[[tc]]) { - omp.canonical_loop(%canonloop_s1) %iv_0 : i32 in range(%tc) { + // CHECK-NEXT: omp.canonical_loop(%canonloop_s1) %iv_s1 : i32 in range(%[[tc]]) { + omp.canonical_loop(%canonloop_s1) %iv_s1 : i32 in range(%tc) { + // CHECK-NEXT: omp.terminator + omp.terminator + } + + // CHECK: %canonloop_s2 = omp.new_cli + %canonloop_s2 = omp.new_cli + // CHECK-NEXT: omp.canonical_loop(%canonloop_s2) %iv_s2 : i32 in range(%[[tc]]) { + omp.canonical_loop(%canonloop_s2) %iv_s2 : i32 in range(%tc) { // CHECK-NEXT: 
omp.terminator omp.terminator } @@ -126,17 +134,17 @@ func.func @omp_canonloop_sequential_pretty(%tc : i32) -> () { } -// CHECK-LABEL: @omp_canonloop_nested_pretty( +// CHECK-LABEL: @omp_canonloop_2d_nested_pretty( // CHECK-SAME: %[[tc:.+]]: i32) -func.func @omp_canonloop_nested_pretty(%tc : i32) -> () { - // CHECK-NEXT: %canonloop_s0 = omp.new_cli - %canonloop_s0 = omp.new_cli - // CHECK-NEXT: %canonloop_s0_s0 = omp.new_cli - %canonloop_s0_s0 = omp.new_cli - // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc]]) { - omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%tc) { - // CHECK-NEXT: omp.canonical_loop(%canonloop_s0_s0) %iv_0 : i32 in range(%[[tc]]) { - omp.canonical_loop(%canonloop_s0_s0) %iv_0 : i32 in range(%tc) { +func.func @omp_canonloop_2d_nested_pretty(%tc : i32) -> () { + // CHECK-NEXT: %canonloop = omp.new_cli + %canonloop = omp.new_cli + // CHECK-NEXT: %canonloop_d1 = omp.new_cli + %canonloop_d1 = omp.new_cli + // CHECK-NEXT: omp.canonical_loop(%canonloop) %iv : i32 in range(%[[tc]]) { + omp.canonical_loop(%canonloop) %iv : i32 in range(%tc) { + // CHECK-NEXT: omp.canonical_loop(%canonloop_d1) %iv_d1 : i32 in range(%[[tc]]) { + omp.canonical_loop(%canonloop_d1) %iv_d1 : i32 in range(%tc) { // CHECK: omp.terminator omp.terminator } @@ -147,6 +155,77 @@ func.func @omp_canonloop_nested_pretty(%tc : i32) -> () { } +// CHECK-LABEL: @omp_canonloop_3d_nested_pretty( +// CHECK-SAME: %[[tc:.+]]: i32) +func.func @omp_canonloop_3d_nested_pretty(%tc : i32) -> () { + // CHECK: %canonloop = omp.new_cli + %canonloop = omp.new_cli + // CHECK: %canonloop_d1 = omp.new_cli + %canonloop_d1 = omp.new_cli + // CHECK: %canonloop_d2 = omp.new_cli + %canonloop_d2 = omp.new_cli + // CHECK-NEXT: omp.canonical_loop(%canonloop) %iv : i32 in range(%[[tc]]) { + omp.canonical_loop(%canonloop) %iv : i32 in range(%tc) { + // CHECK-NEXT: omp.canonical_loop(%canonloop_d1) %iv_d1 : i32 in range(%[[tc]]) { + omp.canonical_loop(%canonloop_d1) %iv_1d : i32 in 
range(%tc) { + // CHECK-NEXT: omp.canonical_loop(%canonloop_d2) %iv_d2 : i32 in range(%[[tc]]) { + omp.canonical_loop(%canonloop_d2) %iv_d2 : i32 in range(%tc) { + // CHECK-NEXT: omp.terminator + omp.terminator + // CHECK-NEXT: } + } + // CHECK-NEXT: omp.terminator + omp.terminator + // CHECK-NEXT: } + } + // CHECK-NEXT: omp.terminator + omp.terminator + } + + return +} + + +// CHECK-LABEL: @omp_canonloop_sequential_nested_pretty( +// CHECK-SAME: %[[tc:.+]]: i32) +func.func @omp_canonloop_sequential_nested_pretty(%tc : i32) -> () { + // CHECK-NEXT: %canonloop_s0 = omp.new_cli + %canonloop_s0 = omp.new_cli + // CHECK-NEXT: %canonloop_s0_d1 = omp.new_cli + %canonloop_s0_d1 = omp.new_cli + // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv_s0 : i32 in range(%[[tc]]) { + omp.canonical_loop(%canonloop_s0) %iv_s0 : i32 in range(%tc) { + // CHECK-NEXT: omp.canonical_loop(%canonloop_s0_d1) %iv_s0_d1 : i32 in range(%[[tc]]) { + omp.canonical_loop(%canonloop_s0_d1) %iv_s0_d1 : i32 in range(%tc) { + // CHECK-NEXT: omp.terminator + omp.terminator + // CHECK-NEXT: } + } + // CHECK-NEXT: omp.terminator + omp.terminator + // CHECK-NEXT: } + } + + // CHECK-NEXT: %canonloop_s1 = omp.new_cli + %canonloop_s1 = omp.new_cli + // CHECK-NEXT: %canonloop_s1_d1 = omp.new_cli + %canonloop_s1_d1 = omp.new_cli + // CHECK-NEXT: omp.canonical_loop(%canonloop_s1) %iv_s1 : i32 in range(%[[tc]]) { + omp.canonical_loop(%canonloop_s1) %iv_s1 : i32 in range(%tc) { + // CHECK-NEXT: omp.canonical_loop(%canonloop_s1_d1) %iv_s1_d1 : i32 in range(%[[tc]]) { + omp.canonical_loop(%canonloop_s1_d1) %iv_s1d1 : i32 in range(%tc) { + // CHECK-NEXT: omp.terminator + omp.terminator + // CHECK-NEXT: } + } + // CHECK-NEXT: omp.terminator + omp.terminator + } + + return +} + + // CHECK-LABEL: @omp_newcli_unused( // CHECK-SAME: ) func.func @omp_newcli_unused() -> () { diff --git a/mlir/test/Dialect/OpenMP/cli-unroll-heuristic.mlir b/mlir/test/Dialect/OpenMP/cli-unroll-heuristic.mlir index 
cda7d0b500166..16884f4245e76 100644 --- a/mlir/test/Dialect/OpenMP/cli-unroll-heuristic.mlir +++ b/mlir/test/Dialect/OpenMP/cli-unroll-heuristic.mlir @@ -1,18 +1,18 @@ -// RUN: mlir-opt %s | FileCheck %s -// RUN: mlir-opt %s | mlir-opt | FileCheck %s +// RUN: mlir-opt %s | FileCheck %s --enable-var-scope +// RUN: mlir-opt %s | mlir-opt | FileCheck %s --enable-var-scope // CHECK-LABEL: @omp_unroll_heuristic_raw( // CHECK-SAME: %[[tc:.+]]: i32) { func.func @omp_unroll_heuristic_raw(%tc : i32) -> () { - // CHECK-NEXT: %canonloop_s0 = omp.new_cli + // CHECK-NEXT: %canonloop = omp.new_cli %canonloop = "omp.new_cli" () : () -> (!omp.cli) - // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc]]) { + // CHECK-NEXT: omp.canonical_loop(%canonloop) %iv : i32 in range(%[[tc]]) { "omp.canonical_loop" (%tc, %canonloop) ({ ^bb0(%iv: i32): omp.terminator }) : (i32, !omp.cli) -> () - // CHECK: omp.unroll_heuristic(%canonloop_s0) + // CHECK: omp.unroll_heuristic(%canonloop) "omp.unroll_heuristic" (%canonloop) : (!omp.cli) -> () return } @@ -22,12 +22,12 @@ func.func @omp_unroll_heuristic_raw(%tc : i32) -> () { // CHECK-SAME: %[[tc:.+]]: i32) { func.func @omp_unroll_heuristic_pretty(%tc : i32) -> () { // CHECK-NEXT: %[[CANONLOOP:.+]] = omp.new_cli - %canonloop = "omp.new_cli" () : () -> (!omp.cli) - // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc]]) { + %canonloop = omp.new_cli + // CHECK-NEXT: omp.canonical_loop(%canonloop) %iv : i32 in range(%[[tc]]) { omp.canonical_loop(%canonloop) %iv : i32 in range(%tc) { omp.terminator } - // CHECK: omp.unroll_heuristic(%canonloop_s0) + // CHECK: omp.unroll_heuristic(%canonloop) omp.unroll_heuristic(%canonloop) return } @@ -36,13 +36,13 @@ func.func @omp_unroll_heuristic_pretty(%tc : i32) -> () { // CHECK-LABEL: @omp_unroll_heuristic_nested_pretty( // CHECK-SAME: %[[tc:.+]]: i32) { func.func @omp_unroll_heuristic_nested_pretty(%tc : i32) -> () { - // CHECK-NEXT: %canonloop_s0 = omp.new_cli + // 
CHECK-NEXT: %canonloop = omp.new_cli %cli_outer = omp.new_cli - // CHECK-NEXT: %canonloop_s0_s0 = omp.new_cli + // CHECK-NEXT: %canonloop_d1 = omp.new_cli %cli_inner = omp.new_cli - // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc]]) { + // CHECK-NEXT: omp.canonical_loop(%canonloop) %iv : i32 in range(%[[tc]]) { omp.canonical_loop(%cli_outer) %iv_outer : i32 in range(%tc) { - // CHECK-NEXT: omp.canonical_loop(%canonloop_s0_s0) %iv_0 : i32 in range(%[[tc]]) { + // CHECK-NEXT: omp.canonical_loop(%canonloop_d1) %iv_d1 : i32 in range(%[[tc]]) { omp.canonical_loop(%cli_inner) %iv_inner : i32 in range(%tc) { // CHECK: omp.terminator omp.terminator @@ -51,9 +51,9 @@ func.func @omp_unroll_heuristic_nested_pretty(%tc : i32) -> () { omp.terminator } - // CHECK: omp.unroll_heuristic(%canonloop_s0) + // CHECK: omp.unroll_heuristic(%canonloop) omp.unroll_heuristic(%cli_outer) - // CHECK-NEXT: omp.unroll_heuristic(%canonloop_s0_s0) + // CHECK-NEXT: omp.unroll_heuristic(%canonloop_d1) omp.unroll_heuristic(%cli_inner) return } From ce66eec648f6415c199d6115f3be4d188eee59ba Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Tue, 23 Sep 2025 12:19:41 +0200 Subject: [PATCH 02/12] Avoid compiler warning --- mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index cf549a6bb50b4..1674891410194 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -170,22 +170,21 @@ static std::string generateLoopNestingName(StringRef prefix, std::reverse(components.begin(), components.end()); // Determine whether a component is not needed - for (auto &c : components) { + for (Component &c : components) { c.skipRegion = c.isOnlyRegionInOp; c.skipOp = c.isOnlyOpInRegion && !isa(c.thisOp); } // Find runs of perfect nests and merge them into a single component - 
int curNestRoot = 0; - int curNestDepth = 1; - auto mergeLoopNest = [&](int innermost) { - auto outermost = curNestRoot; + size_t curNestRoot = 0; + size_t curNestDepth = 1; + auto mergeLoopNest = [&](size_t innermost) { + size_t outermost = curNestRoot; // Don't do enything if it does not consist of at least 2 loops if (outermost < innermost) { - for (auto i : llvm::seq(outermost + 1, innermost)) { + for (auto i : llvm::seq(outermost + 1, innermost)) components[i].skipOp = true; - } components[innermost].depth = curNestDepth; } From 3a141d6729e26a7eab821b86eee240ab4bfa322f Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Tue, 23 Sep 2025 12:59:14 +0200 Subject: [PATCH 03/12] Add perfect-nest and rectangular loop nest tests --- flang/lib/Semantics/resolve-directives.cpp | 146 ++++++++++++++++++++- flang/test/Semantics/OpenMP/do08.f90 | 1 + flang/test/Semantics/OpenMP/do13.f90 | 1 + flang/test/Semantics/OpenMP/do22.f90 | 73 +++++++++++ 4 files changed, 215 insertions(+), 6 deletions(-) create mode 100644 flang/test/Semantics/OpenMP/do22.f90 diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index 2d1bec9968593..5f2c9f676099c 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -149,6 +149,9 @@ template class DirectiveAttributeVisitor { dataSharingAttributeObjects_.clear(); } bool HasDataSharingAttributeObject(const Symbol &); + std::tuple + GetLoopBounds(const parser::DoConstruct &); const parser::Name *GetLoopIndex(const parser::DoConstruct &); const parser::DoConstruct *GetDoConstructIf( const parser::ExecutionPartConstruct &); @@ -933,6 +936,13 @@ class OmpAttributeVisitor : DirectiveAttributeVisitor { privateDataSharingAttributeObjects_.clear(); } + /// Check that loops in the loop nest are perfectly nested, as well that lower + /// bound, upper bound, and step expressions do not use the iv + /// of a surrounding loop of the associated loops nest. 
+ /// We do not support non-perfectly nested loops nor non-rectangular loops yet + /// (both introduced in OpenMP 5.0) + void CheckPerfectNestAndRectangularLoop(const parser::OpenMPLoopConstruct &x); + // Predetermined DSA rules void PrivatizeAssociatedLoopIndexAndCheckLoopLevel( const parser::OpenMPLoopConstruct &); @@ -1009,14 +1019,15 @@ bool DirectiveAttributeVisitor::HasDataSharingAttributeObject( } template -const parser::Name *DirectiveAttributeVisitor::GetLoopIndex( - const parser::DoConstruct &x) { +std::tuple +DirectiveAttributeVisitor::GetLoopBounds(const parser::DoConstruct &x) { using Bounds = parser::LoopControl::Bounds; if (x.GetLoopControl()) { if (const Bounds * b{std::get_if(&x.GetLoopControl()->u)}) { - return &b->name.thing; - } else { - return nullptr; + auto &&step = b->step; + return {&b->name.thing, &b->lower, &b->upper, + step.has_value() ? &step.value() : nullptr}; } } else { context_ @@ -1024,8 +1035,15 @@ const parser::Name *DirectiveAttributeVisitor::GetLoopIndex( "Loop control is not present in the DO LOOP"_err_en_US) .Attach(GetContext().directiveSource, "associated with the enclosing LOOP construct"_en_US); - return nullptr; } + return {nullptr, nullptr, nullptr, nullptr}; +} + +template +const parser::Name *DirectiveAttributeVisitor::GetLoopIndex( + const parser::DoConstruct &x) { + auto &&[iv, lb, ub, step] = GetLoopBounds(x); + return iv; } template @@ -1957,6 +1975,7 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPLoopConstruct &x) { } } } + CheckPerfectNestAndRectangularLoop(x); PrivatizeAssociatedLoopIndexAndCheckLoopLevel(x); ordCollapseLevel = GetNumAffectedLoopsFromLoopConstruct(x) + 1; return true; @@ -2152,6 +2171,121 @@ void OmpAttributeVisitor::CollectNumAffectedLoopsFromClauses( } } +void OmpAttributeVisitor::CheckPerfectNestAndRectangularLoop( + const parser::OpenMPLoopConstruct + &x) { // GetAssociatedLoopLevelFromClauses(clauseList); + auto &&dirContext = GetContext(); + std::int64_t 
dirDepth{dirContext.associatedLoopLevel}; + if (dirDepth <= 0) + return; + + Symbol::Flag ivDSA; + if (!llvm::omp::allSimdSet.test(GetContext().directive)) { + ivDSA = Symbol::Flag::OmpPrivate; + } else if (dirDepth == 1) { + ivDSA = Symbol::Flag::OmpLinear; + } else { + ivDSA = Symbol::Flag::OmpLastPrivate; + } + + auto checkExprHasSymbols = [&](llvm::SmallVector &ivs, + const parser::ScalarExpr *bound) { + if (ivs.empty()) + return; + + if (auto boundExpr{semantics::AnalyzeExpr(context_, *bound)}) { + semantics::UnorderedSymbolSet boundSyms = + evaluate::CollectSymbols(*boundExpr); + for (auto iv : ivs) { + if (boundSyms.count(*iv) != 0) { + // TODO: Point to occurence of iv in boundExpr, directiveSource as a + // note + context_.Say(dirContext.directiveSource, + "Trip count must be computable and invariant"_err_en_US); + } + } + } + }; + + // Skip over loop transformation directives + const parser::OpenMPLoopConstruct *innerMostLoop = &x; + const parser::NestedConstruct *innerMostNest = nullptr; + while (auto &optLoopCons{ + std::get>(innerMostLoop->t)}) { + innerMostNest = &(optLoopCons.value()); + if (const auto *innerLoop{ + std::get_if>( + innerMostNest)}) { + innerMostLoop = &(innerLoop->value()); + } else + break; + } + + if (!innerMostNest) + return; + const auto &outer{std::get_if(innerMostNest)}; + if (!outer) + return; + + llvm::SmallVector ivs; + int curLevel = 0; + const parser::DoConstruct *loop{outer}; + while (true) { + auto [iv, lb, ub, step] = GetLoopBounds(*loop); + + if (lb) + checkExprHasSymbols(ivs, lb); + if (ub) + checkExprHasSymbols(ivs, ub); + if (step) + checkExprHasSymbols(ivs, step); + if (iv) { + if (auto *symbol{ResolveOmp(*iv, ivDSA, currScope())}) + ivs.push_back(symbol); + } + + // Stop after processing all affected loops + if (curLevel + 1 >= dirDepth) + break; + + // Recurse into nested loop + const auto &block{std::get(loop->t)}; + if (block.empty()) { + // Insufficient number of nested loops already reported by + // 
CheckAssocLoopLevel() + break; + } + + loop = GetDoConstructIf(block.front()); + if (!loop) { + // Insufficient number of nested loops already reported by + // CheckAssocLoopLevel() + break; + } + + auto checkPerfectNest = [&, this]() { + auto blockSize = block.size(); + if (blockSize <= 1) + return; + + if (parser::Unwrap(x)) + blockSize -= 1; + + if (blockSize <= 1) + return; + + // Non-perfectly nested loop + // TODO: Point to non-DO statement, directiveSource as a note + context_.Say(dirContext.directiveSource, + "Canonical loop nest must be perfectly nested."_err_en_US); + }; + + checkPerfectNest(); + + ++curLevel; + } +} + // 2.15.1.1 Data-sharing Attribute Rules - Predetermined // - The loop iteration variable(s) in the associated do-loop(s) of a do, // parallel do, taskloop, or distribute construct is (are) private. diff --git a/flang/test/Semantics/OpenMP/do08.f90 b/flang/test/Semantics/OpenMP/do08.f90 index 5143dff0dd315..bb3c1d0cd3855 100644 --- a/flang/test/Semantics/OpenMP/do08.f90 +++ b/flang/test/Semantics/OpenMP/do08.f90 @@ -61,6 +61,7 @@ program omp !$omp end do + !ERROR: Canonical loop nest must be perfectly nested. !ERROR: The value of the parameter in the COLLAPSE or ORDERED clause must not be larger than the number of nested loops following the construct. !$omp do collapse(3) do 60 i=2,200,2 diff --git a/flang/test/Semantics/OpenMP/do13.f90 b/flang/test/Semantics/OpenMP/do13.f90 index 6e9d1dddade4c..8f7844f4136f9 100644 --- a/flang/test/Semantics/OpenMP/do13.f90 +++ b/flang/test/Semantics/OpenMP/do13.f90 @@ -59,6 +59,7 @@ program omp !$omp end do + !ERROR: Canonical loop nest must be perfectly nested. !ERROR: The value of the parameter in the COLLAPSE or ORDERED clause must not be larger than the number of nested loops following the construct. 
!$omp do collapse(3) do 60 i=1,10 diff --git a/flang/test/Semantics/OpenMP/do22.f90 b/flang/test/Semantics/OpenMP/do22.f90 new file mode 100644 index 0000000000000..9d96d3af54e5c --- /dev/null +++ b/flang/test/Semantics/OpenMP/do22.f90 @@ -0,0 +1,73 @@ +! RUN: %python %S/../test_errors.py %s %flang -fopenmp +! Check for existence of loop following a DO directive + +subroutine do_imperfectly_nested_before + integer i, j + + !ERROR: The value of the parameter in the COLLAPSE or ORDERED clause must not be larger than the number of nested loops following the construct. + !$omp do collapse(2) + do i = 1, 10 + print *, i + do j = 1, 10 + print *, i, j + end do + end do + !$omp end do +end subroutine + + +subroutine do_imperfectly_nested_behind + integer i, j + + !ERROR: Canonical loop nest must be perfectly nested. + !$omp do collapse(2) + do i = 1, 10 + do j = 1, 10 + print *, i, j + end do + print *, i + end do + !$omp end do +end subroutine + + +subroutine do_nonrectangular_lb + integer i, j + + !ERROR: Trip count must be computable and invariant + !$omp do collapse(2) + do i = 1, 10 + do j = i, 10 + print *, i, j + end do + end do + !$omp end do +end subroutine + + +subroutine do_nonrectangular_ub + integer i, j + + !ERROR: Trip count must be computable and invariant + !$omp do collapse(2) + do i = 1, 10 + do j = 0, i + print *, i, j + end do + end do + !$omp end do +end subroutine + + +subroutine do_nonrectangular_step + integer i, j + + !ERROR: Trip count must be computable and invariant + !$omp do collapse(2) + do i = 1, 10 + do j = 1, 10, i + print *, i, j + end do + end do + !$omp end do +end subroutine From ff4eccb41e2973e1179734dd455f1e797e89d9be Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Tue, 23 Sep 2025 14:45:45 +0200 Subject: [PATCH 04/12] Add omp.tile operation --- .../mlir/Dialect/OpenMP/OpenMPClauses.td | 29 ++ .../mlir/Dialect/OpenMP/OpenMPOpBase.td | 69 +++- mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 63 ++- 
mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 387 +++++++++++++++--- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 42 ++ .../Dialect/OpenMP/cli-canonical_loop.mlir | 127 ++++-- mlir/test/Dialect/OpenMP/cli-tile.mlir | 138 +++++++ .../Dialect/OpenMP/cli-unroll-heuristic.mlir | 28 +- mlir/test/Dialect/OpenMP/invalid-tile.mlir | 119 ++++++ .../test/Target/LLVMIR/openmp-cli-tile01.mlir | 101 +++++ .../test/Target/LLVMIR/openmp-cli-tile02.mlir | 190 +++++++++ mlir/test/mlir-tblgen/op-format-invalid.td | 2 +- .../tools/mlir-tblgen/AttrOrTypeFormatGen.cpp | 1 + mlir/tools/mlir-tblgen/FormatGen.cpp | 2 +- mlir/tools/mlir-tblgen/OpFormatGen.cpp | 1 + 15 files changed, 1157 insertions(+), 142 deletions(-) create mode 100644 mlir/test/Dialect/OpenMP/cli-tile.mlir create mode 100644 mlir/test/Dialect/OpenMP/invalid-tile.mlir create mode 100644 mlir/test/Target/LLVMIR/openmp-cli-tile01.mlir create mode 100644 mlir/test/Target/LLVMIR/openmp-cli-tile02.mlir diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td index 1eda5e4bc1618..8e43c4284d078 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td @@ -995,6 +995,35 @@ class OpenMP_NumTeamsClauseSkip< def OpenMP_NumTeamsClause : OpenMP_NumTeamsClauseSkip<>; +//===----------------------------------------------------------------------===// +// V5.1: [10.1.2] `sizes` clause +//===----------------------------------------------------------------------===// + +class OpenMP_SizesClauseSkip< + bit traits = false, bit arguments = false, bit assemblyFormat = false, + bit description = false, bit extraClassDeclaration = false + > : OpenMP_Clause { + let arguments = (ins + Variadic:$sizes + ); + + let optAssemblyFormat = [{ + `sizes` `(` $sizes `:` type($sizes) `)` + }]; + + let description = [{ + The `sizes` clauses defines the size of a grid over a multi-dimensional + logical iteration space. 
This grid is used for loop transformations such as + `tile` and `strip`. The size per dimension can be a variable, but only + values that are at least 2 make sense. It is not specified what happens + when smaller values are used, but should still result in a loop nest that + executes each logical iteration once. + }]; +} + +def OpenMP_SizesClause : OpenMP_SizesClauseSkip<>; + //===----------------------------------------------------------------------===// // V5.2: [10.1.2] `num_threads` clause //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpBase.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpBase.td index bbcfb87fa03c6..5ad4e4b5b61d1 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpBase.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpBase.td @@ -38,6 +38,44 @@ def OpenMP_MapBoundsType : OpenMP_Type<"MapBounds", "map_bounds_ty"> { let summary = "Type for representing omp map clause bounds information"; } +//===---------------------------------------------------------------------===// +// OpenMP Canonical Loop Info Type +//===---------------------------------------------------------------------===// + +def CanonicalLoopInfoType : OpenMP_Type<"CanonicalLoopInfo", "cli"> { + let summary = "Type for representing a reference to a canonical loop"; + let description = [{ + A variable of type CanonicalLoopInfo refers to an OpenMP-compatible + canonical loop in the same function. Values of this type are not + available at runtime and therefore cannot be used by the program itself, + i.e. an opaque type. It is similar to the transform dialect's + `!transform.interface` type, but instead of implementing an interface + for each transformation, the OpenMP dialect itself defines possible + operations on this type. + + A value of type CanonicalLoopInfoType (in the following: CLI) value can be + + 1. created by omp.new_cli. + 2. 
passed to omp.canonical_loop to associate the loop to that CLI. A CLI + can only be associated once. + 3. passed to an omp loop transformation operation that modifies the loop + associated with the CLI. The CLI is the "applyee" and the operation is + the consumer. A CLI can only be consumed once. + 4. passed to an omp loop transformation operation to associate the cli with + a result of that transformation. The CLI is the "generatee" and the + operation is the generator. + + A CLI cannot + + 1. be returned from a function. + 2. be passed to operations that are not specifically designed to take a + CanonicalLoopInfoType, including AnyType. + + A CLI directly corresponds to an object of + OpenMPIRBuilder's CanonicalLoopInfo struct when lowering to LLVM-IR. + }]; +} + //===----------------------------------------------------------------------===// // Base classes for OpenMP dialect operations. //===----------------------------------------------------------------------===// @@ -211,8 +249,35 @@ class OpenMP_Op traits = [], // Doesn't actually create a C++ base class (only defines default values for // tablegen classes that derive from this). Use LoopTransformationInterface // instead for common operations. -class OpenMPTransform_Op traits = []> : - OpenMP_Op], traits) > { +class OpenMPTransform_Op traits = [], + list clauses = []> : + OpenMP_Op], traits), + clauses = clauses> { +} + +// Base clause for loop transformations using the standard syntax. +// +// omp.opname ($generatees) <- ($applyees) clause(...) clause(...) ... +// omp.opname ($applyees) clause(...) clause(...) ... 
+// +// $generatees is optional and is assumed to be empty if omitted +class OpenMPTransformBase_Op traits = [], + list clauses = []> : + OpenMPTransform_Op { + + let arguments = !con( + (ins Variadic:$generatees, + Variadic:$applyees + ), clausesArgs); + + let assemblyFormat = [{ custom($generatees, $applyees) }] + # clausesAssemblyFormat + # [{ attr-dict }]; } #endif // OPENMP_OP_BASE diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td index 5c77e215467e4..b73091ea0ca53 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -357,44 +357,6 @@ def SingleOp : OpenMP_Op<"single", traits = [ let hasVerifier = 1; } -//===---------------------------------------------------------------------===// -// OpenMP Canonical Loop Info Type -//===---------------------------------------------------------------------===// - -def CanonicalLoopInfoType : OpenMP_Type<"CanonicalLoopInfo", "cli"> { - let summary = "Type for representing a reference to a canonical loop"; - let description = [{ - A variable of type CanonicalLoopInfo refers to an OpenMP-compatible - canonical loop in the same function. Values of this type are not - available at runtime and therefore cannot be used by the program itself, - i.e. an opaque type. It is similar to the transform dialect's - `!transform.interface` type, but instead of implementing an interface - for each transformation, the OpenMP dialect itself defines possible - operations on this type. - - A value of type CanonicalLoopInfoType (in the following: CLI) value can be - - 1. created by omp.new_cli. - 2. passed to omp.canonical_loop to associate the loop to that CLI. A CLI - can only be associated once. - 3. passed to an omp loop transformation operation that modifies the loop - associated with the CLI. The CLI is the "applyee" and the operation is - the consumer. A CLI can only be consumed once. - 4. 
passed to an omp loop transformation operation to associate the cli with - a result of that transformation. The CLI is the "generatee" and the - operation is the generator. - - A CLI cannot - - 1. be returned from a function. - 2. be passed to operations that are not specifically designed to take a - CanonicalLoopInfoType, including AnyType. - - A CLI directly corresponds to an object of - OpenMPIRBuilder's CanonicalLoopInfo struct when lowering to LLVM-IR. - }]; -} - //===---------------------------------------------------------------------===// // OpenMP Canonical Loop Info Creation //===---------------------------------------------------------------------===// @@ -563,6 +525,31 @@ def UnrollHeuristicOp : OpenMPTransform_Op<"unroll_heuristic", []> { let hasCustomAssemblyFormat = 1; } +//===----------------------------------------------------------------------===// +// OpenMP tile operation +//===----------------------------------------------------------------------===// + +def TileOp : OpenMPTransformBase_Op<"tile", + clauses = [OpenMP_SizesClause]> { + let summary = "OpenMP tile operation"; + let description = [{ + Represents the OpenMP tile directive introduced in OpenMP 5.1. + + The construct partitions the logical iteration space of the affected loops + into equally-sized tiles, then creates two sets of nested loops. The outer + loops, called the grid loops, iterate over all tiles. The inner loops, + called the intratile loops, iterate over the logical iterations of a tile. + The sizes clause determines the size of a tile. + + Currently, the affected loops must be rectangular (the tripcount of the + inner loop must not depend on any iv of a surrounding affected loop) and + perfectly nested (except for the innermost affected loop, no operations + other than the nested loop and the terminator in the loop body).
+ }] # clausesDescription; + + let hasVerifier = 1; +} + //===----------------------------------------------------------------------===// // 2.8.3 Workshare Construct //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index 3d70e28ed23ab..d3cc7e55ae155 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -33,6 +33,7 @@ #include "llvm/ADT/TypeSwitch.h" #include "llvm/ADT/bit.h" #include "llvm/Frontend/OpenMP/OMPConstants.h" +#include "llvm/Support/InterleavedRange.h" #include #include #include @@ -77,6 +78,177 @@ struct LLVMPointerPointerLikeModel }; } // namespace +/// Generate a name of a canonical loop nest of the format +/// `(_s_r)*` that describes its nesting inside parent +/// operations (`_r`) and that operation's region (`_s`). The region +/// number is omitted if the parent operation has just one region. If a loop +/// nest just consists of canonical loops nested inside each other, also uses +/// `d` where is the nesting depth of the loop. 
+static std::string generateLoopNestingName(StringRef prefix, + CanonicalLoopOp op) { + struct Component { + // A region argument of an operation + Operation *parentOp; + size_t regionInOpIdx; + bool isOnlyRegionInOp; + bool skipRegion; + + // An operation somewhere in a parent region + Operation *thisOp; + Region *parentRegion; + size_t opInRegionIdx; + bool isOnlyOpInRegion; + bool skipOp; + int depth = -1; + }; + SmallVector components; + + // Gather a list of parent regions and operations, and the position within + // their parent + Operation *o = op.getOperation(); + while (o) { + if (o->hasTrait()) + break; + + // Operation within a region + Region *r = o->getParentRegion(); + if (!r) + break; + + llvm::ReversePostOrderTraversal traversal(&r->getBlocks().front()); + size_t idx = 0; + bool found = false; + size_t sequentialIdx = -1; + bool isOnlyLoop = true; + for (Block *b : traversal) { + for (Operation &op : *b) { + if (&op == o && !found) { + sequentialIdx = idx; + found = true; + } + if (op.getNumRegions()) { + idx += 1; + if (idx > 1) + isOnlyLoop = false; + } + if (found && !isOnlyLoop) + break; + } + } + + Component &comp = components.emplace_back(); + comp.thisOp = o; + comp.parentRegion = r; + comp.opInRegionIdx = sequentialIdx; + comp.isOnlyOpInRegion = isOnlyLoop; + + // Region argument of an operation + Operation *parent = r->getParentOp(); + + comp.parentOp = parent; + comp.regionInOpIdx = 0; + comp.isOnlyRegionInOp = true; + if (parent && parent->getRegions().size() > 1) { + auto getRegionIndex = [](Operation *o, Region *r) { + for (auto [idx, region] : llvm::enumerate(o->getRegions())) { + if (&region == r) + return idx; + } + llvm_unreachable("Region not child of its parent operation"); + }; + comp.regionInOpIdx = getRegionIndex(parent, r); + comp.isOnlyRegionInOp = false; + } + + if (!parent) + break; + + // next parent + o = parent; + } + + // The walk above may stop before recording any component; return the bare + // prefix so that `components.size() - 1` below cannot wrap around. + if (components.empty()) + return prefix.str(); + + // Reorder components from outermost to innermost + std::reverse(components.begin(), components.end());
+ + // Determine whether a component is not needed + for (Component &c : components) { + c.skipRegion = c.isOnlyRegionInOp; + c.skipOp = c.isOnlyOpInRegion && !isa(c.thisOp); + } + + // Find runs of perfect nests and merge them into a single component + size_t curNestRoot = 0; + size_t curNestDepth = 1; + auto mergeLoopNest = [&](size_t innermost) { + size_t outermost = curNestRoot; + + // Don't do anything if it does not consist of at least 2 loops + if (outermost < innermost) { + for (auto i : llvm::seq(outermost + 1, innermost)) + components[i].skipOp = true; + components[innermost].depth = curNestDepth; + } + + // Start new root + curNestRoot = innermost + 1; + curNestDepth = 1; + }; + for (auto &&[i, c] : llvm::enumerate(components)) { + if (i <= curNestRoot) + continue; + + // Check whether this region can be included + if (!c.skipRegion) { + mergeLoopNest(i); + continue; + } + + if (c.skipOp) + continue; + + if (!c.isOnlyOpInRegion) { + mergeLoopNest(i); + continue; + } + + curNestDepth += 1; + } + + // Finalize innermost loop nest + mergeLoopNest(components.size() - 1); + + // Outermost loop does not need a suffix if it has no sibling + for (auto &c : components) { + if (c.skipOp) + continue; + if (c.isOnlyOpInRegion) + c.skipOp = true; + break; + } + + // Compile name + SmallString<64> Name{prefix}; + for (auto &c : components) { + auto addComponent = [&Name](char letter, int64_t idx) { + Name += '_'; + Name += letter; + // Append the index as decimal text; `Name += idx` would select the + // `char` overload and append a single raw character instead. + Name += Twine(idx).str(); + }; + + if (!c.skipRegion) + addComponent('r', c.regionInOpIdx); + + if (!c.skipOp) { + if (c.depth >= 0) + addComponent('d', c.depth - 1); + else + addComponent('s', c.opInRegionIdx); + } + } + + return Name.str().str(); +} + void OpenMPDialect::initialize() { addOperations< #define GET_OP_LIST @@ -3141,71 +3313,29 @@ void NewCliOp::getAsmResultNames(OpAsmSetValueNameFn setNameFn) { cliName = TypeSwitch(gen->getOwner()) .Case([&](CanonicalLoopOp op) { - // Find the canonical loop nesting: For each ancestor add a - // "+_r"
suffix (in reverse order) - SmallVector components; - Operation *o = op.getOperation(); - while (o) { - if (o->hasTrait()) - break; - - Region *r = o->getParentRegion(); - if (!r) - break; - - auto getSequentialIndex = [](Region *r, Operation *o) { - llvm::ReversePostOrderTraversal traversal( - &r->getBlocks().front()); - size_t idx = 0; - for (Block *b : traversal) { - for (Operation &op : *b) { - if (&op == o) - return idx; - // Only consider operations that are containers as - // possible children - if (!op.getRegions().empty()) - idx += 1; - } - } - llvm_unreachable("Operation not part of the region"); - }; - size_t sequentialIdx = getSequentialIndex(r, o); - components.push_back(("s" + Twine(sequentialIdx)).str()); - - Operation *parent = r->getParentOp(); - if (!parent) - break; - - // If the operation has more than one region, also count in - // which of the regions - if (parent->getRegions().size() > 1) { - auto getRegionIndex = [](Operation *o, Region *r) { - for (auto [idx, region] : - llvm::enumerate(o->getRegions())) { - if (®ion == r) - return idx; - } - llvm_unreachable("Region not child its parent operation"); - }; - size_t regionIdx = getRegionIndex(parent, r); - components.push_back(("r" + Twine(regionIdx)).str()); - } - - // next parent - o = parent; - } - - SmallString<64> Name("canonloop"); - for (const std::string &s : reverse(components)) { - Name += '_'; - Name += s; - } - - return Name; + return generateLoopNestingName("canonloop", op); }) .Case([&](UnrollHeuristicOp op) -> std::string { llvm_unreachable("heuristic unrolling does not generate a loop"); }) + .Case([&](TileOp op) -> std::string { + auto [generateesFirst, generateesCount] = + op.getGenerateesODSOperandIndexAndLength(); + unsigned firstGrid = generateesFirst; + unsigned firstIntratile = generateesFirst + generateesCount / 2; + unsigned end = generateesFirst + generateesCount; + unsigned opnum = gen->getOperandNumber(); + // In the OpenMP apply and looprange clauses, indices are 
1-based + if (firstGrid <= opnum && opnum < firstIntratile) { + unsigned gridnum = opnum - firstGrid + 1; + return ("grid" + Twine(gridnum)).str(); + } + if (firstIntratile <= opnum && opnum < end) { + unsigned intratilenum = opnum - firstIntratile + 1; + return ("intratile" + Twine(intratilenum)).str(); + } + llvm_unreachable("Unexpected generatee argument"); + }) .Default([&](Operation *op) { assert(false && "TODO: Custom name for this operation"); return "transformed"; @@ -3292,7 +3422,8 @@ void CanonicalLoopOp::getAsmBlockNames(OpAsmSetBlockNameFn setNameFn) { void CanonicalLoopOp::getAsmBlockArgumentNames(Region ®ion, OpAsmSetValueNameFn setNameFn) { - setNameFn(region.getArgument(0), "iv"); + std::string ivName = generateLoopNestingName("iv", *this); + setNameFn(region.getArgument(0), ivName); } void CanonicalLoopOp::print(OpAsmPrinter &p) { @@ -3433,6 +3564,138 @@ UnrollHeuristicOp::getGenerateesODSOperandIndexAndLength() { return {0, 0}; } +//===----------------------------------------------------------------------===// +// TileOp +//===----------------------------------------------------------------------===// + +static void printLoopTransformClis(OpAsmPrinter &p, TileOp op, + OperandRange generatees, + OperandRange applyees) { + if (!generatees.empty()) + p << '(' << llvm::interleaved(generatees) << ')'; + + if (!applyees.empty()) + p << " <- (" << llvm::interleaved(applyees) << ')'; +} + +static ParseResult parseLoopTransformClis( + OpAsmParser &parser, + SmallVectorImpl &generateesOperands, + SmallVectorImpl &applyeesOperands) { + if (parser.parseOptionalLess()) { + // Syntax 1: generatees present + + if (parser.parseOperandList(generateesOperands, + mlir::OpAsmParser::Delimiter::Paren)) + return failure(); + + if (parser.parseLess()) + return failure(); + } else { + // Syntax 2: generatees omitted + } + + // Parse `<-` (`<` has already been parsed) + if (parser.parseMinus()) + return failure(); + + if (parser.parseOperandList(applyeesOperands, + 
mlir::OpAsmParser::Delimiter::Paren)) + return failure(); + + return success(); +} + +/// Checks that there is one tile size per applyee, that the generatees (if +/// given) are twice as many as the sizes, and that consecutive applyees are +/// omp.canonical_loop ops forming a perfectly nested, rectangular loop nest. +LogicalResult TileOp::verify() { + if (getApplyees().empty()) + return emitOpError() << "must apply to at least one loop"; + + if (getSizes().size() != getApplyees().size()) + return emitOpError() << "there must be one tile size for each applyee"; + + if (!getGeneratees().empty() && + 2 * getSizes().size() != getGeneratees().size()) + return emitOpError() + << "expecting two times the number of generatees than applyees"; + + DenseSet parentIVs; + + Value parent = getApplyees().front(); + for (auto &&applyee : llvm::drop_begin(getApplyees())) { + auto [parentCreate, parentGen, parentCons] = decodeCli(parent); + auto [create, gen, cons] = decodeCli(applyee); + + if (!parentGen) + return emitOpError() << "applyee CLI has no generator"; + + auto parentLoop = dyn_cast_or_null(parentGen->getOwner()); + // The cast yields null when the generator is not an omp.canonical_loop; + // parentLoop is dereferenced below, so it is the value to test here. + if (!parentLoop) + return emitOpError() + << "currently only supports omp.canonical_loop as applyee"; + + parentIVs.insert(parentLoop.getInductionVar()); + + if (!gen) + return emitOpError() << "applyee CLI has no generator"; + auto loop = dyn_cast_or_null(gen->getOwner()); + if (!loop) + return emitOpError() + << "currently only supports omp.canonical_loop as applyee"; + + // Canonical loop must be perfectly nested, i.e.
the body of the parent must + // only contain the omp.canonical_loop of the nested loops, and + // omp.terminator + bool isPerfectlyNested = [&]() { + auto &parentBody = parentLoop.getRegion(); + if (!parentBody.hasOneBlock()) + return false; + auto &parentBlock = parentBody.getBlocks().front(); + + auto nestedLoopIt = parentBlock.begin(); + if (nestedLoopIt == parentBlock.end() || + (&*nestedLoopIt != loop.getOperation())) + return false; + + auto termIt = std::next(nestedLoopIt); + if (termIt == parentBlock.end() || !isa(termIt)) + return false; + + if (std::next(termIt) != parentBlock.end()) + return false; + + return true; + }(); + if (!isPerfectlyNested) + return emitOpError() << "tiled loop nest must be perfectly nested"; + + if (parentIVs.contains(loop.getTripCount())) + return emitOpError() << "tiled loop nest must be rectangular"; + + parent = applyee; + } + + // TODO: The tile sizes must be computed before the loop, but checking this + // requires dominance analysis. For instance: + // + // %canonloop = omp.new_cli + // omp.canonical_loop(%canonloop) %iv : i32 in range(%tc) { + // // write to %x + // omp.terminator + // } + // %ts = llvm.load %x + // omp.tile <- (%canonloop) sizes(%ts : i32) + + return success(); +} + +std::pair TileOp ::getApplyeesODSOperandIndexAndLength() { + return getODSOperandIndexAndLength(odsIndex_applyees); +} + +std::pair TileOp::getGenerateesODSOperandIndexAndLength() { + return getODSOperandIndexAndLength(odsIndex_generatees); +} + //===----------------------------------------------------------------------===// // Critical construct (2.17.1) //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 4921a1990b6e8..171ac61dd66fe 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++
b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -3154,6 +3154,45 @@ applyUnrollHeuristic(omp::UnrollHeuristicOp op, llvm::IRBuilderBase &builder, return success(); } +/// Apply a `#pragma omp tile` / `!$omp tile` transformation using the +/// OpenMPIRBuilder. +/// +/// Looks up the already-translated tile sizes and applyee loops, calls +/// OpenMPIRBuilder::tileLoops on them, maps the op's generatees (if any) to +/// the generated loops, and invalidates the consumed applyees. +static LogicalResult applyTile(omp::TileOp op, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) { + llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); + llvm::OpenMPIRBuilder::LocationDescription loc(builder); + + SmallVector translatedLoops; + SmallVector translatedSizes; + + for (Value size : op.getSizes()) { + llvm::Value *translatedSize = moduleTranslation.lookupValue(size); + assert(translatedSize && + "sizes clause arguments must already be translated"); + translatedSizes.push_back(translatedSize); + } + + for (Value applyee : op.getApplyees()) { + llvm::CanonicalLoopInfo *consBuilderCLI = + moduleTranslation.lookupOMPLoop(applyee); + // Check the lookup result (the translated loop), which is what gets + // passed on to tileLoops below. + assert(consBuilderCLI && "Canonical loop must already been translated"); + translatedLoops.push_back(consBuilderCLI); + } + + auto generatedLoops = + ompBuilder->tileLoops(loc.DL, translatedLoops, translatedSizes); + if (!op.getGeneratees().empty()) { + for (auto [mlirLoop, genLoop] : + zip_equal(op.getGeneratees(), generatedLoops)) + moduleTranslation.mapOmpLoop(mlirLoop, genLoop); + } + + // CLIs can only be consumed once + for (Value applyee : op.getApplyees()) + moduleTranslation.invalidateOmpLoop(applyee); + + return success(); +} + /// Convert an Atomic Ordering attribute to llvm::AtomicOrdering. static llvm::AtomicOrdering convertAtomicOrdering(std::optional ao) { @@ -6196,6 +6235,9 @@ convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder, // the omp.canonical_loop.
return applyUnrollHeuristic(op, builder, moduleTranslation); }) + .Case([&](omp::TileOp op) { + return applyTile(op, builder, moduleTranslation); + }) .Case([&](omp::TargetAllocMemOp) { return convertTargetAllocMemOp(*op, builder, moduleTranslation); }) diff --git a/mlir/test/Dialect/OpenMP/cli-canonical_loop.mlir b/mlir/test/Dialect/OpenMP/cli-canonical_loop.mlir index adadb8bbac49d..874e3922805ec 100644 --- a/mlir/test/Dialect/OpenMP/cli-canonical_loop.mlir +++ b/mlir/test/Dialect/OpenMP/cli-canonical_loop.mlir @@ -1,5 +1,5 @@ -// RUN: mlir-opt %s | FileCheck %s -// RUN: mlir-opt %s | mlir-opt | FileCheck %s +// RUN: mlir-opt %s | FileCheck %s --enable-var-scope +// RUN: mlir-opt %s | mlir-opt | FileCheck %s --enable-var-scope // CHECK-LABEL: @omp_canonloop_raw( @@ -24,10 +24,10 @@ func.func @omp_canonloop_raw(%tc : i32) -> () { func.func @omp_canonloop_sequential_raw(%tc : i32) -> () { // CHECK-NEXT: %canonloop_s0 = omp.new_cli %canonloop_s0 = "omp.new_cli" () : () -> (!omp.cli) - // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc]]) { + // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv_s0 : i32 in range(%[[tc]]) { "omp.canonical_loop" (%tc, %canonloop_s0) ({ ^bb_first(%iv_first: i32): - // CHECK-NEXT: = llvm.add %iv, %iv : i32 + // CHECK-NEXT: = llvm.add %iv_s0, %iv_s0 : i32 %newval = llvm.add %iv_first, %iv_first : i32 // CHECK-NEXT: omp.terminator omp.terminator @@ -36,7 +36,7 @@ func.func @omp_canonloop_sequential_raw(%tc : i32) -> () { // CHECK-NEXT: %canonloop_s1 = omp.new_cli %canonloop_s1 = "omp.new_cli" () : () -> (!omp.cli) - // CHECK-NEXT: omp.canonical_loop(%canonloop_s1) %iv : i32 in range(%[[tc]]) { + // CHECK-NEXT: omp.canonical_loop(%canonloop_s1) %iv_s1 : i32 in range(%[[tc]]) { "omp.canonical_loop" (%tc, %canonloop_s1) ({ ^bb_second(%iv_second: i32): // CHECK: omp.terminator @@ -52,17 +52,17 @@ func.func @omp_canonloop_sequential_raw(%tc : i32) -> () { // CHECK-LABEL: @omp_nested_canonloop_raw( // CHECK-SAME: 
%[[tc_outer:.+]]: i32, %[[tc_inner:.+]]: i32) func.func @omp_nested_canonloop_raw(%tc_outer : i32, %tc_inner : i32) -> () { - // CHECK-NEXT: %canonloop_s0 = omp.new_cli + // CHECK-NEXT: %canonloop = omp.new_cli %outer = "omp.new_cli" () : () -> (!omp.cli) - // CHECK-NEXT: %canonloop_s0_s0 = omp.new_cli + // CHECK-NEXT: %canonloop_d1 = omp.new_cli %inner = "omp.new_cli" () : () -> (!omp.cli) - // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc_outer]]) { + // CHECK-NEXT: omp.canonical_loop(%canonloop) %iv : i32 in range(%[[tc_outer]]) { "omp.canonical_loop" (%tc_outer, %outer) ({ ^bb_outer(%iv_outer: i32): - // CHECK-NEXT: omp.canonical_loop(%canonloop_s0_s0) %iv_0 : i32 in range(%[[tc_inner]]) { + // CHECK-NEXT: omp.canonical_loop(%canonloop_d1) %iv_d1 : i32 in range(%[[tc_inner]]) { "omp.canonical_loop" (%tc_inner, %inner) ({ ^bb_inner(%iv_inner: i32): - // CHECK-NEXT: = llvm.add %iv, %iv_0 : i32 + // CHECK-NEXT: = llvm.add %iv, %iv_d1 : i32 %newval = llvm.add %iv_outer, %iv_inner: i32 // CHECK-NEXT: omp.terminator omp.terminator @@ -108,16 +108,24 @@ func.func @omp_canonloop_constant_pretty() -> () { func.func @omp_canonloop_sequential_pretty(%tc : i32) -> () { // CHECK-NEXT: %canonloop_s0 = omp.new_cli %canonloop_s0 = omp.new_cli - // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc]]) { - omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%tc) { + // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv_s0 : i32 in range(%[[tc]]) { + omp.canonical_loop(%canonloop_s0) %iv_s0 : i32 in range(%tc) { // CHECK-NEXT: omp.terminator omp.terminator } // CHECK: %canonloop_s1 = omp.new_cli %canonloop_s1 = omp.new_cli - // CHECK-NEXT: omp.canonical_loop(%canonloop_s1) %iv : i32 in range(%[[tc]]) { - omp.canonical_loop(%canonloop_s1) %iv_0 : i32 in range(%tc) { + // CHECK-NEXT: omp.canonical_loop(%canonloop_s1) %iv_s1 : i32 in range(%[[tc]]) { + omp.canonical_loop(%canonloop_s1) %iv_s1 : i32 in range(%tc) { + // CHECK-NEXT: 
omp.terminator + omp.terminator + } + + // CHECK: %canonloop_s2 = omp.new_cli + %canonloop_s2 = omp.new_cli + // CHECK-NEXT: omp.canonical_loop(%canonloop_s2) %iv_s2 : i32 in range(%[[tc]]) { + omp.canonical_loop(%canonloop_s2) %iv_s2 : i32 in range(%tc) { // CHECK-NEXT: omp.terminator omp.terminator } @@ -126,17 +134,17 @@ func.func @omp_canonloop_sequential_pretty(%tc : i32) -> () { } -// CHECK-LABEL: @omp_canonloop_nested_pretty( +// CHECK-LABEL: @omp_canonloop_2d_nested_pretty( // CHECK-SAME: %[[tc:.+]]: i32) -func.func @omp_canonloop_nested_pretty(%tc : i32) -> () { - // CHECK-NEXT: %canonloop_s0 = omp.new_cli - %canonloop_s0 = omp.new_cli - // CHECK-NEXT: %canonloop_s0_s0 = omp.new_cli - %canonloop_s0_s0 = omp.new_cli - // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc]]) { - omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%tc) { - // CHECK-NEXT: omp.canonical_loop(%canonloop_s0_s0) %iv_0 : i32 in range(%[[tc]]) { - omp.canonical_loop(%canonloop_s0_s0) %iv_0 : i32 in range(%tc) { +func.func @omp_canonloop_2d_nested_pretty(%tc : i32) -> () { + // CHECK-NEXT: %canonloop = omp.new_cli + %canonloop = omp.new_cli + // CHECK-NEXT: %canonloop_d1 = omp.new_cli + %canonloop_d1 = omp.new_cli + // CHECK-NEXT: omp.canonical_loop(%canonloop) %iv : i32 in range(%[[tc]]) { + omp.canonical_loop(%canonloop) %iv : i32 in range(%tc) { + // CHECK-NEXT: omp.canonical_loop(%canonloop_d1) %iv_d1 : i32 in range(%[[tc]]) { + omp.canonical_loop(%canonloop_d1) %iv_d1 : i32 in range(%tc) { // CHECK: omp.terminator omp.terminator } @@ -147,6 +155,77 @@ func.func @omp_canonloop_nested_pretty(%tc : i32) -> () { } +// CHECK-LABEL: @omp_canonloop_3d_nested_pretty( +// CHECK-SAME: %[[tc:.+]]: i32) +func.func @omp_canonloop_3d_nested_pretty(%tc : i32) -> () { + // CHECK: %canonloop = omp.new_cli + %canonloop = omp.new_cli + // CHECK: %canonloop_d1 = omp.new_cli + %canonloop_d1 = omp.new_cli + // CHECK: %canonloop_d2 = omp.new_cli + %canonloop_d2 = omp.new_cli + // 
CHECK-NEXT: omp.canonical_loop(%canonloop) %iv : i32 in range(%[[tc]]) { + omp.canonical_loop(%canonloop) %iv : i32 in range(%tc) { + // CHECK-NEXT: omp.canonical_loop(%canonloop_d1) %iv_d1 : i32 in range(%[[tc]]) { + omp.canonical_loop(%canonloop_d1) %iv_1d : i32 in range(%tc) { + // CHECK-NEXT: omp.canonical_loop(%canonloop_d2) %iv_d2 : i32 in range(%[[tc]]) { + omp.canonical_loop(%canonloop_d2) %iv_d2 : i32 in range(%tc) { + // CHECK-NEXT: omp.terminator + omp.terminator + // CHECK-NEXT: } + } + // CHECK-NEXT: omp.terminator + omp.terminator + // CHECK-NEXT: } + } + // CHECK-NEXT: omp.terminator + omp.terminator + } + + return +} + + +// CHECK-LABEL: @omp_canonloop_sequential_nested_pretty( +// CHECK-SAME: %[[tc:.+]]: i32) +func.func @omp_canonloop_sequential_nested_pretty(%tc : i32) -> () { + // CHECK-NEXT: %canonloop_s0 = omp.new_cli + %canonloop_s0 = omp.new_cli + // CHECK-NEXT: %canonloop_s0_d1 = omp.new_cli + %canonloop_s0_d1 = omp.new_cli + // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv_s0 : i32 in range(%[[tc]]) { + omp.canonical_loop(%canonloop_s0) %iv_s0 : i32 in range(%tc) { + // CHECK-NEXT: omp.canonical_loop(%canonloop_s0_d1) %iv_s0_d1 : i32 in range(%[[tc]]) { + omp.canonical_loop(%canonloop_s0_d1) %iv_s0_d1 : i32 in range(%tc) { + // CHECK-NEXT: omp.terminator + omp.terminator + // CHECK-NEXT: } + } + // CHECK-NEXT: omp.terminator + omp.terminator + // CHECK-NEXT: } + } + + // CHECK-NEXT: %canonloop_s1 = omp.new_cli + %canonloop_s1 = omp.new_cli + // CHECK-NEXT: %canonloop_s1_d1 = omp.new_cli + %canonloop_s1_d1 = omp.new_cli + // CHECK-NEXT: omp.canonical_loop(%canonloop_s1) %iv_s1 : i32 in range(%[[tc]]) { + omp.canonical_loop(%canonloop_s1) %iv_s1 : i32 in range(%tc) { + // CHECK-NEXT: omp.canonical_loop(%canonloop_s1_d1) %iv_s1_d1 : i32 in range(%[[tc]]) { + omp.canonical_loop(%canonloop_s1_d1) %iv_s1d1 : i32 in range(%tc) { + // CHECK-NEXT: omp.terminator + omp.terminator + // CHECK-NEXT: } + } + // CHECK-NEXT: omp.terminator + 
omp.terminator + } + + return +} + + // CHECK-LABEL: @omp_newcli_unused( // CHECK-SAME: ) func.func @omp_newcli_unused() -> () { diff --git a/mlir/test/Dialect/OpenMP/cli-tile.mlir b/mlir/test/Dialect/OpenMP/cli-tile.mlir new file mode 100644 index 0000000000000..73d54784c52b7 --- /dev/null +++ b/mlir/test/Dialect/OpenMP/cli-tile.mlir @@ -0,0 +1,138 @@ +// RUN: mlir-opt %s | FileCheck %s --enable-var-scope +// RUN: mlir-opt %s | mlir-opt | FileCheck %s --enable-var-scope + + +// Raw syntax check (MLIR output is always pretty-printed) +// CHECK-LABEL: @omp_tile_raw( +// CHECK-SAME: %[[tc:.+]]: i32, %[[ts:.+]]: i32) { +func.func @omp_tile_raw(%tc : i32, %ts : i32) -> () { + // CHECK-NEXT: %canonloop = omp.new_cli + %canonloop = "omp.new_cli" () : () -> (!omp.cli) + // CHECK-NEXT: %grid1 = omp.new_cli + %grid = "omp.new_cli" () : () -> (!omp.cli) + // CHECK-NEXT: %intratile1 = omp.new_cli + %intratile = "omp.new_cli" () : () -> (!omp.cli) + // CHECK-NEXT: omp.canonical_loop(%canonloop) %iv : i32 in range(%[[tc]]) { + "omp.canonical_loop" (%tc, %canonloop) ({ + ^bb0(%iv: i32): + // CHECK: omp.terminator + omp.terminator + }) : (i32, !omp.cli) -> () + // CHECK: omp.tile (%grid1, %intratile1) <- (%canonloop) sizes(%[[ts]] : i32) + "omp.tile"(%grid, %intratile, %canonloop, %ts) <{operandSegmentSizes = array}> : (!omp.cli, !omp.cli, !omp.cli, i32) -> () + //"omp.tile" (%canonloop) : (!omp.cli) -> () + return +} + + +// Pretty syntax check +// CHECK-LABEL: @omp_tile_pretty( +// CHECK-SAME: %[[tc:.+]]: i32, %[[ts:.+]]: i32) { +func.func @omp_tile_pretty(%tc : i32, %ts : i32) -> () { + // CHECK-NEXT: %[[CANONLOOP:.+]] = omp.new_cli + %canonloop = omp.new_cli + // CHECK-NEXT: %[[CANONLOOP:.+]] = omp.new_cli + %grid = omp.new_cli + // CHECK-NEXT: %[[CANONLOOP:.+]] = omp.new_cli + %intratile = omp.new_cli + // CHECK-NEXT: omp.canonical_loop(%canonloop) %iv : i32 in range(%[[tc]]) { + omp.canonical_loop(%canonloop) %iv : i32 in range(%tc) { + // CHECK: omp.terminator + 
omp.terminator + } + // CHECK: omp.tile (%grid1, %intratile1) <- (%canonloop) sizes(%[[ts]] : i32) + omp.tile(%grid, %intratile) <- (%canonloop) sizes(%ts : i32) + return +} + + +// Specifying the generatees for omp.tile is optional +// CHECK-LABEL: @omp_tile_optionalgen_pretty( +// CHECK-SAME: %[[tc:.+]]: i32, %[[ts:.+]]: i32) { +func.func @omp_tile_optionalgen_pretty(%tc : i32, %ts : i32) -> () { + // CHECK-NEXT: %canonloop = omp.new_cli + %canonloop = omp.new_cli + // CHECK-NEXT: omp.canonical_loop(%canonloop) %iv : i32 in range(%[[tc]]) { + omp.canonical_loop(%canonloop) %iv : i32 in range(%tc) { + // CHECK: omp.terminator + omp.terminator + } + // CHECK: omp.tile <- (%canonloop) sizes(%[[ts]] : i32) + omp.tile <- (%canonloop) sizes(%ts : i32) + return +} + + +// Two-dimensional tiling +// CHECK-LABEL: @omp_tile_2d_pretty( +// CHECK-SAME: %[[tc1:.+]]: i32, %[[tc2:.+]]: i32, %[[ts1:.+]]: i32, %[[ts2:.+]]: i32) { +func.func @omp_tile_2d_pretty(%tc1 : i32, %tc2 : i32, %ts1 : i32, %ts2 : i32) -> () { + // CHECK-NEXT: %canonloop = omp.new_cli + %cli_outer = omp.new_cli + // CHECK-NEXT: %canonloop_d1 = omp.new_cli + %cli_inner = omp.new_cli + // CHECK-NEXT: %grid1 = omp.new_cli + %grid1 = omp.new_cli + // CHECK-NEXT: %grid2 = omp.new_cli + %grid2 = omp.new_cli + // CHECK-NEXT: %intratile1 = omp.new_cli + %intratile1 = omp.new_cli + // CHECK-NEXT: %intratile2 = omp.new_cli + %intratile2 = omp.new_cli + // CHECK-NEXT: omp.canonical_loop(%canonloop) %iv : i32 in range(%[[tc1]]) { + omp.canonical_loop(%cli_outer) %iv_outer : i32 in range(%tc1) { + // CHECK-NEXT: omp.canonical_loop(%canonloop_d1) %iv_d1 : i32 in range(%[[tc2]]) { + omp.canonical_loop(%cli_inner) %iv_inner : i32 in range(%tc2) { + // CHECK: omp.terminator + omp.terminator + } + // CHECK: omp.terminator + omp.terminator + } + // CHECK: omp.tile (%grid1, %grid2, %intratile1, %intratile2) <- (%canonloop, %canonloop_d1) sizes(%[[ts1]], %[[ts2]] : i32, i32) + omp.tile (%grid1, %grid2, %intratile1, %intratile2) 
<- (%cli_outer, %cli_inner) sizes(%ts1, %ts2 : i32, i32) + return +} + + +// Three-dimensional tiling +// CHECK-LABEL: @omp_tile_3d_pretty( +// CHECK-SAME: %[[tc:.+]]: i32, %[[ts:.+]]: i32) { +func.func @omp_tile_3d_pretty(%tc : i32, %ts : i32) -> () { + // CHECK-NEXT: %canonloop = omp.new_cli + %cli_outer = omp.new_cli + // CHECK-NEXT: %canonloop_d1 = omp.new_cli + %cli_middle = omp.new_cli + // CHECK-NEXT: %canonloop_d2 = omp.new_cli + %cli_inner = omp.new_cli + // CHECK-NEXT: %grid1 = omp.new_cli + %grid1 = omp.new_cli + // CHECK-NEXT: %grid2 = omp.new_cli + %grid2 = omp.new_cli + // CHECK-NEXT: %grid3 = omp.new_cli + %grid3 = omp.new_cli + // CHECK-NEXT: %intratile1 = omp.new_cli + %intratile1 = omp.new_cli + // CHECK-NEXT: %intratile2 = omp.new_cli + %intratile2 = omp.new_cli + // CHECK-NEXT: %intratile3 = omp.new_cli + %intratile3 = omp.new_cli + // CHECK-NEXT: omp.canonical_loop(%canonloop) %iv : i32 in range(%[[tc]]) { + omp.canonical_loop(%cli_outer) %iv_outer : i32 in range(%tc) { + // CHECK-NEXT: omp.canonical_loop(%canonloop_d1) %iv_d1 : i32 in range(%[[tc]]) { + omp.canonical_loop(%cli_middle) %iv_middle : i32 in range(%tc) { + // CHECK-NEXT: omp.canonical_loop(%canonloop_d2) %iv_d2 : i32 in range(%[[tc]]) { + omp.canonical_loop(%cli_inner) %iv_inner : i32 in range(%tc) { + // CHECK: omp.terminator + omp.terminator + } + // CHECK: omp.terminator + omp.terminator + } + // CHECK: omp.terminator + omp.terminator + } + // CHECK: omp.tile (%grid1, %grid2, %grid3, %intratile1, %intratile2, %intratile3) <- (%canonloop, %canonloop_d1, %canonloop_d2) sizes(%[[ts]], %[[ts]], %[[ts]] : i32, i32, i32) + omp.tile (%grid1, %grid2, %grid3, %intratile1, %intratile2, %intratile3) <- (%cli_outer, %cli_middle, %cli_inner) sizes(%ts, %ts, %ts: i32, i32, i32) + return +} diff --git a/mlir/test/Dialect/OpenMP/cli-unroll-heuristic.mlir b/mlir/test/Dialect/OpenMP/cli-unroll-heuristic.mlir index cda7d0b500166..16884f4245e76 100644 --- 
a/mlir/test/Dialect/OpenMP/cli-unroll-heuristic.mlir +++ b/mlir/test/Dialect/OpenMP/cli-unroll-heuristic.mlir @@ -1,18 +1,18 @@ -// RUN: mlir-opt %s | FileCheck %s -// RUN: mlir-opt %s | mlir-opt | FileCheck %s +// RUN: mlir-opt %s | FileCheck %s --enable-var-scope +// RUN: mlir-opt %s | mlir-opt | FileCheck %s --enable-var-scope // CHECK-LABEL: @omp_unroll_heuristic_raw( // CHECK-SAME: %[[tc:.+]]: i32) { func.func @omp_unroll_heuristic_raw(%tc : i32) -> () { - // CHECK-NEXT: %canonloop_s0 = omp.new_cli + // CHECK-NEXT: %canonloop = omp.new_cli %canonloop = "omp.new_cli" () : () -> (!omp.cli) - // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc]]) { + // CHECK-NEXT: omp.canonical_loop(%canonloop) %iv : i32 in range(%[[tc]]) { "omp.canonical_loop" (%tc, %canonloop) ({ ^bb0(%iv: i32): omp.terminator }) : (i32, !omp.cli) -> () - // CHECK: omp.unroll_heuristic(%canonloop_s0) + // CHECK: omp.unroll_heuristic(%canonloop) "omp.unroll_heuristic" (%canonloop) : (!omp.cli) -> () return } @@ -22,12 +22,12 @@ func.func @omp_unroll_heuristic_raw(%tc : i32) -> () { // CHECK-SAME: %[[tc:.+]]: i32) { func.func @omp_unroll_heuristic_pretty(%tc : i32) -> () { // CHECK-NEXT: %[[CANONLOOP:.+]] = omp.new_cli - %canonloop = "omp.new_cli" () : () -> (!omp.cli) - // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc]]) { + %canonloop = omp.new_cli + // CHECK-NEXT: omp.canonical_loop(%canonloop) %iv : i32 in range(%[[tc]]) { omp.canonical_loop(%canonloop) %iv : i32 in range(%tc) { omp.terminator } - // CHECK: omp.unroll_heuristic(%canonloop_s0) + // CHECK: omp.unroll_heuristic(%canonloop) omp.unroll_heuristic(%canonloop) return } @@ -36,13 +36,13 @@ func.func @omp_unroll_heuristic_pretty(%tc : i32) -> () { // CHECK-LABEL: @omp_unroll_heuristic_nested_pretty( // CHECK-SAME: %[[tc:.+]]: i32) { func.func @omp_unroll_heuristic_nested_pretty(%tc : i32) -> () { - // CHECK-NEXT: %canonloop_s0 = omp.new_cli + // CHECK-NEXT: %canonloop = omp.new_cli 
%cli_outer = omp.new_cli - // CHECK-NEXT: %canonloop_s0_s0 = omp.new_cli + // CHECK-NEXT: %canonloop_d1 = omp.new_cli %cli_inner = omp.new_cli - // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc]]) { + // CHECK-NEXT: omp.canonical_loop(%canonloop) %iv : i32 in range(%[[tc]]) { omp.canonical_loop(%cli_outer) %iv_outer : i32 in range(%tc) { - // CHECK-NEXT: omp.canonical_loop(%canonloop_s0_s0) %iv_0 : i32 in range(%[[tc]]) { + // CHECK-NEXT: omp.canonical_loop(%canonloop_d1) %iv_d1 : i32 in range(%[[tc]]) { omp.canonical_loop(%cli_inner) %iv_inner : i32 in range(%tc) { // CHECK: omp.terminator omp.terminator @@ -51,9 +51,9 @@ func.func @omp_unroll_heuristic_nested_pretty(%tc : i32) -> () { omp.terminator } - // CHECK: omp.unroll_heuristic(%canonloop_s0) + // CHECK: omp.unroll_heuristic(%canonloop) omp.unroll_heuristic(%cli_outer) - // CHECK-NEXT: omp.unroll_heuristic(%canonloop_s0_s0) + // CHECK-NEXT: omp.unroll_heuristic(%canonloop_d1) omp.unroll_heuristic(%cli_inner) return } diff --git a/mlir/test/Dialect/OpenMP/invalid-tile.mlir b/mlir/test/Dialect/OpenMP/invalid-tile.mlir new file mode 100644 index 0000000000000..e63a062d810ed --- /dev/null +++ b/mlir/test/Dialect/OpenMP/invalid-tile.mlir @@ -0,0 +1,119 @@ +// RUN: mlir-opt -split-input-file -verify-diagnostics %s + + +func.func @missing_sizes(%tc : i32, %ts : i32) { + %canonloop = omp.new_cli + omp.canonical_loop(%canonloop) %iv : i32 in range(%tc) { + omp.terminator + } + + // expected-error@+1 {{'omp.tile' op there must be one tile size for each applyee}} + omp.tile <-(%canonloop) + + llvm.return +} + +// ----- + +func.func @no_loop(%tc : i32, %ts : i32) { + // expected-error@+1 {{'omp.tile' op must apply to at least one loop}} + omp.tile <-() + + return +} + +// ----- + +func.func @missing_generator(%tc : i32, %ts : i32) { + // expected-error@+1 {{'omp.new_cli' op CLI has no generator}} + %canonloop = omp.new_cli + + // expected-note@+1 {{see consumer here: "omp.tile"(%0, %arg1) 
<{operandSegmentSizes = array}> : (!omp.cli, i32) -> ()}} + omp.tile <-(%canonloop) sizes(%ts : i32) + + return +} + +// ----- + +func.func @insufficient_sizes(%tc : i32, %ts : i32) { + %canonloop1 = omp.new_cli + %canonloop2 = omp.new_cli + omp.canonical_loop(%canonloop1) %iv : i32 in range(%tc) { + omp.terminator + } + omp.canonical_loop(%canonloop2) %iv : i32 in range(%tc) { + omp.terminator + } + + // expected-error@+1 {{'omp.tile' op there must be one tile size for each applyee}} + omp.tile <-(%canonloop1, %canonloop2) sizes(%ts : i32) + + llvm.return +} + +// ----- + +func.func @insufficient_applyees(%tc : i32, %ts : i32) { + %canonloop = omp.new_cli + omp.canonical_loop(%canonloop) %iv : i32 in range(%tc) { + omp.terminator + } + + // expected-error@+1 {{omp.tile' op there must be one tile size for each applyee}} + omp.tile <- (%canonloop) sizes(%ts, %ts : i32, i32) + + return +} + +// ----- + +func.func @insufficient_generatees(%tc : i32, %ts : i32) { + %canonloop = omp.new_cli + %grid = omp.new_cli + omp.canonical_loop(%canonloop) %iv : i32 in range(%tc) { + omp.terminator + } + + // expected-error@+1 {{'omp.tile' op expecting two times the number of generatees than applyees}} + omp.tile (%grid) <- (%canonloop) sizes(%ts : i32) + + return +} + +// ----- + +func.func @not_perfectly_nested(%tc : i32, %ts : i32) { + %canonloop1 = omp.new_cli + %canonloop2 = omp.new_cli + omp.canonical_loop(%canonloop1) %iv1 : i32 in range(%tc) { + %v = arith.constant 42 : i32 + omp.canonical_loop(%canonloop2) %iv2 : i32 in range(%tc) { + omp.terminator + } + omp.terminator + } + + // expected-error@+1 {{'omp.tile' op tiled loop nest must be perfectly nested}} + omp.tile <-(%canonloop1, %canonloop2) sizes(%ts, %ts : i32, i32) + + llvm.return +} + +// ----- + +func.func @non_nectangular(%tc : i32, %ts : i32) { + %canonloop1 = omp.new_cli + %canonloop2 = omp.new_cli + omp.canonical_loop(%canonloop1) %iv1 : i32 in range(%tc) { + omp.canonical_loop(%canonloop2) %iv2 : i32 in 
range(%iv1) { + omp.terminator + } + omp.terminator + } + + // expected-error@+1 {{'omp.tile' op tiled loop nest must be rectangular}} + omp.tile <-(%canonloop1, %canonloop2) sizes(%ts, %ts : i32, i32) + + llvm.return +} diff --git a/mlir/test/Target/LLVMIR/openmp-cli-tile01.mlir b/mlir/test/Target/LLVMIR/openmp-cli-tile01.mlir new file mode 100644 index 0000000000000..4ac4f02103e8c --- /dev/null +++ b/mlir/test/Target/LLVMIR/openmp-cli-tile01.mlir @@ -0,0 +1,101 @@ +// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s + + + +llvm.func @tile_trivial_loop(%baseptr: !llvm.ptr, %tc: i32, %ts: i32) -> () { + %literal_cli = omp.new_cli + omp.canonical_loop(%literal_cli) %iv : i32 in range(%tc) { + %ptr = llvm.getelementptr inbounds %baseptr[%iv] : (!llvm.ptr, i32) -> !llvm.ptr, f32 + %val = llvm.mlir.constant(42.0 : f32) : f32 + llvm.store %val, %ptr : f32, !llvm.ptr + omp.terminator + } + omp.tile <- (%literal_cli) sizes(%ts : i32) + llvm.return +} + + +// CHECK: ; ModuleID = 'LLVMDialectModule' +// CHECK-NEXT: source_filename = "LLVMDialectModule" +// CHECK-EMPTY: +// CHECK-NEXT: define void @tile_trivial_loop(ptr %0, i32 %1, i32 %2) { +// CHECK-NEXT: br label %omp_omp.loop.preheader +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.preheader: ; preds = %3 +// CHECK-NEXT: %4 = udiv i32 %1, %2 +// CHECK-NEXT: %5 = urem i32 %1, %2 +// CHECK-NEXT: %6 = icmp ne i32 %5, 0 +// CHECK-NEXT: %7 = zext i1 %6 to i32 +// CHECK-NEXT: %omp_floor0.tripcount = add nuw i32 %4, %7 +// CHECK-NEXT: br label %omp_floor0.preheader +// CHECK-EMPTY: +// CHECK-NEXT: omp_floor0.preheader: ; preds = %omp_omp.loop.preheader +// CHECK-NEXT: br label %omp_floor0.header +// CHECK-EMPTY: +// CHECK-NEXT: omp_floor0.header: ; preds = %omp_floor0.inc, %omp_floor0.preheader +// CHECK-NEXT: %omp_floor0.iv = phi i32 [ 0, %omp_floor0.preheader ], [ %omp_floor0.next, %omp_floor0.inc ] +// CHECK-NEXT: br label %omp_floor0.cond +// CHECK-EMPTY: +// CHECK-NEXT: omp_floor0.cond: ; preds = %omp_floor0.header +// 
CHECK-NEXT: %omp_floor0.cmp = icmp ult i32 %omp_floor0.iv, %omp_floor0.tripcount +// CHECK-NEXT: br i1 %omp_floor0.cmp, label %omp_floor0.body, label %omp_floor0.exit +// CHECK-EMPTY: +// CHECK-NEXT: omp_floor0.body: ; preds = %omp_floor0.cond +// CHECK-NEXT: %8 = icmp eq i32 %omp_floor0.iv, %4 +// CHECK-NEXT: %9 = select i1 %8, i32 %5, i32 %2 +// CHECK-NEXT: br label %omp_tile0.preheader +// CHECK-EMPTY: +// CHECK-NEXT: omp_tile0.preheader: ; preds = %omp_floor0.body +// CHECK-NEXT: br label %omp_tile0.header +// CHECK-EMPTY: +// CHECK-NEXT: omp_tile0.header: ; preds = %omp_tile0.inc, %omp_tile0.preheader +// CHECK-NEXT: %omp_tile0.iv = phi i32 [ 0, %omp_tile0.preheader ], [ %omp_tile0.next, %omp_tile0.inc ] +// CHECK-NEXT: br label %omp_tile0.cond +// CHECK-EMPTY: +// CHECK-NEXT: omp_tile0.cond: ; preds = %omp_tile0.header +// CHECK-NEXT: %omp_tile0.cmp = icmp ult i32 %omp_tile0.iv, %9 +// CHECK-NEXT: br i1 %omp_tile0.cmp, label %omp_tile0.body, label %omp_tile0.exit +// CHECK-EMPTY: +// CHECK-NEXT: omp_tile0.body: ; preds = %omp_tile0.cond +// CHECK-NEXT: %10 = mul nuw i32 %2, %omp_floor0.iv +// CHECK-NEXT: %11 = add nuw i32 %10, %omp_tile0.iv +// CHECK-NEXT: br label %omp_omp.loop.body +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.body: ; preds = %omp_tile0.body +// CHECK-NEXT: br label %omp.loop.region +// CHECK-EMPTY: +// CHECK-NEXT: omp.loop.region: ; preds = %omp_omp.loop.body +// CHECK-NEXT: %12 = getelementptr inbounds float, ptr %0, i32 %11 +// CHECK-NEXT: store float 4.200000e+01, ptr %12, align 4 +// CHECK-NEXT: br label %omp.region.cont +// CHECK-EMPTY: +// CHECK-NEXT: omp.region.cont: ; preds = %omp.loop.region +// CHECK-NEXT: br label %omp_tile0.inc +// CHECK-EMPTY: +// CHECK-NEXT: omp_tile0.inc: ; preds = %omp.region.cont +// CHECK-NEXT: %omp_tile0.next = add nuw i32 %omp_tile0.iv, 1 +// CHECK-NEXT: br label %omp_tile0.header +// CHECK-EMPTY: +// CHECK-NEXT: omp_tile0.exit: ; preds = %omp_tile0.cond +// CHECK-NEXT: br label %omp_tile0.after +// 
CHECK-EMPTY: +// CHECK-NEXT: omp_tile0.after: ; preds = %omp_tile0.exit +// CHECK-NEXT: br label %omp_floor0.inc +// CHECK-EMPTY: +// CHECK-NEXT: omp_floor0.inc: ; preds = %omp_tile0.after +// CHECK-NEXT: %omp_floor0.next = add nuw i32 %omp_floor0.iv, 1 +// CHECK-NEXT: br label %omp_floor0.header +// CHECK-EMPTY: +// CHECK-NEXT: omp_floor0.exit: ; preds = %omp_floor0.cond +// CHECK-NEXT: br label %omp_floor0.after +// CHECK-EMPTY: +// CHECK-NEXT: omp_floor0.after: ; preds = %omp_floor0.exit +// CHECK-NEXT: br label %omp_omp.loop.after +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.after: ; preds = %omp_floor0.after +// CHECK-NEXT: ret void +// CHECK-NEXT: } +// CHECK-EMPTY: +// CHECK-NEXT: !llvm.module.flags = !{!0} +// CHECK-EMPTY: +// CHECK-NEXT: !0 = !{i32 2, !"Debug Info Version", i32 3} diff --git a/mlir/test/Target/LLVMIR/openmp-cli-tile02.mlir b/mlir/test/Target/LLVMIR/openmp-cli-tile02.mlir new file mode 100644 index 0000000000000..6fad81cd0c299 --- /dev/null +++ b/mlir/test/Target/LLVMIR/openmp-cli-tile02.mlir @@ -0,0 +1,190 @@ +// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s + + +llvm.func @tile_2d_loop(%baseptr: !llvm.ptr, %tc1: i32, %tc2: i32, %ts1: i32, %ts2: i32) -> () { + %literal_outer = omp.new_cli + %literal_inner = omp.new_cli + omp.canonical_loop(%literal_outer) %iv1 : i32 in range(%tc1) { + omp.canonical_loop(%literal_inner) %iv2 : i32 in range(%tc2) { + %idx = llvm.add %iv1, %iv2 : i32 + %ptr = llvm.getelementptr inbounds %baseptr[%idx] : (!llvm.ptr, i32) -> !llvm.ptr, f32 + %val = llvm.mlir.constant(42.0 : f32) : f32 + llvm.store %val, %ptr : f32, !llvm.ptr + omp.terminator + } + omp.terminator + } + omp.tile <- (%literal_outer, %literal_inner) sizes(%ts1, %ts2 : i32,i32) + llvm.return +} + + +// CHECK: ; ModuleID = 'LLVMDialectModule' +// CHECK-NEXT: source_filename = "LLVMDialectModule" +// CHECK-EMPTY: +// CHECK-NEXT: define void @tile_2d_loop(ptr %0, i32 %1, i32 %2, i32 %3, i32 %4) { +// CHECK-NEXT: br label 
%omp_omp.loop.preheader +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.preheader: ; preds = %5 +// CHECK-NEXT: %6 = udiv i32 %1, %3 +// CHECK-NEXT: %7 = urem i32 %1, %3 +// CHECK-NEXT: %8 = icmp ne i32 %7, 0 +// CHECK-NEXT: %9 = zext i1 %8 to i32 +// CHECK-NEXT: %omp_floor0.tripcount = add nuw i32 %6, %9 +// CHECK-NEXT: %10 = udiv i32 %2, %4 +// CHECK-NEXT: %11 = urem i32 %2, %4 +// CHECK-NEXT: %12 = icmp ne i32 %11, 0 +// CHECK-NEXT: %13 = zext i1 %12 to i32 +// CHECK-NEXT: %omp_floor1.tripcount = add nuw i32 %10, %13 +// CHECK-NEXT: br label %omp_floor0.preheader +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.header: ; preds = %omp_omp.loop.inc +// CHECK-NEXT: %omp_omp.loop.iv = phi i32 [ %omp_omp.loop.next, %omp_omp.loop.inc ] +// CHECK-NEXT: br label %omp_omp.loop.cond +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.cond: ; preds = %omp_omp.loop.header +// CHECK-NEXT: %omp_omp.loop.cmp = icmp ult i32 %19, %1 +// CHECK-NEXT: br i1 %omp_omp.loop.cmp, label %omp_omp.loop.body, label %omp_omp.loop.exit +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.body: ; preds = %omp_tile1.body, %omp_omp.loop.cond +// CHECK-NEXT: br label %omp.loop.region +// CHECK-EMPTY: +// CHECK-NEXT: omp.loop.region: ; preds = %omp_omp.loop.body +// CHECK-NEXT: br label %omp_omp.loop.preheader1 +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.preheader1: ; preds = %omp.loop.region +// CHECK-NEXT: br label %omp_omp.loop.body4 +// CHECK-EMPTY: +// CHECK-NEXT: omp_floor0.preheader: ; preds = %omp_omp.loop.preheader +// CHECK-NEXT: br label %omp_floor0.header +// CHECK-EMPTY: +// CHECK-NEXT: omp_floor0.header: ; preds = %omp_floor0.inc, %omp_floor0.preheader +// CHECK-NEXT: %omp_floor0.iv = phi i32 [ 0, %omp_floor0.preheader ], [ %omp_floor0.next, %omp_floor0.inc ] +// CHECK-NEXT: br label %omp_floor0.cond +// CHECK-EMPTY: +// CHECK-NEXT: omp_floor0.cond: ; preds = %omp_floor0.header +// CHECK-NEXT: %omp_floor0.cmp = icmp ult i32 %omp_floor0.iv, %omp_floor0.tripcount +// CHECK-NEXT: br i1 
%omp_floor0.cmp, label %omp_floor0.body, label %omp_floor0.exit +// CHECK-EMPTY: +// CHECK-NEXT: omp_floor0.body: ; preds = %omp_floor0.cond +// CHECK-NEXT: br label %omp_floor1.preheader +// CHECK-EMPTY: +// CHECK-NEXT: omp_floor1.preheader: ; preds = %omp_floor0.body +// CHECK-NEXT: br label %omp_floor1.header +// CHECK-EMPTY: +// CHECK-NEXT: omp_floor1.header: ; preds = %omp_floor1.inc, %omp_floor1.preheader +// CHECK-NEXT: %omp_floor1.iv = phi i32 [ 0, %omp_floor1.preheader ], [ %omp_floor1.next, %omp_floor1.inc ] +// CHECK-NEXT: br label %omp_floor1.cond +// CHECK-EMPTY: +// CHECK-NEXT: omp_floor1.cond: ; preds = %omp_floor1.header +// CHECK-NEXT: %omp_floor1.cmp = icmp ult i32 %omp_floor1.iv, %omp_floor1.tripcount +// CHECK-NEXT: br i1 %omp_floor1.cmp, label %omp_floor1.body, label %omp_floor1.exit +// CHECK-EMPTY: +// CHECK-NEXT: omp_floor1.body: ; preds = %omp_floor1.cond +// CHECK-NEXT: %14 = icmp eq i32 %omp_floor0.iv, %6 +// CHECK-NEXT: %15 = select i1 %14, i32 %7, i32 %3 +// CHECK-NEXT: %16 = icmp eq i32 %omp_floor1.iv, %10 +// CHECK-NEXT: %17 = select i1 %16, i32 %11, i32 %4 +// CHECK-NEXT: br label %omp_tile0.preheader +// CHECK-EMPTY: +// CHECK-NEXT: omp_tile0.preheader: ; preds = %omp_floor1.body +// CHECK-NEXT: br label %omp_tile0.header +// CHECK-EMPTY: +// CHECK-NEXT: omp_tile0.header: ; preds = %omp_tile0.inc, %omp_tile0.preheader +// CHECK-NEXT: %omp_tile0.iv = phi i32 [ 0, %omp_tile0.preheader ], [ %omp_tile0.next, %omp_tile0.inc ] +// CHECK-NEXT: br label %omp_tile0.cond +// CHECK-EMPTY: +// CHECK-NEXT: omp_tile0.cond: ; preds = %omp_tile0.header +// CHECK-NEXT: %omp_tile0.cmp = icmp ult i32 %omp_tile0.iv, %15 +// CHECK-NEXT: br i1 %omp_tile0.cmp, label %omp_tile0.body, label %omp_tile0.exit +// CHECK-EMPTY: +// CHECK-NEXT: omp_tile0.body: ; preds = %omp_tile0.cond +// CHECK-NEXT: br label %omp_tile1.preheader +// CHECK-EMPTY: +// CHECK-NEXT: omp_tile1.preheader: ; preds = %omp_tile0.body +// CHECK-NEXT: br label %omp_tile1.header +// 
CHECK-EMPTY: +// CHECK-NEXT: omp_tile1.header: ; preds = %omp_tile1.inc, %omp_tile1.preheader +// CHECK-NEXT: %omp_tile1.iv = phi i32 [ 0, %omp_tile1.preheader ], [ %omp_tile1.next, %omp_tile1.inc ] +// CHECK-NEXT: br label %omp_tile1.cond +// CHECK-EMPTY: +// CHECK-NEXT: omp_tile1.cond: ; preds = %omp_tile1.header +// CHECK-NEXT: %omp_tile1.cmp = icmp ult i32 %omp_tile1.iv, %17 +// CHECK-NEXT: br i1 %omp_tile1.cmp, label %omp_tile1.body, label %omp_tile1.exit +// CHECK-EMPTY: +// CHECK-NEXT: omp_tile1.body: ; preds = %omp_tile1.cond +// CHECK-NEXT: %18 = mul nuw i32 %3, %omp_floor0.iv +// CHECK-NEXT: %19 = add nuw i32 %18, %omp_tile0.iv +// CHECK-NEXT: %20 = mul nuw i32 %4, %omp_floor1.iv +// CHECK-NEXT: %21 = add nuw i32 %20, %omp_tile1.iv +// CHECK-NEXT: br label %omp_omp.loop.body +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.body4: ; preds = %omp_omp.loop.preheader1 +// CHECK-NEXT: br label %omp.loop.region12 +// CHECK-EMPTY: +// CHECK-NEXT: omp.loop.region12: ; preds = %omp_omp.loop.body4 +// CHECK-NEXT: %22 = add i32 %19, %21 +// CHECK-NEXT: %23 = getelementptr inbounds float, ptr %0, i32 %22 +// CHECK-NEXT: store float 4.200000e+01, ptr %23, align 4 +// CHECK-NEXT: br label %omp.region.cont11 +// CHECK-EMPTY: +// CHECK-NEXT: omp.region.cont11: ; preds = %omp.loop.region12 +// CHECK-NEXT: br label %omp_tile1.inc +// CHECK-EMPTY: +// CHECK-NEXT: omp_tile1.inc: ; preds = %omp.region.cont11 +// CHECK-NEXT: %omp_tile1.next = add nuw i32 %omp_tile1.iv, 1 +// CHECK-NEXT: br label %omp_tile1.header +// CHECK-EMPTY: +// CHECK-NEXT: omp_tile1.exit: ; preds = %omp_tile1.cond +// CHECK-NEXT: br label %omp_tile1.after +// CHECK-EMPTY: +// CHECK-NEXT: omp_tile1.after: ; preds = %omp_tile1.exit +// CHECK-NEXT: br label %omp_tile0.inc +// CHECK-EMPTY: +// CHECK-NEXT: omp_tile0.inc: ; preds = %omp_tile1.after +// CHECK-NEXT: %omp_tile0.next = add nuw i32 %omp_tile0.iv, 1 +// CHECK-NEXT: br label %omp_tile0.header +// CHECK-EMPTY: +// CHECK-NEXT: omp_tile0.exit: ; preds = 
%omp_tile0.cond +// CHECK-NEXT: br label %omp_tile0.after +// CHECK-EMPTY: +// CHECK-NEXT: omp_tile0.after: ; preds = %omp_tile0.exit +// CHECK-NEXT: br label %omp_floor1.inc +// CHECK-EMPTY: +// CHECK-NEXT: omp_floor1.inc: ; preds = %omp_tile0.after +// CHECK-NEXT: %omp_floor1.next = add nuw i32 %omp_floor1.iv, 1 +// CHECK-NEXT: br label %omp_floor1.header +// CHECK-EMPTY: +// CHECK-NEXT: omp_floor1.exit: ; preds = %omp_floor1.cond +// CHECK-NEXT: br label %omp_floor1.after +// CHECK-EMPTY: +// CHECK-NEXT: omp_floor1.after: ; preds = %omp_floor1.exit +// CHECK-NEXT: br label %omp_floor0.inc +// CHECK-EMPTY: +// CHECK-NEXT: omp_floor0.inc: ; preds = %omp_floor1.after +// CHECK-NEXT: %omp_floor0.next = add nuw i32 %omp_floor0.iv, 1 +// CHECK-NEXT: br label %omp_floor0.header +// CHECK-EMPTY: +// CHECK-NEXT: omp_floor0.exit: ; preds = %omp_floor0.cond +// CHECK-NEXT: br label %omp_floor0.after +// CHECK-EMPTY: +// CHECK-NEXT: omp_floor0.after: ; preds = %omp_floor0.exit +// CHECK-NEXT: br label %omp_omp.loop.after +// CHECK-EMPTY: +// CHECK-NEXT: omp.region.cont: ; No predecessors! 
+// CHECK-NEXT: br label %omp_omp.loop.inc +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.inc: ; preds = %omp.region.cont +// CHECK-NEXT: %omp_omp.loop.next = add nuw i32 %19, 1 +// CHECK-NEXT: br label %omp_omp.loop.header +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.exit: ; preds = %omp_omp.loop.cond +// CHECK-NEXT: br label %omp_omp.loop.after +// CHECK-EMPTY: +// CHECK-NEXT: omp_omp.loop.after: ; preds = %omp_floor0.after, %omp_omp.loop.exit +// CHECK-NEXT: ret void +// CHECK-NEXT: } +// CHECK-EMPTY: +// CHECK-NEXT: !llvm.module.flags = !{!0} +// CHECK-EMPTY: +// CHECK-NEXT: !0 = !{i32 2, !"Debug Info Version", i32 3} diff --git a/mlir/test/mlir-tblgen/op-format-invalid.td b/mlir/test/mlir-tblgen/op-format-invalid.td index 2f29543f67381..0a022ad43a749 100644 --- a/mlir/test/mlir-tblgen/op-format-invalid.td +++ b/mlir/test/mlir-tblgen/op-format-invalid.td @@ -307,7 +307,7 @@ def DirectiveTypeZOperandInvalidI : TestFormat_Op<[{ def LiteralInvalidA : TestFormat_Op<[{ `a:` }]>; -// CHECK: error: expected valid literal but got '1': single character literal must be a letter or one of '_:,=<>()[]{}?+*' +// CHECK: error: expected valid literal but got '1': single character literal must be a letter or one of '_:,=<>()[]{}?+-*' def LiteralInvalidB : TestFormat_Op<[{ `1` }]>; diff --git a/mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp b/mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp index a1899a81afcce..8dd971374fa21 100644 --- a/mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp +++ b/mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp @@ -403,6 +403,7 @@ void DefFormat::genLiteralParser(StringRef value, FmtContext &ctx, .Case("]", "RSquare") .Case("?", "Question") .Case("+", "Plus") + .Case("-", "Minus") .Case("*", "Star") .Case("...", "Ellipsis") << "()"; diff --git a/mlir/tools/mlir-tblgen/FormatGen.cpp b/mlir/tools/mlir-tblgen/FormatGen.cpp index 4dfdde2146679..04d3ed1f3b70d 100644 --- a/mlir/tools/mlir-tblgen/FormatGen.cpp +++ b/mlir/tools/mlir-tblgen/FormatGen.cpp @@ -518,7 
+518,7 @@ bool mlir::tblgen::isValidLiteral(StringRef value, // If there is only one character, this must either be punctuation or a // single character bare identifier. if (value.size() == 1) { - StringRef bare = "_:,=<>()[]{}?+*"; + StringRef bare = "_:,=<>()[]{}?+-*"; if (isalpha(front) || bare.contains(front)) return true; if (emitError) diff --git a/mlir/tools/mlir-tblgen/OpFormatGen.cpp b/mlir/tools/mlir-tblgen/OpFormatGen.cpp index 0d113b3748354..ccf21d16005af 100644 --- a/mlir/tools/mlir-tblgen/OpFormatGen.cpp +++ b/mlir/tools/mlir-tblgen/OpFormatGen.cpp @@ -852,6 +852,7 @@ static void genLiteralParser(StringRef value, MethodBody &body) { .Case("]", "RSquare()") .Case("?", "Question()") .Case("+", "Plus()") + .Case("-", "Minus()") .Case("*", "Star()") .Case("...", "Ellipsis()"); } From bfe9c6b642ebc01f113dbf0a574e424e83f7162a Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Tue, 23 Sep 2025 15:33:52 +0200 Subject: [PATCH 05/12] [flang] Add standalone tile support --- flang/lib/Lower/OpenMP/ClauseProcessor.cpp | 13 + flang/lib/Lower/OpenMP/ClauseProcessor.h | 2 + flang/lib/Lower/OpenMP/OpenMP.cpp | 360 ++++++++++++------ flang/lib/Lower/OpenMP/Utils.cpp | 23 +- flang/lib/Lower/OpenMP/Utils.h | 7 + .../lib/Semantics/check-directive-structure.h | 7 +- flang/lib/Semantics/check-omp-structure.cpp | 8 +- flang/lib/Semantics/resolve-directives.cpp | 16 +- flang/test/Lower/OpenMP/tile01.f90 | 58 +++ flang/test/Lower/OpenMP/tile02.f90 | 88 +++++ .../loop-transformation-construct02.f90 | 5 +- flang/test/Parser/OpenMP/tile-fail.f90 | 32 ++ flang/test/Parser/OpenMP/tile.f90 | 15 +- flang/test/Semantics/OpenMP/tile01.f90 | 26 ++ flang/test/Semantics/OpenMP/tile02.f90 | 15 + flang/test/Semantics/OpenMP/tile03.f90 | 15 + flang/test/Semantics/OpenMP/tile04.f90 | 38 ++ flang/test/Semantics/OpenMP/tile05.f90 | 14 + flang/test/Semantics/OpenMP/tile06.f90 | 44 +++ flang/test/Semantics/OpenMP/tile07.f90 | 35 ++ flang/test/Semantics/OpenMP/tile08.f90 | 15 + 
llvm/include/llvm/Frontend/OpenMP/OMP.td | 3 + openmp/runtime/test/transform/tile/intfor.f90 | 31 ++ .../runtime/test/transform/tile/intfor_2d.f90 | 53 +++ .../transform/tile/intfor_2d_varsizes.F90 | 60 +++ 25 files changed, 841 insertions(+), 142 deletions(-) create mode 100644 flang/test/Lower/OpenMP/tile01.f90 create mode 100644 flang/test/Lower/OpenMP/tile02.f90 create mode 100644 flang/test/Parser/OpenMP/tile-fail.f90 create mode 100644 flang/test/Semantics/OpenMP/tile01.f90 create mode 100644 flang/test/Semantics/OpenMP/tile02.f90 create mode 100644 flang/test/Semantics/OpenMP/tile03.f90 create mode 100644 flang/test/Semantics/OpenMP/tile04.f90 create mode 100644 flang/test/Semantics/OpenMP/tile05.f90 create mode 100644 flang/test/Semantics/OpenMP/tile06.f90 create mode 100644 flang/test/Semantics/OpenMP/tile07.f90 create mode 100644 flang/test/Semantics/OpenMP/tile08.f90 create mode 100644 openmp/runtime/test/transform/tile/intfor.f90 create mode 100644 openmp/runtime/test/transform/tile/intfor_2d.f90 create mode 100644 openmp/runtime/test/transform/tile/intfor_2d_varsizes.F90 diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp index a96884f5680ba..55eda7e3404c1 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp @@ -431,6 +431,19 @@ bool ClauseProcessor::processNumTasks( return false; } +bool ClauseProcessor::processSizes(StatementContext &stmtCtx, + mlir::omp::SizesClauseOps &result) const { + if (auto *clause = findUniqueClause()) { + result.sizes.reserve(clause->v.size()); + for (const ExprTy &vv : clause->v) + result.sizes.push_back(fir::getBase(converter.genExprValue(vv, stmtCtx))); + + return true; + } + + return false; +} + bool ClauseProcessor::processNumTeams( lower::StatementContext &stmtCtx, mlir::omp::NumTeamsClauseOps &result) const { diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.h b/flang/lib/Lower/OpenMP/ClauseProcessor.h index 
324ea3c1047a5..9e352fa574a97 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.h +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.h @@ -66,6 +66,8 @@ class ClauseProcessor { mlir::omp::LoopRelatedClauseOps &loopResult, mlir::omp::CollapseClauseOps &collapseResult, llvm::SmallVectorImpl &iv) const; + bool processSizes(StatementContext &stmtCtx, + mlir::omp::SizesClauseOps &result) const; bool processDevice(lower::StatementContext &stmtCtx, mlir::omp::DeviceClauseOps &result) const; bool processDeviceType(mlir::omp::DeviceTypeClauseOps &result) const; diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 5681be664d450..7812d9fe00be2 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -1984,125 +1984,241 @@ genLoopOp(lower::AbstractConverter &converter, lower::SymMap &symTable, return loopOp; } -static mlir::omp::CanonicalLoopOp -genCanonicalLoopOp(lower::AbstractConverter &converter, lower::SymMap &symTable, - semantics::SemanticsContext &semaCtx, - lower::pft::Evaluation &eval, mlir::Location loc, - const ConstructQueue &queue, - ConstructQueue::const_iterator item, - llvm::ArrayRef ivs, - llvm::omp::Directive directive) { +static void genCanonicalLoopNest( + lower::AbstractConverter &converter, lower::SymMap &symTable, + semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, + mlir::Location loc, const ConstructQueue &queue, + ConstructQueue::const_iterator item, size_t numLoops, + llvm::SmallVectorImpl &loops) { + assert(loops.empty() && "Expecting empty list to fill"); + assert(numLoops >= 1 && "Expecting at least one loop"); + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - assert(ivs.size() == 1 && "Nested loops not yet implemented"); - const semantics::Symbol *iv = ivs[0]; + mlir::omp::LoopRelatedClauseOps loopInfo; + llvm::SmallVector ivs; + collectLoopRelatedInfo(converter, loc, eval, numLoops, loopInfo, ivs); + assert(ivs.size() == numLoops && + "Expected to 
parse as many loop variables as there are loops"); + + // Steps that follow: + // 1. Emit all of the loop's prologues (compute the tripcount) + // 2. Emit omp.canonical_loop nested inside each other (iteratively) + // 2.1. In the innermost omp.canonical_loop, emit the loop body prologue (in + // the body callback) + // + // Since emitting prologues and body code is split, remember prologue values + // for use when emitting the same loop's epilogues. + llvm::SmallVector tripcounts; + llvm::SmallVector clis; + llvm::SmallVector evals; + llvm::SmallVector loopVarTypes; + llvm::SmallVector loopStepVars; + llvm::SmallVector loopLBVars; + llvm::SmallVector blockArgs; + + // Step 1: Loop prologues + // Computing the trip count must happen before entering the outermost loop + lower::pft::Evaluation *innermostEval = &eval.getFirstNestedEvaluation(); + for ([[maybe_unused]] auto iv : ivs) { + if (innermostEval->getIf()->IsDoConcurrent()) { + // OpenMP specifies DO CONCURRENT only with the `!omp loop` construct. + // Will need to add special cases for this combination. 
+ TODO(loc, "DO CONCURRENT as canonical loop not supported"); + } + + auto &doLoopEval = innermostEval->getFirstNestedEvaluation(); + evals.push_back(innermostEval); + + // Get the loop bounds (and increment) + // auto &doLoopEval = nestedEval.getFirstNestedEvaluation(); + auto *doStmt = doLoopEval.getIf(); + assert(doStmt && "Expected do loop to be in the nested evaluation"); + auto &loopControl = std::get>(doStmt->t); + assert(loopControl.has_value()); + auto *bounds = std::get_if(&loopControl->u); + assert(bounds && "Expected bounds for canonical loop"); + lower::StatementContext stmtCtx; + mlir::Value loopLBVar = fir::getBase( + converter.genExprValue(*semantics::GetExpr(bounds->lower), stmtCtx)); + mlir::Value loopUBVar = fir::getBase( + converter.genExprValue(*semantics::GetExpr(bounds->upper), stmtCtx)); + mlir::Value loopStepVar = [&]() { + if (bounds->step) { + return fir::getBase( + converter.genExprValue(*semantics::GetExpr(bounds->step), stmtCtx)); + } - auto &nestedEval = eval.getFirstNestedEvaluation(); - if (nestedEval.getIf()->IsDoConcurrent()) { - // OpenMP specifies DO CONCURRENT only with the `!omp loop` construct. Will - // need to add special cases for this combination. - TODO(loc, "DO CONCURRENT as canonical loop not supported"); + // If `step` is not present, assume it is `1`. 
+ auto intTy = firOpBuilder.getI32Type(); + return firOpBuilder.createIntegerConstant(loc, intTy, 1); + }(); + + // Get the integer kind for the loop variable and cast the loop bounds + size_t loopVarTypeSize = bounds->name.thing.symbol->GetUltimate().size(); + mlir::Type loopVarType = getLoopVarType(converter, loopVarTypeSize); + loopVarTypes.push_back(loopVarType); + loopLBVar = firOpBuilder.createConvert(loc, loopVarType, loopLBVar); + loopUBVar = firOpBuilder.createConvert(loc, loopVarType, loopUBVar); + loopStepVar = firOpBuilder.createConvert(loc, loopVarType, loopStepVar); + loopLBVars.push_back(loopLBVar); + loopStepVars.push_back(loopStepVar); + + // Start lowering + mlir::Value zero = firOpBuilder.createIntegerConstant(loc, loopVarType, 0); + mlir::Value one = firOpBuilder.createIntegerConstant(loc, loopVarType, 1); + mlir::Value isDownwards = firOpBuilder.create( + loc, mlir::arith::CmpIPredicate::slt, loopStepVar, zero); + + // Ensure we are counting upwards. If not, negate step and swap lb and ub. + mlir::Value negStep = + firOpBuilder.create(loc, zero, loopStepVar); + mlir::Value incr = firOpBuilder.create( + loc, isDownwards, negStep, loopStepVar); + mlir::Value lb = firOpBuilder.create( + loc, isDownwards, loopUBVar, loopLBVar); + mlir::Value ub = firOpBuilder.create( + loc, isDownwards, loopLBVar, loopUBVar); + + // Compute the trip count assuming lb <= ub. This guarantees that the result + // is non-negative and we can use unsigned arithmetic. 
+ mlir::Value span = firOpBuilder.create( + loc, ub, lb, ::mlir::arith::IntegerOverflowFlags::nuw); + mlir::Value tcMinusOne = + firOpBuilder.create(loc, span, incr); + mlir::Value tcIfLooping = firOpBuilder.create( + loc, tcMinusOne, one, ::mlir::arith::IntegerOverflowFlags::nuw); + + // Fall back to 0 if lb > ub + mlir::Value isZeroTC = firOpBuilder.create( + loc, mlir::arith::CmpIPredicate::slt, ub, lb); + mlir::Value tripcount = firOpBuilder.create( + loc, isZeroTC, zero, tcIfLooping); + tripcounts.push_back(tripcount); + + // Create the CLI handle. + auto newcli = firOpBuilder.create(loc); + mlir::Value cli = newcli.getResult(); + clis.push_back(cli); + + innermostEval = &*std::next(innermostEval->getNestedEvaluations().begin()); } - // Get the loop bounds (and increment) - auto &doLoopEval = nestedEval.getFirstNestedEvaluation(); - auto *doStmt = doLoopEval.getIf(); - assert(doStmt && "Expected do loop to be in the nested evaluation"); - auto &loopControl = std::get>(doStmt->t); - assert(loopControl.has_value()); - auto *bounds = std::get_if(&loopControl->u); - assert(bounds && "Expected bounds for canonical loop"); - lower::StatementContext stmtCtx; - mlir::Value loopLBVar = fir::getBase( - converter.genExprValue(*semantics::GetExpr(bounds->lower), stmtCtx)); - mlir::Value loopUBVar = fir::getBase( - converter.genExprValue(*semantics::GetExpr(bounds->upper), stmtCtx)); - mlir::Value loopStepVar = [&]() { - if (bounds->step) { - return fir::getBase( - converter.genExprValue(*semantics::GetExpr(bounds->step), stmtCtx)); - } + // Step 2: Create nested canonical loops + for (auto i : llvm::seq(numLoops)) { + bool isInnermost = (i == numLoops - 1); + mlir::Type loopVarType = loopVarTypes[i]; + mlir::Value tripcount = tripcounts[i]; + mlir::Value cli = clis[i]; + auto &&eval = evals[i]; + + auto ivCallback = [&, i, isInnermost](mlir::Operation *op) + -> llvm::SmallVector { + mlir::Region &region = op->getRegion(0); + + // Create the op's region skeleton (BB taking
the iv as argument) + firOpBuilder.createBlock(&region, {}, {loopVarType}, {loc}); + blockArgs.push_back(region.front().getArgument(0)); + + // Step 2.1: Emit body prologue code + // Compute the translation from logical iteration number to the value of + // the loop's iteration variable only in the innermost body. Currently, + // loop transformations do not allow any instruction between loops, but + // this will change with + if (isInnermost) { + assert(blockArgs.size() == numLoops && + "Expecting all block args to have been collected by now"); + for (auto j : llvm::seq(numLoops)) { + mlir::Value natIterNum = fir::getBase(blockArgs[j]); + mlir::Value scaled = firOpBuilder.create<mlir::arith::MulIOp>( + loc, natIterNum, loopStepVars[j]); + mlir::Value userVal = firOpBuilder.create<mlir::arith::AddIOp>( + loc, loopLBVars[j], scaled); + + mlir::OpBuilder::InsertPoint insPt = + firOpBuilder.saveInsertionPoint(); + firOpBuilder.setInsertionPointToStart(firOpBuilder.getAllocaBlock()); + mlir::Type tempTy = converter.genType(*ivs[j]); + firOpBuilder.restoreInsertionPoint(insPt); + + // Write the loop value into loop variable + mlir::Value cvtVal = firOpBuilder.createConvert(loc, tempTy, userVal); + hlfir::Entity lhs{converter.getSymbolAddress(*ivs[j])}; + lhs = hlfir::derefPointersAndAllocatables(loc, firOpBuilder, lhs); + mlir::Operation *storeOp = + hlfir::AssignOp::create(firOpBuilder, loc, cvtVal, lhs); + firOpBuilder.setInsertionPointAfter(storeOp); + } + } - // If `step` is not present, assume it is `1`. 
- return firOpBuilder.createIntegerConstant(loc, firOpBuilder.getI32Type(), - 1); - }(); + return {ivs[i]}; + }; - // Get the integer kind for the loop variable and cast the loop bounds - size_t loopVarTypeSize = bounds->name.thing.symbol->GetUltimate().size(); - mlir::Type loopVarType = getLoopVarType(converter, loopVarTypeSize); - loopLBVar = firOpBuilder.createConvert(loc, loopVarType, loopLBVar); - loopUBVar = firOpBuilder.createConvert(loc, loopVarType, loopUBVar); - loopStepVar = firOpBuilder.createConvert(loc, loopVarType, loopStepVar); - - // Start lowering - mlir::Value zero = firOpBuilder.createIntegerConstant(loc, loopVarType, 0); - mlir::Value one = firOpBuilder.createIntegerConstant(loc, loopVarType, 1); - mlir::Value isDownwards = mlir::arith::CmpIOp::create( - firOpBuilder, loc, mlir::arith::CmpIPredicate::slt, loopStepVar, zero); - - // Ensure we are counting upwards. If not, negate step and swap lb and ub. - mlir::Value negStep = - mlir::arith::SubIOp::create(firOpBuilder, loc, zero, loopStepVar); - mlir::Value incr = mlir::arith::SelectOp::create( - firOpBuilder, loc, isDownwards, negStep, loopStepVar); - mlir::Value lb = mlir::arith::SelectOp::create(firOpBuilder, loc, isDownwards, - loopUBVar, loopLBVar); - mlir::Value ub = mlir::arith::SelectOp::create(firOpBuilder, loc, isDownwards, - loopLBVar, loopUBVar); - - // Compute the trip count assuming lb <= ub. This guarantees that the result - // is non-negative and we can use unsigned arithmetic. 
- mlir::Value span = mlir::arith::SubIOp::create( - firOpBuilder, loc, ub, lb, ::mlir::arith::IntegerOverflowFlags::nuw); - mlir::Value tcMinusOne = - mlir::arith::DivUIOp::create(firOpBuilder, loc, span, incr); - mlir::Value tcIfLooping = - mlir::arith::AddIOp::create(firOpBuilder, loc, tcMinusOne, one, - ::mlir::arith::IntegerOverflowFlags::nuw); - - // Fall back to 0 if lb > ub - mlir::Value isZeroTC = mlir::arith::CmpIOp::create( - firOpBuilder, loc, mlir::arith::CmpIPredicate::slt, ub, lb); - mlir::Value tripcount = mlir::arith::SelectOp::create( - firOpBuilder, loc, isZeroTC, zero, tcIfLooping); - - // Create the CLI handle. - auto newcli = mlir::omp::NewCliOp::create(firOpBuilder, loc); - mlir::Value cli = newcli.getResult(); - - auto ivCallback = [&](mlir::Operation *op) - -> llvm::SmallVector { - mlir::Region &region = op->getRegion(0); - - // Create the op's region skeleton (BB taking the iv as argument) - firOpBuilder.createBlock(&region, {}, {loopVarType}, {loc}); - - // Compute the value of the loop variable from the logical iteration number. 
- mlir::Value natIterNum = fir::getBase(region.front().getArgument(0)); - mlir::Value scaled = - mlir::arith::MulIOp::create(firOpBuilder, loc, natIterNum, loopStepVar); - mlir::Value userVal = - mlir::arith::AddIOp::create(firOpBuilder, loc, loopLBVar, scaled); - - // Write loop value to loop variable - mlir::Operation *storeOp = setLoopVar(converter, loc, userVal, iv); - - firOpBuilder.setInsertionPointAfter(storeOp); - return {iv}; - }; + // Create the omp.canonical_loop operation + auto opGenInfo = OpWithBodyGenInfo(converter, symTable, semaCtx, loc, *eval, + llvm::omp::Directive::OMPD_unknown) + .setGenSkeletonOnly(!isInnermost) + .setClauses(&item->clauses) + .setPrivatize(false) + .setGenRegionEntryCb(ivCallback); + auto canonLoop = genOpWithBody( + std::move(opGenInfo), queue, item, tripcount, cli); + loops.push_back(canonLoop); + + // Insert next loop nested inside last loop + firOpBuilder.setInsertionPoint( + canonLoop.getRegion().back().getTerminator()); + } - // Create the omp.canonical_loop operation - auto canonLoop = genOpWithBody( - OpWithBodyGenInfo(converter, symTable, semaCtx, loc, nestedEval, - directive) - .setClauses(&item->clauses) - .setPrivatize(false) - .setGenRegionEntryCb(ivCallback), - queue, item, tripcount, cli); + firOpBuilder.setInsertionPointAfter(loops.front()); +} + +static void genTileOp(Fortran::lower::AbstractConverter &converter, + Fortran::lower::SymMap &symTable, + lower::StatementContext &stmtCtx, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, mlir::Location loc, + const ConstructQueue &queue, + ConstructQueue::const_iterator item) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - firOpBuilder.setInsertionPointAfter(canonLoop); - return canonLoop; + mlir::omp::SizesClauseOps sizesClause; + ClauseProcessor cp(converter, semaCtx, item->clauses); + cp.processSizes(stmtCtx, sizesClause); + + size_t numLoops = sizesClause.sizes.size(); + llvm::SmallVector canonLoops; 
+ canonLoops.reserve(numLoops); + + genCanonicalLoopNest(converter, symTable, semaCtx, eval, loc, queue, item, + numLoops, canonLoops); + assert((canonLoops.size() == numLoops) && + "Expecting the predetermined number of loops"); + + llvm::SmallVector applyees; + applyees.reserve(numLoops); + for (mlir::omp::CanonicalLoopOp l : canonLoops) + applyees.push_back(l.getCli()); + + // Emit the associated loops and create a CLI for each affected loop + llvm::SmallVector gridGeneratees; + llvm::SmallVector intratileGeneratees; + gridGeneratees.reserve(numLoops); + intratileGeneratees.reserve(numLoops); + for ([[maybe_unused]] auto i : llvm::seq(0, sizesClause.sizes.size())) { + auto gridCLI = firOpBuilder.create(loc); + gridGeneratees.push_back(gridCLI.getResult()); + auto intratileCLI = firOpBuilder.create(loc); + intratileGeneratees.push_back(intratileCLI.getResult()); + } + + llvm::SmallVector generatees; + generatees.reserve(2 * numLoops); + generatees.append(gridGeneratees); + generatees.append(intratileGeneratees); + + firOpBuilder.create(loc, generatees, applyees, + sizesClause.sizes); } static void genUnrollOp(Fortran::lower::AbstractConverter &converter, @@ -2114,22 +2230,22 @@ static void genUnrollOp(Fortran::lower::AbstractConverter &converter, ConstructQueue::const_iterator item) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - mlir::omp::LoopRelatedClauseOps loopInfo; - llvm::SmallVector iv; - collectLoopRelatedInfo(converter, loc, eval, item->clauses, loopInfo, iv); - // Clauses for unrolling not yet implemnted ClauseProcessor cp(converter, semaCtx, item->clauses); cp.processTODO( loc, llvm::omp::Directive::OMPD_unroll); // Emit the associated loop - auto canonLoop = - genCanonicalLoopOp(converter, symTable, semaCtx, eval, loc, queue, item, - iv, llvm::omp::Directive::OMPD_unroll); + llvm::SmallVector canonLoops; + genCanonicalLoopNest(converter, symTable, semaCtx, eval, loc, queue, item, 1, + canonLoops); + + llvm::SmallVector applyees; + 
for (auto &&canonLoop : canonLoops) + applyees.push_back(canonLoop.getCli()); // Apply unrolling to it - auto cli = canonLoop.getCli(); + auto cli = llvm::getSingleElement(canonLoops).getCli(); mlir::omp::UnrollHeuristicOp::create(firOpBuilder, loc, cli); } @@ -3362,13 +3478,9 @@ static void genOMPDispatch(lower::AbstractConverter &converter, newOp = genTeamsOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item); break; - case llvm::omp::Directive::OMPD_tile: { - unsigned version = semaCtx.langOptions().OpenMPVersion; - if (!semaCtx.langOptions().OpenMPSimd) - TODO(loc, "Unhandled loop directive (" + - llvm::omp::getOpenMPDirectiveName(dir, version) + ")"); + case llvm::omp::Directive::OMPD_tile: + genTileOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item); break; - } case llvm::omp::Directive::OMPD_unroll: genUnrollOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item); break; diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index 83b7ccb1ce0ee..4a392d4944fc8 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -667,6 +667,25 @@ int64_t collectLoopRelatedInfo( numCollapse = collapseValue; } + collectLoopRelatedInfo(converter, currentLocation, eval, numCollapse, result, + iv); + return numCollapse; +} + +void collectLoopRelatedInfo( + lower::AbstractConverter &converter, mlir::Location currentLocation, + lower::pft::Evaluation &eval, int64_t numCollapse, + mlir::omp::LoopRelatedClauseOps &result, + llvm::SmallVectorImpl &iv) { + + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + + // Collect the loops to collapse. + lower::pft::Evaluation *doConstructEval = &eval.getFirstNestedEvaluation(); + if (doConstructEval->getIf()->IsDoConcurrent()) { + TODO(currentLocation, "Do Concurrent in Worksharing loop construct"); + } + // Collect sizes from tile directive if present. 
std::int64_t sizesLengthValue = 0l; if (auto *ompCons{eval.getIf()}) { @@ -676,7 +695,7 @@ int64_t collectLoopRelatedInfo( }); } - collapseValue = std::max(collapseValue, sizesLengthValue); + std::int64_t collapseValue = std::max(numCollapse, sizesLengthValue); std::size_t loopVarTypeSize = 0; do { lower::pft::Evaluation *doLoop = @@ -709,8 +728,6 @@ int64_t collectLoopRelatedInfo( } while (collapseValue > 0); convertLoopBounds(converter, currentLocation, result, loopVarTypeSize); - - return numCollapse; } } // namespace omp diff --git a/flang/lib/Lower/OpenMP/Utils.h b/flang/lib/Lower/OpenMP/Utils.h index 5f191d89ae205..69499f9c7b621 100644 --- a/flang/lib/Lower/OpenMP/Utils.h +++ b/flang/lib/Lower/OpenMP/Utils.h @@ -165,6 +165,13 @@ int64_t collectLoopRelatedInfo( mlir::omp::LoopRelatedClauseOps &result, llvm::SmallVectorImpl &iv); +void collectLoopRelatedInfo( + lower::AbstractConverter &converter, mlir::Location currentLocation, + lower::pft::Evaluation &eval, std::int64_t collapseValue, + // const omp::List &clauses, + mlir::omp::LoopRelatedClauseOps &result, + llvm::SmallVectorImpl &iv); + void collectTileSizesFromOpenMPConstruct( const parser::OpenMPConstruct *ompCons, llvm::SmallVectorImpl &tileSizes, diff --git a/flang/lib/Semantics/check-directive-structure.h b/flang/lib/Semantics/check-directive-structure.h index b1bf3e550aebc..bd78d3cfe91e7 100644 --- a/flang/lib/Semantics/check-directive-structure.h +++ b/flang/lib/Semantics/check-directive-structure.h @@ -383,7 +383,8 @@ class DirectiveStructureChecker : public virtual BaseChecker { const C &clause, const parser::ScalarIntConstantExpr &i); void RequiresPositiveParameter(const C &clause, - const parser::ScalarIntExpr &i, llvm::StringRef paramName = "parameter"); + const parser::ScalarIntExpr &i, llvm::StringRef paramName = "parameter", + bool allowZero = true); void OptionalConstantPositiveParameter( const C &clause, const std::optional &o); @@ -657,9 +658,9 @@ void 
DirectiveStructureChecker::SayNotMatching( template void DirectiveStructureChecker::RequiresPositiveParameter(const C &clause, - const parser::ScalarIntExpr &i, llvm::StringRef paramName) { + const parser::ScalarIntExpr &i, llvm::StringRef paramName, bool allowZero) { if (const auto v{GetIntValue(i)}) { - if (*v < 0) { + if (*v < (allowZero ? 0 : 1)) { context_.Say(GetContext().clauseSource, "The %s of the %s clause must be " "a positive integer expression"_err_en_US, diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index c39daef6b0ea9..ab182c7674062 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -2749,6 +2749,13 @@ void OmpStructureChecker::Enter(const parser::OmpClause &x) { } } +void OmpStructureChecker::Enter(const parser::OmpClause::Sizes &c) { + CheckAllowedClause(llvm::omp::Clause::OMPC_sizes); + for (const parser::Cosubscript &v : c.v) + RequiresPositiveParameter(llvm::omp::Clause::OMPC_sizes, v, + /*paramName=*/"parameter", /*allowZero=*/false); +} + // Following clauses do not have a separate node in parse-tree.h. 
CHECK_SIMPLE_CLAUSE(Absent, OMPC_absent) CHECK_SIMPLE_CLAUSE(Affinity, OMPC_affinity) @@ -2790,7 +2797,6 @@ CHECK_SIMPLE_CLAUSE(Notinbranch, OMPC_notinbranch) CHECK_SIMPLE_CLAUSE(Partial, OMPC_partial) CHECK_SIMPLE_CLAUSE(ProcBind, OMPC_proc_bind) CHECK_SIMPLE_CLAUSE(Simd, OMPC_simd) -CHECK_SIMPLE_CLAUSE(Sizes, OMPC_sizes) CHECK_SIMPLE_CLAUSE(Permutation, OMPC_permutation) CHECK_SIMPLE_CLAUSE(Uniform, OMPC_uniform) CHECK_SIMPLE_CLAUSE(Unknown, OMPC_unknown) diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index 5f2c9f676099c..2fa6cbc5d8167 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -2392,10 +2392,18 @@ void OmpAttributeVisitor::PrivatizeAssociatedLoopIndexAndCheckLoopLevel( void OmpAttributeVisitor::CheckAssocLoopLevel( std::int64_t level, const parser::OmpClause *clause) { if (clause && level != 0) { - context_.Say(clause->source, - "The value of the parameter in the COLLAPSE or ORDERED clause must" - " not be larger than the number of nested loops" - " following the construct."_err_en_US); + switch (clause->Id()) { + case llvm::omp::OMPC_sizes: + context_.Say(clause->source, + "The SIZES clause has more entries than there are nested canonical loops."_err_en_US); + break; + default: + context_.Say(clause->source, + "The value of the parameter in the COLLAPSE or ORDERED clause must" + " not be larger than the number of nested loops" + " following the construct."_err_en_US); + break; + } } } diff --git a/flang/test/Lower/OpenMP/tile01.f90 b/flang/test/Lower/OpenMP/tile01.f90 new file mode 100644 index 0000000000000..7603eee4b18d8 --- /dev/null +++ b/flang/test/Lower/OpenMP/tile01.f90 @@ -0,0 +1,58 @@ +! 
RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=51 -o - %s | FileCheck %s + + +subroutine omp_tile01(lb, ub, inc) + integer res, i, lb, ub, inc + + !$omp tile sizes(4) + do i = lb, ub, inc + res = i + end do + !$omp end tile + +end subroutine omp_tile01 + + +! CHECK: func.func @_QPomp_tile01( +! CHECK: %[[ARG0:.*]]: !fir.ref {fir.bindc_name = "lb"}, +! CHECK: %[[ARG1:.*]]: !fir.ref {fir.bindc_name = "ub"}, +! CHECK: %[[ARG2:.*]]: !fir.ref {fir.bindc_name = "inc"}) { +! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope +! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFomp_tile01Ei"} +! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFomp_tile01Ei"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[ARG2]] dummy_scope %[[VAL_0]] {uniq_name = "_QFomp_tile01Einc"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_0]] {uniq_name = "_QFomp_tile01Elb"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_5:.*]] = fir.alloca i32 {bindc_name = "res", uniq_name = "_QFomp_tile01Eres"} +! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFomp_tile01Eres"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %[[VAL_0]] {uniq_name = "_QFomp_tile01Eub"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_8:.*]] = arith.constant 4 : i32 +! CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_4]]#0 : !fir.ref +! CHECK: %[[VAL_10:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref +! CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_3]]#0 : !fir.ref +! CHECK: %[[VAL_12:.*]] = arith.constant 0 : i32 +! CHECK: %[[VAL_13:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_14:.*]] = arith.cmpi slt, %[[VAL_11]], %[[VAL_12]] : i32 +! CHECK: %[[VAL_15:.*]] = arith.subi %[[VAL_12]], %[[VAL_11]] : i32 +! CHECK: %[[VAL_16:.*]] = arith.select %[[VAL_14]], %[[VAL_15]], %[[VAL_11]] : i32 +! 
CHECK: %[[VAL_17:.*]] = arith.select %[[VAL_14]], %[[VAL_10]], %[[VAL_9]] : i32 +! CHECK: %[[VAL_18:.*]] = arith.select %[[VAL_14]], %[[VAL_9]], %[[VAL_10]] : i32 +! CHECK: %[[VAL_19:.*]] = arith.subi %[[VAL_18]], %[[VAL_17]] overflow : i32 +! CHECK: %[[VAL_20:.*]] = arith.divui %[[VAL_19]], %[[VAL_16]] : i32 +! CHECK: %[[VAL_21:.*]] = arith.addi %[[VAL_20]], %[[VAL_13]] overflow : i32 +! CHECK: %[[VAL_22:.*]] = arith.cmpi slt, %[[VAL_18]], %[[VAL_17]] : i32 +! CHECK: %[[VAL_23:.*]] = arith.select %[[VAL_22]], %[[VAL_12]], %[[VAL_21]] : i32 +! CHECK: %[[VAL_24:.*]] = omp.new_cli +! CHECK: omp.canonical_loop(%[[VAL_24]]) %[[VAL_25:.*]] : i32 in range(%[[VAL_23]]) { +! CHECK: %[[VAL_26:.*]] = arith.muli %[[VAL_25]], %[[VAL_11]] : i32 +! CHECK: %[[VAL_27:.*]] = arith.addi %[[VAL_9]], %[[VAL_26]] : i32 +! CHECK: hlfir.assign %[[VAL_27]] to %[[VAL_2]]#0 : i32, !fir.ref +! CHECK: %[[VAL_28:.*]] = fir.load %[[VAL_2]]#0 : !fir.ref +! CHECK: hlfir.assign %[[VAL_28]] to %[[VAL_6]]#0 : i32, !fir.ref +! CHECK: omp.terminator +! CHECK: } +! CHECK: %[[VAL_29:.*]] = omp.new_cli +! CHECK: %[[VAL_30:.*]] = omp.new_cli +! CHECK: omp.tile (%[[VAL_29]], %[[VAL_30]]) <- (%[[VAL_24]]) sizes(%[[VAL_8]] : i32) +! CHECK: return +! CHECK: } + diff --git a/flang/test/Lower/OpenMP/tile02.f90 b/flang/test/Lower/OpenMP/tile02.f90 new file mode 100644 index 0000000000000..5df506d17ed05 --- /dev/null +++ b/flang/test/Lower/OpenMP/tile02.f90 @@ -0,0 +1,88 @@ +! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=51 -o - %s | FileCheck %s + + +subroutine omp_tile02(lb, ub, inc) + integer res, i, lb, ub, inc + + !$omp tile sizes(3,7) + do i = lb, ub, inc + do j = lb, ub, inc + res = i + j + end do + end do + !$omp end tile + +end subroutine omp_tile02 + + +! CHECK: func.func @_QPomp_tile02( +! CHECK: %[[ARG0:.*]]: !fir.ref {fir.bindc_name = "lb"}, +! CHECK: %[[ARG1:.*]]: !fir.ref {fir.bindc_name = "ub"}, +! CHECK: %[[ARG2:.*]]: !fir.ref {fir.bindc_name = "inc"}) { +! 
CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope +! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFomp_tile02Ei"} +! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFomp_tile02Ei"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[ARG2]] dummy_scope %[[VAL_0]] {uniq_name = "_QFomp_tile02Einc"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_4:.*]] = fir.alloca i32 {bindc_name = "j", uniq_name = "_QFomp_tile02Ej"} +! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] {uniq_name = "_QFomp_tile02Ej"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_0]] {uniq_name = "_QFomp_tile02Elb"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_7:.*]] = fir.alloca i32 {bindc_name = "res", uniq_name = "_QFomp_tile02Eres"} +! CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = "_QFomp_tile02Eres"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_9:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %[[VAL_0]] {uniq_name = "_QFomp_tile02Eub"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_10:.*]] = arith.constant 3 : i32 +! CHECK: %[[VAL_11:.*]] = arith.constant 7 : i32 +! CHECK: %[[VAL_12:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref +! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_9]]#0 : !fir.ref +! CHECK: %[[VAL_14:.*]] = fir.load %[[VAL_3]]#0 : !fir.ref +! CHECK: %[[VAL_15:.*]] = arith.constant 0 : i32 +! CHECK: %[[VAL_16:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_17:.*]] = arith.cmpi slt, %[[VAL_14]], %[[VAL_15]] : i32 +! CHECK: %[[VAL_18:.*]] = arith.subi %[[VAL_15]], %[[VAL_14]] : i32 +! CHECK: %[[VAL_19:.*]] = arith.select %[[VAL_17]], %[[VAL_18]], %[[VAL_14]] : i32 +! CHECK: %[[VAL_20:.*]] = arith.select %[[VAL_17]], %[[VAL_13]], %[[VAL_12]] : i32 +! CHECK: %[[VAL_21:.*]] = arith.select %[[VAL_17]], %[[VAL_12]], %[[VAL_13]] : i32 +! 
CHECK: %[[VAL_22:.*]] = arith.subi %[[VAL_21]], %[[VAL_20]] overflow : i32 +! CHECK: %[[VAL_23:.*]] = arith.divui %[[VAL_22]], %[[VAL_19]] : i32 +! CHECK: %[[VAL_24:.*]] = arith.addi %[[VAL_23]], %[[VAL_16]] overflow : i32 +! CHECK: %[[VAL_25:.*]] = arith.cmpi slt, %[[VAL_21]], %[[VAL_20]] : i32 +! CHECK: %[[VAL_26:.*]] = arith.select %[[VAL_25]], %[[VAL_15]], %[[VAL_24]] : i32 +! CHECK: %[[VAL_27:.*]] = omp.new_cli +! CHECK: %[[VAL_28:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref +! CHECK: %[[VAL_29:.*]] = fir.load %[[VAL_9]]#0 : !fir.ref +! CHECK: %[[VAL_30:.*]] = fir.load %[[VAL_3]]#0 : !fir.ref +! CHECK: %[[VAL_31:.*]] = arith.constant 0 : i32 +! CHECK: %[[VAL_32:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_33:.*]] = arith.cmpi slt, %[[VAL_30]], %[[VAL_31]] : i32 +! CHECK: %[[VAL_34:.*]] = arith.subi %[[VAL_31]], %[[VAL_30]] : i32 +! CHECK: %[[VAL_35:.*]] = arith.select %[[VAL_33]], %[[VAL_34]], %[[VAL_30]] : i32 +! CHECK: %[[VAL_36:.*]] = arith.select %[[VAL_33]], %[[VAL_29]], %[[VAL_28]] : i32 +! CHECK: %[[VAL_37:.*]] = arith.select %[[VAL_33]], %[[VAL_28]], %[[VAL_29]] : i32 +! CHECK: %[[VAL_38:.*]] = arith.subi %[[VAL_37]], %[[VAL_36]] overflow : i32 +! CHECK: %[[VAL_39:.*]] = arith.divui %[[VAL_38]], %[[VAL_35]] : i32 +! CHECK: %[[VAL_40:.*]] = arith.addi %[[VAL_39]], %[[VAL_32]] overflow : i32 +! CHECK: %[[VAL_41:.*]] = arith.cmpi slt, %[[VAL_37]], %[[VAL_36]] : i32 +! CHECK: %[[VAL_42:.*]] = arith.select %[[VAL_41]], %[[VAL_31]], %[[VAL_40]] : i32 +! CHECK: %[[VAL_43:.*]] = omp.new_cli +! CHECK: omp.canonical_loop(%[[VAL_27]]) %[[VAL_44:.*]] : i32 in range(%[[VAL_26]]) { +! CHECK: omp.canonical_loop(%[[VAL_43]]) %[[VAL_45:.*]] : i32 in range(%[[VAL_42]]) { +! CHECK: %[[VAL_46:.*]] = arith.muli %[[VAL_44]], %[[VAL_14]] : i32 +! CHECK: %[[VAL_47:.*]] = arith.addi %[[VAL_12]], %[[VAL_46]] : i32 +! CHECK: hlfir.assign %[[VAL_47]] to %[[VAL_2]]#0 : i32, !fir.ref +! CHECK: %[[VAL_48:.*]] = arith.muli %[[VAL_45]], %[[VAL_30]] : i32 +! 
CHECK: %[[VAL_49:.*]] = arith.addi %[[VAL_28]], %[[VAL_48]] : i32 +! CHECK: hlfir.assign %[[VAL_49]] to %[[VAL_5]]#0 : i32, !fir.ref +! CHECK: %[[VAL_50:.*]] = fir.load %[[VAL_2]]#0 : !fir.ref +! CHECK: %[[VAL_51:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref +! CHECK: %[[VAL_52:.*]] = arith.addi %[[VAL_50]], %[[VAL_51]] : i32 +! CHECK: hlfir.assign %[[VAL_52]] to %[[VAL_8]]#0 : i32, !fir.ref +! CHECK: omp.terminator +! CHECK: } +! CHECK: omp.terminator +! CHECK: } +! CHECK: %[[VAL_53:.*]] = omp.new_cli +! CHECK: %[[VAL_54:.*]] = omp.new_cli +! CHECK: %[[VAL_55:.*]] = omp.new_cli +! CHECK: %[[VAL_56:.*]] = omp.new_cli +! CHECK: omp.tile (%[[VAL_53]], %[[VAL_55]], %[[VAL_54]], %[[VAL_56]]) <- (%[[VAL_27]], %[[VAL_43]]) sizes(%[[VAL_10]], %[[VAL_11]] : i32, i32) +! CHECK: return +! CHECK: } diff --git a/flang/test/Parser/OpenMP/loop-transformation-construct02.f90 b/flang/test/Parser/OpenMP/loop-transformation-construct02.f90 index a6af35a0111a3..a876c77a274b5 100644 --- a/flang/test/Parser/OpenMP/loop-transformation-construct02.f90 +++ b/flang/test/Parser/OpenMP/loop-transformation-construct02.f90 @@ -11,7 +11,7 @@ subroutine loop_transformation_construct !$omp do !$omp unroll - !$omp tile + !$omp tile sizes(2) do i = 1, I y(i) = y(i) * 5 end do @@ -34,7 +34,8 @@ subroutine loop_transformation_construct !CHECK-PARSE-NEXT: | | | | OpenMPLoopConstruct !CHECK-PARSE-NEXT: | | | | | OmpBeginLoopDirective !CHECK-PARSE-NEXT: | | | | | | OmpDirectiveName -> llvm::omp::Directive = tile -!CHECK-PARSE-NEXT: | | | | | | OmpClauseList -> +!CHECK-PARSE-NEXT: | | | | | | OmpClauseList -> OmpClause -> Sizes -> Scalar -> Integer -> Expr = '2_4' +!CHECK-PARSE-NEXT: | | | | | | | LiteralConstant -> IntLiteralConstant = '2' !CHECK-PARSE-NEXT: | | | | | | Flags = None !CHECK-PARSE-NEXT: | | | | | DoConstruct !CHECK-PARSE-NEXT: | | | | | | NonLabelDoStmt diff --git a/flang/test/Parser/OpenMP/tile-fail.f90 b/flang/test/Parser/OpenMP/tile-fail.f90 new file mode 100644 index 
0000000000000..267ed0ad48437 --- /dev/null +++ b/flang/test/Parser/OpenMP/tile-fail.f90 @@ -0,0 +1,32 @@ +! RUN: split-file %s %t +! RUN: not %flang_fc1 -fsyntax-only -fopenmp %t/stray_end1.f90 2>&1 | FileCheck %t/stray_end1.f90 +! RUN: not %flang_fc1 -fsyntax-only -fopenmp %t/stray_end2.f90 2>&1 | FileCheck %t/stray_end2.f90 +! RUN: not %flang_fc1 -fsyntax-only -fopenmp %t/stray_begin.f90 2>&1 | FileCheck %t/stray_begin.f90 + + +!--- stray_end1.f90 +! Parser error + +subroutine stray_end1 + !CHECK: error: expected OpenMP construct + !$omp end tile +end subroutine + + +!--- stray_end2.f90 +! Semantic error + +subroutine stray_end2 + print * + !CHECK: error: The END TILE directive must follow the DO loop associated with the loop construct + !$omp end tile +end subroutine + + +!--- stray_begin.f90 + +subroutine stray_begin + !CHECK: error: A DO loop must follow the TILE directive + !$omp tile sizes(2) +end subroutine + diff --git a/flang/test/Parser/OpenMP/tile.f90 b/flang/test/Parser/OpenMP/tile.f90 index 2ea17471866a4..82004fd37a0f2 100644 --- a/flang/test/Parser/OpenMP/tile.f90 +++ b/flang/test/Parser/OpenMP/tile.f90 @@ -1,12 +1,12 @@ -! RUN: %flang_fc1 -fdebug-unparse -fopenmp %s | FileCheck --ignore-case %s -! RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp %s | FileCheck --check-prefix="PARSE-TREE" %s +! RUN: %flang_fc1 -fdebug-unparse -fopenmp -fopenmp-version=51 %s | FileCheck --ignore-case %s +! 
RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp -fopenmp-version=51 %s | FileCheck --check-prefix="PARSE-TREE" %s subroutine openmp_tiles(x) integer, intent(inout)::x -!CHECK: !$omp tile -!$omp tile +!CHECK: !$omp tile sizes(2_4) +!$omp tile sizes(2) !CHECK: do do x = 1, 100 call F1() @@ -17,7 +17,12 @@ subroutine openmp_tiles(x) !PARSE-TREE: OpenMPConstruct -> OpenMPLoopConstruct !PARSE-TREE: OmpBeginLoopDirective +!PARSE-TREE: OmpClauseList -> OmpClause -> Sizes -> Scalar -> Integer -> Expr = '2_4' +!PARSE-TREE: LiteralConstant -> IntLiteralConstant = '2' +!PARSE-TREE: Flags = None +!PARSE-TREE: DoConstruct +!PARSE-TREE: EndDoStmt +!PARSE-TREE: OmpEndLoopDirective !PARSE-TREE: OmpDirectiveName -> llvm::omp::Directive = tile END subroutine openmp_tiles - diff --git a/flang/test/Semantics/OpenMP/tile01.f90 b/flang/test/Semantics/OpenMP/tile01.f90 new file mode 100644 index 0000000000000..3d7b3f4f42e92 --- /dev/null +++ b/flang/test/Semantics/OpenMP/tile01.f90 @@ -0,0 +1,26 @@ +! Testing the Semantics of tile +!RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=51 + + +subroutine missing_sizes + implicit none + integer i + + !ERROR: At least one of SIZES clause must appear on the TILE directive + !$omp tile + do i = 1, 42 + print *, i + end do +end subroutine + + +subroutine double_sizes + implicit none + integer i + + !ERROR: At most one SIZES clause can appear on the TILE directive + !$omp tile sizes(2) sizes(2) + do i = 1, 5 + print *, i + end do +end subroutine diff --git a/flang/test/Semantics/OpenMP/tile02.f90 b/flang/test/Semantics/OpenMP/tile02.f90 new file mode 100644 index 0000000000000..676796375353f --- /dev/null +++ b/flang/test/Semantics/OpenMP/tile02.f90 @@ -0,0 +1,15 @@ +! 
Testing the Semantics of tile +!RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=51 + + +subroutine on_unroll + implicit none + integer i + + !ERROR: If a loop construct has been fully unrolled, it cannot then be tiled + !$omp tile sizes(2) + !$omp unroll + do i = 1, 5 + print *, i + end do +end subroutine diff --git a/flang/test/Semantics/OpenMP/tile03.f90 b/flang/test/Semantics/OpenMP/tile03.f90 new file mode 100644 index 0000000000000..e5c134638ac8d --- /dev/null +++ b/flang/test/Semantics/OpenMP/tile03.f90 @@ -0,0 +1,15 @@ +! Testing the Semantics of tile +!RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=51 + + +subroutine loop_assoc + implicit none + integer :: i = 0 + + !$omp tile sizes(2) + !ERROR: The associated loop of a loop-associated directive cannot be a DO WHILE. + do while (i <= 10) + i = i + 1 + print *, i + end do +end subroutine diff --git a/flang/test/Semantics/OpenMP/tile04.f90 b/flang/test/Semantics/OpenMP/tile04.f90 new file mode 100644 index 0000000000000..2b503efbcf52b --- /dev/null +++ b/flang/test/Semantics/OpenMP/tile04.f90 @@ -0,0 +1,38 @@ +! 
Testing the Semantics of tile +!RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=51 + + +subroutine threads_zero + implicit none + integer i + + !ERROR: The parameter of the NUM_THREADS clause must be a positive integer expression + !$omp parallel do num_threads(-1) + do i = 1, 5 + print *, i + end do +end subroutine + + +subroutine sizes_zero + implicit none + integer i + + !ERROR: The parameter of the SIZES clause must be a positive integer expression + !$omp tile sizes(0) + do i = 1, 5 + print *, i + end do +end subroutine + + +subroutine sizes_negative + implicit none + integer i + + !ERROR: The parameter of the SIZES clause must be a positive integer expression + !$omp tile sizes(-1) + do i = 1, 5 + print *, i + end do +end subroutine diff --git a/flang/test/Semantics/OpenMP/tile05.f90 b/flang/test/Semantics/OpenMP/tile05.f90 new file mode 100644 index 0000000000000..70c43811a5832 --- /dev/null +++ b/flang/test/Semantics/OpenMP/tile05.f90 @@ -0,0 +1,14 @@ +! Testing the Semantics of tile +!RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=51 + + +subroutine insufficient_loops + implicit none + integer i + + !ERROR: The SIZES clause has more entries than there are nested canonical loops. + !$omp tile sizes(2, 2) + do i = 1, 5 + print *, i + end do +end subroutine diff --git a/flang/test/Semantics/OpenMP/tile06.f90 b/flang/test/Semantics/OpenMP/tile06.f90 new file mode 100644 index 0000000000000..52518d43f0554 --- /dev/null +++ b/flang/test/Semantics/OpenMP/tile06.f90 @@ -0,0 +1,44 @@ +! 
Testing the Semantics of tile +!RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=51 + + +subroutine nonrectangular_loop_lb + implicit none + integer i, j + + !ERROR: Trip count must be computable and invariant + !$omp tile sizes(2,2) + do i = 1, 5 + do j = 1, i + print *, i, j + end do + end do +end subroutine + + +subroutine nonrectangular_loop_ub + implicit none + integer i, j + + !ERROR: Trip count must be computable and invariant + !$omp tile sizes(2,2) + do i = 1, 5 + do j = 1, i + print *, i, j + end do + end do +end subroutine + + +subroutine nonrectangular_loop_step + implicit none + integer i, j + + !ERROR: Trip count must be computable and invariant + !$omp tile sizes(2,2) + do i = 1, 5 + do j = 1, 42, i + print *, i, j + end do + end do +end subroutine diff --git a/flang/test/Semantics/OpenMP/tile07.f90 b/flang/test/Semantics/OpenMP/tile07.f90 new file mode 100644 index 0000000000000..70a6f5fc529a4 --- /dev/null +++ b/flang/test/Semantics/OpenMP/tile07.f90 @@ -0,0 +1,35 @@ +! Testing the Semantics of tile +!RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=51 + + +subroutine non_perfectly_nested_loop_behind + implicit none + integer i, j + + !ERROR: Canonical loop nest must be perfectly nested. + !$omp tile sizes(2,2) + do i = 1, 5 + do j = 1, 42 + print *, j + end do + print *, i + end do +end subroutine + + +subroutine non_perfectly_nested_loop_before + implicit none + integer i, j + + !ERROR: The SIZES clause has more entries than there are nested canonical loops. + !$omp tile sizes(2,2) + do i = 1, 5 + print *, i + do j = 1, 42 + print *, j + end do + end do +end subroutine + + + diff --git a/flang/test/Semantics/OpenMP/tile08.f90 b/flang/test/Semantics/OpenMP/tile08.f90 new file mode 100644 index 0000000000000..f42805cb81b7d --- /dev/null +++ b/flang/test/Semantics/OpenMP/tile08.f90 @@ -0,0 +1,15 @@ +! 
Testing the Semantics of tile +!RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=51 + + +subroutine do_concurrent + implicit none + integer i, j + + + !$omp tile sizes(2,2) + !ERROR: DO CONCURRENT loops cannot form part of a loop nest. + do concurrent (i = 1:42, j = 1:42) + print *, i, j + end do +end subroutine diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index 4d9b8f8a6c51e..2911b4c8df1b1 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -1324,6 +1324,9 @@ def OMP_Tile : Directive<[Spelling<"tile">]> { let allowedOnceClauses = [ VersionedClause, ]; + let requiredClauses = [ + VersionedClause, + ]; let association = AS_Loop; let category = CA_Executable; } diff --git a/openmp/runtime/test/transform/tile/intfor.f90 b/openmp/runtime/test/transform/tile/intfor.f90 new file mode 100644 index 0000000000000..dac0de6a99021 --- /dev/null +++ b/openmp/runtime/test/transform/tile/intfor.f90 @@ -0,0 +1,31 @@ +! This test checks lowering of the OpenMP tile directive +! It is done 3 times corresponding to every possible fraction of the last +! iteration before passing beyond UB. + +! RUN: %flang %flags %openmp_flags -fopenmp-version=51 -DUB=16 %s -o %t-ub16.exe +! RUN: %flang %flags %openmp_flags -fopenmp-version=51 -DUB=17 %s -o %t-ub17.exe +! RUN: %flang %flags %openmp_flags -fopenmp-version=51 -DUB=18 %s -o %t-ub18.exe +! RUN: %t-ub16.exe | FileCheck %s --match-full-lines +! RUN: %t-ub17.exe | FileCheck %s --match-full-lines +! RUN: %t-ub18.exe | FileCheck %s --match-full-lines + +program tile_intfor_1d + integer i + print *, 'do' + + !$OMP TILE SIZES(2) + do i=7, UB, 3 + print '("i=", I0)', i + end do + !$OMP END TILE + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 +! CHECK-NEXT: i=10 +! CHECK-NEXT: i=13 +! CHECK-NEXT: i=16 +! 
CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/tile/intfor_2d.f90 b/openmp/runtime/test/transform/tile/intfor_2d.f90 new file mode 100644 index 0000000000000..6bc90c768b8d3 --- /dev/null +++ b/openmp/runtime/test/transform/tile/intfor_2d.f90 @@ -0,0 +1,53 @@ +! This test checks lowering of OpenMP tile directive + +! RUN: %flang %flags %openmp_flags -fopenmp-version=51 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + + +program tile_intfor_2d + integer i, j + print *, 'do' + + !$OMP TILE SIZES(2,3) + do i = 7, 16, 3 + do j = 0, 4 + print '("i=", I0," j=", I0)', i, j + end do + end do + !$OMP END TILE + + print *, 'done' +end program + + +! CHECK: do + +! complete tile +! CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=7 j=2 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=10 j=2 + +! partial tile +! CHECK-NEXT: i=7 j=3 +! CHECK-NEXT: i=7 j=4 +! CHECK-NEXT: i=10 j=3 +! CHECK-NEXT: i=10 j=4 + +! complete tile +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=13 j=1 +! CHECK-NEXT: i=13 j=2 +! CHECK-NEXT: i=16 j=0 +! CHECK-NEXT: i=16 j=1 +! CHECK-NEXT: i=16 j=2 + +! partial tile +! CHECK-NEXT: i=13 j=3 +! CHECK-NEXT: i=13 j=4 +! CHECK-NEXT: i=16 j=3 +! CHECK-NEXT: i=16 j=4 + +! CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/tile/intfor_2d_varsizes.F90 b/openmp/runtime/test/transform/tile/intfor_2d_varsizes.F90 new file mode 100644 index 0000000000000..4cb5adf606dd2 --- /dev/null +++ b/openmp/runtime/test/transform/tile/intfor_2d_varsizes.F90 @@ -0,0 +1,60 @@ +! This test checks lowering of OpenMP tile directive + +! RUN: %flang %flags %openmp_flags -fopenmp-version=51 %s -o %t.exe +! 
RUN: %t.exe | FileCheck %s --match-full-lines + +program tile_intfor_varsizes + integer i + + call kernel(7,17,3,2) + call kernel(7,17,3,3) + +end program + + +subroutine kernel(lb, ub, step, ts) + integer i, j, lb, ub, step, ts + + print *, 'do' + + !$OMP TILE SIZES(ts,ts) + do i = lb, ub, step + do j = 0, 2 + print '("i=", I0," j=", I0)', i, j + end do + end do + !$OMP END TILE + + print *, 'done' + +end subroutine + +! CHECK: do +! CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=7 j=2 +! CHECK-NEXT: i=10 j=2 +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=13 j=1 +! CHECK-NEXT: i=16 j=0 +! CHECK-NEXT: i=16 j=1 +! CHECK-NEXT: i=13 j=2 +! CHECK-NEXT: i=16 j=2 +! CHECK-NEXT: done + +! CHECK: do +! CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=7 j=2 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=10 j=2 +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=13 j=1 +! CHECK-NEXT: i=13 j=2 +! CHECK-NEXT: i=16 j=0 +! CHECK-NEXT: i=16 j=1 +! CHECK-NEXT: i=16 j=2 +! CHECK-NEXT: done From e7c0c5a1d64797acaddbbca50927545868f7256c Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Tue, 23 Sep 2025 16:03:43 +0200 Subject: [PATCH 06/12] dos2unix --- flang/test/Parser/OpenMP/tile-fail.f90 | 64 +++++++++++++------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/flang/test/Parser/OpenMP/tile-fail.f90 b/flang/test/Parser/OpenMP/tile-fail.f90 index 267ed0ad48437..0a92e5bcb6570 100644 --- a/flang/test/Parser/OpenMP/tile-fail.f90 +++ b/flang/test/Parser/OpenMP/tile-fail.f90 @@ -1,32 +1,32 @@ -! RUN: split-file %s %t -! RUN: not %flang_fc1 -fsyntax-only -fopenmp %t/stray_end1.f90 2>&1 | FileCheck %t/stray_end1.f90 -! RUN: not %flang_fc1 -fsyntax-only -fopenmp %t/stray_end2.f90 2>&1 | FileCheck %t/stray_end2.f90 -! RUN: not %flang_fc1 -fsyntax-only -fopenmp %t/stray_begin.f90 2>&1 | FileCheck %t/stray_begin.f90 - - -!--- stray_end1.f90 -! 
Parser error - -subroutine stray_end1 - !CHECK: error: expected OpenMP construct - !$omp end tile -end subroutine - - -!--- stray_end2.f90 -! Semantic error - -subroutine stray_end2 - print * - !CHECK: error: The END TILE directive must follow the DO loop associated with the loop construct - !$omp end tile -end subroutine - - -!--- stray_begin.f90 - -subroutine stray_begin - !CHECK: error: A DO loop must follow the TILE directive - !$omp tile sizes(2) -end subroutine - +! RUN: split-file %s %t +! RUN: not %flang_fc1 -fsyntax-only -fopenmp %t/stray_end1.f90 2>&1 | FileCheck %t/stray_end1.f90 +! RUN: not %flang_fc1 -fsyntax-only -fopenmp %t/stray_end2.f90 2>&1 | FileCheck %t/stray_end2.f90 +! RUN: not %flang_fc1 -fsyntax-only -fopenmp %t/stray_begin.f90 2>&1 | FileCheck %t/stray_begin.f90 + + +!--- stray_end1.f90 +! Parser error + +subroutine stray_end1 + !CHECK: error: expected OpenMP construct + !$omp end tile +end subroutine + + +!--- stray_end2.f90 +! Semantic error + +subroutine stray_end2 + print * + !CHECK: error: The END TILE directive must follow the DO loop associated with the loop construct + !$omp end tile +end subroutine + + +!--- stray_begin.f90 + +subroutine stray_begin + !CHECK: error: A DO loop must follow the TILE directive + !$omp tile sizes(2) +end subroutine + From 493442453cbac2366aa89abde361f7badaad4948 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Tue, 23 Sep 2025 16:39:44 +0200 Subject: [PATCH 07/12] Fix symbol resolution --- flang/lib/Semantics/resolve-directives.cpp | 48 ++++++++++------------ 1 file changed, 22 insertions(+), 26 deletions(-) diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index 5f2c9f676099c..a0109324e546c 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -1975,7 +1975,10 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPLoopConstruct &x) { } } } + + // Must be done before iv privatization 
CheckPerfectNestAndRectangularLoop(x); + PrivatizeAssociatedLoopIndexAndCheckLoopLevel(x); ordCollapseLevel = GetNumAffectedLoopsFromLoopConstruct(x) + 1; return true; @@ -2172,37 +2175,29 @@ void OmpAttributeVisitor::CollectNumAffectedLoopsFromClauses( } void OmpAttributeVisitor::CheckPerfectNestAndRectangularLoop( - const parser::OpenMPLoopConstruct - &x) { // GetAssociatedLoopLevelFromClauses(clauseList); - auto &&dirContext = GetContext(); + const parser::OpenMPLoopConstruct &x) { + auto &dirContext = GetContext(); std::int64_t dirDepth{dirContext.associatedLoopLevel}; if (dirDepth <= 0) return; - Symbol::Flag ivDSA; - if (!llvm::omp::allSimdSet.test(GetContext().directive)) { - ivDSA = Symbol::Flag::OmpPrivate; - } else if (dirDepth == 1) { - ivDSA = Symbol::Flag::OmpLinear; - } else { - ivDSA = Symbol::Flag::OmpLastPrivate; - } - auto checkExprHasSymbols = [&](llvm::SmallVector &ivs, const parser::ScalarExpr *bound) { if (ivs.empty()) return; - - if (auto boundExpr{semantics::AnalyzeExpr(context_, *bound)}) { - semantics::UnorderedSymbolSet boundSyms = - evaluate::CollectSymbols(*boundExpr); - for (auto iv : ivs) { - if (boundSyms.count(*iv) != 0) { - // TODO: Point to occurence of iv in boundExpr, directiveSource as a - // note - context_.Say(dirContext.directiveSource, - "Trip count must be computable and invariant"_err_en_US); - } + auto boundExpr{semantics::AnalyzeExpr(context_, *bound)}; + if (!boundExpr) + return; + semantics::UnorderedSymbolSet boundSyms = + evaluate::CollectSymbols(*boundExpr); + if (boundSyms.empty()) + return; + for (Symbol *iv : ivs) { + if (boundSyms.count(*iv) != 0) { + // TODO: Point to occurence of iv in boundExpr, directiveSource as a + // note + context_.Say(dirContext.directiveSource, + "Trip count must be computable and invariant"_err_en_US); } } }; @@ -2217,8 +2212,9 @@ void OmpAttributeVisitor::CheckPerfectNestAndRectangularLoop( std::get_if>( innerMostNest)}) { innerMostLoop = &(innerLoop->value()); - } else + } else { 
break; + } } if (!innerMostNest) @@ -2228,7 +2224,7 @@ void OmpAttributeVisitor::CheckPerfectNestAndRectangularLoop( return; llvm::SmallVector ivs; - int curLevel = 0; + int curLevel{0}; const parser::DoConstruct *loop{outer}; while (true) { auto [iv, lb, ub, step] = GetLoopBounds(*loop); @@ -2240,7 +2236,7 @@ void OmpAttributeVisitor::CheckPerfectNestAndRectangularLoop( if (step) checkExprHasSymbols(ivs, step); if (iv) { - if (auto *symbol{ResolveOmp(*iv, ivDSA, currScope())}) + if (auto *symbol{currScope().FindSymbol(iv->source)}) ivs.push_back(symbol); } From 01a056f711c139ee0f1e26b19cf7513701f52992 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Wed, 24 Sep 2025 12:01:39 +0200 Subject: [PATCH 08/12] avoid compiler warning --- flang/lib/Lower/OpenMP/Utils.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index 4a392d4944fc8..29cccbd1bfe5a 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -652,7 +652,6 @@ int64_t collectLoopRelatedInfo( mlir::omp::LoopRelatedClauseOps &result, llvm::SmallVectorImpl &iv) { int64_t numCollapse = 1; - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); // Collect the loops to collapse. 
lower::pft::Evaluation *doConstructEval = &eval.getFirstNestedEvaluation(); From 7655a11ac10f0cca877e56a73e2948a265db00ab Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Wed, 24 Sep 2025 12:45:18 +0200 Subject: [PATCH 09/12] Avoid structured binding capture to appease compiler --- mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index d3cc7e55ae155..f681b0346f489 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -3300,6 +3300,9 @@ void NewCliOp::getAsmResultNames(OpAsmSetValueNameFn setNameFn) { Value result = getResult(); auto [newCli, gen, cons] = decodeCli(result); + // Structured binding `gen` cannot be captured in lambdas before C++20 + OpOperand *generator = gen; + // Derive the CLI variable name from its generator: // * "canonloop" for omp.canonical_loop // * custom name for loop transformation generatees @@ -3324,7 +3327,7 @@ void NewCliOp::getAsmResultNames(OpAsmSetValueNameFn setNameFn) { unsigned firstGrid = generateesFirst; unsigned firstIntratile = generateesFirst + generateesCount / 2; unsigned end = generateesFirst + generateesCount; - unsigned opnum = gen->getOperandNumber(); + unsigned opnum = generator->getOperandNumber(); // In the OpenMP apply and looprange clauses, indices are 1-based if (firstGrid <= opnum && opnum < firstIntratile) { unsigned gridnum = opnum - firstGrid + 1; From f078a8e81f28f88083d0156b91414fddd5a13e8a Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Wed, 1 Oct 2025 11:23:52 +0200 Subject: [PATCH 10/12] regexify varnames --- .../test/Target/LLVMIR/openmp-cli-tile01.mlir | 127 ++++----- .../test/Target/LLVMIR/openmp-cli-tile02.mlir | 262 +++++++++--------- 2 files changed, 188 insertions(+), 201 deletions(-) diff --git a/mlir/test/Target/LLVMIR/openmp-cli-tile01.mlir 
b/mlir/test/Target/LLVMIR/openmp-cli-tile01.mlir index 4ac4f02103e8c..0d559b69a3ad1 100644 --- a/mlir/test/Target/LLVMIR/openmp-cli-tile01.mlir +++ b/mlir/test/Target/LLVMIR/openmp-cli-tile01.mlir @@ -1,5 +1,4 @@ -// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s - +// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s --enable-var-scope llvm.func @tile_trivial_loop(%baseptr: !llvm.ptr, %tc: i32, %ts: i32) -> () { @@ -15,87 +14,81 @@ llvm.func @tile_trivial_loop(%baseptr: !llvm.ptr, %tc: i32, %ts: i32) -> () { } -// CHECK: ; ModuleID = 'LLVMDialectModule' -// CHECK-NEXT: source_filename = "LLVMDialectModule" -// CHECK-EMPTY: -// CHECK-NEXT: define void @tile_trivial_loop(ptr %0, i32 %1, i32 %2) { -// CHECK-NEXT: br label %omp_omp.loop.preheader -// CHECK-EMPTY: -// CHECK-NEXT: omp_omp.loop.preheader: ; preds = %3 -// CHECK-NEXT: %4 = udiv i32 %1, %2 -// CHECK-NEXT: %5 = urem i32 %1, %2 -// CHECK-NEXT: %6 = icmp ne i32 %5, 0 -// CHECK-NEXT: %7 = zext i1 %6 to i32 -// CHECK-NEXT: %omp_floor0.tripcount = add nuw i32 %4, %7 -// CHECK-NEXT: br label %omp_floor0.preheader -// CHECK-EMPTY: -// CHECK-NEXT: omp_floor0.preheader: ; preds = %omp_omp.loop.preheader -// CHECK-NEXT: br label %omp_floor0.header +// CHECK-LABEL: define void @tile_trivial_loop( +// CHECK-SAME: ptr %[[TMP0:.+]], i32 %[[TMP1:.+]], i32 %[[TMP2:.+]]) { +// CHECK-NEXT: br label %[[OMP_OMP_LOOP_PREHEADER:.+]] // CHECK-EMPTY: -// CHECK-NEXT: omp_floor0.header: ; preds = %omp_floor0.inc, %omp_floor0.preheader -// CHECK-NEXT: %omp_floor0.iv = phi i32 [ 0, %omp_floor0.preheader ], [ %omp_floor0.next, %omp_floor0.inc ] -// CHECK-NEXT: br label %omp_floor0.cond +// CHECK-NEXT: [[OMP_OMP_LOOP_PREHEADER]]: +// CHECK-NEXT: %[[TMP4:.+]] = udiv i32 %[[TMP1:.+]], %[[TMP2:.+]] +// CHECK-NEXT: %[[TMP5:.+]] = urem i32 %[[TMP1:.+]], %[[TMP2:.+]] +// CHECK-NEXT: %[[TMP6:.+]] = icmp ne i32 %[[TMP5:.+]], 0 +// CHECK-NEXT: %[[TMP7:.+]] = zext i1 %[[TMP6:.+]] to i32 +// CHECK-NEXT: %[[OMP_FLOOR0_TRIPCOUNT:.+]] = 
add nuw i32 %[[TMP4:.+]], %[[TMP7:.+]] +// CHECK-NEXT: br label %[[OMP_FLOOR0_PREHEADER:.+]] // CHECK-EMPTY: -// CHECK-NEXT: omp_floor0.cond: ; preds = %omp_floor0.header -// CHECK-NEXT: %omp_floor0.cmp = icmp ult i32 %omp_floor0.iv, %omp_floor0.tripcount -// CHECK-NEXT: br i1 %omp_floor0.cmp, label %omp_floor0.body, label %omp_floor0.exit +// CHECK-NEXT: [[OMP_FLOOR0_PREHEADER]]: +// CHECK-NEXT: br label %[[OMP_FLOOR0_HEADER:.+]] // CHECK-EMPTY: -// CHECK-NEXT: omp_floor0.body: ; preds = %omp_floor0.cond -// CHECK-NEXT: %8 = icmp eq i32 %omp_floor0.iv, %4 -// CHECK-NEXT: %9 = select i1 %8, i32 %5, i32 %2 -// CHECK-NEXT: br label %omp_tile0.preheader +// CHECK-NEXT: [[OMP_FLOOR0_HEADER]]: +// CHECK-NEXT: %[[OMP_FLOOR0_IV:.+]] = phi i32 [ 0, %[[OMP_FLOOR0_PREHEADER:.+]] ], [ %[[OMP_FLOOR0_NEXT:.+]], %[[OMP_FLOOR0_INC:.+]] ] +// CHECK-NEXT: br label %[[OMP_FLOOR0_COND:.+]] // CHECK-EMPTY: -// CHECK-NEXT: omp_tile0.preheader: ; preds = %omp_floor0.body -// CHECK-NEXT: br label %omp_tile0.header +// CHECK-NEXT: [[OMP_FLOOR0_COND]]: +// CHECK-NEXT: %[[OMP_FLOOR0_CMP:.+]] = icmp ult i32 %[[OMP_FLOOR0_IV:.+]], %[[OMP_FLOOR0_TRIPCOUNT:.+]] +// CHECK-NEXT: br i1 %[[OMP_FLOOR0_CMP:.+]], label %[[OMP_FLOOR0_BODY:.+]], label %[[OMP_FLOOR0_EXIT:.+]] // CHECK-EMPTY: -// CHECK-NEXT: omp_tile0.header: ; preds = %omp_tile0.inc, %omp_tile0.preheader -// CHECK-NEXT: %omp_tile0.iv = phi i32 [ 0, %omp_tile0.preheader ], [ %omp_tile0.next, %omp_tile0.inc ] -// CHECK-NEXT: br label %omp_tile0.cond +// CHECK-NEXT: [[OMP_FLOOR0_BODY]]: +// CHECK-NEXT: %[[TMP8:.+]] = icmp eq i32 %[[OMP_FLOOR0_IV:.+]], %[[TMP4:.+]] +// CHECK-NEXT: %[[TMP9:.+]] = select i1 %[[TMP8:.+]], i32 %[[TMP5:.+]], i32 %[[TMP2:.+]] +// CHECK-NEXT: br label %[[OMP_TILE0_PREHEADER:.+]] // CHECK-EMPTY: -// CHECK-NEXT: omp_tile0.cond: ; preds = %omp_tile0.header -// CHECK-NEXT: %omp_tile0.cmp = icmp ult i32 %omp_tile0.iv, %9 -// CHECK-NEXT: br i1 %omp_tile0.cmp, label %omp_tile0.body, label %omp_tile0.exit +// CHECK-NEXT: 
[[OMP_TILE0_PREHEADER]]: +// CHECK-NEXT: br label %[[OMP_TILE0_HEADER:.+]] // CHECK-EMPTY: -// CHECK-NEXT: omp_tile0.body: ; preds = %omp_tile0.cond -// CHECK-NEXT: %10 = mul nuw i32 %2, %omp_floor0.iv -// CHECK-NEXT: %11 = add nuw i32 %10, %omp_tile0.iv -// CHECK-NEXT: br label %omp_omp.loop.body +// CHECK-NEXT: [[OMP_TILE0_HEADER]]: +// CHECK-NEXT: %[[OMP_TILE0_IV:.+]] = phi i32 [ 0, %[[OMP_TILE0_PREHEADER:.+]] ], [ %[[OMP_TILE0_NEXT:.+]], %[[OMP_TILE0_INC:.+]] ] +// CHECK-NEXT: br label %[[OMP_TILE0_COND:.+]] // CHECK-EMPTY: -// CHECK-NEXT: omp_omp.loop.body: ; preds = %omp_tile0.body -// CHECK-NEXT: br label %omp.loop.region +// CHECK-NEXT: [[OMP_TILE0_COND]]: +// CHECK-NEXT: %[[OMP_TILE0_CMP:.+]] = icmp ult i32 %[[OMP_TILE0_IV:.+]], %[[TMP9:.+]] +// CHECK-NEXT: br i1 %[[OMP_TILE0_CMP:.+]], label %[[OMP_TILE0_BODY:.+]], label %[[OMP_TILE0_EXIT:.+]] // CHECK-EMPTY: -// CHECK-NEXT: omp.loop.region: ; preds = %omp_omp.loop.body -// CHECK-NEXT: %12 = getelementptr inbounds float, ptr %0, i32 %11 -// CHECK-NEXT: store float 4.200000e+01, ptr %12, align 4 -// CHECK-NEXT: br label %omp.region.cont +// CHECK-NEXT: [[OMP_TILE0_BODY]]: +// CHECK-NEXT: %[[TMP10:.+]] = mul nuw i32 %[[TMP2:.+]], %[[OMP_FLOOR0_IV:.+]] +// CHECK-NEXT: %[[TMP11:.+]] = add nuw i32 %[[TMP10:.+]], %[[OMP_TILE0_IV:.+]] +// CHECK-NEXT: br label %[[OMP_OMP_LOOP_BODY:.+]] // CHECK-EMPTY: -// CHECK-NEXT: omp.region.cont: ; preds = %omp.loop.region -// CHECK-NEXT: br label %omp_tile0.inc +// CHECK-NEXT: [[OMP_OMP_LOOP_BODY]]: +// CHECK-NEXT: br label %[[OMP_LOOP_REGION:.+]] // CHECK-EMPTY: -// CHECK-NEXT: omp_tile0.inc: ; preds = %omp.region.cont -// CHECK-NEXT: %omp_tile0.next = add nuw i32 %omp_tile0.iv, 1 -// CHECK-NEXT: br label %omp_tile0.header +// CHECK-NEXT: [[OMP_LOOP_REGION]]: +// CHECK-NEXT: %[[TMP12:.+]] = getelementptr inbounds float, ptr %[[TMP0:.+]], i32 %[[TMP11:.+]] +// CHECK-NEXT: store float 4.200000e+01, ptr %[[TMP12:.+]], align 4 +// CHECK-NEXT: br label %[[OMP_REGION_CONT:.+]] // 
CHECK-EMPTY: -// CHECK-NEXT: omp_tile0.exit: ; preds = %omp_tile0.cond -// CHECK-NEXT: br label %omp_tile0.after +// CHECK-NEXT: [[OMP_REGION_CONT]]: +// CHECK-NEXT: br label %[[OMP_TILE0_INC:.+]] // CHECK-EMPTY: -// CHECK-NEXT: omp_tile0.after: ; preds = %omp_tile0.exit -// CHECK-NEXT: br label %omp_floor0.inc +// CHECK-NEXT: [[OMP_TILE0_INC]]: +// CHECK-NEXT: %[[OMP_TILE0_NEXT:.+]] = add nuw i32 %[[OMP_TILE0_IV:.+]], 1 +// CHECK-NEXT: br label %[[OMP_TILE0_HEADER:.+]] // CHECK-EMPTY: -// CHECK-NEXT: omp_floor0.inc: ; preds = %omp_tile0.after -// CHECK-NEXT: %omp_floor0.next = add nuw i32 %omp_floor0.iv, 1 -// CHECK-NEXT: br label %omp_floor0.header +// CHECK-NEXT: [[OMP_TILE0_EXIT]]: +// CHECK-NEXT: br label %[[OMP_TILE0_AFTER:.+]] // CHECK-EMPTY: -// CHECK-NEXT: omp_floor0.exit: ; preds = %omp_floor0.cond -// CHECK-NEXT: br label %omp_floor0.after +// CHECK-NEXT: [[OMP_TILE0_AFTER]]: +// CHECK-NEXT: br label %[[OMP_FLOOR0_INC:.+]] // CHECK-EMPTY: -// CHECK-NEXT: omp_floor0.after: ; preds = %omp_floor0.exit -// CHECK-NEXT: br label %omp_omp.loop.after +// CHECK-NEXT: [[OMP_FLOOR0_INC]]: +// CHECK-NEXT: %[[OMP_FLOOR0_NEXT:.+]] = add nuw i32 %[[OMP_FLOOR0_IV:.+]], 1 +// CHECK-NEXT: br label %[[OMP_FLOOR0_HEADER:.+]] // CHECK-EMPTY: -// CHECK-NEXT: omp_omp.loop.after: ; preds = %omp_floor0.after -// CHECK-NEXT: ret void -// CHECK-NEXT: } +// CHECK-NEXT: [[OMP_FLOOR0_EXIT]]: +// CHECK-NEXT: br label %[[OMP_FLOOR0_AFTER:.+]] // CHECK-EMPTY: -// CHECK-NEXT: !llvm.module.flags = !{!0} +// CHECK-NEXT: [[OMP_FLOOR0_AFTER]]: +// CHECK-NEXT: br label %[[OMP_OMP_LOOP_AFTER:.+]] // CHECK-EMPTY: -// CHECK-NEXT: !0 = !{i32 2, !"Debug Info Version", i32 3} +// CHECK-NEXT: [[OMP_OMP_LOOP_AFTER]]: +// CHECK-NEXT: ret void +// CHECK-NEXT: } diff --git a/mlir/test/Target/LLVMIR/openmp-cli-tile02.mlir b/mlir/test/Target/LLVMIR/openmp-cli-tile02.mlir index 6fad81cd0c299..22c2973164159 100644 --- a/mlir/test/Target/LLVMIR/openmp-cli-tile02.mlir +++ 
b/mlir/test/Target/LLVMIR/openmp-cli-tile02.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s +// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s --enable-var-scope llvm.func @tile_2d_loop(%baseptr: !llvm.ptr, %tc1: i32, %tc2: i32, %ts1: i32, %ts2: i32) -> () { @@ -19,172 +19,166 @@ llvm.func @tile_2d_loop(%baseptr: !llvm.ptr, %tc1: i32, %tc2: i32, %ts1: i32, %t } -// CHECK: ; ModuleID = 'LLVMDialectModule' -// CHECK-NEXT: source_filename = "LLVMDialectModule" +// CHECK-LABEL: define void @tile_2d_loop( +// CHECK-SAME: ptr %[[TMP0:.+]], i32 %[[TMP1:.+]], i32 %[[TMP2:.+]], i32 %[[TMP3:.+]], i32 %[[TMP4:.+]]) { +// CHECK-NEXT: br label %[[OMP_OMP_LOOP_PREHEADER:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_OMP_LOOP_PREHEADER]]: +// CHECK-NEXT: %[[TMP6:.+]] = udiv i32 %[[TMP1:.+]], %[[TMP3:.+]] +// CHECK-NEXT: %[[TMP7:.+]] = urem i32 %[[TMP1:.+]], %[[TMP3:.+]] +// CHECK-NEXT: %[[TMP8:.+]] = icmp ne i32 %[[TMP7:.+]], 0 +// CHECK-NEXT: %[[TMP9:.+]] = zext i1 %[[TMP8:.+]] to i32 +// CHECK-NEXT: %[[OMP_FLOOR0_TRIPCOUNT:.+]] = add nuw i32 %[[TMP6:.+]], %[[TMP9:.+]] +// CHECK-NEXT: %[[TMP10:.+]] = udiv i32 %[[TMP2:.+]], %[[TMP4:.+]] +// CHECK-NEXT: %[[TMP11:.+]] = urem i32 %[[TMP2:.+]], %[[TMP4:.+]] +// CHECK-NEXT: %[[TMP12:.+]] = icmp ne i32 %[[TMP11:.+]], 0 +// CHECK-NEXT: %[[TMP13:.+]] = zext i1 %[[TMP12:.+]] to i32 +// CHECK-NEXT: %[[OMP_FLOOR1_TRIPCOUNT:.+]] = add nuw i32 %[[TMP10:.+]], %[[TMP13:.+]] +// CHECK-NEXT: br label %[[OMP_FLOOR0_PREHEADER:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_OMP_LOOP_HEADER:.+]]: +// CHECK-NEXT: %[[OMP_OMP_LOOP_IV:.+]] = phi i32 [ %[[OMP_OMP_LOOP_NEXT:.+]], %[[OMP_OMP_LOOP_INC:.+]] ] +// CHECK-NEXT: br label %[[OMP_OMP_LOOP_COND:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_OMP_LOOP_COND]]: +// CHECK-NEXT: %[[OMP_OMP_LOOP_CMP:.+]] = icmp ult i32 %[[TMP19:.+]], %[[TMP1:.+]] +// CHECK-NEXT: br i1 %[[OMP_OMP_LOOP_CMP:.+]], label %[[OMP_OMP_LOOP_BODY:.+]], label %[[OMP_OMP_LOOP_EXIT:.+]] +// CHECK-EMPTY: +// 
CHECK-NEXT: [[OMP_OMP_LOOP_BODY]]: +// CHECK-NEXT: br label %[[OMP_LOOP_REGION:.+]] +// CHECK-EMPTY: +// CHECK-NEXT: [[OMP_LOOP_REGION]]: +// CHECK-NEXT: br label %[[OMP_OMP_LOOP_PREHEADER1:.+]] // CHECK-EMPTY: -// CHECK-NEXT: define void @tile_2d_loop(ptr %0, i32 %1, i32 %2, i32 %3, i32 %4) { -// CHECK-NEXT: br label %omp_omp.loop.preheader -// CHECK-EMPTY: -// CHECK-NEXT: omp_omp.loop.preheader: ; preds = %5 -// CHECK-NEXT: %6 = udiv i32 %1, %3 -// CHECK-NEXT: %7 = urem i32 %1, %3 -// CHECK-NEXT: %8 = icmp ne i32 %7, 0 -// CHECK-NEXT: %9 = zext i1 %8 to i32 -// CHECK-NEXT: %omp_floor0.tripcount = add nuw i32 %6, %9 -// CHECK-NEXT: %10 = udiv i32 %2, %4 -// CHECK-NEXT: %11 = urem i32 %2, %4 -// CHECK-NEXT: %12 = icmp ne i32 %11, 0 -// CHECK-NEXT: %13 = zext i1 %12 to i32 -// CHECK-NEXT: %omp_floor1.tripcount = add nuw i32 %10, %13 -// CHECK-NEXT: br label %omp_floor0.preheader +// CHECK-NEXT: [[OMP_OMP_LOOP_PREHEADER1]]: +// CHECK-NEXT: br label %[[OMP_OMP_LOOP_BODY4:.+]] // CHECK-EMPTY: -// CHECK-NEXT: omp_omp.loop.header: ; preds = %omp_omp.loop.inc -// CHECK-NEXT: %omp_omp.loop.iv = phi i32 [ %omp_omp.loop.next, %omp_omp.loop.inc ] -// CHECK-NEXT: br label %omp_omp.loop.cond +// CHECK-NEXT: [[OMP_FLOOR0_PREHEADER]]: +// CHECK-NEXT: br label %[[OMP_FLOOR0_HEADER:.+]] // CHECK-EMPTY: -// CHECK-NEXT: omp_omp.loop.cond: ; preds = %omp_omp.loop.header -// CHECK-NEXT: %omp_omp.loop.cmp = icmp ult i32 %19, %1 -// CHECK-NEXT: br i1 %omp_omp.loop.cmp, label %omp_omp.loop.body, label %omp_omp.loop.exit +// CHECK-NEXT: [[OMP_FLOOR0_HEADER]]: +// CHECK-NEXT: %[[OMP_FLOOR0_IV:.+]] = phi i32 [ 0, %[[OMP_FLOOR0_PREHEADER:.+]] ], [ %[[OMP_FLOOR0_NEXT:.+]], %[[OMP_FLOOR0_INC:.+]] ] +// CHECK-NEXT: br label %[[OMP_FLOOR0_COND:.+]] // CHECK-EMPTY: -// CHECK-NEXT: omp_omp.loop.body: ; preds = %omp_tile1.body, %omp_omp.loop.cond -// CHECK-NEXT: br label %omp.loop.region +// CHECK-NEXT: [[OMP_FLOOR0_COND]]: +// CHECK-NEXT: %[[OMP_FLOOR0_CMP:.+]] = icmp ult i32 %[[OMP_FLOOR0_IV:.+]], 
%[[OMP_FLOOR0_TRIPCOUNT:.+]] +// CHECK-NEXT: br i1 %[[OMP_FLOOR0_CMP:.+]], label %[[OMP_FLOOR0_BODY:.+]], label %[[OMP_FLOOR0_EXIT:.+]] // CHECK-EMPTY: -// CHECK-NEXT: omp.loop.region: ; preds = %omp_omp.loop.body -// CHECK-NEXT: br label %omp_omp.loop.preheader1 +// CHECK-NEXT: [[OMP_FLOOR0_BODY]]: +// CHECK-NEXT: br label %[[OMP_FLOOR1_PREHEADER:.+]] // CHECK-EMPTY: -// CHECK-NEXT: omp_omp.loop.preheader1: ; preds = %omp.loop.region -// CHECK-NEXT: br label %omp_omp.loop.body4 +// CHECK-NEXT: [[OMP_FLOOR1_PREHEADER]]: +// CHECK-NEXT: br label %[[OMP_FLOOR1_HEADER:.+]] // CHECK-EMPTY: -// CHECK-NEXT: omp_floor0.preheader: ; preds = %omp_omp.loop.preheader -// CHECK-NEXT: br label %omp_floor0.header +// CHECK-NEXT: [[OMP_FLOOR1_HEADER]]: +// CHECK-NEXT: %[[OMP_FLOOR1_IV:.+]] = phi i32 [ 0, %[[OMP_FLOOR1_PREHEADER:.+]] ], [ %[[OMP_FLOOR1_NEXT:.+]], %[[OMP_FLOOR1_INC:.+]] ] +// CHECK-NEXT: br label %[[OMP_FLOOR1_COND:.+]] // CHECK-EMPTY: -// CHECK-NEXT: omp_floor0.header: ; preds = %omp_floor0.inc, %omp_floor0.preheader -// CHECK-NEXT: %omp_floor0.iv = phi i32 [ 0, %omp_floor0.preheader ], [ %omp_floor0.next, %omp_floor0.inc ] -// CHECK-NEXT: br label %omp_floor0.cond +// CHECK-NEXT: [[OMP_FLOOR1_COND]]: +// CHECK-NEXT: %[[OMP_FLOOR1_CMP:.+]] = icmp ult i32 %[[OMP_FLOOR1_IV:.+]], %[[OMP_FLOOR1_TRIPCOUNT:.+]] +// CHECK-NEXT: br i1 %[[OMP_FLOOR1_CMP:.+]], label %[[OMP_FLOOR1_BODY:.+]], label %[[OMP_FLOOR1_EXIT:.+]] // CHECK-EMPTY: -// CHECK-NEXT: omp_floor0.cond: ; preds = %omp_floor0.header -// CHECK-NEXT: %omp_floor0.cmp = icmp ult i32 %omp_floor0.iv, %omp_floor0.tripcount -// CHECK-NEXT: br i1 %omp_floor0.cmp, label %omp_floor0.body, label %omp_floor0.exit +// CHECK-NEXT: [[OMP_FLOOR1_BODY]]: +// CHECK-NEXT: %[[TMP14:.+]] = icmp eq i32 %[[OMP_FLOOR0_IV:.+]], %[[TMP6:.+]] +// CHECK-NEXT: %[[TMP15:.+]] = select i1 %[[TMP14:.+]], i32 %[[TMP7:.+]], i32 %[[TMP3:.+]] +// CHECK-NEXT: %[[TMP16:.+]] = icmp eq i32 %[[OMP_FLOOR1_IV:.+]], %[[TMP10:.+]] +// CHECK-NEXT: 
%[[TMP17:.+]] = select i1 %[[TMP16:.+]], i32 %[[TMP11:.+]], i32 %[[TMP4:.+]] +// CHECK-NEXT: br label %[[OMP_TILE0_PREHEADER:.+]] // CHECK-EMPTY: -// CHECK-NEXT: omp_floor0.body: ; preds = %omp_floor0.cond -// CHECK-NEXT: br label %omp_floor1.preheader +// CHECK-NEXT: [[OMP_TILE0_PREHEADER]]: +// CHECK-NEXT: br label %[[OMP_TILE0_HEADER:.+]] // CHECK-EMPTY: -// CHECK-NEXT: omp_floor1.preheader: ; preds = %omp_floor0.body -// CHECK-NEXT: br label %omp_floor1.header +// CHECK-NEXT: [[OMP_TILE0_HEADER]]: +// CHECK-NEXT: %[[OMP_TILE0_IV:.+]] = phi i32 [ 0, %[[OMP_TILE0_PREHEADER:.+]] ], [ %[[OMP_TILE0_NEXT:.+]], %[[OMP_TILE0_INC:.+]] ] +// CHECK-NEXT: br label %[[OMP_TILE0_COND:.+]] // CHECK-EMPTY: -// CHECK-NEXT: omp_floor1.header: ; preds = %omp_floor1.inc, %omp_floor1.preheader -// CHECK-NEXT: %omp_floor1.iv = phi i32 [ 0, %omp_floor1.preheader ], [ %omp_floor1.next, %omp_floor1.inc ] -// CHECK-NEXT: br label %omp_floor1.cond +// CHECK-NEXT: [[OMP_TILE0_COND]]: +// CHECK-NEXT: %[[OMP_TILE0_CMP:.+]] = icmp ult i32 %[[OMP_TILE0_IV:.+]], %[[TMP15:.+]] +// CHECK-NEXT: br i1 %[[OMP_TILE0_CMP:.+]], label %[[OMP_TILE0_BODY:.+]], label %[[OMP_TILE0_EXIT:.+]] // CHECK-EMPTY: -// CHECK-NEXT: omp_floor1.cond: ; preds = %omp_floor1.header -// CHECK-NEXT: %omp_floor1.cmp = icmp ult i32 %omp_floor1.iv, %omp_floor1.tripcount -// CHECK-NEXT: br i1 %omp_floor1.cmp, label %omp_floor1.body, label %omp_floor1.exit +// CHECK-NEXT: [[OMP_TILE0_BODY]]: +// CHECK-NEXT: br label %[[OMP_TILE1_PREHEADER:.+]] // CHECK-EMPTY: -// CHECK-NEXT: omp_floor1.body: ; preds = %omp_floor1.cond -// CHECK-NEXT: %14 = icmp eq i32 %omp_floor0.iv, %6 -// CHECK-NEXT: %15 = select i1 %14, i32 %7, i32 %3 -// CHECK-NEXT: %16 = icmp eq i32 %omp_floor1.iv, %10 -// CHECK-NEXT: %17 = select i1 %16, i32 %11, i32 %4 -// CHECK-NEXT: br label %omp_tile0.preheader +// CHECK-NEXT: [[OMP_TILE1_PREHEADER]]: +// CHECK-NEXT: br label %[[OMP_TILE1_HEADER:.+]] // CHECK-EMPTY: -// CHECK-NEXT: omp_tile0.preheader: ; preds = 
%omp_floor1.body -// CHECK-NEXT: br label %omp_tile0.header +// CHECK-NEXT: [[OMP_TILE1_HEADER]]: +// CHECK-NEXT: %[[OMP_TILE1_IV:.+]] = phi i32 [ 0, %[[OMP_TILE1_PREHEADER:.+]] ], [ %[[OMP_TILE1_NEXT:.+]], %[[OMP_TILE1_INC:.+]] ] +// CHECK-NEXT: br label %[[OMP_TILE1_COND:.+]] // CHECK-EMPTY: -// CHECK-NEXT: omp_tile0.header: ; preds = %omp_tile0.inc, %omp_tile0.preheader -// CHECK-NEXT: %omp_tile0.iv = phi i32 [ 0, %omp_tile0.preheader ], [ %omp_tile0.next, %omp_tile0.inc ] -// CHECK-NEXT: br label %omp_tile0.cond +// CHECK-NEXT: [[OMP_TILE1_COND]]: +// CHECK-NEXT: %[[OMP_TILE1_CMP:.+]] = icmp ult i32 %[[OMP_TILE1_IV:.+]], %[[TMP17:.+]] +// CHECK-NEXT: br i1 %[[OMP_TILE1_CMP:.+]], label %[[OMP_TILE1_BODY:.+]], label %[[OMP_TILE1_EXIT:.+]] // CHECK-EMPTY: -// CHECK-NEXT: omp_tile0.cond: ; preds = %omp_tile0.header -// CHECK-NEXT: %omp_tile0.cmp = icmp ult i32 %omp_tile0.iv, %15 -// CHECK-NEXT: br i1 %omp_tile0.cmp, label %omp_tile0.body, label %omp_tile0.exit +// CHECK-NEXT: [[OMP_TILE1_BODY]]: +// CHECK-NEXT: %[[TMP18:.+]] = mul nuw i32 %[[TMP3:.+]], %[[OMP_FLOOR0_IV:.+]] +// CHECK-NEXT: %[[TMP19:.+]] = add nuw i32 %[[TMP18:.+]], %[[OMP_TILE0_IV:.+]] +// CHECK-NEXT: %[[TMP20:.+]] = mul nuw i32 %[[TMP4:.+]], %[[OMP_FLOOR1_IV:.+]] +// CHECK-NEXT: %[[TMP21:.+]] = add nuw i32 %[[TMP20:.+]], %[[OMP_TILE1_IV:.+]] +// CHECK-NEXT: br label %[[OMP_OMP_LOOP_BODY:.+]] // CHECK-EMPTY: -// CHECK-NEXT: omp_tile0.body: ; preds = %omp_tile0.cond -// CHECK-NEXT: br label %omp_tile1.preheader +// CHECK-NEXT: [[OMP_OMP_LOOP_BODY4]]: +// CHECK-NEXT: br label %[[OMP_LOOP_REGION12:.+]] // CHECK-EMPTY: -// CHECK-NEXT: omp_tile1.preheader: ; preds = %omp_tile0.body -// CHECK-NEXT: br label %omp_tile1.header +// CHECK-NEXT: [[OMP_LOOP_REGION12]]: +// CHECK-NEXT: %[[TMP22:.+]] = add i32 %[[TMP19:.+]], %[[TMP21:.+]] +// CHECK-NEXT: %[[TMP23:.+]] = getelementptr inbounds float, ptr %[[TMP0:.+]], i32 %[[TMP22:.+]] +// CHECK-NEXT: store float 4.200000e+01, ptr %[[TMP23:.+]], align 4 +// 
CHECK-NEXT: br label %[[OMP_REGION_CONT11:.+]] // CHECK-EMPTY: -// CHECK-NEXT: omp_tile1.header: ; preds = %omp_tile1.inc, %omp_tile1.preheader -// CHECK-NEXT: %omp_tile1.iv = phi i32 [ 0, %omp_tile1.preheader ], [ %omp_tile1.next, %omp_tile1.inc ] -// CHECK-NEXT: br label %omp_tile1.cond +// CHECK-NEXT: [[OMP_REGION_CONT11]]: +// CHECK-NEXT: br label %[[OMP_TILE1_INC:.+]] // CHECK-EMPTY: -// CHECK-NEXT: omp_tile1.cond: ; preds = %omp_tile1.header -// CHECK-NEXT: %omp_tile1.cmp = icmp ult i32 %omp_tile1.iv, %17 -// CHECK-NEXT: br i1 %omp_tile1.cmp, label %omp_tile1.body, label %omp_tile1.exit +// CHECK-NEXT: [[OMP_TILE1_INC]]: +// CHECK-NEXT: %[[OMP_TILE1_NEXT:.+]] = add nuw i32 %[[OMP_TILE1_IV:.+]], 1 +// CHECK-NEXT: br label %[[OMP_TILE1_HEADER:.+]] // CHECK-EMPTY: -// CHECK-NEXT: omp_tile1.body: ; preds = %omp_tile1.cond -// CHECK-NEXT: %18 = mul nuw i32 %3, %omp_floor0.iv -// CHECK-NEXT: %19 = add nuw i32 %18, %omp_tile0.iv -// CHECK-NEXT: %20 = mul nuw i32 %4, %omp_floor1.iv -// CHECK-NEXT: %21 = add nuw i32 %20, %omp_tile1.iv -// CHECK-NEXT: br label %omp_omp.loop.body +// CHECK-NEXT: [[OMP_TILE1_EXIT]]: +// CHECK-NEXT: br label %[[OMP_TILE1_AFTER:.+]] // CHECK-EMPTY: -// CHECK-NEXT: omp_omp.loop.body4: ; preds = %omp_omp.loop.preheader1 -// CHECK-NEXT: br label %omp.loop.region12 +// CHECK-NEXT: [[OMP_TILE1_AFTER]]: +// CHECK-NEXT: br label %[[OMP_TILE0_INC:.+]] // CHECK-EMPTY: -// CHECK-NEXT: omp.loop.region12: ; preds = %omp_omp.loop.body4 -// CHECK-NEXT: %22 = add i32 %19, %21 -// CHECK-NEXT: %23 = getelementptr inbounds float, ptr %0, i32 %22 -// CHECK-NEXT: store float 4.200000e+01, ptr %23, align 4 -// CHECK-NEXT: br label %omp.region.cont11 +// CHECK-NEXT: [[OMP_TILE0_INC]]: +// CHECK-NEXT: %[[OMP_TILE0_NEXT:.+]] = add nuw i32 %[[OMP_TILE0_IV:.+]], 1 +// CHECK-NEXT: br label %[[OMP_TILE0_HEADER:.+]] // CHECK-EMPTY: -// CHECK-NEXT: omp.region.cont11: ; preds = %omp.loop.region12 -// CHECK-NEXT: br label %omp_tile1.inc +// CHECK-NEXT: 
[[OMP_TILE0_EXIT]]: +// CHECK-NEXT: br label %[[OMP_TILE0_AFTER:.+]] // CHECK-EMPTY: -// CHECK-NEXT: omp_tile1.inc: ; preds = %omp.region.cont11 -// CHECK-NEXT: %omp_tile1.next = add nuw i32 %omp_tile1.iv, 1 -// CHECK-NEXT: br label %omp_tile1.header +// CHECK-NEXT: [[OMP_TILE0_AFTER]]: +// CHECK-NEXT: br label %[[OMP_FLOOR1_INC:.+]] // CHECK-EMPTY: -// CHECK-NEXT: omp_tile1.exit: ; preds = %omp_tile1.cond -// CHECK-NEXT: br label %omp_tile1.after +// CHECK-NEXT: [[OMP_FLOOR1_INC]]: +// CHECK-NEXT: %[[OMP_FLOOR1_NEXT:.+]] = add nuw i32 %[[OMP_FLOOR1_IV:.+]], 1 +// CHECK-NEXT: br label %[[OMP_FLOOR1_HEADER:.+]] // CHECK-EMPTY: -// CHECK-NEXT: omp_tile1.after: ; preds = %omp_tile1.exit -// CHECK-NEXT: br label %omp_tile0.inc +// CHECK-NEXT: [[OMP_FLOOR1_EXIT]]: +// CHECK-NEXT: br label %[[OMP_FLOOR1_AFTER:.+]] // CHECK-EMPTY: -// CHECK-NEXT: omp_tile0.inc: ; preds = %omp_tile1.after -// CHECK-NEXT: %omp_tile0.next = add nuw i32 %omp_tile0.iv, 1 -// CHECK-NEXT: br label %omp_tile0.header +// CHECK-NEXT: [[OMP_FLOOR1_AFTER]]: +// CHECK-NEXT: br label %[[OMP_FLOOR0_INC:.+]] // CHECK-EMPTY: -// CHECK-NEXT: omp_tile0.exit: ; preds = %omp_tile0.cond -// CHECK-NEXT: br label %omp_tile0.after +// CHECK-NEXT: [[OMP_FLOOR0_INC]]: +// CHECK-NEXT: %[[OMP_FLOOR0_NEXT:.+]] = add nuw i32 %[[OMP_FLOOR0_IV:.+]], 1 +// CHECK-NEXT: br label %[[OMP_FLOOR0_HEADER:.+]] // CHECK-EMPTY: -// CHECK-NEXT: omp_tile0.after: ; preds = %omp_tile0.exit -// CHECK-NEXT: br label %omp_floor1.inc +// CHECK-NEXT: [[OMP_FLOOR0_EXIT]]: +// CHECK-NEXT: br label %[[OMP_FLOOR0_AFTER:.+]] // CHECK-EMPTY: -// CHECK-NEXT: omp_floor1.inc: ; preds = %omp_tile0.after -// CHECK-NEXT: %omp_floor1.next = add nuw i32 %omp_floor1.iv, 1 -// CHECK-NEXT: br label %omp_floor1.header +// CHECK-NEXT: [[OMP_FLOOR0_AFTER]]: +// CHECK-NEXT: br label %[[OMP_OMP_LOOP_AFTER:.+]] // CHECK-EMPTY: -// CHECK-NEXT: omp_floor1.exit: ; preds = %omp_floor1.cond -// CHECK-NEXT: br label %omp_floor1.after +// CHECK-NEXT: 
[[OMP_REGION_CONT:.+]]: +// CHECK-NEXT: br label %[[OMP_OMP_LOOP_INC:.+]] // CHECK-EMPTY: -// CHECK-NEXT: omp_floor1.after: ; preds = %omp_floor1.exit -// CHECK-NEXT: br label %omp_floor0.inc +// CHECK-NEXT: [[OMP_OMP_LOOP_INC]]: +// CHECK-NEXT: %[[OMP_OMP_LOOP_NEXT:.+]] = add nuw i32 %[[TMP19:.+]], 1 +// CHECK-NEXT: br label %[[OMP_OMP_LOOP_HEADER:.+]] // CHECK-EMPTY: -// CHECK-NEXT: omp_floor0.inc: ; preds = %omp_floor1.after -// CHECK-NEXT: %omp_floor0.next = add nuw i32 %omp_floor0.iv, 1 -// CHECK-NEXT: br label %omp_floor0.header +// CHECK-NEXT: [[OMP_OMP_LOOP_EXIT]]: +// CHECK-NEXT: br label %[[OMP_OMP_LOOP_AFTER:.+]] // CHECK-EMPTY: -// CHECK-NEXT: omp_floor0.exit: ; preds = %omp_floor0.cond -// CHECK-NEXT: br label %omp_floor0.after -// CHECK-EMPTY: -// CHECK-NEXT: omp_floor0.after: ; preds = %omp_floor0.exit -// CHECK-NEXT: br label %omp_omp.loop.after -// CHECK-EMPTY: -// CHECK-NEXT: omp.region.cont: ; No predecessors! -// CHECK-NEXT: br label %omp_omp.loop.inc -// CHECK-EMPTY: -// CHECK-NEXT: omp_omp.loop.inc: ; preds = %omp.region.cont -// CHECK-NEXT: %omp_omp.loop.next = add nuw i32 %19, 1 -// CHECK-NEXT: br label %omp_omp.loop.header -// CHECK-EMPTY: -// CHECK-NEXT: omp_omp.loop.exit: ; preds = %omp_omp.loop.cond -// CHECK-NEXT: br label %omp_omp.loop.after -// CHECK-EMPTY: -// CHECK-NEXT: omp_omp.loop.after: ; preds = %omp_floor0.after, %omp_omp.loop.exit -// CHECK-NEXT: ret void -// CHECK-NEXT: } -// CHECK-EMPTY: -// CHECK-NEXT: !llvm.module.flags = !{!0} -// CHECK-EMPTY: -// CHECK-NEXT: !0 = !{i32 2, !"Debug Info Version", i32 3} +// CHECK-NEXT: [[OMP_OMP_LOOP_AFTER]]: +// CHECK-NEXT: ret void +// CHECK-NEXT: } From 44ea4603871557d460b1a147245bd9fc6241de95 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 2 Oct 2025 17:45:03 +0200 Subject: [PATCH 11/12] remove merge conflict leftovers --- flang/test/Semantics/OpenMP/do08.f90 | 1 - flang/test/Semantics/OpenMP/do13.f90 | 1 - 2 files changed, 2 deletions(-) diff --git 
a/flang/test/Semantics/OpenMP/do08.f90 b/flang/test/Semantics/OpenMP/do08.f90 index bb3c1d0cd3855..5143dff0dd315 100644 --- a/flang/test/Semantics/OpenMP/do08.f90 +++ b/flang/test/Semantics/OpenMP/do08.f90 @@ -61,7 +61,6 @@ program omp !$omp end do - !ERROR: Canonical loop nest must be perfectly nested. !ERROR: The value of the parameter in the COLLAPSE or ORDERED clause must not be larger than the number of nested loops following the construct. !$omp do collapse(3) do 60 i=2,200,2 diff --git a/flang/test/Semantics/OpenMP/do13.f90 b/flang/test/Semantics/OpenMP/do13.f90 index 8f7844f4136f9..6e9d1dddade4c 100644 --- a/flang/test/Semantics/OpenMP/do13.f90 +++ b/flang/test/Semantics/OpenMP/do13.f90 @@ -59,7 +59,6 @@ program omp !$omp end do - !ERROR: Canonical loop nest must be perfectly nested. !ERROR: The value of the parameter in the COLLAPSE or ORDERED clause must not be larger than the number of nested loops following the construct. !$omp do collapse(3) do 60 i=1,10 From bcc5cd5ac3586d72574630d39ec59a81e272a3f3 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 2 Oct 2025 17:46:51 +0200 Subject: [PATCH 12/12] Add - token to format test --- mlir/test/mlir-tblgen/op-format-spec.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/test/mlir-tblgen/op-format-spec.td b/mlir/test/mlir-tblgen/op-format-spec.td index 1541cd09f53e0..1ac231116454b 100644 --- a/mlir/test/mlir-tblgen/op-format-spec.td +++ b/mlir/test/mlir-tblgen/op-format-spec.td @@ -123,7 +123,7 @@ def DirectiveTypeValid : TestFormat_Op<[{ // CHECK-NOT: error def LiteralValid : TestFormat_Op<[{ - `_` `:` `,` `=` `<` `>` `(` `)` `[` `]` `?` `+` `*` ` ` `` `->` `\n` `abc$._` + `_` `:` `,` `=` `<` `>` `(` `)` `[` `]` `?` `+` `-` `*` ` ` `` `->` `\n` `abc$._` attr-dict }]>;