Skip to content

Commit 11fcaae

Browse files
committed
[AMDGPU][FixIrreducible][UnifyLoopExits] Support callbr with inline-asm
First batch of changes to add support for basic inline-asm callbr for the AMDGPU backend.
1 parent 4d6fb88 commit 11fcaae

File tree

17 files changed

+2974
-50
lines changed

17 files changed

+2974
-50
lines changed

llvm/include/llvm/ADT/GenericCycleImpl.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -561,6 +561,17 @@ auto GenericCycleInfo<ContextT>::getSmallestCommonCycle(CycleT *A,
561561
return A;
562562
}
563563

564+
/// \brief Find the innermost cycle containing both given blocks.
565+
///
566+
/// \returns the innermost cycle containing both \p A and \p B
567+
/// or nullptr if there is no such cycle.
568+
template <typename ContextT>
569+
auto GenericCycleInfo<ContextT>::getSmallestCommonCycle(BlockT *A,
570+
BlockT *B) const
571+
-> CycleT * {
572+
return getSmallestCommonCycle(getCycle(A), getCycle(B));
573+
}
574+
564575
/// \brief Get the depth of the innermost cycle containing a given block.
565576
///
566577
/// \returns the depth for the innermost cycle containing \p Block or 0 if it is

llvm/include/llvm/ADT/GenericCycleInfo.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,7 @@ template <typename ContextT> class GenericCycleInfo {
298298

299299
CycleT *getCycle(const BlockT *Block) const;
300300
CycleT *getSmallestCommonCycle(CycleT *A, CycleT *B) const;
301+
CycleT *getSmallestCommonCycle(BlockT *A, BlockT *B) const;
301302
unsigned getCycleDepth(const BlockT *Block) const;
302303
CycleT *getTopLevelParentCycle(BlockT *Block);
303304

llvm/include/llvm/Support/GenericLoopInfo.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -615,6 +615,17 @@ template <class BlockT, class LoopT> class LoopInfoBase {
615615
return L ? L->getLoopDepth() : 0;
616616
}
617617

618+
/// \brief Find the innermost loop containing both given loops.
619+
///
620+
/// \returns the innermost loop containing both \p A and \p B
621+
/// or nullptr if there is no such loop.
622+
LoopT *getSmallestCommonLoop(LoopT *A, LoopT *B) const;
623+
/// \brief Find the innermost loop containing both given blocks.
624+
///
625+
/// \returns the innermost loop containing both \p A and \p B
626+
/// or nullptr if there is no such loop.
627+
LoopT *getSmallestCommonLoop(BlockT *A, BlockT *B) const;
628+
618629
// True if the block is a loop header node
619630
bool isLoopHeader(const BlockT *BB) const {
620631
const LoopT *L = getLoopFor(BB);

llvm/include/llvm/Support/GenericLoopInfoImpl.h

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -355,7 +355,7 @@ void LoopBase<BlockT, LoopT>::verifyLoop() const {
355355
if (BB == getHeader()) {
356356
assert(!OutsideLoopPreds.empty() && "Loop is unreachable!");
357357
} else if (!OutsideLoopPreds.empty()) {
358-
// A non-header loop shouldn't be reachable from outside the loop,
358+
// A non-header loop block shouldn't be reachable from outside the loop,
359359
// though it is permitted if the predecessor is not itself actually
360360
// reachable.
361361
BlockT *EntryBB = &BB->getParent()->front();
@@ -645,6 +645,36 @@ LoopInfoBase<BlockT, LoopT>::getLoopsInReverseSiblingPreorder() const {
645645
return PreOrderLoops;
646646
}
647647

648+
template <class BlockT, class LoopT>
649+
LoopT *LoopInfoBase<BlockT, LoopT>::getSmallestCommonLoop(LoopT *A,
650+
LoopT *B) const {
651+
if (!A || !B)
652+
return nullptr;
653+
654+
// If lops A and B have different depth replace them with parent loop
655+
// until they have the same depth.
656+
while (A->getLoopDepth() > B->getLoopDepth())
657+
A = A->getParentLoop();
658+
while (B->getLoopDepth() > A->getLoopDepth())
659+
B = B->getParentLoop();
660+
661+
// Loops A and B are at same depth but may be disjoint, replace them with
662+
// parent loops until we find loop that contains both or we run out of
663+
// parent loops.
664+
while (A != B) {
665+
A = A->getParentLoop();
666+
B = B->getParentLoop();
667+
}
668+
669+
return A;
670+
}
671+
672+
template <class BlockT, class LoopT>
673+
LoopT *LoopInfoBase<BlockT, LoopT>::getSmallestCommonLoop(BlockT *A,
674+
BlockT *B) const {
675+
return getSmallestCommonLoop(getLoopFor(A), getLoopFor(B));
676+
}
677+
648678
// Debugging
649679
template <class BlockT, class LoopT>
650680
void LoopInfoBase<BlockT, LoopT>::print(raw_ostream &OS) const {

llvm/include/llvm/Transforms/Utils/ControlFlowUtils.h

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,13 @@
1515

1616
#include "llvm/ADT/SmallVector.h"
1717
#include "llvm/ADT/StringRef.h"
18+
#include "llvm/IR/CycleInfo.h"
1819

1920
namespace llvm {
2021

2122
class BasicBlock;
23+
class CallBrInst;
24+
class LoopInfo;
2225
class DomTreeUpdater;
2326

2427
/// Given a set of branch descriptors [BB, Succ0, Succ1], create a "hub" such
@@ -104,7 +107,8 @@ struct ControlFlowHub {
104107
: BB(BB), Succ0(Succ0), Succ1(Succ1) {}
105108
};
106109

107-
void addBranch(BasicBlock *BB, BasicBlock *Succ0, BasicBlock *Succ1) {
110+
void addBranch(BasicBlock *BB, BasicBlock *Succ0,
111+
BasicBlock *Succ1 = nullptr) {
108112
assert(BB);
109113
assert(Succ0 || Succ1);
110114
Branches.emplace_back(BB, Succ0, Succ1);
@@ -118,6 +122,34 @@ struct ControlFlowHub {
118122
std::optional<unsigned> MaxControlFlowBooleans = std::nullopt);
119123

120124
SmallVector<BranchDescriptor> Branches;
125+
126+
/// \brief Create a new intermediate target block for a callbr edge.
127+
///
128+
/// This function creates a new basic block (the "target block") that sits
129+
/// between a callbr instruction and one of its successors. The callbr's
130+
/// successor is rewired to this new block, and the new block unconditionally
131+
/// branches to the original successor. This is useful for normalizing control
132+
/// flow, e.g., when transforming irreducible loops.
133+
///
134+
/// \param CallBr The callbr instruction whose edge is to be split.
135+
/// \param Succ The original successor basic block to be reached.
136+
/// \param SuccIdx The index of the successor in the callbr
137+
/// instruction.
138+
/// \param CI Optional CycleInfo for updating cycle membership.
139+
/// \param DTU Optional DomTreeUpdater for updating the dominator
140+
/// tree.
141+
/// \param LI Optional LoopInfo for updating loop membership.
142+
/// \param UpdatedLI Optional output flag indicating if LoopInfo has been
143+
/// updated.
144+
///
145+
/// \returns The newly created intermediate target block.
146+
///
147+
/// \note This function updates PHI nodes, dominator tree, loop info, and
148+
/// cycle info as needed.
149+
static BasicBlock *
150+
createCallBrTarget(CallBrInst *CallBr, BasicBlock *Succ, unsigned SuccIdx,
151+
CycleInfo *CI = nullptr, DomTreeUpdater *DTU = nullptr,
152+
LoopInfo *LI = nullptr, bool *UpdatedLI = nullptr);
121153
};
122154

123155
} // end namespace llvm

llvm/lib/Transforms/Utils/ControlFlowUtils.cpp

Lines changed: 65 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include "llvm/Transforms/Utils/ControlFlowUtils.h"
1414
#include "llvm/ADT/SetVector.h"
1515
#include "llvm/Analysis/DomTreeUpdater.h"
16+
#include "llvm/Analysis/LoopInfo.h"
1617
#include "llvm/IR/Constants.h"
1718
#include "llvm/IR/Instructions.h"
1819
#include "llvm/IR/ValueHandle.h"
@@ -281,7 +282,9 @@ std::pair<BasicBlock *, bool> ControlFlowHub::finalize(
281282

282283
for (auto [BB, Succ0, Succ1] : Branches) {
283284
#ifndef NDEBUG
284-
assert(Incoming.insert(BB).second && "Duplicate entry for incoming block.");
285+
assert(
286+
(Incoming.insert(BB).second || isa<CallBrInst>(BB->getTerminator())) &&
287+
"Duplicate entry for incoming block.");
285288
#endif
286289
if (Succ0)
287290
Outgoing.insert(Succ0);
@@ -341,3 +344,64 @@ std::pair<BasicBlock *, bool> ControlFlowHub::finalize(
341344

342345
return {FirstGuardBlock, true};
343346
}
347+
348+
/// Helper function to update the cycle or loop information after inserting a
349+
/// new block between a callbr instruction and one of its target blocks. Adds
350+
/// the new block to the innermost cycle or loop that the callbr instruction and
351+
/// the original target block share.
352+
/// \p LCI cycle or loop information to update
353+
/// \p CallBrBlock block containing the callbr instruction
354+
/// \p CallBrTarget new target block of the callbr instruction
355+
/// \p Succ original target block of the callbr instruction
356+
template <typename TI, typename T>
357+
static bool updateCycleLoopInfo(TI *LCI, BasicBlock *CallBrBlock,
358+
BasicBlock *CallBrTarget, BasicBlock *Succ) {
359+
static_assert(std::is_same_v<TI, CycleInfo> || std::is_same_v<TI, LoopInfo>,
360+
"type must be CycleInfo or LoopInfo");
361+
if (!LCI)
362+
return false;
363+
364+
T *LC;
365+
if constexpr (std::is_same_v<TI, CycleInfo>)
366+
LC = LCI->getSmallestCommonCycle(CallBrBlock, Succ);
367+
else
368+
LC = LCI->getSmallestCommonLoop(CallBrBlock, Succ);
369+
if (!LC)
370+
return false;
371+
372+
if constexpr (std::is_same_v<TI, CycleInfo>)
373+
LCI->addBlockToCycle(CallBrTarget, LC);
374+
else
375+
LC->addBasicBlockToLoop(CallBrTarget, *LCI);
376+
377+
return true;
378+
}
379+
380+
/// Insert a new block on the edge from \p CallBr to its successor \p Succ
/// (successor index \p SuccIdx) and return that block.
///
/// The new block is named "<callbr block>.target.<succ>", becomes successor
/// \p SuccIdx of \p CallBr, and branches unconditionally to \p Succ. If
/// provided, \p LI, \p CI and \p DTU are updated; \p UpdatedLI (if non-null)
/// receives whether the new block was added to a loop in \p LI.
BasicBlock *ControlFlowHub::createCallBrTarget(CallBrInst *CallBr,
                                               BasicBlock *Succ,
                                               unsigned SuccIdx, CycleInfo *CI,
                                               DomTreeUpdater *DTU,
                                               LoopInfo *LI, bool *UpdatedLI) {
  BasicBlock *Pred = CallBr->getParent();
  BasicBlock *Target = BasicBlock::Create(
      Pred->getContext(), Pred->getName() + ".target." + Succ->getName(),
      Pred->getParent());

  // Redirect the callbr edge to the new block; PHI uses of the callbr block
  // in Succ are replaced with the new block.
  Succ->replacePhiUsesWith(Pred, Target);
  CallBr->setSuccessor(SuccIdx, Target);

  // The new block only forwards control to the original successor.
  BranchInst::Create(Succ, Target);

  // Loop info first (its result is reported to the caller), then cycle info.
  const bool LoopInfoChanged =
      updateCycleLoopInfo<LoopInfo, Loop>(LI, Pred, Target, Succ);
  if (UpdatedLI)
    *UpdatedLI = LoopInfoChanged;
  updateCycleLoopInfo<CycleInfo, Cycle>(CI, Pred, Target, Succ);

  if (!DTU)
    return Target;

  DTU->applyUpdates({{DominatorTree::Insert, Pred, Target}});
  if (DTU->getDomTree().dominates(Pred, Succ))
    DTU->applyUpdates({{DominatorTree::Delete, Pred, Succ},
                       {DominatorTree::Insert, Target, Succ}});
  return Target;
}

0 commit comments

Comments
 (0)