diff --git a/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp b/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp index e7f5c257b21c1..693649c2e8e58 100644 --- a/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp +++ b/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp @@ -35,6 +35,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include #include #include @@ -80,8 +81,10 @@ class HexagonOptAddrMode : public MachineFunctionPass { private: using MISetType = DenseSet; using InstrEvalMap = DenseMap; + DenseSet ProcessedAddiInsts; MachineRegisterInfo *MRI = nullptr; + const TargetRegisterInfo *TRI = nullptr; const HexagonInstrInfo *HII = nullptr; const HexagonRegisterInfo *HRI = nullptr; MachineDominatorTree *MDT = nullptr; @@ -93,6 +96,15 @@ class HexagonOptAddrMode : public MachineFunctionPass { bool processBlock(NodeAddr BA); bool xformUseMI(MachineInstr *TfrMI, MachineInstr *UseMI, NodeAddr UseN, unsigned UseMOnum); + bool processAddBases(NodeAddr AddSN, MachineInstr *AddMI); + bool usedInLoadStore(NodeAddr CurrentInstSN, int64_t NewOffset); + bool findFirstReachedInst( + MachineInstr *AddMI, + std::vector, NodeAddr>> + &AddiList, + NodeAddr &UseSN); + bool updateAddBases(MachineInstr *CurrentMI, MachineInstr *FirstReachedMI, + int64_t NewOffset); bool processAddUses(NodeAddr AddSN, MachineInstr *AddMI, const NodeList &UNodeList); bool updateAddUses(MachineInstr *AddMI, MachineInstr *UseMI); @@ -207,8 +219,17 @@ bool HexagonOptAddrMode::canRemoveAddasl(NodeAddr AddAslSN, return false; for (auto &Mo : UseMI.operands()) + // Is it a frame index? if (Mo.isFI()) return false; + // Is the OffsetReg definition actually reaches UseMI? 
+ if (!UseMI.getParent()->isLiveIn(OffsetReg) && + MI.getParent() != UseMI.getParent()) { + LLVM_DEBUG(dbgs() << " The offset reg " << printReg(OffsetReg, TRI) + << " is NOT live in to MBB " + << UseMI.getParent()->getName() << "\n"); + return false; + } } return true; } @@ -327,6 +348,14 @@ bool HexagonOptAddrMode::isSafeToExtLR(NodeAddr SN, if ((LRExtRegDN.Addr->getFlags() & NodeAttrs::PhiRef) && MI->getParent() != UseMI->getParent()) return false; + // Is the OffsetReg definition actually reaches UseMI? + if (!UseMI->getParent()->isLiveIn(LRExtReg) && + MI->getParent() != UseMI->getParent()) { + LLVM_DEBUG(dbgs() << " The LRExtReg reg " << printReg(LRExtReg, TRI) + << " is NOT live in to MBB " + << UseMI->getParent()->getName() << "\n"); + return false; + } } return true; } @@ -344,6 +373,12 @@ bool HexagonOptAddrMode::isValidOffset(MachineInstr *MI, int Offset) { case Hexagon::V6_vgathermhwq_pseudo: return HII->isValidOffset(MI->getOpcode(), Offset, HRI, false); default: + if (HII->getAddrMode(*MI) == HexagonII::BaseImmOffset) { + // The immediates are mentioned in multiples of vector counts + unsigned AlignMask = HII->getMemAccessSize(*MI) - 1; + if ((AlignMask & Offset) == 0) + return HII->isValidOffset(MI->getOpcode(), Offset, HRI, false); + } return false; } } @@ -414,6 +449,264 @@ unsigned HexagonOptAddrMode::getOffsetOpPosition(MachineInstr *MI) { } }
+// Returns true when at least one of the uses collected by getAllRealUses for
+// CurrentInstSN belongs to an instruction that may load or store and for
+// which isValidOffset accepts NewOffset. Returns false otherwise, including
+// when there are no such uses at all.
+bool HexagonOptAddrMode::usedInLoadStore(NodeAddr CurrentInstSN,
+                                         int64_t NewOffset) {
+  NodeList RealUses;
+  getAllRealUses(CurrentInstSN, RealUses);
+
+  for (NodeAddr UseRef : RealUses) {
+    NodeAddr OwnerSN = UseRef.Addr->getOwner(*DFG);
+    MachineInstr *MemMI = OwnerSN.Addr->getCode();
+    const MCInstrDesc &Desc = MemMI->getDesc();
+
+    // The offset is only validated for instructions that access memory.
+    const bool TouchesMemory = Desc.mayLoad() || Desc.mayStore();
+    if (TouchesMemory && isValidOffset(MemMI, NewOffset))
+      return true;
+  }
+  return false;
+}
+
+// Walks the basic block that contains AddMI in instruction order and looks
+// for the first A2_addi whose statement node is also present in AddiList.
+// On success that statement node is stored in UseSN and true is returned;
+// otherwise UseSN is left untouched and false is returned.
+//
+// The earliest Addi is the one that should be preserved, so that we do not
+// need to handle the complexity of moving instructions.
+//
+// TODO: find Addi instructions across basic blocks
+//
+// TODO: Try to remove this and add a solution that optimizes the number of
+// Addi instructions that can be modified.
+// This change requires choosing the Addi with the median offset value, but
+// would also require moving that instruction above the others. Since this
+// pass runs after register allocation, there might be multiple cases that
+// need to be handled if we move instructions around
+bool HexagonOptAddrMode::findFirstReachedInst(
+    MachineInstr *AddMI,
+    std::vector, NodeAddr>> &AddiList,
+    NodeAddr &UseSN) {
+  for (MachineInstr &Candidate : *AddMI->getParent()) {
+    // Only Addi instructions are of interest.
+    if (Candidate.getOpcode() != Hexagon::A2_addi)
+      continue;
+
+    const auto Match = std::find_if(
+        AddiList.begin(), AddiList.end(), [&Candidate](const auto &Entry) {
+          return Entry.first.Addr->getCode() == &Candidate;
+        });
+    if (Match == AddiList.end())
+      continue;
+
+    UseSN = Match->first;
+    return true;
+  }
+  return false;
+}
+
+// This function tries to modify the immediate value in Hexagon::Addi
+// instructions, so that the immediates could then be moved into a load/store
+// instruction with offset and the add removed completely when we call
+// processAddUses
+//
+// For Example, If we have the below sequence of instructions:
+//
+// r1 = add(r2,#1024)
+// ...
+// r3 = add(r2,#1152)
+// ...
+// r4 = add(r2,#1280)
+//
+// Where the register r2 has the same reaching definition, They get modified to
+// the below sequence:
+//
+// r1 = add(r2,#1024)
+// ...
+// r3 = add(r1,#128)
+// ...
+// r4 = add(r1,#256)
+//
+// The below change helps the processAddUses method to later move the
+// immediates #128 and #256 into a load/store instruction that can take an
+// offset, like the Vd = mem(Rt+#s4)
+//
+// AddSN is the statement node for AddMI. The return value is true when at
+// least one Addi was rewritten, including when the scan over the collected
+// Addi instructions stops early. Every Addi collected here is recorded in
+// ProcessedAddiInsts so that it is considered only once.
+bool HexagonOptAddrMode::processAddBases(NodeAddr AddSN,
+                                         MachineInstr *AddMI) {
+  bool Changed = false;
+
+  LLVM_DEBUG(dbgs() << "\n\t\t[Processing Addi]: " << *AddMI << "\n");
+
+  // Reports (and logs) whether MI has already been handled. The member set
+  // is captured instead of being passed in, which avoids shadowing it.
+  auto Processed = [this](const MachineInstr *MI) -> bool {
+    if (ProcessedAddiInsts.count(MI)) {
+      LLVM_DEBUG(dbgs() << "\t\t\tAddi already found in ProcessedAddiInsts: "
+                        << *MI << "\n\t\t\tSkipping...");
+      return true;
+    }
+    return false;
+  };
+
+  // If we've already processed this Addi, just return
+  if (Processed(AddMI))
+    return Changed;
+  ProcessedAddiInsts.insert(AddMI);
+
+  // Get the base register that would be shared by other Addi instructions
+  Register BaseReg = AddMI->getOperand(1).getReg();
+
+  // Store a list of all Addi instructions that share the above common base
+  // register
+  std::vector, NodeAddr>> AddiList;
+
+  // Start from zero: the checks below treat a zero id as "no reaching
+  // definition", and the id must not be read uninitialized when none of the
+  // use nodes of this statement refers to BaseReg.
+  NodeId UAReachingDefID = 0;
+  // Find the UseNode that contains the base register and its reachingDef
+  for (NodeAddr UA : AddSN.Addr->members_if(DFG->IsUse, *DFG)) {
+    RegisterRef URR = UA.Addr->getRegRef(*DFG);
+    if (BaseReg != URR.Reg)
+      continue;
+
+    UAReachingDefID = UA.Addr->getReachingDef();
+    NodeAddr UADef = DFG->addr(UAReachingDefID);
+    if (!UAReachingDefID || UADef.Addr->getFlags() & NodeAttrs::PhiRef) {
+      LLVM_DEBUG(dbgs() << "\t\t\t Could not find reachingDef. Skipping...\n");
+      return Changed;
+    }
+  }
+
+  // No use node matched the base register, so there is nothing to follow.
+  if (!UAReachingDefID) {
+    LLVM_DEBUG(dbgs() << "\t\t\t Could not find reachingDef. Skipping...\n");
+    return Changed;
+  }
+
+  NodeAddr UAReachingDef = DFG->addr(UAReachingDefID);
+  NodeAddr ReachingDefStmt = UAReachingDef.Addr->getOwner(*DFG);
+
+  // If the reaching definition is a predicated instruction, this might not be
+  // the only definition of our base register, so return immediately.
+  MachineInstr *ReachingDefInstr = ReachingDefStmt.Addr->getCode();
+  if (HII->isPredicated(*ReachingDefInstr))
+    return Changed;
+
+  NodeList AddiUseList;
+
+  // Find all Addi instructions that share the same base register and add them
+  // to the AddiList
+  getAllRealUses(ReachingDefStmt, AddiUseList);
+  for (NodeAddr UN : AddiUseList) {
+    NodeAddr SN = UN.Addr->getOwner(*DFG);
+    MachineInstr *MI = SN.Addr->getCode();
+
+    // Only add instructions if it's an Addi and it's not already processed.
+    // AddMI itself was marked above, so it is exempt from the check.
+    if (MI->getOpcode() == Hexagon::A2_addi &&
+        !(MI != AddMI && Processed(MI))) {
+      AddiList.push_back({SN, UN});
+
+      // This ensures that we process each instruction only once
+      ProcessedAddiInsts.insert(MI);
+    }
+  }
+
+  // If there's only one Addi instruction, nothing to do here
+  if (AddiList.size() <= 1)
+    return Changed;
+
+  NodeAddr FirstReachedUseSN;
+  // Find the first reached use of Addi instruction from the list
+  if (!findFirstReachedInst(AddMI, AddiList, FirstReachedUseSN))
+    return Changed;
+
+  // If we reach this point we know that the StmtNode FirstReachedUseSN is for
+  // an Addi instruction. So, we're guaranteed to have just one DefNode, and
+  // hence we can access the front() directly without checks
+  NodeAddr FirstReachedUseDN =
+      FirstReachedUseSN.Addr->members_if(DFG->IsDef, *DFG).front();
+
+  MachineInstr *FirstReachedMI = FirstReachedUseSN.Addr->getCode();
+  // FirstReachedMI is never rewritten below, so a reference is sufficient.
+  const MachineOperand &FirstReachedMIImmOp = FirstReachedMI->getOperand(2);
+  if (!FirstReachedMIImmOp.isImm())
+    return Changed;
+
+  // The register defined by the first Addi does not change across iterations.
+  RegisterRef FirstReachedDefRR = FirstReachedUseDN.Addr->getRegRef(*DFG);
+
+  for (auto &I : AddiList) {
+    NodeAddr CurrentInstSN = I.first;
+    NodeAddr CurrentInstUN = I.second;
+
+    MachineInstr *CurrentMI = CurrentInstSN.Addr->getCode();
+    const MachineOperand &CurrentMIImmOp = CurrentMI->getOperand(2);
+
+    // Even though we know it's an Addi instruction, the second operand could be
+    // a global value and not an immediate
+    if (!CurrentMIImmOp.isImm())
+      continue;
+
+    const int64_t NewOffset =
+        CurrentMIImmOp.getImm() - FirstReachedMIImmOp.getImm();
+
+    // This is the first occurring Addi, so skip modifying this
+    if (CurrentMI == FirstReachedMI)
+      continue;
+
+    if (CurrentMI->getParent() != FirstReachedMI->getParent())
+      continue;
+
+    // Modify the Addi instruction only if it could be used to modify a
+    // future load/store instruction and get removed
+    //
+    // This check is needed because, if we modify the current Addi instruction
+    // we create RAW dependence between the FirstReached Addi and the current
+    // one, which could result in extra packets. So we only do this change if
+    // we know the current Addi would get removed later
+    //
+    // Earlier iterations may already have rewritten instructions, so report
+    // the accumulated result rather than an unconditional false.
+    if (!usedInLoadStore(CurrentInstSN, NewOffset))
+      return Changed;
+
+    // Verify whether the First Addi's definition register is still live when
+    // we reach the current Addi
+    NodeAddr CurrentAddiIN = CurrentInstUN.Addr->getOwner(*DFG);
+    NodeAddr NearestAA =
+        LV->getNearestAliasedRef(FirstReachedDefRR, CurrentAddiIN);
+    if ((DFG->IsDef(NearestAA) && NearestAA.Id != FirstReachedUseDN.Id) ||
+        (!DFG->IsDef(NearestAA) &&
+         NearestAA.Addr->getReachingDef() != FirstReachedUseDN.Id)) {
+      // Found another definition of FirstReachedDef
+      LLVM_DEBUG(dbgs() << "\t\t\tCould not modify below Addi since the first "
+                           "defined Addi register was redefined\n");
+      continue;
+    }
+
+    const MachineOperand &CurrentMIBaseOp = CurrentMI->getOperand(1);
+    if (CurrentMIBaseOp.getReg() != FirstReachedMI->getOperand(1).getReg())
+      continue;
+
+    // If we reached this point, then we can modify MI to use the result of
+    // FirstReachedMI
+    Changed |= updateAddBases(CurrentMI, FirstReachedMI, NewOffset);
+
+    // Update the reachingDef of the Current AddI use after change
+    CurrentInstUN.Addr->linkToDef(CurrentInstUN.Id, FirstReachedUseDN);
+  }
+
+  return Changed;
+}
+
+// Rewrites CurrentMI so that its base operand (operand 1) becomes the register
+// defined by operand 0 of FirstReachedMI and its immediate (operand 2) becomes
+// NewOffset. The undef and implicit flags of the base operand are taken from
+// that defining operand, CurrentMI is recorded in ProcessedAddiInsts, and the
+// kill flags of the new base register are cleared. Always returns true.
+bool HexagonOptAddrMode::updateAddBases(MachineInstr *CurrentMI,
+                                        MachineInstr *FirstReachedMI,
+                                        int64_t NewOffset) {
+  LLVM_DEBUG(dbgs() << "[About to modify the Addi]: " << *CurrentMI << "\n");
+  // Operand 0 of FirstReachedMI is only read here, so bind it by reference.
+  const MachineOperand &FirstReachedDef = FirstReachedMI->getOperand(0);
+  Register FirstDefRegister = FirstReachedDef.getReg();
+
+  MachineOperand &CurrentMIBaseOp = CurrentMI->getOperand(1);
+  MachineOperand &CurrentMIImmOp = CurrentMI->getOperand(2);
+
+  CurrentMIBaseOp.setReg(FirstDefRegister);
+  CurrentMIBaseOp.setIsUndef(FirstReachedDef.isUndef());
+  CurrentMIBaseOp.setImplicit(FirstReachedDef.isImplicit());
+  CurrentMIImmOp.setImm(NewOffset);
+  ProcessedAddiInsts.insert(CurrentMI);
+  MRI->clearKillFlags(FirstDefRegister);
+  return true;
+}
+
 bool HexagonOptAddrMode::processAddUses(NodeAddr AddSN, MachineInstr *AddMI, const NodeList &UNodeList) { @@ -737,7 +1030,6 @@ bool HexagonOptAddrMode::changeAddAsl(NodeAddr AddAslUN, for (unsigned i = OpStart; i < OpEnd; ++i) MIB.add(UseMI->getOperand(i)); - Deleted.insert(UseMI); } @@ -782,6 +1074,8 @@ bool HexagonOptAddrMode::processBlock(NodeAddr BA) { << "]: " << *MI << "\n\t[InstrNode]: " << Print>(IA, *DFG) << '\n'); + if (MI->getOpcode() == Hexagon::A2_addi) + Changed |= processAddBases(SA, MI); NodeList UNodeList; getAllRealUses(SA, UNodeList); @@ -869,6 +1163,7 @@ bool HexagonOptAddrMode::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; auto &HST = MF.getSubtarget(); MRI = &MF.getRegInfo(); + TRI = MF.getSubtarget().getRegisterInfo(); HII = HST.getInstrInfo(); HRI = HST.getRegisterInfo(); const auto &MDF = getAnalysis(); @@ -885,6 +1180,7 @@ bool HexagonOptAddrMode::runOnMachineFunction(MachineFunction &MF) { LV = &L; Deleted.clear(); + ProcessedAddiInsts.clear(); NodeAddr FA = DFG->getFunc(); LLVM_DEBUG(dbgs() << "==== [RefMap#]=====:\n " << Print>(FA, *DFG) << "\n"); diff --git a/llvm/test/CodeGen/Hexagon/autohvx/addi-offset-opt-addr-mode.ll b/llvm/test/CodeGen/Hexagon/autohvx/addi-offset-opt-addr-mode.ll new file mode 100644 index 0000000000000..00ee5842f93ca --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/autohvx/addi-offset-opt-addr-mode.ll @@ -0,0 +1,41 @@ +; RUN: llc -march=hexagon -disable-hexagon-amodeopt < %s | FileCheck %s --check-prefix=CHECK-NO-AMODE1 + +; RUN: llc -march=hexagon -disable-hexagon-amodeopt=0 < %s | FileCheck %s --check-prefix=CHECK-AMODE + +; CHECK-NO-AMODE1: r{{[0-9]+}} = add([[REG1:(r[0-9]+)]],#{{[0-9]+}}) +; CHECK-NO-AMODE1: r{{[0-9]+}} = add([[REG1]],#{{[0-9]+}}) + +; CHECK-AMODE: [[REG3:(r[0-9]+)]] = add(r{{[0-9]+}},#{{[0-9]+}}) +; CHECK-AMODE: v{{.*}} = vmem([[REG3]]+#{{[0-9]+}}) +; CHECK-AMODE: v{{.*}} = 
vmem([[REG3]]+#{{[0-9]+}}) + +target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048" +target triple = "hexagon" + +define dllexport void @foo() local_unnamed_addr #0 { +entry: + %0 = load i8*, i8** undef, align 4 + %1 = bitcast i8* %0 to half* + %2 = or i32 undef, 128 + %3 = getelementptr half, half* %1, i32 %2 + %4 = bitcast half* %3 to <64 x half>* + %5 = load i8*, i8** undef, align 4 + %6 = getelementptr i8, i8* %5, i32 1024 + %7 = bitcast i8* %6 to <64 x half>* + %8 = load <64 x half>, <64 x half>* %7 + %9 = getelementptr i8, i8* %5, i32 1152 + %10 = bitcast i8* %9 to <64 x half>* + %11 = load <64 x half>, <64 x half>* %10 + %12 = fadd <64 x half> %8, %11 + store <64 x half> %12, <64 x half>* %4, align 128 + call void @llvm.assume(i1 true) [ "align"(i8* undef, i32 128) ] + ret void +} + +; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn +declare void @llvm.assume(i1 noundef) #1 + +; Function Attrs: argmemonly nofree nosync nounwind readonly willreturn +declare <64 x half> @llvm.masked.load.v64f16.p0v64f16(<64 x half>*, i32 immarg, <64 x i1>, <64 x half>) #2 + +attributes #0 = { "target-features"="+hvxv68,+hvx-length128b,+hvx-qfloat" } diff --git a/llvm/test/CodeGen/Hexagon/autohvx/addi-opt-predicated-def-bug.ll b/llvm/test/CodeGen/Hexagon/autohvx/addi-opt-predicated-def-bug.ll new file mode 100644 index 0000000000000..0464bea339fa2 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/autohvx/addi-opt-predicated-def-bug.ll @@ -0,0 +1,36 @@ +; RUN: llc -O3 -march=hexagon < %s | FileCheck %s + +; We do not want the opt-addr-mode pass to modify the addi instructions whose +; base register has a predicated register definition +; CHECK: if ({{.*}}) [[REG1:r([0-9]+)]] = {{.*}} +; CHECK: r{{[0-9]+}} = add([[REG1]],#{{[0-9]+}}) +; CHECK: r{{[0-9]+}} = add([[REG1]],#{{[0-9]+}}) + +@seqToUnseq = external dso_local local_unnamed_addr global 
[256 x i8], align 8 +@unseqToSeq = external dso_local local_unnamed_addr global [256 x i8], align 8 + +define dso_local void @makeMaps() local_unnamed_addr { +entry: + br label %for.body + +for.body: ; preds = %for.inc.3, %entry + %0 = phi i32 [ 0, %entry ], [ %inc.7, %for.inc.3 ] + %inc.1 = add nsw i32 %0, 1 + br i1 undef, label %for.inc.3, label %if.then.3 + +if.then.3: ; preds = %for.body + %arrayidx1.3 = getelementptr inbounds [256 x i8], [256 x i8]* @seqToUnseq, i32 0, i32 %inc.1 + store i8 undef, i8* %arrayidx1.3, align 1 + br label %for.inc.3 + +for.inc.3: ; preds = %if.then.3, %for.body + %1 = phi i32 [ %inc.1, %for.body ], [ 0, %if.then.3 ] + %arrayidx3.4 = getelementptr inbounds [256 x i8], [256 x i8]* @unseqToSeq, i32 0, i32 undef + store i8 0, i8* %arrayidx3.4, align 4 + %inc.4 = add nsw i32 %1, 1 + %conv2.7 = trunc i32 %inc.4 to i8 + %arrayidx3.7 = getelementptr inbounds [256 x i8], [256 x i8]* @unseqToSeq, i32 0, i32 undef + store i8 %conv2.7, i8* %arrayidx3.7, align 1 + %inc.7 = add nsw i32 %inc.4, 1 + br label %for.body +} diff --git a/llvm/test/CodeGen/Hexagon/vgather-opt-addr.ll b/llvm/test/CodeGen/Hexagon/vgather-opt-addr.ll index d2db6da0a3716..ae3b7c9b9dafa 100644 --- a/llvm/test/CodeGen/Hexagon/vgather-opt-addr.ll +++ b/llvm/test/CodeGen/Hexagon/vgather-opt-addr.ll @@ -15,15 +15,19 @@ ; CHECK-NO-AMODE: vmem([[REG5]]+#0) = vtmp.new ; CHECK-NO-AMODE: vmem([[REG6]]+#0) = vtmp.new +; Since we added some extra code to modify the addi offsets and bring them into +; the range of load/store instructions, we cannot guarantee which registers +; would be preserved, but we know for sure that only one Addi should be present +; and the other one should be removed followed by vmems with non-zero offset -; CHECK-AMODE: [[REG1:(r[0-9]+)]] = add({{r[0-9]+}},#0) +; CHECK-AMODE: [[REG1:(r[0-9]+)]] = add({{r[0-9]+}},#{{[0-9]+}}) ; CHECK-AMODE-NOT: {{r[0-9]+}} = add([[REG1]],{{[0-9]+}}) -; CHECK-AMODE: vmem([[REG1]]+#0) = vtmp.new -; CHECK-AMODE: vmem([[REG1]]+#1) 
= vtmp.new -; CHECK-AMODE: vmem([[REG1]]+#2) = vtmp.new -; CHECK-AMODE: vmem([[REG1]]+#3) = vtmp.new -; CHECK-AMODE: vmem([[REG1]]+#4) = vtmp.new -; CHECK-AMODE: vmem([[REG1]]+#5) = vtmp.new +; CHECK-AMODE: vmem([[REG1]]+#{{[0-9]}}) = vtmp.new +; CHECK-AMODE: vmem([[REG2:(r[0-9]+)]]+#{{-?[0-9]}}) = vtmp.new +; CHECK-AMODE: vmem([[REG2]]+#{{-?[1-9]}}) = vtmp.new +; CHECK-AMODE: vmem([[REG2]]+#{{-?[1-9]}}) = vtmp.new +; CHECK-AMODE: vmem([[REG2]]+#{{-?[1-9]}}) = vtmp.new +; CHECK-AMODE: vmem([[REG2]]+#{{-?[1-9]}}) = vtmp.new target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048" target triple = "hexagon" diff --git a/llvm/test/CodeGen/MIR/Hexagon/addrmode-opt-nonreaching.mir b/llvm/test/CodeGen/MIR/Hexagon/addrmode-opt-nonreaching.mir new file mode 100644 index 0000000000000..f6296d8ddf374 --- /dev/null +++ b/llvm/test/CodeGen/MIR/Hexagon/addrmode-opt-nonreaching.mir @@ -0,0 +1,233 @@ +# It is not safe to make the transformation if the definition is killed by a call. +# Use debug output for simplicity and test resilience. 
+# +# RUN: llc -march=hexagon -run-pass amode-opt %s -print-after-all -o %t_1.mir 2>&1 | FileCheck %s +# CHECK: bb.4.if.else +# CHECK-NOT: liveins: $r2 +--- | + ; ModuleID = 'foo.reduced.i' + source_filename = "foo.reduced.i" + target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048" + target triple = "hexagon-unknown-unknown-elf" + + %struct.struct_1 = type { i8, i8, [7 x %struct.struct_3] } + %struct.struct_3 = type { i8, %struct.struct_4 } + %struct.struct_4 = type { i32, i32 } + %struct.struct_2 = type { i32, i32 } + + ; Function Attrs: nounwind + define dso_local zeroext i8 @fun_4(i32 noundef %arg_1, i8 noundef zeroext %arg_2, ptr nocapture noundef readonly %arg_3, ptr nocapture noundef readonly %arg_4) local_unnamed_addr #0 { + entry: + %conv = zext i8 %arg_2 to i32 + %cmp = icmp ult i8 %arg_2, 7 + br i1 %cmp, label %if.then, label %if.end + + if.then: ; preds = %entry + tail call void @fun_2(ptr noundef null, i32 noundef %conv) #3 + unreachable + + if.end: ; preds = %entry + %cgep = getelementptr inbounds %struct.struct_1, ptr %arg_3, i32 0, i32 2, i32 %conv + %cgep23 = getelementptr inbounds %struct.struct_3, ptr %cgep, i32 0, i32 1, i32 1 + %0 = load i32, ptr %cgep23, align 4 + %cmp3 = icmp eq i32 %0, 4 + br i1 %cmp3, label %land.lhs.true, label %if.else + + land.lhs.true: ; preds = %if.end + %cgep22 = getelementptr inbounds %struct.struct_3, ptr %cgep, i32 0, i32 1 + %1 = load i32, ptr %cgep22, align 4 + %call = tail call zeroext i8 @fun_5(i32 noundef %arg_1, i32 noundef %1) #4 + %tobool.not = icmp eq i8 %call, 0 + br i1 %tobool.not, label %if.else, label %if.end12 + + if.else: ; preds = %land.lhs.true, %if.end + %2 = load i8, ptr %cgep, align 4 + %cmp.i = icmp eq i8 %2, 1 + br i1 %cmp.i, label %if.then.i, label %fun_3.exit + + if.then.i: ; preds = %if.else + %cgep2027 = bitcast ptr %arg_4 to ptr + %cgep26 = getelementptr inbounds 
%struct.struct_2, ptr %cgep2027, i32 0, i32 1 + %3 = load i32, ptr %cgep26, align 4 + %call.i = tail call i32 @fun_1(i32 noundef %arg_1) #4 + %cmp2.i = icmp ult i32 %3, %call.i + %cgep25 = getelementptr inbounds %struct.struct_2, ptr %cgep2027, i32 0, i32 1 + %4 = load i32, ptr %cgep25, align 4 + %call6.i = tail call i32 @fun_1(i32 noundef %arg_1) #4 + %cmp7.i = icmp ult i32 %4, %call6.i + %5 = select i1 %cmp2.i, i1 true, i1 %cmp7.i + br label %fun_3.exit + + fun_3.exit: ; preds = %if.else, %if.then.i + %resume_ttl.1.i = phi i1 [ false, %if.else ], [ %5, %if.then.i ] + %conv15.i = zext i1 %resume_ttl.1.i to i8 + br label %if.end12 + + if.end12: ; preds = %land.lhs.true, %fun_3.exit + %resume_loops.0 = phi i8 [ %conv15.i, %fun_3.exit ], [ 1, %land.lhs.true ] + ret i8 %resume_loops.0 + } + + ; Function Attrs: noreturn + declare dso_local void @fun_2(ptr noundef, i32 noundef) local_unnamed_addr #1 + + declare dso_local zeroext i8 @fun_5(i32 noundef, i32 noundef) local_unnamed_addr #2 + + declare dso_local i32 @fun_1(i32 noundef) local_unnamed_addr #2 + + !llvm.module.flags = !{!0, !1} + !llvm.ident = !{!2} + + !0 = !{i32 1, !"wchar_size", i32 4} + !1 = !{i32 7, !"frame-pointer", i32 2} + !2 = !{!""} + +... 
+--- +name: fun_4 +alignment: 16 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +callsEHReturn: false +callsUnwindInit: false +hasEHCatchret: false +hasEHScopes: false +hasEHFunclets: false +isOutlined: false +debugInstrRef: false +failsVerification: false +tracksDebugUserValues: true +registers: [] +liveins: + - { reg: '$r0', virtual-reg: '' } + - { reg: '$r1', virtual-reg: '' } + - { reg: '$r2', virtual-reg: '' } + - { reg: '$r3', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 4 + adjustsStack: false + hasCalls: true + stackProtector: '' + functionContext: '' + maxCallFrameSize: 4294967295 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +entry_values: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: {} +body: | + bb.0.entry: + successors: %bb.1(0x00000800), %bb.2(0x7ffff800) + liveins: $r0:0x0000000000000001, $r1:0x0000000000000001, $r2:0x0000000000000001, $r3:0x0000000000000001 + + renamable $p0 = C2_cmpgtui renamable $r1, 6 + renamable $r16 = COPY killed $r3 + renamable $r17 = COPY killed $r0 + J2_jumpt killed renamable $p0, %bb.2, implicit-def dead $pc + J2_jump %bb.1, implicit-def dead $pc + + bb.1.if.then: + successors: + liveins: $r1:0x0000000000000001 + + $r0 = A2_tfrsi 0 + ADJCALLSTACKDOWN 0, 0, implicit-def $r29, implicit-def dead $r30, implicit killed $r31, implicit killed $r30, implicit 
$r29 + PS_call_nr @fun_2, hexagoncsr, implicit killed $r0, implicit killed $r1, implicit-def $r29 + ADJCALLSTACKUP 0, 0, implicit-def dead $r29, implicit-def dead $r30, implicit-def dead $r31, implicit killed $r29 + + bb.2.if.end: + successors: %bb.3(0x40000000), %bb.4(0x40000000) + liveins: $r1:0x0000000000000001, $r2:0x0000000000000001, $r16:0x0000000000000001, $r17:0x0000000000000001 + + renamable $r2 = M2_macsip killed renamable $r2, killed renamable $r1, 12 + renamable $r3 = L2_loadri_io renamable $r2, 12 :: (load (s32) from %ir.cgep23) + renamable $p0 = C2_cmpeqi killed renamable $r3, 4 + renamable $r18 = A2_addi killed renamable $r2, 4 + J2_jumpf killed renamable $p0, %bb.4, implicit-def dead $pc + J2_jump %bb.3, implicit-def dead $pc + + bb.3.land.lhs.true: + successors: %bb.4(0x30000000), %bb.8(0x50000000) + liveins: $r16:0x0000000000000001, $r17:0x0000000000000001, $r18:0x0000000000000001 + + renamable $r1 = L2_loadri_io renamable $r18, 4 :: (load (s32) from %ir.cgep22) + ADJCALLSTACKDOWN 0, 0, implicit-def $r29, implicit-def dead $r30, implicit killed $r31, implicit killed $r30, implicit $r29 + $r0 = COPY renamable $r17 + J2_call @fun_5, hexagoncsr, implicit-def dead $pc, implicit-def dead $r31, implicit $r29, implicit $r0, implicit killed $r1, implicit-def $r29, implicit-def $r0 + renamable $p0 = C2_cmpeqi killed renamable $r0, 0 + ADJCALLSTACKUP 0, 0, implicit-def dead $r29, implicit-def dead $r30, implicit-def dead $r31, implicit killed $r29 + renamable $r0 = A2_tfrsi 1 + J2_jumpf killed renamable $p0, %bb.8, implicit-def dead $pc + J2_jump %bb.4, implicit-def dead $pc + + bb.4.if.else: + successors: %bb.6(0x40000000), %bb.5(0x40000000) + liveins: $r16:0x0000000000000001, $r17:0x0000000000000001, $r18:0x0000000000000001 + + renamable $r2 = L2_loadrb_io killed renamable $r18, 0 :: (load (s8) from %ir.cgep, align 4) + renamable $p0 = C2_cmpeqi killed renamable $r2, 1 + J2_jumpt killed renamable $p0, %bb.6, implicit-def $pc + + bb.5: + successors: 
%bb.7(0x80000000) + + renamable $p0 = PS_false + J2_jump %bb.7, implicit-def $pc + + bb.6.if.then.i: + successors: %bb.7(0x80000000) + liveins: $r16:0x0000000000000001, $r17:0x0000000000000001 + + ADJCALLSTACKDOWN 0, 0, implicit-def $r29, implicit-def dead $r30, implicit $r31, implicit $r30, implicit $r29 + $r0 = COPY renamable $r17 + renamable $r18 = L2_loadri_io renamable $r16, 4 :: (load (s32) from %ir.cgep26) + J2_call @fun_1, hexagoncsr, implicit-def dead $pc, implicit-def dead $r31, implicit $r29, implicit $r0, implicit-def $r29, implicit-def $r0 + ADJCALLSTACKUP 0, 0, implicit-def dead $r29, implicit-def dead $r30, implicit-def dead $r31, implicit $r29 + renamable $p0 = C2_cmpgtu killed renamable $r0, killed renamable $r18 + STriw_pred %stack.0, 0, killed renamable $p0 :: (store (s32) into %stack.0) + $r0 = COPY killed renamable $r17 + ADJCALLSTACKDOWN 0, 0, implicit-def $r29, implicit-def dead $r30, implicit killed $r31, implicit killed $r30, implicit $r29 + renamable $r16 = L2_loadri_io killed renamable $r16, 4 :: (load (s32) from %ir.cgep25) + J2_call @fun_1, hexagoncsr, implicit-def dead $pc, implicit-def dead $r31, implicit $r29, implicit $r0, implicit-def $r29, implicit-def $r0 + renamable $p0 = C2_cmpgtu killed renamable $r0, killed renamable $r16 + ADJCALLSTACKUP 0, 0, implicit-def dead $r29, implicit-def dead $r30, implicit-def dead $r31, implicit killed $r29 + renamable $p1 = LDriw_pred %stack.0, 0 :: (load (s32) from %stack.0) + renamable $p0 = C2_or killed renamable $p1, killed renamable $p0 + + bb.7.fun_3.exit: + successors: %bb.8(0x80000000) + liveins: $p0 + + renamable $r0 = C2_cmoveit renamable $p0, 1 + renamable $r0 = C2_cmoveif killed renamable $p0, 0, implicit killed renamable $r0(tied-def 0) + + bb.8.if.end12: + liveins: $r0:0x0000000000000001 + + PS_jmpret killed $r31, implicit-def dead $pc, implicit killed $r0 + +...