diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h index 6bbe2d03f9e58b..f8d97c2c07a6f5 100644 --- a/llvm/include/llvm/CodeGen/MachineInstr.h +++ b/llvm/include/llvm/CodeGen/MachineInstr.h @@ -1156,6 +1156,10 @@ class MachineInstr return getOpcode() == TargetOpcode::CFI_INSTRUCTION; } + bool isPseudoProbe() const { + return getOpcode() == TargetOpcode::PSEUDO_PROBE; + } + // True if the instruction represents a position in the function. bool isPosition() const { return isLabel() || isCFIInstruction(); } @@ -1165,6 +1169,9 @@ class MachineInstr bool isDebugInstr() const { return isDebugValue() || isDebugLabel() || isDebugRef(); } + bool isDebugOrPseudoInstr() const { + return isDebugInstr() || isPseudoProbe(); + } bool isDebugOffsetImm() const { return getDebugOffset().isImm(); } diff --git a/llvm/include/llvm/IR/Instruction.h b/llvm/include/llvm/IR/Instruction.h index 85afaed5225e77..b99dc62bbb9de8 100644 --- a/llvm/include/llvm/IR/Instruction.h +++ b/llvm/include/llvm/IR/Instruction.h @@ -654,6 +654,9 @@ class Instruction : public User, /// llvm.lifetime.end marker. bool isLifetimeStartOrEnd() const; + /// Return true if the instruction is a DbgInfoIntrinsic or PseudoProbeInst. + bool isDebugOrPseudoInst() const; + /// Return a pointer to the next non-debug instruction in the same basic /// block as 'this', or nullptr if no such instruction exists. Skip any pseudo /// operations if \c SkipPseudoOp is true. diff --git a/llvm/lib/CodeGen/LiveRangeShrink.cpp b/llvm/lib/CodeGen/LiveRangeShrink.cpp index 26439a6569179b..7fa14fd902efea 100644 --- a/llvm/lib/CodeGen/LiveRangeShrink.cpp +++ b/llvm/lib/CodeGen/LiveRangeShrink.cpp @@ -156,7 +156,8 @@ bool LiveRangeShrink::runOnMachineFunction(MachineFunction &MF) { // If MI has side effects, it should become a barrier for code motion. // IOM is rebuild from the next instruction to prevent later // instructions from being moved before this MI. 
- if (MI.hasUnmodeledSideEffects() && Next != MBB.end()) { + if (MI.hasUnmodeledSideEffects() && !MI.isPseudoProbe() && + Next != MBB.end()) { BuildInstOrderMap(Next, IOM); SawStore = false; } diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp index 59d98054e3a22b..b6cfd7dcbfbc82 100644 --- a/llvm/lib/CodeGen/MachineInstr.cpp +++ b/llvm/lib/CodeGen/MachineInstr.cpp @@ -1462,7 +1462,8 @@ bool MachineInstr::hasUnmodeledSideEffects() const { } bool MachineInstr::isLoadFoldBarrier() const { - return mayStore() || isCall() || hasUnmodeledSideEffects(); + return mayStore() || isCall() || + (hasUnmodeledSideEffects() && !isPseudoProbe()); } /// allDefsAreDead - Return true if all the defs of this instruction are dead. diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 6638ff6a6358bb..a6bd774934ac13 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -9660,8 +9660,9 @@ findArgumentCopyElisionCandidates(const DataLayout &DL, // We will look through cast uses, so ignore them completely. if (I.isCast()) continue; - // Ignore debug info intrinsics, they don't escape or store to allocas. - if (isa<DbgInfoIntrinsic>(I)) + // Ignore debug info and pseudo op intrinsics, they don't escape or store + // to allocas. + if (I.isDebugOrPseudoInst()) continue; // This is an unknown instruction. Assume it escapes or writes to all // static alloca operands. diff --git a/llvm/lib/CodeGen/StackProtector.cpp b/llvm/lib/CodeGen/StackProtector.cpp index 0411faabbcc393..8d91afb6e99d42 100644 --- a/llvm/lib/CodeGen/StackProtector.cpp +++ b/llvm/lib/CodeGen/StackProtector.cpp @@ -192,7 +192,7 @@ bool StackProtector::HasAddressTaken(const Instruction *AI, // Ignore intrinsics that do not become real instructions. // TODO: Narrow this to intrinsics that have store-like effects. 
const auto *CI = cast<CallInst>(I); - if (!isa<DbgInfoIntrinsic>(CI) && !CI->isLifetimeStartOrEnd()) + if (!CI->isDebugOrPseudoInst() && !CI->isLifetimeStartOrEnd()) return true; break; } diff --git a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp index ecee4aed7f881a..2a9132bd2fe082 100644 --- a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -801,8 +801,8 @@ bool TwoAddressInstructionPass::rescheduleMIBelowKill( MachineBasicBlock::iterator KillPos = KillMI; ++KillPos; for (MachineInstr &OtherMI : make_range(End, KillPos)) { - // Debug instructions cannot be counted against the limit. - if (OtherMI.isDebugInstr()) + // Debug or pseudo instructions cannot be counted against the limit. + if (OtherMI.isDebugOrPseudoInstr()) continue; if (NumVisited > 10) // FIXME: Arbitrary limit to reduce compile time cost. return false; @@ -974,8 +974,8 @@ bool TwoAddressInstructionPass::rescheduleKillAboveMI( unsigned NumVisited = 0; for (MachineInstr &OtherMI : make_range(mi, MachineBasicBlock::iterator(KillMI))) { - // Debug instructions cannot be counted against the limit. - if (OtherMI.isDebugInstr()) + // Debug or pseudo instructions cannot be counted against the limit. + if (OtherMI.isDebugOrPseudoInstr()) continue; if (NumVisited > 10) // FIXME: Arbitrary limit to reduce compile time cost. 
return false; diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp index 246180e721723e..8e52dd3ddc71b3 100644 --- a/llvm/lib/IR/Instruction.cpp +++ b/llvm/lib/IR/Instruction.cpp @@ -651,6 +651,10 @@ bool Instruction::isLifetimeStartOrEnd() const { return ID == Intrinsic::lifetime_start || ID == Intrinsic::lifetime_end; } +bool Instruction::isDebugOrPseudoInst() const { + return isa<DbgInfoIntrinsic>(this) || isa<PseudoProbeInst>(this); +} + const Instruction * Instruction::getNextNonDebugInstruction(bool SkipPseudoOp) const { for (const Instruction *I = getNextNode(); I; I = I->getNextNode()) diff --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp index 256acd7e1d1772..6730824e860ac9 100644 --- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp @@ -149,6 +149,13 @@ static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody, if (isNoModRef(MRI)) continue; + // A pseudo probe call shouldn't change any function attribute since it + // doesn't translate to a real instruction. It comes with a memory access + // tag to prevent itself being removed by optimizations and not block + // other instructions being optimized. + if (isa<PseudoProbeInst>(I)) + continue; + if (!AliasAnalysis::onlyAccessesArgPointees(MRB)) { // The call could access any memory. If that includes writes, note it. if (isModSet(MRI)) diff --git a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp index d687ec654438a7..b211b0813611a2 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -592,8 +592,14 @@ static bool isSafeAndProfitableToSinkLoad(LoadInst *L) { BasicBlock::iterator BBI = L->getIterator(), E = L->getParent()->end(); for (++BBI; BBI != E; ++BBI) - if (BBI->mayWriteToMemory()) + if (BBI->mayWriteToMemory()) { + // Calls that only access inaccessible memory do not block sinking the + // load. 
+ if (auto *CB = dyn_cast<CallBase>(BBI)) + if (CB->onlyAccessesInaccessibleMemory()) + continue; return false; + } // Check for non-address taken alloca. If not address-taken already, it isn't // profitable to do this xform. diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 518e909e8ab400..828fd49524ec16 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -3878,9 +3878,10 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL, } } - // Skip processing debug intrinsics in InstCombine. Processing these call instructions - // consumes non-trivial amount of time and provides no value for the optimization. - if (!isa<DbgInfoIntrinsic>(Inst)) { + // Skip processing debug and pseudo intrinsics in InstCombine. Processing + // these call instructions consumes non-trivial amount of time and + // provides no value for the optimization. 
+ if (!Inst->isDebugOrPseudoInst()) { InstrsForInstCombineWorklist.push_back(Inst); SeenAliasScopes.analyse(Inst); } diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-instcombine.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-instcombine.ll new file mode 100644 index 00000000000000..e5bb7bc541c667 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-instcombine.ll @@ -0,0 +1,66 @@ +; RUN: opt -passes=instcombine -S < %s | FileCheck %s + +%struct.nonbonded = type { [2 x %struct.CompAtom*], [2 x %struct.CompAtomExt*], [2 x %struct.CompAtom*], [2 x %class.Vector*], [2 x %class.Vector*], [2 x i32], %class.Vector, double*, double*, %class.ComputeNonbondedWorkArrays*, %class.Pairlists*, i32, i32, double, double, i32, i32, i32, i32 } +%struct.CompAtomExt = type { i32 } +%struct.CompAtom = type { %class.Vector, float, i16, i8, i8 } +%class.Vector = type { double, double, double } +%class.ComputeNonbondedWorkArrays = type { %class.ResizeArray, %class.ResizeArray.0, %class.ResizeArray, %class.ResizeArray, %class.ResizeArray, %class.ResizeArray, %class.ResizeArray, %class.ResizeArray, %class.ResizeArray, %class.ResizeArray, %class.ResizeArray, %class.ResizeArray, %class.ResizeArray, %class.ResizeArray, %class.ResizeArray, %class.ResizeArray, %class.ResizeArray, %class.ResizeArray.2, %class.ResizeArray.2 } +%class.ResizeArray.0 = type { i32 (...)**, %class.ResizeArrayRaw.1* } +%class.ResizeArrayRaw.1 = type <{ double*, i8*, i32, i32, i32, float, i32, [4 x i8] }> +%class.ResizeArray = type { i32 (...)**, %class.ResizeArrayRaw* } +%class.ResizeArrayRaw = type <{ i16*, i8*, i32, i32, i32, float, i32, [4 x i8] }> +%class.ResizeArray.2 = type { i32 (...)**, %class.ResizeArrayRaw.3* } +%class.ResizeArrayRaw.3 = type <{ %class.Vector*, i8*, i32, i32, i32, float, i32, [4 x i8] }> +%class.Pairlists = type { i16*, i32, i32 } + +;; Check the minPart4 and minPart assignments are merged. 
+; CHECK-COUNT-1: getelementptr inbounds %struct.nonbonded, %struct.nonbonded* %params, i64 0, i32 16 +; CHECK-NOT: getelementptr inbounds %struct.nonbonded, %struct.nonbonded* %params, i64 0, i32 16 + +define dso_local void @_ZN20ComputeNonbondedUtil9calc_pairEP9nonbonded(%struct.nonbonded* nocapture readonly %params) local_unnamed_addr align 2 { +entry: + %savePairlists3 = getelementptr inbounds %struct.nonbonded, %struct.nonbonded* %params, i64 0, i32 11 + %0 = load i32, i32* %savePairlists3, align 8 + %usePairlists4 = getelementptr inbounds %struct.nonbonded, %struct.nonbonded* %params, i64 0, i32 12 + %1 = load i32, i32* %usePairlists4, align 4 + %tobool54.not = icmp eq i32 %0, 0 + br i1 %tobool54.not, label %lor.lhs.false55, label %if.end109 + +lor.lhs.false55: ; preds = %entry + %tobool56.not = icmp eq i32 %1, 0 + br i1 %tobool56.not, label %if.end109, label %if.end109.thread + +if.end109.thread: ; preds = %lor.lhs.false55 + %minPart4 = getelementptr inbounds %struct.nonbonded, %struct.nonbonded* %params, i64 0, i32 16 + %2 = load i32, i32* %minPart4, align 4 + call void @llvm.pseudoprobe(i64 -6172701105289426098, i64 2, i32 0, i64 -1) + br label %if.then138 + +if.end109: ; preds = %lor.lhs.false55, %entry + %minPart = getelementptr inbounds %struct.nonbonded, %struct.nonbonded* %params, i64 0, i32 16 + %3 = load i32, i32* %minPart, align 4 + call void @llvm.pseudoprobe(i64 -6172701105289426098, i64 3, i32 0, i64 -1) + %tobool116.not = icmp eq i32 %1, 0 + br i1 %tobool116.not, label %if.then117, label %if.then138 + +if.then117: ; preds = %if.end109 + ret void + +if.then138: ; preds = %if.end109.thread, %if.end109 + %4 = phi i32 [ %2, %if.end109.thread ], [ %3, %if.end109 ] + %tobool139.not = icmp eq i32 %4, 0 + br i1 %tobool139.not, label %if.else147, label %if.then140 + +if.then140: ; preds = %if.then138 + ret void + +if.else147: ; preds = %if.then138 + ret void +} + +declare dso_local void @_ZN9Pairlists8addIndexEv() align 2 + +; Function Attrs: 
inaccessiblememonly nounwind willreturn +declare void @llvm.pseudoprobe(i64, i64, i32, i64) #0 + +attributes #0 = { inaccessiblememonly nounwind willreturn } diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-instsched.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-instsched.ll new file mode 100644 index 00000000000000..609af90db610ac --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-instsched.ll @@ -0,0 +1,33 @@ +; PR1075 +; RUN: llc < %s -mcpu=generic -mtriple=x86_64-apple-darwin -pseudo-probe-for-profiling -O3 | FileCheck %s + +define float @foo(float %x) #0 { + %tmp1 = fmul float %x, 3.000000e+00 + %tmp3 = fmul float %x, 5.000000e+00 + %tmp5 = fmul float %x, 7.000000e+00 + %tmp7 = fmul float %x, 1.100000e+01 + call void @llvm.pseudoprobe(i64 6699318081062747564, i64 1, i32 0, i64 -1) + %tmp10 = fadd float %tmp1, %tmp3 + %tmp12 = fadd float %tmp10, %tmp5 + %tmp14 = fadd float %tmp12, %tmp7 + ret float %tmp14 +; CHECK: mulss +; CHECK: mulss +; CHECK: addss +; CHECK: mulss +; CHECK: addss +; CHECK: mulss +; CHECK: addss +; CHECK: ret +} + +; Function Attrs: inaccessiblememonly nounwind willreturn +declare void @llvm.pseudoprobe(i64, i64, i32, i64) #1 + +attributes #0 = { nounwind } +attributes #1 = { inaccessiblememonly nounwind willreturn } + +!llvm.pseudo_probe_desc = !{!0} + +!0 = !{i64 6699318081062747564, i64 4294967295, !"foo", null} + diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-peep.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-peep.ll new file mode 100644 index 00000000000000..a1fb25c95936ea --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-peep.ll @@ -0,0 +1,29 @@ +; RUN: llc -mtriple=x86_64-- -stop-after=peephole-opt -o - %s | FileCheck %s + +define internal i32 @arc_compare() { +entry: + %0 = load i64, i64* undef, align 8 + br i1 undef, label %return, label %if.end + +if.end: ; preds = %entry +; Check a register copy has been sunk into the compare instruction. 
+; CHECK: %[[#REG:]]:gr64 = IMPLICIT_DEF +; CHECK-NOT: %[[#]]:gr64 = MOV64rm %[[#REG]] +; CHECK: PSEUDO_PROBE 5116412291814990879, 3, 0, 0 +; CHECK: CMP64mr %[[#REG]], 1 + call void @llvm.pseudoprobe(i64 5116412291814990879, i64 3, i32 0, i64 -1) + %cmp4 = icmp slt i64 %0, undef + br i1 %cmp4, label %return, label %if.end6 + +if.end6: ; preds = %if.end + call void @llvm.pseudoprobe(i64 5116412291814990879, i64 5, i32 0, i64 -1) + br label %return + +return: ; preds = %if.end6, %if.end, %entry + ret i32 undef +} + +; Function Attrs: inaccessiblememonly nounwind willreturn +declare void @llvm.pseudoprobe(i64, i64, i32, i64) #0 + +attributes #0 = { inaccessiblememonly nounwind willreturn } diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-twoaddr.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-twoaddr.ll new file mode 100644 index 00000000000000..81f72d3c5871df --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-twoaddr.ll @@ -0,0 +1,37 @@ +; RUN: llc -stop-after=twoaddressinstruction -mtriple=x86_64-- -o - %s | FileCheck %s + + +define dso_local double @twoaddressinstruction() local_unnamed_addr { +for.end: + %0 = load i64, i64* undef, align 8 + br label %for.body14.preheader + +for.body14.preheader: ; preds = %for.end + br i1 undef, label %for.cond25.preheader.loopexit.unr-lcssa, label %for.body14.preheader.new + +for.body14.preheader.new: ; preds = %for.body14.preheader + %unroll_iter136 = and i64 %0, -4 + br label %for.body14 + +for.cond25.preheader.loopexit.unr-lcssa: ; preds = %for.body14, %for.body14.preheader + %indvars.iv127.unr = phi i64 [ 1, %for.body14.preheader ], [ %indvars.iv.next128.3, %for.body14 ] + ret double undef + +for.body14: ; preds = %for.body14, %for.body14.preheader.new + %indvars.iv127 = phi i64 [ 1, %for.body14.preheader.new ], [ %indvars.iv.next128.3, %for.body14 ] + %niter137 = phi i64 [ %unroll_iter136, %for.body14.preheader.new ], [ %niter137.nsub.3, %for.body14 ] + %indvars.iv.next128.3 = add nuw nsw 
i64 %indvars.iv127, 4 +; CHECK: PSEUDO_PROBE -6878943695821059507, 9, 0, 0 + call void @llvm.pseudoprobe(i64 -6878943695821059507, i64 9, i32 0, i64 -1) +;; Check an opeq form of instruction is created. +; CHECK: %[[#REG:]]:gr64_nosp = COPY killed %[[#]] +; CHECK: %[[#REG]]:gr64_nosp = nuw ADD64ri8 %[[#REG]], 4, implicit-def dead $eflags + %niter137.nsub.3 = add i64 %niter137, -4 + %niter137.ncmp.3 = icmp eq i64 %niter137.nsub.3, 0 + br i1 %niter137.ncmp.3, label %for.cond25.preheader.loopexit.unr-lcssa, label %for.body14 +} + +; Function Attrs: inaccessiblememonly nounwind willreturn +declare void @llvm.pseudoprobe(i64, i64, i32, i64) #0 + +attributes #0 = { inaccessiblememonly nounwind willreturn } \ No newline at end of file