@@ -660,8 +660,9 @@ class DataflowSrcSafetyAnalysis
660660//
661661// Then, a function can be split into a number of disjoint contiguous sequences
662662// of instructions without labels in between. These sequences can be processed
663- // the same way basic blocks are processed by data-flow analysis, assuming
664- // pessimistically that all registers are unsafe at the start of each sequence.
663+ // the same way basic blocks are processed by data-flow analysis, with the same
664+ // pessimistic estimation of the initial state at the start of each sequence
665+ // (except the first instruction of the function).
665666class CFGUnawareSrcSafetyAnalysis : public SrcSafetyAnalysis {
666667 BinaryFunction &BF;
667668 MCPlusBuilder::AllocatorIdTy AllocId;
@@ -672,6 +673,30 @@ class CFGUnawareSrcSafetyAnalysis : public SrcSafetyAnalysis {
672673 BC.MIB ->removeAnnotation (I.second , StateAnnotationIndex);
673674 }
674675
676+ // / Compute a reasonably pessimistic estimation of the register state when
677+ // / the previous instruction is not known for sure. Take the set of registers
678+ // / which are trusted at function entry and remove all registers that can be
679+ // / clobbered inside this function.
680+ SrcState computePessimisticState (BinaryFunction &BF) {
681+ BitVector ClobberedRegs (NumRegs);
682+ for (auto &I : BF.instrs ()) {
683+ MCInst &Inst = I.second ;
684+ BC.MIB ->getClobberedRegs (Inst, ClobberedRegs);
685+
686+ // If this is a call instruction, no register is safe anymore, unless
687+ // it is a tail call. Ignore tail calls for the purpose of estimating the
688+ // worst-case scenario, assuming no instructions are executed in the
689+ // caller after this point anyway.
690+ if (BC.MIB ->isCall (Inst) && !BC.MIB ->isTailCall (Inst))
691+ ClobberedRegs.set ();
692+ }
693+
694+ SrcState S = createEntryState ();
695+ S.SafeToDerefRegs .reset (ClobberedRegs);
696+ S.TrustedRegs .reset (ClobberedRegs);
697+ return S;
698+ }
699+
675700public:
676701 CFGUnawareSrcSafetyAnalysis (BinaryFunction &BF,
677702 MCPlusBuilder::AllocatorIdTy AllocId,
@@ -682,6 +707,7 @@ class CFGUnawareSrcSafetyAnalysis : public SrcSafetyAnalysis {
682707 }
683708
684709 void run () override {
710+ const SrcState DefaultState = computePessimisticState (BF);
685711 SrcState S = createEntryState ();
686712 for (auto &I : BF.instrs ()) {
687713 MCInst &Inst = I.second ;
@@ -696,7 +722,7 @@ class CFGUnawareSrcSafetyAnalysis : public SrcSafetyAnalysis {
696722 LLVM_DEBUG ({
697723 traceInst (BC, " Due to label, resetting the state before" , Inst);
698724 });
699- S = createUnsafeState () ;
725+ S = DefaultState ;
700726 }
701727
702728 // Check if we need to remove an old annotation (this is the case if
@@ -1233,6 +1259,83 @@ shouldReportReturnGadget(const BinaryContext &BC, const MCInstReference &Inst,
12331259 return make_gadget_report (RetKind, Inst, *RetReg);
12341260}
12351261
1262+ // / While BOLT already marks some of the branch instructions as tail calls,
1263+ // / this function tries to improve the coverage by including less obvious cases
1264+ // / when it is possible to do without introducing too many false positives.
1265+ static bool shouldAnalyzeTailCallInst (const BinaryContext &BC,
1266+ const BinaryFunction &BF,
1267+ const MCInstReference &Inst) {
1268+ // Some BC.MIB->isXYZ(Inst) methods simply delegate to MCInstrDesc::isXYZ()
1269+ // (such as isBranch at the time of writing this comment), some don't (such
1270+ // as isCall). For that reason, call MCInstrDesc's methods explicitly when
1271+ // it is important.
1272+ const MCInstrDesc &Desc =
1273+ BC.MII ->get (static_cast <const MCInst &>(Inst).getOpcode ());
1274+ // Tail call should be a branch (but not necessarily an indirect one).
1275+ if (!Desc.isBranch ())
1276+ return false ;
1277+
1278+ // Always analyze the branches already marked as tail calls by BOLT.
1279+ if (BC.MIB ->isTailCall (Inst))
1280+ return true ;
1281+
1282+ // Try to also check the branches marked as "UNKNOWN CONTROL FLOW" - the
1283+ // below is a simplified condition from BinaryContext::printInstruction.
1284+ bool IsUnknownControlFlow =
1285+ BC.MIB ->isIndirectBranch (Inst) && !BC.MIB ->getJumpTable (Inst);
1286+
1287+ if (BF.hasCFG () && IsUnknownControlFlow)
1288+ return true ;
1289+
1290+ return false ;
1291+ }
1292+
1293+ static std::optional<PartialReport<MCPhysReg>>
1294+ shouldReportUnsafeTailCall (const BinaryContext &BC, const BinaryFunction &BF,
1295+ const MCInstReference &Inst, const SrcState &S) {
1296+ static const GadgetKind UntrustedLRKind (
1297+ " untrusted link register found before tail call" );
1298+
1299+ if (!shouldAnalyzeTailCallInst (BC, BF, Inst))
1300+ return std::nullopt ;
1301+
1302+ // Not only the set of registers returned by getTrustedLiveInRegs() can be
1303+ // seen as a reasonable target-independent _approximation_ of "the LR", these
1304+ // are *exactly* those registers used by SrcSafetyAnalysis to initialize the
1305+ // set of trusted registers on function entry.
1306+ // Thus, this function basically checks that the precondition expected to be
1307+ // imposed by a function call instruction (which is hardcoded into the target-
1308+ // specific getTrustedLiveInRegs() function) is also respected on tail calls.
1309+ SmallVector<MCPhysReg> RegsToCheck = BC.MIB ->getTrustedLiveInRegs ();
1310+ LLVM_DEBUG ({
1311+ traceInst (BC, " Found tail call inst" , Inst);
1312+ traceRegMask (BC, " Trusted regs" , S.TrustedRegs );
1313+ });
1314+
1315+ // In musl on AArch64, the _start function sets LR to zero and calls the next
1316+ // stage initialization function at the end, something along these lines:
1317+ //
1318+ // _start:
1319+ // mov x30, #0
1320+ // ; ... other initialization ...
1321+ // b _start_c ; performs "exit" system call at some point
1322+ //
1323+ // As this would produce a false positive for every executable linked with
1324+ // such libc, ignore tail calls performed by ELF entry function.
1325+ if (BC.StartFunctionAddress &&
1326+ *BC.StartFunctionAddress == Inst.getFunction ()->getAddress ()) {
1327+ LLVM_DEBUG ({ dbgs () << " Skipping tail call in ELF entry function.\n " ; });
1328+ return std::nullopt ;
1329+ }
1330+
1331+ // Returns at most one report per instruction - this is probably OK...
1332+ for (auto Reg : RegsToCheck)
1333+ if (!S.TrustedRegs [Reg])
1334+ return make_gadget_report (UntrustedLRKind, Inst, Reg);
1335+
1336+ return std::nullopt ;
1337+ }
1338+
12361339static std::optional<PartialReport<MCPhysReg>>
12371340shouldReportCallGadget (const BinaryContext &BC, const MCInstReference &Inst,
12381341 const SrcState &S) {
@@ -1400,6 +1503,9 @@ void FunctionAnalysisContext::findUnsafeUses(
14001503 if (PacRetGadgetsOnly)
14011504 return ;
14021505
1506+ if (auto Report = shouldReportUnsafeTailCall (BC, BF, Inst, S))
1507+ Reports.push_back (*Report);
1508+
14031509 if (auto Report = shouldReportCallGadget (BC, Inst, S))
14041510 Reports.push_back (*Report);
14051511 if (auto Report = shouldReportSigningOracle (BC, Inst, S))
0 commit comments