@@ -659,8 +659,9 @@ class DataflowSrcSafetyAnalysis
659659//
660660// Then, a function can be split into a number of disjoint contiguous sequences
661661// of instructions without labels in between. These sequences can be processed
662- // the same way basic blocks are processed by data-flow analysis, assuming
663- // pessimistically that all registers are unsafe at the start of each sequence.
662+ // the same way basic blocks are processed by data-flow analysis, with the same
663+ // pessimistic estimation of the initial state at the start of each sequence
664+ // (except the first instruction of the function).
664665class CFGUnawareSrcSafetyAnalysis : public SrcSafetyAnalysis {
665666 BinaryFunction &BF;
666667 MCPlusBuilder::AllocatorIdTy AllocId;
@@ -671,6 +672,30 @@ class CFGUnawareSrcSafetyAnalysis : public SrcSafetyAnalysis {
671672 BC.MIB ->removeAnnotation (I.second , StateAnnotationIndex);
672673 }
673674
675+ // / Compute a reasonably pessimistic estimation of the register state when
676+ // / the previous instruction is not known for sure. Take the set of registers
677+ // / which are trusted at function entry and remove all registers that can be
678+ // / clobbered inside this function.
679+ SrcState computePessimisticState (BinaryFunction &BF) {
680+ BitVector ClobberedRegs (NumRegs);
681+ for (auto &I : BF.instrs ()) {
682+ MCInst &Inst = I.second ;
683+ BC.MIB ->getClobberedRegs (Inst, ClobberedRegs);
684+
685+ // If this is a call instruction, no register is safe anymore, unless
686+ // it is a tail call. Ignore tail calls for the purpose of estimating the
687+ // worst-case scenario, assuming no instructions are executed in the
688+ // caller after this point anyway.
689+ if (BC.MIB ->isCall (Inst) && !BC.MIB ->isTailCall (Inst))
690+ ClobberedRegs.set ();
691+ }
692+
693+ SrcState S = createEntryState ();
694+ S.SafeToDerefRegs .reset (ClobberedRegs);
695+ S.TrustedRegs .reset (ClobberedRegs);
696+ return S;
697+ }
698+
674699public:
675700 CFGUnawareSrcSafetyAnalysis (BinaryFunction &BF,
676701 MCPlusBuilder::AllocatorIdTy AllocId,
@@ -681,6 +706,7 @@ class CFGUnawareSrcSafetyAnalysis : public SrcSafetyAnalysis {
681706 }
682707
683708 void run () override {
709+ const SrcState DefaultState = computePessimisticState (BF);
684710 SrcState S = createEntryState ();
685711 for (auto &I : BF.instrs ()) {
686712 MCInst &Inst = I.second ;
@@ -695,7 +721,7 @@ class CFGUnawareSrcSafetyAnalysis : public SrcSafetyAnalysis {
695721 LLVM_DEBUG ({
696722 traceInst (BC, " Due to label, resetting the state before" , Inst);
697723 });
698- S = createUnsafeState () ;
724+ S = DefaultState ;
699725 }
700726
701727 // Check if we need to remove an old annotation (this is the case if
@@ -1240,6 +1266,83 @@ shouldReportReturnGadget(const BinaryContext &BC, const MCInstReference &Inst,
12401266 return make_gadget_report (RetKind, Inst, *RetReg);
12411267}
12421268
1269+ // / While BOLT already marks some of the branch instructions as tail calls,
1270+ // / this function tries to improve the coverage by including less obvious cases
1271+ // / when it is possible to do without introducing too many false positives.
1272+ static bool shouldAnalyzeTailCallInst (const BinaryContext &BC,
1273+ const BinaryFunction &BF,
1274+ const MCInstReference &Inst) {
1275+ // Some BC.MIB->isXYZ(Inst) methods simply delegate to MCInstrDesc::isXYZ()
1276+ // (such as isBranch at the time of writing this comment), some don't (such
1277+ // as isCall). For that reason, call MCInstrDesc's methods explicitly when
1278+ // it is important.
1279+ const MCInstrDesc &Desc =
1280+ BC.MII ->get (static_cast <const MCInst &>(Inst).getOpcode ());
1281+ // Tail call should be a branch (but not necessarily an indirect one).
1282+ if (!Desc.isBranch ())
1283+ return false ;
1284+
1285+ // Always analyze the branches already marked as tail calls by BOLT.
1286+ if (BC.MIB ->isTailCall (Inst))
1287+ return true ;
1288+
1289+ // Try to also check the branches marked as "UNKNOWN CONTROL FLOW" - the
1290+ // below is a simplified condition from BinaryContext::printInstruction.
1291+ bool IsUnknownControlFlow =
1292+ BC.MIB ->isIndirectBranch (Inst) && !BC.MIB ->getJumpTable (Inst);
1293+
1294+ if (BF.hasCFG () && IsUnknownControlFlow)
1295+ return true ;
1296+
1297+ return false ;
1298+ }
1299+
1300+ static std::optional<PartialReport<MCPhysReg>>
1301+ shouldReportUnsafeTailCall (const BinaryContext &BC, const BinaryFunction &BF,
1302+ const MCInstReference &Inst, const SrcState &S) {
1303+ static const GadgetKind UntrustedLRKind (
1304+ " untrusted link register found before tail call" );
1305+
1306+ if (!shouldAnalyzeTailCallInst (BC, BF, Inst))
1307+ return std::nullopt ;
1308+
1309+ // Not only the set of registers returned by getTrustedLiveInRegs() can be
1310+ // seen as a reasonable target-independent _approximation_ of "the LR", these
1311+ // are *exactly* those registers used by SrcSafetyAnalysis to initialize the
1312+ // set of trusted registers on function entry.
1313+ // Thus, this function basically checks that the precondition expected to be
1314+ // imposed by a function call instruction (which is hardcoded into the target-
1315+ // specific getTrustedLiveInRegs() function) is also respected on tail calls.
1316+ SmallVector<MCPhysReg> RegsToCheck = BC.MIB ->getTrustedLiveInRegs ();
1317+ LLVM_DEBUG ({
1318+ traceInst (BC, " Found tail call inst" , Inst);
1319+ traceRegMask (BC, " Trusted regs" , S.TrustedRegs );
1320+ });
1321+
1322+ // In musl on AArch64, the _start function sets LR to zero and calls the next
1323+ // stage initialization function at the end, something along these lines:
1324+ //
1325+ // _start:
1326+ // mov x30, #0
1327+ // ; ... other initialization ...
1328+ // b _start_c ; performs "exit" system call at some point
1329+ //
1330+ // As this would produce a false positive for every executable linked with
1331+ // such libc, ignore tail calls performed by ELF entry function.
1332+ if (BC.StartFunctionAddress &&
1333+ *BC.StartFunctionAddress == Inst.getFunction ()->getAddress ()) {
1334+ LLVM_DEBUG ({ dbgs () << " Skipping tail call in ELF entry function.\n " ; });
1335+ return std::nullopt ;
1336+ }
1337+
1338+ // Returns at most one report per instruction - this is probably OK...
1339+ for (auto Reg : RegsToCheck)
1340+ if (!S.TrustedRegs [Reg])
1341+ return make_gadget_report (UntrustedLRKind, Inst, Reg);
1342+
1343+ return std::nullopt ;
1344+ }
1345+
12431346static std::optional<PartialReport<MCPhysReg>>
12441347shouldReportCallGadget (const BinaryContext &BC, const MCInstReference &Inst,
12451348 const SrcState &S) {
@@ -1407,6 +1510,9 @@ void FunctionAnalysisContext::findUnsafeUses(
14071510 if (PacRetGadgetsOnly)
14081511 return ;
14091512
1513+ if (auto Report = shouldReportUnsafeTailCall (BC, BF, Inst, S))
1514+ Reports.push_back (*Report);
1515+
14101516 if (auto Report = shouldReportCallGadget (BC, Inst, S))
14111517 Reports.push_back (*Report);
14121518 if (auto Report = shouldReportSigningOracle (BC, Inst, S))
0 commit comments