From d1891f505dcb9c6f5570d203f8c4360d624861f7 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Fri, 10 May 2024 13:51:23 +0200 Subject: [PATCH 1/7] Add alias set filter --- .../PhasarLLVM/Pointer/FilteredLLVMAliasSet.h | 125 +++++++++ .../phasar/PhasarLLVM/Pointer/LLVMAliasSet.h | 2 + include/phasar/Utils/MaybeUniquePtr.h | 8 +- .../Pointer/FilteredLLVMAliasSet.cpp | 257 ++++++++++++++++++ 4 files changed, 389 insertions(+), 3 deletions(-) create mode 100644 include/phasar/PhasarLLVM/Pointer/FilteredLLVMAliasSet.h create mode 100644 lib/PhasarLLVM/Pointer/FilteredLLVMAliasSet.cpp diff --git a/include/phasar/PhasarLLVM/Pointer/FilteredLLVMAliasSet.h b/include/phasar/PhasarLLVM/Pointer/FilteredLLVMAliasSet.h new file mode 100644 index 000000000..e49267c74 --- /dev/null +++ b/include/phasar/PhasarLLVM/Pointer/FilteredLLVMAliasSet.h @@ -0,0 +1,125 @@ +/****************************************************************************** + * Copyright (c) 2020 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. 
+ * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#ifndef PHASAR_PHASARLLVM_POINTER_FILTEREDLLVMALIASSET_H +#define PHASAR_PHASARLLVM_POINTER_FILTEREDLLVMALIASSET_H + +#include "phasar/Pointer/AliasAnalysisType.h" +#include "phasar/Pointer/AliasInfoTraits.h" +#include "phasar/Pointer/AliasResult.h" +#include "phasar/Pointer/AliasSetOwner.h" +#include "phasar/Utils/AnalysisProperties.h" +#include "phasar/Utils/MaybeUniquePtr.h" +#include "phasar/Utils/StableVector.h" + +#include "llvm/Support/ErrorHandling.h" + +#include "nlohmann/json_fwd.hpp" + +#include +#include + +namespace llvm { +class Value; +class Instruction; +class Function; +} // namespace llvm + +namespace psr { + +class LLVMAliasSet; +class FilteredLLVMAliasSet; + +template <> +struct AliasInfoTraits + : DefaultAATraits {}; + +class FilteredLLVMAliasSet { +public: + using traits_t = AliasInfoTraits; + using n_t = traits_t::n_t; + using v_t = traits_t::v_t; + using AliasSetTy = traits_t::AliasSetTy; + using AliasSetPtrTy = traits_t::AliasSetPtrTy; + using AllocationSiteSetPtrTy = traits_t::AllocationSiteSetPtrTy; + + FilteredLLVMAliasSet(LLVMAliasSet *AS) noexcept; + + FilteredLLVMAliasSet(const FilteredLLVMAliasSet &) = delete; + FilteredLLVMAliasSet &operator=(const FilteredLLVMAliasSet &) = delete; + FilteredLLVMAliasSet(FilteredLLVMAliasSet &&) = delete; + FilteredLLVMAliasSet &operator=(FilteredLLVMAliasSet &&) = delete; + + ~FilteredLLVMAliasSet(); + + template >> + explicit FilteredLLVMAliasSet(ArgsT &&...Args) + : FilteredLLVMAliasSet(std::forward(Args)...) 
{} + + // --- API Functions: + + [[nodiscard]] inline bool isInterProcedural() const noexcept { + return false; + }; + + [[nodiscard]] AliasAnalysisType getAliasAnalysisType() const noexcept; + + [[nodiscard]] AliasResult alias(const llvm::Value *V1, const llvm::Value *V2, + const llvm::Instruction *I); + [[nodiscard]] AliasResult alias(const llvm::Value *V1, const llvm::Value *V2, + const llvm::Function *Fun); + + [[nodiscard]] AliasSetPtrTy getAliasSet(const llvm::Value *V, + const llvm::Instruction *I); + [[nodiscard]] AliasSetPtrTy getAliasSet(const llvm::Value *V, + const llvm::Function *Fun); + + [[nodiscard]] AllocationSiteSetPtrTy + getReachableAllocationSites(const llvm::Value *V, bool IntraProcOnly = false, + const llvm::Instruction *I = nullptr); + + // Checks if PotentialValue is in the reachable allocation sites of V. + [[nodiscard]] bool isInReachableAllocationSites( + const llvm::Value *V, const llvm::Value *PotentialValue, + bool IntraProcOnly = false, const llvm::Instruction *I = nullptr); + + void mergeWith(const FilteredLLVMAliasSet & /*OtherPTI*/) { + llvm::report_fatal_error("Not Supported"); + } + + void introduceAlias(const llvm::Value *V1, const llvm::Value *V2, + const llvm::Instruction *I = nullptr, + AliasResult Kind = AliasResult::MustAlias) { + llvm::report_fatal_error("Not Supported"); + } + + void print(llvm::raw_ostream &OS = llvm::outs()) const; + + [[nodiscard]] nlohmann::json getAsJson() const; + + void printAsJson(llvm::raw_ostream &OS = llvm::outs()) const; + + [[nodiscard]] AnalysisProperties getAnalysisProperties() const noexcept { + return AnalysisProperties::None; + } + +private: + FilteredLLVMAliasSet(MaybeUniquePtr AS) noexcept; + + MaybeUniquePtr AS; + AliasSetOwner::memory_resource_type MRes; + AliasSetOwner Owner{&MRes}; + llvm::DenseMap, AliasSetPtrTy> + AliasSetMap; +}; +} // namespace psr + +#endif // PHASAR_PHASARLLVM_POINTER_FILTEREDLLVMALIASSET_H diff --git a/include/phasar/PhasarLLVM/Pointer/LLVMAliasSet.h 
b/include/phasar/PhasarLLVM/Pointer/LLVMAliasSet.h index 0775e98d2..00ad51dfa 100644 --- a/include/phasar/PhasarLLVM/Pointer/LLVMAliasSet.h +++ b/include/phasar/PhasarLLVM/Pointer/LLVMAliasSet.h @@ -43,6 +43,8 @@ struct AliasInfoTraits class LLVMAliasSet : public AnalysisPropertiesMixin, public AliasInfoBaseUtils { + // For int*IsReachableAllocationSiteTy: + friend class FilteredLLVMAliasSet; public: using traits_t = AliasInfoTraits; diff --git a/include/phasar/Utils/MaybeUniquePtr.h b/include/phasar/Utils/MaybeUniquePtr.h index fe086fc85..d770de1f3 100644 --- a/include/phasar/Utils/MaybeUniquePtr.h +++ b/include/phasar/Utils/MaybeUniquePtr.h @@ -59,7 +59,8 @@ template class MaybeUniquePtrBase { /// \tparam RequireAlignment If true, the datastructure only works if /// alignof(T) > 1 holds. Enables incomplete T types template -class MaybeUniquePtr : detail::MaybeUniquePtrBase { +class [[clang::trivial_abi]] MaybeUniquePtr + : detail::MaybeUniquePtrBase { using detail::MaybeUniquePtrBase::Data; public: @@ -79,8 +80,9 @@ class MaybeUniquePtr : detail::MaybeUniquePtrBase { : MaybeUniquePtr(Owner.release(), true) {} constexpr MaybeUniquePtr(MaybeUniquePtr &&Other) noexcept - : detail::MaybeUniquePtrBase( - std::exchange(Other.Data, {})) {} + : detail::MaybeUniquePtrBase(std::move(Other)) { + Data = {}; + } constexpr void swap(MaybeUniquePtr &Other) noexcept { std::swap(Data, Other.Data); diff --git a/lib/PhasarLLVM/Pointer/FilteredLLVMAliasSet.cpp b/lib/PhasarLLVM/Pointer/FilteredLLVMAliasSet.cpp new file mode 100644 index 000000000..f4839a07e --- /dev/null +++ b/lib/PhasarLLVM/Pointer/FilteredLLVMAliasSet.cpp @@ -0,0 +1,257 @@ +#include "phasar/PhasarLLVM/Pointer/FilteredLLVMAliasSet.h" + +#include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" +#include "phasar/PhasarLLVM/Pointer/LLVMAliasSet.h" +#include "phasar/PhasarLLVM/Pointer/LLVMPointsToUtils.h" +#include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" +#include "phasar/Pointer/AliasResult.h" +#include 
"phasar/Utils/NlohmannLogging.h" + +#include "llvm/IR/Instructions.h" + +#include "nlohmann/json_fwd.hpp" + +#include + +using namespace psr; + +static const llvm::Function *getFunction(const llvm::Value *V) { + if (const auto *Inst = llvm::dyn_cast(V)) { + return Inst->getFunction(); + } + if (const auto *Arg = llvm::dyn_cast(V)) { + return Arg->getParent(); + } + return nullptr; +} + +[[nodiscard]] static bool isConstantGlob(const llvm::GlobalValue *GlobV) { + if (const auto *Glob = llvm::dyn_cast(GlobV)) { + return Glob->isConstant(); + } + if (const auto *Alias = llvm::dyn_cast(GlobV)) { + if (const auto *AliasGlob = + llvm::dyn_cast(Alias->getAliasee())) { + return AliasGlob->isConstant(); + } + } + return true; +} + +static bool mustNoalias(const llvm::Value *p1, const llvm::Value *p2) { + if (p1 == p2) { + return false; + } + assert(p1); + assert(p2); + + // Assumptions: + // - Globals do not alias with allocas + // - Globals do not alias with each other (this may be a bit unsound, though) + // - Allocas do not alias each other (relax a bit for allocas of pointers) + // - Constant globals are not generated as data-flow facts + + if (const auto *Alloca1 = llvm::dyn_cast(p1)) { + if (llvm::isa(p2)) { + return true; + } + if (const auto *Alloca2 = llvm::dyn_cast(p2)) { + return !Alloca1->getAllocatedType()->isPointerTy() && + !Alloca2->getAllocatedType()->isPointerTy(); + } + } else if (const auto *Glob1 = llvm::dyn_cast(p1)) { + if (llvm::isa(p2) || isConstantGlob(Glob1)) { + return true; + } + if (const auto *Glob2 = llvm::dyn_cast(p2)) { + return true; // approximation + } + } else if (const auto *Glob2 = llvm::dyn_cast(p2)) { + return isConstantGlob(Glob2); + } + + return false; +} + +static void fillAliasSet(FilteredLLVMAliasSet::AliasSetTy &Set, + LLVMAliasSet::AliasSetPtrTy AS, const llvm::Value *V, + const llvm::Function *Fun) { + if (!Fun) { + Set.insert(AS->begin(), AS->end()); + return; + } + + const auto *Base = V->stripPointerCastsAndAliases(); + 
for (const auto *Alias : *AS) { + + // Skip inter-procedural aliases + const auto *AliasFun = getFunction(Alias); + if (AliasFun && Fun != AliasFun) { + continue; + } + + if (V == Alias) { + Set.insert(Alias); + continue; + } + + if (llvm::isa(Alias) || + llvm::isa(Alias)) { + // Assume: Compile-time constants are not generated as data-flow facts! + continue; + } + + const auto *AliasBase = Alias->stripPointerCastsAndAliases(); + + if (mustNoalias(Base, AliasBase)) { + continue; + } + + Set.insert(Alias); + } +} + +FilteredLLVMAliasSet::FilteredLLVMAliasSet(LLVMAliasSet *AS) noexcept + : AS(AS) {} + +FilteredLLVMAliasSet::FilteredLLVMAliasSet( + MaybeUniquePtr AS) noexcept + : AS(std::move(AS)) {} + +AliasAnalysisType FilteredLLVMAliasSet::getAliasAnalysisType() const noexcept { + return AS->getAliasAnalysisType(); +} + +AliasResult FilteredLLVMAliasSet::alias(const llvm::Value *V1, + const llvm::Value *V2, + const llvm::Function *Fun) { + auto V1AS = getAliasSet(V1, Fun); + return V1AS->contains(V2) ? AliasResult::MayAlias : AliasResult::NoAlias; +} + +AliasResult FilteredLLVMAliasSet::alias(const llvm::Value *V1, + const llvm::Value *V2, + const llvm::Instruction *I) { + if (!I) { + return AS->alias(V1, V2); + } + + return alias(V1, V2, I->getFunction()); +} + +auto FilteredLLVMAliasSet::getAliasSet(const llvm::Value *V, + const llvm::Function *Fun) + -> AliasSetPtrTy { + auto &Entry = AliasSetMap[{Fun, V}]; + if (!Entry) { + auto Set = Owner.acquire(); + fillAliasSet(*Set, AS->getAliasSet(V), V, Fun); + Entry = Set; + } + return Entry; +} + +auto FilteredLLVMAliasSet::getAliasSet(const llvm::Value *V, + const llvm::Instruction *I) + -> AliasSetPtrTy { + const auto *Fun = I ? 
I->getFunction() : nullptr; + return getAliasSet(V, Fun); +} + +auto FilteredLLVMAliasSet::getReachableAllocationSites( + const llvm::Value *V, bool IntraProcOnly, const llvm::Instruction *I) + -> AllocationSiteSetPtrTy { + auto AllocSites = std::make_unique(); + + // if V is not a (interesting) pointer we can return an empty set + if (!isInterestingPointer(V)) { + return AllocSites; + } + + const auto PTS = getAliasSet(V, I); + // consider the full inter-procedural points-to/alias information + if (!IntraProcOnly) { + for (const auto *P : *PTS) { + if (AS->interIsReachableAllocationSiteTy(V, P)) { + AllocSites->insert(P); + } + } + } else { + // consider the function-local, i.e. intra-procedural, points-to/alias + // information only + const auto *VFun = getFunction(V); + const auto *VG = llvm::dyn_cast(V); + // We may not be able to retrieve a function for the given value since some + // pointer values can exist outside functions, for instance, in case of + // vtables, etc. + for (const auto *P : *PTS) { + if (AS->intraIsReachableAllocationSiteTy(V, P, VFun, VG)) { + AllocSites->insert(P); + } + } + } + return AllocSites; +} + +// Checks if PotentialValue is in the reachable allocation sites of V. 
+bool FilteredLLVMAliasSet::isInReachableAllocationSites( + const llvm::Value *V, const llvm::Value *PotentialValue, bool IntraProcOnly, + const llvm::Instruction *I) { + // if V is not a (interesting) pointer we can return an empty set + if (!isInterestingPointer(V)) { + return false; + } + + bool PVIsReachableAllocationSiteType = false; + if (IntraProcOnly) { + const auto *VFun = getFunction(V); + const auto *VG = llvm::dyn_cast(V); + PVIsReachableAllocationSiteType = + AS->intraIsReachableAllocationSiteTy(V, PotentialValue, VFun, VG); + } else { + PVIsReachableAllocationSiteType = + AS->interIsReachableAllocationSiteTy(V, PotentialValue); + } + + if (PVIsReachableAllocationSiteType) { + const auto PTS = getAliasSet(V, I); + return PTS->count(PotentialValue); + } + + return false; +} + +void FilteredLLVMAliasSet::print(llvm::raw_ostream &OS) const { + for (const auto &[FV, PTS] : AliasSetMap) { + OS << "V: " << llvmIRToString(FV.second) << " in function '" + << FV.first->getName() << "'\n"; + for (const auto &Ptr : *PTS) { + OS << "\taliases with -> " << llvmIRToString(Ptr) << '\n'; + } + } +} + +nlohmann::json FilteredLLVMAliasSet::getAsJson() const { + nlohmann::json J; + + for (const auto &[FV, PTS] : AliasSetMap) { + auto &JFV = J.emplace_back(); + + JFV["Value"] = getMetaDataID(FV.second); + JFV["Function"] = FV.first->getName().str(); + + auto &JSet = JFV["Aliases"]; + + for (const auto &Ptr : *PTS) { + JSet.push_back(getMetaDataID(Ptr)); + } + } + + return J; +} + +void FilteredLLVMAliasSet::printAsJson(llvm::raw_ostream &OS) const { + OS << getAsJson(); +} + +static_assert(std::is_convertible_v); From 52b24a0c4094aa55e5e487868e9fe006b7561688 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Fri, 10 May 2024 14:25:50 +0200 Subject: [PATCH 2/7] Add test --- .../PhasarLLVM/Pointer/FilteredLLVMAliasSet.h | 13 +- .../IfdsIde/Problems/IFDSTaintAnalysis.cpp | 2 +- .../Pointer/FilteredLLVMAliasSet.cpp | 6 +- unittests/PhasarLLVM/Pointer/CMakeLists.txt | 1 + 
.../Pointer/FilteredLLVMAliasSetTest.cpp | 138 ++++++++++++++++++ 5 files changed, 150 insertions(+), 10 deletions(-) create mode 100644 unittests/PhasarLLVM/Pointer/FilteredLLVMAliasSetTest.cpp diff --git a/include/phasar/PhasarLLVM/Pointer/FilteredLLVMAliasSet.h b/include/phasar/PhasarLLVM/Pointer/FilteredLLVMAliasSet.h index e49267c74..83d63de8d 100644 --- a/include/phasar/PhasarLLVM/Pointer/FilteredLLVMAliasSet.h +++ b/include/phasar/PhasarLLVM/Pointer/FilteredLLVMAliasSet.h @@ -16,7 +16,6 @@ #include "phasar/Pointer/AliasSetOwner.h" #include "phasar/Utils/AnalysisProperties.h" #include "phasar/Utils/MaybeUniquePtr.h" -#include "phasar/Utils/StableVector.h" #include "llvm/Support/ErrorHandling.h" @@ -53,9 +52,10 @@ class FilteredLLVMAliasSet { FilteredLLVMAliasSet(const FilteredLLVMAliasSet &) = delete; FilteredLLVMAliasSet &operator=(const FilteredLLVMAliasSet &) = delete; - FilteredLLVMAliasSet(FilteredLLVMAliasSet &&) = delete; FilteredLLVMAliasSet &operator=(FilteredLLVMAliasSet &&) = delete; + FilteredLLVMAliasSet(FilteredLLVMAliasSet &&) noexcept = default; + ~FilteredLLVMAliasSet(); template AS) noexcept; MaybeUniquePtr AS; - AliasSetOwner::memory_resource_type MRes; - AliasSetOwner Owner{&MRes}; + AliasSetOwner Owner; llvm::DenseMap, AliasSetPtrTy> AliasSetMap; }; diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysis.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysis.cpp index a39fbee4b..ccaa742fa 100644 --- a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysis.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysis.cpp @@ -155,7 +155,7 @@ void IFDSTaintAnalysis::populateWithMayAliases( container_type &Facts, const llvm::Instruction *Context) const { container_type Tmp = Facts; for (const auto *Fact : Facts) { - auto Aliases = PT.getAliasSet(Fact); + auto Aliases = PT.getAliasSet(Fact, Context); for (const auto *Alias : *Aliases) { if (canSkipAtContext(Alias, Context)) { continue; diff --git 
a/lib/PhasarLLVM/Pointer/FilteredLLVMAliasSet.cpp b/lib/PhasarLLVM/Pointer/FilteredLLVMAliasSet.cpp index f4839a07e..19bb489ba 100644 --- a/lib/PhasarLLVM/Pointer/FilteredLLVMAliasSet.cpp +++ b/lib/PhasarLLVM/Pointer/FilteredLLVMAliasSet.cpp @@ -112,11 +112,13 @@ static void fillAliasSet(FilteredLLVMAliasSet::AliasSetTy &Set, } FilteredLLVMAliasSet::FilteredLLVMAliasSet(LLVMAliasSet *AS) noexcept - : AS(AS) {} + : AS(AS), Owner(&AS->MRes) {} FilteredLLVMAliasSet::FilteredLLVMAliasSet( MaybeUniquePtr AS) noexcept - : AS(std::move(AS)) {} + : AS(std::move(AS)), Owner(&this->AS->MRes) {} + +FilteredLLVMAliasSet::~FilteredLLVMAliasSet() = default; AliasAnalysisType FilteredLLVMAliasSet::getAliasAnalysisType() const noexcept { return AS->getAliasAnalysisType(); diff --git a/unittests/PhasarLLVM/Pointer/CMakeLists.txt b/unittests/PhasarLLVM/Pointer/CMakeLists.txt index a8745bbbc..8cf7152ad 100644 --- a/unittests/PhasarLLVM/Pointer/CMakeLists.txt +++ b/unittests/PhasarLLVM/Pointer/CMakeLists.txt @@ -1,6 +1,7 @@ set(ControlFlowSources LLVMAliasSetTest.cpp LLVMAliasSetSerializationTest.cpp + FilteredLLVMAliasSetTest.cpp ) foreach(TEST_SRC ${ControlFlowSources}) diff --git a/unittests/PhasarLLVM/Pointer/FilteredLLVMAliasSetTest.cpp b/unittests/PhasarLLVM/Pointer/FilteredLLVMAliasSetTest.cpp new file mode 100644 index 000000000..ccfb76b60 --- /dev/null +++ b/unittests/PhasarLLVM/Pointer/FilteredLLVMAliasSetTest.cpp @@ -0,0 +1,138 @@ +#include "phasar/PhasarLLVM/Pointer/FilteredLLVMAliasSet.h" + +#include "phasar/ControlFlow/CallGraphAnalysisType.h" +#include "phasar/DataFlow/IfdsIde/Solver/IFDSSolver.h" +#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h" +#include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" +#include "phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysis.h" +#include "phasar/PhasarLLVM/Pointer/LLVMAliasSet.h" +#include "phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h" +#include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" + +#include 
"llvm/ADT/StringRef.h" +#include "llvm/IR/InstrTypes.h" + +#include "TestConfig.h" +#include "gtest/gtest.h" + +using namespace psr; + +namespace { + +static LLVMTaintConfig getDefaultConfig() { + auto SourceCB = [](const llvm::Instruction *Inst) { + std::set Ret; + if (const auto *Call = llvm::dyn_cast(Inst); + Call && Call->getCalledFunction() && + Call->getCalledFunction()->getName() == "_Z6sourcev") { + Ret.insert(Call); + } + return Ret; + }; + auto SinkCB = [](const llvm::Instruction *Inst) { + std::set Ret; + if (const auto *Call = llvm::dyn_cast(Inst); + Call && Call->getCalledFunction() && + Call->getCalledFunction()->getName() == "_Z4sinki") { + assert(Call->arg_size() > 0); + Ret.insert(Call->getArgOperand(0)); + } + return Ret; + }; + return LLVMTaintConfig(std::move(SourceCB), std::move(SinkCB)); +} + +static LLVMTaintConfig getDoubleFreeConfig() { + auto SourceCB = [](const llvm::Instruction *Inst) { + std::set Ret; + if (const auto *Call = llvm::dyn_cast(Inst); + Call && Call->getCalledFunction() && + Call->getCalledFunction()->getName() == "free") { + Ret.insert(Call->getArgOperand(0)); + } + return Ret; + }; + + return LLVMTaintConfig(SourceCB, SourceCB); +} + +class TaintAnalysis : public ::testing::TestWithParam { +protected: + static constexpr auto PathToLlFiles = + PHASAR_BUILD_SUBFOLDER("taint_analysis/"); + const std::vector EntryPoints = {"main"}; + +}; // Test Fixture + +TEST_P(TaintAnalysis, LeaksWithAndWithoutAliasFilteringEqual) { + + LLVMProjectIRDB IRDB(PathToLlFiles + GetParam()); + LLVMAliasSet AS(&IRDB, false); + FilteredLLVMAliasSet FAS(&AS); + LLVMBasedICFG ICF(&IRDB, CallGraphAnalysisType::OTF, EntryPoints, nullptr, + &AS); + + auto TSF = llvm::StringRef(GetParam()).startswith("double_free") + ? 
getDoubleFreeConfig() + : getDefaultConfig(); + + IFDSTaintAnalysis TaintProblem(&IRDB, &AS, &TSF, EntryPoints); + IFDSTaintAnalysis FilterTaintProblem(&IRDB, &FAS, &TSF, EntryPoints); + + solveIFDSProblem(TaintProblem, ICF).dumpResults(ICF); + solveIFDSProblem(FilterTaintProblem, ICF).dumpResults(ICF); + + EXPECT_EQ(TaintProblem.Leaks.size(), FilterTaintProblem.Leaks.size()); + + for (const auto &[LeakInst, LeakFacts] : TaintProblem.Leaks) { + const auto It = FilterTaintProblem.Leaks.find(LeakInst); + + EXPECT_NE(It, FilterTaintProblem.Leaks.end()) + << "Expected to find leak at " + llvmIRToString(LeakInst); + + if (It == FilterTaintProblem.Leaks.end()) { + continue; + } + + for (const auto *LeakFact : LeakFacts) { + EXPECT_TRUE(It->second.count(LeakFact)) + << "Expected to find leak-fact " + llvmIRToShortString(LeakFact) + + " at " + llvmIRToString(LeakInst); + } + } +} + +static constexpr std::string_view TaintTestFiles[] = { + // -- dummy-source-sink + "dummy_source_sink/taint_01_cpp_dbg.ll", + "dummy_source_sink/taint_01_cpp_m2r_dbg.ll", + "dummy_source_sink/taint_02_cpp_dbg.ll", + "dummy_source_sink/taint_03_cpp_dbg.ll", + "dummy_source_sink/taint_04_cpp_dbg.ll", + "dummy_source_sink/taint_05_cpp_dbg.ll", + "dummy_source_sink/taint_06_cpp_m2r_dbg.ll", + "dummy_source_sink/taint_exception_01_cpp_dbg.ll", + "dummy_source_sink/taint_exception_01_cpp_m2r_dbg.ll", + "dummy_source_sink/taint_exception_02_cpp_dbg.ll", + "dummy_source_sink/taint_exception_03_cpp_dbg.ll", + "dummy_source_sink/taint_exception_04_cpp_dbg.ll", + "dummy_source_sink/taint_exception_05_cpp_dbg.ll", + "dummy_source_sink/taint_exception_06_cpp_dbg.ll", + "dummy_source_sink/taint_exception_07_cpp_dbg.ll", + "dummy_source_sink/taint_exception_08_cpp_dbg.ll", + "dummy_source_sink/taint_exception_09_cpp_dbg.ll", + "dummy_source_sink/taint_exception_10_cpp_dbg.ll", + // -- double-free + "double_free_01_c.ll", + "double_free_02_c.ll", +}; + +INSTANTIATE_TEST_SUITE_P(InteractiveIDESolverTest, 
TaintAnalysis, + ::testing::ValuesIn(TaintTestFiles)); + +} // namespace + +int main(int Argc, char **Argv) { + ::testing::InitGoogleTest(&Argc, Argv); + return RUN_ALL_TESTS(); +} From 4e107926b9b033e21cfc371f14c7bccca7eea78c Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Sun, 12 May 2024 13:05:27 +0200 Subject: [PATCH 3/7] apply review comments from vulder --- .../PhasarLLVM/Pointer/FilteredLLVMAliasSet.h | 14 +++++++------- lib/PhasarLLVM/Pointer/FilteredLLVMAliasSet.cpp | 10 +++++----- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/include/phasar/PhasarLLVM/Pointer/FilteredLLVMAliasSet.h b/include/phasar/PhasarLLVM/Pointer/FilteredLLVMAliasSet.h index 83d63de8d..a157aabfc 100644 --- a/include/phasar/PhasarLLVM/Pointer/FilteredLLVMAliasSet.h +++ b/include/phasar/PhasarLLVM/Pointer/FilteredLLVMAliasSet.h @@ -41,12 +41,12 @@ struct AliasInfoTraits class FilteredLLVMAliasSet { public: - using traits_t = AliasInfoTraits; - using n_t = traits_t::n_t; - using v_t = traits_t::v_t; - using AliasSetTy = traits_t::AliasSetTy; - using AliasSetPtrTy = traits_t::AliasSetPtrTy; - using AllocationSiteSetPtrTy = traits_t::AllocationSiteSetPtrTy; + using alias_traits_t = AliasInfoTraits; + using n_t = alias_traits_t::n_t; + using v_t = alias_traits_t::v_t; + using AliasSetTy = alias_traits_t::AliasSetTy; + using AliasSetPtrTy = alias_traits_t::AliasSetPtrTy; + using AllocationSiteSetPtrTy = alias_traits_t::AllocationSiteSetPtrTy; FilteredLLVMAliasSet(LLVMAliasSet *AS) noexcept; @@ -114,7 +114,7 @@ class FilteredLLVMAliasSet { private: FilteredLLVMAliasSet(MaybeUniquePtr AS) noexcept; - MaybeUniquePtr AS; + MaybeUniquePtr AS; AliasSetOwner Owner; llvm::DenseMap, AliasSetPtrTy> AliasSetMap; diff --git a/lib/PhasarLLVM/Pointer/FilteredLLVMAliasSet.cpp b/lib/PhasarLLVM/Pointer/FilteredLLVMAliasSet.cpp index 19bb489ba..49b5b91fe 100644 --- a/lib/PhasarLLVM/Pointer/FilteredLLVMAliasSet.cpp +++ b/lib/PhasarLLVM/Pointer/FilteredLLVMAliasSet.cpp @@ -25,7 +25,8 @@ 
static const llvm::Function *getFunction(const llvm::Value *V) { return nullptr; } -[[nodiscard]] static bool isConstantGlob(const llvm::GlobalValue *GlobV) { +[[nodiscard]] static bool +isConstantGlobalValue(const llvm::GlobalValue *GlobV) { if (const auto *Glob = llvm::dyn_cast(GlobV)) { return Glob->isConstant(); } @@ -60,14 +61,14 @@ static bool mustNoalias(const llvm::Value *p1, const llvm::Value *p2) { !Alloca2->getAllocatedType()->isPointerTy(); } } else if (const auto *Glob1 = llvm::dyn_cast(p1)) { - if (llvm::isa(p2) || isConstantGlob(Glob1)) { + if (llvm::isa(p2) || isConstantGlobalValue(Glob1)) { return true; } if (const auto *Glob2 = llvm::dyn_cast(p2)) { return true; // approximation } } else if (const auto *Glob2 = llvm::dyn_cast(p2)) { - return isConstantGlob(Glob2); + return isConstantGlobalValue(Glob2); } return false; @@ -95,8 +96,7 @@ static void fillAliasSet(FilteredLLVMAliasSet::AliasSetTy &Set, continue; } - if (llvm::isa(Alias) || - llvm::isa(Alias)) { + if (llvm::isa(Alias)) { // Assume: Compile-time constants are not generated as data-flow facts! 
continue; } From e1e8fb53482267200533505d2bfa4c205ed2935c Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Sun, 12 May 2024 14:57:59 +0200 Subject: [PATCH 4/7] Add caching for reachable allocation sites --- .../PhasarLLVM/Pointer/FilteredLLVMAliasSet.h | 32 ++++++++- include/phasar/Pointer/AliasInfoTraits.h | 5 +- .../Pointer/FilteredLLVMAliasSet.cpp | 67 ++++++++++++------- 3 files changed, 77 insertions(+), 27 deletions(-) diff --git a/include/phasar/PhasarLLVM/Pointer/FilteredLLVMAliasSet.h b/include/phasar/PhasarLLVM/Pointer/FilteredLLVMAliasSet.h index a157aabfc..27369bd85 100644 --- a/include/phasar/PhasarLLVM/Pointer/FilteredLLVMAliasSet.h +++ b/include/phasar/PhasarLLVM/Pointer/FilteredLLVMAliasSet.h @@ -17,6 +17,11 @@ #include "phasar/Utils/AnalysisProperties.h" #include "phasar/Utils/MaybeUniquePtr.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/PointerIntPair.h" +#include "llvm/IR/Function.h" #include "llvm/Support/ErrorHandling.h" #include "nlohmann/json_fwd.hpp" @@ -27,7 +32,6 @@ namespace llvm { class Value; class Instruction; -class Function; } // namespace llvm namespace psr { @@ -112,12 +116,38 @@ class FilteredLLVMAliasSet { } private: + struct ReachableAllocationSitesKey { + llvm::PointerIntPair FunAndIntraProcOnly; + v_t Value{}; + }; + + struct ReachableAllocationSitesKeyDMI { + inline static ReachableAllocationSitesKey getEmptyKey() noexcept { + return {{}, llvm::DenseMapInfo::getEmptyKey()}; + } + inline static ReachableAllocationSitesKey getTombstoneKey() noexcept { + return {{}, llvm::DenseMapInfo::getTombstoneKey()}; + } + inline static auto getHashValue(ReachableAllocationSitesKey Key) noexcept { + return llvm::hash_combine(Key.FunAndIntraProcOnly.getOpaqueValue(), + Key.Value); + } + inline static bool isEqual(ReachableAllocationSitesKey Key1, + ReachableAllocationSitesKey Key2) noexcept { + return Key1.FunAndIntraProcOnly == Key2.FunAndIntraProcOnly && + 
Key1.Value == Key2.Value; + } + }; + FilteredLLVMAliasSet(MaybeUniquePtr AS) noexcept; MaybeUniquePtr AS; AliasSetOwner Owner; llvm::DenseMap, AliasSetPtrTy> AliasSetMap; + llvm::DenseMap, + ReachableAllocationSitesKeyDMI> + ReachableAllocationSitesMap; }; } // namespace psr diff --git a/include/phasar/Pointer/AliasInfoTraits.h b/include/phasar/Pointer/AliasInfoTraits.h index 4892db434..716b31744 100644 --- a/include/phasar/Pointer/AliasInfoTraits.h +++ b/include/phasar/Pointer/AliasInfoTraits.h @@ -11,11 +11,10 @@ #define PHASAR_POINTER_ALIASINFOTRAITS_H #include "phasar/Utils/BoxedPointer.h" +#include "phasar/Utils/MaybeUniquePtr.h" #include "llvm/ADT/DenseSet.h" -#include - namespace psr { template struct AliasInfoTraits { @@ -31,7 +30,7 @@ template struct DefaultAATraits { using v_t = V; using AliasSetTy = llvm::DenseSet; using AliasSetPtrTy = BoxedConstPtr; - using AllocationSiteSetPtrTy = std::unique_ptr; + using AllocationSiteSetPtrTy = MaybeUniquePtr; }; } // namespace psr diff --git a/lib/PhasarLLVM/Pointer/FilteredLLVMAliasSet.cpp b/lib/PhasarLLVM/Pointer/FilteredLLVMAliasSet.cpp index 49b5b91fe..1eac91310 100644 --- a/lib/PhasarLLVM/Pointer/FilteredLLVMAliasSet.cpp +++ b/lib/PhasarLLVM/Pointer/FilteredLLVMAliasSet.cpp @@ -5,12 +5,15 @@ #include "phasar/PhasarLLVM/Pointer/LLVMPointsToUtils.h" #include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" #include "phasar/Pointer/AliasResult.h" +#include "phasar/Utils/DefaultValue.h" #include "phasar/Utils/NlohmannLogging.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/IR/Instructions.h" #include "nlohmann/json_fwd.hpp" +#include #include using namespace psr; @@ -74,11 +77,12 @@ static bool mustNoalias(const llvm::Value *p1, const llvm::Value *p2) { return false; } -static void fillAliasSet(FilteredLLVMAliasSet::AliasSetTy &Set, - LLVMAliasSet::AliasSetPtrTy AS, const llvm::Value *V, - const llvm::Function *Fun) { +template +static void foreachValidAliasIn(LLVMAliasSet::AliasSetPtrTy AS, + const llvm::Value *V, 
const llvm::Function *Fun, + WithAliasFn &&WithAlias) { if (!Fun) { - Set.insert(AS->begin(), AS->end()); + llvm::for_each(*AS, WithAlias); return; } @@ -92,7 +96,7 @@ static void fillAliasSet(FilteredLLVMAliasSet::AliasSetTy &Set, } if (V == Alias) { - Set.insert(Alias); + std::invoke(WithAlias, Alias); continue; } @@ -107,7 +111,7 @@ static void fillAliasSet(FilteredLLVMAliasSet::AliasSetTy &Set, continue; } - Set.insert(Alias); + std::invoke(WithAlias, Alias); } } @@ -144,10 +148,15 @@ AliasResult FilteredLLVMAliasSet::alias(const llvm::Value *V1, auto FilteredLLVMAliasSet::getAliasSet(const llvm::Value *V, const llvm::Function *Fun) -> AliasSetPtrTy { + if (!isInterestingPointer(V)) { + return AS->getEmptyAliasSet(); + } + auto &Entry = AliasSetMap[{Fun, V}]; if (!Entry) { auto Set = Owner.acquire(); - fillAliasSet(*Set, AS->getAliasSet(V), V, Fun); + foreachValidAliasIn(AS->getAliasSet(V), V, Fun, + [&Set](v_t Alias) { Set->insert(Alias); }); Entry = Set; } return Entry; @@ -163,36 +172,48 @@ auto FilteredLLVMAliasSet::getAliasSet(const llvm::Value *V, auto FilteredLLVMAliasSet::getReachableAllocationSites( const llvm::Value *V, bool IntraProcOnly, const llvm::Instruction *I) -> AllocationSiteSetPtrTy { - auto AllocSites = std::make_unique(); // if V is not a (interesting) pointer we can return an empty set if (!isInterestingPointer(V)) { - return AllocSites; + return &getDefaultValue(); } - const auto PTS = getAliasSet(V, I); + const auto *Fun = I->getFunction(); + auto &AllocSites = ReachableAllocationSitesMap[ReachableAllocationSitesKey{ + {Fun, IntraProcOnly}, V}]; + if (AllocSites) { + return AllocSites.get(); + } + + AllocSites = std::make_unique(); + // consider the full inter-procedural points-to/alias information if (!IntraProcOnly) { - for (const auto *P : *PTS) { - if (AS->interIsReachableAllocationSiteTy(V, P)) { - AllocSites->insert(P); - } - } + foreachValidAliasIn(AS->getAliasSet(V), V, Fun, + [Set = AllocSites.get(), AS = AS.get(), V](v_t 
Alias) { + if (AS->interIsReachableAllocationSiteTy(V, Alias)) { + Set->insert(Alias); + } + }); + } else { // consider the function-local, i.e. intra-procedural, points-to/alias // information only - const auto *VFun = getFunction(V); - const auto *VG = llvm::dyn_cast(V); + // We may not be able to retrieve a function for the given value since some // pointer values can exist outside functions, for instance, in case of // vtables, etc. - for (const auto *P : *PTS) { - if (AS->intraIsReachableAllocationSiteTy(V, P, VFun, VG)) { - AllocSites->insert(P); - } - } + const auto *VFun = getFunction(V); + const auto *VG = llvm::dyn_cast(V); + foreachValidAliasIn( + AS->getAliasSet(V), V, Fun, + [Set = AllocSites.get(), AS = AS.get(), V, VFun, VG](v_t Alias) { + if (AS->intraIsReachableAllocationSiteTy(V, Alias, VFun, VG)) { + Set->insert(Alias); + } + }); } - return AllocSites; + return AllocSites.get(); } // Checks if PotentialValue is in the reachable allocation sites of V. From 5ef333183ce3a397d4df1a6e056cdd13c28fa8b6 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Sun, 12 May 2024 15:50:52 +0200 Subject: [PATCH 5/7] Fix MaybeUniquePtr move constructor to reset the moved-from object --- include/phasar/Utils/MaybeUniquePtr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/phasar/Utils/MaybeUniquePtr.h b/include/phasar/Utils/MaybeUniquePtr.h index d770de1f3..4e50f2304 100644 --- a/include/phasar/Utils/MaybeUniquePtr.h +++ b/include/phasar/Utils/MaybeUniquePtr.h @@ -81,7 +81,7 @@ class [[clang::trivial_abi]] MaybeUniquePtr constexpr MaybeUniquePtr(MaybeUniquePtr &&Other) noexcept : detail::MaybeUniquePtrBase(std::move(Other)) { - Data = {}; + Other.Data = {}; } constexpr void swap(MaybeUniquePtr &Other) noexcept { From f027b398ae03a43f59a8bf0950b3283951f1c729 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Mon, 13 May 2024 20:23:02 +0200 Subject: [PATCH 6/7] Add noexcept to the deleted move assignment of FilteredLLVMAliasSet --- include/phasar/PhasarLLVM/Pointer/FilteredLLVMAliasSet.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff
--git a/include/phasar/PhasarLLVM/Pointer/FilteredLLVMAliasSet.h b/include/phasar/PhasarLLVM/Pointer/FilteredLLVMAliasSet.h index 27369bd85..91599637c 100644 --- a/include/phasar/PhasarLLVM/Pointer/FilteredLLVMAliasSet.h +++ b/include/phasar/PhasarLLVM/Pointer/FilteredLLVMAliasSet.h @@ -56,7 +56,7 @@ class FilteredLLVMAliasSet { FilteredLLVMAliasSet(const FilteredLLVMAliasSet &) = delete; FilteredLLVMAliasSet &operator=(const FilteredLLVMAliasSet &) = delete; - FilteredLLVMAliasSet &operator=(FilteredLLVMAliasSet &&) = delete; + FilteredLLVMAliasSet &operator=(FilteredLLVMAliasSet &&) noexcept = delete; FilteredLLVMAliasSet(FilteredLLVMAliasSet &&) noexcept = default; From 90007609ec97287cafcfbcc92a331a8ce6dbf672 Mon Sep 17 00:00:00 2001 From: Fabian Schiebel Date: Sat, 18 May 2024 12:11:31 +0200 Subject: [PATCH 7/7] Rename Context to AliasQueryInst in IFDSTaintAnalysis --- .../IfdsIde/Problems/IFDSTaintAnalysis.h | 4 ++-- .../IfdsIde/Problems/IFDSTaintAnalysis.cpp | 21 ++++++++++--------- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysis.h b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysis.h index 6c58a032d..90a730f0a 100644 --- a/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysis.h +++ b/include/phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysis.h @@ -95,9 +95,9 @@ class IFDSTaintAnalysis const llvm::Function *Callee) const; void populateWithMayAliases(container_type &Facts, - const llvm::Instruction *Context) const; + const llvm::Instruction *AliasQueryInst) const; void populateWithMustAliases(container_type &Facts, - const llvm::Instruction *Context) const; + const llvm::Instruction *AliasQueryInst) const; }; } // namespace psr diff --git a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysis.cpp b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysis.cpp index ccaa742fa..9fdf05ece 100644 --- 
a/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysis.cpp +++ b/lib/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysis.cpp @@ -117,16 +117,17 @@ bool IFDSTaintAnalysis::isSanitizerCall(const llvm::CallBase * /*CB*/, [this](const auto &Arg) { return Config->isSanitizer(&Arg); }); } -static bool canSkipAtContext(const llvm::Value *Val, - const llvm::Instruction *Context) noexcept { +static bool +canSkipAtQueryInst(const llvm::Value *Val, + const llvm::Instruction *AliasQueryInst) noexcept { if (const auto *Inst = llvm::dyn_cast(Val)) { /// Mapping instructions between functions is done via the call-FF and /// ret-FF - if (Inst->getFunction() != Context->getFunction()) { + if (Inst->getFunction() != AliasQueryInst->getFunction()) { return true; } - if (Inst->getParent() == Context->getParent() && - Context->comesBefore(Inst)) { + if (Inst->getParent() == AliasQueryInst->getParent() && + AliasQueryInst->comesBefore(Inst)) { // We will see that inst later return true; } @@ -135,7 +136,7 @@ static bool canSkipAtContext(const llvm::Value *Val, if (const auto *Arg = llvm::dyn_cast(Val)) { // An argument is only valid in the function it belongs to - if (Arg->getParent() != Context->getFunction()) { + if (Arg->getParent() != AliasQueryInst->getFunction()) { return true; } } @@ -152,12 +153,12 @@ static bool isCompiletimeConstantData(const llvm::Value *Val) noexcept { } void IFDSTaintAnalysis::populateWithMayAliases( - container_type &Facts, const llvm::Instruction *Context) const { + container_type &Facts, const llvm::Instruction *AliasQueryInst) const { container_type Tmp = Facts; for (const auto *Fact : Facts) { - auto Aliases = PT.getAliasSet(Fact, Context); + auto Aliases = PT.getAliasSet(Fact, AliasQueryInst); for (const auto *Alias : *Aliases) { - if (canSkipAtContext(Alias, Context)) { + if (canSkipAtQueryInst(Alias, AliasQueryInst)) { continue; } @@ -179,7 +180,7 @@ void IFDSTaintAnalysis::populateWithMayAliases( } void 
IFDSTaintAnalysis::populateWithMustAliases( - container_type &Facts, const llvm::Instruction *Context) const { + container_type &Facts, const llvm::Instruction *AliasQueryInst) const { /// TODO: Find must-aliases; Currently the AliasSet only contains /// may-aliases }