From 52bde9dc86cbfe37d6ab7a99ca6a80d7e7c14692 Mon Sep 17 00:00:00 2001 From: Valeriy Savchenko Date: Fri, 19 Mar 2021 17:00:00 +0300 Subject: [PATCH 1/8] [analyzer][solver] Fix infeasible constraints (PR49642) Additionally, this patch puts an assertion checking for feasible constraints in every place where constraints are assigned to states. Differential Revision: https://reviews.llvm.org/D98948 (cherry picked from commit 3085bda2b348f6a8b4e0bd1d230af4e9c900c9c4) --- .../Core/RangeConstraintManager.cpp | 59 ++++++++++++------- clang/test/Analysis/PR49642.c | 24 ++++++++ 2 files changed, 61 insertions(+), 22 deletions(-) create mode 100644 clang/test/Analysis/PR49642.c diff --git a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp index 95e61f9c8c61cb..6ae80b3ae773e3 100644 --- a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp +++ b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp @@ -448,12 +448,12 @@ class EquivalenceClass : public llvm::FoldingSetNode { EquivalenceClass Other); /// Return a set of class members for the given state. - LLVM_NODISCARD inline SymbolSet getClassMembers(ProgramStateRef State); + LLVM_NODISCARD inline SymbolSet getClassMembers(ProgramStateRef State) const; /// Return true if the current class is trivial in the given state. - LLVM_NODISCARD inline bool isTrivial(ProgramStateRef State); + LLVM_NODISCARD inline bool isTrivial(ProgramStateRef State) const; /// Return true if the current class is trivial and its only member is dead. LLVM_NODISCARD inline bool isTriviallyDead(ProgramStateRef State, - SymbolReaper &Reaper); + SymbolReaper &Reaper) const; LLVM_NODISCARD static inline ProgramStateRef markDisequal(BasicValueFactory &BV, RangeSet::Factory &F, @@ -521,7 +521,7 @@ class EquivalenceClass : public llvm::FoldingSetNode { ProgramStateRef State, SymbolSet Members, EquivalenceClass Other, SymbolSet OtherMembers); - static inline void + static inline bool addToDisequalityInfo(DisequalityMapTy &Info, ConstraintRangeTy &Constraints, BasicValueFactory &BV, RangeSet::Factory &F, ProgramStateRef State, EquivalenceClass First, @@ -535,6 +535,15 @@ class EquivalenceClass : public llvm::FoldingSetNode { // Constraint functions //===----------------------------------------------------------------------===// +LLVM_NODISCARD LLVM_ATTRIBUTE_UNUSED bool +areFeasible(ConstraintRangeTy Constraints) { + return llvm::none_of( + Constraints, + [](const std::pair &ClassConstraint) { + return ClassConstraint.second.isEmpty(); + }); +} + LLVM_NODISCARD inline const RangeSet *getConstraint(ProgramStateRef State, EquivalenceClass Class) { return State->get(Class); @@ -1397,15 +1406,6 @@ class RangeConstraintManager : public RangedConstraintManager { return EquivalenceClass::merge(getBasicVals(), F, State, LHS, RHS); } - LLVM_NODISCARD LLVM_ATTRIBUTE_UNUSED static bool - areFeasible(ConstraintRangeTy Constraints) { - return llvm::none_of( - Constraints, - [](const std::pair &ClassConstraint) { - return ClassConstraint.second.isEmpty(); - }); - } - LLVM_NODISCARD ProgramStateRef setConstraint(ProgramStateRef State, EquivalenceClass Class, RangeSet Constraint) { @@ -1428,7 +1428,7 @@ class RangeConstraintManager : public RangedConstraintManager { getRange(State, DisequalClass).Delete(getBasicVals(), F, *Point); // If we end up with at least one of the disequal classes to be - // constrainted with an empty range-set, the state is infeasible. + // constrained with an empty range-set, the state is infeasible. if (UpdatedConstraint.isEmpty()) return nullptr; @@ -1574,6 +1574,9 @@ EquivalenceClass::mergeImpl(BasicValueFactory &ValueFactory, // Assign new constraints for this class. Constraints = CRF.add(Constraints, *this, *NewClassConstraint); + assert(areFeasible(Constraints) && "Constraint manager shouldn't produce " + "a state with infeasible constraints"); + State = State->set(Constraints); } @@ -1644,7 +1647,7 @@ EquivalenceClass::getMembersFactory(ProgramStateRef State) { return State->get_context(); } -SymbolSet EquivalenceClass::getClassMembers(ProgramStateRef State) { +SymbolSet EquivalenceClass::getClassMembers(ProgramStateRef State) const { if (const SymbolSet *Members = State->get(*this)) return *Members; @@ -1654,12 +1657,12 @@ SymbolSet EquivalenceClass::getClassMembers(ProgramStateRef State) { return F.add(F.getEmptySet(), getRepresentativeSymbol()); } -bool EquivalenceClass::isTrivial(ProgramStateRef State) { +bool EquivalenceClass::isTrivial(ProgramStateRef State) const { return State->get(*this) == nullptr; } bool EquivalenceClass::isTriviallyDead(ProgramStateRef State, - SymbolReaper &Reaper) { + SymbolReaper &Reaper) const { return isTrivial(State) && Reaper.isDead(getRepresentativeSymbol()); } @@ -1694,10 +1697,14 @@ EquivalenceClass::markDisequal(BasicValueFactory &VF, RangeSet::Factory &RF, // Disequality is a symmetric relation, so if we mark A as disequal to B, // we should also mark B as disequalt to A. - addToDisequalityInfo(DisequalityInfo, Constraints, VF, RF, State, *this, - Other); - addToDisequalityInfo(DisequalityInfo, Constraints, VF, RF, State, Other, - *this); + if (!addToDisequalityInfo(DisequalityInfo, Constraints, VF, RF, State, *this, + Other) || + !addToDisequalityInfo(DisequalityInfo, Constraints, VF, RF, State, Other, + *this)) + return nullptr; + + assert(areFeasible(Constraints) && "Constraint manager shouldn't produce " + "a state with infeasible constraints"); State = State->set(DisequalityInfo); State = State->set(Constraints); @@ -1705,7 +1712,7 @@ EquivalenceClass::markDisequal(BasicValueFactory &VF, RangeSet::Factory &RF, return State; } -inline void EquivalenceClass::addToDisequalityInfo( +inline bool EquivalenceClass::addToDisequalityInfo( DisequalityMapTy &Info, ConstraintRangeTy &Constraints, BasicValueFactory &VF, RangeSet::Factory &RF, ProgramStateRef State, EquivalenceClass First, EquivalenceClass Second) { @@ -1734,8 +1741,16 @@ inline void EquivalenceClass::addToDisequalityInfo( VF, RF, State, First.getRepresentativeSymbol()); FirstConstraint = FirstConstraint.Delete(VF, RF, *Point); + + // If the First class is about to be constrained with an empty + // range-set, the state is infeasible. + if (FirstConstraint.isEmpty()) + return false; + Constraints = CRF.add(Constraints, First, FirstConstraint); } + + return true; } inline Optional EquivalenceClass::areEqual(ProgramStateRef State, diff --git a/clang/test/Analysis/PR49642.c b/clang/test/Analysis/PR49642.c new file mode 100644 index 00000000000000..af691d6afd6fd3 --- /dev/null +++ b/clang/test/Analysis/PR49642.c @@ -0,0 +1,24 @@ +// RUN: %clang_analyze_cc1 -w -verify %s \ +// RUN: -analyzer-checker=core \ +// RUN: -analyzer-checker=apiModeling.StdCLibraryFunctions + +// expected-no-diagnostics + +typedef ssize_t; +b; + +unsigned c; +int write(int, const void *, unsigned long); + +a() { + d(); + while (c > 0) { + b = write(0, d, c); + if (b) + c -= b; + b < 1; + } + if (c && c) { + // ^ no-crash + } +} From 1aede399e40ea758f61f33bc54e132964f700a1c Mon Sep 17 00:00:00 2001 From: Valeriy Savchenko Date: Fri, 24 Jul 2020 14:13:31 +0300 Subject: [PATCH 2/8] [analyzer][solver] Redesign constraint ranges data structure ImmutableSet doesn't seem like the perfect fit for the RangeSet data structure. It is good for saving memory in a persistent setting, but not for the case when the population of the container is tiny. This commit replaces RangeSet implementation and redesigns the most common operations to be more efficient. Differential Revision: https://reviews.llvm.org/D86465 (cherry picked from commit 02b51e5316cd501ede547d0223e0f5416ebd9845) --- .../PathSensitive/RangedConstraintManager.h | 315 +++++++++--- .../Core/RangeConstraintManager.cpp | 473 +++++++++++------- .../Core/RangedConstraintManager.cpp | 1 - .../unittests/StaticAnalyzer/RangeSetTest.cpp | 420 ++++++++++++---- 4 files changed, 857 insertions(+), 352 deletions(-) diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h index bc5d5f57cd6874..4a118074463d1b 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h @@ -16,6 +16,8 @@ #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" #include "clang/StaticAnalyzer/Core/PathSensitive/SimpleConstraintManager.h" +#include "llvm/ADT/APSInt.h" +#include "llvm/Support/Allocator.h" namespace clang { @@ -24,21 +26,19 @@ namespace ento { /// A Range represents the closed range [from, to]. The caller must /// guarantee that from <= to. Note that Range is immutable, so as not /// to subvert RangeSet's immutability. -class Range : public std::pair { +class Range { public: - Range(const llvm::APSInt &from, const llvm::APSInt &to) - : std::pair(&from, &to) { - assert(from <= to); + Range(const llvm::APSInt &From, const llvm::APSInt &To) : Impl(&From, &To) { + assert(From <= To); } - Range(const llvm::APSInt &point) - : std::pair(&point, &point) {} + Range(const llvm::APSInt &Point) : Range(Point, Point) {} - bool Includes(const llvm::APSInt &v) const { - return *first <= v && v <= *second; + bool Includes(const llvm::APSInt &Point) const { + return From() <= Point && Point <= To(); } - const llvm::APSInt &From() const { return *first; } - const llvm::APSInt &To() const { return *second; } + const llvm::APSInt &From() const { return *Impl.first; } + const llvm::APSInt &To() const { return *Impl.second; } const llvm::APSInt *getConcreteValue() const { return &From() == &To() ? &From() : nullptr; } @@ -47,93 +47,264 @@ class Range : public std::pair { ID.AddPointer(&From()); ID.AddPointer(&To()); } -}; + void dump(raw_ostream &OS) const; -class RangeTrait : public llvm::ImutContainerInfo { -public: - // When comparing if one Range is less than another, we should compare - // the actual APSInt values instead of their pointers. This keeps the order - // consistent (instead of comparing by pointer values) and can potentially - // be used to speed up some of the operations in RangeSet. - static inline bool isLess(key_type_ref lhs, key_type_ref rhs) { - return *lhs.first < *rhs.first || - (!(*rhs.first < *lhs.first) && *lhs.second < *rhs.second); - } + // In order to keep non-overlapping ranges sorted, we can compare only From + // points. + bool operator<(const Range &RHS) const { return From() < RHS.From(); } + + bool operator==(const Range &RHS) const { return Impl == RHS.Impl; } + bool operator!=(const Range &RHS) const { return !operator==(RHS); } + +private: + std::pair Impl; }; -/// RangeSet contains a set of ranges. If the set is empty, then -/// there the value of a symbol is overly constrained and there are no -/// possible values for that symbol. +/// @class RangeSet is a persistent set of non-overlapping ranges. +/// +/// New RangeSet objects can be ONLY produced by RangeSet::Factory object, which +/// also supports the most common operations performed on range sets. +/// +/// Empty set corresponds to an overly constrained symbol meaning that there +/// are no possible values for that symbol. class RangeSet { - typedef llvm::ImmutableSet PrimRangeSet; - PrimRangeSet ranges; // no need to make const, since it is an - // ImmutableSet - this allows default operator= - // to work. public: - typedef PrimRangeSet::Factory Factory; - typedef PrimRangeSet::iterator iterator; - - RangeSet(PrimRangeSet RS) : ranges(RS) {} - - /// Create a new set with all ranges of this set and RS. - /// Possible intersections are not checked here. - RangeSet addRange(Factory &F, const RangeSet &RS) { - PrimRangeSet Ranges(RS.ranges); - for (const auto &range : ranges) - Ranges = F.add(Ranges, range); - return RangeSet(Ranges); - } - - iterator begin() const { return ranges.begin(); } - iterator end() const { return ranges.end(); } + class Factory; - bool isEmpty() const { return ranges.isEmpty(); } +private: + // We use llvm::SmallVector as the underlying container for the following + // reasons: + // + // * Range sets are usually very simple, 1 or 2 ranges. + // That's why llvm::ImmutableSet is not perfect. + // + // * Ranges in sets are NOT overlapping, so it is natural to keep them + // sorted for efficient operations and queries. For this reason, + // llvm::SmallSet doesn't fit the requirements, it is not sorted when it + // is a vector. + // + // * Range set operations usually a bit harder than add/remove a range. + // Complex operations might do many of those for just one range set. + // Formerly it used to be llvm::ImmutableSet, which is inefficient for our + // purposes as we want to make these operations BOTH immutable AND + // efficient. + // + // * Iteration over ranges is widespread and a more cache-friendly + // structure is preferred. + using ImplType = llvm::SmallVector; + + struct ContainerType : public ImplType, public llvm::FoldingSetNode { + void Profile(llvm::FoldingSetNodeID &ID) const { + for (const Range &It : *this) { + It.Profile(ID); + } + } + }; + // This is a non-owning pointer to an actual container. + // The memory is fully managed by the factory and is alive as long as the + // factory itself is alive. + // It is a pointer as opposed to a reference, so we can easily reassign + // RangeSet objects. + using UnderlyingType = const ContainerType *; + UnderlyingType Impl; - /// Construct a new RangeSet representing '{ [from, to] }'. - RangeSet(Factory &F, const llvm::APSInt &from, const llvm::APSInt &to) - : ranges(F.add(F.getEmptySet(), Range(from, to))) {} +public: + using const_iterator = ImplType::const_iterator; + + const_iterator begin() const { return Impl->begin(); } + const_iterator end() const { return Impl->end(); } + size_t size() const { return Impl->size(); } + + bool isEmpty() const { return Impl->empty(); } + + class Factory { + public: + Factory(BasicValueFactory &BV) : ValueFactory(BV) {} + + /// Create a new set with all ranges from both LHS and RHS. + /// Possible intersections are not checked here. + /// + /// Complexity: O(N + M) + /// where N = size(LHS), M = size(RHS) + RangeSet add(RangeSet LHS, RangeSet RHS); + /// Create a new set with all ranges from the original set plus the new one. + /// Possible intersections are not checked here. + /// + /// Complexity: O(N) + /// where N = size(Original) + RangeSet add(RangeSet Original, Range Element); + /// Create a new set with all ranges from the original set plus the point. + /// Possible intersections are not checked here. + /// + /// Complexity: O(N) + /// where N = size(Original) + RangeSet add(RangeSet Original, const llvm::APSInt &Point); + + RangeSet getEmptySet() { return &EmptySet; } + + /// Create a new set with just one range. + /// @{ + RangeSet getRangeSet(Range Origin); + RangeSet getRangeSet(const llvm::APSInt &From, const llvm::APSInt &To) { + return getRangeSet(Range(From, To)); + } + RangeSet getRangeSet(const llvm::APSInt &Origin) { + return getRangeSet(Origin, Origin); + } + /// @} + + /// Intersect the given range sets. + /// + /// Complexity: O(N + M) + /// where N = size(LHS), M = size(RHS) + RangeSet intersect(RangeSet LHS, RangeSet RHS); + /// Intersect the given set with the closed range [Lower, Upper]. + /// + /// Unlike the Range type, this range uses modular arithmetic, corresponding + /// to the common treatment of C integer overflow. Thus, if the Lower bound + /// is greater than the Upper bound, the range is taken to wrap around. This + /// is equivalent to taking the intersection with the two ranges [Min, + /// Upper] and [Lower, Max], or, alternatively, /removing/ all integers + /// between Upper and Lower. + /// + /// Complexity: O(N) + /// where N = size(What) + RangeSet intersect(RangeSet What, llvm::APSInt Lower, llvm::APSInt Upper); + /// Intersect the given range with the given point. + /// + /// The result can be either an empty set or a set containing the given + /// point depending on whether the point is in the range set. + /// + /// Complexity: O(logN) + /// where N = size(What) + RangeSet intersect(RangeSet What, llvm::APSInt Point); + + /// Delete the given point from the range set. + /// + /// Complexity: O(N) + /// where N = size(From) + RangeSet deletePoint(RangeSet From, const llvm::APSInt &Point); + /// Negate the given range set. + /// + /// Turn all [A, B] ranges to [-B, -A], when "-" is a C-like unary minus + /// operation under the values of the type. + /// + /// We also handle MIN because applying unary minus to MIN does not change + /// it. + /// Example 1: + /// char x = -128; // -128 is a MIN value in a range of 'char' + /// char y = -x; // y: -128 + /// + /// Example 2: + /// unsigned char x = 0; // 0 is a MIN value in a range of 'unsigned char' + /// unsigned char y = -x; // y: 0 + /// + /// And it makes us to separate the range + /// like [MIN, N] to [MIN, MIN] U [-N, MAX]. + /// For instance, whole range is {-128..127} and subrange is [-128,-126], + /// thus [-128,-127,-126,...] negates to [-128,...,126,127]. + /// + /// Negate restores disrupted ranges on bounds, + /// e.g. [MIN, B] => [MIN, MIN] U [-B, MAX] => [MIN, B]. + /// + /// Negate is a self-inverse function, i.e. negate(negate(R)) == R. + /// + /// Complexity: O(N) + /// where N = size(What) + RangeSet negate(RangeSet What); + + private: + /// Return a persistent version of the given container. + RangeSet makePersistent(ContainerType &&From); + /// Construct a new persistent version of the given container. + ContainerType *construct(ContainerType &&From); + + RangeSet intersect(const ContainerType &LHS, const ContainerType &RHS); + + // Many operations include producing new APSInt values and that's why + // we need this factory. + BasicValueFactory &ValueFactory; + // Allocator for all the created containers. + // Containers might own their own memory and that's why it is specific + // for the type, so it calls container destructors upon deletion. + llvm::SpecificBumpPtrAllocator Arena; + // Usually we deal with the same ranges and range sets over and over. + // Here we track all created containers and try not to repeat ourselves. + llvm::FoldingSet Cache; + static ContainerType EmptySet; + }; + + RangeSet(const RangeSet &) = default; + RangeSet &operator=(const RangeSet &) = default; + RangeSet(RangeSet &&) = default; + RangeSet &operator=(RangeSet &&) = default; + ~RangeSet() = default; + + /// Construct a new RangeSet representing '{ [From, To] }'. + RangeSet(Factory &F, const llvm::APSInt &From, const llvm::APSInt &To) + : RangeSet(F.getRangeSet(From, To)) {} /// Construct a new RangeSet representing the given point as a range. - RangeSet(Factory &F, const llvm::APSInt &point) : RangeSet(F, point, point) {} + RangeSet(Factory &F, const llvm::APSInt &Point) + : RangeSet(F.getRangeSet(Point)) {} + + static void Profile(llvm::FoldingSetNodeID &ID, const RangeSet &RS) { + ID.AddPointer(RS.Impl); + } /// Profile - Generates a hash profile of this RangeSet for use /// by FoldingSet. - void Profile(llvm::FoldingSetNodeID &ID) const { ranges.Profile(ID); } + void Profile(llvm::FoldingSetNodeID &ID) const { Profile(ID, *this); } /// getConcreteValue - If a symbol is contrained to equal a specific integer /// constant then this method returns that value. Otherwise, it returns /// NULL. const llvm::APSInt *getConcreteValue() const { - return ranges.isSingleton() ? ranges.begin()->getConcreteValue() : nullptr; + return Impl->size() == 1 ? begin()->getConcreteValue() : nullptr; } - /// Get a minimal value covered by the ranges in the set + /// Get the minimal value covered by the ranges in the set. + /// + /// Complexity: O(1) const llvm::APSInt &getMinValue() const; - /// Get a maximal value covered by the ranges in the set + /// Get the maximal value covered by the ranges in the set. + /// + /// Complexity: O(1) const llvm::APSInt &getMaxValue() const; -private: - void IntersectInRange(BasicValueFactory &BV, Factory &F, - const llvm::APSInt &Lower, const llvm::APSInt &Upper, - PrimRangeSet &newRanges, PrimRangeSet::iterator &i, - PrimRangeSet::iterator &e) const; + /// Test whether the given point is contained by any of the ranges. + /// + /// Complexity: O(logN) + /// where N = size(this) + bool contains(llvm::APSInt Point) const { return containsImpl(Point); } + + void dump(raw_ostream &OS) const; + + bool operator==(const RangeSet &Other) const { return *Impl == *Other.Impl; } + bool operator!=(const RangeSet &Other) const { return !(*this == Other); } +private: + /* implicit */ RangeSet(ContainerType *RawContainer) : Impl(RawContainer) {} + /* implicit */ RangeSet(UnderlyingType Ptr) : Impl(Ptr) {} + + /// Pin given points to the type represented by the current range set. + /// + /// This makes parameter points to be in-out parameters. + /// In order to maintain consistent types across all of the ranges in the set + /// and to keep all the operations to compare ONLY points of the same type, we + /// need to pin every point before any operation. + /// + /// @Returns true if the given points can be converted to the target type + /// without changing the values (i.e. trivially) and false otherwise. + /// @{ bool pin(llvm::APSInt &Lower, llvm::APSInt &Upper) const; + bool pin(llvm::APSInt &Point) const; + /// @} -public: - RangeSet Intersect(BasicValueFactory &BV, Factory &F, llvm::APSInt Lower, - llvm::APSInt Upper) const; - RangeSet Intersect(BasicValueFactory &BV, Factory &F, - const RangeSet &Other) const; - RangeSet Negate(BasicValueFactory &BV, Factory &F) const; - RangeSet Delete(BasicValueFactory &BV, Factory &F, - const llvm::APSInt &Point) const; - - void print(raw_ostream &os) const; - - bool operator==(const RangeSet &other) const { - return ranges == other.ranges; - } + // This version of this function modifies its arguments (pins it). + bool containsImpl(llvm::APSInt &Point) const; + + friend class Factory; }; using ConstraintMap = llvm::ImmutableMap; diff --git a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp index 6ae80b3ae773e3..974535952d0f0e 100644 --- a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp +++ b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp @@ -22,6 +22,8 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/raw_ostream.h" +#include +#include using namespace clang; using namespace ento; @@ -99,47 +101,63 @@ class OperatorRelationsTable { return CmpOpTable[getIndexFromOp(CurrentOP)][CmpOpCount]; } }; + //===----------------------------------------------------------------------===// // RangeSet implementation //===----------------------------------------------------------------------===// -void RangeSet::IntersectInRange(BasicValueFactory &BV, Factory &F, - const llvm::APSInt &Lower, - const llvm::APSInt &Upper, - PrimRangeSet &newRanges, - PrimRangeSet::iterator &i, - PrimRangeSet::iterator &e) const { - // There are six cases for each range R in the set: - // 1. R is entirely before the intersection range. - // 2. R is entirely after the intersection range. - // 3. R contains the entire intersection range. - // 4. R starts before the intersection range and ends in the middle. - // 5. R starts in the middle of the intersection range and ends after it. - // 6. R is entirely contained in the intersection range. - // These correspond to each of the conditions below. - for (/* i = begin(), e = end() */; i != e; ++i) { - if (i->To() < Lower) { - continue; - } - if (i->From() > Upper) { - break; - } +RangeSet::ContainerType RangeSet::Factory::EmptySet{}; - if (i->Includes(Lower)) { - if (i->Includes(Upper)) { - newRanges = - F.add(newRanges, Range(BV.getValue(Lower), BV.getValue(Upper))); - break; - } else - newRanges = F.add(newRanges, Range(BV.getValue(Lower), i->To())); - } else { - if (i->Includes(Upper)) { - newRanges = F.add(newRanges, Range(i->From(), BV.getValue(Upper))); - break; - } else - newRanges = F.add(newRanges, *i); - } +RangeSet RangeSet::Factory::add(RangeSet Original, Range Element) { + ContainerType Result; + Result.reserve(Original.size() + 1); + + const_iterator Lower = llvm::lower_bound(Original, Element); + Result.insert(Result.end(), Original.begin(), Lower); + Result.push_back(Element); + Result.insert(Result.end(), Lower, Original.end()); + + return makePersistent(std::move(Result)); +} + +RangeSet RangeSet::Factory::add(RangeSet Original, const llvm::APSInt &Point) { + return add(Original, Range(Point)); +} + +RangeSet RangeSet::Factory::getRangeSet(Range From) { + ContainerType Result; + Result.push_back(From); + return makePersistent(std::move(Result)); +} + +RangeSet RangeSet::Factory::makePersistent(ContainerType &&From) { + llvm::FoldingSetNodeID ID; + void *InsertPos; + + From.Profile(ID); + ContainerType *Result = Cache.FindNodeOrInsertPos(ID, InsertPos); + + if (!Result) { + // It is cheaper to fully construct the resulting range on stack + // and move it to the freshly allocated buffer if we don't have + // a set like this already. + Result = construct(std::move(From)); + Cache.InsertNode(Result, InsertPos); } + + return Result; +} + +RangeSet::ContainerType *RangeSet::Factory::construct(ContainerType &&From) { + void *Buffer = Arena.Allocate(); + return new (Buffer) ContainerType(std::move(From)); +} + +RangeSet RangeSet::Factory::add(RangeSet LHS, RangeSet RHS) { + ContainerType Result; + std::merge(LHS.begin(), LHS.end(), RHS.begin(), RHS.end(), + std::back_inserter(Result)); + return makePersistent(std::move(Result)); } const llvm::APSInt &RangeSet::getMinValue() const { @@ -149,22 +167,31 @@ const llvm::APSInt &RangeSet::getMinValue() const { const llvm::APSInt &RangeSet::getMaxValue() const { assert(!isEmpty()); - // NOTE: It's a shame that we can't implement 'getMaxValue' without scanning - // the whole tree to get to the last element. - // llvm::ImmutableSet should support decrement for 'end' iterators - // or reverse order iteration. - auto It = begin(); - for (auto End = end(); std::next(It) != End; ++It) { - } - return It->To(); + return std::prev(end())->To(); } -bool RangeSet::pin(llvm::APSInt &Lower, llvm::APSInt &Upper) const { - if (isEmpty()) { - // This range is already infeasible. +bool RangeSet::containsImpl(llvm::APSInt &Point) const { + if (isEmpty() || !pin(Point)) return false; - } + Range Dummy(Point); + const_iterator It = llvm::upper_bound(*this, Dummy); + if (It == begin()) + return false; + + return std::prev(It)->Includes(Point); +} + +bool RangeSet::pin(llvm::APSInt &Point) const { + APSIntType Type(getMinValue()); + if (Type.testInRange(Point, true) != APSIntType::RTR_Within) + return false; + + Type.apply(Point); + return true; +} + +bool RangeSet::pin(llvm::APSInt &Lower, llvm::APSInt &Upper) const { // This function has nine cases, the cartesian product of range-testing // both the upper and lower bounds against the symbol's type. // Each case requires a different pinning operation. @@ -245,129 +272,216 @@ bool RangeSet::pin(llvm::APSInt &Lower, llvm::APSInt &Upper) const { return true; } -// Returns a set containing the values in the receiving set, intersected with -// the closed range [Lower, Upper]. Unlike the Range type, this range uses -// modular arithmetic, corresponding to the common treatment of C integer -// overflow. Thus, if the Lower bound is greater than the Upper bound, the -// range is taken to wrap around. This is equivalent to taking the -// intersection with the two ranges [Min, Upper] and [Lower, Max], -// or, alternatively, /removing/ all integers between Upper and Lower. -RangeSet RangeSet::Intersect(BasicValueFactory &BV, Factory &F, - llvm::APSInt Lower, llvm::APSInt Upper) const { - PrimRangeSet newRanges = F.getEmptySet(); - - if (isEmpty() || !pin(Lower, Upper)) - return newRanges; - - PrimRangeSet::iterator i = begin(), e = end(); - if (Lower <= Upper) - IntersectInRange(BV, F, Lower, Upper, newRanges, i, e); - else { - // The order of the next two statements is important! - // IntersectInRange() does not reset the iteration state for i and e. - // Therefore, the lower range most be handled first. - IntersectInRange(BV, F, BV.getMinValue(Upper), Upper, newRanges, i, e); - IntersectInRange(BV, F, Lower, BV.getMaxValue(Lower), newRanges, i, e); - } - - return newRanges; -} - -// Returns a set containing the values in the receiving set, intersected with -// the range set passed as parameter. -RangeSet RangeSet::Intersect(BasicValueFactory &BV, Factory &F, - const RangeSet &Other) const { - PrimRangeSet newRanges = F.getEmptySet(); - - for (iterator i = Other.begin(), e = Other.end(); i != e; ++i) { - RangeSet newPiece = Intersect(BV, F, i->From(), i->To()); - for (iterator j = newPiece.begin(), ee = newPiece.end(); j != ee; ++j) { - newRanges = F.add(newRanges, *j); - } +RangeSet RangeSet::Factory::intersect(RangeSet What, llvm::APSInt Lower, + llvm::APSInt Upper) { + if (What.isEmpty() || !What.pin(Lower, Upper)) + return getEmptySet(); + + ContainerType DummyContainer; + + if (Lower <= Upper) { + // [Lower, Upper] is a regular range. + // + // Shortcut: check that there is even a possibility of the intersection + // by checking the two following situations: + // + // <---[ What ]---[------]------> + // Lower Upper + // -or- + // <----[------]----[ What ]----> + // Lower Upper + if (What.getMaxValue() < Lower || Upper < What.getMinValue()) + return getEmptySet(); + + DummyContainer.push_back( + Range(ValueFactory.getValue(Lower), ValueFactory.getValue(Upper))); + } else { + // [Lower, Upper] is an inverted range, i.e. [MIN, Upper] U [Lower, MAX] + // + // Shortcut: check that there is even a possibility of the intersection + // by checking the following situation: + // + // <------]---[ What ]---[------> + // Upper Lower + if (What.getMaxValue() < Lower && Upper < What.getMinValue()) + return getEmptySet(); + + DummyContainer.push_back( + Range(ValueFactory.getMinValue(Upper), ValueFactory.getValue(Upper))); + DummyContainer.push_back( + Range(ValueFactory.getValue(Lower), ValueFactory.getMaxValue(Lower))); } - return newRanges; + return intersect(*What.Impl, DummyContainer); } -// Turn all [A, B] ranges to [-B, -A], when "-" is a C-like unary minus -// operation under the values of the type. -// -// We also handle MIN because applying unary minus to MIN does not change it. -// Example 1: -// char x = -128; // -128 is a MIN value in a range of 'char' -// char y = -x; // y: -128 -// Example 2: -// unsigned char x = 0; // 0 is a MIN value in a range of 'unsigned char' -// unsigned char y = -x; // y: 0 -// -// And it makes us to separate the range -// like [MIN, N] to [MIN, MIN] U [-N,MAX]. -// For instance, whole range is {-128..127} and subrange is [-128,-126], -// thus [-128,-127,-126,.....] negates to [-128,.....,126,127]. -// -// Negate restores disrupted ranges on bounds, -// e.g. [MIN, B] => [MIN, MIN] U [-B, MAX] => [MIN, B]. -RangeSet RangeSet::Negate(BasicValueFactory &BV, Factory &F) const { - PrimRangeSet newRanges = F.getEmptySet(); +RangeSet RangeSet::Factory::intersect(const RangeSet::ContainerType &LHS, + const RangeSet::ContainerType &RHS) { + ContainerType Result; + Result.reserve(std::max(LHS.size(), RHS.size())); + + const_iterator First = LHS.begin(), Second = RHS.begin(), + FirstEnd = LHS.end(), SecondEnd = RHS.end(); + + const auto SwapIterators = [&First, &FirstEnd, &Second, &SecondEnd]() { + std::swap(First, Second); + std::swap(FirstEnd, SecondEnd); + }; + + // If we ran out of ranges in one set, but not in the other, + // it means that those elements are definitely not in the + // intersection. + while (First != FirstEnd && Second != SecondEnd) { + // We want to keep the following invariant at all times: + // + // ----[ First ----------------------> + // --------[ Second -----------------> + if (Second->From() < First->From()) + SwapIterators(); + + // Loop where the invariant holds: + do { + // Check for the following situation: + // + // ----[ First ]---------------------> + // ---------------[ Second ]---------> + // + // which means that... + if (Second->From() > First->To()) { + // ...First is not in the intersection. + // + // We should move on to the next range after First and break out of the + // loop because the invariant might not be true. + ++First; + break; + } + + // We have a guaranteed intersection at this point! + // And this is the current situation: + // + // ----[ First ]-----------------> + // -------[ Second ------------------> + // + // Additionally, it definitely starts with Second->From(). + const llvm::APSInt &IntersectionStart = Second->From(); + + // It is important to know which of the two ranges' ends + // is greater. That "longer" range might have some other + // intersections, while the "shorter" range might not. + if (Second->To() > First->To()) { + // Here we make a decision to keep First as the "longer" + // range. + SwapIterators(); + } + + // At this point, we have the following situation: + // + // ---- First ]--------------------> + // ---- Second ]--[ Second+1 ----------> + // + // We don't know the relationship between First->From and + // Second->From and we don't know whether Second+1 intersects + // with First. + // + // However, we know that [IntersectionStart, Second->To] is + // a part of the intersection... + Result.push_back(Range(IntersectionStart, Second->To())); + ++Second; + // ...and that the invariant will hold for a valid Second+1 + // because First->From <= Second->To < (Second+1)->From. + } while (Second != SecondEnd); + } + + if (Result.empty()) + return getEmptySet(); + + return makePersistent(std::move(Result)); +} + +RangeSet RangeSet::Factory::intersect(RangeSet LHS, RangeSet RHS) { + // Shortcut: let's see if the intersection is even possible. + if (LHS.isEmpty() || RHS.isEmpty() || LHS.getMaxValue() < RHS.getMinValue() || + RHS.getMaxValue() < LHS.getMinValue()) + return getEmptySet(); + + return intersect(*LHS.Impl, *RHS.Impl); +} + +RangeSet RangeSet::Factory::intersect(RangeSet LHS, llvm::APSInt Point) { + if (LHS.containsImpl(Point)) + return getRangeSet(ValueFactory.getValue(Point)); + + return getEmptySet(); +} + +RangeSet RangeSet::Factory::negate(RangeSet What) { + if (What.isEmpty()) + return getEmptySet(); - if (isEmpty()) - return newRanges; + const llvm::APSInt SampleValue = What.getMinValue(); + const llvm::APSInt &MIN = ValueFactory.getMinValue(SampleValue); + const llvm::APSInt &MAX = ValueFactory.getMaxValue(SampleValue); - const llvm::APSInt sampleValue = getMinValue(); - const llvm::APSInt &MIN = BV.getMinValue(sampleValue); - const llvm::APSInt &MAX = BV.getMaxValue(sampleValue); + ContainerType Result; + Result.reserve(What.size() + (SampleValue == MIN)); // Handle a special case for MIN value. - iterator i = begin(); - const llvm::APSInt &from = i->From(); - const llvm::APSInt &to = i->To(); - if (from == MIN) { - // If [from, to] are [MIN, MAX], then just return the same [MIN, MAX]. - if (to == MAX) { - newRanges = ranges; + const_iterator It = What.begin(); + const_iterator End = What.end(); + + const llvm::APSInt &From = It->From(); + const llvm::APSInt &To = It->To(); + + if (From == MIN) { + // If the range [From, To] is [MIN, MAX], then result is also [MIN, MAX]. + if (To == MAX) { + return What; + } + + const_iterator Last = std::prev(End); + + // Try to find and unite the following ranges: + // [MIN, MIN] & [MIN + 1, N] => [MIN, N]. + if (Last->To() == MAX) { + // It means that in the original range we have ranges + // [MIN, A], ... , [B, MAX] + // And the result should be [MIN, -B], ..., [-A, MAX] + Result.emplace_back(MIN, ValueFactory.getValue(-Last->From())); + // We already negated Last, so we can skip it. + End = Last; } else { - // Add separate range for the lowest value. - newRanges = F.add(newRanges, Range(MIN, MIN)); - // Skip adding the second range in case when [from, to] are [MIN, MIN]. - if (to != MIN) { - newRanges = F.add(newRanges, Range(BV.getValue(-to), MAX)); - } + // Add a separate range for the lowest value. + Result.emplace_back(MIN, MIN); } + + // Skip adding the second range in case when [From, To] are [MIN, MIN]. + if (To != MIN) { + Result.emplace_back(ValueFactory.getValue(-To), MAX); + } + // Skip the first range in the loop. - ++i; + ++It; } // Negate all other ranges. - for (iterator e = end(); i != e; ++i) { + for (; It != End; ++It) { // Negate int values. - const llvm::APSInt &newFrom = BV.getValue(-i->To()); - const llvm::APSInt &newTo = BV.getValue(-i->From()); - // Add a negated range. - newRanges = F.add(newRanges, Range(newFrom, newTo)); - } - - if (newRanges.isSingleton()) - return newRanges; + const llvm::APSInt &NewFrom = ValueFactory.getValue(-It->To()); + const llvm::APSInt &NewTo = ValueFactory.getValue(-It->From()); - // Try to find and unite next ranges: - // [MIN, MIN] & [MIN + 1, N] => [MIN, N]. - iterator iter1 = newRanges.begin(); - iterator iter2 = std::next(iter1); - - if (iter1->To() == MIN && (iter2->From() - 1) == MIN) { - const llvm::APSInt &to = iter2->To(); - // remove adjacent ranges - newRanges = F.remove(newRanges, *iter1); - newRanges = F.remove(newRanges, *newRanges.begin()); - // add united range - newRanges = F.add(newRanges, Range(MIN, to)); + // Add a negated range. + Result.emplace_back(NewFrom, NewTo); } - return newRanges; + llvm::sort(Result); + return makePersistent(std::move(Result)); } -RangeSet RangeSet::Delete(BasicValueFactory &BV, Factory &F, - const llvm::APSInt &Point) const { +RangeSet RangeSet::Factory::deletePoint(RangeSet From, + const llvm::APSInt &Point) { + if (!From.contains(Point)) + return From; + llvm::APSInt Upper = Point; llvm::APSInt Lower = Point; @@ -375,22 +489,17 @@ RangeSet RangeSet::Delete(BasicValueFactory &BV, Factory &F, --Lower; // Notice that the lower bound is greater than the upper bound. - return Intersect(BV, F, Upper, Lower); + return intersect(From, Upper, Lower); } -void RangeSet::print(raw_ostream &os) const { - bool isFirst = true; - os << "{ "; - for (iterator i = begin(), e = end(); i != e; ++i) { - if (isFirst) - isFirst = false; - else - os << ", "; +void Range::dump(raw_ostream &OS) const { + OS << '[' << From().toString(10) << ", " << To().toString(10) << ']'; +} - os << '[' << i->From().toString(10) << ", " << i->To().toString(10) - << ']'; - } - os << " }"; +void RangeSet::dump(raw_ostream &OS) const { + OS << "{ "; + llvm::interleaveComma(*this, OS, [&OS](const Range &R) { R.dump(OS); }); + OS << " }"; } REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(SymbolSet, SymbolRef) @@ -662,7 +771,7 @@ LLVM_NODISCARD inline RangeSet intersect(BasicValueFactory &BV, RangeSet Second, RestTy... Tail) { // Here we call either the or version // of the function and can be sure that the result is RangeSet. - return intersect(BV, F, Head.Intersect(BV, F, Second), Tail...); + return intersect(BV, F, F.intersect(Head, Second), Tail...); } template @@ -951,7 +1060,7 @@ class SymbolicRangeInferrer /// Return a range set subtracting zero from \p Domain. RangeSet assumeNonZero(RangeSet Domain, QualType T) { APSIntType IntType = ValueFactory.getAPSIntType(T); - return Domain.Delete(ValueFactory, RangeFactory, IntType.getZeroValue()); + return RangeFactory.deletePoint(Domain, IntType.getZeroValue()); } // FIXME: Once SValBuilder supports unary minus, we should use SValBuilder to @@ -974,7 +1083,7 @@ class SymbolicRangeInferrer SymMgr.getSymSymExpr(SSE->getRHS(), BO_Sub, SSE->getLHS(), T); if (const RangeSet *NegatedRange = getConstraint(State, NegatedSym)) { - return NegatedRange->Negate(ValueFactory, RangeFactory); + return RangeFactory.negate(*NegatedRange); } } } @@ -1268,7 +1377,7 @@ RangeSet SymbolicRangeInferrer::VisitBinaryOperator(Range LHS, class RangeConstraintManager : public RangedConstraintManager { public: RangeConstraintManager(ExprEngine *EE, SValBuilder &SVB) - : RangedConstraintManager(EE, SVB) {} + : RangedConstraintManager(EE, SVB), F(getBasicVals()) {} //===------------------------------------------------------------------===// // Implementation for interface from ConstraintManager. @@ -1424,8 +1533,8 @@ class RangeConstraintManager : public RangedConstraintManager { // be simply a constant because we can't reason about range disequalities. if (const llvm::APSInt *Point = Constraint.getConcreteValue()) for (EquivalenceClass DisequalClass : Class.getDisequalClasses(State)) { - RangeSet UpdatedConstraint = - getRange(State, DisequalClass).Delete(getBasicVals(), F, *Point); + RangeSet UpdatedConstraint = getRange(State, DisequalClass); + UpdatedConstraint = F.deletePoint(UpdatedConstraint, *Point); // If we end up with at least one of the disequal classes to be // constrained with an empty range-set, the state is infeasible. @@ -1740,7 +1849,7 @@ inline bool EquivalenceClass::addToDisequalityInfo( RangeSet FirstConstraint = SymbolicRangeInferrer::inferRange( VF, RF, State, First.getRepresentativeSymbol()); - FirstConstraint = FirstConstraint.Delete(VF, RF, *Point); + FirstConstraint = RF.deletePoint(FirstConstraint, *Point); // If the First class is about to be constrained with an empty // range-set, the state is infeasible. @@ -1899,7 +2008,7 @@ ConditionTruthVal RangeConstraintManager::checkNull(ProgramStateRef State, llvm::APSInt Zero = IntType.getZeroValue(); // Check if zero is in the set of possible values. - if (Ranges->Intersect(BV, F, Zero, Zero).isEmpty()) + if (!Ranges->contains(Zero)) return false; // Zero is a possible value, but it is not the /only/ possible value. @@ -2085,7 +2194,8 @@ RangeConstraintManager::assumeSymNE(ProgramStateRef St, SymbolRef Sym, llvm::APSInt Point = AdjustmentType.convert(Int) - Adjustment; - RangeSet New = getRange(St, Sym).Delete(getBasicVals(), F, Point); + RangeSet New = getRange(St, Sym); + New = F.deletePoint(New, Point); return trackNE(New, St, Sym, Int, Adjustment); } @@ -2101,7 +2211,8 @@ RangeConstraintManager::assumeSymEQ(ProgramStateRef St, SymbolRef Sym, // [Int-Adjustment, Int-Adjustment] llvm::APSInt AdjInt = AdjustmentType.convert(Int) - Adjustment; - RangeSet New = getRange(St, Sym).Intersect(getBasicVals(), F, AdjInt, AdjInt); + RangeSet New = getRange(St, Sym); + New = F.intersect(New, AdjInt); return trackEQ(New, St, Sym, Int, Adjustment); } @@ -2131,7 +2242,8 @@ RangeSet RangeConstraintManager::getSymLTRange(ProgramStateRef St, llvm::APSInt Upper = ComparisonVal - Adjustment; --Upper; - return getRange(St, Sym).Intersect(getBasicVals(), F, Lower, Upper); + RangeSet Result = getRange(St, Sym); + return F.intersect(Result, Lower, Upper); } ProgramStateRef @@ -2167,7 +2279,8 @@ RangeSet RangeConstraintManager::getSymGTRange(ProgramStateRef St, llvm::APSInt Upper = Max - Adjustment; ++Lower; - return getRange(St, Sym).Intersect(getBasicVals(), F, Lower, Upper); + RangeSet SymRange = getRange(St, Sym); + return F.intersect(SymRange, Lower, Upper); } ProgramStateRef @@ -2203,7 +2316,8 @@ RangeSet RangeConstraintManager::getSymGERange(ProgramStateRef St, llvm::APSInt Lower = ComparisonVal - Adjustment; llvm::APSInt Upper = Max - Adjustment; - return getRange(St, Sym).Intersect(getBasicVals(), F, Lower, Upper); + RangeSet SymRange = getRange(St, Sym); + return F.intersect(SymRange, Lower, Upper); } ProgramStateRef @@ -2239,7 +2353,8 @@ RangeConstraintManager::getSymLERange(llvm::function_ref RS, llvm::APSInt Lower = Min - Adjustment; llvm::APSInt Upper = ComparisonVal - Adjustment; - return RS().Intersect(getBasicVals(), F, Lower, Upper); + RangeSet Default = RS(); + return F.intersect(Default, Lower, Upper); } RangeSet RangeConstraintManager::getSymLERange(ProgramStateRef St, @@ -2272,7 +2387,7 @@ ProgramStateRef RangeConstraintManager::assumeSymOutsideInclusiveRange( const llvm::APSInt &To, const llvm::APSInt &Adjustment) { RangeSet RangeLT = getSymLTRange(State, Sym, From, Adjustment); RangeSet RangeGT = getSymGTRange(State, Sym, To, Adjustment); - RangeSet New(RangeLT.addRange(F, RangeGT)); + RangeSet New(F.add(RangeLT, RangeGT)); return New.isEmpty() ? nullptr : setConstraint(State, Sym, New); } @@ -2307,7 +2422,7 @@ void RangeConstraintManager::printJson(raw_ostream &Out, ProgramStateRef State, } Indent(Out, Space, IsDot) << "{ \"symbol\": \"" << ClassMember << "\", \"range\": \""; - P.second.print(Out); + P.second.dump(Out); Out << "\" }"; } } diff --git a/clang/lib/StaticAnalyzer/Core/RangedConstraintManager.cpp b/clang/lib/StaticAnalyzer/Core/RangedConstraintManager.cpp index e7a03e6ed582e2..1b8945fb66af06 100644 --- a/clang/lib/StaticAnalyzer/Core/RangedConstraintManager.cpp +++ b/clang/lib/StaticAnalyzer/Core/RangedConstraintManager.cpp @@ -220,5 +220,4 @@ void RangedConstraintManager::computeAdjustment(SymbolRef &Sym, } } // end of namespace ento - } // end of namespace clang diff --git a/clang/unittests/StaticAnalyzer/RangeSetTest.cpp b/clang/unittests/StaticAnalyzer/RangeSetTest.cpp index 83b4fac15a1986..aeeff47a9af9e2 100644 --- a/clang/unittests/StaticAnalyzer/RangeSetTest.cpp +++ b/clang/unittests/StaticAnalyzer/RangeSetTest.cpp @@ -11,120 +11,340 @@ #include "clang/Basic/SourceManager.h" #include "clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h" #include "clang/Tooling/Tooling.h" +#include "llvm/ADT/APSInt.h" +#include "llvm/Support/raw_ostream.h" +#include "gtest/gtest-typed-test.h" #include "gtest/gtest.h" +using namespace clang; +using namespace ento; + namespace clang { namespace ento { -namespace { -// TestCase contains to lists of ranges. -// Original one has to be negated. -// Expected one has to be compared to negated original range. -template struct TestCase { - RangeSet original; - RangeSet expected; - - TestCase(BasicValueFactory &BVF, RangeSet::Factory &F, - const std::initializer_list &originalList, - const std::initializer_list &expectedList) - : original(createRangeSetFromList(BVF, F, originalList)), - expected(createRangeSetFromList(BVF, F, expectedList)) {} - -private: - RangeSet createRangeSetFromList(BasicValueFactory &BVF, RangeSet::Factory &F, - const std::initializer_list rangeList) { - llvm::APSInt from(sizeof(T) * 8, std::is_unsigned::value); - llvm::APSInt to = from; - RangeSet rangeSet = F.getEmptySet(); - for (auto it = rangeList.begin(); it != rangeList.end(); it += 2) { - from = *it; - to = *(it + 1); - rangeSet = rangeSet.addRange( - F, RangeSet(F, BVF.getValue(from), BVF.getValue(to))); - } - return rangeSet; - } +template static std::string toString(const RangeOrSet &Obj) { + std::string ObjRepresentation; + llvm::raw_string_ostream SS(ObjRepresentation); + Obj.dump(SS); + return SS.str(); +} +LLVM_ATTRIBUTE_UNUSED static std::string toString(const llvm::APSInt &Point) { + return Point.toString(10); +} +// We need it here for better fail diagnostics from gtest. +LLVM_ATTRIBUTE_UNUSED static std::ostream &operator<<(std::ostream &OS, + const RangeSet &Set) { + return OS << toString(Set); +} - void printNegate(const TestCase &TestCase) { - TestCase.original.print(llvm::dbgs()); - llvm::dbgs() << " => "; - TestCase.expected.print(llvm::dbgs()); - } -}; +} // namespace ento +} // namespace clang -class RangeSetTest : public testing::Test { -protected: +namespace { + +template class RangeSetTest : public testing::Test { +public: // Init block std::unique_ptr AST = tooling::buildASTFromCode("struct foo;"); - ASTContext &context = AST->getASTContext(); - llvm::BumpPtrAllocator alloc; - BasicValueFactory BVF{context, alloc}; - RangeSet::Factory F; + ASTContext &Context = AST->getASTContext(); + llvm::BumpPtrAllocator Arena; + BasicValueFactory BVF{Context, Arena}; + RangeSet::Factory F{BVF}; // End init block - template void checkNegate() { - using type = T; - - // Use next values of the range {MIN, A, B, MID, C, D, MAX}. - - // MID is a value in the middle of the range - // which unary minus does not affect on, - // e.g. int8/int32(0), uint8(128), uint32(2147483648). - - constexpr type MIN = std::numeric_limits::min(); - constexpr type MAX = std::numeric_limits::max(); - constexpr type MID = std::is_signed::value - ? 0 - : ~(static_cast(-1) / static_cast(2)); - constexpr type A = MID - static_cast(42 + 42); - constexpr type B = MID - static_cast(42); - constexpr type C = -B; - constexpr type D = -A; - - static_assert(MIN < A && A < B && B < MID && MID < C && C < D && D < MAX, - "Values shall be in an ascending order"); - - // Left {[x, y], [x, y]} is what shall be negated. - // Right {[x, y], [x, y]} is what shall be compared to a negation result. - TestCase cases[] = { - {BVF, F, {MIN, A}, {MIN, MIN, D, MAX}}, - {BVF, F, {MIN, C}, {MIN, MIN, B, MAX}}, - {BVF, F, {MIN, MID}, {MIN, MIN, MID, MAX}}, - {BVF, F, {MIN, MAX}, {MIN, MAX}}, - {BVF, F, {A, D}, {A, D}}, - {BVF, F, {A, B}, {C, D}}, - {BVF, F, {MIN, A, D, MAX}, {MIN, A, D, MAX}}, - {BVF, F, {MIN, B, MID, D}, {MIN, MIN, A, MID, C, MAX}}, - {BVF, F, {MIN, MID, C, D}, {MIN, MIN, A, B, MID, MAX}}, - {BVF, F, {MIN, MID, C, MAX}, {MIN, B, MID, MAX}}, - {BVF, F, {A, MID, D, MAX}, {MIN + 1, A, MID, D}}, - {BVF, F, {A, A}, {D, D}}, - {BVF, F, {MID, MID}, {MID, MID}}, - {BVF, F, {MAX, MAX}, {MIN + 1, MIN + 1}}, - }; - - for (const auto &c : cases) { - // Negate original and check with expected. - RangeSet negatedFromOriginal = c.original.Negate(BVF, F); - EXPECT_EQ(negatedFromOriginal, c.expected); - // Negate negated back and check with original. - RangeSet negatedBackward = negatedFromOriginal.Negate(BVF, F); - EXPECT_EQ(negatedBackward, c.original); + using Self = RangeSetTest; + using RawRange = std::pair; + using RawRangeSet = std::initializer_list; + + static constexpr BaseType getMin() { + return std::numeric_limits::min(); + } + static constexpr BaseType getMax() { + return std::numeric_limits::max(); + } + static constexpr BaseType getMid() { + return isSigned() ? 0 : ~(fromInt(-1) / fromInt(2)); + } + + static constexpr bool isSigned() { return std::is_signed::value; } + static constexpr BaseType fromInt(int X) { return static_cast(X); } + + static llvm::APSInt Base; + const llvm::APSInt &from(BaseType X) { + llvm::APSInt Dummy = Base; + Dummy = X; + return BVF.getValue(Dummy); + } + + Range from(const RawRange &Init) { + return Range(from(Init.first), from(Init.second)); + } + + RangeSet from(const RawRangeSet &Init) { + RangeSet RangeSet = F.getEmptySet(); + for (const auto &Raw : Init) { + RangeSet = F.add(RangeSet, from(Raw)); } + return RangeSet; + } + + template + void wrap(F ActualFunction, RawArgTypes &&... Args) { + (this->*ActualFunction)(from(std::forward(Args))...); + } + + void checkNegateImpl(RangeSet Original, RangeSet Expected) { + RangeSet NegatedFromOriginal = F.negate(Original); + EXPECT_EQ(NegatedFromOriginal, Expected); + // Negate negated back and check with original. + RangeSet NegatedBackward = F.negate(NegatedFromOriginal); + EXPECT_EQ(NegatedBackward, Original); + } + + void checkNegate(RawRangeSet RawOriginal, RawRangeSet RawExpected) { + wrap(&Self::checkNegateImpl, RawOriginal, RawExpected); + } + + template + void checkIntersectImpl(RangeSet LHS, PointOrSet RHS, RangeSet Expected) { + RangeSet Result = F.intersect(LHS, RHS); + EXPECT_EQ(Result, Expected) + << "while intersecting " << toString(LHS) << " and " << toString(RHS); + } + + void checkIntersectRangeImpl(RangeSet LHS, const llvm::APSInt &Lower, + const llvm::APSInt &Upper, RangeSet Expected) { + RangeSet Result = F.intersect(LHS, Lower, Upper); + EXPECT_EQ(Result, Expected) + << "while intersecting " << toString(LHS) << " and [" << toString(Lower) + << ", " << toString(Upper) << "]"; + } + + void checkIntersect(RawRangeSet RawLHS, RawRangeSet RawRHS, + RawRangeSet RawExpected) { + wrap(&Self::checkIntersectImpl, RawLHS, RawRHS, RawExpected); + } + + void checkIntersect(RawRangeSet RawLHS, BaseType RawRHS, + RawRangeSet RawExpected) { + wrap(&Self::checkIntersectImpl, RawLHS, RawRHS, + RawExpected); + } + + void checkIntersect(RawRangeSet RawLHS, BaseType RawLower, BaseType RawUpper, + RawRangeSet RawExpected) { + wrap(&Self::checkIntersectRangeImpl, RawLHS, RawLower, RawUpper, + RawExpected); + } + + void checkContainsImpl(RangeSet LHS, const llvm::APSInt &RHS, bool Expected) { + bool Result = LHS.contains(RHS); + EXPECT_EQ(Result, Expected) + << toString(LHS) << (Result ? " contains " : " doesn't contain ") + << toString(RHS); + } + + void checkContains(RawRangeSet RawLHS, BaseType RawRHS, bool Expected) { + checkContainsImpl(from(RawLHS), from(RawRHS), Expected); + } + + template + void checkAddImpl(RangeSet LHS, RHSType RHS, RangeSet Expected) { + RangeSet Result = F.add(LHS, RHS); + EXPECT_EQ(Result, Expected) + << "while adding " << toString(LHS) << " and " << toString(RHS); + } + + void checkAdd(RawRangeSet RawLHS, RawRange RawRHS, RawRangeSet RawExpected) { + wrap(&Self::checkAddImpl, RawLHS, RawRHS, RawExpected); + } + + void checkAdd(RawRangeSet RawLHS, RawRangeSet RawRHS, + RawRangeSet RawExpected) { + wrap(&Self::checkAddImpl, RawRHS, RawLHS, RawExpected); + } + + void checkAdd(RawRangeSet RawLHS, BaseType RawRHS, RawRangeSet RawExpected) { + wrap(&Self::checkAddImpl, RawLHS, RawRHS, + RawExpected); + } + + void checkDeleteImpl(const llvm::APSInt &Point, RangeSet From, + RangeSet Expected) { + RangeSet Result = F.deletePoint(From, Point); + EXPECT_EQ(Result, Expected) + << "while deleting " << toString(Point) << " from " << toString(From); + } + + void checkDelete(BaseType Point, RawRangeSet RawFrom, + RawRangeSet RawExpected) { + wrap(&Self::checkDeleteImpl, Point, RawFrom, RawExpected); } }; -TEST_F(RangeSetTest, RangeSetNegateTest) { - checkNegate(); - checkNegate(); - checkNegate(); - checkNegate(); - checkNegate(); - checkNegate(); - checkNegate(); - checkNegate(); +} // namespace + +template +llvm::APSInt RangeSetTest::Base{sizeof(BaseType) * 8, !isSigned()}; + +using IntTypes = ::testing::Types; +TYPED_TEST_CASE(RangeSetTest, IntTypes); + +TYPED_TEST(RangeSetTest, RangeSetNegateTest) { + // Use next values of the range {MIN, A, B, MID, C, D, MAX}. + + constexpr TypeParam MIN = TestFixture::getMin(); + constexpr TypeParam MAX = TestFixture::getMax(); + // MID is a value in the middle of the range + // which unary minus does not affect on, + // e.g. int8/int32(0), uint8(128), uint32(2147483648). + constexpr TypeParam MID = TestFixture::getMid(); + constexpr TypeParam A = MID - TestFixture::fromInt(42 + 42); + constexpr TypeParam B = MID - TestFixture::fromInt(42); + constexpr TypeParam C = -B; + constexpr TypeParam D = -A; + + static_assert(MIN < A && A < B && B < MID && MID < C && C < D && D < MAX, + "Values shall be in an ascending order"); + + this->checkNegate({{MIN, A}}, {{MIN, MIN}, {D, MAX}}); + this->checkNegate({{MIN, C}}, {{MIN, MIN}, {B, MAX}}); + this->checkNegate({{MIN, MID}}, {{MIN, MIN}, {MID, MAX}}); + this->checkNegate({{MIN, MAX}}, {{MIN, MAX}}); + this->checkNegate({{A, D}}, {{A, D}}); + this->checkNegate({{A, B}}, {{C, D}}); + this->checkNegate({{MIN, A}, {D, MAX}}, {{MIN, A}, {D, MAX}}); + this->checkNegate({{MIN, B}, {MID, D}}, {{MIN, MIN}, {A, MID}, {C, MAX}}); + this->checkNegate({{MIN, MID}, {C, D}}, {{MIN, MIN}, {A, B}, {MID, MAX}}); + this->checkNegate({{MIN, MID}, {C, MAX}}, {{MIN, B}, {MID, MAX}}); + this->checkNegate({{A, MID}, {D, MAX}}, {{MIN + 1, A}, {MID, D}}); + this->checkNegate({{A, A}}, {{D, D}}); + this->checkNegate({{MID, MID}}, {{MID, MID}}); + this->checkNegate({{MAX, MAX}}, {{MIN + 1, MIN + 1}}); } -} // namespace -} // namespace ento -} // namespace clang +TYPED_TEST(RangeSetTest, RangeSetPointIntersectTest) { + // Check that we can correctly intersect empty sets. + this->checkIntersect({}, 42, {}); + // Check that intersection with itself produces the same set. + this->checkIntersect({{42, 42}}, 42, {{42, 42}}); + // Check more general cases. + this->checkIntersect({{0, 10}, {20, 30}, {30, 40}, {50, 60}}, 42, {}); + this->checkIntersect({{0, 10}, {20, 30}, {30, 60}}, 42, {{42, 42}}); +} + +TYPED_TEST(RangeSetTest, RangeSetRangeIntersectTest) { + constexpr TypeParam MIN = TestFixture::getMin(); + constexpr TypeParam MAX = TestFixture::getMax(); + + // Check that we can correctly intersect empty sets. + this->checkIntersect({}, 10, 20, {}); + this->checkIntersect({}, 20, 10, {}); + // Check that intersection with itself produces the same set. + this->checkIntersect({{10, 20}}, 10, 20, {{10, 20}}); + this->checkIntersect({{MIN, 10}, {20, MAX}}, 20, 10, {{MIN, 10}, {20, MAX}}); + // Check non-overlapping range intersections. + this->checkIntersect({{10, 20}}, 21, 9, {}); + this->checkIntersect({{MIN, 9}, {21, MAX}}, 10, 20, {}); + // Check more general cases. + this->checkIntersect({{0, 10}, {20, 30}, {30, 40}, {50, 60}}, 10, 35, + {{10, 10}, {20, 30}, {30, 35}}); + this->checkIntersect({{0, 10}, {20, 30}, {30, 40}, {50, 60}}, 35, 10, + {{0, 10}, {35, 40}, {50, 60}}); +} + +TYPED_TEST(RangeSetTest, RangeSetGenericIntersectTest) { + // Check that we can correctly intersect empty sets. + this->checkIntersect({}, {}, {}); + this->checkIntersect({}, {{0, 10}}, {}); + this->checkIntersect({{0, 10}}, {}, {}); + + this->checkIntersect({{0, 10}}, {{4, 6}}, {{4, 6}}); + this->checkIntersect({{0, 10}}, {{4, 20}}, {{4, 10}}); + // Check that intersection with points works as expected. + this->checkIntersect({{0, 10}}, {{4, 4}}, {{4, 4}}); + // All ranges are closed, check that intersection with edge points works as + // expected. + this->checkIntersect({{0, 10}}, {{10, 10}}, {{10, 10}}); + + // Let's check that we can skip some intervals and partially intersect + // other intervals. + this->checkIntersect({{0, 2}, {4, 5}, {6, 9}, {10, 11}, {12, 12}, {13, 15}}, + {{8, 14}, {20, 30}}, + {{8, 9}, {10, 11}, {12, 12}, {13, 14}}); + // Check more generic case. + this->checkIntersect( + {{0, 1}, {2, 3}, {5, 6}, {7, 15}, {25, 30}}, + {{4, 10}, {11, 11}, {12, 16}, {17, 17}, {19, 20}, {21, 23}, {24, 27}}, + {{5, 6}, {7, 10}, {11, 11}, {12, 15}, {25, 27}}); +} + +TYPED_TEST(RangeSetTest, RangeSetContainsTest) { + // Check with an empty set. + this->checkContains({}, 10, false); + // Check contains with sets of size one: + // * when the whole range is less + this->checkContains({{0, 5}}, 10, false); + // * when the whole range is greater + this->checkContains({{20, 25}}, 10, false); + // * when the range is just the point we are looking for + this->checkContains({{10, 10}}, 10, true); + // * when the range starts with the point + this->checkContains({{10, 15}}, 10, true); + // * when the range ends with the point + this->checkContains({{5, 10}}, 10, true); + // * when the range has the point somewhere in the middle + this->checkContains({{0, 25}}, 10, true); + // Check similar cases, but with larger sets. + this->checkContains({{0, 5}, {10, 10}, {15, 20}}, 10, true); + this->checkContains({{0, 5}, {10, 12}, {15, 20}}, 10, true); + this->checkContains({{0, 5}, {5, 7}, {8, 10}, {12, 41}}, 10, true); + + constexpr TypeParam MIN = TestFixture::getMin(); + constexpr TypeParam MAX = TestFixture::getMax(); + constexpr TypeParam MID = TestFixture::getMid(); + this->checkContains({{MIN, MAX}}, 0, true); + this->checkContains({{MIN, MAX}}, MID, true); + this->checkContains({{MIN, MAX}}, -10, true); + this->checkContains({{MIN, MAX}}, 10, true); +} + +TYPED_TEST(RangeSetTest, RangeSetAddTest) { + // Check adding single points + this->checkAdd({}, 10, {{10, 10}}); + this->checkAdd({{0, 5}}, 10, {{0, 5}, {10, 10}}); + this->checkAdd({{0, 5}, {30, 40}}, 10, {{0, 5}, {10, 10}, {30, 40}}); + + // Check adding single ranges. + this->checkAdd({}, {10, 20}, {{10, 20}}); + this->checkAdd({{0, 5}}, {10, 20}, {{0, 5}, {10, 20}}); + this->checkAdd({{0, 5}, {30, 40}}, {10, 20}, {{0, 5}, {10, 20}, {30, 40}}); + + // Check adding whole sets of ranges. + this->checkAdd({{0, 5}}, {{10, 20}}, {{0, 5}, {10, 20}}); + // Check that ordering of ranges is as expected. + this->checkAdd({{0, 5}, {30, 40}}, {{10, 20}}, {{0, 5}, {10, 20}, {30, 40}}); + this->checkAdd({{0, 5}, {30, 40}}, {{10, 20}, {50, 60}}, + {{0, 5}, {10, 20}, {30, 40}, {50, 60}}); + this->checkAdd({{10, 20}, {50, 60}}, {{0, 5}, {30, 40}, {70, 80}}, + {{0, 5}, {10, 20}, {30, 40}, {50, 60}, {70, 80}}); +} + +TYPED_TEST(RangeSetTest, RangeSetDeletePointTest) { + constexpr TypeParam MIN = TestFixture::getMin(); + constexpr TypeParam MAX = TestFixture::getMax(); + constexpr TypeParam MID = TestFixture::getMid(); + + this->checkDelete(MID, {{MIN, MAX}}, {{MIN, MID - 1}, {MID + 1, MAX}}); + // Check that delete works with an empty set. + this->checkDelete(10, {}, {}); + // Check that delete can remove entire ranges. + this->checkDelete(10, {{10, 10}}, {}); + this->checkDelete(10, {{0, 5}, {10, 10}, {20, 30}}, {{0, 5}, {20, 30}}); + // Check that delete can split existing ranges into two. + this->checkDelete(10, {{0, 5}, {7, 15}, {20, 30}}, + {{0, 5}, {7, 9}, {11, 15}, {20, 30}}); + // Check that delete of the point not from the range set works as expected. + this->checkDelete(10, {{0, 5}, {20, 30}}, {{0, 5}, {20, 30}}); +} From 414a80da8418f8e1d56e54024dc5ead1ddfd896e Mon Sep 17 00:00:00 2001 From: Dmitri Gribenko Date: Mon, 22 Mar 2021 13:30:28 +0100 Subject: [PATCH 3/8] [analyzer] Don't include private gtest headers (cherry picked from commit 9cd7c4130635a6f0c94046f529fb1ee19118bbfb) --- clang/unittests/StaticAnalyzer/RangeSetTest.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/clang/unittests/StaticAnalyzer/RangeSetTest.cpp b/clang/unittests/StaticAnalyzer/RangeSetTest.cpp index aeeff47a9af9e2..5be2ee3fc520b5 100644 --- a/clang/unittests/StaticAnalyzer/RangeSetTest.cpp +++ b/clang/unittests/StaticAnalyzer/RangeSetTest.cpp @@ -13,7 +13,6 @@ #include "clang/Tooling/Tooling.h" #include "llvm/ADT/APSInt.h" #include "llvm/Support/raw_ostream.h" -#include "gtest/gtest-typed-test.h" #include "gtest/gtest.h" using namespace clang; From c918d1d613df2364491d96dca2ddd56e1bacb397 Mon Sep 17 00:00:00 2001 From: Gabor Marton Date: Wed, 24 Mar 2021 11:46:10 +0100 Subject: [PATCH 4/8] [Analyzer][NFC] Fix typos in comments (cherry picked from commit f8a850ccf452f9709c652823f11f1e95fd2c0e24) --- .../clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h | 2 +- clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h b/clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h index 58a88f452ed95d..2975d50de33341 100644 --- a/clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h +++ b/clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h @@ -93,7 +93,7 @@ enum class TrackingKind { /// gathered about the tracked expression value as possible. Thorough, /// Specifies that a more moderate tracking should be used for the expression - /// value. This will essentially make sure that functions relevant to the it + /// value. This will essentially make sure that functions relevant to it /// aren't pruned, but otherwise relies on the user reading the code or /// following the arrows. Condition diff --git a/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp b/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp index bc72f4f8c1e300..cb2301093366e7 100644 --- a/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp +++ b/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp @@ -1942,7 +1942,7 @@ bool bugreporter::trackExpressionValue(const ExplodedNode *InputNode, const StackFrameContext *SFC = LVNode->getStackFrame(); // We only track expressions if we believe that they are important. Chances - // are good that control dependencies to the tracking point are also improtant + // are good that control dependencies to the tracking point are also important // because of this, let's explain why we believe control reached this point. // TODO: Shouldn't we track control dependencies of every bug location, rather // than only tracked expressions? From 3e27500ca387d10fda4a8e5db483fdcbaf91987e Mon Sep 17 00:00:00 2001 From: Gabor Marton Date: Thu, 25 Mar 2021 15:29:41 +0100 Subject: [PATCH 5/8] [Analyzer] Infer 0 value when the divisible is 0 (bug fix) Currently, we infer 0 if the divisible of the modulo op is 0: int a = x < 0; // a can be 0 int b = a % y; // b is either 1 % sym or 0 However, we don't when the op is / : int a = x < 0; // a can be 0 int b = a / y; // b is either 1 / sym or 0 / sym This commit fixes the discrepancy. Differential Revision: https://reviews.llvm.org/D99343 (cherry picked from commit 015c39882ebc1771713a7523ae76903ebae83288) --- .../StaticAnalyzer/Core/SimpleSValBuilder.cpp | 2 + clang/test/Analysis/zero-operands.c | 53 +++++++++++++++++++ 2 files changed, 55 insertions(+) create mode 100644 clang/test/Analysis/zero-operands.c diff --git a/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp b/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp index facadaf1225f8a..872616fedb4eb2 100644 --- a/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp +++ b/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp @@ -652,6 +652,8 @@ SVal SimpleSValBuilder::evalBinOpNN(ProgramStateRef state, if (LHSValue == 0) return evalCastFromNonLoc(lhs, resultTy); return makeSymExprValNN(op, InputLHS, InputRHS, resultTy); + case BO_Div: + // 0 / x == 0 case BO_Rem: // 0 % x == 0 if (LHSValue == 0) diff --git a/clang/test/Analysis/zero-operands.c b/clang/test/Analysis/zero-operands.c new file mode 100644 index 00000000000000..3311c524f81460 --- /dev/null +++ b/clang/test/Analysis/zero-operands.c @@ -0,0 +1,53 @@ +// RUN: %clang_analyze_cc1 -analyzer-checker=core \ +// RUN: -analyzer-checker=debug.ExprInspection \ +// RUN: -verify %s + +void clang_analyzer_dump(int); + +void test_0_multiplier1(int x, int y) { + int a = x < 0; // Eagerly bifurcate. + clang_analyzer_dump(a); + // expected-warning@-1{{0 S32b}} + // expected-warning@-2{{1 S32b}} + + int b = a * y; + clang_analyzer_dump(b); + // expected-warning@-1{{0 S32b}} + // expected-warning-re@-2{{reg_${{[[:digit:]]+}}}} +} + +void test_0_multiplier2(int x, int y) { + int a = x < 0; // Eagerly bifurcate. + clang_analyzer_dump(a); + // expected-warning@-1{{0 S32b}} + // expected-warning@-2{{1 S32b}} + + int b = y * a; + clang_analyzer_dump(b); + // expected-warning@-1{{0 S32b}} + // expected-warning-re@-2{{reg_${{[[:digit:]]+}}}} +} + +void test_0_modulo(int x, int y) { + int a = x < 0; // Eagerly bifurcate. + clang_analyzer_dump(a); + // expected-warning@-1{{0 S32b}} + // expected-warning@-2{{1 S32b}} + + int b = a % y; + clang_analyzer_dump(b); + // expected-warning@-1{{0 S32b}} + // expected-warning-re@-2{{1 % (reg_${{[[:digit:]]+}})}} +} + +void test_0_divisible(int x, int y) { + int a = x < 0; // Eagerly bifurcate. + clang_analyzer_dump(a); + // expected-warning@-1{{0 S32b}} + // expected-warning@-2{{1 S32b}} + + int b = a / y; + clang_analyzer_dump(b); + // expected-warning@-1{{0 S32b}} + // expected-warning-re@-2{{1 / (reg_${{[[:digit:]]+}})}} +} From 654d8342502da9572265c4055c07c313d232e033 Mon Sep 17 00:00:00 2001 From: Gabor Marton Date: Thu, 25 Mar 2021 15:40:26 +0100 Subject: [PATCH 6/8] [Analyzer] Track RValue expressions It makes sense to track rvalue expressions in the case of special concrete integer values. The most notable special value is zero (later we may find other values). By tracking the origin of 0, we can provide a better explanation for users e.g. in case of division by 0 warnings. When the divisor is a product of a multiplication then now we can show which operand (or both) was (were) zero and why. Differential Revision: https://reviews.llvm.org/D99344 (cherry picked from commit efa7df1682c2859dabe3646ee7dc01e68629417f) --- .../Core/BugReporterVisitors.cpp | 43 ++++++++ .../Analysis/division-by-zero-track-zero.c | 11 +++ .../Analysis/division-by-zero-track-zero.cpp | 98 +++++++++++++++++++ clang/test/Analysis/nullptr.cpp | 2 +- 4 files changed, 153 insertions(+), 1 deletion(-) create mode 100644 clang/test/Analysis/division-by-zero-track-zero.c create mode 100644 clang/test/Analysis/division-by-zero-track-zero.cpp diff --git a/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp b/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp index cb2301093366e7..59f344842b3787 100644 --- a/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp +++ b/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp @@ -1924,6 +1924,44 @@ static const ExplodedNode* findNodeForExpression(const ExplodedNode *N, return N; } +/// Attempts to add visitors to track an RValue expression back to its point of +/// origin. Works similarly to trackExpressionValue, but accepts only RValues. +static void trackRValueExpression(const ExplodedNode *InputNode, const Expr *E, + PathSensitiveBugReport &report, + bugreporter::TrackingKind TKind, + bool EnableNullFPSuppression) { + assert(E->isRValue() && "The expression is not an rvalue!"); + const ExplodedNode *RVNode = findNodeForExpression(InputNode, E); + if (!RVNode) + return; + ProgramStateRef RVState = RVNode->getState(); + SVal V = RVState->getSValAsScalarOrLoc(E, RVNode->getLocationContext()); + const auto *BO = dyn_cast(E); + if (!BO) + return; + if (!V.isZeroConstant()) + return; + if (!BO->isMultiplicativeOp()) + return; + + SVal RHSV = RVState->getSVal(BO->getRHS(), RVNode->getLocationContext()); + SVal LHSV = RVState->getSVal(BO->getLHS(), RVNode->getLocationContext()); + + // Track both LHS and RHS of a multiplication. + if (BO->getOpcode() == BO_Mul) { + if (LHSV.isZeroConstant()) + trackExpressionValue(InputNode, BO->getLHS(), report, TKind, + EnableNullFPSuppression); + if (RHSV.isZeroConstant()) + trackExpressionValue(InputNode, BO->getRHS(), report, TKind, + EnableNullFPSuppression); + } else { // Track only the LHS of a division or a modulo. + if (LHSV.isZeroConstant()) + trackExpressionValue(InputNode, BO->getLHS(), report, TKind, + EnableNullFPSuppression); + } +} + bool bugreporter::trackExpressionValue(const ExplodedNode *InputNode, const Expr *E, PathSensitiveBugReport &report, @@ -2069,6 +2107,11 @@ bool bugreporter::trackExpressionValue(const ExplodedNode *InputNode, loc::MemRegionVal(RegionRVal), /*assumption=*/false)); } } + + if (Inner->isRValue()) + trackRValueExpression(LVNode, Inner, report, TKind, + EnableNullFPSuppression); + return true; } diff --git a/clang/test/Analysis/division-by-zero-track-zero.c b/clang/test/Analysis/division-by-zero-track-zero.c new file mode 100644 index 00000000000000..f6b2a78ed70175 --- /dev/null +++ b/clang/test/Analysis/division-by-zero-track-zero.c @@ -0,0 +1,11 @@ +// RUN: %clang_analyze_cc1 -analyzer-checker=core \ +// RUN: -analyzer-output=text \ +// RUN: -verify %s + +int track_mul_lhs_0(int x, int y) { + int p0 = x < 0; // expected-note {{Assuming 'x' is >= 0}} \ + // expected-note {{'p0' initialized to 0}} + int div = p0 * y; // expected-note {{'div' initialized to 0}} + return 1 / div; // expected-note {{Division by zero}} \ + // expected-warning {{Division by zero}} +} diff --git a/clang/test/Analysis/division-by-zero-track-zero.cpp b/clang/test/Analysis/division-by-zero-track-zero.cpp new file mode 100644 index 00000000000000..c4b9550c76c0f0 --- /dev/null +++ b/clang/test/Analysis/division-by-zero-track-zero.cpp @@ -0,0 +1,98 @@ +// RUN: %clang_analyze_cc1 -analyzer-checker=core \ +// RUN: -analyzer-output=text \ +// RUN: -verify %s + +namespace test_tracking_of_lhs_multiplier { + int f(int x, int y) { + bool p0 = x < 0; // expected-note {{Assuming 'x' is >= 0}} \ + // expected-note {{'p0' initialized to 0}} + int div = p0 * y; // expected-note {{'div' initialized to 0}} + return 1 / div; // expected-note {{Division by zero}} \ + // expected-warning {{Division by zero}} + } +} // namespace test_tracking_of_lhs_multiplier + +namespace test_tracking_of_rhs_multiplier { + int f(int x, int y) { + bool p0 = x < 0; // expected-note {{Assuming 'x' is >= 0}} \ + // expected-note {{'p0' initialized to 0}} + int div = y * p0; // expected-note {{'div' initialized to 0}} + return 1 / div; // expected-note {{Division by zero}} \ + // expected-warning {{Division by zero}} + } +} // namespace test_tracking_of_rhs_multiplier + +namespace test_tracking_of_nested_multiplier { + int f(int x, int y, int z) { + bool p0 = x < 0; // expected-note {{Assuming 'x' is >= 0}} \ + // expected-note {{'p0' initialized to 0}} + int div = y*z*p0; // expected-note {{'div' initialized to 0}} + return 1 / div; // expected-note {{Division by zero}} \ + // expected-warning {{Division by zero}} + } +} // namespace test_tracking_of_nested_multiplier + +namespace test_tracking_through_multiple_stmts { + int f(int x, int y) { + bool p0 = x < 0; // expected-note {{Assuming 'x' is >= 0}} + bool p1 = p0 ? 0 : 1; // expected-note {{'p0' is false}} \ + // expected-note {{'?' condition is false}} + bool p2 = 1 - p1; // expected-note {{'p2' initialized to 0}} + int div = p2 * y; // expected-note {{'div' initialized to 0}} + return 1 / div; // expected-note {{Division by zero}} \ + // expected-warning {{Division by zero}} + } +} // namespace test_tracking_through_multiple_stmts + +namespace test_tracking_both_lhs_and_rhs { + int f(int x, int y) { + bool p0 = x < 0; // expected-note {{Assuming 'x' is >= 0}} \ + // expected-note {{'p0' initialized to 0}} + bool p1 = y < 0; // expected-note {{Assuming 'y' is >= 0}} \ + // expected-note {{'p1' initialized to 0}} + int div = p0 * p1; // expected-note {{'div' initialized to 0}} + return 1 / div; // expected-note {{Division by zero}} \ + // expected-warning {{Division by zero}} + } +} // namespace test_tracking_both_lhs_and_rhs + +namespace test_tracking_of_multiplier_and_parens { + int f(int x, int y, int z) { + bool p0 = x < 0; // expected-note {{Assuming 'x' is >= 0}} \ + // expected-note {{'p0' initialized to 0}} + int div = y*(z*p0); // expected-note {{'div' initialized to 0}} + return 1 / div; // expected-note {{Division by zero}} \ + // expected-warning {{Division by zero}} + } +} // namespace test_tracking_of_multiplier_and_parens + +namespace test_tracking_of_divisible { + int f(int x, int y) { + bool p0 = x < 0; // expected-note {{Assuming 'x' is >= 0}} \ + // expected-note {{'p0' initialized to 0}} + int div = p0 / y; // expected-note {{'div' initialized to 0}} + return 1 / div; // expected-note {{Division by zero}} \ + // expected-warning {{Division by zero}} + } +} // namespace test_tracking_of_divisible + +namespace test_tracking_of_modulo { + int f(int x, int y) { + bool p0 = x < 0; // expected-note {{Assuming 'x' is >= 0}} \ + // expected-note {{'p0' initialized to 0}} + int div = p0 % y; // expected-note {{'div' initialized to 0}} + return 1 / div; // expected-note {{Division by zero}} \ + // expected-warning {{Division by zero}} + } +} // namespace test_tracking_of_modulo + +namespace test_tracking_of_assignment { + int f(int x) { + bool p0 = x < 0; // expected-note {{Assuming 'x' is >= 0}} \ + // expected-note {{'p0' initialized to 0}} + int div = 1; + div *= p0; // expected-note {{The value 0 is assigned to 'div'}} + return 1 / div; // expected-note {{Division by zero}} \ + // expected-warning {{Division by zero}} + } +} // namespace test_tracking_of_assignment diff --git a/clang/test/Analysis/nullptr.cpp b/clang/test/Analysis/nullptr.cpp index e9b975c148aa61..24b574a4ccfe28 100644 --- a/clang/test/Analysis/nullptr.cpp +++ b/clang/test/Analysis/nullptr.cpp @@ -64,7 +64,7 @@ void zoo1backwards() { typedef __INTPTR_TYPE__ intptr_t; void zoo1multiply() { - char **p = 0; // FIXME-should-be-note:{{'p' initialized to a null pointer value}} + char **p = 0; // expected-note{{'p' initialized to a null pointer value}} delete *((char **)((intptr_t)p * 2)); // expected-warning{{Dereference of null pointer}} // expected-note@-1{{Dereference of null pointer}} } From c9aaa2ef74f76ef9e4c219121594184f9799b96f Mon Sep 17 00:00:00 2001 From: Valeriy Savchenko Date: Mon, 29 Mar 2021 15:38:22 +0300 Subject: [PATCH 7/8] [analyzer] Support allocClassWithName in OSObjectCStyleCast checker `allocClassWithName` allocates an object with the given type. The type is actually provided as a string argument (type's name). This creates a possibility for not particularly useful warnings from the analyzer. In order to combat with those, this patch checks for casts of the `allocClassWithName` results to types mentioned directly as its argument. All other uses of this method should be reasoned about as before. rdar://72165694 Differential Revision: https://reviews.llvm.org/D99500 (cherry picked from commit 90377308de6cac8239bc1a1dcd32b57b9ec91444) --- .../Checkers/OSObjectCStyleCast.cpp | 53 +++++++++++++++---- clang/test/Analysis/os_object_base.h | 1 + .../osobjectcstylecastchecker_test.cpp | 9 ++++ 3 files changed, 53 insertions(+), 10 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Checkers/OSObjectCStyleCast.cpp b/clang/lib/StaticAnalyzer/Checkers/OSObjectCStyleCast.cpp index 270b66dab02087..0a8379d9ab99d3 100644 --- a/clang/lib/StaticAnalyzer/Checkers/OSObjectCStyleCast.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/OSObjectCStyleCast.cpp @@ -32,7 +32,21 @@ class OSObjectCStyleCastChecker : public Checker { void checkASTCodeBody(const Decl *D, AnalysisManager &AM, BugReporter &BR) const; }; +} // namespace + +namespace clang { +namespace ast_matchers { +AST_MATCHER_P(StringLiteral, mentionsBoundType, std::string, BindingID) { + return Builder->removeBindings([this, &Node](const BoundNodesMap &Nodes) { + const auto &BN = Nodes.getNode(this->BindingID); + if (const auto *ND = BN.get()) { + return ND->getName() != Node.getString(); + } + return true; + }); } +} // end namespace ast_matchers +} // end namespace clang static void emitDiagnostics(const BoundNodes &Nodes, BugReporter &BR, @@ -63,22 +77,41 @@ static decltype(auto) hasTypePointingTo(DeclarationMatcher DeclM) { return hasType(pointerType(pointee(hasDeclaration(DeclM)))); } -void OSObjectCStyleCastChecker::checkASTCodeBody(const Decl *D, AnalysisManager &AM, +void OSObjectCStyleCastChecker::checkASTCodeBody(const Decl *D, + AnalysisManager &AM, BugReporter &BR) const { AnalysisDeclContext *ADC = AM.getAnalysisDeclContext(D); auto DynamicCastM = callExpr(callee(functionDecl(hasName("safeMetaCast")))); - - auto OSObjTypeM = hasTypePointingTo(cxxRecordDecl(isDerivedFrom("OSMetaClassBase"))); + // 'allocClassWithName' allocates an object with the given type. + // The type is actually provided as a string argument (type's name). + // This makes the following pattern possible: + // + // Foo *object = (Foo *)allocClassWithName("Foo"); + // + // While OSRequiredCast can be used here, it is still not a useful warning. + auto AllocClassWithNameM = callExpr( + callee(functionDecl(hasName("allocClassWithName"))), + // Here we want to make sure that the string argument matches the + // type in the cast expression. + hasArgument(0, stringLiteral(mentionsBoundType(WarnRecordDecl)))); + + auto OSObjTypeM = + hasTypePointingTo(cxxRecordDecl(isDerivedFrom("OSMetaClassBase"))); auto OSObjSubclassM = hasTypePointingTo( - cxxRecordDecl(isDerivedFrom("OSObject")).bind(WarnRecordDecl)); - - auto CastM = cStyleCastExpr( - allOf(hasSourceExpression(allOf(OSObjTypeM, unless(DynamicCastM))), - OSObjSubclassM)).bind(WarnAtNode); - - auto Matches = match(stmt(forEachDescendant(CastM)), *D->getBody(), AM.getASTContext()); + cxxRecordDecl(isDerivedFrom("OSObject")).bind(WarnRecordDecl)); + + auto CastM = + cStyleCastExpr( + allOf(OSObjSubclassM, + hasSourceExpression( + allOf(OSObjTypeM, + unless(anyOf(DynamicCastM, AllocClassWithNameM)))))) + .bind(WarnAtNode); + + auto Matches = + match(stmt(forEachDescendant(CastM)), *D->getBody(), AM.getASTContext()); for (BoundNodes Match : Matches) emitDiagnostics(Match, BR, ADC, this); } diff --git a/clang/test/Analysis/os_object_base.h b/clang/test/Analysis/os_object_base.h index 4698185f2b3cbb..c3d5d6271d486c 100644 --- a/clang/test/Analysis/os_object_base.h +++ b/clang/test/Analysis/os_object_base.h @@ -66,6 +66,7 @@ struct OSObject : public OSMetaClassBase { struct OSMetaClass : public OSMetaClassBase { virtual OSObject * alloc() const; + static OSObject * allocClassWithName(const char * name); virtual ~OSMetaClass(){} }; diff --git a/clang/test/Analysis/osobjectcstylecastchecker_test.cpp b/clang/test/Analysis/osobjectcstylecastchecker_test.cpp index a5ebb2823a9289..aa43a18e6346f8 100644 --- a/clang/test/Analysis/osobjectcstylecastchecker_test.cpp +++ b/clang/test/Analysis/osobjectcstylecastchecker_test.cpp @@ -37,3 +37,12 @@ unsigned no_warn_on_other_type_cast(A *a) { return b->getCount(); } +unsigned no_warn_alloc_class_with_name() { + OSArray *a = (OSArray *)OSMetaClass::allocClassWithName("OSArray"); // no warning + return a->getCount(); +} + +unsigned warn_alloc_class_with_name() { + OSArray *a = (OSArray *)OSMetaClass::allocClassWithName("OSObject"); // expected-warning{{C-style cast of an OSObject is prone to type confusion attacks; use 'OSRequiredCast' if the object is definitely of type 'OSArray', or 'OSDynamicCast' followed by a null check if unsure}} + return a->getCount(); +} From 4b749e57f25e2c06afd5f2e69dd532c8a717ed4d Mon Sep 17 00:00:00 2001 From: Valeriy Savchenko Date: Wed, 24 Mar 2021 18:58:22 +0300 Subject: [PATCH 8/8] [analyzer] Fix crash when reasoning about C11 atomics (PR49422) rdar://75020762 Differential Revision: https://reviews.llvm.org/D99274 (cherry picked from commit af7e1f07ac03074647897498358aaec165c1aaea) --- .../StaticAnalyzer/Core/PathSensitive/BasicValueFactory.h | 6 ++++++ clang/test/Analysis/atomics.c | 8 ++++++++ 2 files changed, 14 insertions(+) diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/BasicValueFactory.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/BasicValueFactory.h index 142b1ab117508c..9b4a49a8b8ab85 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/BasicValueFactory.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/BasicValueFactory.h @@ -139,6 +139,12 @@ class BasicValueFactory { /// Returns the type of the APSInt used to store values of the given QualType. APSIntType getAPSIntType(QualType T) const { + // For the purposes of the analysis and constraints, we treat atomics + // as their underlying types. + if (const AtomicType *AT = T->getAs()) { + T = AT->getValueType(); + } + assert(T->isIntegralOrEnumerationType() || Loc::isLocType(T)); return APSIntType(Ctx.getIntWidth(T), !T->isSignedIntegerOrEnumerationType()); diff --git a/clang/test/Analysis/atomics.c b/clang/test/Analysis/atomics.c index b3d2d352a22831..ef1a216c7d5778 100644 --- a/clang/test/Analysis/atomics.c +++ b/clang/test/Analysis/atomics.c @@ -93,3 +93,11 @@ void test_atomic_compare_exchange_weak(struct RefCountedStruct *s) { clang_analyzer_eval(s->refCount == 3); // expected-warning {{UNKNOWN}} clang_analyzer_eval(expected == 2); // expected-warning {{UNKNOWN}} } + +// PR49422 +void test_atomic_compare(int input) { + _Atomic(int) x = input; + if (x > 0) { + // no crash + } +}