Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[analyzer] Don't copy field-by-field conjured LazyCompoundVals (2/4) #115917

Merged
merged 3 commits into from
Nov 14, 2024

Conversation

steakhal
Copy link
Contributor

Split from #114835

@llvmbot llvmbot added the clang Clang issues not falling into any other category label Nov 12, 2024
@llvmbot
Copy link

llvmbot commented Nov 12, 2024

@llvm/pr-subscribers-clang

@llvm/pr-subscribers-clang-static-analyzer-1

Author: Balazs Benics (steakhal)

Changes

Split from #114835


Full diff: https://github.com/llvm/llvm-project/pull/115917.diff

2 Files Affected:

  • (modified) clang/lib/StaticAnalyzer/Core/RegionStore.cpp (+44-9)
  • (modified) clang/test/Analysis/ctor-trivial-copy.cpp (+93-7)
diff --git a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp
index 6bad9a93a30169..b54ba43ff42414 100644
--- a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp
+++ b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp
@@ -608,6 +608,12 @@ class RegionStoreManager : public StoreManager {
     return getBinding(getRegionBindings(S), L, T);
   }
 
+  /// Returns the value of the default binding of region \p BaseR
+  /// if and only if that is the unique binding in the cluster of \p BaseR.
+  /// \p BaseR must be a base region.
+  std::optional<SVal> getUniqueDefaultBinding(Store S,
+                                              const MemRegion *BaseR) const;
+
   std::optional<SVal> getDefaultBinding(Store S, const MemRegion *R) override {
     RegionBindingsRef B = getRegionBindings(S);
     // Default bindings are always applied over a base region so look up the
@@ -2336,20 +2342,16 @@ NonLoc RegionStoreManager::createLazyBinding(RegionBindingsConstRef B,
   return svalBuilder.makeLazyCompoundVal(StoreRef(B.asStore(), *this), R);
 }
 
-static bool isRecordEmpty(const RecordDecl *RD) {
-  if (!RD->field_empty())
-    return false;
-  if (const CXXRecordDecl *CRD = dyn_cast<CXXRecordDecl>(RD))
-    return CRD->getNumBases() == 0;
-  return true;
-}
-
 SVal RegionStoreManager::getBindingForStruct(RegionBindingsConstRef B,
                                              const TypedValueRegion *R) {
   const RecordDecl *RD = R->getValueType()->castAs<RecordType>()->getDecl();
-  if (!RD->getDefinition() || isRecordEmpty(RD))
+  if (!RD->getDefinition())
     return UnknownVal();
 
+  // We also create a LCV for copying empty structs because then the store
+  // behavior doesn't depend on the struct layout.
+  // This way even an empty struct can carry taint, no matter if creduce drops
+  // the last field member or not.
   return createLazyBinding(B, R);
 }
 
@@ -2609,9 +2611,42 @@ RegionBindingsRef RegionStoreManager::bindVector(RegionBindingsConstRef B,
   return NewB;
 }
 
+std::optional<SVal>
+RegionStoreManager::getUniqueDefaultBinding(Store S,
+                                            const MemRegion *BaseR) const {
+  assert(BaseR == BaseR->getBaseRegion() && "Expecting a base region");
+  const auto *Cluster = getRegionBindings(S).lookup(BaseR);
+  if (!Cluster || !llvm::hasSingleElement(*Cluster))
+    return std::nullopt;
+
+  const auto [Key, Value] = *Cluster->begin();
+  return Key.isDirect() ? std::optional<SVal>{} : Value;
+}
+
 std::optional<RegionBindingsRef> RegionStoreManager::tryBindSmallStruct(
     RegionBindingsConstRef B, const TypedValueRegion *R, const RecordDecl *RD,
     nonloc::LazyCompoundVal LCV) {
+  // If we try to copy a Conjured value representing the value of the whole
+  // struct, don't try to element-wise copy each field.
+  // That would unnecessarily bind Derived symbols slicing off the subregion for
+  // the field from the whole Conjured symbol.
+  //
+  //   struct Window { int width; int height; };
+  //   Window getWindow(); <-- opaque fn.
+  //   Window w = getWindow(); <-- conjures a new Window.
+  //   Window w2 = w; <-- trivial copy "w", calling "tryBindSmallStruct"
+  //
+  // We should not end up with a new Store for "w2" like this:
+  //   Direct [ 0..31]: Derived{Conj{}, w.width}
+  //   Direct [32..63]: Derived{Conj{}, w.height}
+  // Instead, we should just bind that Conjured value instead.
+  if (LCV.getRegion()->getBaseRegion() == LCV.getRegion()) {
+    if (auto Val = getUniqueDefaultBinding(LCV.getStore(), LCV.getRegion())) {
+      return B.addBinding(BindingKey::Make(R, BindingKey::Default),
+                          Val.value());
+    }
+  }
+
   FieldVector Fields;
 
   if (const CXXRecordDecl *Class = dyn_cast<CXXRecordDecl>(RD))
diff --git a/clang/test/Analysis/ctor-trivial-copy.cpp b/clang/test/Analysis/ctor-trivial-copy.cpp
index 5ed188aa8f1eae..ab623c1919e152 100644
--- a/clang/test/Analysis/ctor-trivial-copy.cpp
+++ b/clang/test/Analysis/ctor-trivial-copy.cpp
@@ -1,8 +1,12 @@
-// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -analyzer-config c++-inlining=constructors -verify %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -analyzer-config c++-inlining=constructors -verify %s \
+// RUN:   2>&1 | FileCheck %s
 
 
-template<typename T>
-void clang_analyzer_dump(T&);
+void clang_analyzer_printState();
+template<typename T> void clang_analyzer_dump_lref(T&);
+template<typename T> void clang_analyzer_dump_val(T);
+template <typename T> T conjure();
+template <typename... Ts> void nop(const Ts &... args) {}
 
 struct aggr {
   int x;
@@ -15,20 +19,102 @@ struct empty {
 void test_copy_return() {
   aggr s1 = {1, 2};
   aggr const& cr1 = aggr(s1);
-  clang_analyzer_dump(cr1); // expected-warning-re {{&lifetime_extended_object{aggr, cr1, S{{[0-9]+}}} }}
+  clang_analyzer_dump_lref(cr1); // expected-warning-re {{&lifetime_extended_object{aggr, cr1, S{{[0-9]+}}} }}
 
   empty s2;
   empty const& cr2 = empty{s2};
-  clang_analyzer_dump(cr2); // expected-warning-re {{&lifetime_extended_object{empty, cr2, S{{[0-9]+}}} }}
+  clang_analyzer_dump_lref(cr2); // expected-warning-re {{&lifetime_extended_object{empty, cr2, S{{[0-9]+}}} }}
 }
 
 void test_assign_return() {
   aggr s1 = {1, 2};
   aggr d1;
-  clang_analyzer_dump(d1 = s1); // expected-warning {{&d1 }}
+  clang_analyzer_dump_lref(d1 = s1); // expected-warning {{&d1 }}
 
   empty s2;
   empty d2;
-  clang_analyzer_dump(d2 = s2); // expected-warning {{&d2 }} was Unknown
+  clang_analyzer_dump_lref(d2 = s2); // expected-warning {{&d2 }} was Unknown
 }
 
+
+namespace trivial_struct_copy {
+
+void _01_empty_structs() {
+  clang_analyzer_dump_val(conjure<empty>()); // expected-warning {{lazyCompoundVal}}
+  empty Empty = conjure<empty>();
+  empty Empty2 = Empty;
+  empty Empty3 = Empty2;
+  // All of these should refer to the exact same LCV, because all of
+  // these trivial copies refer to the original conjured value.
+  // There were Unknown before:
+  clang_analyzer_dump_val(Empty);  // expected-warning {{lazyCompoundVal}}
+  clang_analyzer_dump_val(Empty2); // expected-warning {{lazyCompoundVal}}
+  clang_analyzer_dump_val(Empty3); // expected-warning {{lazyCompoundVal}}
+
+  // Notice that we don't have entries for the copies, only for the original "Empty" object.
+  // That binding was added by the opaque "conjure" call, directly constructing to the variable "Empty" by copy-elision.
+  // The copies are not present because the ExprEngine skips the evalBind of empty structs.
+  // And even if such binds would reach the Store, the Store copies small structs by copying the individual fields,
+  // and there are no fields within "Empty", thus we wouldn't have any entries anyways.
+  clang_analyzer_printState();
+  // CHECK:       "store": { "pointer": "0x{{[0-9a-f]+}}", "items": [
+  // CHECK-NEXT:    { "cluster": "GlobalInternalSpaceRegion", "pointer": "0x{{[0-9a-f]+}}", "items": [
+  // CHECK-NEXT:      { "kind": "Default", "offset": 0, "value": "conj_$
+  // CHECK-NEXT:    ]},
+  // CHECK-NEXT:    { "cluster": "GlobalSystemSpaceRegion", "pointer": "0x{{[0-9a-f]+}}", "items": [
+  // CHECK-NEXT:      { "kind": "Default", "offset": 0, "value": "conj_$
+  // CHECK-NEXT:    ]},
+  // CHECK-NEXT:    { "cluster": "Empty", "pointer": "0x{{[0-9a-f]+}}", "items": [
+  // CHECK-NEXT:      { "kind": "Default", "offset": 0, "value": "[[EMPTY_CONJ:conj_\$[0-9]+{int, LC[0-9]+, S[0-9]+, #[0-9]+}]]" }
+  // CHECK-NEXT:    ]}
+  // CHECK-NEXT:  ]},
+
+  nop(Empty, Empty2, Empty3);
+}
+
+void _02_structs_with_members() {
+  clang_analyzer_dump_val(conjure<aggr>()); // expected-warning {{lazyCompoundVal}}
+  aggr Aggr = conjure<aggr>();
+  aggr Aggr2 = Aggr;
+  aggr Aggr3 = Aggr2;
+  // All of these should refer to the exact same LCV, because all of
+  // these trivial copies refer to the original conjured value.
+  clang_analyzer_dump_val(Aggr);  // expected-warning {{lazyCompoundVal}}
+  clang_analyzer_dump_val(Aggr2); // expected-warning {{lazyCompoundVal}}
+  clang_analyzer_dump_val(Aggr3); // expected-warning {{lazyCompoundVal}}
+
+  // We should have the same Conjured symbol for "Aggr", "Aggr2" and "Aggr3".
+  // We used to have Derived symbols for the individual fields that were
+  // copied as part of copying the whole struct.
+  clang_analyzer_printState();
+  // CHECK:       "store": { "pointer": "0x{{[0-9a-f]+}}", "items": [
+  // CHECK-NEXT:    { "cluster": "GlobalInternalSpaceRegion", "pointer": "0x{{[0-9a-f]+}}", "items": [
+  // CHECK-NEXT:      { "kind": "Default", "offset": 0, "value": "conj_$
+  // CHECK-NEXT:    ]},
+  // CHECK-NEXT:    { "cluster": "GlobalSystemSpaceRegion", "pointer": "0x{{[0-9a-f]+}}", "items": [
+  // CHECK-NEXT:      { "kind": "Default", "offset": 0, "value": "conj_$
+  // CHECK-NEXT:    ]},
+  // CHECK-NEXT:    { "cluster": "Aggr", "pointer": "0x{{[0-9a-f]+}}", "items": [
+  // CHECK-NEXT:      { "kind": "Default", "offset": 0, "value": "[[AGGR_CONJ:conj_\$[0-9]+{int, LC[0-9]+, S[0-9]+, #[0-9]+}]]" }
+  // CHECK-NEXT:    ]},
+  // CHECK-NEXT:    { "cluster": "Aggr2", "pointer": "0x{{[0-9a-f]+}}", "items": [
+  // CHECK-NEXT:      { "kind": "Default", "offset": 0, "value": "[[AGGR_CONJ]]" }
+  // CHECK-NEXT:    ]},
+  // CHECK-NEXT:    { "cluster": "Aggr3", "pointer": "0x{{[0-9a-f]+}}", "items": [
+  // CHECK-NEXT:      { "kind": "Default", "offset": 0, "value": "[[AGGR_CONJ]]" }
+  // CHECK-NEXT:    ]}
+  // CHECK-NEXT:  ]},
+
+  nop(Aggr, Aggr2, Aggr3);
+}
+
+// Tests that use `clang_analyzer_printState()` must share the analysis entry
+// point, and have a strict ordering between. This is to meet the different
+// `clang_analyzer_printState()` calls in a fixed relative ordering, thus
+// FileCheck could check the stdouts.
+void entrypoint() {
+  _01_empty_structs();
+  _02_structs_with_members();
+}
+
+} // namespace trivial_struct_copy

Copy link
Contributor

@NagyDonat NagyDonat left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Overall LGTM, I added a few minor inline comments, but they are not blocking issues.

clang/lib/StaticAnalyzer/Core/RegionStore.cpp Outdated Show resolved Hide resolved
clang/lib/StaticAnalyzer/Core/RegionStore.cpp Outdated Show resolved Hide resolved
@steakhal steakhal merged commit 4610e5c into llvm:main Nov 14, 2024
5 of 7 checks passed
@steakhal steakhal deleted the bb/lcv-patches-2 branch November 14, 2024 15:28
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
clang:static analyzer clang Clang issues not falling into any other category
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants