
Commit bedba19
Merged main:ea2036e1e56b into amd-gfx:e1b9ddbd68c9
Local branch amd-gfx e1b9ddb Merged main:4f98fb2e937a into amd-gfx:5c2235853bc1
Remote branch main ea2036e [scudo] Fix the use of ASSERT_CAPABILITY in TSD (llvm#68273)
SC llvm team authored and committed on Oct 5, 2023
2 parents e1b9ddb + ea2036e commit bedba19
Showing 27 changed files with 541 additions and 142 deletions.
5 changes: 5 additions & 0 deletions compiler-rt/lib/scudo/standalone/combined.h
@@ -245,12 +245,14 @@ class Allocator {
   // - unlinking the local stats from the global ones (destroying the cache does
   // the last two items).
   void commitBack(TSD<ThisT> *TSD) {
+    TSD->assertLocked(/*BypassCheck=*/true);
     Quarantine.drain(&TSD->getQuarantineCache(),
                      QuarantineCallback(*this, TSD->getCache()));
     TSD->getCache().destroy(&Stats);
   }

   void drainCache(TSD<ThisT> *TSD) {
+    TSD->assertLocked(/*BypassCheck=*/true);
     Quarantine.drainAndRecycle(&TSD->getQuarantineCache(),
                                QuarantineCallback(*this, TSD->getCache()));
     TSD->getCache().drain();
@@ -363,6 +365,7 @@ class Allocator {
       DCHECK_NE(ClassId, 0U);
       bool UnlockRequired;
       auto *TSD = TSDRegistry.getTSDAndLock(&UnlockRequired);
+      TSD->assertLocked(/*BypassCheck=*/!UnlockRequired);
       Block = TSD->getCache().allocate(ClassId);
       // If the allocation failed, the most likely reason with a 32-bit primary
       // is the region being full. In that event, retry in each successively
@@ -1147,6 +1150,7 @@ class Allocator {
     if (LIKELY(ClassId)) {
       bool UnlockRequired;
       auto *TSD = TSDRegistry.getTSDAndLock(&UnlockRequired);
+      TSD->assertLocked(/*BypassCheck=*/!UnlockRequired);
       const bool CacheDrained =
           TSD->getCache().deallocate(ClassId, BlockBegin);
       if (UnlockRequired)
@@ -1166,6 +1170,7 @@ class Allocator {
     } else {
       bool UnlockRequired;
       auto *TSD = TSDRegistry.getTSDAndLock(&UnlockRequired);
+      TSD->assertLocked(/*BypassCheck=*/!UnlockRequired);
       Quarantine.put(&TSD->getQuarantineCache(),
                      QuarantineCallback(*this, TSD->getCache()), Ptr, Size);
       if (UnlockRequired)
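
A note on the two flavors of the new assertion in this file: commitBack() and drainCache() run on teardown paths where the TSD is owned exclusively and no lock is ever taken, so they pass /*BypassCheck=*/true; the allocation and deallocation fast paths pass /*BypassCheck=*/!UnlockRequired, because getTSDAndLock() only takes the mutex when it hands out a shared TSD. A toy, compilable model of that call shape follows; TSD, Registry, and the lock bookkeeping are illustrative stand-ins, not the Scudo classes:

#include <cassert>

// Stand-ins for scudo's TSD and TSD registry (illustration only).
struct TSD {
  bool Locked = false; // a real TSD guards its state with a mutex
  void lock() { Locked = true; }
  void unlock() { Locked = false; }
  // Runtime counterpart of the diff's assertLocked(): skip the check when
  // the caller knows no lock was taken for this TSD.
  void assertLocked(bool BypassCheck) {
    if (!BypassCheck)
      assert(Locked && "TSD mutex must be held here");
  }
};

struct Registry {
  TSD PerThread; // exclusive TSD: never locked
  TSD *getTSDAndLock(bool *UnlockRequired) {
    // This toy always returns the exclusive TSD; a shared registry would
    // lock a shared TSD and set *UnlockRequired = true instead.
    *UnlockRequired = false;
    return &PerThread;
  }
};

int main() {
  Registry TSDRegistry;
  bool UnlockRequired;
  auto *TSD = TSDRegistry.getTSDAndLock(&UnlockRequired);
  TSD->assertLocked(/*BypassCheck=*/!UnlockRequired); // fast-path flavor
  if (UnlockRequired)
    TSD->unlock();
  return 0;
}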
3 changes: 3 additions & 0 deletions compiler-rt/lib/scudo/standalone/tests/combined_test.cpp
@@ -512,6 +512,7 @@ SCUDO_TYPED_TEST(ScudoCombinedTest, CacheDrain) NO_THREAD_SAFETY_ANALYSIS {

   bool UnlockRequired;
   auto *TSD = Allocator->getTSDRegistry()->getTSDAndLock(&UnlockRequired);
+  TSD->assertLocked(/*BypassCheck=*/!UnlockRequired);
   EXPECT_TRUE(!TSD->getCache().isEmpty());
   TSD->getCache().drain();
   EXPECT_TRUE(TSD->getCache().isEmpty());
@@ -536,6 +537,7 @@ SCUDO_TYPED_TEST(ScudoCombinedTest, ForceCacheDrain) NO_THREAD_SAFETY_ANALYSIS {

   bool UnlockRequired;
   auto *TSD = Allocator->getTSDRegistry()->getTSDAndLock(&UnlockRequired);
+  TSD->assertLocked(/*BypassCheck=*/!UnlockRequired);
   EXPECT_TRUE(TSD->getCache().isEmpty());
   EXPECT_EQ(TSD->getQuarantineCache().getSize(), 0U);
   EXPECT_TRUE(Allocator->getQuarantine()->isEmpty());
@@ -842,6 +844,7 @@ TEST(ScudoCombinedTest, BasicTrustyConfig) {

   bool UnlockRequired;
   auto *TSD = Allocator->getTSDRegistry()->getTSDAndLock(&UnlockRequired);
+  TSD->assertLocked(/*BypassCheck=*/!UnlockRequired);
   TSD->getCache().drain();

   Allocator->releaseToOS(scudo::ReleaseToOS::Force);
4 changes: 4 additions & 0 deletions compiler-rt/lib/scudo/standalone/tests/tsd_test.cpp
@@ -101,13 +101,15 @@ template <class AllocatorT> static void testRegistry() {

   bool UnlockRequired;
   auto TSD = Registry->getTSDAndLock(&UnlockRequired);
+  TSD->assertLocked(/*BypassCheck=*/!UnlockRequired);
   EXPECT_NE(TSD, nullptr);
   EXPECT_EQ(TSD->getCache().Canary, 0U);
   if (UnlockRequired)
     TSD->unlock();

   Registry->initThreadMaybe(Allocator.get(), /*MinimalInit=*/false);
   TSD = Registry->getTSDAndLock(&UnlockRequired);
+  TSD->assertLocked(/*BypassCheck=*/!UnlockRequired);
   EXPECT_NE(TSD, nullptr);
   EXPECT_EQ(TSD->getCache().Canary, 0U);
   memset(&TSD->getCache(), 0x42, sizeof(TSD->getCache()));
@@ -137,6 +139,7 @@ template <typename AllocatorT> static void stressCache(AllocatorT *Allocator) {
   Registry->initThreadMaybe(Allocator, /*MinimalInit=*/false);
   bool UnlockRequired;
   auto TSD = Registry->getTSDAndLock(&UnlockRequired);
+  TSD->assertLocked(/*BypassCheck=*/!UnlockRequired);
   EXPECT_NE(TSD, nullptr);
   // For an exclusive TSD, the cache should be empty. We cannot guarantee the
   // same for a shared TSD.
@@ -195,6 +198,7 @@ static void stressSharedRegistry(MockAllocator<SharedCaches> *Allocator) {
   bool UnlockRequired;
   for (scudo::uptr I = 0; I < 4096U; I++) {
     auto TSD = Registry->getTSDAndLock(&UnlockRequired);
+    TSD->assertLocked(/*BypassCheck=*/!UnlockRequired);
     EXPECT_NE(TSD, nullptr);
     Set.insert(reinterpret_cast<void *>(TSD));
     if (UnlockRequired)
17 changes: 10 additions & 7 deletions compiler-rt/lib/scudo/standalone/tsd.h
@@ -53,8 +53,14 @@ template <class Allocator> struct alignas(SCUDO_CACHE_LINE_SIZE) TSD {
   inline void unlock() NO_THREAD_SAFETY_ANALYSIS { Mutex.unlock(); }
   inline uptr getPrecedence() { return atomic_load_relaxed(&Precedence); }

-  void commitBack(Allocator *Instance) ASSERT_CAPABILITY(Mutex) {
-    Instance->commitBack(this);
+  void commitBack(Allocator *Instance) { Instance->commitBack(this); }
+
+  // As the comments attached to `getCache()`, the TSD doesn't always need to be
+  // locked. In that case, we would only skip the check before we have all TSDs
+  // locked in all paths.
+  void assertLocked(bool BypassCheck) ASSERT_CAPABILITY(Mutex) {
+    if (SCUDO_DEBUG && !BypassCheck)
+      Mutex.assertHeld();
   }

   // Ideally, we may want to assert that all the operations on
@@ -66,11 +72,8 @@ template <class Allocator> struct alignas(SCUDO_CACHE_LINE_SIZE) TSD {
   // TODO(chiahungduan): Ideally, we want to do `Mutex.assertHeld` but acquiring
   // TSD doesn't always require holding the lock. Add this assertion while the
   // lock is always acquired.
-  typename Allocator::CacheT &getCache() ASSERT_CAPABILITY(Mutex) {
-    return Cache;
-  }
-  typename Allocator::QuarantineCacheT &getQuarantineCache()
-      ASSERT_CAPABILITY(Mutex) {
+  typename Allocator::CacheT &getCache() REQUIRES(Mutex) { return Cache; }
+  typename Allocator::QuarantineCacheT &getQuarantineCache() REQUIRES(Mutex) {
     return QuarantineCache;
   }

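
This hunk is the substance of llvm#68273. In Clang's thread-safety analysis, ASSERT_CAPABILITY belongs on functions that check a capability at runtime (afterwards the analyzer treats it as held on that path), whereas REQUIRES declares a static precondition the caller must already satisfy. The old code hung ASSERT_CAPABILITY on plain getters, which checked nothing; the fix funnels the runtime check through one assertLocked() entry point and downgrades the getters to REQUIRES. A self-contained sketch of the corrected split, compilable with clang++ -std=c++17 -Wthread-safety -c; the Mutex wrapper, TSD, and Cache below are stand-ins, not the Scudo classes:

#include <mutex>

#define CAPABILITY(x) __attribute__((capability(x)))
#define ACQUIRE(...) __attribute__((acquire_capability(__VA_ARGS__)))
#define RELEASE(...) __attribute__((release_capability(__VA_ARGS__)))
#define REQUIRES(...) __attribute__((requires_capability(__VA_ARGS__)))
#define ASSERT_CAPABILITY(x) __attribute__((assert_capability(x)))
#define NO_THREAD_SAFETY_ANALYSIS __attribute__((no_thread_safety_analysis))

class CAPABILITY("mutex") Mutex {
public:
  void lock() ACQUIRE() { M.lock(); }
  void unlock() RELEASE() { M.unlock(); }

private:
  std::mutex M;
};

struct Cache { int Count = 0; };

struct TSD {
  Mutex M;
  Cache C;

  // ASSERT_CAPABILITY: a runtime check. After it returns, the analyzer
  // treats M as held on this path, whether or not a lock was taken.
  void assertLocked(bool BypassCheck) ASSERT_CAPABILITY(M) {
    (void)BypassCheck; // the real code calls Mutex.assertHeld() in debug
  }

  // REQUIRES: a static precondition; calling this while the analyzer does
  // not consider M held is a -Wthread-safety warning at compile time.
  Cache &getCache() REQUIRES(M) { return C; }

  // Mirrors scudo's TSD::unlock(): hidden from the analysis because the
  // mutex is only ever taken on the shared-TSD path.
  void unlock() NO_THREAD_SAFETY_ANALYSIS { M.unlock(); }
};

void goodCaller(TSD &T, bool UnlockRequired) {
  T.assertLocked(/*BypassCheck=*/!UnlockRequired); // satisfies REQUIRES below
  T.getCache().Count++;
  if (UnlockRequired)
    T.unlock();
}

// void badCaller(TSD &T) {
//   T.getCache().Count++; // warning: requires holding mutex 'T.M'
// }

The same reasoning explains the tsd_shared.h hunk below: lock()/unlock() carry no annotations, so getStats() asserts the capability explicitly after physically taking the lock.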
5 changes: 5 additions & 0 deletions compiler-rt/lib/scudo/standalone/tsd_shared.h
@@ -120,6 +120,11 @@ struct TSDRegistrySharedT {
                   TSDsArraySize);
     for (uptr I = 0; I < NumberOfTSDs; ++I) {
       TSDs[I].lock();
+      // Theoretically, we want to mark TSD::lock()/TSD::unlock() with proper
+      // thread annotations. However, given the TSD is only locked on shared
+      // path, do the assertion in a separate path to avoid confusing the
+      // analyzer.
+      TSDs[I].assertLocked(/*BypassCheck=*/true);
       Str->append("  Shared TSD[%zu]:\n", I);
       TSDs[I].getCache().getStats(Str);
       TSDs[I].unlock();
2 changes: 1 addition & 1 deletion llvm/include/llvm/BinaryFormat/DXContainer.h
@@ -453,7 +453,7 @@ struct ProgramSignatureHeader {

   void swapBytes() {
     sys::swapByteOrder(ParamCount);
-    sys::swapByteOrder(ParamCount);
+    sys::swapByteOrder(FirstParamOffset);
   }
 };

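
The one-line fix above corrects a copy-paste bug: swapBytes() byte-swapped ParamCount twice (a net no-op, leaving it in the original byte order) and never touched FirstParamOffset. A self-contained check of the corrected behavior, with toy stand-ins for the LLVM types:

#include <cassert>
#include <cstdint>

// Models sys::swapByteOrder() for a 32-bit value (illustration only).
static uint32_t bswap32(uint32_t V) {
  return (V >> 24) | ((V >> 8) & 0xFF00u) | ((V << 8) & 0xFF0000u) | (V << 24);
}

struct ProgramSignatureHeader { // toy copy of the struct in the diff
  uint32_t ParamCount;
  uint32_t FirstParamOffset;
  void swapBytes() {
    ParamCount = bswap32(ParamCount);
    FirstParamOffset = bswap32(FirstParamOffset); // previously ParamCount again
  }
};

int main() {
  ProgramSignatureHeader H{0x01020304u, 0x0A0B0C0Du};
  H.swapBytes();
  assert(H.ParamCount == 0x04030201u);       // swapped exactly once
  assert(H.FirstParamOffset == 0x0D0C0B0Au); // was left unswapped pre-fix
  return 0;
}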
35 changes: 28 additions & 7 deletions llvm/include/llvm/CodeGen/AccelTable.h
@@ -106,6 +106,7 @@ namespace llvm {
 class AsmPrinter;
 class DwarfCompileUnit;
 class DwarfDebug;
+class DwarfTypeUnit;
 class MCSymbol;
 class raw_ostream;

@@ -197,6 +198,9 @@ template <typename DataT> class AccelTable : public AccelTableBase {

   template <typename... Types>
   void addName(DwarfStringPoolEntryRef Name, Types &&... Args);
+  void clear() { Entries.clear(); }
+  void addEntries(AccelTable<DataT> &Table);
+  const StringEntries getEntries() const { return Entries; }
 };

 template <typename AccelTableDataT>
@@ -215,6 +219,16 @@ void AccelTable<AccelTableDataT>::addName(DwarfStringPoolEntryRef Name,
                          AccelTableDataT(std::forward<Types>(Args)...));
 }

+template <typename AccelTableDataT>
+void AccelTable<AccelTableDataT>::addEntries(
+    AccelTable<AccelTableDataT> &Table) {
+  for (auto &Entry : Table.getEntries()) {
+    for (AccelTableData *Value : Entry.second.Values)
+      addName(Entry.second.Name,
+              static_cast<AccelTableDataT *>(Value)->getDie());
+  }
+}
+
 /// A base class for different implementations of Data classes for Apple
 /// Accelerator Tables. The columns in the table are defined by the static Atoms
 /// variable defined on the subclasses.
@@ -250,6 +264,10 @@ class AppleAccelTableData : public AccelTableData {
 /// emitDWARF5AccelTable function.
 class DWARF5AccelTableData : public AccelTableData {
 public:
+  struct AttributeEncoding {
+    dwarf::Index Index;
+    dwarf::Form Form;
+  };
   static uint32_t hash(StringRef Name) { return caseFoldingDjbHash(Name); }

   DWARF5AccelTableData(const DIE &Die) : Die(Die) {}
@@ -309,17 +327,20 @@ void emitAppleAccelTable(AsmPrinter *Asm, AccelTable<DataT> &Contents,
 void emitDWARF5AccelTable(AsmPrinter *Asm,
                           AccelTable<DWARF5AccelTableData> &Contents,
                           const DwarfDebug &DD,
-                          ArrayRef<std::unique_ptr<DwarfCompileUnit>> CUs);
-
+                          ArrayRef<std::unique_ptr<DwarfCompileUnit>> CUs,
+                          ArrayRef<std::unique_ptr<DwarfTypeUnit>> TUs);
+using GetIndexForEntryReturnType =
+    std::optional<std::pair<unsigned, DWARF5AccelTableData::AttributeEncoding>>;
 /// Emit a DWARFv5 Accelerator Table consisting of entries in the specified
 /// AccelTable. The \p CUs contains either symbols keeping offsets to the
 /// start of compilation unit, either offsets to the start of compilation
 /// unit themselves.
-void emitDWARF5AccelTable(
-    AsmPrinter *Asm, AccelTable<DWARF5AccelTableStaticData> &Contents,
-    ArrayRef<std::variant<MCSymbol *, uint64_t>> CUs,
-    llvm::function_ref<unsigned(const DWARF5AccelTableStaticData &)>
-        getCUIndexForEntry);
+void emitDWARF5AccelTable(AsmPrinter *Asm,
+                          AccelTable<DWARF5AccelTableStaticData> &Contents,
+                          ArrayRef<std::variant<MCSymbol *, uint64_t>> CUs,
+                          llvm::function_ref<GetIndexForEntryReturnType(
+                              const DWARF5AccelTableStaticData &)>
+                              getIndexForEntry);

 /// Accelerator table data implementation for simple Apple accelerator tables
 /// with just a DIE reference.
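
Context for the additions above: the new DwarfTypeUnit parameter, the AttributeEncoding pair, and the optional (index, encoding) return type extend DWARFv5 accelerator-table emission to cover type units as well as compile units, and addEntries() merges one table's entries into another by re-adding every (name, value) pair. A toy model of that merge idiom — simple containers instead of LLVM's hashed string-pool entries, so it compiles standalone:

#include <iostream>
#include <map>
#include <string>
#include <vector>

// A toy accelerator table shaped like the API in the diff (not LLVM's
// hash-bucketed implementation): entries are keyed by name and carry a
// list of values standing in for DIE pointers.
struct ToyAccelTable {
  std::map<std::string, std::vector<int>> Entries;

  void addName(const std::string &Name, int Value) {
    Entries[Name].push_back(Value);
  }
  void clear() { Entries.clear(); }
  // Mirrors the new AccelTable::addEntries(): re-add every (name, value)
  // pair from the other table, keeping duplicates.
  void addEntries(const ToyAccelTable &Table) {
    for (const auto &Entry : Table.Entries)
      for (int Value : Entry.second)
        addName(Entry.first, Value);
  }
};

int main() {
  ToyAccelTable CUTable, TUTable;
  CUTable.addName("foo", 1);
  TUTable.addName("foo", 2);
  TUTable.addName("bar", 3);
  CUTable.addEntries(TUTable); // foo -> {1, 2}, bar -> {3}
  for (const auto &E : CUTable.Entries)
    std::cout << E.first << ": " << E.second.size() << " value(s)\n";
  return 0;
}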
2 changes: 1 addition & 1 deletion llvm/include/llvm/Config/llvm-config.h.cmake
@@ -16,7 +16,7 @@

 /* Indicate that this is LLVM compiled from the amd-gfx branch. */
 #define LLVM_HAVE_BRANCH_AMD_GFX
-#define LLVM_MAIN_REVISION 476916
+#define LLVM_MAIN_REVISION 476920

 /* Define if LLVM_ENABLE_DUMP is enabled */
 #cmakedefine LLVM_ENABLE_DUMP
(The remaining 19 of the 27 changed files are not shown here.)
