CHERI prep: stave off amplification for Remote routing #266

Merged: 3 commits, Dec 21, 2020
103 changes: 42 additions & 61 deletions src/mem/alloc.h
@@ -510,9 +510,19 @@ namespace snmalloc
#endif
}

size_t get_id()
/**
* Return this allocator's "truncated" ID, an integer useful as a hash
* value of this allocator.
*
* Specifically, this is the address of this allocator's message queue
* with the least significant bits (those covered by SIZECLASS_MASK)
* cleared.
* This will be unique for Allocs with inline queues; Allocs with
* out-of-line queues must ensure that no two queues' addresses collide
* under this masking.
*/
size_t get_trunc_id()
{
return id();
return public_state()->trunc_id();
}

private:
@@ -571,30 +581,26 @@ namespace snmalloc
IsQueueInline>);
constexpr size_t initial_shift =
bits::next_pow2_bits_const(allocator_size);
static_assert(
initial_shift >= 8,
"Can't embed sizeclass_t into allocator ID low bits");
SNMALLOC_ASSERT((initial_shift + (r * REMOTE_SLOT_BITS)) < 64);
return (id >> (initial_shift + (r * REMOTE_SLOT_BITS))) & REMOTE_MASK;
}

SNMALLOC_FAST_PATH void
dealloc_sized(alloc_id_t target_id, void* p, size_t objectsize)
dealloc(alloc_id_t target_id, void* p, sizeclass_t sizeclass)
{
this->capacity -= objectsize;
this->capacity -= sizeclass_to_size(sizeclass);

Remote* r = static_cast<Remote*>(p);
r->set_target_id(target_id);
SNMALLOC_ASSERT(r->target_id() == target_id);
r->set_info(target_id, sizeclass);

RemoteList* l = &list[get_slot(target_id, 0)];
l->last->non_atomic_next = r;
l->last = r;
}

SNMALLOC_FAST_PATH void
dealloc(alloc_id_t target_id, void* p, sizeclass_t sizeclass)
{
dealloc_sized(target_id, p, sizeclass_to_size(sizeclass));
}

void post(alloc_id_t id)
{
// When the cache gets big, post lists to their target allocators.
@@ -640,7 +646,7 @@ namespace snmalloc
{
// Use the next N bits to spread out remote deallocs in our own
// slot.
size_t slot = get_slot(r->target_id(), post_round);
size_t slot = get_slot(r->trunc_target_id(), post_round);
RemoteList* l = &list[slot];
l->last->non_atomic_next = r;
l->last = r;
@@ -694,11 +700,6 @@ namespace snmalloc
}
}

alloc_id_t id()
{
return public_state()->id();
}

auto& message_queue()
{
return public_state()->message_queue;
@@ -725,9 +726,6 @@ namespace snmalloc
remote_alloc = r;
}

if (id() >= static_cast<alloc_id_t>(-1))
error("Id should not be -1");

// If this is fake, don't do any of the bits of initialisation that may
// allocate memory.
if (isFake)
@@ -863,56 +861,40 @@ namespace snmalloc
{
error("Critical error: Out-of-memory during initialisation.");
}
dummy->set_target_id(id());
dummy->set_info(get_trunc_id(), size_to_sizeclass_const(MIN_ALLOC_SIZE));
message_queue().init(dummy);
}

SNMALLOC_FAST_PATH void handle_dealloc_remote(Remote* p)
{
Superslab* super = Superslab::get(p);
if (likely(p->trunc_target_id() == get_trunc_id()))
{
// Destined for my slabs
Superslab* super = Superslab::get(p);

#ifdef CHECK_CLIENT
if (p->target_id() != super->get_allocator()->id())
error("Detected memory corruption. Potential use-after-free");
if (p->trunc_target_id() != (super->get_allocator()->trunc_id()))
error("Detected memory corruption. Potential use-after-free");
#endif
if (likely(super->get_kind() == Super))
{
Slab* slab = Metaslab::get_slab(p);
Metaslab& meta = super->get_meta(slab);
if (likely(p->target_id() == id()))
{
small_dealloc_offseted(super, p, meta.sizeclass);
return;
}
}
handle_dealloc_remote_slow(p);
}
// Guard against remote queues that have colliding IDs
SNMALLOC_ASSERT(super->get_allocator() == public_state());

SNMALLOC_SLOW_PATH void handle_dealloc_remote_slow(Remote* p)
{
Superslab* super = Superslab::get(p);
if (likely(super->get_kind() == Medium))
{
Mediumslab* slab = Mediumslab::get(p);
if (p->target_id() == id())
if (likely(p->sizeclass() < NUM_SMALL_CLASSES))
{
sizeclass_t sizeclass = slab->get_sizeclass();
void* start = remove_cache_friendly_offset(p, sizeclass);
medium_dealloc(slab, start, sizeclass);
SNMALLOC_ASSERT(super->get_kind() == Super);
small_dealloc_offseted(super, p, p->sizeclass());
}
else
{
// Queue for remote dealloc elsewhere.
remote.dealloc(p->target_id(), p, slab->get_sizeclass());
SNMALLOC_ASSERT(super->get_kind() == Medium);
void* start = remove_cache_friendly_offset(p, p->sizeclass());
medium_dealloc(Mediumslab::get(p), start, p->sizeclass());
}
}
else
{
SNMALLOC_ASSERT(likely(p->target_id() != id()));
Slab* slab = Metaslab::get_slab(p);
Metaslab& meta = super->get_meta(slab);
// Queue for remote dealloc elsewhere.
remote.dealloc(p->target_id(), p, meta.sizeclass);
// Merely routing
remote.dealloc(p->trunc_target_id(), p, p->sizeclass());
}
}

@@ -933,7 +915,7 @@ namespace snmalloc
return;

stats().remote_post();
remote.post(id());
remote.post(get_trunc_id());
}

/**
@@ -1468,17 +1450,16 @@ namespace snmalloc
void remote_dealloc(RemoteAllocator* target, void* p, sizeclass_t sizeclass)
{
MEASURE_TIME(remote_dealloc, 4, 16);
SNMALLOC_ASSERT(target->id() != id());
SNMALLOC_ASSERT(target->trunc_id() != get_trunc_id());

// Check whether this will overflow the cache first. If we are a fake
// allocator, then our cache will always be full and so we will never hit
// this path.
size_t sz = sizeclass_to_size(sizeclass);
if (remote.capacity > 0)
{
void* offseted = apply_cache_friendly_offset(p, sizeclass);
stats().remote_free(sizeclass);
remote.dealloc_sized(target->id(), offseted, sz);
remote.dealloc(target->trunc_id(), offseted, sizeclass);
return;
}

@@ -1488,7 +1469,7 @@ namespace snmalloc
SNMALLOC_SLOW_PATH void
remote_dealloc_slow(RemoteAllocator* target, void* p, sizeclass_t sizeclass)
{
SNMALLOC_ASSERT(target->id() != id());
SNMALLOC_ASSERT(target->trunc_id() != get_trunc_id());

// Now that we've established that we're in the slow path (if we're a
// real allocator, we will have to empty our cache now), check if we are
Expand All @@ -1506,10 +1487,10 @@ namespace snmalloc

stats().remote_free(sizeclass);
void* offseted = apply_cache_friendly_offset(p, sizeclass);
remote.dealloc(target->id(), offseted, sizeclass);
remote.dealloc(target->trunc_id(), offseted, sizeclass);

stats().remote_post();
remote.post(id());
remote.post(get_trunc_id());
}

ChunkMap& chunkmap()
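For context on the new static_assert in get_slot() above, here is a minimal standalone sketch of the slot hashing. The constants are invented for illustration (the real initial_shift comes from bits::next_pow2_bits_const over the allocator size, as shown in the diff); the point is that the hash skips at least the bottom 8 bits of the truncated ID, so a sizeclass embedded there never changes which RemoteList a message lands in.

// Standalone sketch of the slot hashing shown in get_slot() above; the
// constants here are assumed for illustration, not snmalloc's real values.
#include <cstddef>

constexpr size_t REMOTE_SLOT_BITS = 6;   // assumed width of one routing digit
constexpr size_t REMOTE_MASK = (size_t(1) << REMOTE_SLOT_BITS) - 1;
constexpr size_t initial_shift = 13;     // assumed next_pow2_bits_const(allocator_size)

// Mirrors the new static_assert: the hash must never look at the low byte,
// because that byte may now carry a sizeclass_t.
static_assert(
  initial_shift >= 8, "Can't embed sizeclass_t into allocator ID low bits");

size_t get_slot(size_t trunc_id, size_t round)
{
  // Each posting round consumes the next REMOTE_SLOT_BITS of the ID,
  // starting above initial_shift.
  return (trunc_id >> (initial_shift + (round * REMOTE_SLOT_BITS))) & REMOTE_MASK;
}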
2 changes: 1 addition & 1 deletion src/mem/globalalloc.h
@@ -126,7 +126,7 @@ namespace snmalloc
if (alloc->remote.capacity < REMOTE_CACHE)
{
alloc->stats().remote_post();
alloc->remote.post(alloc->id());
alloc->remote.post(alloc->get_trunc_id());
done = false;
}

33 changes: 26 additions & 7 deletions src/mem/remoteallocator.h
@@ -22,16 +22,34 @@ namespace snmalloc
std::atomic<Remote*> next{nullptr};
};

alloc_id_t allocator_id;
/*
* We embed the size class in the bottom 8 bits of an allocator ID (i.e.,
* the address of an Alloc's remote_alloc's message_queue; in practice we
* only need 7 bits, but using 8 is conjectured to be faster). The hashing
* algorithm of the Alloc's RemoteCache already ignores the bottom
* "initial_shift" bits, which is, in practice, well above 8. There's a
* static_assert() over there that helps ensure this stays true.
*
* This does mean that we might have message_queues that always collide in
* the hash algorithm, if they're within "initial_shift" of each other. Such
* pairings will substantially decrease performance and so we prohibit them
* and use SNMALLOC_ASSERT to verify that they do not exist in debug builds.
*/
alloc_id_t alloc_id_and_sizeclass;
Collaborator:

Can this be a class type? We have some helpers for bitfield extracts, so we could easily provide an abstract interface for getting the two bits.

Member:

So this is pretty contained in a single class already; adding another layer of abstraction probably wouldn't buy us much.
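
For illustration only, a hypothetical sketch of the class-type approach the first comment suggests: a small wrapper holding the packed word, with accessors in place of the open-coded masking. The name TruncIdAndSizeclass and its interface are invented here; the PR keeps the plain alloc_id_t field, as discussed above.

// Hypothetical wrapper (not part of this PR) around the packed
// truncated-ID-plus-sizeclass word used by Remote below.
#include <cstdint>

class TruncIdAndSizeclass
{
  static constexpr uintptr_t SIZECLASS_MASK = 0xFF;
  uintptr_t bits = 0;

public:
  void set(uintptr_t trunc_id, uint8_t sizeclass)
  {
    // trunc_id is expected to already have its low byte clear; mask anyway.
    bits = (trunc_id & ~SIZECLASS_MASK) | sizeclass;
  }

  uintptr_t trunc_id() const
  {
    return bits & ~SIZECLASS_MASK;
  }

  uint8_t sizeclass() const
  {
    return static_cast<uint8_t>(bits & SIZECLASS_MASK);
  }
};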


void set_target_id(alloc_id_t id)
void set_info(alloc_id_t id, sizeclass_t sc)
{
allocator_id = id;
alloc_id_and_sizeclass = (id & ~SIZECLASS_MASK) | sc;
}

alloc_id_t target_id()
alloc_id_t trunc_target_id()
{
return allocator_id;
return alloc_id_and_sizeclass & ~SIZECLASS_MASK;
}

sizeclass_t sizeclass()
{
return alloc_id_and_sizeclass & SIZECLASS_MASK;
}
};

@@ -46,10 +64,11 @@ namespace snmalloc
// is read by other threads.
alignas(CACHELINE_SIZE) MPSCQ<Remote> message_queue;

alloc_id_t id()
alloc_id_t trunc_id()
{
return static_cast<alloc_id_t>(
reinterpret_cast<uintptr_t>(&message_queue));
reinterpret_cast<uintptr_t>(&message_queue)) &
~SIZECLASS_MASK;
}
};
} // namespace snmalloc
2 changes: 2 additions & 0 deletions src/mem/sizeclass.h
@@ -11,6 +11,8 @@ namespace snmalloc
// using sizeclass_t = uint8_t;
using sizeclass_compress_t = uint8_t;

constexpr static uintptr_t SIZECLASS_MASK = 0xFF;

constexpr static uint16_t get_initial_offset(sizeclass_t sc, bool is_short);
constexpr static size_t sizeclass_to_size(sizeclass_t sizeclass);
constexpr static size_t
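
To make the packing concrete, here is a small worked example with an invented message_queue address and sizeclass; the real values are simply whatever address the queue occupies and whatever sizeclass the object has. It shows why a routing allocator can recover both fields from the Remote header without touching the target's Superslab.

// Worked example (hypothetical numbers) of the set_info() packing above.
#include <cassert>
#include <cstdint>

int main()
{
  constexpr uintptr_t SIZECLASS_MASK = 0xFF;
  uintptr_t queue_addr = 0x7f3a200014c0;  // hypothetical &message_queue
  uintptr_t sc = 0x2a;                    // hypothetical sizeclass

  // trunc_id() drops the low byte of the queue address...
  uintptr_t trunc_id = queue_addr & ~SIZECLASS_MASK;      // 0x7f3a20001400
  // ...and set_info() reuses that byte to carry the sizeclass.
  uintptr_t packed = (trunc_id & ~SIZECLASS_MASK) | sc;   // 0x7f3a2000142a

  assert((packed & ~SIZECLASS_MASK) == trunc_id);  // trunc_target_id()
  assert((packed & SIZECLASS_MASK) == sc);         // sizeclass()
}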