CHERI prep: stave off amplification for Remote routing #266

Merged: 3 commits, Dec 21, 2020
103 changes: 42 additions & 61 deletions src/mem/alloc.h
@@ -510,9 +510,19 @@ namespace snmalloc
#endif
}

size_t get_id()
/**
* Return this allocator's "truncated" ID, an integer useful as a hash
* value of this allocator.
*
* Specifically, this is the address of this allocator's message queue
* with the least significant bits (those covered by SIZECLASS_MASK)
* cleared.
* This will be unique for Allocs with inline queues; Allocs with
* out-of-line queues must ensure that no two queues' addresses collide
* under this masking.
*/
size_t get_trunc_id()
{
return id();
return public_state()->trunc_id();
}

private:
@@ -571,30 +581,26 @@ namespace snmalloc
IsQueueInline>);
constexpr size_t initial_shift =
bits::next_pow2_bits_const(allocator_size);
static_assert(
initial_shift >= 8,
"Can't embed sizeclass_t into allocator ID low bits");
SNMALLOC_ASSERT((initial_shift + (r * REMOTE_SLOT_BITS)) < 64);
return (id >> (initial_shift + (r * REMOTE_SLOT_BITS))) & REMOTE_MASK;
}

SNMALLOC_FAST_PATH void
dealloc_sized(alloc_id_t target_id, void* p, size_t objectsize)
dealloc(alloc_id_t target_id, void* p, sizeclass_t sizeclass)
{
this->capacity -= objectsize;
this->capacity -= sizeclass_to_size(sizeclass);

Remote* r = static_cast<Remote*>(p);
r->set_target_id(target_id);
SNMALLOC_ASSERT(r->target_id() == target_id);
r->set_info(target_id, sizeclass);

RemoteList* l = &list[get_slot(target_id, 0)];
l->last->non_atomic_next = r;
l->last = r;
}

SNMALLOC_FAST_PATH void
dealloc(alloc_id_t target_id, void* p, sizeclass_t sizeclass)
{
dealloc_sized(target_id, p, sizeclass_to_size(sizeclass));
}

void post(alloc_id_t id)
{
// When the cache gets big, post lists to their target allocators.
@@ -640,7 +646,7 @@ namespace snmalloc
{
// Use the next N bits to spread out remote deallocs in our own
// slot.
size_t slot = get_slot(r->target_id(), post_round);
size_t slot = get_slot(r->trunc_target_id(), post_round);
RemoteList* l = &list[slot];
l->last->non_atomic_next = r;
l->last = r;
@@ -694,11 +700,6 @@ namespace snmalloc
}
}

alloc_id_t id()
{
return public_state()->id();
}

auto& message_queue()
{
return public_state()->message_queue;
@@ -725,9 +726,6 @@ namespace snmalloc
remote_alloc = r;
}

if (id() >= static_cast<alloc_id_t>(-1))
error("Id should not be -1");

// If this is fake, don't do any of the bits of initialisation that may
// allocate memory.
if (isFake)
@@ -863,56 +861,40 @@ namespace snmalloc
{
error("Critical error: Out-of-memory during initialisation.");
}
dummy->set_target_id(id());
dummy->set_info(get_trunc_id(), size_to_sizeclass_const(MIN_ALLOC_SIZE));
message_queue().init(dummy);
}

SNMALLOC_FAST_PATH void handle_dealloc_remote(Remote* p)
{
Superslab* super = Superslab::get(p);
if (likely(p->trunc_target_id() == get_trunc_id()))
{
// Destined for my slabs
Superslab* super = Superslab::get(p);

#ifdef CHECK_CLIENT
if (p->target_id() != super->get_allocator()->id())
error("Detected memory corruption. Potential use-after-free");
if (p->trunc_target_id() != (super->get_allocator()->trunc_id()))
error("Detected memory corruption. Potential use-after-free");
#endif
if (likely(super->get_kind() == Super))
{
Slab* slab = Metaslab::get_slab(p);
Metaslab& meta = super->get_meta(slab);
if (likely(p->target_id() == id()))
{
small_dealloc_offseted(super, p, meta.sizeclass);
return;
}
}
handle_dealloc_remote_slow(p);
}
// Guard against remote queues that have colliding IDs
SNMALLOC_ASSERT(super->get_allocator() == public_state());

SNMALLOC_SLOW_PATH void handle_dealloc_remote_slow(Remote* p)
{
Superslab* super = Superslab::get(p);
if (likely(super->get_kind() == Medium))
{
Mediumslab* slab = Mediumslab::get(p);
if (p->target_id() == id())
if (likely(p->sizeclass() < NUM_SMALL_CLASSES))
{
sizeclass_t sizeclass = slab->get_sizeclass();
void* start = remove_cache_friendly_offset(p, sizeclass);
medium_dealloc(slab, start, sizeclass);
SNMALLOC_ASSERT(super->get_kind() == Super);
small_dealloc_offseted(super, p, p->sizeclass());
}
else
{
// Queue for remote dealloc elsewhere.
remote.dealloc(p->target_id(), p, slab->get_sizeclass());
SNMALLOC_ASSERT(super->get_kind() == Medium);
void* start = remove_cache_friendly_offset(p, p->sizeclass());
medium_dealloc(Mediumslab::get(p), start, p->sizeclass());
}
}
else
{
SNMALLOC_ASSERT(likely(p->target_id() != id()));
Slab* slab = Metaslab::get_slab(p);
Metaslab& meta = super->get_meta(slab);
// Queue for remote dealloc elsewhere.
remote.dealloc(p->target_id(), p, meta.sizeclass);
// Merely routing
remote.dealloc(p->trunc_target_id(), p, p->sizeclass());
}
}

@@ -933,7 +915,7 @@ namespace snmalloc
return;

stats().remote_post();
remote.post(id());
remote.post(get_trunc_id());
}

/**
@@ -1468,17 +1450,16 @@ namespace snmalloc
void remote_dealloc(RemoteAllocator* target, void* p, sizeclass_t sizeclass)
{
MEASURE_TIME(remote_dealloc, 4, 16);
SNMALLOC_ASSERT(target->id() != id());
SNMALLOC_ASSERT(target->trunc_id() != get_trunc_id());

// Check whether this will overflow the cache first. If we are a fake
// allocator, then our cache will always be full and so we will never hit
// this path.
size_t sz = sizeclass_to_size(sizeclass);
if (remote.capacity > 0)
{
void* offseted = apply_cache_friendly_offset(p, sizeclass);
stats().remote_free(sizeclass);
remote.dealloc_sized(target->id(), offseted, sz);
remote.dealloc(target->trunc_id(), offseted, sizeclass);
return;
}

@@ -1488,7 +1469,7 @@ namespace snmalloc
SNMALLOC_SLOW_PATH void
remote_dealloc_slow(RemoteAllocator* target, void* p, sizeclass_t sizeclass)
{
SNMALLOC_ASSERT(target->id() != id());
SNMALLOC_ASSERT(target->trunc_id() != get_trunc_id());

// Now that we've established that we're in the slow path (if we're a
// real allocator, we will have to empty our cache now), check if we are
Expand All @@ -1506,10 +1487,10 @@ namespace snmalloc

stats().remote_free(sizeclass);
void* offseted = apply_cache_friendly_offset(p, sizeclass);
remote.dealloc(target->id(), offseted, sizeclass);
remote.dealloc(target->trunc_id(), offseted, sizeclass);

stats().remote_post();
remote.post(id());
remote.post(get_trunc_id());
}

ChunkMap& chunkmap()
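For context on the new static_assert in get_slot() above, here is a minimal standalone sketch of the slot hashing. The constants are invented for illustration (the real initial_shift comes from bits::next_pow2_bits_const over the allocator size, as shown in the diff); the point is that the hash skips at least the bottom 8 bits of the truncated ID, so a sizeclass embedded there never changes which RemoteList a message lands in.

// Standalone sketch of the slot hashing shown in get_slot() above; the
// constants here are assumed for illustration, not snmalloc's real values.
#include <cstddef>

constexpr size_t REMOTE_SLOT_BITS = 6;   // assumed width of one routing digit
constexpr size_t REMOTE_MASK = (size_t(1) << REMOTE_SLOT_BITS) - 1;
constexpr size_t initial_shift = 13;     // assumed next_pow2_bits_const(allocator_size)

// Mirrors the new static_assert: the hash must never look at the low byte,
// because that byte may now carry a sizeclass_t.
static_assert(
  initial_shift >= 8, "Can't embed sizeclass_t into allocator ID low bits");

size_t get_slot(size_t trunc_id, size_t round)
{
  // Each posting round consumes the next REMOTE_SLOT_BITS of the ID,
  // starting above initial_shift.
  return (trunc_id >> (initial_shift + (round * REMOTE_SLOT_BITS))) & REMOTE_MASK;
}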
2 changes: 1 addition & 1 deletion src/mem/globalalloc.h
@@ -126,7 +126,7 @@ namespace snmalloc
if (alloc->remote.capacity < REMOTE_CACHE)
{
alloc->stats().remote_post();
alloc->remote.post(alloc->id());
alloc->remote.post(alloc->get_trunc_id());
done = false;
}

33 changes: 26 additions & 7 deletions src/mem/remoteallocator.h
@@ -22,16 +22,34 @@ namespace snmalloc
std::atomic<Remote*> next{nullptr};
};

alloc_id_t allocator_id;
/*
* We embed the size class in the bottom 8 bits of an allocator ID (i.e.,
* the address of an Alloc's remote_alloc's message_queue; in practice we
* only need 7 bits, but using 8 is conjectured to be faster). The hashing
* algorithm of the Alloc's RemoteCache already ignores the bottom
* "initial_shift" bits, which is, in practice, well above 8. There's a
* static_assert() over there that helps ensure this stays true.
*
* This does mean that we might have message_queues that always collide in
* the hash algorithm, if they're within "initial_shift" of each other. Such
* pairings will substantially decrease performance and so we prohibit them
* and use SNMALLOC_ASSERT to verify that they do not exist in debug builds.
*/
alloc_id_t alloc_id_and_sizeclass;
Collaborator:

Can this be a class type? We have some helpers for bitfield extracts, so we could easily provide an abstract interface for getting the two bits.

Member:

So this is pretty contained in a single class already; adding another layer of abstraction probably wouldn't buy us much.
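
For illustration only, a hypothetical sketch of the class-type approach the first comment suggests: a small wrapper holding the packed word, with accessors in place of the open-coded masking. The name TruncIdAndSizeclass and its interface are invented here; the PR keeps the plain alloc_id_t field, as discussed above.

// Hypothetical wrapper (not part of this PR) around the packed
// truncated-ID-plus-sizeclass word used by Remote below.
#include <cstdint>

class TruncIdAndSizeclass
{
  static constexpr uintptr_t SIZECLASS_MASK = 0xFF;
  uintptr_t bits = 0;

public:
  void set(uintptr_t trunc_id, uint8_t sizeclass)
  {
    // trunc_id is expected to already have its low byte clear; mask anyway.
    bits = (trunc_id & ~SIZECLASS_MASK) | sizeclass;
  }

  uintptr_t trunc_id() const
  {
    return bits & ~SIZECLASS_MASK;
  }

  uint8_t sizeclass() const
  {
    return static_cast<uint8_t>(bits & SIZECLASS_MASK);
  }
};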


void set_target_id(alloc_id_t id)
void set_info(alloc_id_t id, sizeclass_t sc)
{
allocator_id = id;
alloc_id_and_sizeclass = (id & ~SIZECLASS_MASK) | sc;
}

alloc_id_t target_id()
alloc_id_t trunc_target_id()
{
return allocator_id;
return alloc_id_and_sizeclass & ~SIZECLASS_MASK;
}

sizeclass_t sizeclass()
{
return alloc_id_and_sizeclass & SIZECLASS_MASK;
}
};

@@ -46,10 +64,11 @@ namespace snmalloc
// is read by other threads.
alignas(CACHELINE_SIZE) MPSCQ<Remote> message_queue;

alloc_id_t id()
alloc_id_t trunc_id()
{
return static_cast<alloc_id_t>(
reinterpret_cast<uintptr_t>(&message_queue));
reinterpret_cast<uintptr_t>(&message_queue)) &
~SIZECLASS_MASK;
}
};
} // namespace snmalloc
2 changes: 2 additions & 0 deletions src/mem/sizeclass.h
@@ -11,6 +11,8 @@ namespace snmalloc
// using sizeclass_t = uint8_t;
using sizeclass_compress_t = uint8_t;

constexpr static uintptr_t SIZECLASS_MASK = 0xFF;

constexpr static uint16_t get_initial_offset(sizeclass_t sc, bool is_short);
constexpr static size_t sizeclass_to_size(sizeclass_t sizeclass);
constexpr static size_t
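
To make the packing concrete, here is a small worked example with an invented message_queue address and sizeclass; the real values are simply whatever address the queue occupies and whatever sizeclass the object has. It shows why a routing allocator can recover both fields from the Remote header without touching the target's Superslab.

// Worked example (hypothetical numbers) of the set_info() packing above.
#include <cassert>
#include <cstdint>

int main()
{
  constexpr uintptr_t SIZECLASS_MASK = 0xFF;
  uintptr_t queue_addr = 0x7f3a200014c0;  // hypothetical &message_queue
  uintptr_t sc = 0x2a;                    // hypothetical sizeclass

  // trunc_id() drops the low byte of the queue address...
  uintptr_t trunc_id = queue_addr & ~SIZECLASS_MASK;      // 0x7f3a20001400
  // ...and set_info() reuses that byte to carry the sizeclass.
  uintptr_t packed = (trunc_id & ~SIZECLASS_MASK) | sc;   // 0x7f3a2000142a

  assert((packed & ~SIZECLASS_MASK) == trunc_id);  // trunc_target_id()
  assert((packed & SIZECLASS_MASK) == sc);         // sizeclass()
}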