-
Notifications
You must be signed in to change notification settings - Fork 112
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Improve slow path performance for allocation (#143)
* Remote dealloc refactor. * Improve remote dealloc Change remote to count down to 0, so fast path does not need a constant. Use signed value so that branch does not depend on addition. * Inline remote_dealloc The fast path of remote_dealloc is sufficiently compact that it can be inlined. * Improve fast path in Slab::alloc Turn the internal structure into tail calls, to improve fast path. Should be no algorithmic changes. * Refactor initialisation to help fast path. Break lazy initialisation into two functions, so it is easier to codegen fast paths. * Minor tidy to statically sized dealloc. * Refactor semi-slow path for alloc Make the backup path a bit faster. Only algorithmic change is to delay checking for first allocation. Otherwise, should be unchanged. * Test initial operation of a thread The first operation a new thread takes is special. It results in allocating an allocator, and swinging it into the TLS. This makes this a very special path, that is rarely tested. This test generates a lot of threads to cover the first alloc and dealloc operations. * Correctly handle reusing get_noncachable * Fix large alloc stats Large alloc stats aren't necessarily balanced on a thread, this changes to tracking individual pushes and pops, rather than the net effect (with an unsigned value). * Fix TLS init on large alloc path * Add Bump ptrs to allocator Each allocator has a bump ptr for each size class. This is no longer slab local. Slabs that haven't been fully allocated no longer need to be in the DLL for this sizeclass. * Change to a cyclic non-empty list This change reduces the branching in the case of finding a new free list. Using a non-empty cyclic list enables branch free add, and a single branch in remove to detect the empty case. * Update differences * Rename first allocation Use "needs initialisation" as it makes more sense for other scenarios. * Use a ptrdiff to help with zero init. 
* Make GlobalPlaceholder zero init The GlobalPlaceholder allocator is now a zero init block of memory. This removes various issues for when things are initialised. It is made read-only so that writes to it are detected on some platforms.
- Loading branch information
Showing
20 changed files
with
688 additions
and
237 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
#pragma once | ||
|
||
#include "defines.h" | ||
|
||
#include <cstdint> | ||
#include <type_traits> | ||
|
||
namespace snmalloc | ||
{ | ||
/** | ||
* Special class for cyclic doubly linked non-empty linked list | ||
* | ||
* This code assumes there is always one element in the list. The client | ||
* must ensure there is a sentinal element. | ||
*/ | ||
class CDLLNode | ||
{ | ||
/** | ||
* to_next is used to handle a zero initialised data structure. | ||
* This means that `is_empty` works even when the constructor hasn't | ||
* been run. | ||
*/ | ||
ptrdiff_t to_next = 0; | ||
|
||
// TODO: CHERI will need a real pointer too | ||
// CDLLNode* next = nullptr; | ||
CDLLNode* prev = nullptr; | ||
|
||
void set_next(CDLLNode* c) | ||
{ | ||
// TODO: CHERI will need a real pointer too | ||
// next = c; | ||
to_next = pointer_diff_signed(this, c); | ||
} | ||
|
||
public: | ||
/** | ||
* Single element cyclic list. This is the empty case. | ||
*/ | ||
CDLLNode() | ||
{ | ||
set_next(this); | ||
prev = this; | ||
} | ||
|
||
SNMALLOC_FAST_PATH bool is_empty() | ||
{ | ||
return to_next == 0; | ||
} | ||
|
||
/** | ||
* Removes this element from the cyclic list is it part of. | ||
*/ | ||
SNMALLOC_FAST_PATH void remove() | ||
{ | ||
SNMALLOC_ASSERT(!is_empty()); | ||
debug_check(); | ||
get_next()->prev = prev; | ||
prev->set_next(get_next()); | ||
// As this is no longer in the list, check invariant for | ||
// neighbouring element. | ||
get_next()->debug_check(); | ||
|
||
#ifndef NDEBUG | ||
set_next(nullptr); | ||
prev = nullptr; | ||
#endif | ||
} | ||
|
||
SNMALLOC_FAST_PATH CDLLNode* get_next() | ||
{ | ||
// TODO: CHERI will require a real pointer | ||
// return next; | ||
return pointer_offset_signed(this, to_next); | ||
} | ||
|
||
SNMALLOC_FAST_PATH CDLLNode* get_prev() | ||
{ | ||
return prev; | ||
} | ||
|
||
SNMALLOC_FAST_PATH void insert_next(CDLLNode* item) | ||
{ | ||
debug_check(); | ||
item->set_next(get_next()); | ||
get_next()->prev = item; | ||
item->prev = this; | ||
set_next(item); | ||
debug_check(); | ||
} | ||
|
||
SNMALLOC_FAST_PATH void insert_prev(CDLLNode* item) | ||
{ | ||
debug_check(); | ||
item->prev = prev; | ||
prev->set_next(item); | ||
item->set_next(this); | ||
prev = item; | ||
debug_check(); | ||
} | ||
|
||
/** | ||
* Checks the lists invariants | ||
* x->next->prev = x | ||
* for all x in the list. | ||
*/ | ||
void debug_check() | ||
{ | ||
#ifndef NDEBUG | ||
CDLLNode* item = get_next(); | ||
CDLLNode* p = this; | ||
|
||
do | ||
{ | ||
SNMALLOC_ASSERT(item->prev == p); | ||
p = item; | ||
item = item->get_next(); | ||
} while (item != this); | ||
#endif | ||
} | ||
}; | ||
} // namespace snmalloc |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.