Skip to content

Commit

Permalink
[Core] Improve CowData and Memory metadata alignment.
Browse files Browse the repository at this point in the history
  • Loading branch information
bruvzg committed Feb 5, 2024
1 parent 36847f6 commit b173a4d
Show file tree
Hide file tree
Showing 3 changed files with 105 additions and 52 deletions.
46 changes: 34 additions & 12 deletions include/godot_cpp/core/memory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,6 @@

#include <type_traits>

#ifndef PAD_ALIGN
#define PAD_ALIGN 16 //must always be greater than this at much
#endif

// p_dummy argument is added to avoid conflicts with the engine functions when both engine and GDExtension are built as a static library on iOS.
void *operator new(size_t p_size, const char *p_dummy, const char *p_description); ///< operator new that takes a description and uses MemoryStaticPool
void *operator new(size_t p_size, const char *p_dummy, void *(*p_allocfunc)(size_t p_size)); ///< operator new overload that takes an allocation callback (p_allocfunc) rather than a description; presumably the callback supplies the memory — confirm against the definition
Expand All @@ -69,6 +65,18 @@ class Memory {
Memory();

public:
// Alignment: ↓ max_align_t        ↓ uint64_t          ↓ max_align_t
//            ┌─────────────────┬──┬────────────────┬──┬───────────...
//            │ uint64_t        │░░│ uint64_t       │░░│ T[]
//            │ alloc size      │░░│ element count  │░░│ data
//            └─────────────────┴──┴────────────────┴──┴───────────...
// Offset:    ↑ SIZE_OFFSET        ↑ ELEMENT_OFFSET    ↑ DATA_OFFSET
// Note: "alloc size" is used and set by the engine and is never accessed or changed for the extension.

// Byte offsets of the metadata fields that sit in front of the array data.
// Each offset is the end of the preceding field rounded up to the alignment
// required by the field (or data) that follows: (end + align - 1) / align * align.
static constexpr size_t SIZE_OFFSET = 0;
static constexpr size_t ELEMENT_OFFSET = (SIZE_OFFSET + sizeof(uint64_t) + alignof(uint64_t) - 1) / alignof(uint64_t) * alignof(uint64_t);
static constexpr size_t DATA_OFFSET = (ELEMENT_OFFSET + sizeof(uint64_t) + alignof(max_align_t) - 1) / alignof(max_align_t) * alignof(max_align_t);

static void *alloc_static(size_t p_bytes, bool p_pad_align = false);
static void *realloc_static(void *p_memory, size_t p_bytes, bool p_pad_align = false);
static void free_static(void *p_ptr, bool p_pad_align = false);
Expand Down Expand Up @@ -99,7 +107,7 @@ struct Comparator {

template <class T>
void memdelete(T *p_class, typename std::enable_if<!std::is_base_of_v<godot::Wrapped, T>>::type * = nullptr) {
if (!std::is_trivially_destructible<T>::value) {
if constexpr (!std::is_trivially_destructible_v<T>) {
p_class->~T();
}

Expand All @@ -113,7 +121,7 @@ void memdelete(T *p_class) {

template <class T, class A>
void memdelete_allocator(T *p_class) {
if (!std::is_trivially_destructible<T>::value) {
if constexpr (!std::is_trivially_destructible_v<T>) {
p_class->~T();
}

Expand All @@ -136,6 +144,10 @@ class DefaultTypedAllocator {

#define memnew_arr(m_class, m_count) memnew_arr_template<m_class>(m_count)

// Maps a pointer to the array data onto the address of the 64-bit element
// count stored in the metadata header in front of it: step back DATA_OFFSET
// bytes to the start of the header, then forward ELEMENT_OFFSET bytes.
_FORCE_INLINE_ uint64_t *_get_element_count_ptr(uint8_t *p_ptr) {
	uint8_t *header_start = p_ptr - Memory::DATA_OFFSET;
	return reinterpret_cast<uint64_t *>(header_start + Memory::ELEMENT_OFFSET);
}

template <typename T>
T *memnew_arr_template(size_t p_elements, const char *p_descr = "") {
if (p_elements == 0) {
Expand All @@ -145,12 +157,14 @@ T *memnew_arr_template(size_t p_elements, const char *p_descr = "") {
same strategy used by std::vector, and the Vector class, so it should be safe.*/

size_t len = sizeof(T) * p_elements;
uint64_t *mem = (uint64_t *)Memory::alloc_static(len, true);
uint8_t *mem = (uint8_t *)Memory::alloc_static(len, true);
T *failptr = nullptr; // Get rid of a warning.
ERR_FAIL_NULL_V(mem, failptr);
*(mem - 1) = p_elements;

if (!std::is_trivially_destructible<T>::value) {
uint64_t *_elem_count_ptr = _get_element_count_ptr(mem);
*(_elem_count_ptr) = p_elements;

if constexpr (!std::is_trivially_destructible_v<T>) {
T *elems = (T *)mem;

/* call operator new */
Expand All @@ -162,12 +176,20 @@ T *memnew_arr_template(size_t p_elements, const char *p_descr = "") {
return (T *)mem;
}

// Returns the element count stored in the header in front of an array's data
// (the value memnew_arr_template writes through _get_element_count_ptr).
// p_class must point at the first element of such an array.
template <typename T>
size_t memarr_len(const T *p_class) {
	uint8_t *data_start = reinterpret_cast<uint8_t *>(const_cast<T *>(p_class));
	const uint64_t *count_slot = _get_element_count_ptr(data_start);
	return *count_slot;
}

template <typename T>
void memdelete_arr(T *p_class) {
uint64_t *ptr = (uint64_t *)p_class;
uint8_t *ptr = (uint8_t *)p_class;

if (!std::is_trivially_destructible<T>::value) {
uint64_t elem_count = *(ptr - 1);
if constexpr (!std::is_trivially_destructible_v<T>) {
uint64_t *_elem_count_ptr = _get_element_count_ptr(ptr);
uint64_t elem_count = *(_elem_count_ptr);

for (uint64_t i = 0; i < elem_count; i++) {
p_class[i].~T();
Expand Down
99 changes: 65 additions & 34 deletions include/godot_cpp/templates/cowdata.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ class VMap;
template <class T>
class CharStringT;

SAFE_NUMERIC_TYPE_PUN_GUARANTEES(uint64_t)
static_assert(std::is_trivially_destructible_v<std::atomic<uint64_t>>);

// Silence a false positive warning (see GH-52119).
#if defined(__GNUC__) && !defined(__clang__)
Expand Down Expand Up @@ -96,26 +96,47 @@ class CowData {
return ++x;
}

static constexpr USize ALLOC_PAD = sizeof(USize) * 2; // For size and atomic refcount.
// Alignment: ↓ max_align_t           ↓ USize          ↓ max_align_t
//            ┌────────────────────┬──┬─────────────┬──┬───────────...
//            │ SafeNumeric<USize> │░░│ USize       │░░│ T[]
//            │ ref. count         │░░│ data size   │░░│ data
//            └────────────────────┴──┴─────────────┴──┴───────────...
// Offset:    ↑ REF_COUNT_OFFSET      ↑ SIZE_OFFSET    ↑ DATA_OFFSET

// Byte offsets of the reference count, the element-count field and the data
// within one CowData allocation. Each offset is the end of the preceding field
// rounded up to the alignment of what follows: (end + align - 1) / align * align.
static constexpr size_t REF_COUNT_OFFSET = 0;
static constexpr size_t SIZE_OFFSET = (REF_COUNT_OFFSET + sizeof(SafeNumeric<USize>) + alignof(USize) - 1) / alignof(USize) * alignof(USize);
static constexpr size_t DATA_OFFSET = (SIZE_OFFSET + sizeof(USize) + alignof(max_align_t) - 1) / alignof(max_align_t) * alignof(max_align_t);

mutable T *_ptr = nullptr;

// internal helpers

// Given the start of a raw allocation (header included), returns the address
// of the reference counter located REF_COUNT_OFFSET bytes into it.
static _FORCE_INLINE_ SafeNumeric<USize> *_get_refcount_ptr(uint8_t *p_ptr) {
	uint8_t *refcount_addr = p_ptr + REF_COUNT_OFFSET;
	return reinterpret_cast<SafeNumeric<USize> *>(refcount_addr);
}

// Given the start of a raw allocation (header included), returns the address
// of the size field located SIZE_OFFSET bytes into it.
static _FORCE_INLINE_ USize *_get_size_ptr(uint8_t *p_ptr) {
	uint8_t *size_addr = p_ptr + SIZE_OFFSET;
	return reinterpret_cast<USize *>(size_addr);
}

// Given the start of a raw allocation (header included), returns the address
// of the first element, which begins DATA_OFFSET bytes into it.
static _FORCE_INLINE_ T *_get_data_ptr(uint8_t *p_ptr) {
	uint8_t *data_addr = p_ptr + DATA_OFFSET;
	return reinterpret_cast<T *>(data_addr);
}

// Returns a pointer to the reference counter stored in front of the element
// data that `_ptr` points at, or nullptr when `_ptr` is null.
// NOTE(review): the two return statements below are the before/after lines of
// this diff hunk. The first steps back two SafeNumeric<USize> slots from
// `_ptr`; the second derives the address from DATA_OFFSET / REF_COUNT_OFFSET.
// Read as plain code, only the first is reachable — confirm which one the
// file is meant to keep.
_FORCE_INLINE_ SafeNumeric<USize> *_get_refcount() const {
if (!_ptr) {
return nullptr;
}

return reinterpret_cast<SafeNumeric<USize> *>(_ptr) - 2;
return (SafeNumeric<USize> *)((uint8_t *)_ptr - DATA_OFFSET + REF_COUNT_OFFSET);
}

// Returns a pointer to the size field stored in front of the element data
// that `_ptr` points at, or nullptr when `_ptr` is null.
// NOTE(review): the two return statements below are the before/after lines of
// this diff hunk. The first steps back one USize slot from `_ptr`; the second
// derives the address from DATA_OFFSET / SIZE_OFFSET. Read as plain code, only
// the first is reachable — confirm which one the file is meant to keep.
_FORCE_INLINE_ USize *_get_size() const {
if (!_ptr) {
return nullptr;
}

return reinterpret_cast<USize *>(_ptr) - 1;
return (USize *)((uint8_t *)_ptr - DATA_OFFSET + SIZE_OFFSET);
}

_FORCE_INLINE_ USize _get_alloc_size(USize p_elements) const {
Expand Down Expand Up @@ -240,7 +261,7 @@ void CowData<T>::_unref(void *p_data) {
}
// clean up

if (!std::is_trivially_destructible<T>::value) {
if constexpr (!std::is_trivially_destructible_v<T>) {
USize *count = _get_size();
T *data = (T *)(count + 1);

Expand All @@ -251,7 +272,7 @@ void CowData<T>::_unref(void *p_data) {
}

// free mem
Memory::free_static(((uint8_t *)p_data) - ALLOC_PAD, false);
Memory::free_static(((uint8_t *)p_data) - DATA_OFFSET, false);
}

template <class T>
Expand All @@ -267,26 +288,27 @@ typename CowData<T>::USize CowData<T>::_copy_on_write() {
/* in use by more than me */
USize current_size = *_get_size();

USize *mem_new = (USize *)Memory::alloc_static(_get_alloc_size(current_size) + ALLOC_PAD, false);
mem_new += 2;
uint8_t *mem_new = (uint8_t *)Memory::alloc_static(_get_alloc_size(current_size) + DATA_OFFSET, false);
ERR_FAIL_NULL_V(mem_new, 0);

new (mem_new - 2) SafeNumeric<USize>(1); //refcount
*(mem_new - 1) = current_size; //size
SafeNumeric<USize> *_refc_ptr = _get_refcount_ptr(mem_new);
USize *_size_ptr = _get_size_ptr(mem_new);
T *_data_ptr = _get_data_ptr(mem_new);

T *_data = (T *)(mem_new);
new (_refc_ptr) SafeNumeric<USize>(1); //refcount
*(_size_ptr) = current_size; //size

// initialize new elements
if (std::is_trivially_copyable<T>::value) {
memcpy(mem_new, _ptr, current_size * sizeof(T));

if constexpr (std::is_trivially_copyable_v<T>) {
memcpy((uint8_t *)_data_ptr, _ptr, current_size * sizeof(T));
} else {
for (USize i = 0; i < current_size; i++) {
memnew_placement(&_data[i], T(_ptr[i]));
memnew_placement(&_data_ptr[i], T(_ptr[i]));
}
}

_unref(_ptr);
_ptr = _data;
_ptr = _data_ptr;

rc = 1;
}
Expand Down Expand Up @@ -322,27 +344,33 @@ Error CowData<T>::resize(Size p_size) {
if (alloc_size != current_alloc_size) {
if (current_size == 0) {
// alloc from scratch
USize *ptr = (USize *)Memory::alloc_static(alloc_size + ALLOC_PAD, false);
ptr += 2;
ERR_FAIL_NULL_V(ptr, ERR_OUT_OF_MEMORY);
*(ptr - 1) = 0; //size, currently none
new (ptr - 2) SafeNumeric<USize>(1); //refcount
uint8_t *mem_new = (uint8_t *)Memory::alloc_static(alloc_size + DATA_OFFSET, false);
ERR_FAIL_NULL_V(mem_new, ERR_OUT_OF_MEMORY);

SafeNumeric<USize> *_refc_ptr = _get_refcount_ptr(mem_new);
USize *_size_ptr = _get_size_ptr(mem_new);
T *_data_ptr = _get_data_ptr(mem_new);

_ptr = (T *)ptr;
new (_refc_ptr) SafeNumeric<USize>(1); //refcount
*(_size_ptr) = 0; //size, currently none

_ptr = _data_ptr;
} else {
USize *_ptrnew = (USize *)Memory::realloc_static(((uint8_t *)_ptr) - ALLOC_PAD, alloc_size + ALLOC_PAD, false);
ERR_FAIL_NULL_V(_ptrnew, ERR_OUT_OF_MEMORY);
_ptrnew += 2;
new (_ptrnew - 2) SafeNumeric<USize>(rc); //refcount
uint8_t *mem_new = (uint8_t *)Memory::realloc_static(((uint8_t *)_ptr) - DATA_OFFSET, alloc_size + DATA_OFFSET, false);
ERR_FAIL_NULL_V(mem_new, ERR_OUT_OF_MEMORY);

SafeNumeric<USize> *_refc_ptr = _get_refcount_ptr(mem_new);
T *_data_ptr = _get_data_ptr(mem_new);

_ptr = (T *)(_ptrnew);
new (_refc_ptr) SafeNumeric<USize>(rc); //refcount

_ptr = _data_ptr;
}
}

// construct the newly created elements

if (!std::is_trivially_constructible<T>::value) {
if constexpr (!std::is_trivially_constructible_v<T>) {
for (Size i = *_get_size(); i < p_size; i++) {
memnew_placement(&_ptr[i], T);
}
Expand All @@ -353,7 +381,7 @@ Error CowData<T>::resize(Size p_size) {
*_get_size() = p_size;

} else if (p_size < current_size) {
if (!std::is_trivially_destructible<T>::value) {
if constexpr (!std::is_trivially_destructible_v<T>) {
// deinitialize no longer needed elements
for (USize i = p_size; i < *_get_size(); i++) {
T *t = &_ptr[i];
Expand All @@ -362,12 +390,15 @@ Error CowData<T>::resize(Size p_size) {
}

if (alloc_size != current_alloc_size) {
USize *_ptrnew = (USize *)Memory::realloc_static(((uint8_t *)_ptr) - ALLOC_PAD, alloc_size + ALLOC_PAD, false);
ERR_FAIL_NULL_V(_ptrnew, ERR_OUT_OF_MEMORY);
_ptrnew += 2;
new (_ptrnew - 2) SafeNumeric<USize>(rc); //refcount
uint8_t *mem_new = (uint8_t *)Memory::realloc_static(((uint8_t *)_ptr) - DATA_OFFSET, alloc_size + DATA_OFFSET, false);
ERR_FAIL_NULL_V(mem_new, ERR_OUT_OF_MEMORY);

SafeNumeric<USize> *_refc_ptr = _get_refcount_ptr(mem_new);
T *_data_ptr = _get_data_ptr(mem_new);

new (_refc_ptr) SafeNumeric<USize>(rc); //refcount

_ptr = (T *)(_ptrnew);
_ptr = _data_ptr;
}

*_get_size() = p_size;
Expand Down
12 changes: 6 additions & 6 deletions src/core/memory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,12 @@ void *Memory::alloc_static(size_t p_bytes, bool p_pad_align) {
bool prepad = p_pad_align;
#endif

void *mem = internal::gdextension_interface_mem_alloc(p_bytes + (prepad ? PAD_ALIGN : 0));
void *mem = internal::gdextension_interface_mem_alloc(p_bytes + (prepad ? DATA_OFFSET : 0));
ERR_FAIL_NULL_V(mem, nullptr);

if (prepad) {
uint8_t *s8 = (uint8_t *)mem;
return s8 + PAD_ALIGN;
return s8 + DATA_OFFSET;
} else {
return mem;
}
Expand All @@ -69,10 +69,10 @@ void *Memory::realloc_static(void *p_memory, size_t p_bytes, bool p_pad_align) {
#endif

if (prepad) {
mem -= PAD_ALIGN;
mem = (uint8_t *)internal::gdextension_interface_mem_realloc(mem, p_bytes + PAD_ALIGN);
mem -= DATA_OFFSET;
mem = (uint8_t *)internal::gdextension_interface_mem_realloc(mem, p_bytes + DATA_OFFSET);
ERR_FAIL_NULL_V(mem, nullptr);
return mem + PAD_ALIGN;
return mem + DATA_OFFSET;
} else {
return (uint8_t *)internal::gdextension_interface_mem_realloc(mem, p_bytes);
}
Expand All @@ -88,7 +88,7 @@ void Memory::free_static(void *p_ptr, bool p_pad_align) {
#endif

if (prepad) {
mem -= PAD_ALIGN;
mem -= DATA_OFFSET;
}
internal::gdextension_interface_mem_free(mem);
}
Expand Down

0 comments on commit b173a4d

Please sign in to comment.