From 48db2577dcdcd406542c39eac9fd85edb858e077 Mon Sep 17 00:00:00 2001 From: Lucy Qiu Date: Tue, 6 May 2025 17:50:54 -0700 Subject: [PATCH] Introduce ET_ALIGN_ALLOC for portable aligned alloc (#10660) Summary: Issue with aligned buffers: P1800967583 The alignment requested is 16, and std::max_align_t is also 16. This means we do not need to pad the size to meet any alignment. However, the buffer we get from malloc is aligned to 8, not 16. When we try to align the buffer, we overflow and error out. Seems like malloc is not guaranteed to return 8 or 16 byte-aligned buffers, so also a bit hard to test definitively. So far we've only seen this when the buffer size is small (size 2, 4) ``` The malloc(), calloc(), realloc(), and reallocarray() functions return a pointer to the allocated memory, which is suitably aligned for any type that fits into the requested size or less. ``` Use std::aligned_alloc (C++17) to ensure buffer is aligned. For systems that do not have aligned_alloc (or posix_memalign) fallback to malloc. The malloc implementation is similar to what file_data_loader.cpp does. Except, we do not have a custom free function from FreeableBuffer, so we store an offset just before the aligned ptr to free the actual buffer. 1. Allocate via malloc; buffer = malloc(size + sizeof(uint16_t) + alignment - 1) - size: the size requested. - sizeof(uint16_t): a place to store the offset between the aligned buffer and the original ptr. - alignment-1: extra padding to allow for alignment. 2. Align (buffer + sizeof(uint16_t)) to alignment. This (usually) pushes the buffer forward by `alignment`. 3. Store the difference of (aligned_ptr - buffer). The memory will look like this: | buffer start | maybe padding | offset (aligned_buffer - buffer) | aligned_buffer start | maybe padding | buffer end | We should have between offset_size (2) and 1 aligned block before the actual aligned buffer. https://embeddedartistry.com/blog/2017/02/22/generating-aligned-memory/ NOTE: this increase binary size (linux, clang) by 8 bytes, so also raising it there. Reviewed By: larryliu0820, mcr229 Differential Revision: D74041198 --- .github/workflows/pull.yml | 4 +- extension/data_loader/file_data_loader.cpp | 69 +++----------- .../test/backend_integration_test.cpp | 4 +- runtime/platform/compiler.h | 95 +++++++++++++++++++ 4 files changed, 110 insertions(+), 62 deletions(-) diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 795272688bd..2dc1fcde36e 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -434,9 +434,7 @@ jobs: output=$(ls -la cmake-out/test/size_test) arr=($output) size=${arr[4]} - # threshold=48120 on devserver with gcc11.4 - # todo(lfq): update once binary size is below 50kb. - threshold="47552" + threshold="47560" if [[ "$size" -le "$threshold" ]]; then echo "Success $size <= $threshold" else diff --git a/extension/data_loader/file_data_loader.cpp b/extension/data_loader/file_data_loader.cpp index 503539774a5..e9922eb8323 100644 --- a/extension/data_loader/file_data_loader.cpp +++ b/extension/data_loader/file_data_loader.cpp @@ -49,20 +49,6 @@ namespace { static bool is_power_of_2(size_t value) { return value > 0 && (value & ~(value - 1)) == value; } - -/** - * Returns the next alignment for a given pointer. - */ -static uint8_t* align_pointer(void* ptr, size_t alignment) { - intptr_t addr = reinterpret_cast(ptr); - if ((addr & (alignment - 1)) == 0) { - // Already aligned. - return reinterpret_cast(ptr); - } - // Bump forward. - addr = (addr | (alignment - 1)) + 1; - return reinterpret_cast(addr); -} } // namespace FileDataLoader::~FileDataLoader() { @@ -129,13 +115,13 @@ namespace { /** * FreeableBuffer::FreeFn-compatible callback. * - * `context` is actually a ptrdiff_t value (not a pointer) that contains the - * offset in bytes between `data` and the actual pointer to free. + * `context` is the original buffer pointer. It is allocated with + * ET_ALIGNED_ALLOC, and must be freed with ET_ALIGNED_FREE. + * + * `data` and `size` are unused. */ void FreeSegment(void* context, void* data, ET_UNUSED size_t size) { - ptrdiff_t offset = reinterpret_cast(context); - ET_DCHECK_MSG(offset >= 0, "Unexpected offset %ld", (long int)offset); - std::free(static_cast(data) - offset); + ET_ALIGNED_FREE(context); } } // namespace @@ -163,57 +149,26 @@ Result FileDataLoader::load( } // Allocate memory for the FreeableBuffer. - size_t alloc_size = size; - if (alignment_ > alignof(std::max_align_t)) { - // malloc() will align to smaller values, but we must manually align to - // larger values. - alloc_size += alignment_; - } - void* buffer = std::malloc(alloc_size); - if (buffer == nullptr) { + void* aligned_buffer = ET_ALIGNED_ALLOC(alignment_, size); + if (aligned_buffer == nullptr) { ET_LOG( Error, - "Reading from %s at offset %zu: malloc(%zd) failed", + "Reading from %s at offset %zu: ET_ALIGNED_ALLOC(%zd, %zd) failed", file_name_, offset, + alignment_, size); return Error::MemoryAllocationFailed; } - // Align. - void* aligned_buffer = align_pointer(buffer, alignment_); - - // Assert that the alignment didn't overflow the buffer. - ET_DCHECK_MSG( - reinterpret_cast(aligned_buffer) + size <= - reinterpret_cast(buffer) + alloc_size, - "aligned_buffer %p + size %zu > buffer %p + alloc_size %zu", - aligned_buffer, - size, - buffer, - alloc_size); - auto err = load_into(offset, size, segment_info, aligned_buffer); if (err != Error::Ok) { - // Free `buffer`, which is what malloc() gave us, not `aligned_buffer`. - std::free(buffer); + ET_ALIGNED_FREE(aligned_buffer); return err; } - // We can't naively free this pointer, since it may not be what malloc() gave - // us. Pass the offset to the real buffer as context. This is the number of - // bytes that need to be subtracted from the FreeableBuffer::data() pointer to - // find the actual pointer to free. - return FreeableBuffer( - aligned_buffer, - size, - FreeSegment, - /*free_fn_context=*/ - reinterpret_cast( - // Using signed types here because it will produce a signed ptrdiff_t - // value, though for us it will always be non-negative. - reinterpret_cast(aligned_buffer) - - reinterpret_cast(buffer))); + // Pass the aligned_buffer pointer as context to FreeSegment. + return FreeableBuffer(aligned_buffer, size, FreeSegment, aligned_buffer); } Result FileDataLoader::size() const { diff --git a/runtime/executor/test/backend_integration_test.cpp b/runtime/executor/test/backend_integration_test.cpp index ea9467907c7..e2e61f171eb 100644 --- a/runtime/executor/test/backend_integration_test.cpp +++ b/runtime/executor/test/backend_integration_test.cpp @@ -656,8 +656,8 @@ class DelegateDataAlignmentTest : public ::testing::TestWithParam { // The delegate data inline alignment used by the -da1024 file. return 1024; } else { - // A small alignment that's compatible with any realistic alignment. - return 4; + // Minimum alignment expected by program.cpp. + return alignof(std::max_align_t); } } diff --git a/runtime/platform/compiler.h b/runtime/platform/compiler.h index 7467d5c1e04..da7e0988a62 100644 --- a/runtime/platform/compiler.h +++ b/runtime/platform/compiler.h @@ -171,6 +171,101 @@ using ssize_t = ptrdiff_t; #endif +/** + * Platform-specific aligned memory allocation and deallocation. + * + * Usage: + * void* ptr = ET_ALIGNED_ALLOC(alignment, size); + * // use ptr... + * ET_ALIGNED_FREE(ptr); + * + * Note: alignment must be a power of 2 and size must be an integral multiple of + * alignment. + */ +#if defined(_MSC_VER) +#include +#define ET_ALIGNED_ALLOC(alignment, size) \ + _aligned_malloc(((size + alignment - 1) & ~(alignment - 1)), (alignment)) +#define ET_ALIGNED_FREE(ptr) _aligned_free(ptr) +#elif defined(__APPLE__) +#include // For posix_memalign and free +inline void* et_apple_aligned_alloc(size_t alignment, size_t size) { + void* ptr = nullptr; + // The address of the allocated memory must be a multiple of sizeof(void*). + if (alignment < sizeof(void*)) { + alignment = sizeof(void*); + } + if (posix_memalign( + &ptr, alignment, (size + alignment - 1) & ~(alignment - 1)) != 0) { + return nullptr; + } + return ptr; +} +#define ET_ALIGNED_ALLOC(alignment, size) \ + et_apple_aligned_alloc((alignment), (size)) +#define ET_ALIGNED_FREE(ptr) free(ptr) +#elif __has_builtin(__builtin_aligned_alloc) || defined(_ISOC11_SOURCE) +// Linux and posix systems that support aligned_alloc and are >= C++17. +#include +#define ET_ALIGNED_ALLOC(alignment, size) \ + ::aligned_alloc(alignment, (size + alignment - 1) & ~(alignment - 1)) +#define ET_ALIGNED_FREE(ptr) free(ptr) +#else +// If the platform doesn't support aligned_alloc, fallback to malloc. +#include +#include +inline void* et_aligned_malloc(size_t alignment, size_t size) { + // Place to store the offset to the original pointer. + size_t offset_size = sizeof(uint16_t); + + // Malloc extra space for offset + alignment. + size_t alloc_size = size + offset_size + alignment - 1; + void* ptr = std::malloc(alloc_size); + + if (ptr == nullptr) { + // Malloc failed. + return nullptr; + } + + uintptr_t addr = reinterpret_cast(ptr); + // Align the address past addr + offset_size bytes. + // This provides space to store the offset before the aligned pointer. + addr = addr + offset_size; + uintptr_t aligned_ptr = (addr + alignment - 1) & ~(alignment - 1); + + // Check that alignment didn't overflow the buffer. + if (reinterpret_cast(aligned_ptr) + size > + reinterpret_cast(ptr) + alloc_size) { + std::free(ptr); + return nullptr; + } + + // Store the offset to the original pointer. + // Used to free the original allocated buffer. + *(reinterpret_cast(aligned_ptr) - 1) = + (uint16_t)(reinterpret_cast(aligned_ptr) - + reinterpret_cast(ptr)); + + return reinterpret_cast(aligned_ptr); +} + +inline void et_aligned_free(void* ptr) { + if (ptr == nullptr) { + return; + } + + // Get the original pointer using the offset. + uint16_t* original_ptr = reinterpret_cast( + reinterpret_cast(ptr) - + *(reinterpret_cast(ptr) - 1)); + std::free(original_ptr); +} + +#define ET_ALIGNED_ALLOC(alignment, size) et_aligned_malloc((alignment), (size)) +#define ET_ALIGNED_FREE(ptr) et_aligned_free(ptr) + +#endif + // DEPRECATED: Use the non-underscore-prefixed versions instead. // TODO(T199005537): Remove these once all users have stopped using them. #define __ET_DEPRECATED ET_DEPRECATED