diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 795272688bd..2dc1fcde36e 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -434,9 +434,7 @@ jobs: output=$(ls -la cmake-out/test/size_test) arr=($output) size=${arr[4]} - # threshold=48120 on devserver with gcc11.4 - # todo(lfq): update once binary size is below 50kb. - threshold="47552" + threshold="47560" if [[ "$size" -le "$threshold" ]]; then echo "Success $size <= $threshold" else diff --git a/extension/data_loader/file_data_loader.cpp b/extension/data_loader/file_data_loader.cpp index 503539774a5..e9922eb8323 100644 --- a/extension/data_loader/file_data_loader.cpp +++ b/extension/data_loader/file_data_loader.cpp @@ -49,20 +49,6 @@ namespace { static bool is_power_of_2(size_t value) { return value > 0 && (value & ~(value - 1)) == value; } - -/** - * Returns the next alignment for a given pointer. - */ -static uint8_t* align_pointer(void* ptr, size_t alignment) { - intptr_t addr = reinterpret_cast(ptr); - if ((addr & (alignment - 1)) == 0) { - // Already aligned. - return reinterpret_cast(ptr); - } - // Bump forward. - addr = (addr | (alignment - 1)) + 1; - return reinterpret_cast(addr); -} } // namespace FileDataLoader::~FileDataLoader() { @@ -129,13 +115,13 @@ namespace { /** * FreeableBuffer::FreeFn-compatible callback. * - * `context` is actually a ptrdiff_t value (not a pointer) that contains the - * offset in bytes between `data` and the actual pointer to free. + * `context` is the original buffer pointer. It is allocated with + * ET_ALIGNED_ALLOC, and must be freed with ET_ALIGNED_FREE. + * + * `data` and `size` are unused. */ void FreeSegment(void* context, void* data, ET_UNUSED size_t size) { - ptrdiff_t offset = reinterpret_cast(context); - ET_DCHECK_MSG(offset >= 0, "Unexpected offset %ld", (long int)offset); - std::free(static_cast(data) - offset); + ET_ALIGNED_FREE(context); } } // namespace @@ -163,57 +149,26 @@ Result FileDataLoader::load( } // Allocate memory for the FreeableBuffer. - size_t alloc_size = size; - if (alignment_ > alignof(std::max_align_t)) { - // malloc() will align to smaller values, but we must manually align to - // larger values. - alloc_size += alignment_; - } - void* buffer = std::malloc(alloc_size); - if (buffer == nullptr) { + void* aligned_buffer = ET_ALIGNED_ALLOC(alignment_, size); + if (aligned_buffer == nullptr) { ET_LOG( Error, - "Reading from %s at offset %zu: malloc(%zd) failed", + "Reading from %s at offset %zu: ET_ALIGNED_ALLOC(%zd, %zd) failed", file_name_, offset, + alignment_, size); return Error::MemoryAllocationFailed; } - // Align. - void* aligned_buffer = align_pointer(buffer, alignment_); - - // Assert that the alignment didn't overflow the buffer. - ET_DCHECK_MSG( - reinterpret_cast(aligned_buffer) + size <= - reinterpret_cast(buffer) + alloc_size, - "aligned_buffer %p + size %zu > buffer %p + alloc_size %zu", - aligned_buffer, - size, - buffer, - alloc_size); - auto err = load_into(offset, size, segment_info, aligned_buffer); if (err != Error::Ok) { - // Free `buffer`, which is what malloc() gave us, not `aligned_buffer`. - std::free(buffer); + ET_ALIGNED_FREE(aligned_buffer); return err; } - // We can't naively free this pointer, since it may not be what malloc() gave - // us. Pass the offset to the real buffer as context. This is the number of - // bytes that need to be subtracted from the FreeableBuffer::data() pointer to - // find the actual pointer to free. - return FreeableBuffer( - aligned_buffer, - size, - FreeSegment, - /*free_fn_context=*/ - reinterpret_cast( - // Using signed types here because it will produce a signed ptrdiff_t - // value, though for us it will always be non-negative. - reinterpret_cast(aligned_buffer) - - reinterpret_cast(buffer))); + // Pass the aligned_buffer pointer as context to FreeSegment. + return FreeableBuffer(aligned_buffer, size, FreeSegment, aligned_buffer); } Result FileDataLoader::size() const { diff --git a/runtime/executor/test/backend_integration_test.cpp b/runtime/executor/test/backend_integration_test.cpp index ea9467907c7..e2e61f171eb 100644 --- a/runtime/executor/test/backend_integration_test.cpp +++ b/runtime/executor/test/backend_integration_test.cpp @@ -656,8 +656,8 @@ class DelegateDataAlignmentTest : public ::testing::TestWithParam { // The delegate data inline alignment used by the -da1024 file. return 1024; } else { - // A small alignment that's compatible with any realistic alignment. - return 4; + // Minimum alignment expected by program.cpp. + return alignof(std::max_align_t); } } diff --git a/runtime/platform/compiler.h b/runtime/platform/compiler.h index 7467d5c1e04..da7e0988a62 100644 --- a/runtime/platform/compiler.h +++ b/runtime/platform/compiler.h @@ -171,6 +171,101 @@ using ssize_t = ptrdiff_t; #endif +/** + * Platform-specific aligned memory allocation and deallocation. + * + * Usage: + * void* ptr = ET_ALIGNED_ALLOC(alignment, size); + * // use ptr... + * ET_ALIGNED_FREE(ptr); + * + * Note: alignment must be a power of 2 and size must be an integral multiple of + * alignment. + */ +#if defined(_MSC_VER) +#include +#define ET_ALIGNED_ALLOC(alignment, size) \ + _aligned_malloc(((size + alignment - 1) & ~(alignment - 1)), (alignment)) +#define ET_ALIGNED_FREE(ptr) _aligned_free(ptr) +#elif defined(__APPLE__) +#include // For posix_memalign and free +inline void* et_apple_aligned_alloc(size_t alignment, size_t size) { + void* ptr = nullptr; + // The address of the allocated memory must be a multiple of sizeof(void*). + if (alignment < sizeof(void*)) { + alignment = sizeof(void*); + } + if (posix_memalign( + &ptr, alignment, (size + alignment - 1) & ~(alignment - 1)) != 0) { + return nullptr; + } + return ptr; +} +#define ET_ALIGNED_ALLOC(alignment, size) \ + et_apple_aligned_alloc((alignment), (size)) +#define ET_ALIGNED_FREE(ptr) free(ptr) +#elif __has_builtin(__builtin_aligned_alloc) || defined(_ISOC11_SOURCE) +// Linux and posix systems that support aligned_alloc and are >= C++17. +#include +#define ET_ALIGNED_ALLOC(alignment, size) \ + ::aligned_alloc(alignment, (size + alignment - 1) & ~(alignment - 1)) +#define ET_ALIGNED_FREE(ptr) free(ptr) +#else +// If the platform doesn't support aligned_alloc, fallback to malloc. +#include +#include +inline void* et_aligned_malloc(size_t alignment, size_t size) { + // Place to store the offset to the original pointer. + size_t offset_size = sizeof(uint16_t); + + // Malloc extra space for offset + alignment. + size_t alloc_size = size + offset_size + alignment - 1; + void* ptr = std::malloc(alloc_size); + + if (ptr == nullptr) { + // Malloc failed. + return nullptr; + } + + uintptr_t addr = reinterpret_cast(ptr); + // Align the address past addr + offset_size bytes. + // This provides space to store the offset before the aligned pointer. + addr = addr + offset_size; + uintptr_t aligned_ptr = (addr + alignment - 1) & ~(alignment - 1); + + // Check that alignment didn't overflow the buffer. + if (reinterpret_cast(aligned_ptr) + size > + reinterpret_cast(ptr) + alloc_size) { + std::free(ptr); + return nullptr; + } + + // Store the offset to the original pointer. + // Used to free the original allocated buffer. + *(reinterpret_cast(aligned_ptr) - 1) = + (uint16_t)(reinterpret_cast(aligned_ptr) - + reinterpret_cast(ptr)); + + return reinterpret_cast(aligned_ptr); +} + +inline void et_aligned_free(void* ptr) { + if (ptr == nullptr) { + return; + } + + // Get the original pointer using the offset. + uint16_t* original_ptr = reinterpret_cast( + reinterpret_cast(ptr) - + *(reinterpret_cast(ptr) - 1)); + std::free(original_ptr); +} + +#define ET_ALIGNED_ALLOC(alignment, size) et_aligned_malloc((alignment), (size)) +#define ET_ALIGNED_FREE(ptr) et_aligned_free(ptr) + +#endif + // DEPRECATED: Use the non-underscore-prefixed versions instead. // TODO(T199005537): Remove these once all users have stopped using them. #define __ET_DEPRECATED ET_DEPRECATED