Skip to content

Use std::align_alloc in file_data_loader #10660

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 9, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions .github/workflows/pull.yml
Original file line number Diff line number Diff line change
Expand Up @@ -434,9 +434,7 @@ jobs:
output=$(ls -la cmake-out/test/size_test)
arr=($output)
size=${arr[4]}
# threshold=48120 on devserver with gcc11.4
# todo(lfq): update once binary size is below 50kb.
threshold="47552"
threshold="47560"
if [[ "$size" -le "$threshold" ]]; then
echo "Success $size <= $threshold"
else
Expand Down
69 changes: 12 additions & 57 deletions extension/data_loader/file_data_loader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,20 +49,6 @@ namespace {
static bool is_power_of_2(size_t value) {
return value > 0 && (value & ~(value - 1)) == value;
}

/**
* Returns the next alignment for a given pointer.
*/
static uint8_t* align_pointer(void* ptr, size_t alignment) {
intptr_t addr = reinterpret_cast<intptr_t>(ptr);
if ((addr & (alignment - 1)) == 0) {
// Already aligned.
return reinterpret_cast<uint8_t*>(ptr);
}
// Bump forward.
addr = (addr | (alignment - 1)) + 1;
return reinterpret_cast<uint8_t*>(addr);
}
} // namespace

FileDataLoader::~FileDataLoader() {
Expand Down Expand Up @@ -129,13 +115,13 @@ namespace {
/**
* FreeableBuffer::FreeFn-compatible callback.
*
* `context` is actually a ptrdiff_t value (not a pointer) that contains the
* offset in bytes between `data` and the actual pointer to free.
* `context` is the original buffer pointer. It is allocated with
* ET_ALIGNED_ALLOC, and must be freed with ET_ALIGNED_FREE.
*
* `data` and `size` are unused.
*/
void FreeSegment(void* context, void* data, ET_UNUSED size_t size) {
ptrdiff_t offset = reinterpret_cast<ptrdiff_t>(context);
ET_DCHECK_MSG(offset >= 0, "Unexpected offset %ld", (long int)offset);
std::free(static_cast<uint8_t*>(data) - offset);
ET_ALIGNED_FREE(context);
}
} // namespace

Expand Down Expand Up @@ -163,57 +149,26 @@ Result<FreeableBuffer> FileDataLoader::load(
}

// Allocate memory for the FreeableBuffer.
size_t alloc_size = size;
if (alignment_ > alignof(std::max_align_t)) {
// malloc() will align to smaller values, but we must manually align to
// larger values.
alloc_size += alignment_;
}
void* buffer = std::malloc(alloc_size);
if (buffer == nullptr) {
void* aligned_buffer = ET_ALIGNED_ALLOC(alignment_, size);
if (aligned_buffer == nullptr) {
ET_LOG(
Error,
"Reading from %s at offset %zu: malloc(%zd) failed",
"Reading from %s at offset %zu: ET_ALIGNED_ALLOC(%zd, %zd) failed",
file_name_,
offset,
alignment_,
size);
return Error::MemoryAllocationFailed;
}

// Align.
void* aligned_buffer = align_pointer(buffer, alignment_);

// Assert that the alignment didn't overflow the buffer.
ET_DCHECK_MSG(
reinterpret_cast<uintptr_t>(aligned_buffer) + size <=
reinterpret_cast<uintptr_t>(buffer) + alloc_size,
"aligned_buffer %p + size %zu > buffer %p + alloc_size %zu",
aligned_buffer,
size,
buffer,
alloc_size);

auto err = load_into(offset, size, segment_info, aligned_buffer);
if (err != Error::Ok) {
// Free `buffer`, which is what malloc() gave us, not `aligned_buffer`.
std::free(buffer);
ET_ALIGNED_FREE(aligned_buffer);
return err;
}

// We can't naively free this pointer, since it may not be what malloc() gave
// us. Pass the offset to the real buffer as context. This is the number of
// bytes that need to be subtracted from the FreeableBuffer::data() pointer to
// find the actual pointer to free.
return FreeableBuffer(
aligned_buffer,
size,
FreeSegment,
/*free_fn_context=*/
reinterpret_cast<void*>(
// Using signed types here because it will produce a signed ptrdiff_t
// value, though for us it will always be non-negative.
reinterpret_cast<intptr_t>(aligned_buffer) -
reinterpret_cast<intptr_t>(buffer)));
// Pass the aligned_buffer pointer as context to FreeSegment.
return FreeableBuffer(aligned_buffer, size, FreeSegment, aligned_buffer);
}

Result<size_t> FileDataLoader::size() const {
Expand Down
4 changes: 2 additions & 2 deletions runtime/executor/test/backend_integration_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -656,8 +656,8 @@ class DelegateDataAlignmentTest : public ::testing::TestWithParam<bool> {
// The delegate data inline alignment used by the -da1024 file.
return 1024;
} else {
// A small alignment that's compatible with any realistic alignment.
return 4;
// Minimum alignment expected by program.cpp.
return alignof(std::max_align_t);
}
}

Expand Down
95 changes: 95 additions & 0 deletions runtime/platform/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,101 @@
using ssize_t = ptrdiff_t;
#endif

/**
* Platform-specific aligned memory allocation and deallocation.
*
* Usage:
* void* ptr = ET_ALIGNED_ALLOC(alignment, size);
* // use ptr...
* ET_ALIGNED_FREE(ptr);
*
* Note: alignment must be a power of 2 and size must be an integral multiple of
* alignment.
*/
#if defined(_MSC_VER)
#include <malloc.h>
#define ET_ALIGNED_ALLOC(alignment, size) \
_aligned_malloc(((size + alignment - 1) & ~(alignment - 1)), (alignment))
#define ET_ALIGNED_FREE(ptr) _aligned_free(ptr)
#elif defined(__APPLE__)
#include <stdlib.h> // For posix_memalign and free
inline void* et_apple_aligned_alloc(size_t alignment, size_t size) {
void* ptr = nullptr;
// The address of the allocated memory must be a multiple of sizeof(void*).
if (alignment < sizeof(void*)) {
alignment = sizeof(void*);
}
if (posix_memalign(
&ptr, alignment, (size + alignment - 1) & ~(alignment - 1)) != 0) {
return nullptr;
}
return ptr;
}
Comment on lines +190 to +203
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

would be nice to include citations for why we need all these varied solutions despite std::aligned_alloc being required in C++17. https://gitlab.com/gromacs/gromacs/-/issues/3968 is one example but I'm curious where you got this from.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah from: https://www.internalfb.com/code/fbsource/[0afaf0bdc6ee692ec9a9b3e3c57bca4af2930abc]/third-party/pocket_fft/pocketfft_hdronly.h?lines=156%2C165

And mostly from ci tests - xtensa seems to only have malloc available? Thanks for the link!

#define ET_ALIGNED_ALLOC(alignment, size) \
et_apple_aligned_alloc((alignment), (size))
#define ET_ALIGNED_FREE(ptr) free(ptr)
#elif __has_builtin(__builtin_aligned_alloc) || defined(_ISOC11_SOURCE)
// Linux and posix systems that support aligned_alloc and are >= C++17.
#include <cstdlib>
#define ET_ALIGNED_ALLOC(alignment, size) \
::aligned_alloc(alignment, (size + alignment - 1) & ~(alignment - 1))
#define ET_ALIGNED_FREE(ptr) free(ptr)
#else
// If the platform doesn't support aligned_alloc, fallback to malloc.
#include <stdint.h>
#include <cstdlib>
inline void* et_aligned_malloc(size_t alignment, size_t size) {
// Place to store the offset to the original pointer.
size_t offset_size = sizeof(uint16_t);

// Malloc extra space for offset + alignment.
size_t alloc_size = size + offset_size + alignment - 1;
void* ptr = std::malloc(alloc_size);

if (ptr == nullptr) {
// Malloc failed.
return nullptr;
}

uintptr_t addr = reinterpret_cast<uintptr_t>(ptr);
// Align the address past addr + offset_size bytes.
// This provides space to store the offset before the aligned pointer.
addr = addr + offset_size;
uintptr_t aligned_ptr = (addr + alignment - 1) & ~(alignment - 1);

// Check that alignment didn't overflow the buffer.
if (reinterpret_cast<uintptr_t>(aligned_ptr) + size >
reinterpret_cast<uintptr_t>(ptr) + alloc_size) {
std::free(ptr);
return nullptr;
}

// Store the offset to the original pointer.
// Used to free the original allocated buffer.
*(reinterpret_cast<uint16_t*>(aligned_ptr) - 1) =
(uint16_t)(reinterpret_cast<uintptr_t>(aligned_ptr) -
reinterpret_cast<uintptr_t>(ptr));

return reinterpret_cast<uint16_t*>(aligned_ptr);
}

inline void et_aligned_free(void* ptr) {
if (ptr == nullptr) {
return;
}

// Get the original pointer using the offset.
uint16_t* original_ptr = reinterpret_cast<uint16_t*>(
reinterpret_cast<uintptr_t>(ptr) -
*(reinterpret_cast<uint16_t*>(ptr) - 1));
std::free(original_ptr);
}

#define ET_ALIGNED_ALLOC(alignment, size) et_aligned_malloc((alignment), (size))
#define ET_ALIGNED_FREE(ptr) et_aligned_free(ptr)

#endif

// DEPRECATED: Use the non-underscore-prefixed versions instead.
// TODO(T199005537): Remove these once all users have stopped using them.
#define __ET_DEPRECATED ET_DEPRECATED
Expand Down
Loading