Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[AOT][Stack Allocator] Fix Initial Memory Misalignment #8487

Merged
merged 16 commits into from
Jul 30, 2021
13 changes: 10 additions & 3 deletions src/runtime/crt/memory/stack_allocator.c
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,15 @@ tvm_crt_error_t StackMemoryManager_Free(tvm_workspace_t* tvm_runtime_workspace,

/*!
 * Initialize the stack memory manager over a caller-provided buffer.
 *
 * The start of the buffer is rounded up to the next multiple of
 * TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES and the usable size is reduced by the
 * number of bytes skipped to reach that boundary.
 *
 * \param tvm_runtime_workspace Workspace descriptor to fill in.
 * \param g_aot_memory Start of the backing buffer; it does not have to be aligned.
 * \param workspace_size Size of the backing buffer in bytes.
 * \return kTvmErrorNoError.
 */
tvm_crt_error_t StackMemoryManager_Init(tvm_workspace_t* tvm_runtime_workspace,
                                        uint8_t* g_aot_memory, size_t workspace_size) {
  // We need to round up g_aot_memory in case it is not aligned to
  // TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES.
  // NOTE(review): the mask trick assumes the alignment is a power of two - confirm.
  uintptr_t unaligned_mask = TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES - 1;
  uint8_t* memory_aligned =
      (uint8_t*)(((uintptr_t)g_aot_memory + unaligned_mask) & ~unaligned_mask);
  // Bytes skipped at the front of the buffer to reach the aligned address.
  size_t offset = (size_t)(memory_aligned - g_aot_memory);

  tvm_runtime_workspace->next_alloc = memory_aligned;
  tvm_runtime_workspace->workspace = memory_aligned;
  // Clamp to zero so that a buffer smaller than the alignment padding yields an
  // empty workspace instead of an unsigned wrap-around to a huge size.
  tvm_runtime_workspace->workspace_size = (workspace_size > offset) ? workspace_size - offset : 0;
  return kTvmErrorNoError;
}
115 changes: 93 additions & 22 deletions tests/crt/aot_memory_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
* specific language governing permissions and limitations
* under the License.
*/

#include <gtest/gtest.h>
#include <tvm/runtime/crt/stack_allocator.h>

Expand All @@ -24,83 +25,126 @@

// Check with LIFO checks enabled for stack allocator
#define TVM_CRT_STACK_ALLOCATOR_ENABLE_LIFO_CHECK

// Number of bytes by which the tests deliberately misalign the memory pointer
#define NUM_MEMORY_MISALIGNMENT_BYTES 1

/*!
 * Round a memory pointer up to the next TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES boundary.
 * The pointer referenced by memory_ptr is advanced in place; a pointer that is
 * already aligned is left where it is.
 * \param memory_ptr In/out pointer to align.
 * \return Number of bytes the pointer was advanced.
 */
static uint32_t align_pointer(uint8_t** memory_ptr) {
  // Distance past the previous alignment boundary.
  const uint32_t remainder = (uintptr_t)(*memory_ptr) % TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES;
  // The mask folds the "already aligned" case (a full alignment step) down to zero.
  const uint32_t padding =
      (TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES - remainder) & (TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES - 1);
  *memory_ptr = *memory_ptr + padding;
  return padding;
}

/*!
 * Push a memory pointer off the TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES boundary.
 * If the pointer referenced by memory_ptr is aligned it is advanced by
 * NUM_MEMORY_MISALIGNMENT_BYTES; a pointer that is already misaligned is left untouched.
 * \param memory_ptr In/out pointer to adjust.
 * \return Number of bytes the pointer was advanced (0 if it was left untouched).
 */
static uint32_t misalign_pointer(uint8_t** memory_ptr) {
  uint32_t extra = (uintptr_t)(*memory_ptr) % TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES;
  if (extra != 0) {
    // Already off the boundary: nothing to do.
    return 0;
  }
  *memory_ptr += NUM_MEMORY_MISALIGNMENT_BYTES;
  // Report the distance actually moved rather than a hard-coded 1, so the return
  // value stays correct if NUM_MEMORY_MISALIGNMENT_BYTES is changed.
  return NUM_MEMORY_MISALIGNMENT_BYTES;
}

/*
 * Tests allocations are properly aligned when allocated.
 */
TEST(AOTMemory, Allocate) {
  static uint8_t model_memory[128];
  tvm_workspace_t tvm_runtime_workspace;
  // Align the start of the buffer up front so the expected block addresses below
  // can be written as fixed offsets from model_memory_ptr.
  uint8_t* model_memory_ptr = model_memory;
  uint32_t offset = align_pointer(&model_memory_ptr);
  ASSERT_EQ(StackMemoryManager_Init(&tvm_runtime_workspace, model_memory_ptr,
                                    sizeof(model_memory) - offset),
            kTvmErrorNoError);

  // The first block is expected at the very start of the aligned workspace.
  void* block_one = NULL;
  ASSERT_EQ(StackMemoryManager_Allocate_Body(&tvm_runtime_workspace, 1, &block_one, 1),
            kTvmErrorNoError);
  ASSERT_EQ(block_one, &model_memory_ptr[0]);

  // Each following block is expected after the previous block plus one tag.
  void* block_two = NULL;
  ASSERT_EQ(StackMemoryManager_Allocate_Body(&tvm_runtime_workspace, 2, &block_two, 1),
            kTvmErrorNoError);
  ASSERT_EQ(block_two, &model_memory_ptr[16 + STACK_ALLOCATOR_TAG_SIZE_BYTES]);

  void* two_blocks = NULL;
  ASSERT_EQ(StackMemoryManager_Allocate_Body(&tvm_runtime_workspace, 24, &two_blocks, 1),
            kTvmErrorNoError);
  ASSERT_EQ(two_blocks, &model_memory_ptr[32 + 2 * STACK_ALLOCATOR_TAG_SIZE_BYTES]);

  // The 24-byte request above is expected to have consumed 32 bytes.
  void* block_three = NULL;
  ASSERT_EQ(StackMemoryManager_Allocate_Body(&tvm_runtime_workspace, 1, &block_three, 1),
            kTvmErrorNoError);
  ASSERT_EQ(block_three, &model_memory_ptr[64 + 3 * STACK_ALLOCATOR_TAG_SIZE_BYTES]);
}

/*
 * Tests resetting the stack after dealloc.
 */
TEST(AOTMemory, Free) {
  static uint8_t model_memory[80];
  tvm_workspace_t tvm_runtime_workspace;
  // Align the start of the buffer up front so the expected block addresses below
  // can be written as fixed offsets from model_memory_ptr.
  uint8_t* model_memory_ptr = model_memory;
  uint32_t offset = align_pointer(&model_memory_ptr);
  ASSERT_EQ(StackMemoryManager_Init(&tvm_runtime_workspace, model_memory_ptr,
                                    sizeof(model_memory) - offset),
            kTvmErrorNoError);

  void* block_one = NULL;
  ASSERT_EQ(StackMemoryManager_Allocate_Body(&tvm_runtime_workspace, 1, &block_one, 1),
            kTvmErrorNoError);
  ASSERT_EQ(block_one, &model_memory_ptr[0]);

  void* block_two = NULL;
  ASSERT_EQ(StackMemoryManager_Allocate_Body(&tvm_runtime_workspace, 1, &block_two, 1),
            kTvmErrorNoError);
  ASSERT_EQ(block_two, &model_memory_ptr[16 + STACK_ALLOCATOR_TAG_SIZE_BYTES]);
  ASSERT_EQ(kTvmErrorNoError, StackMemoryManager_Free_Body(&tvm_runtime_workspace, block_two, 1));

  // After freeing the top block, the next allocation must reuse the same address.
  void* two_blocks = NULL;
  ASSERT_EQ(StackMemoryManager_Allocate_Body(&tvm_runtime_workspace, 2, &two_blocks, 1),
            kTvmErrorNoError);
  ASSERT_EQ(two_blocks, &model_memory_ptr[16 + STACK_ALLOCATOR_TAG_SIZE_BYTES]);
  ASSERT_EQ(kTvmErrorNoError, StackMemoryManager_Free_Body(&tvm_runtime_workspace, two_blocks, 1));

  void* block_three = NULL;
  ASSERT_EQ(StackMemoryManager_Allocate_Body(&tvm_runtime_workspace, 1, &block_three, 1),
            kTvmErrorNoError);
  ASSERT_EQ(block_three, &model_memory_ptr[16 + STACK_ALLOCATOR_TAG_SIZE_BYTES]);
}

/*
* Tests we return NULL if we over allocate
* Tests we return NULL if we over allocate.
*/
TEST(AOTMemory, OverAllocate) {
static uint8_t model_memory[72];
static uint8_t model_memory[80];
tvm_workspace_t tvm_runtime_workspace;
ASSERT_EQ(StackMemoryManager_Init(&tvm_runtime_workspace, model_memory, 80), kTvmErrorNoError);
uint8_t* model_memory_ptr = model_memory;
uint32_t offset = align_pointer(&model_memory_ptr);
ASSERT_EQ(StackMemoryManager_Init(&tvm_runtime_workspace, model_memory_ptr,
sizeof(model_memory) - offset),
kTvmErrorNoError);

void* block_one = NULL;
ASSERT_EQ(StackMemoryManager_Allocate_Body(&tvm_runtime_workspace, 1, &block_one, 1),
kTvmErrorNoError);
ASSERT_EQ(block_one, &model_memory[0]);
ASSERT_EQ(block_one, &model_memory_ptr[0]);

void* block_two = NULL;
ASSERT_EQ(StackMemoryManager_Allocate_Body(&tvm_runtime_workspace, 1, &block_two, 1),
kTvmErrorNoError);
ASSERT_EQ(block_two, &model_memory[16 + STACK_ALLOCATOR_TAG_SIZE_BYTES]);
ASSERT_EQ(block_two, &model_memory_ptr[16 + STACK_ALLOCATOR_TAG_SIZE_BYTES]);

void* two_blocks = NULL;
ASSERT_EQ(StackMemoryManager_Allocate_Body(&tvm_runtime_workspace, 64, &two_blocks, 1),
Expand All @@ -109,27 +153,54 @@ TEST(AOTMemory, OverAllocate) {
}

/*
 * Test for out-of-order memory deallocation.
 */
TEST(AOTMemory, FreeOutOfOrder) {
  static uint8_t model_memory[80];
  tvm_workspace_t tvm_runtime_workspace;
  // Align the start of the buffer up front so the expected block addresses below
  // can be written as fixed offsets from model_memory_ptr.
  uint8_t* model_memory_ptr = model_memory;
  uint32_t offset = align_pointer(&model_memory_ptr);
  ASSERT_EQ(StackMemoryManager_Init(&tvm_runtime_workspace, model_memory_ptr,
                                    sizeof(model_memory) - offset),
            kTvmErrorNoError);

  void* block_one = NULL;
  ASSERT_EQ(StackMemoryManager_Allocate_Body(&tvm_runtime_workspace, 1, &block_one, 1),
            kTvmErrorNoError);
  ASSERT_EQ(block_one, &model_memory_ptr[0]);

  void* block_two = NULL;
  ASSERT_EQ(StackMemoryManager_Allocate_Body(&tvm_runtime_workspace, 1, &block_two, 1),
            kTvmErrorNoError);
  ASSERT_EQ(block_two, &model_memory_ptr[16 + STACK_ALLOCATOR_TAG_SIZE_BYTES]);

  // Freeing the older block while a newer one is still live must be rejected.
  ASSERT_EQ(StackMemoryManager_Free_Body(&tvm_runtime_workspace, block_one, 1),
            kTvmErrorPlatformStackAllocBadFree);
}

/*
 * Test for initial memory misalignment.
 * Hands StackMemoryManager_Init a buffer start that has been pushed off the alignment
 * boundary and checks that the workspace begins at the next aligned address with its
 * size reduced to match.
 */
TEST(AOTMemory, InitialMemoryMisAlignment) {
static uint8_t model_memory[80];
tvm_workspace_t tvm_runtime_workspace;
uint8_t* model_memory_ptr = model_memory;

// Add misalignment to memory pointer
uint32_t offset = misalign_pointer(&model_memory_ptr);

// Calculate expected offset. A copy of the pointer is aligned so that
// model_memory_ptr itself is passed to Init unchanged.
uint8_t* misaligned_ptr = model_memory_ptr;
uint32_t alignment_offset = align_pointer(&misaligned_ptr);

ASSERT_EQ(StackMemoryManager_Init(&tvm_runtime_workspace, model_memory_ptr,
sizeof(model_memory) - offset),
kTvmErrorNoError);

// The workspace must start at the rounded-up address, and its size must exclude
// both the bytes skipped by misalign_pointer and the alignment padding.
ASSERT_EQ(tvm_runtime_workspace.next_alloc, &model_memory_ptr[alignment_offset]);
ASSERT_EQ(tvm_runtime_workspace.workspace_size, sizeof(model_memory) - offset - alignment_offset);
}

int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv);
testing::FLAGS_gtest_death_test_style = "threadsafe";
Expand Down
16 changes: 10 additions & 6 deletions tests/python/relay/aot/aot_test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,10 @@ def emit_main_network_definition(main_file, mod_name):


def emit_main_prologue(main_file, workspace_bytes):
main_file.write(f"#define WORKSPACE_SIZE ({workspace_bytes})\n")
# Add TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES because of memory alignment.
main_file.write(
f"#define WORKSPACE_SIZE ({workspace_bytes} + TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES)\n"
)
main_file.write("static uint8_t g_aot_memory[WORKSPACE_SIZE];\n")
main_file.write("tvm_workspace_t app_workspace;\n")
main_file.write(
Expand All @@ -125,9 +128,8 @@ def emit_main_prologue(main_file, workspace_bytes):
void TVMLogf(const char* msg, ...) { }

TVM_DLL int TVMFuncRegisterGlobal(const char* name, TVMFunctionHandle f, int override) {}
int main(){\n

"""
int main(){\n
"""
)


Expand Down Expand Up @@ -157,6 +159,7 @@ def emit_main_run(main_file, input_list, output_list, mod_name):
main_file.write(
f'tvm_runtime_run(&{mangle_name(mod_name,"network")}, {mangle_name(mod_name,"inputs")}, {mangle_name(mod_name,"outputs")});'
)
main_file.write("\n")


def emit_main_compare(main_file, output_list, mod_name):
Expand All @@ -165,17 +168,18 @@ def emit_main_compare(main_file, output_list, mod_name):
main_file.write(f'for (int i = 0; i<{mangle_name(mod_name,"output_data")}{i}_len; i++){{\n')
if is_float_dtype:
main_file.write(
f'if (fabs({mangle_name(mod_name,"output_data")}{i}[i]-{mangle_name(mod_name,"expected_output_data")}{i}[i]) > 0.001f){{printf("ko\\n");return -1;}}\n'
f'if (fabs({mangle_name(mod_name,"output_data")}{i}[i]-{mangle_name(mod_name,"expected_output_data")}{i}[i]) > 0.001f){{\n\tprintf("ko\\n");\n\treturn -1;}}\n'
)
else:
main_file.write(
f'if ({mangle_name(mod_name,"output_data")}{i}[i]!={mangle_name(mod_name, "expected_output_data")}{i}[i]){{printf("ko\\n");return -1;}}\n'
f'if ({mangle_name(mod_name,"output_data")}{i}[i]!={mangle_name(mod_name, "expected_output_data")}{i}[i]){{\n\tprintf("ko\\n");\n\treturn -1;}}\n'
)
main_file.write("}\n")


def emit_main_init_memory_manager(main_file):
    """Write the StackMemoryManager_Init call for the generated C main.

    Parameters
    ----------
    main_file : file-like object
        Open, writable text stream that receives the generated C source.
    """
    main_file.write("StackMemoryManager_Init(&app_workspace, g_aot_memory, WORKSPACE_SIZE);")
    # Terminate the line so the next emitted statement starts on its own line.
    main_file.write("\n")


def emit_main_epilogue(main_file):
Expand Down