diff --git a/tests/python/relay/aot/aot_test_utils.py b/tests/python/relay/aot/aot_test_utils.py
index 36c415ec8c83..e5ac85b115aa 100644
--- a/tests/python/relay/aot/aot_test_utils.py
+++ b/tests/python/relay/aot/aot_test_utils.py
@@ -21,6 +21,7 @@
 import logging
 import os
 import pathlib
+import platform
 import shutil
 import subprocess
 import tarfile
@@ -39,6 +40,9 @@
 
 _LOG = logging.getLogger(__name__)
 
+AOT_SUCCESS_TOKEN = "AOT_TEST_SUCCESS"
+AOT_FAILURE_TOKEN = "AOT_TEST_FAILURE"
+
 
 class AOTTestModel(NamedTuple):
     """Class to describe a model under test
@@ -64,6 +68,41 @@ class AOTTestModel(NamedTuple):
     params: Optional[Dict[str, np.array]] = None
 
 
+class AOTTestRunner(NamedTuple):
+    """Class to describe a test runner for AOT code
+
+    Parameters
+    ----------
+    makefile: str
+        Premade Makefile to use from the AOT test folder
+    prologue: str
+        Code to prepend to the main function
+    includes: List[str]
+        Additional includes required to run the AOT test runner
+    parameters: Map[str, str]
+        Additional parameters to pass to the make command
+    """
+
+    makefile: str = "default"
+    prologue: str = ""
+    includes: List[str] = []
+    parameters: Dict[str, str] = {}
+
+
+AOT_DEFAULT_RUNNER = AOTTestRunner()
+
+# AOT Test Runner using the Arm® Corstone™-300 Reference Systems
+# see: https://developer.arm.com/ip-products/subsystem/corstone/corstone-300
+AOT_CORSTONE300_RUNNER = AOTTestRunner(
+    makefile="corstone300",
+    prologue="""
+    uart_init();
+    """,
+    includes=["uart.h"],
+    parameters={"NPU_VARIANT": "256"},
+)
+
+
 def mangle_name(mod_name, name):
     mod_name = mangle_module_name(mod_name)
     return mod_name + "_" + name
@@ -112,20 +151,41 @@ def convert_to_list(x):
 def parametrize_aot_options(test):
     """Parametrize over valid option combinations"""
 
+    skip_i386 = pytest.mark.skipif(
+        platform.machine() == "i686", reason="Reference system unavailable in i386 container"
+    )
     interface_api = ["packed", "c"]
     use_unpacked_api = [True, False]
-    use_calculated_workspaces = [True, False]
+    test_runner = [AOT_DEFAULT_RUNNER, AOT_CORSTONE300_RUNNER]
+
+    all_combinations = itertools.product(interface_api, use_unpacked_api, test_runner)
 
-    all_combinations = itertools.product(interface_api, use_unpacked_api, use_calculated_workspaces)
     # Filter out packed operators with c interface
     valid_combinations = filter(
-        lambda parameters: not (parameters[0] == "c" and parameters[1] == False),
+        lambda parameters: not (parameters[0] == "c" and not parameters[1]),
         all_combinations,
     )
 
-    return pytest.mark.parametrize(
-        ["interface_api", "use_unpacked_api", "use_calculated_workspaces"],
+    # Only use reference system for C interface and unpacked API calls
+    valid_combinations = filter(
+        lambda parameters: not (
+            parameters[2] == AOT_CORSTONE300_RUNNER
+            and (parameters[0] == "packed" or not parameters[1])
+        ),
         valid_combinations,
+    )
+
+    # Skip reference system tests if running in i386 container
+    marked_combinations = map(
+        lambda parameters: pytest.param(*parameters, marks=skip_i386)
+        if parameters[2] == AOT_CORSTONE300_RUNNER
+        else parameters,
+        valid_combinations,
+    )
+
+    return pytest.mark.parametrize(
+        ["interface_api", "use_unpacked_api", "test_runner"],
+        marked_combinations,
     )(test)
 
 
@@ -160,7 +220,7 @@ def subprocess_log_output(cmd, cwd, logfile):
     return proc.wait()
 
 
-def emit_main_prologue(main_file, workspace_bytes):
+def emit_main_prologue(main_file, custom_prologue, workspace_bytes):
     # Add TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES because of memory alignment.
     main_file.write(
         f"#define WORKSPACE_SIZE ({workspace_bytes} + TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES)\n"
@@ -185,6 +245,7 @@ def emit_main_prologue(main_file, workspace_bytes):
 int main(){\n
 """
     )
+    main_file.write(custom_prologue)
 
 
 def emit_main_data(main_file, input_map, output_list, mod_name):
@@ -297,11 +358,11 @@ def emit_main_compare(main_file, output_list, mod_name):
         main_file.write(f"for (int i = 0; i<{actual_data_name}{i}_len; i++){{\n")
         if is_float_dtype:
             main_file.write(
-                f'if (fabs({actual_data_name}{i}[i]-{expected_data_name}{i}[i]) > 0.001f){{\n\tprintf("ko\\n");\n\treturn -1;}}\n'
+                f'if (fabs({actual_data_name}{i}[i]-{expected_data_name}{i}[i]) > 0.001f){{\n\tprintf("{AOT_FAILURE_TOKEN}\\n");\n\treturn -1;}}\n'
             )
         else:
             main_file.write(
-                f'if ({actual_data_name}{i}[i]!={expected_data_name}{i}[i]){{\n\tprintf("ko\\n");\n\treturn -1;}}\n'
+                f'if ({actual_data_name}{i}[i]!={expected_data_name}{i}[i]){{\n\tprintf("{AOT_FAILURE_TOKEN}\\n");\n\treturn -1;}}\n'
             )
         main_file.write("}\n")
 
@@ -312,36 +373,40 @@ def emit_main_init_memory_manager(main_file):
 
 
 def emit_main_epilogue(main_file):
-    main_file.write('printf("ok\\n");')
+    main_file.write(f'printf("{AOT_SUCCESS_TOKEN}\\n");')
     main_file.write("return 0;")
     main_file.write("}\n")
 
 
-def emit_main_common_includes(main_file):
+def emit_main_common_includes(main_file, custom_includes):
     main_file.write("#include <stdio.h>\n")
     main_file.write("#include <math.h>\n")
     main_file.write('#include "tvm/runtime/c_runtime_api.h"\n')
     main_file.write('#include "tvm/runtime/crt/stack_allocator.h"\n')
+    for include in custom_includes:
+        main_file.write(f'#include "{include}"\n')
 
 
 def emit_main_micro_include(main_file, mod_name):
     main_file.write(f"#include <{mangle_module_name(mod_name)}.h>\n")
 
 
-def create_main(test_name, models, output_path, interface_api, workspace_bytes):
+def create_main(
+    test_name, models, output_path, custom_includes, custom_prologue, interface_api, workspace_bytes
+):
     file_path = pathlib.Path(f"{output_path}/" + test_name).resolve()
     # create header file
     raw_path = file_path.with_suffix(".c").resolve()
     with open(raw_path, "w") as main_file:
-        emit_main_common_includes(main_file)
+        emit_main_common_includes(main_file, custom_includes)
 
         if interface_api == "c":
             for model in models:
                 emit_main_micro_include(main_file, model.name)
-
-        emit_main_prologue(main_file, workspace_bytes)
         for model in models:
             emit_main_data(main_file, model.inputs, model.outputs, model.name)
+
+        emit_main_prologue(main_file, custom_prologue, workspace_bytes)
         emit_main_init_memory_manager(main_file)
 
         if interface_api == "c":
@@ -396,9 +461,10 @@ def extract_main_workspace_size_bytes(extract_dir):
 
 def compile_and_run(
     models: Union[List[AOTTestModel], AOTTestModel],
+    runner: AOTTestRunner,
     interface_api,
     use_unpacked_api,
-    use_calculated_workspaces,
+    debug_calculated_workspaces=False,
     workspace_byte_alignment=8,
     enable_op_fusion=True,
 ):
@@ -414,7 +480,7 @@ def compile_and_run(
         models = [models]
 
     # The calculated workspaces will not account for stack allocator tags used for debugging
-    if not use_calculated_workspaces:
+    if debug_calculated_workspaces:
         cflags += "-DTVM_CRT_STACK_ALLOCATOR_ENABLE_LIFO_CHECK "
 
     config = {"tir.disable_vectorize": True}
@@ -452,10 +518,7 @@ def compile_and_run(
         t = tarfile.open(tar_file)
         t.extractall(base_path)
 
-        if use_calculated_workspaces:
-            workspace_bytes += extract_main_workspace_size_bytes(base_path)
-        else:
-            workspace_bytes += 16384 * 1024
+        workspace_bytes += extract_main_workspace_size_bytes(base_path)
 
         for key in model.inputs:
             create_header_file(
@@ -480,6 +543,8 @@ def compile_and_run(
         "test.c",
         models,
         build_path,
+        runner.includes,
+        runner.prologue,
         interface_api,
         workspace_bytes,
     )
@@ -487,24 +552,32 @@ def compile_and_run(
     # Verify that compiles fine
     file_dir = os.path.dirname(os.path.abspath(__file__))
     codegen_path = os.path.join(base_path, "codegen")
-    makefile = os.path.join(file_dir, "aot_test.mk")
-    make_cmd = (
-        f"make CFLAGS='{cflags}' -f {makefile} build_dir="
-        + build_path
+    makefile = os.path.join(file_dir, f"{runner.makefile}.mk")
+    custom_params = " ".join([f" {param}='{value}'" for param, value in runner.parameters.items()])
+    make_command = (
+        f"make -f {makefile} build_dir={build_path}"
+        + f" CFLAGS='{cflags}'"
         + f" TVM_ROOT={file_dir}/../../../.."
+        + f" AOT_TEST_ROOT={file_dir}"
         + f" CODEGEN_ROOT={codegen_path}"
         + f" STANDALONE_CRT_DIR={tvm.micro.get_standalone_crt_dir()}"
+        + custom_params
     )
 
     compile_log_path = os.path.join(build_path, "test_compile.log")
-    ret = subprocess_log_output(make_cmd, ".", compile_log_path)
+    compile_command = f"{make_command} aot_test_runner"
+    ret = subprocess_log_output(compile_command, ".", compile_log_path)
     assert ret == 0
 
     # Verify that runs fine
     run_log_path = os.path.join(build_path, "test_run.log")
-    ret = subprocess_log_output("./aot_test_runner", build_path, run_log_path)
+    run_command = f"{make_command} run"
+    ret = subprocess_log_output(run_command, build_path, run_log_path)
     assert ret == 0
 
+    with open(run_log_path) as run_log:
+        assert AOT_SUCCESS_TOKEN in run_log.read()
+
 
 def generate_ref_data(mod, input_data, params=None, target="llvm"):
     """Generate reference data through executing the relay module"""
diff --git a/tests/python/relay/aot/corstone300.ld b/tests/python/relay/aot/corstone300.ld
new file mode 100644
index 000000000000..4a6b22480d9f
--- /dev/null
+++ b/tests/python/relay/aot/corstone300.ld
@@ -0,0 +1,287 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*------------------ Reference System Memories -------------
+  +===================+============+=======+============+============+
+  | Memory            | Address    | Size  | CPU Access | NPU Access |
+  +===================+============+=======+============+============+
+  | ITCM              | 0x00000000 | 512KB | Yes (RO)   | No         |
+  +-------------------+------------+-------+------------+------------+
+  | DTCM              | 0x20000000 | 512KB | Yes (R/W)  | No         |
+  +-------------------+------------+-------+------------+------------+
+  | SSE-300 SRAM      | 0x21000000 |   2MB | Yes (R/W)  | Yes (R/W)  |
+  +-------------------+------------+-------+------------+------------+
+  | Data SRAM         | 0x01000000 |   2MB | Yes (R/W)  | Yes (R/W)  |
+  +-------------------+------------+-------+------------+------------+
+  | DDR               | 0x60000000 |  32MB | Yes (R/W)  | Yes (R/W)  |
+  +-------------------+------------+-------+------------+------------+ */
+
+/*---------------------- ITCM Configuration ----------------------------------
+  <h> Flash Configuration
+    <o0> Flash Base Address <0x0-0xFFFFFFFF:8>
+    <o1> Flash Size (in Bytes) <0x0-0xFFFFFFFF:8>
+  </h>
+  -----------------------------------------------------------------------------*/
+__ROM_BASE = 0x00000000;
+__ROM_SIZE = 0x00080000;
+
+/*--------------------- DTCM RAM Configuration ----------------------------
+  <h> RAM Configuration
+    <o0> RAM Base Address    <0x0-0xFFFFFFFF:8>
+    <o1> RAM Size (in Bytes) <0x0-0xFFFFFFFF:8>
+  </h>
+ -----------------------------------------------------------------------------*/
+__RAM_BASE = 0x20000000;
+__RAM_SIZE = 0x00080000;
+
+/*----------------------- Data SRAM Configuration ------------------------------
+  <h> Data SRAM Configuration
+    <o0> DATA_SRAM Base Address    <0x0-0xFFFFFFFF:8>
+    <o1> DATA_SRAM Size (in Bytes) <0x0-0xFFFFFFFF:8>
+  </h>
+ -----------------------------------------------------------------------------*/
+__DATA_SRAM_BASE = 0x01000000;
+__DATA_SRAM_SIZE = 0x00200000;
+
+/*--------------------- Embedded SRAM Configuration ----------------------------
+  <h> SRAM Configuration
+    <o0> SRAM Base Address    <0x0-0xFFFFFFFF:8>
+    <o1> SRAM Size (in Bytes) <0x0-0xFFFFFFFF:8>
+  </h>
+ -----------------------------------------------------------------------------*/
+__SRAM_BASE = 0x21000000;
+__SRAM_SIZE = 0x00200000;
+
+/*--------------------- Stack / Heap Configuration ----------------------------
+  <h> Stack / Heap Configuration
+    <o0> Stack Size (in Bytes) <0x0-0xFFFFFFFF:8>
+    <o1> Heap Size (in Bytes) <0x0-0xFFFFFFFF:8>
+  </h>
+  -----------------------------------------------------------------------------*/
+__STACK_SIZE = 0x00008000;
+__HEAP_SIZE  = 0x00008000;
+
+/*--------------------- Embedded RAM Configuration ----------------------------
+  <h> DDR Configuration
+    <o0> DDR Base Address    <0x0-0xFFFFFFFF:8>
+    <o1> DDR Size (in Bytes) <0x0-0xFFFFFFFF:8>
+  </h>
+ -----------------------------------------------------------------------------*/
+__DDR_BASE = 0x60000000;
+__DDR_SIZE = 0x02000000;
+
+/*
+ *-------------------- <<< end of configuration section >>> -------------------
+ */
+
+MEMORY
+{
+  ITCM       (rx)  : ORIGIN = __ROM_BASE, LENGTH = __ROM_SIZE
+  DTCM       (rwx) : ORIGIN = __RAM_BASE, LENGTH = __RAM_SIZE
+  DATA_SRAM  (rwx) : ORIGIN = __DATA_SRAM_BASE, LENGTH = __DATA_SRAM_SIZE
+  SRAM       (rwx) : ORIGIN = __SRAM_BASE, LENGTH = __SRAM_SIZE
+  DDR        (rwx) : ORIGIN = __DDR_BASE, LENGTH = __DDR_SIZE
+}
+
+/* Linker script to place sections and symbol values. Should be used together
+ * with other linker script that defines memory regions ITCM and RAM.
+ * It references following symbols, which must be defined in code:
+ *   Reset_Handler : Entry of reset handler
+ *
+ * It defines following symbols, which code can use without definition:
+ *   __exidx_start
+ *   __exidx_end
+ *   __copy_table_start__
+ *   __copy_table_end__
+ *   __zero_table_start__
+ *   __zero_table_end__
+ *   __etext
+ *   __data_start__
+ *   __preinit_array_start
+ *   __preinit_array_end
+ *   __init_array_start
+ *   __init_array_end
+ *   __fini_array_start
+ *   __fini_array_end
+ *   __data_end__
+ *   __bss_start__
+ *   __bss_end__
+ *   __end__
+ *   end
+ *   __HeapLimit
+ *   __StackLimit
+ *   __StackTop
+ *   __stack
+ */
+ENTRY(Reset_Handler)
+
+SECTIONS
+{
+  .text :
+  {
+    KEEP(*(.vectors))
+    *(.text*)
+
+    KEEP(*(.init))
+    KEEP(*(.fini))
+
+    /* .ctors */
+    *crtbegin.o(.ctors)
+    *crtbegin?.o(.ctors)
+    *(EXCLUDE_FILE(*crtend?.o *crtend.o) .ctors)
+    *(SORT(.ctors.*))
+    *(.ctors)
+
+    /* .dtors */
+    *crtbegin.o(.dtors)
+    *crtbegin?.o(.dtors)
+    *(EXCLUDE_FILE(*crtend?.o *crtend.o) .dtors)
+    *(SORT(.dtors.*))
+    *(.dtors)
+
+    *(.rodata*)
+
+    KEEP(*(.eh_frame*))
+  } > ITCM
+
+  .ARM.extab :
+  {
+    *(.ARM.extab* .gnu.linkonce.armextab.*)
+  } > ITCM
+
+  __exidx_start = .;
+  .ARM.exidx :
+  {
+    *(.ARM.exidx* .gnu.linkonce.armexidx.*)
+  } > ITCM
+  __exidx_end = .;
+
+  .copy.table :
+  {
+    . = ALIGN(4);
+    __copy_table_start__ = .;
+    LONG (__etext)
+    LONG (__data_start__)
+    LONG (__data_end__ - __data_start__)
+    /* Add each additional data section here */
+    __copy_table_end__ = .;
+  } > ITCM
+
+  .zero.table :
+  {
+    . = ALIGN(4);
+    __zero_table_start__ = .;
+    __zero_table_end__ = .;
+  } > ITCM
+
+  /**
+   * Location counter can end up 2byte aligned with narrow Thumb code but
+   * __etext is assumed by startup code to be the LMA of a section in DTCM
+   * which must be 4byte aligned
+   */
+  __etext = ALIGN (4);
+
+  .data : AT (__etext)
+  {
+    __data_start__ = .;
+    *(vtable)
+    *(.data)
+    *(.data.*)
+
+    . = ALIGN(4);
+    /* preinit data */
+    PROVIDE_HIDDEN (__preinit_array_start = .);
+    KEEP(*(.preinit_array))
+    PROVIDE_HIDDEN (__preinit_array_end = .);
+
+    . = ALIGN(4);
+    /* init data */
+    PROVIDE_HIDDEN (__init_array_start = .);
+    KEEP(*(SORT(.init_array.*)))
+    KEEP(*(.init_array))
+    PROVIDE_HIDDEN (__init_array_end = .);
+
+
+    . = ALIGN(4);
+    /* finit data */
+    PROVIDE_HIDDEN (__fini_array_start = .);
+    KEEP(*(SORT(.fini_array.*)))
+    KEEP(*(.fini_array))
+    PROVIDE_HIDDEN (__fini_array_end = .);
+
+    KEEP(*(.jcr*))
+    . = ALIGN(4);
+    /* All data end */
+    __data_end__ = .;
+
+  } > DTCM
+
+  .sram :
+  {
+    . = ALIGN(16);
+    *(.bss.ethosu_fast_memory);
+    . = ALIGN(16);
+  } > SRAM AT > SRAM
+
+  .bss.NoInit :
+  {
+    . = ALIGN(16);
+    *(.bss.NoInit)
+    . = ALIGN(16);
+  } > DDR AT > DDR
+
+  .bss :
+  {
+    . = ALIGN(4);
+    __bss_start__ = .;
+    *(.bss)
+    *(.bss.*)
+    *(COMMON)
+    . = ALIGN(4);
+    __bss_end__ = .;
+  } > DTCM AT > DTCM
+
+  .data_sram :
+  {
+    . = ALIGN(16);
+  } > DATA_SRAM
+
+  .heap (COPY) :
+  {
+    . = ALIGN(8);
+    __end__ = .;
+    PROVIDE(end = .);
+    . = . + __HEAP_SIZE;
+    . = ALIGN(8);
+    __HeapLimit = .;
+  } > DTCM
+
+  .stack (ORIGIN(DTCM) + LENGTH(DTCM) - __STACK_SIZE) (COPY) :
+  {
+    . = ALIGN(8);
+    __StackLimit = .;
+    . = . + __STACK_SIZE;
+    . = ALIGN(8);
+    __StackTop = .;
+  } > DTCM
+  PROVIDE(__stack = __StackTop);
+
+  /* Check if data + stack exceeds DTCM limit */
+  ASSERT(__StackLimit >= __bss_end__, "region DTCM overflowed with stack")
+}
diff --git a/tests/python/relay/aot/corstone300.mk b/tests/python/relay/aot/corstone300.mk
new file mode 100644
index 000000000000..bca5dd266491
--- /dev/null
+++ b/tests/python/relay/aot/corstone300.mk
@@ -0,0 +1,109 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Makefile to build and run AOT tests against the reference system
+
+# Setup build environment
+build_dir := build
+TVM_ROOT=$(shell cd ../../../../..; pwd)
+CRT_ROOT ?= ${TVM_ROOT}/build/standalone_crt
+ifeq ($(shell ls -lhd $(CRT_ROOT)),)
+$(error "CRT not found. Ensure you have built the standalone_crt target and try again")
+endif
+
+ARM_CPU=ARMCM55
+DMLC_CORE=${TVM_ROOT}/3rdparty/dmlc-core
+ETHOSU_PATH=/opt/arm/ethosu
+CMSIS_PATH=${ETHOSU_PATH}/cmsis
+PLATFORM_PATH=${ETHOSU_PATH}/core_platform/targets/corstone-300
+PKG_COMPILE_OPTS = -g -Wall -O2 -Wno-incompatible-pointer-types -Wno-format -mcpu=cortex-m55 -mthumb -mfloat-abi=hard -std=gnu99
+CC = arm-none-eabi-gcc
+AR = arm-none-eabi-ar
+RANLIB = arm-none-eabi-ranlib
+CC_OPTS = CC=$(CC) AR=$(AR) RANLIB=$(RANLIB)
+PKG_CFLAGS = ${PKG_COMPILE_OPTS} \
+	${CFLAGS} \
+	-I$(build_dir)/../include \
+	-I$(CODEGEN_ROOT)/host/include \
+	-I${PLATFORM_PATH} \
+	-I${CMSIS_PATH}/Device/ARM/${ARM_CPU}/Include/ \
+	-I${CMSIS_PATH}/CMSIS/Core/Include \
+	-isystem$(STANDALONE_CRT_DIR)/include \
+
+PKG_LDFLAGS = -lm -specs=nosys.specs -static -T ${AOT_TEST_ROOT}/corstone300.ld
+
+$(ifeq VERBOSE,1)
+QUIET ?=
+$(else)
+QUIET ?= @
+$(endif)
+
+CRT_SRCS = $(shell find $(CRT_ROOT))
+CODEGEN_SRCS = $(shell find $(abspath $(CODEGEN_ROOT)/host/src/*.c))
+CODEGEN_OBJS = $(subst .c,.o,$(CODEGEN_SRCS))
+CMSIS_STARTUP_SRCS = $(shell find ${CMSIS_PATH}/Device/ARM/${ARM_CPU}/Source/*.c)
+UART_SRCS = $(shell find ${PLATFORM_PATH}/*.c)
+
+aot_test_runner: $(build_dir)/aot_test_runner
+
+$(build_dir)/stack_allocator.o: $(TVM_ROOT)/src/runtime/crt/memory/stack_allocator.c
+	$(QUIET)mkdir -p $(@D)
+	$(QUIET)$(CC) -c $(PKG_CFLAGS) -o $@  $^
+
+$(build_dir)/crt_backend_api.o: $(TVM_ROOT)/src/runtime/crt/common/crt_backend_api.c
+	$(QUIET)mkdir -p $(@D)
+	$(QUIET)$(CC) -c $(PKG_CFLAGS) -o $@  $^
+
+$(build_dir)/libcodegen.a: $(CODEGEN_SRCS)
+	$(QUIET)cd $(abspath $(CODEGEN_ROOT)/host/src) && $(CC) -c $(PKG_CFLAGS) $(CODEGEN_SRCS)
+	$(QUIET)$(AR) -cr $(abspath $(build_dir)/libcodegen.a) $(CODEGEN_OBJS)
+	$(QUIET)$(RANLIB) $(abspath $(build_dir)/libcodegen.a)
+
+${build_dir}/libcmsis_startup.a: $(CMSIS_STARTUP_SRCS)
+	$(QUIET)mkdir -p $(abspath $(build_dir)/libcmsis_startup)
+	$(QUIET)cd $(abspath $(build_dir)/libcmsis_startup) && $(CC) -c $(PKG_CFLAGS) -D${ARM_CPU} $^
+	$(QUIET)$(AR) -cr $(abspath $(build_dir)/libcmsis_startup.a) $(abspath $(build_dir))/libcmsis_startup/*.o
+	$(QUIET)$(RANLIB) $(abspath $(build_dir)/libcmsis_startup.a)
+
+${build_dir}/libuart.a: $(UART_SRCS)
+	$(QUIET)mkdir -p $(abspath $(build_dir)/libuart)
+	$(QUIET)cd $(abspath $(build_dir)/libuart) && $(CC) -c $(PKG_CFLAGS) $^
+	$(QUIET)$(AR) -cr $(abspath $(build_dir)/libuart.a) $(abspath $(build_dir))/libuart/*.o
+	$(QUIET)$(RANLIB) $(abspath $(build_dir)/libuart.a)
+
+$(build_dir)/aot_test_runner: $(build_dir)/test.c $(build_dir)/crt_backend_api.o $(build_dir)/stack_allocator.o ${build_dir}/libcmsis_startup.a ${build_dir}/libuart.a $(build_dir)/libcodegen.a
+	$(QUIET)mkdir -p $(@D)
+	$(QUIET)$(CC) $(PKG_CFLAGS) -o $@ -Wl,--whole-archive $^ -Wl,--no-whole-archive $(PKG_LDFLAGS)
+
+clean:
+	$(QUIET)rm -rf $(build_dir)/crt
+
+cleanall:
+	$(QUIET)rm -rf $(build_dir)
+
+run: $(build_dir)/aot_test_runner
+	/opt/arm/FVP_Corstone_SSE-300_Ethos-U55/models/Linux64_GCC-6.4/FVP_Corstone_SSE-300_Ethos-U55 -C cpu0.CFGDTCMSZ=15 \
+	-C cpu0.CFGITCMSZ=15 -C mps3_board.uart0.out_file=\"-\" -C mps3_board.uart0.shutdown_tag=\"EXITTHESIM\" \
+	-C mps3_board.visualisation.disable-visualisation=1 -C mps3_board.telnetterminal0.start_telnet=0 \
+	-C mps3_board.telnetterminal1.start_telnet=0 -C mps3_board.telnetterminal2.start_telnet=0 -C mps3_board.telnetterminal5.start_telnet=0 \
+	-C ethosu.num_macs=$(NPU_VARIANT) $(build_dir)/aot_test_runner
+
+.SUFFIXES:
+
+.DEFAULT: aot_test_runner
+
+.PHONY: run
\ No newline at end of file
diff --git a/tests/python/relay/aot/aot_test.mk b/tests/python/relay/aot/default.mk
similarity index 95%
rename from tests/python/relay/aot/aot_test.mk
rename to tests/python/relay/aot/default.mk
index 04a77f0e59d0..f5edcb3d6422 100644
--- a/tests/python/relay/aot/aot_test.mk
+++ b/tests/python/relay/aot/default.mk
@@ -62,5 +62,13 @@ clean:
 	$(QUIET)rm -rf $(build_dir)/crt
 cleanall:
 	$(QUIET)rm -rf $(build_dir)
+
+run: $(build_dir)/aot_test_runner
+	$(build_dir)/aot_test_runner
+
 # Don't define implicit rules; they tend to match on logical target names that aren't targets (i.e. bundle_static)
 .SUFFIXES:
+
+.DEFAULT: aot_test_runner
+
+.PHONY: run
diff --git a/tests/python/relay/aot/test_crt_aot.py b/tests/python/relay/aot/test_crt_aot.py
index abbf350bff77..36cffefcd0bb 100644
--- a/tests/python/relay/aot/test_crt_aot.py
+++ b/tests/python/relay/aot/test_crt_aot.py
@@ -28,6 +28,7 @@
 from tvm.relay.testing import byoc
 from aot_test_utils import (
     AOTTestModel,
+    AOT_DEFAULT_RUNNER,
     generate_ref_data,
     convert_to_relay,
     compile_and_run,
@@ -38,7 +39,7 @@
 def test_error_c_interface_with_packed_api():
     interface_api = "c"
     use_unpacked_api = False
-    use_calculated_workspaces = True
+    test_runner = AOT_DEFAULT_RUNNER
 
     two = relay.add(relay.const(1), relay.const(1))
     func = relay.Function([], two)
@@ -48,14 +49,14 @@ def test_error_c_interface_with_packed_api():
             AOTTestModel(
                 module=IRModule.from_expr(func), inputs={}, outputs=generate_ref_data(func, {})
             ),
+            test_runner,
             interface_api,
             use_unpacked_api,
-            use_calculated_workspaces,
         )
 
 
 @parametrize_aot_options
-def test_conv_with_params(interface_api, use_unpacked_api, use_calculated_workspaces):
+def test_conv_with_params(interface_api, use_unpacked_api, test_runner):
     RELAY_MODEL = """
 #[version = "0.0.5"]
 def @main(%data : Tensor[(1, 3, 64, 64), uint8], %weight : Tensor[(8, 3, 5, 5), int8]) {
@@ -85,14 +86,14 @@ def @main(%data : Tensor[(1, 3, 64, 64), uint8], %weight : Tensor[(8, 3, 5, 5),
 
     compile_and_run(
         AOTTestModel(module=mod, inputs=inputs, outputs=output_list, params=params),
+        test_runner,
         interface_api,
         use_unpacked_api,
-        use_calculated_workspaces,
     )
 
 
 @parametrize_aot_options
-def test_add_with_params(interface_api, use_unpacked_api, use_calculated_workspaces):
+def test_add_with_params(interface_api, use_unpacked_api, test_runner):
     x = relay.var("x", shape=(1, 10))
     y = relay.var("y", shape=(1, 10))
     z = relay.add(x, y)
@@ -109,15 +110,15 @@ def test_add_with_params(interface_api, use_unpacked_api, use_calculated_workspa
         AOTTestModel(
             module=IRModule.from_expr(func), inputs=inputs, outputs=output_list, params=params
         ),
+        test_runner,
         interface_api,
         use_unpacked_api,
-        use_calculated_workspaces,
     )
 
 
 @parametrize_aot_options
 @pytest.mark.parametrize("groups,weight_shape", [(1, 32), (32, 1)])
-def test_conv2d(use_calculated_workspaces, interface_api, use_unpacked_api, groups, weight_shape):
+def test_conv2d(interface_api, use_unpacked_api, test_runner, groups, weight_shape):
     """Test a subgraph with a single conv2d operator."""
     dtype = "float32"
     ishape = (1, 32, 14, 14)
@@ -139,14 +140,14 @@ def test_conv2d(use_calculated_workspaces, interface_api, use_unpacked_api, grou
     output_list = generate_ref_data(mod, inputs)
     compile_and_run(
         AOTTestModel(module=mod, inputs=inputs, outputs=output_list),
+        test_runner,
         interface_api,
         use_unpacked_api,
-        use_calculated_workspaces,
     )
 
 
 @parametrize_aot_options
-def test_concatenate(interface_api, use_unpacked_api, use_calculated_workspaces):
+def test_concatenate(interface_api, use_unpacked_api, test_runner):
     dtype = "float32"
     x = relay.var("x", shape=(10, 5), dtype=dtype)
     y = relay.var("y", shape=(10, 5), dtype=dtype)
@@ -163,14 +164,14 @@ def test_concatenate(interface_api, use_unpacked_api, use_calculated_workspaces)
     output_list = generate_ref_data(func, inputs)
     compile_and_run(
         AOTTestModel(module=IRModule.from_expr(func), inputs=inputs, outputs=output_list),
+        test_runner,
         interface_api,
         use_unpacked_api,
-        use_calculated_workspaces,
     )
 
 
 @parametrize_aot_options
-def test_nested_tuples(interface_api, use_unpacked_api, use_calculated_workspaces):
+def test_nested_tuples(interface_api, use_unpacked_api, test_runner):
     x = relay.var("x", shape=(10,))
     x1 = x + relay.const(1.0)
     x2 = x1 + relay.const(1.0)
@@ -185,27 +186,27 @@ def test_nested_tuples(interface_api, use_unpacked_api, use_calculated_workspace
 
     compile_and_run(
         AOTTestModel(module=IRModule.from_expr(func), inputs=inputs, outputs=output_list),
+        test_runner,
         interface_api,
         use_unpacked_api,
-        use_calculated_workspaces,
     )
 
 
 @parametrize_aot_options
-def test_tuple_getitem(interface_api, use_unpacked_api, use_calculated_workspaces):
+def test_tuple_getitem(interface_api, use_unpacked_api, test_runner):
     func = relay.Function([], relay.TupleGetItem(relay.Tuple([relay.const(1), relay.const(2)]), 0))
     output_list = generate_ref_data(func, {})
 
     compile_and_run(
         AOTTestModel(module=IRModule.from_expr(func), inputs={}, outputs=output_list),
+        test_runner,
         interface_api,
         use_unpacked_api,
-        use_calculated_workspaces,
     )
 
 
 @parametrize_aot_options
-def test_id(interface_api, use_unpacked_api, use_calculated_workspaces):
+def test_id(interface_api, use_unpacked_api, test_runner):
     x = relay.var("x", "float32")
     ident = relay.Function([x], x)
     one = np.array(1.0, "float32")
@@ -214,28 +215,28 @@ def test_id(interface_api, use_unpacked_api, use_calculated_workspaces):
 
     compile_and_run(
         AOTTestModel(module=IRModule.from_expr(ident), inputs=inputs, outputs=output_list),
+        test_runner,
         interface_api,
         use_unpacked_api,
-        use_calculated_workspaces,
     )
 
 
 @parametrize_aot_options
-def test_add_const(interface_api, use_unpacked_api, use_calculated_workspaces):
+def test_add_const(interface_api, use_unpacked_api, test_runner):
     two = relay.add(relay.const(1), relay.const(1))
     func = relay.Function([], two)
     output_list = generate_ref_data(func, {})
 
     compile_and_run(
         AOTTestModel(module=IRModule.from_expr(func), inputs={}, outputs=output_list),
+        test_runner,
         interface_api,
         use_unpacked_api,
-        use_calculated_workspaces,
     )
 
 
 @parametrize_aot_options
-def test_mul_param(interface_api, use_unpacked_api, use_calculated_workspaces):
+def test_mul_param(interface_api, use_unpacked_api, test_runner):
     x = relay.var("x", shape=(10, 10))
     y = relay.var("y", shape=(1, 10))
     func = relay.Function([x, y], relay.multiply(x, y))
@@ -247,14 +248,14 @@ def test_mul_param(interface_api, use_unpacked_api, use_calculated_workspaces):
 
     compile_and_run(
         AOTTestModel(module=IRModule.from_expr(func), inputs=inputs, outputs=output_list),
+        test_runner,
         interface_api,
         use_unpacked_api,
-        use_calculated_workspaces,
     )
 
 
 @parametrize_aot_options
-def test_subtract(interface_api, use_unpacked_api, use_calculated_workspaces):
+def test_subtract(interface_api, use_unpacked_api, test_runner):
     i = relay.var("i", shape=[], dtype="int32")
     sub = relay.subtract(i, relay.const(1, dtype="int32"))
     func = relay.Function([i], sub, ret_type=relay.TensorType([], "int32"))
@@ -263,14 +264,14 @@ def test_subtract(interface_api, use_unpacked_api, use_calculated_workspaces):
     output_list = generate_ref_data(func, inputs)
     compile_and_run(
         AOTTestModel(module=IRModule.from_expr(func), inputs=inputs, outputs=output_list),
+        test_runner,
         interface_api,
         use_unpacked_api,
-        use_calculated_workspaces,
     )
 
 
 @parametrize_aot_options
-def test_tuple_output(interface_api, use_unpacked_api, use_calculated_workspaces):
+def test_tuple_output(interface_api, use_unpacked_api, test_runner):
     x = relay.var("x", shape=(6, 9))
     y = relay.split(x, 3).astuple()
     a = relay.TupleGetItem(y, 0)
@@ -282,18 +283,19 @@ def test_tuple_output(interface_api, use_unpacked_api, use_calculated_workspaces
     output_list = generate_ref_data(func, inputs)
     compile_and_run(
         AOTTestModel(module=IRModule.from_expr(func), inputs=inputs, outputs=output_list),
+        test_runner,
         interface_api,
         use_unpacked_api,
-        use_calculated_workspaces,
     )
 
 
 @pytest.mark.parametrize(
-    ["use_calculated_workspaces", "workspace_byte_alignment"], [(True, 1), (True, 16), (False, 1)]
+    ["debug_calculated_workspaces", "workspace_byte_alignment"], [(True, 1), (True, 16), (False, 1)]
 )
-def test_mobilenet(use_calculated_workspaces, workspace_byte_alignment):
+def test_mobilenet(debug_calculated_workspaces, workspace_byte_alignment):
     use_unpacked_api = True
     interface_api = "c"
+    test_runner = AOT_DEFAULT_RUNNER
 
     mod, params = testing.mobilenet.get_workload(batch_size=1)
     data_shape = [int(x) for x in mod["main"].checked_type.arg_types[0].shape]
@@ -302,18 +304,19 @@ def test_mobilenet(use_calculated_workspaces, workspace_byte_alignment):
     output_list = generate_ref_data(mod, inputs, params)
     compile_and_run(
         AOTTestModel(module=mod, inputs=inputs, outputs=output_list, params=params),
+        test_runner,
         interface_api,
         use_unpacked_api,
-        use_calculated_workspaces,
-        workspace_byte_alignment,
+        workspace_byte_alignment=workspace_byte_alignment,
+        debug_calculated_workspaces=debug_calculated_workspaces,
     )
 
 
-@pytest.mark.parametrize("use_calculated_workspaces", [True, False])
-def test_byoc_microtvm(use_calculated_workspaces):
+def test_byoc_microtvm():
     """This is a simple test case to check BYOC capabilities of AOT"""
     use_unpacked_api = False
     interface_api = "packed"
+    test_runner = AOT_DEFAULT_RUNNER
 
     x = relay.var("x", shape=(10, 10))
     w0 = relay.var("w0", shape=(10, 10))
@@ -358,14 +361,14 @@ def test_byoc_microtvm(use_calculated_workspaces):
     input_list.extend([map_inputs["w{}".format(i)] for i in range(8)])
     compile_and_run(
         AOTTestModel(name="my_mod", module=mod, inputs=map_inputs, outputs=output_list),
+        test_runner,
         interface_api,
         use_unpacked_api,
-        use_calculated_workspaces,
     )
 
 
 @parametrize_aot_options
-def test_add_name_mangling_with_params(interface_api, use_unpacked_api, use_calculated_workspaces):
+def test_add_name_mangling_with_params(interface_api, use_unpacked_api, test_runner):
     x = relay.var("x", shape=(1, 10))
     y = relay.var("y", shape=(1, 10))
     z = relay.add(x, y)
@@ -380,14 +383,14 @@ def test_add_name_mangling_with_params(interface_api, use_unpacked_api, use_calc
 
     compile_and_run(
         AOTTestModel(name="my_mod", module=func, inputs=inputs, outputs=output_list, params=params),
+        test_runner,
         interface_api,
         use_unpacked_api,
-        use_calculated_workspaces,
     )
 
 
 @parametrize_aot_options
-def test_multiple_models(interface_api, use_unpacked_api, use_calculated_workspaces):
+def test_multiple_models(interface_api, use_unpacked_api, test_runner):
     # Identity model without params
     x = relay.var("x", "float32")
     mod1 = relay.Function([x], x)
@@ -433,9 +436,9 @@ def @main(%data : Tensor[(1, 3, 64, 64), uint8], %weight : Tensor[(8, 3, 5, 5),
                 name="mod2", module=mod2, inputs=inputs2, outputs=output_list2, params=params2
             ),
         ],
+        test_runner,
         interface_api,
         use_unpacked_api,
-        use_calculated_workspaces,
     )
 
 
@@ -449,7 +452,7 @@ def test_quant_mobilenet_tfl():
 
     interface_api = "packed"
     use_unpacked_api = False
-    use_calculated_workspaces = True
+    test_runner = AOT_DEFAULT_RUNNER
 
     tflite_model_file = tf_testing.get_workload_official(
         "https://storage.googleapis.com/download.tensorflow.org/"
@@ -466,14 +469,14 @@ def test_quant_mobilenet_tfl():
     output_list = generate_ref_data(mod, inputs, params)
     compile_and_run(
         AOTTestModel(module=mod, inputs=inputs, outputs=output_list, params=params),
+        test_runner,
         interface_api,
         use_unpacked_api,
-        use_calculated_workspaces,
     )
 
 
 @parametrize_aot_options
-def test_transpose(interface_api, use_unpacked_api, use_calculated_workspaces):
+def test_transpose(interface_api, use_unpacked_api, test_runner):
     """Test that non-inpleaceable operations (e.g., transpose) do not happen in-place."""
 
     dtype = "float32"
@@ -493,9 +496,9 @@ def test_transpose(interface_api, use_unpacked_api, use_calculated_workspaces):
     output_list = generate_ref_data(func, inputs)
     compile_and_run(
         AOTTestModel(module=IRModule.from_expr(func), inputs=inputs, outputs=output_list),
+        test_runner,
         interface_api,
         use_unpacked_api,
-        use_calculated_workspaces,
         enable_op_fusion=False,
     )
 
diff --git a/tests/python/relay/test_external_codegen.py b/tests/python/relay/test_external_codegen.py
index 36453cd41d5d..30db5facc208 100644
--- a/tests/python/relay/test_external_codegen.py
+++ b/tests/python/relay/test_external_codegen.py
@@ -80,16 +80,16 @@ def check_aot_executor_result(
         pytest.skip("MicroTVM support not enabled. Set USE_MICRO=ON in config.cmake to enable.")
 
     # Late import to avoid breaking test with USE_MICRO=OFF.
-    from aot.aot_test_utils import AOTTestModel, compile_and_run
+    from aot.aot_test_utils import AOTTestModel, AOT_DEFAULT_RUNNER, compile_and_run
 
     interface_api = "packed"
     use_unpacked_api = False
-    use_calculated_workspaces = True
+    test_runner = AOT_DEFAULT_RUNNER
     compile_and_run(
         AOTTestModel(module=mod, inputs=map_inputs, outputs=[result]),
+        test_runner,
         interface_api,
         use_unpacked_api,
-        use_calculated_workspaces,
     )