#include #include #include #include "aarch64.h" void print_X(char* msg) { int16_t t[32]; printf("%s\n", msg); for (uint64_t i = 0; i < 8; i++) { AMX_STX((uint64_t)t | (i << 56)); printf("X[%d] = %3d", (int)i, t[0]); for (int j = 1; j < 32; j++) { printf(", %3d", t[j]); } printf(" ]\n"); } printf("\n"); } int main() { int16_t t[256]; for (int i = 0; i < 256; i++) { t[i] = i + 1; } AMX_SET(); print_X("init"); AMX_LDX((uint64_t)t); print_X("load single"); AMX_CLR(); AMX_SET(); print_X("reinit"); AMX_LDX((uint64_t)t | (1ULL << 62)); print_X("load pair"); AMX_CLR(); AMX_SET(); print_X("reinit"); AMX_LDX((uint64_t)t | (1ULL << 62) | (1ULL << 61)); print_X("load pair, stride 4 (starting from X0)"); AMX_CLR(); AMX_SET(); print_X("reinit"); AMX_LDX((uint64_t)t | (1ULL << 62) | (1ULL << 60)); print_X("load 4"); AMX_CLR(); AMX_SET(); print_X("reinit"); AMX_LDX((uint64_t)t | (1ULL << 62) | (1ULL << 61) | (1ULL << 60)); print_X("load 4, stride 2 (starting from X0)"); AMX_CLR(); AMX_SET(); print_X("reinit"); AMX_LDX((uint64_t)t | (1ULL << 62) | (1ULL << 61) | (1ULL << 60) | (1ULL << 56)); print_X("load 4, stride 2 (starting from X1)"); AMX_CLR(); AMX_SET(); print_X("reinit"); AMX_LDX((uint64_t)t | (1ULL << 62) | (1ULL << 61) | (1ULL << 60) | (2ULL << 56)); print_X("load 4, stride 2 (starting from X2"); return 0; }