Skip to content

Commit eb84aac

Browse files
authored
Merge pull request #5084 from quic/topic/sgemm_direct_sme1
Support for SGEMM_DIRECT Kernel based on SME1
2 parents abbd78a + f66ca05 commit eb84aac

25 files changed

+617
-14
lines changed

Makefile.arm64

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,11 @@ FCOMMON_OPT += -march=armv8-a+sve
3030
endif
3131
endif
3232

33+
ifeq ($(CORE), ARMV9SME)
34+
CCOMMON_OPT += -march=armv9-a+sve2+sme
35+
FCOMMON_OPT += -march=armv9-a+sve2
36+
endif
37+
3338
ifeq ($(CORE), CORTEXA53)
3439
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a53
3540
ifneq ($(F_COMPILER), NAG)

Makefile.system

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -420,6 +420,7 @@ ifeq ($(ARCH), arm64)
420420
export MACOSX_DEPLOYMENT_TARGET=11.0
421421
ifeq ($(C_COMPILER), GCC)
422422
export NO_SVE = 1
423+
export NO_SME = 1
423424
endif
424425
else
425426
export MACOSX_DEPLOYMENT_TARGET=10.8
@@ -709,6 +710,9 @@ DYNAMIC_CORE += NEOVERSEN2
709710
DYNAMIC_CORE += ARMV8SVE
710711
DYNAMIC_CORE += A64FX
711712
endif
713+
ifneq ($(NO_SME), 1)
714+
DYNAMIC_CORE += ARMV9SME
715+
endif
712716
DYNAMIC_CORE += THUNDERX
713717
DYNAMIC_CORE += THUNDERX2T99
714718
DYNAMIC_CORE += TSV110
@@ -1472,6 +1476,10 @@ ifeq ($(NO_SVE), 1)
14721476
CCOMMON_OPT += -DNO_SVE
14731477
endif
14741478

1479+
ifeq ($(NO_SME), 1)
1480+
CCOMMON_OPT += -DNO_SME
1481+
endif
1482+
14751483
ifdef SMP
14761484
CCOMMON_OPT += -DSMP_SERVER
14771485

TargetList.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@ THUNDERX3T110
111111
VORTEX
112112
A64FX
113113
ARMV8SVE
114+
ARMV9SME
114115
FT2000
115116

116117
9.System Z:

c_check

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -334,6 +334,24 @@ if [ "$architecture" = "arm64" ]; then
334334
rm -rf "$tmpd"
335335
fi
336336

337+
no_sme=0
338+
if [ "$architecture" = "arm64" ]; then
339+
tmpd=$(mktemp -d 2>/dev/null || mktemp -d -t 'OBC')
340+
tmpf="$tmpd/a.S"
341+
printf ".text \n.global sme_test\n\nsme_test:\nsmstart\nsmstop\nret\n">> "$tmpf"
342+
args=" -march=armv9-a+sve2+sme -c -o $tmpf.o $tmpf"
343+
no_sme=0
344+
{
345+
$compiler_name $flags $args >/dev/null 2>&1
346+
} || {
347+
args=" -march=armv9-a+sme -c -o $tmpf.o $tmpf"
348+
$compiler_name $flags $args >/dev/null 2>&1
349+
} || {
350+
no_sme=1
351+
}
352+
rm -rf "$tmpd"
353+
fi
354+
337355
c11_atomics=0
338356
case "$data" in
339357
*HAVE_C11*)
@@ -475,6 +493,7 @@ done
475493
printf "CEXTRALIB=%s %s %s\n" "$linker_L" "$linker_l" "$linker_a"
476494
[ "$no_msa" -eq 1 ] && printf "NO_MSA=1\n"
477495
[ "$no_sve" -eq 1 ] && printf "NO_SVE=1\n"
496+
[ "$no_sme" -eq 1 ] && printf "NO_SME=1\n"
478497
[ "$no_rv64gv" -eq 1 ] && printf "NO_RV64GV=1\n"
479498
[ "$no_avx512" -eq 1 ] && printf "NO_AVX512=1\n"
480499
[ "$no_avx512bf" -eq 1 ] && printf "NO_AVX512BF16=1\n"

cmake/arch.cmake

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,9 +44,21 @@ endif ()
4444

4545
if (DYNAMIC_ARCH)
4646
if (ARM64)
47-
set(DYNAMIC_CORE ARMV8 CORTEXA53 CORTEXA57 THUNDERX THUNDERX2T99 TSV110 EMAG8180 NEOVERSEN1 THUNDERX3T110)
48-
if (${CMAKE_C_COMPILER_VERSION} VERSION_GREATER 9.99)
49-
set(DYNAMIC_CORE ${DYNAMIC_CORE} NEOVERSEV1 NEOVERSEN2 ARMV8SVE A64FX)
47+
set(DYNAMIC_CORE ARMV8 CORTEXA53 CORTEXA57 THUNDERX THUNDERX2T99 TSV110 EMAG8180 NEOVERSEN1 THUNDERX3T110)
48+
if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU")
49+
if (${CMAKE_C_COMPILER_VERSION} VERSION_GREATER_EQUAL 10) # SVE ACLE supported in GCC >= 10
50+
set(DYNAMIC_CORE ${DYNAMIC_CORE} NEOVERSEV1 NEOVERSEN2 ARMV8SVE A64FX)
51+
endif ()
52+
if (${CMAKE_C_COMPILER_VERSION} VERSION_GREATER_EQUAL 14) # SME ACLE supported in GCC >= 14
53+
set(DYNAMIC_CORE ${DYNAMIC_CORE} ARMV9SME)
54+
endif()
55+
elseif (${CMAKE_C_COMPILER_ID} MATCHES "Clang")
56+
if (${CMAKE_C_COMPILER_VERSION} VERSION_GREATER_EQUAL 11) # SVE ACLE supported in LLVM >= 11
57+
set(DYNAMIC_CORE ${DYNAMIC_CORE} NEOVERSEV1 NEOVERSEN2 ARMV8SVE A64FX)
58+
endif ()
59+
if (${CMAKE_C_COMPILER_VERSION} VERSION_GREATER_EQUAL 19) # SME ACLE supported in LLVM >= 19
60+
set(DYNAMIC_CORE ${DYNAMIC_CORE} ARMV9SME)
61+
endif()
5062
endif ()
5163
if (DYNAMIC_LIST)
5264
set(DYNAMIC_CORE ARMV8 ${DYNAMIC_LIST})

cmake/cc.cmake

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,12 @@ if (${CORE} STREQUAL ARMV8SVE)
238238
endif ()
239239
endif ()
240240

241+
if (${CORE} STREQUAL ARMV9SME)
242+
if (NOT DYNAMIC_ARCH)
243+
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv9-a+sme")
244+
endif ()
245+
endif ()
246+
241247
if (${CORE} STREQUAL CORTEXA510)
242248
if (NOT DYNAMIC_ARCH)
243249
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8-a+sve")

cmake/prebuild.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1014,7 +1014,7 @@ endif ()
10141014
set(ZGEMM_UNROLL_M 4)
10151015
set(ZGEMM_UNROLL_N 4)
10161016
set(SYMV_P 16)
1017-
elseif ("${TCORE}" STREQUAL "NEOVERSEN2")
1017+
# ARMV9SME shares the NEOVERSEN2 parameters; the if()/elseif() keyword must be uppercase OR.
elseif ("${TCORE}" STREQUAL "NEOVERSEN2" OR "${TCORE}" STREQUAL "ARMV9SME")
10181018
file(APPEND ${TARGET_CONF_TEMP}
10191019
"#define L1_CODE_SIZE\t65536\n"
10201020
"#define L1_CODE_LINESIZE\t64\n"

cmake/system.cmake

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -318,6 +318,9 @@ if (${TARGET} STREQUAL NEOVERSEV1)
318318
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=armv8.2-a+sve")
319319
endif()
320320
endif()
321+
if (${TARGET} STREQUAL ARMV9SME)
322+
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=armv9-a+sme -O3")
323+
endif()
321324
if (${TARGET} STREQUAL A64FX)
322325
if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI" AND NOT NO_SVE)
323326
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -Msve-intrinsics -march=armv8.2-a+sve -mtune=a64fx")

cmake/system_check.cmake

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,17 @@ endif()
139139
endif()
140140
endif()
141141

142+
if (ARM64)
143+
if (NOT NO_SME)
144+
file(WRITE ${PROJECT_BINARY_DIR}/sme.c ".text \n.global sme_test\n\nsme_test:\nsmstart\nsmstop\nret\n")
145+
# The probe source is assembly stored in a .c file, so force the assembler
# front end with "-x assembler"; compiled as C it can never succeed, which
# would leave NO_SME set even on SME-capable toolchains.
execute_process(COMMAND ${CMAKE_C_COMPILER} -march=armv9-a+sve2+sme -c -v -o ${PROJECT_BINARY_DIR}/sme.o -x assembler ${PROJECT_BINARY_DIR}/sme.c OUTPUT_QUIET ERROR_QUIET RESULT_VARIABLE NO_SME)
146+
if (NO_SME EQUAL 1)
147+
set (CCOMMON_OPT "${CCOMMON_OPT} -DNO_SME")
148+
endif()
149+
file(REMOVE "${PROJECT_BINARY_DIR}/sme.c" "${PROJECT_BINARY_DIR}/sme.o")
150+
endif()
151+
endif()
152+
142153
include(CheckIncludeFile)
143154
CHECK_INCLUDE_FILE("stdatomic.h" HAVE_C11)
144155
if (HAVE_C11 EQUAL 1)

common.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -702,6 +702,7 @@ void gotoblas_profile_init(void);
702702
void gotoblas_profile_quit(void);
703703

704704
int support_avx512(void);
705+
int support_sme1(void);
705706

706707
#ifdef USE_OPENMP
707708

0 commit comments

Comments
 (0)