Skip to content

Commit

Permalink
Update the Arm Optimized Routine library to v25.01
Browse files Browse the repository at this point in the history
Sponsored by:	Arm Ltd
  • Loading branch information
zxombie committed Jan 10, 2025
2 parents 16f0d01 + 9d1de25 commit f3087be
Show file tree
Hide file tree
Showing 472 changed files with 11,852 additions and 14,525 deletions.
9 changes: 3 additions & 6 deletions contrib/arm-optimized-routines/MAINTAINERS
Original file line number Diff line number Diff line change
@@ -1,12 +1,9 @@
/
Szabolcs Nagy <szabolcs.nagy@arm.com>
Tamar Christina <tamar.christina@arm.com>
math/
Szabolcs Nagy <szabolcs.nagy@arm.com>
networking/
Szabolcs Nagy <szabolcs.nagy@arm.com>
pl/
Pierre Blanchard <pierre.blanchard@arm.com>
Joe Ramsay <joe.ramsay@arm.com>
networking/
Ola Liljedahl <ola.liljedahl@arm.com>
string/
Szabolcs Nagy <szabolcs.nagy@arm.com>
Wilco Dijkstra <wilco.dijkstra@arm.com>
16 changes: 12 additions & 4 deletions contrib/arm-optimized-routines/Makefile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Makefile - requires GNU make
#
# Copyright (c) 2018-2022, Arm Limited.
# Copyright (c) 2018-2024, Arm Limited.
# SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception

srcdir = .
Expand All @@ -11,7 +11,6 @@ includedir = $(prefix)/include

# Configure these in config.mk, do not make changes in this file.
SUBS = math string networking
PLSUBS = math
HOST_CC = cc
HOST_CFLAGS = -std=c99 -O2
HOST_LDFLAGS =
Expand All @@ -21,12 +20,22 @@ CPPFLAGS =
CFLAGS = -std=c99 -O2
CFLAGS_SHARED = -fPIC
CFLAGS_ALL = -Ibuild/include $(CPPFLAGS) $(CFLAGS)
CFLAGS_PL = -Ibuild/pl/include $(CPPFLAGS) $(CFLAGS) -DPL
LDFLAGS =
LDLIBS =
AR = $(CROSS_COMPILE)ar
RANLIB = $(CROSS_COMPILE)ranlib
INSTALL = install
# Detect OS.
# Assume Unix environment: Linux, Darwin, or Msys.
OS := $(shell uname -s)
OS := $(patsubst MSYS%,Msys,$(OS))
# The following math dependencies can be adjusted in the config file
# if necessary, e.g. for Msys.
libm-libs = -lm
libc-libs = -lc
mpfr-libs = -lmpfr
gmp-libs = -lgmp
mpc-libs = -lmpc

all:

Expand All @@ -53,7 +62,6 @@ $(DIRS):
mkdir -p $@

$(filter %.os,$(ALL_FILES)): CFLAGS_ALL += $(CFLAGS_SHARED)
$(filter %.os,$(ALL_FILES)): CFLAGS_PL += $(CFLAGS_SHARED)

build/%.o: $(srcdir)/%.S
$(CC) $(CFLAGS_ALL) -c -o $@ $<
Expand Down
33 changes: 30 additions & 3 deletions contrib/arm-optimized-routines/README
Original file line number Diff line number Diff line change
Expand Up @@ -12,22 +12,42 @@ contribution requirements are documented in README.contributors of
the appropriate subdirectory.

Regular quarterly releases are tagged as vYY.MM, the latest
release is v24.01.
release is v25.01.

Source code layout:

build/ - build directory (created by make).
math/ - math subproject sources.
math/ - math subproject sources for generic scalar
subroutines and sources shared with
subdirectories of math/.
All math routines should meet the quality
requirements stated in math/README.contributors;
routines that fail to do so are located in an
experimental/ directory.
math/aarch64/ - math subproject AArch64-specific sources
and sources shared with subdirectories.
math/aarch64/advsimd - AdvSIMD-specific math sources.
math/aarch64/experimental - Experimental math sources that do not
meet the quality requirements stated in
math/README.contributors.
math/aarch64/sve - SVE-specific math sources.
math/include/ - math library public headers.
math/test/ - math test and benchmark related sources.
math/tools/ - tools used for designing the algorithms.
networking/ - networking subproject sources.
networking/include/ - networking library public headers.
networking/test/ - networking test and benchmark related sources.
string/ - string routines subproject sources.
All string routines should meet the quality
requirements stated in string/README.contributors;
routines that fail to do so are located in an
experimental/ directory.
string/<arch> - <arch>-specific string routine sources for
<arch> = aarch64 or arm.
string/aarch64/experimental - Experimental string routines which
may not be fully optimized yet.
string/include/ - string library public headers.
string/test/ - string test and benchmark related sources.
pl/... - separately maintained performance library code.

The steps to build the target libraries and run the tests:

Expand All @@ -50,6 +70,13 @@ Or building and testing the math subproject only:
make all-math
make check-math

Note on compiler compatibility/requirements:

SVE routines are always built by default - this means that on AArch64
GCC >= 10 or LLVM >= 5 is always required for SVE ACLE compatibility.
There is no explicit check for a compatible compiler, therefore the SVE
routines will fail to build if CC is too old.

The test system requires libmpfr and libmpc.
For example on debian linux they can be installed as:

Expand Down
99 changes: 77 additions & 22 deletions contrib/arm-optimized-routines/config.mk.dist
Original file line number Diff line number Diff line change
@@ -1,14 +1,11 @@
# Example config.mk
#
# Copyright (c) 2018-2023, Arm Limited.
# Copyright (c) 2018-2024, Arm Limited.
# SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception

# Subprojects to build
SUBS = math string networking

# Subsubprojects to build if subproject pl is built
PLSUBS = math

# Target architecture: aarch64, arm or x86_64
ARCH = aarch64

Expand All @@ -30,6 +27,27 @@ HOST_CFLAGS += -Wall -Wno-unused-function
HOST_CFLAGS += -g
CFLAGS += -g

ifeq ($(OS),Msys)
# llvm is the only available/valid native compiler
CC = clang
AR = llvm-ar
RANLIB = llvm-ranlib
HOST_CC = clang
SYSROOT = /c/wenv/msys2/msys64/clangarm64
# Common windows flags
COMMON_WIN_CFLAGS = -D_CRT_SECURE_NO_WARNINGS -D_CRT_NONSTDC_NO_DEPRECATE
COMMON_WIN_CFLAGS += -Wno-deprecated-declarations -Wno-unused-variable
# For mathtest
HOST_CFLAGS += -I$(SYSROOT)/include
HOST_CFLAGS += $(COMMON_WIN_CFLAGS) -Wno-ignored-attributes
# Clear the default flag -fPIC, as not supported on Windows
CFLAGS_SHARED =
# For ulp.h with MPFR
CFLAGS += -I$(SYSROOT)/include
# For clang on Windows
CFLAGS += $(COMMON_WIN_CFLAGS)
endif

# Optimize the shared libraries on aarch64 assuming they fit in 1M.
#CFLAGS_SHARED = -fPIC -mcmodel=tiny

Expand All @@ -45,12 +63,33 @@ math-cflags =
math-ldlibs =
math-ulpflags =
math-testflags =
string-cflags =
string-cflags = -falign-functions=64
networking-cflags =

# Use if mpfr is available on the target for ulp error checking.
#math-ldlibs += -lmpfr -lgmp
#math-cflags += -DUSE_MPFR
ifeq ($(OS),Msys)
# Libraries can be installed with pacman
libm-libs = -lmsvcrt -lvcruntime -lucrt
libc-libs =
# The linker looks for .lib, but some systems only have .dll.a,
# so we have to give absolute paths to the libraries.
# This is system dependent and might need adjusting.
mpfr-libs = $(SYSROOT)/lib/libmpfr.dll.a
gmp-libs = $(SYSROOT)/lib/libgmp.dll.a
mpc-libs = $(SYSROOT)/lib/libmpc.dll.a
endif

# Use if mpfr is available on the target for ulp error checking. If
# enabling this, it is advised to disable fenv checks by uncommenting
# the two lines at the bottom of this block.
USE_MPFR=0
math-cflags += -DUSE_MPFR=$(USE_MPFR)
ifeq ($(USE_MPFR), 1)
math-ldlibs += $(mpfr-libs) $(gmp-libs)
math-ulpflags += -m
endif
# Disable fenv checks
#math-ulpflags = -q -f
#math-testflags = -nostatus

# Use with gcc.
math-cflags += -frounding-math -fexcess-precision=standard -fno-stack-protector
Expand All @@ -59,30 +98,36 @@ math-cflags += -ffp-contract=fast -fno-math-errno
# Use with clang.
#math-cflags += -ffp-contract=fast

# Disable/enable SVE vector math code and tests.
# If WANT_SVE_MATH is enabled, math-sve-cflags is added for SVE
# routines only so that SVE code does not leak into scalar
# routines. It is also necessary to add it for tools (e.g. ulp,
# mathbench)
WANT_SVE_MATH = 0
ifeq ($(WANT_SVE_MATH), 1)
math-sve-cflags = -march=armv8-a+sve
endif
math-cflags += -DWANT_SVE_MATH=$(WANT_SVE_MATH)

# If defined to 1, set errno in math functions according to ISO C. Many math
# libraries do not set errno, so this is 0 by default. It may need to be
# set to 1 if math.h has (math_errhandling & MATH_ERRNO) != 0.
WANT_ERRNO = 0
math-cflags += -DWANT_ERRNO=$(WANT_ERRNO)

# Disable/enable SVE vector math tests/tools.
ifeq ($(ARCH),aarch64)
WANT_SVE_TESTS = 1
else
WANT_SVE_TESTS = 0
endif
math-cflags += -DWANT_SVE_TESTS=$(WANT_SVE_TESTS)

# If set to 1, set fenv in vector math routines.
WANT_SIMD_EXCEPT = 0
math-cflags += -DWANT_SIMD_EXCEPT=$(WANT_SIMD_EXCEPT)

# Disable fenv checks
#math-ulpflags = -q -f
#math-testflags = -nostatus
# If set to 1, enable tests for exp10.
WANT_EXP10_TESTS = 1
math-cflags += -DWANT_EXP10_TESTS=$(WANT_EXP10_TESTS)

# If set to 1, enable tests for sinpi and cospi. These functions are
# only supported on aarch64.
ifeq ($(ARCH),aarch64)
WANT_TRIGPI_TESTS = 1
else
WANT_TRIGPI_TESTS = 0
endif
math-cflags += -DWANT_TRIGPI_TESTS=$(WANT_TRIGPI_TESTS)

# Remove GNU Property Notes from asm files.
#string-cflags += -DWANT_GNU_PROPERTY=0
Expand All @@ -92,3 +137,13 @@ math-cflags += -DWANT_SIMD_EXCEPT=$(WANT_SIMD_EXCEPT)

# Avoid auto-vectorization of scalar code and unroll loops
networking-cflags += -O2 -fno-tree-vectorize -funroll-loops

# Provide *_finite symbols and some of the glibc hidden symbols
# so libmathlib can be used with binaries compiled against glibc
# to interpose math functions with both static and dynamic linking
USE_GLIBC_ABI = 1
math-cflags += -DUSE_GLIBC_ABI=$(USE_GLIBC_ABI)

# Enable experimental math routines - non-C23 vector math and low-accuracy scalar
WANT_EXPERIMENTAL_MATH = 0
math-cflags += -DWANT_EXPERIMENTAL_MATH=$(WANT_EXPERIMENTAL_MATH)
Loading

0 comments on commit f3087be

Please sign in to comment.