Skip to content

arm neon optimzation for rmsnorm (#5668) #4931

arm neon optimzation for rmsnorm (#5668)

arm neon optimzation for rmsnorm (#5668) #4931

name: linux-aarch64-cpu-gcc
on:
push:
branches: [master]
paths:
- '.github/workflows/linux-aarch64-cpu-gcc.yml'
- 'toolchains/aarch64-linux-gnu.toolchain.cmake'
- 'CMakeLists.txt'
- 'cmake/**'
- 'src/*'
- 'src/layer/*'
- 'src/layer/arm/**'
- 'tests/**'
pull_request:
branches: [master]
paths:
- '.github/workflows/linux-aarch64-cpu-gcc.yml'
- 'toolchains/aarch64-linux-gnu.toolchain.cmake'
- 'CMakeLists.txt'
- 'cmake/**'
- 'src/*'
- 'src/layer/*'
- 'src/layer/arm/**'
- 'tests/**'
concurrency:
group: linux-aarch64-cpu-gcc-${{ github.ref }}
cancel-in-progress: true
permissions:
contents: read
jobs:
linux-gcc:
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v4
- name: cache-qemu
id: cache-qemu
uses: actions/cache@v4
with:
path: qemu-install
key: qemu-aarch64-install-20220502-ubuntu-2004-2
- name: install-qemu-build-deps
if: steps.cache-qemu.outputs.cache-hit != 'true'
run: |
sudo apt-get update
sudo apt-get install autoconf automake autotools-dev ninja-build
- name: checkout-qemu
if: steps.cache-qemu.outputs.cache-hit != 'true'
uses: actions/checkout@v4
with:
repository: qemu/qemu
path: qemu
ref: f5643914a9e8f79c606a76e6a9d7ea82a3fc3e65
- name: qemu
if: steps.cache-qemu.outputs.cache-hit != 'true'
run: |
cd qemu
./configure --prefix=$GITHUB_WORKSPACE/qemu-install --target-list=aarch64-linux-user --disable-system
make -j$(nproc)
make install
- name: aarch64-gnu-toolchain
run: |
sudo apt-get update
sudo apt-get install g++-aarch64-linux-gnu
- name: build
run: |
mkdir build && cd build
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/aarch64-linux-gnu.toolchain.cmake -DNCNN_ARM82=OFF -DNCNN_ARM82DOT=OFF -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
cmake --build . -j $(nproc)
- name: test
run: |
export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH
cd build
TESTS_EXECUTABLE_LOADER=qemu-aarch64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/aarch64-linux-gnu" ctest --output-on-failure -j $(nproc)
- name: build-noint8
run: |
mkdir build-noint8 && cd build-noint8
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/aarch64-linux-gnu.toolchain.cmake -DNCNN_ARM82=OFF -DNCNN_ARM82DOT=OFF -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
cmake --build . -j $(nproc)
- name: test-noint8
run: |
export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH
cd build-noint8
TESTS_EXECUTABLE_LOADER=qemu-aarch64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/aarch64-linux-gnu" ctest --output-on-failure -j $(nproc)
- name: build-simplestl-simplemath
run: |
mkdir build-simplestl-simplemath && cd build-simplestl-simplemath
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/aarch64-linux-gnu-c.toolchain.cmake -DNCNN_STDIO=ON -DNCNN_STRING=ON -DNCNN_SIMPLESTL=ON -DNCNN_SIMPLEMATH=ON -DNCNN_BUILD_TESTS=ON -DNCNN_BUILD_BENCHMARK=OFF -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF ..
cmake --build . -j $(nproc)
- name: test-simplestl-simplemath
run: |
export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH
cd build-simplestl-simplemath
TESTS_EXECUTABLE_LOADER=qemu-aarch64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/aarch64-linux-gnu" ctest --output-on-failure -j $(nproc)
linux-gcc-arm82:
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v4
- name: cache-qemu
id: cache-qemu
uses: actions/cache@v4
with:
path: qemu-install
key: qemu-aarch64-install-20220502-ubuntu-2004-2
- name: install-qemu-build-deps
if: steps.cache-qemu.outputs.cache-hit != 'true'
run: |
sudo apt-get update
sudo apt-get install autoconf automake autotools-dev ninja-build
- name: checkout-qemu
if: steps.cache-qemu.outputs.cache-hit != 'true'
uses: actions/checkout@v4
with:
repository: qemu/qemu
path: qemu
ref: f5643914a9e8f79c606a76e6a9d7ea82a3fc3e65
- name: qemu
if: steps.cache-qemu.outputs.cache-hit != 'true'
run: |
cd qemu
./configure --prefix=$GITHUB_WORKSPACE/qemu-install --target-list=aarch64-linux-user --disable-system
make -j$(nproc)
make install
- name: aarch64-gnu-toolchain
run: |
sudo apt-get update
sudo apt-get install g++-aarch64-linux-gnu
- name: build
run: |
mkdir build && cd build
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/aarch64-linux-gnu.toolchain.cmake -DNCNN_ARM82=ON -DNCNN_ARM82DOT=ON -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
cmake --build . -j $(nproc)
- name: test
run: |
export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH
cd build
TESTS_EXECUTABLE_LOADER=qemu-aarch64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/aarch64-linux-gnu" ctest --output-on-failure -j $(nproc)
- name: build-noint8
run: |
mkdir build-noint8 && cd build-noint8
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/aarch64-linux-gnu.toolchain.cmake -DNCNN_ARM82=ON -DNCNN_ARM82DOT=ON -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON -DNCNN_INT8=OFF ..
cmake --build . -j $(nproc)
- name: test-noint8
run: |
export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH
cd build-noint8
TESTS_EXECUTABLE_LOADER=qemu-aarch64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/aarch64-linux-gnu" ctest --output-on-failure -j $(nproc)
linux-gcc-arm86:
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v4
- name: cache-qemu
id: cache-qemu
uses: actions/cache@v4
with:
path: qemu-install
key: qemu-aarch64-install-20230717
- name: install-qemu-build-deps
if: steps.cache-qemu.outputs.cache-hit != 'true'
run: |
sudo apt-get update
sudo apt-get install autoconf automake autotools-dev ninja-build
- name: checkout-qemu
if: steps.cache-qemu.outputs.cache-hit != 'true'
uses: actions/checkout@v4
with:
repository: qemu/qemu
path: qemu
ref: ed8ad9728a9c0eec34db9dff61dfa2f1dd625637
- name: qemu
if: steps.cache-qemu.outputs.cache-hit != 'true'
run: |
cd qemu
./configure --prefix=$GITHUB_WORKSPACE/qemu-install --target-list=aarch64-linux-user --disable-system
make -j$(nproc)
make install
- name: aarch64-gnu-toolchain
run: |
sudo apt-get update
sudo apt-get install g++-aarch64-linux-gnu
- name: build
run: |
mkdir build && cd build
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/aarch64-linux-gnu.toolchain.cmake -DNCNN_ARM82=ON -DNCNN_ARM82DOT=ON -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
cmake --build . -j $(nproc)
- name: test
run: |
export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH
cd build
TESTS_EXECUTABLE_LOADER=qemu-aarch64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/aarch64-linux-gnu" ctest --output-on-failure -j $(nproc)