Skip to content

Commit

Permalink
Add POWER9 VSX toolchains
Browse files Browse the repository at this point in the history
Translating x86_64 SSE to ppc64le VSX intrinsics yields a quite large
speedup on POWER9. See this article for background:

https://www.talospace.com/2019/07/easier-power-vectorizing-for-fun-and.html
  • Loading branch information
Jeremy Rand committed Jul 3, 2023
1 parent a7fa19c commit 880eb24
Show file tree
Hide file tree
Showing 5 changed files with 153 additions and 0 deletions.
92 changes: 92 additions & 0 deletions .github/workflows/linux-ppc64-cpu-gcc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -73,3 +73,95 @@ jobs:
export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH
cd build
TESTS_EXECUTABLE_LOADER=qemu-ppc64le TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/powerpc64le-linux-gnu" ctest --output-on-failure -j 2
linux-gcc-power9le-vsx:
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v3

- name: cache-qemu
id: cache-qemu
uses: actions/cache@v3
with:
path: qemu-install
key: qemu-ppc64le-install-20220502-2
- name: install-qemu-build-deps
if: steps.cache-qemu.outputs.cache-hit != 'true'
run: |
sudo apt-get update
sudo apt-get install autoconf automake autotools-dev ninja-build
- name: checkout-qemu
if: steps.cache-qemu.outputs.cache-hit != 'true'
uses: actions/checkout@v3
with:
repository: qemu/qemu
path: qemu
ref: f5643914a9e8f79c606a76e6a9d7ea82a3fc3e65
- name: qemu
if: steps.cache-qemu.outputs.cache-hit != 'true'
run: |
cd qemu
./configure --prefix=$GITHUB_WORKSPACE/qemu-install --target-list=ppc64le-linux-user --disable-system
make -j2
make install
- name: powerpc64le-gnu-toolchain
run: |
sudo apt-get update
sudo apt-get install g++-powerpc64le-linux-gnu
- name: configure
run: mkdir build && cd build && cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/power9le-linux-gnu-vsx.toolchain.cmake -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
- name: build
run: cmake --build build -j 2

- name: test
run: |
export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH
cd build
TESTS_EXECUTABLE_LOADER=qemu-ppc64le TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/powerpc64le-linux-gnu" ctest --output-on-failure -j 2
linux-clang-power9le-vsx:
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v3

- name: cache-qemu
id: cache-qemu
uses: actions/cache@v3
with:
path: qemu-install
key: qemu-ppc64le-install-20220502-2
- name: install-qemu-build-deps
if: steps.cache-qemu.outputs.cache-hit != 'true'
run: |
sudo apt-get update
sudo apt-get install autoconf automake autotools-dev ninja-build
- name: checkout-qemu
if: steps.cache-qemu.outputs.cache-hit != 'true'
uses: actions/checkout@v3
with:
repository: qemu/qemu
path: qemu
ref: f5643914a9e8f79c606a76e6a9d7ea82a3fc3e65
- name: qemu
if: steps.cache-qemu.outputs.cache-hit != 'true'
run: |
cd qemu
./configure --prefix=$GITHUB_WORKSPACE/qemu-install --target-list=ppc64le-linux-user --disable-system
make -j2
make install
- name: powerpc64le-gnu-toolchain
run: |
sudo apt-get update
sudo apt-get install g++-powerpc64le-linux-gnu clang libc6-dev-ppc64el-cross
- name: configure
run: mkdir build && cd build && cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/power9le-linux-gnu-vsx.clang.toolchain.cmake -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
- name: build
run: cmake --build build -j 2

- name: test
run: |
export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH
cd build
TESTS_EXECUTABLE_LOADER=qemu-ppc64le TESTS_EXECUTABLE_LOADER_ARGUMENTS="-L;/usr/powerpc64le-linux-gnu" ctest --output-on-failure -j 2
14 changes: 14 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -364,6 +364,20 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(riscv)")
endif()
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)")
set(NCNN_TARGET_ARCH powerpc)

if(NCNN_PPC64LE_VSX)
set(NCNN_TARGET_ARCH x86)

set(CMAKE_REQUIRED_FLAGS "-DNO_WARN_X86_INTRINSICS -D__SSE4_1__")
check_cxx_source_compiles("#include <smmintrin.h>\nint main() { __m128i _v, _a, _b; _v = _mm_packus_epi32(_a, _b); return 0; }" NCNN_COMPILER_SUPPORT_PPC64LE_SSE4_1)
unset(CMAKE_REQUIRED_FLAGS)

if(NCNN_COMPILER_SUPPORT_PPC64LE_SSE4_1)
option(NCNN_SSE4_1 "optimize ppc64le platform with sse4.1 extension" ON)
else()
message(WARNING "The compiler does not support sse4.1 extension. NCNN_SSE4_1 will be OFF.")
endif()
endif()
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(xtensa)")
set(NCNN_TARGET_ARCH xtensa)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(s390x)")
Expand Down
7 changes: 7 additions & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -502,6 +502,13 @@ if(NCNN_TARGET_ARCH STREQUAL "riscv" AND NOT C906)
endif()
endif()

if(NCNN_PPC64LE_VSX)
# Auto-translate SSE4.1 to VSX if compiler is new enough.
if(NCNN_SSE4_1)
target_compile_options(ncnn PRIVATE -DNO_WARN_X86_INTRINSICS -D__SSE4_1__)
endif()
endif()

if(NCNN_COVERAGE)
target_compile_options(ncnn PUBLIC -coverage -fprofile-arcs -ftest-coverage)
target_link_libraries(ncnn PUBLIC -coverage -lgcov)
Expand Down
20 changes: 20 additions & 0 deletions toolchains/power9le-linux-gnu-vsx.clang.toolchain.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
set(CMAKE_SYSTEM_NAME Linux)
set(CMAKE_SYSTEM_PROCESSOR powerpc64le)

set(CMAKE_C_COMPILER "clang")
set(CMAKE_CXX_COMPILER "clang++")

set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)

set(CMAKE_C_FLAGS "-target powerpc64le-linux-gnu -I/usr/powerpc64le-linux-gnu/include -mcpu=power9 -mtune=power9 -DNO_WARN_X86_INTRINSICS -D__MMX__ -D__SSE__ -D__SSE2__ -D__SSSE3__")
set(CMAKE_CXX_FLAGS "-target powerpc64le-linux-gnu -I/usr/powerpc64le-linux-gnu/include -mcpu=power9 -mtune=power9 -DNO_WARN_X86_INTRINSICS -D__MMX__ -D__SSE__ -D__SSE2__ -D__SSSE3__")

# cache flags
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}" CACHE STRING "c flags")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}" CACHE STRING "c++ flags")

# Auto-translate SSE to VSX
set(NCNN_TARGET_ARCH x86)
set(NCNN_PPC64LE_VSX ON)
20 changes: 20 additions & 0 deletions toolchains/power9le-linux-gnu-vsx.toolchain.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
set(CMAKE_SYSTEM_NAME Linux)
set(CMAKE_SYSTEM_PROCESSOR powerpc64le)

set(CMAKE_C_COMPILER "powerpc64le-linux-gnu-gcc")
set(CMAKE_CXX_COMPILER "powerpc64le-linux-gnu-g++")

set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)

set(CMAKE_C_FLAGS "-mcpu=power9 -mtune=power9 -DNO_WARN_X86_INTRINSICS -D__MMX__ -D__SSE__ -D__SSE2__ -D__SSSE3__")
set(CMAKE_CXX_FLAGS "-mcpu=power9 -mtune=power9 -DNO_WARN_X86_INTRINSICS -D__MMX__ -D__SSE__ -D__SSE2__ -D__SSSE3__")

# cache flags
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}" CACHE STRING "c flags")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}" CACHE STRING "c++ flags")

# Auto-translate SSE to VSX
set(NCNN_TARGET_ARCH x86)
set(NCNN_PPC64LE_VSX ON)

0 comments on commit 880eb24

Please sign in to comment.