RVV: use new interface for segment load/store & change word_type to s…

…ize_t&add clang ci (part #4100) (#4118) * RVV: use size_t for vl * RVV: replace vsseg.v tuple type by using regex ----- search: vsseg([1-9])e(8|16|32)_v_(f|i|u)\2m(1|2|4|8)x\1$([ -~]+), vcreate_\3\2m\4x\1\(([ -~]+)$, vl\); substitute by: vsseg$1e$2_v_$3$2m$4($5, $6, vl); * RVV: replace vssseg.v tuple types by using regex --- search: vssseg([1-9])e(8|16|32)_v_f\2m1x\1$([ -~]+), vcreate_f\2m1x\1\(([ -~]+)$, vl\); substitute by: vssseg$1e$2_v_f$2m1($3, $4, vl); * RVV: replace vlseg.v tuple types in load/store * RVV: replace vloxseg2ei32.v tuple types * RVV: add a wrapper for old compilers * RVV: add segment load/store wrapper in packing * RVV: fix cmake test * RVV: make clang happy by dropping VLAs in sgemm * RVV: add clang cmake toolchain configure * RVV: add clang ci, riscv64-unknown-linux-gnu Co-authored-by: thelastlin <thelastlin@users.noreply.github.com> Co-authored-by: nihui <shuizhuyuanluo@126.com>
Tencent · Oct 1, 2022 · e7eadca · e7eadca
1 parent 4f9e398
commit e7eadca
Show file tree

Hide file tree

Showing 85 changed files with 1,295 additions and 709 deletions.
diff --git a/.github/workflows/linux-riscv64-cpu-gcc.yml b/.github/workflows/linux-riscv64-cpu-gcc.yml
@@ -117,7 +117,7 @@ jobs:
       #id: cache-riscv
       #uses: actions/cache@v3
       #with:
-        #path: rv64gcv-install
+        #path: rv64gcv-install-next
         #key: rv64gcv-linux-install-20210504
 
     #- name: install-riscv-build-deps
@@ -132,42 +132,42 @@ jobs:
       #with:
         #repository: riscv/riscv-gnu-toolchain
         #path: riscv-gnu-toolchain
-        #ref: 28271f03bb538d926ad2889dc8ad1b0cb1b3b45c
+        #ref: da01ba455ce3802ffa84fdca3a089079996dbfc3
     #- name: checkout-riscv-gnu-toolchain-submodules
       #if: steps.cache-riscv.outputs.cache-hit != 'true'
       #run: |
         #cd riscv-gnu-toolchain
+        #git submodule update --init --recursive --depth 1 glibc
+        #git submodule update --init --recursive --depth 1 newlib
         #git submodule update --init --recursive --depth 1 riscv-binutils
         #git submodule update --init --recursive --depth 1 riscv-gcc
-        #git submodule update --init --recursive --depth 1 riscv-glibc
         #git submodule update --init --recursive --depth 1 riscv-dejagnu
-        #git submodule update --init --recursive --depth 1 riscv-newlib
         #git submodule update --init --recursive --depth 1 riscv-gdb
     #- name: riscv-gnu-toolchain
       #if: steps.cache-riscv.outputs.cache-hit != 'true'
       #run: |
         #cd riscv-gnu-toolchain
-        #sed -i '/__OBSOLETE_MATH/d' riscv-newlib/newlib/libm/common/math_errf.c
-        #./configure --prefix=$GITHUB_WORKSPACE/rv64gcv-install --with-arch=rv64gcv_zfh
+        #sed -i '/__OBSOLETE_MATH/d' newlib/newlib/libm/common/math_errf.c
+        #./configure --prefix=$GITHUB_WORKSPACE/rv64gcv-install-next --with-arch=rv64gcv_zfh
         #make linux
 
     #- name: riscv-strip-install
       #if: steps.cache-riscv.outputs.cache-hit != 'true'
-      #run: find $GITHUB_WORKSPACE/rv64gcv-install -type f | xargs -i strip -g {} || true
+      #run: find $GITHUB_WORKSPACE/rv64gcv-install-next -type f | xargs -i strip -g {} || true
 
     - name: configure
-      run: export RISCV_ROOT_PATH=/data/action/osd/rv64gcv-install && mkdir build && cd build && cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/riscv64-unknown-linux-gnu.toolchain.cmake -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
+      run: export RISCV_ROOT_PATH=/data/action/osd/rv64gcv-install-next && mkdir build && cd build && cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/riscv64-unknown-linux-gnu.toolchain.cmake -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
     - name: build
       run: cmake --build build -j 4
 
     - name: test-vlen256
       run: |
         export PATH=/data/action/osd/qemu-install/bin:$PATH
         cd build
-        TESTS_EXECUTABLE_LOADER=qemu-riscv64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-cpu;rv64,v=true,Zfh=true,vlen=256,elen=64,vext_spec=v1.0;-L;/data/action/osd/rv64gcv-install/sysroot" ctest --output-on-failure -j 4
+        TESTS_EXECUTABLE_LOADER=qemu-riscv64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-cpu;rv64,v=true,Zfh=true,vlen=256,elen=64,vext_spec=v1.0;-L;/data/action/osd/rv64gcv-install-next/sysroot" ctest --output-on-failure -j 4
 
     - name: test-vlen128
       run: |
         export PATH=/data/action/osd/qemu-install/bin:$PATH
         cd build
-        TESTS_EXECUTABLE_LOADER=qemu-riscv64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-cpu;rv64,v=true,Zfh=true,vlen=128,elen=64,vext_spec=v1.0;-L;/data/action/osd/rv64gcv-install/sysroot" ctest --output-on-failure -j 4
+        TESTS_EXECUTABLE_LOADER=qemu-riscv64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-cpu;rv64,v=true,Zfh=true,vlen=128,elen=64,vext_spec=v1.0;-L;/data/action/osd/rv64gcv-install-next/sysroot" ctest --output-on-failure -j 4
diff --git a/.github/workflows/linux-riscv64-cpu-gnu-clang.yml b/.github/workflows/linux-riscv64-cpu-gnu-clang.yml
@@ -0,0 +1,142 @@
+name: linux-riscv64-cpu-gnu-clang
+on:
+  push:
+    branches: [master]
+    paths:
+    - '.github/workflows/linux-riscv64-cpu-gnu-clang.yml'
+    - 'toolchains/riscv64-unknown-linux-gnu.llvm-toolchain.cmake'
+    - 'CMakeLists.txt'
+    - 'cmake/**'
+    - 'src/*'
+    - 'src/layer/*'
+    - 'src/layer/riscv/**'
+    - 'tests/**'
+  pull_request:
+    branches: [master]
+    paths:
+    - '.github/workflows/linux-riscv64-cpu-gnu-clang.yml'
+    - 'toolchains/riscv64-unknown-linux-gnu.llvm-toolchain.cmake'
+    - 'CMakeLists.txt'
+    - 'cmake/**'
+    - 'src/*'
+    - 'src/layer/*'
+    - 'src/layer/riscv/**'
+    - 'tests/**'
+concurrency:
+  group: linux-riscv64-cpu-gnu-clang-${{ github.ref }}
+  cancel-in-progress: true
+permissions:
+  contents: read
+
+jobs:
+  linux-gcc-riscv64-rvv:
+    runs-on: [self-hosted, linux, centos]
+    steps:
+    - uses: actions/checkout@v3
+
+    #- name: cache-qemu
+      #id: cache-qemu
+      #uses: actions/cache@v3
+      #with:
+        #path: qemu-install
+        #key: qemu-riscv64-install-20220502-3
+    #- name: install-qemu-build-deps
+      #if: steps.cache-qemu.outputs.cache-hit != 'true'
+      #run: |
+        #sudo apt-get update
+        #sudo apt-get install autoconf automake autotools-dev ninja-build
+    #- name: checkout-qemu
+      #if: steps.cache-qemu.outputs.cache-hit != 'true'
+      #uses: actions/checkout@v3
+      #with:
+        #repository: qemu/qemu
+        #path: qemu
+        #ref: f5643914a9e8f79c606a76e6a9d7ea82a3fc3e65
+    #- name: qemu
+      #if: steps.cache-qemu.outputs.cache-hit != 'true'
+      #run: |
+        #cd qemu
+        #wget https://raw.githubusercontent.com/nihui/ncnn-assets/master/qemu-patches/0007-linux-user-Expose-risc-v-V-isa-bit-in-get_elf_hwcap.patch
+        #patch -p1 -i 0007-linux-user-Expose-risc-v-V-isa-bit-in-get_elf_hwcap.patch
+        #./configure --prefix=$GITHUB_WORKSPACE/qemu-install --target-list=riscv64-linux-user --disable-system
+        #make -j2
+        #make install
+
+    #- name: cache-riscv
+      #id: cache-riscv
+      #uses: actions/cache@v3
+      #with:
+        #path: rv64gcv-install-next
+        #key: rv64gcv-linux-install-20210504
+
+    #- name: install-riscv-build-deps
+      #if: steps.cache-riscv.outputs.cache-hit != 'true'
+      #run: |
+        #sudo apt-get update
+        #sudo apt-get install autoconf automake autotools-dev curl python3 libmpc-dev libmpfr-dev libgmp-dev gawk build-essential bison flex texinfo gperf libtool patchutils bc zlib1g-dev libexpat-dev device-tree-compiler
+
+    #- name: checkout-riscv-gnu-toolchain
+      #if: steps.cache-riscv.outputs.cache-hit != 'true'
+      #uses: actions/checkout@v3
+      #with:
+        #repository: riscv/riscv-gnu-toolchain
+        #path: riscv-gnu-toolchain
+        #ref: da01ba455ce3802ffa84fdca3a089079996dbfc3
+    #- name: checkout-riscv-gnu-toolchain-submodules
+      #if: steps.cache-riscv.outputs.cache-hit != 'true'
+      #run: |
+        #cd riscv-gnu-toolchain
+        #git submodule update --init --recursive --depth 1 glibc
+        #git submodule update --init --recursive --depth 1 newlib
+        #git submodule update --init --recursive --depth 1 riscv-binutils
+        #git submodule update --init --recursive --depth 1 riscv-gcc
+        #git submodule update --init --recursive --depth 1 riscv-dejagnu
+        #git submodule update --init --recursive --depth 1 riscv-gdb
+    #- name: riscv-gnu-toolchain
+      #if: steps.cache-riscv.outputs.cache-hit != 'true'
+      #run: |
+        #cd riscv-gnu-toolchain
+        #sed -i '/__OBSOLETE_MATH/d' newlib/newlib/libm/common/math_errf.c
+        #./configure --prefix=$GITHUB_WORKSPACE/rv64gcv-install-next --with-arch=rv64gcv_zfh
+        #make linux
+
+    #- name: riscv-strip-install
+      #if: steps.cache-riscv.outputs.cache-hit != 'true'
+      #run: find $GITHUB_WORKSPACE/rv64gcv-install-next -type f | xargs -i strip -g {} || true
+
+    # - name: install-clang
+    #   run: |
+    #     wget https://github.com/llvm/llvm-project/releases/download/llvmorg-15.0.1/llvm-project-15.0.1.src.tar.xz
+    #     tar -xf llvm-project-15.0.1.src.tar.xz
+    #     cd llvm-project-15.0.1.src
+    #     mkdir build
+    #     cd build
+    #     cmake -DCMAKE_INSTALL_PREFIX=install -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON -DLLVM_ENABLE_PROJECTS="clang" -DLLVM_TARGETS_TO_BUILD="RISCV" -DLLVM_INCLUDE_EXAMPLES=OFF -DLLVM_INCLUDE_TESTS=OFF ../llvm/
+    #     make -j16
+    #     make install
+
+    - name: build
+      env:
+        LD_LIBRARY_PATH: /data/action/install/lib64
+      run: |
+        export RISCV_ROOT_PATH=/data/action/osd/rv64gcv-install-next
+        export PATH=/data/action/osd/llvm-project-15.0.1.src/build/install/bin:$PATH
+        mkdir build && cd build
+        cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/riscv64-unknown-linux-gnu.llvm-toolchain.cmake -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF -DNCNN_BUILD_TESTS=ON ..
+        cmake --build . -j 4
+
+    - name: test-vlen256
+      env:
+        LD_LIBRARY_PATH: /data/action/install/lib64
+      run: |
+        export PATH=/data/action/osd/qemu-install/bin:$PATH
+        cd build
+        TESTS_EXECUTABLE_LOADER=qemu-riscv64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-cpu;rv64,v=true,Zfh=true,vlen=256,elen=64,vext_spec=v1.0;-L;/data/action/osd/rv64gcv-install-next/sysroot" ctest --output-on-failure -j 4
+
+    - name: test-vlen128
+      env:
+        LD_LIBRARY_PATH: /data/action/install/lib64
+      run: |
+        export PATH=/data/action/osd/qemu-install/bin:$PATH
+        cd build
+        TESTS_EXECUTABLE_LOADER=qemu-riscv64 TESTS_EXECUTABLE_LOADER_ARGUMENTS="-cpu;rv64,v=true,Zfh=true,vlen=128,elen=64,vext_spec=v1.0;-L;/data/action/osd/rv64gcv-install-next/sysroot" ctest --output-on-failure -j 4
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -296,10 +296,15 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(riscv)")
     include(CheckCXXCompilerFlag)
 
     set(CMAKE_REQUIRED_FLAGS "-march=rv64gcv")
-    check_cxx_source_compiles("#include <riscv_vector.h>\nint main() { vfloat32m1_t _s, _w; float _v; word_type vl; _s = vfmacc_vf_f32m1(_s, _v, _w, vl); return 0; }" NCNN_COMPILER_SUPPORT_RVV)
+    check_cxx_source_compiles("#include <riscv_vector.h>\nint main() { vfloat32m1_t _s, _w; float _v; size_t vl; _s = vfmacc_vf_f32m1(_s, _v, _w, vl); return 0; }" NCNN_COMPILER_SUPPORT_RVV)
 
     set(CMAKE_REQUIRED_FLAGS "-march=rv64gcv_zfh")
-    check_cxx_source_compiles("#include <riscv_vector.h>\nint main() { vfloat16m1_t _s, _w; __fp16 _v; word_type vl; _s = vfmacc_vf_f16m1(_s, _v, _w, vl); return 0; }" NCNN_COMPILER_SUPPORT_RVV_FP16)
+    check_cxx_source_compiles("#include <riscv_vector.h>\nint main() { vfloat16m1_t _s, _w; __fp16 _v; size_t vl; _s = vfmacc_vf_f16m1(_s, _v, _w, vl); return 0; }" NCNN_COMPILER_SUPPORT_RVV_ZFH)
+
+    if(NOT NCNN_COMPILER_SUPPORT_RVV_ZFH)
+        set(CMAKE_REQUIRED_FLAGS "-march=rv64gcv_zfh_zvfh0p1 -menable-experimental-extensions -D__fp16=_Float16")
+        check_cxx_source_compiles("#include <riscv_vector.h>\nint main() { vfloat16m1_t _s, _w; __fp16 _v; size_t vl; _s = vfmacc_vf_f16m1(_s, _v, _w, vl); return 0; }" NCNN_COMPILER_SUPPORT_RVV_ZVFH)
+    endif()
 
     unset(CMAKE_REQUIRED_FLAGS)
 
@@ -309,9 +314,19 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(riscv)")
         if(NCNN_RVV_CHECK_VFREDSUM)
             include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/ncnn_check_rvv_vfredusum.cmake)
         endif()
-        if(NOT NCNN_COMPILER_SUPPORT_RVV_FP16)
+        if(NOT (NCNN_COMPILER_SUPPORT_RVV_ZFH OR NCNN_COMPILER_SUPPORT_RVV_ZVFH))
             message(WARNING "The compiler does not support risc-v zfh extension. Upgrading your toolchain is strongly recommended.")
         endif()
+        option(NCNN_RVV_CHECK_PLAIN_SEGMENT "check compilter about rvv segment load/store interface" ON)
+        if(NCNN_RVV_CHECK_PLAIN_SEGMENT)
+            set(CMAKE_REQUIRED_FLAGS "-march=rv64gcv")
+            check_cxx_source_compiles("#include <riscv_vector.h>\nint main() { vfloat32m1_t _s, _w; size_t vl; float src[32]={.0f}; vlseg2e32_v_f32m1(&_s, &_w, src, vl); return 0; }" NCNN_COMPILER_USE_RVV_PLAIN_SEG)
+            unset(CMAKE_REQUIRED_FLAGS)
+        endif()
+        if(NOT NCNN_COMPILER_USE_RVV_PLAIN_SEG)
+            message(WARNING "The compiler uses tuple types for segment load/store. Upgrading your toolchain is strongly recommended.")
+            add_definitions(-D__rvv_tuple)
+        endif()
     else()
         message(WARNING "The compiler does not support risc-v v extension. NCNN_RVV will be OFF.")
     endif()

diff --git a/cmake/ncnn_add_layer.cmake b/cmake/ncnn_add_layer.cmake
@@ -271,8 +271,10 @@ macro(ncnn_add_layer class)
     endif()
 
     if(NCNN_RUNTIME_CPU AND NCNN_RVV AND NCNN_TARGET_ARCH STREQUAL "riscv")
-        if(NCNN_COMPILER_SUPPORT_RVV_FP16)
+        if(NCNN_COMPILER_SUPPORT_RVV_ZFH)
             ncnn_add_arch_opt_layer(${class} rvv "-march=rv64gcv_zfh")
+        elseif(NCNN_COMPILER_SUPPORT_RVV_ZVFH)
+            ncnn_add_arch_opt_layer(${class} rvv "-march=rv64gcv_zfh_zvfh0p1 -menable-experimental-extensions -D__fp16=_Float16")
         elseif(NCNN_COMPILER_SUPPORT_RVV)
             ncnn_add_arch_opt_layer(${class} rvv "-march=rv64gcv")
         endif()

diff --git a/cmake/ncnn_check_rvv_vfredusum.cmake b/cmake/ncnn_check_rvv_vfredusum.cmake
@@ -9,7 +9,7 @@ int main(void)
 {
     float in1[4] = {-1.f,0.f,+1.f,2.f};
     float out1=0;
-    word_type vl = vsetvl_e32m8(4);
+    size_t vl = vsetvl_e32m8(4);
     vfloat32m8_t _add = vle32_v_f32m8(in1,vl);
     vfloat32m1_t _sum = vfmv_s_f_f32m1(vundefined_f32m1(),out1,vl);
     _sum = vfredsum_vs_f32m8_f32m1(_sum, _add, _sum, vl);
@@ -23,7 +23,7 @@ int main(void)
 {
     float in1[4] = {-1.f,0.f,+1.f,2.f};
     float out1=0;
-    word_type vl = vsetvl_e32m8(4);
+    size_t vl = vsetvl_e32m8(4);
     vfloat32m8_t _add = vle32_v_f32m8(in1,vl);
     vfloat32m1_t _sum = vfmv_s_f_f32m1(vundefined_f32m1(),out1,vl);
     _sum = vfredusum_vs_f32m8_f32m1(_sum, _add, _sum, vl);
@@ -36,7 +36,7 @@ if(NCNN_COMPILER_USE_VFREDSUM AND NOT NCNN_COMPILER_USE_VFREDUSUM)
     message(WARNING "The compiler uses vfredsum. Upgrading your toolchain is strongly recommended.")
     foreach(LMUL 1 2 4 8)
         add_definitions(-Dvfredusum_vs_f32m${LMUL}_f32m1=vfredsum_vs_f32m${LMUL}_f32m1)
-        if(NCNN_COMPILER_SUPPORT_RVV_FP16)
+        if(NCNN_COMPILER_SUPPORT_RVV_ZFH OR NCNN_COMPILER_SUPPORT_RVV_ZVFH)
             add_definitions(-Dvfredusum_vs_f16m${LMUL}_f16m1=vfredsum_vs_f16m${LMUL}_f16m1)
         endif()
     endforeach()

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
@@ -459,8 +459,10 @@ endif()
 
 if(NCNN_TARGET_ARCH STREQUAL "riscv" AND NOT C906)
     if(NOT NCNN_RUNTIME_CPU AND NCNN_RVV)
-        if(NCNN_COMPILER_SUPPORT_RVV_FP16)
+        if(NCNN_COMPILER_SUPPORT_RVV_ZFH)
             target_compile_options(ncnn PRIVATE -march=rv64gcv_zfh)
+        elseif(NCNN_COMPILER_SUPPORT_RVV_ZVFH)
+            target_compile_options(ncnn PRIVATE -march=rv64gcv_zfh_zvfh0p1 -menable-experimental-extensions -D__fp16=_Float16)
         elseif(NCNN_COMPILER_SUPPORT_RVV)
             target_compile_options(ncnn PRIVATE -march=rv64gcv)
         endif()

diff --git a/src/layer/riscv/absval_riscv.cpp b/src/layer/riscv/absval_riscv.cpp
@@ -66,7 +66,7 @@ int AbsVal_riscv::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
         int n = size;
         while (n > 0)
         {
-            word_type vl = vsetvl_e32m8(n);
+            size_t vl = vsetvl_e32m8(n);
 
             vfloat32m8_t _p = vle32_v_f32m8(ptr, vl);
             _p = vfabs_v_f32m8_absval(_p, vl);
@@ -106,7 +106,7 @@ int AbsVal_riscv::forward_inplace_fp16s(Mat& bottom_top_blob, const Option& opt)
         int n = size;
         while (n > 0)
         {
-            word_type vl = vsetvl_e16m8(n);
+            size_t vl = vsetvl_e16m8(n);
 
             vfloat16m8_t _p = vle16_v_f16m8(ptr, vl);
             _p = vfabs_v_f16m8_absval(_p, vl);