Skip to content

Commit

Permalink
x86 optimization for convolution int8 gemm unified elempack (#4881)
Browse files Browse the repository at this point in the history
  • Loading branch information
nihui authored Aug 29, 2023
1 parent 1a61bcd commit 9ecf6a6
Show file tree
Hide file tree
Showing 18 changed files with 7,184 additions and 4,378 deletions.
22 changes: 14 additions & 8 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -408,15 +408,21 @@ else()
check_cxx_compiler_flag("/arch:AVX" NCNN_COMPILER_SUPPORT_X86_XOP)
check_cxx_compiler_flag("/arch:AVX" NCNN_COMPILER_SUPPORT_X86_F16C)
check_cxx_compiler_flag("/arch:AVX2" NCNN_COMPILER_SUPPORT_X86_AVX2)
check_cxx_compiler_flag("/arch:AVX2" NCNN_COMPILER_SUPPORT_X86_AVX_VNNI)
check_cxx_compiler_flag("/arch:AVX512" NCNN_COMPILER_SUPPORT_X86_AVX512)
check_cxx_compiler_flag("/arch:AVX512" NCNN_COMPILER_SUPPORT_X86_AVX512_VNNI)
if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 19.16)
# vs2017+ supports avx512 and vnni
set(NCNN_COMPILER_SUPPORT_X86_AVX_VNNI OFF)
set(NCNN_COMPILER_SUPPORT_X86_AVX512 OFF)
set(NCNN_COMPILER_SUPPORT_X86_AVX512_VNNI OFF)
endif()

set(CMAKE_REQUIRED_FLAGS "/arch:AVX2")
check_cxx_source_compiles("#include <immintrin.h>\nint main() { __m256i _s, _a, _b; _s = _mm256_dpwssd_epi32(_s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_X86_AVX_VNNI)

set(CMAKE_REQUIRED_FLAGS "/arch:AVX512")
check_cxx_source_compiles("#include <immintrin.h>\nint main() { __m512i _s, _a, _b; _s = _mm512_dpwssd_epi32(_s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_X86_AVX512_VNNI)

set(CMAKE_REQUIRED_FLAGS "/arch:AVX512")
check_cxx_source_compiles("#include <immintrin.h>\nint main() { __m256bh _s; __m512bh _a, _b; _s = _mm512_cvtneps_pbh(_mm512_dpbf16_ps(_mm512_cvtpbh_ps(_s), _a, _b)); return 0; }" NCNN_COMPILER_SUPPORT_X86_AVX512_BF16)

set(CMAKE_REQUIRED_FLAGS "/arch:AVX512")
check_cxx_source_compiles("#include <immintrin.h>\nint main() { __m512h _s, _a, _b; _s = _mm512_fmadd_ph(_s, _a, _b); __m512 _s2; _s2 = _mm512_cvtxph_ps(_mm512_cvtxps_ph(_s2)); return 0; }" NCNN_COMPILER_SUPPORT_X86_AVX512_FP16)

unset(CMAKE_REQUIRED_FLAGS)
else()
check_cxx_compiler_flag("-mavx" NCNN_COMPILER_SUPPORT_X86_AVX)

Expand Down
6 changes: 6 additions & 0 deletions cmake/ncnn_add_layer.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,12 @@ macro(ncnn_add_layer class)
if(NCNN_AVX512VNNI)
ncnn_add_arch_opt_source(${class} avx512vnni "/arch:AVX512 /D__SSE4_1__ /D__FMA__ /D__F16C__ /D__AVX512VNNI__")
endif()
if(NCNN_AVX512BF16)
ncnn_add_arch_opt_source(${class} avx512bf16 "/arch:AVX512 /D__SSE4_1__ /D__FMA__ /D__F16C__ /D__AVX512BF16__")
endif()
if(NCNN_AVX512FP16)
ncnn_add_arch_opt_source(${class} avx512fp16 "/arch:AVX512 /D__SSE4_1__ /D__FMA__ /D__F16C__ /D__AVX512FP16__")
endif()
if(NCNN_AVXVNNI)
ncnn_add_arch_opt_source(${class} avxvnni "/arch:AVX2 /D__SSE4_1__ /D__FMA__ /D__F16C__ /D__AVXVNNI__")
endif()
Expand Down
262 changes: 0 additions & 262 deletions src/layer/x86/convolution_1x1_int8.h

This file was deleted.

83 changes: 0 additions & 83 deletions src/layer/x86/convolution_1x1_pack1to4_int8.h

This file was deleted.

Loading

0 comments on commit 9ecf6a6

Please sign in to comment.