Per-file SIMD build settings for the intsimdmatrix sources, plus an MSVC
workaround in ExtractResults.

* CMakeLists.txt: the __SSE4_1__ definition for arch/dotproductsse.cpp moves
  under `if (MSVC)`; arch/intsimdmatrixsse.cpp and arch/intsimdmatrixavx2.cpp
  get the same per-file treatment as the dotproduct sources (MSVC:
  __SSE4_1__ / "/arch:AVX2"; non-Windows: "-msse4.1" / "-mavx2").
* arch/intsimdmatrixavx2.cpp: on MSVC, read lane 0 through an int32_t pointer
  instead of calling _mm256_extract_epi32 (works around a compiler ICE).
* cppan.yml: mirrors the MSVC settings; paths stay relative to ${SDIR}, the
  variable the replaced lines already used.

NOTE(review): this patch was reconstructed from a copy whose line breaks and
indentation had been collapsed. Indentation is a best guess; re-diff against
the real tree before applying.
NOTE(review): the install(FILES ...) hunk appears to be whitespace-only; the
trailing whitespace of its removed lines could not be recovered.
NOTE(review): header names in the #include lines were restored from the
upstream file, not from this patch -- confirm.
NOTE(review): WIN32 toolchains other than MSVC (e.g. MinGW) match
`if (WIN32)` but not `if (MSVC)`, so they receive no SIMD flags here.

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 014cf515a2..a8df25ec2f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -204,21 +204,33 @@ set(tesseract_src ${tesseract_src}
 )

 if (WIN32)
-    set_source_files_properties(
-        ${CMAKE_CURRENT_SOURCE_DIR}/arch/dotproductsse.cpp
-        PROPERTIES COMPILE_DEFINITIONS __SSE4_1__)
     if (MSVC)
+        set_source_files_properties(
+            ${CMAKE_CURRENT_SOURCE_DIR}/arch/dotproductsse.cpp
+            PROPERTIES COMPILE_DEFINITIONS __SSE4_1__)
+        set_source_files_properties(
+            ${CMAKE_CURRENT_SOURCE_DIR}/arch/intsimdmatrixsse.cpp
+            PROPERTIES COMPILE_DEFINITIONS __SSE4_1__)
         set_source_files_properties(
             ${CMAKE_CURRENT_SOURCE_DIR}/arch/dotproductavx.cpp
             PROPERTIES COMPILE_FLAGS "/arch:AVX")
+        set_source_files_properties(
+            ${CMAKE_CURRENT_SOURCE_DIR}/arch/intsimdmatrixavx2.cpp
+            PROPERTIES COMPILE_FLAGS "/arch:AVX2")
     endif()
 else()
     set_source_files_properties(
         ${CMAKE_CURRENT_SOURCE_DIR}/arch/dotproductsse.cpp
         PROPERTIES COMPILE_FLAGS "-msse4.1")
+    set_source_files_properties(
+        ${CMAKE_CURRENT_SOURCE_DIR}/arch/intsimdmatrixsse.cpp
+        PROPERTIES COMPILE_FLAGS "-msse4.1")
     set_source_files_properties(
         ${CMAKE_CURRENT_SOURCE_DIR}/arch/dotproductavx.cpp
         PROPERTIES COMPILE_FLAGS "-mavx")
+    set_source_files_properties(
+        ${CMAKE_CURRENT_SOURCE_DIR}/arch/intsimdmatrixavx2.cpp
+        PROPERTIES COMPILE_FLAGS "-mavx2")
 endif()

 add_library (libtesseract ${LIBRARY_TYPE} ${tesseract_src} ${tesseract_hdr})
@@ -288,69 +300,69 @@ install(FILES
 install(FILES
     # from api/makefile.am
     api/apitypes.h
-    api/baseapi.h
-    api/capi.h
+    api/baseapi.h
+    api/capi.h
     api/renderer.h

     #from arch/makefile.am
     arch/dotproductavx.h
-    arch/dotproductsse.h
+    arch/dotproductsse.h
     arch/simddetect.h

     #from ccmain/makefile.am
     ccmain/thresholder.h
-    ccmain/ltrresultiterator.h
-    ccmain/pageiterator.h
-    ccmain/resultiterator.h
+    ccmain/ltrresultiterator.h
+    ccmain/pageiterator.h
+    ccmain/resultiterator.h
     ccmain/osdetect.h

     #from ccstruct/makefile.am
-    ccstruct/publictypes.h
+    ccstruct/publictypes.h

     #from ccutil/makefile.am
-    ccutil/basedir.h
-    ccutil/errcode.h
-    ccutil/fileerr.h
-    ccutil/genericvector.h
-    ccutil/helpers.h
-    ccutil/host.h
-    ccutil/memry.h
+    ccutil/basedir.h
+    ccutil/errcode.h
+    ccutil/fileerr.h
+    ccutil/genericvector.h
+    ccutil/helpers.h
+    ccutil/host.h
+    ccutil/memry.h
     ccutil/ndminx.h
     ccutil/params.h
-    ccutil/ocrclass.h
-    ccutil/platform.h
-    ccutil/serialis.h
-    ccutil/strngs.h
+    ccutil/ocrclass.h
+    ccutil/platform.h
+    ccutil/serialis.h
+    ccutil/strngs.h
     ccutil/tesscallback.h
-    ccutil/unichar.h
-    ccutil/unicharcompress.h
-    ccutil/unicharmap.h
+    ccutil/unichar.h
+    ccutil/unicharcompress.h
+    ccutil/unicharmap.h
     ccutil/unicharset.h
     ccutil/version.h

     #from lstm/makefile.am
     lstm/convolve.h
-    lstm/ctc.h
-    lstm/fullyconnected.h
-    lstm/functions.h
+    lstm/ctc.h
+    lstm/fullyconnected.h
+    lstm/functions.h
     lstm/input.h
-    lstm/lstm.h
-    lstm/lstmrecognizer.h
-    lstm/lstmtrainer.h
+    lstm/lstm.h
+    lstm/lstmrecognizer.h
+    lstm/lstmtrainer.h
     lstm/maxpool.h
-    lstm/networkbuilder.h
-    lstm/network.h
-    lstm/networkio.h
+    lstm/networkbuilder.h
+    lstm/network.h
+    lstm/networkio.h
     lstm/networkscratch.h
-    lstm/parallel.h
-    lstm/plumbing.h
-    lstm/recodebeam.h
-    lstm/reconfig.h
+    lstm/parallel.h
+    lstm/plumbing.h
+    lstm/recodebeam.h
+    lstm/reconfig.h
     lstm/reversed.h
-    lstm/series.h
-    lstm/static_shape.h
-    lstm/stridemap.h
-    lstm/tfnetwork.h
+    lstm/series.h
+    lstm/static_shape.h
+    lstm/stridemap.h
+    lstm/tfnetwork.h
     lstm/weightmatrix.h

     #${CMAKE_BINARY_DIR}/src/endianness.h
diff --git a/arch/intsimdmatrixavx2.cpp b/arch/intsimdmatrixavx2.cpp
index 2d6cbb6783..4c865060cf 100644
--- a/arch/intsimdmatrixavx2.cpp
+++ b/arch/intsimdmatrixavx2.cpp
@@ -21,6 +21,7 @@
 #ifdef __AVX2__
 #include <immintrin.h>
 #include <stdint.h>
+#include <algorithm>
 #include <vector>

 namespace tesseract {
@@ -74,7 +75,15 @@ inline void ExtractResults(__m256i& result, __m256i& shift_id,
                            const int8_t*& wi, const double*& scales,
                            int num_out, double*& v) {
   for (int out = 0; out < num_out; ++out) {
-    int32_t res = _mm256_extract_epi32(result, 0);
+    int32_t res =
+#ifndef _MSC_VER
+        _mm256_extract_epi32(result, 0)
+#else
+        // Workaround MSVC's ICE
+        // _mm256_extract_epi32(X, Y) == ((int32_t*)&X)[Y]
+        ((int32_t*)&result)[0]
+#endif
+        ;
     *v++ = (static_cast<double>(res) / MAX_INT8 + *wi++) * *scales++;
     // Rotate the results in int32_t units, so the next result is ready.
     result = _mm256_permutevar8x32_epi32(result, shift_id);
diff --git a/cppan.yml b/cppan.yml
index 68865d4028..7981dc0800 100644
--- a/cppan.yml
+++ b/cppan.yml
@@ -124,13 +124,19 @@ projects:

     post_sources: |
       if (WIN32)
-          set_source_files_properties(
-              ${SDIR}/arch/dotproductsse.cpp
-              PROPERTIES COMPILE_DEFINITIONS __SSE4_1__)
           if (MSVC)
               set_source_files_properties(
-                  ${SDIR}/arch/dotproductavx.cpp
+                  ${SDIR}/arch/dotproductsse.cpp
+                  PROPERTIES COMPILE_DEFINITIONS __SSE4_1__)
+              set_source_files_properties(
+                  ${SDIR}/arch/intsimdmatrixsse.cpp
+                  PROPERTIES COMPILE_DEFINITIONS __SSE4_1__)
+              set_source_files_properties(
+                  ${SDIR}/arch/dotproductavx.cpp
                   PROPERTIES COMPILE_FLAGS "/arch:AVX")
+              set_source_files_properties(
+                  ${SDIR}/arch/intsimdmatrixavx2.cpp
+                  PROPERTIES COMPILE_FLAGS "/arch:AVX2")
           endif()
       else()
           remove_src_dir(vs2010/port/*)