diff --git a/.gitignore b/.gitignore old mode 100755 new mode 100644 index 800804419..956ce6847 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ # Config files from CMake src/common/project_version.h src/common/git_revision.h +src/common/build_info.cpp *.vcxproj.user /vs/x64 @@ -61,4 +62,4 @@ examples/mnist/*ubyte .vs .vscode - + diff --git a/.gitmodules b/.gitmodules index 5c3c00f1e..b7c67befc 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,9 +1,16 @@ [submodule "examples"] path = examples url = https://github.com/marian-nmt/marian-examples +[submodule "regression-tests"] + path = regression-tests + url = https://github.com/marian-nmt/marian-regression-tests [submodule "src/3rd_party/sentencepiece"] path = src/3rd_party/sentencepiece url = https://github.com/marian-nmt/sentencepiece [submodule "src/3rd_party/nccl"] path = src/3rd_party/nccl url = https://github.com/marian-nmt/nccl +[submodule "src/3rd_party/fbgemm"] + path = src/3rd_party/fbgemm + url = https://github.com/marian-nmt/FBGEMM + branch = master diff --git a/CHANGELOG.md b/CHANGELOG.md index 02df12175..487c07a1d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,23 +5,109 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). 
+ ## [Unreleased] ### Added -- Automatic detection of CPU intrisics when building with -arch=native +- An option to print cached variables from CMake +- Add support for compiling on Mac (and clang) +- An option for resetting stalled validation metrics +- Add CMAKE options to disable compilation for specific GPU SM types +- An option to print word-level translation scores +- An option to turn off automatic detokenization from SentencePiece +- Separate quantization types for 8-bit FBGEMM for AVX2 and AVX512 +- Sequence-level unlikelihood training +- Allow file name templated valid-translation-output files +- Support for lexical shortlists in marian-server +- Support for 8-bit matrix multiplication with FBGEMM +- CMakeLists.txt now looks for SSE 4.2 +- Purging of finished hypotheses during beam-search. A lot faster for large batches. +- Faster option look-up, up to 20-30% faster translation +- Added --cite and --authors flag +- Added optional support for ccache +- Switch to change abort to exception, only to be used in library mode +- Support for 16-bit packed models with FBGEMM +- Multiple separated parameter types in ExpressionGraph, currently inference-only +- Safe handling of sigterm signal +- Automatic vectorization of elementwise operations on CPU for tensor dims that + are divisible by 4 (AVX) and 8 (AVX2) +- Replacing std::shared_ptr with custom IntrusivePtr for small objects like + Tensors, Hypotheses and Expressions. 
+- Fp16 inference working for translation +- Gradient-checkpointing + +### Fixed +- Replace value for INVALID_PATH_SCORE with std::numeric_limits::lowest() + to avoid overflow with long sequences +- Break up potential circular references for GraphGroup* +- Fix empty source batch entries with batch purging +- Clear RNN cache in transformer model, add correct hash functions to nodes +- Gather-operation for all index sizes +- Fix word weighting with max length cropping +- Fixed compilation on CPUs without support for AVX +- FastOpt now reads "n" and "y" values as strings, not as boolean values +- Fixed multiple reduction kernels on GPU +- Fixed guided-alignment training with cross-entropy +- Replace IntrusivePtr with std::unique_ptr in FastOpt, fixes random segfaults + due to thread-non-safety of reference counting. +- Make sure that items are 256-byte aligned during saving +- Make explicit matmul functions respect setting of cublasMathMode +- Fix memory mapping for mixed parameter models +- Removed naked pointer and potential memory-leak from file_stream.{cpp,h} +- Compilation for GCC >= 7 due to exception thrown in destructor +- Sort parameters by lexicographical order during allocation to ensure consistent + memory-layout during allocation, loading, saving. +- Output empty line when input is empty line. Previous behavior might result in + hallucinated outputs. +- Compilation with CUDA 10.1 + +### Changed +- Combine two for-loops in nth_element.cpp on CPU +- Revert LayerNorm eps to old position, i.e. sigma' = sqrt(sigma^2 + eps) +- Downgrade NCCL to 2.3.7 as 2.4.2 is buggy (hangs with larger models) +- Return error signal on SIGTERM +- Dropped support for CUDA 8.0, CUDA 9.0 is now minimal requirement +- Removed autotuner for now, will be switched back on later +- Boost dependency is now optional and only required for marian_server +- Dropped support for g++-4.9 +- Simplified file stream and temporary file handling +- Unified node initializers, same function API. 
+- Remove overstuff/understuff code + +## [1.8.0] - 2019-09-04 + +### Added +- Alias options and new --task option +- Automatic detection of CPU intrinsics when building with -arch=native +- First version of BERT-training and BERT-classifier, currently not compatible with TF models +- New reduction operators +- Use CMake's ExternalProject to build NCCL and potentially other external libs +- Code for Factored Vocabulary, currently not usable yet without outside tools ### Fixed +- Issue with relative paths in automatically generated decoder config files +- Bug with overlapping CXX flags and building spm_train executable +- Compilation with gcc 8 +- Overwriting and unsetting vector options - Windows build with recent changes - Bug with read-ahead buffer -- Fixed handling of "dump-config: false" in YAML config +- Handling of "dump-config: false" in YAML config - Errors due to warnings -- Fixed issue concerning failed saving with single GPU training and --sync-sgd option. +- Issue concerning failed saving with single GPU training and --sync-sgd option. +- NaN problem when training with Tensor Cores on Volta GPUs +- Fix pipe-handling +- Fix compilation with GCC 9.1 +- Fix CMake build types ### Changed +- Error message when using left-to-right and right-to-left models together in ensembles +- Regression tests included as a submodule +- Update NCCL to 2.4.2 - Add zlib source to Marian's source tree, builds now as object lib - -DUSE_STATIC_LIBS=on now also looks for static versions of CUDA libraries - Include NCCL build from github.com/marian-nmt/nccl and compile within source tree -- Set nearly all warnings as errors for Marian's own targets. Disable warnings for 3rd party. +- Set nearly all warnings as errors for Marian's own targets. 
Disable warnings for 3rd party +- Refactored beam search ## [1.7.0] - 2018-11-27 diff --git a/CMakeLists.txt b/CMakeLists.txt index 28e648aa9..46d9c6c91 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,7 +5,6 @@ if (POLICY CMP0074) cmake_policy(SET CMP0074 NEW) # CMake 3.12 endif () - project(marian CXX C) set(CMAKE_CXX_STANDARD 11) set(CMAKE_CXX_STANDARD_REQUIRED ON) @@ -14,14 +13,33 @@ set(BUILD_ARCH native CACHE STRING "Compile for this CPU architecture.") # Custom CMake options option(COMPILE_CPU "Compile CPU version" ON) option(COMPILE_CUDA "Compile GPU version" ON) -option(USE_SENTENCEPIECE "Download and compile SentencePiece" OFF) -option(USE_STATIC_LIBS "Link statically against non-system libs" OFF) -option(USE_CUDNN "Use CUDNN library" OFF) -option(USE_NCCL "Use NCCL library" ON) -option(USE_MPI "Use MPI library" OFF) +option(COMPILE_CUDA_SM35 "Compile GPU version with SM35 support" ON) +option(COMPILE_CUDA_SM50 "Compile GPU version with SM50 support" ON) +option(COMPILE_CUDA_SM60 "Compile GPU version with SM60 support" ON) +option(COMPILE_CUDA_SM70 "Compile GPU version with SM70 support" ON) option(COMPILE_EXAMPLES "Compile examples" OFF) +option(COMPILE_SERVER "Compile marian-server" OFF) option(COMPILE_TESTS "Compile tests" OFF) -option(COMPILE_SERVER "Compile marian-server" ON) +option(USE_CCACHE "Use ccache compiler cache (https://ccache.dev)" OFF) +option(USE_CUDNN "Use CUDNN library" OFF) +option(USE_DOXYGEN "Build documentation with Doxygen" ON) +option(USE_FBGEMM "Use FBGEMM" OFF) +option(USE_MKL "Compile with MKL support" ON) +option(USE_MPI "Use MPI library" OFF) +option(USE_NCCL "Use NCCL library" ON) +option(USE_SENTENCEPIECE "Download and compile SentencePiece" OFF) +option(USE_STATIC_LIBS "Link statically against non-system libs" OFF) + +# use ccache (https://ccache.dev) for faster compilation if requested and available +if(USE_CCACHE) +find_program(CCACHE_PROGRAM ccache) +if(CCACHE_PROGRAM) + message(STATUS "Will be using ccache for 
faster repeat compilation (use cmake -DUSE_CCACHE=off to disable).") + set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${CCACHE_PROGRAM}") +else(CCACHE_PROGRAM) + message(WARNING "Compilation with ccache requested but no ccache found.") +endif(CCACHE_PROGRAM) +endif(USE_CCACHE) # Project versioning find_package(Git QUIET) @@ -32,7 +50,13 @@ message(STATUS "Project version: ${PROJECT_VERSION_STRING_FULL}") execute_process(COMMAND git submodule update --init --recursive --no-fetch WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) - + +if(NOT CMAKE_BUILD_TYPE) + message(WARNING "CMAKE_BUILD_TYPE not set; setting to Release") + set(CMAKE_BUILD_TYPE "Release") +endif() + +############################################################################### # Set compilation flags if(MSVC) # These are used in src/CMakeLists.txt on a per-target basis @@ -42,7 +66,7 @@ if(MSVC) # C4310: cast truncates constant value # C4324: 'marian::cpu::int16::`anonymous-namespace'::ScatterPut': structure was padded due to alignment specifier set(DISABLE_GLOBALLY "/wd\"4310\" /wd\"4324\"") - + set(INTRINSICS "/arch:AVX") # Or maybe use these? @@ -57,62 +81,112 @@ if(MSVC) set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /DEBUG /LTCG:incremental /INCREMENTAL:NO /NODEFAULTLIB:MSVCRT /ignore:4049") set(CMAKE_STATIC_LINKER_FLAGS "${CMAKE_STATIC_LINKER_FLAGS} /LTCG:incremental") - find_library(SHLWAPI Shlwapi.lib) + find_library(SHLWAPI Shlwapi.lib) set(EXT_LIBS ${EXT_LIBS} SHLWAPI) -else() +else(MSVC) -# Detect support CPU instrinsics for the current platform. This will -# only by used with BUILD_ARCH=native. For overridden BUILD_ARCH we -# minimally use -msse4.1. This seems to work with MKL. 
-set(INTRINSICS "") -if(BUILD_ARCH STREQUAL "native") - message(STATUS "Checking support for CPU intrinsics") - include(FindSSE) - if(SSE2_FOUND) - message(STATUS "SSE2 support found") - set(INTRINSICS "${INTRINSICS} -msse2") - endif(SSE2_FOUND) - if(SSE3_FOUND) - message(STATUS "SSE3 support found") - set(INTRINSICS "${INTRINSICS} -msse3") - endif(SSE3_FOUND) - if(SSE4_1_FOUND) - message(STATUS "SSE4.1 support found") - set(INTRINSICS "${INTRINSICS} -msse4.1") - endif(SSE4_1_FOUND) - if(AVX_FOUND) - message(STATUS "AVX support found") - set(INTRINSICS "${INTRINSICS} -mavx") - endif(AVX_FOUND) - if(AVX2_FOUND) - message(STATUS "AVX2 support found") - set(INTRINSICS "${INTRINSICS} -mavx2") - endif(AVX2_FOUND) -else() - set(INTRINSICS "-msse4.1") -endif() + # Check we are using at least g++ 5.0 + if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.0) + message(FATAL_ERROR "FATAL ERROR: Compiling Marian requires at least g++ 5.0, your version is ${CMAKE_CXX_COMPILER_VERSION}") + endif() -set(DISABLE_GLOBALLY "-Wno-unused-result") + # Detect supported CPU intrinsics for the current platform. This will + # only be used with BUILD_ARCH=native. For overridden BUILD_ARCH we + # minimally use -msse4.1. This seems to work with MKL. 
+ set(INTRINSICS "") + list(APPEND INTRINSICS_NVCC) + + if(BUILD_ARCH STREQUAL "native") + message(STATUS "Checking support for CPU intrinsics") + include(FindSSE) + if(SSE2_FOUND) + message(STATUS "SSE2 support found") + set(INTRINSICS "${INTRINSICS} -msse2") + list(APPEND INTRINSICS_NVCC -Xcompiler\ -msse2) + endif(SSE2_FOUND) + if(SSE3_FOUND) + message(STATUS "SSE3 support found") + set(INTRINSICS "${INTRINSICS} -msse3") + list(APPEND INTRINSICS_NVCC -Xcompiler\ -msse3) + endif(SSE3_FOUND) + if(SSE4_1_FOUND) + message(STATUS "SSE4.1 support found") + set(INTRINSICS "${INTRINSICS} -msse4.1") + list(APPEND INTRINSICS_NVCC -Xcompiler\ -msse4.1) + endif(SSE4_1_FOUND) + if(SSE4_2_FOUND) + message(STATUS "SSE4.2 support found") + set(INTRINSICS "${INTRINSICS} -msse4.2") + list(APPEND INTRINSICS_NVCC -Xcompiler\ -msse4.2) + endif(SSE4_2_FOUND) + if(AVX_FOUND) + message(STATUS "AVX support found") + set(INTRINSICS "${INTRINSICS} -mavx") + list(APPEND INTRINSICS_NVCC -Xcompiler\ -mavx) + endif(AVX_FOUND) + if(AVX2_FOUND) + message(STATUS "AVX2 support found") + set(INTRINSICS "${INTRINSICS} -mavx2") + list(APPEND INTRINSICS_NVCC -Xcompiler\ -mavx2) + endif(AVX2_FOUND) + if(AVX512_FOUND) + message(STATUS "AVX512 support found") + set(INTRINSICS "${INTRINSICS} -mavx512f") + list(APPEND INTRINSICS_NVCC -Xcompiler\ -mavx512f) + endif(AVX512_FOUND) + else() + set(INTRINSICS "-msse4.1") + endif() -# These are used in src/CMakeLists.txt on a per-target basis -list(APPEND ALL_WARNINGS -Wall; -Werror; -Wno-unused-result; -Wno-deprecated; -Wno-pragmas; -Wno-unused-parameter; -Wextra; -Wno-unused-function; - -Wno-unused-value; -Wno-unknown-pragmas; -Wno-sign-compare; -Wno-missing-field-initializers;) + if(USE_FBGEMM) + set(EXT_LIBS ${EXT_LIBS} fbgemm dl) + add_definitions(-DUSE_FBGEMM=1) + endif(USE_FBGEMM) + + if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 9.0) + # Clang-10.0.0 complains when CUDA is newer than 10.1 + 
set(CLANG_IGNORE_UNKNOWN_CUDA "-Wno-unknown-cuda-version") + endif() + set(DISABLE_GLOBALLY "-Wno-unused-result -Wno-unknown-warning-option ${CLANG_IGNORE_UNKNOWN_CUDA}") + + # These are used in src/CMakeLists.txt on a per-target basis + list(APPEND ALL_WARNINGS -Wall; -Werror; -Wextra; -Wno-unused-result; -Wno-deprecated; + -Wno-pragmas; -Wno-unused-parameter; -Wno-unused-function; + -Wno-unused-value; -Wno-unknown-pragmas; -Wno-sign-compare; + -Wno-missing-field-initializers;) # This warning does not exist prior to gcc 5.0 if(CMAKE_COMPILER_IS_GNUCC AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 5.0) - list(APPEND ALL_WARNINGS -Wsuggest-override) + list(APPEND ALL_WARNINGS -Wsuggest-override -Wno-int-in-bool-context) endif() - set(CMAKE_CXX_FLAGS "-std=c++11 -O3 -Ofast -m64 -pthread -march=${BUILD_ARCH} ${INTRINSICS} -Wl,--no-as-needed -funroll-loops -ffinite-math-only -fPIC ${DISABLE_GLOBALLY}") - set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS} -g -rdynamic") - set(CMAKE_CXX_FLAGS_DEBUG "-std=c++11 -g -rdynamic -O0 -pthread -Wl,--no-as-needed -fPIC -Wno-unused-result -Wno-deprecated -Wno-pragmas") - set(CMAKE_CXX_FLAGS_SLIM "${CMAKE_CXX_FLAGS} -DNDEBUG") - set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS} -g -rdynamic") - set(CMAKE_CXX_FLAGS_PROFILE "${CMAKE_CXX_FLAGS_RELEASE} -pg -g -rdynamic") + if(CMAKE_COMPILER_IS_GNUCC) + # these flags are not known to clang + set(CMAKE_GCC_FLAGS "-Wl,--no-as-needed") + set(CMAKE_RDYNAMIC_FLAG "-rdynamic") + endif(CMAKE_COMPILER_IS_GNUCC) + + set(CMAKE_CXX_FLAGS "-std=c++11 -pthread ${CMAKE_GCC_FLAGS} -fPIC ${DISABLE_GLOBALLY} -march=${BUILD_ARCH} ${INTRINSICS}") + set(CMAKE_CXX_FLAGS_RELEASE "-Ofast -m64 -funroll-loops -ffinite-math-only -g ${CMAKE_RDYNAMIC_FLAG}") + set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g ${CMAKE_RDYNAMIC_FLAG}") + set(CMAKE_CXX_FLAGS_SLIM "-Ofast -m64 -funroll-loops -ffinite-math-only -DNDEBUG") + set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELEASE}") + set(CMAKE_CXX_FLAGS_PROFILE 
"${CMAKE_CXX_FLAGS_RELEASE} -pg") set(CMAKE_CXX_FLAGS_PROFGEN "${CMAKE_CXX_FLAGS_RELEASE} -fprofile-generate -fprofile-correction") set(CMAKE_CXX_FLAGS_PROFUSE "${CMAKE_CXX_FLAGS_RELEASE} -fprofile-use -fprofile-correction") - endif() + # these need to be set separately + set(CMAKE_C_FLAGS "-pthread ${CMAKE_GCC_FLAGS} -fPIC ${DISABLE_GLOBALLY} -march=${BUILD_ARCH} ${INTRINSICS}") + set(CMAKE_C_FLAGS_RELEASE "-O3 -m64 -funroll-loops -ffinite-math-only -g ${CMAKE_RDYNAMIC_FLAG}") + set(CMAKE_C_FLAGS_DEBUG "-O0 -g ${CMAKE_RDYNAMIC_FLAG}") + set(CMAKE_C_FLAGS_SLIM "-O3 -m64 -funroll-loops -ffinite-math-only -DNDEBUG") + set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELEASE}") + set(CMAKE_C_FLAGS_PROFILE "${CMAKE_C_FLAGS_RELEASE} -pg") + set(CMAKE_C_FLAGS_PROFGEN "${CMAKE_C_FLAGS_RELEASE} -fprofile-generate -fprofile-correction") + set(CMAKE_C_FLAGS_PROFUSE "${CMAKE_C_FLAGS_RELEASE} -fprofile-use -fprofile-correction") +endif(MSVC) + +############################################################################### # Downloading SentencePiece if requested and set to compile with it. # Requires all the dependencies imposed by SentencePiece if(USE_SENTENCEPIECE) @@ -121,10 +195,10 @@ if(USE_SENTENCEPIECE) set(EXT_LIBS ${EXT_LIBS} sentencepiece sentencepiece_train) endif() - # Find packages set(EXT_LIBS ${EXT_LIBS} ${CMAKE_DL_LIBS}) +############################################################################### if(COMPILE_CUDA) if(USE_STATIC_LIBS) @@ -140,16 +214,41 @@ if(USE_STATIC_LIBS) endif() endif() -find_package(CUDA "8.0") +find_package(CUDA "9.0") # TODO: only enable FP16-related options for compute_70 and higher. 
if(CUDA_FOUND) + # CUDA >= 10.0 requires CMake >= 3.12.2 + if((CUDA_VERSION VERSION_EQUAL "10.0" OR CUDA_VERSION VERSION_GREATER "10.0") AND (CMAKE_VERSION VERSION_LESS "3.12.2")) + message(WARNING "On some Unix systems CUDA 10.0+ requires CMake 3.12.2+; you use CMake ${CMAKE_VERSION}") + endif() + + if(COMPILE_CUDA_SM35) + LIST(APPEND COMPUTE -arch=sm_35; -gencode=arch=compute_35,code=sm_35;) # Tesla K40 and above + endif(COMPILE_CUDA_SM35) + if(COMPILE_CUDA_SM50) + LIST(APPEND COMPUTE -gencode=arch=compute_50,code=sm_50; -gencode=arch=compute_52,code=sm_52;) # Maxwell GPUs + endif(COMPILE_CUDA_SM50) + if(COMPILE_CUDA_SM60) + LIST(APPEND COMPUTE -gencode=arch=compute_60,code=sm_60; -gencode=arch=compute_61,code=sm_61;) # Pascal GPUs + endif(COMPILE_CUDA_SM60) + if(COMPILE_CUDA_SM70) + LIST(APPEND COMPUTE -gencode=arch=compute_70,code=sm_70; -gencode=arch=compute_70,code=compute_70) # Volta GPUs + endif(COMPILE_CUDA_SM70) + if(USE_STATIC_LIBS) find_library(CUDA_culibos_LIBRARY NAMES culibos PATHS ${CUDA_TOOLKIT_ROOT_DIR}/lib64) set(EXT_LIBS ${EXT_LIBS} ${CUDA_curand_LIBRARY} ${CUDA_cusparse_LIBRARY} ${CUDA_culibos_LIBRARY} ${CUDA_CUBLAS_LIBRARIES}) - message(STATUS "Found CUDA libraries: ${CUDA_curand_LIBRARY} ${CUDA_cusparse_LIBRARY} ${CUDA_culibos_LIBRARY} ${CUDA_CUBLAS_LIBRARIES}") + set(CUDA_LIBS ${CUDA_curand_LIBRARY} ${CUDA_cusparse_LIBRARY} ${CUDA_culibos_LIBRARY} ${CUDA_CUBLAS_LIBRARIES}) + # CUDA 10.1 introduces cublasLt library that is required on static build + if ((CUDA_VERSION VERSION_EQUAL "10.1" OR CUDA_VERSION VERSION_GREATER "10.1")) + find_library(CUDA_cublasLt_LIBRARY NAMES cublasLt PATHS ${CUDA_TOOLKIT_ROOT_DIR}/lib64) + set(EXT_LIBS ${EXT_LIBS} ${CUDA_cublasLt_LIBRARY}) + set(CUDA_LIBS ${CUDA_LIBS} ${CUDA_cublasLt_LIBRARY}) + endif() + message(STATUS "Found CUDA libraries: ${CUDA_LIBS}") else(USE_STATIC_LIBS) set(EXT_LIBS ${EXT_LIBS} ${CUDA_curand_LIBRARY} ${CUDA_cusparse_LIBRARY} ${CUDA_CUBLAS_LIBRARIES}) message(STATUS "Found CUDA libraries: 
${CUDA_curand_LIBRARY} ${CUDA_cusparse_LIBRARY} ${CUDA_CUBLAS_LIBRARIES}") -endif(USE_STATIC_LIBS) + endif(USE_STATIC_LIBS) if(USE_CUDNN) find_package(CUDNN "7.0") @@ -168,63 +267,42 @@ endif(USE_STATIC_LIBS) list(APPEND CUDA_NVCC_FLAGS -DBOOST_PP_VARIADICS=0; ) endif() - # We compile NCCL ourselves, using the NVidia Makefile rather than CMake, this requires to pass a couple of parameters from - # Cmake. This is also fairly untested, let's hope it does not explode. - # @TODO: Make sure it does not use pre-installed NCCL headers if(USE_NCCL) - # define and set the include dir for the generated nccl.h header - set(NCCL_HEADER_LOCATION "${CMAKE_CURRENT_BINARY_DIR}/nccl/include") - include_directories(${NCCL_HEADER_LOCATION}) - - # set the path for the generated static lib - set(NCCL_LIB_STATIC "${CMAKE_CURRENT_BINARY_DIR}/nccl/lib/libnccl_static.a") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_NCCL") - - LIST(APPEND CUDA_NVCC_FLAGS -DUSE_NCCL; ) - - # disables compilation for sm_30 to avoid ptxas warning... that's general Kepler support. But K80s are supported for instance by sm_35 - set(GENCODE "-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_61,code=sm_61") - - # We build using NVidia's custom makefile, for that we pass a number of variables from CMake. - # Sets output to the chosen build folder, i.e. where the binaries and objects are generated. - # Also passes CUDA location from FindCUDA, sets c++ compiler to the same one CMake uses. 
- add_custom_command(OUTPUT ${NCCL_LIB_STATIC} - COMMAND ${CMAKE_MAKE_PROGRAM} src.build - BUILDDIR=${CMAKE_CURRENT_BINARY_DIR}/nccl - CUDA_HOME=${CUDA_TOOLKIT_ROOT_DIR} - CUDA8_GENCODE=${GENCODE} - CXX=${CMAKE_CXX_COMPILER} - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/src/3rd_party/nccl) - add_custom_target(nccl_target DEPENDS ${NCCL_LIB_STATIC}) add_library(nccl STATIC IMPORTED) - set_target_properties(nccl PROPERTIES IMPORTED_LOCATION ${NCCL_LIB_STATIC}) - add_dependencies(nccl nccl_target) set(EXT_LIBS ${EXT_LIBS} nccl) - - # adds the resulting files to be removed by `make clean` - set_directory_properties(PROPERTY ADDITIONAL_MAKE_CLEAN_FILES ${CMAKE_CURRENT_BINARY_DIR}/nccl) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_NCCL") + LIST(APPEND CUDA_NVCC_FLAGS -DUSE_NCCL; ) endif(USE_NCCL) -if(USE_STATIC_LIBS) - set(CMAKE_FIND_LIBRARY_SUFFIXES ${_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES}) -endif() + if(USE_STATIC_LIBS) + set(CMAKE_FIND_LIBRARY_SUFFIXES ${_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES}) + endif() else(CUDA_FOUND) - message(FATAL_ERROR "CUDA has not been found, set -DCOMPILE_CUDA=off to avoid this check and to compile the CPU version only") + message(" +Cannot find suitable CUDA libraries. 
Specify the path explicitly with + -DCUDA_TOOLKIT_ROOT_DIR=/path/to/appropriate/cuda/installation + (hint: try /usr/local/$(readlink /usr/local/cuda)) +OR compile the CPU-only version of Marian with + -DCOMPILE_CUDA=off +") + message(FATAL_ERROR "FATAL ERROR: No suitable CUDA library found.") endif(CUDA_FOUND) else(COMPILE_CUDA) message(WARNING "COMPILE_CUDA=off : Building only CPU version") endif(COMPILE_CUDA) +# TODO: make compatible with older CUDA versions if(CMAKE_BUILD_TYPE STREQUAL "Debug") - list(APPEND CUDA_NVCC_FLAGS --default-stream per-thread; -O0; -g; -arch=sm_30; -gencode=arch=compute_30,code=sm_30; -gencode=arch=compute_50,code=sm_50; -gencode=arch=compute_52,code=sm_52; -gencode=arch=compute_60,code=sm_60; -gencode=arch=compute_61,code=sm_61; -gencode=arch=compute_61,code=compute_61 ;) + list(APPEND CUDA_NVCC_FLAGS --default-stream per-thread; -O0; -g; --use_fast_math; ${COMPUTE}) else(CMAKE_BUILD_TYPE STREQUAL "Debug") - list(APPEND CUDA_NVCC_FLAGS --default-stream per-thread; -O3; -g; --use_fast_math; -arch=sm_30; -gencode=arch=compute_30,code=sm_30; -gencode=arch=compute_50,code=sm_50; -gencode=arch=compute_52,code=sm_52; -gencode=arch=compute_60,code=sm_60; -gencode=arch=compute_61,code=sm_61; -gencode=arch=compute_61,code=compute_61 ;) + list(APPEND CUDA_NVCC_FLAGS --default-stream per-thread; -O3; -g; --use_fast_math; ${COMPUTE}) endif(CMAKE_BUILD_TYPE STREQUAL "Debug") if(NOT MSVC) # @TODO: add warnings here too - list(APPEND CUDA_NVCC_FLAGS -std=c++11; -Xcompiler\ -fPIC; -Xcompiler\ -Wno-unused-result; -Xcompiler\ -Wno-deprecated; -Xcompiler\ -Wno-pragmas; -Xcompiler\ -Wno-unused-value; -Xcompiler\ -Werror;) + list(APPEND CUDA_NVCC_FLAGS -ccbin ${CMAKE_C_COMPILER}; -std=c++11; -Xcompiler\ -fPIC; -Xcompiler\ -Wno-unused-result; -Xcompiler\ -Wno-deprecated; -Xcompiler\ -Wno-pragmas; -Xcompiler\ -Wno-unused-value; -Xcompiler\ -Werror;) + list(APPEND CUDA_NVCC_FLAGS ${INTRINSICS_NVCC}) else() list(APPEND CUDA_NVCC_FLAGS -Xcompiler\ /FS; ) 
endif() @@ -241,6 +319,8 @@ if(USE_STATIC_LIBS) endif() endif() +############################################################################### +# Find Tcmalloc if(NOT WIN32) find_package(Tcmalloc) if(Tcmalloc_FOUND) @@ -251,6 +331,8 @@ if(NOT WIN32) endif(Tcmalloc_FOUND) endif() +############################################################################### +# Find MPI if(USE_MPI) find_package(MPI 2.0) if(MPI_FOUND) @@ -260,38 +342,40 @@ if(USE_MPI) endif(MPI_FOUND) endif(USE_MPI) +############################################################################### +# Find MKL if(COMPILE_CPU) - find_package(MKL) + if(USE_MKL) + find_package(MKL) + endif(USE_MKL) if(MKL_FOUND) include_directories(${MKL_INCLUDE_DIR}) set(EXT_LIBS ${EXT_LIBS} ${MKL_LIBRARIES}) add_definitions(-DBLAS_FOUND=1 -DMKL_FOUND=1) else(MKL_FOUND) - set(BLA_VENDOR "OpenBLAS") + set(BLAS_VENDOR "OpenBLAS") find_package(BLAS) if(BLAS_FOUND) - include_directories(${BLAS_INCLUDE_DIR}) - set(EXT_LIBS ${EXT_LIBS} ${BLAS_LIBRARIES}) - add_definitions(-DBLAS_FOUND=1) + include(FindCBLAS) + if(CBLAS_FOUND) + include_directories(${BLAS_INCLUDE_DIR} ${CBLAS_INCLUDE_DIR}) + set(EXT_LIBS ${EXT_LIBS} ${BLAS_LIBRARIES} ${CBLAS_LIBRARIES}) + add_definitions(-DBLAS_FOUND=1) + endif(CBLAS_FOUND) endif(BLAS_FOUND) endif(MKL_FOUND) endif(COMPILE_CPU) -set(BOOST_COMPONENTS timer iostreams filesystem system chrono) -if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.9) - add_definitions(-DUSE_BOOST_REGEX=1) - set(BOOST_COMPONENTS ${BOOST_COMPONENTS} regex) - message(STATUS "Using boost::regex") -else() - message(STATUS "Using std::regex") -endif() - +############################################################################### +# Find OpenSSL +set(BOOST_COMPONENTS "") if(COMPILE_SERVER) find_package(OpenSSL) if(OpenSSL_FOUND) message(STATUS "Found OpenSSL") include_directories(${OPENSSL_INCLUDE_DIR}) set(EXT_LIBS ${EXT_LIBS} ${OPENSSL_CRYPTO_LIBRARY}) + set(BOOST_COMPONENTS 
${BOOST_COMPONENTS} system) else(OpenSSL_FOUND) message(WARNING "Cannot find OpenSSL library. Not compiling server.") set(COMPILE_SERVER "off") @@ -302,19 +386,25 @@ if(USE_STATIC_LIBS) set(CMAKE_FIND_LIBRARY_SUFFIXES ${_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES}) endif() +# TODO: move inside if(BOOST_COMPONENTS) ? if(USE_STATIC_LIBS) set(Boost_USE_STATIC_LIBS ON) endif() -find_package(Boost COMPONENTS ${BOOST_COMPONENTS}) -if(Boost_FOUND) - include_directories(${Boost_INCLUDE_DIRS}) - set(EXT_LIBS ${EXT_LIBS} ${Boost_LIBRARIES}) - set(EXT_LIBS ${EXT_LIBS} ${ZLIB_LIBRARIES}) # hack for static compilation -else(Boost_FOUND) - message(SEND_ERROR "Cannot find Boost libraries. Terminating.") -endif(Boost_FOUND) - +############################################################################### +# Find Boost if required +if(BOOST_COMPONENTS) + find_package(Boost COMPONENTS ${BOOST_COMPONENTS}) + if(Boost_FOUND) + include_directories(${Boost_INCLUDE_DIRS}) + set(EXT_LIBS ${EXT_LIBS} ${Boost_LIBRARIES}) + set(EXT_LIBS ${EXT_LIBS} ${ZLIB_LIBRARIES}) # hack for static compilation + else(Boost_FOUND) + message(SEND_ERROR "Cannot find Boost libraries. 
Terminating.") + endif(Boost_FOUND) +endif(BOOST_COMPONENTS) + +############################################################################### if(COMPILE_TESTS) enable_testing() endif(COMPILE_TESTS) @@ -327,11 +417,18 @@ endif(COMPILE_EXAMPLES) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/src/common/project_version.h.in ${CMAKE_CURRENT_SOURCE_DIR}/src/common/project_version.h @ONLY) +# Generate build_info.cpp with CMake cache variables +include(GetCacheVariables) + +configure_file(${CMAKE_CURRENT_SOURCE_DIR}/src/common/build_info.cpp.in + ${CMAKE_CURRENT_SOURCE_DIR}/src/common/build_info.cpp @ONLY) + # Compile source files include_directories(${marian_SOURCE_DIR}/src) add_subdirectory(src) - +############################################################################### +if(USE_DOXYGEN) # Add a target to generate API documentation with Doxygen find_package(Doxygen) if(DOXYGEN_FOUND) @@ -340,7 +437,7 @@ if(DOXYGEN_FOUND) add_custom_target(doc ${DOXYGEN_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} - COMMENT "Generating API documentation with Doxygen" VERBATIM + COMMENT "Generating API documentation with Doxygen" VERBATIM ) endif(DOXYGEN_FOUND) - +endif(USE_DOXYGEN) diff --git a/Doxyfile.in b/Doxyfile.in index 1761a2283..ba2fec096 100644 --- a/Doxyfile.in +++ b/Doxyfile.in @@ -1592,7 +1592,7 @@ PAPER_TYPE = a4 # If left blank no extra packages will be included. # This tag requires that the tag GENERATE_LATEX is set to YES. -EXTRA_PACKAGES = +EXTRA_PACKAGES = amsmath # The LATEX_HEADER tag can be used to specify a personal LaTeX header for the # generated LaTeX document. 
The header should contain everything until the first diff --git a/README.md b/README.md index d60bcff2d..17a33728a 100644 --- a/README.md +++ b/README.md @@ -1,40 +1,26 @@ Marian ====== +[![Build Status CUDA 9](https://img.shields.io/jenkins/s/http/vali.inf.ed.ac.uk/jenkins/view/marian/job/marian-dev-cuda-9.2.svg?label=CUDA%209)](http://vali.inf.ed.ac.uk/jenkins/job/marian-dev-cuda-9.2/) [![Build Status CUDA 10](https://img.shields.io/jenkins/s/http/vali.inf.ed.ac.uk/jenkins/view/marian/job/marian-dev-cuda-10.1.svg?label=CUDA%2010)](http://vali.inf.ed.ac.uk/jenkins/job/marian-dev-cuda-10.1/) -[![CPU Build Status](https://img.shields.io/jenkins/s/http/vali.inf.ed.ac.uk/jenkins/view/marian/job/marian-dev-cpu.svg?label=CPU)](http://vali.inf.ed.ac.uk/jenkins/job/marian-dev-cpu/) +[![Build Status CPU](https://img.shields.io/jenkins/s/http/vali.inf.ed.ac.uk/jenkins/view/marian/job/marian-dev-cpu.svg?label=CPU)](http://vali.inf.ed.ac.uk/jenkins/job/marian-dev-cpu/) [![Tests Status](https://img.shields.io/jenkins/s/http/vali.inf.ed.ac.uk/jenkins/view/marian/job/marian-regression-tests.svg?label=tests)](http://vali.inf.ed.ac.uk/jenkins/job/marian-regression-tests/) [![Latest release](https://img.shields.io/github/release/marian-nmt/marian.svg?label=release)](https://github.com/marian-nmt/marian/releases) [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](./LICENSE.md) [![Twitter](https://img.shields.io/twitter/follow/marian_nmt.svg?style=social)](https://twitter.com/intent/follow?screen_name=marian_nmt) -

- Marian is an efficient Neural Machine Translation framework written - in pure C++ with minimal dependencies. - - Named in honour of Marian Rejewski, a Polish mathematician and cryptologist. - - -

- - - -

- Main features: -

    -
  • Fast multi-gpu training and translation
  • -
  • Compatible with Nematus and DL4MT
  • -
  • Efficient pure C++ implementation
  • -
  • Permissive open source license (MIT)
  • -
  • more details...
  • -
-

+*Marian* is an efficient Neural Machine Translation framework written in pure +C++ with minimal dependencies. + +Named in honour of Marian Rejewski, a Polish mathematician and cryptologist. + +Main features: + +- Efficient pure C++ implementation +- Fast multi-GPU training and GPU/CPU translation +- State-of-the-art NMT architectures: deep RNN and transformer +- Permissive open source license (MIT) +- [more detail...](https://marian-nmt.github.io/features) If you use this, please cite: @@ -59,20 +45,11 @@ Machine Translation in C++ (http://www.aclweb.org/anthology/P18-4020) url = {http://www.aclweb.org/anthology/P18-4020} } - - ## Amun -The handwritten decoder for RNN models compatible with Marian and Nematus has been superseded by the Marian decoder. The code is available in a separate repository: https://github.com/marian-nmt/amun + +The handwritten decoder for RNN models compatible with Marian and Nematus has +been superseded by the Marian decoder. The code is available in a separate +repository: https://github.com/marian-nmt/amun ## Website diff --git a/VERSION b/VERSION index 12751ca7b..6959dfcad 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -v1.7.6 +v1.8.52 diff --git a/cmake/FindCBLAS.cmake b/cmake/FindCBLAS.cmake new file mode 100644 index 000000000..97b0d3f8a --- /dev/null +++ b/cmake/FindCBLAS.cmake @@ -0,0 +1,186 @@ +# - Find CBLAS library +# +# This module finds an installed fortran library that implements the CBLAS +# linear-algebra interface (see http://www.netlib.org/blas/), with CBLAS +# interface. +# +# This module sets the following variables: +# CBLAS_FOUND - set to true if a library implementing the CBLAS interface +# is found +# CBLAS_LINKER_FLAGS - uncached list of required linker flags (excluding -l +# and -L). 
+# CBLAS_LIBRARIES - uncached list of libraries (using full path name) to +# link against to use CBLAS +# CBLAS_INCLUDE_DIR - path to includes +# CBLAS_INCLUDE_FILE - the file to be included to use CBLAS +# + +## Based on https://github.com/Eyescale/CMake/blob/master/FindCBLAS.cmake + +INCLUDE(CheckFunctionExists) +INCLUDE(CheckIncludeFile) + +MACRO(CHECK_ALL_LIBRARIES LIBRARIES INCLUDE _prefix _name _flags _list _include _search_include) + # This macro checks for the existence of the combination of fortran libraries + # given by _list. If the combination is found, this macro checks (using the + # Check_Fortran_Function_Exists macro) whether can link against that library + # combination using the name of a routine given by _name using the linker + # flags given by _flags. If the combination of libraries is found and passes + # the link test, LIBRARIES is set to the list of complete library paths that + # have been found. Otherwise, LIBRARIES is set to FALSE. + + # N.B. _prefix is the prefix applied to the names of all cached variables that + # are generated internally and marked advanced by this macro. + + SET(__list) + FOREACH(_elem ${_list}) + IF(__list) + SET(__list "${__list} - ${_elem}") + ELSE(__list) + SET(__list "${_elem}") + ENDIF(__list) + ENDFOREACH(_elem) + MESSAGE(STATUS "Checking for [${__list}]") + SET(_libraries_work TRUE) + SET(${LIBRARIES}) + SET(_combined_name) + SET(_paths) + FOREACH(_library ${_list}) + SET(_combined_name ${_combined_name}_${_library}) + + # did we find all the libraries in the _list until now? 
+ # (we stop at the first unfound one) + IF(_libraries_work) + IF(APPLE) + FIND_LIBRARY(${_prefix}_${_library}_LIBRARY + NAMES ${_library} + PATHS /usr/local/lib /usr/lib /usr/local/lib64 /usr/lib64 /usr/local/opt/openblas/lib ENV + DYLD_LIBRARY_PATH + ) + ELSE(APPLE) + FIND_LIBRARY(${_prefix}_${_library}_LIBRARY + NAMES ${_library} + PATHS /usr/local/lib /usr/lib /usr/local/lib64 /usr/lib64 ENV + LD_LIBRARY_PATH + ) + ENDIF(APPLE) + MARK_AS_ADVANCED(${_prefix}_${_library}_LIBRARY) + IF(${_prefix}_${_library}_LIBRARY) + GET_FILENAME_COMPONENT(_path ${${_prefix}_${_library}_LIBRARY} PATH) + LIST(APPEND _paths ${_path}/../include ${_path}/../../include) + ENDIF(${_prefix}_${_library}_LIBRARY) + SET(${LIBRARIES} ${${LIBRARIES}} ${${_prefix}_${_library}_LIBRARY}) + SET(_libraries_work ${${_prefix}_${_library}_LIBRARY}) + ENDIF(_libraries_work) + ENDFOREACH(_library ${_list}) + + # Test include + SET(_bug_search_include ${_search_include}) #CMAKE BUG!!! SHOULD NOT BE THAT + IF(_bug_search_include) + FIND_PATH(${_prefix}${_combined_name}_INCLUDE ${_include} ${_paths}) + MARK_AS_ADVANCED(${_prefix}${_combined_name}_INCLUDE) + IF(${_prefix}${_combined_name}_INCLUDE) + MESSAGE(STATUS "Checking for [${__list}] -- includes found") + SET(${_prefix}_INCLUDE_DIR ${${_prefix}${_combined_name}_INCLUDE}) + SET(${_prefix}_INCLUDE_FILE ${_include}) + SET(${INCLUDE} ${${_prefix}_INCLUDE_DIR}) + ELSE(${_prefix}${_combined_name}_INCLUDE) + MESSAGE(STATUS "Checking for [${__list}] -- includes not found") + SET(_libraries_work FALSE) + ENDIF(${_prefix}${_combined_name}_INCLUDE) + ELSE(_bug_search_include) + SET(${_prefix}_INCLUDE_DIR) + SET(${_prefix}_INCLUDE_FILE ${_include}) + ENDIF(_bug_search_include) + + IF(_libraries_work) + # Test this combination of libraries. 
+ SET(CMAKE_REQUIRED_LIBRARIES ${_flags} ${${LIBRARIES}}) + CHECK_FUNCTION_EXISTS(${_name} ${_prefix}${_combined_name}_WORKS) + SET(CMAKE_REQUIRED_LIBRARIES) + MARK_AS_ADVANCED(${_prefix}${_combined_name}_WORKS) + SET(_libraries_work ${${_prefix}${_combined_name}_WORKS}) + + IF(_libraries_work) + MESSAGE(STATUS "Checking for [${__list}] -- libraries found") + ENDIF(_libraries_work) + + ENDIF(_libraries_work) + + + IF(NOT _libraries_work) + SET(${LIBRARIES} FALSE) + ENDIF(NOT _libraries_work) + +ENDMACRO(CHECK_ALL_LIBRARIES) + +SET(CBLAS_LINKER_FLAGS) +SET(CBLAS_LIBRARIES) +SET(CBLAS_INCLUDE_DIR) + +# CBLAS in openBLAS +IF(NOT CBLAS_LIBRARIES) + CHECK_ALL_LIBRARIES( + CBLAS_LIBRARIES + CBLAS_INCLUDE_DIR + cblas + cblas_sgemm + "" + "openblas" + "cblas.h" + TRUE + ) +ENDIF(NOT CBLAS_LIBRARIES) + +#MESSAGE(STATUS ${openblas_INCLUDE_DIR}) + +# CBLAS in CBLAS +IF(NOT CBLAS_LIBRARIES) + CHECK_ALL_LIBRARIES( + CBLAS_LIBRARIES + CBLAS_INCLUDE_DIR + cblas + cblas_sgemm + "" + "cblas" + "cblas.h" + TRUE + ) +ENDIF(NOT CBLAS_LIBRARIES) + +#MESSAGE(STATUS ${cblas_INCLUDE_DIR}) + +# CBLAS in lapacke +IF(NOT CBLAS_LIBRARIES) + CHECK_ALL_LIBRARIES( + CBLAS_LIBRARIES + CBLAS_INCLUDE_DIR + cblas + cblas_sgemm + "" + "lapacke" + "cblas.h" + TRUE + ) +ENDIF(NOT CBLAS_LIBRARIES) + +#MESSAGE(STATUS ${lapacke_INCLUDE_DIR}) + +IF(CBLAS_LIBRARIES) + SET(CBLAS_FOUND TRUE) +ELSE(CBLAS_LIBRARIES) + SET(CBLAS_FOUND FALSE) +ENDIF(CBLAS_LIBRARIES) + +IF(NOT CBLAS_FOUND AND CBLAS_FIND_REQUIRED) + MESSAGE(FATAL_ERROR "CBLAS library not found. Please specify library location") +ENDIF(NOT CBLAS_FOUND AND CBLAS_FIND_REQUIRED) + +IF(NOT CBLAS_FIND_QUIETLY) + IF(CBLAS_FOUND) + MESSAGE(STATUS "CBLAS library found: " ${CBLAS_LIBRARIES}) + MESSAGE(STATUS "cblas.h include directory: " ${CBLAS_INCLUDE_DIR}) + ELSE(CBLAS_FOUND) + MESSAGE(STATUS "CBLAS library not found. 
Please specify library location") + ENDIF(CBLAS_FOUND) +ENDIF(NOT CBLAS_FIND_QUIETLY) diff --git a/cmake/FindMKL.cmake b/cmake/FindMKL.cmake index 028161e32..4e8a99eee 100644 --- a/cmake/FindMKL.cmake +++ b/cmake/FindMKL.cmake @@ -53,11 +53,11 @@ else() set(COR_LIB "mkl_core") endif() -if(MSVC) - set(ProgramFilesx86 "ProgramFiles(x86)") - set(INTEL_ROOT_DEFAULT $ENV{${ProgramFilesx86}}/IntelSWTools/compilers_and_libraries/windows) -else() - set(INTEL_ROOT_DEFAULT "/opt/intel") +if(MSVC) + set(ProgramFilesx86 "ProgramFiles(x86)") + set(INTEL_ROOT_DEFAULT $ENV{${ProgramFilesx86}}/IntelSWTools/compilers_and_libraries/windows) +else() + set(INTEL_ROOT_DEFAULT "/opt/intel") endif() set(INTEL_ROOT ${INTEL_ROOT_DEFAULT} CACHE PATH "Folder contains intel libs") find_path(MKL_ROOT include/mkl.h PATHS $ENV{MKLROOT} ${INTEL_ROOT}/mkl @@ -89,7 +89,10 @@ find_library(MKL_CORE_LIBRARY NO_DEFAULT_PATH) set(MKL_INCLUDE_DIRS ${MKL_INCLUDE_DIR}) -set(MKL_LIBRARIES ${MKL_INTERFACE_LIBRARY} ${MKL_SEQUENTIAL_LAYER_LIBRARY} ${MKL_CORE_LIBRARY}) +# Added -Wl block to avoid circular dependencies. 
+# https://stackoverflow.com/questions/5651869/what-are-the-start-group-and-end-group-command-line-options +# https://software.intel.com/en-us/articles/intel-mkl-link-line-advisor +set(MKL_LIBRARIES -Wl,--start-group ${MKL_INTERFACE_LIBRARY} ${MKL_SEQUENTIAL_LAYER_LIBRARY} ${MKL_CORE_LIBRARY} -Wl,--end-group) # message("1 ${MKL_INCLUDE_DIR}") # message("2 ${MKL_INTERFACE_LIBRARY}") diff --git a/cmake/FindSSE.cmake b/cmake/FindSSE.cmake index c152dd74f..82ee7f3e0 100644 --- a/cmake/FindSSE.cmake +++ b/cmake/FindSSE.cmake @@ -41,6 +41,14 @@ IF(CMAKE_SYSTEM_NAME MATCHES "Linux") set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host") ENDIF (SSE41_TRUE) + STRING(REGEX REPLACE "^.*(sse4_2).*$" "\\1" SSE_THERE ${CPUINFO}) + STRING(COMPARE EQUAL "sse4_2" "${SSE_THERE}" SSE42_TRUE) + IF (SSE42_TRUE) + set(SSE4_2_FOUND true CACHE BOOL "SSE4.2 available on host") + ELSE (SSE42_TRUE) + set(SSE4_2_FOUND false CACHE BOOL "SSE4.2 available on host") + ENDIF (SSE42_TRUE) + STRING(REGEX REPLACE "^.*(avx).*$" "\\1" SSE_THERE ${CPUINFO}) STRING(COMPARE EQUAL "avx" "${SSE_THERE}" AVX_TRUE) IF (AVX_TRUE) @@ -48,7 +56,7 @@ IF(CMAKE_SYSTEM_NAME MATCHES "Linux") ELSE (AVX_TRUE) set(AVX_FOUND false CACHE BOOL "AVX available on host") ENDIF (AVX_TRUE) - + STRING(REGEX REPLACE "^.*(avx2).*$" "\\1" SSE_THERE ${CPUINFO}) STRING(COMPARE EQUAL "avx2" "${SSE_THERE}" AVX2_TRUE) IF (AVX2_TRUE) @@ -57,6 +65,14 @@ IF(CMAKE_SYSTEM_NAME MATCHES "Linux") set(AVX2_FOUND false CACHE BOOL "AVX2 available on host") ENDIF (AVX2_TRUE) + STRING(REGEX REPLACE "^.*(avx512).*$" "\\1" SSE_THERE ${CPUINFO}) + STRING(COMPARE EQUAL "avx512" "${SSE_THERE}" AVX512_TRUE) + IF (AVX512_TRUE) + set(AVX512_FOUND true CACHE BOOL "AVX512 available on host") + ELSE (AVX512_TRUE) + set(AVX512_FOUND false CACHE BOOL "AVX512 available on host") + ENDIF (AVX512_TRUE) + ELSEIF(CMAKE_SYSTEM_NAME MATCHES "Darwin") EXEC_PROGRAM("/usr/sbin/sysctl -n machdep.cpu.features" OUTPUT_VARIABLE CPUINFO) @@ -109,6 +125,14 @@ 
ELSEIF(CMAKE_SYSTEM_NAME MATCHES "Darwin") set(AVX2_FOUND false CACHE BOOL "AVX2 available on host") ENDIF (AVX2_TRUE) + STRING(REGEX REPLACE "^.*(avx512).*$" "\\1" SSE_THERE ${CPUINFO}) + STRING(COMPARE EQUAL "avx512" "${SSE_THERE}" AVX512_TRUE) + IF (AVX512_TRUE) + set(AVX512_FOUND true CACHE BOOL "AVX512 available on host") + ELSE (AVX512_TRUE) + set(AVX512_FOUND false CACHE BOOL "AVX512 available on host") + ENDIF (AVX512_TRUE) + ELSEIF(CMAKE_SYSTEM_NAME MATCHES "Windows") # TODO set(SSE2_FOUND true CACHE BOOL "SSE2 available on host") @@ -117,6 +141,7 @@ ELSEIF(CMAKE_SYSTEM_NAME MATCHES "Windows") set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host") set(AVX_FOUND false CACHE BOOL "AVX available on host") set(AVX2_FOUND false CACHE BOOL "AVX2 available on host") + set(AVX512_FOUND false CACHE BOOL "AVX512 available on host") ELSE(CMAKE_SYSTEM_NAME MATCHES "Linux") set(SSE2_FOUND true CACHE BOOL "SSE2 available on host") set(SSE3_FOUND false CACHE BOOL "SSE3 available on host") @@ -124,6 +149,7 @@ ELSE(CMAKE_SYSTEM_NAME MATCHES "Linux") set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host") set(AVX_FOUND false CACHE BOOL "AVX available on host") set(AVX2_FOUND false CACHE BOOL "AVX2 available on host") + set(AVX512_FOUND false CACHE BOOL "AVX512 available on host") ENDIF(CMAKE_SYSTEM_NAME MATCHES "Linux") if(NOT SSE2_FOUND) @@ -144,5 +170,8 @@ endif(NOT AVX_FOUND) if(NOT AVX2_FOUND) MESSAGE(STATUS "Could not find hardware support for AVX2 on this machine.") endif(NOT AVX2_FOUND) +if(NOT AVX512_FOUND) + MESSAGE(STATUS "Could not find hardware support for AVX512 on this machine.") +endif(NOT AVX512_FOUND) -mark_as_advanced(SSE2_FOUND SSE3_FOUND SSSE3_FOUND SSE4_1_FOUND, AVX_FOUND, AVX2_FOUND) +mark_as_advanced(SSE2_FOUND SSE3_FOUND SSSE3_FOUND SSE4_1_FOUND, AVX_FOUND, AVX2_FOUND, AVX512_FOUND) diff --git a/cmake/GetCacheVariables.cmake b/cmake/GetCacheVariables.cmake new file mode 100644 index 000000000..563ade79e --- /dev/null +++ 
b/cmake/GetCacheVariables.cmake @@ -0,0 +1,52 @@ +## +# This module extracts CMake cached variables into a variable. +# +# Author: snukky +# +# This module sets the following variables: +# * PROJECT_CMAKE_CACHE - to the output of "cmake -L" - an uncached list of +# non-advanced cached variables +# * PROJECT_CMAKE_CACHE_ADVANCED - to the output of "cmake -LA" - an uncached +# list of advanced cached variables +# + +set(PROJECT_CMAKE_CACHE "") +set(PROJECT_CMAKE_CACHE_ADVANCED "") + +# Get all CMake variables +get_cmake_property(_variableNames VARIABLES) +list(SORT _variableNames) +list(REMOVE_DUPLICATES _variableNames) + +foreach(_variableName ${_variableNames}) + # If it is a cache variable + get_property(_cachePropIsSet CACHE "${_variableName}" PROPERTY VALUE SET) + if(_cachePropIsSet) + # Get the variable's type + get_property(_variableType CACHE ${_variableName} PROPERTY TYPE) + + # Get the variable's value + set(_variableValue "${${_variableName}}") + + # Skip static or internal cached variables, cmake -L[A] does not print them, see + # https://github.com/Kitware/CMake/blob/master/Source/cmakemain.cxx#L282 + if( (NOT "${_variableType}" STREQUAL "STATIC") AND + (NOT "${_variableType}" STREQUAL "INTERNAL") AND + (NOT "${_variableValue}" STREQUAL "") ) + + + set(PROJECT_CMAKE_CACHE_ADVANCED "${PROJECT_CMAKE_CACHE_ADVANCED} \"${_variableName}=${_variableValue}\\n\"\n") + + # Get the variable's advanced flag + get_property(_isAdvanced CACHE ${_variableName} PROPERTY ADVANCED SET) + if(NOT _isAdvanced) + set(PROJECT_CMAKE_CACHE "${PROJECT_CMAKE_CACHE} \"${_variableName}=${_variableValue}\\n\"\n") + endif() + + # Print variables for debugging + #message(STATUS "${_variableName}=${${_variableName}}") + #message(STATUS " Type=${_variableType}") + #message(STATUS " Advanced=${_isAdvanced}") + endif() + endif(_cachePropIsSet) +endforeach() diff --git a/contrib/autoformat.sh b/contrib/autoformat.sh old mode 100755 new mode 100644 diff --git a/examples b/examples index 
336740065..c19b7814d 160000 --- a/examples +++ b/examples @@ -1 +1 @@ -Subproject commit 336740065d9c23e53e912a1befff18981d9d27ab +Subproject commit c19b7814d71febf1053bd93af6ac314b46204092 diff --git a/regression-tests b/regression-tests new file mode 160000 index 000000000..6a08849b2 --- /dev/null +++ b/regression-tests @@ -0,0 +1 @@ +Subproject commit 6a08849b23f6c14eefbe12f4eb73dc638b962587 diff --git a/scripts/bert/bert4marian.py b/scripts/bert/bert4marian.py new file mode 100755 index 000000000..8070c0fe9 --- /dev/null +++ b/scripts/bert/bert4marian.py @@ -0,0 +1,154 @@ +#!/usr/bin/env python3 +""" +This script takes a Tensorflow BERT checkpoint and a model description in a JSON file and converts +it to a Marian weight file with numpy weights and an internal YAML description. + +This works with checkpoints from https://github.com/google-research/bert + +Assmung a BERT checkpoint like this: +drwxr-xr-x 2 marcinjd marcinjd 4.0K Nov 23 16:39 . +-rw-r--r-- 1 marcinjd marcinjd 521 Nov 23 16:38 bert_config.json +-rw-r--r-- 1 marcinjd marcinjd 682M Nov 23 16:39 bert_model.ckpt.data-00000-of-00001 +-rw-r--r-- 1 marcinjd marcinjd 8.5K Nov 23 16:39 bert_model.ckpt.index +-rw-r--r-- 1 marcinjd marcinjd 888K Nov 23 16:39 bert_model.ckpt.meta +-rw-r--r-- 1 marcinjd marcinjd 973K Nov 23 16:37 vocab.txt + +usage: + +./bert.py --bert_prefix bert_model.ckpt --bert_config bert_config.json --marian bert.npz +""" + +import tensorflow as tf +import numpy as np +import sys +import yaml +import argparse + +parser = argparse.ArgumentParser(description='Convert Tensorflow BERT model to Marian weight file.') +parser.add_argument('--bert_prefix', help='Prefix for Tensorflow BERT checkpoint', required=True) +parser.add_argument('--bert_config', help='Path to Tensorflow BERT JSON config', required=True) +parser.add_argument('--marian', help='Output path for Marian weight file', required=True) +args = parser.parse_args() + +print("Loading TensorFlow config from %s" % (args.bert_config,)) 
+bertConfig = yaml.load(open(args.bert_config)) +bertConfigYamlStr = yaml.dump(bertConfig, default_flow_style=False) +print(bertConfigYamlStr) + +print("Loading TensorFlow model from %s" % (args.bert_prefix,)) + +# Collect tensors from TF model as numpy matrices +tfModel = dict() +with tf.Session() as sess: + preloader = tf.train.import_meta_graph(args.bert_prefix + ".meta") + preloader.restore(sess, args.bert_prefix) + vars = tf.global_variables() + for v in vars: + if len(v.shape) > 0: + if "adam" not in v.name: # ignore adam parameters + print(v.name, v.shape) + tfModel[v.name] = sess.run(v.name) # get numpy matrix + +# Prepare Marian model config +config = dict() +config["type"] = "bert" +config["input-types"] = ["sequence", "class"] +config["tied-embeddings-all"] = True +config["dim-emb"] = tfModel["bert/embeddings/word_embeddings:0"].shape[-1] +config["dim-vocabs"] = [ tfModel["bert/embeddings/word_embeddings:0"].shape[0], + tfModel["cls/seq_relationship/output_weights:0"].shape[0] ] + +config["transformer-dim-ffn"] = tfModel["bert/encoder/layer_0/intermediate/dense/kernel:0"].shape[-1] +config["transformer-ffn-activation"] = bertConfig["hidden_act"] +config["transformer-ffn-depth"] = 2 +config["transformer-heads"] = bertConfig["num_attention_heads"] +config["transformer-train-position-embeddings"] = True +config["transformer-preprocess"] = "" +config["transformer-postprocess"] = "dan" +config["transformer-postprocess-emb"] = "nd" +config["bert-train-type-embeddings"] = True +config["bert-type-vocab-size"] = tfModel["bert/embeddings/token_type_embeddings:0"].shape[0] +config["version"] = "bert4marian.py conversion" + +# check number of layers +found = True +config["enc-depth"] = 0; +while found: + found = False + for key in tfModel: + if "bert/encoder/layer_" + str(config["enc-depth"]) in key: + config["enc-depth"] += 1 + found = True + break + +if config["enc-depth"] != bertConfig["num_hidden_layers"]: + sys.exit("Number of layers in JSON config (%s) and 
number of layers found in checkpoint (%s) do not match!" % (config["enc-depth"], bertConfig["num_hidden_layers"])) + +configYamlStr = yaml.dump(config, default_flow_style=False) +desc = list(configYamlStr) +npDesc = np.chararray((len(desc),)) +npDesc[:] = desc +npDesc.dtype = np.int8 + +marianModel = dict() +marianModel["special:model.yml"] = npDesc + +# Map model weights here # +# Embedding layers +marianModel["Wemb"] = tfModel["bert/embeddings/word_embeddings:0"] +marianModel["Wpos"] = tfModel["bert/embeddings/position_embeddings:0"] +marianModel["Wtype"] = tfModel["bert/embeddings/token_type_embeddings:0"] +marianModel["encoder_emb_ln_scale_pre"] = tfModel["bert/embeddings/LayerNorm/gamma:0"] +marianModel["encoder_emb_ln_bias_pre"] = tfModel["bert/embeddings/LayerNorm/beta:0"] + +for layer in range(config["enc-depth"]): + marianPrefix = "encoder_l%s" % (layer + 1,) + tfPrefix = "bert/encoder/layer_%s" % (layer,) + + # Attention + marianModel[marianPrefix + "_self_Wq"] = tfModel[tfPrefix + "/attention/self/query/kernel:0"] + marianModel[marianPrefix + "_self_bq"] = tfModel[tfPrefix + "/attention/self/query/bias:0"] + + marianModel[marianPrefix + "_self_Wk"] = tfModel[tfPrefix + "/attention/self/key/kernel:0"] + marianModel[marianPrefix + "_self_bk"] = tfModel[tfPrefix + "/attention/self/key/bias:0"] + + marianModel[marianPrefix + "_self_Wv"] = tfModel[tfPrefix + "/attention/self/value/kernel:0"] + marianModel[marianPrefix + "_self_bv"] = tfModel[tfPrefix + "/attention/self/value/bias:0"] + + marianModel[marianPrefix + "_self_Wo"] = tfModel[tfPrefix + "/attention/output/dense/kernel:0"] + marianModel[marianPrefix + "_self_bo"] = tfModel[tfPrefix + "/attention/output/dense/bias:0"] + + marianModel[marianPrefix + "_self_Wo_ln_scale"] = tfModel[tfPrefix + "/attention/output/LayerNorm/gamma:0"] + marianModel[marianPrefix + "_self_Wo_ln_bias"] = tfModel[tfPrefix + "/attention/output/LayerNorm/beta:0"] + + # FFN + marianModel[marianPrefix + "_ffn_W1"] = tfModel[tfPrefix 
+ "/intermediate/dense/kernel:0"] + marianModel[marianPrefix + "_ffn_b1"] = tfModel[tfPrefix + "/intermediate/dense/bias:0"] + + marianModel[marianPrefix + "_ffn_W2"] = tfModel[tfPrefix + "/output/dense/kernel:0"] + marianModel[marianPrefix + "_ffn_b2"] = tfModel[tfPrefix + "/output/dense/bias:0"] + + marianModel[marianPrefix + "_ffn_ffn_ln_scale"] = tfModel[tfPrefix + "/output/LayerNorm/gamma:0"] + marianModel[marianPrefix + "_ffn_ffn_ln_bias"] = tfModel[tfPrefix + "/output/LayerNorm/beta:0"] + + # Training objectives + # Masked-LM output layer + marianModel["masked-lm_ff_logit_l1_W"] = tfModel["cls/predictions/transform/dense/kernel:0"] + marianModel["masked-lm_ff_logit_l1_b"] = tfModel["cls/predictions/transform/dense/bias:0"] + + marianModel["masked-lm_ff_ln_scale"] = tfModel["cls/predictions/transform/LayerNorm/gamma:0"] + marianModel["masked-lm_ff_ln_bias"] = tfModel["cls/predictions/transform/LayerNorm/beta:0"] + + marianModel["masked-lm_ff_logit_l2_b"] = tfModel["cls/predictions/output_bias:0"] + + # Next Sentence classifier + marianModel["next-sentence_ff_logit_l1_W"] = tfModel["bert/pooler/dense/kernel:0"] + marianModel["next-sentence_ff_logit_l1_b"] = tfModel["bert/pooler/dense/bias:0"] + + marianModel["next-sentence_ff_logit_l2_W"] = np.transpose(tfModel["cls/seq_relationship/output_weights:0"]) # transpose?! 
+ marianModel["next-sentence_ff_logit_l2_b"] = tfModel["cls/seq_relationship/output_bias:0"] + +print("\nMarian config:") +print(configYamlStr) +print("Saving Marian model to %s" % (args.marian,)) +np.savez(args.marian, **marianModel) diff --git a/scripts/checkpoints/average.py b/scripts/checkpoints/average.py new file mode 100755 index 000000000..53bff1862 --- /dev/null +++ b/scripts/checkpoints/average.py @@ -0,0 +1,55 @@ +#!/usr/bin/env python +""" +This script takes multiple Marian *.npz model files and outputs an elementwise average of the model, +meant to do check-point averaging from: + +https://www.aclweb.org/anthology/W16-2316 + +usage: + +./average.py -m model.1.npz model.2.npz --output model.avg.npz +""" + +from __future__ import print_function + +import os +import sys +import argparse + +import numpy as np + +# Parse arguments +parser = argparse.ArgumentParser() +parser.add_argument('-m', '--model', nargs='+', required=True, + help="models to average") +parser.add_argument('-o', '--output', required=True, + help="output path") +args = parser.parse_args() + +# *average* holds the model matrix +average = dict() +# No. of models. 
+n = len(args.model) + +for filename in args.model: + print("Loading {}".format(filename)) + with open(filename, "rb") as mfile: + # Loads matrix from model file + m = np.load(mfile) + for k in m: + if k != "history_errs": + # Initialize the key + if k not in average: + average[k] = m[k] + # Add to the appropriate value + elif average[k].shape == m[k].shape and "special" not in k: + average[k] += m[k] + +# Actual averaging +for k in average: + if "special" not in k: + average[k] /= n + +# Save averaged model to file +print("Saving to {}".format(args.output)) +np.savez(args.output, **average) diff --git a/scripts/contrib/fix_hard.py b/scripts/contrib/fix_hard.py old mode 100644 new mode 100755 diff --git a/scripts/contrib/inject_ctt.py b/scripts/contrib/inject_ctt.py old mode 100644 new mode 100755 diff --git a/scripts/contrib/inject_model_params.py b/scripts/contrib/inject_model_params.py old mode 100644 new mode 100755 diff --git a/scripts/contrib/model_info.py b/scripts/contrib/model_info.py old mode 100644 new mode 100755 diff --git a/scripts/embeddings/export_embeddings.py b/scripts/embeddings/export_embeddings.py index 1476e52c9..3b4f3314b 100755 --- a/scripts/embeddings/export_embeddings.py +++ b/scripts/embeddings/export_embeddings.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- from __future__ import print_function @@ -9,18 +9,22 @@ def main(): - desc = """Export word embedding from model""" + desc = """Export word embeddings from model""" parser = argparse.ArgumentParser( formatter_class=argparse.RawDescriptionHelpFormatter, description=desc) - parser.add_argument("-m", "--model", help="Model file", required=True) - parser.add_argument( - "-o", "--output-prefix", help="Output files prefix", required=True) + parser.add_argument("-m", "--model", help="path to model.npz file", required=True) + parser.add_argument("-o", "--output-prefix", help="prefix for output files", required=True) args = parser.parse_args() 
print("Loading model") model = np.load(args.model) special = yaml.load(model["special:model.yml"][:-1].tobytes()) + if special["tied-embeddings-all"] or special["tied-embeddings-src"]: + all_emb = model["Wemb"] + export_emb(args.output_prefix + ".all", all_emb) + exit() + if special["type"] == "amun": enc_emb = model["Wemb"] dec_emb = model["Wemb_dec"] @@ -28,16 +32,15 @@ def main(): enc_emb = model["encoder_Wemb"] dec_emb = model["decoder_Wemb"] - with open(args.output_prefix + ".src", "w") as out: - out.write("{0} {1}\n".format(*enc_emb.shape)) - for i in range(enc_emb.shape[0]): - vec = " ".join("{0:.8f}".format(v) for v in enc_emb[i]) - out.write("{0} {1}\n".format(i, vec)) + export_emb(args.output_prefix + ".src", enc_emb) + export_emb(args.output_prefix + ".trg", dec_emb) + - with open(args.output_prefix + ".trg", "w") as out: - out.write("{0} {1}\n".format(*dec_emb.shape)) - for i in range(dec_emb.shape[0]): - vec = " ".join("{0:.8f}".format(v) for v in dec_emb[i]) +def export_emb(filename, emb): + with open(filename, "w") as out: + out.write("{0} {1}\n".format(*emb.shape)) + for i in range(emb.shape[0]): + vec = " ".join("{0:.8f}".format(v) for v in emb[i]) out.write("{0} {1}\n".format(i, vec)) diff --git a/scripts/shortlist/.gitignore b/scripts/shortlist/.gitignore new file mode 100644 index 000000000..bf0d379e4 --- /dev/null +++ b/scripts/shortlist/.gitignore @@ -0,0 +1,3 @@ +bin +fast_align +extract-lex diff --git a/scripts/shortlist/README.md b/scripts/shortlist/README.md new file mode 100644 index 000000000..30bf10154 --- /dev/null +++ b/scripts/shortlist/README.md @@ -0,0 +1,8 @@ +`install.sh` is a helper script that downloads and compiles fastalign and extract-lex, and copies +required binaries into _./bin_. 
+ +Shortlist files (_lex.s2t_ and _lex.t2s_) can be created using `generate_shortlists.pl`, for +example: + + perl generate_shortlists.pl --bindir ./bin -s corpus.bpe.src -t corpus.bpe.tgt + diff --git a/scripts/shortlist/generate_shortlists.pl b/scripts/shortlist/generate_shortlists.pl new file mode 100755 index 000000000..309eeef86 --- /dev/null +++ b/scripts/shortlist/generate_shortlists.pl @@ -0,0 +1,97 @@ +#!/usr/bin/env perl + +use strict; +use Getopt::Long; +use FindBin qw($Bin); +use File::Temp qw(tempdir tempfile); +use POSIX; + +my $PID = $$; +$SIG{TERM} = $SIG{INT} = $SIG{QUIT} = sub { die; }; + +my $BINDIR = "$Bin/bin"; +my $SRC; +my $TRG; +my $OUTPUT = "lex"; +my $THREADS = 8; +my $PARALLEL = 0; +my $HELP; + +GetOptions( + "b|bindir=s" => \$BINDIR, + "s|source=s" => \$SRC, + "t|target=s" => \$TRG, + "o|output=s" => \$OUTPUT, + "threads=i" => \$THREADS, + "parallel" => \$PARALLEL, + "h|help" => \$HELP, +); + +if($HELP) { + print "Usage: perl $0 -b bindir -s corpus.src -t corpus.tgt [-o outputprefix] [--threads 8] [--parallel]\n"; + exit 0; +} + +die "--bindir arg is required" if not defined $BINDIR; +die "-s|--source arg is required" if not defined $SRC; +die "-t|--target arg is required" if not defined $TRG; +die "-o|--output arg is required" if not defined $OUTPUT; + +for my $app (qw(fast_align atools extract_lex)) { + die "Could not find $app in $BINDIR" if not -e "$BINDIR/$app"; +} + +my $TEMPDIR = tempdir(CLEANUP => 1); + +my (undef, $CORPUS) = tempfile(DIR => $TEMPDIR); +my (undef, $ALN_S2T) = tempfile(DIR => $TEMPDIR); +my (undef, $ALN_T2S) = tempfile(DIR => $TEMPDIR); +my (undef, $ALN_GDF) = tempfile(DIR => $TEMPDIR); + +execute("paste $SRC $TRG | sed 's/\\t/ ||| /' > $CORPUS"); + +my @COMMANDS = ( + "OMP_NUM_THREADS=$THREADS $BINDIR/fast_align -vdo -i $CORPUS > $ALN_S2T", + "OMP_NUM_THREADS=$THREADS $BINDIR/fast_align -vdor -i $CORPUS > $ALN_T2S" +); + +my @PIDS; +for my $c (@COMMANDS) { + if ($PARALLEL) { + my $pid = fork(); + if (!$pid) { + 
execute($c); + exit(0); + } else { + push(@PIDS, $pid); + print "Forked process $pid\n"; + } + } else { + execute($c); + } +} +if ($PARALLEL) { + waitpid($_, 0) foreach(@PIDS); +} + +execute("$BINDIR/atools -c grow-diag-final -i $ALN_S2T -j $ALN_T2S > $ALN_GDF"); +execute("$BINDIR/extract_lex $TRG $SRC $ALN_GDF $OUTPUT.s2t $OUTPUT.t2s"); + +sub execute { + my $command = shift; + logMessage("Executing:\t$command"); + my $ret = system($command); + if ($ret != 0) { + logMessage("Command '$command' finished with return status $ret"); + logMessage("Aborting and killing parent process"); + kill(2, $PID); + die; + } +} + +sub logMessage { + my $message = shift; + my $time = POSIX::strftime("%m/%d/%Y %H:%M:%S", localtime()); + my $log_message = $time."\t$message\n"; + print STDERR $log_message; +} diff --git a/scripts/shortlist/install.sh b/scripts/shortlist/install.sh new file mode 100644 index 000000000..49b2171a4 --- /dev/null +++ b/scripts/shortlist/install.sh @@ -0,0 +1,25 @@ +#!/bin/bash -v + +mkdir -p bin + +# download and compile fast_align +if [ ! -e bin/fast_align ]; then + git clone https://github.com/clab/fast_align + mkdir -p fast_align/build + cd fast_align/build + cmake .. + make -j4 + cp fast_align atools ../../bin + cd ../../ +fi + +# download and compile extract-lex +if [ ! -e bin/extract_lex ]; then + git clone https://github.com/marian-nmt/extract-lex + mkdir -p extract-lex/build + cd extract-lex/build + cmake .. 
+ make -j4 + cp extract_lex ../../bin + cd ../../ +fi diff --git a/src/3rd_party/CLI/App.hpp b/src/3rd_party/CLI/App.hpp index b943ef1a4..14ddd1e7f 100644 --- a/src/3rd_party/CLI/App.hpp +++ b/src/3rd_party/CLI/App.hpp @@ -1590,7 +1590,12 @@ class App { } // Unlimited vector parser + // RG: A negative number for the total number of expected values means that the option is a + // vector and accepts an unlimited number of values if(num < 0) { + // RG: We need to keep track if the vector option is empty and handle this separately as + // otherwise the parser will mark the command-line option as not set + bool emptyVectorArgs = true; while(!args.empty() && _recognize(args.back()) == detail::Classifer::NONE) { if(collected >= -num) { // We could break here for allow extras, but we don't @@ -1603,12 +1608,28 @@ class App { parse_order_.push_back(op.get()); args.pop_back(); collected++; + emptyVectorArgs = false; } // Allow -- to end an unlimited list and "eat" it if(!args.empty() && _recognize(args.back()) == detail::Classifer::POSITIONAL_MARK) args.pop_back(); + // RG: Handle empty vector-like options + if(emptyVectorArgs) { + // RG: Set implicit value(s) if the option has it (them) + if(op->get_implicit()) { + for(const auto& ival : detail::split_up(op->get_implicitval())) { + op->add_result(ival); + parse_order_.push_back(op.get()); + } + // RG: Abort if there is a minimum number of values expected. 
Note: get_expected() + // equals to -N means at least N values are expected + } else if (op->get_expected() < 0) { + parse_order_.push_back(op.get()); + throw ArgumentMismatch(op->get_name(), op->get_expected(), 0); + } + } } else { while(num > 0 && !args.empty()) { num--; diff --git a/src/3rd_party/CMakeLists.txt b/src/3rd_party/CMakeLists.txt index 8548d9b8b..9f3981af2 100644 --- a/src/3rd_party/CMakeLists.txt +++ b/src/3rd_party/CMakeLists.txt @@ -6,6 +6,33 @@ add_subdirectory(./SQLiteCpp) add_subdirectory(./pathie-cpp) add_subdirectory(./zlib) +if(USE_FBGEMM) + # @TODO: find out if this is somehow harmful. This is supppressing CMake warnings for CMAKE_SUPPRESS_DEVELOPER_WARNINGS + # meant to silence CMakeFiles of 3rd_party tools. + if(NOT DEFINED CMAKE_SUPPRESS_DEVELOPER_WARNINGS) + set(CMAKE_SUPPRESS_DEVELOPER_WARNINGS 1 CACHE INTERNAL "No dev warnings") + endif() + + if(NOT MSVC) + # only locally disabled for the 3rd_party folder + # set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-value -Wno-unused-parameter -Wno-unused-variable -Wno-unused-function") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused") + endif() + + set(FBGEMM_BUILD_TESTS OFF CACHE BOOL "Disable fbgemm tests") + set(FBGEMM_BUILD_BENCHMARKS OFF CACHE BOOL "Disable fbgemm benchmark") + add_subdirectory(./fbgemm) + + # asmjit (3rd-party submodule of fbgemm) sets -Wall -Wextra near the end of + # the compile options, invalidating any -Wno-... flags that we may have set + # earlier. Let's remove them. 
+ get_property(ASMJIT_COMPILE_OPTIONS TARGET asmjit PROPERTY COMPILE_OPTIONS) + list(REMOVE_ITEM ASMJIT_COMPILE_OPTIONS -Wall -Wextra) + set_property(TARGET asmjit PROPERTY COMPILE_OPTIONS ${ASMJIT_COMPILE_OPTIONS}) + message(" ASMJIT COMPILE FLAGS: ${ASMJIT_COMPILE_OPTIONS}") + +endif(USE_FBGEMM) + if(USE_SENTENCEPIECE) if(USE_STATIC_LIBS) set(_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES}) @@ -16,16 +43,37 @@ if(USE_SENTENCEPIECE) endif() endif() - set(SPM_ENABLE_SHARED OFF CACHE BOOL "Builds shared libaries in addition to static libraries." FORCE) set(SPM_ENABLE_TCMALLOC ON CACHE BOOL "Enable TCMalloc if available." FORCE) - set(SPM_TCMALLOC_STATIC ON CACHE BOOL "Link static library of TCMALLOC." FORCE) + + if(USE_STATIC_LIBS) + message(WARNING "You are compiling SentencePiece binaries with -DUSE_STATIC_LIBS=on. \ + This will cause spm_train to segfault. No need to worry if you do not intend to use that binary. \ + Marian support for SentencePiece will work fine.") + + set(SPM_ENABLE_SHARED OFF CACHE BOOL "Builds shared libaries in addition to static libraries." FORCE) + set(SPM_TCMALLOC_STATIC ON CACHE BOOL "Link static library of TCMALLOC." FORCE) + else(USE_STATIC_LIBS) + set(SPM_ENABLE_SHARED ON CACHE BOOL "Builds shared libaries in addition to static libraries." FORCE) + set(SPM_TCMALLOC_STATIC OFF CACHE BOOL "Link static library of TCMALLOC." 
FORCE) + endif(USE_STATIC_LIBS) add_subdirectory(./sentencepiece) include_directories(./sentencepiece) set_target_properties(spm_encode spm_decode spm_train spm_normalize spm_export_vocab - PROPERTIES - RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}") + PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}") + + if (CMAKE_CXX_COMPILER_ID MATCHES "Clang") + foreach(t sentencepiece sentencepiece_train sentencepiece_train-static + spm_decode spm_encode spm_export_vocab spm_normalize spm_train) + set_property(TARGET ${t} APPEND_STRING PROPERTY COMPILE_FLAGS " -Wno-tautological-compare -Wno-unused") + if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 9.0) + set_property(TARGET ${t} APPEND_STRING PROPERTY COMPILE_FLAGS " -Wno-range-loop-construct") + endif() + # get_property(SENTENCEPIECE_COMPILE_FLAGS TARGET ${t} PROPERTY COMPILE_FLAGS) + # message("-- SENTENCPIECE: compile flags for target ${t}: ${SENTENCEPIECE_COMPILE_FLAGS}") + endforeach(t) + endif() if(USE_STATIC_LIBS) set(CMAKE_FIND_LIBRARY_SUFFIXES ${_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES}) @@ -36,5 +84,66 @@ include_directories(./SQLiteCpp/include) include_directories(./CLI) include_directories(./pathie-cpp/include) +if (CMAKE_CXX_COMPILER_ID MATCHES "Clang") + #set_target_properties(SQLiteCpp PROPERTIES COMPILE_FLAGS + set_property(TARGET SQLiteCpp APPEND_STRING PROPERTY COMPILE_FLAGS + " -Wno-parentheses-equality -Wno-unused-value") + if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 9.0) + set_property(TARGET SQLiteCpp APPEND_STRING PROPERTY COMPILE_FLAGS + " -Wno-implicit-int-float-conversion") + endif() + set_property(TARGET libyaml-cpp APPEND_STRING PROPERTY COMPILE_FLAGS + " -fPIC -Wno-unused-value") + set_property(TARGET pathie-cpp APPEND_STRING PROPERTY COMPILE_FLAGS + " -fPIC -Wno-unused-value") +endif() + + + include_directories(./zlib) +include(ExternalProject) + +set(INSTALLS "") # this will contain a list of 3rd part dependencies that we install locally +if(CUDA_FOUND) + if(USE_NCCL) + + # disables 
compilation for sm_30 to avoid ptxas warning... that is general Kepler support. But K80s are supported for instance by sm_35 + + set(GENCODE "") + if(COMPILE_CUDA_SM35) + set(GENCODE "${GENCODE} -gencode=arch=compute_35,code=sm_35") + endif(COMPILE_CUDA_SM35) + if(COMPILE_CUDA_SM50) + set(GENCODE "${GENCODE} -gencode=arch=compute_50,code=sm_50") + endif(COMPILE_CUDA_SM50) + if(COMPILE_CUDA_SM60) + set(GENCODE "${GENCODE} -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_61,code=sm_61") + endif(COMPILE_CUDA_SM60) + if(COMPILE_CUDA_SM70) + set(GENCODE "${GENCODE} -gencode=arch=compute_70,code=sm_70") + endif(COMPILE_CUDA_SM70) + + # install nccl in ${CMAKE_BINARY_DIR}/local similar to /usr/local linux installation + ExternalProject_Add(nccl_install + SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/nccl + BINARY_DIR ${CMAKE_CURRENT_SOURCE_DIR}/nccl + CONFIGURE_COMMAND "" + BUILD_COMMAND + $(MAKE) -f ${CMAKE_CURRENT_SOURCE_DIR}/nccl/Makefile src.build + BUILDDIR=${CMAKE_BINARY_DIR}/local CUDA_HOME=${CUDA_TOOLKIT_ROOT_DIR} + CUDA8_GENCODE=${GENCODE} CXX=${CMAKE_CXX_COMPILER} + INSTALL_COMMAND "") + + set_target_properties(nccl PROPERTIES IMPORTED_LOCATION ${CMAKE_BINARY_DIR}/local/lib/libnccl_static.a) + add_dependencies(nccl nccl_install) + set(INSTALLS ${INSTALLS} nccl_install) + + endif(USE_NCCL) +endif(CUDA_FOUND) + +# @TODO: do the same for SentencePiece, Protobuf etc. 
+# make clean will clean "${CMAKE_BINARY_DIR}/local" +set_directory_properties(PROPERTY ADDITIONAL_MAKE_CLEAN_FILES ${CMAKE_BINARY_DIR}/local) + +add_custom_target(3rd_party_installs DEPENDS ${INSTALLS}) diff --git a/src/3rd_party/ExceptionWithCallStack.cpp b/src/3rd_party/ExceptionWithCallStack.cpp old mode 100755 new mode 100644 diff --git a/src/3rd_party/ExceptionWithCallStack.h b/src/3rd_party/ExceptionWithCallStack.h index 5b961bd9b..488b1277f 100644 --- a/src/3rd_party/ExceptionWithCallStack.h +++ b/src/3rd_party/ExceptionWithCallStack.h @@ -5,6 +5,8 @@ // ExceptionWithCallStack.h - debug util functions // +#pragma once + #include namespace Microsoft { namespace MSR { namespace CNTK { diff --git a/src/3rd_party/any_type.h b/src/3rd_party/any_type.h old mode 100755 new mode 100644 diff --git a/src/3rd_party/avx_mathfun.h b/src/3rd_party/avx_mathfun.h new file mode 100644 index 000000000..6840478c5 --- /dev/null +++ b/src/3rd_party/avx_mathfun.h @@ -0,0 +1,726 @@ +/* + AVX implementation of sin, cos, sincos, exp and log + + Based on "sse_mathfun.h", by Julien Pommier + http://gruntthepeon.free.fr/ssemath/ + + Copyright (C) 2012 Giovanni Garberoglio + Interdisciplinary Laboratory for Computational Science (LISC) + Fondazione Bruno Kessler and University of Trento + via Sommarive, 18 + I-38123 Trento (Italy) + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. 
Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + + (this is the zlib license) +*/ + +#include + +/* yes I know, the top of this file is quite ugly */ +#ifdef _MSC_VER +# define ALIGN32_BEG __declspec(align(32)) +# define ALIGN32_END +#else /* gcc or icc */ +# define ALIGN32_BEG +# define ALIGN32_END __attribute__((aligned(32))) +#endif + +/* __m128 is ugly to write */ +typedef __m256 v8sf; // vector of 8 float (avx) +typedef __m256i v8si; // vector of 8 int (avx) +typedef __m128i v4si; // vector of 4 int (sse) + +#define _PI32AVX_CONST(Name, Val) \ + static const ALIGN32_BEG int _pi32avx_##Name[4] ALIGN32_END = { Val, Val, Val, Val } + +_PI32AVX_CONST(1, 1); +_PI32AVX_CONST(inv1, ~1); +_PI32AVX_CONST(2, 2); +_PI32AVX_CONST(4, 4); + + +/* declare some AVX constants -- why can't I figure a better way to do that? */ +#define _PS256_CONST(Name, Val) \ + static const ALIGN32_BEG float _ps256_##Name[8] ALIGN32_END = { (float)Val, (float)Val, (float)Val, (float)Val, (float)Val, (float)Val, (float)Val, (float)Val } +#define _PI32_CONST256(Name, Val) \ + static const ALIGN32_BEG int _pi32_256_##Name[8] ALIGN32_END = { Val, Val, Val, Val, Val, Val, Val, Val } +#define _PS256_CONST_TYPE(Name, Type, Val) \ + static const ALIGN32_BEG Type _ps256_##Name[8] ALIGN32_END = { Val, Val, Val, Val, Val, Val, Val, Val } + +_PS256_CONST(1 , 1.0f); +_PS256_CONST(0p5, 0.5f); +/* the smallest non denormalized float number */ +_PS256_CONST_TYPE(min_norm_pos, int, 0x00800000); +_PS256_CONST_TYPE(mant_mask, int, 0x7f800000); +_PS256_CONST_TYPE(inv_mant_mask, int, ~0x7f800000); + +_PS256_CONST_TYPE(sign_mask, int, (int)0x80000000); +_PS256_CONST_TYPE(inv_sign_mask, int, ~0x80000000); + +_PI32_CONST256(0, 0); +_PI32_CONST256(1, 1); +_PI32_CONST256(inv1, ~1); +_PI32_CONST256(2, 2); +_PI32_CONST256(4, 4); +_PI32_CONST256(0x7f, 0x7f); +
+_PS256_CONST(cephes_SQRTHF, 0.707106781186547524); +_PS256_CONST(cephes_log_p0, 7.0376836292E-2); +_PS256_CONST(cephes_log_p1, - 1.1514610310E-1); +_PS256_CONST(cephes_log_p2, 1.1676998740E-1); +_PS256_CONST(cephes_log_p3, - 1.2420140846E-1); +_PS256_CONST(cephes_log_p4, + 1.4249322787E-1); +_PS256_CONST(cephes_log_p5, - 1.6668057665E-1); +_PS256_CONST(cephes_log_p6, + 2.0000714765E-1); +_PS256_CONST(cephes_log_p7, - 2.4999993993E-1); +_PS256_CONST(cephes_log_p8, + 3.3333331174E-1); +_PS256_CONST(cephes_log_q1, -2.12194440e-4); +_PS256_CONST(cephes_log_q2, 0.693359375); + +#ifndef __AVX2__ + +typedef union imm_xmm_union { + v8si imm; + v4si xmm[2]; +} imm_xmm_union; + +#define COPY_IMM_TO_XMM(imm_, xmm0_, xmm1_) { \ + ALIGN32_BEG imm_xmm_union u ALIGN32_END; \ + u.imm = imm_; \ + xmm0_ = u.xmm[0]; \ + xmm1_ = u.xmm[1]; \ +} + +#define COPY_XMM_TO_IMM(xmm0_, xmm1_, imm_) { \ + ALIGN32_BEG imm_xmm_union u ALIGN32_END; \ + u.xmm[0]=xmm0_; u.xmm[1]=xmm1_; imm_ = u.imm; \ +} + + +#define AVX2_BITOP_USING_SSE2(fn) \ +static inline v8si avx2_mm256_##fn(v8si x, int a) \ +{ \ + /* use SSE2 instruction to perform the bitop AVX2 */ \ + v4si x1, x2; \ + v8si ret; \ + COPY_IMM_TO_XMM(x, x1, x2); \ + x1 = _mm_##fn(x1,a); \ + x2 = _mm_##fn(x2,a); \ + COPY_XMM_TO_IMM(x1, x2, ret); \ + return(ret); \ +} + +//#warning "Using SSE2 to perform AVX2 bitshift ops" +AVX2_BITOP_USING_SSE2(slli_epi32) +AVX2_BITOP_USING_SSE2(srli_epi32) + +#define AVX2_INTOP_USING_SSE2(fn) \ +static inline v8si avx2_mm256_##fn(v8si x, v8si y) \ +{ \ + /* use SSE2 instructions to perform the AVX2 integer operation */ \ + v4si x1, x2; \ + v4si y1, y2; \ + v8si ret; \ + COPY_IMM_TO_XMM(x, x1, x2); \ + COPY_IMM_TO_XMM(y, y1, y2); \ + x1 = _mm_##fn(x1,y1); \ + x2 = _mm_##fn(x2,y2); \ + COPY_XMM_TO_IMM(x1, x2, ret); \ + return(ret); \ +} + +//#warning "Using SSE2 to perform AVX2 integer ops" +AVX2_INTOP_USING_SSE2(and_si128) +AVX2_INTOP_USING_SSE2(andnot_si128) +AVX2_INTOP_USING_SSE2(cmpeq_epi32) 
+AVX2_INTOP_USING_SSE2(sub_epi32) +AVX2_INTOP_USING_SSE2(add_epi32) +#define avx2_mm256_and_si256 avx2_mm256_and_si128 +#define avx2_mm256_andnot_si256 avx2_mm256_andnot_si128 +#else +#define avx2_mm256_slli_epi32 _mm256_slli_epi32 +#define avx2_mm256_srli_epi32 _mm256_srli_epi32 +#define avx2_mm256_and_si256 _mm256_and_si256 +#define avx2_mm256_andnot_si256 _mm256_andnot_si256 +#define avx2_mm256_cmpeq_epi32 _mm256_cmpeq_epi32 +#define avx2_mm256_sub_epi32 _mm256_sub_epi32 +#define avx2_mm256_add_epi32 _mm256_add_epi32 +#endif /* __AVX2__ */ + + +/* natural logarithm computed for 8 simultaneous float + return NaN for x <= 0 +*/ +static inline v8sf log256_ps(v8sf x) { + v8si imm0; + v8sf one = *(v8sf*)_ps256_1; + + //v8sf invalid_mask = _mm256_cmple_ps(x, _mm256_setzero_ps()); + v8sf invalid_mask = _mm256_cmp_ps(x, _mm256_setzero_ps(), _CMP_LE_OS); + + x = _mm256_max_ps(x, *(v8sf*)_ps256_min_norm_pos); /* cut off denormalized stuff */ + + // can be done with AVX2 + imm0 = avx2_mm256_srli_epi32(_mm256_castps_si256(x), 23); + + /* keep only the fractional part */ + x = _mm256_and_ps(x, *(v8sf*)_ps256_inv_mant_mask); + x = _mm256_or_ps(x, *(v8sf*)_ps256_0p5); + + // this is again another AVX2 instruction + imm0 = avx2_mm256_sub_epi32(imm0, *(v8si*)_pi32_256_0x7f); + v8sf e = _mm256_cvtepi32_ps(imm0); + + e = _mm256_add_ps(e, one); + + /* part2: + if( x < SQRTHF ) { + e -= 1; + x = x + x - 1.0; + } else { x = x - 1.0; } + */ + //v8sf mask = _mm256_cmplt_ps(x, *(v8sf*)_ps256_cephes_SQRTHF); + v8sf mask = _mm256_cmp_ps(x, *(v8sf*)_ps256_cephes_SQRTHF, _CMP_LT_OS); + v8sf tmp = _mm256_and_ps(x, mask); + x = _mm256_sub_ps(x, one); + e = _mm256_sub_ps(e, _mm256_and_ps(one, mask)); + x = _mm256_add_ps(x, tmp); + + v8sf z = _mm256_mul_ps(x,x); + + v8sf y = *(v8sf*)_ps256_cephes_log_p0; + y = _mm256_mul_ps(y, x); + y = _mm256_add_ps(y, *(v8sf*)_ps256_cephes_log_p1); + y = _mm256_mul_ps(y, x); + y = _mm256_add_ps(y, *(v8sf*)_ps256_cephes_log_p2); + y = _mm256_mul_ps(y, x); + y 
= _mm256_add_ps(y, *(v8sf*)_ps256_cephes_log_p3); + y = _mm256_mul_ps(y, x); + y = _mm256_add_ps(y, *(v8sf*)_ps256_cephes_log_p4); + y = _mm256_mul_ps(y, x); + y = _mm256_add_ps(y, *(v8sf*)_ps256_cephes_log_p5); + y = _mm256_mul_ps(y, x); + y = _mm256_add_ps(y, *(v8sf*)_ps256_cephes_log_p6); + y = _mm256_mul_ps(y, x); + y = _mm256_add_ps(y, *(v8sf*)_ps256_cephes_log_p7); + y = _mm256_mul_ps(y, x); + y = _mm256_add_ps(y, *(v8sf*)_ps256_cephes_log_p8); + y = _mm256_mul_ps(y, x); + + y = _mm256_mul_ps(y, z); + + tmp = _mm256_mul_ps(e, *(v8sf*)_ps256_cephes_log_q1); + y = _mm256_add_ps(y, tmp); + + + tmp = _mm256_mul_ps(z, *(v8sf*)_ps256_0p5); + y = _mm256_sub_ps(y, tmp); + + tmp = _mm256_mul_ps(e, *(v8sf*)_ps256_cephes_log_q2); + x = _mm256_add_ps(x, y); + x = _mm256_add_ps(x, tmp); + x = _mm256_or_ps(x, invalid_mask); // negative arg will be NAN + return x; +} + +_PS256_CONST(exp_hi, 88.3762626647949f); +_PS256_CONST(exp_lo, -88.3762626647949f); + +_PS256_CONST(cephes_LOG2EF, 1.44269504088896341); +_PS256_CONST(cephes_exp_C1, 0.693359375); +_PS256_CONST(cephes_exp_C2, -2.12194440e-4); + +_PS256_CONST(cephes_exp_p0, 1.9875691500E-4); +_PS256_CONST(cephes_exp_p1, 1.3981999507E-3); +_PS256_CONST(cephes_exp_p2, 8.3334519073E-3); +_PS256_CONST(cephes_exp_p3, 4.1665795894E-2); +_PS256_CONST(cephes_exp_p4, 1.6666665459E-1); +_PS256_CONST(cephes_exp_p5, 5.0000001201E-1); + +static inline v8sf exp256_ps(v8sf x) { + v8sf tmp = _mm256_setzero_ps(), fx; + v8si imm0; + v8sf one = *(v8sf*)_ps256_1; + + x = _mm256_min_ps(x, *(v8sf*)_ps256_exp_hi); + x = _mm256_max_ps(x, *(v8sf*)_ps256_exp_lo); + + /* express exp(x) as exp(g + n*log(2)) */ + fx = _mm256_mul_ps(x, *(v8sf*)_ps256_cephes_LOG2EF); + fx = _mm256_add_ps(fx, *(v8sf*)_ps256_0p5); + + /* how to perform a floorf with SSE: just below */ + //imm0 = _mm256_cvttps_epi32(fx); + //tmp = _mm256_cvtepi32_ps(imm0); + + tmp = _mm256_floor_ps(fx); + + /* if greater, substract 1 */ + //v8sf mask = _mm256_cmpgt_ps(tmp, fx); + v8sf mask = 
_mm256_cmp_ps(tmp, fx, _CMP_GT_OS); + mask = _mm256_and_ps(mask, one); + fx = _mm256_sub_ps(tmp, mask); + + tmp = _mm256_mul_ps(fx, *(v8sf*)_ps256_cephes_exp_C1); + v8sf z = _mm256_mul_ps(fx, *(v8sf*)_ps256_cephes_exp_C2); + x = _mm256_sub_ps(x, tmp); + x = _mm256_sub_ps(x, z); + + z = _mm256_mul_ps(x,x); + + v8sf y = *(v8sf*)_ps256_cephes_exp_p0; + y = _mm256_mul_ps(y, x); + y = _mm256_add_ps(y, *(v8sf*)_ps256_cephes_exp_p1); + y = _mm256_mul_ps(y, x); + y = _mm256_add_ps(y, *(v8sf*)_ps256_cephes_exp_p2); + y = _mm256_mul_ps(y, x); + y = _mm256_add_ps(y, *(v8sf*)_ps256_cephes_exp_p3); + y = _mm256_mul_ps(y, x); + y = _mm256_add_ps(y, *(v8sf*)_ps256_cephes_exp_p4); + y = _mm256_mul_ps(y, x); + y = _mm256_add_ps(y, *(v8sf*)_ps256_cephes_exp_p5); + y = _mm256_mul_ps(y, z); + y = _mm256_add_ps(y, x); + y = _mm256_add_ps(y, one); + + /* build 2^n */ + imm0 = _mm256_cvttps_epi32(fx); + // another two AVX2 instructions + imm0 = avx2_mm256_add_epi32(imm0, *(v8si*)_pi32_256_0x7f); + imm0 = avx2_mm256_slli_epi32(imm0, 23); + v8sf pow2n = _mm256_castsi256_ps(imm0); + y = _mm256_mul_ps(y, pow2n); + return y; +} + +_PS256_CONST(minus_cephes_DP1, -0.78515625); +_PS256_CONST(minus_cephes_DP2, -2.4187564849853515625e-4); +_PS256_CONST(minus_cephes_DP3, -3.77489497744594108e-8); +_PS256_CONST(sincof_p0, -1.9515295891E-4); +_PS256_CONST(sincof_p1, 8.3321608736E-3); +_PS256_CONST(sincof_p2, -1.6666654611E-1); +_PS256_CONST(coscof_p0, 2.443315711809948E-005); +_PS256_CONST(coscof_p1, -1.388731625493765E-003); +_PS256_CONST(coscof_p2, 4.166664568298827E-002); +_PS256_CONST(cephes_FOPI, 1.27323954473516); // 4 / M_PI + + +/* evaluation of 8 sines at once using AVX intrinsics + + The code is the exact rewriting of the cephes sinf function. + Precision is excellent as long as x < 8192 (I did not bother to + take into account the special handling they have for greater values + -- it does not return garbage for arguments over 8192, though, but + the extra precision is missing).
+ + Note that it is such that sinf((float)M_PI) = 8.74e-8, which is the + surprising but correct result. + +*/ +static inline v8sf sin256_ps(v8sf x) { // any x + v8sf xmm1, xmm2 = _mm256_setzero_ps(), xmm3, sign_bit, y; + v8si imm0, imm2; + +#ifndef __AVX2__ + v4si imm0_1, imm0_2; + v4si imm2_1, imm2_2; +#endif + + sign_bit = x; + /* take the absolute value */ + x = _mm256_and_ps(x, *(v8sf*)_ps256_inv_sign_mask); + /* extract the sign bit (upper one) */ + sign_bit = _mm256_and_ps(sign_bit, *(v8sf*)_ps256_sign_mask); + + /* scale by 4/Pi */ + y = _mm256_mul_ps(x, *(v8sf*)_ps256_cephes_FOPI); + + /* + Here we start a series of integer operations, which are in the + realm of AVX2. + If we don't have AVX, let's perform them using SSE2 directives + */ + +#ifdef __AVX2__ + /* store the integer part of y in mm0 */ + imm2 = _mm256_cvttps_epi32(y); + /* j=(j+1) & (~1) (see the cephes sources) */ + // another two AVX2 instruction + imm2 = avx2_mm256_add_epi32(imm2, *(v8si*)_pi32_256_1); + imm2 = avx2_mm256_and_si256(imm2, *(v8si*)_pi32_256_inv1); + y = _mm256_cvtepi32_ps(imm2); + + /* get the swap sign flag */ + imm0 = avx2_mm256_and_si256(imm2, *(v8si*)_pi32_256_4); + imm0 = avx2_mm256_slli_epi32(imm0, 29); + /* get the polynom selection mask + there is one polynom for 0 <= x <= Pi/4 + and another one for Pi/4 +# if TARGET_OS_OSX == 1 +# define CATCH_PLATFORM_MAC +# elif TARGET_OS_IPHONE == 1 +# define CATCH_PLATFORM_IPHONE +# endif + +#elif defined(linux) || defined(__linux) || defined(__linux__) +# define CATCH_PLATFORM_LINUX + +#elif defined(WIN32) || defined(__WIN32__) || defined(_WIN32) || defined(_MSC_VER) || defined(__MINGW32__) +# define CATCH_PLATFORM_WINDOWS #endif +// end catch_platform.h + #ifdef CATCH_IMPL # ifndef CLARA_CONFIG_MAIN # define CLARA_CONFIG_MAIN_NOT_DEFINED @@ -56,74 +90,59 @@ # endif #endif -// #included from: internal/catch_notimplemented_exception.h -#define TWOBLUECUBES_CATCH_NOTIMPLEMENTED_EXCEPTION_H_INCLUDED +// start 
catch_user_interfaces.h + +namespace Catch { + unsigned int rngSeed(); +} + +// end catch_user_interfaces.h +// start catch_tag_alias_autoregistrar.h -// #included from: catch_common.h -#define TWOBLUECUBES_CATCH_COMMON_H_INCLUDED +// start catch_common.h -// #included from: catch_compiler_capabilities.h -#define TWOBLUECUBES_CATCH_COMPILER_CAPABILITIES_HPP_INCLUDED +// start catch_compiler_capabilities.h -// Detect a number of compiler features - mostly C++11/14 conformance - by compiler +// Detect a number of compiler features - by compiler // The following features are defined: // -// CATCH_CONFIG_CPP11_NULLPTR : is nullptr supported? -// CATCH_CONFIG_CPP11_NOEXCEPT : is noexcept supported? -// CATCH_CONFIG_CPP11_GENERATED_METHODS : The delete and default keywords for compiler generated methods -// CATCH_CONFIG_CPP11_IS_ENUM : std::is_enum is supported? -// CATCH_CONFIG_CPP11_TUPLE : std::tuple is supported -// CATCH_CONFIG_CPP11_LONG_LONG : is long long supported? -// CATCH_CONFIG_CPP11_OVERRIDE : is override supported? -// CATCH_CONFIG_CPP11_UNIQUE_PTR : is unique_ptr supported (otherwise use auto_ptr) -// CATCH_CONFIG_CPP11_SHUFFLE : is std::shuffle supported? -// CATCH_CONFIG_CPP11_TYPE_TRAITS : are type_traits and enable_if supported? - -// CATCH_CONFIG_CPP11_OR_GREATER : Is C++11 supported? - -// CATCH_CONFIG_VARIADIC_MACROS : are variadic macros supported? // CATCH_CONFIG_COUNTER : is the __COUNTER__ macro supported? // CATCH_CONFIG_WINDOWS_SEH : is Windows SEH supported? // CATCH_CONFIG_POSIX_SIGNALS : are POSIX signals supported? +// CATCH_CONFIG_DISABLE_EXCEPTIONS : Are exceptions enabled? // **************** // Note to maintainers: if new toggles are added please document them // in configuration.md, too // **************** // In general each macro has a _NO_ form -// (e.g. CATCH_CONFIG_CPP11_NO_NULLPTR) which disables the feature. +// (e.g. CATCH_CONFIG_NO_POSIX_SIGNALS) which disables the feature. 
// Many features, at point of detection, define an _INTERNAL_ macro, so they // can be combined, en-mass, with the _NO_ forms later. -// All the C++11 features can be disabled with CATCH_CONFIG_NO_CPP11 - #ifdef __cplusplus -# if __cplusplus >= 201103L -# define CATCH_CPP11_OR_GREATER +# if (__cplusplus >= 201402L) || (defined(_MSVC_LANG) && _MSVC_LANG >= 201402L) +# define CATCH_CPP14_OR_GREATER # endif -# if __cplusplus >= 201402L -# define CATCH_CPP14_OR_GREATER +# if (__cplusplus >= 201703L) || (defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) +# define CATCH_CPP17_OR_GREATER # endif #endif -#ifdef __clang__ - -# if __has_feature(cxx_nullptr) -# define CATCH_INTERNAL_CONFIG_CPP11_NULLPTR -# endif +#if defined(CATCH_CPP17_OR_GREATER) +# define CATCH_INTERNAL_CONFIG_CPP17_UNCAUGHT_EXCEPTIONS +#endif -# if __has_feature(cxx_noexcept) -# define CATCH_INTERNAL_CONFIG_CPP11_NOEXCEPT -# endif +#ifdef __clang__ -# if defined(CATCH_CPP11_OR_GREATER) -# define CATCH_INTERNAL_SUPPRESS_ETD_WARNINGS \ +# define CATCH_INTERNAL_SUPPRESS_GLOBALS_WARNINGS \ _Pragma( "clang diagnostic push" ) \ - _Pragma( "clang diagnostic ignored \"-Wexit-time-destructors\"" ) -# define CATCH_INTERNAL_UNSUPPRESS_ETD_WARNINGS \ + _Pragma( "clang diagnostic ignored \"-Wexit-time-destructors\"" ) \ + _Pragma( "clang diagnostic ignored \"-Wglobal-constructors\"") +# define CATCH_INTERNAL_UNSUPPRESS_GLOBALS_WARNINGS \ _Pragma( "clang diagnostic pop" ) # define CATCH_INTERNAL_SUPPRESS_PARENTHESES_WARNINGS \ @@ -131,238 +150,298 @@ _Pragma( "clang diagnostic ignored \"-Wparentheses\"" ) # define CATCH_INTERNAL_UNSUPPRESS_PARENTHESES_WARNINGS \ _Pragma( "clang diagnostic pop" ) -# endif -#endif // __clang__ +# define CATCH_INTERNAL_SUPPRESS_UNUSED_WARNINGS \ + _Pragma( "clang diagnostic push" ) \ + _Pragma( "clang diagnostic ignored \"-Wunused-variable\"" ) +# define CATCH_INTERNAL_UNSUPPRESS_UNUSED_WARNINGS \ + _Pragma( "clang diagnostic pop" ) 
-//////////////////////////////////////////////////////////////////////////////// -// We know some environments not to support full POSIX signals -#if defined(__CYGWIN__) || defined(__QNX__) +# define CATCH_INTERNAL_SUPPRESS_ZERO_VARIADIC_WARNINGS \ + _Pragma( "clang diagnostic push" ) \ + _Pragma( "clang diagnostic ignored \"-Wgnu-zero-variadic-macro-arguments\"" ) +# define CATCH_INTERNAL_UNSUPPRESS_ZERO_VARIADIC_WARNINGS \ + _Pragma( "clang diagnostic pop" ) -# if !defined(CATCH_CONFIG_POSIX_SIGNALS) -# define CATCH_INTERNAL_CONFIG_NO_POSIX_SIGNALS -# endif +# define CATCH_INTERNAL_SUPPRESS_UNUSED_TEMPLATE_WARNINGS \ + _Pragma( "clang diagnostic push" ) \ + _Pragma( "clang diagnostic ignored \"-Wunused-template\"" ) +# define CATCH_INTERNAL_UNSUPPRESS_UNUSED_TEMPLATE_WARNINGS \ + _Pragma( "clang diagnostic pop" ) +#endif // __clang__ +//////////////////////////////////////////////////////////////////////////////// +// Assume that non-Windows platforms support posix signals by default +#if !defined(CATCH_PLATFORM_WINDOWS) + #define CATCH_INTERNAL_CONFIG_POSIX_SIGNALS #endif //////////////////////////////////////////////////////////////////////////////// -// Cygwin -#ifdef __CYGWIN__ - -// Required for some versions of Cygwin to declare gettimeofday -// see: http://stackoverflow.com/questions/36901803/gettimeofday-not-declared-in-this-scope-cygwin -# define _BSD_SOURCE +// We know some environments not to support full POSIX signals +#if defined(__CYGWIN__) || defined(__QNX__) || defined(__EMSCRIPTEN__) || defined(__DJGPP__) + #define CATCH_INTERNAL_CONFIG_NO_POSIX_SIGNALS +#endif -#endif // __CYGWIN__ +#ifdef __OS400__ +# define CATCH_INTERNAL_CONFIG_NO_POSIX_SIGNALS +# define CATCH_CONFIG_COLOUR_NONE +#endif //////////////////////////////////////////////////////////////////////////////// -// Borland -#ifdef __BORLANDC__ - -#endif // __BORLANDC__ +// Android somehow still does not support std::to_string +#if defined(__ANDROID__) +# define 
CATCH_INTERNAL_CONFIG_NO_CPP11_TO_STRING +# define CATCH_INTERNAL_CONFIG_ANDROID_LOGWRITE +#endif //////////////////////////////////////////////////////////////////////////////// -// EDG -#ifdef __EDG_VERSION__ - -#endif // __EDG_VERSION__ +// Not all Windows environments support SEH properly +#if defined(__MINGW32__) +# define CATCH_INTERNAL_CONFIG_NO_WINDOWS_SEH +#endif //////////////////////////////////////////////////////////////////////////////// -// Digital Mars -#ifdef __DMC__ - -#endif // __DMC__ +// PS4 +#if defined(__ORBIS__) +# define CATCH_INTERNAL_CONFIG_NO_NEW_CAPTURE +#endif //////////////////////////////////////////////////////////////////////////////// -// GCC -#ifdef __GNUC__ +// Cygwin +#ifdef __CYGWIN__ -# if __GNUC__ == 4 && __GNUC_MINOR__ >= 6 && defined(__GXX_EXPERIMENTAL_CXX0X__) -# define CATCH_INTERNAL_CONFIG_CPP11_NULLPTR -# endif +// Required for some versions of Cygwin to declare gettimeofday +// see: http://stackoverflow.com/questions/36901803/gettimeofday-not-declared-in-this-scope-cygwin +# define _BSD_SOURCE +// some versions of cygwin (most) do not support std::to_string. Use the libstd check. 
+// https://gcc.gnu.org/onlinedocs/gcc-4.8.2/libstdc++/api/a01053_source.html line 2812-2813 +# if !((__cplusplus >= 201103L) && defined(_GLIBCXX_USE_C99) \ + && !defined(_GLIBCXX_HAVE_BROKEN_VSWPRINTF)) -// - otherwise more recent versions define __cplusplus >= 201103L -// and will get picked up below +# define CATCH_INTERNAL_CONFIG_NO_CPP11_TO_STRING -#endif // __GNUC__ +# endif +#endif // __CYGWIN__ //////////////////////////////////////////////////////////////////////////////// // Visual C++ #ifdef _MSC_VER -#define CATCH_INTERNAL_CONFIG_WINDOWS_SEH +# if _MSC_VER >= 1900 // Visual Studio 2015 or newer +# define CATCH_INTERNAL_CONFIG_CPP17_UNCAUGHT_EXCEPTIONS +# endif -#if (_MSC_VER >= 1600) -# define CATCH_INTERNAL_CONFIG_CPP11_NULLPTR -# define CATCH_INTERNAL_CONFIG_CPP11_UNIQUE_PTR -#endif +// Universal Windows platform does not support SEH +// Or console colours (or console at all...) +# if defined(WINAPI_FAMILY) && (WINAPI_FAMILY == WINAPI_FAMILY_APP) +# define CATCH_CONFIG_COLOUR_NONE +# else +# define CATCH_INTERNAL_CONFIG_WINDOWS_SEH +# endif -#if (_MSC_VER >= 1900 ) // (VC++ 13 (VS2015)) -#define CATCH_INTERNAL_CONFIG_CPP11_NOEXCEPT -#define CATCH_INTERNAL_CONFIG_CPP11_GENERATED_METHODS -#define CATCH_INTERNAL_CONFIG_CPP11_SHUFFLE -#define CATCH_INTERNAL_CONFIG_CPP11_TYPE_TRAITS -#endif +// MSVC traditional preprocessor needs some workaround for __VA_ARGS__ +// _MSVC_TRADITIONAL == 0 means new conformant preprocessor +// _MSVC_TRADITIONAL == 1 means old traditional non-conformant preprocessor +# if !defined(_MSVC_TRADITIONAL) || (defined(_MSVC_TRADITIONAL) && _MSVC_TRADITIONAL) +# define CATCH_INTERNAL_CONFIG_TRADITIONAL_MSVC_PREPROCESSOR +# endif +#endif // _MSC_VER +#if defined(_REENTRANT) || defined(_MSC_VER) +// Enable async processing, as -pthread is specified or no additional linking is required +# define CATCH_INTERNAL_CONFIG_USE_ASYNC #endif // _MSC_VER //////////////////////////////////////////////////////////////////////////////// - -// Use 
variadic macros if the compiler supports them -#if ( defined _MSC_VER && _MSC_VER > 1400 && !defined __EDGE__) || \ - ( defined __WAVE__ && __WAVE_HAS_VARIADICS ) || \ - ( defined __GNUC__ && __GNUC__ >= 3 ) || \ - ( !defined __cplusplus && __STDC_VERSION__ >= 199901L || __cplusplus >= 201103L ) - -#define CATCH_INTERNAL_CONFIG_VARIADIC_MACROS - +// Check if we are compiled with -fno-exceptions or equivalent +#if defined(__EXCEPTIONS) || defined(__cpp_exceptions) || defined(_CPPUNWIND) +# define CATCH_INTERNAL_CONFIG_EXCEPTIONS_ENABLED #endif -// Use __COUNTER__ if the compiler supports it -#if ( defined _MSC_VER && _MSC_VER >= 1300 ) || \ - ( defined __GNUC__ && __GNUC__ >= 4 && __GNUC_MINOR__ >= 3 ) || \ - ( defined __clang__ && __clang_major__ >= 3 ) - -#define CATCH_INTERNAL_CONFIG_COUNTER +//////////////////////////////////////////////////////////////////////////////// +// DJGPP +#ifdef __DJGPP__ +# define CATCH_INTERNAL_CONFIG_NO_WCHAR +#endif // __DJGPP__ +//////////////////////////////////////////////////////////////////////////////// +// Embarcadero C++Build +#if defined(__BORLANDC__) + #define CATCH_INTERNAL_CONFIG_POLYFILL_ISNAN #endif //////////////////////////////////////////////////////////////////////////////// -// C++ language feature support - -// catch all support for C++11 -#if defined(CATCH_CPP11_OR_GREATER) - -# if !defined(CATCH_INTERNAL_CONFIG_CPP11_NULLPTR) -# define CATCH_INTERNAL_CONFIG_CPP11_NULLPTR -# endif -# ifndef CATCH_INTERNAL_CONFIG_CPP11_NOEXCEPT -# define CATCH_INTERNAL_CONFIG_CPP11_NOEXCEPT -# endif - -# ifndef CATCH_INTERNAL_CONFIG_CPP11_GENERATED_METHODS -# define CATCH_INTERNAL_CONFIG_CPP11_GENERATED_METHODS -# endif - -# ifndef CATCH_INTERNAL_CONFIG_CPP11_IS_ENUM -# define CATCH_INTERNAL_CONFIG_CPP11_IS_ENUM -# endif - -# ifndef CATCH_INTERNAL_CONFIG_CPP11_TUPLE -# define CATCH_INTERNAL_CONFIG_CPP11_TUPLE -# endif +// Use of __COUNTER__ is suppressed during code analysis in +// CLion/AppCode 2017.2.x and former, because 
__COUNTER__ is not properly +// handled by it. +// Otherwise all supported compilers support COUNTER macro, +// but user still might want to turn it off +#if ( !defined(__JETBRAINS_IDE__) || __JETBRAINS_IDE__ >= 20170300L ) + #define CATCH_INTERNAL_CONFIG_COUNTER +#endif -# ifndef CATCH_INTERNAL_CONFIG_VARIADIC_MACROS -# define CATCH_INTERNAL_CONFIG_VARIADIC_MACROS -# endif +//////////////////////////////////////////////////////////////////////////////// -# if !defined(CATCH_INTERNAL_CONFIG_CPP11_LONG_LONG) -# define CATCH_INTERNAL_CONFIG_CPP11_LONG_LONG -# endif +// RTX is a special version of Windows that is real time. +// This means that it is detected as Windows, but does not provide +// the same set of capabilities as real Windows does. +#if defined(UNDER_RTSS) || defined(RTX64_BUILD) + #define CATCH_INTERNAL_CONFIG_NO_WINDOWS_SEH + #define CATCH_INTERNAL_CONFIG_NO_ASYNC + #define CATCH_CONFIG_COLOUR_NONE +#endif -# if !defined(CATCH_INTERNAL_CONFIG_CPP11_OVERRIDE) -# define CATCH_INTERNAL_CONFIG_CPP11_OVERRIDE -# endif -# if !defined(CATCH_INTERNAL_CONFIG_CPP11_UNIQUE_PTR) -# define CATCH_INTERNAL_CONFIG_CPP11_UNIQUE_PTR -# endif -# if !defined(CATCH_INTERNAL_CONFIG_CPP11_SHUFFLE) -# define CATCH_INTERNAL_CONFIG_CPP11_SHUFFLE -# endif -# if !defined(CATCH_INTERNAL_CONFIG_CPP11_TYPE_TRAITS) -# define CATCH_INTERNAL_CONFIG_CPP11_TYPE_TRAITS -# endif +#if defined(__UCLIBC__) +#define CATCH_INTERNAL_CONFIG_GLOBAL_NEXTAFTER +#endif -#endif // __cplusplus >= 201103L +// Various stdlib support checks that require __has_include +#if defined(__has_include) + // Check if string_view is available and usable + #if __has_include() && defined(CATCH_CPP17_OR_GREATER) + # define CATCH_INTERNAL_CONFIG_CPP17_STRING_VIEW + #endif + + // Check if optional is available and usable + # if __has_include() && defined(CATCH_CPP17_OR_GREATER) + # define CATCH_INTERNAL_CONFIG_CPP17_OPTIONAL + # endif // __has_include() && defined(CATCH_CPP17_OR_GREATER) + + // Check if byte is available 
and usable + # if __has_include() && defined(CATCH_CPP17_OR_GREATER) + # define CATCH_INTERNAL_CONFIG_CPP17_BYTE + # endif // __has_include() && defined(CATCH_CPP17_OR_GREATER) + + // Check if variant is available and usable + # if __has_include() && defined(CATCH_CPP17_OR_GREATER) + # if defined(__clang__) && (__clang_major__ < 8) + // work around clang bug with libstdc++ https://bugs.llvm.org/show_bug.cgi?id=31852 + // fix should be in clang 8, workaround in libstdc++ 8.2 + # include + # if defined(__GLIBCXX__) && defined(_GLIBCXX_RELEASE) && (_GLIBCXX_RELEASE < 9) + # define CATCH_CONFIG_NO_CPP17_VARIANT + # else + # define CATCH_INTERNAL_CONFIG_CPP17_VARIANT + # endif // defined(__GLIBCXX__) && defined(_GLIBCXX_RELEASE) && (_GLIBCXX_RELEASE < 9) + # else + # define CATCH_INTERNAL_CONFIG_CPP17_VARIANT + # endif // defined(__clang__) && (__clang_major__ < 8) + # endif // __has_include() && defined(CATCH_CPP17_OR_GREATER) +#endif // defined(__has_include) + +#if defined(CATCH_INTERNAL_CONFIG_COUNTER) && !defined(CATCH_CONFIG_NO_COUNTER) && !defined(CATCH_CONFIG_COUNTER) +# define CATCH_CONFIG_COUNTER +#endif +#if defined(CATCH_INTERNAL_CONFIG_WINDOWS_SEH) && !defined(CATCH_CONFIG_NO_WINDOWS_SEH) && !defined(CATCH_CONFIG_WINDOWS_SEH) && !defined(CATCH_INTERNAL_CONFIG_NO_WINDOWS_SEH) +# define CATCH_CONFIG_WINDOWS_SEH +#endif +// This is set by default, because we assume that unix compilers are posix-signal-compatible by default. +#if defined(CATCH_INTERNAL_CONFIG_POSIX_SIGNALS) && !defined(CATCH_INTERNAL_CONFIG_NO_POSIX_SIGNALS) && !defined(CATCH_CONFIG_NO_POSIX_SIGNALS) && !defined(CATCH_CONFIG_POSIX_SIGNALS) +# define CATCH_CONFIG_POSIX_SIGNALS +#endif +// This is set by default, because we assume that compilers with no wchar_t support are just rare exceptions. 
+#if !defined(CATCH_INTERNAL_CONFIG_NO_WCHAR) && !defined(CATCH_CONFIG_NO_WCHAR) && !defined(CATCH_CONFIG_WCHAR) +# define CATCH_CONFIG_WCHAR +#endif -// Now set the actual defines based on the above + anything the user has configured -#if defined(CATCH_INTERNAL_CONFIG_CPP11_NULLPTR) && !defined(CATCH_CONFIG_CPP11_NO_NULLPTR) && !defined(CATCH_CONFIG_CPP11_NULLPTR) && !defined(CATCH_CONFIG_NO_CPP11) -# define CATCH_CONFIG_CPP11_NULLPTR +#if !defined(CATCH_INTERNAL_CONFIG_NO_CPP11_TO_STRING) && !defined(CATCH_CONFIG_NO_CPP11_TO_STRING) && !defined(CATCH_CONFIG_CPP11_TO_STRING) +# define CATCH_CONFIG_CPP11_TO_STRING #endif -#if defined(CATCH_INTERNAL_CONFIG_CPP11_NOEXCEPT) && !defined(CATCH_CONFIG_CPP11_NO_NOEXCEPT) && !defined(CATCH_CONFIG_CPP11_NOEXCEPT) && !defined(CATCH_CONFIG_NO_CPP11) -# define CATCH_CONFIG_CPP11_NOEXCEPT + +#if defined(CATCH_INTERNAL_CONFIG_CPP17_OPTIONAL) && !defined(CATCH_CONFIG_NO_CPP17_OPTIONAL) && !defined(CATCH_CONFIG_CPP17_OPTIONAL) +# define CATCH_CONFIG_CPP17_OPTIONAL #endif -#if defined(CATCH_INTERNAL_CONFIG_CPP11_GENERATED_METHODS) && !defined(CATCH_CONFIG_CPP11_NO_GENERATED_METHODS) && !defined(CATCH_CONFIG_CPP11_GENERATED_METHODS) && !defined(CATCH_CONFIG_NO_CPP11) -# define CATCH_CONFIG_CPP11_GENERATED_METHODS + +#if defined(CATCH_INTERNAL_CONFIG_CPP17_UNCAUGHT_EXCEPTIONS) && !defined(CATCH_CONFIG_NO_CPP17_UNCAUGHT_EXCEPTIONS) && !defined(CATCH_CONFIG_CPP17_UNCAUGHT_EXCEPTIONS) +# define CATCH_CONFIG_CPP17_UNCAUGHT_EXCEPTIONS #endif -#if defined(CATCH_INTERNAL_CONFIG_CPP11_IS_ENUM) && !defined(CATCH_CONFIG_CPP11_NO_IS_ENUM) && !defined(CATCH_CONFIG_CPP11_IS_ENUM) && !defined(CATCH_CONFIG_NO_CPP11) -# define CATCH_CONFIG_CPP11_IS_ENUM + +#if defined(CATCH_INTERNAL_CONFIG_CPP17_STRING_VIEW) && !defined(CATCH_CONFIG_NO_CPP17_STRING_VIEW) && !defined(CATCH_CONFIG_CPP17_STRING_VIEW) +# define CATCH_CONFIG_CPP17_STRING_VIEW #endif -#if defined(CATCH_INTERNAL_CONFIG_CPP11_TUPLE) && !defined(CATCH_CONFIG_CPP11_NO_TUPLE) && 
!defined(CATCH_CONFIG_CPP11_TUPLE) && !defined(CATCH_CONFIG_NO_CPP11) -# define CATCH_CONFIG_CPP11_TUPLE + +#if defined(CATCH_INTERNAL_CONFIG_CPP17_VARIANT) && !defined(CATCH_CONFIG_NO_CPP17_VARIANT) && !defined(CATCH_CONFIG_CPP17_VARIANT) +# define CATCH_CONFIG_CPP17_VARIANT #endif -#if defined(CATCH_INTERNAL_CONFIG_VARIADIC_MACROS) && !defined(CATCH_CONFIG_NO_VARIADIC_MACROS) && !defined(CATCH_CONFIG_VARIADIC_MACROS) -# define CATCH_CONFIG_VARIADIC_MACROS + +#if defined(CATCH_INTERNAL_CONFIG_CPP17_BYTE) && !defined(CATCH_CONFIG_NO_CPP17_BYTE) && !defined(CATCH_CONFIG_CPP17_BYTE) +# define CATCH_CONFIG_CPP17_BYTE #endif -#if defined(CATCH_INTERNAL_CONFIG_CPP11_LONG_LONG) && !defined(CATCH_CONFIG_CPP11_NO_LONG_LONG) && !defined(CATCH_CONFIG_CPP11_LONG_LONG) && !defined(CATCH_CONFIG_NO_CPP11) -# define CATCH_CONFIG_CPP11_LONG_LONG + +#if defined(CATCH_CONFIG_EXPERIMENTAL_REDIRECT) +# define CATCH_INTERNAL_CONFIG_NEW_CAPTURE #endif -#if defined(CATCH_INTERNAL_CONFIG_CPP11_OVERRIDE) && !defined(CATCH_CONFIG_CPP11_NO_OVERRIDE) && !defined(CATCH_CONFIG_CPP11_OVERRIDE) && !defined(CATCH_CONFIG_NO_CPP11) -# define CATCH_CONFIG_CPP11_OVERRIDE + +#if defined(CATCH_INTERNAL_CONFIG_NEW_CAPTURE) && !defined(CATCH_INTERNAL_CONFIG_NO_NEW_CAPTURE) && !defined(CATCH_CONFIG_NO_NEW_CAPTURE) && !defined(CATCH_CONFIG_NEW_CAPTURE) +# define CATCH_CONFIG_NEW_CAPTURE #endif -#if defined(CATCH_INTERNAL_CONFIG_CPP11_UNIQUE_PTR) && !defined(CATCH_CONFIG_CPP11_NO_UNIQUE_PTR) && !defined(CATCH_CONFIG_CPP11_UNIQUE_PTR) && !defined(CATCH_CONFIG_NO_CPP11) -# define CATCH_CONFIG_CPP11_UNIQUE_PTR + +#if !defined(CATCH_INTERNAL_CONFIG_EXCEPTIONS_ENABLED) && !defined(CATCH_CONFIG_DISABLE_EXCEPTIONS) +# define CATCH_CONFIG_DISABLE_EXCEPTIONS #endif -// Use of __COUNTER__ is suppressed if __JETBRAINS_IDE__ is #defined (meaning we're being parsed by a JetBrains IDE for -// analytics) because, at time of writing, __COUNTER__ is not properly handled by it. 
-// This does not affect compilation -#if defined(CATCH_INTERNAL_CONFIG_COUNTER) && !defined(CATCH_CONFIG_NO_COUNTER) && !defined(CATCH_CONFIG_COUNTER) && !defined(__JETBRAINS_IDE__) -# define CATCH_CONFIG_COUNTER + +#if defined(CATCH_INTERNAL_CONFIG_POLYFILL_ISNAN) && !defined(CATCH_CONFIG_NO_POLYFILL_ISNAN) && !defined(CATCH_CONFIG_POLYFILL_ISNAN) +# define CATCH_CONFIG_POLYFILL_ISNAN #endif -#if defined(CATCH_INTERNAL_CONFIG_CPP11_SHUFFLE) && !defined(CATCH_CONFIG_CPP11_NO_SHUFFLE) && !defined(CATCH_CONFIG_CPP11_SHUFFLE) && !defined(CATCH_CONFIG_NO_CPP11) -# define CATCH_CONFIG_CPP11_SHUFFLE + +#if defined(CATCH_INTERNAL_CONFIG_USE_ASYNC) && !defined(CATCH_INTERNAL_CONFIG_NO_ASYNC) && !defined(CATCH_CONFIG_NO_USE_ASYNC) && !defined(CATCH_CONFIG_USE_ASYNC) +# define CATCH_CONFIG_USE_ASYNC #endif -# if defined(CATCH_INTERNAL_CONFIG_CPP11_TYPE_TRAITS) && !defined(CATCH_CONFIG_CPP11_NO_TYPE_TRAITS) && !defined(CATCH_CONFIG_CPP11_TYPE_TRAITS) && !defined(CATCH_CONFIG_NO_CPP11) -# define CATCH_CONFIG_CPP11_TYPE_TRAITS -# endif -#if defined(CATCH_INTERNAL_CONFIG_WINDOWS_SEH) && !defined(CATCH_CONFIG_NO_WINDOWS_SEH) && !defined(CATCH_CONFIG_WINDOWS_SEH) -# define CATCH_CONFIG_WINDOWS_SEH + +#if defined(CATCH_INTERNAL_CONFIG_ANDROID_LOGWRITE) && !defined(CATCH_CONFIG_NO_ANDROID_LOGWRITE) && !defined(CATCH_CONFIG_ANDROID_LOGWRITE) +# define CATCH_CONFIG_ANDROID_LOGWRITE #endif -// This is set by default, because we assume that unix compilers are posix-signal-compatible by default. 
-#if !defined(CATCH_INTERNAL_CONFIG_NO_POSIX_SIGNALS) && !defined(CATCH_CONFIG_NO_POSIX_SIGNALS) && !defined(CATCH_CONFIG_POSIX_SIGNALS) -# define CATCH_CONFIG_POSIX_SIGNALS + +#if defined(CATCH_INTERNAL_CONFIG_GLOBAL_NEXTAFTER) && !defined(CATCH_CONFIG_NO_GLOBAL_NEXTAFTER) && !defined(CATCH_CONFIG_GLOBAL_NEXTAFTER) +# define CATCH_CONFIG_GLOBAL_NEXTAFTER #endif #if !defined(CATCH_INTERNAL_SUPPRESS_PARENTHESES_WARNINGS) # define CATCH_INTERNAL_SUPPRESS_PARENTHESES_WARNINGS # define CATCH_INTERNAL_UNSUPPRESS_PARENTHESES_WARNINGS #endif -#if !defined(CATCH_INTERNAL_SUPPRESS_ETD_WARNINGS) -# define CATCH_INTERNAL_SUPPRESS_ETD_WARNINGS -# define CATCH_INTERNAL_UNSUPPRESS_ETD_WARNINGS +#if !defined(CATCH_INTERNAL_SUPPRESS_GLOBALS_WARNINGS) +# define CATCH_INTERNAL_SUPPRESS_GLOBALS_WARNINGS +# define CATCH_INTERNAL_UNSUPPRESS_GLOBALS_WARNINGS +#endif +#if !defined(CATCH_INTERNAL_SUPPRESS_UNUSED_WARNINGS) +# define CATCH_INTERNAL_SUPPRESS_UNUSED_WARNINGS +# define CATCH_INTERNAL_UNSUPPRESS_UNUSED_WARNINGS +#endif +#if !defined(CATCH_INTERNAL_SUPPRESS_ZERO_VARIADIC_WARNINGS) +# define CATCH_INTERNAL_SUPPRESS_ZERO_VARIADIC_WARNINGS +# define CATCH_INTERNAL_UNSUPPRESS_ZERO_VARIADIC_WARNINGS #endif -// noexcept support: -#if defined(CATCH_CONFIG_CPP11_NOEXCEPT) && !defined(CATCH_NOEXCEPT) -# define CATCH_NOEXCEPT noexcept -# define CATCH_NOEXCEPT_IS(x) noexcept(x) -#else -# define CATCH_NOEXCEPT throw() -# define CATCH_NOEXCEPT_IS(x) +#if defined(__APPLE__) && defined(__apple_build_version__) && (__clang_major__ < 10) +# undef CATCH_INTERNAL_SUPPRESS_UNUSED_TEMPLATE_WARNINGS +# undef CATCH_INTERNAL_UNSUPPRESS_UNUSED_TEMPLATE_WARNINGS +#elif defined(__clang__) && (__clang_major__ < 5) +# undef CATCH_INTERNAL_SUPPRESS_UNUSED_TEMPLATE_WARNINGS +# undef CATCH_INTERNAL_UNSUPPRESS_UNUSED_TEMPLATE_WARNINGS #endif -// nullptr support -#ifdef CATCH_CONFIG_CPP11_NULLPTR -# define CATCH_NULL nullptr -#else -# define CATCH_NULL NULL +#if 
!defined(CATCH_INTERNAL_SUPPRESS_UNUSED_TEMPLATE_WARNINGS) +# define CATCH_INTERNAL_SUPPRESS_UNUSED_TEMPLATE_WARNINGS +# define CATCH_INTERNAL_UNSUPPRESS_UNUSED_TEMPLATE_WARNINGS #endif -// override support -#ifdef CATCH_CONFIG_CPP11_OVERRIDE -# define CATCH_OVERRIDE override +#if defined(CATCH_CONFIG_DISABLE_EXCEPTIONS) +#define CATCH_TRY if ((true)) +#define CATCH_CATCH_ALL if ((false)) +#define CATCH_CATCH_ANON(type) if ((false)) #else -# define CATCH_OVERRIDE +#define CATCH_TRY try +#define CATCH_CATCH_ALL catch (...) +#define CATCH_CATCH_ANON(type) catch (type) #endif -// unique_ptr support -#ifdef CATCH_CONFIG_CPP11_UNIQUE_PTR -# define CATCH_AUTO_PTR( T ) std::unique_ptr -#else -# define CATCH_AUTO_PTR( T ) std::auto_ptr +#if defined(CATCH_INTERNAL_CONFIG_TRADITIONAL_MSVC_PREPROCESSOR) && !defined(CATCH_CONFIG_NO_TRADITIONAL_MSVC_PREPROCESSOR) && !defined(CATCH_CONFIG_TRADITIONAL_MSVC_PREPROCESSOR) +#define CATCH_CONFIG_TRADITIONAL_MSVC_PREPROCESSOR #endif +// end catch_compiler_capabilities.h #define INTERNAL_CATCH_UNIQUE_NAME_LINE2( name, line ) name##line #define INTERNAL_CATCH_UNIQUE_NAME_LINE( name, line ) INTERNAL_CATCH_UNIQUE_NAME_LINE2( name, line ) #ifdef CATCH_CONFIG_COUNTER @@ -371,95 +450,48 @@ # define INTERNAL_CATCH_UNIQUE_NAME( name ) INTERNAL_CATCH_UNIQUE_NAME_LINE( name, __LINE__ ) #endif -#define INTERNAL_CATCH_STRINGIFY2( expr ) #expr -#define INTERNAL_CATCH_STRINGIFY( expr ) INTERNAL_CATCH_STRINGIFY2( expr ) +#include +#include +#include -#include -#include +// We need a dummy global operator<< so we can bring it into Catch namespace later +struct Catch_global_namespace_dummy {}; +std::ostream& operator<<(std::ostream&, Catch_global_namespace_dummy); namespace Catch { - struct IConfig; - struct CaseSensitive { enum Choice { Yes, No }; }; class NonCopyable { -#ifdef CATCH_CONFIG_CPP11_GENERATED_METHODS NonCopyable( NonCopyable const& ) = delete; NonCopyable( NonCopyable && ) = delete; NonCopyable& operator = ( NonCopyable const& ) = 
delete; NonCopyable& operator = ( NonCopyable && ) = delete; -#else - NonCopyable( NonCopyable const& info ); - NonCopyable& operator = ( NonCopyable const& ); -#endif protected: - NonCopyable() {} + NonCopyable(); virtual ~NonCopyable(); }; - class SafeBool { - public: - typedef void (SafeBool::*type)() const; - - static type makeSafe( bool value ) { - return value ? &SafeBool::trueValue : 0; - } - private: - void trueValue() const {} - }; - - template - inline void deleteAll( ContainerT& container ) { - typename ContainerT::const_iterator it = container.begin(); - typename ContainerT::const_iterator itEnd = container.end(); - for(; it != itEnd; ++it ) - delete *it; - } - template - inline void deleteAllValues( AssociativeContainerT& container ) { - typename AssociativeContainerT::const_iterator it = container.begin(); - typename AssociativeContainerT::const_iterator itEnd = container.end(); - for(; it != itEnd; ++it ) - delete it->second; - } - - bool startsWith( std::string const& s, std::string const& prefix ); - bool startsWith( std::string const& s, char prefix ); - bool endsWith( std::string const& s, std::string const& suffix ); - bool endsWith( std::string const& s, char suffix ); - bool contains( std::string const& s, std::string const& infix ); - void toLowerInPlace( std::string& s ); - std::string toLower( std::string const& s ); - std::string trim( std::string const& str ); - bool replaceInPlace( std::string& str, std::string const& replaceThis, std::string const& withThis ); - - struct pluralise { - pluralise( std::size_t count, std::string const& label ); - - friend std::ostream& operator << ( std::ostream& os, pluralise const& pluraliser ); + struct SourceLineInfo { - std::size_t m_count; - std::string m_label; - }; + SourceLineInfo() = delete; + SourceLineInfo( char const* _file, std::size_t _line ) noexcept + : file( _file ), + line( _line ) + {} - struct SourceLineInfo { + SourceLineInfo( SourceLineInfo const& other ) = default; + SourceLineInfo& 
operator = ( SourceLineInfo const& ) = default; + SourceLineInfo( SourceLineInfo&& ) noexcept = default; + SourceLineInfo& operator = ( SourceLineInfo&& ) noexcept = default; - SourceLineInfo(); - SourceLineInfo( char const* _file, std::size_t _line ); -# ifdef CATCH_CONFIG_CPP11_GENERATED_METHODS - SourceLineInfo(SourceLineInfo const& other) = default; - SourceLineInfo( SourceLineInfo && ) = default; - SourceLineInfo& operator = ( SourceLineInfo const& ) = default; - SourceLineInfo& operator = ( SourceLineInfo && ) = default; -# endif - bool empty() const; - bool operator == ( SourceLineInfo const& other ) const; - bool operator < ( SourceLineInfo const& other ) const; + bool empty() const noexcept { return file[0] == '\0'; } + bool operator == ( SourceLineInfo const& other ) const noexcept; + bool operator < ( SourceLineInfo const& other ) const noexcept; char const* file; std::size_t line; @@ -467,24 +499,17 @@ namespace Catch { std::ostream& operator << ( std::ostream& os, SourceLineInfo const& info ); - // This is just here to avoid compiler warnings with macro constants and boolean literals - inline bool isTrue( bool value ){ return value; } - inline bool alwaysTrue() { return true; } - inline bool alwaysFalse() { return false; } - - void throwLogicError( std::string const& message, SourceLineInfo const& locationInfo ); - - void seedRng( IConfig const& config ); - unsigned int rngSeed(); + // Bring in operator<< from global namespace into Catch namespace + // This is necessary because the overload of operator<< above makes + // lookup stop at namespace Catch + using ::operator<<; // Use this in variadic streaming macros to allow // >> +StreamEndStop // as well as // >> stuff +StreamEndStop struct StreamEndStop { - std::string operator+() { - return std::string(); - } + std::string operator+() const; }; template T const& operator + ( T const& value, StreamEndStop ) { @@ -492,364 +517,834 @@ namespace Catch { } } -#define CATCH_INTERNAL_LINEINFO 
::Catch::SourceLineInfo( __FILE__, static_cast( __LINE__ ) ) -#define CATCH_INTERNAL_ERROR( msg ) ::Catch::throwLogicError( msg, CATCH_INTERNAL_LINEINFO ); +#define CATCH_INTERNAL_LINEINFO \ + ::Catch::SourceLineInfo( __FILE__, static_cast( __LINE__ ) ) +// end catch_common.h namespace Catch { - class NotImplementedException : public std::exception - { - public: - NotImplementedException( SourceLineInfo const& lineInfo ); - NotImplementedException( NotImplementedException const& ) {} - - virtual ~NotImplementedException() CATCH_NOEXCEPT {} - - virtual const char* what() const CATCH_NOEXCEPT; - - private: - std::string m_what; - SourceLineInfo m_lineInfo; + struct RegistrarForTagAliases { + RegistrarForTagAliases( char const* alias, char const* tag, SourceLineInfo const& lineInfo ); }; } // end namespace Catch -/////////////////////////////////////////////////////////////////////////////// -#define CATCH_NOT_IMPLEMENTED throw Catch::NotImplementedException( CATCH_INTERNAL_LINEINFO ) +#define CATCH_REGISTER_TAG_ALIAS( alias, spec ) \ + CATCH_INTERNAL_SUPPRESS_GLOBALS_WARNINGS \ + namespace{ Catch::RegistrarForTagAliases INTERNAL_CATCH_UNIQUE_NAME( AutoRegisterTagAlias )( alias, spec, CATCH_INTERNAL_LINEINFO ); } \ + CATCH_INTERNAL_UNSUPPRESS_GLOBALS_WARNINGS -// #included from: internal/catch_context.h -#define TWOBLUECUBES_CATCH_CONTEXT_H_INCLUDED +// end catch_tag_alias_autoregistrar.h +// start catch_test_registry.h -// #included from: catch_interfaces_generators.h -#define TWOBLUECUBES_CATCH_INTERFACES_GENERATORS_H_INCLUDED +// start catch_interfaces_testcase.h -#include +#include namespace Catch { - struct IGeneratorInfo { - virtual ~IGeneratorInfo(); - virtual bool moveNext() = 0; - virtual std::size_t getCurrentIndex() const = 0; + class TestSpec; + + struct ITestInvoker { + virtual void invoke () const = 0; + virtual ~ITestInvoker(); }; - struct IGeneratorsForTest { - virtual ~IGeneratorsForTest(); + class TestCase; + struct IConfig; - virtual IGeneratorInfo& 
getGeneratorInfo( std::string const& fileInfo, std::size_t size ) = 0; - virtual bool moveNext() = 0; + struct ITestCaseRegistry { + virtual ~ITestCaseRegistry(); + virtual std::vector const& getAllTests() const = 0; + virtual std::vector const& getAllTestsSorted( IConfig const& config ) const = 0; }; - IGeneratorsForTest* createGeneratorsForTest(); + bool isThrowSafe( TestCase const& testCase, IConfig const& config ); + bool matchTest( TestCase const& testCase, TestSpec const& testSpec, IConfig const& config ); + std::vector filterTests( std::vector const& testCases, TestSpec const& testSpec, IConfig const& config ); + std::vector const& getAllTestCasesSorted( IConfig const& config ); -} // end namespace Catch +} -// #included from: catch_ptr.hpp -#define TWOBLUECUBES_CATCH_PTR_HPP_INCLUDED +// end catch_interfaces_testcase.h +// start catch_stringref.h -#ifdef __clang__ -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wpadded" -#endif +#include +#include +#include +#include namespace Catch { - // An intrusive reference counting smart pointer. - // T must implement addRef() and release() methods - // typically implementing the IShared interface - template - class Ptr { + /// A non-owning string class (similar to the forthcoming std::string_view) + /// Note that, because a StringRef may be a substring of another string, + /// it may not be null terminated. c_str() must return a null terminated + /// string, however, and so the StringRef will internally take ownership + /// (taking a copy), if necessary. In theory this ownership is not externally + /// visible - but it does mean (substring) StringRefs should not be shared between + /// threads. 
+ class StringRef { public: - Ptr() : m_p( CATCH_NULL ){} - Ptr( T* p ) : m_p( p ){ - if( m_p ) - m_p->addRef(); - } - Ptr( Ptr const& other ) : m_p( other.m_p ){ - if( m_p ) - m_p->addRef(); - } - ~Ptr(){ - if( m_p ) - m_p->release(); - } - void reset() { - if( m_p ) - m_p->release(); - m_p = CATCH_NULL; + using size_type = std::size_t; + using const_iterator = const char*; + + private: + friend struct StringRefTestAccess; + + char const* m_start; + size_type m_size; + + char* m_data = nullptr; + + void takeOwnership(); + + static constexpr char const* const s_empty = ""; + + public: // construction/ assignment + StringRef() noexcept + : StringRef( s_empty, 0 ) + {} + + StringRef( StringRef const& other ) noexcept + : m_start( other.m_start ), + m_size( other.m_size ) + {} + + StringRef( StringRef&& other ) noexcept + : m_start( other.m_start ), + m_size( other.m_size ), + m_data( other.m_data ) + { + other.m_data = nullptr; } - Ptr& operator = ( T* p ){ - Ptr temp( p ); - swap( temp ); - return *this; + + StringRef( char const* rawChars ) noexcept; + + StringRef( char const* rawChars, size_type size ) noexcept + : m_start( rawChars ), + m_size( size ) + {} + + StringRef( std::string const& stdString ) noexcept + : m_start( stdString.c_str() ), + m_size( stdString.size() ) + {} + + ~StringRef() noexcept { + delete[] m_data; } - Ptr& operator = ( Ptr const& other ){ - Ptr temp( other ); - swap( temp ); + + auto operator = ( StringRef const &other ) noexcept -> StringRef& { + delete[] m_data; + m_data = nullptr; + m_start = other.m_start; + m_size = other.m_size; return *this; } - void swap( Ptr& other ) { std::swap( m_p, other.m_p ); } - T* get() const{ return m_p; } - T& operator*() const { return *m_p; } - T* operator->() const { return m_p; } - bool operator !() const { return m_p == CATCH_NULL; } - operator SafeBool::type() const { return SafeBool::makeSafe( m_p != CATCH_NULL ); } - private: - T* m_p; - }; + explicit operator std::string() const { + return 
std::string(m_start, m_size); + } - struct IShared : NonCopyable { - virtual ~IShared(); - virtual void addRef() const = 0; - virtual void release() const = 0; - }; + void swap( StringRef& other ) noexcept; - template - struct SharedImpl : T { + public: // operators + auto operator == ( StringRef const& other ) const noexcept -> bool; + auto operator != ( StringRef const& other ) const noexcept -> bool; - SharedImpl() : m_rc( 0 ){} + auto operator[] ( size_type index ) const noexcept -> char { + assert(index < m_size); + return m_start[index]; + } - virtual void addRef() const { - ++m_rc; + public: // named queries + auto empty() const noexcept -> bool { + return m_size == 0; } - virtual void release() const { - if( --m_rc == 0 ) - delete this; + auto size() const noexcept -> size_type { + return m_size; } - mutable unsigned int m_rc; - }; + auto c_str() const -> char const*; -} // end namespace Catch + public: // substrings and searches + auto substr( size_type start, size_type size ) const noexcept -> StringRef; -#ifdef __clang__ -#pragma clang diagnostic pop -#endif + // Returns the current start pointer. 
+ // Note that the pointer can change if the StringRef is a substring + auto currentData() const noexcept -> char const*; + + public: // iterators + const_iterator begin() const { return m_start; } + const_iterator end() const { return m_start + m_size; } - class TestCase; - class Stream; - struct IResultCapture; - struct IRunner; - struct IGeneratorsForTest; - struct IConfig; + private: // ownership queries - may not be consistent between calls + auto isOwned() const noexcept -> bool; + auto isSubstring() const noexcept -> bool; + }; - struct IContext - { - virtual ~IContext(); + auto operator += ( std::string& lhs, StringRef const& sr ) -> std::string&; + auto operator << ( std::ostream& os, StringRef const& sr ) -> std::ostream&; - virtual IResultCapture* getResultCapture() = 0; - virtual IRunner* getRunner() = 0; - virtual size_t getGeneratorIndex( std::string const& fileInfo, size_t totalSize ) = 0; - virtual bool advanceGeneratorsForCurrentTest() = 0; - virtual Ptr getConfig() const = 0; - }; - - struct IMutableContext : IContext - { - virtual ~IMutableContext(); - virtual void setResultCapture( IResultCapture* resultCapture ) = 0; - virtual void setRunner( IRunner* runner ) = 0; - virtual void setConfig( Ptr const& config ) = 0; - }; + inline auto operator "" _sr( char const* rawChars, std::size_t size ) noexcept -> StringRef { + return StringRef( rawChars, size ); + } - IContext& getCurrentContext(); - IMutableContext& getCurrentMutableContext(); - void cleanUpContext(); - Stream createStream( std::string const& streamName ); +} // namespace Catch +inline auto operator "" _catch_sr( char const* rawChars, std::size_t size ) noexcept -> Catch::StringRef { + return Catch::StringRef( rawChars, size ); } -// #included from: internal/catch_test_registry.hpp -#define TWOBLUECUBES_CATCH_TEST_REGISTRY_HPP_INCLUDED +// end catch_stringref.h +// start catch_preprocessor.hpp -// #included from: catch_interfaces_testcase.h -#define
TWOBLUECUBES_CATCH_INTERFACES_TESTCASE_H_INCLUDED -#include +#define CATCH_RECURSION_LEVEL0(...) __VA_ARGS__ +#define CATCH_RECURSION_LEVEL1(...) CATCH_RECURSION_LEVEL0(CATCH_RECURSION_LEVEL0(CATCH_RECURSION_LEVEL0(__VA_ARGS__))) +#define CATCH_RECURSION_LEVEL2(...) CATCH_RECURSION_LEVEL1(CATCH_RECURSION_LEVEL1(CATCH_RECURSION_LEVEL1(__VA_ARGS__))) +#define CATCH_RECURSION_LEVEL3(...) CATCH_RECURSION_LEVEL2(CATCH_RECURSION_LEVEL2(CATCH_RECURSION_LEVEL2(__VA_ARGS__))) +#define CATCH_RECURSION_LEVEL4(...) CATCH_RECURSION_LEVEL3(CATCH_RECURSION_LEVEL3(CATCH_RECURSION_LEVEL3(__VA_ARGS__))) +#define CATCH_RECURSION_LEVEL5(...) CATCH_RECURSION_LEVEL4(CATCH_RECURSION_LEVEL4(CATCH_RECURSION_LEVEL4(__VA_ARGS__))) -namespace Catch { +#ifdef CATCH_CONFIG_TRADITIONAL_MSVC_PREPROCESSOR +#define INTERNAL_CATCH_EXPAND_VARGS(...) __VA_ARGS__ +// MSVC needs more evaluations +#define CATCH_RECURSION_LEVEL6(...) CATCH_RECURSION_LEVEL5(CATCH_RECURSION_LEVEL5(CATCH_RECURSION_LEVEL5(__VA_ARGS__))) +#define CATCH_RECURSE(...) CATCH_RECURSION_LEVEL6(CATCH_RECURSION_LEVEL6(__VA_ARGS__)) +#else +#define CATCH_RECURSE(...) CATCH_RECURSION_LEVEL5(__VA_ARGS__) +#endif - class TestSpec; +#define CATCH_REC_END(...) +#define CATCH_REC_OUT + +#define CATCH_EMPTY() +#define CATCH_DEFER(id) id CATCH_EMPTY() + +#define CATCH_REC_GET_END2() 0, CATCH_REC_END +#define CATCH_REC_GET_END1(...) CATCH_REC_GET_END2 +#define CATCH_REC_GET_END(...) CATCH_REC_GET_END1 +#define CATCH_REC_NEXT0(test, next, ...) next CATCH_REC_OUT +#define CATCH_REC_NEXT1(test, next) CATCH_DEFER ( CATCH_REC_NEXT0 ) ( test, next, 0) +#define CATCH_REC_NEXT(test, next) CATCH_REC_NEXT1(CATCH_REC_GET_END test, next) + +#define CATCH_REC_LIST0(f, x, peek, ...) , f(x) CATCH_DEFER ( CATCH_REC_NEXT(peek, CATCH_REC_LIST1) ) ( f, peek, __VA_ARGS__ ) +#define CATCH_REC_LIST1(f, x, peek, ...) , f(x) CATCH_DEFER ( CATCH_REC_NEXT(peek, CATCH_REC_LIST0) ) ( f, peek, __VA_ARGS__ ) +#define CATCH_REC_LIST2(f, x, peek, ...) 
f(x) CATCH_DEFER ( CATCH_REC_NEXT(peek, CATCH_REC_LIST1) ) ( f, peek, __VA_ARGS__ ) + +#define CATCH_REC_LIST0_UD(f, userdata, x, peek, ...) , f(userdata, x) CATCH_DEFER ( CATCH_REC_NEXT(peek, CATCH_REC_LIST1_UD) ) ( f, userdata, peek, __VA_ARGS__ ) +#define CATCH_REC_LIST1_UD(f, userdata, x, peek, ...) , f(userdata, x) CATCH_DEFER ( CATCH_REC_NEXT(peek, CATCH_REC_LIST0_UD) ) ( f, userdata, peek, __VA_ARGS__ ) +#define CATCH_REC_LIST2_UD(f, userdata, x, peek, ...) f(userdata, x) CATCH_DEFER ( CATCH_REC_NEXT(peek, CATCH_REC_LIST1_UD) ) ( f, userdata, peek, __VA_ARGS__ ) + +// Applies the function macro `f` to each of the remaining parameters, inserts commas between the results, +// and passes userdata as the first parameter to each invocation, +// e.g. CATCH_REC_LIST_UD(f, x, a, b, c) evaluates to f(x, a), f(x, b), f(x, c) +#define CATCH_REC_LIST_UD(f, userdata, ...) CATCH_RECURSE(CATCH_REC_LIST2_UD(f, userdata, __VA_ARGS__, ()()(), ()()(), ()()(), 0)) + +#define CATCH_REC_LIST(f, ...) CATCH_RECURSE(CATCH_REC_LIST2(f, __VA_ARGS__, ()()(), ()()(), ()()(), 0)) + +#define INTERNAL_CATCH_EXPAND1(param) INTERNAL_CATCH_EXPAND2(param) +#define INTERNAL_CATCH_EXPAND2(...) INTERNAL_CATCH_NO## __VA_ARGS__ +#define INTERNAL_CATCH_DEF(...) INTERNAL_CATCH_DEF __VA_ARGS__ +#define INTERNAL_CATCH_NOINTERNAL_CATCH_DEF +#define INTERNAL_CATCH_STRINGIZE(...) INTERNAL_CATCH_STRINGIZE2(__VA_ARGS__) +#ifndef CATCH_CONFIG_TRADITIONAL_MSVC_PREPROCESSOR +#define INTERNAL_CATCH_STRINGIZE2(...) #__VA_ARGS__ +#define INTERNAL_CATCH_STRINGIZE_WITHOUT_PARENS(param) INTERNAL_CATCH_STRINGIZE(INTERNAL_CATCH_REMOVE_PARENS(param)) +#else +// MSVC is adding extra space and needs another indirection to expand INTERNAL_CATCH_NOINTERNAL_CATCH_DEF +#define INTERNAL_CATCH_STRINGIZE2(...) INTERNAL_CATCH_STRINGIZE3(__VA_ARGS__) +#define INTERNAL_CATCH_STRINGIZE3(...) 
#__VA_ARGS__ +#define INTERNAL_CATCH_STRINGIZE_WITHOUT_PARENS(param) (INTERNAL_CATCH_STRINGIZE(INTERNAL_CATCH_REMOVE_PARENS(param)) + 1) +#endif - struct ITestCase : IShared { - virtual void invoke () const = 0; - protected: - virtual ~ITestCase(); - }; +#define INTERNAL_CATCH_MAKE_NAMESPACE2(...) ns_##__VA_ARGS__ +#define INTERNAL_CATCH_MAKE_NAMESPACE(name) INTERNAL_CATCH_MAKE_NAMESPACE2(name) - class TestCase; - struct IConfig; +#define INTERNAL_CATCH_REMOVE_PARENS(...) INTERNAL_CATCH_EXPAND1(INTERNAL_CATCH_DEF __VA_ARGS__) - struct ITestCaseRegistry { - virtual ~ITestCaseRegistry(); - virtual std::vector const& getAllTests() const = 0; - virtual std::vector const& getAllTestsSorted( IConfig const& config ) const = 0; - }; +#ifndef CATCH_CONFIG_TRADITIONAL_MSVC_PREPROCESSOR +#define INTERNAL_CATCH_MAKE_TYPE_LIST2(...) decltype(get_wrapper()) +#define INTERNAL_CATCH_MAKE_TYPE_LIST(...) INTERNAL_CATCH_MAKE_TYPE_LIST2(INTERNAL_CATCH_REMOVE_PARENS(__VA_ARGS__)) +#else +#define INTERNAL_CATCH_MAKE_TYPE_LIST2(...) INTERNAL_CATCH_EXPAND_VARGS(decltype(get_wrapper())) +#define INTERNAL_CATCH_MAKE_TYPE_LIST(...) 
INTERNAL_CATCH_EXPAND_VARGS(INTERNAL_CATCH_MAKE_TYPE_LIST2(INTERNAL_CATCH_REMOVE_PARENS(__VA_ARGS__))) +#endif - bool matchTest( TestCase const& testCase, TestSpec const& testSpec, IConfig const& config ); - std::vector filterTests( std::vector const& testCases, TestSpec const& testSpec, IConfig const& config ); - std::vector const& getAllTestCasesSorted( IConfig const& config ); +#define INTERNAL_CATCH_MAKE_TYPE_LISTS_FROM_TYPES(...)\ + CATCH_REC_LIST(INTERNAL_CATCH_MAKE_TYPE_LIST,__VA_ARGS__) + +#define INTERNAL_CATCH_REMOVE_PARENS_1_ARG(_0) INTERNAL_CATCH_REMOVE_PARENS(_0) +#define INTERNAL_CATCH_REMOVE_PARENS_2_ARG(_0, _1) INTERNAL_CATCH_REMOVE_PARENS(_0), INTERNAL_CATCH_REMOVE_PARENS_1_ARG(_1) +#define INTERNAL_CATCH_REMOVE_PARENS_3_ARG(_0, _1, _2) INTERNAL_CATCH_REMOVE_PARENS(_0), INTERNAL_CATCH_REMOVE_PARENS_2_ARG(_1, _2) +#define INTERNAL_CATCH_REMOVE_PARENS_4_ARG(_0, _1, _2, _3) INTERNAL_CATCH_REMOVE_PARENS(_0), INTERNAL_CATCH_REMOVE_PARENS_3_ARG(_1, _2, _3) +#define INTERNAL_CATCH_REMOVE_PARENS_5_ARG(_0, _1, _2, _3, _4) INTERNAL_CATCH_REMOVE_PARENS(_0), INTERNAL_CATCH_REMOVE_PARENS_4_ARG(_1, _2, _3, _4) +#define INTERNAL_CATCH_REMOVE_PARENS_6_ARG(_0, _1, _2, _3, _4, _5) INTERNAL_CATCH_REMOVE_PARENS(_0), INTERNAL_CATCH_REMOVE_PARENS_5_ARG(_1, _2, _3, _4, _5) +#define INTERNAL_CATCH_REMOVE_PARENS_7_ARG(_0, _1, _2, _3, _4, _5, _6) INTERNAL_CATCH_REMOVE_PARENS(_0), INTERNAL_CATCH_REMOVE_PARENS_6_ARG(_1, _2, _3, _4, _5, _6) +#define INTERNAL_CATCH_REMOVE_PARENS_8_ARG(_0, _1, _2, _3, _4, _5, _6, _7) INTERNAL_CATCH_REMOVE_PARENS(_0), INTERNAL_CATCH_REMOVE_PARENS_7_ARG(_1, _2, _3, _4, _5, _6, _7) +#define INTERNAL_CATCH_REMOVE_PARENS_9_ARG(_0, _1, _2, _3, _4, _5, _6, _7, _8) INTERNAL_CATCH_REMOVE_PARENS(_0), INTERNAL_CATCH_REMOVE_PARENS_8_ARG(_1, _2, _3, _4, _5, _6, _7, _8) +#define INTERNAL_CATCH_REMOVE_PARENS_10_ARG(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9) INTERNAL_CATCH_REMOVE_PARENS(_0), INTERNAL_CATCH_REMOVE_PARENS_9_ARG(_1, _2, _3, _4, _5, _6, _7, _8, _9)
+#define INTERNAL_CATCH_REMOVE_PARENS_11_ARG(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10) INTERNAL_CATCH_REMOVE_PARENS(_0), INTERNAL_CATCH_REMOVE_PARENS_10_ARG(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10) + +#define INTERNAL_CATCH_VA_NARGS_IMPL(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, N, ...) N + +#define INTERNAL_CATCH_TYPE_GEN\ + template struct TypeList {};\ + template\ + constexpr auto get_wrapper() noexcept -> TypeList { return {}; }\ + template class...> struct TemplateTypeList{};\ + template class...Cs>\ + constexpr auto get_wrapper() noexcept -> TemplateTypeList { return {}; }\ + template\ + struct append;\ + template\ + struct rewrap;\ + template class, typename...>\ + struct create;\ + template class, typename>\ + struct convert;\ + \ + template \ + struct append { using type = T; };\ + template< template class L1, typename...E1, template class L2, typename...E2, typename...Rest>\ + struct append, L2, Rest...> { using type = typename append, Rest...>::type; };\ + template< template class L1, typename...E1, typename...Rest>\ + struct append, TypeList, Rest...> { using type = L1; };\ + \ + template< template class Container, template class List, typename...elems>\ + struct rewrap, List> { using type = TypeList>; };\ + template< template class Container, template class List, class...Elems, typename...Elements>\ + struct rewrap, List, Elements...> { using type = typename append>, typename rewrap, Elements...>::type>::type; };\ + \ + template