Skip to content

Commit

Permalink
Update BLAKE3
Browse files Browse the repository at this point in the history
  • Loading branch information
fcharlie committed Jul 14, 2024
1 parent 789d130 commit 5454497
Show file tree
Hide file tree
Showing 8 changed files with 158 additions and 89 deletions.
2 changes: 1 addition & 1 deletion src/belahash/blake3.lock
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
https://github.com/BLAKE3-team/BLAKE3
4d32708f511fd85c6b0fb131295cc73224246738
fc2f7e4206f016b0cac0593f23a7d5976ce066e6
3 changes: 3 additions & 0 deletions src/belahash/blake3/.gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
blake3
example
build/
*.o

CMakeUserPresets.json
192 changes: 128 additions & 64 deletions src/belahash/blake3/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,34 +1,114 @@
cmake_minimum_required(VERSION 3.9)
cmake_minimum_required(VERSION 3.9 FATAL_ERROR)

# respect C_EXTENSIONS OFF without explicitly setting C_STANDARD
if (POLICY CMP0128)
cmake_policy(SET CMP0128 NEW)
endif()
# mark_as_advanced does not implicitly create UNINITIALIZED cache entries
if (POLICY CMP0102)
cmake_policy(SET CMP0102 NEW)
endif()

project(libblake3
VERSION 1.4.0
VERSION 1.5.2
DESCRIPTION "BLAKE3 C implementation"
LANGUAGES C ASM
)

include(FeatureSummary)
include(GNUInstallDirs)

# architecture lists for which to enable assembly / SIMD sources
set(BLAKE3_AMD64_NAMES amd64 AMD64 x86_64)
set(BLAKE3_X86_NAMES i686 x86 X86)
set(BLAKE3_ARMv8_NAMES aarch64 AArch64 arm64 ARM64 armv8 armv8a)
# default SIMD compiler flag configuration (can be overriden by toolchains or CLI)
if(CMAKE_C_COMPILER_ID STREQUAL "MSVC")
if(MSVC)
set(BLAKE3_CFLAGS_SSE2 "/arch:SSE2" CACHE STRING "the compiler flags to enable SSE2")
# MSVC has no dedicated sse4.1 flag (see https://learn.microsoft.com/en-us/cpp/build/reference/arch-x86?view=msvc-170)
set(BLAKE3_CFLAGS_SSE4.1 "/arch:AVX" CACHE STRING "the compiler flags to enable SSE4.1")
set(BLAKE3_CFLAGS_AVX2 "/arch:AVX2" CACHE STRING "the compiler flags to enable AVX2")
set(BLAKE3_CFLAGS_AVX512 "/arch:AVX512" CACHE STRING "the compiler flags to enable AVX512")

set(BLAKE3_AMD64_ASM_SOURCES
blake3_avx2_x86-64_windows_msvc.asm
blake3_avx512_x86-64_windows_msvc.asm
blake3_sse2_x86-64_windows_msvc.asm
blake3_sse41_x86-64_windows_msvc.asm
)

elseif(CMAKE_C_COMPILER_ID STREQUAL "GNU"
OR CMAKE_C_COMPILER_ID STREQUAL "Clang"
OR CMAKE_C_COMPILER_ID STREQUAL "AppleClang")
set(BLAKE3_CFLAGS_SSE2 "-msse2" CACHE STRING "the compiler flags to enable SSE2")
set(BLAKE3_CFLAGS_SSE4.1 "-msse4.1" CACHE STRING "the compiler flags to enable SSE4.1")
set(BLAKE3_CFLAGS_AVX2 "-mavx2" CACHE STRING "the compiler flags to enable AVX2")
set(BLAKE3_CFLAGS_AVX512 "-mavx512f -mavx512vl" CACHE STRING "the compiler flags to enable AVX512")

if (WIN32)
set(BLAKE3_AMD64_ASM_SOURCES
blake3_avx2_x86-64_windows_gnu.S
blake3_avx512_x86-64_windows_gnu.S
blake3_sse2_x86-64_windows_gnu.S
blake3_sse41_x86-64_windows_gnu.S
)

elseif(UNIX)
set(BLAKE3_AMD64_ASM_SOURCES
blake3_avx2_x86-64_unix.S
blake3_avx512_x86-64_unix.S
blake3_sse2_x86-64_unix.S
blake3_sse41_x86-64_unix.S
)
endif()

if (CMAKE_SYSTEM_PROCESSOR IN_LIST BLAKE3_ARMv8_NAMES
AND NOT CMAKE_SIZEOF_VOID_P EQUAL 8)
# 32-bit ARMv8 needs NEON to be enabled explicitly
set(BLAKE3_CFLAGS_NEON "-mfpu=neon" CACHE STRING "the compiler flags to enable NEON")
endif()
endif()
# architecture lists for which to enable assembly / SIMD sources
set(BLAKE3_AMD64_NAMES amd64 AMD64 x86_64)
set(BLAKE3_X86_NAMES i686 x86 X86)
set(BLAKE3_ARMv8_NAMES aarch64 AArch64 arm64 ARM64 armv8 armv8a)

mark_as_advanced(BLAKE3_CFLAGS_SSE2 BLAKE3_CFLAGS_SSE4.1 BLAKE3_CFLAGS_AVX2 BLAKE3_CFLAGS_AVX512 BLAKE3_CFLAGS_NEON)
mark_as_advanced(BLAKE3_AMD64_ASM_SOURCES)

message(STATUS "BLAKE3 SIMD configuration: ${CMAKE_C_COMPILER_ARCHITECTURE_ID}")
if(MSVC AND DEFINED CMAKE_C_COMPILER_ARCHITECTURE_ID)
if(CMAKE_C_COMPILER_ARCHITECTURE_ID MATCHES "[Xx]86")
set(BLAKE3_SIMD_TYPE "x86-intrinsics" CACHE STRING "the SIMD acceleration type to use")

elseif(CMAKE_C_COMPILER_ARCHITECTURE_ID MATCHES "[Xx]64")
set(BLAKE3_SIMD_TYPE "amd64-asm" CACHE STRING "the SIMD acceleration type to use")

elseif(CMAKE_C_COMPILER_ARCHITECTURE_ID MATCHES "[Aa][Rr][Mm]64")
set(BLAKE3_SIMD_TYPE "neon-intrinsics" CACHE STRING "the SIMD acceleration type to use")

else()
set(BLAKE3_SIMD_TYPE "none" CACHE STRING "the SIMD acceleration type to use")
endif()

elseif(CMAKE_SYSTEM_PROCESSOR IN_LIST BLAKE3_AMD64_NAMES)
set(BLAKE3_SIMD_TYPE "amd64-asm" CACHE STRING "the SIMD acceleration type to use")

elseif(CMAKE_SYSTEM_PROCESSOR IN_LIST BLAKE3_X86_NAMES
AND DEFINED BLAKE3_CFLAGS_SSE2
AND DEFINED BLAKE3_CFLAGS_SSE4.1
AND DEFINED BLAKE3_CFLAGS_AVX2
AND DEFINED BLAKE3_CFLAGS_AVX512)
set(BLAKE3_SIMD_TYPE "x86-intrinsics" CACHE STRING "the SIMD acceleration type to use")

elseif((CMAKE_SYSTEM_PROCESSOR IN_LIST BLAKE3_ARMv8_NAMES
OR ANDROID_ABI STREQUAL "armeabi-v7a"
OR BLAKE3_USE_NEON_INTRINSICS)
AND (DEFINED BLAKE3_CFLAGS_NEON
OR CMAKE_SIZEOF_VOID_P EQUAL 8))
set(BLAKE3_SIMD_TYPE "neon-intrinsics" CACHE STRING "the SIMD acceleration type to use")

else()
set(BLAKE3_SIMD_TYPE "none" CACHE STRING "the SIMD acceleration type to use")
endif()

mark_as_advanced(BLAKE3_SIMD_TYPE)

# library target
add_library(blake3
Expand All @@ -41,73 +121,49 @@ add_library(BLAKE3::blake3 ALIAS blake3)
# library configuration
set(BLAKE3_PKGCONFIG_CFLAGS)
if (BUILD_SHARED_LIBS)
target_compile_definitions(blake3
target_compile_definitions(blake3
PUBLIC BLAKE3_DLL
PRIVATE BLAKE3_DLL_EXPORTS
)
list(APPEND BLAKE3_PKGCONFIG_CFLAGS -DBLAKE3_DLL)
endif()
target_include_directories(blake3 PUBLIC $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>)
target_include_directories(blake3 PUBLIC
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
)
set_target_properties(blake3 PROPERTIES
VERSION ${PROJECT_VERSION}
SOVERSION 0
C_VISIBILITY_PRESET hidden
C_EXTENSIONS OFF
)
target_compile_features(blake3 PUBLIC c_std_99)
# ensure C_EXTENSIONS OFF is respected without overriding CMAKE_C_STANDARD
# which may be set by the user or toolchain file
if (NOT POLICY CMP0128 AND NOT DEFINED CMAKE_C_STANDARD)
set_target_properties(blake3 PROPERTIES C_STANDARD 99)
endif()

# optional SIMD sources
macro(BLAKE3_DISABLE_SIMD)
set(BLAKE3_SIMD_AMD64_ASM OFF)
set(BLAKE3_SIMD_X86_INTRINSICS OFF)
set(BLAKE3_SIMD_NEON_INTRINSICS OFF)
set_source_files_properties(blake3_dispatch.c PROPERTIES
COMPILE_DEFINITIONS BLAKE3_USE_NEON=0;BLAKE3_NO_SSE2;BLAKE3_NO_SSE41;BLAKE3_NO_AVX2;BLAKE3_NO_AVX512
)
endmacro()

if(CMAKE_SYSTEM_PROCESSOR IN_LIST BLAKE3_AMD64_NAMES OR BLAKE3_USE_AMD64_ASM)
if(BLAKE3_SIMD_TYPE STREQUAL "amd64-asm")
if (NOT DEFINED BLAKE3_AMD64_ASM_SOURCES)
message(FATAL_ERROR "BLAKE3_SIMD_TYPE is set to 'amd64-asm' but no assembly sources are available for the target architecture.")
endif()
set(BLAKE3_SIMD_AMD64_ASM ON)

if(CMAKE_C_COMPILER_ID STREQUAL "MSVC")
if(MSVC)
enable_language(ASM_MASM)
target_sources(blake3 PRIVATE
blake3_avx2_x86-64_windows_msvc.asm
blake3_avx512_x86-64_windows_msvc.asm
blake3_sse2_x86-64_windows_msvc.asm
blake3_sse41_x86-64_windows_msvc.asm
)

elseif(CMAKE_C_COMPILER_ID STREQUAL "GNU"
OR CMAKE_C_COMPILER_ID STREQUAL "Clang"
OR CMAKE_C_COMPILER_ID STREQUAL "AppleClang")
if (WIN32)
target_sources(blake3 PRIVATE
blake3_avx2_x86-64_windows_gnu.S
blake3_avx512_x86-64_windows_gnu.S
blake3_sse2_x86-64_windows_gnu.S
blake3_sse41_x86-64_windows_gnu.S
)

elseif(UNIX)
target_sources(blake3 PRIVATE
blake3_avx2_x86-64_unix.S
blake3_avx512_x86-64_unix.S
blake3_sse2_x86-64_unix.S
blake3_sse41_x86-64_unix.S
)

else()
BLAKE3_DISABLE_SIMD()
endif()

else()
BLAKE3_DISABLE_SIMD()
endif()

elseif((CMAKE_SYSTEM_PROCESSOR IN_LIST BLAKE3_X86_NAMES OR BLAKE3_USE_X86_INTRINSICS)
AND DEFINED BLAKE3_CFLAGS_SSE2
AND DEFINED BLAKE3_CFLAGS_SSE4.1
AND DEFINED BLAKE3_CFLAGS_AVX2
AND DEFINED BLAKE3_CFLAGS_AVX512)
target_sources(blake3 PRIVATE ${BLAKE3_AMD64_ASM_SOURCES})

elseif(BLAKE3_SIMD_TYPE STREQUAL "x86-intrinsics")
if (NOT DEFINED BLAKE3_CFLAGS_SSE2
OR NOT DEFINED BLAKE3_CFLAGS_SSE4.1
OR NOT DEFINED BLAKE3_CFLAGS_AVX2
OR NOT DEFINED BLAKE3_CFLAGS_AVX512)
message(FATAL_ERROR "BLAKE3_SIMD_TYPE is set to 'x86-intrinsics' but no compiler flags are available for the target architecture.")
endif()
set(BLAKE3_SIMD_X86_INTRINSICS ON)

target_sources(blake3 PRIVATE
Expand All @@ -121,24 +177,31 @@ elseif((CMAKE_SYSTEM_PROCESSOR IN_LIST BLAKE3_X86_NAMES OR BLAKE3_USE_X86_INTRIN
set_source_files_properties(blake3_sse2.c PROPERTIES COMPILE_FLAGS "${BLAKE3_CFLAGS_SSE2}")
set_source_files_properties(blake3_sse41.c PROPERTIES COMPILE_FLAGS "${BLAKE3_CFLAGS_SSE4.1}")

elseif(CMAKE_SYSTEM_PROCESSOR IN_LIST BLAKE3_ARMv8_NAMES
OR ((ANDROID_ABI STREQUAL "armeabi-v7a"
OR BLAKE3_USE_NEON_INTRINSICS)
AND (DEFINED BLAKE3_CFLAGS_NEON
OR CMAKE_SIZEOF_VOID_P EQUAL 8)))
elseif(BLAKE3_SIMD_TYPE STREQUAL "neon-intrinsics")
set(BLAKE3_SIMD_NEON_INTRINSICS ON)

target_sources(blake3 PRIVATE
blake3_neon.c
)
set_source_files_properties(blake3_dispatch.c PROPERTIES COMPILE_DEFINITIONS BLAKE3_USE_NEON=1)
target_compile_definitions(blake3 PRIVATE
BLAKE3_USE_NEON=1
)

if (DEFINED BLAKE3_CFLAGS_NEON)
set_source_files_properties(blake3_neon.c PROPERTIES COMPILE_FLAGS "${BLAKE3_CFLAGS_NEON}")
endif()

elseif(BLAKE3_SIMD_TYPE STREQUAL "none")
target_compile_definitions(blake3 PRIVATE
BLAKE3_USE_NEON=0
BLAKE3_NO_SSE2
BLAKE3_NO_SSE41
BLAKE3_NO_AVX2
BLAKE3_NO_AVX512
)

else()
BLAKE3_DISABLE_SIMD()
message(FATAL_ERROR "BLAKE3_SIMD_TYPE is set to an unknown value: '${BLAKE3_SIMD_TYPE}'")
endif()

# cmake install support
Expand Down Expand Up @@ -171,6 +234,7 @@ install(FILES "${CMAKE_BINARY_DIR}/libblake3.pc"
DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig")

# print feature summary
# add_feature_info cannot directly use the BLAKE3_SIMD_TYPE :(
add_feature_info("AMD64 assembly" BLAKE3_SIMD_AMD64_ASM "The library uses hand written amd64 SIMD assembly.")
add_feature_info("x86 SIMD intrinsics" BLAKE3_SIMD_X86_INTRINSICS "The library uses x86 SIMD intrinsics.")
add_feature_info("NEON SIMD intrinsics" BLAKE3_SIMD_NEON_INTRINSICS "The library uses NEON SIMD intrinsics.")
Expand Down
25 changes: 13 additions & 12 deletions src/belahash/blake3/blake3.c
Original file line number Diff line number Diff line change
Expand Up @@ -134,9 +134,7 @@ INLINE void chunk_state_update(blake3_chunk_state *self, const uint8_t *input,
input_len -= BLAKE3_BLOCK_LEN;
}

size_t take = chunk_state_fill_buf(self, input, input_len);
input += take;
input_len -= take;
chunk_state_fill_buf(self, input, input_len);
}

INLINE output_t chunk_state_output(const blake3_chunk_state *self) {
Expand Down Expand Up @@ -341,21 +339,24 @@ INLINE void compress_subtree_to_parent_node(
size_t num_cvs = blake3_compress_subtree_wide(input, input_len, key,
chunk_counter, flags, cv_array);
assert(num_cvs <= MAX_SIMD_DEGREE_OR_2);

// If MAX_SIMD_DEGREE is greater than 2 and there's enough input,
// The following loop never executes when MAX_SIMD_DEGREE_OR_2 is 2, because
// as we just asserted, num_cvs will always be <=2 in that case. But GCC
// (particularly GCC 8.5) can't tell that it never executes, and if NDEBUG is
// set then it emits incorrect warnings here. We tried a few different
// hacks to silence these, but in the end our hacks just produced different
// warnings (see https://github.com/BLAKE3-team/BLAKE3/pull/380). Out of
// desperation, we ifdef out this entire loop when we know it's not needed.
#if MAX_SIMD_DEGREE_OR_2 > 2
// If MAX_SIMD_DEGREE_OR_2 is greater than 2 and there's enough input,
// compress_subtree_wide() returns more than 2 chaining values. Condense
// them into 2 by forming parent nodes repeatedly.
uint8_t out_array[MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN / 2];
// The second half of this loop condition is always true, and we just
// asserted it above. But GCC can't tell that it's always true, and if NDEBUG
// is set on platforms where MAX_SIMD_DEGREE_OR_2 == 2, GCC emits spurious
// warnings here. GCC 8.5 is particularly sensitive, so if you're changing
// this code, test it against that version.
while (num_cvs > 2 && num_cvs <= MAX_SIMD_DEGREE_OR_2) {
while (num_cvs > 2) {
num_cvs =
compress_parents_parallel(cv_array, num_cvs, key, flags, out_array);
memcpy(cv_array, out_array, num_cvs * BLAKE3_OUT_LEN);
}
#endif
memcpy(out, cv_array, 2 * BLAKE3_OUT_LEN);
}

Expand Down Expand Up @@ -427,7 +428,7 @@ INLINE void hasher_merge_cv_stack(blake3_hasher *self, uint64_t total_len) {
// of the whole tree, and it would need to be ROOT finalized. We can't
// compress it until we know.
// 2) This 64 KiB input might complete a larger tree, whose root node is
// similarly going to be the the root of the whole tree. For example, maybe
// similarly going to be the root of the whole tree. For example, maybe
// we have 196 KiB (that is, 128 + 64) hashed so far. We can't compress the
// node at the root of the 256 KiB subtree until we know how to finalize it.
//
Expand Down
2 changes: 1 addition & 1 deletion src/belahash/blake3/blake3.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
extern "C" {
#endif

#define BLAKE3_VERSION_STRING "1.5.0"
#define BLAKE3_VERSION_STRING "1.5.2"
#define BLAKE3_KEY_LEN 32
#define BLAKE3_OUT_LEN 32
#define BLAKE3_BLOCK_LEN 64
Expand Down
11 changes: 7 additions & 4 deletions src/belahash/blake3/blake3_dispatch.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,12 @@

#include "blake3_impl.h"

#if defined(IS_X86)
#if defined(_MSC_VER)
#include <Windows.h>
#endif

#if defined(IS_X86)
#if defined(_MSC_VER)
#include <intrin.h>
#elif defined(__GNUC__)
#include <immintrin.h>
Expand All @@ -32,9 +35,9 @@
#define ATOMIC_LOAD(x) x
#define ATOMIC_STORE(x, y) x = y
#elif defined(_MSC_VER)
#define ATOMIC_INT long
#define ATOMIC_LOAD(x) _InterlockedOr(&x, 0)
#define ATOMIC_STORE(x, y) _InterlockedExchange(&x, y)
#define ATOMIC_INT LONG
#define ATOMIC_LOAD(x) InterlockedOr(&x, 0)
#define ATOMIC_STORE(x, y) InterlockedExchange(&x, y)
#else
#define ATOMIC_INT int
#define ATOMIC_LOAD(x) x
Expand Down
4 changes: 2 additions & 2 deletions src/belahash/blake3/blake3_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ enum blake3_flags {
#define INLINE static inline __attribute__((always_inline))
#endif

#if defined(__x86_64__) || defined(_M_X64)
#if (defined(__x86_64__) || defined(_M_X64)) && !defined(_M_ARM64EC)
#define IS_X86
#define IS_X86_64
#endif
Expand All @@ -38,7 +38,7 @@ enum blake3_flags {
#define IS_X86_32
#endif

#if defined(__aarch64__) || defined(_M_ARM64)
#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
#define IS_AARCH64
#endif

Expand Down
Loading

0 comments on commit 5454497

Please sign in to comment.