Skip to content

Commit

Permalink
Sync BLAKE3
Browse files Browse the repository at this point in the history
  • Loading branch information
fcharlie committed Sep 21, 2024
1 parent 9f68386 commit d255866
Show file tree
Hide file tree
Showing 11 changed files with 2,700 additions and 73 deletions.
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ cmake_minimum_required(VERSION 3.27)

project(bela CXX C ASM)

include(FeatureSummary)

if(CMAKE_SOURCE_DIR STREQUAL CMAKE_BINARY_DIR AND NOT MSVC_IDE)
message(
FATAL_ERROR
Expand Down
199 changes: 148 additions & 51 deletions src/belahash/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,70 +1,167 @@
# bela::hash https://docs.microsoft.com/en-us/cpp/intrinsics/arm64-intrinsics
# https://static.docs.arm.com/ihi0073/c/IHI0073C_arm_neon_intrinsics_ref.pdf

set(BELA_BLAKE3_SOURCES blake3/blake3.c blake3/blake3_dispatch.c blake3/blake3_portable.c)
# Processor-name spellings recognized for each CPU family. Later conditions in
# this file test CMAKE_SYSTEM_PROCESSOR against these lists with IN_LIST to
# decide which assembly / SIMD sources to enable.
set(BLAKE3_AMD64_NAMES
  amd64
  AMD64
  x86_64
)
set(BLAKE3_X86_NAMES
  i686
  x86
  X86
)
set(BLAKE3_ARMv8_NAMES
  aarch64
  AArch64
  arm64
  ARM64
  armv8
  armv8a
)
# default SIMD compiler flag configuration (can be overridden by toolchains or CLI)
if(MSVC)
set(BLAKE3_CFLAGS_SSE2 "/arch:SSE2" CACHE STRING "the compiler flags to enable SSE2")
# MSVC has no dedicated sse4.1 flag (see https://learn.microsoft.com/en-us/cpp/build/reference/arch-x86?view=msvc-170)
set(BLAKE3_CFLAGS_SSE4.1 "/arch:AVX" CACHE STRING "the compiler flags to enable SSE4.1")
set(BLAKE3_CFLAGS_AVX2 "/arch:AVX2" CACHE STRING "the compiler flags to enable AVX2")
set(BLAKE3_CFLAGS_AVX512 "/arch:AVX512" CACHE STRING "the compiler flags to enable AVX512")

if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
set(BELA_ENABLE_ASSEMBLY_FILES ON)
set(BLAKE3_AMD64_ASM_SOURCES
blake3/blake3_avx2_x86-64_windows_msvc.asm
blake3/blake3_avx512_x86-64_windows_msvc.asm
blake3/blake3_sse2_x86-64_windows_msvc.asm
blake3/blake3_sse41_x86-64_windows_msvc.asm
)

elseif(CMAKE_C_COMPILER_ID STREQUAL "GNU"
OR CMAKE_C_COMPILER_ID STREQUAL "Clang"
OR CMAKE_C_COMPILER_ID STREQUAL "AppleClang")
set(BLAKE3_CFLAGS_SSE2 "-msse2" CACHE STRING "the compiler flags to enable SSE2")
set(BLAKE3_CFLAGS_SSE4.1 "-msse4.1" CACHE STRING "the compiler flags to enable SSE4.1")
set(BLAKE3_CFLAGS_AVX2 "-mavx2" CACHE STRING "the compiler flags to enable AVX2")
set(BLAKE3_CFLAGS_AVX512 "-mavx512f -mavx512vl" CACHE STRING "the compiler flags to enable AVX512")

if (WIN32)
set(BLAKE3_AMD64_ASM_SOURCES
blake3/blake3_avx2_x86-64_windows_gnu.S
blake3/blake3_avx512_x86-64_windows_gnu.S
blake3/blake3_sse2_x86-64_windows_gnu.S
blake3/blake3_sse41_x86-64_windows_gnu.S
)

elseif(UNIX)
set(BLAKE3_AMD64_ASM_SOURCES
blake3/blake3_avx2_x86-64_unix.S
blake3/blake3_avx512_x86-64_unix.S
blake3/blake3_sse2_x86-64_unix.S
blake3/blake3_sse41_x86-64_unix.S
)
endif()

if (CMAKE_SYSTEM_PROCESSOR IN_LIST BLAKE3_ARMv8_NAMES
AND NOT CMAKE_SIZEOF_VOID_P EQUAL 8)
# 32-bit ARMv8 needs NEON to be enabled explicitly
set(BLAKE3_CFLAGS_NEON "-mfpu=neon" CACHE STRING "the compiler flags to enable NEON")
endif()
endif()

# Hide the per-ISA compiler-flag variables and the assembly source list from
# the default (non-advanced) cache view.
mark_as_advanced(
  BLAKE3_CFLAGS_SSE2
  BLAKE3_CFLAGS_SSE4.1
  BLAKE3_CFLAGS_AVX2
  BLAKE3_CFLAGS_AVX512
  BLAKE3_CFLAGS_NEON
  BLAKE3_AMD64_ASM_SOURCES
)

# Configure-time status line.
# NOTE(review): the label reads "SIMD configuration" but the value printed is
# CMAKE_C_COMPILER_ARCHITECTURE_ID, not BLAKE3_SIMD_TYPE - confirm this is the
# intended output.
message(STATUS "BLAKE3 SIMD configuration: ${CMAKE_C_COMPILER_ARCHITECTURE_ID}")
if(MSVC AND DEFINED CMAKE_C_COMPILER_ARCHITECTURE_ID)
if(CMAKE_C_COMPILER_ARCHITECTURE_ID MATCHES "[Xx]86")
set(BLAKE3_SIMD_TYPE "x86-intrinsics" CACHE STRING "the SIMD acceleration type to use")

elseif(CMAKE_C_COMPILER_ARCHITECTURE_ID MATCHES "[Xx]64")
set(BLAKE3_SIMD_TYPE "amd64-asm" CACHE STRING "the SIMD acceleration type to use")

elseif(CMAKE_C_COMPILER_ARCHITECTURE_ID MATCHES "[Aa][Rr][Mm]64")
set(BLAKE3_SIMD_TYPE "neon-intrinsics" CACHE STRING "the SIMD acceleration type to use")

# FIXME CMAKE CMAKE_MSVC_RUNTIME_LIBRARY_DEFAULT
if(BELA_ARCHITECTURE_64BIT)
if(BELA_ENABLE_ASSEMBLY_FILES)
if(MSVC)
enable_language(ASM_MASM)
list(
APPEND
BELA_BLAKE3_SOURCES
blake3/blake3_sse2_x86-64_windows_msvc.asm
blake3/blake3_sse41_x86-64_windows_msvc.asm
blake3/blake3_avx2_x86-64_windows_msvc.asm
blake3/blake3_avx512_x86-64_windows_msvc.asm)
elseif(WIN32 OR CYGWIN)
list(
APPEND
BELA_BLAKE3_SOURCES
blake3/blake3_sse2_x86-64_windows_gnu.S
blake3/blake3_sse41_x86-64_windows_gnu.S
blake3/blake3_avx2_x86-64_windows_gnu.S
blake3/blake3_avx512_x86-64_windows_gnu.S)
else()
list(
APPEND
BELA_BLAKE3_SOURCES
blake3/blake3_sse2_x86-64_unix.S
blake3/blake3_sse41_x86-64_unix.S
blake3/blake3_avx2_x86-64_unix.S
blake3/blake3_avx512_x86-64_unix.S)
endif()
else()
list(
APPEND
BELA_BLAKE3_SOURCES
blake3/blake3_sse2.c
blake3/blake3_sse41.c
blake3/blake3_avx2.c
blake3/blake3_avx512.c)
endif(BELA_ENABLE_ASSEMBLY_FILES)
elseif(BELA_ARCHITECTURE_ARM64)
list(APPEND BELA_BLAKE3_SOURCES blake3/blake3_neon.c)
elseif(BELA_ARCHITECTURE_32BIT)
list(
APPEND
BELA_BLAKE3_SOURCES
blake3/blake3_sse2.c
blake3/blake3_sse41.c
blake3/blake3_avx2.c
blake3/blake3_avx512.c)
set(BLAKE3_SIMD_TYPE "none" CACHE STRING "the SIMD acceleration type to use")
endif()

elseif(CMAKE_SYSTEM_PROCESSOR IN_LIST BLAKE3_AMD64_NAMES)
set(BLAKE3_SIMD_TYPE "amd64-asm" CACHE STRING "the SIMD acceleration type to use")

elseif(CMAKE_SYSTEM_PROCESSOR IN_LIST BLAKE3_X86_NAMES
AND DEFINED BLAKE3_CFLAGS_SSE2
AND DEFINED BLAKE3_CFLAGS_SSE4.1
AND DEFINED BLAKE3_CFLAGS_AVX2
AND DEFINED BLAKE3_CFLAGS_AVX512)
set(BLAKE3_SIMD_TYPE "x86-intrinsics" CACHE STRING "the SIMD acceleration type to use")

elseif((CMAKE_SYSTEM_PROCESSOR IN_LIST BLAKE3_ARMv8_NAMES
OR ANDROID_ABI STREQUAL "armeabi-v7a"
OR BLAKE3_USE_NEON_INTRINSICS)
AND (DEFINED BLAKE3_CFLAGS_NEON
OR CMAKE_SIZEOF_VOID_P EQUAL 8))
set(BLAKE3_SIMD_TYPE "neon-intrinsics" CACHE STRING "the SIMD acceleration type to use")

else()
set(BLAKE3_SIMD_TYPE "none" CACHE STRING "the SIMD acceleration type to use")
endif()

# Keep the SIMD backend selector out of the default (non-advanced) cache view;
# it can still be set explicitly, e.g. with -DBLAKE3_SIMD_TYPE=<value>.
mark_as_advanced(BLAKE3_SIMD_TYPE)

add_library(
belahash STATIC
sha256.cc
sha512.cc
sha3.cc
sm3.cc
${BELA_BLAKE3_SOURCES})
blake3/blake3.c
blake3/blake3_dispatch.c
blake3/blake3_portable.c)

# optional SIMD sources
# Attach the optional SIMD implementation of BLAKE3 to the belahash target.
#
# BLAKE3_SIMD_TYPE selects exactly one backend:
#   amd64-asm        hand-written assembly listed in BLAKE3_AMD64_ASM_SOURCES
#   x86-intrinsics   C intrinsics, each file compiled with its own ISA flags
#   neon-intrinsics  ARM NEON C intrinsics
#   none             portable code only; every SIMD path is compiled out
# Any other value aborts configuration.
#
# NOTE(review): the BLAKE3_SIMD_* variables set to ON below are not read in the
# visible part of this file - presumably consumed elsewhere; confirm.
if("${BLAKE3_SIMD_TYPE}" STREQUAL "amd64-asm")
  # The assembly backend is unusable without a source list for this toolchain.
  if(NOT DEFINED BLAKE3_AMD64_ASM_SOURCES)
    message(FATAL_ERROR "BLAKE3_SIMD_TYPE is set to 'amd64-asm' but no assembly sources are available for the target architecture.")
  endif()
  set(BLAKE3_SIMD_AMD64_ASM ON)

  # The *_windows_msvc.asm files need the MASM language enabled.
  if(MSVC)
    enable_language(ASM_MASM)
  endif()

  target_sources(belahash PRIVATE ${BLAKE3_AMD64_ASM_SOURCES})

elseif("${BLAKE3_SIMD_TYPE}" STREQUAL "x86-intrinsics")
  # All four per-ISA flag variables must exist before the intrinsics files can
  # be compiled.
  if(NOT (DEFINED BLAKE3_CFLAGS_SSE2
          AND DEFINED BLAKE3_CFLAGS_SSE4.1
          AND DEFINED BLAKE3_CFLAGS_AVX2
          AND DEFINED BLAKE3_CFLAGS_AVX512))
    message(FATAL_ERROR "BLAKE3_SIMD_TYPE is set to 'x86-intrinsics' but no compiler flags are available for the target architecture.")
  endif()
  set(BLAKE3_SIMD_X86_INTRINSICS ON)

  target_sources(belahash
    PRIVATE
      blake3/blake3_avx2.c
      blake3/blake3_avx512.c
      blake3/blake3_sse2.c
      blake3/blake3_sse41.c
  )

  # Each translation unit gets only the flags for the instruction set it
  # implements; the rest of the target keeps the default flags.
  set_source_files_properties(blake3/blake3_avx2.c
    PROPERTIES COMPILE_FLAGS "${BLAKE3_CFLAGS_AVX2}")
  set_source_files_properties(blake3/blake3_avx512.c
    PROPERTIES COMPILE_FLAGS "${BLAKE3_CFLAGS_AVX512}")
  set_source_files_properties(blake3/blake3_sse2.c
    PROPERTIES COMPILE_FLAGS "${BLAKE3_CFLAGS_SSE2}")
  set_source_files_properties(blake3/blake3_sse41.c
    PROPERTIES COMPILE_FLAGS "${BLAKE3_CFLAGS_SSE4.1}")

elseif("${BLAKE3_SIMD_TYPE}" STREQUAL "neon-intrinsics")
  set(BLAKE3_SIMD_NEON_INTRINSICS ON)

  target_sources(belahash
    PRIVATE
      blake3/blake3_neon.c
  )
  target_compile_definitions(belahash
    PRIVATE
      BLAKE3_USE_NEON=1
  )

  # Extra NEON flags are applied only when BLAKE3_CFLAGS_NEON is defined.
  if(DEFINED BLAKE3_CFLAGS_NEON)
    set_source_files_properties(blake3/blake3_neon.c
      PROPERTIES COMPILE_FLAGS "${BLAKE3_CFLAGS_NEON}")
  endif()

elseif("${BLAKE3_SIMD_TYPE}" STREQUAL "none")
  # Portable build: explicitly disable every SIMD code path.
  target_compile_definitions(belahash
    PRIVATE
      BLAKE3_USE_NEON=0
      BLAKE3_NO_SSE2
      BLAKE3_NO_SSE41
      BLAKE3_NO_AVX2
      BLAKE3_NO_AVX512
  )

else()
  message(FATAL_ERROR "BLAKE3_SIMD_TYPE is set to an unknown value: '${BLAKE3_SIMD_TYPE}'")
endif()

# belahash depends on the core bela library. PUBLIC states explicitly what the
# legacy keyword-less signature did implicitly: the dependency is used by
# belahash and propagated to anything that links belahash.
# NOTE(review): CMake rejects mixing keyword and plain signatures on the same
# target - confirm no other target_link_libraries(belahash ...) call uses the
# plain form.
target_link_libraries(belahash PUBLIC bela)

Expand Down
2 changes: 1 addition & 1 deletion src/belahash/blake3.lock
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
https://github.com/BLAKE3-team/BLAKE3
fc2f7e4206f016b0cac0593f23a7d5976ce066e6
e81557689b0c80abf312772a1a5f89d1881a9878
2 changes: 1 addition & 1 deletion src/belahash/blake3/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ if (POLICY CMP0102)
endif()

project(libblake3
VERSION 1.5.2
VERSION 1.5.4
DESCRIPTION "BLAKE3 C implementation"
LANGUAGES C ASM
)
Expand Down
73 changes: 73 additions & 0 deletions src/belahash/blake3/CMakePresets.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
{
  "version": 3,
  "cmakeMinimumRequired": { "major": 3, "minor": 22, "patch": 0 },
  "configurePresets": [
    {
      "name": "base",
      "hidden": true,
      "binaryDir": "${sourceDir}/build/${presetName}"
    },
    {
      "name": "msvc",
      "hidden": true,
      "generator": "Visual Studio 17 2022",
      "vendor": {
        "microsoft.com/VisualStudioSettings/CMake/1.0": {
          "hostOS": ["Windows"]
        }
      }
    },
    {
      "name": "x64-windows-msvc",
      "inherits": ["msvc", "base"],
      "architecture": "x64"
    },
    {
      "name": "x86-windows-msvc",
      "inherits": ["msvc", "base"],
      "architecture": "Win32"
    },
    {
      "name": "arm64-windows-msvc",
      "inherits": ["msvc", "base"],
      "architecture": "ARM64"
    }
  ],
  "buildPresets": [
    {
      "name": "x64-windows-msvc-debug",
      "configurePreset": "x64-windows-msvc",
      "configuration": "Debug"
    },
    {
      "name": "x64-windows-msvc-release",
      "configurePreset": "x64-windows-msvc",
      "configuration": "RelWithDebInfo"
    },
    {
      "name": "x86-windows-msvc-debug",
      "configurePreset": "x86-windows-msvc",
      "configuration": "Debug"
    },
    {
      "name": "x86-windows-msvc-release",
      "configurePreset": "x86-windows-msvc",
      "configuration": "RelWithDebInfo"
    }
  ]
}
34 changes: 20 additions & 14 deletions src/belahash/blake3/blake3.c
Original file line number Diff line number Diff line change
Expand Up @@ -88,24 +88,30 @@ INLINE void output_chaining_value(const output_t *self, uint8_t cv[32]) {

INLINE void output_root_bytes(const output_t *self, uint64_t seek, uint8_t *out,
size_t out_len) {
if (out_len == 0) {
return;
}
uint64_t output_block_counter = seek / 64;
size_t offset_within_block = seek % 64;
uint8_t wide_buf[64];
while (out_len > 0) {
blake3_compress_xof(self->input_cv, self->block, self->block_len,
output_block_counter, self->flags | ROOT, wide_buf);
size_t available_bytes = 64 - offset_within_block;
size_t memcpy_len;
if (out_len > available_bytes) {
memcpy_len = available_bytes;
} else {
memcpy_len = out_len;
}
memcpy(out, wide_buf + offset_within_block, memcpy_len);
out += memcpy_len;
out_len -= memcpy_len;
if(offset_within_block) {
blake3_compress_xof(self->input_cv, self->block, self->block_len, output_block_counter, self->flags | ROOT, wide_buf);
const size_t available_bytes = 64 - offset_within_block;
const size_t bytes = out_len > available_bytes ? available_bytes : out_len;
memcpy(out, wide_buf + offset_within_block, bytes);
out += bytes;
out_len -= bytes;
output_block_counter += 1;
offset_within_block = 0;
}
if(out_len / 64) {
blake3_xof_many(self->input_cv, self->block, self->block_len, output_block_counter, self->flags | ROOT, out, out_len / 64);
}
output_block_counter += out_len / 64;
out += out_len & -64;
out_len -= out_len & -64;
if(out_len) {
blake3_compress_xof(self->input_cv, self->block, self->block_len, output_block_counter, self->flags | ROOT, wide_buf);
memcpy(out, wide_buf, out_len);
}
}

Expand Down
2 changes: 1 addition & 1 deletion src/belahash/blake3/blake3.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
extern "C" {
#endif

#define BLAKE3_VERSION_STRING "1.5.2"
#define BLAKE3_VERSION_STRING "1.5.4"
#define BLAKE3_KEY_LEN 32
#define BLAKE3_OUT_LEN 32
#define BLAKE3_BLOCK_LEN 64
Expand Down
Loading

0 comments on commit d255866

Please sign in to comment.