Skip to content

Commit

Permalink
Use BLAKE3 from LLVM (11% faster)
Browse files Browse the repository at this point in the history
Results are the same:

  upstream: SELECT hex(BLAKE3('bar')): F2E897EED7D206CD855D441598FA521ABC75AA96953E97C030C9612C30C1293D
  llvm:     SELECT hex(BLAKE3('bar')): F2E897EED7D206CD855D441598FA521ABC75AA96953E97C030C9612C30C1293D

Query for benchmark:

    SELECT ignore(BLAKE3(materialize('Lorem ipsum dolor sit amet, consectetur adipiscing elit'))) FROM numbers(1000000000) FORMAT `Null`

  upstream           : Elapsed: 2.494 sec. Processed 31.13 million rows, 249.08 MB (12.48 million rows/s., 99.86 MB/s.)
  upstream + rust lto:                                                             ~14.00 million rows/s
  llvm               : Elapsed: 3.053 sec. Processed 43.24 million rows, 345.88 MB (14.16 million rows/s., 113.28 MB/s.)

Also note that integrating_rust_libraries.md is now deprecated.

P.S. The LLVM implementation was chosen over Rust + LTO because there
are issues with linking multiple Rust libraries together with LTO:
- https://alanwu.space/post/symbol-hygiene/
- rust-lang/rust#44322
  • Loading branch information
azat committed May 6, 2023
1 parent 3f0c360 commit d087b30
Show file tree
Hide file tree
Showing 8 changed files with 10 additions and 200 deletions.
3 changes: 0 additions & 3 deletions rust/BLAKE3/CMakeLists.txt

This file was deleted.

92 changes: 0 additions & 92 deletions rust/BLAKE3/Cargo.lock

This file was deleted.

13 changes: 0 additions & 13 deletions rust/BLAKE3/Cargo.toml

This file was deleted.

17 changes: 0 additions & 17 deletions rust/BLAKE3/include/blake3.h

This file was deleted.

55 changes: 0 additions & 55 deletions rust/BLAKE3/src/lib.rs

This file was deleted.

1 change: 0 additions & 1 deletion rust/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -67,5 +67,4 @@ function(add_rust_subdirectory src)
VERBATIM)
endfunction()

add_rust_subdirectory (BLAKE3)
add_rust_subdirectory (skim)
25 changes: 9 additions & 16 deletions src/Functions/FunctionsHashing.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include <farmhash.h>
#include <metrohash.h>
#include <wyhash.h>
#include <llvm/ADT/StringRef.h>
#include <MurmurHash2.h>
#include <MurmurHash3.h>

Expand All @@ -15,10 +16,6 @@
#endif
#include <xxhash.h>

#if USE_BLAKE3
# include <blake3.h>
#endif

#include <Common/SipHash.h>
#include <Common/typeid_cast.h>
#include <Common/safe_cast.h>
Expand Down Expand Up @@ -57,6 +54,10 @@
#include <base/bit_cast.h>
#include <base/unaligned.h>

#if USE_BLAKE3
#include <llvm-c/blake3.h>
#endif

namespace DB
{

Expand Down Expand Up @@ -819,18 +820,10 @@ struct ImplBLAKE3
#else
static void apply(const char * begin, const size_t size, unsigned char* out_char_data)
{
#if defined(MEMORY_SANITIZER)
auto err_msg = blake3_apply_shim_msan_compat(begin, safe_cast<uint32_t>(size), out_char_data);
__msan_unpoison(out_char_data, length);
#else
auto err_msg = blake3_apply_shim(begin, safe_cast<uint32_t>(size), out_char_data);
#endif
if (err_msg != nullptr)
{
auto err_st = std::string(err_msg);
blake3_free_char_pointer(err_msg);
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function returned error message: {}", err_st);
}
llvm_blake3_hasher hasher;
llvm_blake3_hasher_init(&hasher);
llvm_blake3_hasher_update(&hasher, begin, size);
llvm_blake3_hasher_finalize(&hasher, out_char_data, length);
}
#endif
};
Expand Down
4 changes: 1 addition & 3 deletions src/configure_config.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,6 @@ endif()
if (TARGET ch_contrib::rdkafka)
set(USE_RDKAFKA 1)
endif()
if (TARGET ch_rust::blake3)
set(USE_BLAKE3 1)
endif()
if (TARGET ch_rust::skim)
set(USE_SKIM 1)
endif()
Expand Down Expand Up @@ -99,6 +96,7 @@ if (TARGET ch_contrib::ulid)
endif()
if (TARGET ch_contrib::llvm)
set(USE_EMBEDDED_COMPILER 1)
set(USE_BLAKE3 1)
endif()
if (TARGET ch_contrib::unixodbc)
set(USE_ODBC 1)
Expand Down

0 comments on commit d087b30

Please sign in to comment.