From d087b30cedfad1b68ee15cb927a49cab37cac86e Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 6 May 2023 13:58:10 +0200 Subject: [PATCH] Use BLAKE3 from LLVM (11% faster) Results are the same: upstream: SELECT hex(BLAKE3('bar')): F2E897EED7D206CD855D441598FA521ABC75AA96953E97C030C9612C30C1293D llvm: SELECT hex(BLAKE3('bar')): F2E897EED7D206CD855D441598FA521ABC75AA96953E97C030C9612C30C1293D Query for benchmark: SELECT ignore(BLAKE3(materialize('Lorem ipsum dolor sit amet, consectetur adipiscing elit'))) FROM numbers(1000000000) FORMAT `Null` upstream : Elapsed: 2.494 sec. Processed 31.13 million rows, 249.08 MB (12.48 million rows/s., 99.86 MB/s.) upstream + rust lto: ~14.00 million rows/s llvm : Elapsed: 3.053 sec. Processed 43.24 million rows, 345.88 MB (14.16 million rows/s., 113.28 MB/s.) Note that integrating_rust_libraries.md is now deprecated. P.S. The LLVM implementation was chosen over Rust + LTO because there are issues with linking multiple Rust libraries together with LTO: - https://alanwu.space/post/symbol-hygiene/ - https://github.com/rust-lang/rust/issues/44322 --- rust/BLAKE3/CMakeLists.txt | 3 -- rust/BLAKE3/Cargo.lock | 92 -------------------------------- rust/BLAKE3/Cargo.toml | 13 ----- rust/BLAKE3/include/blake3.h | 17 ------ rust/BLAKE3/src/lib.rs | 55 ------------------- rust/CMakeLists.txt | 1 - src/Functions/FunctionsHashing.h | 25 ++++----- src/configure_config.cmake | 4 +- 8 files changed, 10 insertions(+), 200 deletions(-) delete mode 100644 rust/BLAKE3/CMakeLists.txt delete mode 100644 rust/BLAKE3/Cargo.lock delete mode 100644 rust/BLAKE3/Cargo.toml delete mode 100644 rust/BLAKE3/include/blake3.h delete mode 100644 rust/BLAKE3/src/lib.rs diff --git a/rust/BLAKE3/CMakeLists.txt b/rust/BLAKE3/CMakeLists.txt deleted file mode 100644 index ceb0a647b664..000000000000 --- a/rust/BLAKE3/CMakeLists.txt +++ /dev/null @@ -1,3 +0,0 @@ -clickhouse_import_crate(MANIFEST_PATH Cargo.toml) -target_include_directories(_ch_rust_blake3 
INTERFACE include) -add_library(ch_rust::blake3 ALIAS _ch_rust_blake3) diff --git a/rust/BLAKE3/Cargo.lock b/rust/BLAKE3/Cargo.lock deleted file mode 100644 index 9ac60773732f..000000000000 --- a/rust/BLAKE3/Cargo.lock +++ /dev/null @@ -1,92 +0,0 @@ -# This file is automatically @generated by Cargo. -# It is not intended for manual editing. -version = 3 - -[[package]] -name = "_ch_rust_blake3" -version = "0.1.0" -dependencies = [ - "blake3", - "libc", -] - -[[package]] -name = "arrayref" -version = "0.3.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4c527152e37cf757a3f78aae5a06fbeefdb07ccc535c980a3208ee3060dd544" - -[[package]] -name = "arrayvec" -version = "0.7.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6" - -[[package]] -name = "blake3" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "526c210b4520e416420759af363083471656e819a75e831b8d2c9d5a584f2413" -dependencies = [ - "arrayref", - "arrayvec", - "cc", - "cfg-if", - "constant_time_eq", - "digest", -] - -[[package]] -name = "cc" -version = "1.0.73" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11" - -[[package]] -name = "cfg-if" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" - -[[package]] -name = "constant_time_eq" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc" - -[[package]] -name = "digest" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3dd60d1080a57a05ab032377049e0591415d2b31afd7028356dbf3cc6dcb066" -dependencies = [ - "generic-array", -] - -[[package]] 
-name = "generic-array" -version = "0.14.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bff49e947297f3312447abdca79f45f4738097cc82b06e72054d2223f601f1b9" -dependencies = [ - "typenum", - "version_check", -] - -[[package]] -name = "libc" -version = "0.2.132" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8371e4e5341c3a96db127eb2465ac681ced4c433e01dd0e938adbef26ba93ba5" - -[[package]] -name = "typenum" -version = "1.15.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dcf81ac59edc17cc8697ff311e8f5ef2d99fcbd9817b34cec66f90b6c3dfd987" - -[[package]] -name = "version_check" -version = "0.9.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" diff --git a/rust/BLAKE3/Cargo.toml b/rust/BLAKE3/Cargo.toml deleted file mode 100644 index eb8f3467424b..000000000000 --- a/rust/BLAKE3/Cargo.toml +++ /dev/null @@ -1,13 +0,0 @@ -[package] -name = "_ch_rust_blake3" -version = "0.1.0" - -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - -[dependencies] -blake3 = "1.2.0" -libc = "0.2.132" - -[lib] -crate-type = ["staticlib"] - diff --git a/rust/BLAKE3/include/blake3.h b/rust/BLAKE3/include/blake3.h deleted file mode 100644 index 85572506d432..000000000000 --- a/rust/BLAKE3/include/blake3.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef BLAKE3_H -#define BLAKE3_H - -#include - - -extern "C" { - -char *blake3_apply_shim(const char *begin, uint32_t _size, uint8_t *out_char_data); - -char *blake3_apply_shim_msan_compat(const char *begin, uint32_t size, uint8_t *out_char_data); - -void blake3_free_char_pointer(char *ptr_to_free); - -} // extern "C" - -#endif /* BLAKE3_H */ diff --git a/rust/BLAKE3/src/lib.rs b/rust/BLAKE3/src/lib.rs deleted file mode 100644 index 2b54787589f4..000000000000 --- a/rust/BLAKE3/src/lib.rs +++ /dev/null @@ -1,55 +0,0 @@ -extern 
crate blake3; -extern crate libc; - -use std::ffi::{CStr, CString}; -use std::os::raw::c_char; -use std::mem; - -#[no_mangle] -pub unsafe extern "C" fn blake3_apply_shim( - begin: *const c_char, - _size: u32, - out_char_data: *mut u8, -) -> *mut c_char { - if begin.is_null() { - let err_str = CString::new("input was a null pointer").unwrap(); - return err_str.into_raw(); - } - let mut hasher = blake3::Hasher::new(); - let input_bytes = CStr::from_ptr(begin); - let input_res = input_bytes.to_bytes(); - hasher.update(input_res); - let mut reader = hasher.finalize_xof(); - reader.fill(std::slice::from_raw_parts_mut(out_char_data, blake3::OUT_LEN)); - std::ptr::null_mut() -} - -#[no_mangle] -pub unsafe extern "C" fn blake3_apply_shim_msan_compat( - mut begin: *const c_char, - size: u32, - out_char_data: *mut u8, -) -> *mut c_char { - if begin.is_null() { - let err_str = CString::new("input was a null pointer").unwrap(); - return err_str.into_raw(); - } - libc::memset(out_char_data as *mut libc::c_void, 0, mem::size_of::()); - let mut hasher = blake3::Hasher::new(); - let mut vec = Vec::::new(); - for _ in 0..size { - vec.push(*begin as u8); - begin = begin.add(1); - } - let input_res = vec.as_mut_slice(); - hasher.update(input_res); - let mut reader = hasher.finalize_xof(); - reader.fill(std::slice::from_raw_parts_mut(out_char_data, blake3::OUT_LEN)); - std::ptr::null_mut() -} - -// Freeing memory according to docs: https://doc.rust-lang.org/std/ffi/struct.CString.html#method.into_raw -#[no_mangle] -pub unsafe extern "C" fn blake3_free_char_pointer(ptr_to_free: *mut c_char) { - std::mem::drop(CString::from_raw(ptr_to_free)); -} diff --git a/rust/CMakeLists.txt b/rust/CMakeLists.txt index ec2377fce71b..cbe0373da085 100644 --- a/rust/CMakeLists.txt +++ b/rust/CMakeLists.txt @@ -67,5 +67,4 @@ function(add_rust_subdirectory src) VERBATIM) endfunction() -add_rust_subdirectory (BLAKE3) add_rust_subdirectory (skim) diff --git a/src/Functions/FunctionsHashing.h 
b/src/Functions/FunctionsHashing.h index 034ef868cc7b..1d0f94d22729 100644 --- a/src/Functions/FunctionsHashing.h +++ b/src/Functions/FunctionsHashing.h @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -15,10 +16,6 @@ #endif #include -#if USE_BLAKE3 -# include -#endif - #include #include #include @@ -57,6 +54,10 @@ #include #include +#if USE_BLAKE3 +#include +#endif + namespace DB { @@ -819,18 +820,10 @@ struct ImplBLAKE3 #else static void apply(const char * begin, const size_t size, unsigned char* out_char_data) { - #if defined(MEMORY_SANITIZER) - auto err_msg = blake3_apply_shim_msan_compat(begin, safe_cast(size), out_char_data); - __msan_unpoison(out_char_data, length); - #else - auto err_msg = blake3_apply_shim(begin, safe_cast(size), out_char_data); - #endif - if (err_msg != nullptr) - { - auto err_st = std::string(err_msg); - blake3_free_char_pointer(err_msg); - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function returned error message: {}", err_st); - } + llvm_blake3_hasher hasher; + llvm_blake3_hasher_init(&hasher); + llvm_blake3_hasher_update(&hasher, begin, size); + llvm_blake3_hasher_finalize(&hasher, out_char_data, length); } #endif }; diff --git a/src/configure_config.cmake b/src/configure_config.cmake index fedc05e1fdcd..2f45d636d489 100644 --- a/src/configure_config.cmake +++ b/src/configure_config.cmake @@ -19,9 +19,6 @@ endif() if (TARGET ch_contrib::rdkafka) set(USE_RDKAFKA 1) endif() -if (TARGET ch_rust::blake3) - set(USE_BLAKE3 1) -endif() if (TARGET ch_rust::skim) set(USE_SKIM 1) endif() @@ -99,6 +96,7 @@ if (TARGET ch_contrib::ulid) endif() if (TARGET ch_contrib::llvm) set(USE_EMBEDDED_COMPILER 1) + set(USE_BLAKE3 1) endif() if (TARGET ch_contrib::unixodbc) set(USE_ODBC 1)