Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update 3rd party libs #231

Merged
2 commits merged into from
Mar 14, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 16 additions & 21 deletions 3rd/datasketches/common/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,29 +17,24 @@

add_library(common INTERFACE)

if (BUILD_TESTS)
add_subdirectory(test)
endif()

target_include_directories(common
INTERFACE
$<INSTALL_INTERFACE:$<INSTALL_PREFIX>/include>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
)
INTERFACE
$<INSTALL_INTERFACE:$<INSTALL_PREFIX>/include>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
)

target_compile_features(common INTERFACE cxx_std_11)

target_sources(common
INTERFACE
${CMAKE_CURRENT_SOURCE_DIR}/include/common_defs.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/memory_operations.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/MurmurHash3.h
${CMAKE_CURRENT_SOURCE_DIR}/include/serde.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/count_zeros.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/inv_pow2_table.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/binomial_bounds.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/conditional_back_inserter.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/conditional_forward.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/ceiling_power_of_2.hpp
)

INTERFACE
${CMAKE_CURRENT_SOURCE_DIR}/include/common_defs.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/memory_operations.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/MurmurHash3.h
${CMAKE_CURRENT_SOURCE_DIR}/include/serde.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/count_zeros.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/inv_pow2_table.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/binomial_bounds.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/conditional_back_inserter.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/conditional_forward.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/ceiling_power_of_2.hpp
)
25 changes: 18 additions & 7 deletions 3rd/datasketches/common/include/MurmurHash3.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
// * Changed input seed in MurmurHash3_x64_128 to uint64_t
// * Define and use HashState reference to return result
// * Made entire hash function defined inline
// * Added compute_seed_hash
//-----------------------------------------------------------------------------
// MurmurHash3 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.
Expand All @@ -15,6 +16,8 @@
#ifndef _MURMURHASH3_H_
#define _MURMURHASH3_H_

#include <cstring>

//-----------------------------------------------------------------------------
// Platform-specific functions and macros

Expand Down Expand Up @@ -75,9 +78,11 @@ typedef struct {
// Block read - if your platform needs to do endian-swapping or can only
// handle aligned reads, do the conversion here

FORCE_INLINE uint64_t getblock64 ( const uint64_t * p, int i )
FORCE_INLINE uint64_t getblock64 ( const uint64_t * p, size_t i )
{
return p[i];
uint64_t res;
memcpy(&res, p + i, sizeof(res));
return res;
}

//-----------------------------------------------------------------------------
Expand All @@ -94,7 +99,7 @@ FORCE_INLINE uint64_t fmix64 ( uint64_t k )
return k;
}

FORCE_INLINE void MurmurHash3_x64_128(const void* key, int lenBytes, uint64_t seed, HashState& out) {
FORCE_INLINE void MurmurHash3_x64_128(const void* key, size_t lenBytes, uint64_t seed, HashState& out) {
static const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
static const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);

Expand All @@ -105,13 +110,13 @@ FORCE_INLINE void MurmurHash3_x64_128(const void* key, int lenBytes, uint64_t se

// Number of full 128-bit blocks of 16 bytes.
// Possible exclusion of a remainder of up to 15 bytes.
const int nblocks = lenBytes >> 4; // bytes / 16
const size_t nblocks = lenBytes >> 4; // bytes / 16

// Process the 128-bit blocks (the body) into the hash
const uint64_t* blocks = (const uint64_t*)(data);
for (int i = 0; i < nblocks; ++i) { // 16 bytes per block
uint64_t k1 = getblock64(blocks,i*2+0);
uint64_t k2 = getblock64(blocks,i*2+1);
for (size_t i = 0; i < nblocks; ++i) { // 16 bytes per block
uint64_t k1 = getblock64(blocks, i * 2 + 0);
uint64_t k2 = getblock64(blocks, i * 2 + 1);

k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; out.h1 ^= k1;
out.h1 = ROTL64(out.h1,27);
Expand Down Expand Up @@ -170,4 +175,10 @@ FORCE_INLINE void MurmurHash3_x64_128(const void* key, int lenBytes, uint64_t se

//-----------------------------------------------------------------------------

// Derives a 16-bit hash of the given seed.
// The 8 bytes of `seed` are hashed with MurmurHash3_x64_128 (using 0 as the
// hash seed), and the low 16 bits of the first 64-bit result word are returned.
FORCE_INLINE uint16_t compute_seed_hash(uint64_t seed) {
  HashState state;
  MurmurHash3_x64_128(&seed, sizeof(seed), 0, state);
  const uint64_t low_bits = state.h1 & 0xffff; // keep only the lowest 16 bits
  return static_cast<uint16_t>(low_bits);
}

#endif // _MURMURHASH3_H_
16 changes: 8 additions & 8 deletions 3rd/datasketches/common/include/binomial_bounds.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -381,7 +381,7 @@ class binomial_bounds {
// The following computes an approximation to the lower bound of a Frequentist
// confidence interval based on the tails of the Binomial distribution.
static double compute_approx_binomial_lower_bound(unsigned long long num_samples, double theta, unsigned num_std_devs) {
if (theta == 1) return num_samples;
if (theta == 1) return static_cast<double>(num_samples);
if (num_samples == 0) return 0;
if (num_samples == 1) {
const double delta = delta_of_num_std_devs[num_std_devs];
Expand All @@ -395,24 +395,24 @@ class binomial_bounds {
}
// at this point we know 2 <= num_samples <= 120
if (theta > (1 - 1e-5)) { // empirically-determined threshold
return num_samples;
return static_cast<double>(num_samples);
}
if (theta < (num_samples / 360.0)) { // empirically-determined threshold
// here we use the Gaussian approximation, but with a modified num_std_devs
const unsigned index = 3 * num_samples + (num_std_devs - 1);
const unsigned index = 3 * static_cast<unsigned>(num_samples) + (num_std_devs - 1);
const double raw_lb = cont_classic_lb(num_samples, theta, lb_equiv_table[index]);
return raw_lb - 0.5; // fake round down
}
// This is the most difficult range to approximate; we will compute an "exact" LB.
// We know that est <= 360, so special_n_star() shouldn't be ridiculously slow.
const double delta = delta_of_num_std_devs[num_std_devs];
return special_n_star(num_samples, theta, delta); // no need to round
return static_cast<double>(special_n_star(num_samples, theta, delta)); // no need to round
}

// The following computes an approximation to the upper bound of a Frequentist
// confidence interval based on the tails of the Binomial distribution.
static double compute_approx_binomial_upper_bound(unsigned long long num_samples, double theta, unsigned num_std_devs) {
if (theta == 1) return num_samples;
if (theta == 1) return static_cast<double>(num_samples);
if (num_samples == 0) {
const double delta = delta_of_num_std_devs[num_std_devs];
const double raw_ub = std::log(delta) / std::log(1 - theta);
Expand All @@ -425,18 +425,18 @@ class binomial_bounds {
}
// at this point we know 2 <= num_samples <= 120
if (theta > (1 - 1e-5)) { // empirically-determined threshold
return num_samples + 1;
return static_cast<double>(num_samples + 1);
}
if (theta < (num_samples / 360.0)) { // empirically-determined threshold
// here we use the Gaussian approximation, but with a modified num_std_devs
const unsigned index = 3 * num_samples + (num_std_devs - 1);
const unsigned index = 3 * static_cast<unsigned>(num_samples) + (num_std_devs - 1);
const double raw_ub = cont_classic_ub(num_samples, theta, ub_equiv_table[index]);
return raw_ub + 0.5; // fake round up
}
// This is the most difficult range to approximate; we will compute an "exact" UB.
// We know that est <= 360, so special_n_prime_f() shouldn't be ridiculously slow.
const double delta = delta_of_num_std_devs[num_std_devs];
return special_n_prime_f(num_samples, theta, delta); // no need to round
return static_cast<double>(special_n_prime_f(num_samples, theta, delta)); // no need to round
}

static void check_theta(double theta) {
Expand Down
27 changes: 12 additions & 15 deletions 3rd/datasketches/common/include/bounds_binomial_proportions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,14 +110,14 @@ class bounds_binomial_proportions { // confidence intervals for binomial proport
* @return the lower bound of the approximate Clopper-Pearson confidence interval for the
* unknown success probability.
*/
static inline double approximate_lower_bound_on_p(long n, long k, double num_std_devs) {
static inline double approximate_lower_bound_on_p(uint64_t n, uint64_t k, double num_std_devs) {
check_inputs(n, k);
if (n == 0) { return 0.0; } // the coin was never flipped, so we know nothing
else if (k == 0) { return 0.0; }
else if (k == 1) { return (exact_lower_bound_on_p_k_eq_1(n, delta_of_num_stdevs(num_std_devs))); }
else if (k == n) { return (exact_lower_bound_on_p_k_eq_n(n, delta_of_num_stdevs(num_std_devs))); }
else {
double x = abramowitz_stegun_formula_26p5p22((n - k) + 1, k, (-1.0 * num_std_devs));
double x = abramowitz_stegun_formula_26p5p22((n - k) + 1.0, static_cast<double>(k), (-1.0 * num_std_devs));
return (1.0 - x); // which is p
}
}
Expand Down Expand Up @@ -145,18 +145,18 @@ class bounds_binomial_proportions { // confidence intervals for binomial proport
* @return the upper bound of the approximate Clopper-Pearson confidence interval for the
* unknown success probability.
*/
static inline double approximate_upper_bound_on_p(long n, long k, double num_std_devs) {
static inline double approximate_upper_bound_on_p(uint64_t n, uint64_t k, double num_std_devs) {
check_inputs(n, k);
if (n == 0) { return 1.0; } // the coin was never flipped, so we know nothing
else if (k == n) { return 1.0; }
else if (k == (n - 1)) {
return (exactU_upper_bound_on_p_k_eq_minusone(n, delta_of_num_stdevs(num_std_devs)));
return (exact_upper_bound_on_p_k_eq_minusone(n, delta_of_num_stdevs(num_std_devs)));
}
else if (k == 0) {
return (exact_upper_bound_on_p_k_eq_zero(n, delta_of_num_stdevs(num_std_devs)));
}
else {
double x = abramowitz_stegun_formula_26p5p22(n - k, k + 1, num_std_devs);
double x = abramowitz_stegun_formula_26p5p22(static_cast<double>(n - k), k + 1.0, num_std_devs);
return (1.0 - x); // which is p
}
}
Expand All @@ -167,7 +167,7 @@ class bounds_binomial_proportions { // confidence intervals for binomial proport
* @param k is the number of successes. Must be non-negative, and cannot exceed n.
* @return the estimate of the unknown binomial proportion.
*/
static inline double estimate_unknown_p(long n, long k) {
static inline double estimate_unknown_p(uint64_t n, uint64_t k) {
check_inputs(n, k);
if (n == 0) { return 0.5; } // the coin was never flipped, so we know nothing
else { return ((double) k / (double) n); }
Expand All @@ -193,9 +193,7 @@ class bounds_binomial_proportions { // confidence intervals for binomial proport
}

private:
static inline void check_inputs(long n, long k) {
if (n < 0) { throw std::invalid_argument("N must be non-negative"); }
if (k < 0) { throw std::invalid_argument("K must be non-negative"); }
static inline void check_inputs(uint64_t n, uint64_t k) {
if (k > n) { throw std::invalid_argument("K cannot exceed N"); }
}

Expand Down Expand Up @@ -251,8 +249,7 @@ class bounds_binomial_proportions { // confidence intervals for binomial proport
// and it is worth keeping it that way so that it will always be easy to verify
// that the formula was typed in correctly.

static inline double abramowitz_stegun_formula_26p5p22(double a, double b,
double yp) {
static inline double abramowitz_stegun_formula_26p5p22(double a, double b, double yp) {
const double b2m1 = (2.0 * b) - 1.0;
const double a2m1 = (2.0 * a) - 1.0;
const double lambda = ((yp * yp) - 3.0) / 6.0;
Expand All @@ -268,19 +265,19 @@ class bounds_binomial_proportions { // confidence intervals for binomial proport

// Formulas for some special cases.

static inline double exact_upper_bound_on_p_k_eq_zero(double n, double delta) {
static inline double exact_upper_bound_on_p_k_eq_zero(uint64_t n, double delta) {
return (1.0 - pow(delta, (1.0 / n)));
}

static inline double exact_lower_bound_on_p_k_eq_n(double n, double delta) {
static inline double exact_lower_bound_on_p_k_eq_n(uint64_t n, double delta) {
return (pow(delta, (1.0 / n)));
}

static inline double exact_lower_bound_on_p_k_eq_1(double n, double delta) {
static inline double exact_lower_bound_on_p_k_eq_1(uint64_t n, double delta) {
return (1.0 - pow((1.0 - delta), (1.0 / n)));
}

static inline double exactU_upper_bound_on_p_k_eq_minusone(double n, double delta) {
static inline double exact_upper_bound_on_p_k_eq_minusone(uint64_t n, double delta) {
return (pow((1.0 - delta), (1.0 / n)));
}

Expand Down
26 changes: 26 additions & 0 deletions 3rd/datasketches/common/include/common_defs.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,14 @@
#include <cstdint>
#include <string>
#include <memory>
#include <iostream>

namespace datasketches {

static const uint64_t DEFAULT_SEED = 9001;

enum resize_factor { X1 = 0, X2, X4, X8 };

template<typename A> using AllocChar = typename std::allocator_traits<A>::template rebind_alloc<char>;
template<typename A> using string = std::basic_string<char, std::char_traits<char>, AllocChar<A>>;

Expand All @@ -46,6 +49,29 @@ constexpr uint8_t lg_size_from_count(uint32_t n, double load_factor) {
return log2(n) + ((n > static_cast<uint32_t>((1 << (log2(n) + 1)) * load_factor)) ? 2 : 1);
}

// stream helpers to hide casts
// Reads sizeof(T) raw bytes from the stream into a local T and returns it.
// No stream-state check is performed here; the object is not initialized
// before the read, so its contents are only meaningful if the read succeeded.
template<typename T>
static inline T read(std::istream& is) {
  T result;
  char* const raw_bytes = reinterpret_cast<char*>(&result);
  is.read(raw_bytes, sizeof(result));
  return result;
}

// Reads size_bytes raw bytes from the stream into the memory pointed to by ptr.
// The size is given in bytes, not in elements of T.
template<typename T>
static inline void read(std::istream& is, T* ptr, size_t size_bytes) {
  char* const destination = reinterpret_cast<char*>(ptr);
  is.read(destination, size_bytes);
}

// Writes the sizeof(T) raw bytes of value to the stream.
// The value is only read, so it is taken by const reference; this lets
// callers pass const objects and temporaries as well as mutable lvalues.
// No stream-state check is performed here.
template<typename T>
static inline void write(std::ostream& os, const T& value) {
  os.write(reinterpret_cast<const char*>(&value), sizeof(T));
}

// Writes size_bytes raw bytes, starting at ptr, to the stream.
// The size is given in bytes, not in elements of T.
template<typename T>
static inline void write(std::ostream& os, const T* ptr, size_t size_bytes) {
  const char* const source = reinterpret_cast<const char*>(ptr);
  os.write(source, size_bytes);
}

} // namespace

#endif // _COMMON_DEFS_HPP_
28 changes: 20 additions & 8 deletions 3rd/datasketches/common/include/conditional_forward.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,29 +38,41 @@ fwd_type<T1, T2> conditional_forward(T2&& value) {
// Forward container as iterators

template<typename Container>
auto forward_begin(Container&& c) ->
typename std::enable_if<std::is_lvalue_reference<Container>::value, decltype(c.begin())>::type
auto forward_begin(Container&& c) -> typename std::enable_if<
std::is_lvalue_reference<Container>::value ||
std::is_same<typename std::remove_reference<Container>::type::const_iterator, decltype(c.begin())>::value,
decltype(c.begin())
>::type
{
return c.begin();
}

template<typename Container>
auto forward_begin(Container&& c) ->
typename std::enable_if<!std::is_lvalue_reference<Container>::value, decltype(std::make_move_iterator(c.begin()))>::type
auto forward_begin(Container&& c) -> typename std::enable_if<
!std::is_lvalue_reference<Container>::value &&
!std::is_same<typename std::remove_reference<Container>::type::const_iterator, decltype(c.begin())>::value,
decltype(std::make_move_iterator(c.begin()))
>::type
{
return std::make_move_iterator(c.begin());
}

template<typename Container>
auto forward_end(Container&& c) ->
typename std::enable_if<std::is_lvalue_reference<Container>::value, decltype(c.end())>::type
auto forward_end(Container&& c) -> typename std::enable_if<
std::is_lvalue_reference<Container>::value ||
std::is_same<typename std::remove_reference<Container>::type::const_iterator, decltype(c.begin())>::value,
decltype(c.end())
>::type
{
return c.end();
}

template<typename Container>
auto forward_end(Container&& c) ->
typename std::enable_if<!std::is_lvalue_reference<Container>::value, decltype(std::make_move_iterator(c.end()))>::type
auto forward_end(Container&& c) -> typename std::enable_if<
!std::is_lvalue_reference<Container>::value &&
!std::is_same<typename std::remove_reference<Container>::type::const_iterator, decltype(c.begin())>::value,
decltype(std::make_move_iterator(c.end()))
>::type
{
return std::make_move_iterator(c.end());
}
Expand Down
4 changes: 2 additions & 2 deletions 3rd/datasketches/common/include/count_zeros.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ static inline uint8_t count_leading_zeros_in_u64(uint64_t input) {
static inline uint8_t count_trailing_zeros_in_u32(uint32_t input) {
for (int i = 0; i < 4; i++) {
const int byte = input & 0xff;
if (byte != 0) return (i << 3) + byte_trailing_zeros_table[byte];
if (byte != 0) return static_cast<uint8_t>((i << 3) + byte_trailing_zeros_table[byte]);
input >>= 8;
}
return 32;
Expand All @@ -103,7 +103,7 @@ static inline uint8_t count_trailing_zeros_in_u32(uint32_t input) {
static inline uint8_t count_trailing_zeros_in_u64(uint64_t input) {
for (int i = 0; i < 8; i++) {
const int byte = input & 0xff;
if (byte != 0) return (i << 3) + byte_trailing_zeros_table[byte];
if (byte != 0) return static_cast<uint8_t>((i << 3) + byte_trailing_zeros_table[byte]);
input >>= 8;
}
return 64;
Expand Down
Loading