Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bit: Add ceil2 and floor2 functions #105

Merged
merged 4 commits into from
Feb 18, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions src/stdgpu/bit.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,24 @@ template <typename T, typename = typename std::enable_if<std::is_unsigned<T>::va
STDGPU_HOST_DEVICE bool
ispow2(const T number);

/**
* \brief Computes the smallest power of two which is greater than or equal to the given number
* \param[in] number A number
* \return The smallest power of two which is greater than or equal to the given number
* \note For 0 the result is 1. If the result is not representable in T, the computation overflows to 0
*/
template <typename T, typename = typename std::enable_if<std::is_unsigned<T>::value>::type>
STDGPU_HOST_DEVICE T
ceil2(const T number);

/**
* \brief Computes the largest power of two which is less than or equal to the given number
* \param[in] number A number
* \return The largest power of two which is less than or equal to the given number
* \note For 0 the result is 0, which is not a power of two
*/
template <typename T, typename = typename std::enable_if<std::is_unsigned<T>::value>::type>
STDGPU_HOST_DEVICE T
floor2(const T number);

/**
* \brief Computes the modulus of the given number and a power of two divider
* \param[in] number A number
Expand Down
44 changes: 44 additions & 0 deletions src/stdgpu/impl/bit_detail.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,50 @@ ispow2(const T number)
}


// Rounds number up to the next power of two (a power of two maps to itself, 0 maps to 1).
template <typename T, typename>
STDGPU_HOST_DEVICE T
ceil2(const T number)
{
    T result = number;

    // Special case zero: treat it like 1 so that the decrement below cannot wrap around
    result += (result == 0);

    // Decrement so that exact powers of two map to themselves, then copy the highest
    // set bit into every lower position. Doubling the shift distance each step fills
    // all digits in log2(digits) iterations instead of one iteration per digit.
    result--;
    for (index_t shift = 1; shift < stdgpu::numeric_limits<T>::digits; shift *= 2)
    {
        result |= result >> shift;
    }
    // All bits below the target power are set now, so adding one carries into it
    result++;

    // If result is not representable in T, we have undefined behavior
    // --> In this case, we have an overflow to 0
    STDGPU_ENSURES(result == 0 || ispow2(result));

    return result;
}


// Rounds number down to the previous power of two (a power of two maps to itself, 0 maps to 0).
template <typename T, typename>
STDGPU_HOST_DEVICE T
floor2(const T number)
{
    // Special case zero: there is no power of two <= 0, so 0 is returned as is
    if (number == 0) return 0;

    // Copy the highest set bit into every lower position. Doubling the shift distance
    // each step fills all digits in log2(digits) iterations instead of one per digit.
    T result = number;
    for (index_t shift = 1; shift < stdgpu::numeric_limits<T>::digits; shift *= 2)
    {
        result |= result >> shift;
    }
    // result now has every bit at and below the highest set bit of number set;
    // masking out the copy shifted right by one leaves only that highest bit
    result &= ~(result >> 1);

    STDGPU_ENSURES(ispow2(result));

    return result;
}


template <typename T, typename>
STDGPU_HOST_DEVICE T
mod2(const T number,
Expand Down
16 changes: 1 addition & 15 deletions src/stdgpu/impl/unordered_base_detail.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -43,20 +43,6 @@ namespace stdgpu
namespace detail
{

// Returns the smallest power of two that is greater than or equal to capacity.
// Requires capacity > 0.
inline index_t
next_pow2(const index_t capacity)
{
    STDGPU_EXPECTS(capacity > 0);

    // Use exact integer arithmetic: going through std::log2/std::ceil converts the
    // capacity to floating point, which cannot represent every large integer and may
    // therefore yield a power of two that is too small.
    const std::size_t target = static_cast<std::size_t>(capacity);
    std::size_t pow2 = 1;
    while (pow2 < target)
    {
        pow2 <<= 1;
    }

    index_t result = static_cast<index_t>(pow2);

    // Fails if the power of two does not fit into index_t
    STDGPU_ENSURES(result >= capacity);
    STDGPU_ENSURES(ispow2<std::size_t>(static_cast<std::size_t>(result)));

    return result;
}


inline index_t
expected_collisions(const index_t bucket_count,
const index_t capacity)
Expand Down Expand Up @@ -1038,7 +1024,7 @@ unordered_base<Key, Value, KeyFromValue, Hash, KeyEqual>::createDeviceObject(con
STDGPU_EXPECTS(capacity > 0);

// bucket count depends on default max load factor
index_t bucket_count = next_pow2(static_cast<index_t>(std::ceil(static_cast<float>(capacity) / default_max_load_factor())));
index_t bucket_count = static_cast<index_t>(stdgpu::ceil2(static_cast<std::size_t>(std::ceil(static_cast<float>(capacity) / default_max_load_factor()))));

// excess count is estimated by the expected collision count and conservatively lowered since entries falling into regular buckets are already included here
index_t excess_count = std::max<index_t>(1, expected_collisions(bucket_count, capacity) * 2 / 3);
Expand Down
135 changes: 110 additions & 25 deletions test/stdgpu/bit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,14 @@ template
STDGPU_HOST_DEVICE bool
ispow2<unsigned int>(const unsigned int);

// Explicit instantiation of ceil2 for unsigned int
template
STDGPU_HOST_DEVICE unsigned int
ceil2<unsigned int>(const unsigned int);

// Explicit instantiation of floor2 for unsigned int
template
STDGPU_HOST_DEVICE unsigned int
floor2<unsigned int>(const unsigned int);

template
STDGPU_HOST_DEVICE unsigned int
mod2<unsigned int>(const unsigned int,
Expand Down Expand Up @@ -80,17 +88,17 @@ popcount<unsigned long long int>(const unsigned long long int);

void
thread_ispow2_random(const stdgpu::index_t iterations,
const std::unordered_set<size_t>& pow2_list)
const std::unordered_set<std::size_t>& pow2_list)
{
// Generate true random numbers
size_t seed = test_utils::random_thread_seed();
std::size_t seed = test_utils::random_thread_seed();

std::default_random_engine rng(seed);
std::uniform_int_distribution<size_t> dist(std::numeric_limits<size_t>::lowest(), std::numeric_limits<size_t>::max());
std::uniform_int_distribution<std::size_t> dist(std::numeric_limits<std::size_t>::lowest(), std::numeric_limits<std::size_t>::max());

for (stdgpu::index_t i = 0; i < iterations; ++i)
{
size_t number = dist(rng);
std::size_t number = dist(rng);

if (pow2_list.find(number) == pow2_list.end())
{
Expand All @@ -102,10 +110,10 @@ thread_ispow2_random(const stdgpu::index_t iterations,

TEST_F(stdgpu_bit, ispow2)
{
std::unordered_set<size_t> pow2_list;
for (size_t i = 0; i < 63; ++i)
std::unordered_set<std::size_t> pow2_list;
for (std::size_t i = 0; i < std::numeric_limits<std::size_t>::digits; ++i)
{
size_t pow2_i = static_cast<size_t>(1) << i;
std::size_t pow2_i = static_cast<std::size_t>(1) << i;

ASSERT_TRUE(stdgpu::ispow2(pow2_i));

Expand All @@ -121,27 +129,104 @@ TEST_F(stdgpu_bit, ispow2)
}


// Checks the defining properties of ceil2 on `iterations` random samples:
// the result is a power of two, is >= the input, and half of it is < the input.
void
thread_ceil2_random(const stdgpu::index_t iterations)
{
    // Generate true random numbers
    std::size_t seed = test_utils::random_thread_seed();

    // Largest input whose result is still representable in std::size_t
    const std::size_t max_representable = static_cast<std::size_t>(1) << (std::numeric_limits<std::size_t>::digits - 1);

    std::default_random_engine rng(seed);
    // Sample only inputs with a representable result so that no iteration is skipped.
    // Zero is excluded as well: ceil2(0) == 1 does not satisfy "result / 2 < number"
    // and is checked by the dedicated zero test instead.
    std::uniform_int_distribution<std::size_t> dist(static_cast<std::size_t>(1), max_representable);

    for (stdgpu::index_t i = 0; i < iterations; ++i)
    {
        const std::size_t number = dist(rng);

        const std::size_t result = stdgpu::ceil2(number);

        EXPECT_TRUE(stdgpu::ispow2(result));
        EXPECT_GE(result, number);
        EXPECT_LT(result / 2, number);
    }
}


// Runs the randomized ceil2 property check concurrently
TEST_F(stdgpu_bit, ceil2_random)
{
    // 2^19 iterations per thread, computed exactly in integer arithmetic
    // instead of going through floating-point pow()
    const stdgpu::index_t iterations_per_thread = static_cast<stdgpu::index_t>(1) << 19;

    test_utils::for_each_concurrent_thread(&thread_ceil2_random,
                                           iterations_per_thread);
}


// ceil2 of zero is expected to be one
TEST_F(stdgpu_bit, ceil2_zero)
{
    const std::size_t input    = static_cast<std::size_t>(0);
    const std::size_t expected = static_cast<std::size_t>(1);

    EXPECT_EQ(stdgpu::ceil2(input), expected);
}


// Checks the defining properties of floor2 on `iterations` random samples:
// the result is a power of two, is <= the input, and is > half of the input.
void
thread_floor2_random(const stdgpu::index_t iterations)
{
    // Generate true random numbers
    std::size_t seed = test_utils::random_thread_seed();

    std::default_random_engine rng(seed);
    // Zero is excluded: floor2(0) == 0 does not satisfy "result > number / 2"
    // and is checked by the dedicated zero test instead.
    std::uniform_int_distribution<std::size_t> dist(static_cast<std::size_t>(1), std::numeric_limits<std::size_t>::max());

    for (stdgpu::index_t i = 0; i < iterations; ++i)
    {
        const std::size_t number = dist(rng);

        const std::size_t result = stdgpu::floor2(number);

        EXPECT_TRUE(stdgpu::ispow2(result));
        EXPECT_LE(result, number);
        EXPECT_GT(result, number / 2);
    }
}


// Runs the randomized floor2 property check concurrently
TEST_F(stdgpu_bit, floor2_random)
{
    // 2^19 iterations per thread, computed exactly in integer arithmetic
    // instead of going through floating-point pow()
    const stdgpu::index_t iterations_per_thread = static_cast<stdgpu::index_t>(1) << 19;

    test_utils::for_each_concurrent_thread(&thread_floor2_random,
                                           iterations_per_thread);
}


// floor2 of zero is expected to stay zero
TEST_F(stdgpu_bit, floor2_zero)
{
    const std::size_t input    = static_cast<std::size_t>(0);
    const std::size_t expected = static_cast<std::size_t>(0);

    EXPECT_EQ(stdgpu::floor2(input), expected);
}


// Compares mod2 against the built-in % operator on `iterations` random samples
// for the given divider.
void
thread_mod2_random(const stdgpu::index_t iterations,
                   const std::size_t divider)
{
    // Generate true random numbers
    std::size_t seed = test_utils::random_thread_seed();

    std::default_random_engine rng(seed);
    std::uniform_int_distribution<std::size_t> dist(std::numeric_limits<std::size_t>::lowest(), std::numeric_limits<std::size_t>::max());

    for (stdgpu::index_t i = 0; i < iterations; ++i)
    {
        std::size_t number = dist(rng);
        EXPECT_EQ(stdgpu::mod2(number, divider), number % divider);
    }
}


TEST_F(stdgpu_bit, mod2_random)
{
const size_t divider = static_cast<size_t>(pow(2, 21));
const std::size_t divider = static_cast<std::size_t>(pow(2, 21));
stdgpu::index_t iterations_per_thread = static_cast<stdgpu::index_t>(pow(2, 19));

test_utils::for_each_concurrent_thread(&thread_mod2_random,
Expand All @@ -152,49 +237,49 @@ TEST_F(stdgpu_bit, mod2_random)

// A positive number modulo 1 is expected to be 0
TEST_F(stdgpu_bit, mod2_one_positive)
{
    std::size_t number = 42;
    std::size_t divider = 1;
    EXPECT_EQ(stdgpu::mod2(number, divider), static_cast<std::size_t>(0));
}


// Zero modulo 1 is expected to be 0
TEST_F(stdgpu_bit, mod2_one_zero)
{
    std::size_t number = 0;
    std::size_t divider = 1;
    EXPECT_EQ(stdgpu::mod2(number, divider), static_cast<std::size_t>(0));
}


// log2pow2 of 2^i is expected to be i for every bit position of std::size_t
TEST_F(stdgpu_bit, log2pow2)
{
    for (std::size_t i = 0; i < std::numeric_limits<std::size_t>::digits; ++i)
    {
        EXPECT_EQ(stdgpu::log2pow2(static_cast<std::size_t>(1) << i), static_cast<std::size_t>(i));
    }
}


// Zero has no set bits
TEST_F(stdgpu_bit, popcount_zero)
{
    EXPECT_EQ(stdgpu::popcount(static_cast<std::size_t>(0)), 0);
}


// Every power of two has exactly one set bit
TEST_F(stdgpu_bit, popcount_pow2)
{
    for (std::size_t i = 0; i < std::numeric_limits<std::size_t>::digits; ++i)
    {
        EXPECT_EQ(stdgpu::popcount(static_cast<std::size_t>(1) << i), 1);
    }
}


// 2^i - 1 has exactly the i lowest bits set
TEST_F(stdgpu_bit, popcount_pow2m1)
{
    for (std::size_t i = 0; i < std::numeric_limits<std::size_t>::digits; ++i)
    {
        EXPECT_EQ(stdgpu::popcount((static_cast<std::size_t>(1) << i) - 1), i);
    }
}