Skip to content

Commit

Permalink
unordered_{map,set}: Add single-parameter createDeviceObject() and ma…
Browse files Browse the repository at this point in the history
…x_load_factor()
  • Loading branch information
stotko committed Dec 17, 2019
1 parent 6eae5ac commit 2fe5cdb
Show file tree
Hide file tree
Showing 9 changed files with 203 additions and 33 deletions.
2 changes: 1 addition & 1 deletion examples/cuda/container_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ main()

int* d_input = createDeviceArray<int>(n);
int* d_result = createDeviceArray<int>(n);
stdgpu::unordered_set<int> set = stdgpu::unordered_set<int>::createDeviceObject(1024, n);
stdgpu::unordered_set<int> set = stdgpu::unordered_set<int>::createDeviceObject(n);
stdgpu::vector<int> vec = stdgpu::vector<int>::createDeviceObject(n);

thrust::sequence(stdgpu::device_begin(d_input), stdgpu::device_end(d_input),
Expand Down
2 changes: 1 addition & 1 deletion examples/cuda/thrust_towards_ranges.cu
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ main()

int* d_input = createDeviceArray<int>(n);
int* d_result = createDeviceArray<int>(n);
stdgpu::unordered_set<int> set = stdgpu::unordered_set<int>::createDeviceObject(1024, n);
stdgpu::unordered_set<int> set = stdgpu::unordered_set<int>::createDeviceObject(n);
stdgpu::atomic<int> sum = stdgpu::atomic<int>::createDeviceObject();

thrust::sequence(stdgpu::device_begin(d_input), stdgpu::device_end(d_input),
Expand Down
41 changes: 20 additions & 21 deletions src/stdgpu/impl/unordered_base.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -74,16 +74,12 @@ class unordered_base

/**
* \brief Creates an object of this class on the GPU (device)
* \param[in] bucket_count The number of buckets
* \param[in] excess_count The number of excess entries
* \pre bucket_count > 0
* \pre excess_count > 0
* \pre ispow2(bucket_count)
* \param[in] capacity The capacity of the object
* \pre capacity > 0
* \return A newly created object of this class allocated on the GPU (device)
*/
static unordered_base
createDeviceObject(const index_t& bucket_count,
const index_t& excess_count);
createDeviceObject(const index_t& capacity);

/**
* \brief Destroys the given object of this class on the GPU (device)
Expand Down Expand Up @@ -340,20 +336,6 @@ class unordered_base
STDGPU_HOST_DEVICE index_t
bucket_count() const;

/**
* \brief The excess count
* \return The number of excess entries for handling collisions
*/
STDGPU_HOST_DEVICE index_t
excess_count() const;

/**
* \brief The total count
* \return The total number of entries
*/
STDGPU_HOST_DEVICE index_t
total_count() const;


/**
* \brief The average number of elements per bucket
Expand All @@ -362,6 +344,13 @@ class unordered_base
STDGPU_HOST_DEVICE float
load_factor() const;

/**
* \brief The maximum average number of elements per bucket
* \return The maximum average number of elements per bucket
*/
STDGPU_HOST_DEVICE float
max_load_factor() const;


/**
* \brief The hash function
Expand Down Expand Up @@ -392,6 +381,16 @@ class unordered_base

mutable vector<index_t> _range_indices = {}; /**< The buffer of range indices */

// Deprecated
static unordered_base
createDeviceObject(const index_t& bucket_count,
const index_t& excess_count);

STDGPU_HOST_DEVICE index_t
excess_count() const;

STDGPU_HOST_DEVICE index_t
total_count() const;

STDGPU_DEVICE_ONLY bool
occupied(const index_t n) const;
Expand Down
87 changes: 87 additions & 0 deletions src/stdgpu/impl/unordered_base_detail.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@
#ifndef STDGPU_UNORDERED_BASE_DETAIL_H
#define STDGPU_UNORDERED_BASE_DETAIL_H

#include <algorithm>
#include <cmath>

#include <thrust/copy.h>
#include <thrust/distance.h>
#include <thrust/execution_policy.h>
Expand All @@ -40,6 +43,44 @@ namespace stdgpu
namespace detail
{

/**
 * \brief Computes the smallest power of two which is not smaller than the given value
 * \param[in] capacity The lower bound of the result
 * \pre capacity > 0
 * \return The smallest power of two p such that p >= capacity
 * \post result >= capacity
 * \post ispow2(result)
 * \note The result must be representable by index_t, this is not checked here
 */
inline index_t
next_pow2(const index_t capacity)
{
    STDGPU_EXPECTS(capacity > 0);

    // Integer-only doubling: a detour through std::log2()/std::ceil() would round large 64-bit capacities
    // to the nearest representable floating point value and could return a result smaller than capacity
    index_t result = 1;
    while (result < capacity)
    {
        result *= 2;
    }

    STDGPU_ENSURES(result >= capacity);
    STDGPU_ENSURES(ispow2<std::size_t>(result));

    return result;
}


/**
 * \brief Estimates a collision count from the number of buckets and the number of elements
 * \param[in] bucket_count The number of buckets (k)
 * \param[in] capacity The number of elements (n)
 * \pre bucket_count > 0
 * \pre capacity > 0
 * \return The value n * (1 - (1 - 1/k)^(n - 1)) truncated to index_t
 * \post result >= 0
 * \note Presumably this models elements being distributed uniformly over the buckets (to be confirmed)
 */
inline index_t
expected_collisions(const index_t bucket_count,
                    const index_t capacity)
{
    STDGPU_EXPECTS(bucket_count > 0);
    STDGPU_EXPECTS(capacity > 0);

    const float buckets  = static_cast<float>(bucket_count);
    const float elements = static_cast<float>(capacity);

    // (1 - 1/k): base of the power term, evaluated in double precision due to the double literals
    const double base = 1.0 - (1.0 / buckets);

    // (1 - 1/k)^(n - 1)
    const double power_term = std::pow(base, elements - 1.0);

    // n * (1 - (1 - 1/k)^(n - 1)), truncated towards zero by the conversion
    const index_t result = static_cast<index_t>(elements * (1.0 - power_term));

    STDGPU_ENSURES(result >= 0);

    return result;
}


/**
 * \brief The default maximum load factor used to derive the bucket count from a requested capacity
 * \return The constant 1.0f
 * \note Marked STDGPU_HOST_DEVICE because it is called from the STDGPU_HOST_DEVICE function max_load_factor()
 */
inline STDGPU_HOST_DEVICE float
default_max_load_factor()
{
    return 1.0f;
}


template <typename Key, typename Value, typename KeyFromValue, typename Hash, typename KeyEqual>
inline STDGPU_DEVICE_ONLY typename unordered_base<Key, Value, KeyFromValue, Hash, KeyEqual>::iterator
unordered_base<Key, Value, KeyFromValue, Hash, KeyEqual>::begin()
Expand Down Expand Up @@ -891,6 +932,14 @@ unordered_base<Key, Value, KeyFromValue, Hash, KeyEqual>::load_factor() const
}


// Reports the maximum load factor of this container, which is always the library-wide default value
template <typename Key, typename Value, typename KeyFromValue, typename Hash, typename KeyEqual>
inline STDGPU_HOST_DEVICE float
unordered_base<Key, Value, KeyFromValue, Hash, KeyEqual>::max_load_factor() const
{
    const float maximum_load = default_max_load_factor();
    return maximum_load;
}


template <typename Key, typename Value, typename KeyFromValue, typename Hash, typename KeyEqual>
inline STDGPU_HOST_DEVICE typename unordered_base<Key, Value, KeyFromValue, Hash, KeyEqual>::hasher
unordered_base<Key, Value, KeyFromValue, Hash, KeyEqual>::hash_function() const
Expand Down Expand Up @@ -935,6 +984,44 @@ unordered_base<Key, Value, KeyFromValue, Hash, KeyEqual>::clear()
}


/**
 * \brief Creates an object of this class on the GPU (device)
 * \param[in] capacity The capacity of the object
 * \pre capacity > 0
 * \return A newly created object of this class allocated on the GPU (device)
 * \note The bucket count is next_pow2(ceil(capacity / default_max_load_factor())), the excess count is
 *       2/3 of expected_collisions(bucket_count, capacity) but at least 1
 */
template <typename Key, typename Value, typename KeyFromValue, typename Hash, typename KeyEqual>
unordered_base<Key, Value, KeyFromValue, Hash, KeyEqual>
unordered_base<Key, Value, KeyFromValue, Hash, KeyEqual>::createDeviceObject(const index_t& capacity)
{
    STDGPU_EXPECTS(capacity > 0);

    // bucket count depends on default max load factor
    // NOTE: evaluated in double precision since float cannot represent every capacity above 2^24 exactly,
    //       which could round the capacity down and yield fewer buckets than the max load factor requires
    const double min_bucket_count = std::ceil(static_cast<double>(capacity) / static_cast<double>(default_max_load_factor()));
    index_t bucket_count = next_pow2(static_cast<index_t>(min_bucket_count));

    // excess count is estimated by the expected collision count and conservatively lowered since entries falling into regular buckets are already included here
    index_t excess_count = std::max<index_t>(1, expected_collisions(bucket_count, capacity) * 2 / 3);

    // regular buckets are followed by the excess entries
    index_t total_count = bucket_count + excess_count;

    unordered_base<Key, Value, KeyFromValue, Hash, KeyEqual> result;
    result._bucket_count            = bucket_count;
    result._excess_count            = excess_count;
    result._values                  = createDeviceArray<value_type>(total_count, value_type());
    result._offsets                 = createDeviceArray<index_t>(total_count, 0);
    result._occupied                = bitset::createDeviceObject(total_count);
    result._occupied_count          = atomic<int>::createDeviceObject();
    result._locks                   = mutex_array::createDeviceObject(total_count);
    result._excess_list_positions   = vector<index_t>::createDeviceObject(excess_count);
    result._key_from_value          = key_from_value();
    result._hash                    = hasher();
    result._key_equal               = key_equal();

    result._range_indices           = vector<index_t>::createDeviceObject(total_count);

    // fill the excess list with the indices of all excess entries, i.e. [bucket_count, bucket_count + excess_count)
    thrust::copy(thrust::device,
                 thrust::counting_iterator<index_t>(bucket_count), thrust::counting_iterator<index_t>(bucket_count + excess_count),
                 stdgpu::back_inserter(result._excess_list_positions));

    STDGPU_ENSURES(result._excess_list_positions.full());

    return result;
}


template <typename Key, typename Value, typename KeyFromValue, typename Hash, typename KeyEqual>
unordered_base<Key, Value, KeyFromValue, Hash, KeyEqual>
Expand Down
21 changes: 21 additions & 0 deletions src/stdgpu/impl/unordered_map_detail.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,14 @@ unordered_map<Key, T, Hash, KeyEqual>::load_factor() const
}


// Forwards the query to the underlying base container
template <typename Key, typename T, typename Hash, typename KeyEqual>
inline STDGPU_HOST_DEVICE float
unordered_map<Key, T, Hash, KeyEqual>::max_load_factor() const
{
    const float maximum_load = _base.max_load_factor();
    return maximum_load;
}


template <typename Key, typename T, typename Hash, typename KeyEqual>
inline STDGPU_HOST_DEVICE typename unordered_map<Key, T, Hash, KeyEqual>::hasher
unordered_map<Key, T, Hash, KeyEqual>::hash_function() const
Expand Down Expand Up @@ -302,6 +310,19 @@ unordered_map<Key, T, Hash, KeyEqual>::clear()



// Creates a map on the GPU (device) for the given capacity by delegating the allocation to the base container
template <typename Key, typename T, typename Hash, typename KeyEqual>
unordered_map<Key, T, Hash, KeyEqual>
unordered_map<Key, T, Hash, KeyEqual>::createDeviceObject(const index_t& capacity)
{
    STDGPU_EXPECTS(capacity > 0);

    // The base container extracts the key from the stored value via select1st
    using base_container = detail::unordered_base<key_type, value_type, detail::select1st<value_type>, hasher, key_equal>;

    unordered_map<Key, T, Hash, KeyEqual> device_map;
    device_map._base = base_container::createDeviceObject(capacity);

    return device_map;
}


template <typename Key, typename T, typename Hash, typename KeyEqual>
unordered_map<Key, T, Hash, KeyEqual>
unordered_map<Key, T, Hash, KeyEqual>::createDeviceObject(const index_t& bucket_count,
Expand Down
21 changes: 21 additions & 0 deletions src/stdgpu/impl/unordered_set_detail.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,14 @@ unordered_set<Key, Hash, KeyEqual>::load_factor() const
}


// Forwards the query to the underlying base container
template <typename Key, typename Hash, typename KeyEqual>
inline STDGPU_HOST_DEVICE float
unordered_set<Key, Hash, KeyEqual>::max_load_factor() const
{
    const float maximum_load = _base.max_load_factor();
    return maximum_load;
}


template <typename Key, typename Hash, typename KeyEqual>
inline STDGPU_HOST_DEVICE typename unordered_set<Key, Hash, KeyEqual>::hasher
unordered_set<Key, Hash, KeyEqual>::hash_function() const
Expand Down Expand Up @@ -287,6 +295,19 @@ unordered_set<Key, Hash, KeyEqual>::clear()



// Creates a set on the GPU (device) for the given capacity by delegating the allocation to the base container
template <typename Key, typename Hash, typename KeyEqual>
unordered_set<Key, Hash, KeyEqual>
unordered_set<Key, Hash, KeyEqual>::createDeviceObject(const index_t& capacity)
{
    STDGPU_EXPECTS(capacity > 0);

    // The stored value is the key itself, hence thrust::identity as key extractor
    using base_container = detail::unordered_base<key_type, value_type, thrust::identity<key_type>, hasher, key_equal>;

    unordered_set<Key, Hash, KeyEqual> device_set;
    device_set._base = base_container::createDeviceObject(capacity);

    return device_set;
}


template <typename Key, typename Hash, typename KeyEqual>
unordered_set<Key, Hash, KeyEqual>
unordered_set<Key, Hash, KeyEqual>::createDeviceObject(const index_t& bucket_count,
Expand Down
23 changes: 22 additions & 1 deletion src/stdgpu/unordered_map.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ class unordered_map


/**
* \deprecated Replaced by createDeviceObject(const index_t& capacity)
* \brief Creates an object of this class on the GPU (device)
* \param[in] bucket_count The number of buckets
* \param[in] excess_count The number of excess entries
Expand All @@ -104,10 +105,20 @@ class unordered_map
* \pre ispow2(bucket_count)
* \return A newly created object of this class allocated on the GPU (device)
*/
[[deprecated("Replaced by createDeviceObject(const index_t& capacity)")]]
static unordered_map
createDeviceObject(const index_t& bucket_count,
const index_t& excess_count);

/**
* \brief Creates an object of this class on the GPU (device)
* \param[in] capacity The capacity of the object
* \pre capacity > 0
* \return A newly created object of this class allocated on the GPU (device)
*/
static unordered_map
createDeviceObject(const index_t& capacity);

/**
* \brief Destroys the given object of this class on the GPU (device)
* \param[in] device_object The object allocated on the GPU (device)
Expand Down Expand Up @@ -333,7 +344,6 @@ class unordered_map
/**
* \brief The maximum size
* \return The maximum size
* \note Equivalent to total_count()
*/
STDGPU_HOST_DEVICE index_t
max_size() const;
Expand All @@ -346,16 +356,20 @@ class unordered_map
bucket_count() const;

/**
* \deprecated Implementation detail of deprecated createDeviceObject(const index_t& bucket_count, const index_t& excess_count) function
* \brief The excess count
* \return The number of excess entries for handling collisions
*/
[[deprecated("Implementation detail of deprecated createDeviceObject(const index_t& bucket_count, const index_t& excess_count) function")]]
STDGPU_HOST_DEVICE index_t
excess_count() const;

/**
* \deprecated Implementation detail of deprecated createDeviceObject(const index_t& bucket_count, const index_t& excess_count) function
* \brief The total count
* \return The total number of entries
*/
[[deprecated("Implementation detail of deprecated createDeviceObject(const index_t& bucket_count, const index_t& excess_count) function")]]
STDGPU_HOST_DEVICE index_t
total_count() const;

Expand All @@ -367,6 +381,13 @@ class unordered_map
STDGPU_HOST_DEVICE float
load_factor() const;

/**
* \brief The maximum average number of elements per bucket
* \return The maximum average number of elements per bucket
*/
STDGPU_HOST_DEVICE float
max_load_factor() const;


/**
* \brief The hash function
Expand Down
Loading

0 comments on commit 2fe5cdb

Please sign in to comment.