diff --git a/src/care/SortFuser.h b/src/care/SortFuser.h index e6a2923e..884ca823 100644 --- a/src/care/SortFuser.h +++ b/src/care/SortFuser.h @@ -275,7 +275,7 @@ namespace care { // do the unique of the concatenated sort result int outLen; - care::uniqArray(RAJAExec{}, m_concatenated_result, m_total_length, concatenated_out, outLen); + care::uniqArray(RAJAExec{}, reinterpret_cast&>(m_concatenated_result), m_total_length, concatenated_out, outLen); /// determine new offsets by looking for boundaries in max_range host_device_ptr out_offsets(m_num_arrays+1, "out_offsets"); diff --git a/src/care/algorithm_decl.h b/src/care/algorithm_decl.h index a3caf94a..e2751547 100644 --- a/src/care/algorithm_decl.h +++ b/src/care/algorithm_decl.h @@ -346,14 +346,13 @@ void sortArray(RAJADeviceExec, care::host_device_ptr &Array, size_t #endif // defined(CARE_PARALLEL_DEVICE) -// TODO should this have an unused noCopy parameter? template class Accessor = care::CARE_DEFAULT_ACCESSOR> -void uniqArray(RAJA::seq_exec, care::host_device_ptr Array, size_t len, care::host_device_ptr & outArray, int & newLen); +void uniqArray(RAJA::seq_exec, care::host_device_ptr Array, size_t len, care::host_device_ptr & outArray, int & newLen); template class Accessor = care::CARE_DEFAULT_ACCESSOR> int uniqArray(RAJA::seq_exec exec, care::host_device_ptr & Array, size_t len, bool noCopy=false); #ifdef CARE_PARALLEL_DEVICE template class Accessor = care::CARE_DEFAULT_ACCESSOR> -void uniqArray(RAJADeviceExec, care::host_device_ptr Array, size_t len, care::host_device_ptr & outArray, int & outLen, bool noCopy=false); +void uniqArray(RAJADeviceExec, care::host_device_ptr Array, size_t len, care::host_device_ptr & outArray, int & outLen); template class Accessor = care::CARE_DEFAULT_ACCESSOR> int uniqArray(RAJADeviceExec exec, care::host_device_ptr & Array, size_t len, bool noCopy=false); #endif // defined(CARE_PARALLEL_DEVICE) diff --git a/src/care/algorithm_impl.h b/src/care/algorithm_impl.h index 
aad9fb8c..8399bf1c 100644 --- a/src/care/algorithm_impl.h +++ b/src/care/algorithm_impl.h @@ -449,6 +449,10 @@ CARE_INLINE void IntersectArrays(RAJA::seq_exec exec, * If returnUpperBound is set to true, this will return the * index corresponding to the earliest entry that is greater * than num. + * + * @NOTE: Intentionally implemented this using only the '<' + * operator to follow strict weak ordering semantics. + * ************************************************************************/ template @@ -471,7 +475,7 @@ CARE_HOST_DEVICE CARE_INLINE int BinarySearch(const T *map, const int start, while (khi-klo > 1) { k = (khi+klo) >> 1 ; - if (map[k] == num) { + if (! (map[k] < num) && !(num < map[k])) { if (returnUpperBound) { khi = k+1; klo = k; @@ -481,7 +485,7 @@ CARE_HOST_DEVICE CARE_INLINE int BinarySearch(const T *map, const int start, return k ; } } - else if (map[k] > num) { + else if (num < map[k]) { khi = k ; } else { @@ -491,19 +495,19 @@ CARE_HOST_DEVICE CARE_INLINE int BinarySearch(const T *map, const int start, if (returnUpperBound) { k = klo; // the lower option bounds num - if (map[k] > num) { + if (num < map[k]) { return k; } // the upper option is within the range of the map index set if (khi < start + mapSize) { // Note: fix for last test in TEST(algorithm, binarysearch). This algorithm has failed to pick up the upper // bound above 1 in the array {0, 1, 1, 1, 1, 1, 6}. Having 1 repeated confused the algorithm. 
- while ((khi < start + mapSize) && (map[khi] == num)) { + while ((khi < start + mapSize) && (!(map[khi] < num) && !(num < map[khi]))) { ++khi; } // the upper option bounds num - if ((khi < start + mapSize) && (map[khi] > num)) { + if ((khi < start + mapSize) && (num < map[khi])) { return khi; } // neither the upper or lower option bound num @@ -514,8 +518,8 @@ CARE_HOST_DEVICE CARE_INLINE int BinarySearch(const T *map, const int start, return -1; } } - - if (map[--k] == num) { + --k; + if (!(map[k] < num) && !(num < map[k])) { return k ; } else { @@ -547,8 +551,8 @@ CARE_HOST_DEVICE CARE_INLINE int BinarySearch(const care::host_device_ptr class Accessor> -CARE_INLINE void uniqArray(RAJADeviceExec, care::host_device_ptr Array, size_t len, - care::host_device_ptr & outArray, int & outLen, bool noCopy) +CARE_INLINE void uniqArray(RAJADeviceExec, care::host_device_ptr Array, size_t len, + care::host_device_ptr & outArray, int & outLen) { care::host_device_ptr uniq(len+1,"uniqArray uniq"); fill_n(uniq, len+1, 0); @@ -582,7 +586,7 @@ CARE_INLINE int uniqArray(RAJADeviceExec exec, care::host_device_ptr tmp; int newLen; - uniqArray(exec, Array, len, tmp, newLen); + uniqArray(exec, Array, len, tmp, newLen); if (noCopy) { Array.free(); Array = tmp; @@ -602,11 +606,11 @@ CARE_INLINE int uniqArray(RAJADeviceExec exec, care::host_device_ptr class Accessor> -CARE_INLINE void uniqArray(RAJA::seq_exec, care::host_device_ptr Array, size_t len, +CARE_INLINE void uniqArray(RAJA::seq_exec, care::host_device_ptr Array, size_t len, care::host_device_ptr & outArray, int & newLen) { - CHAIDataGetter getter {}; - const auto * rawData = getter.getConstRawArrayData(Array); + CHAIDataGetter getter {}; + auto * rawData = getter.getConstRawArrayData(Array); newLen = 0 ; care::host_ptr arrout = nullptr ; outArray = nullptr; diff --git a/src/care/care_inst.h b/src/care/care_inst.h index 527b9298..c276feac 100644 --- a/src/care/care_inst.h +++ b/src/care/care_inst.h @@ -215,14 +215,14 @@ 
CARE_HOST_DEVICE int BinarySearch(const care::host_device_ptr&, c #ifdef CARE_PARALLEL_DEVICE CARE_EXTERN template CARE_DLL_API -void uniqArray(RAJADeviceExec, care::host_device_ptr, size_t, care::host_device_ptr &, int &, bool) ; +void uniqArray(RAJADeviceExec, care::host_device_ptr, size_t, care::host_device_ptr &, int &) ; CARE_EXTERN template CARE_DLL_API -void uniqArray(RAJADeviceExec, care::host_device_ptr, size_t, care::host_device_ptr &, int &, bool) ; +void uniqArray(RAJADeviceExec, care::host_device_ptr, size_t, care::host_device_ptr &, int &) ; CARE_EXTERN template CARE_DLL_API -void uniqArray(RAJADeviceExec, care::host_device_ptr, size_t, care::host_device_ptr &, int &, bool) ; +void uniqArray(RAJADeviceExec, care::host_device_ptr, size_t, care::host_device_ptr &, int &) ; #if CARE_HAVE_LLNL_GLOBALID CARE_EXTERN template CARE_DLL_API -void uniqArray(RAJADeviceExec, care::host_device_ptr, size_t, care::host_device_ptr &, int &, bool) ; +void uniqArray(RAJADeviceExec, care::host_device_ptr, size_t, care::host_device_ptr &, int &) ; #endif CARE_EXTERN template CARE_DLL_API @@ -237,14 +237,14 @@ int uniqArray(RAJADeviceExec, care::host_device_ptr, size_t, care::host_device_ptr &, int &, bool) ; +void uniqArray(RAJADeviceExec, care::host_device_ptr, size_t, care::host_device_ptr &, int &) ; CARE_EXTERN template CARE_DLL_API -void uniqArray(RAJADeviceExec, care::host_device_ptr, size_t, care::host_device_ptr &, int &, bool) ; +void uniqArray(RAJADeviceExec, care::host_device_ptr, size_t, care::host_device_ptr &, int &) ; CARE_EXTERN template CARE_DLL_API -void uniqArray(RAJADeviceExec, care::host_device_ptr, size_t, care::host_device_ptr &, int &, bool) ; +void uniqArray(RAJADeviceExec, care::host_device_ptr, size_t, care::host_device_ptr &, int &) ; #if CARE_HAVE_LLNL_GLOBALID CARE_EXTERN template CARE_DLL_API -void uniqArray(RAJADeviceExec, care::host_device_ptr, size_t, care::host_device_ptr &, int &, bool) ; +void uniqArray(RAJADeviceExec, 
care::host_device_ptr, size_t, care::host_device_ptr &, int &) ; #endif CARE_EXTERN template CARE_DLL_API @@ -261,14 +261,14 @@ int uniqArray(RAJADeviceExec, care::host_device_ptr, size_t, care::host_device_ptr &, int &) ; +void uniqArray(RAJA::seq_exec, care::host_device_ptr, size_t, care::host_device_ptr &, int &) ; CARE_EXTERN template CARE_DLL_API -void uniqArray(RAJA::seq_exec, care::host_device_ptr, size_t, care::host_device_ptr &, int &) ; +void uniqArray(RAJA::seq_exec, care::host_device_ptr, size_t, care::host_device_ptr &, int &) ; CARE_EXTERN template CARE_DLL_API -void uniqArray(RAJA::seq_exec, care::host_device_ptr, size_t, care::host_device_ptr &, int &) ; +void uniqArray(RAJA::seq_exec, care::host_device_ptr, size_t, care::host_device_ptr &, int &) ; #if CARE_HAVE_LLNL_GLOBALID CARE_EXTERN template CARE_DLL_API -void uniqArray(RAJA::seq_exec, care::host_device_ptr, size_t, care::host_device_ptr &, int &) ; +void uniqArray(RAJA::seq_exec, care::host_device_ptr, size_t, care::host_device_ptr &, int &) ; #endif CARE_EXTERN template CARE_DLL_API @@ -283,14 +283,14 @@ int uniqArray(RAJA::seq_exec exec, care::host_device_ptr, size_t, care::host_device_ptr &, int &) ; +void uniqArray(RAJA::seq_exec, care::host_device_ptr, size_t, care::host_device_ptr &, int &) ; CARE_EXTERN template CARE_DLL_API -void uniqArray(RAJA::seq_exec, care::host_device_ptr, size_t, care::host_device_ptr &, int &) ; +void uniqArray(RAJA::seq_exec, care::host_device_ptr, size_t, care::host_device_ptr &, int &) ; CARE_EXTERN template CARE_DLL_API -void uniqArray(RAJA::seq_exec, care::host_device_ptr, size_t, care::host_device_ptr &, int &) ; +void uniqArray(RAJA::seq_exec, care::host_device_ptr, size_t, care::host_device_ptr &, int &) ; #if CARE_HAVE_LLNL_GLOBALID CARE_EXTERN template CARE_DLL_API -void uniqArray(RAJA::seq_exec, care::host_device_ptr, size_t, care::host_device_ptr &, int &) ; +void uniqArray(RAJA::seq_exec, care::host_device_ptr, size_t, care::host_device_ptr &, int 
&) ; #endif CARE_EXTERN template CARE_DLL_API diff --git a/src/care/host_device_map.h b/src/care/host_device_map.h index 35463b83..d22181e4 100644 --- a/src/care/host_device_map.h +++ b/src/care/host_device_map.h @@ -44,6 +44,11 @@ namespace care { { public: host_device_map(size_t max_entries, mapped_type miss_signal); + host_device_map(size_t max_entries); + host_device_map() noexcept; + host_device_map(host_device_map const & other) noexcept; + host_device_map(host_device_map && other) noexcept; + host_device_map& operator=(host_device_map&& other) noexcept; CARE_HOST_DEVICE inline void emplace(key_type key, mapped_type val) const; CARE_HOST_DEVICE inline mapped_type at(key_type key) const; void sort(); @@ -60,8 +65,12 @@ namespace care { template class host_device_map< key_type, mapped_type, RAJA::seq_exec> { public: - // constructor - host_device_map(size_t max_entries, mapped_type miss_signal) : m_map(), m_max_size(max_entries), m_signal(miss_signal) { + // default constructor + host_device_map() noexcept = default; + + // constructor taking max number of entries + host_device_map(size_t max_entries) : host_device_map{} { + m_max_size = max_entries; m_map = new std::map{}; m_size = new int(); *m_size = 0; @@ -71,6 +80,49 @@ namespace care { *m_next_iterator_index = 0; } + // constructor that also takes the miss signal + host_device_map(size_t max_entries, mapped_type miss_signal) : host_device_map{max_entries} { + m_signal = miss_signal; + } + + // copy constructor + host_device_map(host_device_map const & other) noexcept = default; + + // move constructor + host_device_map(host_device_map && other) noexcept { + delete m_map; + delete m_size; + delete m_iterator; + delete m_next_iterator_index; + m_map = other.m_map; + m_size = other.m_size; + m_iterator = other.m_iterator; + m_next_iterator_index = other.m_next_iterator_index; + m_max_size = other.m_max_size; + m_signal = other.m_signal; + other.m_map = nullptr; + other.m_size = nullptr; + other.m_iterator = 
nullptr; + other.m_next_iterator_index = nullptr; + } + host_device_map & operator=(host_device_map && other) noexcept { + delete m_map; + delete m_size; + delete m_iterator; + delete m_next_iterator_index; + m_map = other.m_map; + m_size = other.m_size; + m_iterator = other.m_iterator; + m_next_iterator_index = other.m_next_iterator_index; + m_max_size = other.m_max_size; + m_signal = other.m_signal; + other.m_map = nullptr; + other.m_size = nullptr; + other.m_iterator = nullptr; + other.m_next_iterator_index = nullptr; + return *this; + } + // emplace a key value pair inline void emplace(key_type key, mapped_type val) const { m_map->emplace(key, val); @@ -141,8 +193,8 @@ namespace care { typename std::map::iterator * m_iterator = nullptr; int * m_next_iterator_index = nullptr; int * m_size = nullptr; - int m_max_size; - mapped_type m_signal; + int m_max_size = 0; + mapped_type m_signal {}; }; #endif // !CARE_ENABLE_GPU_SIMULATION_MODE @@ -156,22 +208,57 @@ namespace care { { public: using int_ptr = care::host_device_ptr; - // constructor - host_device_map(size_t max_entries, mapped_type miss_signal) : m_max_size(max_entries), m_signal(miss_signal), m_gpu_map{max_entries} { - // m_size_ptr will be atomically incremented as elements are emplaced into the map + + // default constructor + host_device_map() noexcept = default; + + // constructor taking max_entries + host_device_map(size_t max_entries) : m_max_size(max_entries), m_signal(0), m_gpu_map{max_entries} { + // m_size_ptr[0] will be atomically incremented as elements are emplaced into the map m_size_ptr = int_ptr(1, "map_size"); // set size to 0 clear(); } + // constructor that also takes the miss signal + host_device_map(size_t max_entries, mapped_type miss_signal) : host_device_map{max_entries} { + m_signal = miss_signal; + } + + // copy constructor + host_device_map(host_device_map const & other) noexcept = default; + + // move constructor + CARE_HOST_DEVICE host_device_map(host_device_map&& other) 
noexcept { + m_max_size = other.m_max_size; + m_signal = other.m_signal; + m_gpu_map = std::move(other.m_gpu_map); + m_size_ptr.free(); + m_size_ptr = other.m_size_ptr; + other.m_size_ptr = nullptr; + m_size = other.m_size; + } + + // move assignment + host_device_map & operator=(host_device_map && other) noexcept { + m_max_size = other.m_max_size; + m_signal = other.m_signal; + m_gpu_map = std::move(other.m_gpu_map); + m_size_ptr.free(); + m_size_ptr = other.m_size_ptr; + other.m_size_ptr = nullptr; + m_size = other.m_size; + return *this; + } + // emplace a key value pair, using return of atomic increment to provide the initial insertion index - inline CARE_DEVICE void emplace(key_type key, mapped_type val) const { + inline CARE_HOST_DEVICE void emplace(key_type key, mapped_type val) const { care::local_ptr size_ptr = m_size_ptr; int index = ATOMIC_ADD(size_ptr[0], 1); - // TODO Add control for this check - if (size_ptr[0] > m_max_size) { - printf("[CARE] Warning: host_device_map exceeds max size %d > %d\n", size_ptr[0], m_max_size); - } + // commenting out to avoid having printfs compiled into every kernel that uses emplace + //if (size_ptr[0] > m_max_size) { + // printf("[CARE] Warning: host_device_map exceeds max size %d > %d\n", size_ptr[0], m_max_size); + //} LocalKeyValueSorter const & local_map = m_gpu_map; local_map.setKey(index, key); local_map.setValue(index, val); @@ -179,7 +266,7 @@ namespace care { // lookups (valid after a sort() call) are done by binary searching the keys and using the // index of the located key to grab the appropriate value - inline CARE_DEVICE mapped_type at(key_type key) const { + inline CARE_HOST_DEVICE mapped_type at(key_type key) const { int index = care::BinarySearch(m_gpu_map.keys(),0,m_size,key); if (index >= 0) { return m_gpu_map.values()[index]; @@ -219,20 +306,22 @@ namespace care { // preallocate buffers for adding up to size elements void reserve(int max_size) { if (m_max_size < max_size) { - if (m_size == 0) { - 
m_gpu_map = std::move(KeyValueSorter{static_cast(max_size)}); - } - else { + KeyValueSorter new_map{ + static_cast(max_size)}; + + if (m_size > 0) { // copy existing state into new map - KeyValueSorter new_map{static_cast(max_size)}; auto & map = m_gpu_map; + CARE_STREAM_LOOP(i, 0, m_size) { new_map.setKey(i, map.key(i)); new_map.setValue(i, map.value(i)); } CARE_STREAM_LOOP_END - m_gpu_map = std::move(new_map); } + + m_gpu_map = std::move(new_map); } + m_max_size = max_size; } @@ -264,9 +353,9 @@ namespace care { private: int_ptr m_size_ptr = nullptr; int m_size = 0; - int m_max_size; - int m_signal; - KeyValueSorter m_gpu_map; + int m_max_size = 0; + mapped_type m_signal {}; + KeyValueSorter m_gpu_map{0}; }; #endif // defined(CARE_PARALLEL_DEVICE) || CARE_ENABLE_GPU_SIMULATION_MODE @@ -290,10 +379,12 @@ namespace care { class host_device_map { public: - + // default constructor + host_device_map() noexcept = default; + // constructor - host_device_map(size_t max_entries, mapped_type signal) : m_max_size(max_entries), m_signal(signal) { - + host_device_map(size_t max_entries) : host_device_map{} { + m_max_size = max_entries; // m_size_ptr will be atomically incremented as elements are emplaced into the map m_size_ptr = new int(); // set size to 0 @@ -302,6 +393,35 @@ namespace care { m_map = KeyValueSorter{max_entries}; } + // constructor + host_device_map(size_t max_entries, mapped_type signal) : host_device_map{max_entries} { + m_signal = signal; + } + + // copy constructor + host_device_map(host_device_map const & other) noexcept = default; + + // move constructor + host_device_map(host_device_map && other) noexcept { + delete m_size_ptr; + m_size_ptr = other.m_size_ptr; + m_size = other.m_size; + m_map = std::move(other.m_map); + m_max_size = other.m_max_size; + m_signal = other.m_signal; + } + + // move assignment + host_device_map & operator=(host_device_map && other) noexcept { + delete m_size_ptr; + m_size_ptr = other.m_size_ptr; + m_size = other.m_size; + 
m_map = std::move(other.m_map); + m_max_size = other.m_max_size; + m_signal = other.m_signal; + return *this; + } + // emplace a key value pair,increment length inline void emplace(key_type key, mapped_type val) const { @@ -371,11 +491,11 @@ namespace care { private: mutable int * m_size_ptr = nullptr; mutable int m_size = 0; - mutable int m_max_size; - KeyValueSorter m_map; + mutable int m_max_size = 0; + KeyValueSorter m_map{}; /* hasBeenSorted may be used in the future to enable an implicit sort on lambda capture */ bool hasBeenSorted = false; - int m_signal; + mapped_type m_signal {}; }; }