Skip to content

Commit

Permalink
Merge branch 'develop' into bugfix/probinso/tp_performance_fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
robinson96 authored Sep 5, 2024
2 parents f024770 + 62a352f commit 9ca7901
Show file tree
Hide file tree
Showing 5 changed files with 184 additions and 61 deletions.
2 changes: 1 addition & 1 deletion src/care/SortFuser.h
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,7 @@ namespace care {

// remove duplicate entries from the concatenated sort result
int outLen;
care::uniqArray(RAJAExec{}, m_concatenated_result, m_total_length, concatenated_out, outLen);
care::uniqArray(RAJAExec{}, reinterpret_cast<host_device_ptr<const T>&>(m_concatenated_result), m_total_length, concatenated_out, outLen);

/// determine new offsets by looking for boundaries in max_range
host_device_ptr<int> out_offsets(m_num_arrays+1, "out_offsets");
Expand Down
5 changes: 2 additions & 3 deletions src/care/algorithm_decl.h
Original file line number Diff line number Diff line change
Expand Up @@ -346,14 +346,13 @@ void sortArray(RAJADeviceExec, care::host_device_ptr<T, Accessor> &Array, size_t

#endif // defined(CARE_PARALLEL_DEVICE)

// TODO should this have an unused noCopy parameter?
template <typename T, template<class A> class Accessor = care::CARE_DEFAULT_ACCESSOR>
void uniqArray(RAJA::seq_exec, care::host_device_ptr<T, Accessor> Array, size_t len, care::host_device_ptr<T, Accessor> & outArray, int & newLen);
void uniqArray(RAJA::seq_exec, care::host_device_ptr<const T, Accessor> Array, size_t len, care::host_device_ptr<T, Accessor> & outArray, int & newLen);
template <typename T, template<class A> class Accessor = care::CARE_DEFAULT_ACCESSOR>
int uniqArray(RAJA::seq_exec exec, care::host_device_ptr<T, Accessor> & Array, size_t len, bool noCopy=false);
#ifdef CARE_PARALLEL_DEVICE
template <typename T, template<class A> class Accessor = care::CARE_DEFAULT_ACCESSOR>
void uniqArray(RAJADeviceExec, care::host_device_ptr<T, Accessor> Array, size_t len, care::host_device_ptr<T, Accessor> & outArray, int & outLen, bool noCopy=false);
void uniqArray(RAJADeviceExec, care::host_device_ptr<const T, Accessor> Array, size_t len, care::host_device_ptr<T, Accessor> & outArray, int & outLen);
template <typename T, template<class A> class Accessor = care::CARE_DEFAULT_ACCESSOR>
int uniqArray(RAJADeviceExec exec, care::host_device_ptr<T, Accessor> & Array, size_t len, bool noCopy=false);
#endif // defined(CARE_PARALLEL_DEVICE)
Expand Down
30 changes: 17 additions & 13 deletions src/care/algorithm_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -449,6 +449,10 @@ CARE_INLINE void IntersectArrays(RAJA::seq_exec exec,
* If returnUpperBound is set to true, this will return the
* index corresponding to the earliest entry that is greater
* than num.
*
* @NOTE: Intentionally implemented using only the '<' operator,
* following strict weak ordering semantics.
*
************************************************************************/

template <typename T>
Expand All @@ -471,7 +475,7 @@ CARE_HOST_DEVICE CARE_INLINE int BinarySearch(const T *map, const int start,

while (khi-klo > 1) {
k = (khi+klo) >> 1 ;
if (map[k] == num) {
if (! (map[k] < num) && !(num < map[k])) {
if (returnUpperBound) {
khi = k+1;
klo = k;
Expand All @@ -481,7 +485,7 @@ CARE_HOST_DEVICE CARE_INLINE int BinarySearch(const T *map, const int start,
return k ;
}
}
else if (map[k] > num) {
else if (num < map[k]) {
khi = k ;
}
else {
Expand All @@ -491,19 +495,19 @@ CARE_HOST_DEVICE CARE_INLINE int BinarySearch(const T *map, const int start,
if (returnUpperBound) {
k = klo;
// the lower option bounds num
if (map[k] > num) {
if (num < map[k]) {
return k;
}
// the upper option is within the range of the map index set
if (khi < start + mapSize) {
// Note: fix for the last test in TEST(algorithm, binarysearch). The algorithm previously failed to find the
// upper bound above 1 in the array {0, 1, 1, 1, 1, 1, 6}; the repeated 1s confused it, so skip past them here.
while ((khi < start + mapSize) && (map[khi] == num)) {
while ((khi < start + mapSize) && (!(map[khi] < num) && !(num < map[khi]))) {
++khi;
}

// the upper option bounds num
if ((khi < start + mapSize) && (map[khi] > num)) {
if ((khi < start + mapSize) && (num < map[khi])) {
return khi;
}
// neither the upper or lower option bound num
Expand All @@ -514,8 +518,8 @@ CARE_HOST_DEVICE CARE_INLINE int BinarySearch(const T *map, const int start,
return -1;
}
}

if (map[--k] == num) {
--k;
if (!(map[k] < num) && !(num < map[k])) {
return k ;
}
else {
Expand Down Expand Up @@ -547,8 +551,8 @@ CARE_HOST_DEVICE CARE_INLINE int BinarySearch(const care::host_device_ptr<const
* scan.
************************************************************************/
template <typename T, template<class A> class Accessor>
CARE_INLINE void uniqArray(RAJADeviceExec, care::host_device_ptr<T, Accessor> Array, size_t len,
care::host_device_ptr<T, Accessor> & outArray, int & outLen, bool noCopy)
CARE_INLINE void uniqArray(RAJADeviceExec, care::host_device_ptr<const T, Accessor> Array, size_t len,
care::host_device_ptr<T, Accessor> & outArray, int & outLen)
{
care::host_device_ptr<int> uniq(len+1,"uniqArray uniq");
fill_n(uniq, len+1, 0);
Expand Down Expand Up @@ -582,7 +586,7 @@ CARE_INLINE int uniqArray(RAJADeviceExec exec, care::host_device_ptr<T, Accessor
{
care::host_device_ptr<T, Accessor> tmp;
int newLen;
uniqArray(exec, Array, len, tmp, newLen);
uniqArray<T, Accessor>(exec, Array, len, tmp, newLen);
if (noCopy) {
Array.free();
Array = tmp;
Expand All @@ -602,11 +606,11 @@ CARE_INLINE int uniqArray(RAJADeviceExec exec, care::host_device_ptr<T, Accessor
* Purpose : CPU version of uniqArray.
************************************************************************/
template <typename T, template<class A> class Accessor>
CARE_INLINE void uniqArray(RAJA::seq_exec, care::host_device_ptr<T, Accessor> Array, size_t len,
CARE_INLINE void uniqArray(RAJA::seq_exec, care::host_device_ptr<const T, Accessor> Array, size_t len,
care::host_device_ptr<T, Accessor> & outArray, int & newLen)
{
CHAIDataGetter<T, RAJA::seq_exec> getter {};
const auto * rawData = getter.getConstRawArrayData(Array);
CHAIDataGetter<const T, RAJA::seq_exec> getter {};
auto * rawData = getter.getConstRawArrayData(Array);
newLen = 0 ;
care::host_ptr<T> arrout = nullptr ;
outArray = nullptr;
Expand Down
32 changes: 16 additions & 16 deletions src/care/care_inst.h
Original file line number Diff line number Diff line change
Expand Up @@ -215,14 +215,14 @@ CARE_HOST_DEVICE int BinarySearch(const care::host_device_ptr<const GIDTYPE>&, c
#ifdef CARE_PARALLEL_DEVICE

CARE_EXTERN template CARE_DLL_API
void uniqArray(RAJADeviceExec, care::host_device_ptr<int, care::NoOpAccessor>, size_t, care::host_device_ptr<int, care::NoOpAccessor> &, int &, bool) ;
void uniqArray(RAJADeviceExec, care::host_device_ptr<int const, care::NoOpAccessor>, size_t, care::host_device_ptr<int, care::NoOpAccessor> &, int &) ;
CARE_EXTERN template CARE_DLL_API
void uniqArray(RAJADeviceExec, care::host_device_ptr<float, care::NoOpAccessor>, size_t, care::host_device_ptr<float, care::NoOpAccessor> &, int &, bool) ;
void uniqArray(RAJADeviceExec, care::host_device_ptr<float const, care::NoOpAccessor>, size_t, care::host_device_ptr<float, care::NoOpAccessor> &, int &) ;
CARE_EXTERN template CARE_DLL_API
void uniqArray(RAJADeviceExec, care::host_device_ptr<double, care::NoOpAccessor>, size_t, care::host_device_ptr<double, care::NoOpAccessor> &, int &, bool) ;
void uniqArray(RAJADeviceExec, care::host_device_ptr<double const, care::NoOpAccessor>, size_t, care::host_device_ptr<double, care::NoOpAccessor> &, int &) ;
#if CARE_HAVE_LLNL_GLOBALID
CARE_EXTERN template CARE_DLL_API
void uniqArray(RAJADeviceExec, care::host_device_ptr<globalID, care::NoOpAccessor>, size_t, care::host_device_ptr<globalID, care::NoOpAccessor> &, int &, bool) ;
void uniqArray(RAJADeviceExec, care::host_device_ptr<globalID const, care::NoOpAccessor>, size_t, care::host_device_ptr<globalID, care::NoOpAccessor> &, int &) ;
#endif

CARE_EXTERN template CARE_DLL_API
Expand All @@ -237,14 +237,14 @@ int uniqArray(RAJADeviceExec, care::host_device_ptr<globalID, care::NoOpAccessor
#endif

CARE_EXTERN template CARE_DLL_API
void uniqArray(RAJADeviceExec, care::host_device_ptr<int, care::RaceConditionAccessor>, size_t, care::host_device_ptr<int, care::RaceConditionAccessor> &, int &, bool) ;
void uniqArray(RAJADeviceExec, care::host_device_ptr<int const, care::RaceConditionAccessor>, size_t, care::host_device_ptr<int, care::RaceConditionAccessor> &, int &) ;
CARE_EXTERN template CARE_DLL_API
void uniqArray(RAJADeviceExec, care::host_device_ptr<float, care::RaceConditionAccessor>, size_t, care::host_device_ptr<float, care::RaceConditionAccessor> &, int &, bool) ;
void uniqArray(RAJADeviceExec, care::host_device_ptr<float const, care::RaceConditionAccessor>, size_t, care::host_device_ptr<float, care::RaceConditionAccessor> &, int &) ;
CARE_EXTERN template CARE_DLL_API
void uniqArray(RAJADeviceExec, care::host_device_ptr<double, care::RaceConditionAccessor>, size_t, care::host_device_ptr<double, care::RaceConditionAccessor> &, int &, bool) ;
void uniqArray(RAJADeviceExec, care::host_device_ptr<double const, care::RaceConditionAccessor>, size_t, care::host_device_ptr<double, care::RaceConditionAccessor> &, int &) ;
#if CARE_HAVE_LLNL_GLOBALID
CARE_EXTERN template CARE_DLL_API
void uniqArray(RAJADeviceExec, care::host_device_ptr<globalID, care::RaceConditionAccessor>, size_t, care::host_device_ptr<globalID, care::RaceConditionAccessor> &, int &, bool) ;
void uniqArray(RAJADeviceExec, care::host_device_ptr<globalID const, care::RaceConditionAccessor>, size_t, care::host_device_ptr<globalID, care::RaceConditionAccessor> &, int &) ;
#endif

CARE_EXTERN template CARE_DLL_API
Expand All @@ -261,14 +261,14 @@ int uniqArray(RAJADeviceExec, care::host_device_ptr<globalID, care::RaceConditio
#endif // defined(CARE_PARALLEL_DEVICE)

CARE_EXTERN template CARE_DLL_API
void uniqArray(RAJA::seq_exec, care::host_device_ptr<int, care::NoOpAccessor>, size_t, care::host_device_ptr<int, care::NoOpAccessor> &, int &) ;
void uniqArray(RAJA::seq_exec, care::host_device_ptr<int const, care::NoOpAccessor>, size_t, care::host_device_ptr<int, care::NoOpAccessor> &, int &) ;
CARE_EXTERN template CARE_DLL_API
void uniqArray(RAJA::seq_exec, care::host_device_ptr<float, care::NoOpAccessor>, size_t, care::host_device_ptr<float, care::NoOpAccessor> &, int &) ;
void uniqArray(RAJA::seq_exec, care::host_device_ptr<float const, care::NoOpAccessor>, size_t, care::host_device_ptr<float, care::NoOpAccessor> &, int &) ;
CARE_EXTERN template CARE_DLL_API
void uniqArray(RAJA::seq_exec, care::host_device_ptr<double, care::NoOpAccessor>, size_t, care::host_device_ptr<double, care::NoOpAccessor> &, int &) ;
void uniqArray(RAJA::seq_exec, care::host_device_ptr<double const, care::NoOpAccessor>, size_t, care::host_device_ptr<double, care::NoOpAccessor> &, int &) ;
#if CARE_HAVE_LLNL_GLOBALID
CARE_EXTERN template CARE_DLL_API
void uniqArray(RAJA::seq_exec, care::host_device_ptr<globalID, care::NoOpAccessor>, size_t, care::host_device_ptr<globalID, care::NoOpAccessor> &, int &) ;
void uniqArray(RAJA::seq_exec, care::host_device_ptr<globalID const, care::NoOpAccessor>, size_t, care::host_device_ptr<globalID, care::NoOpAccessor> &, int &) ;
#endif

CARE_EXTERN template CARE_DLL_API
Expand All @@ -283,14 +283,14 @@ int uniqArray(RAJA::seq_exec exec, care::host_device_ptr<globalID, care::NoOpAcc
#endif

CARE_EXTERN template CARE_DLL_API
void uniqArray(RAJA::seq_exec, care::host_device_ptr<int, care::RaceConditionAccessor>, size_t, care::host_device_ptr<int, care::RaceConditionAccessor> &, int &) ;
void uniqArray(RAJA::seq_exec, care::host_device_ptr<int const, care::RaceConditionAccessor>, size_t, care::host_device_ptr<int, care::RaceConditionAccessor> &, int &) ;
CARE_EXTERN template CARE_DLL_API
void uniqArray(RAJA::seq_exec, care::host_device_ptr<float, care::RaceConditionAccessor>, size_t, care::host_device_ptr<float, care::RaceConditionAccessor> &, int &) ;
void uniqArray(RAJA::seq_exec, care::host_device_ptr<float const, care::RaceConditionAccessor>, size_t, care::host_device_ptr<float, care::RaceConditionAccessor> &, int &) ;
CARE_EXTERN template CARE_DLL_API
void uniqArray(RAJA::seq_exec, care::host_device_ptr<double, care::RaceConditionAccessor>, size_t, care::host_device_ptr<double, care::RaceConditionAccessor> &, int &) ;
void uniqArray(RAJA::seq_exec, care::host_device_ptr<double const, care::RaceConditionAccessor>, size_t, care::host_device_ptr<double, care::RaceConditionAccessor> &, int &) ;
#if CARE_HAVE_LLNL_GLOBALID
CARE_EXTERN template CARE_DLL_API
void uniqArray(RAJA::seq_exec, care::host_device_ptr<globalID, care::RaceConditionAccessor>, size_t, care::host_device_ptr<globalID, care::RaceConditionAccessor> &, int &) ;
void uniqArray(RAJA::seq_exec, care::host_device_ptr<globalID const, care::RaceConditionAccessor>, size_t, care::host_device_ptr<globalID, care::RaceConditionAccessor> &, int &) ;
#endif

CARE_EXTERN template CARE_DLL_API
Expand Down
Loading

0 comments on commit 9ca7901

Please sign in to comment.