Skip to content
This repository has been archived by the owner on Mar 21, 2024. It is now read-only.

Commit

Permalink
The great Thrust index type fix, part 9: exclusive_scan, inclusive_scan.
Browse files Browse the repository at this point in the history
  • Loading branch information
Francis Lemaire authored and griwes committed Feb 11, 2020
1 parent 01bbe09 commit 1d16811
Show file tree
Hide file tree
Showing 2 changed files with 122 additions and 20 deletions.
92 changes: 92 additions & 0 deletions testing/scan.cu
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
#include <thrust/functional.h>
#include <thrust/iterator/discard_iterator.h>
#include <thrust/iterator/retag.h>
#include <thrust/device_malloc.h>
#include <thrust/device_free.h>


template<typename T>
Expand Down Expand Up @@ -555,3 +557,93 @@ void TestInclusiveScanWithIndirection(void)
}
DECLARE_INTEGRAL_VECTOR_UNITTEST(TestInclusiveScanWithIndirection);

struct only_set_when_expected_it
{
long long expected;
bool * flag;

__host__ __device__ only_set_when_expected_it operator++() const { return *this; }
__host__ __device__ only_set_when_expected_it operator*() const { return *this; }
template<typename Difference>
__host__ __device__ only_set_when_expected_it operator+(Difference) const { return *this; }
template<typename Index>
__host__ __device__ only_set_when_expected_it operator[](Index) const { return *this; }

__device__
void operator=(long long value) const
{
if (value == expected)
{
*flag = true;
}
}
};

namespace thrust
{
template<>
struct iterator_traits<only_set_when_expected_it>
{
typedef long long value_type;
typedef only_set_when_expected_it reference;
};
}

void TestInclusiveScanWithBigIndexesHelper(int magnitude)
{
thrust::constant_iterator<long long> begin(1);
thrust::constant_iterator<long long> end = begin + (1ll << magnitude);
ASSERT_EQUAL(thrust::distance(begin, end), 1ll << magnitude);

thrust::device_ptr<bool> has_executed = thrust::device_malloc<bool>(1);
*has_executed = false;

only_set_when_expected_it out = { (1ll << magnitude), thrust::raw_pointer_cast(has_executed) };

thrust::inclusive_scan(thrust::device, begin, end, out);

bool has_executed_h = *has_executed;
thrust::device_free(has_executed);

ASSERT_EQUAL(has_executed_h, true);
}

void TestInclusiveScanWithBigIndexes()
{
TestInclusiveScanWithBigIndexesHelper(30);
TestInclusiveScanWithBigIndexesHelper(31);
TestInclusiveScanWithBigIndexesHelper(32);
TestInclusiveScanWithBigIndexesHelper(33);
}

DECLARE_UNITTEST(TestInclusiveScanWithBigIndexes);

void TestExclusiveScanWithBigIndexesHelper(int magnitude)
{
thrust::constant_iterator<long long> begin(1);
thrust::constant_iterator<long long> end = begin + (1ll << magnitude);
ASSERT_EQUAL(thrust::distance(begin, end), 1ll << magnitude);

thrust::device_ptr<bool> has_executed = thrust::device_malloc<bool>(1);
*has_executed = false;

only_set_when_expected_it out = { (1ll << magnitude) - 1, thrust::raw_pointer_cast(has_executed) };

thrust::exclusive_scan(thrust::device, begin, end, out,0ll);

bool has_executed_h = *has_executed;
thrust::device_free(has_executed);

ASSERT_EQUAL(has_executed_h, true);
}

void TestExclusiveScanWithBigIndexes()
{
TestExclusiveScanWithBigIndexesHelper(30);
TestExclusiveScanWithBigIndexesHelper(31);
TestExclusiveScanWithBigIndexesHelper(32);
TestExclusiveScanWithBigIndexesHelper(33);
}

DECLARE_UNITTEST(TestExclusiveScanWithBigIndexes);

50 changes: 30 additions & 20 deletions thrust/system/cuda/detail/scan.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,11 @@
#include <cub/device/device_scan.cuh>
#include <thrust/system/cuda/detail/core/agent_launcher.h>
#include <thrust/system/cuda/detail/par_to_seq.h>
#include <thrust/system/cuda/detail/dispatch.h>
#include <thrust/detail/mpl/math.h>
#include <thrust/detail/minmax.h>
#include <thrust/distance.h>
#include <thrust/iterator/iterator_traits.h>

THRUST_BEGIN_NS
template <typename DerivedPolicy,
Expand Down Expand Up @@ -710,31 +712,37 @@ namespace __scan {
bool debug_sync = THRUST_DEBUG_SYNC_FLAG;

cudaError_t status;
status = doit_step<Inclusive>(NULL,
storage_size,
input_it,
num_items,
add_init_to_exclusive_scan,
output_it,
scan_op,
stream,
debug_sync);
THRUST_INDEX_TYPE_DISPATCH(status,
doit_step<Inclusive>,
num_items,
(NULL,
storage_size,
input_it,
num_items_fixed,
add_init_to_exclusive_scan,
output_it,
scan_op,
stream,
debug_sync));
cuda_cub::throw_on_error(status, "scan failed on 1st step");

// Allocate temporary storage.
thrust::detail::temporary_array<thrust::detail::uint8_t, Derived>
tmp(policy, storage_size);
void *ptr = static_cast<void*>(tmp.data().get());

status = doit_step<Inclusive>(ptr,
storage_size,
input_it,
num_items,
add_init_to_exclusive_scan,
output_it,
scan_op,
stream,
debug_sync);
THRUST_INDEX_TYPE_DISPATCH(status,
doit_step<Inclusive>,
num_items,
(ptr,
storage_size,
input_it,
num_items_fixed,
add_init_to_exclusive_scan,
output_it,
scan_op,
stream,
debug_sync));
cuda_cub::throw_on_error(status, "scan failed on 2nd step");

status = cuda_cub::synchronize(policy);
Expand Down Expand Up @@ -798,7 +806,8 @@ inclusive_scan(execution_policy<Derived> &policy,
OutputIt result,
ScanOp scan_op)
{
int num_items = static_cast<int>(thrust::distance(first, last));
typedef typename thrust::iterator_traits<InputIt>::difference_type diff_t;
diff_t num_items = thrust::distance(first, last);
return cuda_cub::inclusive_scan_n(policy, first, num_items, result, scan_op);
}

Expand Down Expand Up @@ -873,7 +882,8 @@ exclusive_scan(execution_policy<Derived> &policy,
T init,
ScanOp scan_op)
{
int num_items = static_cast<int>(thrust::distance(first, last));
typedef typename thrust::iterator_traits<InputIt>::difference_type diff_t;
diff_t num_items = thrust::distance(first, last);
return cuda_cub::exclusive_scan_n(policy, first, num_items, result, init, scan_op);
}

Expand Down

0 comments on commit 1d16811

Please sign in to comment.