Skip to content

Commit

Permalink
Collapse element loop and restore nstream
Browse files Browse the repository at this point in the history
  • Loading branch information
thomasgibson committed Oct 3, 2022
1 parent 25fc434 commit 45d63ea
Showing 1 changed file with 4 additions and 5 deletions.
9 changes: 4 additions & 5 deletions src/hip/HIPStream.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -191,19 +191,18 @@ void HIPStream<T>::triad()
check_error();
}

// NOTE(review): this span is a rendered diff hunk whose +/- markers were lost,
// so pre-commit and post-commit lines are interleaved and it does not compile
// as shown. Going by the commit header (4 additions, 5 deletions) and the
// commit title, the `elements_per_lane` lines appear to be the removed version
// and the single-index lines the added version -- confirm against the repo.
//
// nstream_kernel: element-wise update a[k] += b[k] + scalar * c[k], where
// `scalar` is copied from `startScalar` (declared outside this excerpt).
// The kernel takes no length argument and performs no bounds check, so every
// launched thread reads b, c and writes a at its computed index.
// Presumably removed template header (one thread handled elements_per_lane items):
template <size_t elements_per_lane, typename T>
// Presumably added template header (element type only):
template <typename T>
__global__ void nstream_kernel(T * __restrict a, const T * __restrict b, const T * __restrict c)
{
const T scalar = startScalar;
// Presumably removed: each thread starts at its flat 1D thread index scaled by
// elements_per_lane and walks elements_per_lane consecutive elements from there.
const size_t gidx = (threadIdx.x + blockIdx.x * blockDim.x) * elements_per_lane;
for (size_t j = 0; j < elements_per_lane; ++j)
a[gidx + j] += b[gidx + j] + scalar * c[gidx + j];
// Presumably added: one element per thread at the flat 1D thread index.
// NOTE(review): with no `i < n` guard, this relies on the launch covering the
// arrays exactly; the visible launch uses array_size/TBSIZE blocks, which
// truncates -- verify array_size is enforced to be a multiple of TBSIZE.
const size_t i = threadIdx.x + blockIdx.x * blockDim.x;
a[i] += b[i] + scalar * c[i];
}

template <class T>
void HIPStream<T>::nstream()
{
nstream_kernel<elements_per_lane, T><<<dim3(block_count), dim3(TBSIZE), 0, 0>>>(d_a, d_b, d_c);
nstream_kernel<T><<<dim3(array_size/TBSIZE), dim3(TBSIZE), 0, 0>>>(d_a, d_b, d_c);
check_error();
hipDeviceSynchronize();
check_error();
Expand Down

0 comments on commit 45d63ea

Please sign in to comment.