Skip to content

Commit

Permalink
Merge pull request #287 from LLNL/bugfix/probinso/tp_performance_fixes
Browse files Browse the repository at this point in the history
adams is 15-20% faster.
  • Loading branch information
robinson96 authored Sep 5, 2024
2 parents 62a352f + 9ca7901 commit a318e74
Showing 1 changed file with 6 additions and 2 deletions.
8 changes: 6 additions & 2 deletions src/care/policies.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,11 @@ using RAJADeviceExec = RAJA::seq_exec;

// reduction kernel policy
#if defined(__HIPCC__)
-using RAJAReductionExec = RAJA::hip_exec_occ_calc<CARE_CUDA_BLOCK_SIZE, CARE_CUDA_ASYNC>;
+using RAJAReductionExec = RAJA::hip_exec_with_reduce<256, CARE_CUDA_ASYNC>;
+#elif defined(__CUDACC__)
+using RAJAReductionExec = RAJA::cuda_exec_with_reduce<256, CARE_CUDA_ASYNC>;
+#elif defined(_OPENMP) && defined(RAJA_ENABLE_OPENMP) // CARE_GPUCC
+using RAJAReductionExec = RAJA::omp_parallel_for_exec;
#else
using RAJAReductionExec = RAJADeviceExec;
#endif
Expand Down Expand Up @@ -115,7 +119,7 @@ using RAJAExec = RAJADeviceExec ;
#else // CARE_ENABLE_GPU_SIMULATION_MODE

// The defined(__HIPCC__) case is here:
-using RAJAHipReduce = RAJA::hip_reduce ;
+using RAJAHipReduce = RAJA::hip_reduce_atomic ;

template <class T>
using RAJAReduceMax = RAJA::ReduceMax<RAJAHipReduce, T> ;
Expand Down

0 comments on commit a318e74

Please sign in to comment.