@@ -121,20 +121,20 @@ template <int Nominal4ByteBlockThreads, int Nominal4ByteItemsPerThread, typename
121121struct RegBoundScaling
122122{
123123 static constexpr int ITEMS_PER_THREAD =
124- ( ::cuda::std::max) (1 , Nominal4ByteItemsPerThread * 4 / ( ::cuda::std::max) (4 , int {sizeof (T)}));
125- static constexpr int BLOCK_THREADS = (::cuda::std::min)(
126- Nominal4ByteBlockThreads,
127- ::cuda::ceil_div (int {detail::max_smem_per_block} / (int {sizeof (T)} * ITEMS_PER_THREAD), 32) * 32);
124+ ::cuda::std::max (1 , Nominal4ByteItemsPerThread * 4 / ::cuda::std::max(4 , int {sizeof (T)}));
125+ static constexpr int BLOCK_THREADS =
126+ ::cuda::std::min ( Nominal4ByteBlockThreads,
127+ ::cuda::ceil_div (int {detail::max_smem_per_block} / (int {sizeof (T)} * ITEMS_PER_THREAD), 32) * 32);
128128};
129129
130130template <int Nominal4ByteBlockThreads, int Nominal4ByteItemsPerThread, typename T>
131131struct MemBoundScaling
132132{
133- static constexpr int ITEMS_PER_THREAD = ( ::cuda::std::max) (
134- 1 , ( ::cuda::std::min) (Nominal4ByteItemsPerThread * 4 / int {sizeof (T)}, Nominal4ByteItemsPerThread * 2 ));
135- static constexpr int BLOCK_THREADS = (::cuda::std::min)(
136- Nominal4ByteBlockThreads,
137- ::cuda::ceil_div (int {detail::max_smem_per_block} / (int {sizeof (T)} * ITEMS_PER_THREAD), 32) * 32);
133+ static constexpr int ITEMS_PER_THREAD = ::cuda::std::max(
134+ 1 , ::cuda::std::min(Nominal4ByteItemsPerThread * 4 / int {sizeof (T)}, Nominal4ByteItemsPerThread * 2 ));
135+ static constexpr int BLOCK_THREADS =
136+ ::cuda::std::min ( Nominal4ByteBlockThreads,
137+ ::cuda::ceil_div (int {detail::max_smem_per_block} / (int {sizeof (T)} * ITEMS_PER_THREAD), 32) * 32);
138138};
139139
140140#endif // Do not document
0 commit comments