Skip to content

Commit 8d05f7a

Browse files
Refactor CUB's util_debug
1 parent 048b2bd commit 8d05f7a

24 files changed: 115 additions and 278 deletions.

cub/cub/detail/device_synchronize.cuh

Lines changed: 0 additions & 54 deletions
This file was deleted.

cub/cub/device/dispatch/dispatch_adjacent_difference.cuh

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -189,13 +189,13 @@ struct DispatchAdjacentDifference
189189
constexpr int init_block_size = AgentDifferenceInitT::BLOCK_THREADS;
190190
const int init_grid_size = ::cuda::ceil_div(num_tiles, init_block_size);
191191

192-
#ifdef CUB_DETAIL_DEBUG_ENABLE_LOG
192+
#ifdef CUB_DEBUG_LOG
193193
_CubLog("Invoking DeviceAdjacentDifferenceInitKernel"
194194
"<<<%d, %d, 0, %lld>>>()\n",
195195
init_grid_size,
196196
init_block_size,
197197
reinterpret_cast<long long>(stream));
198-
#endif // CUB_DETAIL_DEBUG_ENABLE_LOG
198+
#endif // CUB_DEBUG_LOG
199199

200200
THRUST_NS_QUALIFIER::cuda_cub::launcher::triple_chevron(init_grid_size, init_block_size, 0, stream)
201201
.doit(DeviceAdjacentDifferenceInitKernel<AgentDifferenceInitT, InputIteratorT, InputT, OffsetT>,
@@ -219,13 +219,13 @@ struct DispatchAdjacentDifference
219219
}
220220
}
221221

222-
#ifdef CUB_DETAIL_DEBUG_ENABLE_LOG
222+
#ifdef CUB_DEBUG_LOG
223223
_CubLog("Invoking DeviceAdjacentDifferenceDifferenceKernel"
224224
"<<<%d, %d, 0, %lld>>>()\n",
225225
num_tiles,
226226
AdjacentDifferencePolicyT::BLOCK_THREADS,
227227
reinterpret_cast<long long>(stream));
228-
#endif // CUB_DETAIL_DEBUG_ENABLE_LOG
228+
#endif // CUB_DEBUG_LOG
229229

230230
THRUST_NS_QUALIFIER::cuda_cub::launcher::triple_chevron(
231231
num_tiles, AdjacentDifferencePolicyT::BLOCK_THREADS, 0, stream)

cub/cub/device/dispatch/dispatch_batch_memcpy.cuh

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -536,7 +536,7 @@ struct DispatchBatchMemcpy
536536
return error;
537537
}
538538

539-
#ifdef CUB_DETAIL_DEBUG_ENABLE_LOG
539+
#ifdef CUB_DEBUG_LOG
540540
_CubLog("Invoking "
541541
"InitTileStateKernel<<<%d, %d, 0, %lld>>>()\n",
542542
static_cast<int>(init_grid_size),
@@ -564,7 +564,7 @@ struct DispatchBatchMemcpy
564564
return error;
565565
}
566566

567-
#ifdef CUB_DETAIL_DEBUG_ENABLE_LOG
567+
#ifdef CUB_DEBUG_LOG
568568
_CubLog("Invoking "
569569
"BatchMemcpyKernel<<<%d, %d, 0, %lld>>>()\n",
570570
static_cast<int>(batch_memcpy_grid_size),
@@ -603,7 +603,7 @@ struct DispatchBatchMemcpy
603603
return error;
604604
}
605605

606-
#ifdef CUB_DETAIL_DEBUG_ENABLE_LOG
606+
#ifdef CUB_DEBUG_LOG
607607
_CubLog("Invoking "
608608
"MultiBlockBatchMemcpyKernel<<<%d, %d, 0, %lld>>>()\n",
609609
static_cast<int>(batch_memcpy_blev_grid_size),

cub/cub/device/dispatch/dispatch_for.cuh

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -101,7 +101,7 @@ struct dispatch_t
101101
const auto tile_size = static_cast<OffsetT>(block_threads * items_per_thread);
102102
const auto num_tiles = ::cuda::ceil_div(num_items, tile_size);
103103

104-
#ifdef CUB_DETAIL_DEBUG_ENABLE_LOG
104+
#ifdef CUB_DEBUG_LOG
105105
_CubLog("Invoking detail::for_each::dynamic_kernel<<<%d, %d, 0, %lld>>>(), "
106106
"%d items per thread\n",
107107
static_cast<int>(num_tiles),
@@ -144,7 +144,7 @@ struct dispatch_t
144144
const auto tile_size = static_cast<OffsetT>(block_threads * items_per_thread);
145145
const auto num_tiles = ::cuda::ceil_div(num_items, tile_size);
146146

147-
#ifdef CUB_DETAIL_DEBUG_ENABLE_LOG
147+
#ifdef CUB_DEBUG_LOG
148148
_CubLog("Invoking detail::for_each::static_kernel<<<%d, %d, 0, %lld>>>(), "
149149
"%d items per thread\n",
150150
static_cast<int>(num_tiles),

cub/cub/device/dispatch/dispatch_for_each_in_extents.cuh

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -117,7 +117,7 @@ public:
117117
constexpr unsigned items_per_thread = ActivePolicyT::for_policy_t::items_per_thread;
118118
unsigned num_cta = ::cuda::ceil_div(_size, block_threads * items_per_thread);
119119

120-
# ifdef CUB_DETAIL_DEBUG_ENABLE_LOG
120+
# ifdef CUB_DEBUG_LOG
121121
_CubLog("Invoking detail::for_each_in_extents::static_kernel<<<%u, %u, 0, %p>>>(), items_per_thread: %u\n",
122122
num_cta,
123123
block_threads,
@@ -155,7 +155,7 @@ public:
155155
_CUB_RETURN_IF_ERROR(status)
156156
unsigned num_cta = ::cuda::ceil_div(_size, block_threads * items_per_thread);
157157

158-
# ifdef CUB_DETAIL_DEBUG_ENABLE_LOG
158+
# ifdef CUB_DEBUG_LOG
159159
_CubLog("Invoking detail::for_each_in_extents::dynamic_kernel<<<%u, %u, 0, %p>>>(), items_per_thread: %u\n",
160160
num_cta,
161161
block_threads,

cub/cub/device/dispatch/dispatch_histogram.cuh

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -434,12 +434,12 @@ struct dispatch_histogram
434434
(max_num_output_bins + histogram_init_block_threads - 1) / histogram_init_block_threads;
435435

436436
// Log DeviceHistogramInitKernel configuration
437-
#ifdef CUB_DETAIL_DEBUG_ENABLE_LOG
437+
#ifdef CUB_DEBUG_LOG
438438
_CubLog("Invoking DeviceHistogramInitKernel<<<%d, %d, 0, %lld>>>()\n",
439439
histogram_init_grid_dims,
440440
histogram_init_block_threads,
441441
(long long) stream);
442-
#endif // CUB_DETAIL_DEBUG_ENABLE_LOG
442+
#endif // CUB_DEBUG_LOG
443443

444444
// Invoke histogram_init_kernel
445445
THRUST_NS_QUALIFIER::cuda_cub::launcher::triple_chevron(
@@ -453,7 +453,7 @@ struct dispatch_histogram
453453
}
454454

455455
// Log histogram_sweep_kernel configuration
456-
#ifdef CUB_DETAIL_DEBUG_ENABLE_LOG
456+
#ifdef CUB_DEBUG_LOG
457457
_CubLog("Invoking histogram_sweep_kernel<<<{%d, %d, %d}, %d, 0, %lld>>>(), %d pixels "
458458
"per thread, %d SM occupancy\n",
459459
sweep_grid_dims.x,
@@ -463,7 +463,7 @@ struct dispatch_histogram
463463
(long long) stream,
464464
pixels_per_thread,
465465
histogram_sweep_sm_occupancy);
466-
#endif // CUB_DETAIL_DEBUG_ENABLE_LOG
466+
#endif // CUB_DEBUG_LOG
467467

468468
// Invoke histogram_sweep_kernel
469469
THRUST_NS_QUALIFIER::cuda_cub::launcher::triple_chevron(sweep_grid_dims, block_threads, 0, stream)

cub/cub/device/dispatch/dispatch_radix_sort.cuh

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -965,7 +965,7 @@ struct DispatchRadixSort
965965
}
966966

967967
// Log single_tile_kernel configuration
968-
#ifdef CUB_DETAIL_DEBUG_ENABLE_LOG
968+
#ifdef CUB_DEBUG_LOG
969969
_CubLog("Invoking single_tile_kernel<<<%d, %d, 0, %lld>>>(), %d items per thread, %d SM occupancy, current bit "
970970
"%d, bit_grain %d\n",
971971
1,
@@ -1036,7 +1036,7 @@ struct DispatchRadixSort
10361036
int pass_bits = CUB_MIN(pass_config.radix_bits, (end_bit - current_bit));
10371037

10381038
// Log upsweep_kernel configuration
1039-
#ifdef CUB_DETAIL_DEBUG_ENABLE_LOG
1039+
#ifdef CUB_DEBUG_LOG
10401040
_CubLog("Invoking upsweep_kernel<<<%d, %d, 0, %lld>>>(), %d items per thread, %d SM occupancy, current bit %d, "
10411041
"bit_grain %d\n",
10421042
pass_config.even_share.grid_size,
@@ -1078,7 +1078,7 @@ struct DispatchRadixSort
10781078
}
10791079

10801080
// Log scan_kernel configuration
1081-
#ifdef CUB_DETAIL_DEBUG_ENABLE_LOG
1081+
#ifdef CUB_DEBUG_LOG
10821082
_CubLog("Invoking scan_kernel<<<%d, %d, 0, %lld>>>(), %d items per thread\n",
10831083
1,
10841084
pass_config.scan_config.block_threads,
@@ -1105,7 +1105,7 @@ struct DispatchRadixSort
11051105
}
11061106

11071107
// Log downsweep_kernel configuration
1108-
#ifdef CUB_DETAIL_DEBUG_ENABLE_LOG
1108+
#ifdef CUB_DEBUG_LOG
11091109
_CubLog("Invoking downsweep_kernel<<<%d, %d, 0, %lld>>>(), %d items per thread, %d SM occupancy\n",
11101110
pass_config.even_share.grid_size,
11111111
pass_config.downsweep_config.block_threads,
@@ -1305,7 +1305,7 @@ struct DispatchRadixSort
13051305
}
13061306

13071307
// log histogram_kernel configuration
1308-
#ifdef CUB_DETAIL_DEBUG_ENABLE_LOG
1308+
#ifdef CUB_DEBUG_LOG
13091309
_CubLog("Invoking histogram_kernel<<<%d, %d, 0, %lld>>>(), %d items per iteration, "
13101310
"%d SM occupancy, bit_grain %d\n",
13111311
histo_blocks_per_sm * num_sms,
@@ -1335,7 +1335,7 @@ struct DispatchRadixSort
13351335
constexpr int SCAN_BLOCK_THREADS = ActivePolicyT::ExclusiveSumPolicy::BLOCK_THREADS;
13361336

13371337
// log exclusive_sum_kernel configuration
1338-
#ifdef CUB_DETAIL_DEBUG_ENABLE_LOG
1338+
#ifdef CUB_DEBUG_LOG
13391339
_CubLog("Invoking exclusive_sum_kernel<<<%d, %d, 0, %lld>>>(), bit_grain %d\n",
13401340
num_passes,
13411341
SCAN_BLOCK_THREADS,
@@ -1383,7 +1383,7 @@ struct DispatchRadixSort
13831383
}
13841384

13851385
// log onesweep_kernel configuration
1386-
#ifdef CUB_DETAIL_DEBUG_ENABLE_LOG
1386+
#ifdef CUB_DEBUG_LOG
13871387
_CubLog("Invoking onesweep_kernel<<<%d, %d, 0, %lld>>>(), %d items per thread, "
13881388
"current bit %d, bit_grain %d, portion %d/%d\n",
13891389
num_blocks,
@@ -1672,7 +1672,7 @@ struct DispatchRadixSort
16721672
}
16731673

16741674
// Copy keys
1675-
#ifdef CUB_DETAIL_DEBUG_ENABLE_LOG
1675+
#ifdef CUB_DEBUG_LOG
16761676
_CubLog("Invoking async copy of %lld keys on stream %lld\n", (long long) num_items, (long long) stream);
16771677
#endif
16781678
cudaError_t error = cudaSuccess;
@@ -1694,7 +1694,7 @@ struct DispatchRadixSort
16941694
// Copy values if necessary
16951695
if (!KEYS_ONLY)
16961696
{
1697-
#ifdef CUB_DETAIL_DEBUG_ENABLE_LOG
1697+
#ifdef CUB_DEBUG_LOG
16981698
_CubLog("Invoking async copy of %lld values on stream %lld\n", (long long) num_items, (long long) stream);
16991699
#endif
17001700
error = CubDebug(cudaMemcpyAsync(
@@ -2001,7 +2001,7 @@ struct DispatchSegmentedRadixSort
20012001
int pass_bits = CUB_MIN(pass_config.radix_bits, (end_bit - current_bit));
20022002

20032003
// Log kernel configuration
2004-
#ifdef CUB_DETAIL_DEBUG_ENABLE_LOG
2004+
#ifdef CUB_DEBUG_LOG
20052005
_CubLog("Invoking segmented_kernels<<<%lld, %lld, 0, %lld>>>(), "
20062006
"%lld items per thread, %lld SM occupancy, "
20072007
"current bit %d, bit_grain %d\n",

cub/cub/device/dispatch/dispatch_reduce.cuh

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -378,13 +378,13 @@ struct DispatchReduce
378378
}
379379

380380
// Log single_reduce_sweep_kernel configuration
381-
#ifdef CUB_DETAIL_DEBUG_ENABLE_LOG
381+
#ifdef CUB_DEBUG_LOG
382382
_CubLog("Invoking DeviceReduceSingleTileKernel<<<1, %d, 0, %lld>>>(), "
383383
"%d items per thread\n",
384384
policy.SingleTile().BlockThreads(),
385385
(long long) stream,
386386
policy.SingleTile().ItemsPerThread());
387-
#endif // CUB_DETAIL_DEBUG_ENABLE_LOG
387+
#endif // CUB_DEBUG_LOG
388388

389389
// Invoke single_reduce_sweep_kernel
390390
launcher_factory(1, policy.SingleTile().BlockThreads(), 0, stream)
@@ -490,15 +490,15 @@ struct DispatchReduce
490490
int reduce_grid_size = even_share.grid_size;
491491

492492
// Log device_reduce_sweep_kernel configuration
493-
#ifdef CUB_DETAIL_DEBUG_ENABLE_LOG
493+
#ifdef CUB_DEBUG_LOG
494494
_CubLog("Invoking DeviceReduceKernel<<<%lu, %d, 0, %lld>>>(), %d items "
495495
"per thread, %d SM occupancy\n",
496496
(unsigned long) reduce_grid_size,
497497
active_policy.Reduce().BlockThreads(),
498498
(long long) stream,
499499
active_policy.Reduce().ItemsPerThread(),
500500
reduce_config.sm_occupancy);
501-
#endif // CUB_DETAIL_DEBUG_ENABLE_LOG
501+
#endif // CUB_DEBUG_LOG
502502

503503
// Invoke DeviceReduceKernel
504504
launcher_factory(reduce_grid_size, active_policy.Reduce().BlockThreads(), 0, stream)
@@ -519,13 +519,13 @@ struct DispatchReduce
519519
}
520520

521521
// Log single_reduce_sweep_kernel configuration
522-
#ifdef CUB_DETAIL_DEBUG_ENABLE_LOG
522+
#ifdef CUB_DEBUG_LOG
523523
_CubLog("Invoking DeviceReduceSingleTileKernel<<<1, %d, 0, %lld>>>(), "
524524
"%d items per thread\n",
525525
active_policy.SingleTile().BlockThreads(),
526526
(long long) stream,
527527
active_policy.SingleTile().ItemsPerThread());
528-
#endif // CUB_DETAIL_DEBUG_ENABLE_LOG
528+
#endif // CUB_DEBUG_LOG
529529

530530
// Invoke DeviceReduceSingleTileKernel
531531
launcher_factory(1, active_policy.SingleTile().BlockThreads(), 0, stream)
@@ -881,15 +881,15 @@ struct DispatchSegmentedReduce
881881
}
882882

883883
// Log device_reduce_sweep_kernel configuration
884-
#ifdef CUB_DETAIL_DEBUG_ENABLE_LOG
884+
#ifdef CUB_DEBUG_LOG
885885
_CubLog("Invoking SegmentedDeviceReduceKernel<<<%d, %d, 0, %lld>>>(), "
886886
"%d items per thread, %d SM occupancy\n",
887887
num_segments,
888888
ActivePolicyT::SegmentedReducePolicy::BLOCK_THREADS,
889889
(long long) stream,
890890
ActivePolicyT::SegmentedReducePolicy::ITEMS_PER_THREAD,
891891
segmented_reduce_config.sm_occupancy);
892-
#endif // CUB_DETAIL_DEBUG_ENABLE_LOG
892+
#endif // CUB_DEBUG_LOG
893893

894894
// Invoke DeviceReduceKernel
895895
THRUST_NS_QUALIFIER::cuda_cub::launcher::triple_chevron(

cub/cub/device/dispatch/dispatch_reduce_by_key.cuh

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -341,9 +341,9 @@ struct DispatchReduceByKey
341341
// Log init_kernel configuration
342342
int init_grid_size = CUB_MAX(1, ::cuda::ceil_div(num_tiles, INIT_KERNEL_THREADS));
343343

344-
#ifdef CUB_DETAIL_DEBUG_ENABLE_LOG
344+
#ifdef CUB_DEBUG_LOG
345345
_CubLog("Invoking init_kernel<<<%d, %d, 0, %lld>>>()\n", init_grid_size, INIT_KERNEL_THREADS, (long long) stream);
346-
#endif // CUB_DETAIL_DEBUG_ENABLE_LOG
346+
#endif // CUB_DEBUG_LOG
347347

348348
// Invoke init_kernel to initialize tile descriptors
349349
THRUST_NS_QUALIFIER::cuda_cub::launcher::triple_chevron(init_grid_size, INIT_KERNEL_THREADS, 0, stream)
@@ -391,7 +391,7 @@ struct DispatchReduceByKey
391391
for (int start_tile = 0; start_tile < num_tiles; start_tile += scan_grid_size)
392392
{
393393
// Log reduce_by_key_kernel configuration
394-
#ifdef CUB_DETAIL_DEBUG_ENABLE_LOG
394+
#ifdef CUB_DEBUG_LOG
395395
_CubLog("Invoking %d reduce_by_key_kernel<<<%d, %d, 0, %lld>>>(), %d "
396396
"items per thread, %d SM occupancy\n",
397397
start_tile,
@@ -400,7 +400,7 @@ struct DispatchReduceByKey
400400
(long long) stream,
401401
items_per_thread,
402402
reduce_by_key_sm_occupancy);
403-
#endif // CUB_DETAIL_DEBUG_ENABLE_LOG
403+
#endif // CUB_DEBUG_LOG
404404

405405
// Invoke reduce_by_key_kernel
406406
THRUST_NS_QUALIFIER::cuda_cub::launcher::triple_chevron(scan_grid_size, block_threads, 0, stream)

0 commit comments

Comments (0)