@@ -430,71 +430,34 @@ class open_addressing_ref_impl {
430
430
/**
 * @brief Cooperatively inserts `value` into the container using every lane of `group`.
 *
 * Each lane loads the bucket at its current probing position and classifies the
 * bucket's slots against `key`. The group then votes: if any lane saw an equal
 * key (and duplicates are not allowed) the insert is rejected; otherwise the
 * lowest-ranked lane that saw an available slot attempts the insert and its
 * result is broadcast to the whole group.
 *
 * @param group Cooperative-groups tile whose lanes execute this call together
 * @param value The element to insert
 *
 * @return `true` if the element was inserted; `false` if an equal key was found
 * while duplicates are disallowed, or if the probing sequence returned to its
 * starting index without a successful insert
 */
__device__ bool insert(cooperative_groups::thread_block_tile<cg_size> const& group,
                       Value const& value) noexcept
{
  auto const val    = this->heterogeneous_value(value);
  auto const key    = this->extract_key(val);
  auto probing_iter = probing_scheme_(group, key, storage_ref_.bucket_extent());
  // Remember where probing started so a full cycle can be detected below.
  auto const init_idx = *probing_iter;

  while (true) {
    // Generic bucket load: no assumption about bucket size or slot width.
    auto const bucket_slots = storage_ref_[*probing_iter];

    // Scan this lane's bucket and report the first slot that decides the outcome.
    auto const [state, intra_bucket_index] = [&]() {
      for (auto i = 0; i < bucket_size; ++i) {
        switch (
          this->predicate_.operator()<is_insert::YES>(key, this->extract_key(bucket_slots[i]))) {
          case detail::equal_result::AVAILABLE:
            return bucket_probing_results{detail::equal_result::AVAILABLE, i};
          case detail::equal_result::EQUAL: {
            if constexpr (allows_duplicates) {
              // An equal key does not block the insert; keep looking for a free slot.
              continue;
            } else {
              return bucket_probing_results{detail::equal_result::EQUAL, i};
            }
          }
          default: continue;
        }
      }
      // returns dummy index `-1` for UNEQUAL
      return bucket_probing_results{detail::equal_result::UNEQUAL, -1};
    }();

    if constexpr (not allows_duplicates) {
      // If the key is already in the container, return false
      if (group.any(state == detail::equal_result::EQUAL)) { return false; }
    }

    auto const group_contains_available = group.ballot(state == detail::equal_result::AVAILABLE);
    if (group_contains_available) {
      // `__ffs` is 1-based, so this is the lowest-ranked lane that saw an available slot.
      auto const src_lane = __ffs(group_contains_available) - 1;
      auto status         = insert_result::CONTINUE;
      if (group.thread_rank() == src_lane) {
        // The target must be the slot inside `storage_ref_`, never the local
        // `bucket_slots` copy, otherwise the container itself is not updated.
        auto* const slot_ptr = (storage_ref_.data() + *probing_iter)->data() + intra_bucket_index;
        if constexpr (SupportsErase) {
          // Expected value is the slot content this lane observed.
          // NOTE(review): presumably because an available slot may hold an erased
          // marker rather than the empty sentinel - confirm against `attempt_insert`.
          status = attempt_insert(slot_ptr, bucket_slots[intra_bucket_index], val);
        } else {
          status = attempt_insert(slot_ptr, this->empty_slot_sentinel(), val);
        }
      }

      // Every lane acts on the inserting lane's result.
      switch (group.shfl(status, src_lane)) {
        case insert_result::SUCCESS: return true;
        case insert_result::DUPLICATE: {
          if constexpr (allows_duplicates) {
            [[fallthrough]];
          } else {
            return false;
          }
        }
        // Re-enters the `while` loop without advancing `probing_iter`, so the
        // same bucket is reloaded and examined again.
        default: continue;
      }
    } else {
      ++probing_iter;
      // Back at the starting index: nothing was available along the whole sequence.
      if (*probing_iter == init_idx) { return false; }
    }
  }
}
0 commit comments