diff --git a/include/rxmesh/cavity_manager.cuh b/include/rxmesh/cavity_manager.cuh index 8710e4db..d97ca49f 100644 --- a/include/rxmesh/cavity_manager.cuh +++ b/include/rxmesh/cavity_manager.cuh @@ -24,6 +24,10 @@ struct CavityManager : m_write_to_gmem(true), m_s_num_cavities(nullptr), m_s_cavity_size_prefix(nullptr), + m_s_q_correspondence_e(nullptr), + m_s_q_correspondence_vf(nullptr), + m_correspondence_size_e(0), + m_correspondence_size_vf(0), m_s_readd_to_queue(nullptr), m_s_ev(nullptr), m_s_fe(nullptr), @@ -543,17 +547,19 @@ struct CavityManager const LPPair* s_stash); /** - * @brief give a patch q, set a bit in s_in_patch if element x appears in - * s_table/s_stash. + * @brief give a patch q, we store the corresponding element in p in + * s_correspondence. Thus, s_correspondence is indexing via q's index space */ template - __device__ __inline__ void populate_in_patch( + __device__ __inline__ void populate_correspondence( cooperative_groups::thread_block& block, - uint8_t q_stash, - Bitmask& s_in_patch, + const uint8_t q_stash, + uint16_t* s_correspondence, + const uint16_t s_correspondence_size, const LPPair* s_table, const LPPair* s_stash); + // indicate if this block can write its updates to global memory during // epilogue bool m_write_to_gmem; @@ -603,10 +609,15 @@ struct CavityManager // indicate if the mesh element is in the interior of the cavity Bitmask m_s_in_cavity_v, m_s_in_cavity_e, m_s_in_cavity_f; - // given a patch q, this bitmask store whether a vertex/edge/face in q is - // stored in the hashtable of this patch (p). Thus, this bitmask used the - // index space of q - Bitmask m_s_in_patch_v, m_s_in_patch_e, m_s_in_patch_f; + // given a patch q, this buffer stores the p's local index corresponding to + // an element in q. Thus, this buffer is indexed using q's index space. + // We either need this for (vertices and edges) or (edges and faces) at the + // same time. Thus, the buffer use for vertices/faces is being recycled to + // serve both + uint16_t* m_s_q_correspondence_e; + uint16_t* m_s_q_correspondence_vf; + uint16_t m_correspondence_size_e; + uint16_t m_correspondence_size_vf; bool* m_s_readd_to_queue; diff --git a/include/rxmesh/cavity_manager_impl.cuh b/include/rxmesh/cavity_manager_impl.cuh index 559103c3..820013b6 100644 --- a/include/rxmesh/cavity_manager_impl.cuh +++ b/include/rxmesh/cavity_manager_impl.cuh @@ -107,6 +107,16 @@ CavityManager::alloc_shared_memory( const uint16_t edge_cap = m_patch_info.edges_capacity[0]; const uint16_t face_cap = m_patch_info.faces_capacity[0]; + const uint16_t max_vertex_cap = static_cast( + m_context.m_capacity_factor * + static_cast(m_context.m_max_num_vertices[0])); + const uint16_t max_edge_cap = + static_cast(m_context.m_capacity_factor * + static_cast(m_context.m_max_num_edges[0])); + const uint16_t max_face_cap = + static_cast(m_context.m_capacity_factor * + static_cast(m_context.m_max_num_faces[0])); + // load EV and FE m_s_ev = shrd_alloc.alloc(2 * edge_cap); m_s_fe = shrd_alloc.alloc(3 * face_cap); @@ -169,7 +179,6 @@ CavityManager::alloc_shared_memory( m_s_src_mask_v = Bitmask(m_context.m_max_num_vertices[0], shrd_alloc); m_s_src_connect_mask_v = Bitmask(m_context.m_max_num_vertices[0], shrd_alloc); - m_s_in_patch_v = Bitmask(m_context.m_max_num_vertices[0], shrd_alloc); // edges masks @@ -180,12 +189,8 @@ CavityManager::alloc_shared_memory( m_s_in_cavity_e, m_patch_info.owned_mask_e, m_patch_info.active_mask_e); - const uint16_t max_edge_cap = - static_cast(m_context.m_capacity_factor * - static_cast(m_context.m_max_num_edges[0])); m_s_src_mask_e = Bitmask(std::max(max_edge_cap, edge_cap), shrd_alloc); m_s_src_connect_mask_e = Bitmask(m_context.m_max_num_edges[0], shrd_alloc); - m_s_in_patch_e = Bitmask(m_context.m_max_num_edges[0], shrd_alloc); // faces masks alloc_masks(face_cap, @@ -195,7 +200,14 @@ CavityManager::alloc_shared_memory( m_s_in_cavity_f, m_patch_info.owned_mask_f, m_patch_info.active_mask_f); - m_s_in_patch_f = Bitmask(m_context.m_max_num_faces[0], shrd_alloc); + + // correspondence + m_correspondence_size_e = max_edge_cap; + m_s_q_correspondence_e = + shrd_alloc.alloc(m_correspondence_size_e); + m_correspondence_size_vf = std::max(max_face_cap, max_vertex_cap); + m_s_q_correspondence_vf = + shrd_alloc.alloc(m_correspondence_size_vf); // patch to lock __shared__ uint32_t p_to_lock[PatchStash::stash_size]; @@ -1459,20 +1471,6 @@ __device__ __inline__ bool CavityManager::migrate_from_patch( // they mark patches they read from in m_s_patches_to_lock_mask. // At the end of every round, one thread make sure make sure that all // patches marked in m_s_patches_to_lock_mask are actually locked. - assert(m_s_in_patch_v.m_bitmask != nullptr); - assert(m_s_in_patch_e.m_bitmask != nullptr); - assert(m_s_in_patch_f.m_bitmask != nullptr); - m_s_in_patch_v.reset(block); - m_s_in_patch_e.reset(block); - m_s_in_patch_f.reset(block); - block.sync(); - - populate_in_patch( - block, q_stash_id, m_s_in_patch_v, m_s_table_v, m_s_table_stash_v); - populate_in_patch( - block, q_stash_id, m_s_in_patch_e, m_s_table_e, m_s_table_stash_e); - populate_in_patch( - block, q_stash_id, m_s_in_patch_f, m_s_table_f, m_s_table_stash_f); PatchInfo q_patch_info = m_context.m_patches_info[q]; @@ -1504,6 +1502,13 @@ __device__ __inline__ bool CavityManager::migrate_from_patch( } } } + + populate_correspondence(block, + q_stash_id, + m_s_q_correspondence_vf, + m_correspondence_size_vf, + m_s_table_v, + m_s_table_stash_v); block.sync(); // 3. make sure there is a copy in p for any vertex in @@ -1542,8 +1547,9 @@ __device__ __inline__ bool CavityManager::migrate_from_patch( lp, m_s_table_v, m_s_table_stash_v); if (lp.patch_stash_id() == q_stash_id) { assert(lp.local_id_in_owner_patch() < - m_s_in_patch_v.size()); - m_s_in_patch_v.set(lp.local_id_in_owner_patch(), true); + m_correspondence_size_vf); + m_s_q_correspondence_vf[lp.local_id_in_owner_patch()] = + lp.key(); } assert(inserted); } @@ -1555,6 +1561,14 @@ __device__ __inline__ bool CavityManager::migrate_from_patch( return false; } + populate_correspondence(block, + q_stash_id, + m_s_q_correspondence_e, + m_correspondence_size_e, + m_s_table_e, + m_s_table_stash_e); + block.sync(); + // same story as with the loop that adds vertices const uint16_t q_num_edges_up = ROUND_UP_TO_NEXT_MULTIPLE(q_num_edges, blockThreads); @@ -1591,8 +1605,9 @@ __device__ __inline__ bool CavityManager::migrate_from_patch( lp, m_s_table_e, m_s_table_stash_e); if (lp.patch_stash_id() == q_stash_id) { assert(lp.local_id_in_owner_patch() < - m_s_in_patch_e.size()); - m_s_in_patch_e.set(lp.local_id_in_owner_patch(), true); + m_correspondence_size_e); + m_s_q_correspondence_e[lp.local_id_in_owner_patch()] = + lp.key(); } assert(inserted); } @@ -1665,8 +1680,9 @@ __device__ __inline__ bool CavityManager::migrate_from_patch( lp, m_s_table_e, m_s_table_stash_e); if (lp.patch_stash_id() == q_stash_id) { assert(lp.local_id_in_owner_patch() < - m_s_in_patch_e.size()); - m_s_in_patch_e.set(lp.local_id_in_owner_patch(), true); + m_correspondence_size_e); + m_s_q_correspondence_e[lp.local_id_in_owner_patch()] = + lp.key(); } assert(inserted); } @@ -1677,6 +1693,14 @@ __device__ __inline__ bool CavityManager::migrate_from_patch( return false; } + populate_correspondence(block, + q_stash_id, + m_s_q_correspondence_vf, + m_correspondence_size_vf, + m_s_table_f, + m_s_table_stash_f); + block.sync(); + // same story as with the loop that adds vertices const uint16_t q_num_faces_up = ROUND_UP_TO_NEXT_MULTIPLE(q_num_faces, blockThreads); @@ -1705,8 +1729,9 @@ __device__ __inline__ bool CavityManager::migrate_from_patch( lp, m_s_table_f, m_s_table_stash_f); if (lp.patch_stash_id() == q_stash_id) { assert(lp.local_id_in_owner_patch() < - m_s_in_patch_f.size()); - m_s_in_patch_f.set(lp.local_id_in_owner_patch(), true); + m_correspondence_size_vf); + m_s_q_correspondence_vf[lp.local_id_in_owner_patch()] = + lp.key(); } assert(inserted); } @@ -2082,22 +2107,12 @@ __device__ __inline__ uint16_t CavityManager::find_copy( src_patch = owner.patch_id(); lid = owner.local_id(); } else { - if constexpr (std::is_same_v) { - if (!m_s_in_patch_v(lid)) { - return INVALID16; - } - } - if constexpr (std::is_same_v) { - if (!m_s_in_patch_e(lid)) { - return INVALID16; - } - } - - if constexpr (std::is_same_v) { - if (!m_s_in_patch_f(lid)) { - return INVALID16; - } + assert(lid < m_correspondence_size_e); + return m_s_q_correspondence_e[lid]; + } else { + assert(lid < m_correspondence_size_vf); + return m_s_q_correspondence_vf[lid]; } } @@ -2137,19 +2152,27 @@ __device__ __inline__ uint16_t CavityManager::find_copy( template template -__device__ __inline__ void CavityManager::populate_in_patch( +__device__ __inline__ void +CavityManager::populate_correspondence( cooperative_groups::thread_block& block, - uint8_t q_stash, - Bitmask& s_in_patch, + const uint8_t q_stash, + uint16_t* s_correspondence, + const uint16_t s_correspondence_size, const LPPair* s_table, const LPPair* s_stash) { + + fill_n( + s_correspondence, s_correspondence_size, uint16_t(INVALID16)); + block.sync(); + LPHashTable lp = m_patch_info.get_lp(); for (uint16_t b = threadIdx.x; b < lp.m_capacity; b += blockThreads) { const auto pair = s_table[b]; if (pair.patch_stash_id() == q_stash) { - s_in_patch.set(pair.local_id_in_owner_patch(), true); + assert(pair.local_id_in_owner_patch() < s_correspondence_size); + s_correspondence[pair.local_id_in_owner_patch()] = pair.key(); } } @@ -2158,11 +2181,13 @@ __device__ __inline__ void CavityManager::populate_in_patch( b += blockThreads) { auto pair = s_stash[b]; if (pair.patch_stash_id() == q_stash) { - s_in_patch.set(pair.local_id_in_owner_patch(), true); + assert(pair.local_id_in_owner_patch() < s_correspondence_size); + s_correspondence[pair.local_id_in_owner_patch()] = pair.key(); } } } + template template __device__ __inline__ bool CavityManager::ensure_ownership( diff --git a/include/rxmesh/lp_hashtable.cuh b/include/rxmesh/lp_hashtable.cuh index 257764ed..cb1b2f99 100644 --- a/include/rxmesh/lp_hashtable.cuh +++ b/include/rxmesh/lp_hashtable.cuh @@ -56,6 +56,7 @@ struct LPHashTable __device__ __host__ LPHashTable() : m_table(nullptr), + m_stash(nullptr), m_capacity(0), m_max_cuckoo_chains(0), m_is_on_device(false) diff --git a/include/rxmesh/rxmesh_dynamic.h b/include/rxmesh/rxmesh_dynamic.h index 3835a3cf..3c67ee9a 100644 --- a/include/rxmesh/rxmesh_dynamic.h +++ b/include/rxmesh/rxmesh_dynamic.h @@ -459,23 +459,31 @@ class RXMeshDynamic : public RXMeshStatic // owned_cavity_bdry (for vertices only), ribbonize (for vertices only) // added_to_lp, in_cavity size_t bitmasks_shmem = 0; - bitmasks_shmem += 11 * detail::mask_num_bytes(vertex_cap) + - 11 * ShmemAllocator::default_alignment; - bitmasks_shmem += 8 * detail::mask_num_bytes(edge_cap) + - 8 * ShmemAllocator::default_alignment; - bitmasks_shmem += 6 * detail::mask_num_bytes(face_cap) + - 6 * ShmemAllocator::default_alignment; + bitmasks_shmem += 10 * detail::mask_num_bytes(vertex_cap) + + 10 * ShmemAllocator::default_alignment; + bitmasks_shmem += 7 * detail::mask_num_bytes(edge_cap) + + 7 * ShmemAllocator::default_alignment; + bitmasks_shmem += 5 * detail::mask_num_bytes(face_cap) + + 5 * ShmemAllocator::default_alignment; // active cavity bitmask bitmasks_shmem += detail::mask_num_bytes(face_cap / 2); + + // correspondence buffer + size_t cv = sizeof(uint16_t) * vertex_cap; + size_t ce = sizeof(uint16_t) * edge_cap; + size_t cf = sizeof(uint16_t) * face_cap; + size_t correspond_shmem = + std::max(cv + ce, ce + cf) + 2 * ShmemAllocator::default_alignment; + // shared memory is the max of 1. static query shared memory + the // cavity ID shared memory (since we need to mark seed elements) 2. // dynamic rxmesh shared memory which includes cavity ID shared memory // and other things launch_box.smem_bytes_dyn = std::max(connectivity_shmem + cavity_id_shmem + cavity_bdr_shmem + - cavity_size_shmem + bitmasks_shmem, + cavity_size_shmem + bitmasks_shmem + correspond_shmem, static_shmem + cavity_id_shmem); if (with_vertex_valence) {