diff --git a/include/rxmesh/cavity_manager.cuh b/include/rxmesh/cavity_manager.cuh
index d64bd6de..94e60828 100644
--- a/include/rxmesh/cavity_manager.cuh
+++ b/include/rxmesh/cavity_manager.cuh
@@ -496,18 +496,6 @@ struct CavityManager
      * the lid lives in src_patch and we want to find the corresponding local
      * index in dest_patch
      */
-    template <typename HandleT>
-    __device__ __inline__ uint16_t find_copy(
-        uint16_t&      lid,
-        uint32_t&      src_patch,
-        const uint16_t dest_patch_num_elements,
-        const Bitmask& dest_patch_owned_mask,
-        const Bitmask& dest_patch_active_mask,
-        const Bitmask& dest_in_cavity,
-        const LPPair*  s_table,
-        const LPPair*  s_stash);
-
-
     template <typename HandleT>
     __device__ __inline__ uint16_t find_copy(
         uint16_t& lid,
@@ -690,6 +678,10 @@ struct CavityManager
     bool* m_s_should_slice;
 
     ShmemMutex m_s_patch_stash_mutex;
+
+    //LPPair* m_s_table_q;
+    //LPPair* m_s_table_stash_q;
+    //uint32_t m_s_table_q_size;
 };
 
 }  // namespace rxmesh
diff --git a/include/rxmesh/cavity_manager_impl.cuh b/include/rxmesh/cavity_manager_impl.cuh
index 735fe565..3f585210 100644
--- a/include/rxmesh/cavity_manager_impl.cuh
+++ b/include/rxmesh/cavity_manager_impl.cuh
@@ -228,6 +228,17 @@ CavityManager<blockThreads, cop>::alloc_shared_memory(
     // cavity boundary edges
     m_s_cavity_boundary_edges = shrd_alloc.alloc<uint16_t>(m_s_num_edges[0]);
 
+    // q hash table
+    // m_s_table_q_size = std::max(
+    //     std::max(m_context.m_max_lp_capacity_v,
+    //              m_context.m_max_lp_capacity_e), m_context.m_max_lp_capacity_f);
+    // m_s_table_q = shrd_alloc.alloc<LPPair>(m_s_table_q_size);
+
+    //__shared__ LPPair st_q[LPHashTable::stash_size];
+    //m_s_table_stash_q = st_q;
+    //fill_n<blockThreads>(
+    //    m_s_table_stash_q, uint16_t(LPHashTable::stash_size), LPPair());
+
     // lp stash
     __shared__ LPPair st_v[LPHashTable::stash_size];
     m_s_table_stash_v = st_v;
@@ -1681,6 +1692,12 @@ CavityManager<blockThreads, cop>::soft_migrate_from_patch(
                 m_correspondence_size_vf,
                 m_s_table_v,
                 m_s_table_stash_v);
+
+            // assert(m_s_table_q_size >=
+            //        m_context.m_patches_info[q].lp_v.get_capacity());
+            // m_context.m_patches_info[q].lp_v.load_in_shared_memory(
+            //     m_s_table_q, true, m_s_table_stash_q);
+
             block.sync();
 
             // make sure there is a copy in p for any vertex in
@@ -1842,6 +1859,12 @@ __device__ __inline__ bool CavityManager<blockThreads, cop>::migrate_from_patch(
                 m_correspondence_size_vf,
                 m_s_table_v,
                 m_s_table_stash_v);
+
+            // assert(m_s_table_q_size >=
+            //        m_context.m_patches_info[q].lp_v.get_capacity());
+            // m_context.m_patches_info[q].lp_v.load_in_shared_memory(
+            //     m_s_table_q, true, m_s_table_stash_q);
+
             block.sync();
 
             // 3. make sure there is a copy in p for any vertex in
@@ -1895,6 +1918,11 @@ __device__ __inline__ bool CavityManager<blockThreads, cop>::migrate_from_patch(
                 m_correspondence_size_e,
                 m_s_table_e,
                 m_s_table_stash_e);
+            // assert(m_s_table_q_size >=
+            //        m_context.m_patches_info[q].lp_e.get_capacity());
+            // m_context.m_patches_info[q].lp_e.load_in_shared_memory(
+            //     m_s_table_q, true, m_s_table_stash_q);
+
             block.sync();
 
             // same story as with the loop that adds vertices
@@ -2027,6 +2055,12 @@ __device__ __inline__ bool CavityManager<blockThreads, cop>::migrate_from_patch(
                 m_correspondence_size_vf,
                 m_s_table_f,
                 m_s_table_stash_f);
+
+            // assert(m_s_table_q_size >=
+            //        m_context.m_patches_info[q].lp_f.get_capacity());
+            // m_context.m_patches_info[q].lp_f.load_in_shared_memory(
+            //     m_s_table_q, true, m_s_table_stash_q);
+
             block.sync();
 
             // same story as with the loop that adds vertices
@@ -2403,77 +2437,6 @@ __device__ __inline__ uint16_t CavityManager<blockThreads, cop>::find_copy_face(
 }
 
 
-template <uint32_t blockThreads, CavityOp cop>
-template <typename HandleT>
-__device__ __inline__ uint16_t CavityManager<blockThreads, cop>::find_copy(
-    uint16_t&      lid,
-    uint32_t&      src_patch,
-    const uint16_t dest_patch_num_elements,
-    const Bitmask& dest_patch_owned_mask,
-    const Bitmask& dest_patch_active_mask,
-    const Bitmask& dest_in_cavity,
-    const LPPair*  s_table,
-    const LPPair*  s_stash)
-{
-
-    assert(
-        !m_context.m_patches_info[src_patch].is_deleted(HandleT::LocalT(lid)));
-
-    // First check if lid is owned by src_patch. If not, then map it to its
-    // owner patch and local index in it
-
-    if (!m_context.m_patches_info[src_patch].is_owned(HandleT::LocalT(lid))) {
-        HandleT owner =
-            m_context.m_patches_info[src_patch].find<HandleT>({lid});
-        src_patch = owner.patch_id();
-        lid       = owner.local_id();
-    } else {
-        if constexpr (std::is_same_v<HandleT, EdgeHandle>) {
-            assert(lid < m_correspondence_size_e);
-            return m_s_q_correspondence_e[lid];
-        } else {
-            assert(lid < m_correspondence_size_vf);
-            return m_s_q_correspondence_vf[lid];
-        }
-    }
-
-    // if the owner src_patch is the same as the patch associated with this
-    // cavity, the lid is the local index we are looking for
-    if (src_patch == m_patch_info.patch_id) {
-        return lid;
-    }
-
-    // otherwise, we do a search over the not-owned elements in the dest
-    // patch. For every not-owned element, we map it to its owner patch and
-    // check against lid-src_patch pair
-    for (uint16_t i = 0; i < dest_patch_num_elements; ++i) {
-        assert(i < dest_patch_owned_mask.size());
-        assert(i < dest_patch_active_mask.size());
-        assert(i < dest_in_cavity.size());
-        if (!dest_patch_owned_mask(i) &&
-            (dest_patch_active_mask(i) || dest_in_cavity(i))) {
-
-            const HandleT handle = m_patch_info.find<HandleT>(
-                i, s_table, s_stash, m_s_patch_stash);
-
-            // These assertion does not work any more since we change the
-            // active and owned mask when we add new elements. So, a thread
-            // A might set the bit for the active mask and reset the owned
-            // for element X before adding it to the hashtable leading to
-            // another thread B looking for it without finding it
-
-            // assert(handle.is_valid());
-            // assert(handle.patch_id() != INVALID32);
-            // assert(handle.local_id() != INVALID16);
-
-            if (handle.patch_id() == src_patch && handle.local_id() == lid) {
-                return i;
-            }
-        }
-    }
-    return INVALID16;
-}
-
 template <uint32_t blockThreads, CavityOp cop>
 template <typename HandleT>
 __device__ __inline__ uint16_t CavityManager<blockThreads, cop>::find_copy(
@@ -2507,7 +2470,11 @@ __device__ __inline__ uint16_t CavityManager<blockThreads, cop>::find_copy(
     const uint16_t lid_in(lid);
     HandleT owner;
     if (!m_context.m_patches_info[src_patch].is_owned(HandleT::LocalT(lid))) {
-        owner = m_context.m_patches_info[src_patch].find<HandleT>({lid});
+        owner = m_context.m_patches_info[src_patch].find<HandleT>(
+            {lid} /*, m_s_table_q, m_s_table_stash_q */);
+
+        assert(owner.is_valid());
+
         // if the owner src_patch is the same as the patch associated with this
         // cavity, the lid is the local index we are looking for
         src_patch = owner.patch_id();
diff --git a/include/rxmesh/patch_info.h b/include/rxmesh/patch_info.h
index 1167e68e..0f4ea0f2 100644
--- a/include/rxmesh/patch_info.h
+++ b/include/rxmesh/patch_info.h
@@ -48,11 +48,11 @@ struct ALIGN(16) PatchInfo
           faces_capacity(nullptr),
           patch_id(INVALID32){};
 
-    __device__ __host__ PatchInfo(const PatchInfo& other) = default;
-    __device__ __host__ PatchInfo(PatchInfo&&) = default;
-    __device__ __host__ PatchInfo& operator=(const PatchInfo&) = default;
-    __device__ __host__ PatchInfo& operator=(PatchInfo&&) = default;
-    __device__ __host__ ~PatchInfo() = default;
+    __device__ __host__ PatchInfo(const PatchInfo& other)      = default;
+    __device__ __host__ PatchInfo(PatchInfo&&)                 = default;
+    __device__ __host__ PatchInfo& operator=(const PatchInfo&) = default;
+    __device__ __host__ PatchInfo& operator=(PatchInfo&&)      = default;
+    __device__ __host__ ~PatchInfo()                           = default;
 
     // The topology information: edge incident vertices and face incident edges
     LocalVertexT* ev;
@@ -145,17 +145,20 @@ struct ALIGN(16) PatchInfo
     }
 
     template <typename HandleT>
-    __device__ __host__ __inline__ HandleT find(const LPPair::KeyT key) const
+    __device__ __host__ __inline__ HandleT find(
+        const LPPair::KeyT key,
+        const LPPair*      table = nullptr,
+        const LPPair*      stash = nullptr) const
     {
         LPPair lp;
         if constexpr (std::is_same_v<HandleT, VertexHandle>) {
-            lp = lp_v.find(key, nullptr, nullptr);
+            lp = lp_v.find(key, table, stash);
         }
         if constexpr (std::is_same_v<HandleT, EdgeHandle>) {
-            lp = lp_e.find(key, nullptr, nullptr);
+            lp = lp_e.find(key, table, stash);
         }
         if constexpr (std::is_same_v<HandleT, FaceHandle>) {
-            lp = lp_f.find(key, nullptr, nullptr);
+            lp = lp_f.find(key, table, stash);
         }
 
         // assert(!lp.is_sentinel());
diff --git a/include/rxmesh/patch_scheduler.cuh b/include/rxmesh/patch_scheduler.cuh
index 8413ebf0..8d1f35e9 100644
--- a/include/rxmesh/patch_scheduler.cuh
+++ b/include/rxmesh/patch_scheduler.cuh
@@ -2,7 +2,7 @@
 
 // for debugging, this macro let the scheduler only generate one valid patch
 // (corresponding to the blockIdx.x)
-//#define PROCESS_SINGLE_PATCH
+// #define PROCESS_SINGLE_PATCH
 
 // inpsired/taken from
 // https://github.com/GPUPeople/Ouroboros/blob/9153c55abffb3bceb5aea4028dfcc00439b046d5/include/device/queues/Queue.h
diff --git a/include/rxmesh/rxmesh.cpp b/include/rxmesh/rxmesh.cpp
index 4d034e51..13988fd8 100644
--- a/include/rxmesh/rxmesh.cpp
+++ b/include/rxmesh/rxmesh.cpp
@@ -791,7 +791,8 @@ void RXMesh::build_device()
                                        m_d_patches_info[p]);
     }
 
-    for (int p = 0; p < static_cast<int>(get_num_patches()); ++p) {
+
+    for (uint32_t p = 0; p < get_num_patches(); ++p) {
 
         m_max_capacity_lp_v = std::max(m_max_capacity_lp_v,
                                        m_h_patches_info[p].lp_v.get_capacity());
@@ -1175,5 +1176,17 @@ void RXMesh::allocate_extra_patches()
                                  m_h_patches_info[p],
                                  m_d_patches_info[p]);
     }
+
+
+    for (uint32_t p = get_num_patches(); p < get_max_num_patches(); ++p) {
+        m_max_capacity_lp_v = std::max(m_max_capacity_lp_v,
+                                       m_h_patches_info[p].lp_v.get_capacity());
+
+        m_max_capacity_lp_e = std::max(m_max_capacity_lp_e,
+                                       m_h_patches_info[p].lp_e.get_capacity());
+
+        m_max_capacity_lp_f = std::max(m_max_capacity_lp_f,
+                                       m_h_patches_info[p].lp_f.get_capacity());
+    }
 }
 }  // namespace rxmesh
diff --git a/include/rxmesh/rxmesh_dynamic.h b/include/rxmesh/rxmesh_dynamic.h
index ae11596e..1e1e6620 100644
--- a/include/rxmesh/rxmesh_dynamic.h
+++ b/include/rxmesh/rxmesh_dynamic.h
@@ -453,6 +453,12 @@ class RXMeshDynamic : public RXMeshStatic
         cavity_size_shmem += (this->m_max_faces_per_patch / 2) * sizeof(int) +
                              ShmemAllocator::default_alignment;
 
+        size_t q_lp_shmem = std::max(max_lp_hashtable_capacity<LocalVertexT>(),
+                                     max_lp_hashtable_capacity<LocalEdgeT>());
+
+        q_lp_shmem = std::max(q_lp_shmem,
+                              size_t(max_lp_hashtable_capacity<LocalFaceT>())) *
+                     sizeof(LPPair);
 
         // active, owned, migrate(for vertices only), src bitmask (for vertices
         // and edges only), src connect (for vertices and edges only), ownership
diff --git a/include/rxmesh/util/macros.h b/include/rxmesh/util/macros.h
index 4f48999c..8cf0e34f 100644
--- a/include/rxmesh/util/macros.h
+++ b/include/rxmesh/util/macros.h
@@ -43,11 +43,7 @@ inline void HandleError(cudaError_t err, const char* file, int line)
     if (err != cudaSuccess) {
         Log::get_logger()->error("Line {} File {}", line, file);
         Log::get_logger()->error("CUDA ERROR: {}", cudaGetErrorString(err));
-#ifdef _WIN32
-        system("pause");
-#else
         exit(EXIT_FAILURE);
-#endif
     }
 }
 #define CUDA_ERROR(err) (HandleError(err, __FILE__, __LINE__))
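
Note: the commented-out m_s_table_q allocations and the new optional table/stash parameters on PatchInfo::find point toward staging patch q's LP hash table in shared memory before doing lookups during migration. Below is a minimal, self-contained CUDA sketch of that general "stage the table in shared memory, then probe the shared-memory copy" pattern. The Pair struct, hash_slot, lookup_kernel, and the linear-probing layout are hypothetical simplifications for illustration; they are not RXMesh's LPPair/LPHashTable API or its actual probing scheme. Staging the table once per block amortizes global-memory traffic across the many lookups each block performs.

// Illustration only: hypothetical stand-ins, not RXMesh code.
#include <cstdint>
#include <cstdio>
#include <cuda_runtime.h>

struct Pair {
    uint16_t key;
    uint16_t value;
};

constexpr int      TABLE_SIZE = 128;     // capacity of the toy table
constexpr uint16_t EMPTY_KEY  = 0xFFFF;  // sentinel marking an empty slot

__host__ __device__ inline uint32_t hash_slot(uint16_t key)
{
    return (key * 2654435761u) % TABLE_SIZE;
}

__global__ void lookup_kernel(const Pair*     global_table,
                              const uint16_t* queries,
                              uint16_t*       results,
                              int             num_queries)
{
    // 1) Cooperatively stage the table in shared memory.
    __shared__ Pair s_table[TABLE_SIZE];
    for (int i = threadIdx.x; i < TABLE_SIZE; i += blockDim.x) {
        s_table[i] = global_table[i];
    }
    __syncthreads();

    // 2) Probe the shared-memory copy with linear probing.
    for (int q = threadIdx.x; q < num_queries; q += blockDim.x) {
        const uint16_t key   = queries[q];
        uint16_t       found = EMPTY_KEY;  // stays EMPTY_KEY if not found
        uint32_t       slot  = hash_slot(key);
        for (int probe = 0; probe < TABLE_SIZE; ++probe) {
            const Pair p = s_table[slot];
            if (p.key == key) {
                found = p.value;
                break;
            }
            if (p.key == EMPTY_KEY) {  // empty slot ends the probe chain
                break;
            }
            slot = (slot + 1) % TABLE_SIZE;
        }
        results[q] = found;
    }
}

int main()
{
    // Build a toy table on the host using the same probing scheme.
    Pair h_table[TABLE_SIZE];
    for (int i = 0; i < TABLE_SIZE; ++i) {
        h_table[i] = {EMPTY_KEY, EMPTY_KEY};
    }
    auto insert = [&](uint16_t key, uint16_t value) {
        uint32_t slot = hash_slot(key);
        while (h_table[slot].key != EMPTY_KEY) {
            slot = (slot + 1) % TABLE_SIZE;
        }
        h_table[slot] = {key, value};
    };
    insert(7, 70);
    insert(9, 90);

    const uint16_t h_queries[3] = {7, 9, 11};
    Pair*          d_table;
    uint16_t *     d_queries, *d_results;
    cudaMalloc(&d_table, sizeof(h_table));
    cudaMalloc(&d_queries, sizeof(h_queries));
    cudaMalloc(&d_results, sizeof(h_queries));
    cudaMemcpy(d_table, h_table, sizeof(h_table), cudaMemcpyHostToDevice);
    cudaMemcpy(d_queries, h_queries, sizeof(h_queries), cudaMemcpyHostToDevice);

    lookup_kernel<<<1, 64>>>(d_table, d_queries, d_results, 3);

    uint16_t h_results[3];
    cudaMemcpy(h_results, d_results, sizeof(h_results), cudaMemcpyDeviceToHost);
    for (int i = 0; i < 3; ++i) {
        // key 11 was never inserted, so it prints the EMPTY_KEY sentinel
        printf("key %u -> value %u\n", unsigned(h_queries[i]), unsigned(h_results[i]));
    }
    cudaFree(d_table);
    cudaFree(d_queries);
    cudaFree(d_results);
    return 0;
}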