
Commit

store q hash table in shmem
comment out for now
Ahdhn committed Aug 15, 2023
1 parent 05963a6 commit deb9d44
Showing 7 changed files with 76 additions and 99 deletions.
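
Before the per-file diff, a note on the pattern this commit stages and then comments out: copy the neighbor patch q's LP hash table from global into shared memory once per block, synchronize, and serve the owner lookups during migration from the shared copy. The sketch below is a minimal, self-contained CUDA illustration of that general pattern only; Pair, lookup_with_shared_table, and the simple linear-probing layout are made-up names for illustration and are not the RXMesh LPHashTable implementation.

#include <cstdint>
#include <cuda_runtime.h>

// Illustrative stand-in for an LP slot: a (key, value) pair.
struct Pair
{
    uint16_t key;
    uint16_t val;
};

// Each block stages one patch's table from global into shared memory,
// then answers all of its queries from the shared copy.
__global__ void lookup_with_shared_table(const Pair*     global_table,
                                         int             capacity,
                                         const uint16_t* queries,
                                         uint16_t*       results,
                                         int             num_queries)
{
    extern __shared__ Pair s_table[];

    // cooperative copy: global -> shared
    for (int i = threadIdx.x; i < capacity; i += blockDim.x) {
        s_table[i] = global_table[i];
    }
    __syncthreads();

    // linear-probing lookup served entirely from shared memory
    for (int q = threadIdx.x; q < num_queries; q += blockDim.x) {
        const uint16_t key = queries[q];
        uint16_t       out = 0xFFFF;  // "not found"
        for (int probe = 0; probe < capacity; ++probe) {
            const Pair p = s_table[(key + probe) % capacity];
            if (p.key == key) {
                out = p.val;
                break;
            }
        }
        results[q] = out;
    }
}

// Launched with the table size as dynamic shared memory, e.g.
// lookup_with_shared_table<<<num_patches, 256, capacity * sizeof(Pair)>>>(...);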
16 changes: 4 additions & 12 deletions include/rxmesh/cavity_manager.cuh
@@ -496,18 +496,6 @@ struct CavityManager
* the lid lives in src_patch and we want to find the corresponding local
* index in dest_patch
*/
template <typename HandleT>
__device__ __inline__ uint16_t find_copy(
uint16_t& lid,
uint32_t& src_patch,
const uint16_t dest_patch_num_elements,
const Bitmask& dest_patch_owned_mask,
const Bitmask& dest_patch_active_mask,
const Bitmask& dest_in_cavity,
const LPPair* s_table,
const LPPair* s_stash);


template <typename HandleT>
__device__ __inline__ uint16_t find_copy(
uint16_t& lid,
@@ -690,6 +678,10 @@ struct CavityManager

bool* m_s_should_slice;
ShmemMutex m_s_patch_stash_mutex;

//LPPair* m_s_table_q;
//LPPair* m_s_table_stash_q;
//uint32_t m_s_table_q_size;
};

} // namespace rxmesh
111 changes: 39 additions & 72 deletions include/rxmesh/cavity_manager_impl.cuh
@@ -228,6 +228,17 @@ CavityManager<blockThreads, cop>::alloc_shared_memory(
// cavity boundary edges
m_s_cavity_boundary_edges = shrd_alloc.alloc<uint16_t>(m_s_num_edges[0]);

// q hash table
// m_s_table_q_size = std::max(
// std::max(m_context.m_max_lp_capacity_v,
// m_context.m_max_lp_capacity_e), m_context.m_max_lp_capacity_f);
// m_s_table_q = shrd_alloc.alloc<LPPair>(m_s_table_q_size);

//__shared__ LPPair st_q[LPHashTable::stash_size];
//m_s_table_stash_q = st_q;
//fill_n<blockThreads>(
// m_s_table_stash_q, uint16_t(LPHashTable::stash_size), LPPair());

// lp stash
__shared__ LPPair st_v[LPHashTable::stash_size];
m_s_table_stash_v = st_v;
@@ -1681,6 +1692,12 @@ CavityManager<blockThreads, cop>::soft_migrate_from_patch(
m_correspondence_size_vf,
m_s_table_v,
m_s_table_stash_v);

// assert(m_s_table_q_size >=
// m_context.m_patches_info[q].lp_v.get_capacity());
// m_context.m_patches_info[q].lp_v.load_in_shared_memory(
// m_s_table_q, true, m_s_table_stash_q);

block.sync();

// make sure there is a copy in p for any vertex in
@@ -1842,6 +1859,12 @@ __device__ __inline__ bool CavityManager<blockThreads, cop>::migrate_from_patch(
m_correspondence_size_vf,
m_s_table_v,
m_s_table_stash_v);

// assert(m_s_table_q_size >=
// m_context.m_patches_info[q].lp_v.get_capacity());
// m_context.m_patches_info[q].lp_v.load_in_shared_memory(
// m_s_table_q, true, m_s_table_stash_q);

block.sync();

// 3. make sure there is a copy in p for any vertex in
@@ -1895,6 +1918,11 @@ __device__ __inline__ bool CavityManager<blockThreads, cop>::migrate_from_patch(
m_correspondence_size_e,
m_s_table_e,
m_s_table_stash_e);
// assert(m_s_table_q_size >=
// m_context.m_patches_info[q].lp_e.get_capacity());
// m_context.m_patches_info[q].lp_e.load_in_shared_memory(
// m_s_table_q, true, m_s_table_stash_q);

block.sync();

// same story as with the loop that adds vertices
@@ -2027,6 +2055,12 @@ __device__ __inline__ bool CavityManager<blockThreads, cop>::migrate_from_patch(
m_correspondence_size_vf,
m_s_table_f,
m_s_table_stash_f);

// assert(m_s_table_q_size >=
// m_context.m_patches_info[q].lp_f.get_capacity());
// m_context.m_patches_info[q].lp_f.load_in_shared_memory(
// m_s_table_q, true, m_s_table_stash_q);

block.sync();

// same story as with the loop that adds vertices
@@ -2403,77 +2437,6 @@ __device__ __inline__ uint16_t CavityManager<blockThreads, cop>::find_copy_face(
}


template <uint32_t blockThreads, CavityOp cop>
template <typename HandleT>
__device__ __inline__ uint16_t CavityManager<blockThreads, cop>::find_copy(
uint16_t& lid,
uint32_t& src_patch,
const uint16_t dest_patch_num_elements,
const Bitmask& dest_patch_owned_mask,
const Bitmask& dest_patch_active_mask,
const Bitmask& dest_in_cavity,
const LPPair* s_table,
const LPPair* s_stash)
{

assert(
!m_context.m_patches_info[src_patch].is_deleted(HandleT::LocalT(lid)));

// First check if lid is owned by src_patch. If not, then map it to its
// owner patch and local index in it

if (!m_context.m_patches_info[src_patch].is_owned(HandleT::LocalT(lid))) {
HandleT owner =
m_context.m_patches_info[src_patch].find<HandleT>({lid});
src_patch = owner.patch_id();
lid = owner.local_id();
} else {
if constexpr (std::is_same_v<HandleT, EdgeHandle>) {
assert(lid < m_correspondence_size_e);
return m_s_q_correspondence_e[lid];
} else {
assert(lid < m_correspondence_size_vf);
return m_s_q_correspondence_vf[lid];
}
}

// if the owner src_patch is the same as the patch associated with this
// cavity, the lid is the local index we are looking for
if (src_patch == m_patch_info.patch_id) {
return lid;
}

// otherwise, we do a search over the not-owned elements in the dest
// patch. For every not-owned element, we map it to its owner patch and
// check against lid-src_patch pair
for (uint16_t i = 0; i < dest_patch_num_elements; ++i) {
assert(i < dest_patch_owned_mask.size());
assert(i < dest_patch_active_mask.size());
assert(i < dest_in_cavity.size());
if (!dest_patch_owned_mask(i) &&
(dest_patch_active_mask(i) || dest_in_cavity(i))) {

const HandleT handle = m_patch_info.find<HandleT>(
i, s_table, s_stash, m_s_patch_stash);

            // These assertions no longer hold since we change the active and
            // owned masks when we add new elements. So, a thread A might set
            // the active bit and reset the owned bit for element X before
            // adding it to the hashtable, leading to another thread B looking
            // for it without finding it

// assert(handle.is_valid());
// assert(handle.patch_id() != INVALID32);
// assert(handle.local_id() != INVALID16);

if (handle.patch_id() == src_patch && handle.local_id() == lid) {
return i;
}
}
}
return INVALID16;
}

template <uint32_t blockThreads, CavityOp cop>
template <typename HandleT>
__device__ __inline__ uint16_t CavityManager<blockThreads, cop>::find_copy(
@@ -2507,7 +2470,11 @@ __device__ __inline__ uint16_t CavityManager<blockThreads, cop>::find_copy(
const uint16_t lid_in(lid);
HandleT owner;
if (!m_context.m_patches_info[src_patch].is_owned(HandleT::LocalT(lid))) {
owner = m_context.m_patches_info[src_patch].find<HandleT>({lid});
owner = m_context.m_patches_info[src_patch].find<HandleT>(
{lid} /*, m_s_table_q, m_s_table_stash_q */);

assert(owner.is_valid());

// if the owner src_patch is the same as the patch associated with this
// cavity, the lid is the local index we are looking for
src_patch = owner.patch_id();
21 changes: 12 additions & 9 deletions include/rxmesh/patch_info.h
@@ -48,11 +48,11 @@ struct ALIGN(16) PatchInfo
faces_capacity(nullptr),
patch_id(INVALID32){};

__device__ __host__ PatchInfo(const PatchInfo& other) = default;
__device__ __host__ PatchInfo(PatchInfo&&) = default;
__device__ __host__ PatchInfo& operator=(const PatchInfo&) = default;
__device__ __host__ PatchInfo& operator=(PatchInfo&&) = default;
__device__ __host__ ~PatchInfo() = default;
__device__ __host__ PatchInfo(const PatchInfo& other) = default;
__device__ __host__ PatchInfo(PatchInfo&&) = default;
__device__ __host__ PatchInfo& operator=(const PatchInfo&) = default;
__device__ __host__ PatchInfo& operator=(PatchInfo&&) = default;
__device__ __host__ ~PatchInfo() = default;

// The topology information: edge incident vertices and face incident edges
LocalVertexT* ev;
@@ -145,17 +145,20 @@ struct ALIGN(16) PatchInfo
}

template <typename HandleT>
__device__ __host__ __inline__ HandleT find(const LPPair::KeyT key) const
__device__ __host__ __inline__ HandleT find(
const LPPair::KeyT key,
const LPPair* table = nullptr,
const LPPair* stash = nullptr) const
{
LPPair lp;
if constexpr (std::is_same_v<HandleT, VertexHandle>) {
lp = lp_v.find(key, nullptr, nullptr);
lp = lp_v.find(key, table, stash);
}
if constexpr (std::is_same_v<HandleT, EdgeHandle>) {
lp = lp_e.find(key, nullptr, nullptr);
lp = lp_e.find(key, table, stash);
}
if constexpr (std::is_same_v<HandleT, FaceHandle>) {
lp = lp_f.find(key, nullptr, nullptr);
lp = lp_f.find(key, table, stash);
}

// assert(!lp.is_sentinel());
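
The new PatchInfo::find() signature above keeps its old behavior at existing call sites: the table and stash parameters default to nullptr, which means the lookup searches the patch's LP table where it already lives. A hedged usage sketch follows; patch_info and lid are illustrative names, and the shared-table names in the second call are taken from the commented-out code earlier in this commit, assuming the shared copy has been loaded first.

// Defaulted call, unchanged behavior: the lookup walks the patch's own table.
VertexHandle owner = patch_info.find<VertexHandle>({lid});

// Once the shared-memory staging is enabled, the same lookup could be pointed
// at the block's shared copy instead (names from the commented-out code):
// VertexHandle owner =
//     patch_info.find<VertexHandle>({lid}, m_s_table_q, m_s_table_stash_q);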
2 changes: 1 addition & 1 deletion include/rxmesh/patch_scheduler.cuh
@@ -2,7 +2,7 @@

// for debugging, this macro lets the scheduler generate only one valid patch
// (corresponding to the blockIdx.x)
//#define PROCESS_SINGLE_PATCH
// #define PROCESS_SINGLE_PATCH

// inspired/taken from
// https://github.com/GPUPeople/Ouroboros/blob/9153c55abffb3bceb5aea4028dfcc00439b046d5/include/device/queues/Queue.h
15 changes: 14 additions & 1 deletion include/rxmesh/rxmesh.cpp
@@ -791,7 +791,8 @@ void RXMesh::build_device()
m_d_patches_info[p]);
}

for (int p = 0; p < static_cast<int>(get_num_patches()); ++p) {

for (uint32_t p = 0; p < get_num_patches(); ++p) {
m_max_capacity_lp_v = std::max(m_max_capacity_lp_v,
m_h_patches_info[p].lp_v.get_capacity());

@@ -1175,5 +1176,17 @@ void RXMesh::allocate_extra_patches()
m_h_patches_info[p],
m_d_patches_info[p]);
}


for (uint32_t p = get_num_patches(); p < get_max_num_patches(); ++p) {
m_max_capacity_lp_v = std::max(m_max_capacity_lp_v,
m_h_patches_info[p].lp_v.get_capacity());

m_max_capacity_lp_e = std::max(m_max_capacity_lp_e,
m_h_patches_info[p].lp_e.get_capacity());

m_max_capacity_lp_f = std::max(m_max_capacity_lp_f,
m_h_patches_info[p].lp_f.get_capacity());
}
}
} // namespace rxmesh
6 changes: 6 additions & 0 deletions include/rxmesh/rxmesh_dynamic.h
@@ -453,6 +453,12 @@ class RXMeshDynamic : public RXMeshStatic
cavity_size_shmem += (this->m_max_faces_per_patch / 2) * sizeof(int) +
ShmemAllocator::default_alignment;

size_t q_lp_shmem = std::max(max_lp_hashtable_capacity<LocalVertexT>(),
max_lp_hashtable_capacity<LocalEdgeT>());

q_lp_shmem = std::max(q_lp_shmem,
size_t(max_lp_hashtable_capacity<LocalFaceT>())) *
sizeof(LPPair);
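
The added computation above sizes the shared-memory buffer for the staged q hash table: the largest LP hash-table capacity across vertices, edges, and faces, multiplied by sizeof(LPPair). A small, self-contained worked example with made-up capacities and an assumed 4-byte LPPair (both are illustrative assumptions, not values from the library):

#include <algorithm>
#include <cstddef>

int main()
{
    // Assumed capacities for illustration only.
    const size_t cap_v = 1024, cap_e = 2048, cap_f = 1536;
    const size_t pair_bytes = 4;  // assumed sizeof(LPPair)

    // max(1024, 2048, 1536) * 4 = 8192 bytes of shared memory.
    const size_t q_lp_shmem = std::max({cap_v, cap_e, cap_f}) * pair_bytes;
    return q_lp_shmem == 8192 ? 0 : 1;
}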

// active, owned, migrate(for vertices only), src bitmask (for vertices
// and edges only), src connect (for vertices and edges only), ownership
4 changes: 0 additions & 4 deletions include/rxmesh/util/macros.h
@@ -43,11 +43,7 @@ inline void HandleError(cudaError_t err, const char* file, int line)
if (err != cudaSuccess) {
Log::get_logger()->error("Line {} File {}", line, file);
Log::get_logger()->error("CUDA ERROR: {}", cudaGetErrorString(err));
#ifdef _WIN32
system("pause");
#else
exit(EXIT_FAILURE);
#endif
}
}
#define CUDA_ERROR(err) (HandleError(err, __FILE__, __LINE__))
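
With the Windows-only system("pause") branch removed, HandleError now logs and exits on any CUDA error on every platform. A brief usage sketch of the CUDA_ERROR macro defined above; the buffer and sizes are illustrative, and the calls are standard CUDA runtime API.

// Any failing CUDA runtime call is logged with file/line and the process
// exits with EXIT_FAILURE.
float* d_buf = nullptr;
CUDA_ERROR(cudaMalloc(&d_buf, 1024 * sizeof(float)));
CUDA_ERROR(cudaMemset(d_buf, 0, 1024 * sizeof(float)));
CUDA_ERROR(cudaFree(d_buf));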
