Skip to content

Commit

Permalink
speeding up add_element()
Browse files Browse the repository at this point in the history
  • Loading branch information
Ahdhn committed Jul 30, 2023
1 parent d3802c3 commit 18819d6
Show file tree
Hide file tree
Showing 5 changed files with 129 additions and 70 deletions.
32 changes: 22 additions & 10 deletions include/rxmesh/cavity_manager.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,8 @@ struct CavityManager
*/
__device__ __inline__ CavityManager(cooperative_groups::thread_block& block,
Context& context,
ShmemAllocator& shrd_alloc);
ShmemAllocator& shrd_alloc,
uint32_t current_p = 0);

/**
* @brief create new cavity from a seed element. The seed element type
Expand Down Expand Up @@ -319,21 +320,32 @@ struct CavityManager
* where we successfully flipped its status from inactive to active.
* If we fail, we just atomically increment num_elements and set the
* corresponding bit in active_bitmask. There is a special case when it
* comes to fill in spots of in-cavity elements that are not-owned. We need
* to leave these spots clear since we use them as a key in the hashtable
* in order to change their ownership. If we filled them in, and added an
* element (which are initially not-owned), we will pollute the hashtable
* and won't be able to get the owner element in order to change their
* ownership flag during ownership_change(). However, after
* ownership_change, we should leave only spots that are in-cavity and
* not-owned since we use the corresponding entries in the hashtable of
* these spots in hashtable calibration
* comes to fill in spots of in-cavity elements. These elements are not
* really deleted (only in shared memory), and we use the fact that they are
* in-cavity to check whether they were active during migration. If we
* activate them, we will lose this information. For example, suppose we
* have a face in-cavity that is shared with two other patches q0 and q1.
* During migration from q0, we may reactivate the face by flipping its bit
* mask, but now it refers to a different face with different connectivity.
* Next, during migration from q1, we may need to check if this face has
* been in p before so we don't copy it. But by now its bit mask refers to a
* different face, and we have lost this info. We also need to leave these
* spots clear since we
* use them as a key in the hashtable in order to change their ownership. If
* we filled them in, and added an element (which are initially not-owned),
* we will pollute the hashtable and won't be able to get the owner element
* in order to change their ownership flag during ownership_change().
* However, after ownership_change, we should leave only spots that are
* in-cavity AND not-owned since we use the corresponding entries in the
* hashtable of these spots in hashtable calibration. After a full round,
* i.e., after hashtable calibration, these deactivated spots can be used in
* subsequent iterations.
*/
__device__ __inline__ uint16_t add_element(Bitmask& active_bitmask,
uint32_t* num_elements,
const uint16_t capacity,
const Bitmask& in_cavity,
const Bitmask& owned,
bool avoid_in_cavity,
bool avoid_not_owned_in_cavity);

/**
Expand Down
117 changes: 76 additions & 41 deletions include/rxmesh/cavity_manager_impl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ template <uint32_t blockThreads, CavityOp cop>
__device__ __inline__ CavityManager<blockThreads, cop>::CavityManager(
cooperative_groups::thread_block& block,
Context& context,
ShmemAllocator& shrd_alloc)
ShmemAllocator& shrd_alloc,
uint32_t current_p)
: m_write_to_gmem(false), m_context(context)
{
__shared__ uint32_t s_patch_id;
Expand Down Expand Up @@ -225,7 +226,6 @@ CavityManager<blockThreads, cop>::alloc_shared_memory(
// cavity boundary edges
m_s_cavity_boundary_edges = shrd_alloc.alloc<uint16_t>(m_s_num_edges[0]);


// lp stash
__shared__ LPPair st_v[LPHashTable::stash_size];
m_s_table_stash_v = st_v;
Expand Down Expand Up @@ -980,7 +980,9 @@ CavityManager<blockThreads, cop>::add_vertex()
m_patch_info.vertices_capacity[0],
m_s_in_cavity_v,
m_s_owned_mask_v,
false);
false,
true);
assert(v_id < m_patch_info.vertices_capacity[0]);
assert(m_s_active_mask_v(v_id));
m_s_owned_mask_v.set(v_id, true);
return {m_patch_info.patch_id, v_id};
Expand All @@ -1000,8 +1002,9 @@ __device__ __inline__ DEdgeHandle CavityManager<blockThreads, cop>::add_edge(
m_patch_info.edges_capacity[0],
m_s_in_cavity_e,
m_s_owned_mask_e,
false);

false,
true);
assert(e_id < m_patch_info.edges_capacity[0]);
assert(m_s_active_mask_e(e_id));
assert(m_s_active_mask_v(src.local_id()));
assert(m_s_active_mask_v(dest.local_id()));
Expand All @@ -1028,8 +1031,9 @@ __device__ __inline__ FaceHandle CavityManager<blockThreads, cop>::add_face(
m_patch_info.faces_capacity[0],
m_s_in_cavity_f,
m_s_owned_mask_f,
false);

false,
true);
assert(f_id < m_patch_info.faces_capacity[0]);
assert(m_s_active_mask_f(f_id));

m_s_fe[3 * f_id + 0] = e0.local_id();
Expand All @@ -1054,24 +1058,65 @@ __device__ __inline__ uint16_t CavityManager<blockThreads, cop>::add_element(
const uint16_t capacity,
const Bitmask& in_cavity,
const Bitmask& owned,
bool avoid_in_cavity,
bool avoid_not_owned_in_cavity)
{
assert(capacity == in_cavity.size());
assert(capacity == active_bitmask.size());
assert(capacity == owned.size());

uint16_t found = INVALID16;
for (uint16_t i = 0; i < capacity; ++i) {
if (!avoid_not_owned_in_cavity && in_cavity(i) && !owned(i)) {
continue;
}
if (avoid_not_owned_in_cavity && in_cavity(i) /*&& !owned(i)*/) {
continue;
}
if (active_bitmask.try_set(i)) {
found = i;
break;

// number of 32-bit unsigned int used in the bit mask
const uint32_t num32 = DIVIDE_UP(capacity, 32);

for (uint32_t i = 0; i < num32 && found == INVALID16; ++i) {
// flip the bits so that a set bit in `mask` now marks an element
// whose bit is NOT set in the active bitmask
uint32_t mask = ~active_bitmask.m_bitmask[i];
// if at least one of these 32 elements is not active, i.e., its
// bit in `mask` is set
if (mask != 0) {
if (avoid_not_owned_in_cavity) {
mask &= (~in_cavity.m_bitmask[i] | owned.m_bitmask[i]);
}

if (avoid_in_cavity) {
mask &= ~in_cavity.m_bitmask[i];
}
while (mask != 0) {
// find the first set bit
// ffs finds the position of the least significant bit set to 1
uint32_t first = __ffs(mask) - 1;

// now this is the element that meets all the requirements
uint32_t pos = 32 * i + first;

// try to set its bit
if (active_bitmask.try_set(pos)) {
found = pos;
break;
}
// if not successful, mask out this element and try the next one
// in this `mask` until the mask becomes all zeros
mask &= ~(1 << first);
}
}
}

// for (uint16_t i = 0; i < capacity; ++i) {
// if (avoid_not_owned_in_cavity && in_cavity(i) && !owned(i)) {
// continue;
// }
// if (avoid_in_cavity && in_cavity(i)) {
// continue;
// }
// if (active_bitmask.try_set(i)) {
// found = i;
// break;
// }
//}


if (found != INVALID16) {
::atomicMax(num_elements, found + 1);
Expand Down Expand Up @@ -1122,7 +1167,7 @@ __device__ __forceinline__ bool CavityManager<blockThreads, cop>::lock(
if (!okay) {
okay = m_context.m_patches_info[q].lock.acquire_lock(blockIdx.x);
if (okay) {
m_s_locked_patches_mask.set(stash_id, true);
m_s_locked_patches_mask.set(stash_id);
}
}
s_success = okay;
Expand Down Expand Up @@ -1165,7 +1210,7 @@ __device__ __forceinline__ void CavityManager<blockThreads, cop>::unlock(
if (threadIdx.x == 0) {
assert(m_s_locked_patches_mask(stash_id));
m_context.m_patches_info[q].lock.release_lock();
m_s_locked_patches_mask.reset(stash_id, true);
m_s_locked_patches_mask.reset(stash_id);
}
}

Expand Down Expand Up @@ -1313,7 +1358,7 @@ CavityManager<blockThreads, cop>::set_ownership_change_bitmask(
}
}

template <uint32_t blockThreads, CavityOp cop>
/*template <uint32_t blockThreads, CavityOp cop>
__device__ __inline__ bool CavityManager<blockThreads, cop>::migrate(
cooperative_groups::thread_block& block)
{
Expand Down Expand Up @@ -1372,7 +1417,7 @@ __device__ __inline__ bool CavityManager<blockThreads, cop>::migrate(
}
return true;
}
}*/


template <uint32_t blockThreads, CavityOp cop>
Expand Down Expand Up @@ -1949,7 +1994,7 @@ CavityManager<blockThreads, cop>::migrate_from_patch_v2(
return true;
}

template <uint32_t blockThreads, CavityOp cop>
/*template <uint32_t blockThreads, CavityOp cop>
__device__ __inline__ bool CavityManager<blockThreads, cop>::migrate_from_patch(
cooperative_groups::thread_block& block,
const uint8_t q_stash_id,
Expand Down Expand Up @@ -2307,7 +2352,7 @@ __device__ __inline__ bool CavityManager<blockThreads, cop>::migrate_from_patch(
}
return true;
}
}*/


template <uint32_t blockThreads, CavityOp cop>
Expand Down Expand Up @@ -2338,11 +2383,13 @@ __device__ __inline__ LPPair CavityManager<blockThreads, cop>::migrate_vertex(
m_patch_info.vertices_capacity[0],
m_s_in_cavity_v,
m_s_owned_mask_v,
true);
true,
false);
if (vp == INVALID16) {
m_s_should_slice[0] = true;
return ret;
}
assert(vp < m_patch_info.vertices_capacity[0]);

// active bitmask is set in add_element

Expand Down Expand Up @@ -2409,7 +2456,8 @@ __device__ __inline__ LPPair CavityManager<blockThreads, cop>::migrate_edge(
m_patch_info.edges_capacity[0],
m_s_in_cavity_e,
m_s_owned_mask_e,
true);
true,
false);
if (ep == INVALID16) {
m_s_should_slice[0] = true;
return ret;
Expand All @@ -2426,17 +2474,6 @@ __device__ __inline__ LPPair CavityManager<blockThreads, cop>::migrate_edge(
uint16_t v0p = find_copy_vertex(v0q, o0);
uint16_t v1p = find_copy_vertex(v1q, o1);

// assert(!m_context.m_patches_info[o0].is_deleted(
// LocalVertexT(v0q)));
// assert(
// m_context.m_patches_info[o0].is_owned(LocalVertexT(v0q)));
//
// assert(!m_context.m_patches_info[o1].is_deleted(
// LocalVertexT(v1q)));
// assert(
// m_context.m_patches_info[o1].is_owned(LocalVertexT(v1q)));


// since any vertex in m_s_src_mask_v has been
// added already to p, then we should find the
// copy otherwise there is something wrong
Expand Down Expand Up @@ -2501,22 +2538,20 @@ __device__ __inline__ LPPair CavityManager<blockThreads, cop>::migrate_face(
uint16_t fp = find_copy_face(fq, o);


// assert(!m_context.m_patches_info[o].is_deleted(LocalFaceT(fq)));
// assert(m_context.m_patches_info[o].is_owned(LocalFaceT(fq)));

if (fp == INVALID16) {
fp = add_element(m_s_active_mask_f,
m_s_num_faces,
m_patch_info.faces_capacity[0],
m_s_in_cavity_f,
m_s_owned_mask_f,
true);
true,
false);

if (fp == INVALID16) {
m_s_should_slice[0] = true;
return ret;
}
// assert(fp < m_patch_info.faces_capacity[0]);
assert(fp < m_patch_info.faces_capacity[0]);

uint32_t o0(q), o1(q), o2(q);

Expand Down
25 changes: 18 additions & 7 deletions include/rxmesh/context.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,10 @@ class Context
m_edge_prefix(nullptr),
m_face_prefix(nullptr),
m_capacity_factor(0.0f),
m_patches_info(nullptr)
m_patches_info(nullptr),
m_max_lp_capacity_v(0),
m_max_lp_capacity_e(0),
m_max_lp_capacity_f(0)
{
}

Expand Down Expand Up @@ -158,6 +161,9 @@ class Context
uint32_t* vertex_prefix,
uint32_t* edge_prefix,
uint32_t* face_prefix,
uint16_t max_lp_capacity_v,
uint16_t max_lp_capacity_e,
uint16_t max_lp_capacity_f,
PatchInfo* d_patches,
PatchScheduler scheduler)
{
Expand Down Expand Up @@ -203,6 +209,10 @@ class Context
m_edge_prefix = edge_prefix;
m_face_prefix = face_prefix;

m_max_lp_capacity_v = max_lp_capacity_v;
m_max_lp_capacity_e = max_lp_capacity_e;
m_max_lp_capacity_f = max_lp_capacity_f;

m_patches_info = d_patches;

m_patch_scheduler = scheduler;
Expand All @@ -214,12 +224,13 @@ class Context
}


uint32_t * m_num_edges, *m_num_faces, *m_num_vertices, *m_num_patches;
uint32_t * m_max_num_vertices, *m_max_num_edges, *m_max_num_faces;
uint32_t * m_vertex_prefix, *m_edge_prefix, *m_face_prefix;
PatchInfo* m_patches_info;
float m_capacity_factor;
uint32_t m_max_num_patches;
uint32_t * m_num_edges, *m_num_faces, *m_num_vertices, *m_num_patches;
uint32_t * m_max_num_vertices, *m_max_num_edges, *m_max_num_faces;
uint32_t * m_vertex_prefix, *m_edge_prefix, *m_face_prefix;
uint16_t m_max_lp_capacity_v, m_max_lp_capacity_e, m_max_lp_capacity_f;
PatchInfo* m_patches_info;
float m_capacity_factor;
uint32_t m_max_num_patches;
PatchScheduler m_patch_scheduler;
};
} // namespace rxmesh
1 change: 1 addition & 0 deletions include/rxmesh/patcher/patcher.cu
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ namespace patcher {

Patcher::Patcher(std::string filename)
{
RXMESH_TRACE("Patcher: Reading {}", filename);
std::ifstream is(filename, std::ios::binary);
cereal::PortableBinaryInputArchive archive(is);
archive(*this);
Expand Down
Loading

0 comments on commit 18819d6

Please sign in to comment.