Skip to content

Commit

Permalink
speeding up add_element()
Browse files Browse the repository at this point in the history
  • Loading branch information
Ahdhn committed Jul 30, 2023
1 parent d3802c3 commit 18819d6
Show file tree
Hide file tree
Showing 5 changed files with 129 additions and 70 deletions.
32 changes: 22 additions & 10 deletions include/rxmesh/cavity_manager.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,8 @@ struct CavityManager
*/
__device__ __inline__ CavityManager(cooperative_groups::thread_block& block,
Context& context,
ShmemAllocator& shrd_alloc);
ShmemAllocator& shrd_alloc,
uint32_t current_p = 0);

/**
* @brief create new cavity from a seed element. The seed element type
Expand Down Expand Up @@ -319,21 +320,32 @@ struct CavityManager
* where we successfully flipped its status from inactive to active.
* If we fail, we just atomically increment num_elements and set the
* corresponding bit in active_bitmask. There is a special case when it
* comes to fill in spots of in-cavity elements that are not-owned. We need
* to leave these spots clear since we use them as a key in the hashtable
* in order to change their ownership. If we filled them in, and added an
* element (which are initially not-owned), we will pollute the hashtable
* and won't be able to get the owner element in order to change their
* ownership flag during ownership_change(). However, after
* ownership_change, we should leave only spots that are in-cavity and
* not-owned since we use the corresponding entries in the hashtable of
* these spots in hashtable calibration
* comes to fill in spots of in-cavity elements. These elements are not
* really deleted (only in shared memory), and we use the fact that they are
* in-cavity to check whether they were active during migration. If we
* activate them, we will lose this information. For example, suppose we
* have a face in-cavity that is shared with two other patches q0 and q1.
* During migration from q0, we may reactivate the face by flipping its bit
* mask, but now it refers to a different face with different connectivity.
* Next, during migration from q1, we may need to check if this face has
* been in p before so we don't copy it. But by now its bit mask refers to a
* different face, and we have lost this info. We also need to leave these
* spots clear since we
* use them as a key in the hashtable in order to change their ownership. If
* we filled them in, and added an element (which are initially not-owned),
* we will pollute the hashtable and won't be able to get the owner element
* in order to change their ownership flag during ownership_change().
* However, after ownership_change, we should leave only spots that are
* in-cavity AND not-owned since we use the corresponding entries in the
* hashtable of these spots in hashtable calibration. After a full round,
* i.e., after hashtable calibration, these deactivated spots can be used in
* subsequent iterations.
*/
__device__ __inline__ uint16_t add_element(Bitmask& active_bitmask,
uint32_t* num_elements,
const uint16_t capacity,
const Bitmask& in_cavity,
const Bitmask& owned,
bool avoid_in_cavity,
bool avoid_not_owned_in_cavity);

/**
Expand Down
117 changes: 76 additions & 41 deletions include/rxmesh/cavity_manager_impl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ template <uint32_t blockThreads, CavityOp cop>
__device__ __inline__ CavityManager<blockThreads, cop>::CavityManager(
cooperative_groups::thread_block& block,
Context& context,
ShmemAllocator& shrd_alloc)
ShmemAllocator& shrd_alloc,
uint32_t current_p)
: m_write_to_gmem(false), m_context(context)
{
__shared__ uint32_t s_patch_id;
Expand Down Expand Up @@ -225,7 +226,6 @@ CavityManager<blockThreads, cop>::alloc_shared_memory(
// cavity boundary edges
m_s_cavity_boundary_edges = shrd_alloc.alloc<uint16_t>(m_s_num_edges[0]);


// lp stash
__shared__ LPPair st_v[LPHashTable::stash_size];
m_s_table_stash_v = st_v;
Expand Down Expand Up @@ -980,7 +980,9 @@ CavityManager<blockThreads, cop>::add_vertex()
m_patch_info.vertices_capacity[0],
m_s_in_cavity_v,
m_s_owned_mask_v,
false);
false,
true);
assert(v_id < m_patch_info.vertices_capacity[0]);
assert(m_s_active_mask_v(v_id));
m_s_owned_mask_v.set(v_id, true);
return {m_patch_info.patch_id, v_id};
Expand All @@ -1000,8 +1002,9 @@ __device__ __inline__ DEdgeHandle CavityManager<blockThreads, cop>::add_edge(
m_patch_info.edges_capacity[0],
m_s_in_cavity_e,
m_s_owned_mask_e,
false);

false,
true);
assert(e_id < m_patch_info.edges_capacity[0]);
assert(m_s_active_mask_e(e_id));
assert(m_s_active_mask_v(src.local_id()));
assert(m_s_active_mask_v(dest.local_id()));
Expand All @@ -1028,8 +1031,9 @@ __device__ __inline__ FaceHandle CavityManager<blockThreads, cop>::add_face(
m_patch_info.faces_capacity[0],
m_s_in_cavity_f,
m_s_owned_mask_f,
false);

false,
true);
assert(f_id < m_patch_info.faces_capacity[0]);
assert(m_s_active_mask_f(f_id));

m_s_fe[3 * f_id + 0] = e0.local_id();
Expand All @@ -1054,24 +1058,65 @@ __device__ __inline__ uint16_t CavityManager<blockThreads, cop>::add_element(
const uint16_t capacity,
const Bitmask& in_cavity,
const Bitmask& owned,
bool avoid_in_cavity,
bool avoid_not_owned_in_cavity)
{
assert(capacity == in_cavity.size());
assert(capacity == active_bitmask.size());
assert(capacity == owned.size());

uint16_t found = INVALID16;
for (uint16_t i = 0; i < capacity; ++i) {
if (!avoid_not_owned_in_cavity && in_cavity(i) && !owned(i)) {
continue;
}
if (avoid_not_owned_in_cavity && in_cavity(i) /*&& !owned(i)*/) {
continue;
}
if (active_bitmask.try_set(i)) {
found = i;
break;

// number of 32-bit unsigned int used in the bit mask
const uint32_t num32 = DIVIDE_UP(capacity, 32);

for (uint32_t i = 0; i < num32 && found == INVALID16; ++i) {
// flip the bits so that a set bit in `mask` now marks an element
// whose bit is NOT set in the active bitmask
uint32_t mask = ~active_bitmask.m_bitmask[i];
// if at least one of these 32 elements is not active, i.e., its
// bit in `mask` is set
if (mask != 0) {
if (avoid_not_owned_in_cavity) {
mask &= (~in_cavity.m_bitmask[i] | owned.m_bitmask[i]);
}

if (avoid_in_cavity) {
mask &= ~in_cavity.m_bitmask[i];
}
while (mask != 0) {
// find the first set bit
// ffs finds the position of the least significant bit set to 1
uint32_t first = __ffs(mask) - 1;

// now this is the element that meets all the requirements
uint32_t pos = 32 * i + first;

// try to set its bit
if (active_bitmask.try_set(pos)) {
found = pos;
break;
}
// if not successful, mask out this element and try the next one
// in this `mask` until the mask becomes all zeros
mask &= ~(1 << first);
}
}
}

// for (uint16_t i = 0; i < capacity; ++i) {
// if (avoid_not_owned_in_cavity && in_cavity(i) && !owned(i)) {
// continue;
// }
// if (avoid_in_cavity && in_cavity(i)) {
// continue;
// }
// if (active_bitmask.try_set(i)) {
// found = i;
// break;
// }
//}


if (found != INVALID16) {
::atomicMax(num_elements, found + 1);
Expand Down Expand Up @@ -1122,7 +1167,7 @@ __device__ __forceinline__ bool CavityManager<blockThreads, cop>::lock(
if (!okay) {
okay = m_context.m_patches_info[q].lock.acquire_lock(blockIdx.x);
if (okay) {
m_s_locked_patches_mask.set(stash_id, true);
m_s_locked_patches_mask.set(stash_id);
}
}
s_success = okay;
Expand Down Expand Up @@ -1165,7 +1210,7 @@ __device__ __forceinline__ void CavityManager<blockThreads, cop>::unlock(
if (threadIdx.x == 0) {
assert(m_s_locked_patches_mask(stash_id));
m_context.m_patches_info[q].lock.release_lock();
m_s_locked_patches_mask.reset(stash_id, true);
m_s_locked_patches_mask.reset(stash_id);
}
}

Expand Down Expand Up @@ -1313,7 +1358,7 @@ CavityManager<blockThreads, cop>::set_ownership_change_bitmask(
}
}

template <uint32_t blockThreads, CavityOp cop>
/*template <uint32_t blockThreads, CavityOp cop>
__device__ __inline__ bool CavityManager<blockThreads, cop>::migrate(
cooperative_groups::thread_block& block)
{
Expand Down Expand Up @@ -1372,7 +1417,7 @@ __device__ __inline__ bool CavityManager<blockThreads, cop>::migrate(
}
return true;
}
}*/


template <uint32_t blockThreads, CavityOp cop>
Expand Down Expand Up @@ -1949,7 +1994,7 @@ CavityManager<blockThreads, cop>::migrate_from_patch_v2(
return true;
}

template <uint32_t blockThreads, CavityOp cop>
/*template <uint32_t blockThreads, CavityOp cop>
__device__ __inline__ bool CavityManager<blockThreads, cop>::migrate_from_patch(
cooperative_groups::thread_block& block,
const uint8_t q_stash_id,
Expand Down Expand Up @@ -2307,7 +2352,7 @@ __device__ __inline__ bool CavityManager<blockThreads, cop>::migrate_from_patch(
}
return true;
}
}*/


template <uint32_t blockThreads, CavityOp cop>
Expand Down Expand Up @@ -2338,11 +2383,13 @@ __device__ __inline__ LPPair CavityManager<blockThreads, cop>::migrate_vertex(
m_patch_info.vertices_capacity[0],
m_s_in_cavity_v,
m_s_owned_mask_v,
true);
true,
false);
if (vp == INVALID16) {
m_s_should_slice[0] = true;
return ret;
}
assert(vp < m_patch_info.vertices_capacity[0]);

// active bitmask is set in add_element

Expand Down Expand Up @@ -2409,7 +2456,8 @@ __device__ __inline__ LPPair CavityManager<blockThreads, cop>::migrate_edge(
m_patch_info.edges_capacity[0],
m_s_in_cavity_e,
m_s_owned_mask_e,
true);
true,
false);
if (ep == INVALID16) {
m_s_should_slice[0] = true;
return ret;
Expand All @@ -2426,17 +2474,6 @@ __device__ __inline__ LPPair CavityManager<blockThreads, cop>::migrate_edge(
uint16_t v0p = find_copy_vertex(v0q, o0);
uint16_t v1p = find_copy_vertex(v1q, o1);

// assert(!m_context.m_patches_info[o0].is_deleted(
// LocalVertexT(v0q)));
// assert(
// m_context.m_patches_info[o0].is_owned(LocalVertexT(v0q)));
//
// assert(!m_context.m_patches_info[o1].is_deleted(
// LocalVertexT(v1q)));
// assert(
// m_context.m_patches_info[o1].is_owned(LocalVertexT(v1q)));


// since any vertex in m_s_src_mask_v has been
// added already to p, then we should find the
// copy otherwise there is something wrong
Expand Down Expand Up @@ -2501,22 +2538,20 @@ __device__ __inline__ LPPair CavityManager<blockThreads, cop>::migrate_face(
uint16_t fp = find_copy_face(fq, o);


// assert(!m_context.m_patches_info[o].is_deleted(LocalFaceT(fq)));
// assert(m_context.m_patches_info[o].is_owned(LocalFaceT(fq)));

if (fp == INVALID16) {
fp = add_element(m_s_active_mask_f,
m_s_num_faces,
m_patch_info.faces_capacity[0],
m_s_in_cavity_f,
m_s_owned_mask_f,
true);
true,
false);

if (fp == INVALID16) {
m_s_should_slice[0] = true;
return ret;
}
// assert(fp < m_patch_info.faces_capacity[0]);
assert(fp < m_patch_info.faces_capacity[0]);

uint32_t o0(q), o1(q), o2(q);

Expand Down
25 changes: 18 additions & 7 deletions include/rxmesh/context.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,10 @@ class Context
m_edge_prefix(nullptr),
m_face_prefix(nullptr),
m_capacity_factor(0.0f),
m_patches_info(nullptr)
m_patches_info(nullptr),
m_max_lp_capacity_v(0),
m_max_lp_capacity_e(0),
m_max_lp_capacity_f(0)
{
}

Expand Down Expand Up @@ -158,6 +161,9 @@ class Context
uint32_t* vertex_prefix,
uint32_t* edge_prefix,
uint32_t* face_prefix,
uint16_t max_lp_capacity_v,
uint16_t max_lp_capacity_e,
uint16_t max_lp_capacity_f,
PatchInfo* d_patches,
PatchScheduler scheduler)
{
Expand Down Expand Up @@ -203,6 +209,10 @@ class Context
m_edge_prefix = edge_prefix;
m_face_prefix = face_prefix;

m_max_lp_capacity_v = max_lp_capacity_v;
m_max_lp_capacity_e = max_lp_capacity_e;
m_max_lp_capacity_f = max_lp_capacity_f;

m_patches_info = d_patches;

m_patch_scheduler = scheduler;
Expand All @@ -214,12 +224,13 @@ class Context
}


uint32_t * m_num_edges, *m_num_faces, *m_num_vertices, *m_num_patches;
uint32_t * m_max_num_vertices, *m_max_num_edges, *m_max_num_faces;
uint32_t * m_vertex_prefix, *m_edge_prefix, *m_face_prefix;
PatchInfo* m_patches_info;
float m_capacity_factor;
uint32_t m_max_num_patches;
uint32_t * m_num_edges, *m_num_faces, *m_num_vertices, *m_num_patches;
uint32_t * m_max_num_vertices, *m_max_num_edges, *m_max_num_faces;
uint32_t * m_vertex_prefix, *m_edge_prefix, *m_face_prefix;
uint16_t m_max_lp_capacity_v, m_max_lp_capacity_e, m_max_lp_capacity_f;
PatchInfo* m_patches_info;
float m_capacity_factor;
uint32_t m_max_num_patches;
PatchScheduler m_patch_scheduler;
};
} // namespace rxmesh
1 change: 1 addition & 0 deletions include/rxmesh/patcher/patcher.cu
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ namespace patcher {

Patcher::Patcher(std::string filename)
{
RXMESH_TRACE("Patcher: Reading {}", filename);
std::ifstream is(filename, std::ios::binary);
cereal::PortableBinaryInputArchive archive(is);
archive(*this);
Expand Down
Loading

0 comments on commit 18819d6

Please sign in to comment.