Merge branch 'dev' of https://github.com/owensgroup/RXMesh into dyn

owensgroup · Nov 27, 2023 · b8eec65 · b8eec65
2 parents b34e3b6 + 0c12677
commit b8eec65
Show file tree

Hide file tree

Showing 10 changed files with 1,555 additions and 635 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -172,4 +172,4 @@ add_subdirectory("include")
 
 include(GoogleTest)
 add_subdirectory(apps)
-add_subdirectory(tests)
+add_subdirectory(tests)
diff --git a/apps/MCF/mcf.cu b/apps/MCF/mcf.cu
@@ -28,6 +28,7 @@ struct arg
 
 #include "mcf_openmesh.h"
 #include "mcf_rxmesh.h"
+#include "mcf_sparse_matrix.cuh"
 
 
 TEST(App, MCF)
@@ -52,7 +53,10 @@ TEST(App, MCF)
  mcf_openmesh(omp_get_max_threads(), input_mesh, ground_truth);
 
  // RXMesh Impl
- mcf_rxmesh(rxmesh, ground_truth);
+ mcf_rxmesh_cg(rxmesh, ground_truth); 
+
+ // RXMesh cusolver Impl
+ mcf_rxmesh_cusolver_chol(rxmesh, ground_truth); 
 }
 
 int main(int argc, char** argv)

diff --git a/apps/MCF/mcf_rxmesh.h b/apps/MCF/mcf_rxmesh.h
@@ -49,7 +49,7 @@ void init_PR(rxmesh::RXMeshStatic& rxmesh,
 }
 
 template <typename T>
-void mcf_rxmesh(rxmesh::RXMeshStatic& rxmesh,
+void mcf_rxmesh_cg(rxmesh::RXMeshStatic& rxmesh,
  const std::vector<std::vector<T>>& ground_truth)
 {
  using namespace rxmesh;
@@ -106,11 +106,11 @@ void mcf_rxmesh(rxmesh::RXMeshStatic& rxmesh,
  rxmesh.prepare_launch_box({rxmesh::Op::VV},
  launch_box_init_B,
  (void*)init_B<T, blockThreads>,
- true);
+ !Arg.use_uniform_laplace);
  rxmesh.prepare_launch_box({rxmesh::Op::VV},
  launch_box_matvec,
  (void*)rxmesh_matvec<T, blockThreads>,
- true);
+ !Arg.use_uniform_laplace);
 
 
  // init kernel to initialize RHS (B)

diff --git a/apps/MCF/mcf_sparse_matrix.cuh b/apps/MCF/mcf_sparse_matrix.cuh
@@ -1,47 +1,227 @@
 #pragma once
+#include "mcf_util.h"
 #include "rxmesh/attribute.h"
 #include "rxmesh/matrix/dense_matrix.cuh"
 #include "rxmesh/matrix/sparse_matrix.cuh"
 #include "rxmesh/rxmesh_static.h"
 
+/**
+ * @brief Build the right-hand side B of the MCF linear system, one row per
+ * vertex and one column per coordinate.
+ *
+ * The row written for a vertex is
+ * context.m_vertex_prefix[patch id] + local id.
+ *  - Uniform Laplace: B = coords * valence, where the valence is the size of
+ *    the VV iterator.
+ *  - Otherwise: B = coords / (0.5 / area), where area is the sum of the
+ *    positive partial Voronoi areas accumulated around the vertex.
+ *
+ * @param context RXMesh context the VV query runs on
+ * @param coords input vertex coordinates (components 0, 1 and 2 are read)
+ * @param B_mat output dense matrix; columns 0..2 of each vertex row are
+ * written
+ * @param use_uniform_laplace selects the uniform weighting; when false the VV
+ * query is dispatched as oriented
+ */
+template <typename T, uint32_t blockThreads>
+__global__ static void mcf_B_setup(const rxmesh::Context            context,
+                                   const rxmesh::VertexAttribute<T> coords,
+                                   rxmesh::DenseMatrix<T>           B_mat,
+                                   const bool use_uniform_laplace)
+{
+    using namespace rxmesh;
+
+    auto init_lambda = [&](VertexHandle& p_id, const VertexIterator& iter) {
+        auto     r_ids      = p_id.unpack();
+        uint32_t r_patch_id = r_ids.first;
+        uint16_t r_local_id = r_ids.second;
+
+        uint32_t row_index = context.m_vertex_prefix[r_patch_id] + r_local_id;
+
+        if (use_uniform_laplace) {
+            const T valence     = static_cast<T>(iter.size());
+            B_mat(row_index, 0) = coords(p_id, 0) * valence;
+            B_mat(row_index, 1) = coords(p_id, 1) * valence;
+            B_mat(row_index, 2) = coords(p_id, 2) * valence;
+        } else {
+            // Constants are typed as T so that a float instantiation does not
+            // get promoted to double-precision arithmetic on the device
+            const T zero = static_cast<T>(0);
+            const T half = static_cast<T>(0.5);
+
+            T v_weight = zero;
+
+            // this is the last vertex in the one-ring (before r_id)
+            VertexHandle q_id = iter.back();
+
+            for (uint32_t v = 0; v < iter.size(); ++v) {
+                // the current one ring vertex
+                VertexHandle r_id = iter[v];
+
+                T tri_area = partial_voronoi_area(p_id, q_id, r_id, coords);
+
+                // only positive areas contribute to the vertex weight
+                v_weight += (tri_area > zero) ? tri_area : zero;
+
+                q_id = r_id;
+            }
+            // NOTE(review): if no positive area was accumulated this divides
+            // by zero -- confirm the inputs cannot produce that case
+            v_weight = half / v_weight;
+
+            B_mat(row_index, 0) = coords(p_id, 0) / v_weight;
+            B_mat(row_index, 1) = coords(p_id, 1) / v_weight;
+            B_mat(row_index, 2) = coords(p_id, 2) / v_weight;
+        }
+    };
+
+    // With uniform Laplacian, we just need the valence, thus we
+    // call query and set oriented to false
+    auto block = cooperative_groups::this_thread_block();
+
+    Query<blockThreads> query(context);
+    ShmemAllocator      shrd_alloc;
+    query.dispatch<Op::VV>(
+        block,
+        shrd_alloc,
+        init_lambda,
+        [](VertexHandle) { return true; },
+        !use_uniform_laplace);
+}
 
 template <typename T, uint32_t blockThreads>
-__global__ static void mcf_A_B_setup(
- const rxmesh::Context context,
- rxmesh::VertexAttribute<T> coords, // for non-uniform
- rxmesh::SparseMatrix<T> A_mat,
- rxmesh::DenseMatrix<T> B_mat,
- const bool use_uniform_laplace, // for non-uniform
- const T time_step)
+__global__ static void mcf_A_X_setup(
+ const rxmesh::Context  context,
+ const rxmesh::VertexAttribute<T> coords,
+ rxmesh::SparseMatrix<T>  A_mat,
+ rxmesh::DenseMatrix<T>  X_mat,
+ const bool  use_uniform_laplace, // for non-uniform
+ const T  time_step)
 {
  using namespace rxmesh;
- auto init_lambda = [&](VertexHandle& v_id, const VertexIterator& iter) {
+ auto init_lambda = [&](VertexHandle& p_id, const VertexIterator& iter) {
  T sum_e_weight(0);
+ T v_weight(0);
 
- T v_weight = iter.size();
+ VertexHandle q_id = iter.back();
 
  // reference value calculation
- auto r_ids = v_id.unpack();
+ auto r_ids = p_id.unpack();
  uint32_t r_patch_id = r_ids.first;
  uint16_t r_local_id = r_ids.second;
 
- uint32_t row_index = A_mat.m_d_patch_ptr_v[r_patch_id] + r_local_id;
+ uint32_t row_index = context.m_vertex_prefix[r_patch_id] + r_local_id;
 
- B_mat(row_index, 0) = coords(v_id, 0) * v_weight;
- B_mat(row_index, 1) = coords(v_id, 1) * v_weight;
- B_mat(row_index, 2) = coords(v_id, 2) * v_weight;
+ // set up initial X matrix
+ X_mat(row_index, 0) = coords(p_id, 0);
+ X_mat(row_index, 1) = coords(p_id, 1);
+ X_mat(row_index, 2) = coords(p_id, 2);
 
- Vector<3, float> vi_coord(
- coords(v_id, 0), coords(v_id, 1), coords(v_id, 2));
+ // set up matrix A
  for (uint32_t v = 0; v < iter.size(); ++v) {
- T e_weight = 1;
- A_mat(v_id, iter[v]) = -time_step * e_weight;
+ VertexHandle r_id = iter[v];
+
+ T e_weight = 0;
+ if (use_uniform_laplace) {
+ e_weight = 1;
+ } else {
+ VertexHandle s_id =
+ (v == iter.size() - 1) ? iter[0] : iter[v + 1];
+
+ e_weight = edge_cotan_weight(p_id, r_id, q_id, s_id, coords);
+ e_weight = (static_cast<T>(e_weight >= 0.0)) * e_weight;
+ }
 
+ e_weight *= time_step;
  sum_e_weight += e_weight;
+
+ A_mat(p_id, iter[v]) = -e_weight;
+
+ // compute vertex weight
+ if (use_uniform_laplace) {
+ ++v_weight;
+ } else {
+ T tri_area = partial_voronoi_area(p_id, q_id, r_id, coords);
+ v_weight += (tri_area > 0) ? tri_area : 0;
+ q_id = r_id;
+ }
  }
 
- A_mat(v_id, v_id) = v_weight + time_step * sum_e_weight;
+ // Diagonal entry
+ if (use_uniform_laplace) {
+ v_weight = 1.0 / v_weight;
+ } else {
+ v_weight = 0.5 / v_weight;
+ }
+
+ assert(!isnan(v_weight));
+ assert(!isinf(v_weight));
+
+ A_mat(p_id, p_id) = (1.0 / v_weight) + sum_e_weight;
  };
 
- query_block_dispatcher<Op::VV, blockThreads>(context, init_lambda);
+ auto block = cooperative_groups::this_thread_block();
+ Query<blockThreads> query(context);
+ ShmemAllocator shrd_alloc;
+ query.dispatch<Op::VV>(
+ block,
+ shrd_alloc,
+ init_lambda,
+ [](VertexHandle) { return true; },
+ !use_uniform_laplace);
 }
+
+/**
+ * @brief Run MCF by assembling the linear system A X = B on the device,
+ * solving it with the sparse matrix's Cholesky solver (no reordering), and
+ * comparing the solution against a reference.
+ *
+ * B is filled by mcf_B_setup; A and the initial X are filled by
+ * mcf_A_X_setup. After the solve, X is moved to the host and each coordinate
+ * is compared against ground_truth using a relative error (absolute error
+ * when the reference value is exactly zero) with a tolerance of 0.001. The
+ * outcome is reported through EXPECT_TRUE.
+ *
+ * @param rxmesh mesh providing the input coordinates and the query context
+ * @param ground_truth reference positions, indexed as
+ * [global vertex id][coordinate]
+ */
+template <typename T>
+void mcf_rxmesh_cusolver_chol(rxmesh::RXMeshStatic&              rxmesh,
+                              const std::vector<std::vector<T>>& ground_truth)
+{
+    using namespace rxmesh;
+    constexpr uint32_t blockThreads = 256;
+
+    uint32_t num_vertices = rxmesh.get_num_vertices();
+    auto     coords       = rxmesh.get_input_vertex_coordinates();
+
+    SparseMatrix<float> A_mat(rxmesh);
+    DenseMatrix<float>  X_mat(num_vertices, 3);
+    DenseMatrix<float>  B_mat(num_vertices, 3);
+
+    RXMESH_INFO("use_uniform_laplace: {}, time_step: {}",
+                Arg.use_uniform_laplace,
+                Arg.time_step);
+
+    // B set up
+    LaunchBox<blockThreads> launch_box_B;
+    rxmesh.prepare_launch_box({Op::VV},
+                              launch_box_B,
+                              (void*)mcf_B_setup<float, blockThreads>,
+                              !Arg.use_uniform_laplace);
+
+    mcf_B_setup<float, blockThreads><<<launch_box_B.blocks,
+                                       launch_box_B.num_threads,
+                                       launch_box_B.smem_bytes_dyn>>>(
+        rxmesh.get_context(), *coords, B_mat, Arg.use_uniform_laplace);
+
+    // cudaGetLastError reports a bad launch configuration, the synchronize
+    // reports faults raised while the kernel was running
+    CUDA_ERROR(cudaGetLastError());
+    CUDA_ERROR(cudaDeviceSynchronize());
+
+    // A and X set up
+    LaunchBox<blockThreads> launch_box_A_X;
+    rxmesh.prepare_launch_box({Op::VV},
+                              launch_box_A_X,
+                              (void*)mcf_A_X_setup<float, blockThreads>,
+                              !Arg.use_uniform_laplace);
+
+    mcf_A_X_setup<float, blockThreads>
+        <<<launch_box_A_X.blocks,
+           launch_box_A_X.num_threads,
+           launch_box_A_X.smem_bytes_dyn>>>(rxmesh.get_context(),
+                                            *coords,
+                                            A_mat,
+                                            X_mat,
+                                            Arg.use_uniform_laplace,
+                                            Arg.time_step);
+
+    // Check this launch the same way as the B launch so that a failure is
+    // reported here instead of surfacing later inside the solver
+    CUDA_ERROR(cudaGetLastError());
+    CUDA_ERROR(cudaDeviceSynchronize());
+
+    // Solving the linear system using chol factorization and no reordering
+    A_mat.spmat_linear_solve(B_mat, X_mat, Solver::CHOL, Reorder::NONE);
+
+    X_mat.move(rxmesh::DEVICE, rxmesh::HOST);
+
+    const T tol    = 0.001;
+    bool    passed = true;
+    rxmesh.for_each_vertex(HOST, [&](const VertexHandle vh) {
+        uint32_t v_id        = rxmesh.map_to_global(vh);
+        uint32_t v_linear_id = rxmesh.linear_id(vh);
+
+        for (uint32_t i = 0; i < 3; ++i) {
+            const auto val   = X_mat(v_linear_id, i);
+            const T    truth = ground_truth[v_id][i];
+
+            // The error is local to each invocation, so the callback no
+            // longer writes a captured scratch variable.
+            // NOTE(review): `passed` is still shared; confirm whether
+            // for_each_vertex(HOST, ...) may run the callback concurrently
+            T err = static_cast<T>(std::abs(val - truth));
+            if (truth != static_cast<T>(0)) {
+                // relative error; skipped for a zero reference to avoid
+                // 0/0 and x/0
+                err /= std::abs(truth);
+            }
+
+            // Written as a negated <= so that a NaN error (e.g. a NaN in
+            // the solution) fails the check instead of slipping through
+            if (!(err <= tol)) {
+                RXMESH_WARN("val: {}, truth: {}, tol: {}\n",
+                            val,
+                            truth,
+                            err);
+                passed = false;
+                break;
+            }
+        }
+    });
+
+    EXPECT_TRUE(passed);
+}
diff --git a/include/rxmesh/attribute.h b/include/rxmesh/attribute.h
@@ -227,7 +227,10 @@ class Attribute : public AttributeBase
 
  Attribute(const Attribute& rhs) = default;
 
- virtual ~Attribute() = default;
+ virtual ~Attribute()  // frees m_name when the attribute is destroyed
+ {
+ free(m_name);  // NOTE(review): the copy constructor above is defaulted, so a copy carries the same m_name pointer -- confirm destroying both cannot double-free
+ }
 
  /**
  * @brief Get the name of the attribute