Skip to content

Commit

Permalink
Merge branch 'dev' of https://github.com/owensgroup/RXMesh into dyn
Browse files Browse the repository at this point in the history
  • Loading branch information
Ahdhn committed Nov 27, 2023
2 parents b34e3b6 + 0c12677 commit b8eec65
Show file tree
Hide file tree
Showing 10 changed files with 1,555 additions and 635 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -172,4 +172,4 @@ add_subdirectory("include")

include(GoogleTest)
add_subdirectory(apps)
add_subdirectory(tests)
add_subdirectory(tests)
6 changes: 5 additions & 1 deletion apps/MCF/mcf.cu
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ struct arg

#include "mcf_openmesh.h"
#include "mcf_rxmesh.h"
#include "mcf_sparse_matrix.cuh"


TEST(App, MCF)
Expand All @@ -52,7 +53,10 @@ TEST(App, MCF)
mcf_openmesh(omp_get_max_threads(), input_mesh, ground_truth);

// RXMesh Impl
mcf_rxmesh(rxmesh, ground_truth);
mcf_rxmesh_cg(rxmesh, ground_truth);

// RXMesh cusolver Impl
mcf_rxmesh_cusolver_chol(rxmesh, ground_truth);
}

int main(int argc, char** argv)
Expand Down
6 changes: 3 additions & 3 deletions apps/MCF/mcf_rxmesh.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ void init_PR(rxmesh::RXMeshStatic& rxmesh,
}

template <typename T>
void mcf_rxmesh(rxmesh::RXMeshStatic& rxmesh,
void mcf_rxmesh_cg(rxmesh::RXMeshStatic& rxmesh,
const std::vector<std::vector<T>>& ground_truth)
{
using namespace rxmesh;
Expand Down Expand Up @@ -106,11 +106,11 @@ void mcf_rxmesh(rxmesh::RXMeshStatic& rxmesh,
rxmesh.prepare_launch_box({rxmesh::Op::VV},
launch_box_init_B,
(void*)init_B<T, blockThreads>,
true);
!Arg.use_uniform_laplace);
rxmesh.prepare_launch_box({rxmesh::Op::VV},
launch_box_matvec,
(void*)rxmesh_matvec<T, blockThreads>,
true);
!Arg.use_uniform_laplace);


// init kernel to initialize RHS (B)
Expand Down
220 changes: 200 additions & 20 deletions apps/MCF/mcf_sparse_matrix.cuh
Original file line number Diff line number Diff line change
@@ -1,47 +1,227 @@
#pragma once
#include "mcf_util.h"
#include "rxmesh/attribute.h"
#include "rxmesh/matrix/dense_matrix.cuh"
#include "rxmesh/matrix/sparse_matrix.cuh"
#include "rxmesh/rxmesh_static.h"

template <typename T, uint32_t blockThreads>
__global__ static void mcf_B_setup(const rxmesh::Context context,
const rxmesh::VertexAttribute<T> coords,
rxmesh::DenseMatrix<T> B_mat,
const bool use_uniform_laplace)
{
using namespace rxmesh;

auto init_lambda = [&](VertexHandle& p_id, const VertexIterator& iter) {
auto r_ids = p_id.unpack();
uint32_t r_patch_id = r_ids.first;
uint16_t r_local_id = r_ids.second;

uint32_t row_index = context.m_vertex_prefix[r_patch_id] + r_local_id;

if (use_uniform_laplace) {
const T valence = static_cast<T>(iter.size());
B_mat(row_index, 0) = coords(p_id, 0) * valence;
B_mat(row_index, 1) = coords(p_id, 1) * valence;
B_mat(row_index, 2) = coords(p_id, 2) * valence;
} else {
T v_weight = 0;

// this is the last vertex in the one-ring (before r_id)
VertexHandle q_id = iter.back();

for (uint32_t v = 0; v < iter.size(); ++v) {
// the current one ring vertex
VertexHandle r_id = iter[v];

T tri_area = partial_voronoi_area(p_id, q_id, r_id, coords);

v_weight += (tri_area > 0) ? tri_area : 0.0;

q_id = r_id;
}
v_weight = 0.5 / v_weight;

B_mat(row_index, 0) = coords(p_id, 0) / v_weight;
B_mat(row_index, 1) = coords(p_id, 1) / v_weight;
B_mat(row_index, 2) = coords(p_id, 2) / v_weight;
}
};

// With uniform Laplacian, we just need the valence, thus we
// call query and set oriented to false
auto block = cooperative_groups::this_thread_block();

Query<blockThreads> query(context);
ShmemAllocator shrd_alloc;
query.dispatch<Op::VV>(
block,
shrd_alloc,
init_lambda,
[](VertexHandle) { return true; },
!use_uniform_laplace);
}

template <typename T, uint32_t blockThreads>
__global__ static void mcf_A_B_setup(
const rxmesh::Context context,
rxmesh::VertexAttribute<T> coords, // for non-uniform
rxmesh::SparseMatrix<T> A_mat,
rxmesh::DenseMatrix<T> B_mat,
const bool use_uniform_laplace, // for non-uniform
const T time_step)
__global__ static void mcf_A_X_setup(
const rxmesh::Context context,
const rxmesh::VertexAttribute<T> coords,
rxmesh::SparseMatrix<T> A_mat,
rxmesh::DenseMatrix<T> X_mat,
const bool use_uniform_laplace, // for non-uniform
const T time_step)
{
using namespace rxmesh;
auto init_lambda = [&](VertexHandle& v_id, const VertexIterator& iter) {
auto init_lambda = [&](VertexHandle& p_id, const VertexIterator& iter) {
T sum_e_weight(0);
T v_weight(0);

T v_weight = iter.size();
VertexHandle q_id = iter.back();

// reference value calculation
auto r_ids = v_id.unpack();
auto r_ids = p_id.unpack();
uint32_t r_patch_id = r_ids.first;
uint16_t r_local_id = r_ids.second;

uint32_t row_index = A_mat.m_d_patch_ptr_v[r_patch_id] + r_local_id;
uint32_t row_index = context.m_vertex_prefix[r_patch_id] + r_local_id;

B_mat(row_index, 0) = coords(v_id, 0) * v_weight;
B_mat(row_index, 1) = coords(v_id, 1) * v_weight;
B_mat(row_index, 2) = coords(v_id, 2) * v_weight;
// set up initial X matrix
X_mat(row_index, 0) = coords(p_id, 0);
X_mat(row_index, 1) = coords(p_id, 1);
X_mat(row_index, 2) = coords(p_id, 2);

Vector<3, float> vi_coord(
coords(v_id, 0), coords(v_id, 1), coords(v_id, 2));
// set up matrix A
for (uint32_t v = 0; v < iter.size(); ++v) {
T e_weight = 1;
A_mat(v_id, iter[v]) = -time_step * e_weight;
VertexHandle r_id = iter[v];

T e_weight = 0;
if (use_uniform_laplace) {
e_weight = 1;
} else {
VertexHandle s_id =
(v == iter.size() - 1) ? iter[0] : iter[v + 1];

e_weight = edge_cotan_weight(p_id, r_id, q_id, s_id, coords);
e_weight = (static_cast<T>(e_weight >= 0.0)) * e_weight;
}

e_weight *= time_step;
sum_e_weight += e_weight;

A_mat(p_id, iter[v]) = -e_weight;

// compute vertex weight
if (use_uniform_laplace) {
++v_weight;
} else {
T tri_area = partial_voronoi_area(p_id, q_id, r_id, coords);
v_weight += (tri_area > 0) ? tri_area : 0;
q_id = r_id;
}
}

A_mat(v_id, v_id) = v_weight + time_step * sum_e_weight;
// Diagonal entry
if (use_uniform_laplace) {
v_weight = 1.0 / v_weight;
} else {
v_weight = 0.5 / v_weight;
}

assert(!isnan(v_weight));
assert(!isinf(v_weight));

A_mat(p_id, p_id) = (1.0 / v_weight) + sum_e_weight;
};

query_block_dispatcher<Op::VV, blockThreads>(context, init_lambda);
auto block = cooperative_groups::this_thread_block();
Query<blockThreads> query(context);
ShmemAllocator shrd_alloc;
query.dispatch<Op::VV>(
block,
shrd_alloc,
init_lambda,
[](VertexHandle) { return true; },
!use_uniform_laplace);
}

template <typename T>
void mcf_rxmesh_cusolver_chol(rxmesh::RXMeshStatic& rxmesh,
const std::vector<std::vector<T>>& ground_truth)
{
using namespace rxmesh;
constexpr uint32_t blockThreads = 256;

uint32_t num_vertices = rxmesh.get_num_vertices();
auto coords = rxmesh.get_input_vertex_coordinates();

SparseMatrix<float> A_mat(rxmesh);
DenseMatrix<float> X_mat(num_vertices, 3);
DenseMatrix<float> B_mat(num_vertices, 3);

RXMESH_INFO("use_uniform_laplace: {}, time_step: {}",
Arg.use_uniform_laplace,
Arg.time_step);

// B set up
LaunchBox<blockThreads> launch_box_B;
rxmesh.prepare_launch_box({Op::VV},
launch_box_B,
(void*)mcf_B_setup<float, blockThreads>,
!Arg.use_uniform_laplace);

mcf_B_setup<float, blockThreads><<<launch_box_B.blocks,
launch_box_B.num_threads,
launch_box_B.smem_bytes_dyn>>>(
rxmesh.get_context(), *coords, B_mat, Arg.use_uniform_laplace);

CUDA_ERROR(cudaDeviceSynchronize());

// A and X set up
LaunchBox<blockThreads> launch_box_A_X;
rxmesh.prepare_launch_box({Op::VV},
launch_box_A_X,
(void*)mcf_A_X_setup<float, blockThreads>,
!Arg.use_uniform_laplace);

mcf_A_X_setup<float, blockThreads>
<<<launch_box_A_X.blocks,
launch_box_A_X.num_threads,
launch_box_A_X.smem_bytes_dyn>>>(rxmesh.get_context(),
*coords,
A_mat,
X_mat,
Arg.use_uniform_laplace,
Arg.time_step);

// Solving the linear system using chol factorization and no reordering
A_mat.spmat_linear_solve(B_mat, X_mat, Solver::CHOL, Reorder::NONE);

X_mat.move(rxmesh::DEVICE, rxmesh::HOST);

const T tol = 0.001;
T tmp_tol = tol;
bool passed = true;
rxmesh.for_each_vertex(HOST, [&](const VertexHandle vh) {
uint32_t v_id = rxmesh.map_to_global(vh);
uint32_t v_linear_id = rxmesh.linear_id(vh);

T a = X_mat(v_linear_id, 0);

for (uint32_t i = 0; i < 3; ++i) {
tmp_tol = std::abs((X_mat(v_linear_id, i) - ground_truth[v_id][i]) /
ground_truth[v_id][i]);

if (tmp_tol > tol) {
RXMESH_WARN("val: {}, truth: {}, tol: {}\n",
X_mat(v_linear_id, i),
ground_truth[v_id][i],
tmp_tol);
passed = false;
break;
}
}
});

EXPECT_TRUE(passed);
}
5 changes: 4 additions & 1 deletion include/rxmesh/attribute.h
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,10 @@ class Attribute : public AttributeBase

Attribute(const Attribute& rhs) = default;

virtual ~Attribute() = default;
virtual ~Attribute()
{
free(m_name);
}

/**
* @brief Get the name of the attribute
Expand Down
Loading

0 comments on commit b8eec65

Please sign in to comment.