Skip to content

Commit

Permalink
Minor refactoring
Browse files Browse the repository at this point in the history
  • Loading branch information
ZamanLantra committed Nov 14, 2024
1 parent fb43fdb commit c9646fb
Show file tree
Hide file tree
Showing 5 changed files with 41 additions and 92 deletions.
3 changes: 3 additions & 0 deletions app_fempic_cg/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,9 @@ endif

CUDA_INC = -I$(CUDA_INSTALL_PATH)/include
CUDA_LIB = -L$(CUDA_INSTALL_PATH)/lib64 -lcudart -lcuda
# for demos with gcc
# CUDA_INC = -I$(CUDA_INSTALL_PATH)/include -I$(MPI_INSTALL_PATH)/include
# CUDA_LIB = -L$(CUDA_INSTALL_PATH)/lib64 -lcudart -lcuda -I$(MPI_INSTALL_PATH)/lib -lmpi

HIP_INC = -I$(ROCM_INSTALL_DIR)/include -I$(ROCM_THRUST_DIR)/include -I$(ROCM_PRIM_DIR)/include
HIP_LIB = -L$(ROCM_INSTALL_DIR)/lib -lamdhip64
Expand Down
2 changes: 0 additions & 2 deletions app_fempic_cg/field_solver.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,6 @@ class FESolver {
void duplicate_vec(Vec* vec_mimic, Vec* vec_new);
void add_ke(std::map<int, std::map<int, double>>& sparse_K, int e, double ke[4][4]);
void add_fe(Vec *Fvec, int e, double fe[4]);
double evaluate_na(int a, double xi, double eta, double zeta);
void get_nax(double nx[3], int e, int a);
void initialze_matrix(std::map<int, std::map<int, double>>& sparse_K);
void compute_nx(const opp_dat n_pos);
void sanity_check();
Expand Down
53 changes: 9 additions & 44 deletions app_fempic_cg/field_solver/field_solver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -292,14 +292,9 @@ void FESolver::pre_assembly(const opp_dat n_bnd_pot)
for (int j = 0; j < n_int; j++)
for (int i = 0; i < n_int; i++)
{
double nax[3],nbx[3];

get_nax(nax,e,a);
get_nax(nbx,e,b);

double dot = 0.0; /*dot product*/
for (int d = 0; d < DIM; d++)
dot += (nax[d] * nbx[d]);
dot += (NX[e][a][d] * NX[e][b][d]);
ke[a][b] += (dot * detJ[e] * W[i] * W[j] * W[k]);
}
}
Expand Down Expand Up @@ -350,7 +345,7 @@ void FESolver::initialze_matrix(std::map<int, std::map<int, double>>& sparse_K)
for (int i=own_start; i<own_end; i++) { // iterate own rows

std::vector<int> tempVec;
int local_diag_max_fields = 0, local_off_diag_max_fields = 0;
int diag = 0, off_diag = 0;

std::map<int, double>& K_sparse_row = sparse_K[i-own_start];

Expand All @@ -363,10 +358,8 @@ void FESolver::initialze_matrix(std::map<int, std::map<int, double>>& sparse_K)

if ((std::abs(value) > 1e-12) || (i == j)) { // significant ones and all diagonals
tempVec.push_back(j);
if (j >= own_start && j < own_end)
local_diag_max_fields++;
else
local_off_diag_max_fields++;
if (j >= own_start && j < own_end) diag++;
else off_diag++;
}
}

Expand All @@ -376,11 +369,8 @@ void FESolver::initialze_matrix(std::map<int, std::map<int, double>>& sparse_K)

std::copy(tempVec.begin(), tempVec.end(), matIndex[i-own_start].data());

diag_max_fields = (diag_max_fields > local_diag_max_fields) ?
diag_max_fields : local_diag_max_fields;

off_diag_max_fields = (off_diag_max_fields > local_off_diag_max_fields) ?
off_diag_max_fields : local_off_diag_max_fields;
diag_max_fields = (diag_max_fields > diag) ? diag_max_fields : diag;
off_diag_max_fields = (off_diag_max_fields > off_diag) ? off_diag_max_fields : off_diag;
}

#ifndef USE_MPI
Expand Down Expand Up @@ -446,7 +436,7 @@ void FESolver::enrich_cell_shape_deriv(opp_dat cell_shape_deriv)
}

/*adds contributions from element stiffness matrix*/
void FESolver::add_ke(std::map<int, std::map<int, double>>& sparse_K, int e, double ke[4][4]) // BUG : K is not created correctly
void FESolver::add_ke(std::map<int, std::map<int, double>>& sparse_K, int e, double ke[4][4])
{
if (neq <= 0) return;

Expand All @@ -465,7 +455,7 @@ void FESolver::add_ke(std::map<int, std::map<int, double>>& sparse_K, int e, dou
if (Q < 0)
{
if (Q == INT_MIN) continue; // skip g nodes
else Q = (-1 * Q - 1); // on a different MPI rank, get its correct column
else Q = (-1 * Q - 1); // on a different MPI rank, get its correct column
}
else
Q += own_start; // on the same MPI rank, get its correct column
Expand Down Expand Up @@ -497,30 +487,7 @@ void FESolver::add_fe(Vec *Fvec, int e, double fe[4])
}
}

/*evaluates shape function a at position (xi,eta,zeta)*/
double FESolver::evaluate_na(int a, double xi, double eta, double zeta)
{
switch(a)
{
case 0: return xi;
case 1: return eta;
case 2: return zeta;
case 3: return 1-xi-eta-zeta;
}

return 0; /*shouldn't happen*/
}

/*returns derivative of N[a] at some logical point since we are using linear elements,
these are constant in each element*/
void FESolver::get_nax(double nx[3], int e, int a)
{
for (int d = 0; d < DIM; d++)
nx[d] = NX[e][a][d];
}

/*computes derivatives of the shape functions for all elements constants since
using linear elements*/
/*computes derivatives of the shape functions for all elements constants since using linear elements*/
void FESolver::compute_nx(const opp_dat n_pos)
{
/*derivatives of the shape functions vs. xi*/
Expand Down Expand Up @@ -562,8 +529,6 @@ void FESolver::compute_nx(const opp_dat n_pos)
}
}

//*************************************************************************************************
// Sanity check after assembling matrices
void FESolver::sanity_check()
{
if (OPP_DBG)
Expand Down
68 changes: 24 additions & 44 deletions app_fempic_cg/kernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "opp_lib.h"

//****************************************
#define KERNEL_ONE_OVER_SIX (1.0 / 6.0)
#define KERNEL_N_PER_C 4
#define KERNEL_DET_FIELDS 4
Expand All @@ -48,23 +47,17 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
inline void init_boundary_pot_kernel(
const int *node_type,
double *n_bnd_pot
)
{
switch (*node_type)
{
) {
switch (*node_type) {
case 2: // INLET:
*n_bnd_pot = 0;
break;
*n_bnd_pot = 0; break;
case 3: // FIXED:
*n_bnd_pot = -1 * CONST_wall_potential[0];
break;
*n_bnd_pot = -1 * CONST_wall_potential[0]; break;
default: // NORMAL or OPEN
*n_bnd_pot = 0; /*default*/
}
}

int part_counter = 0;

//*************************************************************************************************
inline void inject_ions_kernel(
double *part_pos,
Expand All @@ -77,18 +70,15 @@ inline void inject_ions_kernel(
const double *iface_normal,
const double *node_pos,
const double* dummy_part_random
)
{
) {
double a = dummy_part_random[0];
double b = dummy_part_random[1];
if ((a + b) > 1) // TODO : Change the random dat to avoid this
{
if ((a + b) > 1) { // TODO : Change the random dat to avoid this
a = (1 - a);
b = (1 - b);
}

for (int i = 0; i < KERNEL_DIM; i++)
{
for (int i = 0; i < KERNEL_DIM; i++) {
part_pos[i] = a * iface_u[i] + b * iface_v[i] + node_pos[i];

part_vel[i] = (iface_normal[i] * CONST_ion_velocity[0]);
Expand All @@ -102,8 +92,8 @@ inline void inject_ions_kernel(
inline void calculate_new_pos_vel_kernel(
const double *cell_ef,
double *part_pos,
double *part_vel ) {

double *part_vel
) {
const double coefficient1 = CONST_charge[0] / CONST_mass[0] * (CONST_dt[0]);
for (int i = 0; i < KERNEL_DIM; i++) {
part_vel[i] += (coefficient1 * cell_ef[i]);
Expand All @@ -116,7 +106,6 @@ inline void move_kernel(
const double *point_pos, double* point_lc,
const double *cell_volume, const double *cell_det
) {

const double coefficient2 = KERNEL_ONE_OVER_SIX / (*cell_volume);

for (int i=0; i<KERNEL_N_PER_C; i++) { /*loop over vertices*/
Expand All @@ -128,14 +117,10 @@ inline void move_kernel(
cell_det[i * KERNEL_DET_FIELDS + 3] * point_pos[2]);
}

if (!(point_lc[0] < 0.0 ||
point_lc[0] > 1.0 ||
point_lc[1] < 0.0 ||
point_lc[1] > 1.0 ||
point_lc[2] < 0.0 ||
point_lc[2] > 1.0 ||
point_lc[3] < 0.0 ||
point_lc[3] > 1.0)) {
if (!(point_lc[0] < 0.0 || point_lc[0] > 1.0 ||
point_lc[1] < 0.0 || point_lc[1] > 1.0 ||
point_lc[2] < 0.0 || point_lc[2] > 1.0 ||
point_lc[3] < 0.0 || point_lc[3] > 1.0)) {

OPP_PARTICLE_MOVE_DONE;
return;
Expand Down Expand Up @@ -169,8 +154,8 @@ inline void deposit_charge_on_nodes_kernel(
double *node_charge_den0,
double *node_charge_den1,
double *node_charge_den2,
double *node_charge_den3) {

double *node_charge_den3
) {
node_charge_den0[0] += part_lc[0];
node_charge_den1[0] += part_lc[1];
node_charge_den2[0] += part_lc[2];
Expand All @@ -181,8 +166,7 @@ inline void deposit_charge_on_nodes_kernel(
inline void compute_node_charge_density_kernel(
double *node_charge_den,
const double *node_volume
)
{
) {
(*node_charge_den) *= (CONST_spwt[0] / node_volume[0]);
}

Expand All @@ -194,10 +178,8 @@ inline void compute_electric_field_kernel(
const double *node_potential1,
const double *node_potential2,
const double *node_potential3
)
{
for (int dim = 0; dim < KERNEL_DIM; dim++)
{
) {
for (int dim = 0; dim < KERNEL_DIM; dim++) {
const double c1 = (cell_shape_deriv[0 * KERNEL_DIM + dim] * (*node_potential0));
const double c2 = (cell_shape_deriv[1 * KERNEL_DIM + dim] * (*node_potential1));
const double c3 = (cell_shape_deriv[2 * KERNEL_DIM + dim] * (*node_potential2));
Expand All @@ -212,20 +194,18 @@ inline void get_final_max_values_kernel(
const OPP_REAL* n_charge_den,
OPP_REAL* max_n_charge_den,
const OPP_REAL* n_pot,
OPP_REAL* max_n_pot)
{
OPP_REAL* max_n_pot
) {
*max_n_charge_den = MAX(abs(*n_charge_den), *max_n_charge_den);

*max_n_pot = MAX(*n_pot, *max_n_pot);
}

//*******************************************************************************
//*************************************************************************************************
inline void get_max_cef_kernel(
const OPP_REAL* val,
OPP_REAL* max_val)
{
for (int dim = 0; dim < KERNEL_DIM; ++dim)
{
OPP_REAL* max_val
) {
for (int dim = 0; dim < KERNEL_DIM; ++dim) {
*max_val = MAX(val[dim], *max_val);
}
}
7 changes: 5 additions & 2 deletions opp_lib/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,9 @@ ifeq ($(PARMETIS), 1)
endif

CUDA_INC += -I$(CUDA_INSTALL_PATH)/include
# for demos with gcc
# CUDA_INC = -I$(CUDA_INSTALL_PATH)/include -I$(MPI_INSTALL_PATH)/include

HIP_INC += -I$(ROCM_INSTALL_DIR)/include -I$(ROCM_THRUST_DIR)/include -I$(ROCM_PRIM_DIR)/include

.PHONY: clean mklib
Expand Down Expand Up @@ -249,9 +252,9 @@ OBJ_CUDA_MPI_CPP = $(patsubst $(SRC)/core/%.cpp,$(OBJ)/%+cuda_mpi.o,$(SRC_CUDA_
$(patsubst $(SRC)/mpi/%.cpp,$(OBJ)/%+cuda_mpi.o,$(SRC_CUDA_MPI_CPP))
OBJ_CUDA_MPI_CU = $(patsubst $(SRC)/cuda/%.cu,$(OBJ)/%+cuda_mpi.o,$(SRC_CUDA_MPI_CU))
$(OBJ)/%+cuda_mpi.o: $(SRC)/core/%.cpp
$(CPP) $(CPPFLAGS) -DUSE_MPI -DUSE_THRUST -c $< -o $@ $(OPP_INC) $(CUDA_INC)
$(MPICPP) $(CPPFLAGS) -DUSE_MPI -DUSE_THRUST -c $< -o $@ $(OPP_INC) $(CUDA_INC)
$(OBJ)/%+cuda_mpi.o: $(SRC)/mpi/%.cpp
$(CPP) $(CPPFLAGS) -DUSE_MPI -DUSE_THRUST -c $< -o $@ $(OPP_INC) $(CUDA_INC)
$(MPICPP) $(CPPFLAGS) -DUSE_MPI -DUSE_THRUST -c $< -o $@ $(OPP_INC) $(CUDA_INC)
$(OBJ)/%+cuda_mpi.o: $(SRC)/cuda/%.cu
$(NVCC) $(NVCCFLAGS) -DUSE_MPI -DUSE_THRUST -c $< -o $@ $(OPP_INC) $(CUDA_INC)

Expand Down

0 comments on commit c9646fb

Please sign in to comment.