Minor refactoring

OP-DSL · Nov 14, 2024 · c9646fb · c9646fb
1 parent fb43fdb
commit c9646fb
Show file tree

Hide file tree

Showing 5 changed files with 41 additions and 92 deletions.
diff --git a/app_fempic_cg/Makefile b/app_fempic_cg/Makefile
@@ -61,6 +61,9 @@ endif
 
 CUDA_INC   = -I$(CUDA_INSTALL_PATH)/include
 CUDA_LIB   = -L$(CUDA_INSTALL_PATH)/lib64 -lcudart -lcuda
+# for demos with gcc
+# CUDA_INC   = -I$(CUDA_INSTALL_PATH)/include -I$(MPI_INSTALL_PATH)/include
+# CUDA_LIB   = -L$(CUDA_INSTALL_PATH)/lib64 -lcudart -lcuda -I$(MPI_INSTALL_PATH)/lib -lmpi
 
 HIP_INC   = -I$(ROCM_INSTALL_DIR)/include -I$(ROCM_THRUST_DIR)/include -I$(ROCM_PRIM_DIR)/include
 HIP_LIB   = -L$(ROCM_INSTALL_DIR)/lib -lamdhip64

diff --git a/app_fempic_cg/field_solver.h b/app_fempic_cg/field_solver.h
@@ -59,8 +59,6 @@ class FESolver {
     void duplicate_vec(Vec* vec_mimic, Vec* vec_new);
     void add_ke(std::map<int, std::map<int, double>>& sparse_K, int e, double ke[4][4]);
     void add_fe(Vec *Fvec, int e, double fe[4]);
-    double evaluate_na(int a, double xi, double eta, double zeta);
-    void get_nax(double nx[3], int e, int a);
     void initialze_matrix(std::map<int, std::map<int, double>>& sparse_K);
     void compute_nx(const opp_dat n_pos);
     void sanity_check();

diff --git a/app_fempic_cg/field_solver/field_solver.cpp b/app_fempic_cg/field_solver/field_solver.cpp
@@ -292,14 +292,9 @@ void FESolver::pre_assembly(const opp_dat n_bnd_pot)
                     for (int j = 0; j < n_int; j++)
                         for (int i = 0; i < n_int; i++) 
                         {
-                            double nax[3],nbx[3];
-
-                            get_nax(nax,e,a);
-                            get_nax(nbx,e,b);
-
                             double dot = 0.0; /*dot product*/
                             for (int d = 0; d < DIM; d++) 
-                                dot += (nax[d] * nbx[d]);
+                                dot += (NX[e][a][d] * NX[e][b][d]);
                             ke[a][b] += (dot * detJ[e] * W[i] * W[j] * W[k]);
                         }
             }
@@ -350,7 +345,7 @@ void FESolver::initialze_matrix(std::map<int, std::map<int, double>>& sparse_K)
     for (int i=own_start; i<own_end; i++) { // iterate own rows
 
         std::vector<int> tempVec;
-        int local_diag_max_fields = 0, local_off_diag_max_fields = 0;
+        int diag = 0, off_diag = 0;
 
         std::map<int, double>& K_sparse_row = sparse_K[i-own_start];
 
@@ -363,10 +358,8 @@ void FESolver::initialze_matrix(std::map<int, std::map<int, double>>& sparse_K)
 
             if ((std::abs(value) > 1e-12) || (i == j)) { // significant ones and all diagonals  
                 tempVec.push_back(j); 
-                if (j >= own_start && j < own_end)
-                    local_diag_max_fields++;
-                else
-                    local_off_diag_max_fields++;
+                if (j >= own_start && j < own_end) diag++;
+                else off_diag++;
             }         
         }
 
@@ -376,11 +369,8 @@ void FESolver::initialze_matrix(std::map<int, std::map<int, double>>& sparse_K)
 
         std::copy(tempVec.begin(), tempVec.end(), matIndex[i-own_start].data());
 
-        diag_max_fields = (diag_max_fields > local_diag_max_fields) ? 
-                                diag_max_fields : local_diag_max_fields; 
-
-        off_diag_max_fields = (off_diag_max_fields > local_off_diag_max_fields) ? 
-                                off_diag_max_fields : local_off_diag_max_fields;  
+        diag_max_fields = (diag_max_fields > diag) ? diag_max_fields : diag; 
+        off_diag_max_fields = (off_diag_max_fields > off_diag) ? off_diag_max_fields : off_diag;  
     }
 
 #ifndef USE_MPI
@@ -446,7 +436,7 @@ void FESolver::enrich_cell_shape_deriv(opp_dat cell_shape_deriv)
 }
 
 /*adds contributions from element stiffness matrix*/
-void FESolver::add_ke(std::map<int, std::map<int, double>>& sparse_K, int e, double ke[4][4]) // BUG : K is not created correctly
+void FESolver::add_ke(std::map<int, std::map<int, double>>& sparse_K, int e, double ke[4][4])
 { 
     if (neq <= 0) return;
 
@@ -465,7 +455,7 @@ void FESolver::add_ke(std::map<int, std::map<int, double>>& sparse_K, int e, dou
             if (Q < 0)
             {
                 if (Q == INT_MIN) continue; // skip g nodes
-                else Q = (-1 * Q - 1);            // on a different MPI rank, get its correct column
+                else Q = (-1 * Q - 1);      // on a different MPI rank, get its correct column
             }
             else 
                 Q += own_start;             // on the same MPI rank, get its correct column
@@ -497,30 +487,7 @@ void FESolver::add_fe(Vec *Fvec, int e, double fe[4])
     }
 }
 
-/*evaluates shape function a at position (xi,eta,zeta)*/
-double FESolver::evaluate_na(int a, double xi, double eta, double zeta) 
-{
-    switch(a) 
-    {
-        case 0: return xi;
-        case 1: return eta;
-        case 2: return zeta;
-        case 3: return 1-xi-eta-zeta;       
-    }
-
-    return 0;    /*shouldn't happen*/
-}
-
-/*returns derivative of N[a] at some logical point since we are using linear elements, 
-these are constant in each element*/
-void FESolver::get_nax(double nx[3], int e, int a) 
-{
-    for (int d = 0; d < DIM; d++)
-        nx[d] = NX[e][a][d];
-}
-
-/*computes derivatives of the shape functions for all elements constants since 
-using linear elements*/
+/*computes derivatives of the shape functions for all elements constants since using linear elements*/
 void FESolver::compute_nx(const opp_dat n_pos) 
 { 
     /*derivatives of the shape functions vs. xi*/
@@ -562,8 +529,6 @@ void FESolver::compute_nx(const opp_dat n_pos)
     }
 }
 
-//*************************************************************************************************
-// Sanity check after assembling matrices
 void FESolver::sanity_check()
 {
     if (OPP_DBG)

diff --git a/app_fempic_cg/kernels.h b/app_fempic_cg/kernels.h
@@ -37,7 +37,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include "opp_lib.h"
 
-//****************************************
 #define KERNEL_ONE_OVER_SIX     (1.0 / 6.0)
 #define KERNEL_N_PER_C          4
 #define KERNEL_DET_FIELDS       4
@@ -48,23 +47,17 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 inline void init_boundary_pot_kernel(
     const int *node_type, 
     double *n_bnd_pot
-)
-{
-    switch (*node_type)
-    {
+) {
+    switch (*node_type) {
         case 2: // INLET: 
-            *n_bnd_pot = 0; 
-            break;     
+            *n_bnd_pot = 0; break;     
         case 3: // FIXED: 
-            *n_bnd_pot = -1 * CONST_wall_potential[0]; 
-            break;
+            *n_bnd_pot = -1 * CONST_wall_potential[0]; break;
         default: // NORMAL or OPEN
             *n_bnd_pot = 0; /*default*/
     }
 }
 
-int part_counter = 0;
-
 //*************************************************************************************************
 inline void inject_ions_kernel(
     double *part_pos,
@@ -77,18 +70,15 @@ inline void inject_ions_kernel(
     const double *iface_normal,
     const double *node_pos,
     const double* dummy_part_random
-)
-{
+) {
     double a = dummy_part_random[0];
     double b = dummy_part_random[1];
-    if ((a + b) > 1)  // TODO : Change the random dat to avoid this
-    {
+    if ((a + b) > 1) {  // TODO : Change the random dat to avoid this
         a = (1 - a);
         b = (1 - b);
     }
 
-    for (int i = 0; i < KERNEL_DIM; i++) 
-    {
+    for (int i = 0; i < KERNEL_DIM; i++) {
         part_pos[i] = a * iface_u[i] + b * iface_v[i] + node_pos[i];
 
         part_vel[i] = (iface_normal[i] * CONST_ion_velocity[0]);
@@ -102,8 +92,8 @@ inline void inject_ions_kernel(
 inline void calculate_new_pos_vel_kernel(
     const double *cell_ef,
     double *part_pos,
-    double *part_vel ) {
-
+    double *part_vel 
+) {
     const double coefficient1 = CONST_charge[0] / CONST_mass[0] * (CONST_dt[0]);
     for (int i = 0; i < KERNEL_DIM; i++) {
         part_vel[i] += (coefficient1 * cell_ef[i]);   
@@ -116,7 +106,6 @@ inline void move_kernel(
     const double *point_pos, double* point_lc, 
     const double *cell_volume, const double *cell_det
 ) {
-
     const double coefficient2 = KERNEL_ONE_OVER_SIX / (*cell_volume);
 
     for (int i=0; i<KERNEL_N_PER_C; i++) { /*loop over vertices*/
@@ -128,14 +117,10 @@ inline void move_kernel(
             cell_det[i * KERNEL_DET_FIELDS + 3] * point_pos[2]);
     }  
 
-    if (!(point_lc[0] < 0.0 || 
-        point_lc[0] > 1.0 ||
-        point_lc[1] < 0.0 || 
-        point_lc[1] > 1.0 ||
-        point_lc[2] < 0.0 || 
-        point_lc[2] > 1.0 ||
-        point_lc[3] < 0.0 || 
-        point_lc[3] > 1.0)) { 
+    if (!(point_lc[0] < 0.0 || point_lc[0] > 1.0 ||
+          point_lc[1] < 0.0 || point_lc[1] > 1.0 ||
+          point_lc[2] < 0.0 || point_lc[2] > 1.0 ||
+          point_lc[3] < 0.0 || point_lc[3] > 1.0)) { 
 
         OPP_PARTICLE_MOVE_DONE;
         return;
@@ -169,8 +154,8 @@ inline void deposit_charge_on_nodes_kernel(
     double *node_charge_den0,
     double *node_charge_den1,
     double *node_charge_den2,
-    double *node_charge_den3) {
-
+    double *node_charge_den3
+) {
     node_charge_den0[0] += part_lc[0];
     node_charge_den1[0] += part_lc[1];
     node_charge_den2[0] += part_lc[2];
@@ -181,8 +166,7 @@ inline void deposit_charge_on_nodes_kernel(
 inline void compute_node_charge_density_kernel(
     double *node_charge_den,
     const double *node_volume
-)
-{
+) {
     (*node_charge_den) *= (CONST_spwt[0] / node_volume[0]);
 }
 
@@ -194,10 +178,8 @@ inline void compute_electric_field_kernel(
     const double *node_potential1,
     const double *node_potential2,
     const double *node_potential3
-)
-{
-    for (int dim = 0; dim < KERNEL_DIM; dim++)
-    { 
+) {
+    for (int dim = 0; dim < KERNEL_DIM; dim++) { 
         const double c1 = (cell_shape_deriv[0 * KERNEL_DIM + dim] * (*node_potential0));
         const double c2 = (cell_shape_deriv[1 * KERNEL_DIM + dim] * (*node_potential1));
         const double c3 = (cell_shape_deriv[2 * KERNEL_DIM + dim] * (*node_potential2));
@@ -212,20 +194,18 @@ inline void get_final_max_values_kernel(
     const OPP_REAL* n_charge_den,
     OPP_REAL* max_n_charge_den,
     const OPP_REAL* n_pot,
-    OPP_REAL* max_n_pot)
-{
+    OPP_REAL* max_n_pot
+) {
     *max_n_charge_den = MAX(abs(*n_charge_den), *max_n_charge_den);
-
     *max_n_pot = MAX(*n_pot, *max_n_pot);
 }
 
-//*******************************************************************************
+//*************************************************************************************************
 inline void get_max_cef_kernel(
     const OPP_REAL* val,
-    OPP_REAL* max_val)
-{
-    for (int dim = 0; dim < KERNEL_DIM; ++dim) 
-    {
+    OPP_REAL* max_val
+) {
+    for (int dim = 0; dim < KERNEL_DIM; ++dim) {
         *max_val = MAX(val[dim], *max_val);
     }
 }
diff --git a/opp_lib/Makefile b/opp_lib/Makefile
@@ -82,6 +82,9 @@ ifeq ($(PARMETIS), 1)
 endif
 
 CUDA_INC   += -I$(CUDA_INSTALL_PATH)/include
+# for demos with gcc
+# CUDA_INC   = -I$(CUDA_INSTALL_PATH)/include -I$(MPI_INSTALL_PATH)/include
+
 HIP_INC   += -I$(ROCM_INSTALL_DIR)/include -I$(ROCM_THRUST_DIR)/include -I$(ROCM_PRIM_DIR)/include
 
 .PHONY: clean mklib
@@ -249,9 +252,9 @@ OBJ_CUDA_MPI_CPP = 	$(patsubst $(SRC)/core/%.cpp,$(OBJ)/%+cuda_mpi.o,$(SRC_CUDA_
 					$(patsubst $(SRC)/mpi/%.cpp,$(OBJ)/%+cuda_mpi.o,$(SRC_CUDA_MPI_CPP))
 OBJ_CUDA_MPI_CU = 	$(patsubst $(SRC)/cuda/%.cu,$(OBJ)/%+cuda_mpi.o,$(SRC_CUDA_MPI_CU))
 $(OBJ)/%+cuda_mpi.o: $(SRC)/core/%.cpp
-	$(CPP) $(CPPFLAGS) -DUSE_MPI -DUSE_THRUST -c $< -o $@ $(OPP_INC) $(CUDA_INC)
+	$(MPICPP) $(CPPFLAGS) -DUSE_MPI -DUSE_THRUST -c $< -o $@ $(OPP_INC) $(CUDA_INC)
 $(OBJ)/%+cuda_mpi.o: $(SRC)/mpi/%.cpp
-	$(CPP) $(CPPFLAGS) -DUSE_MPI -DUSE_THRUST -c $< -o $@ $(OPP_INC) $(CUDA_INC)
+	$(MPICPP) $(CPPFLAGS) -DUSE_MPI -DUSE_THRUST -c $< -o $@ $(OPP_INC) $(CUDA_INC)
 $(OBJ)/%+cuda_mpi.o: $(SRC)/cuda/%.cu
 	$(NVCC) $(NVCCFLAGS) -DUSE_MPI -DUSE_THRUST -c $< -o $@ $(OPP_INC) $(CUDA_INC)