Skip to content

Commit

Permalink
fempic with dh-codegen
Browse files Browse the repository at this point in the history
  • Loading branch information
ZamanLantra committed Oct 25, 2024
1 parent 71bc64d commit 211981f
Show file tree
Hide file tree
Showing 20 changed files with 623 additions and 794 deletions.
15 changes: 3 additions & 12 deletions app_fempic_cg/cuda/calculate_new_pos_vel_kernel_loop.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,18 +76,9 @@ void opp_par_loop_all__calculate_new_pos_vel_kernel(opp_set set, opp_iterate_typ
const int iter_size = opp_mpi_halo_exchanges_grouped(set, nargs, args, Device_GPU);


if (opp_k3_dat0_stride != args[0].dat->set->set_capacity) {
opp_k3_dat0_stride = args[0].dat->set->set_capacity;
cutilSafeCall(cudaMemcpyToSymbol(opp_k3_dat0_stride_d, &opp_k3_dat0_stride, sizeof(OPP_INT)));
}
if (opp_k3_dat1_stride != args[1].dat->set->set_capacity) {
opp_k3_dat1_stride = args[1].dat->set->set_capacity;
cutilSafeCall(cudaMemcpyToSymbol(opp_k3_dat1_stride_d, &opp_k3_dat1_stride, sizeof(OPP_INT)));
}
if (opp_k3_dat2_stride != args[2].dat->set->set_capacity) {
opp_k3_dat2_stride = args[2].dat->set->set_capacity;
cutilSafeCall(cudaMemcpyToSymbol(opp_k3_dat2_stride_d, &opp_k3_dat2_stride, sizeof(OPP_INT)));
}
opp_mem::dev_copy_to_symbol<OPP_INT>(opp_k3_dat0_stride_d, &opp_k3_dat0_stride, &(args[0].dat->set->set_capacity), 1);
opp_mem::dev_copy_to_symbol<OPP_INT>(opp_k3_dat1_stride_d, &opp_k3_dat1_stride, &(args[1].dat->set->set_capacity), 1);
opp_mem::dev_copy_to_symbol<OPP_INT>(opp_k3_dat2_stride_d, &opp_k3_dat2_stride, &(args[2].dat->set->set_capacity), 1);

#ifdef OPP_BLOCK_SIZE_3
const int block_size = OPP_BLOCK_SIZE_3;
Expand Down
20 changes: 4 additions & 16 deletions app_fempic_cg/cuda/compute_electric_field_kernel_loop.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,22 +93,10 @@ void opp_par_loop_all__compute_electric_field_kernel(opp_set set, opp_iterate_ty
const int iter_size = opp_mpi_halo_exchanges_grouped(set, nargs, args, Device_GPU);


if (opp_k7_dat0_stride != args[0].dat->set->set_capacity) {
opp_k7_dat0_stride = args[0].dat->set->set_capacity;
cutilSafeCall(cudaMemcpyToSymbol(opp_k7_dat0_stride_d, &opp_k7_dat0_stride, sizeof(OPP_INT)));
}
if (opp_k7_dat1_stride != args[1].dat->set->set_capacity) {
opp_k7_dat1_stride = args[1].dat->set->set_capacity;
cutilSafeCall(cudaMemcpyToSymbol(opp_k7_dat1_stride_d, &opp_k7_dat1_stride, sizeof(OPP_INT)));
}
if (opp_k7_dat2_stride != args[2].dat->set->set_capacity) {
opp_k7_dat2_stride = args[2].dat->set->set_capacity;
cutilSafeCall(cudaMemcpyToSymbol(opp_k7_dat2_stride_d, &opp_k7_dat2_stride, sizeof(OPP_INT)));
}
if (opp_k7_map0_stride != args[2].size) {
opp_k7_map0_stride = args[2].size;
cutilSafeCall(cudaMemcpyToSymbol(opp_k7_map0_stride_d, &opp_k7_map0_stride, sizeof(OPP_INT)));
}
opp_mem::dev_copy_to_symbol<OPP_INT>(opp_k7_dat0_stride_d, &opp_k7_dat0_stride, &(args[0].dat->set->set_capacity), 1);
opp_mem::dev_copy_to_symbol<OPP_INT>(opp_k7_dat1_stride_d, &opp_k7_dat1_stride, &(args[1].dat->set->set_capacity), 1);
opp_mem::dev_copy_to_symbol<OPP_INT>(opp_k7_dat2_stride_d, &opp_k7_dat2_stride, &(args[2].dat->set->set_capacity), 1);
opp_mem::dev_copy_to_symbol<OPP_INT>(opp_k7_map0_stride_d, &opp_k7_map0_stride, &(args[2].size), 1);

#ifdef OPP_BLOCK_SIZE_7
const int block_size = OPP_BLOCK_SIZE_7;
Expand Down
10 changes: 2 additions & 8 deletions app_fempic_cg/cuda/compute_node_charge_density_kernel_loop.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,14 +61,8 @@ void opp_par_loop_all__compute_node_charge_density_kernel(opp_set set, opp_itera
const int iter_size = opp_mpi_halo_exchanges_grouped(set, nargs, args, Device_GPU);


if (opp_k6_dat0_stride != args[0].dat->set->set_capacity) {
opp_k6_dat0_stride = args[0].dat->set->set_capacity;
cutilSafeCall(cudaMemcpyToSymbol(opp_k6_dat0_stride_d, &opp_k6_dat0_stride, sizeof(OPP_INT)));
}
if (opp_k6_dat1_stride != args[1].dat->set->set_capacity) {
opp_k6_dat1_stride = args[1].dat->set->set_capacity;
cutilSafeCall(cudaMemcpyToSymbol(opp_k6_dat1_stride_d, &opp_k6_dat1_stride, sizeof(OPP_INT)));
}
opp_mem::dev_copy_to_symbol<OPP_INT>(opp_k6_dat0_stride_d, &opp_k6_dat0_stride, &(args[0].dat->set->set_capacity), 1);
opp_mem::dev_copy_to_symbol<OPP_INT>(opp_k6_dat1_stride_d, &opp_k6_dat1_stride, &(args[1].dat->set->set_capacity), 1);

#ifdef OPP_BLOCK_SIZE_6
const int block_size = OPP_BLOCK_SIZE_6;
Expand Down
20 changes: 4 additions & 16 deletions app_fempic_cg/cuda/deposit_charge_on_nodes_kernel_loop.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -199,18 +199,9 @@ void opp_par_loop_all__deposit_charge_on_nodes_kernel(opp_set set, opp_iterate_t
#endif


if (opp_k5_dat0_stride != args[0].dat->set->set_capacity) {
opp_k5_dat0_stride = args[0].dat->set->set_capacity;
cutilSafeCall(cudaMemcpyToSymbol(opp_k5_dat0_stride_d, &opp_k5_dat0_stride, sizeof(OPP_INT)));
}
if (opp_k5_dat1_stride != args[1].dat->set->set_capacity) {
opp_k5_dat1_stride = args[1].dat->set->set_capacity;
cutilSafeCall(cudaMemcpyToSymbol(opp_k5_dat1_stride_d, &opp_k5_dat1_stride, sizeof(OPP_INT)));
}
if (opp_k5_map0_stride != args[1].size) {
opp_k5_map0_stride = args[1].size;
cutilSafeCall(cudaMemcpyToSymbol(opp_k5_map0_stride_d, &opp_k5_map0_stride, sizeof(OPP_INT)));
}
opp_mem::dev_copy_to_symbol<OPP_INT>(opp_k5_dat0_stride_d, &opp_k5_dat0_stride, &(args[0].dat->set->set_capacity), 1);
opp_mem::dev_copy_to_symbol<OPP_INT>(opp_k5_dat1_stride_d, &opp_k5_dat1_stride, &(args[1].dat->set->set_capacity), 1);
opp_mem::dev_copy_to_symbol<OPP_INT>(opp_k5_map0_stride_d, &opp_k5_map0_stride, &(args[1].size), 1);

#ifdef OPP_BLOCK_SIZE_5
const int block_size = OPP_BLOCK_SIZE_5;
Expand Down Expand Up @@ -241,10 +232,7 @@ void opp_par_loop_all__deposit_charge_on_nodes_kernel(opp_set set, opp_iterate_t

else // Do segmented reductions ----------
{
if (opp_k5_sr_set_stride != set->size) {
opp_k5_sr_set_stride = set->size;
cutilSafeCall(cudaMemcpyToSymbol(opp_k5_sr_set_stride_d, &opp_k5_sr_set_stride, sizeof(OPP_INT)));
}
opp_mem::dev_copy_to_symbol<OPP_INT>(opp_k5_sr_set_stride_d, &opp_k5_sr_set_stride, &(set->size), 1);

size_t operating_size_dat1 = 0, resize_size_dat1 = 0;

Expand Down
10 changes: 2 additions & 8 deletions app_fempic_cg/cuda/get_final_max_values_kernel_loop.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,14 +89,8 @@ void opp_par_loop_all__get_final_max_values_kernel(opp_set set, opp_iterate_type
OPP_REAL *arg1_host_data = (OPP_REAL *)args[1].data;
OPP_REAL *arg3_host_data = (OPP_REAL *)args[3].data;

if (opp_k9_dat0_stride != args[0].dat->set->set_capacity) {
opp_k9_dat0_stride = args[0].dat->set->set_capacity;
cutilSafeCall(cudaMemcpyToSymbol(opp_k9_dat0_stride_d, &opp_k9_dat0_stride, sizeof(OPP_INT)));
}
if (opp_k9_dat1_stride != args[2].dat->set->set_capacity) {
opp_k9_dat1_stride = args[2].dat->set->set_capacity;
cutilSafeCall(cudaMemcpyToSymbol(opp_k9_dat1_stride_d, &opp_k9_dat1_stride, sizeof(OPP_INT)));
}
opp_mem::dev_copy_to_symbol<OPP_INT>(opp_k9_dat0_stride_d, &opp_k9_dat0_stride, &(args[0].dat->set->set_capacity), 1);
opp_mem::dev_copy_to_symbol<OPP_INT>(opp_k9_dat1_stride_d, &opp_k9_dat1_stride, &(args[2].dat->set->set_capacity), 1);

#ifdef OPP_BLOCK_SIZE_9
const int block_size = OPP_BLOCK_SIZE_9;
Expand Down
5 changes: 1 addition & 4 deletions app_fempic_cg/cuda/get_max_cef_kernel_loop.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,10 +70,7 @@ void opp_par_loop_all__get_max_cef_kernel(opp_set set, opp_iterate_type,

OPP_REAL *arg1_host_data = (OPP_REAL *)args[1].data;

if (opp_k8_dat0_stride != args[0].dat->set->set_capacity) {
opp_k8_dat0_stride = args[0].dat->set->set_capacity;
cutilSafeCall(cudaMemcpyToSymbol(opp_k8_dat0_stride_d, &opp_k8_dat0_stride, sizeof(OPP_INT)));
}
opp_mem::dev_copy_to_symbol<OPP_INT>(opp_k8_dat0_stride_d, &opp_k8_dat0_stride, &(args[0].dat->set->set_capacity), 1);

#ifdef OPP_BLOCK_SIZE_8
const int block_size = OPP_BLOCK_SIZE_8;
Expand Down
10 changes: 2 additions & 8 deletions app_fempic_cg/cuda/init_boundary_pot_kernel_loop.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,14 +71,8 @@ void opp_par_loop_all__init_boundary_pot_kernel(opp_set set, opp_iterate_type,
const int iter_size = opp_mpi_halo_exchanges_grouped(set, nargs, args, Device_GPU);


if (opp_k1_dat0_stride != args[0].dat->set->set_capacity) {
opp_k1_dat0_stride = args[0].dat->set->set_capacity;
cutilSafeCall(cudaMemcpyToSymbol(opp_k1_dat0_stride_d, &opp_k1_dat0_stride, sizeof(OPP_INT)));
}
if (opp_k1_dat1_stride != args[1].dat->set->set_capacity) {
opp_k1_dat1_stride = args[1].dat->set->set_capacity;
cutilSafeCall(cudaMemcpyToSymbol(opp_k1_dat1_stride_d, &opp_k1_dat1_stride, sizeof(OPP_INT)));
}
opp_mem::dev_copy_to_symbol<OPP_INT>(opp_k1_dat0_stride_d, &opp_k1_dat0_stride, &(args[0].dat->set->set_capacity), 1);
opp_mem::dev_copy_to_symbol<OPP_INT>(opp_k1_dat1_stride_d, &opp_k1_dat1_stride, &(args[1].dat->set->set_capacity), 1);

#ifdef OPP_BLOCK_SIZE_1
const int block_size = OPP_BLOCK_SIZE_1;
Expand Down
55 changes: 11 additions & 44 deletions app_fempic_cg/cuda/inject_ions_kernel_loop.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,50 +143,17 @@ void opp_par_loop_injected__inject_ions_kernel(opp_set set, opp_iterate_type,
const int iter_size = set->diff;
const int inj_start = (set->size - set->diff);

if (opp_k2_dat0_stride != args[0].dat->set->set_capacity) {
opp_k2_dat0_stride = args[0].dat->set->set_capacity;
cutilSafeCall(cudaMemcpyToSymbol(opp_k2_dat0_stride_d, &opp_k2_dat0_stride, sizeof(OPP_INT)));
}
if (opp_k2_dat1_stride != args[1].dat->set->set_capacity) {
opp_k2_dat1_stride = args[1].dat->set->set_capacity;
cutilSafeCall(cudaMemcpyToSymbol(opp_k2_dat1_stride_d, &opp_k2_dat1_stride, sizeof(OPP_INT)));
}
if (opp_k2_dat2_stride != args[2].size) {
opp_k2_dat2_stride = args[2].size;
cutilSafeCall(cudaMemcpyToSymbol(opp_k2_dat2_stride_d, &opp_k2_dat2_stride, sizeof(OPP_INT)));
}
if (opp_k2_dat3_stride != args[3].size) {
opp_k2_dat3_stride = args[3].size;
cutilSafeCall(cudaMemcpyToSymbol(opp_k2_dat3_stride_d, &opp_k2_dat3_stride, sizeof(OPP_INT)));
}
if (opp_k2_dat4_stride != args[4].dat->set->set_capacity) {
opp_k2_dat4_stride = args[4].dat->set->set_capacity;
cutilSafeCall(cudaMemcpyToSymbol(opp_k2_dat4_stride_d, &opp_k2_dat4_stride, sizeof(OPP_INT)));
}
if (opp_k2_dat5_stride != args[5].dat->set->set_capacity) {
opp_k2_dat5_stride = args[5].dat->set->set_capacity;
cutilSafeCall(cudaMemcpyToSymbol(opp_k2_dat5_stride_d, &opp_k2_dat5_stride, sizeof(OPP_INT)));
}
if (opp_k2_dat6_stride != args[6].dat->set->set_capacity) {
opp_k2_dat6_stride = args[6].dat->set->set_capacity;
cutilSafeCall(cudaMemcpyToSymbol(opp_k2_dat6_stride_d, &opp_k2_dat6_stride, sizeof(OPP_INT)));
}
if (opp_k2_dat7_stride != args[7].dat->set->set_capacity) {
opp_k2_dat7_stride = args[7].dat->set->set_capacity;
cutilSafeCall(cudaMemcpyToSymbol(opp_k2_dat7_stride_d, &opp_k2_dat7_stride, sizeof(OPP_INT)));
}
if (opp_k2_dat8_stride != args[8].dat->set->set_capacity) {
opp_k2_dat8_stride = args[8].dat->set->set_capacity;
cutilSafeCall(cudaMemcpyToSymbol(opp_k2_dat8_stride_d, &opp_k2_dat8_stride, sizeof(OPP_INT)));
}
if (opp_k2_dat9_stride != args[9].dat->set->set_capacity) {
opp_k2_dat9_stride = args[9].dat->set->set_capacity;
cutilSafeCall(cudaMemcpyToSymbol(opp_k2_dat9_stride_d, &opp_k2_dat9_stride, sizeof(OPP_INT)));
}
if (opp_k2_map0_stride != args[4].size) {
opp_k2_map0_stride = args[4].size;
cutilSafeCall(cudaMemcpyToSymbol(opp_k2_map0_stride_d, &opp_k2_map0_stride, sizeof(OPP_INT)));
}
opp_mem::dev_copy_to_symbol<OPP_INT>(opp_k2_dat0_stride_d, &opp_k2_dat0_stride, &(args[0].dat->set->set_capacity), 1);
opp_mem::dev_copy_to_symbol<OPP_INT>(opp_k2_dat1_stride_d, &opp_k2_dat1_stride, &(args[1].dat->set->set_capacity), 1);
opp_mem::dev_copy_to_symbol<OPP_INT>(opp_k2_dat2_stride_d, &opp_k2_dat2_stride, &(args[2].size), 1);
opp_mem::dev_copy_to_symbol<OPP_INT>(opp_k2_dat3_stride_d, &opp_k2_dat3_stride, &(args[3].size), 1);
opp_mem::dev_copy_to_symbol<OPP_INT>(opp_k2_dat4_stride_d, &opp_k2_dat4_stride, &(args[4].dat->set->set_capacity), 1);
opp_mem::dev_copy_to_symbol<OPP_INT>(opp_k2_dat5_stride_d, &opp_k2_dat5_stride, &(args[5].dat->set->set_capacity), 1);
opp_mem::dev_copy_to_symbol<OPP_INT>(opp_k2_dat6_stride_d, &opp_k2_dat6_stride, &(args[6].dat->set->set_capacity), 1);
opp_mem::dev_copy_to_symbol<OPP_INT>(opp_k2_dat7_stride_d, &opp_k2_dat7_stride, &(args[7].dat->set->set_capacity), 1);
opp_mem::dev_copy_to_symbol<OPP_INT>(opp_k2_dat8_stride_d, &opp_k2_dat8_stride, &(args[8].dat->set->set_capacity), 1);
opp_mem::dev_copy_to_symbol<OPP_INT>(opp_k2_dat9_stride_d, &opp_k2_dat9_stride, &(args[9].dat->set->set_capacity), 1);
opp_mem::dev_copy_to_symbol<OPP_INT>(opp_k2_map0_stride_d, &opp_k2_map0_stride, &(args[4].size), 1);

#ifdef OPP_BLOCK_SIZE_2
const int block_size = OPP_BLOCK_SIZE_2;
Expand Down
Loading

0 comments on commit 211981f

Please sign in to comment.