Skip to content

Commit

Permalink
minor dh perf improvement
Browse files: browse the repository at this point in the history
  • Loading branch information
ZamanLantra committed Nov 15, 2024
1 parent ea822fa commit cd2a2c3
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 9 deletions.
15 changes: 7 additions & 8 deletions opp_lib/src/cuda/opp_direct_hop_cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -795,6 +795,7 @@ void opp_init_dh_device(opp_set set)
// gathers all global move information into the global mover for communication
void opp_gather_dh_move_indices(opp_set set)
{
opp_profiler->start("MvDH_Gather");
opp_mem::copy_dev_to_host<OPP_INT>(dh_indices_h.move_count, dh_indices_d.move_count, 1);

if (OPP_DBG)
Expand Down Expand Up @@ -841,6 +842,7 @@ void opp_gather_dh_move_indices(opp_set set)
dh_indices_h.rank_indices[i], dh_indices_h.cell_indices[i]);
}

opp_profiler->end("MvDH_Gather");
if (OPP_DBG) opp_printf("OPP", "opp_gather_dh_move_indices DONE - dh move count %d",
*(dh_indices_h.move_count));
}
Expand All @@ -865,26 +867,23 @@ void dh_particle_packer_gpu::pack(opp_set set)
if (OPP_DBG)
opp_printf("dh_particle_packer_gpu", "pack set [%s]", set->name);

std::map<int, std::vector<char>>& buffers_of_set = this->buffers[set->index];
for (auto& x : buffers_of_set) // try to keep the allocated vectors as they are, without deleting them
x.second.clear();

opp_profiler->start("MvDH_Pack");

std::map<int, std::vector<char>>& buffers_of_set = this->buffers[set->index];
thrust::device_vector<OPP_INT>& temp_dv = *(set->mesh_relation_dat->thrust_int_sort);

for (const auto& a : local_part_ids) {
for (const auto& per_rank_parts : local_part_ids) {

const int send_rank = a.first;
const std::vector<int>& part_ids_vec = a.second;
const int send_rank = per_rank_parts.first;
const std::vector<int>& part_ids_vec = per_rank_parts.second;
const size_t bytes_per_rank = (size_t)set->particle_size * part_ids_vec.size();

if (OPP_DBG)
opp_printf("dh_particle_packer_gpu", "pack send_rank %d - count %zu",
send_rank, part_ids_vec.size());

std::vector<char>& send_rank_buffer = buffers_of_set[send_rank];
send_rank_buffer.resize(bytes_per_rank, 0);
send_rank_buffer.resize(bytes_per_rank);

const int copy_count = (int)part_ids_vec.size();

Expand Down
2 changes: 1 addition & 1 deletion opp_lib/src/mpi/opp_mpi_particle_comm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1164,7 +1164,7 @@ int64_t GlobalParticleMover::finalize(opp_set set) {
MPI_CHECK(MPI_Waitall(this->numRemoteRecvRanks, &(this->h_recv_requests[0]), &(this->h_recv_status[0])));
opp_profiler->end("MvDH_WaitFin1");

// if (OPP_DBG)
if (OPP_DBG)
{
// Check this rank recv'd the correct number of bytes from each remote
for (int rankx = 0; rankx < this->numRemoteRecvRanks; rankx++) {
Expand Down

0 comments on commit cd2a2c3

Please sign in to comment.