Add option to allocate mpi buffers in device memory #979

Merged
6 changes: 6 additions & 0 deletions docs/source/run/parameters.rst
@@ -193,6 +193,12 @@ General parameters
Currently, this option only affects plasma operations (gather, push and deposition).
The tile size can be set with ``plasmas.sort_bin_size``.

* ``hipace.comms_buffer_on_gpu`` (`bool`) optional (default `0`)
Whether the buffers used for MPI communication should be allocated on the GPU (device memory).
By default they will be allocated on the CPU (pinned memory).
Setting this option to `1` is necessary to take advantage of GPU-enabled MPI; however, for this,
additional environment variables need to be set depending on the system.

* ``hipace.do_beam_jz_minus_rho`` (`bool`) optional (default `0`)
Whether the beam contribution to :math:`j_z-c\rho` is calculated and used when solving for Psi (used to calculate the transverse fields Ex-By and Ey+Bx).
If 0, this term is assumed to be 0 (a good approximation for an ultra-relativistic beam in the z direction with small transverse momentum).
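For illustration, here is a minimal C++ sketch (not part of this PR) of the pattern the option documented above enables: a boolean selects between AMReX's pinned-host arena and its device arena, and the chosen arena then provides every MPI communication buffer. The function names below are illustrative only.

#include <AMReX_Arena.H>

// Pick the arena for MPI communication buffers: device memory if requested,
// otherwise CPU pinned memory (the default).
amrex::Arena* select_comms_arena (bool buffers_on_gpu)
{
    return buffers_on_gpu ? amrex::The_Device_Arena()
                          : amrex::The_Pinned_Arena();
}

// Allocate and release a buffer through the selected arena; the call sites
// look the same for both memory kinds.
void example_usage ()
{
    amrex::Arena* comms_arena = select_comms_arena(true);
    auto* buffer = static_cast<char*>(comms_arena->alloc(1024));
    // ... pack data into the buffer and hand it to MPI ...
    comms_arena->free(buffer);
}

Because both arenas implement the same amrex::Arena interface, the allocation and free call sites in the Hipace.cpp changes below stay identical no matter where the memory lives.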
4 changes: 4 additions & 0 deletions src/Hipace.H
@@ -372,6 +372,10 @@ public:
amrex::Parser m_salame_parser;
/** Function to get the target Ez field for SALAME */
amrex::ParserExecutor<3> m_salame_target_func;
/** Whether MPI communication buffers should be allocated in device memory */
bool m_comms_buffer_on_gpu = false;
/** Arena for MPI communications */
amrex::Arena* m_comms_arena = nullptr;

/** \brief Check that the ghost beam particles are in the proper box, and invalidate
* those not in the right slice.
28 changes: 19 additions & 9 deletions src/Hipace.cpp
@@ -165,6 +165,8 @@ Hipace::Hipace () :
queryWithParser(pph, "background_density_SI", m_background_density_SI);

#ifdef AMREX_USE_MPI
queryWithParser(pph, "comms_buffer_on_gpu", m_comms_buffer_on_gpu);
m_comms_arena = m_comms_buffer_on_gpu ? amrex::The_Device_Arena() : amrex::The_Pinned_Arena();
queryWithParser(pph, "skip_empty_comms", m_skip_empty_comms);
int myproc = amrex::ParallelDescriptor::MyProc();
m_rank_z = myproc/(m_numprocs_x*m_numprocs_y);
@@ -494,6 +496,14 @@ Hipace::Evolve ()
void
Hipace::SolveOneSlice (int islice, const int islice_local, int step)
{
#ifdef AMREX_USE_MPI
{
// Call an MPI function so that the MPI implementation has a chance to
// run tasks necessary to make progress with asynchronous communications.
int flag = 0;
MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &flag, MPI_STATUS_IGNORE);
}
#endif
HIPACE_PROFILE("Hipace::SolveOneSlice()");

// Between this push and the corresponding pop at the end of this
@@ -1065,7 +1075,7 @@ Hipace::Wait (const int step, int it, bool only_ghost)
const amrex::Long psize = sizeof(amrex::ParticleReal)*BeamParticleContainer::NAR
+ sizeof(int)*BeamParticleContainer::NAI;
const amrex::Long buffer_size = psize*np_total;
auto recv_buffer = (char*)amrex::The_Pinned_Arena()->alloc(buffer_size);
auto recv_buffer = (char*)m_comms_arena->alloc(buffer_size);

MPI_Status status;
const int loc_pcomm_z_tag = only_ghost ? pcomm_z_tag_ghost : pcomm_z_tag;
Expand Down Expand Up @@ -1140,7 +1150,7 @@ Hipace::Wait (const int step, int it, bool only_ghost)
}

amrex::Gpu::streamSynchronize();
amrex::The_Pinned_Arena()->free(recv_buffer);
m_comms_arena->free(recv_buffer);
}

// Receive laser
@@ -1164,7 +1174,7 @@ Hipace::Wait (const int step, int it, bool only_ghost)
(m_rank_z+1)%m_numprocs_z, lcomm_z_tag, m_comm_z, &lstatus);
} else {
// Receive envelope in a communication buffer, and copy to laser fab on device
auto lrecv_buffer = (amrex::Real*)amrex::The_Pinned_Arena()->alloc
auto lrecv_buffer = (amrex::Real*)m_comms_arena->alloc
(sizeof(amrex::Real)*nreals);
MPI_Recv(lrecv_buffer, nreals,
amrex::ParallelDescriptor::Mpi_typemap<amrex::Real>::type(),
@@ -1178,7 +1188,7 @@ Hipace::Wait (const int step, int it, bool only_ghost)
laser_arr(i,j,k,n) = buf(i,j,k,n);
});
amrex::Gpu::streamSynchronize();
amrex::The_Pinned_Arena()->free(lrecv_buffer);
m_comms_arena->free(lrecv_buffer);
}
}
#endif
@@ -1260,7 +1270,7 @@ Hipace::Notify (const int step, const int it, bool only_ghost)
+ sizeof(int)*BeamParticleContainer::NAI;
const amrex::Long buffer_size = psize*np_total;
char*& psend_buffer = only_ghost ? m_psend_buffer_ghost : m_psend_buffer;
psend_buffer = (char*)amrex::The_Pinned_Arena()->alloc(buffer_size);
psend_buffer = (char*)m_comms_arena->alloc(buffer_size);

int offset_beam = 0;
for (int ibeam = 0; ibeam < nbeams; ibeam++){
@@ -1354,7 +1364,7 @@ Hipace::Notify (const int step, const int it, bool only_ghost)
amrex::Array4<amrex::Real const> const& laser_arr = laser_fab.array();
const amrex::Box& lbx = laser_fab.box(); // does not include ghost cells
const std::size_t nreals = lbx.numPts()*laser_fab.nComp();
m_lsend_buffer = (amrex::Real*)amrex::The_Pinned_Arena()->alloc
m_lsend_buffer = (amrex::Real*)m_comms_arena->alloc
(sizeof(amrex::Real)*nreals);
if (m_multi_laser.is3dOnHost()) {
amrex::Gpu::streamSynchronize();
@@ -1391,7 +1401,7 @@ Hipace::NotifyFinish (const int it, bool only_ghost, bool only_time)
if (m_psend_buffer_ghost) {
MPI_Status status;
MPI_Wait(&m_psend_request_ghost, &status);
amrex::The_Pinned_Arena()->free(m_psend_buffer_ghost);
m_comms_arena->free(m_psend_buffer_ghost);
m_psend_buffer_ghost = nullptr;
}
} else {
@@ -1412,13 +1422,13 @@ Hipace::NotifyFinish (const int it, bool only_ghost, bool only_time)
if (m_psend_buffer) {
MPI_Status status;
MPI_Wait(&m_psend_request, &status);
amrex::The_Pinned_Arena()->free(m_psend_buffer);
m_comms_arena->free(m_psend_buffer);
m_psend_buffer = nullptr;
}
if (m_lsend_buffer) {
MPI_Status status;
MPI_Wait(&m_lsend_request, &status);
amrex::The_Pinned_Arena()->free(m_lsend_buffer);
m_comms_arena->free(m_lsend_buffer);
m_lsend_buffer = nullptr;
}
}
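To see why GPU-enabled MPI is required once the buffers live in device memory, here is a hedged C++ sketch (not taken from the PR; the routine names are hypothetical): the pointer handed to MPI_Isend now refers to device memory, which only a GPU-aware MPI implementation can read directly. The MPI_Iprobe call mirrors the progress hint added to SolveOneSlice above.

#include <cstddef>
#include <mpi.h>
#include <AMReX_Arena.H>
#include <AMReX_GpuDevice.H>

// Hypothetical send routine: buffer_size bytes are packed into a buffer taken
// from the device arena and posted with a non-blocking send. A GPU-aware MPI
// library can dereference the device pointer directly; a non-aware one cannot.
void send_slice (std::size_t buffer_size, int dest, int tag, MPI_Request& request)
{
    auto* send_buffer = static_cast<char*>(amrex::The_Device_Arena()->alloc(buffer_size));
    // ... launch kernels that pack particle/field data into send_buffer ...
    amrex::Gpu::streamSynchronize(); // ensure packing has finished before MPI reads the data
    MPI_Isend(send_buffer, static_cast<int>(buffer_size), MPI_CHAR,
              dest, tag, MPI_COMM_WORLD, &request);
    // The buffer must stay alive until the matching MPI_Wait completes,
    // which is when it can be handed back to the arena with free().
}

// While computing the next slice, poke the MPI library occasionally so it can
// make progress on the outstanding asynchronous sends and receives.
void progress_hint ()
{
    int flag = 0;
    MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &flag, MPI_STATUS_IGNORE);
}

The likely trade-off: pinned host buffers work with any MPI library but imply an extra device-to-host copy, while device buffers can skip that copy when the MPI library supports reading GPU memory directly.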