Add option to allocate mpi buffers in device memory #979

Merged
6 changes: 6 additions & 0 deletions docs/source/run/parameters.rst
@@ -193,6 +193,12 @@ General parameters
Currently, this option only affects plasma operations (gather, push and deposition).
The tile size can be set with ``plasmas.sort_bin_size``.

* ``hipace.comms_buffer_on_gpu`` (`bool`) optional (default `0`)
Whether the buffers used for MPI communication should be allocated on the GPU (device memory).
By default they will be allocated on the CPU (pinned memory).
Setting this option to `1` is necessary to take advantage of GPU-enabled MPI; however, for this,
additional environment variables need to be set depending on the system.

* ``hipace.do_beam_jz_minus_rho`` (`bool`) optional (default `0`)
Whether the beam contribution to :math:`j_z-c\rho` is calculated and used when solving for Psi (used to calculate the transverse fields Ex-By and Ey+Bx).
If 0, this term is assumed to be 0 (a good approximation for an ultra-relativistic beam in the z direction with small transverse momentum).
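For illustration, here is a minimal C++ sketch (not part of this PR) of the pattern the option documented above enables: a boolean selects between AMReX's pinned-host arena and its device arena, and the chosen arena then provides every MPI communication buffer. The function names below are illustrative only.

#include <AMReX_Arena.H>

// Pick the arena for MPI communication buffers: device memory if requested,
// otherwise CPU pinned memory (the default).
amrex::Arena* select_comms_arena (bool buffers_on_gpu)
{
    return buffers_on_gpu ? amrex::The_Device_Arena()
                          : amrex::The_Pinned_Arena();
}

// Allocate and release a buffer through the selected arena; the call sites
// look the same for both memory kinds.
void example_usage ()
{
    amrex::Arena* comms_arena = select_comms_arena(true);
    auto* buffer = static_cast<char*>(comms_arena->alloc(1024));
    // ... pack data into the buffer and hand it to MPI ...
    comms_arena->free(buffer);
}

Because both arenas implement the same amrex::Arena interface, the allocation and free call sites in the Hipace.cpp changes below stay identical no matter where the memory lives.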
4 changes: 4 additions & 0 deletions src/Hipace.H
@@ -372,6 +372,10 @@ public:
amrex::Parser m_salame_parser;
/** Function to get the target Ez field for SALAME */
amrex::ParserExecutor<3> m_salame_target_func;
/** Whether MPI communication buffers should be allocated in device memory */
bool m_comms_buffer_on_gpu = false;
/** Arena for MPI communications */
amrex::Arena* m_comms_arena = nullptr;

/** \brief Check that the ghost beam particles are in the proper box, and invalidate
* those not in the right slice.
28 changes: 19 additions & 9 deletions src/Hipace.cpp
@@ -165,6 +165,8 @@ Hipace::Hipace () :
queryWithParser(pph, "background_density_SI", m_background_density_SI);

#ifdef AMREX_USE_MPI
queryWithParser(pph, "comms_buffer_on_gpu", m_comms_buffer_on_gpu);
m_comms_arena = m_comms_buffer_on_gpu ? amrex::The_Device_Arena() : amrex::The_Pinned_Arena();
queryWithParser(pph, "skip_empty_comms", m_skip_empty_comms);
int myproc = amrex::ParallelDescriptor::MyProc();
m_rank_z = myproc/(m_numprocs_x*m_numprocs_y);
@@ -494,6 +496,14 @@ Hipace::Evolve ()
void
Hipace::SolveOneSlice (int islice, const int islice_local, int step)
{
#ifdef AMREX_USE_MPI
{
// Call an MPI function so that the MPI implementation has a chance to
// run tasks necessary to make progress with asynchronous communications.
int flag = 0;
MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &flag, MPI_STATUS_IGNORE);
}
#endif
HIPACE_PROFILE("Hipace::SolveOneSlice()");

// Between this push and the corresponding pop at the end of this
@@ -1065,7 +1075,7 @@ Hipace::Wait (const int step, int it, bool only_ghost)
const amrex::Long psize = sizeof(amrex::ParticleReal)*BeamParticleContainer::NAR
+ sizeof(int)*BeamParticleContainer::NAI;
const amrex::Long buffer_size = psize*np_total;
auto recv_buffer = (char*)amrex::The_Pinned_Arena()->alloc(buffer_size);
auto recv_buffer = (char*)m_comms_arena->alloc(buffer_size);

MPI_Status status;
const int loc_pcomm_z_tag = only_ghost ? pcomm_z_tag_ghost : pcomm_z_tag;
Expand Down Expand Up @@ -1140,7 +1150,7 @@ Hipace::Wait (const int step, int it, bool only_ghost)
}

amrex::Gpu::streamSynchronize();
amrex::The_Pinned_Arena()->free(recv_buffer);
m_comms_arena->free(recv_buffer);
}

// Receive laser
@@ -1164,7 +1174,7 @@ Hipace::Wait (const int step, int it, bool only_ghost)
(m_rank_z+1)%m_numprocs_z, lcomm_z_tag, m_comm_z, &lstatus);
} else {
// Receive envelope in a communication buffer, and copy to laser fab on device
auto lrecv_buffer = (amrex::Real*)amrex::The_Pinned_Arena()->alloc
auto lrecv_buffer = (amrex::Real*)m_comms_arena->alloc
(sizeof(amrex::Real)*nreals);
MPI_Recv(lrecv_buffer, nreals,
amrex::ParallelDescriptor::Mpi_typemap<amrex::Real>::type(),
@@ -1178,7 +1188,7 @@ Hipace::Wait (const int step, int it, bool only_ghost)
laser_arr(i,j,k,n) = buf(i,j,k,n);
});
amrex::Gpu::streamSynchronize();
amrex::The_Pinned_Arena()->free(lrecv_buffer);
m_comms_arena->free(lrecv_buffer);
}
}
#endif
@@ -1260,7 +1270,7 @@ Hipace::Notify (const int step, const int it, bool only_ghost)
+ sizeof(int)*BeamParticleContainer::NAI;
const amrex::Long buffer_size = psize*np_total;
char*& psend_buffer = only_ghost ? m_psend_buffer_ghost : m_psend_buffer;
psend_buffer = (char*)amrex::The_Pinned_Arena()->alloc(buffer_size);
psend_buffer = (char*)m_comms_arena->alloc(buffer_size);

int offset_beam = 0;
for (int ibeam = 0; ibeam < nbeams; ibeam++){
@@ -1354,7 +1364,7 @@ Hipace::Notify (const int step, const int it, bool only_ghost)
amrex::Array4<amrex::Real const> const& laser_arr = laser_fab.array();
const amrex::Box& lbx = laser_fab.box(); // does not include ghost cells
const std::size_t nreals = lbx.numPts()*laser_fab.nComp();
m_lsend_buffer = (amrex::Real*)amrex::The_Pinned_Arena()->alloc
m_lsend_buffer = (amrex::Real*)m_comms_arena->alloc
(sizeof(amrex::Real)*nreals);
if (m_multi_laser.is3dOnHost()) {
amrex::Gpu::streamSynchronize();
@@ -1391,7 +1401,7 @@ Hipace::NotifyFinish (const int it, bool only_ghost, bool only_time)
if (m_psend_buffer_ghost) {
MPI_Status status;
MPI_Wait(&m_psend_request_ghost, &status);
amrex::The_Pinned_Arena()->free(m_psend_buffer_ghost);
m_comms_arena->free(m_psend_buffer_ghost);
m_psend_buffer_ghost = nullptr;
}
} else {
@@ -1412,13 +1422,13 @@ Hipace::NotifyFinish (const int it, bool only_ghost, bool only_time)
if (m_psend_buffer) {
MPI_Status status;
MPI_Wait(&m_psend_request, &status);
amrex::The_Pinned_Arena()->free(m_psend_buffer);
m_comms_arena->free(m_psend_buffer);
m_psend_buffer = nullptr;
}
if (m_lsend_buffer) {
MPI_Status status;
MPI_Wait(&m_lsend_request, &status);
amrex::The_Pinned_Arena()->free(m_lsend_buffer);
m_comms_arena->free(m_lsend_buffer);
m_lsend_buffer = nullptr;
}
}
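To see why GPU-enabled MPI is required once the buffers live in device memory, here is a hedged C++ sketch (not taken from the PR; the routine names are hypothetical): the pointer handed to MPI_Isend now refers to device memory, which only a GPU-aware MPI implementation can read directly. The MPI_Iprobe call mirrors the progress hint added to SolveOneSlice above.

#include <cstddef>
#include <mpi.h>
#include <AMReX_Arena.H>
#include <AMReX_GpuDevice.H>

// Hypothetical send routine: buffer_size bytes are packed into a buffer taken
// from the device arena and posted with a non-blocking send. A GPU-aware MPI
// library can dereference the device pointer directly; a non-aware one cannot.
void send_slice (std::size_t buffer_size, int dest, int tag, MPI_Request& request)
{
    auto* send_buffer = static_cast<char*>(amrex::The_Device_Arena()->alloc(buffer_size));
    // ... launch kernels that pack particle/field data into send_buffer ...
    amrex::Gpu::streamSynchronize(); // ensure packing has finished before MPI reads the data
    MPI_Isend(send_buffer, static_cast<int>(buffer_size), MPI_CHAR,
              dest, tag, MPI_COMM_WORLD, &request);
    // The buffer must stay alive until the matching MPI_Wait completes,
    // which is when it can be handed back to the arena with free().
}

// While computing the next slice, poke the MPI library occasionally so it can
// make progress on the outstanding asynchronous sends and receives.
void progress_hint ()
{
    int flag = 0;
    MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &flag, MPI_STATUS_IGNORE);
}

The likely trade-off: pinned host buffers work with any MPI library but imply an extra device-to-host copy, while device buffers can skip that copy when the MPI library supports reading GPU memory directly.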