diff --git a/CMakeLists.txt b/CMakeLists.txt index bf77b6d34e..8f7bef0b5d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -57,7 +57,7 @@ if(NOT HiPACE_PRECISION IN_LIST HiPACE_PRECISION_VALUES) endif() set(HiPACE_COMPUTE_VALUES NOACC CUDA SYCL HIP OMP) -set(HiPACE_COMPUTE NOACC CACHE STRING +set(HiPACE_COMPUTE OMP CACHE STRING "On-node, accelerated computing backend (NOACC/CUDA/SYCL/HIP/OMP)") set_property(CACHE HiPACE_COMPUTE PROPERTY STRINGS ${HiPACE_COMPUTE_VALUES}) if(NOT HiPACE_COMPUTE IN_LIST HiPACE_COMPUTE_VALUES) diff --git a/docs/source/building/building.rst b/docs/source/building/building.rst index b8d0529cc9..7cc1b14162 100644 --- a/docs/source/building/building.rst +++ b/docs/source/building/building.rst @@ -157,7 +157,7 @@ or by providing arguments to the CMake call CMake Option Default & Values Description ----------------------------- ---------------------------------------- ----------------------------------------------------- ``CMAKE_BUILD_TYPE`` RelWithDebInfo/**Release**/Debug Type of build, symbols & optimizations - ``HiPACE_COMPUTE`` **NOACC**/CUDA/SYCL/HIP/OMP On-node, accelerated computing backend + ``HiPACE_COMPUTE`` NOACC/CUDA/SYCL/HIP/**OMP** On-node, accelerated computing backend ``HiPACE_MPI`` **ON**/OFF Multi-node support (message-passing) ``HiPACE_PRECISION`` SINGLE/**DOUBLE** Floating point precision (single/double) ``HiPACE_amrex_repo`` https://github.com/AMReX-Codes/amrex.git Repository URI to pull and build AMReX from diff --git a/docs/source/building/platforms/booster_jsc.rst b/docs/source/building/platforms/booster_jsc.rst index 588e9cc030..d2789897c9 100644 --- a/docs/source/building/platforms/booster_jsc.rst +++ b/docs/source/building/platforms/booster_jsc.rst @@ -6,6 +6,9 @@ For more information please visit the `JSC documentation @juwels-booster.fz-juelich.de``. +Running on GPU +-------------- + Create a file ``profile.hipace`` and ``source`` it whenever you log in and want to work with HiPACE++: .. 
code-block:: bash @@ -59,3 +62,63 @@ You can then create your directory in your ``$SCRATCH_<project id>``, where you srun -n 8 --cpu_bind=sockets $HOME/src/hipace/build/bin/hipace.MPI.CUDA.DP inputs and use it to submit a simulation. + +Running on CPU +-------------- + +.. warning:: + The Juwels Booster is a GPU-accelerated supercomputer, and running on CPUs only is strongly discouraged. + This section only illustrates how to efficiently run on CPU with OpenMP threading, which was tested on the Juwels Booster for practical reasons, but should apply to other supercomputers. + In particular, the proposed values of ``OMP_PROC_BIND`` and ``OMP_PLACES`` give decent performance for both threaded FFTW and particle operations. + +Create a file ``profile.hipace`` and ``source`` it whenever you log in and want to work with HiPACE++: + +.. code-block:: bash + + # please set your project account + export proj=<your project id> + # required dependencies + module load CMake + module load GCC + module load OpenMPI + module load FFTW + module load HDF5 + module load ccache # optional, accelerates recompilation + +Install HiPACE++ (the first time, and whenever you want the latest version): + +.. code-block:: bash + + source profile.hipace + git clone https://github.com/Hi-PACE/hipace.git $HOME/src/hipace # only the first time + cd $HOME/src/hipace + rm -rf build + cmake -S . -B build -DHiPACE_COMPUTE=OMP + cmake --build build -j 16 + +You can get familiar with the HiPACE++ input file format in our :doc:`../../run/get_started` section, to prepare an input file that suits your needs. +You can then create your directory in your ``$SCRATCH_<project id>``, where you can put your input file and adapt the following submission script: + +..
code-block:: bash + + #!/bin/bash -l + #SBATCH -A $proj + #SBATCH --partition=booster + #SBATCH --nodes=1 + #SBATCH --ntasks=1 + #SBATCH --time=00:05:00 + #SBATCH --job-name=hipace + #SBATCH --output=hipace-%j-%N.txt + #SBATCH --error=hipace-%j-%N.err + + source $HOME/profile.hipace + + # These options give the best performance, in particular for the threaded FFTW + export OMP_PROC_BIND=false # true false master close spread + export OMP_PLACES=cores # threads cores sockets + + export OMP_NUM_THREADS=8 # Anything <= 16, depending on the problem size + + srun -n 8 --cpu_bind=sockets $HOME/src/hipace/build/bin/hipace.MPI.OMP.DP inputs + +and use it to submit a simulation.