From eb91f8ddbe564bdc43d3311d4ad057366b02192c Mon Sep 17 00:00:00 2001 From: Maxence Thevenet Date: Tue, 27 Jul 2021 18:57:18 +0200 Subject: [PATCH 1/4] make OpenMP default compute instead of NOACC --- CMakeLists.txt | 2 +- docs/source/building/building.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index bf77b6d34e..8f7bef0b5d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -57,7 +57,7 @@ if(NOT HiPACE_PRECISION IN_LIST HiPACE_PRECISION_VALUES) endif() set(HiPACE_COMPUTE_VALUES NOACC CUDA SYCL HIP OMP) -set(HiPACE_COMPUTE NOACC CACHE STRING +set(HiPACE_COMPUTE OMP CACHE STRING "On-node, accelerated computing backend (NOACC/CUDA/SYCL/HIP/OMP)") set_property(CACHE HiPACE_COMPUTE PROPERTY STRINGS ${HiPACE_COMPUTE_VALUES}) if(NOT HiPACE_COMPUTE IN_LIST HiPACE_COMPUTE_VALUES) diff --git a/docs/source/building/building.rst b/docs/source/building/building.rst index b8d0529cc9..7cc1b14162 100644 --- a/docs/source/building/building.rst +++ b/docs/source/building/building.rst @@ -157,7 +157,7 @@ or by providing arguments to the CMake call CMake Option Default & Values Description ----------------------------- ---------------------------------------- ----------------------------------------------------- ``CMAKE_BUILD_TYPE`` RelWithDebInfo/**Release**/Debug Type of build, symbols & optimizations - ``HiPACE_COMPUTE`` **NOACC**/CUDA/SYCL/HIP/OMP On-node, accelerated computing backend + ``HiPACE_COMPUTE`` NOACC/CUDA/SYCL/HIP/**OMP** On-node, accelerated computing backend ``HiPACE_MPI`` **ON**/OFF Multi-node support (message-passing) ``HiPACE_PRECISION`` SINGLE/**DOUBLE** Floating point precision (single/double) ``HiPACE_amrex_repo`` https://github.com/AMReX-Codes/amrex.git Repository URI to pull and build AMReX from From f1295c6a3272206c40f431c0fa99742984d67483 Mon Sep 17 00:00:00 2001 From: Maxence Thevenet Date: Wed, 28 Jul 2021 08:54:25 +0200 Subject: [PATCH 2/4] doc for OpenMP on the Juwels Booster --- 
.../source/building/platforms/booster_jsc.rst | 58 +++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/docs/source/building/platforms/booster_jsc.rst b/docs/source/building/platforms/booster_jsc.rst index 588e9cc030..93df22c5e5 100644 --- a/docs/source/building/platforms/booster_jsc.rst +++ b/docs/source/building/platforms/booster_jsc.rst @@ -6,6 +6,9 @@ For more information please visit the `JSC documentation @juwels-booster.fz-juelich.de``. +Running on GPU +-------------- + Create a file ``profile.hipace`` and ``source`` it whenever you log in and want to work with HiPACE++: .. code-block:: bash @@ -59,3 +62,58 @@ You can then create your directory in your ``$SCRATCH_<project id>``, where you srun -n 8 --cpu_bind=sockets $HOME/src/hipace/build/bin/hipace.MPI.CUDA.DP inputs and use it to submit a simulation. + +Running on CPU +-------------- + +Create a file ``profile.hipace`` and ``source`` it whenever you log in and want to work with HiPACE++: + +.. code-block:: bash + + # please set your project account + export proj=<your project id> + # required dependencies + module load CMake + module load GCC + module load OpenMPI + module load FFTW + module load HDF5 + module load ccache # optional, accelerates recompilation + +Install HiPACE++ (the first time, and whenever you want the latest version): + +.. code-block:: bash + + source profile.hipace + git clone https://github.com/Hi-PACE/hipace.git $HOME/src/hipace # only the first time + cd $HOME/src/hipace + rm -rf build + cmake -S . -B build -DHiPACE_COMPUTE=OMP + cmake --build build -j 16 + +You can get familiar with the HiPACE++ input file format in our :doc:`../../run/get_started` section, to prepare an input file that suits your needs. +You can then create your directory in your ``$SCRATCH_<project id>``, where you can put your input file and adapt the following submission script: + +.. 
code-block:: bash + + #!/bin/bash -l + #SBATCH -A $proj + #SBATCH --partition=booster + #SBATCH --nodes=1 + #SBATCH --ntasks=1 + #SBATCH --time=00:05:00 + #SBATCH --job-name=hipace + #SBATCH --output=hipace-%j-%N.txt + #SBATCH --error=hipace-%j-%N.err + + source $HOME/profile.hipace + + # These options give the best performance, in particular for the threaded FFTW + export OMP_PROC_BIND=false # true false master close spread + export OMP_PLACES=cores # threads cores sockets + + export OMP_NUM_THREADS=8 # Anything <= 16, depending on the problem size + + srun -n 8 --cpu_bind=sockets <path/to/executable> inputs + +and use it to submit a simulation. From 9f6076903570020ec0aff105cce959ecc38bbc6c Mon Sep 17 00:00:00 2001 From: Maxence Thevenet Date: Wed, 28 Jul 2021 08:56:58 +0200 Subject: [PATCH 3/4] eol --- docs/source/building/platforms/booster_jsc.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/building/platforms/booster_jsc.rst b/docs/source/building/platforms/booster_jsc.rst index 93df22c5e5..2ba3562d04 100644 --- a/docs/source/building/platforms/booster_jsc.rst +++ b/docs/source/building/platforms/booster_jsc.rst @@ -113,7 +113,7 @@ You can then create your directory in your ``$SCRATCH_<project id>``, where you export OMP_PLACES=cores # threads cores sockets export OMP_NUM_THREADS=8 # Anything <= 16, depending on the problem size - + srun -n 8 --cpu_bind=sockets <path/to/executable> inputs and use it to submit a simulation. 
From bd22bddab941e3b14a53c9f328276978b527a63a Mon Sep 17 00:00:00 2001 From: Maxence Thevenet Date: Wed, 28 Jul 2021 11:35:55 +0200 Subject: [PATCH 4/4] Warning that Thou Shalt Not run CPU-only on the Juwels booster --- docs/source/building/platforms/booster_jsc.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docs/source/building/platforms/booster_jsc.rst b/docs/source/building/platforms/booster_jsc.rst index 2ba3562d04..d2789897c9 100644 --- a/docs/source/building/platforms/booster_jsc.rst +++ b/docs/source/building/platforms/booster_jsc.rst @@ -66,6 +66,11 @@ and use it to submit a simulation. Running on CPU -------------- +.. warning:: + The Juwels Booster is a GPU-accelerated supercomputer, and running on CPUs only is strongly discouraged. + This section only illustrates how to run efficiently on CPU with OpenMP threading; it was tested on the Juwels Booster for practical reasons, but should apply to other supercomputers. + In particular, the proposed values of ``OMP_PROC_BIND`` and ``OMP_PLACES`` give decent performance for both threaded FFTW and particle operations. + Create a file ``profile.hipace`` and ``source`` it whenever you log in and want to work with HiPACE++: .. code-block:: bash