Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add OpenMP for FFTW #541

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 74 additions & 29 deletions cmake/dependencies/FFT.cmake
Original file line number Diff line number Diff line change
@@ -1,3 +1,53 @@
# Helper Functions ############################################################
#
option(HiPACE_FFTW_IGNORE_OMP "Ignore FFTW3 OpenMP support, even if found" OFF)
mark_as_advanced(HiPACE_FFTW_IGNORE_OMP)

# Set the HIPACE_FFTW_OMP=1 define on HiPACE::thirdparty::FFT if TRUE and print
# a message
#
function(fftw_add_define HAS_FFTW_OMP_LIB)
if(HAS_FFTW_OMP_LIB)
message(STATUS "FFTW: Found OpenMP support")
target_compile_definitions(HiPACE::thirdparty::FFT INTERFACE HIPACE_FFTW_OMP=1)
else()
message(STATUS "FFTW: Could NOT find OpenMP support")
Comment on lines +11 to +14
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

lovely!

endif()
endfunction()

# Check if the PkgConfig target location has an _omp library, e.g.,
# libfftw3(f)_omp.a shipped and if yes, set the HIPACE_FFTW_OMP=1 define.
#
# Arguments:
#   library_paths          list of directories to search; only these
#                          directories are searched, every default search
#                          location of find_library() is switched off
#   fftw_precision_suffix  suffix appended to "fftw3" in the library name
#                          (the caller in this file passes "" or "f")
#
# Side effects:
#   - if the library is found, it is added to the INTERFACE link libraries
#     of HiPACE::thirdparty::FFT
#   - fftw_add_define() is called with TRUE/FALSE according to the result
#   - the cache entry HAS_FFTW_OMP_LIB<fftw_precision_suffix> is created
#     and marked as advanced
#
function(fftw_check_omp library_paths fftw_precision_suffix)
    if(HiPACE_FFTW_IGNORE_OMP)
        fftw_add_define(FALSE)
        return()
    endif()

    # find_library() stores its result in the cache and does not search
    # again once that entry holds a path. Use one cache entry per precision
    # suffix, so that reconfiguring an existing build directory with another
    # precision does not silently reuse the library of the previous one.
    set(omp_lib_var "HAS_FFTW_OMP_LIB${fftw_precision_suffix}")

    find_library(${omp_lib_var} fftw3${fftw_precision_suffix}_omp
        PATHS ${library_paths}
        NO_DEFAULT_PATH
        NO_PACKAGE_ROOT_PATH
        NO_CMAKE_PATH
        NO_CMAKE_ENVIRONMENT_PATH
        NO_SYSTEM_ENVIRONMENT_PATH
        NO_CMAKE_SYSTEM_PATH
        NO_CMAKE_FIND_ROOT_PATH
    )
    mark_as_advanced(${omp_lib_var})

    if(${omp_lib_var})
        # the .pc files here forget to link the _omp.a/so files
        # explicitly - we add those manually to avoid any trouble,
        # e.g., in static builds.
        target_link_libraries(HiPACE::thirdparty::FFT INTERFACE "${${omp_lib_var}}")
        fftw_add_define(TRUE)
    else()
        fftw_add_define(FALSE)
    endif()
endfunction()


# Various FFT implementations that we want to use #############################
#

# cuFFT (CUDA)
# TODO: check if `find_package` search works

Expand Down Expand Up @@ -28,20 +78,18 @@ elseif(NOT HiPACE_COMPUTE STREQUAL CUDA)
endif()
mark_as_advanced(HiPACE_FFTW_SEARCH)

# floating point precision suffix: "" for double precision, "f" otherwise (single precision)
if(HiPACE_PRECISION STREQUAL "DOUBLE")
set(HFFTWp "")
else()
set(HFFTWp "f")
endif()

if(HiPACE_FFTW_SEARCH STREQUAL CMAKE)
if(HiPACE_PRECISION STREQUAL "DOUBLE")
find_package(FFTW3 CONFIG REQUIRED)
else()
find_package(FFTW3f CONFIG REQUIRED)
endif()
find_package(FFTW3${HFFTWp} CONFIG REQUIRED)
else()
if(HiPACE_PRECISION STREQUAL "DOUBLE")
find_package(PkgConfig REQUIRED QUIET)
pkg_check_modules(fftw3 REQUIRED IMPORTED_TARGET fftw3)
else()
find_package(PkgConfig REQUIRED QUIET)
pkg_check_modules(fftw3f REQUIRED IMPORTED_TARGET fftw3f)
endif()
find_package(PkgConfig REQUIRED QUIET)
pkg_check_modules(fftw3${HFFTWp} REQUIRED IMPORTED_TARGET fftw3${HFFTWp})
endif()
endif()

Expand All @@ -52,27 +100,24 @@ if(HiPACE_COMPUTE STREQUAL CUDA)
elseif(HiPACE_COMPUTE STREQUAL HIP)
make_third_party_includes_system(roc::rocfft FFT)
else()
if(HiPACE_PRECISION STREQUAL "DOUBLE")
if(FFTW3_FOUND)
# subtargets: fftw3, fftw3_threads, fftw3_omp
if(HiPACE_COMPUTE STREQUAL OMP AND TARGET FFTW3::fftw3_omp)
make_third_party_includes_system(FFTW3::fftw3_omp FFT)
else()
make_third_party_includes_system(FFTW3::fftw3 FFT)
endif()
if(FFTW3_FOUND)
# subtargets: fftw3(p), fftw3(p)_threads, fftw3(p)_omp
if(HiPACE_COMPUTE STREQUAL OMP AND
TARGET FFTW3::fftw3${HFFTWp}_omp AND
NOT HiPACE_FFTW_IGNORE_OMP)
make_third_party_includes_system(FFTW3::fftw3${HFFTWp}_omp FFT)
fftw_add_define(TRUE)
else()
make_third_party_includes_system(PkgConfig::fftw3 FFT)
make_third_party_includes_system(FFTW3::fftw3${HFFTWp} FFT)
fftw_add_define(FALSE)
endif()
else()
if(FFTW3f_FOUND)
# subtargets: fftw3f, fftw3f_threads, fftw3f_omp
if(HiPACE_COMPUTE STREQUAL OMP AND TARGET FFTW3::fftw3f_omp)
make_third_party_includes_system(FFTW3::fftw3f_omp FFT)
else()
make_third_party_includes_system(FFTW3::fftw3f FFT)
endif()
make_third_party_includes_system(PkgConfig::fftw3${HFFTWp} FFT)
if(HiPACE_COMPUTE STREQUAL OMP AND
NOT HiPACE_FFTW_IGNORE_OMP)
fftw_check_omp("${fftw3${HFFTWp}_LIBRARY_DIRS}" "${HFFTWp}")
else()
make_third_party_includes_system(PkgConfig::fftw3f FFT)
fftw_add_define(FALSE)
endif()
endif()
endif()
16 changes: 16 additions & 0 deletions src/fields/fft_poisson_solver/fft/WrapDSTW.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
#include "AnyDST.H"
#include "utils/HipaceProfilerWrapper.H"

#ifdef AMREX_USE_OMP
# include <omp.h>
#endif

namespace AnyDST
{
#ifdef AMREX_USE_FLOAT
Expand All @@ -16,6 +20,18 @@ namespace AnyDST
const int nx = real_size[0];
const int ny = real_size[1];

#if defined(AMREX_USE_OMP) && defined(HIPACE_FFTW_OMP)
if (nx > 32 && ny > 32) {
# ifdef AMREX_USE_FLOAT
fftwf_init_threads();
fftwf_plan_with_nthreads(omp_get_max_threads());
# else
fftw_init_threads();
fftw_plan_with_nthreads(omp_get_max_threads());
Comment on lines +27 to +30
Copy link
Member

@ax3l ax3l Jun 28, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could also add, just to expose even more control, a runtime parameter that can overwrite the value passed to ..._nthreads() from the inputs file.

The default would be the heuristic you already added (1 if <32**2 cells and omp_get_max_threads() otherwise), but it could add a useful intermediate layer of control in case we want to set the FFT parallelism independent of the rest of the simulation that is controlled by OMP_NUM_THREADS.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You are right, this will be an interesting addition. After an offline discussion with @MaxThevenet, I will merge this PR as is and add this feature as soon as we have other OpenMP acceleration. As it is the only function using OpenMP, we currently have full control with OMP_NUM_THREADS.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh right, I forgot this is the first OpenMP accelerated part 😅

# endif
}
#endif

// Initialize fft_plan.m_plan with the vendor fft plan.
// Swap dimensions: AMReX FAB are Fortran-order but FFTW is C-order
dst_plan.m_plan = VendorCreatePlanR2R2D(
Expand Down