diff --git a/Makefile b/Makefile index b569fe250..a73b8ddd9 100644 --- a/Makefile +++ b/Makefile @@ -23,10 +23,13 @@ xlf: "LDFLAGS_DEBUG = -O0 -g" \ "FFLAGS_OMP = -qsmp=omp" \ "CFLAGS_OMP = -qsmp=omp" \ + "FFLAGS_ACC =" \ + "CFLAGS_ACC =" \ "CORE = $(CORE)" \ "DEBUG = $(DEBUG)" \ "USE_PAPI = $(USE_PAPI)" \ "OPENMP = $(OPENMP)" \ + "OPENACC = $(OPENACC)" \ "CPPFLAGS = $(MODEL_FORMULATION) -D_MPI" ) ftn: @@ -44,10 +47,13 @@ ftn: "LDFLAGS_OPT = " \ "FFLAGS_OMP = -mp" \ "CFLAGS_OMP = -mp" \ + "FFLAGS_ACC =" \ + "CFLAGS_ACC =" \ "CORE = $(CORE)" \ "DEBUG = $(DEBUG)" \ "USE_PAPI = $(USE_PAPI)" \ "OPENMP = $(OPENMP)" \ + "OPENACC = $(OPENACC)" \ "CPPFLAGS = $(MODEL_FORMULATION) -D_MPI -DUNDERSCORE" ) titan-cray: @@ -62,10 +68,13 @@ titan-cray: "LDFLAGS_OPT = -O3" \ "FFLAGS_OMP = " \ "CFLAGS_OMP = " \ + "FFLAGS_ACC =" \ + "CFLAGS_ACC =" \ "CORE = $(CORE)" \ "DEBUG = $(DEBUG)" \ "USE_PAPI = $(USE_PAPI)" \ "OPENMP = $(OPENMP)" \ + "OPENACC = $(OPENACC)" \ "CPPFLAGS = $(MODEL_FORMULATION) -D_MPI -DUNDERSCORE" ) pgi: @@ -87,10 +96,13 @@ pgi: "LDFLAGS_DEBUG = -O0 -g -Mbounds -Mchkptr -Ktrap=divz,fp,inv,ovf -traceback" \ "FFLAGS_OMP = -mp" \ "CFLAGS_OMP = -mp" \ + "FFLAGS_ACC = -Mnofma -acc -ta=tesla:cc60 -Minfo" \ + "CFLAGS_ACC =" \ "CORE = $(CORE)" \ "DEBUG = $(DEBUG)" \ "USE_PAPI = $(USE_PAPI)" \ "OPENMP = $(OPENMP)" \ + "OPENACC = $(OPENACC)" \ "CPPFLAGS = $(MODEL_FORMULATION) -D_MPI -DUNDERSCORE" ) pgi-nersc: @@ -108,10 +120,13 @@ pgi-nersc: "LDFLAGS_OPT = -O3" \ "FFLAGS_OMP = -mp" \ "CFLAGS_OMP = -mp" \ + "FFLAGS_ACC =" \ + "CFLAGS_ACC =" \ "CORE = $(CORE)" \ "DEBUG = $(DEBUG)" \ "USE_PAPI = $(USE_PAPI)" \ "OPENMP = $(OPENMP)" \ + "OPENACC = $(OPENACC)" \ "CPPFLAGS = $(MODEL_FORMULATION) -D_MPI -DUNDERSCORE" ) pgi-llnl: @@ -129,10 +144,13 @@ pgi-llnl: "LDFLAGS_OPT = " \ "FFLAGS_OMP = -mp" \ "CFLAGS_OMP = -mp" \ + "FFLAGS_ACC =" \ + "CFLAGS_ACC =" \ "CORE = $(CORE)" \ "DEBUG = $(DEBUG)" \ "USE_PAPI = $(USE_PAPI)" \ "OPENMP = $(OPENMP)" \ + "OPENACC = $(OPENACC)" \ "CPPFLAGS = $(MODEL_FORMULATION) -D_MPI -DUNDERSCORE" ) ifort: @@ -154,10 +172,13 @@ ifort: "LDFLAGS_DEBUG = -g -fpe0 -traceback" \ "FFLAGS_OMP = -qopenmp" \ "CFLAGS_OMP = -qopenmp" \ + "FFLAGS_ACC =" \ + "CFLAGS_ACC =" \ "CORE = $(CORE)" \ "DEBUG = $(DEBUG)" \ "USE_PAPI = $(USE_PAPI)" \ "OPENMP = $(OPENMP)" \ + "OPENACC = $(OPENACC)" \ "CPPFLAGS = $(MODEL_FORMULATION) -D_MPI -DUNDERSCORE" ) ifort-scorep: @@ -179,10 +200,13 @@ ifort-scorep: "LDFLAGS_DEBUG = -g -fpe0 -traceback" \ "FFLAGS_OMP = -qopenmp" \ "CFLAGS_OMP = -qopenmp" \ + "FFLAGS_ACC =" \ + "CFLAGS_ACC =" \ "CORE = $(CORE)" \ "DEBUG = $(DEBUG)" \ "USE_PAPI = $(USE_PAPI)" \ "OPENMP = $(OPENMP)" \ + "OPENACC = $(OPENACC)" \ "CPPFLAGS = $(MODEL_FORMULATION) -D_MPI -DUNDERSCORE" ) ifort-gcc: @@ -204,10 +228,13 @@ ifort-gcc: "LDFLAGS_DEBUG = -g -fpe0 -traceback" \ "FFLAGS_OMP = -qopenmp" \ "CFLAGS_OMP = -fopenmp" \ + "FFLAGS_ACC =" \ + "CFLAGS_ACC =" \ "CORE = $(CORE)" \ "DEBUG = $(DEBUG)" \ "USE_PAPI = $(USE_PAPI)" \ "OPENMP = $(OPENMP)" \ + "OPENACC = $(OPENACC)" \ "CPPFLAGS = $(MODEL_FORMULATION) -D_MPI -DUNDERSCORE" ) gfortran: @@ -229,10 +256,13 @@ gfortran: "LDFLAGS_DEBUG = -g -m64" \ "FFLAGS_OMP = -fopenmp" \ "CFLAGS_OMP = -fopenmp" \ + "FFLAGS_ACC =" \ + "CFLAGS_ACC =" \ "CORE = $(CORE)" \ "DEBUG = $(DEBUG)" \ "USE_PAPI = $(USE_PAPI)" \ "OPENMP = $(OPENMP)" \ + "OPENACC = $(OPENACC)" \ "CPPFLAGS = $(MODEL_FORMULATION) -D_MPI -DUNDERSCORE" ) gfortran-clang: @@ -254,10 +284,13 @@ gfortran-clang: "LDFLAGS_DEBUG = -g -m64" \ "FFLAGS_OMP = -fopenmp" \ "CFLAGS_OMP = -fopenmp" \ + "FFLAGS_ACC =" \ + "CFLAGS_ACC =" \ "CORE = $(CORE)" \ "DEBUG = $(DEBUG)" \ "USE_PAPI = $(USE_PAPI)" \ "OPENMP = $(OPENMP)" \ + "OPENACC = $(OPENACC)" \ "CPPFLAGS = $(MODEL_FORMULATION) -D_MPI -DUNDERSCORE" ) g95: @@ -275,10 +308,13 @@ g95: "LDFLAGS_OPT = -O3" \ "FFLAGS_OMP = -fopenmp" \ "CFLAGS_OMP = -fopenmp" \ + "FFLAGS_ACC =" \ + "CFLAGS_ACC =" \ "CORE = $(CORE)" \ "DEBUG = $(DEBUG)" \ "USE_PAPI = $(USE_PAPI)" \ "OPENMP = $(OPENMP)" \ + "OPENACC = $(OPENACC)" \ "CPPFLAGS = $(MODEL_FORMULATION) -D_MPI -DUNDERSCORE" ) pathscale-nersc: @@ -296,10 +332,13 @@ pathscale-nersc: "LDFLAGS_OPT = -O3" \ "FFLAGS_OMP = -mp" \ "CFLAGS_OMP = -mp" \ + "FFLAGS_ACC =" \ + "CFLAGS_ACC =" \ "CORE = $(CORE)" \ "DEBUG = $(DEBUG)" \ "USE_PAPI = $(USE_PAPI)" \ "OPENMP = $(OPENMP)" \ + "OPENACC = $(OPENACC)" \ "CPPFLAGS = $(MODEL_FORMULATION) -D_MPI -DUNDERSCORE" ) cray-nersc: @@ -317,10 +356,13 @@ cray-nersc: "LDFLAGS_OPT = -O3" \ "FFLAGS_OMP = " \ "CFLAGS_OMP = " \ + "FFLAGS_ACC =" \ + "CFLAGS_ACC =" \ "CORE = $(CORE)" \ "DEBUG = $(DEBUG)" \ "USE_PAPI = $(USE_PAPI)" \ "OPENMP = $(OPENMP)" \ + "OPENACC = $(OPENACC)" \ "CPPFLAGS = $(MODEL_FORMULATION) -D_MPI -DUNDERSCORE" ) gnu-nersc: @@ -361,6 +403,8 @@ intel-nersc: "LDFLAGS_OPT = -O3" \ "FFLAGS_OMP = -qopenmp" \ "CFLAGS_OMP = -qopenmp" \ + "FFLAGS_ACC =" \ + "CFLAGS_ACC =" \ "FFLAGS_DEBUG = -real-size 64 -g -convert big_endian -FR -CU -CB -check all -gen-interfaces -warn interfaces -traceback" \ "CFLAGS_DEBUG = -g -traceback" \ "CXXFLAGS_DEBUG = -g -traceback" \ @@ -369,6 +413,7 @@ intel-nersc: "DEBUG = $(DEBUG)" \ "USE_PAPI = $(USE_PAPI)" \ "OPENMP = $(OPENMP)" \ + "OPENACC = $(OPENACC)" \ "CPPFLAGS = $(MODEL_FORMULATION) -D_MPI -DUNDERSCORE" ) bluegene: @@ -390,10 +435,13 @@ bluegene: "LDFLAGS_DEBUG = -O0 -g" \ "FFLAGS_OMP = -qsmp=omp" \ "CFLAGS_OMP = -qsmp=omp" \ + "FFLAGS_ACC =" \ + "CFLAGS_ACC =" \ "CORE = $(CORE)" \ "DEBUG = $(DEBUG)" \ "USE_PAPI = $(USE_PAPI)" \ "OPENMP = $(OPENMP)" \ + "OPENACC = $(OPENACC)" \ "CPPFLAGS = $(MODEL_FORMULATION) -D_MPI" ) CPPINCLUDES = @@ -514,6 +562,13 @@ ifeq "$(OPENMP)" "true" LDFLAGS += $(FFLAGS_OMP) endif #OPENMP IF +ifeq "$(OPENACC)" "true" + FFLAGS += $(FFLAGS_ACC) + CFLAGS += $(CFLAGS_ACC) + override CPPFLAGS += "-DMPAS_OPENACC" + LDFLAGS += $(FFLAGS_ACC) +endif #OPENACC IF + ifeq "$(PRECISION)" "single" CFLAGS += "-DSINGLE_PRECISION" CXXFLAGS += "-DSINGLE_PRECISION" @@ -586,6 +641,12 @@ else OPENMP_MESSAGE="MPAS was built without OpenMP support." endif +ifeq "$(OPENACC)" "true" + OPENACC_MESSAGE="MPAS was built with OpenACC enabled." +else + OPENACC_MESSAGE="MPAS was built without OpenACC support." +endif + ifneq ($(wildcard .mpas_core_*), ) # CHECK FOR BUILT CORE ifneq ($(wildcard .mpas_core_$(CORE)), ) # CHECK FOR SAME CORE AS ATTEMPTED BUILD. @@ -686,6 +747,10 @@ ifeq "$(OPENMP)" "true" @rm -fr conftest.* endif +ifeq "$(OPENACC)" "true" + @echo "Testing compiler for OpenACC support" +endif + mpas_main: compiler_test ifeq "$(AUTOCLEAN)" "true" @@ -723,6 +788,7 @@ endif @echo $(PAPI_MESSAGE) @echo $(TAU_MESSAGE) @echo $(OPENMP_MESSAGE) + @echo $(OPENACC_MESSAGE) ifeq "$(AUTOCLEAN)" "true" @echo $(AUTOCLEAN_MESSAGE) endif @@ -804,6 +870,7 @@ errmsg: @echo " TIMER_LIB=gptl - Uses gptl for the timer interface instead of the native interface" @echo " TIMER_LIB=tau - Uses TAU for the timer interface instead of the native interface" @echo " OPENMP=true - builds and links with OpenMP flags. Default is to not use OpenMP." + @echo " OPENACC=true - builds and links with OpenACC flags. Default is to not use OpenACC." @echo " USE_PIO2=true - links with the PIO 2 library. Default is to use the PIO 1.x library." @echo " PRECISION=single - builds with default single-precision real kind. Default is to use double-precision." @echo "" diff --git a/src/Makefile b/src/Makefile index d5f158404..b0004ba4d 100644 --- a/src/Makefile +++ b/src/Makefile @@ -15,7 +15,7 @@ endif all: mpas mpas: $(AUTOCLEAN_DEPS) externals frame ops dycore drver - $(LINKER) $(LDFLAGS) -o $(EXE_NAME) driver/*.o -L. -ldycore -lops -lframework $(LIBS) -I./external/esmf_time_f90 -L./external/esmf_time_f90 -lesmf_time -r8 -O3 -byteswapio -Mfree -m64 -Mnofma -acc -ta=tesla:cc60 + $(LINKER) $(LDFLAGS) -o $(EXE_NAME) driver/*.o -L. -ldycore -lops -lframework $(LIBS) -I./external/esmf_time_f90 -L./external/esmf_time_f90 -lesmf_time externals: $(AUTOCLEAN_DEPS) ( cd external; $(MAKE) FC="$(FC)" SFC="$(SFC)" CC="$(CC)" SCC="$(SCC)" FFLAGS="$(FFLAGS)" CFLAGS="$(CFLAGS)" CPP="$(CPP)" NETCDF="$(NETCDF)" CORE="$(CORE)" all ) diff --git a/src/core_atmosphere/dynamics/Makefile b/src/core_atmosphere/dynamics/Makefile index 761ca07bc..97785deb4 100644 --- a/src/core_atmosphere/dynamics/Makefile +++ b/src/core_atmosphere/dynamics/Makefile @@ -18,5 +18,5 @@ ifeq "$(GEN_F90)" "true" $(CPP) $(CPPFLAGS) $(PHYSICS) $(CPPINCLUDES) $< > $*.f90 $(FC) $(FFLAGS) -c $*.f90 $(FCINCLUDES) -I.. -I../../framework -I../../operators -I../physics -I../physics/physics_wrf -I../../external/esmf_time_f90 else - $(FC) $(CPPFLAGS) $(PHYSICS) $(FFLAGS) -c $*.F $(CPPINCLUDES) $(FCINCLUDES) -I.. -I../../framework -I../../operators -I../physics -I../physics/physics_wrf -I../../external/esmf_time_f90 -r8 -O3 -byteswapio -Mfree -m64 -Mnofma -acc -ta=tesla:cc60 -Minfo + $(FC) $(CPPFLAGS) $(PHYSICS) $(FFLAGS) -c $*.F $(CPPINCLUDES) $(FCINCLUDES) -I.. -I../../framework -I../../operators -I../physics -I../physics/physics_wrf -I../../external/esmf_time_f90 endif diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 42c639d7d..41e5d401a 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -26,7 +26,11 @@ module atm_time_integration #endif use mpas_atm_iau + +#ifdef MPAS_OPENACC use openacc +#endif + integer :: timerid, secs, u_secs ! Used to store physics tendencies for dynamics variables @@ -211,11 +215,15 @@ subroutine atm_srk3(domain, dt, itimestep) real (kind=RKIND), dimension(:,:), pointer :: gpu_rtheta_pp, gpu_u_1, gpu_u_2, gpu_w_1, gpu_w_2 real (kind=RKIND), dimension(:,:), pointer :: gpu_tend_u, gpu_ru_p, gpu_rw_p, gpu_rho_pp, gpu_pv_edge, gpu_rho_edge real (kind=RKIND), dimension(:,:,:), pointer :: gpu_scalars_1 + +#ifdef MPAS_OPENACC integer :: rrpk_rank, rrpk_devices, rrpk_local_gpu_id rrpk_rank = domain % dminfo % my_proc_id rrpk_devices = acc_get_num_devices(acc_device_nvidia) rrpk_local_gpu_id = mod(rrpk_rank,rrpk_devices) call acc_set_device_num(rrpk_local_gpu_id,acc_device_nvidia) +#endif + ! ! Retrieve configuration options !