From 3d745522e7ce4de2b18f9c8f2ec62109b08e888f Mon Sep 17 00:00:00 2001 From: Youngsung Kim Date: Mon, 1 Apr 2024 14:35:37 -0400 Subject: [PATCH 01/11] Refactor: Migrate Omega from YAKL to Kokkos * The Omega build system builds and links with Kokkos. * Several Kokkos functions are wrapped in the Omega namespace. * Previous YAKL codes in Omega modules have been translated to Kokkos. * All unit tests have also been translated to Kokkos. * The translation has been tested on Frontier GPU (HIP)/CPU (OpenMP), on Perlmutter GPU (CUDA)/CPU (OpenMP), and on Chrysalis CPU (OpenMP). * On CPUs, the default parallelization is set to OpenMP. --- components/omega/CMakeLists.txt | 14 +- components/omega/OmegaBuild.cmake | 426 +++-- components/omega/create_scripts.py | 19 +- components/omega/doc/design/DataTypes.md | 51 +- components/omega/doc/design/Decomp.md | 2 +- components/omega/doc/design/Driver.md | 13 +- components/omega/doc/design/HorzMeshClass.md | 16 +- components/omega/doc/design/IO.md | 14 +- components/omega/doc/design/IOStreams.md | 2 +- components/omega/doc/design/MachEnv.md | 10 +- components/omega/doc/design/Reductions.md | 6 +- .../omega/doc/design/ShallowWaterOmega0.md | 4 +- components/omega/doc/devGuide/CMakeBuild.md | 13 +- components/omega/doc/devGuide/DataTypes.md | 14 +- components/omega/doc/devGuide/Decomp.md | 18 +- components/omega/doc/devGuide/HorzMesh.md | 10 +- components/omega/doc/devGuide/Logging.md | 6 +- components/omega/doc/devGuide/QuickStart.md | 6 +- components/omega/doc/index.md | 2 +- components/omega/doc/userGuide/Logging.md | 2 +- components/omega/external/CMakeLists.txt | 20 +- components/omega/external/README.md | 2 +- components/omega/src/CMakeLists.txt | 44 +- components/omega/src/base/DataTypes.h | 173 +- components/omega/src/base/Decomp.cpp | 112 +- components/omega/src/base/Decomp.h | 38 +- components/omega/src/base/Halo.cpp | 422 +++-- components/omega/src/base/Halo.h | 99 +- components/omega/src/infra/Config.cpp | 1 + 
components/omega/src/infra/IOField.h | 2 +- components/omega/src/infra/LogFormatters.h | 33 +- components/omega/src/infra/Logging.cpp | 1 + components/omega/src/infra/OmegaKokkos.h | 122 ++ components/omega/src/ocn/HorzMesh.cpp | 123 +- components/omega/src/ocn/HorzMesh.h | 83 +- components/omega/src/ocn/OcnDummy.cpp | 119 +- components/omega/test/CMakeLists.txt | 373 +--- components/omega/test/base/DataTypesTest.cpp | 1335 +++++++-------- components/omega/test/base/DecompTest.cpp | 163 +- components/omega/test/base/HaloTest.cpp | 494 +++--- components/omega/test/base/IOTest.cpp | 1519 +++++++++-------- components/omega/test/infra/IOFieldTest.cpp | 341 ++-- components/omega/test/infra/LoggingTest.cpp | 24 +- .../omega/test/infra/OmegaKokkosTest.cpp | 138 ++ components/omega/test/ocn/HorzMeshTest.cpp | 1018 +++++------ 45 files changed, 3822 insertions(+), 3625 deletions(-) create mode 100644 components/omega/src/infra/OmegaKokkos.h create mode 100644 components/omega/test/infra/OmegaKokkosTest.cpp diff --git a/components/omega/CMakeLists.txt b/components/omega/CMakeLists.txt index bd8ab4cea873..c771a0f9a1d3 100644 --- a/components/omega/CMakeLists.txt +++ b/components/omega/CMakeLists.txt @@ -14,6 +14,9 @@ cmake_policy(SET CMP0007 NEW) # Only interpret if() arguments as variables or keywords when unquoted cmake_policy(SET CMP0054 NEW) +#find_package() uses <PackageName>_ROOT variables. 
+cmake_policy(SET CMP0074 NEW) + ########################################################### # STEP 1: Setup # # # @@ -23,6 +26,8 @@ cmake_policy(SET CMP0054 NEW) # define variables and macros for Omega build include(${CMAKE_CURRENT_SOURCE_DIR}/OmegaBuild.cmake) +# common pre-processing for standalone and e3sm mode +common() # handle build modes if (NOT DEFINED PROJECT_NAME) @@ -31,14 +36,15 @@ if (NOT DEFINED PROJECT_NAME) cmake_minimum_required(VERSION 3.21) # the minimum version for HIP support # Collect machine and compiler info from CIME - preset() + init_standalone_build() project(${OMEGA_PROJECT_NAME} - LANGUAGES CXX + LANGUAGES C CXX ) set(CMAKE_CXX_STANDARD 17) # used in E3SM set(CMAKE_CXX_STANDARD_REQUIRED ON) + set(LINKER_LANGUAGE C) # needed to support Cray C compiler wrapper # update variables for standalone build setup_standalone_build() @@ -61,11 +67,11 @@ endif() ########################################################### # STEP 2: Update # # # -# update cmake & yakl variables, and adjust # +# update cmake & kokkos variables, and adjust # # build-controlling variables # ########################################################### -# update CMake and YAKL variables from the build-controlling variables +# update CMake and Kokkos variables from the build-controlling variables update_variables() ########################################### diff --git a/components/omega/OmegaBuild.cmake b/components/omega/OmegaBuild.cmake index 7e4c708715aa..d1ef5652e3b9 100644 --- a/components/omega/OmegaBuild.cmake +++ b/components/omega/OmegaBuild.cmake @@ -5,23 +5,24 @@ set(OMEGA_PROJECT_NAME "OmegaOceanModel") set(OMEGA_EXE_NAME "omega.exe") set(OMEGA_LIB_NAME "OmegaLib") +set(OMEGA_SOURCE_DIR ${CMAKE_CURRENT_LIST_DIR}) set(OMEGA_BUILD_MODES "E3SM" "STANDALONE" "NOT_DEFINED") set(OMEGA_BUILD_MODE NOT_DEFINED CACHE STRING "Omega build mode") set_property(CACHE OMEGA_BUILD_MODE PROPERTY STRINGS ${OMEGA_BUILD_MODES}) - -set(OMEGA_SOURCE_DIR ${CMAKE_CURRENT_LIST_DIR}) 
+set(OMEGA_BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}) set(OMEGA_DEFAULT_BUILD_TYPE Release) # Debug or Release -set(E3SM_CIME_ROOT ${OMEGA_SOURCE_DIR}/../../cime) -set(E3SM_CIMECONFIG_ROOT ${OMEGA_SOURCE_DIR}/../../cime_config) +set(E3SM_ROOT "${OMEGA_SOURCE_DIR}/../..") +set(E3SM_CIME_ROOT "${E3SM_ROOT}/cime") +set(E3SM_CIMECONFIG_ROOT "${E3SM_ROOT}/cime_config") +set(E3SM_EXTERNALS_ROOT "${E3SM_ROOT}/externals") ########################### # Macros # ########################### -# set build control variables used for both e3sm build and standalone build -macro(setup_common_variables) +macro(common) option(OMEGA_DEBUG "Turn on error message throwing (default OFF)." OFF) @@ -33,25 +34,29 @@ macro(setup_common_variables) set(OMEGA_LINK_OPTIONS "") endif() -endmacro() + if(NOT Python_FOUND) + find_package (Python COMPONENTS Interpreter) -# Collect machine and compiler info from CIME -macro(preset) + if(NOT Python_FOUND) + message(FATAL_ERROR "Python is not available, CMake will exit." ) + endif() + endif() - find_package (Python COMPONENTS Interpreter) +endmacro() - if(NOT Python_FOUND) - message(FATAL_ERROR "Python is not available, CMake will exit." 
) - endif() +# Collect machine and compiler info from CIME +# and detect OMEGA_ARCH and compilers +macro(init_standalone_build) - set(_TMP_CMAKE_FILE ${CMAKE_CURRENT_BINARY_DIR}/_Omega.cmake) + # update CMake configuration with CIME configuration + set(_TMP_CMAKE_FILE ${OMEGA_BUILD_DIR}/_Omega.cmake) set(_PY_OPTS "-p;${E3SM_CIME_ROOT};-o;${_TMP_CMAKE_FILE}") - if(DEFINED OMEGA_CIME_COMPILER) + if(OMEGA_CIME_COMPILER) list(APPEND _PY_OPTS "-c" "${OMEGA_CIME_COMPILER}") endif() - if(DEFINED OMEGA_CIME_MACHINE) + if(OMEGA_CIME_MACHINE) list(APPEND _PY_OPTS "-m" "${OMEGA_CIME_MACHINE}") endif() @@ -77,61 +82,261 @@ macro(preset) endif() include(${_TMP_CMAKE_FILE}) - if(OMEGA_BUILD_TYPE STREQUAL "Release") + + if(NOT OMEGA_BUILD_TYPE STREQUAL "Debug") file(REMOVE ${_TMP_CMAKE_FILE}) endif() + # find compilers if(OMEGA_C_COMPILER) find_program(_OMEGA_C_COMPILER ${OMEGA_C_COMPILER}) + + elseif(MPILIB STREQUAL "mpi-serial") + find_program(_OMEGA_C_COMPILER ${SCC}) + else() - if (MPILIB STREQUAL "mpi-serial") - find_program(_OMEGA_C_COMPILER ${SCC}) - else() - find_program(_OMEGA_C_COMPILER ${MPICC}) - endif() + find_program(_OMEGA_C_COMPILER ${MPICC}) + endif() + + if(_OMEGA_C_COMPILER) + set(OMEGA_C_COMPILER ${_OMEGA_C_COMPILER}) + + else() + message(FATAL_ERROR "C compiler, '${OMEGA_C_COMPILER}', is not found." ) endif() if(OMEGA_CXX_COMPILER) find_program(_OMEGA_CXX_COMPILER ${OMEGA_CXX_COMPILER}) + + elseif(MPILIB STREQUAL "mpi-serial") + find_program(_OMEGA_CXX_COMPILER ${SCXX}) + else() - if (MPILIB STREQUAL "mpi-serial") - find_program(_OMEGA_CXX_COMPILER ${SCXX}) - else() - find_program(_OMEGA_CXX_COMPILER ${MPICXX}) - endif() + find_program(_OMEGA_CXX_COMPILER ${MPICXX}) + endif() + + if(_OMEGA_CXX_COMPILER) + set(OMEGA_CXX_COMPILER ${_OMEGA_CXX_COMPILER}) + + else() + message(FATAL_ERROR "C++ compiler, '${OMEGA_CXX_COMPILER}', is not found." 
) endif() if(OMEGA_Fortran_COMPILER) find_program(_OMEGA_Fortran_COMPILER ${OMEGA_Fortran_COMPILER}) + + elseif(MPILIB STREQUAL "mpi-serial") + find_program(_OMEGA_Fortran_COMPILER ${SFC}) + else() - if (MPILIB STREQUAL "mpi-serial") - find_program(_OMEGA_Fortran_COMPILER ${SFC}) + find_program(_OMEGA_Fortran_COMPILER ${MPIFC}) + endif() + + if(_OMEGA_Fortran_COMPILER) + set(OMEGA_Fortran_COMPILER ${_OMEGA_Fortran_COMPILER}) + + else() + message(FATAL_ERROR "Fortran compiler, '${OMEGA_Fortran_COMPILER}', is not found." ) + endif() + + message(STATUS "OMEGA_C_COMPILER = ${OMEGA_C_COMPILER}") + message(STATUS "OMEGA_CXX_COMPILER = ${OMEGA_CXX_COMPILER}") + message(STATUS "OMEGA_Fortran_COMPILER = ${OMEGA_Fortran_COMPILER}") + + # detect OMEGA_ARCH if not provided + if(NOT OMEGA_ARCH) + + if(USE_CUDA) + set(OMEGA_ARCH "CUDA") + + elseif(USE_HIP) + set(OMEGA_ARCH "HIP") + else() - find_program(_OMEGA_Fortran_COMPILER ${MPIFC}) + + execute_process( + COMMAND ${OMEGA_CXX_COMPILER} --version + RESULT_VARIABLE _CXX_VER_RESULT + OUTPUT_VARIABLE _CXX_VER_OUTPUT) + + if (_CXX_VER_RESULT EQUAL 0) + + string(REGEX MATCH "HIP|hip" _HIP_CHECK "${_CXX_VER_OUTPUT}") + string(REGEX MATCH "AMD|amd" _AMD_CHECK "${_CXX_VER_OUTPUT}") + string(REGEX MATCH "NVCC|nvcc" _NVCC_CHECK "${_CXX_VER_OUTPUT}") + string(REGEX MATCH "NVIDIA|nvidia" _NVIDIA_CHECK "${_CXX_VER_OUTPUT}") + + if(_HIP_CHECK AND _AMD_CHECK) + set(OMEGA_ARCH "HIP") + + elseif(_NVCC_CHECK AND _NVIDIA_CHECK) + set(OMEGA_ARCH "CUDA") + + else() + set(OMEGA_ARCH "OPENMP") + + endif() + else() + set(OMEGA_ARCH "OPENMP") + + endif() endif() endif() - set(OMEGA_C_COMPILER ${_OMEGA_C_COMPILER}) - set(CMAKE_C_COMPILER ${OMEGA_C_COMPILER}) + message(STATUS "OMEGA_ARCH = ${OMEGA_ARCH}") - set(OMEGA_CXX_COMPILER ${_OMEGA_CXX_COMPILER}) - set(CMAKE_CXX_COMPILER ${OMEGA_CXX_COMPILER}) + # create a profile script + set(_ProfileScript ${OMEGA_BUILD_DIR}/omega_profile.sh) + file(WRITE ${_ProfileScript} "#!/usr/bin/env bash\n\n") + 
file(APPEND ${_ProfileScript} "source ./omega_env.sh\n\n") + file(APPEND ${_ProfileScript} "# modify 'OUTFILE' with a path in that the profiler can\n") + file(APPEND ${_ProfileScript} "# create files such as a path in a scratch file system.\n") - set(OMEGA_Fortran_COMPILER ${_OMEGA_Fortran_COMPILER}) + # set C and Fortran compilers *before* calling CMake project() + set(CMAKE_C_COMPILER ${OMEGA_C_COMPILER}) set(CMAKE_Fortran_COMPILER ${OMEGA_Fortran_COMPILER}) - message(STATUS "OMEGA_C_COMPILER = ${OMEGA_C_COMPILER}") - message(STATUS "OMEGA_CXX_COMPILER = ${OMEGA_CXX_COMPILER}") - message(STATUS "OMEGA_Fortran_COMPILER = ${OMEGA_Fortran_COMPILER}") - message(STATUS "MPI_EXEC = ${MPI_EXEC}") +# TODO: do we want to use these variables? +# # Set compiler and linker flags +# if (CXXFLAGS) +# separate_arguments(_CXXFLAGS NATIVE_COMMAND ${CXXFLAGS}) +# list(APPEND OMEGA_CXX_FLAGS ${_CXXFLAGS}) +# endif() +# +# if (LDFLAGS) +# separate_arguments(_LDFLAGS NATIVE_COMMAND ${LDFLAGS}) +# list(APPEND OMEGA_LINK_OPTIONS ${_LDFLAGS}) +# endif() +# +# if (SLIBS) +# separate_arguments(_SLIBS NATIVE_COMMAND ${SLIBS}) +# list(APPEND OMEGA_LINK_OPTIONS ${_SLIBS}) +# endif() + + # set CXX compiler *before* calling CMake project() + if(OMEGA_ARCH STREQUAL "CUDA") + + if(NOT OMEGA_CUDA_COMPILER) + find_program(OMEGA_CUDA_COMPILER + "nvcc_wrapper" + PATHS "${OMEGA_SOURCE_DIR}/../../externals/ekat/extern/kokkos/bin" + ) + endif() + + if(OMEGA_CUDA_COMPILER) + message(STATUS "OMEGA_CUDA_COMPILER = ${OMEGA_CUDA_COMPILER}") + + else() + message(FATAL_ERROR "Cuda compiler is not found." 
) + endif() + + set(CMAKE_CXX_COMPILER ${OMEGA_CUDA_COMPILER}) + set(CMAKE_CUDA_HOST_COMPILER ${OMEGA_CXX_COMPILER}) + + # overwrite CMAKE_CXX_FLAGS and CMAKE_EXE_LINKER_FLAGS defined in + # cime configuration because those could break CUDA build + if(OMEGA_CXX_FLAGS) + set(CMAKE_CXX_FLAGS ${OMEGA_CXX_FLAGS}) + + else() + set(CMAKE_CXX_FLAGS "") + endif() + + if(OMEGA_CUDA_FLAGS) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OMEGA_CUDA_FLAGS}") + endif() + + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ccbin ${CMAKE_CUDA_HOST_COMPILER}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-deprecated-gpu-targets") + + if(OMEGA_EXE_LINKER_FLAGS) + set(CMAKE_EXE_LINKER_FLAGS ${OMEGA_EXE_LINKER_FLAGS}) + + else() + set(CMAKE_EXE_LINKER_FLAGS "") + endif() + + message(STATUS "CMAKE_CUDA_HOST_COMPILER = ${CMAKE_CUDA_HOST_COMPILER}") + + file(APPEND ${_ProfileScript} "OUTFILE=${OMEGA_BUILD_DIR}/nsys_output\n\n") + file(APPEND ${_ProfileScript} "# load Nsight Systems Profiler\n") + file(APPEND ${_ProfileScript} "module load Nsight-Systems\n\n") + file(APPEND ${_ProfileScript} "nsys profile -o \$OUTFILE \\\n") + file(APPEND ${_ProfileScript} " --cuda-memory-usage=true --force-overwrite=true \\\n") + file(APPEND ${_ProfileScript} " --trace=cuda,nvtx,osrt \\\n") + file(APPEND ${_ProfileScript} " ./src/omega.exe 1000") + + elseif(OMEGA_ARCH STREQUAL "HIP") + + if(NOT OMEGA_HIP_COMPILER) + find_program(OMEGA_HIP_COMPILER "hipcc") + endif() + + if(OMEGA_HIP_COMPILER) + message(STATUS "OMEGA_HIP_COMPILER = ${OMEGA_HIP_COMPILER}") + + else() + message(FATAL_ERROR "hipcc is not found." 
) + endif() + + set(CMAKE_HIP_COMPILER ${OMEGA_HIP_COMPILER}) + set(CMAKE_CXX_COMPILER ${OMEGA_CXX_COMPILER}) + + if(OMEGA_CXX_FLAGS) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OMEGA_CXX_FLAGS}") + endif() + + if(OMEGA_HIP_FLAGS) + set(CMAKE_HIP_FLAGS "${CMAKE_HIP_FLAGS} ${OMEGA_HIP_FLAGS}") + endif() + + if(${MPILIB_NAME} STREQUAL "mpich") + if(NOT $ENV{MPICH_CXX}) + set(ENV{MPICH_CXX} ${OMEGA_HIP_COMPILER}) + endif() + + elseif(${MPILIB_NAME} STREQUAL "openmpi") + if(NOT $ENV{OMPI_CXX}) + set(ENV{OMPI_CXX} ${OMEGA_HIP_COMPILER}) + endif() + + else() + message(FATAL_ERROR "'$ENV{MPILIB_NAME}' is not supported yet.") + + endif() + + file(APPEND ${_ProfileScript} "OUTFILE=${OMEGA_BUILD_DIR}/rocprof_output.csv\n") + file(APPEND ${_ProfileScript} "rocprof --hip-trace --hsa-trace --timestamp on \\\n") + file(APPEND ${_ProfileScript} " -o \$OUTFILE ./src/omega.exe 1000") + + elseif(OMEGA_ARCH STREQUAL "SYCL") + set(CMAKE_CXX_COMPILER ${OMEGA_SYCL_COMPILER}) + + if(OMEGA_SYCL_FLAGS) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OMEGA_SYCL_FLAGS}") + endif() + + else() + set(CMAKE_CXX_COMPILER ${OMEGA_CXX_COMPILER}) + + endif() + + execute_process(COMMAND chmod +x ${_ProfileScript}) + + if(KOKKOS_OPTIONS) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${KOKKOS_OPTIONS}") + unset(KOKKOS_OPTIONS) + endif() + + message(STATUS "CMAKE_CXX_COMPILER = ${CMAKE_CXX_COMPILER}") + message(STATUS "CMAKE_CXX_FLAGS = ${CMAKE_CXX_FLAGS}") + message(STATUS "CMAKE_EXE_LINKER_FLAGS = ${CMAKE_EXE_LINKER_FLAGS}") endmacro() # set build-control-variables for standalone build macro(setup_standalone_build) - setup_common_variables() - if(NOT DEFINED OMEGA_BUILD_TYPE) set(OMEGA_BUILD_TYPE ${OMEGA_DEFAULT_BUILD_TYPE}) endif() @@ -142,7 +347,6 @@ macro(setup_standalone_build) EXISTS ${OMEGA_SOURCE_DIR}/../../externals) set(E3SM_SOURCE_DIR ${OMEGA_SOURCE_DIR}/../../components) - set(E3SM_EXTERNALS_ROOT ${OMEGA_SOURCE_DIR}/../../externals) else() # so far, we assume that Omega exists inside of E3SM. 
@@ -158,46 +362,49 @@ endmacro() # set build-control-variables for e3sm build macro(setup_e3sm_build) - setup_common_variables() - set(OMEGA_BUILD_TYPE ${E3SM_DEFAULT_BUILD_TYPE}) - set(E3SM_EXTERNALS_ROOT ${E3SM_SOURCE_DIR}/../externals) set(OMEGA_CXX_COMPILER ${CMAKE_CXX_COMPILER}) #TODO: set OMEGA_ARCH according to E3SM variables - set(OMEGA_ARCH "NOT_DEFINED") + set(OMEGA_ARCH "") set(OMEGA_BUILD_MODE "E3SM") message(STATUS "OMEGA_CXX_COMPILER = ${OMEGA_CXX_COMPILER}") endmacro() - -################################ -# Set cmake and YAKL variables # -################################ +################################## +# Set Cmake and Kokkos variables # +################################## macro(update_variables) # Set the build type set(CMAKE_BUILD_TYPE ${OMEGA_BUILD_TYPE}) - # Set compiler and linker flags - if (CXXFLAGS) - separate_arguments(_CXXFLAGS NATIVE_COMMAND ${CXXFLAGS}) - list(APPEND OMEGA_CXX_FLAGS ${_CXXFLAGS}) + if(OMEGA_BUILD_TYPE STREQUAL "Debug") + set(OMEGA_DEBUG ON) + endif() + + if(OMEGA_DEBUG) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DOMEGA_DEBUG") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DOMEGA_DEBUG") + set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -DOMEGA_DEBUG") endif() - if (LDFLAGS) - separate_arguments(_LDFLAGS NATIVE_COMMAND ${LDFLAGS}) - list(APPEND OMEGA_LINK_OPTIONS ${_LDFLAGS}) + if(OMEGA_MEMORY_LAYOUT) + string(TOUPPER "${OMEGA_MEMORY_LAYOUT}" _LAYOUT) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DOMEGA_LAYOUT_${_LAYOUT}") + else() + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DOMEGA_LAYOUT_RIGHT") endif() - if (SLIBS) - separate_arguments(_SLIBS NATIVE_COMMAND ${SLIBS}) - list(APPEND OMEGA_LINK_OPTIONS ${_SLIBS}) + if(OMEGA_TILE_LENGTH) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DOMEGA_TILE_LENGTH=${OMEGA_TILE_LENGTH}") endif() + message(STATUS "OMEGA_LINK_OPTIONS = ${OMEGA_LINK_OPTIONS}") + # check if MPI is supported string(CONCAT _TestMPISource "#include \"mpi.h\"\n" @@ -214,7 +421,7 @@ macro(update_variables) OUTPUT_VARIABLE 
_MPI_TEST_OUTPUT ERROR_VARIABLE _MPI_TEST_ERROR) - if(OMEGA_BUILD_TYPE EQUAL Release) + if(NOT OMEGA_DEBUG) file(REMOVE ${_TestMPISrcFile}) file(REMOVE ${_TestMPIObjFile}) endif() @@ -222,11 +429,10 @@ macro(update_variables) if (NOT _MPI_TEST_RESULT EQUAL 0) if (_MPI_TEST_RESULT MATCHES "^[-]?[0-9]+$") find_package(MPI) + if(MPI_CXX_FOUND) - list(APPEND OMEGA_CXX_FLAGS "-I${MPI_CXX_INCLUDE_DIRS}") - list(APPEND OMEGA_LINK_OPTIONS - "-L${MPI_CXX_INCLUDE_DIRS}/../lib" "-lmpi" - ) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -I${MPI_CXX_INCLUDE_DIRS}") + else() message(FATAL_ERROR "MPI is not found" ) endif() @@ -235,87 +441,37 @@ macro(update_variables) endif() endif() - message(STATUS "OMEGA_CXX_FLAGS = ${OMEGA_CXX_FLAGS}") - message(STATUS "OMEGA_LINK_OPTIONS = ${OMEGA_LINK_OPTIONS}") - if(OMEGA_INSTALL_PREFIX) set(CMAKE_INSTALL_PREFIX ${OMEGA_INSTALL_PREFIX}) endif() - # Check if CUDA or HIP is supported - if((NOT DEFINED OMEGA_ARCH) OR (OMEGA_ARCH STREQUAL "NOT_DEFINED")) - - if(USE_CUDA) - set(OMEGA_ARCH "CUDA") - - elseif(USE_HIP) - set(OMEGA_ARCH "HIP") - - else() - - execute_process( - COMMAND ${OMEGA_CXX_COMPILER} --version - RESULT_VARIABLE _CXX_VER_RESULT - OUTPUT_VARIABLE _CXX_VER_OUTPUT) - - if (_CXX_VER_RESULT EQUAL 0) - - string(REGEX MATCH "HIP|hip" _HIP_CHECK "${_CXX_VER_OUTPUT}") - string(REGEX MATCH "AMD|amd" _AMD_CHECK "${_CXX_VER_OUTPUT}") - string(REGEX MATCH "NVCC|nvcc" _NVCC_CHECK "${_CXX_VER_OUTPUT}") - string(REGEX MATCH "NVIDIA|nvidia" _NVIDIA_CHECK "${_CXX_VER_OUTPUT}") - - if(_HIP_CHECK AND _AMD_CHECK) - set(OMEGA_ARCH "HIP") - - elseif(_NVCC_CHECK AND _NVIDIA_CHECK) - set(OMEGA_ARCH "CUDA") - - else() - set(OMEGA_ARCH "") - - endif() - else() - set(OMEGA_ARCH "") - - endif() - endif() - endif() - - if(OMEGA_BUILD_TYPE STREQUAL "Debug") - message(STATUS "OMEGA_ARCH = ${OMEGA_ARCH}") - endif() - if(OMEGA_ARCH STREQUAL "CUDA") - set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER}) - find_program(CMAKE_CUDA_COMPILER "nvcc") - - 
if(OMEGA_BUILD_TYPE STREQUAL "Debug") - message(STATUS "CMAKE_CUDA_COMPILER = ${CMAKE_CUDA_COMPILER}") - message(STATUS "CMAKE_CUDA_HOST_COMPILER = ${CMAKE_CUDA_HOST_COMPILER}") - endif() + option(Kokkos_ENABLE_CUDA "" ON) + option(Kokkos_ENABLE_CUDA_LAMBDA "" ON) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DOMEGA_TARGET_DEVICE") elseif(OMEGA_ARCH STREQUAL "HIP") - set(CMAKE_HIP_HOST_COMPILER ${CMAKE_CXX_COMPILER}) - find_program(CMAKE_HIP_COMPILER "hipcc") - - if(OMEGA_BUILD_TYPE STREQUAL "Debug") - message(STATUS "CMAKE_HIP_COMPILER = ${CMAKE_HIP_COMPILER}") - message(STATUS "CMAKE_HIP_HOST_COMPILER = ${CMAKE_HIP_HOST_COMPILER}") - endif() + option(Kokkos_ENABLE_HIP "" ON) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DOMEGA_TARGET_DEVICE") - endif() + elseif(OMEGA_ARCH STREQUAL "SYCL") + option(Kokkos_ENABLE_SYCL "" ON) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DOMEGA_TARGET_DEVICE") - set(YAKL_ARCH "${OMEGA_ARCH}") + elseif(OMEGA_ARCH STREQUAL "OPENMP") + option(Kokkos_ENABLE_OPENMP "" ON) - if(YAKL_ARCH) + elseif(OMEGA_ARCH STREQUAL "THREADS") + option(Kokkos_ENABLE_THREADS "" ON) - if(OMEGA_${YAKL_ARCH}_FLAGS) - set(YAKL_${YAKL_ARCH}_FLAGS ${OMEGA_${YAKL_ARCH}_FLAGS}) - endif() + else() + set(OMEGA_ARCH "SERIAL") + option(Kokkos_ENABLE_SERIAL "" ON) endif() + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DOMEGA_ENABLE_${OMEGA_ARCH}") + # Include the findParmetis script list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}") find_package(Parmetis REQUIRED) @@ -350,9 +506,9 @@ macro(check_setup) endif() - if (NOT DEFINED YAKL_ARCH) - message(FATAL_ERROR "YAKL_ARCH is not defined.") - endif() +# if (NOT DEFINED YAKL_ARCH) +# message(FATAL_ERROR "YAKL_ARCH is not defined.") +# endif() endmacro() diff --git a/components/omega/create_scripts.py b/components/omega/create_scripts.py index bf34f6293948..c953fd7e35aa 100644 --- a/components/omega/create_scripts.py +++ b/components/omega/create_scripts.py @@ -82,7 +82,7 @@ def __init__(self, cimepath, outpath, machine, 
compiler, debug): self.machos = self.get_value("OS") self.mpilibs = self.get_value("MPILIBS").split(",") - # modifed based on generic_xml.py in CIME + # modified based on generic_xml.py in CIME def get_processed_value(self, raw_value, outvar): reference_re = re.compile(r"\${?(\w+)}?") @@ -327,6 +327,7 @@ def write_output(self, outvar, exclude_envs): f.write(f"set(OS {self.machos})\n") f.write(f"set(COMPILER {self.compiler})\n") f.write(f"set(MPI_EXEC {self.mpiexec})\n") + f.write(f"set(MPILIB_NAME {self.mpilib})\n") f.write(f"set(CASEROOT {self.machpath})\n") f.write(f"include({self.macrospath})\n") @@ -353,24 +354,32 @@ def generate_scripts(self, outvar): for key, value in self.__OMEGA_SCRIPT_EXPORTS__.items(): f.write(f"export {key}=\"{value}\"\n") + if "OMP_NUM_THREADS" not in self.__OMEGA_SCRIPT_EXPORTS__: + f.write(f"export OMP_NUM_THREADS=\"1\"\n") + with open(omega_build, "w") as f: f.write("#!/usr/bin/env bash\n\n") f.write("source ./omega_env.sh\n") - nthreads_build = self.get_value("GMAKE_J") - f.write(f"make -j{nthreads_build}\n") + + if self.debug == "TRUE": + f.write("make\n") + + else: + nthreads_build = self.get_value("GMAKE_J") + f.write(f"make -j{nthreads_build}\n") with open(omega_run, "w") as f: f.write("#!/usr/bin/env bash\n\n") f.write("source ./omega_env.sh\n") - f.write("./src/omega.exe\n") + f.write("./src/omega.exe 1000\n") with open(omega_ctest, "w") as f: f.write("#!/usr/bin/env bash\n\n") f.write("source ./omega_env.sh\n") - f.write("ctest $* # --rerun-failed --output-on-failure\n") + f.write("ctest --output-on-failure $* # --rerun-failed \n") st = os.stat(omega_env) os.chmod(omega_env, st.st_mode | stat.S_IEXEC) diff --git a/components/omega/doc/design/DataTypes.md b/components/omega/doc/design/DataTypes.md index ede9c17cab9f..40221507c0ec 100644 --- a/components/omega/doc/design/DataTypes.md +++ b/components/omega/doc/design/DataTypes.md @@ -4,7 +4,7 @@ ## 1 Overview Data type aliases are a useful means for managing precision within 
OMEGA and -to improve readability for for YAKL array data types. This header file +to improve readability for Kokkos array data types. This header file defines a number of aliases to common data types used throughout OMEGA. @@ -20,7 +20,7 @@ types to enforce precision where needed. ### 2.2 Desired: Readability for array types -OMEGA will be using YAKL array data types which have a long syntax. For +OMEGA will be using Kokkos array data types which have a long syntax. For readability, these should be aliased to a shorter, intuitive data type. ### 2.3 Desired: Fixed width integer types @@ -50,7 +50,7 @@ require reproducible algorithms in the future. Currently, the data types can be defined in a single header file that sets up aliases. Because the main data types are simply aliases to the standard data types, interoperability (Req 2.4) should not be an issue. For the -YAKL arrays, YAKL provides means for interoperability with both other +Kokkos arrays, Kokkos provides means for interoperability with both other languages and other frameworks and will be incorporated into any interfaces with other codes. @@ -67,7 +67,7 @@ Otherwise, the default real will be double precision. The data types will be defined using type aliases within a single header file DataTypes.h We will use the "using" syntax rather than -the older typedef. For YAKL arrays, we require both device arrays (default) +the older typedef. For Kokkos arrays, we require both device arrays (default) and host array types and will use C-ordering. ```c++ @@ -82,22 +82,27 @@ using Real = float; using Real = double; #endif -// Aliases for YAKL arrays - by default on device and in +// Aliases for Kokkos arrays - by default on device and in // C-ordering. 
-using Array1DI4 = YAKL::Array -using Array1DI8 = YAKL::Array -using Array1DR4 = YAKL::Array -using Array1DR8 = YAKL::Array -using Array1DReal = YAKL::Array -using Array2DI4 = YAKL::Array -using Array2DI8 = YAKL::Array -using Array2DR4 = YAKL::Array -using Array2DR8 = YAKL::Array -using Array2DReal = YAKL::Array -// continue this pattern for higher-dimensional arrays -// Also need similar aliases for arrays on the host -using ArrayHost1DI4 = YAKL::Array -// replicated as above for each type, dimension +#define MAKE_OMEGA_VIEW_DIMS(N, V, T, ML, MS) \ + using N##1D##T = Kokkos::V<T *, ML, MS>; \ + using N##2D##T = Kokkos::V<T **, ML, MS>; \ + using N##3D##T = Kokkos::V<T ***, ML, MS>; \ + using N##4D##T = Kokkos::V<T ****, ML, MS>; \ + using N##5D##T = Kokkos::V<T *****, ML, MS>; + +#define MAKE_OMEGA_VIEW_TYPES(N, V, ML, MS) \ + MAKE_OMEGA_VIEW_DIMS(N, V, I4, ML, MS) \ + MAKE_OMEGA_VIEW_DIMS(N, V, I8, ML, MS) \ + MAKE_OMEGA_VIEW_DIMS(N, V, R4, ML, MS) \ + MAKE_OMEGA_VIEW_DIMS(N, V, R8, ML, MS) \ + MAKE_OMEGA_VIEW_DIMS(N, V, Real, ML, MS) + +// Aliases for Kokkos device arrays of various dimensions and types +MAKE_OMEGA_VIEW_TYPES(Array, View, MemLayout, MemSpace) + +// Aliases for Kokkos host arrays of various dimensions and types +MAKE_OMEGA_VIEW_TYPES(HostArray, View, HostMemLayout, HostMemSpace) ``` ### 4.2 Methods @@ -122,10 +127,10 @@ Build test code with and without -D SINGLE_PRECISION and verify size is as expected using sizeof * tests requirement 2.1 -### 5.3 YAKL array test +### 5.3 Kokkos array test -Create YAKL arrays of each type on device. Create host arrays to -mirror each. Initialize YAKL arrays on device and copy to host -using YAKL. Create a non-YAKL array on host initialized the same +Create Kokkos arrays of each type on device. Create host arrays to +mirror each. Initialize Kokkos arrays on device and copy to host +using Kokkos. Create a non-Kokkos array on host initialized the same way and compare values from each on the host. 
* tests requirement 2.2 diff --git a/components/omega/doc/design/Decomp.md b/components/omega/doc/design/Decomp.md index 0ccb9bd3b5fc..546de2312580 100644 --- a/components/omega/doc/design/Decomp.md +++ b/components/omega/doc/design/Decomp.md @@ -133,7 +133,7 @@ before the parallel IO is set up. Once the primary (cell) mesh is partitioned, the edge and vertex index spaces will be assigned based on the adjacency to the local cells, as in the current MPAS model. Because much of this information -will be later replicated in a mesh class with YAKL arrays, this +will be later replicated in a mesh class with Kokkos arrays, this decomposition structure will be destroyed after initialization and any related setup of halos and other infrastructure. diff --git a/components/omega/doc/design/Driver.md b/components/omega/doc/design/Driver.md index 58c51c36afcb..faf378b4d0b0 100644 --- a/components/omega/doc/design/Driver.md +++ b/components/omega/doc/design/Driver.md @@ -38,7 +38,7 @@ must be stored internally within OMEGA as static variables. ### 2.4 Requirement: Managing environments -Initializing and exiting environments like MPI and YAKL must +Initializing and exiting environments like MPI and Kokkos must take place at the driver level (standalone driver or coupled model driver) as these environments are shared by other components. The finalize method described below must clean up @@ -58,7 +58,7 @@ is typically the fastest forcing data interval. The finalize method must provide a graceful exit, checkpointing as needed and cleaning up all memory. It must not, however, -exit the MPI or other shared environments (eg YAKL) per +exit the MPI or other shared environments (eg Kokkos) per requirement 2.4 ### 2.7 Requirement: Init method @@ -194,7 +194,8 @@ something like the code below (details subject to change during implementation). 
int main(int argc, char **argv) { MPI_Init(); // initialize MPI - yakl::init(); // initialize YAKL + Kokkos::initialize(); // initialize Kokkos + { OMEGA::State CurrState; OMEGA::TimeInstant CurrTime; @@ -229,9 +230,9 @@ int main(int argc, char **argv) { LOG_ERROR("OMEGA terminating due to error"); } - // Exit various environments - yakl::finalize(); - MPI::Finalize(); + } + Kokkos::finalize(); // Exit Kokkos + MPI::Finalize(); // Exit MPI return ErrAll; } diff --git a/components/omega/doc/design/HorzMeshClass.md b/components/omega/doc/design/HorzMeshClass.md index 692853082bf2..9b983bba4b75 100644 --- a/components/omega/doc/design/HorzMeshClass.md +++ b/components/omega/doc/design/HorzMeshClass.md @@ -3,7 +3,7 @@ ## 1 Overview -The mesh class will contain the YAKL arrays which describe the horizontal mesh and are used in the computation of the tendency terms in the discrete governing equations. OMEGA will separate the horizontal mesh variables from the vertical mesh information. +The mesh class will contain the Kokkos arrays which describe the horizontal mesh and are used in the computation of the tendency terms in the discrete governing equations. OMEGA will separate the horizontal mesh variables from the vertical mesh information. ## 2 Requirements @@ -15,12 +15,12 @@ The OMEGA mesh information should be compatible with the [MPAS Mesh Specificatio Not all mesh information is required in computing the tendency terms on the device, e.g. lonCell, latCell, etc. However, other arrays will need to be allocated and copied to the device for use in tendency computation. -The mesh class will explicitly include host and device YAKL arrays for each variable. +The mesh class will explicitly include host and device Kokkos arrays for each variable. A class method will be included to copy the relevant mesh information to the device.
### 2.3 Requirement: Zero-based cell, edge, and vertex numbering -Although the existing MPAS Mesh spec uses a one-based mesh numbering, zero-based mesh numbering is required to be compatible with the zero-based indexing used for YAKL arrays. +Although the existing MPAS Mesh spec uses a one-based mesh numbering, zero-based mesh numbering is required to be compatible with the zero-based indexing used for Kokkos arrays. ### 2.4 Requirement: Work with Decomp class to decompose mesh @@ -28,7 +28,7 @@ The mesh class will reference the partitioned connectivity arrays created by the ### 2.5 Requirement: Mesh variables will be associated with metadata to describe data -Following the Metadata and IO designs, the YAKL arrays for the mesh variables will be associated with information about the represented values. +Following the Metadata and IO designs, the Kokkos arrays for the mesh variables will be associated with information about the represented values. ### 2.6 Requirement: I/O to obtain mesh data @@ -67,7 +67,7 @@ The algorithms required for computing dependent mesh quantities are currently im ``` #### 4.1.2 Class/structs/data types -The horizontal mesh information will be organized in a class with public YAKL arrays. +The horizontal mesh information will be organized in a class with public Kokkos arrays. Arrays that require a device copy will have a explicit variable. Connectivity arrays that are already contained in the Decomp class will be replicated in the horizontal mesh class via pointers. ```c++ @@ -76,10 +76,10 @@ class HorzMesh { public: Array1DR8 AreaCell; - ArrayHost1DR8 AreaCellH; + HostArray1DR8 AreaCellH; Array2DI4 CellsOnCell; - ArrayHost2DI4 CellsOnCellH; + HostArray2DI4 CellsOnCellH; } ``` @@ -113,7 +113,7 @@ The compute method will be a private method called by the constructor. It will b This method will be repsonsible for creating the device copies of the required mesh information on the host. It will be a private method called by the constructor. 
```c++ -AreaCell = AreaCellH.createDeviceCopy() +AreaCell = OMEGA::createDeviceCopy(AreaCellH) ``` diff --git a/components/omega/doc/design/IO.md b/components/omega/doc/design/IO.md index eceb3178d14d..5ac26cffdda7 100644 --- a/components/omega/doc/design/IO.md +++ b/components/omega/doc/design/IO.md @@ -9,7 +9,7 @@ For performance at high resolution, much of this IO must occur in parallel and interact with a high-performance filesystem. We describe here an IO layer that provides interfaces to the underlying SCORPIO parallel I/O library used in E3SM. It primarily provides a translation layer to -read/write OMEGA metadata and YAKL arrays. It works together with the +read/write OMEGA metadata and Kokkos arrays. It works together with the OMEGA Metadata and IOStreams capabilities. Users will interact with IO primarily through the IOStreams and should not need to access this layer directly. @@ -67,14 +67,14 @@ of those tasks. ### 2.7 Requirement: Data types and type conversion The I/O system must be able to read/write all supported -data types for metadata and all supported YAKL array types. +data types for metadata and all supported Kokkos array types. In some cases, output files at reduced precision are required so an option to convert data to reduced precision is needed. -### 2.8 Requirement: YAKL arrays and host/device support +### 2.8 Requirement: Kokkos arrays and host/device support -Distributed data in OMEGA is stored as YAKL array types. -We must be able to read/write YAKL arrays and be able +Distributed data in OMEGA is stored as Kokkos array types. +We must be able to read/write Kokkos arrays and be able to move data between host and device as needed. ### 2.9 Requirement: Modes on file existence @@ -114,7 +114,7 @@ The OMEGA model I/O will be built on top of the SCORPIO parallel I/O library used across E3SM components. 
The I/O interfaces here generally provide wrappers for translating internal OMEGA metadata representations and -YAKL array types to the form required by SCORPIO. OMEGA +Kokkos array types to the form required by SCORPIO. OMEGA users and developers will generally interact with I/O through the IO Streams layer that manages all files and associated file contents. @@ -253,7 +253,7 @@ to specify the fields in an IOStream. /// only one of the pointers will be defined based on array type std::shared_ptr data1DI4; [replicated up to 5D arrays of I4,I8,R4,R8] - std::shared_ptr dataHost1DI4; + std::shared_ptr dataHost1DI4; [replicated up to 5D arrays of I4,I8,R4,R8] diff --git a/components/omega/doc/design/IOStreams.md b/components/omega/doc/design/IOStreams.md index de1e4c8a3216..6ab3d56b5b87 100644 --- a/components/omega/doc/design/IOStreams.md +++ b/components/omega/doc/design/IOStreams.md @@ -12,7 +12,7 @@ or file sequence an IO stream with its own unique set of properties. We describe here the requirements and design of these IO streams. The design relies on companion designs for managing metadata and a lower-level IO functions for writing metadata and data stored in -YAKL arrays. +Kokkos arrays. ## 2 Requirements diff --git a/components/omega/doc/design/MachEnv.md b/components/omega/doc/design/MachEnv.md index e75d9b608712..5f5f80446be3 100644 --- a/components/omega/doc/design/MachEnv.md +++ b/components/omega/doc/design/MachEnv.md @@ -26,10 +26,10 @@ communicator based on a parent communicator sent by the calling routine Each MPI rank will need to know its own rank id, number of ranks and define a master rank. -### 2.4 Requirement: YAKL initialization +### 2.4 Requirement: Kokkos initialization -Since we are using YAKL for kernel launching and array types, we -will need to initialize YAKL in standalone mode. It may also be +Since we are using Kokkos for kernel launching and array types, we +will need to initialize Kokkos in standalone mode. 
It may also be needed for coupled simulations. ### 2.5 Requirement: Vector blocking size defined at compile time @@ -229,9 +229,9 @@ success/fail return code. ## 5 Verification and Testing We will test this with a simple test driver in an 8-rank -MPI configuration. Requirement 2.4 (YAKL initialization) will +MPI configuration. Requirement 2.4 (Kokkos initialization) will need to be verified by visual inspection but later tests using -YAKL functions will determine whether this has been successful. +Kokkos functions will determine whether this has been successful. ### 5.1 Test standalone initialization diff --git a/components/omega/doc/design/Reductions.md b/components/omega/doc/design/Reductions.md index 579272892b30..6c0ff9f4453f 100644 --- a/components/omega/doc/design/Reductions.md +++ b/components/omega/doc/design/Reductions.md @@ -84,7 +84,7 @@ like minloc/maxloc. For integers, the sum is straightforward. For single-precision floating point, we will perform sums in double precision and convert back to -single before returning. In each of the above cases, the local YAKL +single before returning. In each of the above cases, the local Kokkos versions of sum, minval, maxval will be used for the local sum before accumulating the MPI sum across ranks. @@ -140,7 +140,7 @@ entries. For scalar sums, the interface is the same with the indxRange argument absent. -We will assume that YAKL host arrays will be summed on the +We will assume that Kokkos host arrays will be summed on the host and device arrays will be summed on the device. #### 4.2.2 Global sum with product @@ -199,7 +199,7 @@ to manage two use cases. A typical use case is for the second array in the product to be the same for all fields (eg a mask or an area array), but we may wish to support a case where each array product has a different array for both operands. 
-Because YAKL arrays contain metadata and a pointer to +Because Kokkos arrays contain metadata and a pointer to the data, it may be possible to create a std::vector of the same array, allowing both the case of a fixed array to be used for all or for all the array products to have diff --git a/components/omega/doc/design/ShallowWaterOmega0.md b/components/omega/doc/design/ShallowWaterOmega0.md index 488f6ecd90f0..e15ea0597d6d 100644 --- a/components/omega/doc/design/ShallowWaterOmega0.md +++ b/components/omega/doc/design/ShallowWaterOmega0.md @@ -3,7 +3,7 @@ ## 1 Overview -This design document describes the first version of the Omega ocean model, Omega0. Overall, Omega is an unstructured-mesh ocean model based on TRiSK numerical methods ([Thuburn et al. 2009](https://www.sciencedirect.com/science/article/pii/S0021999109004434)) that is specifically designed for modern exascale computing architectures. The algorithms in Omega will be nearly identical to those in MPAS-Ocean, but it will be written in c++ rather than Fortran in order to take advantage of libraries to run on GPUs, such as YAKL (Yet Another Kernel Library, [Norman et al. 2023](https://link.springer.com/10.1007/s10766-022-00739-0)). +This design document describes the first version of the Omega ocean model, Omega0. Overall, Omega is an unstructured-mesh ocean model based on TRiSK numerical methods ([Thuburn et al. 2009](https://www.sciencedirect.com/science/article/pii/S0021999109004434)) that is specifically designed for modern exascale computing architectures. The algorithms in Omega will be nearly identical to those in MPAS-Ocean, but it will be written in c++ rather than Fortran in order to take advantage of libraries to run on GPUs, such as Kokkos (https://github.com/kokkos). The planned versions of Omega are: @@ -87,7 +87,7 @@ The test cases relevant to this design document are in Section 5 below. Omega will be able to run on all the upcoming DOE architectures and make good use of GPU hardware. 
This should occur with minimal alterations in the high-level PDE solver code for different platforms. -Options include: writing kernels directly for GPUs in CUDA; adding OpenACC pragmas for the GPUs; or calling libraries such as Kokkos ([Trott et al. 2022](https://ieeexplore.ieee.org/document/9485033)), YAKL ([Norman et al. 2023](https://link.springer.com/10.1007/s10766-022-00739-0)) or [HIP](https://github.com/ROCm-Developer-Tools/HIP) that execute code optimized for specialized architectures on the back-end, while providing a simpler front-end interface for the domain scientist. +Options include: writing kernels directly for GPUs in CUDA; adding OpenACC pragmas for the GPUs; or calling libraries such as Kokkos ([Trott et al. 2022](https://ieeexplore.ieee.org/document/9485033)) or [HIP](https://github.com/ROCm-Developer-Tools/HIP) that execute code optimized for specialized architectures on the back-end, while providing a simpler front-end interface for the domain scientist.
### 2.6 Requirement: Omega-0 will run on multi-node with domain decomposition diff --git a/components/omega/doc/devGuide/CMakeBuild.md b/components/omega/doc/devGuide/CMakeBuild.md index 16222aaf1e49..d8942fac0507 100644 --- a/components/omega/doc/devGuide/CMakeBuild.md +++ b/components/omega/doc/devGuide/CMakeBuild.md @@ -43,21 +43,28 @@ OMEGA_EXE_NAME: Name of the executable ("omega.exe") OMEGA_LIB_NAME: Name of the library ("OmegaLib") OMEGA_BUILD_MODES: List of build modes ("E3SM", "STANDALONE", "NOT_DEFINED") OMEGA_BUILD_MODE: Selected build mode -OMEGA_DEFAULT_BUILD_TYPE: Default build type ("Release") +OMEGA_BUILD_DIR: Omega top-level build directory OMEGA_SOURCE_DIR: Directory where the top-level Omega CMakeLists.txt is located +OMEGA_DEFAULT_BUILD_TYPE: Default build type ("Release") OMEGA_INSTALL_PREFIX: User-defined output directory for the library and executable OMEGA_ARCH: User-defined programming framework (e.g., "CUDA", "HIP", "OPENMP", "SYCL", "") -OMEGA_${YAKL_ARCH}_FLAGS: Framework-specific compiler flags OMEGA_CXX_COMPILER: C++ compiler +OMEGA_C_COMPILER: C compiler +OMEGA_Fortran_COMPILER: Fortran compiler OMEGA_CIME_COMPILER: E3SM compiler name defined in config_machines.xml OMEGA_CIME_MACHINE: E3SM machine name defined in config_machines.xml OMEGA_CXX_FLAGS: a list for C++ compiler flags OMEGA_LINK_OPTIONS: a list for linker flags +OMEGA_EXE_LINKER_FLAGS: linker flags for building the Omega executable OMEGA_BUILD_EXECUTABLE: Enable building the Omega executable OMEGA_BUILD_TEST: Enable building Omega tests OMEGA_PARMETIS_ROOT: Parmetis installtion directory OMEGA_METIS_ROOT: Metis installtion directory OMEGA_GKLIB_ROOT: GKlib installtion directory +OMEGA_HIP_COMPILER: HIP compiler (e.g., hipcc) +OMEGA_HIP_FLAGS: HIP compiler flags +OMEGA_MEMORY_LAYOUT: Kokkos memory layout ("LEFT" or "RIGHT"). "RIGHT" is a default value. +OMEGA_TILE_LENGTH: a length of one "side" of a Kokkos tile. 64 is a default value. 
``` E3SM-specific variables @@ -88,7 +95,7 @@ CMAKE_VERSION ## Step 2: Update In this step, CMake is configured, and external library variables, -such as YAKL, MPI, NetCDF, and PNetCDF, are set based on the settings +such as Kokkos, MPI, NetCDF, and PNetCDF, are set based on the settings defined in the Setup step. The integrity of the build setup is verified at the end of this step. diff --git a/components/omega/doc/devGuide/DataTypes.md b/components/omega/doc/devGuide/DataTypes.md index f54a57a44dc5..8fdb15a12ee4 100644 --- a/components/omega/doc/devGuide/DataTypes.md +++ b/components/omega/doc/devGuide/DataTypes.md @@ -24,34 +24,34 @@ the inverse area of a cell using only the Real type as follows: Real InvAreaCell = 1._Real / AreaCell(ICell); ``` -## Arrays and YAKL +## Arrays and Kokkos The C++ language does not have native support for multi-dimensional arrays as part of the language standard, though there are a number of implementations as part of the Standard Template Library and -elsewhere. OMEGA uses the [YAKL](https://github.com/mrnorman/YAKL) +elsewhere. OMEGA uses the [Kokkos](https://github.com/kokkos) framework for defining and allocating arrays on both the CPU host and any accelerator device that may be present. Because the syntax for defining such arrays is somewhat long, we instead define a number of alias array types of the form `ArrayNDTT` where N is the dimension of the array and TT is the data type (I4, I8, R4, R8 or Real) corresponding to the types described above. The dimension refers to the number of -ranks in the array and not the physical dimension. Although YAKL +ranks in the array and not the physical dimension. Although Kokkos supports Fortran ordering, we will use C ordering for array indices. Within OMEGA the default location for an Array should be on the device -with a similar type ArrayHostNDTT defined for arrays needed on the host. +with a similar type HostArrayNDTT defined for arrays needed on the host. 
As an example, we can define and allocate a device and host array using: ```c++ Array3dReal Temperature("Temperature",nTimeLevels, nCells, nVertLevels); - ArrayHost3dReal TemperatureHost("Temperature",nTimeLevels, nCells, nVertLevels); + HostArray3dReal TemperatureHost("Temperature",nTimeLevels, nCells, nVertLevels); ``` Alternatively, you can use the copy functions to create a host copy from the device or vice versa. ```c++ - auto TemperatureHost = Temperature.createHostCopy(); + auto TemperatureHost = OMEGA::createHostCopy(Temperature); ``` Finally, the arrays can be deallocated explicity using the class deallocate method, eg `Temperature.deallocate();` or if they are local to a routine, they will be automatically deallocated when they fall out -of scope on exit. More details on YAKL arrays are available in the YAKL +of scope on exit. More details on Kokkos arrays are available in the Kokkos documentation. diff --git a/components/omega/doc/devGuide/Decomp.md b/components/omega/doc/devGuide/Decomp.md index fceb5af9dfda..f577c400840e 100644 --- a/components/omega/doc/devGuide/Decomp.md +++ b/components/omega/doc/devGuide/Decomp.md @@ -50,7 +50,7 @@ OMEGA::Decomp *DefDecomp = OMEGA::Decomp::getDefault(); Once retrieved all Decomp members are public and can be accessed using ```c++ OMEGA::I4 NCells = DefDecomp->NCells; -OMEGA::ArrayHost1DI4 CellIDH = DefDecomp->CellIDH; +OMEGA::HostArray1DI4 CellIDH = DefDecomp->CellIDH; ``` Decomp is a container for all mesh index and connectivity arrays as described in the mesh specification above. In particular, it contains @@ -83,7 +83,7 @@ described in the mesh specification above. In particular, it contains For each of the arrays above, there is a copy of the array on the host and device (GPU) with the host array named with an extra H on the end -(eg CellsOnCellH). All are YAKL arrays so are accessed with (index) rather +(eg CellsOnCellH). 
All are Kokkos arrays so are accessed with (index) rather than [index] and for some of the arrays noted above are multi-dimensional. A typical host loop might then look something like: ```c++ @@ -93,12 +93,10 @@ for (int Cell = 0; Cell < NCellsOwned; ++Cell) { } } ``` -And on the device, we use the YAKL form (note that we will likely create -aliases for extended yakl expressions, like parallel_for to replace -yakl::c::parallel_for); +And on the device, we use `OMEGA::parallelFor` in place of `Kokkos::parallel_for`; ```c++ -yakl::c::parallel_for( yakl::c::Bounds<2>(NCellsOwned,MaxEdges), - YAKL_LAMBDA (int Cell, int Edge) { +OMEGA::parallelFor( {NCellsOwned,MaxEdges}, + KOKKOS_LAMBDA (int Cell, int Edge) { if (Edge < NEdgesOnCell(Cell)) { Var(Cell) = Var(Cell) + Flux(Cell,Edge); } @@ -109,10 +107,10 @@ Any defined decomposition can be removed by name using ```c++ Decomp::erase(Name); ``` -and all decompositions *must* be removed before the yakl finalize call using +and all decompositions *must* be removed before the Kokkos finalize call using ```c++ Decomp::clear(); ``` -which destroys all host and device arrays before YAKL finalizes and removes +which destroys all host and device arrays before Kokkos finalizes and removes the memory pool in which all the arrays are allocated. Failure to call clear -before `yakl::finalize()` will result in an error. +before `Kokkos::finalize()` will result in an error. diff --git a/components/omega/doc/devGuide/HorzMesh.md b/components/omega/doc/devGuide/HorzMesh.md index de4ab86f456f..0e8e39896666 100644 --- a/components/omega/doc/devGuide/HorzMesh.md +++ b/components/omega/doc/devGuide/HorzMesh.md @@ -15,7 +15,7 @@ connectivity information from Decomp so this information can be passed among the computational routines, alongside the other local mesh information. It then creates several parallel I/O decompositions and reads in the remaining subdomain mesh information. 
Finally, any mesh information needed on the device is copied -from the host to a device YAKL array. Arrays such as the coordinate variables, +from the host to a device Kokkos array. Arrays such as the coordinate variables, which are not involved in tendency calculations, are not transferred to the device. These tasks are organized into several private methods. Eventually, dependent mesh variables will be computed from the minimum set of required mesh @@ -36,8 +36,8 @@ The HorzMesh is meant to be a container that allows the mesh information to be passed to the PDE solver routines: ``` void computeFluxTendency(OMEGA::HorzMesh *HMesh, ...) { -yakl::c::parallel_for(yakl::c::Bounds<2>(HMesh->NCellsOwned,HMesh->MaxEdges), - YAKL_LAMBDA (int Cell, int Edge) { +OMEGA::parallelFor({HMesh->NCellsOwned,HMesh->MaxEdges}, + KOKKOS_LAMBDA (int Cell, int Edge) { if (Edge < HMesh->NEdgesOnCell(Cell)) { Var(Cell) = Var(Cell) + Flux(Cell,Edge); } @@ -48,8 +48,8 @@ For member variables that are host arrays, variable names are appended with an `H`. Array variable names not ending in `H` are device arrays. The copy from host to device array is performed in the constructor via: ```c++ -AreaCell = AreaCellH.createDeviceCopy(); +AreaCell = OMEGA::createDeviceCopy(AreaCellH); ``` The device arrays are deallocated by the `HorzMesh::clear()` method, which is -necessary before calling `yakl::finalize`. +necessary before calling `Kokkos::finalize`. diff --git a/components/omega/doc/devGuide/Logging.md b/components/omega/doc/devGuide/Logging.md index 7463e3665495..3a0f801e4e6c 100644 --- a/components/omega/doc/devGuide/Logging.md +++ b/components/omega/doc/devGuide/Logging.md @@ -5,7 +5,7 @@ Omega's logging system is built upon the [spdlog](https://github.com/gabime/spdlog) logging tool. 
-Logging macros and custom formatters for YAKL data types have been +Logging macros and custom formatters for Kokkos data types have been integrated into the Omega logging system via the Logging.h header file located in the src/infra directory. Users who wish to utilize Omega's logging capabilities must include this header file in their code. @@ -39,10 +39,10 @@ versatility by accommodating the utilization of the spdlog logger as their first argument. This approach facilitats the integration of various logger types. -## Customer formatter for YAKL +## Custom formatter for Kokkos Within the same header file, you will encounter specialized spdlog formatter -structs designed to accommodate YAKL data types. +structs designed to accommodate Kokkos data types. For further information on customizing the spdlog formatter, refer to [Custom formatting](https://github.com/gabime/spdlog/wiki/3.-Custom-formatting). diff --git a/components/omega/doc/devGuide/QuickStart.md b/components/omega/doc/devGuide/QuickStart.md index 3610e6bf5f45..97ef115643d9 100644 --- a/components/omega/doc/devGuide/QuickStart.md +++ b/components/omega/doc/devGuide/QuickStart.md @@ -80,7 +80,7 @@ to buid and test Omega. The utility automates many of the steps below. In the Omega branch you would like to build, first update the submodules that Omega requires: ```sh -git submodule update --init --recursive externals/YAKL externals/ekat \ +git submodule update --init --recursive externals/ekat \ externals/scorpio cime ``` @@ -165,8 +165,8 @@ Test project /gpfs/fs1/home/ac.xylar/e3sm_work/polaris/add-omega-ctest-util/buil 7/9 Test #7: IO_TEST .......................... Passed 2.94 sec Start 8: CONFIG_TEST 8/9 Test #8: CONFIG_TEST ...................... Passed 1.01 sec - Start 9: YAKL_TEST -9/9 Test #9: YAKL_TEST ........................ Passed 0.03 sec + Start 9: KOKKOS_TEST +9/9 Test #9: KOKKOS_TEST ........................
Passed 0.03 sec 100% tests passed, 0 tests failed out of 9 diff --git a/components/omega/doc/index.md b/components/omega/doc/index.md index c438578b62e8..7bdac7cdf744 100644 --- a/components/omega/doc/index.md +++ b/components/omega/doc/index.md @@ -6,7 +6,7 @@ global ocean model in the early stages of development by the [E3SM](https://e3sm.org/) ocean team. The first release is planned for Summer 2026. A non-eddying configuration will be released in early 2027. -The model is written in c++ using the [YAKL](https://github.com/mrnorman/YAKL) +The model is written in c++ using the [Kokkos](https://github.com/kokkos) framework for performance portability. OMEGA is based on the [TRSK formulation](https://doi.org/10.1016/j.jcp.2009.08.006) for geophysical models on unstructured meshes. The first version of OMEGA will primarily be a direct port diff --git a/components/omega/doc/userGuide/Logging.md b/components/omega/doc/userGuide/Logging.md index 674aa7089faf..6c286e874be0 100644 --- a/components/omega/doc/userGuide/Logging.md +++ b/components/omega/doc/userGuide/Logging.md @@ -55,7 +55,7 @@ Here is a table that summarizes the different log severities and their meanings: You can replace the string "World" with any variable whose type is a C++ basic data type, such as an int, a float, or a string. You can -also use a limited set of YAKL variables. For example, the following code +also use a limited set of Kokkos variables. 
For example, the following code writes the value of the variable `MyInt` as a TRACE level log: ```c diff --git a/components/omega/external/CMakeLists.txt b/components/omega/external/CMakeLists.txt index b8fcb6c0e8f8..e2955a89d959 100644 --- a/components/omega/external/CMakeLists.txt +++ b/components/omega/external/CMakeLists.txt @@ -1,5 +1,7 @@ # Add external packages +include(ExternalProject) + # Add the spdlog library add_subdirectory( ${E3SM_EXTERNALS_ROOT}/ekat/extern/spdlog @@ -12,19 +14,11 @@ add_subdirectory( ${CMAKE_CURRENT_BINARY_DIR}/ekat/extern/yaml-cpp ) -# Add the YAKL library -add_subdirectory( - ${E3SM_EXTERNALS_ROOT}/YAKL - ${CMAKE_CURRENT_BINARY_DIR}/YAKL -) - -if (CMAKE_VERSION VERSION_GREATER "3.18.0") - set_property(TARGET yakl PROPERTY CUDA_ARCHITECTURES OFF) -endif() - -if(OMEGA_DEBUG) - target_compile_definitions(yakl INTERFACE YAKL_DEBUG=1) -endif() +# Add the Kokkos library + add_subdirectory( + ${E3SM_EXTERNALS_ROOT}/ekat/extern/kokkos + ${CMAKE_CURRENT_BINARY_DIR}/ekat/extern/kokkos + ) # forward env. variables to Scorpio build if(DEFINED ENV{NETCDF_PATH}) diff --git a/components/omega/external/README.md b/components/omega/external/README.md index 939d564ed83c..84e165fa7129 100644 --- a/components/omega/external/README.md +++ b/components/omega/external/README.md @@ -3,4 +3,4 @@ This directory holds any needed external libraries, submodules or source code for the Ocean Model for E3SM Global Applications (OMEGA). Other external libraries may come from shared external E3SM libraries -(eg Scorpio, YAKL, etc.). +(eg Scorpio, Kokkos, etc.). 
diff --git a/components/omega/src/CMakeLists.txt b/components/omega/src/CMakeLists.txt index b72215858d45..c67914cb72be 100644 --- a/components/omega/src/CMakeLists.txt +++ b/components/omega/src/CMakeLists.txt @@ -3,32 +3,22 @@ # Add source files for the library file(GLOB _LIBSRC_FILES infra/*.cpp base/*.cpp ocn/*.cpp) -# Create the library target -add_library(${OMEGA_LIB_NAME} ${_LIBSRC_FILES}) + add_library(${OMEGA_LIB_NAME} ${_LIBSRC_FILES}) -# add include directories -target_include_directories( - ${OMEGA_LIB_NAME} - PRIVATE - ${OMEGA_SOURCE_DIR}/src/base - ${OMEGA_SOURCE_DIR}/src/infra - ${Parmetis_INCLUDE_DIRS} -) - -# add compiler options -target_include_directories( + target_include_directories( ${OMEGA_LIB_NAME} PRIVATE ${OMEGA_SOURCE_DIR}/src/base ${OMEGA_SOURCE_DIR}/src/infra ${OMEGA_SOURCE_DIR}/src/ocn -) + ${Parmetis_INCLUDE_DIRS} + ) -target_compile_options( + target_compile_definitions( ${OMEGA_LIB_NAME} - PRIVATE - ${OMEGA_CXX_FLAGS} -) + PUBLIC + OMEGA_ARCH=${OMEGA_ARCH} + ) # add linker options target_link_options( @@ -37,7 +27,7 @@ target_link_options( ${OMEGA_LINK_OPTIONS} ) -target_link_libraries(${OMEGA_LIB_NAME} spdlog yakl pioc yaml-cpp parmetis metis) +target_link_libraries(${OMEGA_LIB_NAME} Kokkos::kokkos spdlog pioc yaml-cpp parmetis metis) if(GKlib_FOUND) target_link_libraries(${OMEGA_LIB_NAME} gklib) @@ -53,11 +43,17 @@ if(OMEGA_BUILD_EXECUTABLE) # Create the executable target add_executable(${OMEGA_EXE_NAME} ${EXESRC_FILES}) - target_compile_options( - ${OMEGA_EXE_NAME} - PRIVATE - "-L${OMEGA_SOURCE_DIR}/src/base" - "-L${CMAKE_CURRENT_SOURCE_DIR}/infra" +# target_compile_options( +# ${OMEGA_EXE_NAME} +# PRIVATE +# "-L${OMEGA_SOURCE_DIR}/src/base" +# "-L${CMAKE_CURRENT_SOURCE_DIR}/infra" +# ) + + target_compile_definitions( + ${OMEGA_EXE_NAME} + PUBLIC + OMEGA_ARCH=${OMEGA_ARCH} ) target_link_libraries(${OMEGA_EXE_NAME} ${OMEGA_LIB_NAME}) diff --git a/components/omega/src/base/DataTypes.h b/components/omega/src/base/DataTypes.h index 
7b0b4887a353..4a9d62c6a179 100644 --- a/components/omega/src/base/DataTypes.h +++ b/components/omega/src/base/DataTypes.h @@ -3,21 +3,20 @@ //===-- base/DataTypes.h - data type and array definitions ------*- C++ -*-===// // /// \file -/// \brief Defines standard data types and YAKL array aliases +/// \brief Defines standard data types and Kokkos array aliases /// /// This header defines fixed-length data types to enforce levels of precision /// where needed. In addition, it supplies a generic real type that is double /// precision by default but can be switched throughout using a preprocessor /// definition SINGLE_PRECISION. Finally, all arrays in OMEGA are defined -/// as YAKL arrays to enable allocation and kernel launching on accelerator -/// devices. Because the YAKL definitions can be lengthy, this header defines +/// as Kokkos arrays to enable allocation and kernel launching on accelerator +/// devices. Because the Kokkos definitions can be lengthy, this header defines /// useful aliases for up to 5D arrays in all supported types on either the /// host or device. // //===----------------------------------------------------------------------===// -#include "YAKL.h" -#include +#include "Kokkos_Core.hpp" namespace OMEGA { @@ -31,69 +30,115 @@ using R8 = double; ///< alias for 64-bit (double prec) real #ifdef SINGLE_PRECISION using Real = float; #else -using Real = double; +using Real = double; #endif // user-defined literal for generic reals -YAKL_INLINE constexpr Real operator""_Real(long double x) { return x; } - -// Aliases for YAKL arrays - by default, all arrays are on the device and -// use C-ordering. 
-/// Aliases for YAKL device arrays of various dimensions and types -using Array1DI4 = yakl::Array; -using Array1DI8 = yakl::Array; -using Array1DR4 = yakl::Array; -using Array1DR8 = yakl::Array; -using Array1DReal = yakl::Array; -using Array2DI4 = yakl::Array; -using Array2DI8 = yakl::Array; -using Array2DR4 = yakl::Array; -using Array2DR8 = yakl::Array; -using Array2DReal = yakl::Array; -using Array3DI4 = yakl::Array; -using Array3DI8 = yakl::Array; -using Array3DR4 = yakl::Array; -using Array3DR8 = yakl::Array; -using Array3DReal = yakl::Array; -using Array4DI4 = yakl::Array; -using Array4DI8 = yakl::Array; -using Array4DR4 = yakl::Array; -using Array4DR8 = yakl::Array; -using Array4DReal = yakl::Array; -using Array5DI4 = yakl::Array; -using Array5DI8 = yakl::Array; -using Array5DR4 = yakl::Array; -using Array5DR8 = yakl::Array; -using Array5DReal = yakl::Array; - -// Also need similar aliases for arrays on the host -/// Aliases for YAKL host arrays of various dimensions and types -using ArrayHost1DI4 = yakl::Array; -using ArrayHost1DI8 = yakl::Array; -using ArrayHost1DR4 = yakl::Array; -using ArrayHost1DR8 = yakl::Array; -using ArrayHost1DReal = yakl::Array; -using ArrayHost2DI4 = yakl::Array; -using ArrayHost2DI8 = yakl::Array; -using ArrayHost2DR4 = yakl::Array; -using ArrayHost2DR8 = yakl::Array; -using ArrayHost2DReal = yakl::Array; -using ArrayHost3DI4 = yakl::Array; -using ArrayHost3DI8 = yakl::Array; -using ArrayHost3DR4 = yakl::Array; -using ArrayHost3DR8 = yakl::Array; -using ArrayHost3DReal = yakl::Array; -using ArrayHost4DI4 = yakl::Array; -using ArrayHost4DI8 = yakl::Array; -using ArrayHost4DR4 = yakl::Array; -using ArrayHost4DR8 = yakl::Array; -using ArrayHost4DReal = yakl::Array; -using ArrayHost5DI4 = yakl::Array; -using ArrayHost5DI8 = yakl::Array; -using ArrayHost5DR4 = yakl::Array; -using ArrayHost5DR8 = yakl::Array; -using ArrayHost5DReal = yakl::Array; +KOKKOS_INLINE_FUNCTION constexpr Real operator""_Real(long double x) { + return x; +} +// 
Aliases for Kokkos memory spaces +#ifdef OMEGA_ENABLE_CUDA +using MemSpace = Kokkos::CudaSpace; +#elif OMEGA_ENABLE_HIP +using MemSpace = Kokkos::Experimental::HIPSpace; +#elif OMEGA_ENABLE_OPENMP +using MemSpace = Kokkos::HostSpace; +#else +#error \ + "None of OMEGA_ENABLE_CUDA, OMEGA_ENABLE_HIP, and OMEGA_ENABLE_OPENMP is defined." +#endif + +// Set default tile length +#ifndef OMEGA_TILE_LENGTH +#define OMEGA_TILE_LENGTH 64 +#endif + +// Aliases for Kokkos memory layouts +#ifdef OMEGA_LAYOUT_RIGHT + +using MemLayout = Kokkos::LayoutRight; +using MemInvLayout = Kokkos::LayoutLeft; + +// Default tile configurations +template <int N> struct DefaultTile; + +template <> struct DefaultTile<1> { + static constexpr int value[] = {OMEGA_TILE_LENGTH}; +}; + +template <> struct DefaultTile<2> { + static constexpr int value[] = {1, OMEGA_TILE_LENGTH}; +}; + +template <> struct DefaultTile<3> { + static constexpr int value[] = {1, 1, OMEGA_TILE_LENGTH}; +}; + +template <> struct DefaultTile<4> { + static constexpr int value[] = {1, 1, 1, OMEGA_TILE_LENGTH}; +}; + +template <> struct DefaultTile<5> { + static constexpr int value[] = {1, 1, 1, 1, OMEGA_TILE_LENGTH}; +}; + +#elif OMEGA_LAYOUT_LEFT + +using MemLayout = Kokkos::LayoutLeft; +using MemInvLayout = Kokkos::LayoutRight; + +// Default tile configurations +template <int N> struct DefaultTile; + +template <> struct DefaultTile<1> { + static constexpr int value[] = {OMEGA_TILE_LENGTH}; +}; + +template <> struct DefaultTile<2> { + static constexpr int value[] = {OMEGA_TILE_LENGTH, 1}; +}; + +template <> struct DefaultTile<3> { + static constexpr int value[] = {OMEGA_TILE_LENGTH, 1, 1}; +}; + +template <> struct DefaultTile<4> { + static constexpr int value[] = {OMEGA_TILE_LENGTH, 1, 1, 1}; +}; + +template <> struct DefaultTile<5> { + static constexpr int value[] = {OMEGA_TILE_LENGTH, 1, 1, 1, 1}; +}; + +#else +#error "OMEGA Memory Layout is not defined." 
+#endif + +using HostMemSpace = Kokkos::HostSpace; +using HostMemLayout = MemLayout; +using HostMemInvLayout = MemInvLayout; + +#define MAKE_OMEGA_VIEW_DIMS(N, V, T, ML, MS) \ + using N##1D##T = Kokkos::V<T *, ML, MS>; \ + using N##2D##T = Kokkos::V<T **, ML, MS>; \ + using N##3D##T = Kokkos::V<T ***, ML, MS>; \ + using N##4D##T = Kokkos::V<T ****, ML, MS>; \ + using N##5D##T = Kokkos::V<T *****, ML, MS>; + +#define MAKE_OMEGA_VIEW_TYPES(N, V, ML, MS) \ + MAKE_OMEGA_VIEW_DIMS(N, V, I4, ML, MS) \ + MAKE_OMEGA_VIEW_DIMS(N, V, I8, ML, MS) \ + MAKE_OMEGA_VIEW_DIMS(N, V, R4, ML, MS) \ + MAKE_OMEGA_VIEW_DIMS(N, V, R8, ML, MS) \ + MAKE_OMEGA_VIEW_DIMS(N, V, Real, ML, MS) + +// Aliases for Kokkos device arrays of various dimensions and types +MAKE_OMEGA_VIEW_TYPES(Array, View, MemLayout, MemSpace) + +// Aliases for Kokkos host arrays of various dimensions and types +MAKE_OMEGA_VIEW_TYPES(HostArray, View, HostMemLayout, HostMemSpace) } // end namespace OMEGA //===----------------------------------------------------------------------===// diff --git a/components/omega/src/base/Decomp.cpp b/components/omega/src/base/Decomp.cpp index 78042996bb92..9aad17e27942 100644 --- a/components/omega/src/base/Decomp.cpp +++ b/components/omega/src/base/Decomp.cpp @@ -19,6 +19,7 @@ #include "IO.h" #include "Logging.h" #include "MachEnv.h" +#include "OmegaKokkos.h" #include "mpi.h" #include "parmetis.h" @@ -68,17 +69,18 @@ I4 srchVector(const std::vector &InVector, // vector to search } // end function srchVector (std::vector) //------------------------------------------------------------------------------ -// A search routine for vectors in which the vector is a YAKL array rather +// A search routine for vectors in which the vector is a Kokkos array rather // than a std::vector. It searches for a value and returns the first index of // that // entry. 
If not found, the size is returned (corresponding to the // last index + 1) -I4 srchVector(ArrayHost1DI4 InVector, // vector to search +I4 srchVector(HostArray1DI4 InVector, // vector to search I4 Value // value to search for ) { // extract the vector length - I4 VecSize = InVector.totElems(); + // I4 VecSize = InVector.totElems(); + I4 VecSize = InVector.size(); I4 LocIndex = VecSize; // set default to size (return value if not found) // Loop over elements, searching for Value @@ -91,7 +93,7 @@ I4 srchVector(ArrayHost1DI4 InVector, // vector to search return LocIndex; -} // end function srchVector (YAKL) +} // end function srchVector (Kokkos) // Routines needed for creating the decomposition //------------------------------------------------------------------------------ @@ -627,30 +629,30 @@ Decomp::Decomp( // Create device copies of all arrays - NCellsHalo = NCellsHaloH.createDeviceCopy(); - CellID = CellIDH.createDeviceCopy(); - CellLoc = CellLocH.createDeviceCopy(); + NCellsHalo = createDeviceCopy(NCellsHaloH); + CellID = createDeviceCopy(CellIDH); + CellLoc = createDeviceCopy(CellLocH); - NEdgesHalo = NEdgesHaloH.createDeviceCopy(); - EdgeID = EdgeIDH.createDeviceCopy(); - EdgeLoc = EdgeLocH.createDeviceCopy(); + NEdgesHalo = createDeviceCopy(NEdgesHaloH); + EdgeID = createDeviceCopy(EdgeIDH); + EdgeLoc = createDeviceCopy(EdgeLocH); - NVerticesHalo = NVerticesHaloH.createDeviceCopy(); - VertexID = VertexIDH.createDeviceCopy(); - VertexLoc = VertexLocH.createDeviceCopy(); + NVerticesHalo = createDeviceCopy(NVerticesHaloH); + VertexID = createDeviceCopy(VertexIDH); + VertexLoc = createDeviceCopy(VertexLocH); - CellsOnCell = CellsOnCellH.createDeviceCopy(); - EdgesOnCell = EdgesOnCellH.createDeviceCopy(); - VerticesOnCell = VerticesOnCellH.createDeviceCopy(); - NEdgesOnCell = NEdgesOnCellH.createDeviceCopy(); + CellsOnCell = createDeviceCopy(CellsOnCellH); + EdgesOnCell = createDeviceCopy(EdgesOnCellH); + VerticesOnCell = createDeviceCopy(VerticesOnCellH); + 
NEdgesOnCell = createDeviceCopy(NEdgesOnCellH); - CellsOnEdge = CellsOnEdgeH.createDeviceCopy(); - EdgesOnEdge = EdgesOnEdgeH.createDeviceCopy(); - VerticesOnEdge = VerticesOnEdgeH.createDeviceCopy(); - NEdgesOnEdge = NEdgesOnEdgeH.createDeviceCopy(); + CellsOnEdge = createDeviceCopy(CellsOnEdgeH); + EdgesOnEdge = createDeviceCopy(EdgesOnEdgeH); + VerticesOnEdge = createDeviceCopy(VerticesOnEdgeH); + NEdgesOnEdge = createDeviceCopy(NEdgesOnEdgeH); - CellsOnVertex = CellsOnVertexH.createDeviceCopy(); - EdgesOnVertex = EdgesOnVertexH.createDeviceCopy(); + CellsOnVertex = createDeviceCopy(CellsOnVertexH); + EdgesOnVertex = createDeviceCopy(EdgesOnVertexH); // Assign this as the default decomposition AllDecomps.emplace(Name, *this); @@ -663,7 +665,7 @@ Decomp::Decomp( Decomp::~Decomp() { - // No operations needed, YAKL arrays removed when no longer in scope + // No operations needed, Kokkos arrays removed when no longer in scope } // end decomp destructor @@ -885,7 +887,7 @@ int Decomp::partCellsKWay( I4 CellLocStart = 0; I4 CellLocEnd = NCellsOwned - 1; I4 CurSize = NCellsOwned; - ArrayHost1DI4 NCellsHaloTmp("NCellsHalo", HaloWidth); + HostArray1DI4 NCellsHaloTmp("NCellsHalo", HaloWidth); std::set HaloList; // Loop over each halo layer for (int Halo = 0; Halo < HaloWidth; ++Halo) { @@ -953,8 +955,8 @@ int Decomp::partCellsKWay( // Copy global ID for each cell, both owned and halo. // Copy cell location (task, local add) for each cell, both owned and halo - ArrayHost1DI4 CellIDHTmp("CellID", NCellsSize); - ArrayHost2DI4 CellLocHTmp("CellLoc", NCellsSize, 2); + HostArray1DI4 CellIDHTmp("CellID", NCellsSize); + HostArray2DI4 CellLocHTmp("CellLoc", NCellsSize, 2); for (int Cell = 0; Cell < NCellsAll; ++Cell) { CellIDHTmp(Cell) = CellIDTmp[Cell]; CellLocHTmp(Cell, 0) = CellLocTmp[2 * Cell]; // task owning this cell @@ -1094,12 +1096,12 @@ int Decomp::partEdges( // edge IDs, locations and CellsOnEdge with this ordering. 
NEdgesOwned = EdgesOwned.size(); - ArrayHost1DI4 NEdgesHaloTmp("NEdgesHalo", HaloWidth); + HostArray1DI4 NEdgesHaloTmp("NEdgesHalo", HaloWidth); I4 HaloCount = EdgesOwnedHalo1.size(); NEdgesHaloTmp(0) = HaloCount; - ArrayHost1DI4 EdgeIDTmp("EdgeID", NEdgesSize); - yakl::memset(EdgeIDTmp, NEdgesGlobal + 1); + HostArray1DI4 EdgeIDTmp("EdgeID", NEdgesSize); + deepCopy(EdgeIDTmp, NEdgesGlobal + 1); // The owned and first halo of edges comes from the edges around // the owned cells, so start with these. @@ -1160,7 +1162,7 @@ int Decomp::partEdges( // Resize the buffer to make sure we have enough room - the distribution // may be less even than the original chunk size. - ArrayHost2DI4 EdgeLocTmp("EdgeLoc", NEdgesSize, 2); + HostArray2DI4 EdgeLocTmp("EdgeLoc", NEdgesSize, 2); EdgeBuf.resize(2 * NEdgesChunk); for (int Edge = 0; Edge < NEdgesSize; ++Edge) { @@ -1331,12 +1333,12 @@ int Decomp::partVertices( // vertex IDs, locations and CellsOnVertex with this ordering. NVerticesOwned = VerticesOwned.size(); - ArrayHost1DI4 NVerticesHaloTmp("NVerticesHalo", HaloWidth); + HostArray1DI4 NVerticesHaloTmp("NVerticesHalo", HaloWidth); I4 HaloCount = VerticesOwnedHalo1.size(); NVerticesHaloTmp(0) = HaloCount; - ArrayHost1DI4 VertexIDTmp("VertexID", NVerticesSize); - yakl::memset(VertexIDTmp, NVerticesGlobal + 1); + HostArray1DI4 VertexIDTmp("VertexID", NVerticesSize); + deepCopy(VertexIDTmp, NVerticesGlobal + 1); // The owned and first halo of vertices comes from the vertices around // the owned cells, so start with these. @@ -1398,7 +1400,7 @@ int Decomp::partVertices( // Resize the buffer to make sure we have enough room - the distribution // may be less even than the original chunk size. 
- ArrayHost2DI4 VertexLocTmp("VertexLoc", NVerticesSize, 2); + HostArray2DI4 VertexLocTmp("VertexLoc", NVerticesSize, 2); VrtxBuf.resize(2 * NVerticesChunk); for (int Vrtx = 0; Vrtx < NVerticesSize; ++Vrtx) { @@ -1475,14 +1477,14 @@ int Decomp::rearrangeCellArrays( // Create temporary arrays for holding the XxOnCell results // and initialize to NXxGlobal+1 to denote a non-existent entry - ArrayHost2DI4 CellsOnCellTmp("CellsOnCell", NCellsSize, MaxEdges); - ArrayHost2DI4 EdgesOnCellTmp("EdgesOnCell", NCellsSize, MaxEdges); - ArrayHost2DI4 VerticesOnCellTmp("VerticesOnCell", NCellsSize, MaxEdges); - ArrayHost1DI4 NEdgesOnCellTmp("NEdgesOnCell", NCellsSize); - yakl::memset(CellsOnCellTmp, NCellsGlobal + 1); - yakl::memset(EdgesOnCellTmp, NEdgesGlobal + 1); - yakl::memset(VerticesOnCellTmp, NVerticesGlobal + 1); - yakl::memset(NEdgesOnCellTmp, 0); + HostArray2DI4 CellsOnCellTmp("CellsOnCell", NCellsSize, MaxEdges); + HostArray2DI4 EdgesOnCellTmp("EdgesOnCell", NCellsSize, MaxEdges); + HostArray2DI4 VerticesOnCellTmp("VerticesOnCell", NCellsSize, MaxEdges); + HostArray1DI4 NEdgesOnCellTmp("NEdgesOnCell", NCellsSize); + deepCopy(CellsOnCellTmp, NCellsGlobal + 1); + deepCopy(EdgesOnCellTmp, NEdgesGlobal + 1); + deepCopy(VerticesOnCellTmp, NVerticesGlobal + 1); + deepCopy(NEdgesOnCellTmp, 0); // Each task will broadcast the cells it owns in the initial linear // distribution and all tasks will search that list and extract the @@ -1600,14 +1602,14 @@ int Decomp::rearrangeEdgeArrays( // Create temporary arrays for holding the XxOnEdge results // and initialize to NXxGlobal+1 to denote a non-existent entry - ArrayHost2DI4 CellsOnEdgeTmp("CellsOnEdge", NEdgesSize, MaxCellsOnEdge); - ArrayHost2DI4 EdgesOnEdgeTmp("EdgesOnEdge", NEdgesSize, 2 * MaxEdges); - ArrayHost2DI4 VerticesOnEdgeTmp("VerticesOnEdge", NEdgesSize, 2); - ArrayHost1DI4 NEdgesOnEdgeTmp("NEdgesOnEdge", NEdgesSize); - yakl::memset(CellsOnEdgeTmp, NCellsGlobal + 1); - yakl::memset(EdgesOnEdgeTmp, NEdgesGlobal + 1); 
- yakl::memset(VerticesOnEdgeTmp, NVerticesGlobal + 1); - yakl::memset(NEdgesOnEdgeTmp, 0); + HostArray2DI4 CellsOnEdgeTmp("CellsOnEdge", NEdgesSize, MaxCellsOnEdge); + HostArray2DI4 EdgesOnEdgeTmp("EdgesOnEdge", NEdgesSize, 2 * MaxEdges); + HostArray2DI4 VerticesOnEdgeTmp("VerticesOnEdge", NEdgesSize, 2); + HostArray1DI4 NEdgesOnEdgeTmp("NEdgesOnEdge", NEdgesSize); + deepCopy(CellsOnEdgeTmp, NCellsGlobal + 1); + deepCopy(EdgesOnEdgeTmp, NEdgesGlobal + 1); + deepCopy(VerticesOnEdgeTmp, NVerticesGlobal + 1); + deepCopy(NEdgesOnEdgeTmp, 0); // Each task will broadcast the array chunks it owns in the initial linear // distribution and all tasks will search that list and extract the @@ -1735,10 +1737,10 @@ int Decomp::rearrangeVertexArrays( // Create temporary arrays for holding the XxOnVertex results // and initialize to NXxGlobal+1 to denote a non-existent entry - ArrayHost2DI4 CellsOnVertexTmp("CellsOnVertex", NVerticesSize, VertexDegree); - ArrayHost2DI4 EdgesOnVertexTmp("EdgesOnVertex", NVerticesSize, VertexDegree); - yakl::memset(CellsOnVertexTmp, NCellsGlobal + 1); - yakl::memset(EdgesOnVertexTmp, NEdgesGlobal + 1); + HostArray2DI4 CellsOnVertexTmp("CellsOnVertex", NVerticesSize, VertexDegree); + HostArray2DI4 EdgesOnVertexTmp("EdgesOnVertex", NVerticesSize, VertexDegree); + deepCopy(CellsOnVertexTmp, NCellsGlobal + 1); + deepCopy(EdgesOnVertexTmp, NEdgesGlobal + 1); // Each task will broadcast the array chunks it owns in the initial linear // distribution and all tasks will search that list and extract the diff --git a/components/omega/src/base/Decomp.h b/components/omega/src/base/Decomp.h index 4ad73b90c8b8..fd07cf0195c9 100644 --- a/components/omega/src/base/Decomp.h +++ b/components/omega/src/base/Decomp.h @@ -150,11 +150,11 @@ class Decomp { I4 MaxEdges; ///< Max number of edges around a cell Array1DI4 NCellsHalo; ///< num cells owned+halo for halo layer - ArrayHost1DI4 NCellsHaloH; ///< num cells owned+halo for halo layer + HostArray1DI4 NCellsHaloH; ///< 
num cells owned+halo for halo layer Array1DI4 CellID; ///< global cell ID for each local cell - ArrayHost1DI4 CellIDH; ///< global cell ID for each local cell + HostArray1DI4 CellIDH; ///< global cell ID for each local cell Array2DI4 CellLoc; ///< location (task, local add) for local cells,halo - ArrayHost2DI4 CellLocH; ///< location (task, local add) for local cells,halo + HostArray2DI4 CellLocH; ///< location (task, local add) for local cells,halo I4 NEdgesGlobal; ///< Number of edges in the full global mesh I4 NEdgesOwned; ///< Number of edges owned by this task @@ -163,11 +163,11 @@ class Decomp { I4 MaxCellsOnEdge; ///< Max number of cells sharing an edge Array1DI4 NEdgesHalo; ///< num cells owned+halo for halo layer - ArrayHost1DI4 NEdgesHaloH; ///< num cells owned+halo for halo layer + HostArray1DI4 NEdgesHaloH; ///< num cells owned+halo for halo layer Array1DI4 EdgeID; ///< global cell ID for each local cell - ArrayHost1DI4 EdgeIDH; ///< global cell ID for each local cell + HostArray1DI4 EdgeIDH; ///< global cell ID for each local cell Array2DI4 EdgeLoc; ///< location (task, local add) for local edges,halo - ArrayHost2DI4 EdgeLocH; ///< location (task, local add) for local edges,halo + HostArray2DI4 EdgeLocH; ///< location (task, local add) for local edges,halo I4 NVerticesGlobal; ///< Number of vertices in the full global mesh I4 NVerticesOwned; ///< Number of vertices owned by this task @@ -176,43 +176,43 @@ class Decomp { I4 VertexDegree; ///< Number of cells that meet at each vertex Array1DI4 NVerticesHalo; ///< num cells owned+halo for halo layer - ArrayHost1DI4 NVerticesHaloH; ///< num cells owned+halo for halo layer + HostArray1DI4 NVerticesHaloH; ///< num cells owned+halo for halo layer Array1DI4 VertexID; ///< global vertex ID for each local cell - ArrayHost1DI4 VertexIDH; ///< global vertex ID for each local cell + HostArray1DI4 VertexIDH; ///< global vertex ID for each local cell Array2DI4 VertexLoc; ///< location (task, local add) for local vrtx 
halo - ArrayHost2DI4 VertexLocH; ///< location (task, local add) for local vrtx halo + HostArray2DI4 VertexLocH; ///< location (task, local add) for local vrtx halo // Mesh connectivity Array2DI4 CellsOnCell; ///< Indx of cells that neighbor each cell - ArrayHost2DI4 CellsOnCellH; ///< Indx of cells that neighbor each cell + HostArray2DI4 CellsOnCellH; ///< Indx of cells that neighbor each cell Array2DI4 EdgesOnCell; ///< Indx of edges that border each cell - ArrayHost2DI4 EdgesOnCellH; ///< Indx of edges that border each cell + HostArray2DI4 EdgesOnCellH; ///< Indx of edges that border each cell Array1DI4 NEdgesOnCell; ///< Num of active edges around each cell - ArrayHost1DI4 NEdgesOnCellH; ///< Num of active edges around each cell + HostArray1DI4 NEdgesOnCellH; ///< Num of active edges around each cell Array2DI4 VerticesOnCell; ///< Indx of vertices bordering each cell - ArrayHost2DI4 VerticesOnCellH; ///< Indx of vertices bordering each cell + HostArray2DI4 VerticesOnCellH; ///< Indx of vertices bordering each cell Array2DI4 CellsOnEdge; ///< Indx of cells straddling each edge - ArrayHost2DI4 CellsOnEdgeH; ///< Indx of cells straddling each edge + HostArray2DI4 CellsOnEdgeH; ///< Indx of cells straddling each edge Array2DI4 EdgesOnEdge; ///< Indx of edges around cells across each edge - ArrayHost2DI4 EdgesOnEdgeH; ///< Indx of edges around cells across each edge + HostArray2DI4 EdgesOnEdgeH; ///< Indx of edges around cells across each edge Array1DI4 NEdgesOnEdge; ///< Num of edges around the cells across edge - ArrayHost1DI4 NEdgesOnEdgeH; ///< Num of edges around the cells across edge + HostArray1DI4 NEdgesOnEdgeH; ///< Num of edges around the cells across edge Array2DI4 VerticesOnEdge; ///< Indx of vertices straddling each edge - ArrayHost2DI4 VerticesOnEdgeH; ///< Indx of vertices straddling each edge + HostArray2DI4 VerticesOnEdgeH; ///< Indx of vertices straddling each edge Array2DI4 CellsOnVertex; ///< Indx of cells that share a vertex - ArrayHost2DI4 
CellsOnVertexH; ///< Indx of cells that share a vertex + HostArray2DI4 CellsOnVertexH; ///< Indx of cells that share a vertex Array2DI4 EdgesOnVertex; ///< Indx of edges sharing vertex as endpoint - ArrayHost2DI4 EdgesOnVertexH; ///< Indx of edges sharing vertex as endpoint + HostArray2DI4 EdgesOnVertexH; ///< Indx of edges sharing vertex as endpoint // Methods diff --git a/components/omega/src/base/Halo.cpp b/components/omega/src/base/Halo.cpp index 78472b6e13e3..dc9a572907b0 100644 --- a/components/omega/src/base/Halo.cpp +++ b/components/omega/src/base/Halo.cpp @@ -2,7 +2,7 @@ // // The Halo class and its nested classes Neighbor and ExchList contain all // the information and methods needed for exchanging the halo elements of -// supported YAKL array types for a given machine environment (MachEnv) +// supported Kokkos array types for a given machine environment (MachEnv) // and parallel decomposition (Decomp). These exchanges are carried out // via non-blocking MPI library routines. Constructor and private member // functions are defined here. The Halo class public member function @@ -167,8 +167,8 @@ int Halo::generateExchangeLists( // Pointers to the needed info from the Decomp for the input index space const I4 *NOwnedPtr{nullptr}; const I4 *NAllPtr{nullptr}; - ArrayHost1DI4 NHaloPtr; - ArrayHost2DI4 LocPtr; + HostArray1DI4 NHaloPtr; + HostArray2DI4 LocPtr; // Logical flag to check if this is the first time the function is called bool First{false}; @@ -442,7 +442,7 @@ int Halo::startSends() { // recast as a Real in a bit-preserving manner using reinterpret_cast to pack // into the buffer, which is of type std::vector. 
-int Halo::packBuffer(const ArrayHost1DI4 Array) { +int Halo::packBuffer(const HostArray1DI4 Array) { ExchList *MyList = &MyNeighbor->SendLists[MyElem]; @@ -457,9 +457,9 @@ int Halo::packBuffer(const ArrayHost1DI4 Array) { } return 0; -} // end packBuffer ArrayHost1DI4 +} // end packBuffer HostArray1DI4 -int Halo::packBuffer(const ArrayHost1DI8 Array) { +int Halo::packBuffer(const HostArray1DI8 Array) { ExchList *MyList = &MyNeighbor->SendLists[MyElem]; @@ -474,9 +474,9 @@ int Halo::packBuffer(const ArrayHost1DI8 Array) { } return 0; -} // end packBuffer ArrayHost1DI8 +} // end packBuffer HostArray1DI8 -int Halo::packBuffer(const ArrayHost1DR4 Array) { +int Halo::packBuffer(const HostArray1DR4 Array) { ExchList *MyList = &MyNeighbor->SendLists[MyElem]; @@ -490,9 +490,9 @@ int Halo::packBuffer(const ArrayHost1DR4 Array) { } return 0; -} // end packBuffer ArrayHost1DR4 +} // end packBuffer HostArray1DR4 -int Halo::packBuffer(const ArrayHost1DR8 Array) { +int Halo::packBuffer(const HostArray1DR8 Array) { ExchList *MyList = &MyNeighbor->SendLists[MyElem]; @@ -506,13 +506,12 @@ int Halo::packBuffer(const ArrayHost1DR8 Array) { } return 0; -} // end packBuffer ArrayHost1DR8 +} // end packBuffer HostArray1DR8 -int Halo::packBuffer(const ArrayHost2DI4 Array) { +int Halo::packBuffer(const HostArray2DI4 Array) { - ExchList *MyList = &MyNeighbor->SendLists[MyElem]; - yakl::Dims MyDims = Array.get_dimensions(); - int NJ = MyDims[1]; + ExchList *MyList = &MyNeighbor->SendLists[MyElem]; + int NJ = Array.extent(1); MyNeighbor->SendBuffer.resize(MyList->NTot * TotSize); @@ -527,13 +526,12 @@ int Halo::packBuffer(const ArrayHost2DI4 Array) { } return 0; -} // end packBuffer ArrayHost2DI4 +} // end packBuffer HostArray2DI4 -int Halo::packBuffer(const ArrayHost2DI8 Array) { +int Halo::packBuffer(const HostArray2DI8 Array) { - ExchList *MyList = &MyNeighbor->SendLists[MyElem]; - yakl::Dims MyDims = Array.get_dimensions(); - int NJ = MyDims[1]; + ExchList *MyList = 
&MyNeighbor->SendLists[MyElem]; + int NJ = Array.extent(1); MyNeighbor->SendBuffer.resize(MyList->NTot * TotSize); @@ -548,13 +546,12 @@ int Halo::packBuffer(const ArrayHost2DI8 Array) { } return 0; -} // end packBuffer ArrayHost2DI8 +} // end packBuffer HostArray2DI8 -int Halo::packBuffer(const ArrayHost2DR4 Array) { +int Halo::packBuffer(const HostArray2DR4 Array) { - ExchList *MyList = &MyNeighbor->SendLists[MyElem]; - yakl::Dims MyDims = Array.get_dimensions(); - int NJ = MyDims[1]; + ExchList *MyList = &MyNeighbor->SendLists[MyElem]; + int NJ = Array.extent(1); MyNeighbor->SendBuffer.resize(MyList->NTot * TotSize); @@ -569,13 +566,12 @@ int Halo::packBuffer(const ArrayHost2DR4 Array) { } return 0; -} // end packBuffer ArrayHost2DR4 +} // end packBuffer HostArray2DR4 -int Halo::packBuffer(const ArrayHost2DR8 Array) { +int Halo::packBuffer(const HostArray2DR8 Array) { - ExchList *MyList = &MyNeighbor->SendLists[MyElem]; - yakl::Dims MyDims = Array.get_dimensions(); - int NJ = MyDims[1]; + ExchList *MyList = &MyNeighbor->SendLists[MyElem]; + int NJ = Array.extent(1); MyNeighbor->SendBuffer.resize(MyList->NTot * TotSize); @@ -590,14 +586,13 @@ int Halo::packBuffer(const ArrayHost2DR8 Array) { } return 0; -} // end packBuffer ArrayHost2DR8 +} // end packBuffer HostArray2DR8 -int Halo::packBuffer(const ArrayHost3DI4 Array) { +int Halo::packBuffer(const HostArray3DI4 Array) { - ExchList *MyList = &MyNeighbor->SendLists[MyElem]; - yakl::Dims MyDims = Array.get_dimensions(); - int NK = MyDims[0]; - int NJ = MyDims[2]; + ExchList *MyList = &MyNeighbor->SendLists[MyElem]; + int NK = Array.extent(0); + int NJ = Array.extent(2); MyNeighbor->SendBuffer.resize(MyList->NTot * TotSize); @@ -616,14 +611,13 @@ int Halo::packBuffer(const ArrayHost3DI4 Array) { } return 0; -} // end packBuffer ArrayHost3DI4 +} // end packBuffer HostArray3DI4 -int Halo::packBuffer(const ArrayHost3DI8 Array) { +int Halo::packBuffer(const HostArray3DI8 Array) { - ExchList *MyList = 
&MyNeighbor->SendLists[MyElem]; - yakl::Dims MyDims = Array.get_dimensions(); - int NK = MyDims[0]; - int NJ = MyDims[2]; + ExchList *MyList = &MyNeighbor->SendLists[MyElem]; + int NK = Array.extent(0); + int NJ = Array.extent(2); MyNeighbor->SendBuffer.resize(MyList->NTot * TotSize); @@ -642,14 +636,13 @@ int Halo::packBuffer(const ArrayHost3DI8 Array) { } return 0; -} // end packBuffer ArrayHost3DI8 +} // end packBuffer HostArray3DI8 -int Halo::packBuffer(const ArrayHost3DR4 Array) { +int Halo::packBuffer(const HostArray3DR4 Array) { - ExchList *MyList = &MyNeighbor->SendLists[MyElem]; - yakl::Dims MyDims = Array.get_dimensions(); - int NK = MyDims[0]; - int NJ = MyDims[2]; + ExchList *MyList = &MyNeighbor->SendLists[MyElem]; + int NK = Array.extent(0); + int NJ = Array.extent(2); MyNeighbor->SendBuffer.resize(MyList->NTot * TotSize); @@ -668,14 +661,13 @@ int Halo::packBuffer(const ArrayHost3DR4 Array) { } return 0; -} // end packBuffer ArrayHost3DR4 +} // end packBuffer HostArray3DR4 -int Halo::packBuffer(const ArrayHost3DR8 Array) { +int Halo::packBuffer(const HostArray3DR8 Array) { - ExchList *MyList = &MyNeighbor->SendLists[MyElem]; - yakl::Dims MyDims = Array.get_dimensions(); - int NK = MyDims[0]; - int NJ = MyDims[2]; + ExchList *MyList = &MyNeighbor->SendLists[MyElem]; + int NK = Array.extent(0); + int NJ = Array.extent(2); MyNeighbor->SendBuffer.resize(MyList->NTot * TotSize); @@ -694,15 +686,14 @@ int Halo::packBuffer(const ArrayHost3DR8 Array) { } return 0; -} // end packBuffer ArrayHost3DR8 +} // end packBuffer HostArray3DR8 -int Halo::packBuffer(const ArrayHost4DI4 Array) { +int Halo::packBuffer(const HostArray4DI4 Array) { - ExchList *MyList = &MyNeighbor->SendLists[MyElem]; - yakl::Dims MyDims = Array.get_dimensions(); - int NL = MyDims[0]; - int NK = MyDims[1]; - int NJ = MyDims[3]; + ExchList *MyList = &MyNeighbor->SendLists[MyElem]; + int NL = Array.extent(0); + int NK = Array.extent(1); + int NJ = Array.extent(3); 
MyNeighbor->SendBuffer.resize(MyList->NTot * TotSize); @@ -724,15 +715,14 @@ int Halo::packBuffer(const ArrayHost4DI4 Array) { } return 0; -} // end packBuffer ArrayHost4DI4 +} // end packBuffer HostArray4DI4 -int Halo::packBuffer(const ArrayHost4DI8 Array) { +int Halo::packBuffer(const HostArray4DI8 Array) { - ExchList *MyList = &MyNeighbor->SendLists[MyElem]; - yakl::Dims MyDims = Array.get_dimensions(); - int NL = MyDims[0]; - int NK = MyDims[1]; - int NJ = MyDims[3]; + ExchList *MyList = &MyNeighbor->SendLists[MyElem]; + int NL = Array.extent(0); + int NK = Array.extent(1); + int NJ = Array.extent(3); MyNeighbor->SendBuffer.resize(MyList->NTot * TotSize); @@ -754,15 +744,14 @@ int Halo::packBuffer(const ArrayHost4DI8 Array) { } return 0; -} // end packBuffer ArrayHost4DI8 +} // end packBuffer HostArray4DI8 -int Halo::packBuffer(const ArrayHost4DR4 Array) { +int Halo::packBuffer(const HostArray4DR4 Array) { - ExchList *MyList = &MyNeighbor->SendLists[MyElem]; - yakl::Dims MyDims = Array.get_dimensions(); - int NL = MyDims[0]; - int NK = MyDims[1]; - int NJ = MyDims[3]; + ExchList *MyList = &MyNeighbor->SendLists[MyElem]; + int NL = Array.extent(0); + int NK = Array.extent(1); + int NJ = Array.extent(3); MyNeighbor->SendBuffer.resize(MyList->NTot * TotSize); @@ -784,15 +773,14 @@ int Halo::packBuffer(const ArrayHost4DR4 Array) { } return 0; -} // end packBuffer ArrayHost4DR4 +} // end packBuffer HostArray4DR4 -int Halo::packBuffer(const ArrayHost4DR8 Array) { +int Halo::packBuffer(const HostArray4DR8 Array) { - ExchList *MyList = &MyNeighbor->SendLists[MyElem]; - yakl::Dims MyDims = Array.get_dimensions(); - int NL = MyDims[0]; - int NK = MyDims[1]; - int NJ = MyDims[3]; + ExchList *MyList = &MyNeighbor->SendLists[MyElem]; + int NL = Array.extent(0); + int NK = Array.extent(1); + int NJ = Array.extent(3); MyNeighbor->SendBuffer.resize(MyList->NTot * TotSize); @@ -814,16 +802,15 @@ int Halo::packBuffer(const ArrayHost4DR8 Array) { } return 0; -} // end packBuffer 
ArrayHost4DR8 +} // end packBuffer HostArray4DR8 -int Halo::packBuffer(const ArrayHost5DI4 Array) { +int Halo::packBuffer(const HostArray5DI4 Array) { - ExchList *MyList = &MyNeighbor->SendLists[MyElem]; - yakl::Dims MyDims = Array.get_dimensions(); - int NM = MyDims[0]; - int NL = MyDims[1]; - int NK = MyDims[2]; - int NJ = MyDims[4]; + ExchList *MyList = &MyNeighbor->SendLists[MyElem]; + int NM = Array.extent(0); + int NL = Array.extent(1); + int NK = Array.extent(2); + int NJ = Array.extent(4); MyNeighbor->SendBuffer.resize(MyList->NTot * TotSize); @@ -847,16 +834,15 @@ int Halo::packBuffer(const ArrayHost5DI4 Array) { } return 0; -} // end packBuffer ArrayHost5DI4 +} // end packBuffer HostArray5DI4 -int Halo::packBuffer(const ArrayHost5DI8 Array) { +int Halo::packBuffer(const HostArray5DI8 Array) { - ExchList *MyList = &MyNeighbor->SendLists[MyElem]; - yakl::Dims MyDims = Array.get_dimensions(); - int NM = MyDims[0]; - int NL = MyDims[1]; - int NK = MyDims[2]; - int NJ = MyDims[4]; + ExchList *MyList = &MyNeighbor->SendLists[MyElem]; + int NM = Array.extent(0); + int NL = Array.extent(1); + int NK = Array.extent(2); + int NJ = Array.extent(4); MyNeighbor->SendBuffer.resize(MyList->NTot * TotSize); @@ -880,16 +866,15 @@ int Halo::packBuffer(const ArrayHost5DI8 Array) { } return 0; -} // end packBuffer ArrayHost5DI8 +} // end packBuffer HostArray5DI8 -int Halo::packBuffer(const ArrayHost5DR4 Array) { +int Halo::packBuffer(const HostArray5DR4 Array) { - ExchList *MyList = &MyNeighbor->SendLists[MyElem]; - yakl::Dims MyDims = Array.get_dimensions(); - int NM = MyDims[0]; - int NL = MyDims[1]; - int NK = MyDims[2]; - int NJ = MyDims[4]; + ExchList *MyList = &MyNeighbor->SendLists[MyElem]; + int NM = Array.extent(0); + int NL = Array.extent(1); + int NK = Array.extent(2); + int NJ = Array.extent(4); MyNeighbor->SendBuffer.resize(MyList->NTot * TotSize); @@ -913,16 +898,15 @@ int Halo::packBuffer(const ArrayHost5DR4 Array) { } return 0; -} // end packBuffer 
ArrayHost5DR4 +} // end packBuffer HostArray5DR4 -int Halo::packBuffer(const ArrayHost5DR8 Array) { +int Halo::packBuffer(const HostArray5DR8 Array) { - ExchList *MyList = &MyNeighbor->SendLists[MyElem]; - yakl::Dims MyDims = Array.get_dimensions(); - int NM = MyDims[0]; - int NL = MyDims[1]; - int NK = MyDims[2]; - int NJ = MyDims[4]; + ExchList *MyList = &MyNeighbor->SendLists[MyElem]; + int NM = Array.extent(0); + int NL = Array.extent(1); + int NK = Array.extent(2); + int NJ = Array.extent(4); MyNeighbor->SendBuffer.resize(MyList->NTot * TotSize); @@ -946,7 +930,7 @@ int Halo::packBuffer(const ArrayHost5DR8 Array) { } return 0; -} // end packBuffer ArrayHost5DR8 +} // end packBuffer HostArray5DR8 //------------------------------------------------------------------------------ // The unpackBuffer function is overloaded to all supported data types. After @@ -959,7 +943,7 @@ int Halo::packBuffer(const ArrayHost5DR8 Array) { // integer type (I4 or I8) using reinterpret_cast, and then saved in the // input Array. 
-int Halo::unpackBuffer(ArrayHost1DI4 &Array) { +int Halo::unpackBuffer(HostArray1DI4 &Array) { ExchList *MyList = &MyNeighbor->RecvLists[MyElem]; @@ -972,9 +956,9 @@ int Halo::unpackBuffer(ArrayHost1DI4 &Array) { } return 0; -} // end unpackBuffer ArrayHost1DI4 +} // end unpackBuffer HostArray1DI4 -int Halo::unpackBuffer(ArrayHost1DI8 &Array) { +int Halo::unpackBuffer(HostArray1DI8 &Array) { ExchList *MyList = &MyNeighbor->RecvLists[MyElem]; @@ -987,9 +971,9 @@ int Halo::unpackBuffer(ArrayHost1DI8 &Array) { } return 0; -} // end unpackBuffer ArrayHost1DI8 +} // end unpackBuffer HostArray1DI8 -int Halo::unpackBuffer(ArrayHost1DR4 &Array) { +int Halo::unpackBuffer(HostArray1DR4 &Array) { ExchList *MyList = &MyNeighbor->RecvLists[MyElem]; @@ -1001,9 +985,9 @@ int Halo::unpackBuffer(ArrayHost1DR4 &Array) { } return 0; -} // end unpackBuffer ArrayHost1DR4 +} // end unpackBuffer HostArray1DR4 -int Halo::unpackBuffer(ArrayHost1DR8 &Array) { +int Halo::unpackBuffer(HostArray1DR8 &Array) { ExchList *MyList = &MyNeighbor->RecvLists[MyElem]; @@ -1015,13 +999,12 @@ int Halo::unpackBuffer(ArrayHost1DR8 &Array) { } return 0; -} // end unpackBuffer ArrayHost1DR8 +} // end unpackBuffer HostArray1DR8 -int Halo::unpackBuffer(ArrayHost2DI4 &Array) { +int Halo::unpackBuffer(HostArray2DI4 &Array) { - ExchList *MyList = &MyNeighbor->RecvLists[MyElem]; - yakl::Dims MyDims = Array.get_dimensions(); - int NJ = MyDims[1]; + ExchList *MyList = &MyNeighbor->RecvLists[MyElem]; + int NJ = Array.extent(1); for (int ILayer = 0; ILayer < NumLayers; ++ILayer) { for (int IExch = 0; IExch < MyList->NList[ILayer]; ++IExch) { @@ -1034,13 +1017,12 @@ int Halo::unpackBuffer(ArrayHost2DI4 &Array) { } return 0; -} // end unpackBuffer ArrayHost2DI4 +} // end unpackBuffer HostArray2DI4 -int Halo::unpackBuffer(ArrayHost2DI8 &Array) { +int Halo::unpackBuffer(HostArray2DI8 &Array) { - ExchList *MyList = &MyNeighbor->RecvLists[MyElem]; - yakl::Dims MyDims = Array.get_dimensions(); - int NJ = MyDims[1]; + 
ExchList *MyList = &MyNeighbor->RecvLists[MyElem]; + int NJ = Array.extent(1); for (int ILayer = 0; ILayer < NumLayers; ++ILayer) { for (int IExch = 0; IExch < MyList->NList[ILayer]; ++IExch) { @@ -1053,13 +1035,12 @@ int Halo::unpackBuffer(ArrayHost2DI8 &Array) { } return 0; -} // end unpackBuffer ArrayHost2DI8 +} // end unpackBuffer HostArray2DI8 -int Halo::unpackBuffer(ArrayHost2DR4 &Array) { +int Halo::unpackBuffer(HostArray2DR4 &Array) { - ExchList *MyList = &MyNeighbor->RecvLists[MyElem]; - yakl::Dims MyDims = Array.get_dimensions(); - int NJ = MyDims[1]; + ExchList *MyList = &MyNeighbor->RecvLists[MyElem]; + int NJ = Array.extent(1); for (int ILayer = 0; ILayer < NumLayers; ++ILayer) { for (int IExch = 0; IExch < MyList->NList[ILayer]; ++IExch) { @@ -1072,13 +1053,12 @@ int Halo::unpackBuffer(ArrayHost2DR4 &Array) { } return 0; -} // end unpackBuffer ArrayHost2DR4 +} // end unpackBuffer HostArray2DR4 -int Halo::unpackBuffer(ArrayHost2DR8 &Array) { +int Halo::unpackBuffer(HostArray2DR8 &Array) { - ExchList *MyList = &MyNeighbor->RecvLists[MyElem]; - yakl::Dims MyDims = Array.get_dimensions(); - int NJ = MyDims[1]; + ExchList *MyList = &MyNeighbor->RecvLists[MyElem]; + int NJ = Array.extent(1); for (int ILayer = 0; ILayer < NumLayers; ++ILayer) { for (int IExch = 0; IExch < MyList->NList[ILayer]; ++IExch) { @@ -1091,14 +1071,13 @@ int Halo::unpackBuffer(ArrayHost2DR8 &Array) { } return 0; -} // end unpackBuffer ArrayHost2DR8 +} // end unpackBuffer HostArray2DR8 -int Halo::unpackBuffer(ArrayHost3DI4 &Array) { +int Halo::unpackBuffer(HostArray3DI4 &Array) { - ExchList *MyList = &MyNeighbor->RecvLists[MyElem]; - yakl::Dims MyDims = Array.get_dimensions(); - int NK = MyDims[0]; - int NJ = MyDims[2]; + ExchList *MyList = &MyNeighbor->RecvLists[MyElem]; + int NK = Array.extent(0); + int NJ = Array.extent(2); for (int K = 0; K < NK; ++K) { for (int ILayer = 0; ILayer < NumLayers; ++ILayer) { @@ -1115,14 +1094,13 @@ int Halo::unpackBuffer(ArrayHost3DI4 &Array) { } 
return 0; -} // end unpackBuffer ArrayHost3DI4 +} // end unpackBuffer HostArray3DI4 -int Halo::unpackBuffer(ArrayHost3DI8 &Array) { +int Halo::unpackBuffer(HostArray3DI8 &Array) { - ExchList *MyList = &MyNeighbor->RecvLists[MyElem]; - yakl::Dims MyDims = Array.get_dimensions(); - int NK = MyDims[0]; - int NJ = MyDims[2]; + ExchList *MyList = &MyNeighbor->RecvLists[MyElem]; + int NK = Array.extent(0); + int NJ = Array.extent(2); for (int K = 0; K < NK; ++K) { for (int ILayer = 0; ILayer < NumLayers; ++ILayer) { @@ -1139,14 +1117,13 @@ int Halo::unpackBuffer(ArrayHost3DI8 &Array) { } return 0; -} // end unpackBuffer ArrayHost3DI8 +} // end unpackBuffer HostArray3DI8 -int Halo::unpackBuffer(ArrayHost3DR4 &Array) { +int Halo::unpackBuffer(HostArray3DR4 &Array) { - ExchList *MyList = &MyNeighbor->RecvLists[MyElem]; - yakl::Dims MyDims = Array.get_dimensions(); - int NK = MyDims[0]; - int NJ = MyDims[2]; + ExchList *MyList = &MyNeighbor->RecvLists[MyElem]; + int NK = Array.extent(0); + int NJ = Array.extent(2); for (int K = 0; K < NK; ++K) { for (int ILayer = 0; ILayer < NumLayers; ++ILayer) { @@ -1163,14 +1140,13 @@ int Halo::unpackBuffer(ArrayHost3DR4 &Array) { } return 0; -} // end unpackBuffer ArrayHost3DR4 +} // end unpackBuffer HostArray3DR4 -int Halo::unpackBuffer(ArrayHost3DR8 &Array) { +int Halo::unpackBuffer(HostArray3DR8 &Array) { - ExchList *MyList = &MyNeighbor->RecvLists[MyElem]; - yakl::Dims MyDims = Array.get_dimensions(); - int NK = MyDims[0]; - int NJ = MyDims[2]; + ExchList *MyList = &MyNeighbor->RecvLists[MyElem]; + int NK = Array.extent(0); + int NJ = Array.extent(2); for (int K = 0; K < NK; ++K) { for (int ILayer = 0; ILayer < NumLayers; ++ILayer) { @@ -1187,15 +1163,14 @@ int Halo::unpackBuffer(ArrayHost3DR8 &Array) { } return 0; -} // end unpackBuffer ArrayHost3DR8 +} // end unpackBuffer HostArray3DR8 -int Halo::unpackBuffer(ArrayHost4DI4 &Array) { +int Halo::unpackBuffer(HostArray4DI4 &Array) { - ExchList *MyList = &MyNeighbor->RecvLists[MyElem]; 
- yakl::Dims MyDims = Array.get_dimensions(); - int NL = MyDims[0]; - int NK = MyDims[1]; - int NJ = MyDims[3]; + ExchList *MyList = &MyNeighbor->RecvLists[MyElem]; + int NL = Array.extent(0); + int NK = Array.extent(1); + int NJ = Array.extent(3); for (int L = 0; L < NL; ++L) { for (int K = 0; K < NK; ++K) { @@ -1215,15 +1190,14 @@ int Halo::unpackBuffer(ArrayHost4DI4 &Array) { } return 0; -} // end unpackBuffer ArrayHost4DI4 +} // end unpackBuffer HostArray4DI4 -int Halo::unpackBuffer(ArrayHost4DI8 &Array) { +int Halo::unpackBuffer(HostArray4DI8 &Array) { - ExchList *MyList = &MyNeighbor->RecvLists[MyElem]; - yakl::Dims MyDims = Array.get_dimensions(); - int NL = MyDims[0]; - int NK = MyDims[1]; - int NJ = MyDims[3]; + ExchList *MyList = &MyNeighbor->RecvLists[MyElem]; + int NL = Array.extent(0); + int NK = Array.extent(1); + int NJ = Array.extent(3); for (int L = 0; L < NL; ++L) { for (int K = 0; K < NK; ++K) { @@ -1243,15 +1217,14 @@ int Halo::unpackBuffer(ArrayHost4DI8 &Array) { } return 0; -} // end unpackBuffer ArrayHost4DI8 +} // end unpackBuffer HostArray4DI8 -int Halo::unpackBuffer(ArrayHost4DR4 &Array) { +int Halo::unpackBuffer(HostArray4DR4 &Array) { - ExchList *MyList = &MyNeighbor->RecvLists[MyElem]; - yakl::Dims MyDims = Array.get_dimensions(); - int NL = MyDims[0]; - int NK = MyDims[1]; - int NJ = MyDims[3]; + ExchList *MyList = &MyNeighbor->RecvLists[MyElem]; + int NL = Array.extent(0); + int NK = Array.extent(1); + int NJ = Array.extent(3); for (int L = 0; L < NL; ++L) { for (int K = 0; K < NK; ++K) { @@ -1271,15 +1244,14 @@ int Halo::unpackBuffer(ArrayHost4DR4 &Array) { } return 0; -} // end unpackBuffer ArrayHost4DR4 +} // end unpackBuffer HostArray4DR4 -int Halo::unpackBuffer(ArrayHost4DR8 &Array) { +int Halo::unpackBuffer(HostArray4DR8 &Array) { - ExchList *MyList = &MyNeighbor->RecvLists[MyElem]; - yakl::Dims MyDims = Array.get_dimensions(); - int NL = MyDims[0]; - int NK = MyDims[1]; - int NJ = MyDims[3]; + ExchList *MyList = 
&MyNeighbor->RecvLists[MyElem]; + int NL = Array.extent(0); + int NK = Array.extent(1); + int NJ = Array.extent(3); for (int L = 0; L < NL; ++L) { for (int K = 0; K < NK; ++K) { @@ -1299,16 +1271,15 @@ int Halo::unpackBuffer(ArrayHost4DR8 &Array) { } return 0; -} // end unpackBuffer ArrayHost4DR8 +} // end unpackBuffer HostArray4DR8 -int Halo::unpackBuffer(ArrayHost5DI4 &Array) { +int Halo::unpackBuffer(HostArray5DI4 &Array) { - ExchList *MyList = &MyNeighbor->RecvLists[MyElem]; - yakl::Dims MyDims = Array.get_dimensions(); - int NM = MyDims[0]; - int NL = MyDims[1]; - int NK = MyDims[2]; - int NJ = MyDims[4]; + ExchList *MyList = &MyNeighbor->RecvLists[MyElem]; + int NM = Array.extent(0); + int NL = Array.extent(1); + int NK = Array.extent(2); + int NJ = Array.extent(4); for (int M = 0; M < NM; ++M) { for (int L = 0; L < NL; ++L) { @@ -1330,16 +1301,15 @@ int Halo::unpackBuffer(ArrayHost5DI4 &Array) { } return 0; -} // end unpackBuffer ArrayHost5DI4 +} // end unpackBuffer HostArray5DI4 -int Halo::unpackBuffer(ArrayHost5DI8 &Array) { +int Halo::unpackBuffer(HostArray5DI8 &Array) { - ExchList *MyList = &MyNeighbor->RecvLists[MyElem]; - yakl::Dims MyDims = Array.get_dimensions(); - int NM = MyDims[0]; - int NL = MyDims[1]; - int NK = MyDims[2]; - int NJ = MyDims[4]; + ExchList *MyList = &MyNeighbor->RecvLists[MyElem]; + int NM = Array.extent(0); + int NL = Array.extent(1); + int NK = Array.extent(2); + int NJ = Array.extent(4); for (int M = 0; M < NM; ++M) { for (int L = 0; L < NL; ++L) { @@ -1361,16 +1331,15 @@ int Halo::unpackBuffer(ArrayHost5DI8 &Array) { } return 0; -} // end unpackBuffer ArrayHost5DI8 +} // end unpackBuffer HostArray5DI8 -int Halo::unpackBuffer(ArrayHost5DR4 &Array) { +int Halo::unpackBuffer(HostArray5DR4 &Array) { - ExchList *MyList = &MyNeighbor->RecvLists[MyElem]; - yakl::Dims MyDims = Array.get_dimensions(); - int NM = MyDims[0]; - int NL = MyDims[1]; - int NK = MyDims[2]; - int NJ = MyDims[4]; + ExchList *MyList = 
&MyNeighbor->RecvLists[MyElem]; + int NM = Array.extent(0); + int NL = Array.extent(1); + int NK = Array.extent(2); + int NJ = Array.extent(4); for (int M = 0; M < NM; ++M) { for (int L = 0; L < NL; ++L) { @@ -1392,16 +1361,15 @@ int Halo::unpackBuffer(ArrayHost5DR4 &Array) { } return 0; -} // end unpackBuffer ArrayHost5DR4 +} // end unpackBuffer HostArray5DR4 -int Halo::unpackBuffer(ArrayHost5DR8 &Array) { +int Halo::unpackBuffer(HostArray5DR8 &Array) { - ExchList *MyList = &MyNeighbor->RecvLists[MyElem]; - yakl::Dims MyDims = Array.get_dimensions(); - int NM = MyDims[0]; - int NL = MyDims[1]; - int NK = MyDims[2]; - int NJ = MyDims[4]; + ExchList *MyList = &MyNeighbor->RecvLists[MyElem]; + int NM = Array.extent(0); + int NL = Array.extent(1); + int NK = Array.extent(2); + int NJ = Array.extent(4); for (int M = 0; M < NM; ++M) { for (int L = 0; L < NL; ++L) { @@ -1423,7 +1391,7 @@ int Halo::unpackBuffer(ArrayHost5DR8 &Array) { } return 0; -} // end unpackBuffer ArrayHost5DR8 +} // end unpackBuffer HostArray5DR8 } // end namespace OMEGA diff --git a/components/omega/src/base/Halo.h b/components/omega/src/base/Halo.h index da6d84a5dc8a..a2bf1068d839 100644 --- a/components/omega/src/base/Halo.h +++ b/components/omega/src/base/Halo.h @@ -170,54 +170,54 @@ class Halo { /// the neighboring tasks int startSends(); - /// Buffer pack functions overloaded to each supported YAKL array type. + /// Buffer pack functions overloaded to each supported Kokkos array type. 
/// Select out the proper elements from the input Array to send to a /// neighboring task and pack them into SendBuffer for that Neighbor - int packBuffer(const ArrayHost1DI4 Array); - int packBuffer(const ArrayHost1DI8 Array); - int packBuffer(const ArrayHost1DR4 Array); - int packBuffer(const ArrayHost1DR8 Array); - int packBuffer(const ArrayHost2DI4 Array); - int packBuffer(const ArrayHost2DI8 Array); - int packBuffer(const ArrayHost2DR4 Array); - int packBuffer(const ArrayHost2DR8 Array); - int packBuffer(const ArrayHost3DI4 Array); - int packBuffer(const ArrayHost3DI8 Array); - int packBuffer(const ArrayHost3DR4 Array); - int packBuffer(const ArrayHost3DR8 Array); - int packBuffer(const ArrayHost4DI4 Array); - int packBuffer(const ArrayHost4DI8 Array); - int packBuffer(const ArrayHost4DR4 Array); - int packBuffer(const ArrayHost4DR8 Array); - int packBuffer(const ArrayHost5DI4 Array); - int packBuffer(const ArrayHost5DI8 Array); - int packBuffer(const ArrayHost5DR4 Array); - int packBuffer(const ArrayHost5DR8 Array); - - /// Buffer unpack functions overloaded to each supported YAKL array type. 
+ int packBuffer(const HostArray1DI4 Array); + int packBuffer(const HostArray1DI8 Array); + int packBuffer(const HostArray1DR4 Array); + int packBuffer(const HostArray1DR8 Array); + int packBuffer(const HostArray2DI4 Array); + int packBuffer(const HostArray2DI8 Array); + int packBuffer(const HostArray2DR4 Array); + int packBuffer(const HostArray2DR8 Array); + int packBuffer(const HostArray3DI4 Array); + int packBuffer(const HostArray3DI8 Array); + int packBuffer(const HostArray3DR4 Array); + int packBuffer(const HostArray3DR8 Array); + int packBuffer(const HostArray4DI4 Array); + int packBuffer(const HostArray4DI8 Array); + int packBuffer(const HostArray4DR4 Array); + int packBuffer(const HostArray4DR8 Array); + int packBuffer(const HostArray5DI4 Array); + int packBuffer(const HostArray5DI8 Array); + int packBuffer(const HostArray5DR4 Array); + int packBuffer(const HostArray5DR8 Array); + + /// Buffer unpack functions overloaded to each supported Kokkos array type. /// After receiving a message from a neighboring task, save the elements /// of RecvBuffer for that Neighbor into the corresponding halo elements /// of the input Array - int unpackBuffer(ArrayHost1DI4 &Array); - int unpackBuffer(ArrayHost1DI8 &Array); - int unpackBuffer(ArrayHost1DR4 &Array); - int unpackBuffer(ArrayHost1DR8 &Array); - int unpackBuffer(ArrayHost2DI4 &Array); - int unpackBuffer(ArrayHost2DI8 &Array); - int unpackBuffer(ArrayHost2DR4 &Array); - int unpackBuffer(ArrayHost2DR8 &Array); - int unpackBuffer(ArrayHost3DI4 &Array); - int unpackBuffer(ArrayHost3DI8 &Array); - int unpackBuffer(ArrayHost3DR4 &Array); - int unpackBuffer(ArrayHost3DR8 &Array); - int unpackBuffer(ArrayHost4DI4 &Array); - int unpackBuffer(ArrayHost4DI8 &Array); - int unpackBuffer(ArrayHost4DR4 &Array); - int unpackBuffer(ArrayHost4DR8 &Array); - int unpackBuffer(ArrayHost5DI4 &Array); - int unpackBuffer(ArrayHost5DI8 &Array); - int unpackBuffer(ArrayHost5DR4 &Array); - int unpackBuffer(ArrayHost5DR8 &Array); + int 
unpackBuffer(HostArray1DI4 &Array); + int unpackBuffer(HostArray1DI8 &Array); + int unpackBuffer(HostArray1DR4 &Array); + int unpackBuffer(HostArray1DR8 &Array); + int unpackBuffer(HostArray2DI4 &Array); + int unpackBuffer(HostArray2DI8 &Array); + int unpackBuffer(HostArray2DR4 &Array); + int unpackBuffer(HostArray2DR8 &Array); + int unpackBuffer(HostArray3DI4 &Array); + int unpackBuffer(HostArray3DI8 &Array); + int unpackBuffer(HostArray3DR4 &Array); + int unpackBuffer(HostArray3DR8 &Array); + int unpackBuffer(HostArray4DI4 &Array); + int unpackBuffer(HostArray4DI8 &Array); + int unpackBuffer(HostArray4DR4 &Array); + int unpackBuffer(HostArray4DR8 &Array); + int unpackBuffer(HostArray5DI4 &Array); + int unpackBuffer(HostArray5DI8 &Array); + int unpackBuffer(HostArray5DR4 &Array); + int unpackBuffer(HostArray5DR8 &Array); public: // Methods @@ -226,11 +226,11 @@ class Halo { Halo(const MachEnv *InEnv, const Decomp *InDecomp); //--------------------------------------------------------------------------- - // Function template to perform a full halo exchange on the input YAKL array + // Function template to perform a full halo exchange on the input Kokkos array // of any supported type defined on the input index space ThisElem template int - exchangeFullArrayHalo(T &Array, // YAKL array of any type + exchangeFullArrayHalo(T &Array, // Kokkos array of any type MeshElement ThisElem // index space Array is defined on ) { @@ -252,18 +252,17 @@ class Halo { // Determine the number of array elements per cell, edge, or vertex // in the input array - yakl::Dims MyDims = Array.get_dimensions(); - I4 NDims = MyDims.size(); + I4 NDims = Array.Rank; if (NDims == 1) { TotSize = 1; } else if (NDims == 2) { - TotSize = MyDims[1]; + TotSize = Array.extent(1); } else { TotSize = 1; for (int I = 0; I < NDims - 2; ++I) { - TotSize *= MyDims[I]; + TotSize *= Array.extent(I); } - TotSize *= MyDims[NDims - 1]; + TotSize *= Array.extent(NDims - 1); } // Allocate the receive buffers and 
Call MPI_Irecv for each Neighbor diff --git a/components/omega/src/infra/Config.cpp b/components/omega/src/infra/Config.cpp index f52430745715..a868eaf35c40 100644 --- a/components/omega/src/infra/Config.cpp +++ b/components/omega/src/infra/Config.cpp @@ -17,6 +17,7 @@ #include "mpi.h" #include "yaml-cpp/yaml.h" +#include #include #include diff --git a/components/omega/src/infra/IOField.h b/components/omega/src/infra/IOField.h index fb1f9b741faf..772b72163bc9 100644 --- a/components/omega/src/infra/IOField.h +++ b/components/omega/src/infra/IOField.h @@ -89,7 +89,7 @@ class IOField { //--------------------------------------------------------------------------- /// Retrieves IOField data array given the field name. Because all data - /// arrays in OMEGA are Kokkos/YAKL arrays, this is a shallow copy of the + /// arrays in OMEGA are Kokkos arrays, this is a shallow copy of the /// attached data array. This is a templated function on the supported /// OMEGA array types so a template argument with the proper type must /// also be supplied. diff --git a/components/omega/src/infra/LogFormatters.h b/components/omega/src/infra/LogFormatters.h index 90aa94cc64aa..d1eed707388a 100644 --- a/components/omega/src/infra/LogFormatters.h +++ b/components/omega/src/infra/LogFormatters.h @@ -12,28 +12,45 @@ #include "DataTypes.h" #include +// TODO: +// 1. Use template to create formatter for various array types +// 2. 
Consider using some of the following for formatting +// View.rank() +// View.rank_dynamic() +// View.stride_(0, 1,2,3...)() +// View.span() +// View.size() +// View.span_is_contiguous() +// View.use_count() +// View.label() +// View.is_allocated() +// ExecSpace.name() +// ExecSpace.print_configuration(ostr); +// ExecSpace.print_configuration(ostr, detail); +// MemSpace.name() + template <> -struct fmt::formatter : fmt::formatter { - auto format(OMEGA::ArrayHost1DReal my, format_context &ctx) +struct fmt::formatter : fmt::formatter { + auto format(OMEGA::HostArray1DReal my, format_context &ctx) -> decltype(ctx.out()) { #ifdef OMEGA_DEBUG return fmt::format_to( - ctx.out(), "[data type of '{}' is ArrayHost1DReal.]", my.label()); + ctx.out(), "[data type of '{}' is HostArray1DReal.]", my.label()); #else - return fmt::format_to(ctx.out(), "[data type of '' is ArrayHost1DReal.]"); + return fmt::format_to(ctx.out(), "[data type of '' is HostArray1DReal.]"); #endif } }; template <> -struct fmt::formatter : fmt::formatter { - auto format(OMEGA::ArrayHost2DReal my, format_context &ctx) +struct fmt::formatter : fmt::formatter { + auto format(OMEGA::HostArray2DReal my, format_context &ctx) -> decltype(ctx.out()) { #ifdef OMEGA_DEBUG return fmt::format_to( - ctx.out(), "[data type of '{}' is ArrayHost2DReal.]", my.label()); + ctx.out(), "[data type of '{}' is HostArray2DReal.]", my.label()); #else - return fmt::format_to(ctx.out(), "[data type of '' is ArrayHost2DReal.]"); + return fmt::format_to(ctx.out(), "[data type of '' is HostArray2DReal.]"); #endif } }; diff --git a/components/omega/src/infra/Logging.cpp b/components/omega/src/infra/Logging.cpp index 745296c020af..f135664eb4f6 100644 --- a/components/omega/src/infra/Logging.cpp +++ b/components/omega/src/infra/Logging.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "Logging.h" +#include #include namespace OMEGA { diff --git 
a/components/omega/src/infra/OmegaKokkos.h b/components/omega/src/infra/OmegaKokkos.h new file mode 100644 index 000000000000..2e23915b829f --- /dev/null +++ b/components/omega/src/infra/OmegaKokkos.h @@ -0,0 +1,122 @@ +#ifndef OMEGA_KOKKOS_H +#define OMEGA_KOKKOS_H +//===-- infra/OmegaKokkos.h - Omega extension of Kokkos -----*- C++ -*-===// +// +/// \file +/// \brief Extends Kokkos for Omega +/// +/// This header extends Kokkos for Omega. +// +//===----------------------------------------------------------------------===// + +#include "DataTypes.h" + +namespace OMEGA { + +#define OMEGA_SCOPE(a, b) auto &a = b + +using ExecSpace = MemSpace::execution_space; +using HostExecSpace = HostMemSpace::execution_space; + +#ifdef OMEGA_TARGET_DEVICE + +template +auto createHostCopy(const V &view) + -> Kokkos::View { + return Kokkos::create_mirror_view_and_copy(HostExecSpace(), view); +} + +template +auto createDeviceCopy(const V &view) + -> Kokkos::View { + return Kokkos::create_mirror_view_and_copy(ExecSpace(), view); +} + +#else + +template V createHostCopy(const V &view) { return view; } + +template V createDeviceCopy(const V &view) { return view; } + +#endif + +// function alias to follow Camel Naming Convention +template void deepCopy(D &dst, const S &src) { + Kokkos::deep_copy(dst, src); +} + +template +void deepCopy(E &space, D &dst, const S &src) { + Kokkos::deep_copy(space, dst, src); +} + +#if OMEGA_LAYOUT_RIGHT + +template +using Bounds = Kokkos::MDRangePolicy< + ExecSpace, Kokkos::Rank, + Args...>; + +#elif OMEGA_LAYOUT_LEFT + +template +using Bounds = Kokkos::MDRangePolicy< + ExecSpace, Kokkos::Rank, + Args...>; + +#else + +#error "OMEGA Memory Layout is not defined." 
+ +#endif + +// parallelFor: with label +template +inline void parallelFor(const std::string &label, const int (&upper_bounds)[N], + const F &f, + const int (&tile)[N] = DefaultTile::value) { + if constexpr (N == 1) { + const auto policy = Kokkos::RangePolicy(0, upper_bounds[0]); + Kokkos::parallel_for(label, policy, f); + + } else { + const int lower_bounds[N] = {0}; + const auto policy = Bounds(lower_bounds, upper_bounds, tile); + Kokkos::parallel_for(label, policy, f); + } +} + +// parallelFor: without label +template +inline void parallelFor(const int (&upper_bounds)[N], const F &f, + const int (&tile)[N] = DefaultTile::value) { + parallelFor("", upper_bounds, f, tile); +} + +// parallelReduce: with label +template +inline void parallelReduce(const std::string &label, + const int (&upper_bounds)[N], const F &f, R &reducer, + const int (&tile)[N] = DefaultTile::value) { + if constexpr (N == 1) { + const auto policy = Kokkos::RangePolicy(0, upper_bounds[0]); + Kokkos::parallel_reduce(label, policy, f, reducer); + + } else { + const int lower_bounds[N] = {0}; + const auto policy = Bounds(lower_bounds, upper_bounds, tile); + Kokkos::parallel_reduce(label, policy, f, reducer); + } +} + +// parallelReduce: without label (forwards to the labeled overload) +template +inline void parallelReduce(const int (&upper_bounds)[N], const F &f, R &reducer, + const int (&tile)[N] = DefaultTile::value) { + parallelReduce("", upper_bounds, f, reducer, tile); +} + +} // end namespace OMEGA + +//===----------------------------------------------------------------------===// +#endif diff --git a/components/omega/src/ocn/HorzMesh.cpp b/components/omega/src/ocn/HorzMesh.cpp index 1f6a354725c0..af4012739417 100644 --- a/components/omega/src/ocn/HorzMesh.cpp +++ b/components/omega/src/ocn/HorzMesh.cpp @@ -15,6 +15,7 @@ #include "IO.h" #include "Logging.h" #include "MachEnv.h" +#include "OmegaKokkos.h" namespace OMEGA { @@ -37,7 +38,6 @@ int HorzMesh::init() { // Retrieve this mesh and set pointer to DefaultHorzMesh 
HorzMesh::DefaultHorzMesh = HorzMesh::get("Default"); - return Err; } @@ -135,7 +135,7 @@ HorzMesh::HorzMesh(const std::string &Name, //< [in] Name for new mesh // Destroys a local mesh and deallocates all arrays HorzMesh::~HorzMesh() { - // No operations needed, YAKL arrays removed when no longer in scope + // No operations needed, Kokkos arrays removed when no longer in scope } // end deconstructor @@ -279,35 +279,35 @@ void HorzMesh::readCoordinates() { // Read mesh cell coordinates int XCellID; - XCellH = ArrayHost1DR8("XCell", NCellsSize); + XCellH = HostArray1DR8("XCell", NCellsSize); Err = IO::readArray(XCellH.data(), NCellsAll, "xCell", MeshFileID, CellDecompR8, XCellID); if (Err != 0) LOG_CRITICAL("HorzMesh: error reading xCell"); int YCellID; - YCellH = ArrayHost1DR8("YCell", NCellsSize); + YCellH = HostArray1DR8("YCell", NCellsSize); Err = IO::readArray(YCellH.data(), NCellsAll, "yCell", MeshFileID, CellDecompR8, YCellID); if (Err != 0) LOG_CRITICAL("HorzMesh: error reading yCell"); int ZCellID; - ZCellH = ArrayHost1DR8("ZCell", NCellsSize); + ZCellH = HostArray1DR8("ZCell", NCellsSize); Err = IO::readArray(ZCellH.data(), NCellsAll, "zCell", MeshFileID, CellDecompR8, ZCellID); if (Err != 0) LOG_CRITICAL("HorzMesh: error reading zCell"); int LonCellID; - LonCellH = ArrayHost1DR8("LonCell", NCellsSize); + LonCellH = HostArray1DR8("LonCell", NCellsSize); Err = IO::readArray(LonCellH.data(), NCellsAll, "lonCell", MeshFileID, CellDecompR8, LonCellID); if (Err != 0) LOG_CRITICAL("HorzMesh: error reading lonCell"); int LatCellID; - LatCellH = ArrayHost1DR8("LatCell", NCellsSize); + LatCellH = HostArray1DR8("LatCell", NCellsSize); Err = IO::readArray(LatCellH.data(), NCellsAll, "latCell", MeshFileID, CellDecompR8, LatCellID); if (Err != 0) @@ -315,35 +315,35 @@ void HorzMesh::readCoordinates() { // Read mesh edge coordinateID int XEdgeID; - XEdgeH = ArrayHost1DR8("XEdge", NEdgesSize); + XEdgeH = HostArray1DR8("XEdge", NEdgesSize); Err = 
IO::readArray(XEdgeH.data(), NEdgesAll, "xEdge", MeshFileID, EdgeDecompR8, XEdgeID); if (Err != 0) LOG_CRITICAL("HorzMesh: error reading xEdge"); int YEdgeID; - YEdgeH = ArrayHost1DR8("YEdge", NEdgesSize); + YEdgeH = HostArray1DR8("YEdge", NEdgesSize); Err = IO::readArray(YEdgeH.data(), NEdgesAll, "yEdge", MeshFileID, EdgeDecompR8, YEdgeID); if (Err != 0) LOG_CRITICAL("HorzMesh: error reading yEdge"); int ZEdgeID; - ZEdgeH = ArrayHost1DR8("ZEdge", NEdgesSize); + ZEdgeH = HostArray1DR8("ZEdge", NEdgesSize); Err = IO::readArray(ZEdgeH.data(), NEdgesAll, "zEdge", MeshFileID, EdgeDecompR8, ZEdgeID); if (Err != 0) LOG_CRITICAL("HorzMesh: error reading zEdge"); int LonEdgeID; - LonEdgeH = ArrayHost1DR8("LonEdge", NEdgesSize); + LonEdgeH = HostArray1DR8("LonEdge", NEdgesSize); Err = IO::readArray(LonEdgeH.data(), NEdgesAll, "lonEdge", MeshFileID, EdgeDecompR8, LonEdgeID); if (Err != 0) LOG_CRITICAL("HorzMesh: error reading lonEdge"); int LatEdgeID; - LatEdgeH = ArrayHost1DR8("LatEdge", NEdgesSize); + LatEdgeH = HostArray1DR8("LatEdge", NEdgesSize); Err = IO::readArray(LatEdgeH.data(), NEdgesAll, "latEdge", MeshFileID, EdgeDecompR8, LatEdgeID); if (Err != 0) @@ -351,35 +351,35 @@ void HorzMesh::readCoordinates() { // Read mesh vertex coordinates int XVertexID; - XVertexH = ArrayHost1DR8("XVertex", NVerticesSize); + XVertexH = HostArray1DR8("XVertex", NVerticesSize); Err = IO::readArray(XVertexH.data(), NVerticesAll, "xVertex", MeshFileID, VertexDecompR8, XVertexID); if (Err != 0) LOG_CRITICAL("HorzMesh: error reading xVertex"); int YVertexID; - YVertexH = ArrayHost1DR8("YVertex", NVerticesSize); + YVertexH = HostArray1DR8("YVertex", NVerticesSize); Err = IO::readArray(YVertexH.data(), NVerticesAll, "yVertex", MeshFileID, VertexDecompR8, YVertexID); if (Err != 0) LOG_CRITICAL("HorzMesh: error reading yVertex"); int ZVertexID; - ZVertexH = ArrayHost1DR8("ZVertex", NVerticesSize); + ZVertexH = HostArray1DR8("ZVertex", NVerticesSize); Err = IO::readArray(ZVertexH.data(), 
NVerticesAll, "zVertex", MeshFileID, VertexDecompR8, ZVertexID); if (Err != 0) LOG_CRITICAL("HorzMesh: error reading zVertex"); int LonVertexID; - LonVertexH = ArrayHost1DR8("LonVertex", NVerticesSize); + LonVertexH = HostArray1DR8("LonVertex", NVerticesSize); Err = IO::readArray(LonVertexH.data(), NVerticesAll, "lonVertex", MeshFileID, VertexDecompR8, LonVertexID); if (Err != 0) LOG_CRITICAL("HorzMesh: error reading lonVertex"); int LatVertexID; - LatVertexH = ArrayHost1DR8("LatVertex", NVerticesSize); + LatVertexH = HostArray1DR8("LatVertex", NVerticesSize); Err = IO::readArray(LatVertexH.data(), NVerticesAll, "latVertex", MeshFileID, VertexDecompR8, LatVertexID); if (Err != 0) @@ -394,7 +394,7 @@ void HorzMesh::readBottomDepth() { I4 Err; int BottomDepthID; - BottomDepthH = ArrayHost1DR8("BottomDepth", NCellsSize); + BottomDepthH = HostArray1DR8("BottomDepth", NCellsSize); Err = IO::readArray(BottomDepthH.data(), NCellsAll, "bottomDepth", MeshFileID, CellDecompR8, BottomDepthID); if (Err != 0) @@ -410,42 +410,42 @@ void HorzMesh::readMeasurements() { I4 Err; int AreaCellID; - AreaCellH = ArrayHost1DR8("AreaCell", NCellsSize); + AreaCellH = HostArray1DR8("AreaCell", NCellsSize); Err = IO::readArray(AreaCellH.data(), NCellsAll, "areaCell", MeshFileID, CellDecompR8, AreaCellID); if (Err != 0) LOG_CRITICAL("HorzMesh: error reading areaCell"); int AreaTriangleID; - AreaTriangleH = ArrayHost1DR8("AreaTriangle", NVerticesSize); + AreaTriangleH = HostArray1DR8("AreaTriangle", NVerticesSize); Err = IO::readArray(AreaTriangleH.data(), NVerticesAll, "areaTriangle", MeshFileID, VertexDecompR8, AreaTriangleID); if (Err != 0) LOG_CRITICAL("HorzMesh: error reading areaTriangle"); int DvEdgeID; - DvEdgeH = ArrayHost1DR8("DvEdge", NEdgesSize); + DvEdgeH = HostArray1DR8("DvEdge", NEdgesSize); Err = IO::readArray(DvEdgeH.data(), NEdgesAll, "dvEdge", MeshFileID, EdgeDecompR8, DvEdgeID); if (Err != 0) LOG_CRITICAL("HorzMesh: error reading dvEdge"); int DcEdgeID; - DcEdgeH = 
ArrayHost1DR8("DcEdge", NEdgesSize); + DcEdgeH = HostArray1DR8("DcEdge", NEdgesSize); Err = IO::readArray(DcEdgeH.data(), NEdgesAll, "dcEdge", MeshFileID, EdgeDecompR8, DcEdgeID); if (Err != 0) LOG_CRITICAL("HorzMesh: error reading dcEdge"); int AngleEdgeID; - AngleEdgeH = ArrayHost1DR8("AngleEdge", NEdgesSize); + AngleEdgeH = HostArray1DR8("AngleEdge", NEdgesSize); Err = IO::readArray(AngleEdgeH.data(), NEdgesAll, "angleEdge", MeshFileID, EdgeDecompR8, AngleEdgeID); if (Err != 0) LOG_CRITICAL("HorzMesh: error reading angleEdge"); int MeshDensityID; - MeshDensityH = ArrayHost1DR8("MeshDensity", NCellsSize); + MeshDensityH = HostArray1DR8("MeshDensity", NCellsSize); Err = IO::readArray(MeshDensityH.data(), NCellsAll, "meshDensity", MeshFileID, CellDecompR8, MeshDensityID); if (Err != 0) @@ -453,7 +453,7 @@ void HorzMesh::readMeasurements() { int KiteAreasOnVertexID; KiteAreasOnVertexH = - ArrayHost2DR8("KiteAreasOnVertex", NVerticesSize, VertexDegree); + HostArray2DR8("KiteAreasOnVertex", NVerticesSize, VertexDegree); Err = IO::readArray(KiteAreasOnVertexH.data(), NVerticesAll * VertexDegree, "kiteAreasOnVertex", MeshFileID, OnVertexDecompR8, KiteAreasOnVertexID); @@ -469,7 +469,7 @@ void HorzMesh::readWeights() { I4 Err; int WeightsOnEdgeID; - WeightsOnEdgeH = ArrayHost2DR8("WeightsOnEdge", NEdgesSize, MaxEdges2); + WeightsOnEdgeH = HostArray2DR8("WeightsOnEdge", NEdgesSize, MaxEdges2); Err = IO::readArray(WeightsOnEdgeH.data(), NEdgesAll * MaxEdges2, "weightsOnEdge", MeshFileID, OnEdgeDecompR8, WeightsOnEdgeID); @@ -485,21 +485,21 @@ void HorzMesh::readCoriolis() { int Err; int FCellID; - FCellH = ArrayHost1DR8("FCell", NCellsSize); + FCellH = HostArray1DR8("FCell", NCellsSize); Err = IO::readArray(FCellH.data(), NCellsAll, "fCell", MeshFileID, CellDecompR8, FCellID); if (Err != 0) LOG_CRITICAL("HorzMesh: error reading fCell"); int FVertexID; - FVertexH = ArrayHost1DR8("FVertex", NVerticesSize); + FVertexH = HostArray1DR8("FVertex", NVerticesSize); Err = 
IO::readArray(FVertexH.data(), NVerticesAll, "fVertex", MeshFileID, VertexDecompR8, FVertexID); if (Err != 0) LOG_CRITICAL("HorzMesh: error reading fVertex"); int FEdgeID; - FEdgeH = ArrayHost1DR8("FEdge", NEdgesSize); + FEdgeH = HostArray1DR8("FEdge", NEdgesSize); Err = IO::readArray(FEdgeH.data(), NEdgesAll, "fEdge", MeshFileID, EdgeDecompR8, FEdgeID); if (Err != 0) @@ -511,54 +511,67 @@ void HorzMesh::readCoriolis() { // Compute the sign of edge contributions to a cell/vertex for each edge void HorzMesh::computeEdgeSign() { - EdgeSignOnCell = Array2DR8("EdgeSignOnCell", NCellsSize, MaxEdges); - yakl::c::parallel_for( - yakl::c::SimpleBounds<1>(NCellsAll), YAKL_LAMBDA(int Cell) { - for (int i = 0; i < NEdgesOnCell(Cell); i++) { - int Edge = EdgesOnCell(Cell, i); + EdgeSignOnCell = Array2DR8("EdgeSignOnCell", NCellsSize, MaxEdges); + + OMEGA_SCOPE(o_NEdgesOnCell, NEdgesOnCell); + OMEGA_SCOPE(o_EdgesOnCell, EdgesOnCell); + OMEGA_SCOPE(o_CellsOnEdge, CellsOnEdge); + OMEGA_SCOPE(o_EdgeSignOnCell, EdgeSignOnCell); + + parallelFor( + {NCellsAll}, KOKKOS_LAMBDA(int Cell) { + for (int i = 0; i < o_NEdgesOnCell(Cell); i++) { + int Edge = o_EdgesOnCell(Cell, i); // Vector points from cell 0 to cell 1 - if (Cell == CellsOnEdge(Edge, 0)) { - EdgeSignOnCell(Cell, i) = -1.0; + if (Cell == o_CellsOnEdge(Edge, 0)) { + o_EdgeSignOnCell(Cell, i) = -1.0; } else { - EdgeSignOnCell(Cell, i) = 1.0; + o_EdgeSignOnCell(Cell, i) = 1.0; } } }); - EdgeSignOnCellH = EdgeSignOnCell.createHostCopy(); + + EdgeSignOnCellH = createHostCopy(EdgeSignOnCell); EdgeSignOnVertex = Array2DR8("EdgeSignOnVertex", NVerticesSize, VertexDegree); - yakl::c::parallel_for( - yakl::c::SimpleBounds<1>(NVerticesAll), YAKL_LAMBDA(int Vertex) { - for (int i = 0; i < VertexDegree; i++) { - int Edge = EdgesOnVertex(Vertex, i); + + OMEGA_SCOPE(o_VertexDegree, VertexDegree); + OMEGA_SCOPE(o_EdgesOnVertex, EdgesOnVertex); + OMEGA_SCOPE(o_VerticesOnEdge, VerticesOnEdge); + OMEGA_SCOPE(o_EdgeSignOnVertex, 
EdgeSignOnVertex); + + parallelFor( + {NVerticesAll}, KOKKOS_LAMBDA(int Vertex) { + for (int i = 0; i < o_VertexDegree; i++) { + int Edge = o_EdgesOnVertex(Vertex, i); // Vector points from vertex 0 to vertex 1 - if (Vertex == VerticesOnEdge(Edge, 0)) { - EdgeSignOnVertex(Vertex, i) = -1.0; + if (Vertex == o_VerticesOnEdge(Edge, 0)) { + o_EdgeSignOnVertex(Vertex, i) = -1.0; } else { - EdgeSignOnVertex(Vertex, i) = 1.0; + o_EdgeSignOnVertex(Vertex, i) = 1.0; } } }); - EdgeSignOnVertexH = EdgeSignOnVertex.createHostCopy(); + EdgeSignOnVertexH = createHostCopy(EdgeSignOnVertex); } // end computeEdgeSign //------------------------------------------------------------------------------ // Perform copy to device for mesh variables void HorzMesh::copyToDevice() { - AreaCell = AreaCellH.createDeviceCopy(); - AreaTriangle = AreaTriangleH.createDeviceCopy(); - KiteAreasOnVertex = KiteAreasOnVertexH.createDeviceCopy(); - DcEdge = DcEdgeH.createDeviceCopy(); - DvEdge = DvEdgeH.createDeviceCopy(); - AngleEdge = AngleEdgeH.createDeviceCopy(); - WeightsOnEdge = WeightsOnEdgeH.createDeviceCopy(); - FVertex = FVertexH.createDeviceCopy(); - BottomDepth = BottomDepthH.createDeviceCopy(); + AreaCell = createDeviceCopy(AreaCellH); + AreaTriangle = createDeviceCopy(AreaTriangleH); + KiteAreasOnVertex = createDeviceCopy(KiteAreasOnVertexH); + DcEdge = createDeviceCopy(DcEdgeH); + DvEdge = createDeviceCopy(DvEdgeH); + AngleEdge = createDeviceCopy(AngleEdgeH); + WeightsOnEdge = createDeviceCopy(WeightsOnEdgeH); + FVertex = createDeviceCopy(FVertexH); + BottomDepth = createDeviceCopy(BottomDepthH); } // end copyToDevice diff --git a/components/omega/src/ocn/HorzMesh.h b/components/omega/src/ocn/HorzMesh.h index ed079e99de5a..cf3c29e4f237 100644 --- a/components/omega/src/ocn/HorzMesh.h +++ b/components/omega/src/ocn/HorzMesh.h @@ -41,7 +41,7 @@ class HorzMesh { void readCoriolis(); - void computeEdgeSign(); + // void computeEdgeSign(); void copyToDevice(); @@ -57,6 +57,9 @@ class HorzMesh { 
static std::map AllHorzMeshes; public: + // KOKKOS_LAMBDA does not allow to have parallel_* functions inside of a + // private function. + void computeEdgeSign(); // Variables // Since these are used frequently, we make them public to reduce the // number of retrievals required. @@ -87,118 +90,118 @@ class HorzMesh { // Mesh connectivity Array2DI4 CellsOnCell; ///< Indx of cells that neighbor each cell - ArrayHost2DI4 CellsOnCellH; ///< Indx of cells that neighbor each cell + HostArray2DI4 CellsOnCellH; ///< Indx of cells that neighbor each cell Array2DI4 EdgesOnCell; ///< Indx of edges that border each cell - ArrayHost2DI4 EdgesOnCellH; ///< Indx of edges that border each cell + HostArray2DI4 EdgesOnCellH; ///< Indx of edges that border each cell Array1DI4 NEdgesOnCell; ///< Num of active edges around each cell - ArrayHost1DI4 NEdgesOnCellH; ///< Num of active edges around each cell + HostArray1DI4 NEdgesOnCellH; ///< Num of active edges around each cell Array2DI4 VerticesOnCell; ///< Indx of vertices bordering each cell - ArrayHost2DI4 VerticesOnCellH; ///< Indx of vertices bordering each cell + HostArray2DI4 VerticesOnCellH; ///< Indx of vertices bordering each cell Array2DI4 CellsOnEdge; ///< Indx of cells straddling each edge - ArrayHost2DI4 CellsOnEdgeH; ///< Indx of cells straddling each edge + HostArray2DI4 CellsOnEdgeH; ///< Indx of cells straddling each edge Array2DI4 EdgesOnEdge; ///< Indx of edges around cells across each edge - ArrayHost2DI4 EdgesOnEdgeH; ///< Indx of edges around cells across each edge + HostArray2DI4 EdgesOnEdgeH; ///< Indx of edges around cells across each edge Array1DI4 NEdgesOnEdge; ///< Num of edges around the cells across edge - ArrayHost1DI4 NEdgesOnEdgeH; ///< Num of edges around the cells across edge + HostArray1DI4 NEdgesOnEdgeH; ///< Num of edges around the cells across edge Array2DI4 VerticesOnEdge; ///< Indx of vertices straddling each edge - ArrayHost2DI4 VerticesOnEdgeH; ///< Indx of vertices straddling each edge + 
HostArray2DI4 VerticesOnEdgeH; ///< Indx of vertices straddling each edge Array2DI4 CellsOnVertex; ///< Indx of cells that share a vertex - ArrayHost2DI4 CellsOnVertexH; ///< Indx of cells that share a vertex + HostArray2DI4 CellsOnVertexH; ///< Indx of cells that share a vertex Array2DI4 EdgesOnVertex; ///< Indx of edges sharing vertex as endpoint - ArrayHost2DI4 EdgesOnVertexH; ///< Indx of edges sharing vertex as endpoint + HostArray2DI4 EdgesOnVertexH; ///< Indx of edges sharing vertex as endpoint // Coordinates - ArrayHost1DR8 XCellH; ///< X Coordinates of cell centers (m) - ArrayHost1DR8 YCellH; ///< Y Coordinates of cell centers (m) - ArrayHost1DR8 ZCellH; ///< Z Coordinates of cell centers (m) - ArrayHost1DR8 LonCellH; ///< Longitude location of cell centers (radians) - ArrayHost1DR8 LatCellH; ///< Latitude location of cell centers (radians) + HostArray1DR8 XCellH; ///< X Coordinates of cell centers (m) + HostArray1DR8 YCellH; ///< Y Coordinates of cell centers (m) + HostArray1DR8 ZCellH; ///< Z Coordinates of cell centers (m) + HostArray1DR8 LonCellH; ///< Longitude location of cell centers (radians) + HostArray1DR8 LatCellH; ///< Latitude location of cell centers (radians) - ArrayHost1DR8 XEdgeH; ///< X Coordinate of edge midpoints (m) - ArrayHost1DR8 YEdgeH; ///< Y Coordinate of edge midpoints (m) - ArrayHost1DR8 ZEdgeH; ///< Z Coordinate of edge midpoints (m) - ArrayHost1DR8 LonEdgeH; ///< Longitude location of edge midpoints (radians) - ArrayHost1DR8 LatEdgeH; ///< Latitude location of edge midpoints (radians) + HostArray1DR8 XEdgeH; ///< X Coordinate of edge midpoints (m) + HostArray1DR8 YEdgeH; ///< Y Coordinate of edge midpoints (m) + HostArray1DR8 ZEdgeH; ///< Z Coordinate of edge midpoints (m) + HostArray1DR8 LonEdgeH; ///< Longitude location of edge midpoints (radians) + HostArray1DR8 LatEdgeH; ///< Latitude location of edge midpoints (radians) - ArrayHost1DR8 XVertexH; ///< X Coordinate of vertices (m) - ArrayHost1DR8 YVertexH; ///< Y Coordinate 
of vertices (m) - ArrayHost1DR8 ZVertexH; ///< Z Coordinate of vertices (m) - ArrayHost1DR8 LonVertexH; ///< Longitude location of vertices (radians) - ArrayHost1DR8 LatVertexH; ///< Latitude location of vertices (radians) + HostArray1DR8 XVertexH; ///< X Coordinate of vertices (m) + HostArray1DR8 YVertexH; ///< Y Coordinate of vertices (m) + HostArray1DR8 ZVertexH; ///< Z Coordinate of vertices (m) + HostArray1DR8 LonVertexH; ///< Longitude location of vertices (radians) + HostArray1DR8 LatVertexH; ///< Latitude location of vertices (radians) // Mesh measurements Array1DR8 AreaCell; ///< Area of each cell (m^2) - ArrayHost1DR8 AreaCellH; ///< Area of each cell (m^2) + HostArray1DR8 AreaCellH; ///< Area of each cell (m^2) Array1DR8 AreaTriangle; ///< Area of each triangle in the dual grid (m^2) - ArrayHost1DR8 + HostArray1DR8 AreaTriangleH; ///< Area of each triangle in the dual grid (m^2) Array2DR8 KiteAreasOnVertex; ///< Area of the portions of each dual cell that /// are part of each cellsOnVertex (m^2) - ArrayHost2DR8 + HostArray2DR8 KiteAreasOnVertexH; ///< Area of the portions of each dual cell that /// are part of each cellsOnVertex (m^2) Array1DR8 DvEdge; ///< Length of each edge, computed as the distance between /// verticesOnEdge (m) - ArrayHost1DR8 DvEdgeH; ///< Length of each edge, computed as the distance + HostArray1DR8 DvEdgeH; ///< Length of each edge, computed as the distance /// between verticesOnEdge (m) Array1DR8 DcEdge; ///< Length of each edge, computed as the distance between /// CellsOnEdge (m) - ArrayHost1DR8 DcEdgeH; ///< Length of each edge, computed as the distance + HostArray1DR8 DcEdgeH; ///< Length of each edge, computed as the distance /// between CellsOnEdge (m) Array1DR8 AngleEdge; ///< Angle the edge normal makes with local eastward /// direction (radians) - ArrayHost1DR8 AngleEdgeH; ///< Angle the edge normal makes with local + HostArray1DR8 AngleEdgeH; ///< Angle the edge normal makes with local /// eastward direction (radians) 
- ArrayHost1DR8 MeshDensityH; ///< Value of density function used to generate a + HostArray1DR8 MeshDensityH; ///< Value of density function used to generate a /// particular mesh at cell centers // Weights Array2DR8 WeightsOnEdge; ///< Reconstruction weights associated with each of /// the edgesOnEdge - ArrayHost2DR8 WeightsOnEdgeH; ///< Reconstruction weights associated with + HostArray2DR8 WeightsOnEdgeH; ///< Reconstruction weights associated with /// each of the edgesOnEdge // Coriolis Array1DR8 FEdge; ///< Coriolis parameter at edges (radians s^-1) - ArrayHost1DR8 FEdgeH; ///< Coriolis parameter at edges (radians s^-1) + HostArray1DR8 FEdgeH; ///< Coriolis parameter at edges (radians s^-1) Array1DR8 FCell; ///< Coriolis parameter at cell centers (radians s^-1) - ArrayHost1DR8 FCellH; ///< Coriolis parameter at cell centers (radians s^-1) + HostArray1DR8 FCellH; ///< Coriolis parameter at cell centers (radians s^-1) Array1DR8 FVertex; ///< Coriolis parameter at vertices (radians s^-1) - ArrayHost1DR8 FVertexH; ///< Coriolis parameter at vertices (radians s^-1) + HostArray1DR8 FVertexH; ///< Coriolis parameter at vertices (radians s^-1) // Depth Array1DR8 BottomDepth; ///< Depth of the bottom of the ocean (m) - ArrayHost1DR8 BottomDepthH; ///< Depth of the bottom of the ocean (m) + HostArray1DR8 BottomDepthH; ///< Depth of the bottom of the ocean (m) // Edge sign Array2DR8 EdgeSignOnCell; ///< Sign of vector connecting cells - ArrayHost2DR8 EdgeSignOnCellH; ///< Sign of vector connecting cells + HostArray2DR8 EdgeSignOnCellH; ///< Sign of vector connecting cells Array2DR8 EdgeSignOnVertex; ///< Sign of vector connecting vertices - ArrayHost2DR8 EdgeSignOnVertexH; ///< Sign of vector connecting vertices + HostArray2DR8 EdgeSignOnVertexH; ///< Sign of vector connecting vertices // Methods diff --git a/components/omega/src/ocn/OcnDummy.cpp b/components/omega/src/ocn/OcnDummy.cpp index 54718e061c9d..0189cec7b800 100644 --- a/components/omega/src/ocn/OcnDummy.cpp 
+++ b/components/omega/src/ocn/OcnDummy.cpp @@ -3,82 +3,55 @@ /// \file /// \brief implements a placeholder for OCN driver /// -/// This implements a placeholder for OCN driver. The content is copied from -/// https://github.com/mrnorman/YAKL/blob/main/unit/CArray/CArray.cpp -// //===----------------------------------------------------------------------===// -#include "DataTypes.h" -#include "Logging.h" +#include + +#include #include -using namespace OMEGA; - -void die(std::string msg) { yakl::yakl_throw(msg.c_str()); } - -void dummy(int argc, char **argv) { - - initLogging(std::string(OmegaDefaultLogfile)); - - yakl::init(); - { - int constexpr d1 = 2; - int constexpr d2 = 3; - - LOG_INFO("Starting main..."); - - ArrayHost1DReal test1d("test1d", d1); - ArrayHost2DReal test2d("test2d", d1, d2); - - LOG_INFO("1d var {}", test1d); - LOG_INFO("2d var {}", test2d); - - yakl::memset(test1d, 0.f); - yakl::memset(test2d, 0.f); - - yakl::c::parallel_for( - YAKL_AUTO_LABEL(), yakl::c::Bounds<1>(d1), - YAKL_LAMBDA(int i1) { test1d(i1) = 1; }); - yakl::c::parallel_for( - YAKL_AUTO_LABEL(), yakl::c::Bounds<2>(d1, d2), - YAKL_LAMBDA(int i1, int i2) { test2d(i1, i2) = 1; }); - - if (yakl::intrinsics::sum(test1d) != d1) { - die("LOOPS: wrong sum for test1d"); - } - if (yakl::intrinsics::sum(test2d) != d1 * d2) { - die("LOOPS: wrong sum for test2d"); - } - - if (test1d.get_rank() != 1) { - die("Ranks: wrong rank for test1d"); - } - if (test2d.get_rank() != 2) { - die("Ranks: wrong rank for test2d"); - } - - if (test1d.get_elem_count() != d1) { - die("get_elem_count: wrong value for test1d"); - } - if (test2d.get_elem_count() != d1 * d2) { - die("get_elem_count: wrong value for test2d"); - } - - if (yakl::intrinsics::sum(test1d.get_dimensions()) != d1) { - die("get_dimensions: wrong value for test1d"); - } - if (yakl::intrinsics::sum(test2d.get_dimensions()) != d1 + d2) { - die("get_dimensions: wrong value for test2d"); - } - - if (test1d.extent(0) != d1) { - die("extent: wrong 
value for test1d"); - } - if (test2d.extent(1) != d2) { - die("extent: wrong value for test2d"); - } - - LOG_INFO("Finished main."); +// using namespace OMEGA; + +int dummy(int argc, char **argv) { + + // initLogging(std::string(OmegaDefaultLogfile)); + Kokkos::initialize(argc, argv); + Kokkos::DefaultExecutionSpace{}.print_configuration(std::cout); + + if (argc < 2) { + fprintf(stderr, "Usage: %s [] \n", argv[0]); + Kokkos::finalize(); + exit(1); + } + + const long n = strtol(argv[1], nullptr, 10); + + printf("Number of even integers from 0 to %ld\n", n - 1); + + Kokkos::Timer timer; + timer.reset(); + + // Compute the number of even integers from 0 to n-1, in parallel. + long count = 0; + Kokkos::parallel_reduce( + n, KOKKOS_LAMBDA(const long i, long &lcount) { lcount += (i % 2) == 0; }, + count); + + double count_time = timer.seconds(); + printf(" Parallel: %ld %10.6f\n", count, count_time); + + timer.reset(); + + // Compare to a sequential loop. + long seq_count = 0; + for (long i = 0; i < n; ++i) { + seq_count += (i % 2) == 0; } - yakl::finalize(); + + count_time = timer.seconds(); + printf("Sequential: %ld %10.6f\n", seq_count, count_time); + + Kokkos::finalize(); + + return (count == seq_count) ? 
0 : -1; } diff --git a/components/omega/test/CMakeLists.txt b/components/omega/test/CMakeLists.txt index 4d5dfdd81642..b5d3dfccb69c 100644 --- a/components/omega/test/CMakeLists.txt +++ b/components/omega/test/CMakeLists.txt @@ -4,406 +4,134 @@ # Data type test ################## -set(_TestDataTypesName testDataTypes.exe) +add_executable(testDataTypes.exe base/DataTypesTest.cpp) -add_executable(${_TestDataTypesName} base/DataTypesTest.cpp) - -target_compile_options( - ${_TestDataTypesName} - PRIVATE - "-I${OMEGA_SOURCE_DIR}/src/base" - "-I${OMEGA_SOURCE_DIR}/src/infra" - ${OMEGA_CXX_FLAGS} -) - -target_link_options( - ${_TestDataTypesName} - PRIVATE - ${OMEGA_LINK_OPTIONS} -) - -target_link_libraries(${_TestDataTypesName} ${OMEGA_LIB_NAME} yakl parmetis metis) - -if(GKlib_FOUND) - target_link_libraries(${_TestDataTypesName} gklib) -endif() - -add_test( - NAME DATA_TYPES_TEST - COMMAND ${MPI_EXEC} -n 1 -- ./${_TestDataTypesName} -) +target_link_libraries(testDataTypes.exe ${OMEGA_LIB_NAME}) +add_test(NAME DATA_TYPES_TEST COMMAND ${MPI_EXEC} --cpu-bind=cores -n 1 -- ./testDataTypes.exe) ################## # Machine env test ################## -set(_TestMachEnvName testMachEnv.exe) - -add_executable(${_TestMachEnvName} base/MachEnvTest.cpp) +add_executable(testMachEnv.exe base/MachEnvTest.cpp) -target_compile_options( - ${_TestMachEnvName} - PRIVATE - "-I${OMEGA_SOURCE_DIR}/src/base" - "-I${OMEGA_SOURCE_DIR}/src/infra" - ${OMEGA_CXX_FLAGS} -) +target_link_libraries(testMachEnv.exe ${OMEGA_LIB_NAME}) -target_link_options( - ${_TestMachEnvName} - PRIVATE - ${OMEGA_LINK_OPTIONS} -) - -target_link_libraries(${_TestMachEnvName} ${OMEGA_LIB_NAME} yakl) - -add_test( - NAME MACHINE_ENV_TEST - COMMAND ${MPI_EXEC} -n 8 -- ./${_TestMachEnvName} -) +add_test(NAME MACHINE_ENV_TEST COMMAND ${MPI_EXEC} --cpu-bind=cores -n 8 -- ./testMachEnv.exe) ################## # Broadcast test ################## -set(_TestBroadcastName testBroadcast.exe) - -# Add broadcast test 
-add_executable(${_TestBroadcastName} base/BroadcastTest.cpp) +add_executable(testBroadcast.exe base/BroadcastTest.cpp) -target_compile_options( - ${_TestBroadcastName} - PRIVATE - "-I${OMEGA_SOURCE_DIR}/src/base" - "-I${OMEGA_SOURCE_DIR}/src/infra" - ${OMEGA_CXX_FLAGS} -) +target_link_libraries(testBroadcast.exe ${OMEGA_LIB_NAME}) -target_link_options( - ${_TestBroadcastName} - PRIVATE - ${OMEGA_LINK_OPTIONS} -) - -target_link_libraries(${_TestBroadcastName} ${OMEGA_LIB_NAME} spdlog yakl) - -add_test( - NAME BROADCAST_TEST - COMMAND ${MPI_EXEC} -n 8 -- ./${_TestBroadcastName} -) +add_test(NAME BROADCAST_TEST COMMAND ${MPI_EXEC} --cpu-bind=cores -n 8 -- ./testBroadcast.exe) ################## # Logging test ################## -set(_TestLoggingName testLogging.exe) - -add_executable(${_TestLoggingName} infra/LoggingTest.cpp) - -target_compile_options( - ${_TestLoggingName} - PRIVATE - "-I${OMEGA_SOURCE_DIR}/src/base" - "-I${OMEGA_SOURCE_DIR}/src/infra" - ${OMEGA_CXX_FLAGS} -) - -target_link_options( - ${_TestLoggingName} - PRIVATE - ${OMEGA_LINK_OPTIONS} -) +add_executable(testLogging.exe infra/LoggingTest.cpp) -target_link_libraries(${_TestLoggingName} ${OMEGA_LIB_NAME} spdlog yakl) +target_link_libraries(testLogging.exe ${OMEGA_LIB_NAME}) -add_test(NAME LOGGING_TEST COMMAND ./${_TestLoggingName}) +add_test(NAME LOGGING_TEST COMMAND ./testLogging.exe) ############# # Decomp test ############# -set(_TestDecompName testDecomp.exe) - -add_executable(${_TestDecompName} base/DecompTest.cpp) - -target_include_directories( - ${_TestDecompName} - PRIVATE - ${OMEGA_SOURCE_DIR}/src/base - ${OMEGA_SOURCE_DIR}/src/infra - ${Parmetis_INCLUDE_DIRS} -) - -target_compile_options( - ${_TestDecompName} - PRIVATE - ${OMEGA_CXX_FLAGS} -) - -target_link_options( - ${_TestDecompName} - PRIVATE - ${OMEGA_LINK_OPTIONS} -) +add_executable(testDecomp.exe base/DecompTest.cpp) -target_link_libraries(${_TestDecompName} ${OMEGA_LIB_NAME} spdlog yakl parmetis metis pioc) 
+target_link_libraries(testDecomp.exe ${OMEGA_LIB_NAME}) -if(GKlib_FOUND) - target_link_libraries(${_TestDecompName} gklib) -endif() +add_test( NAME DECOMP_TEST COMMAND ${MPI_EXEC} -n 8 --cpu-bind=cores -- ./testDecomp.exe) -add_test( - NAME DECOMP_TEST - COMMAND ${MPI_EXEC} -n 8 -- ./${_TestDecompName} -) - -################## -# Halo test -################## - -set(_TestHaloName testHalo.exe) - -# Add halo test -add_executable(${_TestHaloName} base/HaloTest.cpp) - -target_include_directories( - ${_TestHaloName} - PRIVATE - ${OMEGA_SOURCE_DIR}/src/base - ${OMEGA_SOURCE_DIR}/src/infra - ${Parmetis_INCLUDE_DIRS} -) - -target_compile_options( - ${_TestHaloName} - PRIVATE - ${OMEGA_CXX_FLAGS} -) - -target_link_libraries(${_TestHaloName} ${OMEGA_LIB_NAME} spdlog yakl parmetis metis pioc) - -add_test( - NAME HALO_TEST - COMMAND ${MPI_EXEC} -n 8 -- ./${_TestHaloName} -) +################### +## Halo test +################### +# +#add_executable(testHalo.exe base/HaloTest.cpp) +# +#target_link_libraries(testHalo.exe ${OMEGA_LIB_NAME}) +# +#add_test(NAME HALO_TEST COMMAND ${MPI_EXEC} --cpu-bind=cores -n 8 -- ./testHalo.exe) ################ # HorzMesh test ################ -set(_TestHorzMeshName testHorzMesh.exe) +add_executable(testHorzMesh.exe ocn/HorzMeshTest.cpp) -# Add broadcast test -add_executable(${_TestHorzMeshName} ocn/HorzMeshTest.cpp) +target_link_libraries(testHorzMesh.exe ${OMEGA_LIB_NAME}) -target_include_directories( - ${_TestHorzMeshName} - PRIVATE - ${OMEGA_SOURCE_DIR}/src/base - ${OMEGA_SOURCE_DIR}/src/infra - ${OMEGA_SOURCE_DIR}/src/ocn - ${Parmetis_INCLUDE_DIRS} -) - -target_compile_options( - ${_TestHorzMeshName} - PRIVATE - ${OMEGA_LINK_OPTIONS} -) - -target_link_libraries(${_TestHorzMeshName} ${OMEGA_LIB_NAME} spdlog yakl parmetis metis pioc) - -add_test( - NAME HORZMESH_TEST - COMMAND ${MPI_EXEC} -n 8 -- ./${_TestHorzMeshName} -) +add_test(NAME HORZMESH_TEST COMMAND ${MPI_EXEC} -n 8 --cpu-bind=cores -- ./testHorzMesh.exe) ############# # IO test 
############# -set(_TestIOName testIO.exe) - -add_executable(${_TestIOName} base/IOTest.cpp) - -target_include_directories( - ${_TestIOName} - PRIVATE - ${OMEGA_SOURCE_DIR}/src/base - ${OMEGA_SOURCE_DIR}/src/infra - ${Parmetis_INCLUDE_DIRS} -) - -target_compile_options( - ${_TestIOName} - PRIVATE - ${OMEGA_CXX_FLAGS} -) - -target_link_options( - ${_TestIOName} - PRIVATE - ${OMEGA_LINK_OPTIONS} -) +add_executable(testIO.exe base/IOTest.cpp) -target_link_libraries(${_TestIOName} ${OMEGA_LIB_NAME} spdlog yakl parmetis metis pioc) +target_link_libraries(testIO.exe ${OMEGA_LIB_NAME}) -if(GKlib_FOUND) - target_link_libraries(${_TestIOName} gklib) -endif() - -add_test( - NAME IO_TEST - COMMAND ${MPI_EXEC} -n 8 -- ./${_TestIOName} -) +add_test(NAME IO_TEST COMMAND ${MPI_EXEC} -n 8 --cpu-bind=cores -- ./testIO.exe) ################## # Config test ################## -set(_TestConfigName testConfig.exe) - -# Add broadcast test -add_executable(${_TestConfigName} infra/ConfigTest.cpp) +add_executable(testConfig.exe infra/ConfigTest.cpp) -target_include_directories( - ${_TestConfigName} - PRIVATE - ${OMEGA_SOURCE_DIR}/src/base - ${OMEGA_SOURCE_DIR}/src/infra -) +target_link_libraries(testConfig.exe ${OMEGA_LIB_NAME}) -target_compile_options( - ${_TestConfigName} - PRIVATE - ${OMEGA_CXX_FLAGS} -) - -target_link_options( - ${_TestConfigName} - PRIVATE - ${OMEGA_LINK_OPTIONS} -) - -target_link_libraries(${_TestConfigName} ${OMEGA_LIB_NAME} yaml-cpp) - -add_test( - NAME CONFIG_TEST - COMMAND ${MPI_EXEC} -n 8 -- ./${_TestConfigName} -) +add_test(NAME CONFIG_TEST COMMAND ${MPI_EXEC} --cpu-bind=cores -n 8 -- ./testConfig.exe) ################## # Metadata test ################## -set(_TestMetadataName testMetadata.exe) +add_executable(testMetadata.exe infra/MetadataTest.cpp) -add_executable(${_TestMetadataName} infra/MetadataTest.cpp) +target_link_libraries(testMetadata.exe ${OMEGA_LIB_NAME}) -target_compile_options( - ${_TestMetadataName} - PRIVATE - "-I${OMEGA_SOURCE_DIR}/src/base" 
- "-I${OMEGA_SOURCE_DIR}/src/infra" - ${OMEGA_CXX_FLAGS} -) - -target_link_options( - ${_TestMetadataName} - PRIVATE - ${OMEGA_LINK_OPTIONS} -) - -target_link_libraries(${_TestMetadataName} ${OMEGA_LIB_NAME} yakl) - -add_test(NAME METADATA_TEST COMMAND ./${_TestMetadataName}) +add_test(NAME METADATA_TEST COMMAND ./testMetadata.exe) ################## # IOField test ################## -set(_TestIOFieldName testIOField.exe) +add_executable(testIOField.exe infra/IOFieldTest.cpp) -add_executable(${_TestIOFieldName} infra/IOFieldTest.cpp) +target_link_libraries(testIOField.exe ${OMEGA_LIB_NAME}) -target_compile_options( - ${_TestIOFieldName} - PRIVATE - "-I${OMEGA_SOURCE_DIR}/src/base" - "-I${OMEGA_SOURCE_DIR}/src/infra" - ${OMEGA_CXX_FLAGS} -) - -target_link_options( - ${_TestIOFieldName} - PRIVATE - ${OMEGA_LINK_OPTIONS} -) - -target_link_libraries(${_TestIOFieldName} ${OMEGA_LIB_NAME} yakl) - -add_test(NAME IOFIELD_TEST - COMMAND ${MPI_EXEC} -n 8 -- ./${_TestIOFieldName}) +add_test(NAME IOFIELD_TEST COMMAND ${MPI_EXEC} --cpu-bind=cores -n 8 -- ./testIOField.exe) ################## # Time Manager test ################## -set(_TestTimeMgrName testTimeMgr.exe) - -add_executable(${_TestTimeMgrName} infra/TimeMgrTest.cpp) - -target_include_directories( - ${_TestTimeMgrName} - PRIVATE - ${OMEGA_SOURCE_DIR}/src/base - ${OMEGA_SOURCE_DIR}/src/infra -) - -target_compile_options( - ${_TestTimeMgrName} - PRIVATE - ${OMEGA_CXX_FLAGS} -) +add_executable(testTimeMgr.exe infra/TimeMgrTest.cpp) -target_link_options( - ${_TestTimeMgrName} - PRIVATE - ${OMEGA_LINK_OPTIONS} -) +target_link_libraries(testTimeMgr.exe ${OMEGA_LIB_NAME}) -target_link_libraries(${_TestTimeMgrName} ${OMEGA_LIB_NAME} spdlog yakl) +add_test(NAME TIMEMGR_TEST COMMAND ./testTimeMgr.exe) -add_test( - NAME TIMEMGR_TEST - COMMAND ./${_TestTimeMgrName} -) ################## -# YAKL test +# Kokkos test ################## -set(_TestYaklName testYakl.exe) +add_executable(testKokkos.exe infra/OmegaKokkosTest.cpp) 
-add_executable( - ${_TestYaklName} ${E3SM_EXTERNALS_ROOT}/YAKL/unit/ParForC/ParForC.cpp -) +target_link_libraries(testKokkos.exe ${OMEGA_LIB_NAME}) -include(${E3SM_EXTERNALS_ROOT}/YAKL/yakl_utils.cmake) -yakl_process_target(${_TestYaklName}) - -# handles cuda cases -if (YAKL_ARCH STREQUAL "CUDA") - set_target_properties(${_TestYaklName} PROPERTIES LINKER_LANGUAGE CXX) - if (CMAKE_VERSION VERSION_GREATER "3.18.0") - set_target_properties(${_TestYaklName} PROPERTIES CUDA_ARCHITECTURES OFF) - endif() -endif() - -add_test(NAME YAKL_TEST COMMAND ./${_TestYaklName}) +add_test(NAME KOKKOS_TEST COMMAND ./testKokkos.exe) ################## # test properties @@ -416,15 +144,12 @@ set_tests_properties( LOGGING_TEST CONFIG_TEST DECOMP_TEST - HALO_TEST +# HALO_TEST HORZMESH_TEST IO_TEST METADATA_TEST + TIMEMGR_TEST + KOKKOS_TEST IOFIELD_TEST PROPERTIES FAIL_REGULAR_EXPRESSION "FAIL" PASS_REGULAR_EXPRESSION "PASS" ) - -set_tests_properties( - YAKL_TEST - PROPERTIES FAIL_REGULAR_EXPRESSION "ERROR" -) diff --git a/components/omega/test/base/DataTypesTest.cpp b/components/omega/test/base/DataTypesTest.cpp index 4ba67c385709..82f8ff45e67c 100644 --- a/components/omega/test/base/DataTypesTest.cpp +++ b/components/omega/test/base/DataTypesTest.cpp @@ -15,1046 +15,1013 @@ #include #include "DataTypes.h" +#include "OmegaKokkos.h" #include "mpi.h" +using namespace OMEGA; + int main(int argc, char *argv[]) { // initialize environments MPI_Init(&argc, &argv); - yakl::init(); - - // declare variables of each supported type - OMEGA::I4 MyInt4 = 1; - OMEGA::I8 MyInt8 = 2; - OMEGA::R4 MyR4 = 3.0; - OMEGA::R8 MyR8 = 4.0000000000001; - OMEGA::Real MyReal = 5.000001; - using OMEGA::operator""_Real; - auto MyRealLiteral = 1._Real; - int SizeTmp = 0; - - // Check expected size (in bytes) for data types - SizeTmp = sizeof(MyInt4); - if (SizeTmp == 4) - std::cout << "Size of I4: PASS" << std::endl; - else - std::cout << "Size of I4: FAIL " << SizeTmp << std::endl; - - SizeTmp = sizeof(MyInt8); - if 
(SizeTmp == 8) - std::cout << "Size of I8: PASS" << std::endl; - else - std::cout << "Size of I8: FAIL " << SizeTmp << std::endl; - - SizeTmp = sizeof(MyR4); - if (SizeTmp == 4) - std::cout << "Size of R4: PASS" << std::endl; - else - std::cout << "Size of R4: FAIL " << SizeTmp << std::endl; - - SizeTmp = sizeof(MyR8); - if (SizeTmp == 8) - std::cout << "Size of R8: PASS" << std::endl; - else - std::cout << "Size of R8: FAIL " << SizeTmp << std::endl; - - SizeTmp = sizeof(MyReal); + Kokkos::initialize(); + { + + // declare variables of each supported type + I4 MyInt4 = 1; + I8 MyInt8 = 2; + R4 MyR4 = 3.0; + R8 MyR8 = 4.0000000000001; + Real MyReal = 5.000001; + // using operator""_Real; + auto MyRealLiteral = 1._Real; + int SizeTmp = 0; + + // Check expected size (in bytes) for data types + SizeTmp = sizeof(MyInt4); + if (SizeTmp == 4) + std::cout << "Size of I4: PASS" << std::endl; + else + std::cout << "Size of I4: FAIL " << SizeTmp << std::endl; + + SizeTmp = sizeof(MyInt8); + if (SizeTmp == 8) + std::cout << "Size of I8: PASS" << std::endl; + else + std::cout << "Size of I8: FAIL " << SizeTmp << std::endl; + + SizeTmp = sizeof(MyR4); + if (SizeTmp == 4) + std::cout << "Size of R4: PASS" << std::endl; + else + std::cout << "Size of R4: FAIL " << SizeTmp << std::endl; + + SizeTmp = sizeof(MyR8); + if (SizeTmp == 8) + std::cout << "Size of R8: PASS" << std::endl; + else + std::cout << "Size of R8: FAIL " << SizeTmp << std::endl; + + SizeTmp = sizeof(MyReal); #ifdef SINGLE_PRECISION - if (SizeTmp == 4) - std::cout << "Size of Real is 4: PASS" << std::endl; - else - std::cout << "Size of Real is 4: FAIL " << SizeTmp << std::endl; + if (SizeTmp == 4) + std::cout << "Size of Real is 4: PASS" << std::endl; + else + std::cout << "Size of Real is 4: FAIL " << SizeTmp << std::endl; #else - if (SizeTmp == 8) - std::cout << "Size of Real is 8: PASS" << std::endl; - else - std::cout << "Size of Real is 8: FAIL " << SizeTmp << std::endl; + if (SizeTmp == 8) + std::cout << 
"Size of Real is 8: PASS" << std::endl; + else + std::cout << "Size of Real is 8: FAIL " << SizeTmp << std::endl; #endif - SizeTmp = sizeof(MyRealLiteral); - if (SizeTmp == sizeof(OMEGA::Real)) - std::cout << "Size of Real literal: PASS" << std::endl; - else - std::cout << "Size of Real literal: FAIL " << SizeTmp << std::endl; - - // Test creation of device arrays and copying to/from host - // by initializing on the device, copying to host and comparing with - // a reference host array. - - int NumCells = 100; - int NumVertLvls = 100; - int NumTracers = 4; - int NumTimeLvls = 2; - int NumExtra = 2; - - using yakl::c::Bounds; - using yakl::c::parallel_for; - - // Test for 1DI4 - OMEGA::Array1DI4 TstArr1DI4("TstArr", NumCells); - OMEGA::ArrayHost1DI4 RefArr1DI4("RefArr", NumCells); + SizeTmp = sizeof(MyRealLiteral); + if (SizeTmp == sizeof(Real)) + std::cout << "Size of Real literal: PASS" << std::endl; + else + std::cout << "Size of Real literal: FAIL " << SizeTmp << std::endl; - for (int i = 0; i < NumCells; ++i) { - RefArr1DI4(i) = i; - } + // Test creation of device arrays and copying to/from host + // by initializing on the device, copying to host and comparing with + // a reference host array. 
- parallel_for( - Bounds<1>(NumCells), YAKL_LAMBDA(int i) { TstArr1DI4(i) = i; }); + int NumCells = 100; + int NumVertLvls = 100; + int NumTracers = 4; + int NumTimeLvls = 2; + int NumExtra = 2; - yakl::fence(); - auto TstHost1DI4 = TstArr1DI4.createHostCopy(); + // Test for 1DI4 + Array1DI4 TstArr1DI4("TstArr1DI4", NumCells); + HostArray1DI4 RefArr1DI4("RefArr1DI4", NumCells); - int icount = 0; - for (int i = 0; i < NumCells; ++i) { - if (TstHost1DI4(i) != RefArr1DI4(i)) - ++icount; - } - TstHost1DI4.deallocate(); - RefArr1DI4.deallocate(); - TstArr1DI4.deallocate(); - - if (icount == 0) - std::cout << "YAKL 1DI4 test: PASS" << std::endl; - else - std::cout << "YAKL 1DI4 test: FAIL" << std::endl; - - // Test for 2DI4 - OMEGA::Array2DI4 TstArr2DI4("TstArr", NumCells, NumVertLvls); - OMEGA::ArrayHost2DI4 RefArr2DI4("RefArr", NumCells, NumVertLvls); - - for (int j = 0; j < NumCells; ++j) { - for (int i = 0; i < NumVertLvls; ++i) { - RefArr2DI4(j, i) = i + j; + for (int i = 0; i < NumCells; ++i) { + RefArr1DI4(i) = i; } - } - parallel_for( - Bounds<2>(NumCells, NumVertLvls), - YAKL_LAMBDA(int j, int i) { TstArr2DI4(j, i) = i + j; }); + parallelFor( + {NumCells}, KOKKOS_LAMBDA(int i) { TstArr1DI4(i) = i; }); - yakl::fence(); - auto TstHost2DI4 = TstArr2DI4.createHostCopy(); + Kokkos::fence(); - icount = 0; - for (int j = 0; j < NumCells; ++j) { - for (int i = 0; i < NumVertLvls; ++i) { - if (TstHost2DI4(j, i) != RefArr2DI4(j, i)) + auto TstHost1DI4 = createHostCopy(TstArr1DI4); + + int icount = 0; + for (int i = 0; i < NumCells; ++i) { + if (TstHost1DI4(i) != RefArr1DI4(i)) ++icount; } - } - TstHost2DI4.deallocate(); - RefArr2DI4.deallocate(); - TstArr2DI4.deallocate(); - if (icount == 0) - std::cout << "YAKL 2DI4 test: PASS" << std::endl; - else - std::cout << "YAKL 2DI4 test: FAIL" << std::endl; + if (icount == 0) + std::cout << "Kokkos 1DI4 test: PASS" << std::endl; + else + std::cout << "Kokkos 1DI4 test: FAIL" << std::endl; - // Test for 3DI4 - OMEGA::Array3DI4 
TstArr3DI4("TstArr", NumTracers, NumCells, NumVertLvls); - OMEGA::ArrayHost3DI4 RefArr3DI4("RefArr", NumTracers, NumCells, NumVertLvls); + // Test for 2DI4 + Array2DI4 TstArr2DI4("TstArr2DI4", NumCells, NumVertLvls); + HostArray2DI4 RefArr2DI4("RefArr2DI4", NumCells, NumVertLvls); - for (int k = 0; k < NumTracers; ++k) { for (int j = 0; j < NumCells; ++j) { for (int i = 0; i < NumVertLvls; ++i) { - RefArr3DI4(k, j, i) = i + j + k; + RefArr2DI4(j, i) = i + j; } } - } - parallel_for( - Bounds<3>(NumTracers, NumCells, NumVertLvls), - YAKL_LAMBDA(int k, int j, int i) { TstArr3DI4(k, j, i) = i + j + k; }); + parallelFor( + {NumCells, NumVertLvls}, + KOKKOS_LAMBDA(int j, int i) { TstArr2DI4(j, i) = i + j; }); + + Kokkos::fence(); - yakl::fence(); - auto TstHost3DI4 = TstArr3DI4.createHostCopy(); + auto TstHost2DI4 = createHostCopy(TstArr2DI4); - icount = 0; - for (int k = 0; k < NumTracers; ++k) { + icount = 0; for (int j = 0; j < NumCells; ++j) { for (int i = 0; i < NumVertLvls; ++i) { - if (TstHost3DI4(k, j, i) != RefArr3DI4(k, j, i)) + if (TstHost2DI4(j, i) != RefArr2DI4(j, i)) ++icount; } } - } - TstHost3DI4.deallocate(); - RefArr3DI4.deallocate(); - TstArr3DI4.deallocate(); - if (icount == 0) - std::cout << "YAKL 3DI4 test: PASS" << std::endl; - else - std::cout << "YAKL 3DI4 test: FAIL" << std::endl; + if (icount == 0) + std::cout << "Kokkos 2DI4 test: PASS" << std::endl; + else + std::cout << "Kokkos 2DI4 test: FAIL" << std::endl; - // Test for 4DI4 - OMEGA::Array4DI4 TstArr4DI4("TstArr", NumTimeLvls, NumTracers, NumCells, - NumVertLvls); - OMEGA::ArrayHost4DI4 RefArr4DI4("RefArr", NumTimeLvls, NumTracers, NumCells, - NumVertLvls); + // Test for 3DI4 + Array3DI4 TstArr3DI4("TstArr3DI4", NumTracers, NumCells, NumVertLvls); + HostArray3DI4 RefArr3DI4("RefArr3DI4", NumTracers, NumCells, NumVertLvls); - for (int m = 0; m < NumTimeLvls; ++m) { for (int k = 0; k < NumTracers; ++k) { for (int j = 0; j < NumCells; ++j) { for (int i = 0; i < NumVertLvls; ++i) { - 
RefArr4DI4(m, k, j, i) = i + j + k + m; + RefArr3DI4(k, j, i) = i + j + k; } } } - } - parallel_for( - Bounds<4>(NumTimeLvls, NumTracers, NumCells, NumVertLvls), - YAKL_LAMBDA(int m, int k, int j, int i) { - TstArr4DI4(m, k, j, i) = i + j + k + m; - }); + parallelFor( + {NumTracers, NumCells, NumVertLvls}, + KOKKOS_LAMBDA(int k, int j, int i) { + TstArr3DI4(k, j, i) = i + j + k; + }); + + Kokkos::fence(); - yakl::fence(); - auto TstHost4DI4 = TstArr4DI4.createHostCopy(); + auto TstHost3DI4 = createHostCopy(TstArr3DI4); - icount = 0; - for (int m = 0; m < NumTimeLvls; ++m) { + icount = 0; for (int k = 0; k < NumTracers; ++k) { for (int j = 0; j < NumCells; ++j) { for (int i = 0; i < NumVertLvls; ++i) { - if (TstHost4DI4(m, k, j, i) != RefArr4DI4(m, k, j, i)) + if (TstHost3DI4(k, j, i) != RefArr3DI4(k, j, i)) ++icount; } } } - } - TstHost4DI4.deallocate(); - RefArr4DI4.deallocate(); - TstArr4DI4.deallocate(); - if (icount == 0) - std::cout << "YAKL 4DI4 test: PASS" << std::endl; - else - std::cout << "YAKL 4DI4 test: FAIL" << std::endl; + if (icount == 0) + std::cout << "Kokkos 3DI4 test: PASS" << std::endl; + else + std::cout << "Kokkos 3DI4 test: FAIL" << std::endl; - // Test for 5DI4 - OMEGA::Array5DI4 TstArr5DI4("TstArr", NumExtra, NumTimeLvls, NumTracers, - NumCells, NumVertLvls); - OMEGA::ArrayHost5DI4 RefArr5DI4("RefArr", NumExtra, NumTimeLvls, NumTracers, - NumCells, NumVertLvls); + // Test for 4DI4 + Array4DI4 TstArr4DI4("TstArr4DI4", NumTimeLvls, NumTracers, NumCells, + NumVertLvls); + HostArray4DI4 RefArr4DI4("RefArr4DI4", NumTimeLvls, NumTracers, NumCells, + NumVertLvls); - for (int n = 0; n < NumExtra; ++n) { for (int m = 0; m < NumTimeLvls; ++m) { for (int k = 0; k < NumTracers; ++k) { for (int j = 0; j < NumCells; ++j) { for (int i = 0; i < NumVertLvls; ++i) { - RefArr5DI4(n, m, k, j, i) = i + j + k + m + n; + RefArr4DI4(m, k, j, i) = i + j + k + m; } } } } - } - parallel_for( - Bounds<5>(NumExtra, NumTimeLvls, NumTracers, NumCells, NumVertLvls), - 
YAKL_LAMBDA(int n, int m, int k, int j, int i) { - TstArr5DI4(n, m, k, j, i) = i + j + k + m + n; - }); + parallelFor( + {NumTimeLvls, NumTracers, NumCells, NumVertLvls}, + KOKKOS_LAMBDA(int m, int k, int j, int i) { + TstArr4DI4(m, k, j, i) = i + j + k + m; + }); - yakl::fence(); - auto TstHost5DI4 = TstArr5DI4.createHostCopy(); + Kokkos::fence(); - icount = 0; - for (int n = 0; n < NumExtra; ++n) { + auto TstHost4DI4 = createHostCopy(TstArr4DI4); + + icount = 0; for (int m = 0; m < NumTimeLvls; ++m) { for (int k = 0; k < NumTracers; ++k) { for (int j = 0; j < NumCells; ++j) { for (int i = 0; i < NumVertLvls; ++i) { - if (TstHost5DI4(n, m, k, j, i) != RefArr5DI4(n, m, k, j, i)) + if (TstHost4DI4(m, k, j, i) != RefArr4DI4(m, k, j, i)) ++icount; } } } } - } - TstHost5DI4.deallocate(); - RefArr5DI4.deallocate(); - TstArr5DI4.deallocate(); - if (icount == 0) - std::cout << "YAKL 5DI4 test: PASS" << std::endl; - else - std::cout << "YAKL 5DI4 test: FAIL" << std::endl; + if (icount == 0) + std::cout << "Kokkos 4DI4 test: PASS" << std::endl; + else + std::cout << "Kokkos 4DI4 test: FAIL" << std::endl; - // Test for 1DI8 - OMEGA::Array1DI8 TstArr1DI8("TstArr", NumCells); - OMEGA::ArrayHost1DI8 RefArr1DI8("RefArr", NumCells); + // Test for 5DI4 + Array5DI4 TstArr5DI4("TstArr5DI4", NumExtra, NumTimeLvls, NumTracers, + NumCells, NumVertLvls); + HostArray5DI4 RefArr5DI4("RefArr5DI4", NumExtra, NumTimeLvls, NumTracers, + NumCells, NumVertLvls); - for (int i = 0; i < NumCells; ++i) { - RefArr1DI8(i) = i; - } + for (int n = 0; n < NumExtra; ++n) { + for (int m = 0; m < NumTimeLvls; ++m) { + for (int k = 0; k < NumTracers; ++k) { + for (int j = 0; j < NumCells; ++j) { + for (int i = 0; i < NumVertLvls; ++i) { + RefArr5DI4(n, m, k, j, i) = i + j + k + m + n; + } + } + } + } + } - parallel_for( - Bounds<1>(NumCells), YAKL_LAMBDA(int i) { TstArr1DI8(i) = i; }); + parallelFor( + {NumExtra, NumTimeLvls, NumTracers, NumCells, NumVertLvls}, + KOKKOS_LAMBDA(int n, int m, int k, int j, 
int i) { + TstArr5DI4(n, m, k, j, i) = i + j + k + m + n; + }); + + Kokkos::fence(); + + auto TstHost5DI4 = createHostCopy(TstArr5DI4); + + icount = 0; + for (int n = 0; n < NumExtra; ++n) { + for (int m = 0; m < NumTimeLvls; ++m) { + for (int k = 0; k < NumTracers; ++k) { + for (int j = 0; j < NumCells; ++j) { + for (int i = 0; i < NumVertLvls; ++i) { + if (TstHost5DI4(n, m, k, j, i) != + RefArr5DI4(n, m, k, j, i)) + ++icount; + } + } + } + } + } - yakl::fence(); - auto TstHost1DI8 = TstArr1DI8.createHostCopy(); + if (icount == 0) + std::cout << "Kokkos 5DI4 test: PASS" << std::endl; + else + std::cout << "Kokkos 5DI4 test: FAIL" << std::endl; - icount = 0; - for (int i = 0; i < NumCells; ++i) { - if (TstHost1DI8(i) != RefArr1DI8(i)) - ++icount; - } - TstHost1DI8.deallocate(); - RefArr1DI8.deallocate(); - TstArr1DI8.deallocate(); - - if (icount == 0) - std::cout << "YAKL 1DI8 test: PASS" << std::endl; - else - std::cout << "YAKL 1DI8 test: FAIL" << std::endl; - - // Test for 2DI8 - OMEGA::Array2DI8 TstArr2DI8("TstArr", NumCells, NumVertLvls); - OMEGA::ArrayHost2DI8 RefArr2DI8("RefArr", NumCells, NumVertLvls); - - for (int j = 0; j < NumCells; ++j) { - for (int i = 0; i < NumVertLvls; ++i) { - RefArr2DI8(j, i) = i + j; + // Test for 1DI8 + Array1DI8 TstArr1DI8("TstArr1DI8", NumCells); + HostArray1DI8 RefArr1DI8("RefArr1DI8", NumCells); + + for (int i = 0; i < NumCells; ++i) { + RefArr1DI8(i) = i; } - } - parallel_for( - Bounds<2>(NumCells, NumVertLvls), - YAKL_LAMBDA(int j, int i) { TstArr2DI8(j, i) = i + j; }); + parallelFor( + {NumCells}, KOKKOS_LAMBDA(int i) { TstArr1DI8(i) = i; }); - yakl::fence(); - auto TstHost2DI8 = TstArr2DI8.createHostCopy(); + Kokkos::fence(); - icount = 0; - for (int j = 0; j < NumCells; ++j) { - for (int i = 0; i < NumVertLvls; ++i) { - if (TstHost2DI8(j, i) != RefArr2DI8(j, i)) + auto TstHost1DI8 = createHostCopy(TstArr1DI8); + + icount = 0; + for (int i = 0; i < NumCells; ++i) { + if (TstHost1DI8(i) != RefArr1DI8(i)) ++icount; } - } - 
TstHost2DI8.deallocate(); - RefArr2DI8.deallocate(); - TstArr2DI8.deallocate(); - if (icount == 0) - std::cout << "YAKL 2DI8 test: PASS" << std::endl; - else - std::cout << "YAKL 2DI8 test: FAIL" << std::endl; + if (icount == 0) + std::cout << "Kokkos 1DI8 test: PASS" << std::endl; + else + std::cout << "Kokkos 1DI8 test: FAIL" << std::endl; - // Test for 3DI8 - OMEGA::Array3DI8 TstArr3DI8("TstArr", NumTracers, NumCells, NumVertLvls); - OMEGA::ArrayHost3DI8 RefArr3DI8("RefArr", NumTracers, NumCells, NumVertLvls); + // Test for 2DI8 + Array2DI8 TstArr2DI8("TstArr2DI8", NumCells, NumVertLvls); + HostArray2DI8 RefArr2DI8("RefArr2DI8", NumCells, NumVertLvls); - for (int k = 0; k < NumTracers; ++k) { for (int j = 0; j < NumCells; ++j) { for (int i = 0; i < NumVertLvls; ++i) { - RefArr3DI8(k, j, i) = i + j + k; + RefArr2DI8(j, i) = i + j; } } - } - parallel_for( - Bounds<3>(NumTracers, NumCells, NumVertLvls), - YAKL_LAMBDA(int k, int j, int i) { TstArr3DI8(k, j, i) = i + j + k; }); + parallelFor( + {NumCells, NumVertLvls}, + KOKKOS_LAMBDA(int j, int i) { TstArr2DI8(j, i) = i + j; }); - yakl::fence(); - auto TstHost3DI8 = TstArr3DI8.createHostCopy(); + Kokkos::fence(); - icount = 0; - for (int k = 0; k < NumTracers; ++k) { + auto TstHost2DI8 = createHostCopy(TstArr2DI8); + + icount = 0; for (int j = 0; j < NumCells; ++j) { for (int i = 0; i < NumVertLvls; ++i) { - if (TstHost3DI8(k, j, i) != RefArr3DI8(k, j, i)) + if (TstHost2DI8(j, i) != RefArr2DI8(j, i)) ++icount; } } - } - TstHost3DI8.deallocate(); - RefArr3DI8.deallocate(); - TstArr3DI8.deallocate(); - if (icount == 0) - std::cout << "YAKL 3DI8 test: PASS" << std::endl; - else - std::cout << "YAKL 3DI8 test: FAIL" << std::endl; + if (icount == 0) + std::cout << "Kokkos 2DI8 test: PASS" << std::endl; + else + std::cout << "Kokkos 2DI8 test: FAIL" << std::endl; - // Test for 4DI8 - OMEGA::Array4DI8 TstArr4DI8("TstArr", NumTimeLvls, NumTracers, NumCells, - NumVertLvls); - OMEGA::ArrayHost4DI8 RefArr4DI8("RefArr", 
NumTimeLvls, NumTracers, NumCells, - NumVertLvls); + // Test for 3DI8 + Array3DI8 TstArr3DI8("TstArr3DI8", NumTracers, NumCells, NumVertLvls); + HostArray3DI8 RefArr3DI8("RefArr3DI8", NumTracers, NumCells, NumVertLvls); - for (int m = 0; m < NumTimeLvls; ++m) { for (int k = 0; k < NumTracers; ++k) { for (int j = 0; j < NumCells; ++j) { for (int i = 0; i < NumVertLvls; ++i) { - RefArr4DI8(m, k, j, i) = i + j + k + m; + RefArr3DI8(k, j, i) = i + j + k; } } } - } - parallel_for( - Bounds<4>(NumTimeLvls, NumTracers, NumCells, NumVertLvls), - YAKL_LAMBDA(int m, int k, int j, int i) { - TstArr4DI8(m, k, j, i) = i + j + k + m; - }); + parallelFor( + {NumTracers, NumCells, NumVertLvls}, + KOKKOS_LAMBDA(int k, int j, int i) { + TstArr3DI8(k, j, i) = i + j + k; + }); - yakl::fence(); - auto TstHost4DI8 = TstArr4DI8.createHostCopy(); + Kokkos::fence(); - icount = 0; - for (int m = 0; m < NumTimeLvls; ++m) { + auto TstHost3DI8 = createHostCopy(TstArr3DI8); + + icount = 0; for (int k = 0; k < NumTracers; ++k) { for (int j = 0; j < NumCells; ++j) { for (int i = 0; i < NumVertLvls; ++i) { - if (TstHost4DI8(m, k, j, i) != RefArr4DI8(m, k, j, i)) + if (TstHost3DI8(k, j, i) != RefArr3DI8(k, j, i)) ++icount; } } } - } - TstHost4DI8.deallocate(); - RefArr4DI8.deallocate(); - TstArr4DI8.deallocate(); - if (icount == 0) - std::cout << "YAKL 4DI8 test: PASS" << std::endl; - else - std::cout << "YAKL 4DI8 test: FAIL" << std::endl; + if (icount == 0) + std::cout << "Kokkos 3DI8 test: PASS" << std::endl; + else + std::cout << "Kokkos 3DI8 test: FAIL" << std::endl; - // Test for 5DI8 - OMEGA::Array5DI8 TstArr5DI8("TstArr", NumExtra, NumTimeLvls, NumTracers, - NumCells, NumVertLvls); - OMEGA::ArrayHost5DI8 RefArr5DI8("RefArr", NumExtra, NumTimeLvls, NumTracers, - NumCells, NumVertLvls); + // Test for 4DI8 + Array4DI8 TstArr4DI8("TstArr4DI8", NumTimeLvls, NumTracers, NumCells, + NumVertLvls); + HostArray4DI8 RefArr4DI8("RefArr4DI8", NumTimeLvls, NumTracers, NumCells, + NumVertLvls); - for (int 
n = 0; n < NumExtra; ++n) { for (int m = 0; m < NumTimeLvls; ++m) { for (int k = 0; k < NumTracers; ++k) { for (int j = 0; j < NumCells; ++j) { for (int i = 0; i < NumVertLvls; ++i) { - RefArr5DI8(n, m, k, j, i) = i + j + k + m + n; + RefArr4DI8(m, k, j, i) = i + j + k + m; } } } } - } - parallel_for( - Bounds<5>(NumExtra, NumTimeLvls, NumTracers, NumCells, NumVertLvls), - YAKL_LAMBDA(int n, int m, int k, int j, int i) { - TstArr5DI8(n, m, k, j, i) = i + j + k + m + n; - }); + parallelFor( + {NumTimeLvls, NumTracers, NumCells, NumVertLvls}, + KOKKOS_LAMBDA(int m, int k, int j, int i) { + TstArr4DI8(m, k, j, i) = i + j + k + m; + }); + + Kokkos::fence(); - yakl::fence(); - auto TstHost5DI8 = TstArr5DI8.createHostCopy(); + auto TstHost4DI8 = createHostCopy(TstArr4DI8); - icount = 0; - for (int n = 0; n < NumExtra; ++n) { + icount = 0; for (int m = 0; m < NumTimeLvls; ++m) { for (int k = 0; k < NumTracers; ++k) { for (int j = 0; j < NumCells; ++j) { for (int i = 0; i < NumVertLvls; ++i) { - if (TstHost5DI8(n, m, k, j, i) != RefArr5DI8(n, m, k, j, i)) + if (TstHost4DI8(m, k, j, i) != RefArr4DI8(m, k, j, i)) ++icount; } } } } - } - TstHost5DI8.deallocate(); - RefArr5DI8.deallocate(); - TstArr5DI8.deallocate(); - if (icount == 0) - std::cout << "YAKL 5DI8 test: PASS" << std::endl; - else - std::cout << "YAKL 5DI8 test: FAIL" << std::endl; + if (icount == 0) + std::cout << "Kokkos 4DI8 test: PASS" << std::endl; + else + std::cout << "Kokkos 4DI8 test: FAIL" << std::endl; - // Test for 1DR4 - OMEGA::Array1DR4 TstArr1DR4("TstArr", NumCells); - OMEGA::ArrayHost1DR4 RefArr1DR4("RefArr", NumCells); + // Test for 5DI8 + Array5DI8 TstArr5DI8("TstArr5DI8", NumExtra, NumTimeLvls, NumTracers, + NumCells, NumVertLvls); + HostArray5DI8 RefArr5DI8("RefArr5DI8", NumExtra, NumTimeLvls, NumTracers, + NumCells, NumVertLvls); - for (int i = 0; i < NumCells; ++i) { - RefArr1DR4(i) = i; - } + for (int n = 0; n < NumExtra; ++n) { + for (int m = 0; m < NumTimeLvls; ++m) { + for (int k = 0; k < 
NumTracers; ++k) { + for (int j = 0; j < NumCells; ++j) { + for (int i = 0; i < NumVertLvls; ++i) { + RefArr5DI8(n, m, k, j, i) = i + j + k + m + n; + } + } + } + } + } - parallel_for( - Bounds<1>(NumCells), YAKL_LAMBDA(int i) { TstArr1DR4(i) = i; }); + parallelFor( + {NumExtra, NumTimeLvls, NumTracers, NumCells, NumVertLvls}, + KOKKOS_LAMBDA(int n, int m, int k, int j, int i) { + TstArr5DI8(n, m, k, j, i) = i + j + k + m + n; + }); + + Kokkos::fence(); + + auto TstHost5DI8 = createHostCopy(TstArr5DI8); + + icount = 0; + for (int n = 0; n < NumExtra; ++n) { + for (int m = 0; m < NumTimeLvls; ++m) { + for (int k = 0; k < NumTracers; ++k) { + for (int j = 0; j < NumCells; ++j) { + for (int i = 0; i < NumVertLvls; ++i) { + if (TstHost5DI8(n, m, k, j, i) != + RefArr5DI8(n, m, k, j, i)) + ++icount; + } + } + } + } + } - yakl::fence(); - auto TstHost1DR4 = TstArr1DR4.createHostCopy(); + if (icount == 0) + std::cout << "Kokkos 5DI8 test: PASS" << std::endl; + else + std::cout << "Kokkos 5DI8 test: FAIL" << std::endl; - icount = 0; - for (int i = 0; i < NumCells; ++i) { - if (TstHost1DR4(i) != RefArr1DR4(i)) - ++icount; - } - TstHost1DR4.deallocate(); - RefArr1DR4.deallocate(); - TstArr1DR4.deallocate(); - - if (icount == 0) - std::cout << "YAKL 1DR4 test: PASS" << std::endl; - else - std::cout << "YAKL 1DR4 test: FAIL" << std::endl; - - // Test for 2DR4 - OMEGA::Array2DR4 TstArr2DR4("TstArr", NumCells, NumVertLvls); - OMEGA::ArrayHost2DR4 RefArr2DR4("RefArr", NumCells, NumVertLvls); - - for (int j = 0; j < NumCells; ++j) { - for (int i = 0; i < NumVertLvls; ++i) { - RefArr2DR4(j, i) = i + j; + // Test for 1DR4 + Array1DR4 TstArr1DR4("TstArr1DR4", NumCells); + HostArray1DR4 RefArr1DR4("RefArr1DR4", NumCells); + + for (int i = 0; i < NumCells; ++i) { + RefArr1DR4(i) = i; } - } - parallel_for( - Bounds<2>(NumCells, NumVertLvls), - YAKL_LAMBDA(int j, int i) { TstArr2DR4(j, i) = i + j; }); + parallelFor( + {NumCells}, KOKKOS_LAMBDA(int i) { TstArr1DR4(i) = i; }); + + 
Kokkos::fence(); - yakl::fence(); - auto TstHost2DR4 = TstArr2DR4.createHostCopy(); + auto TstHost1DR4 = createHostCopy(TstArr1DR4); - icount = 0; - for (int j = 0; j < NumCells; ++j) { - for (int i = 0; i < NumVertLvls; ++i) { - if (TstHost2DR4(j, i) != RefArr2DR4(j, i)) + icount = 0; + for (int i = 0; i < NumCells; ++i) { + if (TstHost1DR4(i) != RefArr1DR4(i)) ++icount; } - } - TstHost2DR4.deallocate(); - RefArr2DR4.deallocate(); - TstArr2DR4.deallocate(); - if (icount == 0) - std::cout << "YAKL 2DR4 test: PASS" << std::endl; - else - std::cout << "YAKL 2DR4 test: FAIL" << std::endl; + if (icount == 0) + std::cout << "Kokkos 1DR4 test: PASS" << std::endl; + else + std::cout << "Kokkos 1DR4 test: FAIL" << std::endl; - // Test for 3DR4 - OMEGA::Array3DR4 TstArr3DR4("TstArr", NumTracers, NumCells, NumVertLvls); - OMEGA::ArrayHost3DR4 RefArr3DR4("RefArr", NumTracers, NumCells, NumVertLvls); + // Test for 2DR4 + Array2DR4 TstArr2DR4("TstArr2DR4", NumCells, NumVertLvls); + HostArray2DR4 RefArr2DR4("RefArr2DR4", NumCells, NumVertLvls); - for (int k = 0; k < NumTracers; ++k) { for (int j = 0; j < NumCells; ++j) { for (int i = 0; i < NumVertLvls; ++i) { - RefArr3DR4(k, j, i) = i + j + k; + RefArr2DR4(j, i) = i + j; } } - } - parallel_for( - Bounds<3>(NumTracers, NumCells, NumVertLvls), - YAKL_LAMBDA(int k, int j, int i) { TstArr3DR4(k, j, i) = i + j + k; }); + parallelFor( + {NumCells, NumVertLvls}, + KOKKOS_LAMBDA(int j, int i) { TstArr2DR4(j, i) = i + j; }); + + Kokkos::fence(); - yakl::fence(); - auto TstHost3DR4 = TstArr3DR4.createHostCopy(); + auto TstHost2DR4 = createHostCopy(TstArr2DR4); - icount = 0; - for (int k = 0; k < NumTracers; ++k) { + icount = 0; for (int j = 0; j < NumCells; ++j) { for (int i = 0; i < NumVertLvls; ++i) { - if (TstHost3DR4(k, j, i) != RefArr3DR4(k, j, i)) + if (TstHost2DR4(j, i) != RefArr2DR4(j, i)) ++icount; } } - } - TstHost3DR4.deallocate(); - RefArr3DR4.deallocate(); - TstArr3DR4.deallocate(); - if (icount == 0) - std::cout << "YAKL 
3DR4 test: PASS" << std::endl; - else - std::cout << "YAKL 3DR4 test: FAIL" << std::endl; + if (icount == 0) + std::cout << "Kokkos 2DR4 test: PASS" << std::endl; + else + std::cout << "Kokkos 2DR4 test: FAIL" << std::endl; - // Test for 4DR4 - OMEGA::Array4DR4 TstArr4DR4("TstArr", NumTimeLvls, NumTracers, NumCells, - NumVertLvls); - OMEGA::ArrayHost4DR4 RefArr4DR4("RefArr", NumTimeLvls, NumTracers, NumCells, - NumVertLvls); + // Test for 3DR4 + Array3DR4 TstArr3DR4("TstArr3DR4", NumTracers, NumCells, NumVertLvls); + HostArray3DR4 RefArr3DR4("RefArr3DR4", NumTracers, NumCells, NumVertLvls); - for (int m = 0; m < NumTimeLvls; ++m) { for (int k = 0; k < NumTracers; ++k) { for (int j = 0; j < NumCells; ++j) { for (int i = 0; i < NumVertLvls; ++i) { - RefArr4DR4(m, k, j, i) = i + j + k + m; + RefArr3DR4(k, j, i) = i + j + k; } } } - } - parallel_for( - Bounds<4>(NumTimeLvls, NumTracers, NumCells, NumVertLvls), - YAKL_LAMBDA(int m, int k, int j, int i) { - TstArr4DR4(m, k, j, i) = i + j + k + m; - }); + parallelFor( + {NumTracers, NumCells, NumVertLvls}, + KOKKOS_LAMBDA(int k, int j, int i) { + TstArr3DR4(k, j, i) = i + j + k; + }); - yakl::fence(); - auto TstHost4DR4 = TstArr4DR4.createHostCopy(); + Kokkos::fence(); - icount = 0; - for (int m = 0; m < NumTimeLvls; ++m) { + auto TstHost3DR4 = createHostCopy(TstArr3DR4); + + icount = 0; for (int k = 0; k < NumTracers; ++k) { for (int j = 0; j < NumCells; ++j) { for (int i = 0; i < NumVertLvls; ++i) { - if (TstHost4DR4(m, k, j, i) != RefArr4DR4(m, k, j, i)) + if (TstHost3DR4(k, j, i) != RefArr3DR4(k, j, i)) ++icount; } } } - } - TstHost4DR4.deallocate(); - RefArr4DR4.deallocate(); - TstArr4DR4.deallocate(); - if (icount == 0) - std::cout << "YAKL 4DR4 test: PASS" << std::endl; - else - std::cout << "YAKL 4DR4 test: FAIL" << std::endl; + if (icount == 0) + std::cout << "Kokkos 3DR4 test: PASS" << std::endl; + else + std::cout << "Kokkos 3DR4 test: FAIL" << std::endl; - // Test for 5DR4 - OMEGA::Array5DR4 
TstArr5DR4("TstArr", NumExtra, NumTimeLvls, NumTracers, - NumCells, NumVertLvls); - OMEGA::ArrayHost5DR4 RefArr5DR4("RefArr", NumExtra, NumTimeLvls, NumTracers, - NumCells, NumVertLvls); + // Test for 4DR4 + Array4DR4 TstArr4DR4("TstArr4DR4", NumTimeLvls, NumTracers, NumCells, + NumVertLvls); + HostArray4DR4 RefArr4DR4("RefArr4DR4", NumTimeLvls, NumTracers, NumCells, + NumVertLvls); - for (int n = 0; n < NumExtra; ++n) { for (int m = 0; m < NumTimeLvls; ++m) { for (int k = 0; k < NumTracers; ++k) { for (int j = 0; j < NumCells; ++j) { for (int i = 0; i < NumVertLvls; ++i) { - RefArr5DR4(n, m, k, j, i) = i + j + k + m + n; + RefArr4DR4(m, k, j, i) = i + j + k + m; } } } } - } - parallel_for( - Bounds<5>(NumExtra, NumTimeLvls, NumTracers, NumCells, NumVertLvls), - YAKL_LAMBDA(int n, int m, int k, int j, int i) { - TstArr5DR4(n, m, k, j, i) = i + j + k + m + n; - }); + parallelFor( + {NumTimeLvls, NumTracers, NumCells, NumVertLvls}, + KOKKOS_LAMBDA(int m, int k, int j, int i) { + TstArr4DR4(m, k, j, i) = i + j + k + m; + }); + + Kokkos::fence(); - yakl::fence(); - auto TstHost5DR4 = TstArr5DR4.createHostCopy(); + auto TstHost4DR4 = createHostCopy(TstArr4DR4); - icount = 0; - for (int n = 0; n < NumExtra; ++n) { + icount = 0; for (int m = 0; m < NumTimeLvls; ++m) { for (int k = 0; k < NumTracers; ++k) { for (int j = 0; j < NumCells; ++j) { for (int i = 0; i < NumVertLvls; ++i) { - if (TstHost5DR4(n, m, k, j, i) != RefArr5DR4(n, m, k, j, i)) + if (TstHost4DR4(m, k, j, i) != RefArr4DR4(m, k, j, i)) ++icount; } } } } - } - TstHost5DR4.deallocate(); - RefArr5DR4.deallocate(); - TstArr5DR4.deallocate(); - if (icount == 0) - std::cout << "YAKL 5DR4 test: PASS" << std::endl; - else - std::cout << "YAKL 5DR4 test: FAIL" << std::endl; + if (icount == 0) + std::cout << "Kokkos 4DR4 test: PASS" << std::endl; + else + std::cout << "Kokkos 4DR4 test: FAIL" << std::endl; - // Test for 1DR8 - OMEGA::Array1DR8 TstArr1DR8("TstArr", NumCells); - OMEGA::ArrayHost1DR8 RefArr1DR8("RefArr", 
NumCells); + // Test for 5DR4 + Array5DR4 TstArr5DR4("TstArr5DR4", NumExtra, NumTimeLvls, NumTracers, + NumCells, NumVertLvls); + HostArray5DR4 RefArr5DR4("RefArr5DR4", NumExtra, NumTimeLvls, NumTracers, + NumCells, NumVertLvls); - for (int i = 0; i < NumCells; ++i) { - RefArr1DR8(i) = i; - } + for (int n = 0; n < NumExtra; ++n) { + for (int m = 0; m < NumTimeLvls; ++m) { + for (int k = 0; k < NumTracers; ++k) { + for (int j = 0; j < NumCells; ++j) { + for (int i = 0; i < NumVertLvls; ++i) { + RefArr5DR4(n, m, k, j, i) = i + j + k + m + n; + } + } + } + } + } - parallel_for( - Bounds<1>(NumCells), YAKL_LAMBDA(int i) { TstArr1DR8(i) = i; }); + parallelFor( + {NumExtra, NumTimeLvls, NumTracers, NumCells, NumVertLvls}, + KOKKOS_LAMBDA(int n, int m, int k, int j, int i) { + TstArr5DR4(n, m, k, j, i) = i + j + k + m + n; + }); + + Kokkos::fence(); + + auto TstHost5DR4 = createHostCopy(TstArr5DR4); + + icount = 0; + for (int n = 0; n < NumExtra; ++n) { + for (int m = 0; m < NumTimeLvls; ++m) { + for (int k = 0; k < NumTracers; ++k) { + for (int j = 0; j < NumCells; ++j) { + for (int i = 0; i < NumVertLvls; ++i) { + if (TstHost5DR4(n, m, k, j, i) != + RefArr5DR4(n, m, k, j, i)) + ++icount; + } + } + } + } + } - yakl::fence(); - auto TstHost1DR8 = TstArr1DR8.createHostCopy(); + if (icount == 0) + std::cout << "Kokkos 5DR4 test: PASS" << std::endl; + else + std::cout << "Kokkos 5DR4 test: FAIL" << std::endl; - icount = 0; - for (int i = 0; i < NumCells; ++i) { - if (TstHost1DR8(i) != RefArr1DR8(i)) - ++icount; - } - TstHost1DR8.deallocate(); - RefArr1DR8.deallocate(); - TstArr1DR8.deallocate(); - - if (icount == 0) - std::cout << "YAKL 1DR8 test: PASS" << std::endl; - else - std::cout << "YAKL 1DR8 test: FAIL" << std::endl; - - // Test for 2DR8 - OMEGA::Array2DR8 TstArr2DR8("TstArr", NumCells, NumVertLvls); - OMEGA::ArrayHost2DR8 RefArr2DR8("RefArr", NumCells, NumVertLvls); - - for (int j = 0; j < NumCells; ++j) { - for (int i = 0; i < NumVertLvls; ++i) { - RefArr2DR8(j, i) 
= i + j; + // Test for 1DR8 + Array1DR8 TstArr1DR8("TstArr1DR8", NumCells); + HostArray1DR8 RefArr1DR8("RefArr1DR8", NumCells); + + for (int i = 0; i < NumCells; ++i) { + RefArr1DR8(i) = i; } - } - parallel_for( - Bounds<2>(NumCells, NumVertLvls), - YAKL_LAMBDA(int j, int i) { TstArr2DR8(j, i) = i + j; }); + parallelFor( + {NumCells}, KOKKOS_LAMBDA(int i) { TstArr1DR8(i) = i; }); + + Kokkos::fence(); - yakl::fence(); - auto TstHost2DR8 = TstArr2DR8.createHostCopy(); + auto TstHost1DR8 = createHostCopy(TstArr1DR8); - icount = 0; - for (int j = 0; j < NumCells; ++j) { - for (int i = 0; i < NumVertLvls; ++i) { - if (TstHost2DR8(j, i) != RefArr2DR8(j, i)) + icount = 0; + for (int i = 0; i < NumCells; ++i) { + if (TstHost1DR8(i) != RefArr1DR8(i)) ++icount; } - } - TstHost2DR8.deallocate(); - RefArr2DR8.deallocate(); - TstArr2DR8.deallocate(); - if (icount == 0) - std::cout << "YAKL 2DR8 test: PASS" << std::endl; - else - std::cout << "YAKL 2DR8 test: FAIL" << std::endl; + if (icount == 0) + std::cout << "Kokkos 1DR8 test: PASS" << std::endl; + else + std::cout << "Kokkos 1DR8 test: FAIL" << std::endl; - // Test for 3DR8 - OMEGA::Array3DR8 TstArr3DR8("TstArr", NumTracers, NumCells, NumVertLvls); - OMEGA::ArrayHost3DR8 RefArr3DR8("RefArr", NumTracers, NumCells, NumVertLvls); + // Test for 2DR8 + Array2DR8 TstArr2DR8("TstArr2DR8", NumCells, NumVertLvls); + HostArray2DR8 RefArr2DR8("RefArr2DR8", NumCells, NumVertLvls); - for (int k = 0; k < NumTracers; ++k) { for (int j = 0; j < NumCells; ++j) { for (int i = 0; i < NumVertLvls; ++i) { - RefArr3DR8(k, j, i) = i + j + k; + RefArr2DR8(j, i) = i + j; } } - } - parallel_for( - Bounds<3>(NumTracers, NumCells, NumVertLvls), - YAKL_LAMBDA(int k, int j, int i) { TstArr3DR8(k, j, i) = i + j + k; }); + parallelFor( + {NumCells, NumVertLvls}, + KOKKOS_LAMBDA(int j, int i) { TstArr2DR8(j, i) = i + j; }); - yakl::fence(); - auto TstHost3DR8 = TstArr3DR8.createHostCopy(); + Kokkos::fence(); - icount = 0; - for (int k = 0; k < NumTracers; 
++k) { + auto TstHost2DR8 = createHostCopy(TstArr2DR8); + + icount = 0; for (int j = 0; j < NumCells; ++j) { for (int i = 0; i < NumVertLvls; ++i) { - if (TstHost3DR8(k, j, i) != RefArr3DR8(k, j, i)) + if (TstHost2DR8(j, i) != RefArr2DR8(j, i)) ++icount; } } - } - TstHost3DR8.deallocate(); - RefArr3DR8.deallocate(); - TstArr3DR8.deallocate(); - if (icount == 0) - std::cout << "YAKL 3DR8 test: PASS" << std::endl; - else - std::cout << "YAKL 3DR8 test: FAIL" << std::endl; + if (icount == 0) + std::cout << "Kokkos 2DR8 test: PASS" << std::endl; + else + std::cout << "Kokkos 2DR8 test: FAIL" << std::endl; - // Test for 4DR8 - OMEGA::Array4DR8 TstArr4DR8("TstArr", NumTimeLvls, NumTracers, NumCells, - NumVertLvls); - OMEGA::ArrayHost4DR8 RefArr4DR8("RefArr", NumTimeLvls, NumTracers, NumCells, - NumVertLvls); + // Test for 3DR8 + Array3DR8 TstArr3DR8("TstArr3DR8", NumTracers, NumCells, NumVertLvls); + HostArray3DR8 RefArr3DR8("RefArr3DR8", NumTracers, NumCells, NumVertLvls); - for (int m = 0; m < NumTimeLvls; ++m) { for (int k = 0; k < NumTracers; ++k) { for (int j = 0; j < NumCells; ++j) { for (int i = 0; i < NumVertLvls; ++i) { - RefArr4DR8(m, k, j, i) = i + j + k + m; + RefArr3DR8(k, j, i) = i + j + k; } } } - } - parallel_for( - Bounds<4>(NumTimeLvls, NumTracers, NumCells, NumVertLvls), - YAKL_LAMBDA(int m, int k, int j, int i) { - TstArr4DR8(m, k, j, i) = i + j + k + m; - }); + parallelFor( + {NumTracers, NumCells, NumVertLvls}, + KOKKOS_LAMBDA(int k, int j, int i) { + TstArr3DR8(k, j, i) = i + j + k; + }); + + Kokkos::fence(); - yakl::fence(); - auto TstHost4DR8 = TstArr4DR8.createHostCopy(); + auto TstHost3DR8 = createHostCopy(TstArr3DR8); - icount = 0; - for (int m = 0; m < NumTimeLvls; ++m) { + icount = 0; for (int k = 0; k < NumTracers; ++k) { for (int j = 0; j < NumCells; ++j) { for (int i = 0; i < NumVertLvls; ++i) { - if (TstHost4DR8(m, k, j, i) != RefArr4DR8(m, k, j, i)) + if (TstHost3DR8(k, j, i) != RefArr3DR8(k, j, i)) ++icount; } } } - } - 
TstHost4DR8.deallocate(); - RefArr4DR8.deallocate(); - TstArr4DR8.deallocate(); - if (icount == 0) - std::cout << "YAKL 4DR8 test: PASS" << std::endl; - else - std::cout << "YAKL 4DR8 test: FAIL" << std::endl; + if (icount == 0) + std::cout << "Kokkos 3DR8 test: PASS" << std::endl; + else + std::cout << "Kokkos 3DR8 test: FAIL" << std::endl; - // Test for 5DR8 - OMEGA::Array5DR8 TstArr5DR8("TstArr", NumExtra, NumTimeLvls, NumTracers, - NumCells, NumVertLvls); - OMEGA::ArrayHost5DR8 RefArr5DR8("RefArr", NumExtra, NumTimeLvls, NumTracers, - NumCells, NumVertLvls); + // Test for 4DR8 + Array4DR8 TstArr4DR8("TstArr4DR8", NumTimeLvls, NumTracers, NumCells, + NumVertLvls); + HostArray4DR8 RefArr4DR8("RefArr4DR8", NumTimeLvls, NumTracers, NumCells, + NumVertLvls); - for (int n = 0; n < NumExtra; ++n) { for (int m = 0; m < NumTimeLvls; ++m) { for (int k = 0; k < NumTracers; ++k) { for (int j = 0; j < NumCells; ++j) { for (int i = 0; i < NumVertLvls; ++i) { - RefArr5DR8(n, m, k, j, i) = i + j + k + m + n; + RefArr4DR8(m, k, j, i) = i + j + k + m; } } } } - } - parallel_for( - Bounds<5>(NumExtra, NumTimeLvls, NumTracers, NumCells, NumVertLvls), - YAKL_LAMBDA(int n, int m, int k, int j, int i) { - TstArr5DR8(n, m, k, j, i) = i + j + k + m + n; - }); + parallelFor( + {NumTimeLvls, NumTracers, NumCells, NumVertLvls}, + KOKKOS_LAMBDA(int m, int k, int j, int i) { + TstArr4DR8(m, k, j, i) = i + j + k + m; + }); - yakl::fence(); - auto TstHost5DR8 = TstArr5DR8.createHostCopy(); + Kokkos::fence(); - icount = 0; - for (int n = 0; n < NumExtra; ++n) { + auto TstHost4DR8 = createHostCopy(TstArr4DR8); + + icount = 0; for (int m = 0; m < NumTimeLvls; ++m) { for (int k = 0; k < NumTracers; ++k) { for (int j = 0; j < NumCells; ++j) { for (int i = 0; i < NumVertLvls; ++i) { - if (TstHost5DR8(n, m, k, j, i) != RefArr5DR8(n, m, k, j, i)) + if (TstHost4DR8(m, k, j, i) != RefArr4DR8(m, k, j, i)) ++icount; } } } } - } - TstHost5DR8.deallocate(); - RefArr5DR8.deallocate(); - 
TstArr5DR8.deallocate(); - if (icount == 0) - std::cout << "YAKL 5DR8 test: PASS" << std::endl; - else - std::cout << "YAKL 5DR8 test: FAIL" << std::endl; + if (icount == 0) + std::cout << "Kokkos 4DR8 test: PASS" << std::endl; + else + std::cout << "Kokkos 4DR8 test: FAIL" << std::endl; - // Test for 1DReal - OMEGA::Array1DReal TstArr1DReal("TstArr", NumCells); - OMEGA::ArrayHost1DReal RefArr1DReal("RefArr", NumCells); + // Test for 5DR8 + Array5DR8 TstArr5DR8("TstArr5DR8", NumExtra, NumTimeLvls, NumTracers, + NumCells, NumVertLvls); + HostArray5DR8 RefArr5DR8("RefArr5DR8", NumExtra, NumTimeLvls, NumTracers, + NumCells, NumVertLvls); - for (int i = 0; i < NumCells; ++i) { - RefArr1DReal(i) = i; - } + for (int n = 0; n < NumExtra; ++n) { + for (int m = 0; m < NumTimeLvls; ++m) { + for (int k = 0; k < NumTracers; ++k) { + for (int j = 0; j < NumCells; ++j) { + for (int i = 0; i < NumVertLvls; ++i) { + RefArr5DR8(n, m, k, j, i) = i + j + k + m + n; + } + } + } + } + } - parallel_for( - Bounds<1>(NumCells), YAKL_LAMBDA(int i) { TstArr1DReal(i) = i; }); + parallelFor( + {NumExtra, NumTimeLvls, NumTracers, NumCells, NumVertLvls}, + KOKKOS_LAMBDA(int n, int m, int k, int j, int i) { + TstArr5DR8(n, m, k, j, i) = i + j + k + m + n; + }); + + Kokkos::fence(); + + auto TstHost5DR8 = createHostCopy(TstArr5DR8); + + icount = 0; + for (int n = 0; n < NumExtra; ++n) { + for (int m = 0; m < NumTimeLvls; ++m) { + for (int k = 0; k < NumTracers; ++k) { + for (int j = 0; j < NumCells; ++j) { + for (int i = 0; i < NumVertLvls; ++i) { + if (TstHost5DR8(n, m, k, j, i) != + RefArr5DR8(n, m, k, j, i)) + ++icount; + } + } + } + } + } - yakl::fence(); - auto TstHost1DReal = TstArr1DReal.createHostCopy(); + if (icount == 0) + std::cout << "Kokkos 5DR8 test: PASS" << std::endl; + else + std::cout << "Kokkos 5DR8 test: FAIL" << std::endl; - icount = 0; - for (int i = 0; i < NumCells; ++i) { - if (TstHost1DReal(i) != RefArr1DReal(i)) - ++icount; - } - TstHost1DReal.deallocate(); - 
RefArr1DReal.deallocate(); - TstArr1DReal.deallocate(); - - if (icount == 0) - std::cout << "YAKL 1DReal test: PASS" << std::endl; - else - std::cout << "YAKL 1DReal test: FAIL" << std::endl; - - // Test for 2DReal - OMEGA::Array2DReal TstArr2DReal("TstArr", NumCells, NumVertLvls); - OMEGA::ArrayHost2DReal RefArr2DReal("RefArr", NumCells, NumVertLvls); - - for (int j = 0; j < NumCells; ++j) { - for (int i = 0; i < NumVertLvls; ++i) { - RefArr2DReal(j, i) = i + j; + // Test for 1DReal + Array1DReal TstArr1DReal("TstArr1DReal", NumCells); + HostArray1DReal RefArr1DReal("RefArr1DReal", NumCells); + + for (int i = 0; i < NumCells; ++i) { + RefArr1DReal(i) = i; } - } - parallel_for( - Bounds<2>(NumCells, NumVertLvls), - YAKL_LAMBDA(int j, int i) { TstArr2DReal(j, i) = i + j; }); + parallelFor( + {NumCells}, KOKKOS_LAMBDA(int i) { TstArr1DReal(i) = i; }); - yakl::fence(); - auto TstHost2DReal = TstArr2DReal.createHostCopy(); + Kokkos::fence(); - icount = 0; - for (int j = 0; j < NumCells; ++j) { - for (int i = 0; i < NumVertLvls; ++i) { - if (TstHost2DReal(j, i) != RefArr2DReal(j, i)) + auto TstHost1DReal = createHostCopy(TstArr1DReal); + + icount = 0; + for (int i = 0; i < NumCells; ++i) { + if (TstHost1DReal(i) != RefArr1DReal(i)) ++icount; } - } - TstHost2DReal.deallocate(); - RefArr2DReal.deallocate(); - TstArr2DReal.deallocate(); - if (icount == 0) - std::cout << "YAKL 2DReal test: PASS" << std::endl; - else - std::cout << "YAKL 2DReal test: FAIL" << std::endl; + if (icount == 0) + std::cout << "Kokkos 1DReal test: PASS" << std::endl; + else + std::cout << "Kokkos 1DReal test: FAIL" << std::endl; - // Test for 3DReal - OMEGA::Array3DReal TstArr3DReal("TstArr", NumTracers, NumCells, NumVertLvls); - OMEGA::ArrayHost3DReal RefArr3DReal("RefArr", NumTracers, NumCells, - NumVertLvls); + // Test for 2DReal + Array2DReal TstArr2DReal("TstArr2DReal", NumCells, NumVertLvls); + HostArray2DReal RefArr2DReal("RefArr2DReal", NumCells, NumVertLvls); - for (int k = 0; k < 
NumTracers; ++k) { for (int j = 0; j < NumCells; ++j) { for (int i = 0; i < NumVertLvls; ++i) { - RefArr3DReal(k, j, i) = i + j + k; + RefArr2DReal(j, i) = i + j; } } - } - parallel_for( - Bounds<3>(NumTracers, NumCells, NumVertLvls), - YAKL_LAMBDA(int k, int j, int i) { TstArr3DReal(k, j, i) = i + j + k; }); + parallelFor( + {NumCells, NumVertLvls}, + KOKKOS_LAMBDA(int j, int i) { TstArr2DReal(j, i) = i + j; }); + + Kokkos::fence(); - yakl::fence(); - auto TstHost3DReal = TstArr3DReal.createHostCopy(); + auto TstHost2DReal = createHostCopy(TstArr2DReal); - icount = 0; - for (int k = 0; k < NumTracers; ++k) { + icount = 0; for (int j = 0; j < NumCells; ++j) { for (int i = 0; i < NumVertLvls; ++i) { - if (TstHost3DReal(k, j, i) != RefArr3DReal(k, j, i)) + if (TstHost2DReal(j, i) != RefArr2DReal(j, i)) ++icount; } } - } - TstHost3DReal.deallocate(); - RefArr3DReal.deallocate(); - TstArr3DReal.deallocate(); - if (icount == 0) - std::cout << "YAKL 3DReal test: PASS" << std::endl; - else - std::cout << "YAKL 3DReal test: FAIL" << std::endl; + if (icount == 0) + std::cout << "Kokkos 2DReal test: PASS" << std::endl; + else + std::cout << "Kokkos 2DReal test: FAIL" << std::endl; - // Test for 4DReal - OMEGA::Array4DReal TstArr4DReal("TstArr", NumTimeLvls, NumTracers, NumCells, + // Test for 3DReal + Array3DReal TstArr3DReal("TstArr3DReal", NumTracers, NumCells, + NumVertLvls); + HostArray3DReal RefArr3DReal("RefArr3DReal", NumTracers, NumCells, NumVertLvls); - OMEGA::ArrayHost4DReal RefArr4DReal("RefArr", NumTimeLvls, NumTracers, - NumCells, NumVertLvls); - for (int m = 0; m < NumTimeLvls; ++m) { for (int k = 0; k < NumTracers; ++k) { for (int j = 0; j < NumCells; ++j) { for (int i = 0; i < NumVertLvls; ++i) { - RefArr4DReal(m, k, j, i) = i + j + k + m; + RefArr3DReal(k, j, i) = i + j + k; } } } - } - parallel_for( - Bounds<4>(NumTimeLvls, NumTracers, NumCells, NumVertLvls), - YAKL_LAMBDA(int m, int k, int j, int i) { - TstArr4DReal(m, k, j, i) = i + j + k + m; - }); + 
parallelFor( + {NumTracers, NumCells, NumVertLvls}, + KOKKOS_LAMBDA(int k, int j, int i) { + TstArr3DReal(k, j, i) = i + j + k; + }); + + Kokkos::fence(); - yakl::fence(); - auto TstHost4DReal = TstArr4DReal.createHostCopy(); + auto TstHost3DReal = createHostCopy(TstArr3DReal); - icount = 0; - for (int m = 0; m < NumTimeLvls; ++m) { + icount = 0; for (int k = 0; k < NumTracers; ++k) { for (int j = 0; j < NumCells; ++j) { for (int i = 0; i < NumVertLvls; ++i) { - if (TstHost4DReal(m, k, j, i) != RefArr4DReal(m, k, j, i)) + if (TstHost3DReal(k, j, i) != RefArr3DReal(k, j, i)) ++icount; } } } - } - TstHost4DReal.deallocate(); - RefArr4DReal.deallocate(); - TstArr4DReal.deallocate(); - if (icount == 0) - std::cout << "YAKL 4DReal test: PASS" << std::endl; - else - std::cout << "YAKL 4DReal test: FAIL" << std::endl; + if (icount == 0) + std::cout << "Kokkos 3DReal test: PASS" << std::endl; + else + std::cout << "Kokkos 3DReal test: FAIL" << std::endl; - // Test for 5DReal - OMEGA::Array5DReal TstArr5DReal("TstArr", NumExtra, NumTimeLvls, NumTracers, + // Test for 4DReal + Array4DReal TstArr4DReal("TstArr4DReal", NumTimeLvls, NumTracers, + NumCells, NumVertLvls); + HostArray4DReal RefArr4DReal("RefArr4DReal", NumTimeLvls, NumTracers, NumCells, NumVertLvls); - OMEGA::ArrayHost5DReal RefArr5DReal("RefArr", NumExtra, NumTimeLvls, - NumTracers, NumCells, NumVertLvls); - for (int n = 0; n < NumExtra; ++n) { for (int m = 0; m < NumTimeLvls; ++m) { for (int k = 0; k < NumTracers; ++k) { for (int j = 0; j < NumCells; ++j) { for (int i = 0; i < NumVertLvls; ++i) { - RefArr5DReal(n, m, k, j, i) = i + j + k + m + n; + RefArr4DReal(m, k, j, i) = i + j + k + m; } } } } - } - parallel_for( - Bounds<5>(NumExtra, NumTimeLvls, NumTracers, NumCells, NumVertLvls), - YAKL_LAMBDA(int n, int m, int k, int j, int i) { - TstArr5DReal(n, m, k, j, i) = i + j + k + m + n; - }); + parallelFor( + {NumTimeLvls, NumTracers, NumCells, NumVertLvls}, + KOKKOS_LAMBDA(int m, int k, int j, int i) { + 
TstArr4DReal(m, k, j, i) = i + j + k + m; + }); - yakl::fence(); - auto TstHost5DReal = TstArr5DReal.createHostCopy(); + Kokkos::fence(); - icount = 0; - for (int n = 0; n < NumExtra; ++n) { + auto TstHost4DReal = createHostCopy(TstArr4DReal); + + icount = 0; for (int m = 0; m < NumTimeLvls; ++m) { for (int k = 0; k < NumTracers; ++k) { for (int j = 0; j < NumCells; ++j) { for (int i = 0; i < NumVertLvls; ++i) { - if (TstHost5DReal(n, m, k, j, i) != - RefArr5DReal(n, m, k, j, i)) + if (TstHost4DReal(m, k, j, i) != RefArr4DReal(m, k, j, i)) ++icount; } } } } + + if (icount == 0) + std::cout << "Kokkos 4DReal test: PASS" << std::endl; + else + std::cout << "Kokkos 4DReal test: FAIL" << std::endl; + + // Test for 5DReal + Array5DReal TstArr5DReal("TstArr5DReal", NumExtra, NumTimeLvls, + NumTracers, NumCells, NumVertLvls); + HostArray5DReal RefArr5DReal("RefArr5DReal", NumExtra, NumTimeLvls, + NumTracers, NumCells, NumVertLvls); + + for (int n = 0; n < NumExtra; ++n) { + for (int m = 0; m < NumTimeLvls; ++m) { + for (int k = 0; k < NumTracers; ++k) { + for (int j = 0; j < NumCells; ++j) { + for (int i = 0; i < NumVertLvls; ++i) { + RefArr5DReal(n, m, k, j, i) = i + j + k + m + n; + } + } + } + } + } + + parallelFor( + {NumExtra, NumTimeLvls, NumTracers, NumCells, NumVertLvls}, + KOKKOS_LAMBDA(int n, int m, int k, int j, int i) { + TstArr5DReal(n, m, k, j, i) = i + j + k + m + n; + }); + + Kokkos::fence(); + + auto TstHost5DReal = createHostCopy(TstArr5DReal); + + icount = 0; + for (int n = 0; n < NumExtra; ++n) { + for (int m = 0; m < NumTimeLvls; ++m) { + for (int k = 0; k < NumTracers; ++k) { + for (int j = 0; j < NumCells; ++j) { + for (int i = 0; i < NumVertLvls; ++i) { + if (TstHost5DReal(n, m, k, j, i) != + RefArr5DReal(n, m, k, j, i)) + ++icount; + } + } + } + } + } + + if (icount == 0) + std::cout << "Kokkos 5DReal test: PASS" << std::endl; + else + std::cout << "Kokkos 5DReal test: FAIL" << std::endl; + + // finalize environments + // MPI_Status status; } - 
TstHost5DReal.deallocate(); - RefArr5DReal.deallocate(); - TstArr5DReal.deallocate(); - - if (icount == 0) - std::cout << "YAKL 5DReal test: PASS" << std::endl; - else - std::cout << "YAKL 5DReal test: FAIL" << std::endl; - - // finalize environments - // MPI_Status status; - yakl::finalize(); + Kokkos::finalize(); MPI_Finalize(); } // end of main diff --git a/components/omega/test/base/DecompTest.cpp b/components/omega/test/base/DecompTest.cpp index 7e7435e31e62..b559b62a3759 100644 --- a/components/omega/test/base/DecompTest.cpp +++ b/components/omega/test/base/DecompTest.cpp @@ -56,87 +56,90 @@ int main(int argc, char *argv[]) { // Initialize the global MPI environment MPI_Init(&argc, &argv); - yakl::init(); - - // Call initialization routine to create the default decomposition - int Err = initDecompTest(); - if (Err != 0) - LOG_CRITICAL("DecompTest: Error initializing"); - - // Get MPI vars if needed - OMEGA::MachEnv *DefEnv = OMEGA::MachEnv::getDefaultEnv(); - MPI_Comm Comm = DefEnv->getComm(); - OMEGA::I4 MyTask = DefEnv->getMyTask(); - OMEGA::I4 NumTasks = DefEnv->getNumTasks(); - bool IsMaster = DefEnv->isMasterTask(); - - // Test retrieval of the default decomposition - OMEGA::Decomp *DefDecomp = OMEGA::Decomp::getDefault(); - if (DefDecomp) { // true if non-null ptr - LOG_INFO("DecompTest: Default decomp retrieval PASS"); - } else { - LOG_INFO("DecompTest: Default decomp retrieval FAIL"); - return -1; - } - - // Test that all Cells, Edges, Vertices are accounted for by - // summing the list of owned values by all tasks. The result should - // be the sum of the integers from 1 to NCellsGlobal (or edges, vertices). 
- OMEGA::I4 RefSumCells = 0; - OMEGA::I4 RefSumEdges = 0; - OMEGA::I4 RefSumVertices = 0; - for (int n = 0; n < DefDecomp->NCellsGlobal; ++n) - RefSumCells += n + 1; - for (int n = 0; n < DefDecomp->NEdgesGlobal; ++n) - RefSumEdges += n + 1; - for (int n = 0; n < DefDecomp->NVerticesGlobal; ++n) - RefSumVertices += n + 1; - OMEGA::I4 LocSumCells = 0; - OMEGA::I4 LocSumEdges = 0; - OMEGA::I4 LocSumVertices = 0; - OMEGA::ArrayHost1DI4 CellIDH = DefDecomp->CellIDH; - OMEGA::ArrayHost1DI4 EdgeIDH = DefDecomp->EdgeIDH; - OMEGA::ArrayHost1DI4 VertexIDH = DefDecomp->VertexIDH; - for (int n = 0; n < DefDecomp->NCellsOwned; ++n) - LocSumCells += CellIDH(n); - for (int n = 0; n < DefDecomp->NEdgesOwned; ++n) - LocSumEdges += EdgeIDH(n); - for (int n = 0; n < DefDecomp->NVerticesOwned; ++n) - LocSumVertices += VertexIDH(n); - OMEGA::I4 SumCells = 0; - OMEGA::I4 SumEdges = 0; - OMEGA::I4 SumVertices = 0; - Err = MPI_Allreduce(&LocSumCells, &SumCells, 1, MPI_INT32_T, MPI_SUM, Comm); - Err = MPI_Allreduce(&LocSumEdges, &SumEdges, 1, MPI_INT32_T, MPI_SUM, Comm); - Err = MPI_Allreduce(&LocSumVertices, &SumVertices, 1, MPI_INT32_T, MPI_SUM, - Comm); - - if (SumCells == RefSumCells) { - LOG_INFO("DecompTest: Sum cell ID test PASS"); - } else { - LOG_INFO("DecompTest: Sum cell ID test FAIL {} {}", SumCells, - RefSumCells); - } - if (SumEdges == RefSumEdges) { - LOG_INFO("DecompTest: Sum edge ID test PASS"); - } else { - LOG_INFO("DecompTest: Sum edge ID test FAIL {} {}", SumEdges, - RefSumEdges); - } - if (SumVertices == RefSumVertices) { - LOG_INFO("DecompTest: Sum vertex ID test PASS"); - } else { - LOG_INFO("DecompTest: Sum vertex ID test FAIL {} {}", SumVertices, - RefSumVertices); + Kokkos::initialize(); + { + // Call initialization routine to create the default decomposition + int Err = initDecompTest(); + if (Err != 0) + LOG_CRITICAL("DecompTest: Error initializing"); + + // Get MPI vars if needed + OMEGA::MachEnv *DefEnv = OMEGA::MachEnv::getDefaultEnv(); + MPI_Comm Comm = 
DefEnv->getComm(); + OMEGA::I4 MyTask = DefEnv->getMyTask(); + OMEGA::I4 NumTasks = DefEnv->getNumTasks(); + bool IsMaster = DefEnv->isMasterTask(); + + // Test retrieval of the default decomposition + OMEGA::Decomp *DefDecomp = OMEGA::Decomp::getDefault(); + if (DefDecomp) { // true if non-null ptr + LOG_INFO("DecompTest: Default decomp retrieval PASS"); + } else { + LOG_INFO("DecompTest: Default decomp retrieval FAIL"); + return -1; + } + + // Test that all Cells, Edges, Vertices are accounted for by + // summing the list of owned values by all tasks. The result should + // be the sum of the integers from 1 to NCellsGlobal (or edges, vertices). + OMEGA::I4 RefSumCells = 0; + OMEGA::I4 RefSumEdges = 0; + OMEGA::I4 RefSumVertices = 0; + for (int n = 0; n < DefDecomp->NCellsGlobal; ++n) + RefSumCells += n + 1; + for (int n = 0; n < DefDecomp->NEdgesGlobal; ++n) + RefSumEdges += n + 1; + for (int n = 0; n < DefDecomp->NVerticesGlobal; ++n) + RefSumVertices += n + 1; + OMEGA::I4 LocSumCells = 0; + OMEGA::I4 LocSumEdges = 0; + OMEGA::I4 LocSumVertices = 0; + OMEGA::HostArray1DI4 CellIDH = DefDecomp->CellIDH; + OMEGA::HostArray1DI4 EdgeIDH = DefDecomp->EdgeIDH; + OMEGA::HostArray1DI4 VertexIDH = DefDecomp->VertexIDH; + for (int n = 0; n < DefDecomp->NCellsOwned; ++n) + LocSumCells += CellIDH(n); + for (int n = 0; n < DefDecomp->NEdgesOwned; ++n) + LocSumEdges += EdgeIDH(n); + for (int n = 0; n < DefDecomp->NVerticesOwned; ++n) + LocSumVertices += VertexIDH(n); + OMEGA::I4 SumCells = 0; + OMEGA::I4 SumEdges = 0; + OMEGA::I4 SumVertices = 0; + Err = + MPI_Allreduce(&LocSumCells, &SumCells, 1, MPI_INT32_T, MPI_SUM, Comm); + Err = + MPI_Allreduce(&LocSumEdges, &SumEdges, 1, MPI_INT32_T, MPI_SUM, Comm); + Err = MPI_Allreduce(&LocSumVertices, &SumVertices, 1, MPI_INT32_T, + MPI_SUM, Comm); + + if (SumCells == RefSumCells) { + LOG_INFO("DecompTest: Sum cell ID test PASS"); + } else { + LOG_INFO("DecompTest: Sum cell ID test FAIL {} {}", SumCells, + RefSumCells); + } + if 
(SumEdges == RefSumEdges) { + LOG_INFO("DecompTest: Sum edge ID test PASS"); + } else { + LOG_INFO("DecompTest: Sum edge ID test FAIL {} {}", SumEdges, + RefSumEdges); + } + if (SumVertices == RefSumVertices) { + LOG_INFO("DecompTest: Sum vertex ID test PASS"); + } else { + LOG_INFO("DecompTest: Sum vertex ID test FAIL {} {}", SumVertices, + RefSumVertices); + } + + // Clean up + OMEGA::Decomp::clear(); + OMEGA::MachEnv::removeAll(); + + if (Err == 0) + LOG_INFO("DecompTest: Successful completion"); } - - // Clean up - OMEGA::Decomp::clear(); - OMEGA::MachEnv::removeAll(); - - if (Err == 0) - LOG_INFO("DecompTest: Successful completion"); - yakl::finalize(); + Kokkos::finalize(); MPI_Finalize(); } // end of main diff --git a/components/omega/test/base/HaloTest.cpp b/components/omega/test/base/HaloTest.cpp index 3cc6045fdffa..aa3c99776790 100644 --- a/components/omega/test/base/HaloTest.cpp +++ b/components/omega/test/base/HaloTest.cpp @@ -4,9 +4,9 @@ /// \brief Test driver for OMEGA Halo class /// /// This driver tests the OMEGA model Halo class, which collects and stores -/// everything needed to perform halo exchanges on any supported YAKL array +/// everything needed to perform halo exchanges on any supported Kokkos array /// defined on a mesh in OMEGA with a given parallel decomposition. This -/// unit test driver tests functionality by creating YAKL arrays of every +/// unit test driver tests functionality by creating Kokkos arrays of every /// type and dimensionality supported in OMEGA, initializing each array based /// on global IDs of the mesh elememts, performing halo exchanges, and /// confirming the exchanged arrays are identical to the initial arrays. @@ -20,11 +20,12 @@ #include "IO.h" #include "Logging.h" #include "MachEnv.h" +#include "OmegaKokkos.h" #include "mpi.h" //------------------------------------------------------------------------------ -// This function template performs a single test on a YAKL array type in a -// given index space. 
Two YAKL arrays of the same type and size are input, +// This function template performs a single test on a Kokkos array type in a +// given index space. Two Kokkos arrays of the same type and size are input, // InitArray contains the global IDs of the mesh elements for all the owned and // halo elements of the array, while TestArray contains the global IDs only in // the owned elements. The Halo class object, a label describing the test for @@ -46,8 +47,8 @@ void haloExchangeTest( OMEGA::I4 IErr{0}; // error code // Set total array size and ensure arrays are of same size - OMEGA::I4 NTot = InitArray.totElems(); - if (NTot != TestArray.totElems()) { + OMEGA::I4 NTot = InitArray.size(); + if (NTot != TestArray.size()) { LOG_ERROR("HaloTest: {} arrays must be of same size", Label); TotErr += -1; return; @@ -62,8 +63,14 @@ void haloExchangeTest( } // Collapse arrays to 1D for easy iteration - auto CollapsedInit = InitArray.collapse(); - auto CollapsedTest = TestArray.collapse(); + // auto CollapsedInit = InitArray.collapse(); + // auto CollapsedTest = TestArray.collapse(); + Kokkos::View + CollapsedInit(InitArray.data(), InitArray.size()); + Kokkos::View + CollapsedTest(TestArray.data(), TestArray.size()); // Confirm all elements are identical, if not set error code // and break out of loop @@ -87,7 +94,7 @@ void haloExchangeTest( //------------------------------------------------------------------------------ // The test driver. Performs halo exchange tests of all index spaces and all -// supported YAKL array types. For each test, an initial array is set based on +// supported Kokkos array types. For each test, an initial array is set based on // Global IDs of the mesh elements in the given index space for all owned and // halo elements, and is copied into the test array. 
The test array halo // elements are then set to junk values and a halo exchange is performed, which @@ -100,304 +107,317 @@ int main(int argc, char *argv[]) { OMEGA::I4 TotErr{0}; OMEGA::I4 IErr{0}; - // Initialize global MPI environment and YAKL + // Initialize global MPI environment and Kokkos MPI_Init(&argc, &argv); - yakl::init(); + Kokkos::initialize(); + { - // Initialize the machine environment and fetch the default environment - // pointer, the MPI communicator and the task ID of the local task - OMEGA::MachEnv::init(MPI_COMM_WORLD); - OMEGA::MachEnv *DefEnv = OMEGA::MachEnv::getDefaultEnv(); - MPI_Comm DefComm = DefEnv->getComm(); - OMEGA::I4 MyTask = DefEnv->getMyTask(); + // Initialize the machine environment and fetch the default environment + // pointer, the MPI communicator and the task ID of the local task + OMEGA::MachEnv::init(MPI_COMM_WORLD); + OMEGA::MachEnv *DefEnv = OMEGA::MachEnv::getDefaultEnv(); + MPI_Comm DefComm = DefEnv->getComm(); + OMEGA::I4 MyTask = DefEnv->getMyTask(); - // Initialize the IO system - IErr = OMEGA::IO::init(DefComm); - if (IErr != 0) - LOG_ERROR("DecompTest: error initializing parallel IO"); + // Initialize the IO system + IErr = OMEGA::IO::init(DefComm); + if (IErr != 0) + LOG_ERROR("DecompTest: error initializing parallel IO"); - // Create the default decomposition (initializes the decomposition) - IErr = OMEGA::Decomp::init(); - if (IErr != 0) - LOG_ERROR("DecompTest: error initializing default decomposition"); + // Create the default decomposition (initializes the decomposition) + IErr = OMEGA::Decomp::init(); + if (IErr != 0) + LOG_ERROR("DecompTest: error initializing default decomposition"); - // Retrieve the default decomposition - OMEGA::Decomp *DefDecomp = OMEGA::Decomp::getDefault(); + // Retrieve the default decomposition + OMEGA::Decomp *DefDecomp = OMEGA::Decomp::getDefault(); - // Create the halo exchange object for the given MachEnv and Decomp - OMEGA::Halo MyHalo(DefEnv, DefDecomp); + // Create the 
halo exchange object for the given MachEnv and Decomp + OMEGA::Halo MyHalo(DefEnv, DefDecomp); - OMEGA::I4 NumOwned; - OMEGA::I4 NumAll; + OMEGA::I4 NumOwned; + OMEGA::I4 NumAll; - // Perform 1DI4 array tests for each index space (cell, edge, and vertex) + // Perform 1DI4 array tests for each index space (cell, edge, and vertex) - OMEGA::ArrayHost1DI4 Init1DI4Cell("Init1DI4Cell", DefDecomp->NCellsSize); - OMEGA::ArrayHost1DI4 Test1DI4Cell("Test1DI4Cell", DefDecomp->NCellsSize); + OMEGA::HostArray1DI4 Init1DI4Cell("Init1DI4Cell", DefDecomp->NCellsSize); + OMEGA::HostArray1DI4 Test1DI4Cell("Test1DI4Cell", DefDecomp->NCellsSize); - NumOwned = DefDecomp->NCellsOwned; - NumAll = DefDecomp->NCellsAll; - Init1DI4Cell = DefDecomp->CellIDH; - Init1DI4Cell.deep_copy_to(Test1DI4Cell); - for (int ICell = NumOwned; ICell < NumAll; ++ICell) { - Test1DI4Cell(ICell) = -1; - } + NumOwned = DefDecomp->NCellsOwned; + NumAll = DefDecomp->NCellsAll; + Init1DI4Cell = DefDecomp->CellIDH; + OMEGA::deepCopy(Test1DI4Cell, Init1DI4Cell); - haloExchangeTest(MyHalo, Init1DI4Cell, Test1DI4Cell, "1DI4 Cell", TotErr); + for (int ICell = NumOwned; ICell < NumAll; ++ICell) { + Test1DI4Cell(ICell) = -1; + } - OMEGA::ArrayHost1DI4 Init1DI4Edge("Init1DI4Edge", DefDecomp->NEdgesSize); - OMEGA::ArrayHost1DI4 Test1DI4Edge("Test1DI4Edge", DefDecomp->NEdgesSize); + haloExchangeTest(MyHalo, Init1DI4Cell, Test1DI4Cell, "1DI4 Cell", TotErr); - NumOwned = DefDecomp->NEdgesOwned; - NumAll = DefDecomp->NEdgesAll; - Init1DI4Edge = DefDecomp->EdgeIDH; - Init1DI4Edge.deep_copy_to(Test1DI4Edge); - for (int IEdge = NumOwned; IEdge < NumAll; ++IEdge) { - Test1DI4Edge(IEdge) = -1; - } + OMEGA::HostArray1DI4 Init1DI4Edge("Init1DI4Edge", DefDecomp->NEdgesSize); + OMEGA::HostArray1DI4 Test1DI4Edge("Test1DI4Edge", DefDecomp->NEdgesSize); - haloExchangeTest(MyHalo, Init1DI4Edge, Test1DI4Edge, "1DI4 Edge", TotErr, - OMEGA::OnEdge); + NumOwned = DefDecomp->NEdgesOwned; + NumAll = DefDecomp->NEdgesAll; + Init1DI4Edge = 
DefDecomp->EdgeIDH; + OMEGA::deepCopy(Test1DI4Edge, Init1DI4Edge); - OMEGA::ArrayHost1DI4 Init1DI4Vertex("Init1DI4Vertex", - DefDecomp->NVerticesSize); - OMEGA::ArrayHost1DI4 Test1DI4Vertex("Test1DI4Vertex", - DefDecomp->NVerticesSize); + for (int IEdge = NumOwned; IEdge < NumAll; ++IEdge) { + Test1DI4Edge(IEdge) = -1; + } - NumOwned = DefDecomp->NVerticesOwned; - NumAll = DefDecomp->NVerticesAll; - Init1DI4Vertex = DefDecomp->VertexIDH; - Init1DI4Vertex.deep_copy_to(Test1DI4Vertex); - for (int IVertex = NumOwned; IVertex < NumAll; ++IVertex) { - Test1DI4Vertex(IVertex) = -1; - } - haloExchangeTest(MyHalo, Init1DI4Vertex, Test1DI4Vertex, "1DI4 Vertex", - TotErr, OMEGA::OnVertex); - - // Declaration of variables for remaining tests - - // Random dimension sizes - OMEGA::I4 N2{20}; - OMEGA::I4 N3{10}; - OMEGA::I4 N4{3}; - OMEGA::I4 N5{2}; - - NumOwned = DefDecomp->NCellsOwned; - NumAll = DefDecomp->NCellsAll; - - // Declare init and test arrays for all the remaining array types - OMEGA::ArrayHost1DI8 Init1DI8("Init1DI8", NumAll); - OMEGA::ArrayHost1DR4 Init1DR4("Init1DR4", NumAll); - OMEGA::ArrayHost1DR8 Init1DR8("Init1DR8", NumAll); - OMEGA::ArrayHost2DI4 Init2DI4("Init2DI4", NumAll, N2); - OMEGA::ArrayHost2DI8 Init2DI8("Init2DI8", NumAll, N2); - OMEGA::ArrayHost2DR4 Init2DR4("Init2DR4", NumAll, N2); - OMEGA::ArrayHost2DR8 Init2DR8("Init2DR8", NumAll, N2); - OMEGA::ArrayHost3DI4 Init3DI4("Init3DI4", N3, NumAll, N2); - OMEGA::ArrayHost3DI8 Init3DI8("Init3DI8", N3, NumAll, N2); - OMEGA::ArrayHost3DR4 Init3DR4("Init3DR4", N3, NumAll, N2); - OMEGA::ArrayHost3DR8 Init3DR8("Init3DR8", N3, NumAll, N2); - OMEGA::ArrayHost4DI4 Init4DI4("Init4DI4", N4, N3, NumAll, N2); - OMEGA::ArrayHost4DI8 Init4DI8("Init4DI8", N4, N3, NumAll, N2); - OMEGA::ArrayHost4DR4 Init4DR4("Init4DR4", N4, N3, NumAll, N2); - OMEGA::ArrayHost4DR8 Init4DR8("Init4DR8", N4, N3, NumAll, N2); - OMEGA::ArrayHost5DI4 Init5DI4("Init5DI4", N5, N4, N3, NumAll, N2); - OMEGA::ArrayHost5DI8 Init5DI8("Init5DI8", N5, 
N4, N3, NumAll, N2); - OMEGA::ArrayHost5DR4 Init5DR4("Init5DR4", N5, N4, N3, NumAll, N2); - OMEGA::ArrayHost5DR8 Init5DR8("Init5DR8", N5, N4, N3, NumAll, N2); - OMEGA::ArrayHost1DI8 Test1DI8("Test1DI8", NumAll); - OMEGA::ArrayHost1DR4 Test1DR4("Test1DR4", NumAll); - OMEGA::ArrayHost1DR8 Test1DR8("Test1DR8", NumAll); - OMEGA::ArrayHost2DI4 Test2DI4("Test2DI4", NumAll, N2); - OMEGA::ArrayHost2DI8 Test2DI8("Test2DI8", NumAll, N2); - OMEGA::ArrayHost2DR4 Test2DR4("Test2DR4", NumAll, N2); - OMEGA::ArrayHost2DR8 Test2DR8("Test2DR8", NumAll, N2); - OMEGA::ArrayHost3DI4 Test3DI4("Test3DI4", N3, NumAll, N2); - OMEGA::ArrayHost3DI8 Test3DI8("Test3DI8", N3, NumAll, N2); - OMEGA::ArrayHost3DR4 Test3DR4("Test3DR4", N3, NumAll, N2); - OMEGA::ArrayHost3DR8 Test3DR8("Test3DR8", N3, NumAll, N2); - OMEGA::ArrayHost4DI4 Test4DI4("Test4DI4", N4, N3, NumAll, N2); - OMEGA::ArrayHost4DI8 Test4DI8("Test4DI8", N4, N3, NumAll, N2); - OMEGA::ArrayHost4DR4 Test4DR4("Test4DR4", N4, N3, NumAll, N2); - OMEGA::ArrayHost4DR8 Test4DR8("Test4DR8", N4, N3, NumAll, N2); - OMEGA::ArrayHost5DI4 Test5DI4("Test5DI4", N5, N4, N3, NumAll, N2); - OMEGA::ArrayHost5DI8 Test5DI8("Test5DI8", N5, N4, N3, NumAll, N2); - OMEGA::ArrayHost5DR4 Test5DR4("Test5DR4", N5, N4, N3, NumAll, N2); - OMEGA::ArrayHost5DR8 Test5DR8("Test5DR8", N5, N4, N3, NumAll, N2); - - // Initialize and run remaining 1D tests - for (int ICell = 0; ICell < NumAll; ++ICell) { - OMEGA::I4 NewVal = DefDecomp->CellIDH(ICell); - Init1DI8(ICell) = static_cast(NewVal); - Init1DR4(ICell) = static_cast(NewVal); - Init1DR8(ICell) = static_cast(NewVal); - } + haloExchangeTest(MyHalo, Init1DI4Edge, Test1DI4Edge, "1DI4 Edge", TotErr, + OMEGA::OnEdge); - Init1DI8.deep_copy_to(Test1DI8); - Init1DR4.deep_copy_to(Test1DR4); - Init1DR8.deep_copy_to(Test1DR8); - for (int ICell = NumOwned; ICell < NumAll; ++ICell) { - Test1DI8(ICell) = -1; - Test1DR4(ICell) = -1; - Test1DR8(ICell) = -1; - } + OMEGA::HostArray1DI4 Init1DI4Vertex("Init1DI4Vertex", + 
DefDecomp->NVerticesSize); + OMEGA::HostArray1DI4 Test1DI4Vertex("Test1DI4Vertex", + DefDecomp->NVerticesSize); + + NumOwned = DefDecomp->NVerticesOwned; + NumAll = DefDecomp->NVerticesAll; + Init1DI4Vertex = DefDecomp->VertexIDH; + OMEGA::deepCopy(Test1DI4Vertex, Init1DI4Vertex); - haloExchangeTest(MyHalo, Init1DI8, Test1DI8, "1DI8", TotErr); - haloExchangeTest(MyHalo, Init1DR4, Test1DR4, "1DR4", TotErr); - haloExchangeTest(MyHalo, Init1DR8, Test1DR8, "1DR8", TotErr); - - // Initialize and run 2D tests - for (int ICell = 0; ICell < NumAll; ++ICell) { - for (int J = 0; J < N2; ++J) { - OMEGA::I4 NewVal = (J + 1) * DefDecomp->CellIDH(ICell); - Init2DI4(ICell, J) = NewVal; - Init2DI8(ICell, J) = static_cast(NewVal); - Init2DR4(ICell, J) = static_cast(NewVal); - Init2DR8(ICell, J) = static_cast(NewVal); + for (int IVertex = NumOwned; IVertex < NumAll; ++IVertex) { + Test1DI4Vertex(IVertex) = -1; + } + haloExchangeTest(MyHalo, Init1DI4Vertex, Test1DI4Vertex, "1DI4 Vertex", + TotErr, OMEGA::OnVertex); + + // Declaration of variables for remaining tests + + // Random dimension sizes + OMEGA::I4 N2{20}; + OMEGA::I4 N3{10}; + OMEGA::I4 N4{3}; + OMEGA::I4 N5{2}; + + NumOwned = DefDecomp->NCellsOwned; + NumAll = DefDecomp->NCellsAll; + + // Declare init and test arrays for all the remaining array types + OMEGA::HostArray1DI8 Init1DI8("Init1DI8", NumAll); + OMEGA::HostArray1DR4 Init1DR4("Init1DR4", NumAll); + OMEGA::HostArray1DR8 Init1DR8("Init1DR8", NumAll); + OMEGA::HostArray2DI4 Init2DI4("Init2DI4", NumAll, N2); + OMEGA::HostArray2DI8 Init2DI8("Init2DI8", NumAll, N2); + OMEGA::HostArray2DR4 Init2DR4("Init2DR4", NumAll, N2); + OMEGA::HostArray2DR8 Init2DR8("Init2DR8", NumAll, N2); + OMEGA::HostArray3DI4 Init3DI4("Init3DI4", N3, NumAll, N2); + OMEGA::HostArray3DI8 Init3DI8("Init3DI8", N3, NumAll, N2); + OMEGA::HostArray3DR4 Init3DR4("Init3DR4", N3, NumAll, N2); + OMEGA::HostArray3DR8 Init3DR8("Init3DR8", N3, NumAll, N2); + OMEGA::HostArray4DI4 Init4DI4("Init4DI4", N4, N3, 
NumAll, N2); + OMEGA::HostArray4DI8 Init4DI8("Init4DI8", N4, N3, NumAll, N2); + OMEGA::HostArray4DR4 Init4DR4("Init4DR4", N4, N3, NumAll, N2); + OMEGA::HostArray4DR8 Init4DR8("Init4DR8", N4, N3, NumAll, N2); + OMEGA::HostArray5DI4 Init5DI4("Init5DI4", N5, N4, N3, NumAll, N2); + OMEGA::HostArray5DI8 Init5DI8("Init5DI8", N5, N4, N3, NumAll, N2); + OMEGA::HostArray5DR4 Init5DR4("Init5DR4", N5, N4, N3, NumAll, N2); + OMEGA::HostArray5DR8 Init5DR8("Init5DR8", N5, N4, N3, NumAll, N2); + OMEGA::HostArray1DI8 Test1DI8("Test1DI8", NumAll); + OMEGA::HostArray1DR4 Test1DR4("Test1DR4", NumAll); + OMEGA::HostArray1DR8 Test1DR8("Test1DR8", NumAll); + OMEGA::HostArray2DI4 Test2DI4("Test2DI4", NumAll, N2); + OMEGA::HostArray2DI8 Test2DI8("Test2DI8", NumAll, N2); + OMEGA::HostArray2DR4 Test2DR4("Test2DR4", NumAll, N2); + OMEGA::HostArray2DR8 Test2DR8("Test2DR8", NumAll, N2); + OMEGA::HostArray3DI4 Test3DI4("Test3DI4", N3, NumAll, N2); + OMEGA::HostArray3DI8 Test3DI8("Test3DI8", N3, NumAll, N2); + OMEGA::HostArray3DR4 Test3DR4("Test3DR4", N3, NumAll, N2); + OMEGA::HostArray3DR8 Test3DR8("Test3DR8", N3, NumAll, N2); + OMEGA::HostArray4DI4 Test4DI4("Test4DI4", N4, N3, NumAll, N2); + OMEGA::HostArray4DI8 Test4DI8("Test4DI8", N4, N3, NumAll, N2); + OMEGA::HostArray4DR4 Test4DR4("Test4DR4", N4, N3, NumAll, N2); + OMEGA::HostArray4DR8 Test4DR8("Test4DR8", N4, N3, NumAll, N2); + OMEGA::HostArray5DI4 Test5DI4("Test5DI4", N5, N4, N3, NumAll, N2); + OMEGA::HostArray5DI8 Test5DI8("Test5DI8", N5, N4, N3, NumAll, N2); + OMEGA::HostArray5DR4 Test5DR4("Test5DR4", N5, N4, N3, NumAll, N2); + OMEGA::HostArray5DR8 Test5DR8("Test5DR8", N5, N4, N3, NumAll, N2); + + // Initialize and run remaining 1D tests + for (int ICell = 0; ICell < NumAll; ++ICell) { + OMEGA::I4 NewVal = DefDecomp->CellIDH(ICell); + Init1DI8(ICell) = static_cast(NewVal); + Init1DR4(ICell) = static_cast(NewVal); + Init1DR8(ICell) = static_cast(NewVal); } - } - Init2DI4.deep_copy_to(Test2DI4); - Init2DI8.deep_copy_to(Test2DI8); - 
Init2DR4.deep_copy_to(Test2DR4); - Init2DR8.deep_copy_to(Test2DR8); - for (int ICell = NumOwned; ICell < NumAll; ++ICell) { - for (int J = 0; J < N2; ++J) { - Test2DI4(ICell, J) = -1; - Test2DI8(ICell, J) = -1; - Test2DR4(ICell, J) = -1; - Test2DR8(ICell, J) = -1; + OMEGA::deepCopy(Test1DI8, Init1DI8); + OMEGA::deepCopy(Test1DR4, Init1DR4); + OMEGA::deepCopy(Test1DR8, Init1DR8); + + for (int ICell = NumOwned; ICell < NumAll; ++ICell) { + Test1DI8(ICell) = -1; + Test1DR4(ICell) = -1; + Test1DR8(ICell) = -1; } - } - haloExchangeTest(MyHalo, Init2DI4, Test2DI4, "2DI4", TotErr); - haloExchangeTest(MyHalo, Init2DI8, Test2DI8, "2DI8", TotErr); - haloExchangeTest(MyHalo, Init2DR4, Test2DR4, "2DR4", TotErr); - haloExchangeTest(MyHalo, Init2DR8, Test2DR8, "2DR8", TotErr); + haloExchangeTest(MyHalo, Init1DI8, Test1DI8, "1DI8", TotErr); + haloExchangeTest(MyHalo, Init1DR4, Test1DR4, "1DR4", TotErr); + haloExchangeTest(MyHalo, Init1DR8, Test1DR8, "1DR8", TotErr); - // Initialize and run 3D tests - for (int K = 0; K < N3; ++K) { + // Initialize and run 2D tests for (int ICell = 0; ICell < NumAll; ++ICell) { for (int J = 0; J < N2; ++J) { - OMEGA::I4 NewVal = (K + 1) * (J + 1) * DefDecomp->CellIDH(ICell); - Init3DI4(K, ICell, J) = NewVal; - Init3DI8(K, ICell, J) = static_cast(NewVal); - Init3DR4(K, ICell, J) = static_cast(NewVal); - Init3DR8(K, ICell, J) = static_cast(NewVal); + OMEGA::I4 NewVal = (J + 1) * DefDecomp->CellIDH(ICell); + Init2DI4(ICell, J) = NewVal; + Init2DI8(ICell, J) = static_cast(NewVal); + Init2DR4(ICell, J) = static_cast(NewVal); + Init2DR8(ICell, J) = static_cast(NewVal); } } - } - Init3DI4.deep_copy_to(Test3DI4); - Init3DI8.deep_copy_to(Test3DI8); - Init3DR4.deep_copy_to(Test3DR4); - Init3DR8.deep_copy_to(Test3DR8); - for (int K = 0; K < N3; ++K) { + OMEGA::deepCopy(Test2DI4, Init2DI4); + OMEGA::deepCopy(Test2DI8, Init2DI8); + OMEGA::deepCopy(Test2DR4, Init2DR4); + OMEGA::deepCopy(Test2DR8, Init2DR8); + for (int ICell = NumOwned; ICell < NumAll; ++ICell) { 
for (int J = 0; J < N2; ++J) { - Test3DI4(K, ICell, J) = -1; - Test3DI8(K, ICell, J) = -1; - Test3DR4(K, ICell, J) = -1; - Test3DR8(K, ICell, J) = -1; + Test2DI4(ICell, J) = -1; + Test2DI8(ICell, J) = -1; + Test2DR4(ICell, J) = -1; + Test2DR8(ICell, J) = -1; } } - } - haloExchangeTest(MyHalo, Init3DI4, Test3DI4, "3DI4", TotErr); - haloExchangeTest(MyHalo, Init3DI8, Test3DI8, "3DI8", TotErr); - haloExchangeTest(MyHalo, Init3DR4, Test3DR4, "3DR4", TotErr); - haloExchangeTest(MyHalo, Init3DR8, Test3DR8, "3DR8", TotErr); + haloExchangeTest(MyHalo, Init2DI4, Test2DI4, "2DI4", TotErr); + haloExchangeTest(MyHalo, Init2DI8, Test2DI8, "2DI8", TotErr); + haloExchangeTest(MyHalo, Init2DR4, Test2DR4, "2DR4", TotErr); + haloExchangeTest(MyHalo, Init2DR8, Test2DR8, "2DR8", TotErr); - // Initialize and run 4D tests - for (int L = 0; L < N4; ++L) { + // Initialize and run 3D tests for (int K = 0; K < N3; ++K) { for (int ICell = 0; ICell < NumAll; ++ICell) { for (int J = 0; J < N2; ++J) { - OMEGA::I4 NewVal = - (L + 1) * (K + 1) * (J + 1) * DefDecomp->CellIDH(ICell); - Init4DI4(L, K, ICell, J) = NewVal; - Init4DI8(L, K, ICell, J) = static_cast(NewVal); - Init4DR4(L, K, ICell, J) = static_cast(NewVal); - Init4DR8(L, K, ICell, J) = static_cast(NewVal); + OMEGA::I4 NewVal = (K + 1) * (J + 1) * DefDecomp->CellIDH(ICell); + Init3DI4(K, ICell, J) = NewVal; + Init3DI8(K, ICell, J) = static_cast(NewVal); + Init3DR4(K, ICell, J) = static_cast(NewVal); + Init3DR8(K, ICell, J) = static_cast(NewVal); } } } - } - Init4DI4.deep_copy_to(Test4DI4); - Init4DI8.deep_copy_to(Test4DI8); - Init4DR4.deep_copy_to(Test4DR4); - Init4DR8.deep_copy_to(Test4DR8); - for (int L = 0; L < N4; ++L) { + OMEGA::deepCopy(Test3DI4, Init3DI4); + OMEGA::deepCopy(Test3DI8, Init3DI8); + OMEGA::deepCopy(Test3DR4, Init3DR4); + OMEGA::deepCopy(Test3DR8, Init3DR8); + for (int K = 0; K < N3; ++K) { for (int ICell = NumOwned; ICell < NumAll; ++ICell) { for (int J = 0; J < N2; ++J) { - Test4DI4(L, K, ICell, J) = -1; - 
Test4DI8(L, K, ICell, J) = -1; - Test4DR4(L, K, ICell, J) = -1; - Test4DR8(L, K, ICell, J) = -1; + Test3DI4(K, ICell, J) = -1; + Test3DI8(K, ICell, J) = -1; + Test3DR4(K, ICell, J) = -1; + Test3DR8(K, ICell, J) = -1; } } } - } - haloExchangeTest(MyHalo, Init4DI4, Test4DI4, "4DI4", TotErr); - haloExchangeTest(MyHalo, Init4DI8, Test4DI8, "4DI8", TotErr); - haloExchangeTest(MyHalo, Init4DR4, Test4DR4, "4DR4", TotErr); - haloExchangeTest(MyHalo, Init4DR8, Test4DR8, "4DR8", TotErr); + haloExchangeTest(MyHalo, Init3DI4, Test3DI4, "3DI4", TotErr); + haloExchangeTest(MyHalo, Init3DI8, Test3DI8, "3DI8", TotErr); + haloExchangeTest(MyHalo, Init3DR4, Test3DR4, "3DR4", TotErr); + haloExchangeTest(MyHalo, Init3DR8, Test3DR8, "3DR8", TotErr); - // Initialize and run 5D tests - for (int M = 0; M < N5; ++M) { + // Initialize and run 4D tests for (int L = 0; L < N4; ++L) { for (int K = 0; K < N3; ++K) { for (int ICell = 0; ICell < NumAll; ++ICell) { for (int J = 0; J < N2; ++J) { - OMEGA::I4 NewVal = (M + 1) * (L + 1) * (K + 1) * (J + 1) * - DefDecomp->CellIDH(ICell); - Init5DI4(M, L, K, ICell, J) = NewVal; - Init5DI8(M, L, K, ICell, J) = static_cast(NewVal); - Init5DR4(M, L, K, ICell, J) = static_cast(NewVal); - Init5DR8(M, L, K, ICell, J) = static_cast(NewVal); + OMEGA::I4 NewVal = + (L + 1) * (K + 1) * (J + 1) * DefDecomp->CellIDH(ICell); + Init4DI4(L, K, ICell, J) = NewVal; + Init4DI8(L, K, ICell, J) = static_cast(NewVal); + Init4DR4(L, K, ICell, J) = static_cast(NewVal); + Init4DR8(L, K, ICell, J) = static_cast(NewVal); } } } } - } - Init5DI4.deep_copy_to(Test5DI4); - Init5DI8.deep_copy_to(Test5DI8); - Init5DR4.deep_copy_to(Test5DR4); - Init5DR8.deep_copy_to(Test5DR8); - for (int M = 0; M < N5; ++M) { + OMEGA::deepCopy(Test4DI4, Init4DI4); + OMEGA::deepCopy(Test4DI8, Init4DI8); + OMEGA::deepCopy(Test4DR4, Init4DR4); + OMEGA::deepCopy(Test4DR8, Init4DR8); + for (int L = 0; L < N4; ++L) { for (int K = 0; K < N3; ++K) { for (int ICell = NumOwned; ICell < NumAll; ++ICell) { for 
(int J = 0; J < N2; ++J) { - Test5DI4(M, L, K, ICell, J) = -1; - Test5DI8(M, L, K, ICell, J) = -1; - Test5DR4(M, L, K, ICell, J) = -1; - Test5DR8(M, L, K, ICell, J) = -1; + Test4DI4(L, K, ICell, J) = -1; + Test4DI8(L, K, ICell, J) = -1; + Test4DR4(L, K, ICell, J) = -1; + Test4DR8(L, K, ICell, J) = -1; + } + } + } + } + + haloExchangeTest(MyHalo, Init4DI4, Test4DI4, "4DI4", TotErr); + haloExchangeTest(MyHalo, Init4DI8, Test4DI8, "4DI8", TotErr); + haloExchangeTest(MyHalo, Init4DR4, Test4DR4, "4DR4", TotErr); + haloExchangeTest(MyHalo, Init4DR8, Test4DR8, "4DR8", TotErr); + + // Initialize and run 5D tests + for (int M = 0; M < N5; ++M) { + for (int L = 0; L < N4; ++L) { + for (int K = 0; K < N3; ++K) { + for (int ICell = 0; ICell < NumAll; ++ICell) { + for (int J = 0; J < N2; ++J) { + OMEGA::I4 NewVal = (M + 1) * (L + 1) * (K + 1) * (J + 1) * + DefDecomp->CellIDH(ICell); + Init5DI4(M, L, K, ICell, J) = NewVal; + Init5DI8(M, L, K, ICell, J) = + static_cast(NewVal); + Init5DR4(M, L, K, ICell, J) = + static_cast(NewVal); + Init5DR8(M, L, K, ICell, J) = + static_cast(NewVal); + } } } } } - } - haloExchangeTest(MyHalo, Init5DI4, Test5DI4, "5DI4", TotErr); - haloExchangeTest(MyHalo, Init5DI8, Test5DI8, "5DI8", TotErr); - haloExchangeTest(MyHalo, Init5DR4, Test5DR4, "5DR4", TotErr); - haloExchangeTest(MyHalo, Init5DR8, Test5DR8, "5DR8", TotErr); + OMEGA::deepCopy(Test5DI4, Init5DI4); + OMEGA::deepCopy(Test5DI8, Init5DI8); + OMEGA::deepCopy(Test5DR4, Init5DR4); + OMEGA::deepCopy(Test5DR8, Init5DR8); + + for (int M = 0; M < N5; ++M) { + for (int L = 0; L < N4; ++L) { + for (int K = 0; K < N3; ++K) { + for (int ICell = NumOwned; ICell < NumAll; ++ICell) { + for (int J = 0; J < N2; ++J) { + Test5DI4(M, L, K, ICell, J) = -1; + Test5DI8(M, L, K, ICell, J) = -1; + Test5DR4(M, L, K, ICell, J) = -1; + Test5DR8(M, L, K, ICell, J) = -1; + } + } + } + } + } - // Memory clean up - OMEGA::Decomp::clear(); - OMEGA::MachEnv::removeAll(); + haloExchangeTest(MyHalo, Init5DI4, Test5DI4, 
"5DI4", TotErr); + haloExchangeTest(MyHalo, Init5DI8, Test5DI8, "5DI8", TotErr); + haloExchangeTest(MyHalo, Init5DR4, Test5DR4, "5DR4", TotErr); + haloExchangeTest(MyHalo, Init5DR8, Test5DR8, "5DR8", TotErr); - if (TotErr == 0) { - LOG_INFO("HaloTest: Successful completion"); - } else { - LOG_INFO("HaloTest: Failed"); + // Memory clean up + OMEGA::Decomp::clear(); + OMEGA::MachEnv::removeAll(); + + if (TotErr == 0) { + LOG_INFO("HaloTest: Successful completion"); + } else { + LOG_INFO("HaloTest: Failed"); + } } - yakl::finalize(); + Kokkos::finalize(); MPI_Finalize(); } // end of main diff --git a/components/omega/test/base/IOTest.cpp b/components/omega/test/base/IOTest.cpp index 170db27b1147..3cab49efb6ef 100644 --- a/components/omega/test/base/IOTest.cpp +++ b/components/omega/test/base/IOTest.cpp @@ -55,779 +55,798 @@ int main(int argc, char *argv[]) { // Initialize the global MPI environment MPI_Init(&argc, &argv); - yakl::init(); - - // Call initialization routine to create the default decomposition - // and initialize the parallel IO library - int Err = initIOTest(); - if (Err != 0) - LOG_CRITICAL("IOTest: Error initializing"); - - // Get MPI vars if needed - OMEGA::MachEnv *DefEnv = OMEGA::MachEnv::getDefaultEnv(); - MPI_Comm Comm = DefEnv->getComm(); - OMEGA::I4 MyTask = DefEnv->getMyTask(); - OMEGA::I4 NumTasks = DefEnv->getNumTasks(); - bool IsMaster = DefEnv->isMasterTask(); - - // Retrieve the default decomposition - OMEGA::Decomp *DefDecomp = OMEGA::Decomp::getDefault(); - if (DefDecomp) { // true if non-null ptr - LOG_INFO("IOTest: Default decomp retrieval PASS"); - } else { - LOG_INFO("IOTest: Default decomp retrieval FAIL"); - return -1; - } - - // Create YAKL arrays of each type and at various mesh locations - OMEGA::I4 NCellsSize = DefDecomp->NCellsSize; - OMEGA::I4 NEdgesSize = DefDecomp->NEdgesSize; - OMEGA::I4 NVerticesSize = DefDecomp->NVerticesSize; - OMEGA::I4 NCellsOwned = DefDecomp->NCellsOwned; - OMEGA::I4 NEdgesOwned = 
DefDecomp->NEdgesOwned; - OMEGA::I4 NVerticesOwned = DefDecomp->NVerticesOwned; - OMEGA::I4 NCellsGlobal = DefDecomp->NCellsGlobal; - OMEGA::I4 NEdgesGlobal = DefDecomp->NEdgesGlobal; - OMEGA::I4 NVerticesGlobal = DefDecomp->NVerticesGlobal; - OMEGA::I4 NVertLevels = 128; - - OMEGA::ArrayHost2DI4 RefI4Cell("RefI4Cell", NCellsSize, NVertLevels); - OMEGA::ArrayHost2DI8 RefI8Cell("RefI8Cell", NCellsSize, NVertLevels); - OMEGA::ArrayHost2DR4 RefR4Cell("RefR4Cell", NCellsSize, NVertLevels); - OMEGA::ArrayHost2DR8 RefR8Cell("RefR8Cell", NCellsSize, NVertLevels); - - OMEGA::ArrayHost2DI4 RefI4Edge("RefI4Edge", NEdgesSize, NVertLevels); - OMEGA::ArrayHost2DI8 RefI8Edge("RefI8Edge", NEdgesSize, NVertLevels); - OMEGA::ArrayHost2DR4 RefR4Edge("RefR4Edge", NEdgesSize, NVertLevels); - OMEGA::ArrayHost2DR8 RefR8Edge("RefR8Edge", NEdgesSize, NVertLevels); - - OMEGA::ArrayHost2DI4 RefI4Vrtx("RefI4Vrtx", NVerticesSize, NVertLevels); - OMEGA::ArrayHost2DI8 RefI8Vrtx("RefI8Vrtx", NVerticesSize, NVertLevels); - OMEGA::ArrayHost2DR4 RefR4Vrtx("RefR4Vrtx", NVerticesSize, NVertLevels); - OMEGA::ArrayHost2DR8 RefR8Vrtx("RefR8Vrtx", NVerticesSize, NVertLevels); - - OMEGA::ArrayHost1DI4 CellIDH = DefDecomp->CellIDH; - OMEGA::ArrayHost1DI4 EdgeIDH = DefDecomp->EdgeIDH; - OMEGA::ArrayHost1DI4 VrtxIDH = DefDecomp->VertexIDH; - - // Offset arrays - initialize to -1, corresponding to entries - // that should not be written; - std::vector OffsetCell(NCellsSize * NVertLevels, -1); - std::vector OffsetEdge(NEdgesSize * NVertLevels, -1); - std::vector OffsetVrtx(NVerticesSize * NVertLevels, -1); - for (int Cell = 0; Cell < NCellsOwned; ++Cell) { - int GlobalCellAdd = CellIDH(Cell) - 1; // 0-based offset - for (int k = 0; k < NVertLevels; ++k) { - RefI4Cell(Cell, k) = GlobalCellAdd * k; - RefI8Cell(Cell, k) = GlobalCellAdd * k * 1000000000; - RefR4Cell(Cell, k) = GlobalCellAdd * k * 123.45; - RefR8Cell(Cell, k) = GlobalCellAdd * k * 1.23456789; - int VectorAdd = Cell * NVertLevels + k; - 
OffsetCell[VectorAdd] = GlobalCellAdd * NVertLevels + k; + Kokkos::initialize(); + { + // Call initialization routine to create the default decomposition + // and initialize the parallel IO library + int Err = initIOTest(); + if (Err != 0) + LOG_CRITICAL("IOTest: Error initializing"); + + // Get MPI vars if needed + OMEGA::MachEnv *DefEnv = OMEGA::MachEnv::getDefaultEnv(); + MPI_Comm Comm = DefEnv->getComm(); + OMEGA::I4 MyTask = DefEnv->getMyTask(); + OMEGA::I4 NumTasks = DefEnv->getNumTasks(); + bool IsMaster = DefEnv->isMasterTask(); + + // Retrieve the default decomposition + OMEGA::Decomp *DefDecomp = OMEGA::Decomp::getDefault(); + if (DefDecomp) { // true if non-null ptr + LOG_INFO("IOTest: Default decomp retrieval PASS"); + } else { + LOG_INFO("IOTest: Default decomp retrieval FAIL"); + return -1; } - } - for (int Edge = 0; Edge < NEdgesOwned; ++Edge) { - int GlobalEdgeAdd = EdgeIDH(Edge) - 1; - for (int k = 0; k < NVertLevels; ++k) { - RefI4Edge(Edge, k) = GlobalEdgeAdd * k; - RefI8Edge(Edge, k) = GlobalEdgeAdd * k * 1000000000; - RefR4Edge(Edge, k) = GlobalEdgeAdd * k * 123.45; - RefR8Edge(Edge, k) = GlobalEdgeAdd * k * 1.23456789; - int VectorAdd = Edge * NVertLevels + k; - OffsetEdge[VectorAdd] = GlobalEdgeAdd * NVertLevels + k; + // Create Kokkos arrays of each type and at various mesh locations + OMEGA::I4 NCellsSize = DefDecomp->NCellsSize; + OMEGA::I4 NEdgesSize = DefDecomp->NEdgesSize; + OMEGA::I4 NVerticesSize = DefDecomp->NVerticesSize; + OMEGA::I4 NCellsOwned = DefDecomp->NCellsOwned; + OMEGA::I4 NEdgesOwned = DefDecomp->NEdgesOwned; + OMEGA::I4 NVerticesOwned = DefDecomp->NVerticesOwned; + OMEGA::I4 NCellsGlobal = DefDecomp->NCellsGlobal; + OMEGA::I4 NEdgesGlobal = DefDecomp->NEdgesGlobal; + OMEGA::I4 NVerticesGlobal = DefDecomp->NVerticesGlobal; + OMEGA::I4 NVertLevels = 128; + + OMEGA::HostArray2DI4 RefI4Cell("RefI4Cell", NCellsSize, NVertLevels); + OMEGA::HostArray2DI8 RefI8Cell("RefI8Cell", NCellsSize, NVertLevels); + OMEGA::HostArray2DR4 
RefR4Cell("RefR4Cell", NCellsSize, NVertLevels); + OMEGA::HostArray2DR8 RefR8Cell("RefR8Cell", NCellsSize, NVertLevels); + + OMEGA::HostArray2DI4 RefI4Edge("RefI4Edge", NEdgesSize, NVertLevels); + OMEGA::HostArray2DI8 RefI8Edge("RefI8Edge", NEdgesSize, NVertLevels); + OMEGA::HostArray2DR4 RefR4Edge("RefR4Edge", NEdgesSize, NVertLevels); + OMEGA::HostArray2DR8 RefR8Edge("RefR8Edge", NEdgesSize, NVertLevels); + + OMEGA::HostArray2DI4 RefI4Vrtx("RefI4Vrtx", NVerticesSize, NVertLevels); + OMEGA::HostArray2DI8 RefI8Vrtx("RefI8Vrtx", NVerticesSize, NVertLevels); + OMEGA::HostArray2DR4 RefR4Vrtx("RefR4Vrtx", NVerticesSize, NVertLevels); + OMEGA::HostArray2DR8 RefR8Vrtx("RefR8Vrtx", NVerticesSize, NVertLevels); + + OMEGA::HostArray1DI4 CellIDH = DefDecomp->CellIDH; + OMEGA::HostArray1DI4 EdgeIDH = DefDecomp->EdgeIDH; + OMEGA::HostArray1DI4 VrtxIDH = DefDecomp->VertexIDH; + + // Offset arrays - initialize to -1, corresponding to entries + // that should not be written; + std::vector OffsetCell(NCellsSize * NVertLevels, -1); + std::vector OffsetEdge(NEdgesSize * NVertLevels, -1); + std::vector OffsetVrtx(NVerticesSize * NVertLevels, -1); + for (int Cell = 0; Cell < NCellsOwned; ++Cell) { + int GlobalCellAdd = CellIDH(Cell) - 1; // 0-based offset + for (int k = 0; k < NVertLevels; ++k) { + RefI4Cell(Cell, k) = GlobalCellAdd * k; + RefI8Cell(Cell, k) = GlobalCellAdd * k * 1000000000; + RefR4Cell(Cell, k) = GlobalCellAdd * k * 123.45; + RefR8Cell(Cell, k) = GlobalCellAdd * k * 1.23456789; + int VectorAdd = Cell * NVertLevels + k; + OffsetCell[VectorAdd] = GlobalCellAdd * NVertLevels + k; + } } - } - for (int Vrtx = 0; Vrtx < NVerticesOwned; ++Vrtx) { - int GlobalVrtxAdd = VrtxIDH(Vrtx) - 1; - for (int k = 0; k < NVertLevels; ++k) { - RefI4Vrtx(Vrtx, k) = GlobalVrtxAdd * k; - RefI8Vrtx(Vrtx, k) = GlobalVrtxAdd * k * 1000000000; - RefR4Vrtx(Vrtx, k) = GlobalVrtxAdd * k * 123.45; - RefR8Vrtx(Vrtx, k) = GlobalVrtxAdd * k * 1.23456789; - int VectorAdd = Vrtx * NVertLevels + k; - 
OffsetVrtx[VectorAdd] = GlobalVrtxAdd * NVertLevels + k; + for (int Edge = 0; Edge < NEdgesOwned; ++Edge) { + int GlobalEdgeAdd = EdgeIDH(Edge) - 1; + for (int k = 0; k < NVertLevels; ++k) { + RefI4Edge(Edge, k) = GlobalEdgeAdd * k; + RefI8Edge(Edge, k) = GlobalEdgeAdd * k * 1000000000; + RefR4Edge(Edge, k) = GlobalEdgeAdd * k * 123.45; + RefR8Edge(Edge, k) = GlobalEdgeAdd * k * 1.23456789; + int VectorAdd = Edge * NVertLevels + k; + OffsetEdge[VectorAdd] = GlobalEdgeAdd * NVertLevels + k; + } } - } - - // Create the needed decomposition offsets - - int DecompCellI4; - int DecompCellI8; - int DecompCellR4; - int DecompCellR8; - int DecompEdgeI4; - int DecompEdgeI8; - int DecompEdgeR4; - int DecompEdgeR8; - int DecompVrtxI4; - int DecompVrtxI8; - int DecompVrtxR4; - int DecompVrtxR8; - std::vector CellDims{NCellsGlobal, NVertLevels}; - std::vector EdgeDims{NEdgesGlobal, NVertLevels}; - std::vector VrtxDims{NVerticesGlobal, NVertLevels}; - int CellArraySize = NCellsSize * NVertLevels; - int EdgeArraySize = NEdgesSize * NVertLevels; - int VrtxArraySize = NVerticesSize * NVertLevels; - - Err = OMEGA::IO::createDecomp(DecompCellI4, OMEGA::IO::IOTypeI4, 2, CellDims, - CellArraySize, OffsetCell, - OMEGA::IO::DefaultRearr); - if (Err != 0) - LOG_ERROR("IOTest: error creating cell decomp I4 FAIL"); - Err = OMEGA::IO::createDecomp(DecompCellI8, OMEGA::IO::IOTypeI8, 2, CellDims, - CellArraySize, OffsetCell, - OMEGA::IO::DefaultRearr); - if (Err != 0) - LOG_ERROR("IOTest: error creating cell decomp I8 FAIL"); - Err = OMEGA::IO::createDecomp(DecompCellR4, OMEGA::IO::IOTypeR4, 2, CellDims, - CellArraySize, OffsetCell, - OMEGA::IO::DefaultRearr); - if (Err != 0) - LOG_ERROR("IOTest: error creating cell decomp R4 FAIL"); - Err = OMEGA::IO::createDecomp(DecompCellR8, OMEGA::IO::IOTypeR8, 2, CellDims, - CellArraySize, OffsetCell, - OMEGA::IO::DefaultRearr); - if (Err != 0) - LOG_ERROR("IOTest: error creating cell decomp R8 FAIL"); - - Err = OMEGA::IO::createDecomp(DecompEdgeI4, 
OMEGA::IO::IOTypeI4, 2, EdgeDims, - EdgeArraySize, OffsetEdge, - OMEGA::IO::DefaultRearr); - if (Err != 0) - LOG_ERROR("IOTest: error creating edge decomp I4 FAIL"); - Err = OMEGA::IO::createDecomp(DecompEdgeI8, OMEGA::IO::IOTypeI8, 2, EdgeDims, - EdgeArraySize, OffsetEdge, - OMEGA::IO::DefaultRearr); - if (Err != 0) - LOG_ERROR("IOTest: error creating edge decomp I8 FAIL"); - Err = OMEGA::IO::createDecomp(DecompEdgeR4, OMEGA::IO::IOTypeR4, 2, EdgeDims, - EdgeArraySize, OffsetEdge, - OMEGA::IO::DefaultRearr); - if (Err != 0) - LOG_ERROR("IOTest: error creating edge decomp R4 FAIL"); - Err = OMEGA::IO::createDecomp(DecompEdgeR8, OMEGA::IO::IOTypeR8, 2, EdgeDims, - EdgeArraySize, OffsetEdge, - OMEGA::IO::DefaultRearr); - if (Err != 0) - LOG_ERROR("IOTest: error creating edge decomp R8 FAIL"); - - Err = OMEGA::IO::createDecomp(DecompVrtxI4, OMEGA::IO::IOTypeI4, 2, VrtxDims, - VrtxArraySize, OffsetVrtx, - OMEGA::IO::DefaultRearr); - if (Err != 0) - LOG_ERROR("IOTest: error creating vertex decomp I4 FAIL"); - Err = OMEGA::IO::createDecomp(DecompVrtxI8, OMEGA::IO::IOTypeI8, 2, VrtxDims, - VrtxArraySize, OffsetVrtx, - OMEGA::IO::DefaultRearr); - if (Err != 0) - LOG_ERROR("IOTest: error creating vertex decomp I8 FAIL"); - Err = OMEGA::IO::createDecomp(DecompVrtxR4, OMEGA::IO::IOTypeR4, 2, VrtxDims, - VrtxArraySize, OffsetVrtx, - OMEGA::IO::DefaultRearr); - if (Err != 0) - LOG_ERROR("IOTest: error creating vertex decomp R4 FAIL"); - Err = OMEGA::IO::createDecomp(DecompVrtxR8, OMEGA::IO::IOTypeR8, 2, VrtxDims, - VrtxArraySize, OffsetVrtx, - OMEGA::IO::DefaultRearr); - if (Err != 0) - LOG_ERROR("IOTest: error creating vertex decomp R8 FAIL"); - - // Open a file for output - int OutFileID; - Err = OMEGA::IO::openFile( - OutFileID, "IOTest.nc", OMEGA::IO::ModeWrite, OMEGA::IO::FmtDefault, - OMEGA::IO::IfExists::Replace, OMEGA::IO::Precision::Double); - if (Err != 0) - LOG_ERROR("IOTest: error opening file for output FAIL"); - - // Define array dimensions - int DimCellID; - int 
DimEdgeID; - int DimVrtxID; - int DimVertID; - Err = OMEGA::IO::defineDim(OutFileID, "NVertLevels", NVertLevels, DimVertID); - if (Err != 0) - LOG_ERROR("IOTest: error defining vertical dimension FAIL"); - Err = OMEGA::IO::defineDim(OutFileID, "NCells", NCellsGlobal, DimCellID); - if (Err != 0) - LOG_ERROR("IOTest: error defining Cell dimension FAIL"); - Err = OMEGA::IO::defineDim(OutFileID, "NEdges", NEdgesGlobal, DimEdgeID); - if (Err != 0) - LOG_ERROR("IOTest: error defining Edge dimension FAIL"); - Err = - OMEGA::IO::defineDim(OutFileID, "NVertices", NVerticesGlobal, DimVrtxID); - if (Err != 0) - LOG_ERROR("IOTest: error defining Vertex dimension FAIL"); - - // Write some global file metadata - OMEGA::I4 FileMetaI4Ref = 2; - OMEGA::I8 FileMetaI8Ref = 4; - OMEGA::R4 FileMetaR4Ref = 6.789; - OMEGA::R8 FileMetaR8Ref = 1.23456789; - std::string FileMetaDescr = "OMEGA IO Unit test file"; - - Err = OMEGA::IO::writeMeta("FileMetaI4", FileMetaI4Ref, OutFileID, - OMEGA::IO::GlobalID); - if (Err != 0) - LOG_ERROR("IOTest: error writing global I4 metadata FAIL"); - Err = OMEGA::IO::writeMeta("FileMetaI8", FileMetaI8Ref, OutFileID, - OMEGA::IO::GlobalID); - if (Err != 0) - LOG_ERROR("IOTest: error writing global I8 metadata FAIL"); - Err = OMEGA::IO::writeMeta("FileMetaR4", FileMetaR4Ref, OutFileID, - OMEGA::IO::GlobalID); - if (Err != 0) - LOG_ERROR("IOTest: error writing global R4 metadata FAIL"); - Err = OMEGA::IO::writeMeta("FileMetaR8", FileMetaR8Ref, OutFileID, - OMEGA::IO::GlobalID); - if (Err != 0) - LOG_ERROR("IOTest: error writing global R8 metadata FAIL"); - Err = OMEGA::IO::writeMeta("FileMetaDescr", FileMetaDescr, OutFileID, - OMEGA::IO::GlobalID); - if (Err != 0) - LOG_ERROR("IOTest: error writing global char metadata FAIL"); - - // Define variables/arrays - int VarIDCellI4; - int VarIDCellI8; - int VarIDCellR4; - int VarIDCellR8; - int VarIDEdgeI4; - int VarIDEdgeI8; - int VarIDEdgeR4; - int VarIDEdgeR8; - int VarIDVrtxI4; - int VarIDVrtxI8; - int 
VarIDVrtxR4; - int VarIDVrtxR8; - - int CellDimIDs[2] = {DimCellID, DimVertID}; - int EdgeDimIDs[2] = {DimEdgeID, DimVertID}; - int VrtxDimIDs[2] = {DimVrtxID, DimVertID}; - - Err = OMEGA::IO::defineVar(OutFileID, "CellI4", OMEGA::IO::IOTypeI4, 2, - CellDimIDs, VarIDCellI4); - if (Err != 0) - LOG_ERROR("IOTest: Error defining CellI4 array FAIL"); - Err = OMEGA::IO::defineVar(OutFileID, "CellI8", OMEGA::IO::IOTypeI8, 2, - CellDimIDs, VarIDCellI8); - if (Err != 0) - LOG_ERROR("IOTest: Error defining CellI8 array FAIL"); - Err = OMEGA::IO::defineVar(OutFileID, "CellR4", OMEGA::IO::IOTypeR4, 2, - CellDimIDs, VarIDCellR4); - if (Err != 0) - LOG_ERROR("IOTest: Error defining CellR4 array FAIL"); - Err = OMEGA::IO::defineVar(OutFileID, "CellR8", OMEGA::IO::IOTypeR8, 2, - CellDimIDs, VarIDCellR8); - if (Err != 0) - LOG_ERROR("IOTest: Error defining CellR8 array FAIL"); - - Err = OMEGA::IO::defineVar(OutFileID, "EdgeI4", OMEGA::IO::IOTypeI4, 2, - EdgeDimIDs, VarIDEdgeI4); - if (Err != 0) - LOG_ERROR("IOTest: Error defining EdgeI4 array FAIL"); - Err = OMEGA::IO::defineVar(OutFileID, "EdgeI8", OMEGA::IO::IOTypeI8, 2, - EdgeDimIDs, VarIDEdgeI8); - if (Err != 0) - LOG_ERROR("IOTest: Error defining EdgeI8 array FAIL"); - Err = OMEGA::IO::defineVar(OutFileID, "EdgeR4", OMEGA::IO::IOTypeR4, 2, - EdgeDimIDs, VarIDEdgeR4); - if (Err != 0) - LOG_ERROR("IOTest: Error defining EdgeR4 array FAIL"); - Err = OMEGA::IO::defineVar(OutFileID, "EdgeR8", OMEGA::IO::IOTypeR8, 2, - EdgeDimIDs, VarIDEdgeR8); - if (Err != 0) - LOG_ERROR("IOTest: Error defining EdgeR8 array FAIL"); - - Err = OMEGA::IO::defineVar(OutFileID, "VrtxI4", OMEGA::IO::IOTypeI4, 2, - VrtxDimIDs, VarIDVrtxI4); - if (Err != 0) - LOG_ERROR("IOTest: Error defining VrtxI4 array FAIL"); - Err = OMEGA::IO::defineVar(OutFileID, "VrtxI8", OMEGA::IO::IOTypeI8, 2, - VrtxDimIDs, VarIDVrtxI8); - if (Err != 0) - LOG_ERROR("IOTest: Error defining VrtxI8 array FAIL"); - Err = OMEGA::IO::defineVar(OutFileID, "VrtxR4", OMEGA::IO::IOTypeR4, 
2, - VrtxDimIDs, VarIDVrtxR4); - if (Err != 0) - LOG_ERROR("IOTest: Error defining VrtxR4 array FAIL"); - Err = OMEGA::IO::defineVar(OutFileID, "VrtxR8", OMEGA::IO::IOTypeR8, 2, - VrtxDimIDs, VarIDVrtxR8); - if (Err != 0) - LOG_ERROR("IOTest: Error defining VrtxR8 array FAIL"); - // Add Variable metadata just for one array - OMEGA::I4 VarMetaI4Ref = 3; - OMEGA::I8 VarMetaI8Ref = 5; - OMEGA::R4 VarMetaR4Ref = 5.789; - OMEGA::R8 VarMetaR8Ref = 2.23456789; - std::string VarMetaDescrRef = "Test array for I4 on Cells"; - - Err = - OMEGA::IO::writeMeta("VarMetaI4", VarMetaI4Ref, OutFileID, VarIDCellI4); - if (Err != 0) - LOG_ERROR("IOTest: error writing var I4 metadata FAIL"); - Err = - OMEGA::IO::writeMeta("VarMetaI8", VarMetaI8Ref, OutFileID, VarIDCellI4); - if (Err != 0) - LOG_ERROR("IOTest: error writing var I8 metadata FAIL"); - Err = - OMEGA::IO::writeMeta("VarMetaR4", VarMetaR4Ref, OutFileID, VarIDCellI4); - if (Err != 0) - LOG_ERROR("IOTest: error writing var R4 metadata FAIL"); - Err = - OMEGA::IO::writeMeta("VarMetaR8", VarMetaR8Ref, OutFileID, VarIDCellI4); - if (Err != 0) - LOG_ERROR("IOTest: error writing var R8 metadata FAIL"); - Err = OMEGA::IO::writeMeta("VarMetaDescr", VarMetaDescrRef, OutFileID, - VarIDCellI4); - if (Err != 0) - LOG_ERROR("IOTest: error writing var char metadata FAIL"); - - // Exit define mode - Err = OMEGA::IO::endDefinePhase(OutFileID); - if (Err != 0) - LOG_ERROR("IOTest: error ending define mode FAIL"); - - // Write variables - OMEGA::I4 FillI4 = -999; - OMEGA::I8 FillI8 = -999999; - OMEGA::R4 FillR4 = -1.234e30; - OMEGA::R8 FillR8 = -1.23456789e30; - - Err = OMEGA::IO::writeArray(RefI4Cell.data(), NCellsSize * NVertLevels, - &FillI4, OutFileID, DecompCellI4, VarIDCellI4); - if (Err != 0) - LOG_ERROR("IOTest: error writing I4 array on cells FAIL"); - Err = OMEGA::IO::writeArray(RefI8Cell.data(), NCellsSize * NVertLevels, - &FillI8, OutFileID, DecompCellI8, VarIDCellI8); - if (Err != 0) - LOG_ERROR("IOTest: error writing I8 array on 
cells FAIL"); - Err = OMEGA::IO::writeArray(RefR4Cell.data(), NCellsSize * NVertLevels, - &FillR4, OutFileID, DecompCellR4, VarIDCellR4); - if (Err != 0) - LOG_ERROR("IOTest: error writing R4 array on cells FAIL"); - Err = OMEGA::IO::writeArray(RefR8Cell.data(), NCellsSize * NVertLevels, - &FillR8, OutFileID, DecompCellR8, VarIDCellR8); - if (Err != 0) - LOG_ERROR("IOTest: error writing R8 array on cells FAIL"); - - Err = OMEGA::IO::writeArray(RefI4Edge.data(), NEdgesSize * NVertLevels, - &FillI4, OutFileID, DecompEdgeI4, VarIDEdgeI4); - if (Err != 0) - LOG_ERROR("IOTest: error writing I4 array on Edges FAIL"); - Err = OMEGA::IO::writeArray(RefI8Edge.data(), NEdgesSize * NVertLevels, - &FillI8, OutFileID, DecompEdgeI8, VarIDEdgeI8); - if (Err != 0) - LOG_ERROR("IOTest: error writing I8 array on Edges FAIL"); - Err = OMEGA::IO::writeArray(RefR4Edge.data(), NEdgesSize * NVertLevels, - &FillR4, OutFileID, DecompEdgeR4, VarIDEdgeR4); - if (Err != 0) - LOG_ERROR("IOTest: error writing R4 array on Edges FAIL"); - Err = OMEGA::IO::writeArray(RefR8Edge.data(), NEdgesSize * NVertLevels, - &FillR8, OutFileID, DecompEdgeR8, VarIDEdgeR8); - if (Err != 0) - LOG_ERROR("IOTest: error writing R8 array on Edges FAIL"); - - Err = OMEGA::IO::writeArray(RefI4Vrtx.data(), NVerticesSize * NVertLevels, - &FillI4, OutFileID, DecompVrtxI4, VarIDVrtxI4); - if (Err != 0) - LOG_ERROR("IOTest: error writing I4 array on vertices FAIL"); - Err = OMEGA::IO::writeArray(RefI8Vrtx.data(), NVerticesSize * NVertLevels, - &FillI8, OutFileID, DecompVrtxI8, VarIDVrtxI8); - if (Err != 0) - LOG_ERROR("IOTest: error writing I8 array on vertices FAIL"); - Err = OMEGA::IO::writeArray(RefR4Vrtx.data(), NVerticesSize * NVertLevels, - &FillR4, OutFileID, DecompVrtxR4, VarIDVrtxR4); - if (Err != 0) - LOG_ERROR("IOTest: error writing R4 array on vertices FAIL"); - Err = OMEGA::IO::writeArray(RefR8Vrtx.data(), NVerticesSize * NVertLevels, - &FillR8, OutFileID, DecompVrtxR8, VarIDVrtxR8); - if (Err != 0) - 
LOG_ERROR("IOTest: error writing R8 array on vertices FAIL"); - - // Finished writing, close file - Err = OMEGA::IO::closeFile(OutFileID); - if (Err != 0) - LOG_ERROR("IOTest: error closing output file FAIL"); - - // Open a file for reading to verify read/write - int InFileID; - Err = OMEGA::IO::openFile(InFileID, "IOTest.nc", OMEGA::IO::ModeRead); - if (Err != 0) - LOG_ERROR("IOTest: error opening file for reading FAIL"); - - // Get dimension lengths to verify read/write of dimension info - OMEGA::I4 NVertLevelsNew = OMEGA::IO::getDimLength(InFileID, "NVertLevels"); - if (NVertLevelsNew == NVertLevels) { - LOG_INFO("IOTest: read/write vert dimension test PASS"); - } else { - LOG_INFO("IOTest: read/write vert dimension test FAIL"); - } - - OMEGA::I4 NCellsNew = OMEGA::IO::getDimLength(InFileID, "NCells"); - if (NCellsNew == NCellsGlobal) { - LOG_INFO("IOTest: read/write cell dimension test PASS"); - } else { - LOG_INFO("IOTest: read/write cell dimension test FAIL"); - } - - OMEGA::I4 NEdgesNew = OMEGA::IO::getDimLength(InFileID, "NEdges"); - if (NEdgesNew == NEdgesGlobal) { - LOG_INFO("IOTest: read/write edge dimension test PASS"); - } else { - LOG_INFO("IOTest: read/write edge dimension test FAIL"); - } - - OMEGA::I4 NVerticesNew = OMEGA::IO::getDimLength(InFileID, "NVertices"); - if (NVerticesNew == NVerticesGlobal) { - LOG_INFO("IOTest: read/write vertex dimension test PASS"); - } else { - LOG_INFO("IOTest: read/write vertex dimension test FAIL"); - } - - // Read global attributes - OMEGA::I4 FileMetaI4New; - OMEGA::I8 FileMetaI8New; - OMEGA::R4 FileMetaR4New; - OMEGA::R8 FileMetaR8New; - std::string FileMetaDescrNew; - - Err = OMEGA::IO::readMeta("FileMetaI4", FileMetaI4New, InFileID, - OMEGA::IO::GlobalID); - if (Err != 0) - LOG_ERROR("IOTest: error reading file I4 metadata FAIL"); - if (FileMetaI4New == FileMetaI4Ref) { - LOG_INFO("IOTest: read/write file metadata I4 test PASS"); - } else { - LOG_INFO("IOTest: read/write file metadata I4 test FAIL"); - } - - 
Err = OMEGA::IO::readMeta("FileMetaI8", FileMetaI8New, InFileID, - OMEGA::IO::GlobalID); - if (Err != 0) - LOG_ERROR("IOTest: error reading file I8 metadata FAIL"); - if (FileMetaI8New == FileMetaI8Ref) { - LOG_INFO("IOTest: read/write file metadata I8 test PASS"); - } else { - LOG_INFO("IOTest: read/write file metadata I8 test FAIL"); - } - - Err = OMEGA::IO::readMeta("FileMetaR4", FileMetaR4New, InFileID, - OMEGA::IO::GlobalID); - if (Err != 0) - LOG_ERROR("IOTest: error reading file R4 metadata FAIL"); - if (FileMetaR4New == FileMetaR4Ref) { - LOG_INFO("IOTest: read/write file metadata R4 test PASS"); - } else { - LOG_INFO("IOTest: read/write file metadata R4 test FAIL"); - } - - Err = OMEGA::IO::readMeta("FileMetaR8", FileMetaR8New, InFileID, - OMEGA::IO::GlobalID); - if (Err != 0) - LOG_ERROR("IOTest: error reading file R8 metadata FAIL"); - if (FileMetaR8New == FileMetaR8Ref) { - LOG_INFO("IOTest: read/write file metadata R8 test PASS"); - } else { - LOG_INFO("IOTest: read/write file metadata R8 test FAIL"); - } - - Err = OMEGA::IO::readMeta("FileMetaDescr", FileMetaDescrNew, InFileID, - OMEGA::IO::GlobalID); - if (Err != 0) - LOG_ERROR("IOTest: error reading file string metadata FAIL"); - if (FileMetaDescrNew == FileMetaDescr) { - LOG_INFO("IOTest: read/write file metadata string test PASS"); - } else { - LOG_INFO("IOTest: read/write file metadata string test FAIL"); - } + for (int Vrtx = 0; Vrtx < NVerticesOwned; ++Vrtx) { + int GlobalVrtxAdd = VrtxIDH(Vrtx) - 1; + for (int k = 0; k < NVertLevels; ++k) { + RefI4Vrtx(Vrtx, k) = GlobalVrtxAdd * k; + RefI8Vrtx(Vrtx, k) = GlobalVrtxAdd * k * 1000000000; + RefR4Vrtx(Vrtx, k) = GlobalVrtxAdd * k * 123.45; + RefR8Vrtx(Vrtx, k) = GlobalVrtxAdd * k * 1.23456789; + int VectorAdd = Vrtx * NVertLevels + k; + OffsetVrtx[VectorAdd] = GlobalVrtxAdd * NVertLevels + k; + } + } - // Read arrays - OMEGA::ArrayHost2DI4 NewI4Cell("NewI4Cell", NCellsSize, NVertLevels); - OMEGA::ArrayHost2DI8 NewI8Cell("NewI8Cell", NCellsSize, 
NVertLevels); - OMEGA::ArrayHost2DR4 NewR4Cell("NewR4Cell", NCellsSize, NVertLevels); - OMEGA::ArrayHost2DR8 NewR8Cell("NewR8Cell", NCellsSize, NVertLevels); + // Create the needed decomposition offsets + + int DecompCellI4; + int DecompCellI8; + int DecompCellR4; + int DecompCellR8; + int DecompEdgeI4; + int DecompEdgeI8; + int DecompEdgeR4; + int DecompEdgeR8; + int DecompVrtxI4; + int DecompVrtxI8; + int DecompVrtxR4; + int DecompVrtxR8; + std::vector CellDims{NCellsGlobal, NVertLevels}; + std::vector EdgeDims{NEdgesGlobal, NVertLevels}; + std::vector VrtxDims{NVerticesGlobal, NVertLevels}; + int CellArraySize = NCellsSize * NVertLevels; + int EdgeArraySize = NEdgesSize * NVertLevels; + int VrtxArraySize = NVerticesSize * NVertLevels; + + Err = OMEGA::IO::createDecomp(DecompCellI4, OMEGA::IO::IOTypeI4, 2, + CellDims, CellArraySize, OffsetCell, + OMEGA::IO::DefaultRearr); + if (Err != 0) + LOG_ERROR("IOTest: error creating cell decomp I4 FAIL"); + Err = OMEGA::IO::createDecomp(DecompCellI8, OMEGA::IO::IOTypeI8, 2, + CellDims, CellArraySize, OffsetCell, + OMEGA::IO::DefaultRearr); + if (Err != 0) + LOG_ERROR("IOTest: error creating cell decomp I8 FAIL"); + Err = OMEGA::IO::createDecomp(DecompCellR4, OMEGA::IO::IOTypeR4, 2, + CellDims, CellArraySize, OffsetCell, + OMEGA::IO::DefaultRearr); + if (Err != 0) + LOG_ERROR("IOTest: error creating cell decomp R4 FAIL"); + Err = OMEGA::IO::createDecomp(DecompCellR8, OMEGA::IO::IOTypeR8, 2, + CellDims, CellArraySize, OffsetCell, + OMEGA::IO::DefaultRearr); + if (Err != 0) + LOG_ERROR("IOTest: error creating cell decomp R8 FAIL"); + + Err = OMEGA::IO::createDecomp(DecompEdgeI4, OMEGA::IO::IOTypeI4, 2, + EdgeDims, EdgeArraySize, OffsetEdge, + OMEGA::IO::DefaultRearr); + if (Err != 0) + LOG_ERROR("IOTest: error creating edge decomp I4 FAIL"); + Err = OMEGA::IO::createDecomp(DecompEdgeI8, OMEGA::IO::IOTypeI8, 2, + EdgeDims, EdgeArraySize, OffsetEdge, + OMEGA::IO::DefaultRearr); + if (Err != 0) + LOG_ERROR("IOTest: error 
creating edge decomp I8 FAIL"); + Err = OMEGA::IO::createDecomp(DecompEdgeR4, OMEGA::IO::IOTypeR4, 2, + EdgeDims, EdgeArraySize, OffsetEdge, + OMEGA::IO::DefaultRearr); + if (Err != 0) + LOG_ERROR("IOTest: error creating edge decomp R4 FAIL"); + Err = OMEGA::IO::createDecomp(DecompEdgeR8, OMEGA::IO::IOTypeR8, 2, + EdgeDims, EdgeArraySize, OffsetEdge, + OMEGA::IO::DefaultRearr); + if (Err != 0) + LOG_ERROR("IOTest: error creating edge decomp R8 FAIL"); + + Err = OMEGA::IO::createDecomp(DecompVrtxI4, OMEGA::IO::IOTypeI4, 2, + VrtxDims, VrtxArraySize, OffsetVrtx, + OMEGA::IO::DefaultRearr); + if (Err != 0) + LOG_ERROR("IOTest: error creating vertex decomp I4 FAIL"); + Err = OMEGA::IO::createDecomp(DecompVrtxI8, OMEGA::IO::IOTypeI8, 2, + VrtxDims, VrtxArraySize, OffsetVrtx, + OMEGA::IO::DefaultRearr); + if (Err != 0) + LOG_ERROR("IOTest: error creating vertex decomp I8 FAIL"); + Err = OMEGA::IO::createDecomp(DecompVrtxR4, OMEGA::IO::IOTypeR4, 2, + VrtxDims, VrtxArraySize, OffsetVrtx, + OMEGA::IO::DefaultRearr); + if (Err != 0) + LOG_ERROR("IOTest: error creating vertex decomp R4 FAIL"); + Err = OMEGA::IO::createDecomp(DecompVrtxR8, OMEGA::IO::IOTypeR8, 2, + VrtxDims, VrtxArraySize, OffsetVrtx, + OMEGA::IO::DefaultRearr); + if (Err != 0) + LOG_ERROR("IOTest: error creating vertex decomp R8 FAIL"); + + // Open a file for output + int OutFileID; + Err = OMEGA::IO::openFile( + OutFileID, "IOTest.nc", OMEGA::IO::ModeWrite, OMEGA::IO::FmtDefault, + OMEGA::IO::IfExists::Replace, OMEGA::IO::Precision::Double); + if (Err != 0) + LOG_ERROR("IOTest: error opening file for output FAIL"); + + // Define array dimensions + int DimCellID; + int DimEdgeID; + int DimVrtxID; + int DimVertID; + Err = OMEGA::IO::defineDim(OutFileID, "NVertLevels", NVertLevels, + DimVertID); + if (Err != 0) + LOG_ERROR("IOTest: error defining vertical dimension FAIL"); + Err = OMEGA::IO::defineDim(OutFileID, "NCells", NCellsGlobal, DimCellID); + if (Err != 0) + LOG_ERROR("IOTest: error defining Cell 
dimension FAIL"); + Err = OMEGA::IO::defineDim(OutFileID, "NEdges", NEdgesGlobal, DimEdgeID); + if (Err != 0) + LOG_ERROR("IOTest: error defining Edge dimension FAIL"); + Err = OMEGA::IO::defineDim(OutFileID, "NVertices", NVerticesGlobal, + DimVrtxID); + if (Err != 0) + LOG_ERROR("IOTest: error defining Vertex dimension FAIL"); + + // Write some global file metadata + OMEGA::I4 FileMetaI4Ref = 2; + OMEGA::I8 FileMetaI8Ref = 4; + OMEGA::R4 FileMetaR4Ref = 6.789; + OMEGA::R8 FileMetaR8Ref = 1.23456789; + std::string FileMetaDescr = "OMEGA IO Unit test file"; + + Err = OMEGA::IO::writeMeta("FileMetaI4", FileMetaI4Ref, OutFileID, + OMEGA::IO::GlobalID); + if (Err != 0) + LOG_ERROR("IOTest: error writing global I4 metadata FAIL"); + Err = OMEGA::IO::writeMeta("FileMetaI8", FileMetaI8Ref, OutFileID, + OMEGA::IO::GlobalID); + if (Err != 0) + LOG_ERROR("IOTest: error writing global I8 metadata FAIL"); + Err = OMEGA::IO::writeMeta("FileMetaR4", FileMetaR4Ref, OutFileID, + OMEGA::IO::GlobalID); + if (Err != 0) + LOG_ERROR("IOTest: error writing global R4 metadata FAIL"); + Err = OMEGA::IO::writeMeta("FileMetaR8", FileMetaR8Ref, OutFileID, + OMEGA::IO::GlobalID); + if (Err != 0) + LOG_ERROR("IOTest: error writing global R8 metadata FAIL"); + Err = OMEGA::IO::writeMeta("FileMetaDescr", FileMetaDescr, OutFileID, + OMEGA::IO::GlobalID); + if (Err != 0) + LOG_ERROR("IOTest: error writing global char metadata FAIL"); + + // Define variables/arrays + int VarIDCellI4; + int VarIDCellI8; + int VarIDCellR4; + int VarIDCellR8; + int VarIDEdgeI4; + int VarIDEdgeI8; + int VarIDEdgeR4; + int VarIDEdgeR8; + int VarIDVrtxI4; + int VarIDVrtxI8; + int VarIDVrtxR4; + int VarIDVrtxR8; + + int CellDimIDs[2] = {DimCellID, DimVertID}; + int EdgeDimIDs[2] = {DimEdgeID, DimVertID}; + int VrtxDimIDs[2] = {DimVrtxID, DimVertID}; + + Err = OMEGA::IO::defineVar(OutFileID, "CellI4", OMEGA::IO::IOTypeI4, 2, + CellDimIDs, VarIDCellI4); + if (Err != 0) + LOG_ERROR("IOTest: Error defining CellI4 array 
FAIL"); + Err = OMEGA::IO::defineVar(OutFileID, "CellI8", OMEGA::IO::IOTypeI8, 2, + CellDimIDs, VarIDCellI8); + if (Err != 0) + LOG_ERROR("IOTest: Error defining CellI8 array FAIL"); + Err = OMEGA::IO::defineVar(OutFileID, "CellR4", OMEGA::IO::IOTypeR4, 2, + CellDimIDs, VarIDCellR4); + if (Err != 0) + LOG_ERROR("IOTest: Error defining CellR4 array FAIL"); + Err = OMEGA::IO::defineVar(OutFileID, "CellR8", OMEGA::IO::IOTypeR8, 2, + CellDimIDs, VarIDCellR8); + if (Err != 0) + LOG_ERROR("IOTest: Error defining CellR8 array FAIL"); + + Err = OMEGA::IO::defineVar(OutFileID, "EdgeI4", OMEGA::IO::IOTypeI4, 2, + EdgeDimIDs, VarIDEdgeI4); + if (Err != 0) + LOG_ERROR("IOTest: Error defining EdgeI4 array FAIL"); + Err = OMEGA::IO::defineVar(OutFileID, "EdgeI8", OMEGA::IO::IOTypeI8, 2, + EdgeDimIDs, VarIDEdgeI8); + if (Err != 0) + LOG_ERROR("IOTest: Error defining EdgeI8 array FAIL"); + Err = OMEGA::IO::defineVar(OutFileID, "EdgeR4", OMEGA::IO::IOTypeR4, 2, + EdgeDimIDs, VarIDEdgeR4); + if (Err != 0) + LOG_ERROR("IOTest: Error defining EdgeR4 array FAIL"); + Err = OMEGA::IO::defineVar(OutFileID, "EdgeR8", OMEGA::IO::IOTypeR8, 2, + EdgeDimIDs, VarIDEdgeR8); + if (Err != 0) + LOG_ERROR("IOTest: Error defining EdgeR8 array FAIL"); + + Err = OMEGA::IO::defineVar(OutFileID, "VrtxI4", OMEGA::IO::IOTypeI4, 2, + VrtxDimIDs, VarIDVrtxI4); + if (Err != 0) + LOG_ERROR("IOTest: Error defining VrtxI4 array FAIL"); + Err = OMEGA::IO::defineVar(OutFileID, "VrtxI8", OMEGA::IO::IOTypeI8, 2, + VrtxDimIDs, VarIDVrtxI8); + if (Err != 0) + LOG_ERROR("IOTest: Error defining VrtxI8 array FAIL"); + Err = OMEGA::IO::defineVar(OutFileID, "VrtxR4", OMEGA::IO::IOTypeR4, 2, + VrtxDimIDs, VarIDVrtxR4); + if (Err != 0) + LOG_ERROR("IOTest: Error defining VrtxR4 array FAIL"); + Err = OMEGA::IO::defineVar(OutFileID, "VrtxR8", OMEGA::IO::IOTypeR8, 2, + VrtxDimIDs, VarIDVrtxR8); + if (Err != 0) + LOG_ERROR("IOTest: Error defining VrtxR8 array FAIL"); + + // Add Variable metadata just for one array + OMEGA::I4 
VarMetaI4Ref = 3; + OMEGA::I8 VarMetaI8Ref = 5; + OMEGA::R4 VarMetaR4Ref = 5.789; + OMEGA::R8 VarMetaR8Ref = 2.23456789; + std::string VarMetaDescrRef = "Test array for I4 on Cells"; + + Err = OMEGA::IO::writeMeta("VarMetaI4", VarMetaI4Ref, OutFileID, + VarIDCellI4); + if (Err != 0) + LOG_ERROR("IOTest: error writing var I4 metadata FAIL"); + Err = OMEGA::IO::writeMeta("VarMetaI8", VarMetaI8Ref, OutFileID, + VarIDCellI4); + if (Err != 0) + LOG_ERROR("IOTest: error writing var I8 metadata FAIL"); + Err = OMEGA::IO::writeMeta("VarMetaR4", VarMetaR4Ref, OutFileID, + VarIDCellI4); + if (Err != 0) + LOG_ERROR("IOTest: error writing var R4 metadata FAIL"); + Err = OMEGA::IO::writeMeta("VarMetaR8", VarMetaR8Ref, OutFileID, + VarIDCellI4); + if (Err != 0) + LOG_ERROR("IOTest: error writing var R8 metadata FAIL"); + Err = OMEGA::IO::writeMeta("VarMetaDescr", VarMetaDescrRef, OutFileID, + VarIDCellI4); + if (Err != 0) + LOG_ERROR("IOTest: error writing var char metadata FAIL"); + + // Exit define mode + Err = OMEGA::IO::endDefinePhase(OutFileID); + if (Err != 0) + LOG_ERROR("IOTest: error ending define mode FAIL"); + + // Write variables + OMEGA::I4 FillI4 = -999; + OMEGA::I8 FillI8 = -999999; + OMEGA::R4 FillR4 = -1.234e30; + OMEGA::R8 FillR8 = -1.23456789e30; + + Err = + OMEGA::IO::writeArray(RefI4Cell.data(), NCellsSize * NVertLevels, + &FillI4, OutFileID, DecompCellI4, VarIDCellI4); + if (Err != 0) + LOG_ERROR("IOTest: error writing I4 array on cells FAIL"); + Err = + OMEGA::IO::writeArray(RefI8Cell.data(), NCellsSize * NVertLevels, + &FillI8, OutFileID, DecompCellI8, VarIDCellI8); + if (Err != 0) + LOG_ERROR("IOTest: error writing I8 array on cells FAIL"); + Err = + OMEGA::IO::writeArray(RefR4Cell.data(), NCellsSize * NVertLevels, + &FillR4, OutFileID, DecompCellR4, VarIDCellR4); + if (Err != 0) + LOG_ERROR("IOTest: error writing R4 array on cells FAIL"); + Err = + OMEGA::IO::writeArray(RefR8Cell.data(), NCellsSize * NVertLevels, + &FillR8, OutFileID, DecompCellR8, 
VarIDCellR8); + if (Err != 0) + LOG_ERROR("IOTest: error writing R8 array on cells FAIL"); + + Err = + OMEGA::IO::writeArray(RefI4Edge.data(), NEdgesSize * NVertLevels, + &FillI4, OutFileID, DecompEdgeI4, VarIDEdgeI4); + if (Err != 0) + LOG_ERROR("IOTest: error writing I4 array on Edges FAIL"); + Err = + OMEGA::IO::writeArray(RefI8Edge.data(), NEdgesSize * NVertLevels, + &FillI8, OutFileID, DecompEdgeI8, VarIDEdgeI8); + if (Err != 0) + LOG_ERROR("IOTest: error writing I8 array on Edges FAIL"); + Err = + OMEGA::IO::writeArray(RefR4Edge.data(), NEdgesSize * NVertLevels, + &FillR4, OutFileID, DecompEdgeR4, VarIDEdgeR4); + if (Err != 0) + LOG_ERROR("IOTest: error writing R4 array on Edges FAIL"); + Err = + OMEGA::IO::writeArray(RefR8Edge.data(), NEdgesSize * NVertLevels, + &FillR8, OutFileID, DecompEdgeR8, VarIDEdgeR8); + if (Err != 0) + LOG_ERROR("IOTest: error writing R8 array on Edges FAIL"); + + Err = + OMEGA::IO::writeArray(RefI4Vrtx.data(), NVerticesSize * NVertLevels, + &FillI4, OutFileID, DecompVrtxI4, VarIDVrtxI4); + if (Err != 0) + LOG_ERROR("IOTest: error writing I4 array on vertices FAIL"); + Err = + OMEGA::IO::writeArray(RefI8Vrtx.data(), NVerticesSize * NVertLevels, + &FillI8, OutFileID, DecompVrtxI8, VarIDVrtxI8); + if (Err != 0) + LOG_ERROR("IOTest: error writing I8 array on vertices FAIL"); + Err = + OMEGA::IO::writeArray(RefR4Vrtx.data(), NVerticesSize * NVertLevels, + &FillR4, OutFileID, DecompVrtxR4, VarIDVrtxR4); + if (Err != 0) + LOG_ERROR("IOTest: error writing R4 array on vertices FAIL"); + Err = + OMEGA::IO::writeArray(RefR8Vrtx.data(), NVerticesSize * NVertLevels, + &FillR8, OutFileID, DecompVrtxR8, VarIDVrtxR8); + if (Err != 0) + LOG_ERROR("IOTest: error writing R8 array on vertices FAIL"); + + // Finished writing, close file + Err = OMEGA::IO::closeFile(OutFileID); + if (Err != 0) + LOG_ERROR("IOTest: error closing output file FAIL"); + + // Open a file for reading to verify read/write + int InFileID; + Err = OMEGA::IO::openFile(InFileID, 
"IOTest.nc", OMEGA::IO::ModeRead); + if (Err != 0) + LOG_ERROR("IOTest: error opening file for reading FAIL"); + + // Get dimension lengths to verify read/write of dimension info + OMEGA::I4 NVertLevelsNew = + OMEGA::IO::getDimLength(InFileID, "NVertLevels"); + if (NVertLevelsNew == NVertLevels) { + LOG_INFO("IOTest: read/write vert dimension test PASS"); + } else { + LOG_INFO("IOTest: read/write vert dimension test FAIL"); + } - OMEGA::ArrayHost2DI4 NewI4Edge("NewI4Edge", NEdgesSize, NVertLevels); - OMEGA::ArrayHost2DI8 NewI8Edge("NewI8Edge", NEdgesSize, NVertLevels); - OMEGA::ArrayHost2DR4 NewR4Edge("NewR4Edge", NEdgesSize, NVertLevels); - OMEGA::ArrayHost2DR8 NewR8Edge("NewR8Edge", NEdgesSize, NVertLevels); + OMEGA::I4 NCellsNew = OMEGA::IO::getDimLength(InFileID, "NCells"); + if (NCellsNew == NCellsGlobal) { + LOG_INFO("IOTest: read/write cell dimension test PASS"); + } else { + LOG_INFO("IOTest: read/write cell dimension test FAIL"); + } - OMEGA::ArrayHost2DI4 NewI4Vrtx("NewI4Vrtx", NVerticesSize, NVertLevels); - OMEGA::ArrayHost2DI8 NewI8Vrtx("NewI8Vrtx", NVerticesSize, NVertLevels); - OMEGA::ArrayHost2DR4 NewR4Vrtx("NewR4Vrtx", NVerticesSize, NVertLevels); - OMEGA::ArrayHost2DR8 NewR8Vrtx("NewR8Vrtx", NVerticesSize, NVertLevels); + OMEGA::I4 NEdgesNew = OMEGA::IO::getDimLength(InFileID, "NEdges"); + if (NEdgesNew == NEdgesGlobal) { + LOG_INFO("IOTest: read/write edge dimension test PASS"); + } else { + LOG_INFO("IOTest: read/write edge dimension test FAIL"); + } - Err = OMEGA::IO::readArray(NewI4Cell.data(), NCellsSize * NVertLevels, - "CellI4", InFileID, DecompCellI4, VarIDCellI4); - if (Err != 0) - LOG_ERROR("IOTest: error writing I4 array on cells FAIL"); - Err = OMEGA::IO::readArray(NewI8Cell.data(), NCellsSize * NVertLevels, - "CellI8", InFileID, DecompCellI8, VarIDCellI8); - if (Err != 0) - LOG_ERROR("IOTest: error writing I8 array on cells FAIL"); - Err = OMEGA::IO::readArray(NewR4Cell.data(), NCellsSize * NVertLevels, - "CellR4", InFileID, 
DecompCellR4, VarIDCellR4); - if (Err != 0) - LOG_ERROR("IOTest: error writing R4 array on cells FAIL"); - Err = OMEGA::IO::readArray(NewR8Cell.data(), NCellsSize * NVertLevels, - "CellR8", InFileID, DecompCellR8, VarIDCellR8); - if (Err != 0) - LOG_ERROR("IOTest: error writing R8 array on cells FAIL"); + OMEGA::I4 NVerticesNew = OMEGA::IO::getDimLength(InFileID, "NVertices"); + if (NVerticesNew == NVerticesGlobal) { + LOG_INFO("IOTest: read/write vertex dimension test PASS"); + } else { + LOG_INFO("IOTest: read/write vertex dimension test FAIL"); + } - Err = OMEGA::IO::readArray(NewI4Edge.data(), NEdgesSize * NVertLevels, - "EdgeI4", InFileID, DecompEdgeI4, VarIDEdgeI4); - if (Err != 0) - LOG_ERROR("IOTest: error writing I4 array on Edges FAIL"); - Err = OMEGA::IO::readArray(NewI8Edge.data(), NEdgesSize * NVertLevels, - "EdgeI8", InFileID, DecompEdgeI8, VarIDEdgeI8); - if (Err != 0) - LOG_ERROR("IOTest: error writing I8 array on Edges FAIL"); - Err = OMEGA::IO::readArray(NewR4Edge.data(), NEdgesSize * NVertLevels, - "EdgeR4", InFileID, DecompEdgeR4, VarIDEdgeR4); - if (Err != 0) - LOG_ERROR("IOTest: error writing R4 array on Edges FAIL"); - Err = OMEGA::IO::readArray(NewR8Edge.data(), NEdgesSize * NVertLevels, - "EdgeR8", InFileID, DecompEdgeR8, VarIDEdgeR8); - if (Err != 0) - LOG_ERROR("IOTest: error writing R8 array on Edges FAIL"); + // Read global attributes + OMEGA::I4 FileMetaI4New; + OMEGA::I8 FileMetaI8New; + OMEGA::R4 FileMetaR4New; + OMEGA::R8 FileMetaR8New; + std::string FileMetaDescrNew; + + Err = OMEGA::IO::readMeta("FileMetaI4", FileMetaI4New, InFileID, + OMEGA::IO::GlobalID); + if (Err != 0) + LOG_ERROR("IOTest: error reading file I4 metadata FAIL"); + if (FileMetaI4New == FileMetaI4Ref) { + LOG_INFO("IOTest: read/write file metadata I4 test PASS"); + } else { + LOG_INFO("IOTest: read/write file metadata I4 test FAIL"); + } - Err = OMEGA::IO::readArray(NewI4Vrtx.data(), NVerticesSize * NVertLevels, - "VrtxI4", InFileID, DecompVrtxI4, VarIDVrtxI4); - 
if (Err != 0) - LOG_ERROR("IOTest: error writing I4 array on vertices FAIL"); - Err = OMEGA::IO::readArray(NewI8Vrtx.data(), NVerticesSize * NVertLevels, - "VrtxI8", InFileID, DecompVrtxI8, VarIDVrtxI8); - if (Err != 0) - LOG_ERROR("IOTest: error writing I8 array on vertices FAIL"); - Err = OMEGA::IO::readArray(NewR4Vrtx.data(), NVerticesSize * NVertLevels, - "VrtxR4", InFileID, DecompVrtxR4, VarIDVrtxR4); - if (Err != 0) - LOG_ERROR("IOTest: error writing R4 array on vertices FAIL"); - Err = OMEGA::IO::readArray(NewR8Vrtx.data(), NVerticesSize * NVertLevels, - "VrtxR8", InFileID, DecompVrtxR8, VarIDVrtxR8); - if (Err != 0) - LOG_ERROR("IOTest: error writing R8 array on vertices FAIL"); - - // Check that arrays match the reference cases that were written - // Only check the owned values - these would need to be followed by - // a halo update. - - int Err1 = 0; - int Err2 = 0; - int Err3 = 0; - int Err4 = 0; - for (int Cell = 0; Cell < NCellsOwned; ++Cell) { - for (int k = 0; k < NVertLevels; ++k) { - if (NewI4Cell(Cell, k) != RefI4Cell(Cell, k)) - Err1++; - if (NewI8Cell(Cell, k) != RefI8Cell(Cell, k)) - Err2++; - if (NewR4Cell(Cell, k) != RefR4Cell(Cell, k)) - Err3++; - if (NewR8Cell(Cell, k) != RefR8Cell(Cell, k)) - Err4++; + Err = OMEGA::IO::readMeta("FileMetaI8", FileMetaI8New, InFileID, + OMEGA::IO::GlobalID); + if (Err != 0) + LOG_ERROR("IOTest: error reading file I8 metadata FAIL"); + if (FileMetaI8New == FileMetaI8Ref) { + LOG_INFO("IOTest: read/write file metadata I8 test PASS"); + } else { + LOG_INFO("IOTest: read/write file metadata I8 test FAIL"); } - } - if (Err1 == 0) { - LOG_INFO("IOTest: read/write array I4 on Cells test PASS"); - } else { - LOG_INFO("IOTest: read/write array I4 on Cells test FAIL"); - } - if (Err2 == 0) { - LOG_INFO("IOTest: read/write array I8 on Cells test PASS"); - } else { - LOG_INFO("IOTest: read/write array I8 on Cells test FAIL"); - } - if (Err3 == 0) { - LOG_INFO("IOTest: read/write array R4 on Cells test PASS"); - } else { 
- LOG_INFO("IOTest: read/write array R4 on Cells test FAIL"); - } - if (Err4 == 0) { - LOG_INFO("IOTest: read/write array R8 on Cells test PASS"); - } else { - LOG_INFO("IOTest: read/write array R8 on Cells test FAIL"); - } - Err1 = 0; - Err2 = 0; - Err3 = 0; - Err4 = 0; - for (int Edge = 0; Edge < NEdgesOwned; ++Edge) { - for (int k = 0; k < NVertLevels; ++k) { - if (NewI4Edge(Edge, k) != RefI4Edge(Edge, k)) - Err1++; - if (NewI8Edge(Edge, k) != RefI8Edge(Edge, k)) - Err2++; - if (NewR4Edge(Edge, k) != RefR4Edge(Edge, k)) - Err3++; - if (NewR8Edge(Edge, k) != RefR8Edge(Edge, k)) - Err4++; + Err = OMEGA::IO::readMeta("FileMetaR4", FileMetaR4New, InFileID, + OMEGA::IO::GlobalID); + if (Err != 0) + LOG_ERROR("IOTest: error reading file R4 metadata FAIL"); + if (FileMetaR4New == FileMetaR4Ref) { + LOG_INFO("IOTest: read/write file metadata R4 test PASS"); + } else { + LOG_INFO("IOTest: read/write file metadata R4 test FAIL"); } - } - if (Err1 == 0) { - LOG_INFO("IOTest: read/write array I4 on Edges test PASS"); - } else { - LOG_INFO("IOTest: read/write array I4 on Edges test FAIL"); - } - if (Err2 == 0) { - LOG_INFO("IOTest: read/write array I8 on Edges test PASS"); - } else { - LOG_INFO("IOTest: read/write array I8 on Edges test FAIL"); - } - if (Err3 == 0) { - LOG_INFO("IOTest: read/write array R4 on Edges test PASS"); - } else { - LOG_INFO("IOTest: read/write array R4 on Edges test FAIL"); - } - if (Err4 == 0) { - LOG_INFO("IOTest: read/write array R8 on Edges test PASS"); - } else { - LOG_INFO("IOTest: read/write array R8 on Edges test FAIL"); - } - Err1 = 0; - Err2 = 0; - Err3 = 0; - Err4 = 0; - for (int Vrtx = 0; Vrtx < NVerticesOwned; ++Vrtx) { - for (int k = 0; k < NVertLevels; ++k) { - if (NewI4Vrtx(Vrtx, k) != RefI4Vrtx(Vrtx, k)) - Err1++; - if (NewI8Vrtx(Vrtx, k) != RefI8Vrtx(Vrtx, k)) - Err2++; - if (NewR4Vrtx(Vrtx, k) != RefR4Vrtx(Vrtx, k)) - Err3++; - if (NewR8Vrtx(Vrtx, k) != RefR8Vrtx(Vrtx, k)) - Err4++; + Err = OMEGA::IO::readMeta("FileMetaR8", 
FileMetaR8New, InFileID, + OMEGA::IO::GlobalID); + if (Err != 0) + LOG_ERROR("IOTest: error reading file R8 metadata FAIL"); + if (FileMetaR8New == FileMetaR8Ref) { + LOG_INFO("IOTest: read/write file metadata R8 test PASS"); + } else { + LOG_INFO("IOTest: read/write file metadata R8 test FAIL"); } - } - if (Err1 == 0) { - LOG_INFO("IOTest: read/write array I4 on Vertices test PASS"); - } else { - LOG_INFO("IOTest: read/write array I4 on Vertices test FAIL"); - } - if (Err2 == 0) { - LOG_INFO("IOTest: read/write array I8 on Vertices test PASS"); - } else { - LOG_INFO("IOTest: read/write array I8 on Vertices test FAIL"); - } - if (Err3 == 0) { - LOG_INFO("IOTest: read/write array R4 on Vertices test PASS"); - } else { - LOG_INFO("IOTest: read/write array R4 on Vertices test FAIL"); - } - if (Err4 == 0) { - LOG_INFO("IOTest: read/write array R8 on Vertices test PASS"); - } else { - LOG_INFO("IOTest: read/write array R8 on Vertices test FAIL"); - } - // Read array attributes - OMEGA::I4 VarMetaI4New; - OMEGA::I8 VarMetaI8New; - OMEGA::R4 VarMetaR4New; - OMEGA::R8 VarMetaR8New; - std::string VarMetaDescrNew; + Err = OMEGA::IO::readMeta("FileMetaDescr", FileMetaDescrNew, InFileID, + OMEGA::IO::GlobalID); + if (Err != 0) + LOG_ERROR("IOTest: error reading file string metadata FAIL"); + if (FileMetaDescrNew == FileMetaDescr) { + LOG_INFO("IOTest: read/write file metadata string test PASS"); + } else { + LOG_INFO("IOTest: read/write file metadata string test FAIL"); + } - Err = OMEGA::IO::readMeta("VarMetaI4", VarMetaI4New, InFileID, VarIDCellI4); - if (Err != 0) - LOG_ERROR("IOTest: error reading var I4 metadata FAIL"); - if (VarMetaI4New == VarMetaI4Ref) { - LOG_INFO("IOTest: read/write var metadata I4 test PASS"); - } else { - LOG_INFO("IOTest: read/write var metadata I4 test FAIL"); - } - Err = OMEGA::IO::readMeta("VarMetaI8", VarMetaI8New, InFileID, VarIDCellI4); - if (Err != 0) - LOG_ERROR("IOTest: error reading var I8 metadata FAIL"); - if (VarMetaI8New == 
VarMetaI8Ref) { - LOG_INFO("IOTest: read/write var metadata I8 test PASS"); - } else { - LOG_INFO("IOTest: read/write var metadata I8 test FAIL"); - } - Err = OMEGA::IO::readMeta("VarMetaR4", VarMetaR4New, InFileID, VarIDCellI4); - if (Err != 0) - LOG_ERROR("IOTest: error reading var R4 metadata FAIL"); - if (VarMetaR4New == VarMetaR4Ref) { - LOG_INFO("IOTest: read/write var metadata R4 test PASS"); - } else { - LOG_INFO("IOTest: read/write var metadata R4 test FAIL"); - } - Err = OMEGA::IO::readMeta("VarMetaR8", VarMetaR8New, InFileID, VarIDCellI4); - if (Err != 0) - LOG_ERROR("IOTest: error reading var R8 metadata FAIL"); - if (VarMetaR8New == VarMetaR8Ref) { - LOG_INFO("IOTest: read/write var metadata R8 test PASS"); - } else { - LOG_INFO("IOTest: read/write var metadata R8 test FAIL"); - } - Err = OMEGA::IO::readMeta("VarMetaDescr", VarMetaDescrNew, InFileID, - VarIDCellI4); - if (Err != 0) - LOG_ERROR("IOTest: error reading var string metadata FAIL"); - if (VarMetaDescrNew == VarMetaDescrRef) { - LOG_INFO("IOTest: read/write var metadata string test PASS"); - } else { - LOG_INFO("IOTest: read/write var metadata string test FAIL"); - } + // Read arrays + OMEGA::HostArray2DI4 NewI4Cell("NewI4Cell", NCellsSize, NVertLevels); + OMEGA::HostArray2DI8 NewI8Cell("NewI8Cell", NCellsSize, NVertLevels); + OMEGA::HostArray2DR4 NewR4Cell("NewR4Cell", NCellsSize, NVertLevels); + OMEGA::HostArray2DR8 NewR8Cell("NewR8Cell", NCellsSize, NVertLevels); + + OMEGA::HostArray2DI4 NewI4Edge("NewI4Edge", NEdgesSize, NVertLevels); + OMEGA::HostArray2DI8 NewI8Edge("NewI8Edge", NEdgesSize, NVertLevels); + OMEGA::HostArray2DR4 NewR4Edge("NewR4Edge", NEdgesSize, NVertLevels); + OMEGA::HostArray2DR8 NewR8Edge("NewR8Edge", NEdgesSize, NVertLevels); + + OMEGA::HostArray2DI4 NewI4Vrtx("NewI4Vrtx", NVerticesSize, NVertLevels); + OMEGA::HostArray2DI8 NewI8Vrtx("NewI8Vrtx", NVerticesSize, NVertLevels); + OMEGA::HostArray2DR4 NewR4Vrtx("NewR4Vrtx", NVerticesSize, NVertLevels); + 
OMEGA::HostArray2DR8 NewR8Vrtx("NewR8Vrtx", NVerticesSize, NVertLevels); + + Err = OMEGA::IO::readArray(NewI4Cell.data(), NCellsSize * NVertLevels, + "CellI4", InFileID, DecompCellI4, VarIDCellI4); + if (Err != 0) + LOG_ERROR("IOTest: error writing I4 array on cells FAIL"); + Err = OMEGA::IO::readArray(NewI8Cell.data(), NCellsSize * NVertLevels, + "CellI8", InFileID, DecompCellI8, VarIDCellI8); + if (Err != 0) + LOG_ERROR("IOTest: error writing I8 array on cells FAIL"); + Err = OMEGA::IO::readArray(NewR4Cell.data(), NCellsSize * NVertLevels, + "CellR4", InFileID, DecompCellR4, VarIDCellR4); + if (Err != 0) + LOG_ERROR("IOTest: error writing R4 array on cells FAIL"); + Err = OMEGA::IO::readArray(NewR8Cell.data(), NCellsSize * NVertLevels, + "CellR8", InFileID, DecompCellR8, VarIDCellR8); + if (Err != 0) + LOG_ERROR("IOTest: error writing R8 array on cells FAIL"); + + Err = OMEGA::IO::readArray(NewI4Edge.data(), NEdgesSize * NVertLevels, + "EdgeI4", InFileID, DecompEdgeI4, VarIDEdgeI4); + if (Err != 0) + LOG_ERROR("IOTest: error writing I4 array on Edges FAIL"); + Err = OMEGA::IO::readArray(NewI8Edge.data(), NEdgesSize * NVertLevels, + "EdgeI8", InFileID, DecompEdgeI8, VarIDEdgeI8); + if (Err != 0) + LOG_ERROR("IOTest: error writing I8 array on Edges FAIL"); + Err = OMEGA::IO::readArray(NewR4Edge.data(), NEdgesSize * NVertLevels, + "EdgeR4", InFileID, DecompEdgeR4, VarIDEdgeR4); + if (Err != 0) + LOG_ERROR("IOTest: error writing R4 array on Edges FAIL"); + Err = OMEGA::IO::readArray(NewR8Edge.data(), NEdgesSize * NVertLevels, + "EdgeR8", InFileID, DecompEdgeR8, VarIDEdgeR8); + if (Err != 0) + LOG_ERROR("IOTest: error writing R8 array on Edges FAIL"); + + Err = OMEGA::IO::readArray(NewI4Vrtx.data(), NVerticesSize * NVertLevels, + "VrtxI4", InFileID, DecompVrtxI4, VarIDVrtxI4); + if (Err != 0) + LOG_ERROR("IOTest: error writing I4 array on vertices FAIL"); + Err = OMEGA::IO::readArray(NewI8Vrtx.data(), NVerticesSize * NVertLevels, + "VrtxI8", InFileID, DecompVrtxI8, 
VarIDVrtxI8); + if (Err != 0) + LOG_ERROR("IOTest: error writing I8 array on vertices FAIL"); + Err = OMEGA::IO::readArray(NewR4Vrtx.data(), NVerticesSize * NVertLevels, + "VrtxR4", InFileID, DecompVrtxR4, VarIDVrtxR4); + if (Err != 0) + LOG_ERROR("IOTest: error writing R4 array on vertices FAIL"); + Err = OMEGA::IO::readArray(NewR8Vrtx.data(), NVerticesSize * NVertLevels, + "VrtxR8", InFileID, DecompVrtxR8, VarIDVrtxR8); + if (Err != 0) + LOG_ERROR("IOTest: error writing R8 array on vertices FAIL"); + + // Check that arrays match the reference cases that were written + // Only check the owned values - these would need to be followed by + // a halo update. + + int Err1 = 0; + int Err2 = 0; + int Err3 = 0; + int Err4 = 0; + for (int Cell = 0; Cell < NCellsOwned; ++Cell) { + for (int k = 0; k < NVertLevels; ++k) { + if (NewI4Cell(Cell, k) != RefI4Cell(Cell, k)) + Err1++; + if (NewI8Cell(Cell, k) != RefI8Cell(Cell, k)) + Err2++; + if (NewR4Cell(Cell, k) != RefR4Cell(Cell, k)) + Err3++; + if (NewR8Cell(Cell, k) != RefR8Cell(Cell, k)) + Err4++; + } + } + if (Err1 == 0) { + LOG_INFO("IOTest: read/write array I4 on Cells test PASS"); + } else { + LOG_INFO("IOTest: read/write array I4 on Cells test FAIL"); + } + if (Err2 == 0) { + LOG_INFO("IOTest: read/write array I8 on Cells test PASS"); + } else { + LOG_INFO("IOTest: read/write array I8 on Cells test FAIL"); + } + if (Err3 == 0) { + LOG_INFO("IOTest: read/write array R4 on Cells test PASS"); + } else { + LOG_INFO("IOTest: read/write array R4 on Cells test FAIL"); + } + if (Err4 == 0) { + LOG_INFO("IOTest: read/write array R8 on Cells test PASS"); + } else { + LOG_INFO("IOTest: read/write array R8 on Cells test FAIL"); + } - // Finished reading, close file - Err = OMEGA::IO::closeFile(InFileID); - if (Err != 0) - LOG_ERROR("IOTest: error closing input file FAIL"); + Err1 = 0; + Err2 = 0; + Err3 = 0; + Err4 = 0; + for (int Edge = 0; Edge < NEdgesOwned; ++Edge) { + for (int k = 0; k < NVertLevels; ++k) { + if 
(NewI4Edge(Edge, k) != RefI4Edge(Edge, k)) + Err1++; + if (NewI8Edge(Edge, k) != RefI8Edge(Edge, k)) + Err2++; + if (NewR4Edge(Edge, k) != RefR4Edge(Edge, k)) + Err3++; + if (NewR8Edge(Edge, k) != RefR8Edge(Edge, k)) + Err4++; + } + } + if (Err1 == 0) { + LOG_INFO("IOTest: read/write array I4 on Edges test PASS"); + } else { + LOG_INFO("IOTest: read/write array I4 on Edges test FAIL"); + } + if (Err2 == 0) { + LOG_INFO("IOTest: read/write array I8 on Edges test PASS"); + } else { + LOG_INFO("IOTest: read/write array I8 on Edges test FAIL"); + } + if (Err3 == 0) { + LOG_INFO("IOTest: read/write array R4 on Edges test PASS"); + } else { + LOG_INFO("IOTest: read/write array R4 on Edges test FAIL"); + } + if (Err4 == 0) { + LOG_INFO("IOTest: read/write array R8 on Edges test PASS"); + } else { + LOG_INFO("IOTest: read/write array R8 on Edges test FAIL"); + } - // Test destruction of Decompositions - Err = OMEGA::IO::destroyDecomp(DecompCellI4); - if (Err != 0) - LOG_ERROR("IOTest: error destroying decomp cell I4 FAIL"); - Err = OMEGA::IO::destroyDecomp(DecompCellI8); - if (Err != 0) - LOG_ERROR("IOTest: error destroying decomp cell I8 FAIL"); - Err = OMEGA::IO::destroyDecomp(DecompCellR4); - if (Err != 0) - LOG_ERROR("IOTest: error destroying decomp cell R4 FAIL"); - Err = OMEGA::IO::destroyDecomp(DecompCellR8); - if (Err != 0) - LOG_ERROR("IOTest: error destroying decomp cell R8 FAIL"); + Err1 = 0; + Err2 = 0; + Err3 = 0; + Err4 = 0; + for (int Vrtx = 0; Vrtx < NVerticesOwned; ++Vrtx) { + for (int k = 0; k < NVertLevels; ++k) { + if (NewI4Vrtx(Vrtx, k) != RefI4Vrtx(Vrtx, k)) + Err1++; + if (NewI8Vrtx(Vrtx, k) != RefI8Vrtx(Vrtx, k)) + Err2++; + if (NewR4Vrtx(Vrtx, k) != RefR4Vrtx(Vrtx, k)) + Err3++; + if (NewR8Vrtx(Vrtx, k) != RefR8Vrtx(Vrtx, k)) + Err4++; + } + } + if (Err1 == 0) { + LOG_INFO("IOTest: read/write array I4 on Vertices test PASS"); + } else { + LOG_INFO("IOTest: read/write array I4 on Vertices test FAIL"); + } + if (Err2 == 0) { + LOG_INFO("IOTest: 
read/write array I8 on Vertices test PASS"); + } else { + LOG_INFO("IOTest: read/write array I8 on Vertices test FAIL"); + } + if (Err3 == 0) { + LOG_INFO("IOTest: read/write array R4 on Vertices test PASS"); + } else { + LOG_INFO("IOTest: read/write array R4 on Vertices test FAIL"); + } + if (Err4 == 0) { + LOG_INFO("IOTest: read/write array R8 on Vertices test PASS"); + } else { + LOG_INFO("IOTest: read/write array R8 on Vertices test FAIL"); + } - Err = OMEGA::IO::destroyDecomp(DecompEdgeI4); - if (Err != 0) - LOG_ERROR("IOTest: error destroying decomp Edge I4 FAIL"); - Err = OMEGA::IO::destroyDecomp(DecompEdgeI8); - if (Err != 0) - LOG_ERROR("IOTest: error destroying decomp Edge I8 FAIL"); - Err = OMEGA::IO::destroyDecomp(DecompEdgeR4); - if (Err != 0) - LOG_ERROR("IOTest: error destroying decomp Edge R4 FAIL"); - Err = OMEGA::IO::destroyDecomp(DecompEdgeR8); - if (Err != 0) - LOG_ERROR("IOTest: error destroying decomp Edge R8 FAIL"); + // Read array attributes + OMEGA::I4 VarMetaI4New; + OMEGA::I8 VarMetaI8New; + OMEGA::R4 VarMetaR4New; + OMEGA::R8 VarMetaR8New; + std::string VarMetaDescrNew; + + Err = + OMEGA::IO::readMeta("VarMetaI4", VarMetaI4New, InFileID, VarIDCellI4); + if (Err != 0) + LOG_ERROR("IOTest: error reading var I4 metadata FAIL"); + if (VarMetaI4New == VarMetaI4Ref) { + LOG_INFO("IOTest: read/write var metadata I4 test PASS"); + } else { + LOG_INFO("IOTest: read/write var metadata I4 test FAIL"); + } + Err = + OMEGA::IO::readMeta("VarMetaI8", VarMetaI8New, InFileID, VarIDCellI4); + if (Err != 0) + LOG_ERROR("IOTest: error reading var I8 metadata FAIL"); + if (VarMetaI8New == VarMetaI8Ref) { + LOG_INFO("IOTest: read/write var metadata I8 test PASS"); + } else { + LOG_INFO("IOTest: read/write var metadata I8 test FAIL"); + } + Err = + OMEGA::IO::readMeta("VarMetaR4", VarMetaR4New, InFileID, VarIDCellI4); + if (Err != 0) + LOG_ERROR("IOTest: error reading var R4 metadata FAIL"); + if (VarMetaR4New == VarMetaR4Ref) { + LOG_INFO("IOTest: read/write 
var metadata R4 test PASS"); + } else { + LOG_INFO("IOTest: read/write var metadata R4 test FAIL"); + } + Err = + OMEGA::IO::readMeta("VarMetaR8", VarMetaR8New, InFileID, VarIDCellI4); + if (Err != 0) + LOG_ERROR("IOTest: error reading var R8 metadata FAIL"); + if (VarMetaR8New == VarMetaR8Ref) { + LOG_INFO("IOTest: read/write var metadata R8 test PASS"); + } else { + LOG_INFO("IOTest: read/write var metadata R8 test FAIL"); + } + Err = OMEGA::IO::readMeta("VarMetaDescr", VarMetaDescrNew, InFileID, + VarIDCellI4); + if (Err != 0) + LOG_ERROR("IOTest: error reading var string metadata FAIL"); + if (VarMetaDescrNew == VarMetaDescrRef) { + LOG_INFO("IOTest: read/write var metadata string test PASS"); + } else { + LOG_INFO("IOTest: read/write var metadata string test FAIL"); + } - Err = OMEGA::IO::destroyDecomp(DecompVrtxI4); - if (Err != 0) - LOG_ERROR("IOTest: error destroying decomp Vrtx I4 FAIL"); - Err = OMEGA::IO::destroyDecomp(DecompVrtxI8); - if (Err != 0) - LOG_ERROR("IOTest: error destroying decomp Vrtx I8 FAIL"); - Err = OMEGA::IO::destroyDecomp(DecompVrtxR4); - if (Err != 0) - LOG_ERROR("IOTest: error destroying decomp Vrtx R4 FAIL"); - Err = OMEGA::IO::destroyDecomp(DecompVrtxR8); - if (Err != 0) - LOG_ERROR("IOTest: error destroying decomp Vrtx R8 FAIL"); - - // Exit environments - OMEGA::Decomp::clear(); - OMEGA::MachEnv::removeAll(); - if (Err == 0) - LOG_INFO("IOTest: Successful completion"); - yakl::finalize(); + // Finished reading, close file + Err = OMEGA::IO::closeFile(InFileID); + if (Err != 0) + LOG_ERROR("IOTest: error closing input file FAIL"); + + // Test destruction of Decompositions + Err = OMEGA::IO::destroyDecomp(DecompCellI4); + if (Err != 0) + LOG_ERROR("IOTest: error destroying decomp cell I4 FAIL"); + Err = OMEGA::IO::destroyDecomp(DecompCellI8); + if (Err != 0) + LOG_ERROR("IOTest: error destroying decomp cell I8 FAIL"); + Err = OMEGA::IO::destroyDecomp(DecompCellR4); + if (Err != 0) + LOG_ERROR("IOTest: error destroying decomp cell 
R4 FAIL"); + Err = OMEGA::IO::destroyDecomp(DecompCellR8); + if (Err != 0) + LOG_ERROR("IOTest: error destroying decomp cell R8 FAIL"); + + Err = OMEGA::IO::destroyDecomp(DecompEdgeI4); + if (Err != 0) + LOG_ERROR("IOTest: error destroying decomp Edge I4 FAIL"); + Err = OMEGA::IO::destroyDecomp(DecompEdgeI8); + if (Err != 0) + LOG_ERROR("IOTest: error destroying decomp Edge I8 FAIL"); + Err = OMEGA::IO::destroyDecomp(DecompEdgeR4); + if (Err != 0) + LOG_ERROR("IOTest: error destroying decomp Edge R4 FAIL"); + Err = OMEGA::IO::destroyDecomp(DecompEdgeR8); + if (Err != 0) + LOG_ERROR("IOTest: error destroying decomp Edge R8 FAIL"); + + Err = OMEGA::IO::destroyDecomp(DecompVrtxI4); + if (Err != 0) + LOG_ERROR("IOTest: error destroying decomp Vrtx I4 FAIL"); + Err = OMEGA::IO::destroyDecomp(DecompVrtxI8); + if (Err != 0) + LOG_ERROR("IOTest: error destroying decomp Vrtx I8 FAIL"); + Err = OMEGA::IO::destroyDecomp(DecompVrtxR4); + if (Err != 0) + LOG_ERROR("IOTest: error destroying decomp Vrtx R4 FAIL"); + Err = OMEGA::IO::destroyDecomp(DecompVrtxR8); + if (Err != 0) + LOG_ERROR("IOTest: error destroying decomp Vrtx R8 FAIL"); + + // Exit environments + OMEGA::Decomp::clear(); + OMEGA::MachEnv::removeAll(); + if (Err == 0) + LOG_INFO("IOTest: Successful completion"); + } + Kokkos::finalize(); MPI_Finalize(); } // end of main diff --git a/components/omega/test/infra/IOFieldTest.cpp b/components/omega/test/infra/IOFieldTest.cpp index b57660857b37..71443ac614a7 100644 --- a/components/omega/test/infra/IOFieldTest.cpp +++ b/components/omega/test/infra/IOFieldTest.cpp @@ -13,6 +13,8 @@ #include "DataTypes.h" #include "Logging.h" #include "MetaData.h" +#include "OmegaKokkos.h" +#include "mpi.h" #include #include @@ -79,8 +81,8 @@ int initIOFieldTest() { ); // Create host data arrays - OMEGA::ArrayHost2DI4 DataI4H("FieldI4H", NCellsSize, NVertLevels); - OMEGA::ArrayHost2DR8 DataR8H("FieldR8H", NCellsSize, NVertLevels); + OMEGA::HostArray2DI4 DataI4H("FieldI4H", NCellsSize, 
NVertLevels); + OMEGA::HostArray2DR8 DataR8H("FieldR8H", NCellsSize, NVertLevels); for (int Cell = 0; Cell < NCellsSize; ++Cell) { for (int k = 0; k < NVertLevels; ++k) { DataI4H(Cell, k) = Cell + k; @@ -91,9 +93,8 @@ int initIOFieldTest() { // Create device data arrays OMEGA::Array2DI4 DataI4D("FieldI4D", NCellsSize, NVertLevels); OMEGA::Array2DR8 DataR8D("FieldR8D", NCellsSize, NVertLevels); - yakl::c::parallel_for( - yakl::c::Bounds<2>(NCellsSize, NVertLevels), - YAKL_LAMBDA(int Cell, int k) { + OMEGA::parallelFor( + {NCellsSize, NVertLevels}, KOKKOS_LAMBDA(int Cell, int k) { DataI4D(Cell, k) = Cell + k + 1; DataR8D(Cell, k) = Cell + k + 2.2345678; }); @@ -108,7 +109,7 @@ int initIOFieldTest() { Err += std::abs(Err1) + std::abs(Err2); } - Err1 = OMEGA::IOField::attachData("FieldI4H", DataI4H); + Err1 = OMEGA::IOField::attachData("FieldI4H", DataI4H); Err2 = OMEGA::IOField::attachData("FieldI4D", DataI4D); if (Err1 == 0 && Err2 == 0) { LOG_INFO("IOField: attaching I4 data: PASS"); @@ -126,7 +127,7 @@ int initIOFieldTest() { Err += std::abs(Err1) + std::abs(Err2); } - Err1 = OMEGA::IOField::attachData("FieldR8H", DataR8H); + Err1 = OMEGA::IOField::attachData("FieldR8H", DataR8H); Err2 = OMEGA::IOField::attachData("FieldR8D", DataR8D); if (Err1 == 0 && Err2 == 0) { LOG_INFO("IOField: attaching R8 data: PASS"); @@ -158,182 +159,180 @@ int main(int argc, char **argv) { // We do not actually use message passing but need to test the // array types and behavior within the distributed environment MPI_Init(&argc, &argv); - yakl::init(); - - // Call initialization to create reference IO field - Err = initIOFieldTest(); - if (Err != 0) - LOG_ERROR("IOFieldTest: Error in initialization routine"); - - // Set reference data - must match the values in the init routine - std::string RefIUnits = "unitless"; - std::string RefRUnits = "m"; - int NCellsSize = 100; - int NVertLevels = 64; - OMEGA::ArrayHost2DI4 RefI4H("RefI4H", NCellsSize, NVertLevels); - OMEGA::ArrayHost2DR8 
RefR8H("RefR8H", NCellsSize, NVertLevels); - for (int Cell = 0; Cell < NCellsSize; ++Cell) { - for (int k = 0; k < NVertLevels; ++k) { - RefI4H(Cell, k) = Cell + k; - RefR8H(Cell, k) = Cell + k + 1.2345678; + Kokkos::initialize(); + { + // Call initialization to create reference IO field + Err = initIOFieldTest(); + if (Err != 0) + LOG_ERROR("IOFieldTest: Error in initialization routine"); + + // Set reference data - must match the values in the init routine + std::string RefIUnits = "unitless"; + std::string RefRUnits = "m"; + int NCellsSize = 100; + int NVertLevels = 64; + OMEGA::HostArray2DI4 RefI4H("RefI4H", NCellsSize, NVertLevels); + OMEGA::HostArray2DR8 RefR8H("RefR8H", NCellsSize, NVertLevels); + for (int Cell = 0; Cell < NCellsSize; ++Cell) { + for (int k = 0; k < NVertLevels; ++k) { + RefI4H(Cell, k) = Cell + k; + RefR8H(Cell, k) = Cell + k + 1.2345678; + } + } + OMEGA::Array2DI4 RefI4D("RefI4D", NCellsSize, NVertLevels); + OMEGA::Array2DR8 RefR8D("RefR8D", NCellsSize, NVertLevels); + OMEGA::parallelFor( + {NCellsSize, NVertLevels}, KOKKOS_LAMBDA(int Cell, int k) { + RefI4D(Cell, k) = Cell + k + 1; + RefR8D(Cell, k) = Cell + k + 2.2345678; + }); + + // Check existence of fields + bool FieldExistsI4H = OMEGA::IOField::isDefined("FieldI4H"); + bool FieldExistsI4D = OMEGA::IOField::isDefined("FieldI4D"); + bool FieldExistsR8H = OMEGA::IOField::isDefined("FieldR8H"); + bool FieldExistsR8D = OMEGA::IOField::isDefined("FieldR8D"); + if (FieldExistsI4H && FieldExistsI4D && FieldExistsR8H && + FieldExistsR8D) { + LOG_INFO("IOFieldTest: existence test PASS"); + } else { + LOG_ERROR("IOFieldTest: existence test FAIL"); } - } - OMEGA::Array2DI4 RefI4D("RefI4D", NCellsSize, NVertLevels); - OMEGA::Array2DR8 RefR8D("RefR8D", NCellsSize, NVertLevels); - yakl::c::parallel_for( - yakl::c::Bounds<2>(NCellsSize, NVertLevels), - YAKL_LAMBDA(int Cell, int k) { - RefI4D(Cell, k) = Cell + k + 1; - RefR8D(Cell, k) = Cell + k + 2.2345678; - }); - - // Check existence of fields - 
bool FieldExistsI4H = OMEGA::IOField::isDefined("FieldI4H"); - bool FieldExistsI4D = OMEGA::IOField::isDefined("FieldI4D"); - bool FieldExistsR8H = OMEGA::IOField::isDefined("FieldR8H"); - bool FieldExistsR8D = OMEGA::IOField::isDefined("FieldR8D"); - if (FieldExistsI4H && FieldExistsI4D && FieldExistsR8H && FieldExistsR8D) { - LOG_INFO("IOFieldTest: existence test PASS"); - } else { - LOG_ERROR("IOFieldTest: existence test FAIL"); - } - - bool FieldExistsJunk = OMEGA::IOField::isDefined("FieldJunk"); - if (!FieldExistsJunk) { - LOG_INFO("IOFieldTest: non-existence test PASS"); - } else { - LOG_ERROR("IOFieldTest: non-existence test FAIL"); - } - - // Test retrieval of data and metadata - // Retrieve metadata first - std::shared_ptr MetaI4D = - OMEGA::IOField::getMetaData("FieldI4D"); - std::shared_ptr MetaR8D = - OMEGA::IOField::getMetaData("FieldR8D"); - std::shared_ptr MetaI4H = - OMEGA::IOField::getMetaData("FieldI4H"); - std::shared_ptr MetaR8H = - OMEGA::IOField::getMetaData("FieldR8H"); - - std::string NewIUnits; - std::string NewRUnits; - - Err1 = MetaI4H->getEntry("Units", NewIUnits); - if (Err1 == 0 && NewIUnits == RefIUnits) { - LOG_INFO("IOField: Retrieve I4H metadata by name: PASS"); - } else { - LOG_ERROR("IOField: Retrieve I4H metadata by name: FAIL"); - } - - Err2 = MetaR8H->getEntry("Units", NewRUnits); - if (Err2 == 0 && NewRUnits == RefRUnits) { - LOG_INFO("IOField: Retrieve R8H metadata by name: PASS"); - } else { - LOG_ERROR("IOField: Retrieve R8H metadata by name: FAIL"); - } - - Err3 = MetaI4D->getEntry("Units", NewIUnits); - if (Err3 == 0 && NewIUnits == RefIUnits) { - LOG_INFO("IOField: Retrieve I4D metadata by name: PASS"); - } else { - LOG_ERROR("IOField: Retrieve I4D metadata by name: FAIL"); - } - - Err4 = MetaR8D->getEntry("Units", NewRUnits); - if (Err4 == 0 && NewRUnits == RefRUnits) { - LOG_INFO("IOField: Retrieve R8D metadata by name: PASS"); - } else { - LOG_ERROR("IOField: Retrieve R8D metadata by name: FAIL"); - } - Err += 
std::abs(Err1) + std::abs(Err2) + std::abs(Err3) + std::abs(Err4); - // Now retrieve full data - OMEGA::ArrayHost2DI4 NewI4H = - OMEGA::IOField::getData("FieldI4H"); - OMEGA::ArrayHost2DR8 NewR8H = - OMEGA::IOField::getData("FieldR8H"); - OMEGA::Array2DI4 NewI4D = - OMEGA::IOField::getData("FieldI4D"); - OMEGA::Array2DR8 NewR8D = - OMEGA::IOField::getData("FieldR8D"); + bool FieldExistsJunk = OMEGA::IOField::isDefined("FieldJunk"); + if (!FieldExistsJunk) { + LOG_INFO("IOFieldTest: non-existence test PASS"); + } else { + LOG_ERROR("IOFieldTest: non-existence test FAIL"); + } - Err1 = 0; - Err2 = 0; + // Test retrieval of data and metadata + // Retrieve metadata first + std::shared_ptr MetaI4D = + OMEGA::IOField::getMetaData("FieldI4D"); + std::shared_ptr MetaR8D = + OMEGA::IOField::getMetaData("FieldR8D"); + std::shared_ptr MetaI4H = + OMEGA::IOField::getMetaData("FieldI4H"); + std::shared_ptr MetaR8H = + OMEGA::IOField::getMetaData("FieldR8H"); + + std::string NewIUnits; + std::string NewRUnits; + + Err1 = MetaI4H->getEntry("Units", NewIUnits); + if (Err1 == 0 && NewIUnits == RefIUnits) { + LOG_INFO("IOField: Retrieve I4H metadata by name: PASS"); + } else { + LOG_ERROR("IOField: Retrieve I4H metadata by name: FAIL"); + } - for (int Cell = 0; Cell < NCellsSize; ++Cell) { - for (int k = 0; k < NVertLevels; ++k) { - if (NewI4H(Cell, k) != RefI4H(Cell, k)) - ++Err1; - if (NewR8H(Cell, k) != RefR8H(Cell, k)) - ++Err2; + Err2 = MetaR8H->getEntry("Units", NewRUnits); + if (Err2 == 0 && NewRUnits == RefRUnits) { + LOG_INFO("IOField: Retrieve R8H metadata by name: PASS"); + } else { + LOG_ERROR("IOField: Retrieve R8H metadata by name: FAIL"); } - } - if (Err1 == 0) { - LOG_INFO("IOField: Retrieve I4 host data by name: PASS"); - } else { - LOG_ERROR("IOField: Retrieve I4 host data by name: FAIL"); - } - if (Err2 == 0) { - LOG_INFO("IOField: Retrieve R8 host data by name: PASS"); - } else { - LOG_ERROR("IOField: Retrieve R8 host data by name: FAIL"); - } - OMEGA::Array2DI4 
ErrArray("ErrorArray", NCellsSize, NVertLevels); + Err3 = MetaI4D->getEntry("Units", NewIUnits); + if (Err3 == 0 && NewIUnits == RefIUnits) { + LOG_INFO("IOField: Retrieve I4D metadata by name: PASS"); + } else { + LOG_ERROR("IOField: Retrieve I4D metadata by name: FAIL"); + } - yakl::c::parallel_for( - yakl::c::Bounds<2>(NCellsSize, NVertLevels), - YAKL_LAMBDA(int Cell, int k) { - if ((NewI4D(Cell, k) != RefI4D(Cell, k)) or - (NewR8D(Cell, k) != RefR8D(Cell, k))) - ErrArray(Cell, k) += 1; - }); - Err3 = yakl::intrinsics::sum(ErrArray); + Err4 = MetaR8D->getEntry("Units", NewRUnits); + if (Err4 == 0 && NewRUnits == RefRUnits) { + LOG_INFO("IOField: Retrieve R8D metadata by name: PASS"); + } else { + LOG_ERROR("IOField: Retrieve R8D metadata by name: FAIL"); + } + Err += std::abs(Err1) + std::abs(Err2) + std::abs(Err3) + std::abs(Err4); + + // Now retrieve full data + OMEGA::HostArray2DI4 NewI4H = + OMEGA::IOField::getData("FieldI4H"); + OMEGA::HostArray2DR8 NewR8H = + OMEGA::IOField::getData("FieldR8H"); + OMEGA::Array2DI4 NewI4D = + OMEGA::IOField::getData("FieldI4D"); + OMEGA::Array2DR8 NewR8D = + OMEGA::IOField::getData("FieldR8D"); + + Err1 = 0; + Err2 = 0; + + for (int Cell = 0; Cell < NCellsSize; ++Cell) { + for (int k = 0; k < NVertLevels; ++k) { + if (NewI4H(Cell, k) != RefI4H(Cell, k)) + ++Err1; + if (NewR8H(Cell, k) != RefR8H(Cell, k)) + ++Err2; + } + } + if (Err1 == 0) { + LOG_INFO("IOField: Retrieve I4 host data by name: PASS"); + } else { + LOG_ERROR("IOField: Retrieve I4 host data by name: FAIL"); + } + if (Err2 == 0) { + LOG_INFO("IOField: Retrieve R8 host data by name: PASS"); + } else { + LOG_ERROR("IOField: Retrieve R8 host data by name: FAIL"); + } - if (Err3 == 0) { - LOG_INFO("IOField: Retrieve device data by name: PASS"); - } else { - LOG_ERROR("IOField: Retrieve device data by name: FAIL"); - } + OMEGA::Array2DI4 ErrArray("ErrorArray", NCellsSize, NVertLevels); + OMEGA::parallelFor( + {NCellsSize, NVertLevels}, KOKKOS_LAMBDA(int Cell, int k) 
{ + if ((NewI4D(Cell, k) != RefI4D(Cell, k)) or + (NewR8D(Cell, k) != RefR8D(Cell, k))) + ErrArray(Cell, k) += 1; + }); + + // Create a reducer + Kokkos::Sum reducer(Err3); + + // Perform the reduction + OMEGA::parallelReduce( + "SumReduce", {NCellsSize, NVertLevels}, + KOKKOS_LAMBDA(int Cell, int k, OMEGA::I4 &update) { + update += ErrArray(Cell, k); + }, + reducer); + + if (Err3 == 0) { + LOG_INFO("IOField: Retrieve device data by name: PASS"); + } else { + LOG_ERROR("IOField: Retrieve device data by name: FAIL"); + } - Err += std::abs(Err1) + std::abs(Err2) + std::abs(Err3); + Err += std::abs(Err1) + std::abs(Err2) + std::abs(Err3); - // Erase a field and check for non-existence - OMEGA::IOField::erase("FieldI4D"); - FieldExistsI4D = OMEGA::IOField::isDefined("FieldI4D"); - if (!FieldExistsI4D) { - LOG_INFO("IOFieldTest: erase field PASS"); - } else { - LOG_ERROR("IOFieldTest: erase field FAIL"); - ++Err; - } + // Erase a field and check for non-existence + OMEGA::IOField::erase("FieldI4D"); + FieldExistsI4D = OMEGA::IOField::isDefined("FieldI4D"); + if (!FieldExistsI4D) { + LOG_INFO("IOFieldTest: erase field PASS"); + } else { + LOG_ERROR("IOFieldTest: erase field FAIL"); + ++Err; + } - // Clear all fields - OMEGA::IOField::clear(); - FieldExistsI4H = OMEGA::IOField::isDefined("FieldI4H"); - FieldExistsI4D = OMEGA::IOField::isDefined("FieldI4D"); - FieldExistsR8H = OMEGA::IOField::isDefined("FieldR8H"); - FieldExistsR8D = OMEGA::IOField::isDefined("FieldR8D"); - if (FieldExistsI4H or FieldExistsI4D or FieldExistsR8H or FieldExistsR8D) { - LOG_ERROR("IOFieldTest: clear all data FAIL"); - ++Err; - } else { - LOG_INFO("IOFieldTest: clear all data PASS"); + // Clear all fields + OMEGA::IOField::clear(); + FieldExistsI4H = OMEGA::IOField::isDefined("FieldI4H"); + FieldExistsI4D = OMEGA::IOField::isDefined("FieldI4D"); + FieldExistsR8H = OMEGA::IOField::isDefined("FieldR8H"); + FieldExistsR8D = OMEGA::IOField::isDefined("FieldR8D"); + if (FieldExistsI4H or 
FieldExistsI4D or FieldExistsR8H or + FieldExistsR8D) { + LOG_ERROR("IOFieldTest: clear all data FAIL"); + ++Err; + } else { + LOG_INFO("IOFieldTest: clear all data PASS"); + } } - - // Clean up - RefI4H.deallocate(); - RefI4D.deallocate(); - RefR8H.deallocate(); - RefR8D.deallocate(); - NewI4H.deallocate(); - NewI4D.deallocate(); - NewR8H.deallocate(); - NewR8D.deallocate(); - ErrArray.deallocate(); - - yakl::finalize(); + Kokkos::finalize(); MPI_Finalize(); // End of testing diff --git a/components/omega/test/infra/LoggingTest.cpp b/components/omega/test/infra/LoggingTest.cpp index df7d6b4e0096..0312063d99e8 100644 --- a/components/omega/test/infra/LoggingTest.cpp +++ b/components/omega/test/infra/LoggingTest.cpp @@ -5,14 +5,14 @@ /// /// This driver tests the logging capabilities for the OMEGA /// model. In particular, it tests creating a log file according to -/// log levels and supporting YAKL data types. +/// log levels and supporting Kokkos data types. /// // //===-----------------------------------------------------------------------===/ #include -#include "DataTypes.h" +// #include "DataTypes.h" #include "Logging.h" #include "spdlog/sinks/basic_file_sink.h" @@ -94,30 +94,30 @@ int testDefaultLogLevel() { return RetVal; } -int testYaklDataTypes() { +int testKokkosDataTypes() { int RetVal = 0; int constexpr d1 = 2; int constexpr d2 = 3; - yakl::init(); + Kokkos::initialize(); { - ArrayHost1DReal test1d("test1d", d1); - ArrayHost2DReal test2d("test2d", d1, d2); + HostArray1DReal test1d("test1d", d1); + HostArray2DReal test2d("test2d", d1, d2); LOG_INFO("1d var {}", test1d); - // check if ArrayHost1DReal is detected + // check if HostArray1DReal is detected RetVal -= - outputTestResult("Yakl data type 1", "ArrayHost1DReal", Contains); + outputTestResult("Kokkos data type 1", "HostArray1DReal", Contains); LOG_INFO("2d var {}", test2d); - // check if ArrayHost2DReal is detected + // check if HostArray2DReal is detected RetVal -= - outputTestResult("Yakl data 
type 2", "ArrayHost2DReal", Contains); + outputTestResult("Kokkos data type 2", "HostArray2DReal", Contains); } - yakl::finalize(); + Kokkos::finalize(); return RetVal; } @@ -142,7 +142,7 @@ int main(int argc, char **argv) { initLogging(logger); RetVal -= testDefaultLogLevel(); - RetVal -= testYaklDataTypes(); + RetVal -= testKokkosDataTypes(); // std::remove(LogFilePath.c_str()); diff --git a/components/omega/test/infra/OmegaKokkosTest.cpp b/components/omega/test/infra/OmegaKokkosTest.cpp new file mode 100644 index 000000000000..5d1210fca3b7 --- /dev/null +++ b/components/omega/test/infra/OmegaKokkosTest.cpp @@ -0,0 +1,138 @@ +//===-- Test driver for OMEGA Kokkos -------------------------*- C++ -*-===/ +// +/// \file +/// \brief Test driver for OMEGA Kokkos +/// +/// This driver tests the OmegaKokkos capabilities. +/// The code is based on Kokkos Tutorial +/// +// +//===-----------------------------------------------------------------------===/ + +#include + +#include "OmegaKokkos.h" + +using namespace OMEGA; + +int main(int argc, char **argv) { + + int RetVal = 0; + int N = 4096; + int M = 1024; + int S = 4194304; + int nrepeat = 100; + + try { + + Kokkos::initialize(argc, argv); + { + + // Allocate y, x vectors and Matrix A on device. + Array1DR8 y("y", N); + Array1DR8 x("x", M); + Array2DR8 A("A", N, M); + + deepCopy(y, 1); + deepCopy(x, 1); + deepCopy(A, 1); + /* + #ifdef OMEGA_TARGET_DEVICE + // Create host mirrors of device views. + Array1DR8::HostMirror h_y = createHostMirror( d_y ); + Array1DR8::HostMirror h_x = createHostMirror( d_x ); + Array2DR8::HostMirror h_A = createHostMirror( d_A ); + #endif + + // Initialize y vector on host. + for ( int i = 0; i < N; ++i ) { + y( i ) = 1; + } + + // Initialize x vector on host. + for ( int i = 0; i < M; ++i ) { + x( i ) = 1; + } + + // Initialize A matrix on host. 
+ for ( int j = 0; j < N; ++j ) { + for ( int i = 0; i < M; ++i ) { + A( j, i ) = 1; + } + } + + #ifdef OMEGA_TARGET_DEVICE + // Deep copy host views to device views. + Kokkos::deep_copy( d_y, y ); + Kokkos::deep_copy( d_x, x ); + Kokkos::deep_copy( d_A, A ); + #endif + */ + + // Timer products. + Kokkos::Timer timer; + + for (int repeat = 0; repeat < nrepeat; repeat++) { + + // Application: = y^T*A*x + double result = 0; + + parallelReduce( + "yAx", {N}, + KOKKOS_LAMBDA(int j, double &update) { + double temp2 = 0; + + for (int i = 0; i < M; ++i) { + temp2 += A(j, i) * x(i); + } + + update += y(j) * temp2; + }, + result); + + // Output result. + if (repeat == (nrepeat - 1)) { + std::cout << " Computed result for " << N << " x " << M + << " is " << result << std::endl; + } + + const double solution = (double)N * (double)M; + + if (result != solution) { + std::cout << " FAIL: result( " << result << " ) != solution( " + << solution << " )" << std::endl; + RetVal -= -1; + } + + // Calculate time. + double time = timer.seconds(); + + // Calculate bandwidth. + // Each matrix A row (each of length M) is read once. + // The x vector (of length M) is read N times. + // The y vector (of length N) is read once. + // double Gbytes = 1.0e-9 * double( sizeof(double) * ( 2 * M * N + N + // ) ); + double Gbytes = 1.0e-9 * double(sizeof(double) * (M + M * N + N)); + + // Print results (problem size, time and bandwidth in GB/s). + std::cout << " N( " << N << " ) M( " << M << " ) nrepeat ( " + << nrepeat << " ) problem( " << Gbytes * 1000 + << " MB ) time( " << time << " s ) bandwidth( " + << Gbytes * nrepeat / time << " GB/s )" << std::endl; + } + + std::cout << "OmegaKokkos test: PASS" << std::endl; + } + Kokkos::finalize(); + + } catch (const std::exception &Ex) { + std::cout << Ex.what() << ": FAIL" << std::endl; + RetVal -= -1; + } catch (...) 
{ + std::cout << "Unknown: FAIL" << std::endl; + RetVal -= -1; + } + + return RetVal; +} diff --git a/components/omega/test/ocn/HorzMeshTest.cpp b/components/omega/test/ocn/HorzMeshTest.cpp index f603f48fa513..49a26857f241 100644 --- a/components/omega/test/ocn/HorzMeshTest.cpp +++ b/components/omega/test/ocn/HorzMeshTest.cpp @@ -15,6 +15,7 @@ #include "IO.h" #include "Logging.h" #include "MachEnv.h" +#include "OmegaKokkos.h" #include "mpi.h" #include @@ -125,607 +126,616 @@ int main(int argc, char *argv[]) { // Initialize the global MPI environment MPI_Init(&argc, &argv); - yakl::init(); - - OMEGA::R8 tol = 1e-6; - OMEGA::R8 pi = 4.0 * atan(1.0); - - // Call initialization routine to create the default decomposition - int Err = initHorzMeshTest(); - if (Err != 0) - LOG_CRITICAL("HorzMeshTest: Error initializing"); - - // Get MPI vars if needed - OMEGA::MachEnv *DefEnv = OMEGA::MachEnv::getDefaultEnv(); - MPI_Comm Comm = DefEnv->getComm(); - OMEGA::I4 MyTask = DefEnv->getMyTask(); - OMEGA::I4 NumTasks = DefEnv->getNumTasks(); - bool IsMaster = DefEnv->isMasterTask(); - - // Test retrieval of the default decomposition - OMEGA::Decomp *DefDecomp = OMEGA::Decomp::getDefault(); - if (DefDecomp) { // true if non-null ptr - LOG_INFO("HorzMeshTest: Default decomp retrieval PASS"); - } else { - LOG_INFO("HorzMeshTest: Default decomp retrieval FAIL"); - } + Kokkos::initialize(); + { + + OMEGA::R8 tol = 1e-6; + OMEGA::R8 pi = 4.0 * atan(1.0); + + // Call initialization routine to create the default decomposition + int Err = initHorzMeshTest(); + if (Err != 0) + LOG_CRITICAL("HorzMeshTest: Error initializing"); + + // Get MPI vars if needed + OMEGA::MachEnv *DefEnv = OMEGA::MachEnv::getDefaultEnv(); + MPI_Comm Comm = DefEnv->getComm(); + OMEGA::I4 MyTask = DefEnv->getMyTask(); + OMEGA::I4 NumTasks = DefEnv->getNumTasks(); + bool IsMaster = DefEnv->isMasterTask(); + + // Test retrieval of the default decomposition + OMEGA::Decomp *DefDecomp = OMEGA::Decomp::getDefault(); + if 
(DefDecomp) { // true if non-null ptr + LOG_INFO("HorzMeshTest: Default decomp retrieval PASS"); + } else { + LOG_INFO("HorzMeshTest: Default decomp retrieval FAIL"); + } - // Retrieve default mesh - OMEGA::HorzMesh *Mesh = OMEGA::HorzMesh::getDefault(); - - // Test sum of local mesh cells - // Get the global sum of all local cell counts - // Tests that the correct cell counts have been retrieved from the Decomp - // object - OMEGA::I4 SumCells; - OMEGA::I4 LocCells; - LocCells = Mesh->NCellsOwned; - Err = MPI_Allreduce(&LocCells, &SumCells, 1, MPI_INT32_T, MPI_SUM, Comm); - - if (SumCells == DefDecomp->NCellsGlobal) { - LOG_INFO("HorzMeshTest: Sum cell ID test PASS"); - } else { - LOG_INFO("HorzMeshTest: Sum cell ID test FAIL {} {}", SumCells, - DefDecomp->NCellsGlobal); - } + // Retrieve default mesh + OMEGA::HorzMesh *Mesh = OMEGA::HorzMesh::getDefault(); + + // Test sum of local mesh cells + // Get the global sum of all local cell counts + // Tests that the correct cell counts have been retrieved from the Decomp + // object + OMEGA::I4 SumCells; + OMEGA::I4 LocCells; + LocCells = Mesh->NCellsOwned; + Err = MPI_Allreduce(&LocCells, &SumCells, 1, MPI_INT32_T, MPI_SUM, Comm); + + if (SumCells == DefDecomp->NCellsGlobal) { + LOG_INFO("HorzMeshTest: Sum cell ID test PASS"); + } else { + LOG_INFO("HorzMeshTest: Sum cell ID test FAIL {} {}", SumCells, + DefDecomp->NCellsGlobal); + } - // Test that cell centers are on sphere - // Check that all cell centers are a uniform distance from the origin - // Tests that the Cartesian coordinates for cell centers have been read in - // corectly - OMEGA::R8 sphere_radius = - distance(Mesh->XCellH(0), Mesh->YCellH(0), Mesh->ZCellH(0)); - OMEGA::R8 dist; - OMEGA::I4 count = 0; - for (int Cell = 0; Cell < LocCells; Cell++) { - dist = - distance(Mesh->XCellH(Cell), Mesh->YCellH(Cell), Mesh->ZCellH(Cell)); - if (abs(sphere_radius - dist) > tol) - count++; - } + // Test that cell centers are on sphere + // Check that all cell centers 
are a uniform distance from the origin + // Tests that the Cartesian coordinates for cell centers have been read in + // corectly + OMEGA::R8 sphere_radius = + distance(Mesh->XCellH(0), Mesh->YCellH(0), Mesh->ZCellH(0)); + OMEGA::R8 dist; + OMEGA::I4 count = 0; + for (int Cell = 0; Cell < LocCells; Cell++) { + dist = distance(Mesh->XCellH(Cell), Mesh->YCellH(Cell), + Mesh->ZCellH(Cell)); + if (abs(sphere_radius - dist) > tol) + count++; + } - if (count > 0) { - LOG_INFO("HorzMeshTest: Cell sphere radius test FAIL"); - } else { - LOG_INFO("HorzMeshTest: Cell sphere radius test PASS"); - } + if (count > 0) { + LOG_INFO("HorzMeshTest: Cell sphere radius test FAIL"); + } else { + LOG_INFO("HorzMeshTest: Cell sphere radius test PASS"); + } - // Test lon/lat coordinates of cell centers - // Convert Cartesian coordinates to lon/lat and check these agree with the - // values that have been read in - // Tests that the lon/lat coordinates for cell - // centers have been read in correctly - OMEGA::R8 lon; - OMEGA::R8 lat; - count = 0; - for (int Cell = 0; Cell < LocCells; Cell++) { - - lon = computeLon(Mesh->XCellH(Cell), Mesh->YCellH(Cell), - Mesh->ZCellH(Cell)); - lat = computeLat(Mesh->XCellH(Cell), Mesh->YCellH(Cell), - Mesh->ZCellH(Cell)); - - if (abs(lon - Mesh->LonCellH(Cell)) > tol) - count++; - if (abs(lat - Mesh->LatCellH(Cell)) > tol) - count++; - } + // Test lon/lat coordinates of cell centers + // Convert Cartesian coordinates to lon/lat and check these agree with the + // values that have been read in + // Tests that the lon/lat coordinates for cell + // centers have been read in correctly + OMEGA::R8 lon; + OMEGA::R8 lat; + count = 0; + for (int Cell = 0; Cell < LocCells; Cell++) { + + lon = computeLon(Mesh->XCellH(Cell), Mesh->YCellH(Cell), + Mesh->ZCellH(Cell)); + lat = computeLat(Mesh->XCellH(Cell), Mesh->YCellH(Cell), + Mesh->ZCellH(Cell)); + + if (abs(lon - Mesh->LonCellH(Cell)) > tol) + count++; + if (abs(lat - Mesh->LatCellH(Cell)) > tol) + count++; + } 
- if (count > 0) { - LOG_INFO("HorzMeshTest: Cell lon/lat test FAIL"); - } else { - LOG_INFO("HorzMeshTest: Cell lon/lat test PASS"); - } + if (count > 0) { + LOG_INFO("HorzMeshTest: Cell lon/lat test FAIL"); + } else { + LOG_INFO("HorzMeshTest: Cell lon/lat test PASS"); + } - // Test sum of local mesh edges - // Get the global sum of all local edge counts - // Tests that the correct edge counts have been retrieved from the Decomp - // object - OMEGA::I4 SumEdges; - OMEGA::I4 LocEdges; - LocEdges = Mesh->NEdgesOwned; - Err = MPI_Allreduce(&LocEdges, &SumEdges, 1, MPI_INT32_T, MPI_SUM, Comm); - - if (SumEdges == DefDecomp->NEdgesGlobal) { - LOG_INFO("HorzMeshTest: Sum edge ID test PASS"); - } else { - LOG_INFO("HorzMeshTest: Sum edge ID test FAIL {} {}", SumEdges, - DefDecomp->NEdgesGlobal); - } + // Test sum of local mesh edges + // Get the global sum of all local edge counts + // Tests that the correct edge counts have been retrieved from the Decomp + // object + OMEGA::I4 SumEdges; + OMEGA::I4 LocEdges; + LocEdges = Mesh->NEdgesOwned; + Err = MPI_Allreduce(&LocEdges, &SumEdges, 1, MPI_INT32_T, MPI_SUM, Comm); + + if (SumEdges == DefDecomp->NEdgesGlobal) { + LOG_INFO("HorzMeshTest: Sum edge ID test PASS"); + } else { + LOG_INFO("HorzMeshTest: Sum edge ID test FAIL {} {}", SumEdges, + DefDecomp->NEdgesGlobal); + } - // Test that edge coordinates are on sphere - // Check that all edge centers are a uniform distance from the origin - // Tests that the Cartesian coordinates for edge centers have been read in - // correctly - sphere_radius = distance(Mesh->XEdgeH(0), Mesh->YEdgeH(0), Mesh->ZEdgeH(0)); - count = 0; - for (int Edge = 0; Edge < LocEdges; Edge++) { - dist = - distance(Mesh->XEdgeH(Edge), Mesh->YEdgeH(Edge), Mesh->ZEdgeH(Edge)); - if (abs(sphere_radius - dist) > tol) - count++; - } + // Test that edge coordinates are on sphere + // Check that all edge centers are a uniform distance from the origin + // Tests that the Cartesian coordinates for edge centers 
have been read in + // correctly + sphere_radius = + distance(Mesh->XEdgeH(0), Mesh->YEdgeH(0), Mesh->ZEdgeH(0)); + count = 0; + for (int Edge = 0; Edge < LocEdges; Edge++) { + dist = distance(Mesh->XEdgeH(Edge), Mesh->YEdgeH(Edge), + Mesh->ZEdgeH(Edge)); + if (abs(sphere_radius - dist) > tol) + count++; + } - if (count > 0) { - LOG_INFO("HorzMeshTest: Edge sphere radius test FAIL"); - } else { - LOG_INFO("HorzMeshTest: Edge sphere radius test PASS"); - } + if (count > 0) { + LOG_INFO("HorzMeshTest: Edge sphere radius test FAIL"); + } else { + LOG_INFO("HorzMeshTest: Edge sphere radius test PASS"); + } - // Test lon/lat coordinates of edge centers - // Convert Cartesian coordinates to lon/lat and check these agree with the - // values that have been read in - // Tests that the lon/lat coordinates for edge centers have been read in - // correctly - count = 0; - for (int Edge = 0; Edge < LocEdges; Edge++) { - - lon = computeLon(Mesh->XEdgeH(Edge), Mesh->YEdgeH(Edge), - Mesh->ZEdgeH(Edge)); - lat = computeLat(Mesh->XEdgeH(Edge), Mesh->YEdgeH(Edge), - Mesh->ZEdgeH(Edge)); - - if (abs(lon - Mesh->LonEdgeH(Edge)) > tol) - count++; - if (abs(lat - Mesh->LatEdgeH(Edge)) > tol) - count++; - } + // Test lon/lat coordinates of edge centers + // Convert Cartesian coordinates to lon/lat and check these agree with the + // values that have been read in + // Tests that the lon/lat coordinates for edge centers have been read in + // correctly + count = 0; + for (int Edge = 0; Edge < LocEdges; Edge++) { - if (count > 0) { - LOG_INFO("HorzMeshTest: Edge lon/lat test FAIL"); - } else { - LOG_INFO("HorzMeshTest: Edge lon/lat test PASS"); - } + lon = computeLon(Mesh->XEdgeH(Edge), Mesh->YEdgeH(Edge), + Mesh->ZEdgeH(Edge)); + lat = computeLat(Mesh->XEdgeH(Edge), Mesh->YEdgeH(Edge), + Mesh->ZEdgeH(Edge)); - // Test sum of local mesh vertices - // Get the global sum of all local vertex counts - // Tests that the correct vertex counts have been retrieved from the Decomp - // object - 
OMEGA::I4 SumVertices; - OMEGA::I4 LocVertices; - LocVertices = Mesh->NVerticesOwned; - Err = - MPI_Allreduce(&LocVertices, &SumVertices, 1, MPI_INT32_T, MPI_SUM, Comm); - - if (SumVertices == DefDecomp->NVerticesGlobal) { - LOG_INFO("HorzMeshTest: Sum vertex ID test PASS"); - } else { - LOG_INFO("HorzMeshTest: Sum vertex ID test FAIL {} {}", SumVertices, - DefDecomp->NVerticesGlobal); - } + if (abs(lon - Mesh->LonEdgeH(Edge)) > tol) + count++; + if (abs(lat - Mesh->LatEdgeH(Edge)) > tol) + count++; + } - // Test that vertex coordinates are on sphere - // Check that all vertices are a uniform distance from the origin - // Tests that the Cartesian coordinates for vertices have been read in - // correctly - sphere_radius = - distance(Mesh->XVertexH(0), Mesh->YVertexH(0), Mesh->ZVertexH(0)); - count = 0; - for (int Vertex = 0; Vertex < LocVertices; Vertex++) { - dist = distance(Mesh->XVertexH(Vertex), Mesh->YVertexH(Vertex), - Mesh->ZVertexH(Vertex)); - if (abs(sphere_radius - dist) > tol) - count++; - } + if (count > 0) { + LOG_INFO("HorzMeshTest: Edge lon/lat test FAIL"); + } else { + LOG_INFO("HorzMeshTest: Edge lon/lat test PASS"); + } - if (count > 0) { - LOG_INFO("HorzMeshTest: Vertex sphere radius test FAIL"); - } else { - LOG_INFO("HorzMeshTest: Vertex sphere radius test PASS"); - } + // Test sum of local mesh vertices + // Get the global sum of all local vertex counts + // Tests that the correct vertex counts have been retrieved from the + // Decomp object + OMEGA::I4 SumVertices; + OMEGA::I4 LocVertices; + LocVertices = Mesh->NVerticesOwned; + Err = MPI_Allreduce(&LocVertices, &SumVertices, 1, MPI_INT32_T, MPI_SUM, + Comm); + + if (SumVertices == DefDecomp->NVerticesGlobal) { + LOG_INFO("HorzMeshTest: Sum vertex ID test PASS"); + } else { + LOG_INFO("HorzMeshTest: Sum vertex ID test FAIL {} {}", SumVertices, + DefDecomp->NVerticesGlobal); + } - // Test lon/lat coordinates of vertices - // Convert Cartesian coordinates to lon/lat and check these agree with 
the - // values that have been read in - // Tests that the lon/lat coordinates for vertices have been read in - // correctly - count = 0; - for (int Vertex = 0; Vertex < LocVertices; Vertex++) { + // Test that vertex coordinates are on sphere + // Check that all vertices are a uniform distance from the origin + // Tests that the Cartesian coordinates for vertices have been read in + // correctly + sphere_radius = + distance(Mesh->XVertexH(0), Mesh->YVertexH(0), Mesh->ZVertexH(0)); + count = 0; + for (int Vertex = 0; Vertex < LocVertices; Vertex++) { + dist = distance(Mesh->XVertexH(Vertex), Mesh->YVertexH(Vertex), + Mesh->ZVertexH(Vertex)); + if (abs(sphere_radius - dist) > tol) + count++; + } - lon = computeLon(Mesh->XVertexH(Vertex), Mesh->YVertexH(Vertex), - Mesh->ZVertexH(Vertex)); - lat = computeLat(Mesh->XVertexH(Vertex), Mesh->YVertexH(Vertex), - Mesh->ZVertexH(Vertex)); + if (count > 0) { + LOG_INFO("HorzMeshTest: Vertex sphere radius test FAIL"); + } else { + LOG_INFO("HorzMeshTest: Vertex sphere radius test PASS"); + } - if (abs(lon - Mesh->LonVertexH(Vertex)) > tol) - count++; + // Test lon/lat coordinates of vertices + // Convert Cartesian coordinates to lon/lat and check these agree with the + // values that have been read in + // Tests that the lon/lat coordinates for vertices have been read in + // correctly + count = 0; + for (int Vertex = 0; Vertex < LocVertices; Vertex++) { - if (abs(lat - Mesh->LatVertexH(Vertex)) > tol) - count++; - } + lon = computeLon(Mesh->XVertexH(Vertex), Mesh->YVertexH(Vertex), + Mesh->ZVertexH(Vertex)); + lat = computeLat(Mesh->XVertexH(Vertex), Mesh->YVertexH(Vertex), + Mesh->ZVertexH(Vertex)); - if (count > 0) { - LOG_INFO("HorzMeshTest: Vertex lon/lat test FAIL"); - } else { - LOG_INFO("HorzMeshTest: Vertex lon/lat test PASS"); - } + if (abs(lon - Mesh->LonVertexH(Vertex)) > tol) + count++; - // Test bounds of bathymetry - // Find minimum and maximum values of the bottom depth - // and compares to reasonable values - 
// Tests that the bottom depth has been read in correctly - OMEGA::R8 MaxBathy = -1e10; - OMEGA::R8 MinBathy = 1e10; - for (int Cell = 0; Cell < LocCells; Cell++) { - if (Mesh->BottomDepthH(Cell) < MinBathy) { - MinBathy = Mesh->BottomDepthH(Cell); + if (abs(lat - Mesh->LatVertexH(Vertex)) > tol) + count++; } - if (Mesh->BottomDepthH(Cell) > MaxBathy) { - MaxBathy = Mesh->BottomDepthH(Cell); + + if (count > 0) { + LOG_INFO("HorzMeshTest: Vertex lon/lat test FAIL"); + } else { + LOG_INFO("HorzMeshTest: Vertex lon/lat test PASS"); } - } - if ((MinBathy > 0) && (MaxBathy < 11000.0)) { - LOG_INFO("HorzMeshTest: Bathy min/max test PASS"); - } else { - LOG_INFO("HorzMeshTest: Bathy min/max test FAIL"); - } + // Test bounds of bathymetry + // Find minimum and maximum values of the bottom depth + // and compares to reasonable values + // Tests that the bottom depth has been read in correctly + OMEGA::R8 MaxBathy = -1e10; + OMEGA::R8 MinBathy = 1e10; + for (int Cell = 0; Cell < LocCells; Cell++) { + if (Mesh->BottomDepthH(Cell) < MinBathy) { + MinBathy = Mesh->BottomDepthH(Cell); + } + if (Mesh->BottomDepthH(Cell) > MaxBathy) { + MaxBathy = Mesh->BottomDepthH(Cell); + } + } - // Test cell areas - // Find the global sum of all the local cell areas - // and compares to reasonable value for Earth's ocean area - // Tests that cell areas have been read in correctly - OMEGA::R8 LocSumArea = 0; - OMEGA::R8 SumCellArea; - for (int Cell = 0; Cell < LocCells; Cell++) { - LocSumArea += Mesh->AreaCellH(Cell); - } - Err = MPI_Allreduce(&LocSumArea, &SumCellArea, 1, MPI_DOUBLE, MPI_SUM, Comm); + if ((MinBathy > 0) && (MaxBathy < 11000.0)) { + LOG_INFO("HorzMeshTest: Bathy min/max test PASS"); + } else { + LOG_INFO("HorzMeshTest: Bathy min/max test FAIL"); + } - OMEGA::R8 OceanArea = 3.61e14; - if (abs(SumCellArea - OceanArea) / OceanArea < 0.05) { - LOG_INFO("HorzMeshTest: Cell area test PASS"); - } else { - LOG_INFO("HorzMeshTest: Cell area test FAIL"); - } + // Test cell areas + // 
Find the global sum of all the local cell areas + // and compares to reasonable value for Earth's ocean area + // Tests that cell areas have been read in correctly + OMEGA::R8 LocSumArea = 0; + OMEGA::R8 SumCellArea; + for (int Cell = 0; Cell < LocCells; Cell++) { + LocSumArea += Mesh->AreaCellH(Cell); + } + Err = MPI_Allreduce(&LocSumArea, &SumCellArea, 1, MPI_DOUBLE, MPI_SUM, + Comm); + + OMEGA::R8 OceanArea = 3.61e14; + if (abs(SumCellArea - OceanArea) / OceanArea < 0.05) { + LOG_INFO("HorzMeshTest: Cell area test PASS"); + } else { + LOG_INFO("HorzMeshTest: Cell area test FAIL"); + } - // Test triangle areas - // Find the global sum of all the local triangle areas - // and compare to resonable value for the Earth's ocean area - // Tests that the triangle areas have been read in correctly - LocSumArea = 0; - OMEGA::R8 SumTriangleArea; - for (int Vertex = 0; Vertex < LocVertices; Vertex++) { - LocSumArea += Mesh->AreaTriangleH(Vertex); - } - Err = MPI_Allreduce(&LocSumArea, &SumTriangleArea, 1, MPI_DOUBLE, MPI_SUM, - Comm); + // Test triangle areas + // Find the global sum of all the local triangle areas + // and compare to resonable value for the Earth's ocean area + // Tests that the triangle areas have been read in correctly + LocSumArea = 0; + OMEGA::R8 SumTriangleArea; + for (int Vertex = 0; Vertex < LocVertices; Vertex++) { + LocSumArea += Mesh->AreaTriangleH(Vertex); + } + Err = MPI_Allreduce(&LocSumArea, &SumTriangleArea, 1, MPI_DOUBLE, MPI_SUM, + Comm); - if (abs(SumTriangleArea - OceanArea) / OceanArea < 0.05) { - LOG_INFO("HorzMeshTest: Triangle area test PASS"); - } else { - LOG_INFO("HorzMeshTest: Triangle area test FAIL"); - } + if (abs(SumTriangleArea - OceanArea) / OceanArea < 0.05) { + LOG_INFO("HorzMeshTest: Triangle area test PASS"); + } else { + LOG_INFO("HorzMeshTest: Triangle area test FAIL"); + } - // Test kite areas - // Find the local sum of all the local kite areas - // and compare to reasonable value for the Earth's ocean area - // 
Tests that the kite areas have been read in correctly - LocSumArea = 0; - OMEGA::R8 SumKiteArea; - for (int Vertex = 0; Vertex < LocVertices; Vertex++) { - for (int i = 0; i < Mesh->VertexDegree; i++) { - LocSumArea += Mesh->KiteAreasOnVertexH(Vertex, i); + // Test kite areas + // Find the local sum of all the local kite areas + // and compare to reasonable value for the Earth's ocean area + // Tests that the kite areas have been read in correctly + LocSumArea = 0; + OMEGA::R8 SumKiteArea; + for (int Vertex = 0; Vertex < LocVertices; Vertex++) { + for (int i = 0; i < Mesh->VertexDegree; i++) { + LocSumArea += Mesh->KiteAreasOnVertexH(Vertex, i); + } } - } - Err = MPI_Allreduce(&LocSumArea, &SumKiteArea, 1, MPI_DOUBLE, MPI_SUM, Comm); + Err = MPI_Allreduce(&LocSumArea, &SumKiteArea, 1, MPI_DOUBLE, MPI_SUM, + Comm); - if (abs(SumKiteArea - OceanArea) / OceanArea < 0.05) { - LOG_INFO("HorzMeshTest: Kite area test PASS"); - } else { - LOG_INFO("HorzMeshTest: Kite area test FAIL"); - } + if (abs(SumKiteArea - OceanArea) / OceanArea < 0.05) { + LOG_INFO("HorzMeshTest: Kite area test PASS"); + } else { + LOG_INFO("HorzMeshTest: Kite area test FAIL"); + } - // Test dcEdge - // Compute spherical distance between cell centers and compare to value that - // was read in - // Tests that the distances between cell centers have been read in correctly - count = 0; - for (int Edge = 0; Edge < LocEdges; Edge++) { - int Cell1 = Mesh->CellsOnEdgeH(Edge, 0); - int Cell2 = Mesh->CellsOnEdgeH(Edge, 1); + // Test dcEdge + // Compute spherical distance between cell centers and compare to value + // that was read in Tests that the distances between cell centers have + // been read in correctly + count = 0; + for (int Edge = 0; Edge < LocEdges; Edge++) { + int Cell1 = Mesh->CellsOnEdgeH(Edge, 0); + int Cell2 = Mesh->CellsOnEdgeH(Edge, 1); - if ((Cell1 < DefDecomp->NCellsAll) && (Cell2 < DefDecomp->NCellsAll)) { + if ((Cell1 < DefDecomp->NCellsAll) && (Cell2 < DefDecomp->NCellsAll)) { - 
OMEGA::R8 dc = - sphereDistance(Mesh->LonCellH(Cell1), Mesh->LatCellH(Cell1), - Mesh->LonCellH(Cell2), Mesh->LatCellH(Cell2)); - dc = sphere_radius * dc; + OMEGA::R8 dc = + sphereDistance(Mesh->LonCellH(Cell1), Mesh->LatCellH(Cell1), + Mesh->LonCellH(Cell2), Mesh->LatCellH(Cell2)); + dc = sphere_radius * dc; - if (abs((dc - Mesh->DcEdgeH(Edge)) / Mesh->DcEdgeH(Edge)) > tol) { - count++; + if (abs((dc - Mesh->DcEdgeH(Edge)) / Mesh->DcEdgeH(Edge)) > tol) { + count++; + } } } - } - if (count == 0) { - LOG_INFO("HorzMeshTest: dcEdge test PASS"); - } else { - LOG_INFO("HorzMeshTest: dcEdge test FAIL"); - } + if (count == 0) { + LOG_INFO("HorzMeshTest: dcEdge test PASS"); + } else { + LOG_INFO("HorzMeshTest: dcEdge test FAIL"); + } - // Test dvEdge - // Compute spherical distance between vertices on edges and compare to value - // that was read in - // Tests that the distances between vertices have been read in correctly - count = 0; - for (int Edge = 0; Edge < LocEdges; Edge++) { - int Vertex1 = Mesh->VerticesOnEdgeH(Edge, 0); - int Vertex2 = Mesh->VerticesOnEdgeH(Edge, 1); + // Test dvEdge + // Compute spherical distance between vertices on edges and compare to + // value that was read in Tests that the distances between vertices have + // been read in correctly + count = 0; + for (int Edge = 0; Edge < LocEdges; Edge++) { + int Vertex1 = Mesh->VerticesOnEdgeH(Edge, 0); + int Vertex2 = Mesh->VerticesOnEdgeH(Edge, 1); - if ((Vertex1 < DefDecomp->NVerticesAll) && - (Vertex2 < DefDecomp->NVerticesAll)) { + if ((Vertex1 < DefDecomp->NVerticesAll) && + (Vertex2 < DefDecomp->NVerticesAll)) { - OMEGA::R8 dv = sphereDistance( - Mesh->LonVertexH(Vertex1), Mesh->LatVertexH(Vertex1), - Mesh->LonVertexH(Vertex2), Mesh->LatVertexH(Vertex2)); + OMEGA::R8 dv = sphereDistance( + Mesh->LonVertexH(Vertex1), Mesh->LatVertexH(Vertex1), + Mesh->LonVertexH(Vertex2), Mesh->LatVertexH(Vertex2)); - dv = sphere_radius * dv; + dv = sphere_radius * dv; - if (abs((dv - Mesh->DvEdgeH(Edge)) / 
Mesh->DvEdgeH(Edge)) > tol) { - count++; + if (abs((dv - Mesh->DvEdgeH(Edge)) / Mesh->DvEdgeH(Edge)) > tol) { + count++; + } } } - } - if (count == 0) { - LOG_INFO("HorzMeshTest: dvEdge test PASS"); - } else { - LOG_INFO("HorzMeshTest: dvEdge test FAIL"); - } + if (count == 0) { + LOG_INFO("HorzMeshTest: dvEdge test PASS"); + } else { + LOG_INFO("HorzMeshTest: dvEdge test FAIL"); + } - // Test angleEdge - // Check that the range of edge angles is between (-pi, pi) - // Tests that the edge angles have been read in correctly - count = 0; - for (int Edge = 0; Edge < LocEdges; Edge++) { - if (abs(Mesh->AngleEdgeH(Edge)) > pi) { - count++; + // Test angleEdge + // Check that the range of edge angles is between (-pi, pi) + // Tests that the edge angles have been read in correctly + count = 0; + for (int Edge = 0; Edge < LocEdges; Edge++) { + if (abs(Mesh->AngleEdgeH(Edge)) > pi) { + count++; + } } - } - if (count == 0) { - LOG_INFO("HorzMeshTest: angleEdge test PASS"); - } else { - LOG_INFO("HorzMeshTest: angleEdge test FAIL"); - } + if (count == 0) { + LOG_INFO("HorzMeshTest: angleEdge test PASS"); + } else { + LOG_INFO("HorzMeshTest: angleEdge test FAIL"); + } - // Test fCell - // Compute the Coriolis parameter for cell centers and compare with values - // that were read in - // Tests that the cell Coriolis values were read in correctly - count = 0; - for (int Cell = 0; Cell < LocCells; Cell++) { - OMEGA::R8 f = coriolis(Mesh->LatCellH(Cell)); + // Test fCell + // Compute the Coriolis parameter for cell centers and compare with values + // that were read in + // Tests that the cell Coriolis values were read in correctly + count = 0; + for (int Cell = 0; Cell < LocCells; Cell++) { + OMEGA::R8 f = coriolis(Mesh->LatCellH(Cell)); - if (abs(f - Mesh->FCellH(Cell)) > tol) { - count++; + if (abs(f - Mesh->FCellH(Cell)) > tol) { + count++; + } } - } - if (count == 0) { - LOG_INFO("HorzMeshTest: fCell test PASS"); - } else { - LOG_INFO("HorzMeshTest: fCell test FAIL"); - } + 
if (count == 0) { + LOG_INFO("HorzMeshTest: fCell test PASS"); + } else { + LOG_INFO("HorzMeshTest: fCell test FAIL"); + } - // Test fVertex - // Compute the Coriolis parameter for vertices and compare with values that - // were read in - // Tests that the vertex Coriolis values were read in correctly - count = 0; - for (int Vertex = 0; Vertex < LocVertices; Vertex++) { + // Test fVertex + // Compute the Coriolis parameter for vertices and compare with values + // that were read in Tests that the vertex Coriolis values were read in + // correctly + count = 0; + for (int Vertex = 0; Vertex < LocVertices; Vertex++) { - OMEGA::R8 f = coriolis(Mesh->LatVertexH(Vertex)); + OMEGA::R8 f = coriolis(Mesh->LatVertexH(Vertex)); - if (abs(f - Mesh->FVertexH(Vertex)) > tol) { - count++; + if (abs(f - Mesh->FVertexH(Vertex)) > tol) { + count++; + } } - } - if (count == 0) { - LOG_INFO("HorzMeshTest: fVertex test PASS"); - } else { - LOG_INFO("HorzMeshTest: fVertex test FAIL"); - } + if (count == 0) { + LOG_INFO("HorzMeshTest: fVertex test PASS"); + } else { + LOG_INFO("HorzMeshTest: fVertex test FAIL"); + } - // Test fEdge - // Compute the Coriolis parameter for edges and compare with values that - // were read in - // Tests that the edge Coriolis values were read in correctly - count = 0; - for (int Edge = 0; Edge < LocEdges; Edge++) { - OMEGA::R8 f = coriolis(Mesh->LatEdgeH(Edge)); + // Test fEdge + // Compute the Coriolis parameter for edges and compare with values that + // were read in + // Tests that the edge Coriolis values were read in correctly + count = 0; + for (int Edge = 0; Edge < LocEdges; Edge++) { + OMEGA::R8 f = coriolis(Mesh->LatEdgeH(Edge)); - if (abs(f - Mesh->FEdgeH(Edge)) > tol) { - count++; + if (abs(f - Mesh->FEdgeH(Edge)) > tol) { + count++; + } } - } - if (count == 0) { - LOG_INFO("HorzMeshTest: fEdge test PASS"); - } else { - LOG_INFO("HorzMeshTest: fEdge test FAIL"); - } + if (count == 0) { + LOG_INFO("HorzMeshTest: fEdge test PASS"); + } else { + 
LOG_INFO("HorzMeshTest: fEdge test FAIL"); + } - // Test weightsOnEdge - // Check the range of the edge weights - // Tests that the edge weights were read in correctly - count = 0; - for (int Edge = 0; Edge < LocEdges; Edge++) { - for (int i = 0; i < Mesh->MaxEdges2; i++) { - if (abs(Mesh->WeightsOnEdgeH(Edge, i)) > 1.0) { - count++; + // Test weightsOnEdge + // Check the range of the edge weights + // Tests that the edge weights were read in correctly + count = 0; + for (int Edge = 0; Edge < LocEdges; Edge++) { + for (int i = 0; i < Mesh->MaxEdges2; i++) { + if (abs(Mesh->WeightsOnEdgeH(Edge, i)) > 1.0) { + count++; + } } } - } - if (count == 0) { - LOG_INFO("HorzMeshTest: weightsOnEdge test PASS"); - } else { - LOG_INFO("HorzMeshTest: weightsOnEdge test FAIL"); - } + if (count == 0) { + LOG_INFO("HorzMeshTest: weightsOnEdge test PASS"); + } else { + LOG_INFO("HorzMeshTest: weightsOnEdge test FAIL"); + } + // Test edgeSignOnCell + // Check that the sign corresponds with convention + // Tests that the edge sign values were calculated correctly + count = 0; + for (int Edge = 0; Edge < LocEdges; Edge++) { + int Cell0 = Mesh->CellsOnEdgeH(Edge, 0); + int iEdge0; + for (int i = 0; i < Mesh->NEdgesOnCellH(Cell0); i++) { + if (Mesh->EdgesOnCellH(Cell0, i) == Edge) { + iEdge0 = i; + break; + } + } + if (abs(Mesh->EdgeSignOnCellH(Cell0, iEdge0) + 1.0) > tol) { + count++; + } - // Test edgeSignOnCell - // Check that the sign corresponds with convention - // Tests that the edge sign values were calculated correctly - count = 0; - for (int Edge = 0; Edge < LocEdges; Edge++) { - int Cell0 = Mesh->CellsOnEdgeH(Edge, 0); - int iEdge0; - for (int i = 0; i < Mesh->NEdgesOnCellH(Cell0); i++) { - if (Mesh->EdgesOnCellH(Cell0, i) == Edge) { - iEdge0 = i; - break; + int Cell1 = Mesh->CellsOnEdgeH(Edge, 1); + if (Cell1 < DefDecomp->NCellsAll) { + int iEdge1; + for (int i = 0; i < Mesh->NEdgesOnCellH(Cell1); i++) { + if (Mesh->EdgesOnCellH(Cell1, i) == Edge) { + iEdge1 = i; + break; + } 
+ } + if (abs(Mesh->EdgeSignOnCellH(Cell1, iEdge1) - 1.0) > tol) { + count++; + } } } - if (abs(Mesh->EdgeSignOnCellH(Cell0, iEdge0) + 1.0) > tol) { - count++; + + if (count == 0) { + LOG_INFO("HorzMeshTest: edgeSignOnCell test PASS"); + } else { + LOG_INFO("HorzMeshTest: edgeSignOnCell test FAIL"); } - int Cell1 = Mesh->CellsOnEdgeH(Edge, 1); - if (Cell1 < DefDecomp->NCellsAll) { + // Test edgeSignOnVertex + // Check that the sign corresponds with convention + // Tests that the edge sign vlues were calculated correctly + count = 0; + for (int Edge = 0; Edge < LocEdges; Edge++) { + int Vertex0 = Mesh->VerticesOnEdgeH(Edge, 0); + int iEdge0; + for (int i = 0; i < Mesh->VertexDegree; i++) { + if (Mesh->EdgesOnVertexH(Vertex0, i) == Edge) { + iEdge0 = i; + break; + } + } + if (abs(Mesh->EdgeSignOnVertexH(Vertex0, iEdge0) + 1.0) > tol) { + count++; + } + + int Vertex1 = Mesh->VerticesOnEdgeH(Edge, 1); int iEdge1; - for (int i = 0; i < Mesh->NEdgesOnCellH(Cell1); i++) { - if (Mesh->EdgesOnCellH(Cell1, i) == Edge) { + for (int i = 0; i < Mesh->VertexDegree; i++) { + if (Mesh->EdgesOnVertexH(Vertex1, i) == Edge) { iEdge1 = i; break; } } - if (abs(Mesh->EdgeSignOnCellH(Cell1, iEdge1) - 1.0) > tol) { + if (abs(Mesh->EdgeSignOnVertexH(Vertex1, iEdge1) - 1.0) > tol) { count++; } } - } - - if (count == 0) { - LOG_INFO("HorzMeshTest: edgeSignOnCell test PASS"); - } else { - LOG_INFO("HorzMeshTest: edgeSignOnCell test FAIL"); - } - // Test edgeSignOnVertex - // Check that the sign corresponds with convention - // Tests that the edge sign vlues were calculated correctly - count = 0; - for (int Edge = 0; Edge < LocEdges; Edge++) { - int Vertex0 = Mesh->VerticesOnEdgeH(Edge, 0); - int iEdge0; - for (int i = 0; i < Mesh->VertexDegree; i++) { - if (Mesh->EdgesOnVertexH(Vertex0, i) == Edge) { - iEdge0 = i; - break; - } + if (count == 0) { + LOG_INFO("HorzMeshTest: edgeSignOnVertex test PASS"); + } else { + LOG_INFO("HorzMeshTest: edgeSignOnVertex test FAIL"); } - if 
(abs(Mesh->EdgeSignOnVertexH(Vertex0, iEdge0) + 1.0) > tol) { - count++; + + // Test cell halo values + // Perform halo exhange on owned cell only array and compare + // read values + // Tests that halo values are read in correctly + OMEGA::Halo MyHalo(DefEnv, DefDecomp); + OMEGA::HostArray1DR8 XCellTest("XCellTest", Mesh->NCellsSize); + // Mesh->XCellH.deep_copy_to(XCellTest); + OMEGA::deepCopy(XCellTest, Mesh->XCellH); + + for (int Cell = Mesh->NCellsOwned; Cell < Mesh->NCellsAll; Cell++) { + XCellTest(Cell) = 0.0; } + MyHalo.exchangeFullArrayHalo(XCellTest, OMEGA::OnCell); - int Vertex1 = Mesh->VerticesOnEdgeH(Edge, 1); - int iEdge1; - for (int i = 0; i < Mesh->VertexDegree; i++) { - if (Mesh->EdgesOnVertexH(Vertex1, i) == Edge) { - iEdge1 = i; + count = 0; + for (int Cell = 0; Cell < Mesh->NCellsAll; Cell++) { + if (Mesh->XCellH(Cell) != XCellTest(Cell)) { + count++; break; } } - if (abs(Mesh->EdgeSignOnVertex(Vertex1, iEdge1) - 1.0) > tol) { - count++; - } - } - if (count == 0) { - LOG_INFO("HorzMeshTest: edgeSignOnVertex test PASS"); - } else { - LOG_INFO("HorzMeshTest: edgeSignOnVertex test FAIL"); - } + if (count == 0) { + LOG_INFO("HorzMeshTest: cell halo exhange PASS"); + } else { + LOG_INFO("HorzMeshTest: cell halo exhange FAIL"); + } - // Test cell halo values - // Perform halo exhange on owned cell only array and compare - // read values - // Tests that halo values are read in correctly - OMEGA::Halo MyHalo(DefEnv, DefDecomp); - OMEGA::ArrayHost1DR8 XCellTest("XCellTest", Mesh->NCellsSize); - Mesh->XCellH.deep_copy_to(XCellTest); - for (int Cell = Mesh->NCellsOwned; Cell < Mesh->NCellsAll; Cell++) { - XCellTest(Cell) = 0.0; - } - MyHalo.exchangeFullArrayHalo(XCellTest, OMEGA::OnCell); + // Test edge halo values + // Perform halo exhange on owned edge only array and compare + // read values + // Tests that halo values are read in correctly + OMEGA::HostArray1DR8 XEdgeTest("XEdgeTest", Mesh->NEdgesSize); + // Mesh->XEdgeH.deep_copy_to(XEdgeTest); + 
OMEGA::deepCopy(XEdgeTest, Mesh->XEdgeH); - count = 0; - for (int Cell = 0; Cell < Mesh->NCellsAll; Cell++) { - if (Mesh->XCellH(Cell) != XCellTest(Cell)) { - count++; - break; + for (int Edge = Mesh->NEdgesOwned; Edge < Mesh->NEdgesAll; Edge++) { + XEdgeTest(Edge) = 0.0; } - } + MyHalo.exchangeFullArrayHalo(XEdgeTest, OMEGA::OnEdge); - if (count == 0) { - LOG_INFO("HorzMeshTest: cell halo exhange PASS"); - } else { - LOG_INFO("HorzMeshTest: cell halo exhange FAIL"); - } - - // Test edge halo values - // Perform halo exhange on owned edge only array and compare - // read values - // Tests that halo values are read in correctly - OMEGA::ArrayHost1DR8 XEdgeTest("XEdgeTest", Mesh->NEdgesSize); - Mesh->XEdgeH.deep_copy_to(XEdgeTest); - for (int Edge = Mesh->NEdgesOwned; Edge < Mesh->NEdgesAll; Edge++) { - XEdgeTest(Edge) = 0.0; - } - MyHalo.exchangeFullArrayHalo(XEdgeTest, OMEGA::OnEdge); + count = 0; + for (int Edge = 0; Edge < Mesh->NEdgesAll; Edge++) { + if (Mesh->XEdgeH(Edge) != XEdgeTest(Edge)) { + count++; + break; + } + } - count = 0; - for (int Edge = 0; Edge < Mesh->NEdgesAll; Edge++) { - if (Mesh->XEdgeH(Edge) != XEdgeTest(Edge)) { - count++; - break; + if (count == 0) { + LOG_INFO("HorzMeshTest: edge halo exhange PASS"); + } else { + LOG_INFO("HorzMeshTest: edge halo exhange FAIL"); } - } - if (count == 0) { - LOG_INFO("HorzMeshTest: edge halo exhange PASS"); - } else { - LOG_INFO("HorzMeshTest: edge halo exhange FAIL"); - } + // Test vertex halo values + // Perform halo exhange on owned vertex only array and compare + // read values + // Tests that halo values are read in correctly + OMEGA::HostArray1DR8 XVertexTest("XVertexTest", Mesh->NVerticesSize); + // Mesh->XVertexH.deep_copy_to(XVertexTest); + OMEGA::deepCopy(XVertexTest, Mesh->XVertexH); + + for (int Vertex = Mesh->NVerticesOwned; Vertex < Mesh->NVerticesAll; + Vertex++) { + XVertexTest(Vertex) = 0.0; + } + MyHalo.exchangeFullArrayHalo(XVertexTest, OMEGA::OnVertex); - // Test vertex halo values - // 
Perform halo exhange on owned vertex only array and compare - // read values - // Tests that halo values are read in correctly - OMEGA::ArrayHost1DR8 XVertexTest("XVertexTest", Mesh->NVerticesSize); - Mesh->XVertexH.deep_copy_to(XVertexTest); - for (int Vertex = Mesh->NVerticesOwned; Vertex < Mesh->NVerticesAll; - Vertex++) { - XVertexTest(Vertex) = 0.0; - } - MyHalo.exchangeFullArrayHalo(XVertexTest, OMEGA::OnVertex); + count = 0; + for (int Vertex = 0; Vertex < Mesh->NVerticesAll; Vertex++) { + if (Mesh->XVertexH(Vertex) != XVertexTest(Vertex)) { + count++; + break; + } + } - count = 0; - for (int Vertex = 0; Vertex < Mesh->NVerticesAll; Vertex++) { - if (Mesh->XVertexH(Vertex) != XVertexTest(Vertex)) { - count++; - break; + if (count == 0) { + LOG_INFO("HorzMeshTest: vertex halo exhange PASS"); + } else { + LOG_INFO("HorzMeshTest: vertex halo exhange FAIL"); } - } + // Finalize Omega objects + OMEGA::HorzMesh::clear(); + OMEGA::Decomp::clear(); + OMEGA::MachEnv::removeAll(); - if (count == 0) { - LOG_INFO("HorzMeshTest: vertex halo exhange PASS"); - } else { - LOG_INFO("HorzMeshTest: vertex halo exhange FAIL"); + // MPI_Status status; + if (Err == 0) + LOG_INFO("HorzMeshTest: Successful completion"); } - - // Finalize Omega objects - OMEGA::HorzMesh::clear(); - OMEGA::Decomp::clear(); - OMEGA::MachEnv::removeAll(); - - // MPI_Status status; - if (Err == 0) - LOG_INFO("HorzMeshTest: Successful completion"); - yakl::finalize(); + Kokkos::finalize(); MPI_Finalize(); } // end of main From 2c2ac1d6c3fb6d014a2f056fcbef119524844bfc Mon Sep 17 00:00:00 2001 From: Youngsung Kim Date: Mon, 1 Apr 2024 15:04:40 -0400 Subject: [PATCH 02/11] fix formatting issues --- components/omega/src/base/Decomp.cpp | 24 ++-- components/omega/src/base/Halo.cpp | 160 +++++++++++++-------------- components/omega/src/base/Halo.h | 4 +- 3 files changed, 94 insertions(+), 94 deletions(-) diff --git a/components/omega/src/base/Decomp.cpp b/components/omega/src/base/Decomp.cpp index 
9aad17e27942..cb4724ecb443 100644 --- a/components/omega/src/base/Decomp.cpp +++ b/components/omega/src/base/Decomp.cpp @@ -630,26 +630,26 @@ Decomp::Decomp( // Create device copies of all arrays NCellsHalo = createDeviceCopy(NCellsHaloH); - CellID = createDeviceCopy(CellIDH); - CellLoc = createDeviceCopy(CellLocH); + CellID = createDeviceCopy(CellIDH); + CellLoc = createDeviceCopy(CellLocH); NEdgesHalo = createDeviceCopy(NEdgesHaloH); - EdgeID = createDeviceCopy(EdgeIDH); - EdgeLoc = createDeviceCopy(EdgeLocH); + EdgeID = createDeviceCopy(EdgeIDH); + EdgeLoc = createDeviceCopy(EdgeLocH); NVerticesHalo = createDeviceCopy(NVerticesHaloH); - VertexID = createDeviceCopy(VertexIDH); - VertexLoc = createDeviceCopy(VertexLocH); + VertexID = createDeviceCopy(VertexIDH); + VertexLoc = createDeviceCopy(VertexLocH); - CellsOnCell = createDeviceCopy(CellsOnCellH); - EdgesOnCell = createDeviceCopy(EdgesOnCellH); + CellsOnCell = createDeviceCopy(CellsOnCellH); + EdgesOnCell = createDeviceCopy(EdgesOnCellH); VerticesOnCell = createDeviceCopy(VerticesOnCellH); - NEdgesOnCell = createDeviceCopy(NEdgesOnCellH); + NEdgesOnCell = createDeviceCopy(NEdgesOnCellH); - CellsOnEdge = createDeviceCopy(CellsOnEdgeH); - EdgesOnEdge = createDeviceCopy(EdgesOnEdgeH); + CellsOnEdge = createDeviceCopy(CellsOnEdgeH); + EdgesOnEdge = createDeviceCopy(EdgesOnEdgeH); VerticesOnEdge = createDeviceCopy(VerticesOnEdgeH); - NEdgesOnEdge = createDeviceCopy(NEdgesOnEdgeH); + NEdgesOnEdge = createDeviceCopy(NEdgesOnEdgeH); CellsOnVertex = createDeviceCopy(CellsOnVertexH); EdgesOnVertex = createDeviceCopy(EdgesOnVertexH); diff --git a/components/omega/src/base/Halo.cpp b/components/omega/src/base/Halo.cpp index dc9a572907b0..2b2461959a75 100644 --- a/components/omega/src/base/Halo.cpp +++ b/components/omega/src/base/Halo.cpp @@ -511,7 +511,7 @@ int Halo::packBuffer(const HostArray1DR8 Array) { int Halo::packBuffer(const HostArray2DI4 Array) { ExchList *MyList = &MyNeighbor->SendLists[MyElem]; - int NJ = 
Array.extent(0); + int NJ = Array.extent(0); MyNeighbor->SendBuffer.resize(MyList->NTot * TotSize); @@ -531,7 +531,7 @@ int Halo::packBuffer(const HostArray2DI4 Array) { int Halo::packBuffer(const HostArray2DI8 Array) { ExchList *MyList = &MyNeighbor->SendLists[MyElem]; - int NJ = Array.extent(1); + int NJ = Array.extent(1); MyNeighbor->SendBuffer.resize(MyList->NTot * TotSize); @@ -551,7 +551,7 @@ int Halo::packBuffer(const HostArray2DI8 Array) { int Halo::packBuffer(const HostArray2DR4 Array) { ExchList *MyList = &MyNeighbor->SendLists[MyElem]; - int NJ = Array.extent(1); + int NJ = Array.extent(1); MyNeighbor->SendBuffer.resize(MyList->NTot * TotSize); @@ -571,7 +571,7 @@ int Halo::packBuffer(const HostArray2DR4 Array) { int Halo::packBuffer(const HostArray2DR8 Array) { ExchList *MyList = &MyNeighbor->SendLists[MyElem]; - int NJ = Array.extent(1); + int NJ = Array.extent(1); MyNeighbor->SendBuffer.resize(MyList->NTot * TotSize); @@ -591,8 +591,8 @@ int Halo::packBuffer(const HostArray2DR8 Array) { int Halo::packBuffer(const HostArray3DI4 Array) { ExchList *MyList = &MyNeighbor->SendLists[MyElem]; - int NK = Array.extent(0); - int NJ = Array.extent(2); + int NK = Array.extent(0); + int NJ = Array.extent(2); MyNeighbor->SendBuffer.resize(MyList->NTot * TotSize); @@ -616,8 +616,8 @@ int Halo::packBuffer(const HostArray3DI4 Array) { int Halo::packBuffer(const HostArray3DI8 Array) { ExchList *MyList = &MyNeighbor->SendLists[MyElem]; - int NK = Array.extent(0); - int NJ = Array.extent(2); + int NK = Array.extent(0); + int NJ = Array.extent(2); MyNeighbor->SendBuffer.resize(MyList->NTot * TotSize); @@ -641,8 +641,8 @@ int Halo::packBuffer(const HostArray3DI8 Array) { int Halo::packBuffer(const HostArray3DR4 Array) { ExchList *MyList = &MyNeighbor->SendLists[MyElem]; - int NK = Array.extent(0); - int NJ = Array.extent(2); + int NK = Array.extent(0); + int NJ = Array.extent(2); MyNeighbor->SendBuffer.resize(MyList->NTot * TotSize); @@ -666,8 +666,8 @@ int 
Halo::packBuffer(const HostArray3DR4 Array) { int Halo::packBuffer(const HostArray3DR8 Array) { ExchList *MyList = &MyNeighbor->SendLists[MyElem]; - int NK = Array.extent(0); - int NJ = Array.extent(2); + int NK = Array.extent(0); + int NJ = Array.extent(2); MyNeighbor->SendBuffer.resize(MyList->NTot * TotSize); @@ -691,9 +691,9 @@ int Halo::packBuffer(const HostArray3DR8 Array) { int Halo::packBuffer(const HostArray4DI4 Array) { ExchList *MyList = &MyNeighbor->SendLists[MyElem]; - int NL = Array.extent(0); - int NK = Array.extent(1); - int NJ = Array.extent(3); + int NL = Array.extent(0); + int NK = Array.extent(1); + int NJ = Array.extent(3); MyNeighbor->SendBuffer.resize(MyList->NTot * TotSize); @@ -720,9 +720,9 @@ int Halo::packBuffer(const HostArray4DI4 Array) { int Halo::packBuffer(const HostArray4DI8 Array) { ExchList *MyList = &MyNeighbor->SendLists[MyElem]; - int NL = Array.extent(0); - int NK = Array.extent(1); - int NJ = Array.extent(3); + int NL = Array.extent(0); + int NK = Array.extent(1); + int NJ = Array.extent(3); MyNeighbor->SendBuffer.resize(MyList->NTot * TotSize); @@ -749,9 +749,9 @@ int Halo::packBuffer(const HostArray4DI8 Array) { int Halo::packBuffer(const HostArray4DR4 Array) { ExchList *MyList = &MyNeighbor->SendLists[MyElem]; - int NL = Array.extent(0); - int NK = Array.extent(1); - int NJ = Array.extent(3); + int NL = Array.extent(0); + int NK = Array.extent(1); + int NJ = Array.extent(3); MyNeighbor->SendBuffer.resize(MyList->NTot * TotSize); @@ -778,9 +778,9 @@ int Halo::packBuffer(const HostArray4DR4 Array) { int Halo::packBuffer(const HostArray4DR8 Array) { ExchList *MyList = &MyNeighbor->SendLists[MyElem]; - int NL = Array.extent(0); - int NK = Array.extent(1); - int NJ = Array.extent(3); + int NL = Array.extent(0); + int NK = Array.extent(1); + int NJ = Array.extent(3); MyNeighbor->SendBuffer.resize(MyList->NTot * TotSize); @@ -807,10 +807,10 @@ int Halo::packBuffer(const HostArray4DR8 Array) { int Halo::packBuffer(const 
HostArray5DI4 Array) { ExchList *MyList = &MyNeighbor->SendLists[MyElem]; - int NM = Array.extent(0); - int NL = Array.extent(1); - int NK = Array.extent(2); - int NJ = Array.extent(4); + int NM = Array.extent(0); + int NL = Array.extent(1); + int NK = Array.extent(2); + int NJ = Array.extent(4); MyNeighbor->SendBuffer.resize(MyList->NTot * TotSize); @@ -839,10 +839,10 @@ int Halo::packBuffer(const HostArray5DI4 Array) { int Halo::packBuffer(const HostArray5DI8 Array) { ExchList *MyList = &MyNeighbor->SendLists[MyElem]; - int NM = Array.extent(0); - int NL = Array.extent(1); - int NK = Array.extent(2); - int NJ = Array.extent(4); + int NM = Array.extent(0); + int NL = Array.extent(1); + int NK = Array.extent(2); + int NJ = Array.extent(4); MyNeighbor->SendBuffer.resize(MyList->NTot * TotSize); @@ -871,10 +871,10 @@ int Halo::packBuffer(const HostArray5DI8 Array) { int Halo::packBuffer(const HostArray5DR4 Array) { ExchList *MyList = &MyNeighbor->SendLists[MyElem]; - int NM = Array.extent(0); - int NL = Array.extent(1); - int NK = Array.extent(2); - int NJ = Array.extent(4); + int NM = Array.extent(0); + int NL = Array.extent(1); + int NK = Array.extent(2); + int NJ = Array.extent(4); MyNeighbor->SendBuffer.resize(MyList->NTot * TotSize); @@ -903,10 +903,10 @@ int Halo::packBuffer(const HostArray5DR4 Array) { int Halo::packBuffer(const HostArray5DR8 Array) { ExchList *MyList = &MyNeighbor->SendLists[MyElem]; - int NM = Array.extent(0); - int NL = Array.extent(1); - int NK = Array.extent(2); - int NJ = Array.extent(4); + int NM = Array.extent(0); + int NL = Array.extent(1); + int NK = Array.extent(2); + int NJ = Array.extent(4); MyNeighbor->SendBuffer.resize(MyList->NTot * TotSize); @@ -1004,7 +1004,7 @@ int Halo::unpackBuffer(HostArray1DR8 &Array) { int Halo::unpackBuffer(HostArray2DI4 &Array) { ExchList *MyList = &MyNeighbor->RecvLists[MyElem]; - int NJ = Array.extent(1); + int NJ = Array.extent(1); for (int ILayer = 0; ILayer < NumLayers; ++ILayer) { for (int IExch 
= 0; IExch < MyList->NList[ILayer]; ++IExch) { @@ -1022,7 +1022,7 @@ int Halo::unpackBuffer(HostArray2DI4 &Array) { int Halo::unpackBuffer(HostArray2DI8 &Array) { ExchList *MyList = &MyNeighbor->RecvLists[MyElem]; - int NJ = Array.extent(1); + int NJ = Array.extent(1); for (int ILayer = 0; ILayer < NumLayers; ++ILayer) { for (int IExch = 0; IExch < MyList->NList[ILayer]; ++IExch) { @@ -1040,7 +1040,7 @@ int Halo::unpackBuffer(HostArray2DI8 &Array) { int Halo::unpackBuffer(HostArray2DR4 &Array) { ExchList *MyList = &MyNeighbor->RecvLists[MyElem]; - int NJ = Array.extent(1); + int NJ = Array.extent(1); for (int ILayer = 0; ILayer < NumLayers; ++ILayer) { for (int IExch = 0; IExch < MyList->NList[ILayer]; ++IExch) { @@ -1058,7 +1058,7 @@ int Halo::unpackBuffer(HostArray2DR4 &Array) { int Halo::unpackBuffer(HostArray2DR8 &Array) { ExchList *MyList = &MyNeighbor->RecvLists[MyElem]; - int NJ = Array.extent(1); + int NJ = Array.extent(1); for (int ILayer = 0; ILayer < NumLayers; ++ILayer) { for (int IExch = 0; IExch < MyList->NList[ILayer]; ++IExch) { @@ -1076,8 +1076,8 @@ int Halo::unpackBuffer(HostArray2DR8 &Array) { int Halo::unpackBuffer(HostArray3DI4 &Array) { ExchList *MyList = &MyNeighbor->RecvLists[MyElem]; - int NK = Array.extent(0); - int NJ = Array.extent(2); + int NK = Array.extent(0); + int NJ = Array.extent(2); for (int K = 0; K < NK; ++K) { for (int ILayer = 0; ILayer < NumLayers; ++ILayer) { @@ -1099,8 +1099,8 @@ int Halo::unpackBuffer(HostArray3DI4 &Array) { int Halo::unpackBuffer(HostArray3DI8 &Array) { ExchList *MyList = &MyNeighbor->RecvLists[MyElem]; - int NK = Array.extent(0); - int NJ = Array.extent(2); + int NK = Array.extent(0); + int NJ = Array.extent(2); for (int K = 0; K < NK; ++K) { for (int ILayer = 0; ILayer < NumLayers; ++ILayer) { @@ -1122,8 +1122,8 @@ int Halo::unpackBuffer(HostArray3DI8 &Array) { int Halo::unpackBuffer(HostArray3DR4 &Array) { ExchList *MyList = &MyNeighbor->RecvLists[MyElem]; - int NK = Array.extent(0); - int NJ = 
Array.extent(2); + int NK = Array.extent(0); + int NJ = Array.extent(2); for (int K = 0; K < NK; ++K) { for (int ILayer = 0; ILayer < NumLayers; ++ILayer) { @@ -1145,8 +1145,8 @@ int Halo::unpackBuffer(HostArray3DR4 &Array) { int Halo::unpackBuffer(HostArray3DR8 &Array) { ExchList *MyList = &MyNeighbor->RecvLists[MyElem]; - int NK = Array.extent(0); - int NJ = Array.extent(2); + int NK = Array.extent(0); + int NJ = Array.extent(2); for (int K = 0; K < NK; ++K) { for (int ILayer = 0; ILayer < NumLayers; ++ILayer) { @@ -1168,9 +1168,9 @@ int Halo::unpackBuffer(HostArray3DR8 &Array) { int Halo::unpackBuffer(HostArray4DI4 &Array) { ExchList *MyList = &MyNeighbor->RecvLists[MyElem]; - int NL = Array.extent(0); - int NK = Array.extent(1); - int NJ = Array.extent(3); + int NL = Array.extent(0); + int NK = Array.extent(1); + int NJ = Array.extent(3); for (int L = 0; L < NL; ++L) { for (int K = 0; K < NK; ++K) { @@ -1195,9 +1195,9 @@ int Halo::unpackBuffer(HostArray4DI4 &Array) { int Halo::unpackBuffer(HostArray4DI8 &Array) { ExchList *MyList = &MyNeighbor->RecvLists[MyElem]; - int NL = Array.extent(0); - int NK = Array.extent(1); - int NJ = Array.extent(3); + int NL = Array.extent(0); + int NK = Array.extent(1); + int NJ = Array.extent(3); for (int L = 0; L < NL; ++L) { for (int K = 0; K < NK; ++K) { @@ -1222,9 +1222,9 @@ int Halo::unpackBuffer(HostArray4DI8 &Array) { int Halo::unpackBuffer(HostArray4DR4 &Array) { ExchList *MyList = &MyNeighbor->RecvLists[MyElem]; - int NL = Array.extent(0); - int NK = Array.extent(1); - int NJ = Array.extent(3); + int NL = Array.extent(0); + int NK = Array.extent(1); + int NJ = Array.extent(3); for (int L = 0; L < NL; ++L) { for (int K = 0; K < NK; ++K) { @@ -1249,9 +1249,9 @@ int Halo::unpackBuffer(HostArray4DR4 &Array) { int Halo::unpackBuffer(HostArray4DR8 &Array) { ExchList *MyList = &MyNeighbor->RecvLists[MyElem]; - int NL = Array.extent(0); - int NK = Array.extent(1); - int NJ = Array.extent(3); + int NL = Array.extent(0); + int NK 
= Array.extent(1); + int NJ = Array.extent(3); for (int L = 0; L < NL; ++L) { for (int K = 0; K < NK; ++K) { @@ -1276,10 +1276,10 @@ int Halo::unpackBuffer(HostArray4DR8 &Array) { int Halo::unpackBuffer(HostArray5DI4 &Array) { ExchList *MyList = &MyNeighbor->RecvLists[MyElem]; - int NM = Array.extent(0); - int NL = Array.extent(1); - int NK = Array.extent(2); - int NJ = Array.extent(4); + int NM = Array.extent(0); + int NL = Array.extent(1); + int NK = Array.extent(2); + int NJ = Array.extent(4); for (int M = 0; M < NM; ++M) { for (int L = 0; L < NL; ++L) { @@ -1306,10 +1306,10 @@ int Halo::unpackBuffer(HostArray5DI4 &Array) { int Halo::unpackBuffer(HostArray5DI8 &Array) { ExchList *MyList = &MyNeighbor->RecvLists[MyElem]; - int NM = Array.extent(0); - int NL = Array.extent(1); - int NK = Array.extent(2); - int NJ = Array.extent(4); + int NM = Array.extent(0); + int NL = Array.extent(1); + int NK = Array.extent(2); + int NJ = Array.extent(4); for (int M = 0; M < NM; ++M) { for (int L = 0; L < NL; ++L) { @@ -1336,10 +1336,10 @@ int Halo::unpackBuffer(HostArray5DI8 &Array) { int Halo::unpackBuffer(HostArray5DR4 &Array) { ExchList *MyList = &MyNeighbor->RecvLists[MyElem]; - int NM = Array.extent(0); - int NL = Array.extent(1); - int NK = Array.extent(2); - int NJ = Array.extent(4); + int NM = Array.extent(0); + int NL = Array.extent(1); + int NK = Array.extent(2); + int NJ = Array.extent(4); for (int M = 0; M < NM; ++M) { for (int L = 0; L < NL; ++L) { @@ -1366,10 +1366,10 @@ int Halo::unpackBuffer(HostArray5DR4 &Array) { int Halo::unpackBuffer(HostArray5DR8 &Array) { ExchList *MyList = &MyNeighbor->RecvLists[MyElem]; - int NM = Array.extent(0); - int NL = Array.extent(1); - int NK = Array.extent(2); - int NJ = Array.extent(4); + int NM = Array.extent(0); + int NL = Array.extent(1); + int NK = Array.extent(2); + int NJ = Array.extent(4); for (int M = 0; M < NM; ++M) { for (int L = 0; L < NL; ++L) { diff --git a/components/omega/src/base/Halo.h 
b/components/omega/src/base/Halo.h index a2bf1068d839..7c77163c8bb6 100644 --- a/components/omega/src/base/Halo.h +++ b/components/omega/src/base/Halo.h @@ -226,8 +226,8 @@ class Halo { Halo(const MachEnv *InEnv, const Decomp *InDecomp); //--------------------------------------------------------------------------- - // Function template to perform a full halo exchange on the input Kokkos array - // of any supported type defined on the input index space ThisElem + // Function template to perform a full halo exchange on the input Kokkos + // array of any supported type defined on the input index space ThisElem template int exchangeFullArrayHalo(T &Array, // Kokkos array of any type From eb7712e15119c3807013ebe1945f77b39b904f53 Mon Sep 17 00:00:00 2001 From: Youngsung Kim Date: Mon, 1 Apr 2024 15:14:18 -0400 Subject: [PATCH 03/11] fix formatting issues --- components/omega/src/base/DataTypes.h | 2 +- components/omega/src/base/Decomp.cpp | 28 ++++++++++---------- components/omega/src/infra/LogFormatters.h | 8 +++--- components/omega/test/base/DataTypesTest.cpp | 15 ++++------- 4 files changed, 24 insertions(+), 29 deletions(-) diff --git a/components/omega/src/base/DataTypes.h b/components/omega/src/base/DataTypes.h index 4a9d62c6a179..28ed75f65043 100644 --- a/components/omega/src/base/DataTypes.h +++ b/components/omega/src/base/DataTypes.h @@ -30,7 +30,7 @@ using R8 = double; ///< alias for 64-bit (double prec) real #ifdef SINGLE_PRECISION using Real = float; #else -using Real = double; +using Real = double; #endif // user-defined literal for generic reals diff --git a/components/omega/src/base/Decomp.cpp b/components/omega/src/base/Decomp.cpp index cb4724ecb443..51c18b4ae995 100644 --- a/components/omega/src/base/Decomp.cpp +++ b/components/omega/src/base/Decomp.cpp @@ -216,7 +216,7 @@ int readMesh(const int MeshFileID, // file ID for open mesh file OnVertexOffset[Vrtx * VertexDegree + Cell] = VertexGlob * VertexDegree + Cell; } // end loop VertexDegree - } // end 
loop NVerticesLocal + } // end loop NVerticesLocal // Create the parallel IO decompositions IO::Rearranger Rearr = IO::RearrBox; @@ -765,7 +765,7 @@ int Decomp::partCellsKWay( for (int n = 0; n < CellsOnCellSize; ++n) { CellsOnCellBuf[n] = CellsOnCellInit[n]; } // end loop CellsOnCell - } // end if this is MyTask + } // end if this is MyTask Err = MPI_Bcast(&CellsOnCellBuf[0], CellsOnCellSize, MPI_INT32_T, Task, Comm); if (Err != 0) { @@ -794,8 +794,8 @@ int Decomp::partCellsKWay( ++Add; // increment address counter } } - } // end cell loop for buffer - } // end task loop + } // end cell loop for buffer + } // end task loop AdjAdd[NCellsGlobal] = Add; // Add the ending address // Set up remaining partitioning variables @@ -878,7 +878,7 @@ int Decomp::partCellsKWay( CellLocTmp[2 * LocalAdd] = TaskLoc; CellLocTmp[2 * LocalAdd + 1] = LocalAdd; } // end if my task - } // end loop over all cells + } // end loop over all cells // Find and add the halo cells to the cell list. Here we use the // adjacency array to find the active neighbor cells and store if they @@ -917,7 +917,7 @@ int Decomp::partCellsKWay( HaloList.insert(NbrID); CellsInList.insert(NbrID); } // end search for existing entry - } // end if not on task + } // end if not on task } // end loop over neighbors @@ -1148,8 +1148,8 @@ int Decomp::partEdges( ++HaloCount; EdgesAll.erase(EdgeGlob); } // end if valid edge - } // end loop over cell edges - } // end cell loop + } // end loop over cell edges + } // end cell loop // reset address range for next halo and set NEdgesHalo CellStart = CellEnd; if ((Halo + 1) < HaloWidth) @@ -1553,8 +1553,8 @@ int Decomp::rearrangeCellArrays( } NEdgesOnCellTmp(LocCell) = EdgeCount; } // end if local cell - } // end loop over chunk of global cells - } // end loop over MPI tasks + } // end loop over chunk of global cells + } // end loop over MPI tasks // Copy to final location on host - wait to create device copies until // the entries are translated to local addresses rather 
than global IDs @@ -1689,8 +1689,8 @@ int Decomp::rearrangeEdgeArrays( } NEdgesOnEdgeTmp(LocEdge) = EdgeCount; } // end if local cell - } // end loop over chunk of global cells - } // end loop over MPI tasks + } // end loop over chunk of global cells + } // end loop over MPI tasks // Copy to final location on host - wait to create device copies until // the entries are translated to local addresses rather than global IDs @@ -1798,8 +1798,8 @@ int Decomp::rearrangeVertexArrays( ++BufAdd; } } // end if local cell - } // end loop over chunk of global cells - } // end loop over MPI tasks + } // end loop over chunk of global cells + } // end loop over MPI tasks // Copy to final location on host - wait to create device copies until // the entries are translated to local addresses rather than global IDs diff --git a/components/omega/src/infra/LogFormatters.h b/components/omega/src/infra/LogFormatters.h index d1eed707388a..029f27a718b6 100644 --- a/components/omega/src/infra/LogFormatters.h +++ b/components/omega/src/infra/LogFormatters.h @@ -31,8 +31,8 @@ template <> struct fmt::formatter : fmt::formatter { - auto format(OMEGA::HostArray1DReal my, format_context &ctx) - -> decltype(ctx.out()) { + auto format(OMEGA::HostArray1DReal my, + format_context &ctx) -> decltype(ctx.out()) { #ifdef OMEGA_DEBUG return fmt::format_to( ctx.out(), "[data type of '{}' is HostArray1DReal.]", my.label()); @@ -44,8 +44,8 @@ struct fmt::formatter : fmt::formatter { template <> struct fmt::formatter : fmt::formatter { - auto format(OMEGA::HostArray2DReal my, format_context &ctx) - -> decltype(ctx.out()) { + auto format(OMEGA::HostArray2DReal my, + format_context &ctx) -> decltype(ctx.out()) { #ifdef OMEGA_DEBUG return fmt::format_to( ctx.out(), "[data type of '{}' is HostArray2DReal.]", my.label()); diff --git a/components/omega/test/base/DataTypesTest.cpp b/components/omega/test/base/DataTypesTest.cpp index 82f8ff45e67c..4cf1dfeda6ae 100644 --- a/components/omega/test/base/DataTypesTest.cpp 
+++ b/components/omega/test/base/DataTypesTest.cpp @@ -99,8 +99,7 @@ int main(int argc, char *argv[]) { RefArr1DI4(i) = i; } - parallelFor( - {NumCells}, KOKKOS_LAMBDA(int i) { TstArr1DI4(i) = i; }); + parallelFor({NumCells}, KOKKOS_LAMBDA(int i) { TstArr1DI4(i) = i; }); Kokkos::fence(); @@ -284,8 +283,7 @@ int main(int argc, char *argv[]) { RefArr1DI8(i) = i; } - parallelFor( - {NumCells}, KOKKOS_LAMBDA(int i) { TstArr1DI8(i) = i; }); + parallelFor({NumCells}, KOKKOS_LAMBDA(int i) { TstArr1DI8(i) = i; }); Kokkos::fence(); @@ -469,8 +467,7 @@ int main(int argc, char *argv[]) { RefArr1DR4(i) = i; } - parallelFor( - {NumCells}, KOKKOS_LAMBDA(int i) { TstArr1DR4(i) = i; }); + parallelFor({NumCells}, KOKKOS_LAMBDA(int i) { TstArr1DR4(i) = i; }); Kokkos::fence(); @@ -654,8 +651,7 @@ int main(int argc, char *argv[]) { RefArr1DR8(i) = i; } - parallelFor( - {NumCells}, KOKKOS_LAMBDA(int i) { TstArr1DR8(i) = i; }); + parallelFor({NumCells}, KOKKOS_LAMBDA(int i) { TstArr1DR8(i) = i; }); Kokkos::fence(); @@ -839,8 +835,7 @@ int main(int argc, char *argv[]) { RefArr1DReal(i) = i; } - parallelFor( - {NumCells}, KOKKOS_LAMBDA(int i) { TstArr1DReal(i) = i; }); + parallelFor({NumCells}, KOKKOS_LAMBDA(int i) { TstArr1DReal(i) = i; }); Kokkos::fence(); From a95b11e4013631046484600fc1d0856d945d1972 Mon Sep 17 00:00:00 2001 From: Youngsung Kim Date: Mon, 1 Apr 2024 15:17:42 -0400 Subject: [PATCH 04/11] fix formatting issues --- components/omega/create_scripts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/omega/create_scripts.py b/components/omega/create_scripts.py index c953fd7e35aa..0736beb58632 100644 --- a/components/omega/create_scripts.py +++ b/components/omega/create_scripts.py @@ -355,7 +355,7 @@ def generate_scripts(self, outvar): f.write(f"export {key}=\"{value}\"\n") if "OMP_NUM_THREADS" not in self.__OMEGA_SCRIPT_EXPORTS__: - f.write(f"export OMP_NUM_THREADS=\"1\"\n") + f.write("export OMP_NUM_THREADS=\"1\"\n") with open(omega_build, "w") 
as f: f.write("#!/usr/bin/env bash\n\n") From eba04e3b23dc54ca344e9875a05f57a31454cef1 Mon Sep 17 00:00:00 2001 From: Youngsung Kim Date: Fri, 5 Apr 2024 12:53:52 -0400 Subject: [PATCH 05/11] Applies reviews for PR #74 * adds OMP_PROC_BIND and OMP_PLACES env. variables * adds -cpu-bind=cores srun flag to fix a performance bug on PM-GPU as a temporary solution * renamed createHostCopy and createDeviceCopy function names to createHostMirrorCopy and createDeviceMirrorCopy each * streamlined ctest script * undef MAKE_OMEGA_VIEW_DIMS * deletes else branch for creatXXXCopy functions --- components/omega/OmegaBuild.cmake | 2 + components/omega/create_scripts.py | 6 ++ components/omega/doc/design/HorzMeshClass.md | 2 +- components/omega/doc/devGuide/DataTypes.md | 2 +- components/omega/doc/devGuide/HorzMesh.md | 2 +- components/omega/src/CMakeLists.txt | 42 ++++++++----- components/omega/src/base/DataTypes.h | 6 +- components/omega/src/base/Decomp.cpp | 66 ++++++++++---------- components/omega/src/infra/OmegaKokkos.h | 14 +---- components/omega/src/ocn/HorzMesh.cpp | 24 +++---- components/omega/test/CMakeLists.txt | 1 - components/omega/test/base/DataTypesTest.cpp | 65 ++++++++++--------- 12 files changed, 125 insertions(+), 107 deletions(-) diff --git a/components/omega/OmegaBuild.cmake b/components/omega/OmegaBuild.cmake index d1ef5652e3b9..81ec1212b404 100644 --- a/components/omega/OmegaBuild.cmake +++ b/components/omega/OmegaBuild.cmake @@ -459,6 +459,8 @@ macro(update_variables) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DOMEGA_TARGET_DEVICE") elseif(OMEGA_ARCH STREQUAL "OPENMP") + set(ENV{OMP_PROC_BIND} "spread") + set(ENV{OMP_PLACES} "threads") option(Kokkos_ENABLE_OPENMP "" ON) elseif(OMEGA_ARCH STREQUAL "THREADS") diff --git a/components/omega/create_scripts.py b/components/omega/create_scripts.py index 0736beb58632..4364cd6f0d33 100644 --- a/components/omega/create_scripts.py +++ b/components/omega/create_scripts.py @@ -357,6 +357,12 @@ def generate_scripts(self, 
outvar): if "OMP_NUM_THREADS" not in self.__OMEGA_SCRIPT_EXPORTS__: f.write("export OMP_NUM_THREADS=\"1\"\n") + if "OMP_PROC_BIND" not in self.__OMEGA_SCRIPT_EXPORTS__: + f.write("export OMP_PROC_BIND=\"spread\"\n") + + if "OMP_PLACES" not in self.__OMEGA_SCRIPT_EXPORTS__: + f.write("export OMP_PLACES=\"threads\"\n") + with open(omega_build, "w") as f: f.write("#!/usr/bin/env bash\n\n") diff --git a/components/omega/doc/design/HorzMeshClass.md b/components/omega/doc/design/HorzMeshClass.md index 9b983bba4b75..c1fb9d77ec50 100644 --- a/components/omega/doc/design/HorzMeshClass.md +++ b/components/omega/doc/design/HorzMeshClass.md @@ -113,7 +113,7 @@ The compute method will be a private method called by the constructor. It will b This method will be repsonsible for creating the device copies of the required mesh information on the host. It will be a private method called by the constructor. ```c++ -AreaCell = OMEGA::createDeviceCopy(AreaCellH) +AreaCell = OMEGA::createDeviceMirrorCopy(AreaCellH) ``` diff --git a/components/omega/doc/devGuide/DataTypes.md b/components/omega/doc/devGuide/DataTypes.md index 8fdb15a12ee4..41f6895311f3 100644 --- a/components/omega/doc/devGuide/DataTypes.md +++ b/components/omega/doc/devGuide/DataTypes.md @@ -48,7 +48,7 @@ As an example, we can define and allocate a device and host array using: Alternatively, you can use the copy functions to create a host copy from the device or vice versa. 
```c++ - auto TemperatureHost = OMEGA::createHostCopy(Temperature); + auto TemperatureHost = OMEGA::createHostMirrorCopy(Temperature); ``` Finally, the arrays can be deallocated explicity using the class deallocate method, eg `Temperature.deallocate();` or if they are local diff --git a/components/omega/doc/devGuide/HorzMesh.md b/components/omega/doc/devGuide/HorzMesh.md index 0e8e39896666..4e31a17c141e 100644 --- a/components/omega/doc/devGuide/HorzMesh.md +++ b/components/omega/doc/devGuide/HorzMesh.md @@ -48,7 +48,7 @@ For member variables that are host arrays, variable names are appended with an `H`. Array variable names not ending in `H` are device arrays. The copy from host to device array is performed in the constructor via: ```c++ -AreaCell = OMEGA::createDeviceCopy(AreaCellH); +AreaCell = OMEGA::createDeviceMirrorCopy(AreaCellH); ``` The device arrays are deallocated by the `HorzMesh::clear()` method, which is diff --git a/components/omega/src/CMakeLists.txt b/components/omega/src/CMakeLists.txt index c67914cb72be..09a078eeceac 100644 --- a/components/omega/src/CMakeLists.txt +++ b/components/omega/src/CMakeLists.txt @@ -3,34 +3,46 @@ # Add source files for the library file(GLOB _LIBSRC_FILES infra/*.cpp base/*.cpp ocn/*.cpp) - add_library(${OMEGA_LIB_NAME} ${_LIBSRC_FILES}) +add_library(${OMEGA_LIB_NAME} ${_LIBSRC_FILES}) - target_include_directories( +target_include_directories( ${OMEGA_LIB_NAME} - PRIVATE + PUBLIC ${OMEGA_SOURCE_DIR}/src/base ${OMEGA_SOURCE_DIR}/src/infra ${OMEGA_SOURCE_DIR}/src/ocn ${Parmetis_INCLUDE_DIRS} - ) +) - target_compile_definitions( +target_compile_definitions( ${OMEGA_LIB_NAME} PUBLIC OMEGA_ARCH=${OMEGA_ARCH} - ) +) -# add linker options target_link_options( ${OMEGA_LIB_NAME} - PRIVATE + PUBLIC ${OMEGA_LINK_OPTIONS} ) -target_link_libraries(${OMEGA_LIB_NAME} Kokkos::kokkos spdlog pioc yaml-cpp parmetis metis) +target_link_libraries( + ${OMEGA_LIB_NAME} + PUBLIC + Kokkos::kokkos + spdlog + pioc + yaml-cpp + parmetis + metis 
+) if(GKlib_FOUND) - target_link_libraries(${OMEGA_LIB_NAME} gklib) + target_link_libraries( + ${OMEGA_LIB_NAME} + PUBLIC + gklib + ) endif() # build Omega executable @@ -50,11 +62,11 @@ if(OMEGA_BUILD_EXECUTABLE) # "-L${CMAKE_CURRENT_SOURCE_DIR}/infra" # ) - target_compile_definitions( - ${OMEGA_EXE_NAME} - PUBLIC - OMEGA_ARCH=${OMEGA_ARCH} - ) +# target_compile_definitions( +# ${OMEGA_EXE_NAME} +# PUBLIC +# OMEGA_ARCH=${OMEGA_ARCH} +# ) target_link_libraries(${OMEGA_EXE_NAME} ${OMEGA_LIB_NAME}) diff --git a/components/omega/src/base/DataTypes.h b/components/omega/src/base/DataTypes.h index 28ed75f65043..15af6f964fee 100644 --- a/components/omega/src/base/DataTypes.h +++ b/components/omega/src/base/DataTypes.h @@ -30,7 +30,7 @@ using R8 = double; ///< alias for 64-bit (double prec) real #ifdef SINGLE_PRECISION using Real = float; #else -using Real = double; +using Real = double; #endif // user-defined literal for generic reals @@ -139,6 +139,10 @@ MAKE_OMEGA_VIEW_TYPES(Array, View, MemLayout, MemSpace) // Aliases for Kokkos host arrays of various dimensions and types MAKE_OMEGA_VIEW_TYPES(HostArray, View, HostMemLayout, HostMemSpace) + +#undef MAKE_OMEGA_VIEW_TYPES +#undef MAKE_OMEGA_VIEW_DIMS + } // end namespace OMEGA //===----------------------------------------------------------------------===// diff --git a/components/omega/src/base/Decomp.cpp b/components/omega/src/base/Decomp.cpp index 51c18b4ae995..96a0a1afd673 100644 --- a/components/omega/src/base/Decomp.cpp +++ b/components/omega/src/base/Decomp.cpp @@ -216,7 +216,7 @@ int readMesh(const int MeshFileID, // file ID for open mesh file OnVertexOffset[Vrtx * VertexDegree + Cell] = VertexGlob * VertexDegree + Cell; } // end loop VertexDegree - } // end loop NVerticesLocal + } // end loop NVerticesLocal // Create the parallel IO decompositions IO::Rearranger Rearr = IO::RearrBox; @@ -629,30 +629,30 @@ Decomp::Decomp( // Create device copies of all arrays - NCellsHalo = createDeviceCopy(NCellsHaloH); - CellID 
= createDeviceCopy(CellIDH); - CellLoc = createDeviceCopy(CellLocH); + NCellsHalo = createDeviceMirrorCopy(NCellsHaloH); + CellID = createDeviceMirrorCopy(CellIDH); + CellLoc = createDeviceMirrorCopy(CellLocH); - NEdgesHalo = createDeviceCopy(NEdgesHaloH); - EdgeID = createDeviceCopy(EdgeIDH); - EdgeLoc = createDeviceCopy(EdgeLocH); + NEdgesHalo = createDeviceMirrorCopy(NEdgesHaloH); + EdgeID = createDeviceMirrorCopy(EdgeIDH); + EdgeLoc = createDeviceMirrorCopy(EdgeLocH); - NVerticesHalo = createDeviceCopy(NVerticesHaloH); - VertexID = createDeviceCopy(VertexIDH); - VertexLoc = createDeviceCopy(VertexLocH); + NVerticesHalo = createDeviceMirrorCopy(NVerticesHaloH); + VertexID = createDeviceMirrorCopy(VertexIDH); + VertexLoc = createDeviceMirrorCopy(VertexLocH); - CellsOnCell = createDeviceCopy(CellsOnCellH); - EdgesOnCell = createDeviceCopy(EdgesOnCellH); - VerticesOnCell = createDeviceCopy(VerticesOnCellH); - NEdgesOnCell = createDeviceCopy(NEdgesOnCellH); + CellsOnCell = createDeviceMirrorCopy(CellsOnCellH); + EdgesOnCell = createDeviceMirrorCopy(EdgesOnCellH); + VerticesOnCell = createDeviceMirrorCopy(VerticesOnCellH); + NEdgesOnCell = createDeviceMirrorCopy(NEdgesOnCellH); - CellsOnEdge = createDeviceCopy(CellsOnEdgeH); - EdgesOnEdge = createDeviceCopy(EdgesOnEdgeH); - VerticesOnEdge = createDeviceCopy(VerticesOnEdgeH); - NEdgesOnEdge = createDeviceCopy(NEdgesOnEdgeH); + CellsOnEdge = createDeviceMirrorCopy(CellsOnEdgeH); + EdgesOnEdge = createDeviceMirrorCopy(EdgesOnEdgeH); + VerticesOnEdge = createDeviceMirrorCopy(VerticesOnEdgeH); + NEdgesOnEdge = createDeviceMirrorCopy(NEdgesOnEdgeH); - CellsOnVertex = createDeviceCopy(CellsOnVertexH); - EdgesOnVertex = createDeviceCopy(EdgesOnVertexH); + CellsOnVertex = createDeviceMirrorCopy(CellsOnVertexH); + EdgesOnVertex = createDeviceMirrorCopy(EdgesOnVertexH); // Assign this as the default decomposition AllDecomps.emplace(Name, *this); @@ -765,7 +765,7 @@ int Decomp::partCellsKWay( for (int n = 0; n < CellsOnCellSize; 
++n) { CellsOnCellBuf[n] = CellsOnCellInit[n]; } // end loop CellsOnCell - } // end if this is MyTask + } // end if this is MyTask Err = MPI_Bcast(&CellsOnCellBuf[0], CellsOnCellSize, MPI_INT32_T, Task, Comm); if (Err != 0) { @@ -794,8 +794,8 @@ int Decomp::partCellsKWay( ++Add; // increment address counter } } - } // end cell loop for buffer - } // end task loop + } // end cell loop for buffer + } // end task loop AdjAdd[NCellsGlobal] = Add; // Add the ending address // Set up remaining partitioning variables @@ -878,7 +878,7 @@ int Decomp::partCellsKWay( CellLocTmp[2 * LocalAdd] = TaskLoc; CellLocTmp[2 * LocalAdd + 1] = LocalAdd; } // end if my task - } // end loop over all cells + } // end loop over all cells // Find and add the halo cells to the cell list. Here we use the // adjacency array to find the active neighbor cells and store if they @@ -917,7 +917,7 @@ int Decomp::partCellsKWay( HaloList.insert(NbrID); CellsInList.insert(NbrID); } // end search for existing entry - } // end if not on task + } // end if not on task } // end loop over neighbors @@ -1148,8 +1148,8 @@ int Decomp::partEdges( ++HaloCount; EdgesAll.erase(EdgeGlob); } // end if valid edge - } // end loop over cell edges - } // end cell loop + } // end loop over cell edges + } // end cell loop // reset address range for next halo and set NEdgesHalo CellStart = CellEnd; if ((Halo + 1) < HaloWidth) @@ -1553,8 +1553,8 @@ int Decomp::rearrangeCellArrays( } NEdgesOnCellTmp(LocCell) = EdgeCount; } // end if local cell - } // end loop over chunk of global cells - } // end loop over MPI tasks + } // end loop over chunk of global cells + } // end loop over MPI tasks // Copy to final location on host - wait to create device copies until // the entries are translated to local addresses rather than global IDs @@ -1689,8 +1689,8 @@ int Decomp::rearrangeEdgeArrays( } NEdgesOnEdgeTmp(LocEdge) = EdgeCount; } // end if local cell - } // end loop over chunk of global cells - } // end loop over MPI tasks + } // 
end loop over chunk of global cells + } // end loop over MPI tasks // Copy to final location on host - wait to create device copies until // the entries are translated to local addresses rather than global IDs @@ -1798,8 +1798,8 @@ int Decomp::rearrangeVertexArrays( ++BufAdd; } } // end if local cell - } // end loop over chunk of global cells - } // end loop over MPI tasks + } // end loop over chunk of global cells + } // end loop over MPI tasks // Copy to final location on host - wait to create device copies until // the entries are translated to local addresses rather than global IDs diff --git a/components/omega/src/infra/OmegaKokkos.h b/components/omega/src/infra/OmegaKokkos.h index 2e23915b829f..07d0cd93f9bb 100644 --- a/components/omega/src/infra/OmegaKokkos.h +++ b/components/omega/src/infra/OmegaKokkos.h @@ -18,28 +18,18 @@ namespace OMEGA { using ExecSpace = MemSpace::execution_space; using HostExecSpace = HostMemSpace::execution_space; -#ifdef OMEGA_TARGET_DEVICE - template -auto createHostCopy(const V &view) +auto createHostMirrorCopy(const V &view) -> Kokkos::View { return Kokkos::create_mirror_view_and_copy(HostExecSpace(), view); } template -auto createDeviceCopy(const V &view) +auto createDeviceMirrorCopy(const V &view) -> Kokkos::View { return Kokkos::create_mirror_view_and_copy(ExecSpace(), view); } -#else - -template V createHostCopy(const V &view) { return view; } - -template V createDeviceCopy(const V &view) { return view; } - -#endif - // function alias to follow Camel Naming Convention template void deepCopy(D &dst, const S &src) { Kokkos::deep_copy(dst, src); diff --git a/components/omega/src/ocn/HorzMesh.cpp b/components/omega/src/ocn/HorzMesh.cpp index af4012739417..5b0585f12c1e 100644 --- a/components/omega/src/ocn/HorzMesh.cpp +++ b/components/omega/src/ocn/HorzMesh.cpp @@ -511,7 +511,7 @@ void HorzMesh::readCoriolis() { // Compute the sign of edge contributions to a cell/vertex for each edge void HorzMesh::computeEdgeSign() { - auto 
EdgeSignOnCell = Array2DR8("EdgeSignOnCell", NCellsSize, MaxEdges); + EdgeSignOnCell = Array2DR8("EdgeSignOnCell", NCellsSize, MaxEdges); OMEGA_SCOPE(o_NEdgesOnCell, NEdgesOnCell); OMEGA_SCOPE(o_EdgesOnCell, EdgesOnCell); @@ -532,7 +532,7 @@ void HorzMesh::computeEdgeSign() { } }); - EdgeSignOnCellH = createHostCopy(EdgeSignOnCell); + EdgeSignOnCellH = createHostMirrorCopy(EdgeSignOnCell); EdgeSignOnVertex = Array2DR8("EdgeSignOnVertex", NVerticesSize, VertexDegree); @@ -556,22 +556,22 @@ void HorzMesh::computeEdgeSign() { } }); - EdgeSignOnVertexH = createHostCopy(EdgeSignOnVertex); + EdgeSignOnVertexH = createHostMirrorCopy(EdgeSignOnVertex); } // end computeEdgeSign //------------------------------------------------------------------------------ // Perform copy to device for mesh variables void HorzMesh::copyToDevice() { - AreaCell = createDeviceCopy(AreaCellH); - AreaTriangle = createDeviceCopy(AreaTriangleH); - KiteAreasOnVertex = createDeviceCopy(KiteAreasOnVertexH); - DcEdge = createDeviceCopy(DcEdgeH); - DvEdge = createDeviceCopy(DvEdgeH); - AngleEdge = createDeviceCopy(AngleEdgeH); - WeightsOnEdge = createDeviceCopy(WeightsOnEdgeH); - FVertex = createDeviceCopy(FVertexH); - BottomDepth = createDeviceCopy(BottomDepthH); + AreaCell = createDeviceMirrorCopy(AreaCellH); + AreaTriangle = createDeviceMirrorCopy(AreaTriangleH); + KiteAreasOnVertex = createDeviceMirrorCopy(KiteAreasOnVertexH); + DcEdge = createDeviceMirrorCopy(DcEdgeH); + DvEdge = createDeviceMirrorCopy(DvEdgeH); + AngleEdge = createDeviceMirrorCopy(AngleEdgeH); + WeightsOnEdge = createDeviceMirrorCopy(WeightsOnEdgeH); + FVertex = createDeviceMirrorCopy(FVertexH); + BottomDepth = createDeviceMirrorCopy(BottomDepthH); } // end copyToDevice diff --git a/components/omega/test/CMakeLists.txt b/components/omega/test/CMakeLists.txt index b5d3dfccb69c..884640ada95f 100644 --- a/components/omega/test/CMakeLists.txt +++ b/components/omega/test/CMakeLists.txt @@ -122,7 +122,6 @@ 
target_link_libraries(testTimeMgr.exe ${OMEGA_LIB_NAME}) add_test(NAME TIMEMGR_TEST COMMAND ./testTimeMgr.exe) - ################## # Kokkos test ################## diff --git a/components/omega/test/base/DataTypesTest.cpp b/components/omega/test/base/DataTypesTest.cpp index 4cf1dfeda6ae..0f2d0caa7748 100644 --- a/components/omega/test/base/DataTypesTest.cpp +++ b/components/omega/test/base/DataTypesTest.cpp @@ -99,11 +99,12 @@ int main(int argc, char *argv[]) { RefArr1DI4(i) = i; } - parallelFor({NumCells}, KOKKOS_LAMBDA(int i) { TstArr1DI4(i) = i; }); + parallelFor( + {NumCells}, KOKKOS_LAMBDA(int i) { TstArr1DI4(i) = i; }); Kokkos::fence(); - auto TstHost1DI4 = createHostCopy(TstArr1DI4); + auto TstHost1DI4 = createHostMirrorCopy(TstArr1DI4); int icount = 0; for (int i = 0; i < NumCells; ++i) { @@ -132,7 +133,7 @@ int main(int argc, char *argv[]) { Kokkos::fence(); - auto TstHost2DI4 = createHostCopy(TstArr2DI4); + auto TstHost2DI4 = createHostMirrorCopy(TstArr2DI4); icount = 0; for (int j = 0; j < NumCells; ++j) { @@ -167,7 +168,7 @@ int main(int argc, char *argv[]) { Kokkos::fence(); - auto TstHost3DI4 = createHostCopy(TstArr3DI4); + auto TstHost3DI4 = createHostMirrorCopy(TstArr3DI4); icount = 0; for (int k = 0; k < NumTracers; ++k) { @@ -208,7 +209,7 @@ int main(int argc, char *argv[]) { Kokkos::fence(); - auto TstHost4DI4 = createHostCopy(TstArr4DI4); + auto TstHost4DI4 = createHostMirrorCopy(TstArr4DI4); icount = 0; for (int m = 0; m < NumTimeLvls; ++m) { @@ -253,7 +254,7 @@ int main(int argc, char *argv[]) { Kokkos::fence(); - auto TstHost5DI4 = createHostCopy(TstArr5DI4); + auto TstHost5DI4 = createHostMirrorCopy(TstArr5DI4); icount = 0; for (int n = 0; n < NumExtra; ++n) { @@ -283,11 +284,12 @@ int main(int argc, char *argv[]) { RefArr1DI8(i) = i; } - parallelFor({NumCells}, KOKKOS_LAMBDA(int i) { TstArr1DI8(i) = i; }); + parallelFor( + {NumCells}, KOKKOS_LAMBDA(int i) { TstArr1DI8(i) = i; }); Kokkos::fence(); - auto TstHost1DI8 = 
createHostCopy(TstArr1DI8); + auto TstHost1DI8 = createHostMirrorCopy(TstArr1DI8); icount = 0; for (int i = 0; i < NumCells; ++i) { @@ -316,7 +318,7 @@ int main(int argc, char *argv[]) { Kokkos::fence(); - auto TstHost2DI8 = createHostCopy(TstArr2DI8); + auto TstHost2DI8 = createHostMirrorCopy(TstArr2DI8); icount = 0; for (int j = 0; j < NumCells; ++j) { @@ -351,7 +353,7 @@ int main(int argc, char *argv[]) { Kokkos::fence(); - auto TstHost3DI8 = createHostCopy(TstArr3DI8); + auto TstHost3DI8 = createHostMirrorCopy(TstArr3DI8); icount = 0; for (int k = 0; k < NumTracers; ++k) { @@ -392,7 +394,7 @@ int main(int argc, char *argv[]) { Kokkos::fence(); - auto TstHost4DI8 = createHostCopy(TstArr4DI8); + auto TstHost4DI8 = createHostMirrorCopy(TstArr4DI8); icount = 0; for (int m = 0; m < NumTimeLvls; ++m) { @@ -437,7 +439,7 @@ int main(int argc, char *argv[]) { Kokkos::fence(); - auto TstHost5DI8 = createHostCopy(TstArr5DI8); + auto TstHost5DI8 = createHostMirrorCopy(TstArr5DI8); icount = 0; for (int n = 0; n < NumExtra; ++n) { @@ -467,11 +469,12 @@ int main(int argc, char *argv[]) { RefArr1DR4(i) = i; } - parallelFor({NumCells}, KOKKOS_LAMBDA(int i) { TstArr1DR4(i) = i; }); + parallelFor( + {NumCells}, KOKKOS_LAMBDA(int i) { TstArr1DR4(i) = i; }); Kokkos::fence(); - auto TstHost1DR4 = createHostCopy(TstArr1DR4); + auto TstHost1DR4 = createHostMirrorCopy(TstArr1DR4); icount = 0; for (int i = 0; i < NumCells; ++i) { @@ -500,7 +503,7 @@ int main(int argc, char *argv[]) { Kokkos::fence(); - auto TstHost2DR4 = createHostCopy(TstArr2DR4); + auto TstHost2DR4 = createHostMirrorCopy(TstArr2DR4); icount = 0; for (int j = 0; j < NumCells; ++j) { @@ -535,7 +538,7 @@ int main(int argc, char *argv[]) { Kokkos::fence(); - auto TstHost3DR4 = createHostCopy(TstArr3DR4); + auto TstHost3DR4 = createHostMirrorCopy(TstArr3DR4); icount = 0; for (int k = 0; k < NumTracers; ++k) { @@ -576,7 +579,7 @@ int main(int argc, char *argv[]) { Kokkos::fence(); - auto TstHost4DR4 = 
createHostCopy(TstArr4DR4); + auto TstHost4DR4 = createHostMirrorCopy(TstArr4DR4); icount = 0; for (int m = 0; m < NumTimeLvls; ++m) { @@ -621,7 +624,7 @@ int main(int argc, char *argv[]) { Kokkos::fence(); - auto TstHost5DR4 = createHostCopy(TstArr5DR4); + auto TstHost5DR4 = createHostMirrorCopy(TstArr5DR4); icount = 0; for (int n = 0; n < NumExtra; ++n) { @@ -651,11 +654,12 @@ int main(int argc, char *argv[]) { RefArr1DR8(i) = i; } - parallelFor({NumCells}, KOKKOS_LAMBDA(int i) { TstArr1DR8(i) = i; }); + parallelFor( + {NumCells}, KOKKOS_LAMBDA(int i) { TstArr1DR8(i) = i; }); Kokkos::fence(); - auto TstHost1DR8 = createHostCopy(TstArr1DR8); + auto TstHost1DR8 = createHostMirrorCopy(TstArr1DR8); icount = 0; for (int i = 0; i < NumCells; ++i) { @@ -684,7 +688,7 @@ int main(int argc, char *argv[]) { Kokkos::fence(); - auto TstHost2DR8 = createHostCopy(TstArr2DR8); + auto TstHost2DR8 = createHostMirrorCopy(TstArr2DR8); icount = 0; for (int j = 0; j < NumCells; ++j) { @@ -719,7 +723,7 @@ int main(int argc, char *argv[]) { Kokkos::fence(); - auto TstHost3DR8 = createHostCopy(TstArr3DR8); + auto TstHost3DR8 = createHostMirrorCopy(TstArr3DR8); icount = 0; for (int k = 0; k < NumTracers; ++k) { @@ -760,7 +764,7 @@ int main(int argc, char *argv[]) { Kokkos::fence(); - auto TstHost4DR8 = createHostCopy(TstArr4DR8); + auto TstHost4DR8 = createHostMirrorCopy(TstArr4DR8); icount = 0; for (int m = 0; m < NumTimeLvls; ++m) { @@ -805,7 +809,7 @@ int main(int argc, char *argv[]) { Kokkos::fence(); - auto TstHost5DR8 = createHostCopy(TstArr5DR8); + auto TstHost5DR8 = createHostMirrorCopy(TstArr5DR8); icount = 0; for (int n = 0; n < NumExtra; ++n) { @@ -835,11 +839,12 @@ int main(int argc, char *argv[]) { RefArr1DReal(i) = i; } - parallelFor({NumCells}, KOKKOS_LAMBDA(int i) { TstArr1DReal(i) = i; }); + parallelFor( + {NumCells}, KOKKOS_LAMBDA(int i) { TstArr1DReal(i) = i; }); Kokkos::fence(); - auto TstHost1DReal = createHostCopy(TstArr1DReal); + auto TstHost1DReal = 
createHostMirrorCopy(TstArr1DReal); icount = 0; for (int i = 0; i < NumCells; ++i) { @@ -868,7 +873,7 @@ int main(int argc, char *argv[]) { Kokkos::fence(); - auto TstHost2DReal = createHostCopy(TstArr2DReal); + auto TstHost2DReal = createHostMirrorCopy(TstArr2DReal); icount = 0; for (int j = 0; j < NumCells; ++j) { @@ -905,7 +910,7 @@ int main(int argc, char *argv[]) { Kokkos::fence(); - auto TstHost3DReal = createHostCopy(TstArr3DReal); + auto TstHost3DReal = createHostMirrorCopy(TstArr3DReal); icount = 0; for (int k = 0; k < NumTracers; ++k) { @@ -946,7 +951,7 @@ int main(int argc, char *argv[]) { Kokkos::fence(); - auto TstHost4DReal = createHostCopy(TstArr4DReal); + auto TstHost4DReal = createHostMirrorCopy(TstArr4DReal); icount = 0; for (int m = 0; m < NumTimeLvls; ++m) { @@ -991,7 +996,7 @@ int main(int argc, char *argv[]) { Kokkos::fence(); - auto TstHost5DReal = createHostCopy(TstArr5DReal); + auto TstHost5DReal = createHostMirrorCopy(TstArr5DReal); icount = 0; for (int n = 0; n < NumExtra; ++n) { From fe2fda447cc1fbca47e6197c401a5360c13718b6 Mon Sep 17 00:00:00 2001 From: Youngsung Kim Date: Fri, 5 Apr 2024 13:05:12 -0400 Subject: [PATCH 06/11] fix linting issues --- components/omega/src/base/DataTypes.h | 2 +- components/omega/src/base/Decomp.cpp | 28 ++++++++++---------- components/omega/test/base/DataTypesTest.cpp | 15 ++++------- 3 files changed, 20 insertions(+), 25 deletions(-) diff --git a/components/omega/src/base/DataTypes.h b/components/omega/src/base/DataTypes.h index 15af6f964fee..3098a381bfde 100644 --- a/components/omega/src/base/DataTypes.h +++ b/components/omega/src/base/DataTypes.h @@ -30,7 +30,7 @@ using R8 = double; ///< alias for 64-bit (double prec) real #ifdef SINGLE_PRECISION using Real = float; #else -using Real = double; +using Real = double; #endif // user-defined literal for generic reals diff --git a/components/omega/src/base/Decomp.cpp b/components/omega/src/base/Decomp.cpp index 96a0a1afd673..3387484782cb 100644 --- 
a/components/omega/src/base/Decomp.cpp +++ b/components/omega/src/base/Decomp.cpp @@ -216,7 +216,7 @@ int readMesh(const int MeshFileID, // file ID for open mesh file OnVertexOffset[Vrtx * VertexDegree + Cell] = VertexGlob * VertexDegree + Cell; } // end loop VertexDegree - } // end loop NVerticesLocal + } // end loop NVerticesLocal // Create the parallel IO decompositions IO::Rearranger Rearr = IO::RearrBox; @@ -765,7 +765,7 @@ int Decomp::partCellsKWay( for (int n = 0; n < CellsOnCellSize; ++n) { CellsOnCellBuf[n] = CellsOnCellInit[n]; } // end loop CellsOnCell - } // end if this is MyTask + } // end if this is MyTask Err = MPI_Bcast(&CellsOnCellBuf[0], CellsOnCellSize, MPI_INT32_T, Task, Comm); if (Err != 0) { @@ -794,8 +794,8 @@ int Decomp::partCellsKWay( ++Add; // increment address counter } } - } // end cell loop for buffer - } // end task loop + } // end cell loop for buffer + } // end task loop AdjAdd[NCellsGlobal] = Add; // Add the ending address // Set up remaining partitioning variables @@ -878,7 +878,7 @@ int Decomp::partCellsKWay( CellLocTmp[2 * LocalAdd] = TaskLoc; CellLocTmp[2 * LocalAdd + 1] = LocalAdd; } // end if my task - } // end loop over all cells + } // end loop over all cells // Find and add the halo cells to the cell list. 
Here we use the // adjacency array to find the active neighbor cells and store if they @@ -917,7 +917,7 @@ int Decomp::partCellsKWay( HaloList.insert(NbrID); CellsInList.insert(NbrID); } // end search for existing entry - } // end if not on task + } // end if not on task } // end loop over neighbors @@ -1148,8 +1148,8 @@ int Decomp::partEdges( ++HaloCount; EdgesAll.erase(EdgeGlob); } // end if valid edge - } // end loop over cell edges - } // end cell loop + } // end loop over cell edges + } // end cell loop // reset address range for next halo and set NEdgesHalo CellStart = CellEnd; if ((Halo + 1) < HaloWidth) @@ -1553,8 +1553,8 @@ int Decomp::rearrangeCellArrays( } NEdgesOnCellTmp(LocCell) = EdgeCount; } // end if local cell - } // end loop over chunk of global cells - } // end loop over MPI tasks + } // end loop over chunk of global cells + } // end loop over MPI tasks // Copy to final location on host - wait to create device copies until // the entries are translated to local addresses rather than global IDs @@ -1689,8 +1689,8 @@ int Decomp::rearrangeEdgeArrays( } NEdgesOnEdgeTmp(LocEdge) = EdgeCount; } // end if local cell - } // end loop over chunk of global cells - } // end loop over MPI tasks + } // end loop over chunk of global cells + } // end loop over MPI tasks // Copy to final location on host - wait to create device copies until // the entries are translated to local addresses rather than global IDs @@ -1798,8 +1798,8 @@ int Decomp::rearrangeVertexArrays( ++BufAdd; } } // end if local cell - } // end loop over chunk of global cells - } // end loop over MPI tasks + } // end loop over chunk of global cells + } // end loop over MPI tasks // Copy to final location on host - wait to create device copies until // the entries are translated to local addresses rather than global IDs diff --git a/components/omega/test/base/DataTypesTest.cpp b/components/omega/test/base/DataTypesTest.cpp index 0f2d0caa7748..93180ac74c5b 100644 --- 
a/components/omega/test/base/DataTypesTest.cpp +++ b/components/omega/test/base/DataTypesTest.cpp @@ -99,8 +99,7 @@ int main(int argc, char *argv[]) { RefArr1DI4(i) = i; } - parallelFor( - {NumCells}, KOKKOS_LAMBDA(int i) { TstArr1DI4(i) = i; }); + parallelFor({NumCells}, KOKKOS_LAMBDA(int i) { TstArr1DI4(i) = i; }); Kokkos::fence(); @@ -284,8 +283,7 @@ int main(int argc, char *argv[]) { RefArr1DI8(i) = i; } - parallelFor( - {NumCells}, KOKKOS_LAMBDA(int i) { TstArr1DI8(i) = i; }); + parallelFor({NumCells}, KOKKOS_LAMBDA(int i) { TstArr1DI8(i) = i; }); Kokkos::fence(); @@ -469,8 +467,7 @@ int main(int argc, char *argv[]) { RefArr1DR4(i) = i; } - parallelFor( - {NumCells}, KOKKOS_LAMBDA(int i) { TstArr1DR4(i) = i; }); + parallelFor({NumCells}, KOKKOS_LAMBDA(int i) { TstArr1DR4(i) = i; }); Kokkos::fence(); @@ -654,8 +651,7 @@ int main(int argc, char *argv[]) { RefArr1DR8(i) = i; } - parallelFor( - {NumCells}, KOKKOS_LAMBDA(int i) { TstArr1DR8(i) = i; }); + parallelFor({NumCells}, KOKKOS_LAMBDA(int i) { TstArr1DR8(i) = i; }); Kokkos::fence(); @@ -839,8 +835,7 @@ int main(int argc, char *argv[]) { RefArr1DReal(i) = i; } - parallelFor( - {NumCells}, KOKKOS_LAMBDA(int i) { TstArr1DReal(i) = i; }); + parallelFor({NumCells}, KOKKOS_LAMBDA(int i) { TstArr1DReal(i) = i; }); Kokkos::fence(); From d8d2c2a470796487e129ac27f2bb7946a8327a5c Mon Sep 17 00:00:00 2001 From: Youngsung Kim Date: Mon, 8 Apr 2024 15:30:24 -0400 Subject: [PATCH 07/11] fix typo in Halo.cpp; enable Halo unit test --- components/omega/src/base/Halo.cpp | 2 +- components/omega/test/CMakeLists.txt | 20 ++++++++++---------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/components/omega/src/base/Halo.cpp b/components/omega/src/base/Halo.cpp index 2b2461959a75..e5ff96d46d43 100644 --- a/components/omega/src/base/Halo.cpp +++ b/components/omega/src/base/Halo.cpp @@ -511,7 +511,7 @@ int Halo::packBuffer(const HostArray1DR8 Array) { int Halo::packBuffer(const HostArray2DI4 Array) { ExchList 
*MyList = &MyNeighbor->SendLists[MyElem]; - int NJ = Array.extent(0); + int NJ = Array.extent(1); MyNeighbor->SendBuffer.resize(MyList->NTot * TotSize); diff --git a/components/omega/test/CMakeLists.txt b/components/omega/test/CMakeLists.txt index 884640ada95f..bf54551e5f40 100644 --- a/components/omega/test/CMakeLists.txt +++ b/components/omega/test/CMakeLists.txt @@ -52,15 +52,15 @@ target_link_libraries(testDecomp.exe ${OMEGA_LIB_NAME}) add_test( NAME DECOMP_TEST COMMAND ${MPI_EXEC} -n 8 --cpu-bind=cores -- ./testDecomp.exe) -################### -## Halo test -################### -# -#add_executable(testHalo.exe base/HaloTest.cpp) -# -#target_link_libraries(testHalo.exe ${OMEGA_LIB_NAME}) -# -#add_test(NAME HALO_TEST COMMAND ${MPI_EXEC} --cpu-bind=cores -n 8 -- ./testHalo.exe) +################## +# Halo test +################## + +add_executable(testHalo.exe base/HaloTest.cpp) + +target_link_libraries(testHalo.exe ${OMEGA_LIB_NAME}) + +add_test(NAME HALO_TEST COMMAND ${MPI_EXEC} --cpu-bind=cores -n 8 -- ./testHalo.exe) ################ # HorzMesh test @@ -143,7 +143,7 @@ set_tests_properties( LOGGING_TEST CONFIG_TEST DECOMP_TEST -# HALO_TEST + HALO_TEST HORZMESH_TEST IO_TEST METADATA_TEST From 7f1bbecf81ea6deb990b5633f9ac2ab8aef55281 Mon Sep 17 00:00:00 2001 From: Youngsung Kim Date: Thu, 11 Apr 2024 12:26:39 -0700 Subject: [PATCH 08/11] Apply the PR comments on cmake scripts * check if third-party libraries are already built * use interface target for OmegaLib and unittests --- components/omega/external/CMakeLists.txt | 81 ++++++----- components/omega/src/CMakeLists.txt | 73 +++++----- components/omega/test/CMakeLists.txt | 172 ++++++++++++++--------- 3 files changed, 186 insertions(+), 140 deletions(-) diff --git a/components/omega/external/CMakeLists.txt b/components/omega/external/CMakeLists.txt index e2955a89d959..9ca11e26318d 100644 --- a/components/omega/external/CMakeLists.txt +++ b/components/omega/external/CMakeLists.txt @@ -1,50 +1,57 @@ # Add 
external packages -include(ExternalProject) - # Add the spdlog library -add_subdirectory( - ${E3SM_EXTERNALS_ROOT}/ekat/extern/spdlog - ${CMAKE_CURRENT_BINARY_DIR}/ekat/extern/spdlog -) +if (NOT TARGET spdlog::spdlog) + add_subdirectory( + ${E3SM_EXTERNALS_ROOT}/ekat/extern/spdlog + ${CMAKE_CURRENT_BINARY_DIR}/ekat/extern/spdlog + ) +endif() # Add the yaml-cpp library -add_subdirectory( - ${E3SM_EXTERNALS_ROOT}/ekat/extern/yaml-cpp - ${CMAKE_CURRENT_BINARY_DIR}/ekat/extern/yaml-cpp -) +if (NOT TARGET yaml-cpp::yaml-cpp) + add_subdirectory( + ${E3SM_EXTERNALS_ROOT}/ekat/extern/yaml-cpp + ${CMAKE_CURRENT_BINARY_DIR}/ekat/extern/yaml-cpp + ) +endif() # Add the Kokkos library - add_subdirectory( - ${E3SM_EXTERNALS_ROOT}/ekat/extern/kokkos - ${CMAKE_CURRENT_BINARY_DIR}/ekat/extern/kokkos - ) - -# forward env. variables to Scorpio build -if(DEFINED ENV{NETCDF_PATH}) - set(NetCDF_PATH $ENV{NETCDF_PATH} - CACHE STRING "Path to NETCDF library" FORCE) -endif() -if(DEFINED ENV{NETCDF_C_PATH}) - set(NetCDF_C_PATH $ENV{NETCDF_C_PATH} - CACHE STRING "Path to NETCDF-C library" FORCE) -endif() -if(DEFINED ENV{NETCDF_FORTRAN_PATH}) - set(NetCDF_Fortran_PATH $ENV{NETCDF_FORTRAN_PATH} - CACHE STRING "Path to NETCDF-Fortran library" FORCE) +if (NOT TARGET Kokkos::kokkos) + add_subdirectory( + ${E3SM_EXTERNALS_ROOT}/ekat/extern/kokkos + ${CMAKE_CURRENT_BINARY_DIR}/ekat/extern/kokkos + ) endif() -if(DEFINED ENV{PNETCDF_PATH}) - set(PnetCDF_PATH $ENV{PNETCDF_PATH} - CACHE STRING "Path to PNETCDF library" FORCE) -endif() - -option(PIO_ENABLE_TOOLS "" OFF) # Add the Scorpio library -add_subdirectory( - ${E3SM_EXTERNALS_ROOT}/scorpio - ${CMAKE_CURRENT_BINARY_DIR}/scorpio -) +if (NOT TARGET pioc) + + # forward env. 
variables to Scorpio build + if(DEFINED ENV{NETCDF_PATH}) + set(NetCDF_PATH $ENV{NETCDF_PATH} + CACHE STRING "Path to NETCDF library" FORCE) + endif() + if(DEFINED ENV{NETCDF_C_PATH}) + set(NetCDF_C_PATH $ENV{NETCDF_C_PATH} + CACHE STRING "Path to NETCDF-C library" FORCE) + endif() + if(DEFINED ENV{NETCDF_FORTRAN_PATH}) + set(NetCDF_Fortran_PATH $ENV{NETCDF_FORTRAN_PATH} + CACHE STRING "Path to NETCDF-Fortran library" FORCE) + endif() + if(DEFINED ENV{PNETCDF_PATH}) + set(PnetCDF_PATH $ENV{PNETCDF_PATH} + CACHE STRING "Path to PNETCDF library" FORCE) + endif() + + option(PIO_ENABLE_TOOLS "" OFF) + + add_subdirectory( + ${E3SM_EXTERNALS_ROOT}/scorpio + ${CMAKE_CURRENT_BINARY_DIR}/scorpio + ) +endif() # Add the parmetis and related libraries diff --git a/components/omega/src/CMakeLists.txt b/components/omega/src/CMakeLists.txt index 09a078eeceac..b7a84f417a39 100644 --- a/components/omega/src/CMakeLists.txt +++ b/components/omega/src/CMakeLists.txt @@ -1,13 +1,11 @@ # build Omega -# Add source files for the library -file(GLOB _LIBSRC_FILES infra/*.cpp base/*.cpp ocn/*.cpp) - -add_library(${OMEGA_LIB_NAME} ${_LIBSRC_FILES}) +# create library flag target for LibOmega.so +add_library(OmegaLibFlags INTERFACE) target_include_directories( - ${OMEGA_LIB_NAME} - PUBLIC + OmegaLibFlags + INTERFACE ${OMEGA_SOURCE_DIR}/src/base ${OMEGA_SOURCE_DIR}/src/infra ${OMEGA_SOURCE_DIR}/src/ocn @@ -15,36 +13,47 @@ target_include_directories( ) target_compile_definitions( - ${OMEGA_LIB_NAME} - PUBLIC + OmegaLibFlags + INTERFACE OMEGA_ARCH=${OMEGA_ARCH} ) target_link_options( - ${OMEGA_LIB_NAME} - PUBLIC + OmegaLibFlags + INTERFACE ${OMEGA_LINK_OPTIONS} ) target_link_libraries( - ${OMEGA_LIB_NAME} - PUBLIC - Kokkos::kokkos - spdlog - pioc - yaml-cpp - parmetis - metis + OmegaLibFlags + INTERFACE + Kokkos::kokkos + spdlog + pioc + yaml-cpp + parmetis + metis ) if(GKlib_FOUND) target_link_libraries( - ${OMEGA_LIB_NAME} - PUBLIC - gklib - ) + OmegaLibFlags + INTERFACE + gklib + ) endif() +# Add
source files for the library +file(GLOB _LIBSRC_FILES infra/*.cpp base/*.cpp ocn/*.cpp) + +add_library(${OMEGA_LIB_NAME} ${_LIBSRC_FILES}) + +target_link_libraries( + ${OMEGA_LIB_NAME} + PRIVATE + OmegaLibFlags +) + # build Omega executable if(OMEGA_BUILD_EXECUTABLE) @@ -55,19 +64,11 @@ if(OMEGA_BUILD_EXECUTABLE) # Create the executable target add_executable(${OMEGA_EXE_NAME} ${EXESRC_FILES}) -# target_compile_options( -# ${OMEGA_EXE_NAME} -# PRIVATE -# "-L${OMEGA_SOURCE_DIR}/src/base" -# "-L${CMAKE_CURRENT_SOURCE_DIR}/infra" -# ) - -# target_compile_definitions( -# ${OMEGA_EXE_NAME} -# PUBLIC -# OMEGA_ARCH=${OMEGA_ARCH} -# ) - - target_link_libraries(${OMEGA_EXE_NAME} ${OMEGA_LIB_NAME}) + target_link_libraries( + ${OMEGA_EXE_NAME} + PRIVATE + ${OMEGA_LIB_NAME} + OmegaLibFlags +) endif() diff --git a/components/omega/test/CMakeLists.txt b/components/omega/test/CMakeLists.txt index bf54551e5f40..66da19c7f915 100644 --- a/components/omega/test/CMakeLists.txt +++ b/components/omega/test/CMakeLists.txt @@ -1,136 +1,174 @@ # Omega Unit Tests -################## -# Data type test -################## - -add_executable(testDataTypes.exe base/DataTypesTest.cpp) -target_link_libraries(testDataTypes.exe ${OMEGA_LIB_NAME}) +##################### +# Omega Test Function +##################### -add_test(NAME DATA_TYPES_TEST COMMAND ${MPI_EXEC} --cpu-bind=cores -n 1 -- ./testDataTypes.exe) +function(add_omega_test test_name exe_name source_files mpi_args) -################## -# Machine env test -################## + # Create the executable + add_executable(${exe_name} ${source_files}) -add_executable(testMachEnv.exe base/MachEnvTest.cpp) + # Link the library + target_link_libraries( + ${exe_name} + PRIVATE + ${OMEGA_LIB_NAME} + OmegaLibFlags + ) -target_link_libraries(testMachEnv.exe ${OMEGA_LIB_NAME}) + # Add the test command + add_test( + NAME ${test_name} + COMMAND ${MPI_EXEC} ${mpi_args} -- ./${exe_name} + ) -add_test(NAME MACHINE_ENV_TEST COMMAND ${MPI_EXEC} --cpu-bind=cores 
-n 8 -- ./testMachEnv.exe) +endfunction() ################## -# Broadcast test +# Data type test ################## -add_executable(testBroadcast.exe base/BroadcastTest.cpp) - -target_link_libraries(testBroadcast.exe ${OMEGA_LIB_NAME}) +add_omega_test( + DATA_TYPES_TEST + testDataTypes.exe + base/DataTypesTest.cpp + "-n 1;--cpu-bind=cores" +) -add_test(NAME BROADCAST_TEST COMMAND ${MPI_EXEC} --cpu-bind=cores -n 8 -- ./testBroadcast.exe) +################## +# Machine env test +################## +add_omega_test( + MACHINE_ENV_TEST + testMachEnv.exe + base/MachEnvTest.cpp + "-n 8;--cpu-bind=cores" +) ################## -# Logging test +# Broadcast test ################## -add_executable(testLogging.exe infra/LoggingTest.cpp) +add_omega_test( + BROADCAST_TEST + testBroadcast.exe + base/BroadcastTest.cpp + "-n 8;--cpu-bind=cores" +) -target_link_libraries(testLogging.exe ${OMEGA_LIB_NAME}) +################## +# Logging test +################## -add_test(NAME LOGGING_TEST COMMAND ./testLogging.exe) +add_omega_test( + LOGGING_TEST + testLogging.exe + infra/LoggingTest.cpp + "-n 8;--cpu-bind=cores" +) ############# # Decomp test ############# -add_executable(testDecomp.exe base/DecompTest.cpp) - -target_link_libraries(testDecomp.exe ${OMEGA_LIB_NAME}) - -add_test( NAME DECOMP_TEST COMMAND ${MPI_EXEC} -n 8 --cpu-bind=cores -- ./testDecomp.exe) +add_omega_test( + DECOMP_TEST + testDecomp.exe + base/DecompTest.cpp + "-n 8;--cpu-bind=cores" +) ################## # Halo test ################## -add_executable(testHalo.exe base/HaloTest.cpp) - -target_link_libraries(testHalo.exe ${OMEGA_LIB_NAME}) - -add_test(NAME HALO_TEST COMMAND ${MPI_EXEC} --cpu-bind=cores -n 8 -- ./testHalo.exe) +add_omega_test( + HALO_TEST + testHalo.exe + base/HaloTest.cpp + "-n 8;--cpu-bind=cores" +) ################ # HorzMesh test ################ -add_executable(testHorzMesh.exe ocn/HorzMeshTest.cpp) - -target_link_libraries(testHorzMesh.exe ${OMEGA_LIB_NAME}) - -add_test(NAME HORZMESH_TEST COMMAND 
${MPI_EXEC} -n 8 --cpu-bind=cores -- ./testHorzMesh.exe) +add_omega_test( + HORZMESH_TEST + testHorzMesh.exe + ocn/HorzMeshTest.cpp + "-n 8;--cpu-bind=cores" +) ############# # IO test ############# -add_executable(testIO.exe base/IOTest.cpp) - -target_link_libraries(testIO.exe ${OMEGA_LIB_NAME}) - -add_test(NAME IO_TEST COMMAND ${MPI_EXEC} -n 8 --cpu-bind=cores -- ./testIO.exe) +add_omega_test( + IO_TEST + testIO.exe + base/IOTest.cpp + "-n 8;--cpu-bind=cores" +) ################## # Config test ################## -add_executable(testConfig.exe infra/ConfigTest.cpp) - -target_link_libraries(testConfig.exe ${OMEGA_LIB_NAME}) - -add_test(NAME CONFIG_TEST COMMAND ${MPI_EXEC} --cpu-bind=cores -n 8 -- ./testConfig.exe) +add_omega_test( + CONFIG_TEST + testConfig.exe + infra/ConfigTest.cpp + "-n 8;--cpu-bind=cores" +) ################## # Metadata test ################## -add_executable(testMetadata.exe infra/MetadataTest.cpp) - -target_link_libraries(testMetadata.exe ${OMEGA_LIB_NAME}) - -add_test(NAME METADATA_TEST COMMAND ./testMetadata.exe) +add_omega_test( + METADATA_TEST + testMetadata.exe + infra/MetadataTest.cpp + "-n 1;--cpu-bind=cores" +) ################## # IOField test ################## -add_executable(testIOField.exe infra/IOFieldTest.cpp) - -target_link_libraries(testIOField.exe ${OMEGA_LIB_NAME}) - -add_test(NAME IOFIELD_TEST COMMAND ${MPI_EXEC} --cpu-bind=cores -n 8 -- ./testIOField.exe) +add_omega_test( + IOFIELD_TEST + testIOField.exe + infra/IOFieldTest.cpp + "-n 8;--cpu-bind=cores" +) ################## # Time Manager test ################## -add_executable(testTimeMgr.exe infra/TimeMgrTest.cpp) - -target_link_libraries(testTimeMgr.exe ${OMEGA_LIB_NAME}) - -add_test(NAME TIMEMGR_TEST COMMAND ./testTimeMgr.exe) +add_omega_test( + TIMEMGR_TEST + testTimeMgr.exe + infra/TimeMgrTest.cpp + "-n 1;--cpu-bind=cores" +) ################## # Kokkos test ################## -add_executable(testKokkos.exe infra/OmegaKokkosTest.cpp) - 
-target_link_libraries(testKokkos.exe ${OMEGA_LIB_NAME}) - -add_test(NAME KOKKOS_TEST COMMAND ./testKokkos.exe) +add_omega_test( + KOKKOS_TEST + testKokkos.exe + infra/OmegaKokkosTest.cpp + "-n 1;--cpu-bind=cores" +) ################## # test properties From 74a45a9845ab774605ae9cd9a956b1be38432dcf Mon Sep 17 00:00:00 2001 From: Youngsung Kim Date: Thu, 11 Apr 2024 15:44:36 -0400 Subject: [PATCH 09/11] fix formatting issue --- components/omega/src/CMakeLists.txt | 4 ++-- components/omega/test/CMakeLists.txt | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/components/omega/src/CMakeLists.txt b/components/omega/src/CMakeLists.txt index b7a84f417a39..dfb2e61f1275 100644 --- a/components/omega/src/CMakeLists.txt +++ b/components/omega/src/CMakeLists.txt @@ -1,7 +1,7 @@ # build Omega -# create library flag target for LibOmega.so -add_library(OmegaLibFlags INTERFACE) +# create library flag target for LibOmega +add_library(OmegaLibFlags INTERFACE) target_include_directories( OmegaLibFlags diff --git a/components/omega/test/CMakeLists.txt b/components/omega/test/CMakeLists.txt index 66da19c7f915..c946df6a87a5 100644 --- a/components/omega/test/CMakeLists.txt +++ b/components/omega/test/CMakeLists.txt @@ -20,13 +20,12 @@ function(add_omega_test test_name exe_name source_files mpi_args) # Add the test command add_test( - NAME ${test_name} + NAME ${test_name} COMMAND ${MPI_EXEC} ${mpi_args} -- ./${exe_name} ) endfunction() - ################## # Data type test ################## From 348fc23dd321757cd52ddd83ee9b541920a7d787 Mon Sep 17 00:00:00 2001 From: Youngsung Kim Date: Thu, 11 Apr 2024 16:23:55 -0400 Subject: [PATCH 10/11] update mesh data download link --- components/omega/doc/devGuide/QuickStart.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/components/omega/doc/devGuide/QuickStart.md b/components/omega/doc/devGuide/QuickStart.md index 97ef115643d9..5b6d64aeee34 100644 --- a/components/omega/doc/devGuide/QuickStart.md 
+++ b/components/omega/doc/devGuide/QuickStart.md @@ -135,10 +135,10 @@ script that you can run to build Omega: Omega includes several unit tests that run through CTest. These need to be run on a compute node. Some tests also require a valid Omega mesh file called `test/OmegaMesh.nc`. An appropriate mesh file can be downloaded from -[mesh.230220.nc](https://web.lcrc.anl.gov/public/e3sm/polaris/ocean/polaris_cache/global_convergence/icos/cosine_bell/Icos480/mesh/mesh.230220.nc). +[ocean.QU.240km.151209.nc](https://web.lcrc.anl.gov/public/e3sm/polaris/ocean/omega_ctest/ocean.QU.240km.151209.nc). ```sh -wget https://web.lcrc.anl.gov/public/e3sm/polaris/ocean/polaris_cache/global_convergence/icos/cosine_bell/Icos480/mesh/mesh.230220.nc -mv mesh.230220.nc test/OmegaMesh.nc +wget https://web.lcrc.anl.gov/public/e3sm/polaris/ocean/omega_ctest/ocean.QU.240km.151209.nc +mv ocean.QU.240km.151209.nc test/OmegaMesh.nc ``` Then, run the tests: From 1d4b73d0769dbb4695aa9534a48af87f25dd497c Mon Sep 17 00:00:00 2001 From: Youngsung Kim Date: Fri, 12 Apr 2024 09:13:15 -0500 Subject: [PATCH 11/11] update a cmake function for unit testing --- components/omega/test/CMakeLists.txt | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/components/omega/test/CMakeLists.txt b/components/omega/test/CMakeLists.txt index c946df6a87a5..d5b446f51d41 100644 --- a/components/omega/test/CMakeLists.txt +++ b/components/omega/test/CMakeLists.txt @@ -19,10 +19,18 @@ function(add_omega_test test_name exe_name source_files mpi_args) ) # Add the test command - add_test( - NAME ${test_name} - COMMAND ${MPI_EXEC} ${mpi_args} -- ./${exe_name} - ) + if (mpi_args) + add_test( + NAME ${test_name} + COMMAND ${MPI_EXEC} ${mpi_args} -- ./${exe_name} + ) + + else() + add_test( + NAME ${test_name} + COMMAND ./${exe_name} + ) + endif() endfunction() @@ -133,7 +141,7 @@ add_omega_test( METADATA_TEST testMetadata.exe infra/MetadataTest.cpp - "-n 1;--cpu-bind=cores" + "" ) ################## 
@@ -155,7 +163,7 @@ add_omega_test( TIMEMGR_TEST testTimeMgr.exe infra/TimeMgrTest.cpp - "-n 1;--cpu-bind=cores" + "" ) ################## @@ -166,7 +174,7 @@ add_omega_test( KOKKOS_TEST testKokkos.exe infra/OmegaKokkosTest.cpp - "-n 1;--cpu-bind=cores" + "" ) ##################