Skip to content

Commit

Permalink
Applies reviews for PR E3SM-Project#74
Browse files Browse the repository at this point in the history
* adds OMP_PROC_BIND and OMP_PLACES env. variables
* adds -cpu-bind=cores srun flag to fix a performance bug on PM-GPU as a temporary solution
* renamed createHostCopy and createDeviceCopy function names to createHostMirrorCopy and createDeviceMirrorCopy each
* streamlined ctest script
* undef MAKE_OMEGA_VIEW_DIMS
* deletes else branch for creatXXXCopy functions
  • Loading branch information
grnydawn committed Apr 5, 2024
1 parent 4ec4159 commit 67023d3
Show file tree
Hide file tree
Showing 12 changed files with 161 additions and 435 deletions.
2 changes: 2 additions & 0 deletions components/omega/OmegaBuild.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -459,6 +459,8 @@ macro(update_variables)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DOMEGA_TARGET_DEVICE")

elseif(OMEGA_ARCH STREQUAL "OPENMP")
set(ENV{OMP_PROC_BIND} "spread")
set(ENV{OMP_PLACES} "threads")
option(Kokkos_ENABLE_OPENMP "" ON)

elseif(OMEGA_ARCH STREQUAL "THREADS")
Expand Down
6 changes: 6 additions & 0 deletions components/omega/create_scripts.py
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,12 @@ def generate_scripts(self, outvar):
if "OMP_NUM_THREADS" not in self.__OMEGA_SCRIPT_EXPORTS__:
f.write("export OMP_NUM_THREADS=\"1\"\n")

if "OMP_PROC_BIND" not in self.__OMEGA_SCRIPT_EXPORTS__:
f.write("export OMP_PROC_BIND=\"spread\"\n")

if "OMP_PLACES" not in self.__OMEGA_SCRIPT_EXPORTS__:
f.write("export OMP_PLACES=\"threads\"\n")

with open(omega_build, "w") as f:
f.write("#!/usr/bin/env bash\n\n")

Expand Down
2 changes: 1 addition & 1 deletion components/omega/doc/design/HorzMeshClass.md
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ The compute method will be a private method called by the constructor. It will b
This method will be repsonsible for creating the device copies of the required mesh information on the host. It will be a private method called by the constructor.

```c++
AreaCell = OMEGA::createDeviceCopy(AreaCellH)
AreaCell = OMEGA::createDeviceMirrorCopy(AreaCellH)

```

Expand Down
2 changes: 1 addition & 1 deletion components/omega/doc/devGuide/DataTypes.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ As an example, we can define and allocate a device and host array using:
Alternatively, you can use the copy functions to create a host copy
from the device or vice versa.
```c++
auto TemperatureHost = OMEGA::createHostCopy(Temperature);
auto TemperatureHost = OMEGA::createHostMirrorCopy(Temperature);
```
Finally, the arrays can be deallocated explicity using the class
deallocate method, eg `Temperature.deallocate();` or if they are local
Expand Down
2 changes: 1 addition & 1 deletion components/omega/doc/devGuide/HorzMesh.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ For member variables that are host arrays, variable names are appended with an
`H`. Array variable names not ending in `H` are device arrays. The copy from
host to device array is performed in the constructor via:
```c++
AreaCell = OMEGA::createDeviceCopy(AreaCellH);
AreaCell = OMEGA::createDeviceMirrorCopy(AreaCellH);
```

The device arrays are deallocated by the `HorzMesh::clear()` method, which is
Expand Down
42 changes: 27 additions & 15 deletions components/omega/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,34 +3,46 @@
# Add source files for the library
file(GLOB _LIBSRC_FILES infra/*.cpp base/*.cpp ocn/*.cpp)

add_library(${OMEGA_LIB_NAME} ${_LIBSRC_FILES})
add_library(${OMEGA_LIB_NAME} ${_LIBSRC_FILES})

target_include_directories(
target_include_directories(
${OMEGA_LIB_NAME}
PRIVATE
PUBLIC
${OMEGA_SOURCE_DIR}/src/base
${OMEGA_SOURCE_DIR}/src/infra
${OMEGA_SOURCE_DIR}/src/ocn
${Parmetis_INCLUDE_DIRS}
)
)

target_compile_definitions(
target_compile_definitions(
${OMEGA_LIB_NAME}
PUBLIC
OMEGA_ARCH=${OMEGA_ARCH}
)
)

# add linker options
target_link_options(
${OMEGA_LIB_NAME}
PRIVATE
PUBLIC
${OMEGA_LINK_OPTIONS}
)

target_link_libraries(${OMEGA_LIB_NAME} Kokkos::kokkos spdlog pioc yaml-cpp parmetis metis)
target_link_libraries(
${OMEGA_LIB_NAME}
PUBLIC
Kokkos::kokkos
spdlog
pioc
yaml-cpp
parmetis
metis
)

if(GKlib_FOUND)
target_link_libraries(${OMEGA_LIB_NAME} gklib)
target_link_libraries(
${OMEGA_LIB_NAME}
PUBLIC
gklib
)
endif()

# build Omega executable
Expand All @@ -50,11 +62,11 @@ if(OMEGA_BUILD_EXECUTABLE)
# "-L${CMAKE_CURRENT_SOURCE_DIR}/infra"
# )

target_compile_definitions(
${OMEGA_EXE_NAME}
PUBLIC
OMEGA_ARCH=${OMEGA_ARCH}
)
# target_compile_definitions(
# ${OMEGA_EXE_NAME}
# PUBLIC
# OMEGA_ARCH=${OMEGA_ARCH}
# )

target_link_libraries(${OMEGA_EXE_NAME} ${OMEGA_LIB_NAME})

Expand Down
6 changes: 5 additions & 1 deletion components/omega/src/base/DataTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ using R8 = double; ///< alias for 64-bit (double prec) real
#ifdef SINGLE_PRECISION
using Real = float;
#else
using Real = double;
using Real = double;
#endif

// user-defined literal for generic reals
Expand Down Expand Up @@ -139,6 +139,10 @@ MAKE_OMEGA_VIEW_TYPES(Array, View, MemLayout, MemSpace)

// Aliases for Kokkos host arrays of various dimensions and types
MAKE_OMEGA_VIEW_TYPES(HostArray, View, HostMemLayout, HostMemSpace)

#undef MAKE_OMEGA_VIEW_TYPES
#undef MAKE_OMEGA_VIEW_DIMS

} // end namespace OMEGA

//===----------------------------------------------------------------------===//
Expand Down
66 changes: 33 additions & 33 deletions components/omega/src/base/Decomp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ int readMesh(const int MeshFileID, // file ID for open mesh file
OnVertexOffset[Vrtx * VertexDegree + Cell] =
VertexGlob * VertexDegree + Cell;
} // end loop VertexDegree
} // end loop NVerticesLocal
} // end loop NVerticesLocal

// Create the parallel IO decompositions
IO::Rearranger Rearr = IO::RearrBox;
Expand Down Expand Up @@ -629,30 +629,30 @@ Decomp::Decomp(

// Create device copies of all arrays

NCellsHalo = createDeviceCopy(NCellsHaloH);
CellID = createDeviceCopy(CellIDH);
CellLoc = createDeviceCopy(CellLocH);
NCellsHalo = createDeviceMirrorCopy(NCellsHaloH);
CellID = createDeviceMirrorCopy(CellIDH);
CellLoc = createDeviceMirrorCopy(CellLocH);

NEdgesHalo = createDeviceCopy(NEdgesHaloH);
EdgeID = createDeviceCopy(EdgeIDH);
EdgeLoc = createDeviceCopy(EdgeLocH);
NEdgesHalo = createDeviceMirrorCopy(NEdgesHaloH);
EdgeID = createDeviceMirrorCopy(EdgeIDH);
EdgeLoc = createDeviceMirrorCopy(EdgeLocH);

NVerticesHalo = createDeviceCopy(NVerticesHaloH);
VertexID = createDeviceCopy(VertexIDH);
VertexLoc = createDeviceCopy(VertexLocH);
NVerticesHalo = createDeviceMirrorCopy(NVerticesHaloH);
VertexID = createDeviceMirrorCopy(VertexIDH);
VertexLoc = createDeviceMirrorCopy(VertexLocH);

CellsOnCell = createDeviceCopy(CellsOnCellH);
EdgesOnCell = createDeviceCopy(EdgesOnCellH);
VerticesOnCell = createDeviceCopy(VerticesOnCellH);
NEdgesOnCell = createDeviceCopy(NEdgesOnCellH);
CellsOnCell = createDeviceMirrorCopy(CellsOnCellH);
EdgesOnCell = createDeviceMirrorCopy(EdgesOnCellH);
VerticesOnCell = createDeviceMirrorCopy(VerticesOnCellH);
NEdgesOnCell = createDeviceMirrorCopy(NEdgesOnCellH);

CellsOnEdge = createDeviceCopy(CellsOnEdgeH);
EdgesOnEdge = createDeviceCopy(EdgesOnEdgeH);
VerticesOnEdge = createDeviceCopy(VerticesOnEdgeH);
NEdgesOnEdge = createDeviceCopy(NEdgesOnEdgeH);
CellsOnEdge = createDeviceMirrorCopy(CellsOnEdgeH);
EdgesOnEdge = createDeviceMirrorCopy(EdgesOnEdgeH);
VerticesOnEdge = createDeviceMirrorCopy(VerticesOnEdgeH);
NEdgesOnEdge = createDeviceMirrorCopy(NEdgesOnEdgeH);

CellsOnVertex = createDeviceCopy(CellsOnVertexH);
EdgesOnVertex = createDeviceCopy(EdgesOnVertexH);
CellsOnVertex = createDeviceMirrorCopy(CellsOnVertexH);
EdgesOnVertex = createDeviceMirrorCopy(EdgesOnVertexH);

// Assign this as the default decomposition
AllDecomps.emplace(Name, *this);
Expand Down Expand Up @@ -765,7 +765,7 @@ int Decomp::partCellsKWay(
for (int n = 0; n < CellsOnCellSize; ++n) {
CellsOnCellBuf[n] = CellsOnCellInit[n];
} // end loop CellsOnCell
} // end if this is MyTask
} // end if this is MyTask
Err = MPI_Bcast(&CellsOnCellBuf[0], CellsOnCellSize, MPI_INT32_T, Task,
Comm);
if (Err != 0) {
Expand Down Expand Up @@ -794,8 +794,8 @@ int Decomp::partCellsKWay(
++Add; // increment address counter
}
}
} // end cell loop for buffer
} // end task loop
} // end cell loop for buffer
} // end task loop
AdjAdd[NCellsGlobal] = Add; // Add the ending address

// Set up remaining partitioning variables
Expand Down Expand Up @@ -878,7 +878,7 @@ int Decomp::partCellsKWay(
CellLocTmp[2 * LocalAdd] = TaskLoc;
CellLocTmp[2 * LocalAdd + 1] = LocalAdd;
} // end if my task
} // end loop over all cells
} // end loop over all cells

// Find and add the halo cells to the cell list. Here we use the
// adjacency array to find the active neighbor cells and store if they
Expand Down Expand Up @@ -917,7 +917,7 @@ int Decomp::partCellsKWay(
HaloList.insert(NbrID);
CellsInList.insert(NbrID);
} // end search for existing entry
} // end if not on task
} // end if not on task

} // end loop over neighbors

Expand Down Expand Up @@ -1148,8 +1148,8 @@ int Decomp::partEdges(
++HaloCount;
EdgesAll.erase(EdgeGlob);
} // end if valid edge
} // end loop over cell edges
} // end cell loop
} // end loop over cell edges
} // end cell loop
// reset address range for next halo and set NEdgesHalo
CellStart = CellEnd;
if ((Halo + 1) < HaloWidth)
Expand Down Expand Up @@ -1553,8 +1553,8 @@ int Decomp::rearrangeCellArrays(
}
NEdgesOnCellTmp(LocCell) = EdgeCount;
} // end if local cell
} // end loop over chunk of global cells
} // end loop over MPI tasks
} // end loop over chunk of global cells
} // end loop over MPI tasks

// Copy to final location on host - wait to create device copies until
// the entries are translated to local addresses rather than global IDs
Expand Down Expand Up @@ -1689,8 +1689,8 @@ int Decomp::rearrangeEdgeArrays(
}
NEdgesOnEdgeTmp(LocEdge) = EdgeCount;
} // end if local cell
} // end loop over chunk of global cells
} // end loop over MPI tasks
} // end loop over chunk of global cells
} // end loop over MPI tasks

// Copy to final location on host - wait to create device copies until
// the entries are translated to local addresses rather than global IDs
Expand Down Expand Up @@ -1798,8 +1798,8 @@ int Decomp::rearrangeVertexArrays(
++BufAdd;
}
} // end if local cell
} // end loop over chunk of global cells
} // end loop over MPI tasks
} // end loop over chunk of global cells
} // end loop over MPI tasks

// Copy to final location on host - wait to create device copies until
// the entries are translated to local addresses rather than global IDs
Expand Down
14 changes: 2 additions & 12 deletions components/omega/src/infra/OmegaKokkos.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,28 +18,18 @@ namespace OMEGA {
using ExecSpace = MemSpace::execution_space;
using HostExecSpace = HostMemSpace::execution_space;

#ifdef OMEGA_TARGET_DEVICE

template <typename V>
auto createHostCopy(const V &view)
auto createHostMirrorCopy(const V &view)
-> Kokkos::View<typename V::data_type, HostMemLayout, HostMemSpace> {
return Kokkos::create_mirror_view_and_copy(HostExecSpace(), view);
}

template <typename V>
auto createDeviceCopy(const V &view)
auto createDeviceMirrorCopy(const V &view)
-> Kokkos::View<typename V::data_type, MemLayout, MemSpace> {
return Kokkos::create_mirror_view_and_copy(ExecSpace(), view);
}

#else

template <typename V> V createHostCopy(const V &view) { return view; }

template <typename V> V createDeviceCopy(const V &view) { return view; }

#endif

// function alias to follow Camel Naming Convention
template <typename D, typename S> void deepCopy(D &dst, const S &src) {
Kokkos::deep_copy(dst, src);
Expand Down
24 changes: 12 additions & 12 deletions components/omega/src/ocn/HorzMesh.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -511,7 +511,7 @@ void HorzMesh::readCoriolis() {
// Compute the sign of edge contributions to a cell/vertex for each edge
void HorzMesh::computeEdgeSign() {

auto EdgeSignOnCell = Array2DR8("EdgeSignOnCell", NCellsSize, MaxEdges);
EdgeSignOnCell = Array2DR8("EdgeSignOnCell", NCellsSize, MaxEdges);

OMEGA_SCOPE(o_NEdgesOnCell, NEdgesOnCell);
OMEGA_SCOPE(o_EdgesOnCell, EdgesOnCell);
Expand All @@ -532,7 +532,7 @@ void HorzMesh::computeEdgeSign() {
}
});

EdgeSignOnCellH = createHostCopy(EdgeSignOnCell);
EdgeSignOnCellH = createHostMirrorCopy(EdgeSignOnCell);

EdgeSignOnVertex =
Array2DR8("EdgeSignOnVertex", NVerticesSize, VertexDegree);
Expand All @@ -556,22 +556,22 @@ void HorzMesh::computeEdgeSign() {
}
});

EdgeSignOnVertexH = createHostCopy(EdgeSignOnVertex);
EdgeSignOnVertexH = createHostMirrorCopy(EdgeSignOnVertex);
} // end computeEdgeSign

//------------------------------------------------------------------------------
// Perform copy to device for mesh variables
void HorzMesh::copyToDevice() {

AreaCell = createDeviceCopy(AreaCellH);
AreaTriangle = createDeviceCopy(AreaTriangleH);
KiteAreasOnVertex = createDeviceCopy(KiteAreasOnVertexH);
DcEdge = createDeviceCopy(DcEdgeH);
DvEdge = createDeviceCopy(DvEdgeH);
AngleEdge = createDeviceCopy(AngleEdgeH);
WeightsOnEdge = createDeviceCopy(WeightsOnEdgeH);
FVertex = createDeviceCopy(FVertexH);
BottomDepth = createDeviceCopy(BottomDepthH);
AreaCell = createDeviceMirrorCopy(AreaCellH);
AreaTriangle = createDeviceMirrorCopy(AreaTriangleH);
KiteAreasOnVertex = createDeviceMirrorCopy(KiteAreasOnVertexH);
DcEdge = createDeviceMirrorCopy(DcEdgeH);
DvEdge = createDeviceMirrorCopy(DvEdgeH);
AngleEdge = createDeviceMirrorCopy(AngleEdgeH);
WeightsOnEdge = createDeviceMirrorCopy(WeightsOnEdgeH);
FVertex = createDeviceMirrorCopy(FVertexH);
BottomDepth = createDeviceMirrorCopy(BottomDepthH);

} // end copyToDevice

Expand Down
Loading

0 comments on commit 67023d3

Please sign in to comment.