-
Notifications
You must be signed in to change notification settings - Fork 868
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add CUDA/HIP implementations of reduction operators
The operators are generated from macros. Function pointers to kernel launch functions are stored inside the ompi_op_t as a pointer to a struct that is filled if accelerator support is available. The ompi_op* API is extended to include versions taking streams and device IDs to allow enqueuing operators on streams. The old functions map to the stream versions with a NULL stream. Signed-off-by: Joseph Schuchart <joseph.schuchart@stonybrook.edu>
- Loading branch information
Showing
24 changed files
with
8,708 additions
and
48 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
dnl -*- autoconf -*- | ||
dnl | ||
dnl Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana | ||
dnl University Research and Technology | ||
dnl Corporation. All rights reserved. | ||
dnl Copyright (c) 2004-2005 The University of Tennessee and The University | ||
dnl of Tennessee Research Foundation. All rights | ||
dnl reserved. | ||
dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, | ||
dnl University of Stuttgart. All rights reserved. | ||
dnl Copyright (c) 2004-2005 The Regents of the University of California. | ||
dnl All rights reserved. | ||
dnl Copyright (c) 2006-2016 Cisco Systems, Inc. All rights reserved. | ||
dnl Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. | ||
dnl Copyright (c) 2009 IBM Corporation. All rights reserved. | ||
dnl Copyright (c) 2009 Los Alamos National Security, LLC. All rights | ||
dnl reserved. | ||
dnl Copyright (c) 2009-2011 Oak Ridge National Labs. All rights reserved. | ||
dnl Copyright (c) 2011-2015 NVIDIA Corporation. All rights reserved. | ||
dnl Copyright (c) 2015 Research Organization for Information Science | ||
dnl and Technology (RIST). All rights reserved. | ||
dnl Copyright (c) 2022 Amazon.com, Inc. or its affiliates. All Rights reserved. | ||
dnl $COPYRIGHT$ | ||
dnl | ||
dnl Additional copyrights may follow | ||
dnl | ||
dnl $HEADER$ | ||
dnl | ||
|
||
|
||
# OPAL_CHECK_CUDART(prefix, [action-if-found], [action-if-not-found])
# --------------------------------------------------------
# Check if CUDA runtime library (cudart) support can be found.  The
# prefix is handed to OAC_CHECK_PACKAGE so that prefix_{CPPFLAGS,
# LDFLAGS, LIBS} are set as needed.  Runs action-if-found if there is
# support, otherwise executes action-if-not-found.
#
# Where the CUDA runtime is searched:
#   --with-cudart=DIR         Use DIR; DIR/include/cuda_runtime.h must
#                             exist.  If DIR is unusable a warning is
#                             printed and no fallback is attempted.
#   --with-cudart, =yes, or   Derive the installation prefix from the
#   option not given          nvcc binary found in PATH.
#   --with-cudart=no          Do not look for the CUDA runtime.
#   --with-cudart-libdir=DIR  Library directory; defaults to lib64
#                             below the selected prefix.
#
# Besides the prefix variables this macro sets CUDART_SUPPORT (0 or 1),
# defines OPAL_CUDART_SUPPORT, declares the OPAL_cudart_support Automake
# conditional and substitutes common_cudart_CPPFLAGS.
AC_DEFUN([OPAL_CHECK_CUDART],[
    OPAL_VAR_SCOPE_PUSH([cudart_save_CPPFLAGS cudart_save_LDFLAGS cudart_save_LIBS])

    cudart_save_CPPFLAGS="$CPPFLAGS"
    cudart_save_LDFLAGS="$LDFLAGS"
    cudart_save_LIBS="$LIBS"

    #
    # Check to see if the user provided paths for CUDART
    #
    AC_ARG_WITH([cudart],
                [AS_HELP_STRING([--with-cudart=DIR],
                                [Path to the CUDA runtime library and header files])])
    AC_ARG_WITH([cudart-libdir],
                [AS_HELP_STRING([--with-cudart-libdir=DIR],
                                [Search for CUDA runtime libraries in DIR])])

    # Start from a defined state so that every path below leaves a
    # meaningful value for the summary and the final tests.
    opal_check_cudart_happy=no

    ####################################
    #### Check for CUDA runtime library
    ####################################
    AC_MSG_CHECKING([if --with-cudart is set])
    AS_IF([test "x$with_cudart" = "x" || test "x$with_cudart" = "xyes" || test "x$with_cudart" = "xno"],
          [AC_MSG_RESULT([not set (--with-cudart=$with_cudart)])],
          [test ! -d "$with_cudart"],
          [AC_MSG_RESULT([not found])
           AC_MSG_WARN([Directory $with_cudart not found])],
          [test ! -f "$with_cudart/include/cuda_runtime.h"],
          [AC_MSG_RESULT([not found])
           AC_MSG_WARN([Could not find cuda_runtime.h in $with_cudart/include])],
          [AC_MSG_RESULT([found ($with_cudart/include/cuda_runtime.h)])
           opal_check_cudart_happy=yes
           opal_cudart_incdir="$with_cudart/include"])

    # Without an explicit directory, derive the CUDA prefix from the
    # nvcc binary found in PATH.
    AS_IF([test "x$with_cudart" = "x" || test "x$with_cudart" = "xyes"],
          [AC_PATH_PROG([nvcc_bin], [nvcc], ["not-found"])
           AS_IF([test "$nvcc_bin" = "not-found"],
                 [AC_MSG_WARN([Could not find nvcc binary])],
                 [nvcc_dirname=`AS_DIRNAME([$nvcc_bin])`
                  with_cudart="$nvcc_dirname/../"
                  opal_cudart_incdir="$nvcc_dirname/../include"
                  opal_check_cudart_happy=yes])])

    # Only default the library directory once a usable prefix is known.
    AS_IF([test "$opal_check_cudart_happy" = "yes"],
          [AS_IF([test "x$with_cudart_libdir" = "x"],
                 [with_cudart_libdir="$with_cudart/lib64/"])
           OAC_CHECK_PACKAGE([cudart],
                             [$1],
                             [cuda_runtime.h],
                             [cudart],
                             [cudaMalloc],
                             [opal_check_cudart_happy="yes"],
                             [opal_check_cudart_happy="no"])])

    AC_MSG_CHECKING([if have cuda runtime library support])
    AS_IF([test "$opal_check_cudart_happy" = "yes"],
          [AC_MSG_RESULT([yes (-I$opal_cudart_incdir)])
           CUDART_SUPPORT=1
           common_cudart_CPPFLAGS="-I$opal_cudart_incdir"],
          [AC_MSG_RESULT([no])
           CUDART_SUPPORT=0])
    AC_SUBST([common_cudart_CPPFLAGS])

    OPAL_SUMMARY_ADD([Accelerators], [CUDART support], [], [$opal_check_cudart_happy])
    AM_CONDITIONAL([OPAL_cudart_support], [test "x$CUDART_SUPPORT" = "x1"])
    AC_DEFINE_UNQUOTED([OPAL_CUDART_SUPPORT], [$CUDART_SUPPORT],
                       [Whether we have cuda runtime library support])

    CPPFLAGS="$cudart_save_CPPFLAGS"
    LDFLAGS="$cudart_save_LDFLAGS"
    LIBS="$cudart_save_LIBS"

    # Run the caller supplied actions after the global flags are restored.
    AS_IF([test "$opal_check_cudart_happy" = "yes"], [$2], [$3])

    OPAL_VAR_SCOPE_POP
])dnl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
# | ||
# Copyright (c) 2023 The University of Tennessee and The University | ||
# of Tennessee Research Foundation. All rights | ||
# reserved. | ||
# $COPYRIGHT$ | ||
# | ||
# Additional copyrights may follow | ||
# | ||
# $HEADER$ | ||
# | ||
|
||
# This component provides support for offloading reduce ops to CUDA devices. | ||
# | ||
# See https://github.com/open-mpi/ompi/wiki/devel-CreateComponent | ||
# for more details on how to make Open MPI components. | ||
|
||
# Preprocessor flags for the CUDA driver and CUDA runtime headers.
AM_CPPFLAGS = $(op_cuda_CPPFLAGS) $(op_cudart_CPPFLAGS)

# Help messages of this component.
dist_ompidata_DATA = help-ompi-mca-op-cuda.txt

# All .h and .c sources.  Headers must be listed as well so that they
# are picked up in the distribution tarball.
sources = \
        op_cuda_component.c \
        op_cuda.h \
        op_cuda_functions.c \
        op_cuda_impl.h

# CUDA sources; these are built through the .cu suffix rule.
cu_sources = op_cuda_impl.cu
|
||
# CUDA compiler driver and the flags used for every .cu file.
NVCC = nvcc -g
NVCCFLAGS = --std c++17 --gpu-architecture=compute_52

# Compile a .cu file into a libtool object.  The recipe lines must start
# with a TAB character.  -fPIC is forwarded to the host compiler through
# -Xcompiler.
.cu.l$(OBJEXT):
	$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
	$(LIBTOOLFLAGS) --mode=compile $(NVCC) -prefer-non-pic $(NVCCFLAGS) -Wc,-Xcompiler,-fPIC,-g -c $<
|
||
# Open MPI components can be compiled two ways: | ||
# | ||
# 1. As a standalone dynamic shared object (DSO), sometimes called a | ||
# dynamically loadable library (DLL). | ||
# | ||
# 2. As a static library that is slurped up into the upper-level | ||
# libmpi library (regardless of whether libmpi is a static or dynamic | ||
# library). This is called a "Libtool convenience library". | ||
# | ||
# The component needs to create an output library in this top-level | ||
# component directory, and named either mca_<type>_<name>.la (for DSO | ||
# builds) or libmca_<type>_<name>.la (for static builds). The OMPI | ||
# build system will have set the | ||
# MCA_BUILD_ompi_<framework>_<component>_DSO AM_CONDITIONAL to indicate | ||
# which way this component should be built. | ||
|
||
# Select which library is built.  Both variables are defined in both
# branches so that neither is ever referenced while undefined.
if MCA_BUILD_ompi_op_cuda_DSO
component_noinst =
component_install = mca_op_cuda.la
else
component_noinst = libmca_op_cuda.la
component_install =
endif
|
||
# Specific information for DSO builds. | ||
# | ||
# The DSO should install itself in $(ompilibdir) (by default, | ||
# $prefix/lib/openmpi). | ||
|
||
mcacomponentdir = $(ompilibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_op_cuda_la_SOURCES = $(sources)
# Libraries belong in LIBADD so that they follow the objects on the link
# line; LDFLAGS only carries linker options and search paths.
mca_op_cuda_la_LIBADD = $(cu_sources:.cu=.lo) \
        $(top_builddir)/ompi/lib@OMPI_LIBMPI_NAME@.la \
        $(op_cuda_LIBS) $(op_cudart_LIBS)
mca_op_cuda_la_LDFLAGS = -module -avoid-version \
        $(op_cuda_LDFLAGS) $(op_cudart_LDFLAGS)
# The .cu files are not compiled by Automake itself; list them here so
# that they are still distributed.
EXTRA_mca_op_cuda_la_SOURCES = $(cu_sources)
|
||
# Specific information for static builds. | ||
# | ||
# Note that we *must* "noinst"; the upper-layer Makefile.am's will | ||
# slurp in the resulting .la library into libmpi. | ||
|
||
noinst_LTLIBRARIES = $(component_noinst)
libmca_op_cuda_la_SOURCES = $(sources)
# Libraries belong in LIBADD so that they follow the objects on the link
# line; LDFLAGS only carries linker options and search paths.
libmca_op_cuda_la_LIBADD = $(cu_sources:.cu=.lo) \
        $(op_cuda_LIBS) $(op_cudart_LIBS)
libmca_op_cuda_la_LDFLAGS = -module -avoid-version \
        $(op_cuda_LDFLAGS) $(op_cudart_LDFLAGS)
# The .cu files are not compiled by Automake itself; list them here so
# that they are still distributed.
EXTRA_libmca_op_cuda_la_SOURCES = $(cu_sources)
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
# -*- shell-script -*- | ||
# | ||
# Copyright (c) 2011-2013 NVIDIA Corporation. All rights reserved. | ||
# Copyright (c) 2023 The University of Tennessee and The University | ||
# of Tennessee Research Foundation. All rights | ||
# reserved. | ||
# Copyright (c) 2022 Amazon.com, Inc. or its affiliates. | ||
# All Rights reserved. | ||
# $COPYRIGHT$ | ||
# | ||
# Additional copyrights may follow | ||
# | ||
# $HEADER$ | ||
# | ||
|
||
# | ||
# If CUDA support was requested, then build the CUDA op component.
# This code runs the OPAL_CHECK_CUDA and OPAL_CHECK_CUDART checks and
# exports the resulting flags and libs as op_cuda_CPPFLAGS,
# op_cuda_LDFLAGS, op_cuda_LIBS and op_cudart_CPPFLAGS,
# op_cudart_LDFLAGS, op_cudart_LIBS.
|
||
# MCA_ompi_op_cuda_CONFIG([action-if-can-compile],
#                         [action-if-cant-compile])
# ------------------------------------------------
# Configure the CUDA op component.  The component is only enabled when
# both the CUDA check and the CUDA runtime check succeeded, because it
# uses the flags and libs of both.
AC_DEFUN([MCA_ompi_op_cuda_CONFIG],[

    AC_CONFIG_FILES([ompi/mca/op/cuda/Makefile])

    OPAL_CHECK_CUDA([op_cuda])
    OPAL_CHECK_CUDART([op_cudart])

    AS_IF([test "x$CUDA_SUPPORT" = "x1" && test "x$CUDART_SUPPORT" = "x1"],
          [$1],
          [$2])

    AC_SUBST([op_cuda_CPPFLAGS])
    AC_SUBST([op_cuda_LDFLAGS])
    AC_SUBST([op_cuda_LIBS])

    AC_SUBST([op_cudart_CPPFLAGS])
    AC_SUBST([op_cudart_LDFLAGS])
    AC_SUBST([op_cudart_LIBS])

])dnl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
# -*- text -*- | ||
# | ||
# Copyright (c) 2023 The University of Tennessee and The University | ||
# of Tennessee Research Foundation. All rights | ||
# reserved. | ||
# $COPYRIGHT$ | ||
# | ||
# Additional copyrights may follow | ||
# | ||
# $HEADER$ | ||
# | ||
# This is the US/English help file for Open MPI's CUDA operator component | ||
# | ||
[CUDA call failed] | ||
"CUDA call %s failed: %s: %s\n" |
Oops, something went wrong.