Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature: Add basic NCCL communication space backend #128

Draft
wants to merge 37 commits into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
838f74f
feat: add NCCL communication space
dssgabriel Nov 14, 2024
710a488
feat: add NCCL datatypes
dssgabriel Nov 14, 2024
59f9259
feat: add NCCL handle & request specializations
dssgabriel Nov 14, 2024
f815922
feat: add NCCL `send`/`recv` functions
dssgabriel Nov 14, 2024
e84fda8
feat: add NCCL `reduce`
dssgabriel Nov 14, 2024
37eefe3
feat: add NCCL files to CMake
dssgabriel Nov 14, 2024
5758e42
feat(nccl): correctly find and link NCCL
dssgabriel Nov 15, 2024
6b13a6c
hotfix(nccl): make it so that MPI must be enabled to use NCCL
dssgabriel Nov 15, 2024
32ae23b
feat(nccl): add class members
dssgabriel Nov 15, 2024
4ca9e13
fix(nccl): fix type name for `ncclDataType_t` and add missing u8 conv…
dssgabriel Nov 15, 2024
c99bb49
feat(nccl): add explicit ctors for NCCL reqs
dssgabriel Nov 15, 2024
3d3a96d
feat: add high-level reduction operators
dssgabriel Nov 15, 2024
03b6d61
feat(nccl): add reduction operator conversion
dssgabriel Nov 15, 2024
b36aa46
feat(nccl): update NCCL packer to use `KokkosComm_contiguous`
dssgabriel Nov 15, 2024
17abec6
fix(nccl): fix P2P & reduce NCCL functions
dssgabriel Nov 15, 2024
30563af
feat(nccl): add `allgather` support
dssgabriel Nov 15, 2024
100861e
feat(nccl): add high-level KokkosComm functions using NCCL backend
dssgabriel Nov 15, 2024
5fa71bd
chore: ignore clang cache and compile commands
dssgabriel Nov 15, 2024
788be5b
feat: add `constexpr` to traits everywhere possible
dssgabriel Nov 15, 2024
c40247e
chore: format
dssgabriel Nov 15, 2024
856b9fa
fix(nccl): move `Nccl` members to `Handle<Nccl>` specialization
dssgabriel Nov 19, 2024
3b52ff9
refactor(nccl): rename `get_inner` as `comm`
dssgabriel Nov 19, 2024
b7bd4b1
refactor: using a concept for defining reduction operators
dssgabriel Nov 19, 2024
a1d2201
refactor(nccl)!: enabling NCCL also forward-declares MPI
dssgabriel Nov 19, 2024
c1fa898
refactor: use template specializations for `Send`/`Recv` w/ NCCL
dssgabriel Nov 19, 2024
93dbbde
feat: add `reduce` & `allgather` available as experimental functions
dssgabriel Nov 19, 2024
9890159
refactor(nccl): use template specialization for `reduce`/`allgather`
dssgabriel Nov 19, 2024
1e73707
chore: format
dssgabriel Nov 19, 2024
eb1e6b1
fix(cmake): add missing files to target sources
dssgabriel Nov 19, 2024
9c185af
fix(nccl): correctly call collective implementations with `execute`
dssgabriel Dec 16, 2024
267077a
feat(nccl): add collective functions not needing `Handle` object
dssgabriel Dec 16, 2024
11789c7
Add a NCCL smoketest
cwpearson Dec 18, 2024
f3ba011
tests: restrict MPI-specific stuff to when MPI is enabled
cwpearson Dec 18, 2024
9ba1666
add KOKKOSCOMM_ENABLE_NCCL to config
cwpearson Dec 18, 2024
c42dfb4
Always compile general unit tests
cwpearson Dec 18, 2024
de95394
nccl: fix is_communication_space scoping issue
cwpearson Dec 18, 2024
6b25163
nccl: ncclDatatype_t -> ncclDataType_t
cwpearson Dec 18, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,6 @@ Testing
kokkos
docs/_build
.python-version
.venv
.venv
.cache
compile_commands.json
8 changes: 6 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,7 @@ cmake_minimum_required(VERSION 3.23)

project(
KokkosComm
LANGUAGES
CXX
LANGUAGES CXX
VERSION 0.2.0
DESCRIPTION "Experimental MPI interfaces (and more!) for the Kokkos C++ Performance Portability Programming ecosystem"
HOMEPAGE_URL "https://kokkos.org/kokkos-comm/"
Expand All @@ -32,16 +31,21 @@ project(
option(KokkosComm_ENABLE_PERFTESTS "Build KokkosComm perf tests" OFF)
option(KokkosComm_ENABLE_TESTS "Build KokkosComm tests" OFF)
option(KokkosComm_ENABLE_MPI "Build KokkosComm with MPI transport" ON)
option(KokkosComm_ENABLE_NCCL "Build KokkosComm with NCCL transport" OFF)

# Resolve options
set(KOKKOSCOMM_ENABLE_PERFTESTS ${KokkosComm_ENABLE_PERFTESTS} CACHE BOOL "" FORCE)
set(KOKKOSCOMM_ENABLE_TESTS ${KokkosComm_ENABLE_TESTS} CACHE BOOL "" FORCE)
set(KOKKOSCOMM_ENABLE_MPI ${KokkosComm_ENABLE_MPI} CACHE BOOL "" FORCE)
set(KOKKOSCOMM_ENABLE_NCCL ${KokkosComm_ENABLE_NCCL} CACHE BOOL "" FORCE)

find_package(Kokkos REQUIRED)
if(KOKKOSCOMM_ENABLE_MPI)
find_package(MPI REQUIRED)
endif()
if(KOKKOSCOMM_ENABLE_NCCL)
find_package(NCCL REQUIRED)
endif()

add_subdirectory(src)
if(KOKKOSCOMM_ENABLE_TESTS)
Expand Down
99 changes: 99 additions & 0 deletions cmake/FindNCCL.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
# Find the NCCL libraries.
# Copied from pytorch: https://github.com/pytorch/pytorch/blob/main/cmake/Modules/FindNCCL.cmake
#
# The following variables are optionally searched for defaults
# NCCL_ROOT: Base directory where all NCCL components are found
# NCCL_INCLUDE_DIR: Directory where NCCL header is found
# NCCL_LIB_DIR: Directory where NCCL library is found
#
# The following are set after configuration is done:
# NCCL_FOUND
# NCCL_INCLUDE_DIRS
# NCCL_LIBRARIES
#
# The path hints include CUDA_TOOLKIT_ROOT_DIR seeing as some folks
# install NCCL in the same location as the CUDA toolkit.
# See https://github.com/caffe2/caffe2/issues/1601

# User-overridable search locations. Each cache entry is seeded from the
# environment variable of the same name.
set(NCCL_INCLUDE_DIR $ENV{NCCL_INCLUDE_DIR} CACHE PATH "Folder contains NVIDIA NCCL headers")
set(NCCL_LIB_DIR $ENV{NCCL_LIB_DIR} CACHE PATH "Folder contains NVIDIA NCCL libraries")
set(NCCL_VERSION $ENV{NCCL_VERSION} CACHE STRING "Version of NCCL to build with")

# Warn when the legacy NCCL_ROOT_DIR environment variable is in use.
# The value is compared as a string on purpose: `if($ENV{NCCL_ROOT_DIR})`
# would expand the path and evaluate the path itself as a condition, which is
# false for ordinary directory names, so the warning would never be shown.
if(NOT "$ENV{NCCL_ROOT_DIR}" STREQUAL "")
  message(WARNING "NCCL_ROOT_DIR is deprecated. Please set NCCL_ROOT instead.")
endif()
list(APPEND NCCL_ROOT $ENV{NCCL_ROOT_DIR} ${CUDA_TOOLKIT_ROOT_DIR})
# Compatibility layer for CMake <3.12. From CMake 3.12 onwards NCCL_ROOT is
# taken into account automatically when searching for paths and libraries.
list(APPEND CMAKE_PREFIX_PATH ${NCCL_ROOT})

# Locate the directory containing nccl.h, trying the user-provided hint first.
find_path(NCCL_INCLUDE_DIRS NAMES nccl.h HINTS ${NCCL_INCLUDE_DIR})

# Pick the library name to search for. When a specific NCCL_VERSION was
# requested, the matching versioned file suffix is tried before the defaults.
if(NOT USE_STATIC_NCCL)
  set(NCCL_LIBNAME "nccl")
  if(NCCL_VERSION)
    list(PREPEND CMAKE_FIND_LIBRARY_SUFFIXES ".so.${NCCL_VERSION}")
  endif()
else()
  message(STATUS "USE_STATIC_NCCL is set. Linking with static NCCL library.")
  set(NCCL_LIBNAME "nccl_static")
  if(NCCL_VERSION)
    list(PREPEND CMAKE_FIND_LIBRARY_SUFFIXES ".a.${NCCL_VERSION}")
  endif()
endif()

# Locate the library itself, trying the user-provided directory hint first.
find_library(NCCL_LIBRARIES NAMES ${NCCL_LIBNAME} HINTS ${NCCL_LIB_DIR})

# Sets NCCL_FOUND once both the header directory and the library were located.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(NCCL DEFAULT_MSG NCCL_INCLUDE_DIRS NCCL_LIBRARIES)

# Post-discovery sanity check: report the NCCL version and verify that the
# header that was found and the library that was found belong to the same
# release.
if(NCCL_FOUND) # obtaining NCCL version and some sanity checks
set(NCCL_HEADER_FILE "${NCCL_INCLUDE_DIRS}/nccl.h")
message(STATUS "Determining NCCL version from ${NCCL_HEADER_FILE}...")
# Temporarily extend CMAKE_REQUIRED_INCLUDES so the symbol check below can
# see nccl.h; the previous value is restored once the check is done.
set(OLD_CMAKE_REQUIRED_INCLUDES ${CMAKE_REQUIRED_INCLUDES})
list(APPEND CMAKE_REQUIRED_INCLUDES ${NCCL_INCLUDE_DIRS})
include(CheckCXXSymbolExists)
check_cxx_symbol_exists(NCCL_VERSION_CODE nccl.h NCCL_VERSION_DEFINED)

if(NCCL_VERSION_DEFINED)
# Generate a small probe program. It prints the version macros from the
# header and compares NCCL_VERSION_CODE against the value the linked library
# reports through ncclGetVersion().
set(file "${PROJECT_BINARY_DIR}/detect_nccl_version.cc")
file(
WRITE
${file}
"
#include <iostream>
#include <nccl.h>
int main()
{
std::cout << NCCL_MAJOR << '.' << NCCL_MINOR << '.' << NCCL_PATCH << std::endl;

int x;
ncclGetVersion(&x);
return x == NCCL_VERSION_CODE;
}
"
)
# Build and run the probe. Its exit code is stored in NCCL_VERSION_MATCHED and
# its standard output in NCCL_VERSION_FROM_HEADER. Because the probe returns
# `x == NCCL_VERSION_CODE`, the exit code is 1 when header and library agree
# and 0 when they differ -- the reverse of the usual "0 means success" rule.
# NOTE(review): compile_result is never inspected, and a probe that builds but
# cannot be executed leaves a non-numeric value in NCCL_VERSION_MATCHED --
# confirm both situations are reported the way you intend.
try_run(
NCCL_VERSION_MATCHED
compile_result
${PROJECT_BINARY_DIR}
${file}
RUN_OUTPUT_VARIABLE NCCL_VERSION_FROM_HEADER
CMAKE_FLAGS "-DINCLUDE_DIRECTORIES=${NCCL_INCLUDE_DIRS}"
LINK_LIBRARIES ${NCCL_LIBRARIES}
)
# An exit code of 0 means the header and library versions differ (see above).
if(NOT NCCL_VERSION_MATCHED)
message(
FATAL_ERROR
"Found NCCL header version and library version do not match! \
(include: ${NCCL_INCLUDE_DIRS}, library: ${NCCL_LIBRARIES}) Please set NCCL_INCLUDE_DIR and NCCL_LIB_DIR manually."
)
endif()
message(STATUS "NCCL version: ${NCCL_VERSION_FROM_HEADER}")
else()
# nccl.h does not define NCCL_VERSION_CODE, so no exact version is available.
message(STATUS "NCCL version < 2.3.5-5")
endif()
# Restore the include list saved before the symbol check.
set(CMAKE_REQUIRED_INCLUDES ${OLD_CMAKE_REQUIRED_INCLUDES})

message(STATUS "Found NCCL (include: ${NCCL_INCLUDE_DIRS}, library: ${NCCL_LIBRARIES})")
mark_as_advanced(NCCL_ROOT_DIR NCCL_INCLUDE_DIRS NCCL_LIBRARIES)
endif()
1 change: 1 addition & 0 deletions cmake/KokkosComm_config.hpp.in
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,4 @@
#define KOKKOSCOMM_VERSION_PATCH @KOKKOSCOMM_VERSION_PATCH@

#cmakedefine KOKKOSCOMM_ENABLE_MPI
#cmakedefine KOKKOSCOMM_ENABLE_NCCL
64 changes: 56 additions & 8 deletions src/KokkosComm/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,6 @@
#
#@HEADER

set(TARGET_LIBRARY kokkoscomm)

add_library(KokkosComm INTERFACE)
add_library(KokkosComm::KokkosComm ALIAS KokkosComm)

Expand All @@ -26,7 +24,7 @@ target_sources(
FILE_SET kokkoscomm_public_headers
TYPE HEADERS
BASE_DIRS ${PROJECT_SOURCE_DIR}/src
FILES KokkosComm.hpp collective.hpp concepts.hpp fwd.hpp point_to_point.hpp traits.hpp
FILES KokkosComm.hpp collective.hpp concepts.hpp fwd.hpp point_to_point.hpp traits.hpp reduction_op.hpp
)

# Implementation detail headers
Expand All @@ -36,7 +34,7 @@ target_sources(
FILE_SET kokkoscomm_impl_headers
TYPE HEADERS
BASE_DIRS ${PROJECT_SOURCE_DIR}/src
FILES impl/KokkosComm_contiguous.hpp
FILES impl/KokkosComm_contiguous.hpp impl/send.hpp impl/recv.hpp impl/allgather.hpp impl/reduce.hpp
)

# Configuration header
Expand Down Expand Up @@ -83,14 +81,39 @@ if(KOKKOSCOMM_ENABLE_MPI)
)
endif()

# The NCCL backend headers are attached to the KokkosComm interface target
# only when the NCCL transport is enabled. They are split into two header file
# sets (public and implementation detail), both rooted at
# ${PROJECT_SOURCE_DIR}/src.
if(KOKKOSCOMM_ENABLE_NCCL)
# Public NCCL headers
target_sources(
KokkosComm
INTERFACE
FILE_SET kokkoscomm_nccl_headers
TYPE HEADERS
BASE_DIRS ${PROJECT_SOURCE_DIR}/src
FILES nccl/nccl.hpp nccl/handle.hpp nccl/req.hpp nccl/send.hpp nccl/recv.hpp nccl/allgather.hpp nccl/reduce.hpp
)

# Implementation detail NCCL headers
target_sources(
KokkosComm
INTERFACE
FILE_SET kokkoscomm_nccl_impl_headers
TYPE HEADERS
BASE_DIRS ${PROJECT_SOURCE_DIR}/src
FILES nccl/impl/pack_traits.hpp nccl/impl/packer.hpp nccl/impl/types.hpp
)
endif()

# --- COMPILE FLAGS --- #
include(CheckCXXCompilerFlag)

macro(kokkoscomm_check_and_add_compile_options)
set(target ${ARGV0})
set(flag ${ARGV1})

check_cxx_compiler_flag(${flag} HAS_${flag})
check_cxx_compiler_flag(
${flag}
HAS_${flag}
)
if(HAS_${flag})
target_compile_options(${target} INTERFACE ${flag})
endif()
Expand All @@ -100,7 +123,12 @@ endmacro()
add_library(KokkosCommFlags INTERFACE)
add_library(KokkosComm::KokkosCommFlags ALIAS KokkosCommFlags)
target_compile_features(KokkosCommFlags INTERFACE cxx_std_20)
set_target_properties(KokkosCommFlags PROPERTIES CXX_EXTENSIONS OFF)
set_target_properties(
KokkosCommFlags
PROPERTIES
CXX_EXTENSIONS
OFF
)

kokkoscomm_check_and_add_compile_options(KokkosCommFlags -Wall)
kokkoscomm_check_and_add_compile_options(KokkosCommFlags -Wextra)
Expand All @@ -113,14 +141,30 @@ kokkoscomm_check_and_add_compile_options(KokkosCommFlags -Wmissing-include-dirs)
kokkoscomm_check_and_add_compile_options(KokkosCommFlags -Wno-gnu-zero-variadic-macro-arguments)

# Linking
target_link_libraries(KokkosComm INTERFACE KokkosComm::KokkosCommFlags Kokkos::kokkos)
target_link_libraries(
KokkosComm
INTERFACE
KokkosComm::KokkosCommFlags
Kokkos::kokkos
)
if(KOKKOSCOMM_ENABLE_MPI)
target_link_libraries(KokkosComm INTERFACE MPI::MPI_CXX)
endif()
# NCCL is consumed through the plain NCCL_LIBRARIES / NCCL_INCLUDE_DIRS
# variables rather than through an imported target. The include directories
# are added as SYSTEM so that warnings from the NCCL headers are suppressed.
# NOTE(review): both variables are expected to come from the find_package(NCCL)
# call in the top-level CMakeLists.txt -- confirm they are populated before
# this point.
if(KOKKOSCOMM_ENABLE_NCCL)
# TODO: determine if this is the right way to link with NCCL
target_link_libraries(
KokkosComm
INTERFACE
${NCCL_LIBRARIES}
)
target_include_directories(KokkosComm SYSTEM INTERFACE ${NCCL_INCLUDE_DIRS})
endif()

# Install library
install(
TARGETS KokkosComm KokkosCommFlags
TARGETS
KokkosComm
KokkosCommFlags
EXPORT KokkosCommTargets
FILE_SET
kokkoscomm_public_headers
Expand All @@ -131,5 +175,9 @@ install(
FILE_SET
kokkoscomm_mpi_impl_headers
FILE_SET
kokkoscomm_nccl_headers
FILE_SET
kokkoscomm_nccl_impl_headers
FILE_SET
kokkoscomm_config_headers
)
39 changes: 34 additions & 5 deletions src/KokkosComm/collective.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,19 +16,48 @@

#pragma once

#include <utility>
#include <KokkosComm/fwd.hpp>

#include <Kokkos_Core.hpp>
#include <Kokkos_Core_fwd.hpp>

#include "fwd.hpp"
#include "concepts.hpp"
#include <utility>

namespace KokkosComm {

/// @brief Runs a barrier on the given handle.
///
/// Constructs a temporary `Impl::Barrier<ExecSpace, CommSpace>` from `h`; the
/// work is done by that constructor and the temporary is discarded right away.
///
/// @tparam ExecSpace Kokkos execution space of the handle.
/// @tparam CommSpace Communication space (backend) of the handle.
/// @param h Handle to run the barrier on. It is taken by rvalue reference and
///          handed on as an rvalue (`std::forward` with a non-reference type
///          argument performs the same cast as `std::move`).
template <KokkosExecutionSpace ExecSpace = Kokkos::DefaultExecutionSpace,
CommunicationSpace CommSpace = DefaultCommunicationSpace>
void barrier(Handle<ExecSpace, CommSpace> &&h) {
void barrier(Handle<ExecSpace, CommSpace>&& h) {
Impl::Barrier<ExecSpace, CommSpace>{std::forward<Handle<ExecSpace, CommSpace>>(h)};
}

namespace Experimental {

/// @brief Starts a reduction on an existing handle.
///
/// Thin dispatcher: the work is done by the `Impl::Reduce` specialization
/// selected by the template arguments.
///
/// @tparam RedOp Reduction operator tag. It does not appear in the function
///               parameters, so callers have to spell it out explicitly.
/// @param h    Handle the operation is issued on.
/// @param sv   View holding the local contribution.
/// @param rv   View receiving the result.
/// @param root Rank passed through to the backend as the reduction root.
/// @return The request object produced by the backend implementation.
template <KokkosView SendView, KokkosView RecvView, ReductionOperator RedOp,
          KokkosExecutionSpace ExecSpace = Kokkos::DefaultExecutionSpace,
          CommunicationSpace CommSpace   = DefaultCommunicationSpace>
auto reduce(Handle<ExecSpace, CommSpace>& h, const SendView& sv, RecvView& rv, int root) -> Req<CommSpace> {
  using Backend = Impl::Reduce<SendView, RecvView, RedOp, ExecSpace, CommSpace>;
  return Backend::execute(h, sv, rv, root);
}

/// @brief Starts a reduction without a caller-provided handle.
///
/// Default-constructs a `Handle<ExecSpace, CommSpace>` and delegates to the
/// handle-taking overload.
///
/// The handle is a named local because the handle-taking overload accepts a
/// non-const lvalue reference, which a temporary cannot bind to. The template
/// arguments are forwarded explicitly because `RedOp` cannot be deduced from
/// the call arguments.
///
/// NOTE(review): the handle is destroyed when this function returns. If the
/// returned `Req<CommSpace>` keeps a reference to it, the request outlives the
/// handle -- confirm against the backend's `Req` implementation.
///
/// @tparam RedOp Reduction operator tag; must be given explicitly.
/// @param sv   View holding the local contribution.
/// @param rv   View receiving the result.
/// @param root Rank passed through to the backend as the reduction root.
/// @return The request object produced by the backend implementation.
template <KokkosView SendView, KokkosView RecvView, ReductionOperator RedOp,
          KokkosExecutionSpace ExecSpace = Kokkos::DefaultExecutionSpace,
          CommunicationSpace CommSpace   = DefaultCommunicationSpace>
auto reduce(const SendView& sv, RecvView& rv, int root) -> Req<CommSpace> {
  Handle<ExecSpace, CommSpace> h{};
  return reduce<SendView, RecvView, RedOp, ExecSpace, CommSpace>(h, sv, rv, root);
}

/// @brief Starts an all-gather on an existing handle.
///
/// Thin dispatcher: the work is done by the `Impl::AllGather` specialization
/// selected by the template arguments.
///
/// @param h  Handle the operation is issued on.
/// @param sv View holding the local contribution.
/// @param rv View receiving the gathered data.
/// @return The request object produced by the backend implementation.
template <KokkosView SendView, KokkosView RecvView, KokkosExecutionSpace ExecSpace = Kokkos::DefaultExecutionSpace,
          CommunicationSpace CommSpace = DefaultCommunicationSpace>
auto allgather(Handle<ExecSpace, CommSpace>& h, const SendView& sv, RecvView& rv) -> Req<CommSpace> {
  using Backend = Impl::AllGather<SendView, RecvView, ExecSpace, CommSpace>;
  return Backend::execute(h, sv, rv);
}

/// @brief Starts an all-gather without a caller-provided handle.
///
/// Default-constructs a `Handle<ExecSpace, CommSpace>` and delegates to the
/// handle-taking overload.
///
/// The handle is a named local because the handle-taking overload accepts a
/// non-const lvalue reference, which a temporary cannot bind to.
///
/// NOTE(review): the handle is destroyed when this function returns. If the
/// returned `Req<CommSpace>` keeps a reference to it, the request outlives the
/// handle -- confirm against the backend's `Req` implementation.
///
/// @param sv View holding the local contribution.
/// @param rv View receiving the gathered data.
/// @return The request object produced by the backend implementation.
template <KokkosView SendView, KokkosView RecvView, KokkosExecutionSpace ExecSpace = Kokkos::DefaultExecutionSpace,
          CommunicationSpace CommSpace = DefaultCommunicationSpace>
auto allgather(const SendView& sv, RecvView& rv) -> Req<CommSpace> {
  Handle<ExecSpace, CommSpace> h{};
  return allgather<SendView, RecvView, ExecSpace, CommSpace>(h, sv, rv);
}

} // namespace Experimental

} // namespace KokkosComm
8 changes: 8 additions & 0 deletions src/KokkosComm/concepts.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,14 @@
namespace KokkosComm {

namespace Impl {
/// Primary template: an arbitrary type is not a KokkosComm communication
/// space. The trait only yields `true` for types that specialize it.
template <typename T>
struct is_communication_space : std::false_type {};

/// Primary template: an arbitrary type is not a reduction operator. The trait
/// only yields `true` for types that specialize it.
template <typename T>
struct is_reduction_operator : std::false_type {};

} // namespace Impl

template <typename T>
Expand All @@ -37,4 +42,7 @@ concept KokkosExecutionSpace = Kokkos::is_execution_space_v<T>;
template <typename T>
concept CommunicationSpace = KokkosComm::Impl::is_communication_space<T>::value;

/// Satisfied by any type `T` for which `Impl::is_reduction_operator<T>::value`
/// is true. The primary template of that trait is false, so a type has to
/// opt in through a specialization.
template <typename T>
concept ReductionOperator = KokkosComm::Impl::is_reduction_operator<T>::value;

} // namespace KokkosComm
Loading
Loading