[REVIEW] OPG infra and all-gather smoke test #820

Merged · 10 commits · Apr 10, 2020 · Changes from 8 commits
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -14,6 +14,7 @@
- PR #803 Enable Ninja build
- PR #804 Cythonize in parallel
- PR #807 Updating the Python docs
- PR #820 OPG infra and all-gather smoke test

## Bug Fixes
- PR #763 Update RAPIDS conda dependencies to v0.14
1 change: 1 addition & 0 deletions ci/gpu/build.sh
@@ -66,6 +66,7 @@ conda install -c nvidia -c rapidsai -c rapidsai-nightly -c conda-forge -c defaul
distributed>=2.12.0 \
dask-cudf=${MINOR_VERSION} \
dask-cuda=${MINOR_VERSION} \
nccl>=2.5 \
Member Author
@rlratzel anything else we should do in ci/ to enable OPG testing?

Contributor
I'm not very familiar with what an OPG build needs, but does MPI need to be enabled too? If so, will you need to set the BUILD_MPI cmake flag? If that's the case, you may also want to update the build.sh in the root of cugraph so users (and scripts like this one) can build from source correctly.

Member Author (@afender) · Apr 9, 2020
MPI can be enabled as long as the machine has:

  • MPI installed
  • multiple GPUs
  • the ability to launch the OPG gtests across multiple processes

Given these requirements, this cannot be the default path for users in build.sh as of now. I can expose an option in build.sh, though.

My question was about whether the CI machines have the capability to support this OPG testing from a DevOps perspective.
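
For reference, a purely hypothetical sketch of what such an opt-in could look like in the root build.sh (the "--mpi" flag name and its wiring are illustrative assumptions, not something this PR adds):

  # hypothetical sketch only; the flag name and repo layout are assumptions
  BUILD_MPI=OFF
  for arg in "$@"; do
      if [ "$arg" = "--mpi" ]; then
          BUILD_MPI=ON    # forwarded to the BUILD_MPI CMake option added in this PR
      fi
  done
  cmake -DBUILD_MPI=${BUILD_MPI} cpp/   # assumes invocation from the repo root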

Contributor
My question was about whether the CI machines have the capability to support this OPG testing from a DevOps perspective.

@raydouglass do you know the answer to this question?

Member
Multi-GPU testing is not available in gpuCI, and I don't think MPI is installed there, so it sounds like OPG testing should not be enabled by default.
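
Since gpuCI has neither multiple GPUs nor MPI, the OPG path can only be exercised on a local multi-GPU machine for now. A minimal configure/build sketch, assuming MPI and NCCL are already installed (for example from conda) and the usual out-of-source build under cpp/ (the paths below are assumptions, not prescribed by this PR):

  cd cpp && mkdir -p build && cd build
  cmake .. -DBUILD_MPI=ON                               # requires an MPI installation visible to CMake
  # cmake .. -DBUILD_MPI=ON -DNCCL_PATH=$CONDA_PREFIX   # optional: point at a specific NCCL install
  make -j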

libcypher-parser \
ipython=7.3* \
jupyterlab
1 change: 1 addition & 0 deletions conda/environments/cugraph_dev_cuda10.0.yml
@@ -13,6 +13,7 @@ dependencies:
- distributed>=2.12.0
- dask-cuda=0.14*
- dask-cudf=0.14*
- nccl>=2.5
- scipy
- networkx
- python-louvain
1 change: 1 addition & 0 deletions conda/environments/cugraph_dev_cuda10.1.yml
@@ -12,6 +12,7 @@ dependencies:
- distributed>=2.12.0
- dask-cuda=0.14*
- dask-cudf=0.14*
- nccl>=2.5
- scipy
- networkx
- python-louvain
1 change: 1 addition & 0 deletions conda/environments/cugraph_dev_cuda10.2.yml
@@ -12,6 +12,7 @@ dependencies:
- distributed>=2.12.0
- dask-cuda=0.14*
- dask-cudf=0.14*
- nccl>=2.5
- scipy
- networkx
- python-louvain
1 change: 1 addition & 0 deletions conda/recipes/cugraph/meta.yaml
@@ -32,6 +32,7 @@ requirements:
- python x.x
- libcugraph={{ version }}
- cudf={{ minor_version }}
- nccl>=2.5

#test:
# commands:
2 changes: 2 additions & 0 deletions conda/recipes/libcugraph/meta.yaml
@@ -30,9 +30,11 @@ requirements:
- cudatoolkit {{ cuda_version }}.*
- boost-cpp>=1.66
- libcypher-parser
- nccl>=2.5
run:
- libcudf={{ minor_version }}
- {{ pin_compatible('cudatoolkit', max_pin='x.x') }}
- nccl>=2.5

#test:
# commands:
21 changes: 19 additions & 2 deletions cpp/CMakeLists.txt
@@ -104,6 +104,13 @@ set(CMAKE_EXE_LINKER_FLAGS "-Wl,--disable-new-dtags")
option(BUILD_TESTS "Configure CMake to build tests"
ON)

option(BUILD_MPI "Build with MPI" OFF)
if (BUILD_MPI)
find_package(MPI REQUIRED)
Member Author (@afender) · Apr 9, 2020
MPI must be installed on the machine if this flag is ON

set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${MPI_C_COMPILE_FLAGS}")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MPI_CXX_COMPILE_FLAGS}")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${MPI_CXX_LINK_FLAGS}")
endif(BUILD_MPI)
###################################################################################################
# - cmake modules ---------------------------------------------------------------------------------

@@ -302,6 +309,15 @@ add_dependencies(gunrock cugunrock)

set_property(TARGET gunrock PROPERTY IMPORTED_LOCATION ${CUGUNROCK_DIR}/lib/libgunrock.a)

# - NCCL
if(NOT NCCL_PATH)
find_package(NCCL REQUIRED)
else()
message("-- Manually set NCCL PATH to ${NCCL_PATH}")
set(NCCL_INCLUDE_DIRS ${NCCL_PATH}/include)
set(NCCL_LIBRARIES ${NCCL_PATH}/lib/libnccl.so)
endif(NOT NCCL_PATH)

###################################################################################################
# - library targets -------------------------------------------------------------------------------

@@ -383,7 +399,6 @@ add_dependencies(cugraph cugunrock)

###################################################################################################
# - include paths ---------------------------------------------------------------------------------

target_include_directories(cugraph
PRIVATE
"${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}"
@@ -399,6 +414,8 @@ target_include_directories(cugraph
"${CUHORNET_INCLUDE_DIR}/primitives"
"${CMAKE_CURRENT_SOURCE_DIR}/src"
"${CUGUNROCK_DIR}/include"
"${NCCL_INCLUDE_DIRS}"
"${MPI_CXX_INCLUDE_PATH}"
PUBLIC
"${CMAKE_CURRENT_SOURCE_DIR}/include"
)
@@ -407,7 +424,7 @@ target_include_directories(cugraph
# - link libraries --------------------------------------------------------------------------------

target_link_libraries(cugraph PRIVATE
${CUDF_LIBRARY} ${RMM_LIBRARY} gunrock ${NVSTRINGS_LIBRARY} cublas cusparse curand cusolver cudart cuda ${LIBCYPHERPARSER_LIBRARY})
${CUDF_LIBRARY} ${RMM_LIBRARY} gunrock ${NVSTRINGS_LIBRARY} cublas cusparse curand cusolver cudart cuda ${LIBCYPHERPARSER_LIBRARY} ${MPI_CXX_LIBRARIES} ${NCCL_LIBRARIES})
if(OpenMP_CXX_FOUND)
target_link_libraries(cugraph PRIVATE
###################################################################################################
116 changes: 116 additions & 0 deletions cpp/cmake/Modules/FindNCCL.cmake
@@ -0,0 +1,116 @@
# Copyright (c) 2019, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Based on FindPNG.cmake from cmake 3.14.3

#[=======================================================================[.rst:
FindNCCL
--------

Find libnccl, the NVIDIA Collective Communication Library. A hint to find NCCL
can be provided by setting NCCL_INSTALL_DIR.

Imported targets
^^^^^^^^^^^^^^^^

This module defines the following :prop_tgt:`IMPORTED` target:

``NCCL::NCCL``
The libnccl library, if found.

Result variables
^^^^^^^^^^^^^^^^

This module will set the following variables in your project:

``NCCL_INCLUDE_DIRS``
where to find nccl.h , etc.
``NCCL_LIBRARIES``
the libraries to link against to use NCCL.
``NCCL_FOUND``
If false, do not try to use NCCL.
``NCCL_VERSION_STRING``
the version of the NCCL library found

#]=======================================================================]

find_path(NCCL_NCCL_INCLUDE_DIR nccl.h HINTS ${NCCL_INSTALL_DIR} PATH_SUFFIXES include)

#TODO: Does this need to support finding the static library?

list(APPEND NCCL_NAMES nccl libnccl)
set(_NCCL_VERSION_SUFFIXES 2)

foreach(v IN LISTS _NCCL_VERSION_SUFFIXES)
list(APPEND NCCL_NAMES nccl${v} libnccl${v})
endforeach()
unset(_NCCL_VERSION_SUFFIXES)
# For compatibility with versions prior to this multi-config search, honor
# any NCCL_LIBRARY that is already specified and skip the search.
if(NOT NCCL_LIBRARY)
find_library(NCCL_LIBRARY_RELEASE NAMES ${NCCL_NAMES} HINTS ${NCCL_INSTALL_DIR} PATH_SUFFIXES lib)
include(${CMAKE_ROOT}/Modules/SelectLibraryConfigurations.cmake)
select_library_configurations(NCCL)
mark_as_advanced(NCCL_LIBRARY_RELEASE)
endif()
unset(NCCL_NAMES)

# Set by select_library_configurations(), but we want the one from
# find_package_handle_standard_args() below.
unset(NCCL_FOUND)

if (NCCL_LIBRARY AND NCCL_NCCL_INCLUDE_DIR)
set(NCCL_INCLUDE_DIRS ${NCCL_NCCL_INCLUDE_DIR} )
set(NCCL_LIBRARY ${NCCL_LIBRARY})

if(NOT TARGET NCCL::NCCL)
add_library(NCCL::NCCL UNKNOWN IMPORTED)
set_target_properties(NCCL::NCCL PROPERTIES
INTERFACE_INCLUDE_DIRECTORIES "${NCCL_INCLUDE_DIRS}")
if(EXISTS "${NCCL_LIBRARY}")
set_target_properties(NCCL::NCCL PROPERTIES
IMPORTED_LINK_INTERFACE_LANGUAGES "C"
IMPORTED_LOCATION "${NCCL_LIBRARY}")
endif()
endif()
endif ()

if (NCCL_NCCL_INCLUDE_DIR AND EXISTS "${NCCL_NCCL_INCLUDE_DIR}/nccl.h")
file(STRINGS "${NCCL_NCCL_INCLUDE_DIR}/nccl.h" nccl_major_version_str REGEX "^#define[ \t]+NCCL_MAJOR[ \t]+[0-9]+")
string(REGEX REPLACE "^#define[ \t]+NCCL_MAJOR[ \t]+([0-9]+)" "\\1" nccl_major_version_str "${nccl_major_version_str}")

file(STRINGS "${NCCL_NCCL_INCLUDE_DIR}/nccl.h" nccl_minor_version_str REGEX "^#define[ \t]+NCCL_MINOR[ \t]+[0-9]+")
string(REGEX REPLACE "^#define[ \t]+NCCL_MINOR[ \t]+([0-9]+)" "\\1" nccl_minor_version_str "${nccl_minor_version_str}")

file(STRINGS "${NCCL_NCCL_INCLUDE_DIR}/nccl.h" nccl_patch_version_str REGEX "^#define[ \t]+NCCL_PATCH[ \t]+[0-9]+")
string(REGEX REPLACE "^#define[ \t]+NCCL_PATCH[ \t]+([0-9]+)" "\\1" nccl_patch_version_str "${nccl_patch_version_str}")

file(STRINGS "${NCCL_NCCL_INCLUDE_DIR}/nccl.h" nccl_suffix_version_str REGEX "^#define[ \t]+NCCL_SUFFIX[ \t]+\".*\"")
string(REGEX REPLACE "^#define[ \t]+NCCL_SUFFIX[ \t]+\"(.*)\"" "\\1" nccl_suffix_version_str "${nccl_suffix_version_str}")

set(NCCL_VERSION_STRING "${nccl_major_version_str}.${nccl_minor_version_str}.${nccl_patch_version_str}${nccl_suffix_version_str}")

unset(nccl_major_version_str)
unset(nccl_minor_version_str)
unset(nccl_patch_version_str)
unset(nccl_suffix_version_str)
endif ()

include(${CMAKE_ROOT}/Modules/FindPackageHandleStandardArgs.cmake)
find_package_handle_standard_args(NCCL
REQUIRED_VARS NCCL_LIBRARY NCCL_NCCL_INCLUDE_DIR
VERSION_VAR NCCL_VERSION_STRING)

mark_as_advanced(NCCL_NCCL_INCLUDE_DIR NCCL_LIBRARY)
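
A usage sketch for the two ways of locating NCCL that this PR wires up: the find module honors NCCL_INSTALL_DIR as a search hint, while the top-level CMakeLists.txt accepts NCCL_PATH to bypass find_package(NCCL) entirely. The conda prefix below is just an example location, not a requirement:

  cmake .. -DNCCL_INSTALL_DIR=$CONDA_PREFIX   # hint consumed by FindNCCL.cmake
  cmake .. -DNCCL_PATH=$CONDA_PREFIX          # skip the module; expects include/ and lib/libnccl.so under this prefix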
16 changes: 16 additions & 0 deletions cpp/tests/CMakeLists.txt
@@ -44,6 +44,12 @@ function(ConfigureTest CMAKE_TEST_NAME CMAKE_TEST_SRC CMAKE_EXTRA_LIBS)
target_link_libraries(${CMAKE_TEST_NAME}
PRIVATE
gtest gmock_main gmock cugraph ${CUDF_LIBRARY} ${RMM_LIBRARY} ${CMAKE_EXTRA_LIBS} cudart cuda)
if (BUILD_MPI)
include_directories(include ${MPI_CXX_INCLUDE_PATH} ${NCCL_INCLUDE_DIRS})
target_link_libraries(${CMAKE_TEST_NAME} PRIVATE ${MPI_C_LIBRARIES} ${NCCL_LIBRARIES} )
target_compile_options(${CMAKE_TEST_NAME} PUBLIC ${MPI_C_COMPILE_FLAGS})
endif(BUILD_MPI)

if(OpenMP_CXX_FOUND)
target_link_libraries(${CMAKE_TEST_NAME} PRIVATE
###################################################################################################
@@ -271,6 +277,16 @@ set(FIND_MATCHES_TEST_SRC

ConfigureTest(FIND_MATCHES_TEST "${FIND_MATCHES_TEST_SRC}" "")

###################################################################################################
#-NCCL tests ---------------------------------------------------------------------

if (BUILD_MPI)
set(NCCL_TEST_SRC
"${CMAKE_CURRENT_SOURCE_DIR}/nccl/nccl_test.cu")

ConfigureTest(NCCL_TEST "${NCCL_TEST_SRC}" "")
endif(BUILD_MPI)

###################################################################################################
### enable testing ################################################################################
###################################################################################################
75 changes: 75 additions & 0 deletions cpp/tests/nccl/nccl_test.cu
@@ -0,0 +1,75 @@
#include "gtest/gtest.h"
#include <cugraph.h>
#include "test_utils.h"
#include <string.h>
#include <mpi.h>
#include <nccl.h>
#include <thrust/device_vector.h>
#include <thrust/functional.h>

TEST(allgather, success)
{
int p = 1, r = 0, dev = 0, dev_count = 0;
MPICHECK(MPI_Comm_size(MPI_COMM_WORLD, &p));
MPICHECK(MPI_Comm_rank(MPI_COMM_WORLD, &r));
CUDA_RT_CALL(cudaGetDeviceCount(&dev_count));

// shortcut for device ID here
// may need something smarter later
dev = r%dev_count;
// cudaSetDevice must happen before ncclCommInitRank
CUDA_RT_CALL(cudaSetDevice(dev));

// print info
printf("# Rank %2d - Pid %6d - device %2d\n",
r, getpid(), dev);

// NCCL init
ncclUniqueId id;
ncclComm_t comm;
if (r == 0) NCCLCHECK(ncclGetUniqueId(&id));
MPICHECK(MPI_Bcast((void *)&id, sizeof(id), MPI_BYTE, 0, MPI_COMM_WORLD));
NCCLCHECK(ncclCommInitRank(&comm, p, id, r));
MPICHECK(MPI_Barrier(MPI_COMM_WORLD));

//allocate device buffers
int size = 3;
float *sendbuff, *recvbuff;
CUDA_RT_CALL(cudaMalloc(&sendbuff, size * sizeof(float)));
CUDA_RT_CALL(cudaMalloc(&recvbuff, size*p * sizeof(float)));

//init values
thrust::fill(thrust::device_pointer_cast(sendbuff),
thrust::device_pointer_cast(sendbuff + size), (float)r);
thrust::fill(thrust::device_pointer_cast(recvbuff),
thrust::device_pointer_cast(recvbuff + size*p), -1.0f);

// ncclAllGather
NCCLCHECK(ncclAllGather((const void*)sendbuff, (void*)recvbuff, size, ncclFloat, comm, cudaStreamDefault));

// expect each rankid printed size times in ascending order
if (r == 0) {
thrust::device_ptr<float> dev_ptr(recvbuff);
std::cout.precision(15);
thrust::copy(dev_ptr, dev_ptr + size*p, std::ostream_iterator<float>(std::cout, " "));
std::cout << std::endl;
}

//free device buffers
CUDA_RT_CALL(cudaFree(sendbuff));
CUDA_RT_CALL(cudaFree(recvbuff));

//finalizing NCCL
NCCLCHECK(ncclCommDestroy(comm));
}

int main( int argc, char** argv )
{
testing::InitGoogleTest(&argc,argv);
MPI_Init(&argc, &argv);
rmmInitialize(nullptr);
int rc = RUN_ALL_TESTS();
rmmFinalize();
MPI_Finalize();
return rc;
}
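
Exercising the all-gather requires launching the test as multiple MPI ranks on a multi-GPU machine. A possible invocation, assuming the gtest binary ends up as NCCL_TEST under the build tree's gtests/ directory (the exact path depends on the local build layout):

  mpirun -np 2 ./gtests/NCCL_TEST
  # with 2 ranks and size = 3, rank 0 should print: 0 0 0 1 1 1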
17 changes: 17 additions & 0 deletions cpp/tests/test_utils.h
@@ -62,6 +62,23 @@ extern "C" {
}
#endif

#define NCCLCHECK(cmd) do { \
ncclResult_t nccl_status = cmd; \
if (nccl_status!= ncclSuccess) { \
printf("NCCL failure %s:%d '%s'\n", \
__FILE__,__LINE__,ncclGetErrorString(nccl_status)); \
} \
} while(0)

#define MPICHECK(cmd) do { \
int e = cmd; \
if( e != MPI_SUCCESS ) { \
printf("Failed: MPI error %s:%d '%d'\n", \
__FILE__,__LINE__, e); \
exit(EXIT_FAILURE); \
} \
} while(0)

std::function<void(gdf_column*)> gdf_col_deleter = [](gdf_column* col){
if (col) {
col->size = 0;
2 changes: 1 addition & 1 deletion python/setup.py
@@ -73,7 +73,7 @@
cuda_include_dir],
library_dirs=[get_python_lib()],
runtime_library_dirs=[conda_lib_dir],
libraries=['cugraph', 'cudf'],
libraries=['cugraph', 'cudf', 'nccl'],
language='c++',
extra_compile_args=['-std=c++14'])
]