RMM integration plugin #5873

Merged: 48 commits, Aug 12, 2020
Changes from 9 commits
Commits
b7a322d
[CI] Add RMM as an optional dependency
hcho3 Jul 8, 2020
e15845d
Replace caching allocator with pool allocator from RMM
hcho3 Jul 8, 2020
812c209
Revert "Replace caching allocator with pool allocator from RMM"
hcho3 Jul 9, 2020
a891112
Use rmm::mr::get_default_resource()
hcho3 Jul 9, 2020
b5eb54d
Try setting default resource (doesn't work yet)
hcho3 Jul 9, 2020
6abd4c0
Allocate pool_mr in the heap
hcho3 Jul 9, 2020
2bdbc23
Prevent leaking pool_mr handle
hcho3 Jul 9, 2020
c723632
Separate EXPECT_DEATH() in separate test suite suffixed DeathTest
hcho3 Jul 9, 2020
78c2254
Turn off death tests for RMM
hcho3 Jul 9, 2020
a520fa1
Address reviewer's feedback
hcho3 Jul 9, 2020
a73391c
Prevent leaking of cuda_mr
hcho3 Jul 10, 2020
309efc0
Merge remote-tracking branch 'origin/master' into add_rmm
hcho3 Jul 22, 2020
fa4ec11
Fix Jenkinsfile syntax
hcho3 Jul 22, 2020
871fc29
Remove unnecessary function in Jenkinsfile
hcho3 Jul 22, 2020
48051df
[CI] Install NCCL into RMM container
hcho3 Jul 22, 2020
c0a05ce
Run Python tests
hcho3 Jul 22, 2020
c12e0a6
Try building with RMM, CUDA 10.0
hcho3 Jul 22, 2020
a3e0e2f
Do not use RMM for CUDA 10.0 target
hcho3 Jul 22, 2020
3aeab69
Actually test for test_rmm flag
hcho3 Jul 22, 2020
862d580
Fix TestPythonGPU
hcho3 Jul 22, 2020
2a064bf
Use CNMeM allocator, since pool allocator doesn't yet support multiGPU
hcho3 Jul 29, 2020
ab4e7b4
Merge branch 'master' into add_rmm
hcho3 Jul 29, 2020
dd05d7b
Merge remote-tracking branch 'origin/master' into add_rmm
hcho3 Jul 29, 2020
a4da8c5
Merge remote-tracking branch 'upstream/master' into add_rmm
hcho3 Jul 29, 2020
789021f
Use 10.0 container to build RMM-enabled XGBoost
hcho3 Jul 30, 2020
f27d836
Revert "Use 10.0 container to build RMM-enabled XGBoost"
hcho3 Jul 31, 2020
a4b86a9
Fix Jenkinsfile
hcho3 Jul 31, 2020
e5eb262
[CI] Assign larger /dev/shm to NCCL
hcho3 Jul 31, 2020
4cf7f00
Use 10.2 artifact to run multi-GPU Python tests
hcho3 Jul 31, 2020
d023a50
Add CUDA 10.0 -> 11.0 cross-version test; remove CUDA 10.0 target
hcho3 Jul 31, 2020
abc64a3
Rename Conda env rmm_test -> gpu_test
hcho3 Jul 31, 2020
1e7e42e
Use env var to opt into CNMeM pool for C++ tests
hcho3 Jul 31, 2020
f1eeaff
Merge branch 'master' into add_rmm
hcho3 Jul 31, 2020
1069ae0
Use identical CUDA version for RMM builds and tests
hcho3 Jul 31, 2020
99a7520
Use Pytest fixtures to enable RMM pool in Python tests
hcho3 Aug 6, 2020
ecc16ec
Merge remote-tracking branch 'upstream/master' into add_rmm
hcho3 Aug 7, 2020
92d1481
Move RMM to plugin/CMakeLists.txt; use PLUGIN_RMM
hcho3 Aug 7, 2020
e74fd0d
Use per-device MR; use command arg in gtest
hcho3 Aug 8, 2020
2ee04b3
Set CMake prefix path to use Conda env
hcho3 Aug 8, 2020
87422a2
Use 0.15 nightly version of RMM
hcho3 Aug 8, 2020
9021a75
Remove unnecessary header
hcho3 Aug 8, 2020
377580a
Fix a unit test when cudf is missing
Aug 8, 2020
2f3c532
Merge remote-tracking branch 'upstream/master' into add_rmm
hcho3 Aug 9, 2020
3df7cc3
Add RMM demos
hcho3 Aug 10, 2020
567fb33
Remove print()
hcho3 Aug 10, 2020
1e63c46
Use HostDeviceVector in GPU predictor
hcho3 Aug 11, 2020
ad216c5
Simplify pytest setup; use LocalCUDACluster fixture
hcho3 Aug 11, 2020
b4195cd
Address reviewers' comments
hcho3 Aug 11, 2020
7 changes: 7 additions & 0 deletions CMakeLists.txt
@@ -44,6 +44,7 @@ option(USE_NCCL "Build with NCCL to enable distributed GPU support." OFF)
option(BUILD_WITH_SHARED_NCCL "Build with shared NCCL library." OFF)
set(GPU_COMPUTE_VER "" CACHE STRING
"Semicolon separated list of compute versions to be built against, e.g. '35;61'")
option(USE_RMM "Build with RAPIDS Memory Manager (RMM)" OFF)
## Copied From dmlc
option(USE_HDFS "Build with HDFS support" OFF)
option(USE_AZURE "Build with AZURE support" OFF)
@@ -79,6 +80,9 @@ endif (R_LIB AND GOOGLE_TEST)
if (USE_AVX)
message(SEND_ERROR "The option 'USE_AVX' is deprecated as experimental AVX features have been removed from XGBoost.")
endif (USE_AVX)
if (USE_RMM AND NOT (USE_CUDA))
message(SEND_ERROR "`USE_RMM` must be enabled with `USE_CUDA` flag.")
endif (USE_RMM AND NOT (USE_CUDA))

#-- Sanitizer
if (USE_SANITIZER)
@@ -170,6 +174,9 @@ endif (R_LIB)
# Plugin
add_subdirectory(${xgboost_SOURCE_DIR}/plugin)

# 3rd-party libs
include(cmake/ExternalLibs.cmake)

#-- library
if (BUILD_STATIC_LIB)
add_library(xgboost STATIC)
34 changes: 34 additions & 0 deletions Jenkinsfile
@@ -66,6 +66,7 @@ pipeline {
'build-cpu-non-omp': { BuildCPUNonOmp() },
'build-gpu-cuda10.0': { BuildCUDA(cuda_version: '10.0') },
'build-gpu-cuda10.1': { BuildCUDA(cuda_version: '10.1') },
'build-gpu-rmm-cuda10.2': { BuildCUDAWithRMM(cuda_version: '10.2') },
'build-jvm-packages': { BuildJVMPackages(spark_version: '2.4.3') },
'build-jvm-doc': { BuildJVMDoc() }
])
@@ -84,6 +85,7 @@ pipeline {
'test-python-mgpu-cuda10.1': { TestPythonGPU(cuda_version: '10.1', multi_gpu: true) },
'test-cpp-gpu': { TestCppGPU(cuda_version: '10.1') },
'test-cpp-mgpu': { TestCppGPU(cuda_version: '10.1', multi_gpu: true) },
'test-rmm-cpp-gpu': { TestCppGPUWithRMM(cuda_version: '10.2') },
'test-jvm-jdk8': { CrossTestJVMwithJDK(jdk_version: '8', spark_version: '2.4.3') },
'test-jvm-jdk11': { CrossTestJVMwithJDK(jdk_version: '11') },
'test-jvm-jdk12': { CrossTestJVMwithJDK(jdk_version: '12') },
Expand Down Expand Up @@ -262,6 +264,22 @@ def BuildCUDA(args) {
}
}

def BuildCUDAWithRMM(args) {
node('linux && cpu_build') {
unstash name: 'srcs'
echo "Build with CUDA ${args.cuda_version} and RMM"
def container_type = "rmm"
def docker_binary = "docker"
def docker_args = "--build-arg CUDA_VERSION=${args.cuda_version}"
sh """
${dockerRun} ${container_type} ${docker_binary} ${docker_args} tests/ci_build/build_via_cmake.sh --conda-env=rmm_test -DUSE_CUDA=ON -DUSE_RMM=ON
"""
echo 'Stashing C++ test executable (testxgboost)...'
stash name: 'xgboost_rmm_cpp_tests', includes: 'build/testxgboost'
deleteDir()
}
}

def BuildJVMPackages(args) {
node('linux && cpu') {
unstash name: 'srcs'
@@ -368,6 +386,22 @@ def TestCppGPU(args) {
}
}

def TestCppGPUWithRMM(args) {
node('linux && gpu') {
unstash name: 'xgboost_rmm_cpp_tests'
unstash name: 'srcs'
echo "Test C++, CUDA ${args.cuda_version} with RMM"
def container_type = "rmm"
def docker_binary = "nvidia-docker"
def docker_args = "--build-arg CUDA_VERSION=${args.cuda_version}"
echo "Using a single GPU"
sh """
${dockerRun} ${container_type} ${docker_binary} ${docker_args} bash -c "source activate rmm_test && build/testxgboost --gtest_filter=-*.MGPU_*:*DeathTest.*"
"""
deleteDir()
}
}

def CrossTestJVMwithJDK(args) {
node('linux && cpu') {
unstash name: 'xgboost4j_jar'
27 changes: 27 additions & 0 deletions cmake/ExternalLibs.cmake
@@ -0,0 +1,27 @@
# RMM
if (USE_RMM)
# Use Conda env if available
if(DEFINED ENV{CONDA_PREFIX})
set(CMAKE_PREFIX_PATH "$ENV{CONDA_PREFIX};${CMAKE_PREFIX_PATH}")
message(STATUS "Detected Conda environment, CMAKE_PREFIX_PATH set to: ${CMAKE_PREFIX_PATH}")
else()
message(STATUS "No Conda environment detected")
endif()

find_path(RMM_INCLUDE "rmm"
HINTS "$ENV{RMM_ROOT}/include")

find_library(RMM_LIBRARY "rmm"
HINTS "$ENV{RMM_ROOT}/lib" "$ENV{RMM_ROOT}/build")

if ((NOT RMM_LIBRARY) OR (NOT RMM_INCLUDE))
message(FATAL_ERROR "Could not locate RMM library")
endif ()

message(STATUS "RMM: RMM_LIBRARY set to ${RMM_LIBRARY}")
message(STATUS "RMM: RMM_INCLUDE set to ${RMM_INCLUDE}")

target_include_directories(objxgboost PUBLIC ${RMM_INCLUDE})
target_link_libraries(objxgboost PUBLIC ${RMM_LIBRARY} cuda)
target_compile_definitions(objxgboost PUBLIC -DXGBOOST_USE_RMM=1)
endif ()
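In practice this means that with RMM installed in the active Conda environment (for CI, the `rmm_test` environment defined below), configuring with `-DUSE_CUDA=ON -DUSE_RMM=ON` is enough for the headers and library to be found; outside Conda, the `RMM_ROOT` environment variable points the search at a manual RMM install.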
26 changes: 22 additions & 4 deletions src/common/device_helpers.cuh
@@ -36,7 +36,12 @@

#ifdef XGBOOST_USE_NCCL
#include "nccl.h"
#endif
#endif // XGBOOST_USE_NCCL

#if defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
#include "rmm/mr/device/default_memory_resource.hpp"
#include "rmm/mr/device/thrust_allocator_adaptor.hpp"
#endif // defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1

#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600 || defined(__clang__)

@@ -370,12 +375,21 @@ inline void DebugSyncDevice(std::string file="", int32_t line = -1) {
}

namespace detail {

#if defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
template <typename T>
using XGBBaseDeviceAllocator = rmm::mr::thrust_allocator<T>;
#else // defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
template <typename T>
using XGBBaseDeviceAllocator = thrust::device_malloc_allocator<T>;
#endif // defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1

/**
* \brief Default memory allocator, uses cudaMalloc/Free and logs allocations if verbose.
*/
template <class T>
struct XGBDefaultDeviceAllocatorImpl : thrust::device_malloc_allocator<T> {
using SuperT = thrust::device_malloc_allocator<T>;
struct XGBDefaultDeviceAllocatorImpl : XGBBaseDeviceAllocator<T> {
using SuperT = XGBBaseDeviceAllocator<T>;
using pointer = thrust::device_ptr<T>; // NOLINT
template<typename U>
struct rebind // NOLINT
@@ -391,10 +405,14 @@ struct XGBDefaultDeviceAllocatorImpl : thrust::device_malloc_allocator<T> {
GlobalMemoryLogger().RegisterDeallocation(ptr.get(), n * sizeof(T));
return SuperT::deallocate(ptr, n);
}
#if defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
XGBDefaultDeviceAllocatorImpl() : SuperT(rmm::mr::get_default_resource(), cudaStream_t{0}) {}
#endif // defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
};

/**
* \brief Caching memory allocator, uses cub::CachingDeviceAllocator as a back-end and logs allocations if verbose. Does not initialise memory on construction.
* \brief Caching memory allocator, uses cub::CachingDeviceAllocator as a back-end and logs
* allocations if verbose. Does not initialise memory on construction.
*/
template <class T>
struct XGBCachingDeviceAllocatorImpl : thrust::device_malloc_allocator<T> {
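For readers unfamiliar with the adaptor, the change above boils down to picking a base allocator at compile time and, when RMM is enabled, constructing it against RMM's default memory resource. The following is a minimal, self-contained sketch of that pattern, assuming the RMM 0.14-era headers and constructor that the patch itself uses; the surrounding names (`Allocator`, `DeviceVector`) are illustrative only and not part of the PR.

```cpp
// Sketch of the XGBBaseDeviceAllocator / XGBDefaultDeviceAllocatorImpl pattern
// added above, assuming RMM ~0.14 headers and API.
#include <thrust/device_vector.h>

#if defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
#include <rmm/mr/device/default_memory_resource.hpp>
#include <rmm/mr/device/thrust_allocator_adaptor.hpp>
template <typename T>
using BaseAllocator = rmm::mr::thrust_allocator<T>;        // allocates through RMM's default resource
#else
#include <thrust/device_malloc_allocator.h>
template <typename T>
using BaseAllocator = thrust::device_malloc_allocator<T>;  // plain cudaMalloc/cudaFree
#endif

template <typename T>
struct Allocator : public BaseAllocator<T> {
  using pointer = thrust::device_ptr<T>;  // NOLINT
  template <typename U>
  struct rebind {                         // NOLINT
    using other = Allocator<U>;           // keep rebinding inside this allocator
  };
#if defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
  // RMM's Thrust adaptor is bound to a memory resource and a stream, so forward
  // whatever resource is currently installed as the process-wide default.
  Allocator() : BaseAllocator<T>(rmm::mr::get_default_resource(), cudaStream_t{0}) {}
#endif
};

// Containers parameterised on the allocator pick up RMM transparently when the
// plugin is enabled and fall back to ordinary device allocation otherwise.
template <typename T>
using DeviceVector = thrust::device_vector<T, Allocator<T>>;
```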
39 changes: 39 additions & 0 deletions tests/ci_build/Dockerfile.rmm
@@ -0,0 +1,39 @@
ARG CUDA_VERSION
FROM nvidia/cuda:$CUDA_VERSION-devel-ubuntu18.04

# Environment
ENV DEBIAN_FRONTEND noninteractive
SHELL ["/bin/bash", "-c"] # Use Bash as shell

# Install all basic requirements
RUN \
apt-get update && \
apt-get install -y wget unzip bzip2 libgomp1 build-essential ninja-build git && \
# Python
wget -O Miniconda3.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
bash Miniconda3.sh -b -p /opt/python && \
# CMake
wget -nv -nc https://cmake.org/files/v3.13/cmake-3.13.0-Linux-x86_64.sh --no-check-certificate && \
bash cmake-3.13.0-Linux-x86_64.sh --skip-license --prefix=/usr

ENV PATH=/opt/python/bin:$PATH

# Create new Conda environment with RMM
RUN \
conda create -n rmm_test -c nvidia -c rapidsai -c conda-forge -c defaults \
python=3.7 rmm=0.14 cudatoolkit=$CUDA_VERSION

ENV GOSU_VERSION 1.10

# Install lightweight sudo (not bound to TTY)
RUN set -ex; \
wget -O /usr/local/bin/gosu "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-amd64" && \
chmod +x /usr/local/bin/gosu && \
gosu nobody true

# Default entry-point to use if running locally
# It will preserve attributes of created files
COPY entrypoint.sh /scripts/

WORKDIR /workspace
ENTRYPOINT ["/scripts/entrypoint.sh"]
13 changes: 12 additions & 1 deletion tests/ci_build/build_via_cmake.sh
@@ -1,10 +1,21 @@
#!/usr/bin/env bash
set -e

if [[ "$1" == --conda-env=* ]]
then
conda_env=$(echo "$1" | sed 's/^--conda-env=//g' -)
echo "Activating Conda environment ${conda_env}"
shift 1
cmake_args="$@"
source activate ${conda_env}
else
cmake_args="$@"
fi

rm -rf build
mkdir build
cd build
cmake .. "$@" -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON -DCMAKE_VERBOSE_MAKEFILE=ON
cmake .. ${cmake_args} -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON -DCMAKE_VERBOSE_MAKEFILE=ON
make clean
make -j$(nproc)
cd ..
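For reference, the RMM build job above calls this script as `tests/ci_build/build_via_cmake.sh --conda-env=rmm_test -DUSE_CUDA=ON -DUSE_RMM=ON`: the leading `--conda-env` argument activates the named Conda environment, and every remaining argument is passed through to CMake unchanged.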
82 changes: 65 additions & 17 deletions tests/cpp/common/test_span.cc
@@ -97,11 +97,6 @@ TEST(Span, FromPtrLen) {
}
}

{
auto lazy = [=]() {Span<float const, 16> tmp (arr, 5);};
EXPECT_DEATH(lazy(), "\\[xgboost\\] Condition .* failed.\n");
}

// dynamic extent
{
Span<float, 16> s (arr, 16);
@@ -122,6 +117,15 @@
}
}

TEST(SpanDeathTest, FromPtrLen) {
float arr[16];
InitializeRange(arr, arr+16);
{
auto lazy = [=]() {Span<float const, 16> tmp (arr, 5);};
EXPECT_DEATH(lazy(), "\\[xgboost\\] Condition .* failed.\n");
}
}

TEST(Span, FromFirstLast) {
float arr[16];
InitializeRange(arr, arr+16);
@@ -285,7 +289,13 @@ TEST(Span, ElementAccess) {
ASSERT_EQ(i, arr[j]);
++j;
}
}

TEST(SpanDeathTest, ElementAccess) {
float arr[16];
InitializeRange(arr, arr + 16);

Span<float> s (arr);
EXPECT_DEATH(s[16], "\\[xgboost\\] Condition .* failed.\n");
EXPECT_DEATH(s[-1], "\\[xgboost\\] Condition .* failed.\n");

@@ -312,7 +322,9 @@ TEST(Span, FrontBack) {
ASSERT_EQ(s.front(), 0);
ASSERT_EQ(s.back(), 3);
}
}

TEST(SpanDeathTest, FrontBack) {
{
Span<float, 0> s;
EXPECT_DEATH(s.front(), "\\[xgboost\\] Condition .* failed.\n");
@@ -340,10 +352,6 @@ TEST(Span, FirstLast) {
for (size_t i = 0; i < first.size(); ++i) {
ASSERT_EQ(first[i], arr[i]);
}
auto constexpr kOne = static_cast<Span<float, 4>::index_type>(-1);
EXPECT_DEATH(s.first<kOne>(), "\\[xgboost\\] Condition .* failed.\n");
EXPECT_DEATH(s.first<17>(), "\\[xgboost\\] Condition .* failed.\n");
EXPECT_DEATH(s.first<32>(), "\\[xgboost\\] Condition .* failed.\n");
}

{
@@ -359,10 +367,6 @@
for (size_t i = 0; i < last.size(); ++i) {
ASSERT_EQ(last[i], arr[i+12]);
}
auto constexpr kOne = static_cast<Span<float, 4>::index_type>(-1);
EXPECT_DEATH(s.last<kOne>(), "\\[xgboost\\] Condition .* failed.\n");
EXPECT_DEATH(s.last<17>(), "\\[xgboost\\] Condition .* failed.\n");
EXPECT_DEATH(s.last<32>(), "\\[xgboost\\] Condition .* failed.\n");
}

// dynamic extent
@@ -379,10 +383,6 @@
ASSERT_EQ(first[i], s[i]);
}

EXPECT_DEATH(s.first(-1), "\\[xgboost\\] Condition .* failed.\n");
EXPECT_DEATH(s.first(17), "\\[xgboost\\] Condition .* failed.\n");
EXPECT_DEATH(s.first(32), "\\[xgboost\\] Condition .* failed.\n");

delete [] arr;
}

@@ -399,6 +399,50 @@ TEST(Span, FirstLast) {
ASSERT_EQ(s[12 + i], last[i]);
}

delete [] arr;
}
}

TEST(SpanDeathTest, FirstLast) {
// static extent
{
float arr[16];
InitializeRange(arr, arr + 16);

Span<float> s (arr);
auto constexpr kOne = static_cast<Span<float, 4>::index_type>(-1);
EXPECT_DEATH(s.first<kOne>(), "\\[xgboost\\] Condition .* failed.\n");
EXPECT_DEATH(s.first<17>(), "\\[xgboost\\] Condition .* failed.\n");
EXPECT_DEATH(s.first<32>(), "\\[xgboost\\] Condition .* failed.\n");
}

{
float arr[16];
InitializeRange(arr, arr + 16);

Span<float> s (arr);
auto constexpr kOne = static_cast<Span<float, 4>::index_type>(-1);
EXPECT_DEATH(s.last<kOne>(), "\\[xgboost\\] Condition .* failed.\n");
EXPECT_DEATH(s.last<17>(), "\\[xgboost\\] Condition .* failed.\n");
EXPECT_DEATH(s.last<32>(), "\\[xgboost\\] Condition .* failed.\n");
}

// dynamic extent
{
float *arr = new float[16];
InitializeRange(arr, arr + 16);
Span<float> s (arr, 16);
EXPECT_DEATH(s.first(-1), "\\[xgboost\\] Condition .* failed.\n");
EXPECT_DEATH(s.first(17), "\\[xgboost\\] Condition .* failed.\n");
EXPECT_DEATH(s.first(32), "\\[xgboost\\] Condition .* failed.\n");

delete [] arr;
}

{
float *arr = new float[16];
InitializeRange(arr, arr + 16);
Span<float> s (arr, 16);
EXPECT_DEATH(s.last(-1), "\\[xgboost\\] Condition .* failed.\n");
EXPECT_DEATH(s.last(17), "\\[xgboost\\] Condition .* failed.\n");
EXPECT_DEATH(s.last(32), "\\[xgboost\\] Condition .* failed.\n");
@@ -420,7 +464,11 @@ TEST(Span, Subspan) {
auto s4 = s1.subspan(2, dynamic_extent);
ASSERT_EQ(s1.data() + 2, s4.data());
ASSERT_EQ(s4.size(), s1.size() - 2);
}

TEST(SpanDeathTest, Subspan) {
int arr[16] {0};
Span<int> s1 (arr);
EXPECT_DEATH(s1.subspan(-1, 0), "\\[xgboost\\] Condition .* failed.\n");
EXPECT_DEATH(s1.subspan(17, 0), "\\[xgboost\\] Condition .* failed.\n");

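The test reshuffling above follows the GoogleTest convention that `EXPECT_DEATH` checks live in suites whose names end in `DeathTest`, which is what lets the RMM CI job skip them via `--gtest_filter=-*.MGPU_*:*DeathTest.*` (see the Jenkinsfile change above). A minimal sketch of the convention, with hypothetical test names:

```cpp
#include <cstdlib>
#include <gtest/gtest.h>

// Ordinary assertions stay in the regular suite and always run.
TEST(Span, RegularCheck) {
  EXPECT_EQ(2 + 2, 4);
}

// Death checks sit in a suite suffixed "DeathTest", so a filter such as
//   ./testxgboost --gtest_filter=-*DeathTest.*
// excludes every one of them without touching the regular tests.
TEST(SpanDeathTest, AbortsOnPurpose) {
  EXPECT_DEATH(std::abort(), "");
}
```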
2 changes: 1 addition & 1 deletion tests/cpp/common/test_span.cu
@@ -221,7 +221,7 @@ struct TestElementAccess {
}
};

TEST(GPUSpan, ElementAccess) {
TEST(GPUSpanDeathTest, ElementAccess) {
dh::safe_cuda(cudaSetDevice(0));
auto test_element_access = []() {
thrust::host_vector<float> h_vec (16);