Fix building cugraph with CCCL main #4404

Merged (43 commits, May 29, 2024)
Changes shown are from 30 of the 43 commits.

Commits:
5d83412  proclaim return types for CCCL 2.4+ (trxcllnt, May 8, 2024)
658b71e  hide RAFT pragma deprecation messages (trxcllnt, May 8, 2024)
c365372  use cuda::proclaim_return_type (trxcllnt, May 8, 2024)
cbca140  fix lint (trxcllnt, May 8, 2024)
4233439  add kv_store_t overload for thrust::tuple construction changes (trxcllnt, May 8, 2024)
d8a5733  fix lint (trxcllnt, May 9, 2024)
87a1d3d  use thrust::get (trxcllnt, May 9, 2024)
c5c5176  Merge branch 'branch-24.06' into fix/cccl-2.5 (trxcllnt, May 9, 2024)
3aaedf4  define cxx and cuda standards (trxcllnt, May 9, 2024)
f5b1e77  update devcontainer workflow to use NVIDIA/cccl#pull-request/1667 (trxcllnt, May 9, 2024)
7ed06f3  uncomment failing test (trxcllnt, May 9, 2024)
499aa5e  add multi-gpu dependencies to pip devcontainer (trxcllnt, May 9, 2024)
e373115  Merge branch 'branch-24.06' into fix/cccl-2.5 (nv-rliu, May 13, 2024)
b613e43  extract_transform_e's e_op can't take a tagged key, fix the MG tests (seunghwak, May 13, 2024)
2997240  Merge branch 'bug_mg_extract_transform_e_test_build' of github.com:se… (trxcllnt, May 13, 2024)
23d5cf4  don't wrap an exec policy in another exec policy (trxcllnt, May 13, 2024)
5edef0c  test rapids-cmake with CCCL 2.5 (trxcllnt, May 14, 2024)
c3e8547  revert changes to pr.yaml (trxcllnt, May 14, 2024)
f388cf8  Merge branch 'branch-24.06' into fix/cccl-2.5 (trxcllnt, May 14, 2024)
8193526  Merge branch 'branch-24.06' into fix/cccl-2.5 (trxcllnt, May 21, 2024)
1928e98  install ucx feature after cuda (trxcllnt, May 21, 2024)
603e839  use devcontainers with ucx and openmpi prebuilt (trxcllnt, May 23, 2024)
afbee0a  DOC: doc-update-link-for-cugraphops (#4279) (raybellwaves, May 22, 2024)
72af4e3  Merge branch 'branch-24.06' into fix/cccl-2.5 (trxcllnt, May 23, 2024)
4797274  fix devcontainer name for codespaces (trxcllnt, May 23, 2024)
cc23928  Merge branch 'branch-24.06' of github.com:rapidsai/cugraph into fix/c… (trxcllnt, May 23, 2024)
4a953cc  Merge branch 'fix/cccl-2.5' of github.com:trxcllnt/cugraph into fix/c… (trxcllnt, May 23, 2024)
b08b655  Merge branch 'branch-24.06' of github.com:rapidsai/cugraph into fix/c… (trxcllnt, May 23, 2024)
34f6138  use trxcllnt/cudf#fix/cccl-2.5 branch when building libcudf from source (trxcllnt, May 23, 2024)
47be146  fix lint (trxcllnt, May 23, 2024)
f7a54c9  Merge branch 'branch-24.06' of github.com:rapidsai/cugraph into fix/c… (trxcllnt, May 24, 2024)
c8f66e4  make similar changes as in https://github.com/rapidsai/cugraph/pull/4… (trxcllnt, May 24, 2024)
476a24c  add cuda ver (trxcllnt, May 24, 2024)
96633db  limit CI parallelism to n_cpus - 1 (trxcllnt, May 24, 2024)
4f5c543  move env to top level (trxcllnt, May 24, 2024)
feb4b2c  add PARALLEL_LEVEL to ci/build_{cpp,wheel}.sh (trxcllnt, May 24, 2024)
80a091c  override parallel_level to n_cpus - 1 (trxcllnt, May 24, 2024)
a2c383e  limit parallelism to 8 (trxcllnt, May 24, 2024)
c1f5d40  Merge branch 'branch-24.06' of github.com:rapidsai/cugraph into fix/c… (trxcllnt, May 28, 2024)
394b07b  increase parallelism to 16 (trxcllnt, May 28, 2024)
c626420  Merge branch 'branch-24.06' of github.com:rapidsai/cugraph into fix/c… (trxcllnt, May 28, 2024)
325e0f6  revert changes to rapids_config.cmake (trxcllnt, May 28, 2024)
06dee7e  remove workaround kv_store ctor (trxcllnt, May 28, 2024)

5 changes: 5 additions & 0 deletions .devcontainer/Dockerfile
@@ -7,6 +7,11 @@ FROM ${BASE} as pip-base

ENV DEFAULT_VIRTUAL_ENV=rapids

RUN apt update -y \
&& DEBIAN_FRONTEND=noninteractive apt install -y \
libblas-dev liblapack-dev \
&& rm -rf /tmp/* /var/tmp/* /var/cache/apt/* /var/lib/apt/lists/*;

FROM ${BASE} as conda-base

ENV DEFAULT_CONDA_ENV=rapids
2 changes: 1 addition & 1 deletion .devcontainer/cuda11.8-conda/devcontainer.json
@@ -11,7 +11,7 @@
"runArgs": [
"--rm",
"--name",
"${localEnv:USER}-rapids-${localWorkspaceFolderBasename}-24.06-cuda11.8-conda"
"${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.06-cuda11.8-conda"
],
"hostRequirements": {"gpu": "optional"},
"features": {
8 changes: 2 additions & 6 deletions .devcontainer/cuda11.8-pip/devcontainer.json
@@ -5,19 +5,16 @@
"args": {
"CUDA": "11.8",
"PYTHON_PACKAGE_MANAGER": "pip",
"BASE": "rapidsai/devcontainers:24.06-cpp-cuda11.8-ubuntu22.04"
"BASE": "rapidsai/devcontainers:24.06-cpp-cuda11.8-ucx1.15.0-openmpi-ubuntu22.04"
}
},
"runArgs": [
"--rm",
"--name",
"${localEnv:USER}-rapids-${localWorkspaceFolderBasename}-24.06-cuda11.8-pip"
"${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.06-cuda11.8-pip"
],
"hostRequirements": {"gpu": "optional"},
"features": {
"ghcr.io/rapidsai/devcontainers/features/ucx:24.6": {
"version": "1.15.0"
},
"ghcr.io/rapidsai/devcontainers/features/cuda:24.6": {
"version": "11.8",
"installcuBLAS": true,
@@ -28,7 +25,6 @@
"ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.6": {}
},
"overrideFeatureInstallOrder": [
"ghcr.io/rapidsai/devcontainers/features/ucx",
"ghcr.io/rapidsai/devcontainers/features/cuda",
"ghcr.io/rapidsai/devcontainers/features/rapids-build-utils"
],
2 changes: 1 addition & 1 deletion .devcontainer/cuda12.2-conda/devcontainer.json
@@ -11,7 +11,7 @@
"runArgs": [
"--rm",
"--name",
"${localEnv:USER}-rapids-${localWorkspaceFolderBasename}-24.06-cuda12.2-conda"
"${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.06-cuda12.2-conda"
],
"hostRequirements": {"gpu": "optional"},
"features": {
8 changes: 2 additions & 6 deletions .devcontainer/cuda12.2-pip/devcontainer.json
@@ -5,19 +5,16 @@
"args": {
"CUDA": "12.2",
"PYTHON_PACKAGE_MANAGER": "pip",
"BASE": "rapidsai/devcontainers:24.06-cpp-cuda12.2-ubuntu22.04"
"BASE": "rapidsai/devcontainers:24.06-cpp-cuda12.2-ucx1.15.0-openmpi-ubuntu22.04"
}
},
"runArgs": [
"--rm",
"--name",
"${localEnv:USER}-rapids-${localWorkspaceFolderBasename}-24.06-cuda12.2-pip"
"${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.06-cuda12.2-pip"
],
"hostRequirements": {"gpu": "optional"},
"features": {
"ghcr.io/rapidsai/devcontainers/features/ucx:24.6": {
"version": "1.15.0"
},
"ghcr.io/rapidsai/devcontainers/features/cuda:24.6": {
"version": "12.2",
"installcuBLAS": true,
@@ -28,7 +25,6 @@
"ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.6": {}
},
"overrideFeatureInstallOrder": [
"ghcr.io/rapidsai/devcontainers/features/ucx",
"ghcr.io/rapidsai/devcontainers/features/cuda",
"ghcr.io/rapidsai/devcontainers/features/rapids-build-utils"
],
2 changes: 1 addition & 1 deletion .github/workflows/pr.yaml
@@ -196,5 +196,5 @@ jobs:
extra-repo-deploy-key: CUGRAPH_OPS_SSH_PRIVATE_DEPLOY_KEY
build_command: |
sccache -z;
build-all --verbose -j$(nproc --ignore=1);
build-all --verbose -j$(nproc --ignore=1) -DBUILD_CUGRAPH_MG_TESTS=ON;
sccache -s;
4 changes: 2 additions & 2 deletions cpp/CMakeLists.txt
@@ -92,14 +92,14 @@ set(CUGRAPH_CXX_FLAGS "")
set(CUGRAPH_CUDA_FLAGS "")

if(CMAKE_COMPILER_IS_GNUCXX)
list(APPEND CUGRAPH_CXX_FLAGS -Werror -Wno-error=deprecated-declarations)
list(APPEND CUGRAPH_CXX_FLAGS -Werror -Wno-error=deprecated-declarations -Wno-deprecated-declarations -DRAFT_HIDE_DEPRECATION_WARNINGS)
endif(CMAKE_COMPILER_IS_GNUCXX)


message("-- Building for GPU_ARCHS = ${CMAKE_CUDA_ARCHITECTURES}")

list(APPEND CUGRAPH_CUDA_FLAGS --expt-extended-lambda --expt-relaxed-constexpr)
list(APPEND CUGRAPH_CUDA_FLAGS -Werror=cross-execution-space-call -Wno-deprecated-declarations -Xptxas=--disable-warnings)
list(APPEND CUGRAPH_CUDA_FLAGS -Werror=cross-execution-space-call -Wno-deprecated-declarations -DRAFT_HIDE_DEPRECATION_WARNINGS -Xptxas=--disable-warnings)
list(APPEND CUGRAPH_CUDA_FLAGS -Xcompiler=-Wall,-Wno-error=sign-compare,-Wno-error=unused-but-set-variable)
list(APPEND CUGRAPH_CUDA_FLAGS -Xfatbin=-compress-all)

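A note on the two flag additions above: `-Wno-deprecated-declarations` quiets the compiler-level warnings produced by `[[deprecated]]` attributes, while `-DRAFT_HIDE_DEPRECATION_WARNINGS` is a preprocessor define that RAFT's headers are expected to check before emitting their own `#pragma message` deprecation notes (per the "hide RAFT pragma deprecation messages" commit). The sketch below shows the general guard pattern such a define toggles; the exact macro check inside RAFT is an assumption here, not shown in this diff.

```cuda
// Illustrative header guard, not an actual RAFT file.
#pragma once

#if !defined(RAFT_HIDE_DEPRECATION_WARNINGS)
// Emitted by every translation unit that includes this header, unless the
// consumer defines RAFT_HIDE_DEPRECATION_WARNINGS on the command line.
#pragma message("this header is deprecated; define RAFT_HIDE_DEPRECATION_WARNINGS to hide this note")
#endif

// -Wno-deprecated-declarations is what silences warnings from attributes like
// this one at call sites.
[[deprecated("use the replacement API instead")]] inline void legacy_api() {}
```

Appending the define to both CUGRAPH_CXX_FLAGS and CUGRAPH_CUDA_FLAGS makes it reach host-only and nvcc-compiled translation units alike.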
9 changes: 5 additions & 4 deletions cpp/include/cugraph/utilities/device_functors.cuh
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
* Copyright (c) 2022-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -78,13 +78,14 @@ struct indirection_t {

template <typename index_t, typename Iterator>
struct indirection_if_idx_valid_t {
using value_type = typename thrust::iterator_traits<Iterator>::value_type;
Iterator first{};
index_t invalid_idx{};
typename thrust::iterator_traits<Iterator>::value_type invalid_value{};
value_type invalid_value{};

__device__ typename thrust::iterator_traits<Iterator>::value_type operator()(index_t i) const
__device__ value_type operator()(index_t i) const
{
return (i != invalid_idx) ? *(first + i) : invalid_value;
return (i != invalid_idx) ? static_cast<value_type>(*(first + i)) : invalid_value;
}
};

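The new `value_type` alias and `static_cast` above are a small but recurring CCCL 2.x fix: `*(first + i)` may yield a proxy reference (for example `thrust::device_reference`) rather than `value_type` itself, so the two arms of the conditional operator no longer share an obvious common type. Below is a self-contained sketch of the same functor shape; the names and the toy driver are illustrative, not cugraph code.

```cuda
#include <thrust/device_ptr.h>
#include <thrust/device_vector.h>
#include <thrust/iterator/iterator_traits.h>
#include <thrust/transform.h>

#include <cstdio>
#include <vector>

// Gather values[i] for each index i, substituting invalid_value when i == invalid_idx.
template <typename index_t, typename Iterator>
struct gather_or_default_t {
  using value_type = typename thrust::iterator_traits<Iterator>::value_type;

  Iterator first{};
  index_t invalid_idx{};
  value_type invalid_value{};

  __device__ value_type operator()(index_t i) const
  {
    // The cast keeps both branches of ?: at value_type even when the dereference
    // returns a proxy reference instead of value_type itself.
    return (i != invalid_idx) ? static_cast<value_type>(*(first + i)) : invalid_value;
  }
};

int main()
{
  std::vector<float> const h_values{10.f, 20.f, 30.f};
  std::vector<int> const h_indices{2, -1, 0};

  thrust::device_vector<float> values(h_values.begin(), h_values.end());
  thrust::device_vector<int> indices(h_indices.begin(), h_indices.end());
  thrust::device_vector<float> out(indices.size());

  thrust::transform(indices.begin(),
                    indices.end(),
                    out.begin(),
                    gather_or_default_t<int, thrust::device_ptr<float>>{values.data(), -1, -1.f});

  float const out0 = out[0];  // expect 30 (values[2]); out == {30, -1, 10}
  std::printf("out[0] = %g\n", out0);
  return 0;
}
```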
5 changes: 3 additions & 2 deletions cpp/include/cugraph/utilities/mask_utils.cuh
@@ -20,6 +20,7 @@

#include <raft/core/handle.hpp>

#include <cuda/functional>
#include <thrust/copy.h>
#include <thrust/functional.h>
#include <thrust/iterator/counting_iterator.h>
@@ -160,13 +161,13 @@ size_t count_set_bits(raft::handle_t const& handle, MaskIterator mask_first, siz
handle.get_thrust_policy(),
thrust::make_counting_iterator(size_t{0}),
thrust::make_counting_iterator(packed_bool_size(num_bits)),
[mask_first, num_bits] __device__(size_t i) {
cuda::proclaim_return_type<size_t>([mask_first, num_bits] __device__(size_t i) -> size_t {
auto word = *(mask_first + i);
if ((i + 1) * packed_bools_per_word() > num_bits) {
word &= packed_bool_partial_mask(num_bits % packed_bools_per_word());
}
return static_cast<size_t>(__popc(word));
},
}),
size_t{0},
thrust::plus<size_t>{});
}
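This hunk shows the core pattern the PR applies throughout cugraph: with CCCL 2.x, thrust algorithms can no longer always deduce the result type of an extended `__device__` lambda, so the lambda gets an explicit trailing return type and is wrapped in `cuda::proclaim_return_type<T>(...)` from `<cuda/functional>`. A standalone sketch of the idiom (a toy sum of squares, not cugraph code; assumes nvcc with `--extended-lambda`):

```cuda
#include <cuda/functional>  // cuda::proclaim_return_type (CCCL)

#include <thrust/execution_policy.h>
#include <thrust/functional.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/transform_reduce.h>

#include <cstddef>
#include <cstdio>

int main()
{
  std::size_t const n = 1000;

  // Sum of squares of 0..n-1, computed on the device.
  auto const sum = thrust::transform_reduce(
    thrust::device,
    thrust::make_counting_iterator(std::size_t{0}),
    thrust::make_counting_iterator(n),
    // Proclaim the return type instead of relying on deduction from the
    // extended __device__ lambda.
    cuda::proclaim_return_type<std::size_t>(
      [] __device__(std::size_t i) -> std::size_t { return i * i; }),
    std::size_t{0},
    thrust::plus<std::size_t>{});

  std::printf("sum of squares below %zu = %zu\n", n, sum);
  return 0;
}
```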
6 changes: 3 additions & 3 deletions cpp/libcugraph_etl/cmake/thirdparty/get_cudf.cmake
@@ -1,5 +1,5 @@
#=============================================================================
# Copyright (c) 2021, NVIDIA CORPORATION.
# Copyright (c) 2021-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -42,6 +42,6 @@ set(CUGRAPH_ETL_BRANCH_VERSION_cudf "${CUGRAPH_ETL_VERSION_MAJOR}.${CUGRAPH_ETL_
# To use a different RAFT locally, set the CMake variable
# RPM_cudf_SOURCE=/path/to/local/cudf
find_and_configure_cudf(VERSION ${CUGRAPH_ETL_MIN_VERSION_cudf}
FORK rapidsai
PINNED_TAG branch-${CUGRAPH_ETL_BRANCH_VERSION_cudf}
FORK trxcllnt
PINNED_TAG fix/cccl-2.5
)
3 changes: 2 additions & 1 deletion cpp/src/community/detail/common_methods.cuh
@@ -29,6 +29,7 @@
#include <cugraph/detail/utility_wrappers.hpp>
#include <cugraph/graph_functions.hpp>

#include <cuda/functional>
#include <thrust/binary_search.h>
#include <thrust/execution_policy.h>
#include <thrust/functional.h>
@@ -178,7 +179,7 @@ weight_t compute_modularity(
handle.get_thrust_policy(),
cluster_weights.begin(),
cluster_weights.end(),
[] __device__(weight_t p) { return p * p; },
cuda::proclaim_return_type<weight_t>([] __device__(weight_t p) -> weight_t { return p * p; }),
weight_t{0},
thrust::plus<weight_t>());

15 changes: 9 additions & 6 deletions cpp/src/community/legacy/louvain.cuh
@@ -22,13 +22,15 @@

#include <cugraph/dendrogram.hpp>
#include <cugraph/legacy/graph.hpp>

#ifdef TIMING
#include <cugraph/utilities/high_res_timer.hpp>
#endif

#include <rmm/device_uvector.hpp>
#include <rmm/exec_policy.hpp>

#include <cuda/functional>
#include <thrust/copy.h>
#include <thrust/distance.h>
#include <thrust/execution_policy.h>
@@ -141,12 +143,13 @@ class Louvain {
handle_.get_thrust_policy(),
thrust::make_counting_iterator(0),
thrust::make_counting_iterator(graph.number_of_vertices),
[d_deg = deg.data(), d_inc = inc.data(), total_edge_weight, resolution] __device__(
vertex_t community) {
return ((d_inc[community] / total_edge_weight) - resolution *
(d_deg[community] * d_deg[community]) /
(total_edge_weight * total_edge_weight));
},
cuda::proclaim_return_type<weight_t>(
[d_deg = deg.data(), d_inc = inc.data(), total_edge_weight, resolution] __device__(
vertex_t community) -> weight_t {
return ((d_inc[community] / total_edge_weight) -
resolution * (d_deg[community] * d_deg[community]) /
(total_edge_weight * total_edge_weight));
}),
weight_t{0.0},
thrust::plus<weight_t>());

15 changes: 9 additions & 6 deletions cpp/src/components/weakly_connected_components_impl.cuh
@@ -34,6 +34,7 @@

#include <rmm/device_uvector.hpp>

#include <cuda/functional>
#include <thrust/binary_search.h>
#include <thrust/copy.h>
#include <thrust/distance.h>
@@ -400,9 +401,10 @@ void weakly_connected_components_impl(raft::handle_t const& handle,
handle.get_thrust_policy(),
new_root_candidates.begin(),
new_root_candidates.begin() + (new_root_candidates.size() > 0 ? 1 : 0),
[vertex_partition, degrees = degrees.data()] __device__(auto v) {
return degrees[vertex_partition.local_vertex_partition_offset_from_vertex_nocheck(v)];
},
cuda::proclaim_return_type<edge_t>(
[vertex_partition, degrees = degrees.data()] __device__(auto v) -> edge_t {
return degrees[vertex_partition.local_vertex_partition_offset_from_vertex_nocheck(v)];
}),
edge_t{0},
thrust::plus<edge_t>{});

@@ -642,9 +644,10 @@ void weakly_connected_components_impl(raft::handle_t const& handle,
handle.get_thrust_policy(),
thrust::get<0>(vertex_frontier.bucket(bucket_idx_cur).begin().get_iterator_tuple()),
thrust::get<0>(vertex_frontier.bucket(bucket_idx_cur).end().get_iterator_tuple()),
[vertex_partition, degrees = degrees.data()] __device__(auto v) {
return degrees[vertex_partition.local_vertex_partition_offset_from_vertex_nocheck(v)];
},
cuda::proclaim_return_type<edge_t>(
[vertex_partition, degrees = degrees.data()] __device__(auto v) -> edge_t {
return degrees[vertex_partition.local_vertex_partition_offset_from_vertex_nocheck(v)];
}),
edge_t{0},
thrust::plus<edge_t>());

4 changes: 3 additions & 1 deletion cpp/src/detail/utility_wrappers.cu
@@ -21,6 +21,7 @@

#include <rmm/exec_policy.hpp>

#include <cuda/functional>
#include <thrust/count.h>
#include <thrust/distance.h>
#include <thrust/functional.h>
@@ -139,7 +140,8 @@ vertex_t compute_maximum_vertex_id(rmm::cuda_stream_view const& stream_view,
rmm::exec_policy(stream_view),
edge_first,
edge_first + num_edges,
[] __device__(auto e) { return std::max(thrust::get<0>(e), thrust::get<1>(e)); },
cuda::proclaim_return_type<vertex_t>(
[] __device__(auto e) -> vertex_t { return std::max(thrust::get<0>(e), thrust::get<1>(e)); }),
vertex_t{0},
thrust::maximum<vertex_t>());
}
25 changes: 25 additions & 0 deletions cpp/src/prims/kv_store.cuh
@@ -17,6 +17,7 @@

#include "prims/detail/optional_dataframe_buffer.hpp"

#include <cugraph/graph.hpp>
#include <cugraph/utilities/dataframe_buffer.hpp>
#include <cugraph/utilities/device_functors.cuh>

@@ -945,6 +946,30 @@ class kv_store_t {
{
}

/* when use_binary_search = true */
template <bool binary_search = use_binary_search>
kv_store_t(rmm::device_uvector<key_t>&& keys,
decltype(allocate_dataframe_buffer<value_t>(0, rmm::cuda_stream_view{}))&& values,
decltype(cugraph::invalid_idx<key_t>::value)
invalid_value /* invalid_value is returned when match fails for the given key */,
bool key_sorted /* if set to true, assume that the input data is sorted and skip
sorting (which is necessary for binary-search) */
,
rmm::cuda_stream_view stream,
std::enable_if_t<binary_search && is_thrust_tuple<value_t>::value, int32_t> = 0)
: store_(
std::move(keys),
std::move(values),
[=]() {
auto invalid_row = value_t{};
thrust::get<0>(invalid_row) = invalid_value;
return invalid_row;
}(),
key_sorted,
stream)
{
}

/* when use binary_search = false, this requires that the capacity is large enough */
template <typename KeyIterator, typename ValueIterator, bool binary_search = use_binary_search>
std::enable_if_t<!binary_search, void> insert(KeyIterator key_first,
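The immediately-invoked lambda in the constructor above is the workaround referenced by the "add kv_store_t overload for thrust::tuple construction changes" commit: instead of building the invalid row directly from a scalar, it default-constructs the tuple value and fills only its first element through `thrust::get<0>`. (The last commit in the list, "remove workaround kv_store ctor", drops this overload again; the diff shown here is from 30 of the 43 commits.) A minimal sketch of just that construction step, with an illustrative value type:

```cuda
#include <thrust/tuple.h>

#include <cstdint>
#include <cstdio>

int main()
{
  using value_t = thrust::tuple<int64_t, float>;

  int64_t const invalid_value = -1;  // stand-in for cugraph::invalid_idx<key_t>::value

  // Default-construct the tuple, then set only the element that carries the
  // "invalid" marker; the remaining elements stay value-initialized.
  auto const invalid_row = [=]() {
    auto row            = value_t{};
    thrust::get<0>(row) = invalid_value;
    return row;
  }();

  std::printf("invalid_row = (%lld, %f)\n",
              static_cast<long long>(thrust::get<0>(invalid_row)),
              static_cast<double>(thrust::get<1>(invalid_row)));
  return 0;
}
```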
36 changes: 19 additions & 17 deletions cpp/src/structure/graph_view_impl.cuh
@@ -353,7 +353,7 @@ edge_t count_edge_partition_multi_edges(
execution_policy,
thrust::make_counting_iterator(edge_partition.major_range_first()) + (*segment_offsets)[2],
thrust::make_counting_iterator(edge_partition.major_range_first()) + (*segment_offsets)[3],
[edge_partition] __device__(auto major) {
cuda::proclaim_return_type<edge_t>([edge_partition] __device__(auto major) -> edge_t {
auto major_offset = edge_partition.major_offset_from_major_nocheck(major);
vertex_t const* indices{nullptr};
[[maybe_unused]] edge_t edge_offset{};
Expand All @@ -365,7 +365,7 @@ edge_t count_edge_partition_multi_edges(
if (indices[i - 1] == indices[i]) { ++count; }
}
return count;
},
}),
edge_t{0},
thrust::plus<edge_t>{});
}
@@ -374,19 +374,21 @@
execution_policy,
thrust::make_counting_iterator(vertex_t{0}),
thrust::make_counting_iterator(*(edge_partition.dcs_nzd_vertex_count())),
[edge_partition, major_start_offset = (*segment_offsets)[3]] __device__(auto idx) {
auto major_idx =
major_start_offset + idx; // major_offset != major_idx in the hypersparse region
vertex_t const* indices{nullptr};
[[maybe_unused]] edge_t edge_offset{};
edge_t local_degree{};
thrust::tie(indices, edge_offset, local_degree) = edge_partition.local_edges(major_idx);
edge_t count{0};
for (edge_t i = 1; i < local_degree; ++i) { // assumes neighbors are sorted
if (indices[i - 1] == indices[i]) { ++count; }
}
return count;
},
cuda::proclaim_return_type<edge_t>(
[edge_partition,
major_start_offset = (*segment_offsets)[3]] __device__(auto idx) -> edge_t {
auto major_idx =
major_start_offset + idx; // major_offset != major_idx in the hypersparse region
vertex_t const* indices{nullptr};
[[maybe_unused]] edge_t edge_offset{};
edge_t local_degree{};
thrust::tie(indices, edge_offset, local_degree) = edge_partition.local_edges(major_idx);
edge_t count{0};
for (edge_t i = 1; i < local_degree; ++i) { // assumes neighbors are sorted
if (indices[i - 1] == indices[i]) { ++count; }
}
return count;
}),
edge_t{0},
thrust::plus<edge_t>{});
}
@@ -398,7 +400,7 @@
thrust::make_counting_iterator(edge_partition.major_range_first()),
thrust::make_counting_iterator(edge_partition.major_range_first()) +
edge_partition.major_range_size(),
[edge_partition] __device__(auto major) {
cuda::proclaim_return_type<edge_t>([edge_partition] __device__(auto major) -> edge_t {
auto major_offset = edge_partition.major_offset_from_major_nocheck(major);
vertex_t const* indices{nullptr};
[[maybe_unused]] edge_t edge_offset{};
Expand All @@ -409,7 +411,7 @@ edge_t count_edge_partition_multi_edges(
if (indices[i - 1] == indices[i]) { ++count; }
}
return count;
},
}),
edge_t{0},
thrust::plus<edge_t>{});
}