Skip to content

Commit

Permalink
Forward merge branch-24.06 into branch-24.08 (#4489)
Browse files Browse the repository at this point in the history
  • Loading branch information
nv-rliu authored Jun 19, 2024
1 parent bc0771e commit f519ac1
Show file tree
Hide file tree
Showing 18 changed files with 110 additions and 28 deletions.
8 changes: 7 additions & 1 deletion ci/build_wheel.sh
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,13 @@ fi

cd "${package_dir}"

python -m pip wheel . -w dist -vvv --no-deps --disable-pip-version-check
python -m pip wheel \
-w dist \
-vvv \
--no-deps \
--disable-pip-version-check \
--extra-index-url https://pypi.nvidia.com \
.

# pure-python packages should be marked as pure, and not have auditwheel run on them.
if [[ ${package_name} == "nx-cugraph" ]] || \
Expand Down
6 changes: 5 additions & 1 deletion ci/build_wheel_cugraph.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/bin/bash
# Copyright (c) 2023, NVIDIA CORPORATION.
# Copyright (c) 2023-2024, NVIDIA CORPORATION.

set -euo pipefail

Expand All @@ -12,6 +12,10 @@ RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
RAPIDS_PY_WHEEL_NAME=pylibcugraph_${RAPIDS_PY_CUDA_SUFFIX} rapids-download-wheels-from-s3 ./local-pylibcugraph
export PIP_FIND_LINKS=$(pwd)/local-pylibcugraph

PARALLEL_LEVEL=$(python -c \
"from math import ceil; from multiprocessing import cpu_count; print(ceil(cpu_count()/4))")

export SKBUILD_CMAKE_ARGS="-DDETECT_CONDA_ENV=OFF;-DFIND_CUGRAPH_CPP=OFF;-DCPM_cugraph-ops_SOURCE=${GITHUB_WORKSPACE}/cugraph-ops/"
export SKBUILD_BUILD_TOOL_ARGS="-j${PARALLEL_LEVEL};-l${PARALLEL_LEVEL}"

./ci/build_wheel.sh cugraph python/cugraph
6 changes: 5 additions & 1 deletion ci/build_wheel_pylibcugraph.sh
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
#!/bin/bash
# Copyright (c) 2023, NVIDIA CORPORATION.
# Copyright (c) 2023-2024, NVIDIA CORPORATION.

set -euo pipefail

PARALLEL_LEVEL=$(python -c \
"from math import ceil; from multiprocessing import cpu_count; print(ceil(cpu_count()/4))")

export SKBUILD_CMAKE_ARGS="-DDETECT_CONDA_ENV=OFF;-DFIND_CUGRAPH_CPP=OFF;-DCPM_cugraph-ops_SOURCE=${GITHUB_WORKSPACE}/cugraph-ops/"
export SKBUILD_BUILD_TOOL_ARGS="-j${PARALLEL_LEVEL};-l${PARALLEL_LEVEL}"

./ci/build_wheel.sh pylibcugraph python/pylibcugraph
15 changes: 5 additions & 10 deletions ci/test_python.sh
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ rapids-mamba-retry install \
rapids-logger "Check GPU usage"
nvidia-smi

export LD_PRELOAD="${CONDA_PREFIX}/lib/libgomp.so.1"

# RAPIDS_DATASET_ROOT_DIR is used by test scripts
export RAPIDS_DATASET_ROOT_DIR="$(realpath datasets)"
pushd "${RAPIDS_DATASET_ROOT_DIR}"
Expand Down Expand Up @@ -191,6 +193,8 @@ if [[ "${RAPIDS_CUDA_VERSION}" == "11.8.0" ]]; then
conda activate test_cugraph_pyg
set -u

rapids-print-env

# TODO re-enable logic once CUDA 12 is testable
#if [[ "${RAPIDS_CUDA_VERSION}" == "11.8.0" ]]; then
CONDA_CUDA_VERSION="11.8"
Expand All @@ -204,18 +208,9 @@ if [[ "${RAPIDS_CUDA_VERSION}" == "11.8.0" ]]; then
rapids-mamba-retry install \
--channel "${CPP_CHANNEL}" \
--channel "${PYTHON_CHANNEL}" \
--channel pytorch \
--channel pyg \
--channel nvidia \
"cugraph-pyg" \
"pytorch=2.1.0" \
"pytorch-cuda=${CONDA_CUDA_VERSION}"

# Install pyg dependencies (which requires pip)

pip install \
ogb \
tensordict
"ogb"

pip install \
pyg_lib \
Expand Down
1 change: 0 additions & 1 deletion ci/test_wheel_cugraph-pyg.sh
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ rapids-retry python -m pip install \
pyg_lib \
torch_scatter \
torch_sparse \
tensordict \
-f ${PYG_URL}

rapids-logger "pytest cugraph-pyg (single GPU)"
Expand Down
7 changes: 6 additions & 1 deletion ci/test_wheel_nx-cugraph.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
#!/bin/bash
# Copyright (c) 2023, NVIDIA CORPORATION.
# Copyright (c) 2023-2024, NVIDIA CORPORATION.

set -eoxu pipefail

# Download wheels built during this job.
RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
RAPIDS_PY_WHEEL_NAME="pylibcugraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-deps
python -m pip install ./local-deps/*.whl

./ci/test_wheel.sh nx-cugraph python/nx-cugraph
3 changes: 2 additions & 1 deletion conda/environments/all_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ dependencies:
- numpy>=1.23,<2.0a0
- numpydoc
- nvcc_linux-64=11.8
- openmpi
- openmpi<5.0.3
- packaging>=21
- pandas
- pre-commit
Expand All @@ -56,6 +56,7 @@ dependencies:
- pytest-mpl
- pytest-xdist
- python-louvain
- pytorch>=2.0,<2.2.0a0
- raft-dask==24.8.*
- rapids-dask-dependency==24.8.*
- recommonmark
Expand Down
3 changes: 2 additions & 1 deletion conda/environments/all_cuda-122_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ dependencies:
- numba>=0.57
- numpy>=1.23,<2.0a0
- numpydoc
- openmpi
- openmpi<5.0.3
- packaging>=21
- pandas
- pre-commit
Expand All @@ -61,6 +61,7 @@ dependencies:
- pytest-mpl
- pytest-xdist
- python-louvain
- pytorch>=2.0,<2.2.0a0
- raft-dask==24.8.*
- rapids-dask-dependency==24.8.*
- recommonmark
Expand Down
2 changes: 1 addition & 1 deletion conda/recipes/libcugraph/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ requirements:
- {{ compiler('cxx') }}
- cmake {{ cmake_version }}
- ninja
- openmpi # Required for building cpp-mgtests (multi-GPU tests)
- openmpi<5.0.3 # Required for building cpp-mgtests (multi-GPU tests)
- {{ stdlib("c") }}
host:
{% if cuda_major == "11" %}
Expand Down
29 changes: 27 additions & 2 deletions dependencies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ files:
- depends_on_pylibcugraphops
- depends_on_pylibwholegraph
- depends_on_cupy
- depends_on_pytorch
- python_run_cugraph
- python_run_nx_cugraph
- python_run_cugraph_dgl
Expand Down Expand Up @@ -62,6 +63,7 @@ files:
- cuda_version
- depends_on_cudf
- depends_on_pylibwholegraph
- depends_on_pytorch
- py_version
- test_python_common
- test_python_cugraph
Expand Down Expand Up @@ -177,6 +179,7 @@ files:
includes:
- test_python_common
- depends_on_pylibwholegraph
- depends_on_pytorch
py_build_cugraph_pyg:
output: pyproject
pyproject_dir: python/cugraph-pyg
Expand All @@ -201,6 +204,7 @@ files:
includes:
- test_python_common
- depends_on_pylibwholegraph
- depends_on_pytorch
py_build_cugraph_equivariant:
output: pyproject
pyproject_dir: python/cugraph-equivariant
Expand Down Expand Up @@ -362,7 +366,7 @@ dependencies:
- libraft-headers==24.8.*
- libraft==24.8.*
- librmm==24.8.*
- openmpi # Required for building cpp-mgtests (multi-GPU tests)
- openmpi<5.0.3 # Required for building cpp-mgtests (multi-GPU tests)
specific:
- output_types: [conda]
matrices:
Expand Down Expand Up @@ -568,9 +572,30 @@ dependencies:
- cugraph==24.8.*
- pytorch>=2.0
- pytorch-cuda==11.8
- tensordict>=0.1.2
- &tensordict tensordict>=0.1.2
- pyg>=2.5,<2.6

depends_on_pytorch:
common:
- output_types: [conda]
packages:
- &pytorch_conda pytorch>=2.0,<2.2.0a0

specific:
- output_types: [requirements, pyproject]
matrices:
- matrix: {cuda: "12.*"}
packages:
- &pytorch_pip torch>=2.0,<2.2.0a0
- *tensordict
- --extra-index-url=https://download.pytorch.org/whl/cu121
- matrix: {cuda: "11.*"}
packages:
- *pytorch_pip
- *tensordict
- --extra-index-url=https://download.pytorch.org/whl/cu118
- {matrix: null, packages: [*pytorch_pip, *tensordict]}

depends_on_pylibwholegraph:
common:
- output_types: conda
Expand Down
2 changes: 2 additions & 0 deletions python/cugraph-dgl/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ test = [
"pytest-cov",
"pytest-xdist",
"scipy",
"tensordict>=0.1.2",
"torch>=2.0,<2.2.0a0",
] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.

[project.urls]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,15 @@

from cugraph_equivariant.utils import scatter_reduce

from pylibcugraphops.pytorch.operators import FusedFullyConnectedTensorProduct
try:
from pylibcugraphops.pytorch.operators import FusedFullyConnectedTensorProduct
except ImportError as exc:
raise RuntimeError(
"FullyConnectedTensorProductConv is no longer supported in "
"cugraph-equivariant starting from version 24.08. It will be migrated "
"to the new `cuequivariance` package. Please use 24.06 release for the "
"legacy interface."
) from exc


class FullyConnectedTensorProductConv(nn.Module):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,14 @@
import torch
from torch import nn
from e3nn import o3
from cugraph_equivariant.nn import FullyConnectedTensorProductConv

try:
from cugraph_equivariant.nn import FullyConnectedTensorProductConv
except RuntimeError:
pytest.skip(
"Migrated to cuequivariance package starting from 24.08.",
allow_module_level=True,
)

device = torch.device("cuda:0")

Expand Down
2 changes: 2 additions & 0 deletions python/cugraph-pyg/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ test = [
"pytest-cov",
"pytest-xdist",
"scipy",
"tensordict>=0.1.2",
"torch>=2.0,<2.2.0a0",
] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.

[tool.setuptools]
Expand Down
24 changes: 19 additions & 5 deletions python/cugraph/cugraph/gnn/data_loading/dist_sampler.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,12 @@

from typing import Union, List, Dict, Tuple, Iterator, Optional

from cugraph.utilities import import_optional
from cugraph.utilities.utils import import_optional, MissingModule
from cugraph.gnn.comms import cugraph_comms_get_raft_handle

from cugraph.gnn.data_loading.bulk_sampler_io import create_df_from_disjoint_arrays

# PyTorch is NOT optional but this is required for container builds.
torch = import_optional("torch")

torch = MissingModule("torch")
TensorType = Union["torch.Tensor", cupy.ndarray, cudf.Series]


Expand All @@ -44,6 +42,8 @@ def __init__(
rank: Optional[int] = None,
filelist=None,
):
torch = import_optional("torch")

self.__format = format
self.__directory = directory

Expand Down Expand Up @@ -77,6 +77,8 @@ def __iter__(self):
return self

def __next__(self):
torch = import_optional("torch")

if len(self.__files) > 0:
f = self.__files.pop()
fname = f[0]
Expand Down Expand Up @@ -404,6 +406,7 @@ def get_reader(self) -> Iterator[Tuple[Dict[str, "torch.Tensor"], int, int]]:
"""
Returns an iterator over sampled data.
"""
torch = import_optional("torch")
rank = torch.distributed.get_rank() if self.is_multi_gpu else None
return self.__writer.get_reader(rank)

Expand Down Expand Up @@ -461,6 +464,8 @@ def get_label_list_and_output_rank(
label_to_output_comm_rank: TensorType
The global mapping of labels to ranks.
"""
torch = import_optional("torch")

world_size = torch.distributed.get_world_size()

if assume_equal_input_size:
Expand Down Expand Up @@ -528,6 +533,8 @@ def get_start_batch_offset(
and whether the input sizes on each rank are equal (bool).
"""
torch = import_optional("torch")

input_size_is_equal = True
if self.is_multi_gpu:
rank = torch.distributed.get_rank()
Expand Down Expand Up @@ -581,6 +588,8 @@ def sample_from_nodes(
random_state: int
The random seed to use for sampling.
"""
torch = import_optional("torch")

nodes = torch.as_tensor(nodes, device="cuda")

batches_per_call = self._local_seeds_per_call // batch_size
Expand Down Expand Up @@ -700,6 +709,8 @@ def __init__(
)

def __calc_local_seeds_per_call(self, local_seeds_per_call: Optional[int] = None):
torch = import_optional("torch")

if local_seeds_per_call is None:
if len([x for x in self.__fanout if x <= 0]) > 0:
return UniformNeighborSampler.UNKNOWN_VERTICES_DEFAULT
Expand All @@ -721,6 +732,7 @@ def sample_batches(
random_state: int = 0,
assume_equal_input_size: bool = False,
) -> Dict[str, TensorType]:
torch = import_optional("torch")
if self.is_multi_gpu:
rank = torch.distributed.get_rank()

Expand Down Expand Up @@ -800,7 +812,9 @@ def sample_batches(
compression=self.__compression,
compress_per_hop=self.__compress_per_hop,
retain_seeds=self._retain_original_seeds,
label_offsets=cupy.asarray(label_offsets),
label_offsets=None
if label_offsets is None
else cupy.asarray(label_offsets),
return_dict=True,
)

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2023, NVIDIA CORPORATION.
# Copyright (c) 2023-2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand Down Expand Up @@ -169,6 +169,7 @@ def test_bulk_sampler_io_empty_batch(scratch_dir):


@pytest.mark.sg
@pytest.mark.skip(reason="broken")
def test_bulk_sampler_io_mock_csr(scratch_dir):
major_offsets_array = cudf.Series([0, 5, 10, 15])
minors_array = cudf.Series([1, 2, 3, 4, 8, 9, 1, 3, 4, 5, 3, 0, 4, 9, 1])
Expand Down
4 changes: 4 additions & 0 deletions python/cugraph/cugraph/tests/sampling/test_dist_sampler.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@


torch = import_optional("torch")
if not isinstance(torch, MissingModule):
from rmm.allocators.torch import rmm_torch_allocator

torch.cuda.change_current_allocator(rmm_torch_allocator)


@pytest.fixture
Expand Down
Loading

0 comments on commit f519ac1

Please sign in to comment.