Merge branch 'branch-24.10' into fea-remove-adaptor-factories
harrism committed Jul 31, 2024
2 parents 507777c + 2a79a83 commit 899590e
Showing 4 changed files with 32 additions and 29 deletions.
2 changes: 1 addition & 1 deletion .devcontainer/cuda12.5-pip/devcontainer.json
@@ -5,7 +5,7 @@
"args": {
"CUDA": "12.5",
"PYTHON_PACKAGE_MANAGER": "pip",
"BASE": "rapidsai/devcontainers:24.10-cpp-cuda12.5-ucx1.15.0-openmpi-ubuntu22.04"
"BASE": "rapidsai/devcontainers:24.10-cpp-cuda12.5-ucx1.17.0-openmpi-ubuntu22.04"
}
},
"runArgs": [
13 changes: 7 additions & 6 deletions ci/release/update-version.sh
@@ -69,18 +69,19 @@ DEPENDENCIES=(
pyraft
raft-dask
rmm
ucx-py
rapids-dask-dependency
)
for DEP in "${DEPENDENCIES[@]}"; do
for FILE in dependencies.yaml conda/environments/*.yaml python/cugraph-{pyg,dgl}/conda/*.yaml; do
for FILE in dependencies.yaml conda/environments/*.yaml python/cugraph-{pyg,dgl}/conda/*.yaml; do
for DEP in "${DEPENDENCIES[@]}"; do
sed_runner "/-.* ${DEP}\(-cu[[:digit:]]\{2\}\)\{0,1\}==/ s/==.*/==${NEXT_SHORT_TAG_PEP440}.*,>=0.0.0a0/g" "${FILE}"
sed_runner "/-.* ucx-py==/ s/==.*/==${NEXT_UCX_PY_VERSION}.*,>=0.0.0a0/g" "${FILE}"
done
for FILE in python/**/pyproject.toml python/**/**/pyproject.toml; do
sed_runner "/-.* ucx-py\(-cu[[:digit:]]\{2\}\)\{0,1\}==/ s/==.*/==${NEXT_UCX_PY_VERSION}.*,>=0.0.0a0/g" "${FILE}"
done
for FILE in python/**/pyproject.toml python/**/**/pyproject.toml; do
for DEP in "${DEPENDENCIES[@]}"; do
sed_runner "/\"${DEP}\(-cu[[:digit:]]\{2\}\)\{0,1\}==/ s/==.*\"/==${NEXT_SHORT_TAG_PEP440}.*,>=0.0.0a0\"/g" "${FILE}"
sed_runner "/\"ucx-py==/ s/==.*\"/==${NEXT_UCX_PY_VERSION}.*,>=0.0.0a0\"/g" "${FILE}"
done
sed_runner "/\"ucx-py\(-cu[[:digit:]]\{2\}\)\{0,1\}==/ s/==.*\"/==${NEXT_UCX_PY_VERSION}.*,>=0.0.0a0\"/g" "${FILE}"
done

# ucx-py version
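For reference, the sed expressions above rewrite dependency pins in place: on any line that pins a dependency (optionally with a -cuXX suffix), everything after "==" is replaced with the next release tag plus the ">=0.0.0a0" alpha floor. A minimal Python sketch of that substitution, where the NEXT_SHORT_TAG_PEP440 and NEXT_UCX_PY_VERSION values are assumed for illustration:

import re

NEXT_SHORT_TAG_PEP440 = "24.10"  # assumed value, for illustration only
NEXT_UCX_PY_VERSION = "0.40"     # assumed value, for illustration only

def rewrite_pin(line: str, dep: str, new_version: str) -> str:
    """Python mirror of: /-.* DEP(-cuNN)?==/ s/==.*/==NEW.*,>=0.0.0a0/g"""
    if re.search(rf"-.* {re.escape(dep)}(-cu\d{{2}})?==", line):
        return re.sub(r"==.*", f"=={new_version}.*,>=0.0.0a0", line)
    return line

print(rewrite_pin("  - rmm-cu12==24.08.*,>=0.0.0a0", "rmm", NEXT_SHORT_TAG_PEP440))
# "  - rmm-cu12==24.10.*,>=0.0.0a0"
print(rewrite_pin("  - ucx-py-cu12==0.39.*", "ucx-py", NEXT_UCX_PY_VERSION))
# "  - ucx-py-cu12==0.40.*,>=0.0.0a0"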
4 changes: 2 additions & 2 deletions dependencies.yaml
@@ -506,12 +506,12 @@ dependencies:
cuda: "11.*"
cuda_suffixed: "true"
packages:
- &ucx_py_cu11 ucx-py-cu11==24.10.*,>=0.0.0a0
- &ucx_py_cu11 ucx-py-cu11==0.40.*,>=0.0.0a0
- matrix:
cuda: "12.*"
cuda_suffixed: "true"
packages:
- &ucx_py_cu12 ucx-py-cu12==24.10.*,>=0.0.0a0
- &ucx_py_cu12 ucx-py-cu12==0.40.*,>=0.0.0a0
- matrix:
packages:
- *ucx_py_unsuffixed
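The pins above switch ucx-py from the RAPIDS calendar version (24.10.*) to its own 0.40.* series. The trailing ">=0.0.0a0" clause is what lets nightly (alpha) builds satisfy the pin; a small check with the packaging library (assumed available) illustrates the effect:

from packaging.specifiers import SpecifierSet

with_alpha = SpecifierSet("==0.40.*,>=0.0.0a0")   # pin style used above
without_alpha = SpecifierSet("==0.40.*")

print(with_alpha.contains("0.40.0a20"))     # True  - nightly/alpha builds are allowed
print(without_alpha.contains("0.40.0a20"))  # False - pre-releases excluded by default
print(with_alpha.contains("0.40.1"))        # True  - final releases still match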
@@ -13,6 +13,7 @@

import pytest
import numpy as np
import os

from cugraph.gnn import FeatureStore

@@ -21,18 +22,23 @@
pylibwholegraph = import_optional("pylibwholegraph")
wmb = import_optional("pylibwholegraph.binding.wholememory_binding")
torch = import_optional("torch")
wgth = import_optional("pylibwholegraph.torch")


def runtest(world_rank: int, world_size: int):
from pylibwholegraph.torch.initialize import init_torch_env_and_create_wm_comm
def runtest(rank: int, world_size: int):
torch.cuda.set_device(rank)

wm_comm, _ = init_torch_env_and_create_wm_comm(
world_rank,
os.environ["MASTER_ADDR"] = "localhost"
os.environ["MASTER_PORT"] = "12355"
torch.distributed.init_process_group("nccl", rank=rank, world_size=world_size)

pylibwholegraph.torch.initialize.init(
rank,
world_size,
world_rank,
rank,
world_size,
)
wm_comm = wm_comm.wmb_comm
wm_comm = wgth.get_global_communicator()

generator = np.random.default_rng(62)
arr = (
Expand All @@ -52,36 +58,32 @@ def runtest(world_rank: int, world_size: int):
expected = arr[indices_to_fetch]
np.testing.assert_array_equal(output_fs.cpu().numpy(), expected)

wmb.finalize()
pylibwholegraph.torch.initialize.finalize()


@pytest.mark.sg
@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available")
@pytest.mark.skipif(
isinstance(pylibwholegraph, MissingModule), reason="wholegraph not available"
)
@pytest.mark.skip(reason="broken")
def test_feature_storage_wholegraph_backend():
from pylibwholegraph.utils.multiprocess import multiprocess_run
world_size = torch.cuda.device_count()
print("gpu count:", world_size)
assert world_size > 0

gpu_count = wmb.fork_get_gpu_count()
print("gpu count:", gpu_count)
assert gpu_count > 0
print("ignoring gpu count and running on 1 GPU only")

multiprocess_run(1, runtest)
torch.multiprocessing.spawn(runtest, args=(1,), nprocs=1)


@pytest.mark.mg
@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available")
@pytest.mark.skipif(
isinstance(pylibwholegraph, MissingModule), reason="wholegraph not available"
)
@pytest.mark.skip(reason="broken")
def test_feature_storage_wholegraph_backend_mg():
from pylibwholegraph.utils.multiprocess import multiprocess_run

gpu_count = wmb.fork_get_gpu_count()
print("gpu count:", gpu_count)
assert gpu_count > 0
world_size = torch.cuda.device_count()
print("gpu count:", world_size)
assert world_size > 0

multiprocess_run(gpu_count, runtest)
torch.multiprocessing.spawn(runtest, args=(world_size,), nprocs=world_size)
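Pulled together, the updated test replaces pylibwholegraph's multiprocess_run / fork_get_gpu_count harness with torch.multiprocessing.spawn plus an explicit torch.distributed NCCL group per rank. A condensed, self-contained sketch of that launch pattern; the MASTER_ADDR/MASTER_PORT values and the FeatureStore body are placeholders, and the init argument order follows the test above:

import os

import torch
import torch.distributed
import torch.multiprocessing

import pylibwholegraph.torch as wgth
from pylibwholegraph.torch.initialize import init as wg_init, finalize as wg_finalize


def worker(rank: int, world_size: int):
    # one process per GPU: bind this rank to its device
    torch.cuda.set_device(rank)

    # placeholder rendezvous settings, as in the test above
    os.environ["MASTER_ADDR"] = "localhost"
    os.environ["MASTER_PORT"] = "12355"
    torch.distributed.init_process_group("nccl", rank=rank, world_size=world_size)

    # (world_rank, world_size, local_rank, local_size) -- argument order as used above
    wg_init(rank, world_size, rank, world_size)
    comm = wgth.get_global_communicator()  # WholeGraph communicator shared by all ranks

    # ... build a cugraph.gnn.FeatureStore(backend="wholegraph") and fetch features here ...

    wg_finalize()


if __name__ == "__main__":
    world_size = torch.cuda.device_count()
    torch.multiprocessing.spawn(worker, args=(world_size,), nprocs=world_size)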
