Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Forward-merge branch-23.12 to branch-24.02 #5657

Merged
merged 5 commits into from
Nov 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ programming. In most cases, cuML's Python API matches the API from

For large datasets, these GPU-based implementations can complete 10-50x faster
than their CPU equivalents. For details on performance, see the [cuML Benchmarks
Notebook](https://github.com/rapidsai/cuml/tree/branch-23.04/notebooks/tools).
Notebook](https://github.com/rapidsai/cuml/tree/branch-24.02/notebooks/tools).

As an example, the following Python snippet loads input and computes DBSCAN clusters, all on GPU, using cuDF:
```python
Expand Down Expand Up @@ -74,7 +74,7 @@ neighbors = nn.kneighbors(df)
For additional examples, browse our complete [API
documentation](https://docs.rapids.ai/api/cuml/stable/), or check out our
example [walkthrough
notebooks](https://github.com/rapidsai/cuml/tree/branch-23.04/notebooks). Finally, you
notebooks](https://github.com/rapidsai/cuml/tree/branch-24.02/notebooks). Finally, you
can find complete end-to-end examples in the [notebooks-contrib
repo](https://github.com/rapidsai/notebooks-contrib).

Expand Down
4 changes: 4 additions & 0 deletions ci/build_wheel.sh
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@ for dep in cudf pylibraft raft-dask rmm; do
sed -r -i "s/${dep}==(.*)\"/${dep}${PACKAGE_CUDA_SUFFIX}==\1${alpha_spec}\"/g" ${pyproject_file}
done

for dep in dask-cuda rapids-dask-dependency; do
sed -r -i "s/${dep}==(.*)\"/${dep}==\1${alpha_spec}\"/g" ${pyproject_file}
done

if [[ $PACKAGE_CUDA_SUFFIX == "-cu12" ]]; then
sed -i "s/cuda-python[<=>\.,0-9]*/cuda-python>=12.0,<13.0/g" ${pyproject_file}
sed -i "s/cupy-cuda11x/cupy-cuda12x/g" ${pyproject_file}
Expand Down
10 changes: 3 additions & 7 deletions ci/release/update-version.sh
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ DEPENDENCIES=(
librmm
pylibraft
raft-dask
rapids-dask-dependency
rmm
)
for FILE in dependencies.yaml conda/environments/*.yaml; do
Expand All @@ -81,17 +82,12 @@ for FILE in dependencies.yaml conda/environments/*.yaml; do
done
done

sed_runner "s|/branch-.*?/|/branch-${NEXT_SHORT_TAG}/|g" README.md
sed_runner "s|/branch-.*?/|/branch-${NEXT_SHORT_TAG}/|g" python/README.md
sed_runner "s|/branch-[^/]*/|/branch-${NEXT_SHORT_TAG}/|g" README.md
sed_runner "s|/branch-[^/]*/|/branch-${NEXT_SHORT_TAG}/|g" python/README.md

# Wheel builds clone cumlprims_mg, update its branch
sed_runner "s/extra-repo-sha: branch-.*/extra-repo-sha: branch-${NEXT_SHORT_TAG}/g" .github/workflows/*.yaml

# Wheel builds install dask-cuda from source, update its branch
for FILE in .github/workflows/*.yaml; do
sed_runner "s/dask-cuda.git@branch-[^\"\s]\+/dask-cuda.git@branch-${NEXT_SHORT_TAG}/g" ${FILE};
done

# CI files
for FILE in .github/workflows/*.yaml; do
sed_runner "/shared-workflows/ s/@.*/@branch-${NEXT_SHORT_TAG}/g" "${FILE}"
Expand Down
3 changes: 0 additions & 3 deletions ci/test_wheel.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,6 @@ if [[ "$(arch)" == "aarch64" ]]; then
python -m pip install cmake
fi

# Always install latest dask for testing
python -m pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-24.02

# echo to expand wildcard before adding `[extra]` requires for pip
python -m pip install $(echo ./dist/cuml*.whl)[test]

Expand Down
4 changes: 1 addition & 3 deletions conda/environments/all_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,9 @@ dependencies:
- cupy>=12.0.0
- cxx-compiler
- cython>=3.0.0
- dask-core>=2023.9.2
- dask-cuda==24.2.*
- dask-cudf==24.2.*
- dask-ml
- dask>=2023.9.2
- distributed>=2023.9.2
- doxygen=1.9.1
- gcc_linux-64=11.*
- gmock>=1.13.0
Expand Down Expand Up @@ -63,6 +60,7 @@ dependencies:
- pytest-xdist
- python>=3.9,<3.11
- raft-dask==24.2.*
- rapids-dask-dependency==24.2.*
- recommonmark
- rmm==24.2.*
- scikit-build>=0.13.1
Expand Down
4 changes: 1 addition & 3 deletions conda/environments/all_cuda-120_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,9 @@ dependencies:
- cupy>=12.0.0
- cxx-compiler
- cython>=3.0.0
- dask-core>=2023.9.2
- dask-cuda==24.2.*
- dask-cudf==24.2.*
- dask-ml
- dask>=2023.9.2
- distributed>=2023.9.2
- doxygen=1.9.1
- gcc_linux-64=11.*
- gmock>=1.13.0
Expand Down Expand Up @@ -59,6 +56,7 @@ dependencies:
- pytest-xdist
- python>=3.9,<3.11
- raft-dask==24.2.*
- rapids-dask-dependency==24.2.*
- recommonmark
- rmm==24.2.*
- scikit-build>=0.13.1
Expand Down
4 changes: 1 addition & 3 deletions conda/recipes/cuml/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -76,15 +76,13 @@ requirements:
- cudf ={{ minor_version }}
- cupy >=12.0.0
- dask-cudf ={{ minor_version }}
- dask >=2023.9.2
- dask-core>=2023.9.2
- distributed >=2023.9.2
- joblib >=0.11
- libcuml ={{ version }}
- libcumlprims ={{ minor_version }}
- pylibraft ={{ minor_version }}
- python x.x
- raft-dask ={{ minor_version }}
- rapids-dask-dependency ={{ minor_version }}
- treelite {{ treelite_version }}

tests:
Expand Down
6 changes: 1 addition & 5 deletions dependencies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -175,24 +175,20 @@ dependencies:
- output_types: [conda, requirements, pyproject]
packages:
- cudf==24.2.*
- dask>=2023.9.2
- dask-cuda==24.2.*
- dask-cudf==24.2.*
- distributed>=2023.9.2
- joblib>=0.11
- numba>=0.57
# TODO: Is scipy really a hard dependency, or should
# we make it optional (i.e. an extra for pip
# installation/run_constrained for conda)?
- scipy>=1.8.0
- raft-dask==24.2.*
- rapids-dask-dependency==24.2.*
- *treelite
- output_types: [conda, requirements]
packages:
- cupy>=12.0.0
- output_types: conda
packages:
- dask-core>=2023.9.2
- output_types: pyproject
packages:
- *treelite_runtime
Expand Down
7 changes: 3 additions & 4 deletions python/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ example `setup.py --singlegpu`) are:

RAFT's Python and Cython code is located in the [RAFT repository](https://github.com/rapidsai/raft/python). It was designed to be included in projects rather than distributed by itself, so at build time, **setup.py creates a symlink from cuML, located in `/python/cuml/raft/`, to the Python folder of RAFT**.

For developers that need to modify RAFT code, please refer to the [RAFT Developer Guide](https://github.com/rapidsai/raft/blob/branch-23.04/BUILD.md#developer-guide) for recommendations.
For developers that need to modify RAFT code, please refer to the [RAFT Developer Guide](https://github.com/rapidsai/raft/blob/branch-24.02/docs/source/build.md) for recommendations.

To configure RAFT at build time:

Expand All @@ -50,7 +50,7 @@ The RAFT Python code gets included in the cuML build and distributable artifacts

### Build Requirements

cuML's convenience [development yaml files](https://github.com/rapidsai/cuml/tree/branch-23.04/environments) includes all dependencies required to build cuML.
cuML's convenience [development yaml files](https://github.com/rapidsai/cuml/tree/branch-24.02/environments) include all dependencies required to build cuML.

To build cuML's Python package, the following dependencies are required:

Expand All @@ -70,8 +70,7 @@ Packages required for multigpu algorithms*:
- ucx-py version matching the cuML version
- dask-cudf version matching the cuML version
- nccl>=2.5
- dask>=2023.9.2
- distributed>=2023.9.2
- rapids-dask-dependency version matching the cuML version

* This can be avoided with the `--singlegpu` argument flag.

Expand Down
18 changes: 9 additions & 9 deletions python/cuml/preprocessing/LabelEncoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,19 +171,19 @@ def fit(self, y, _classes=None):
A fitted instance of itself to allow method chaining

"""
if _classes is None:
y = self._to_cudf_series(y)

self._validate_keywords()

self.dtype = y.dtype if y.dtype != cp.dtype("O") else str
if _classes is not None:
self.classes_ = _classes
else:
self.classes_ = y.drop_duplicates().sort_values(
ignore_index=True
if _classes is None:
y = (
self._to_cudf_series(y)
.drop_duplicates()
.sort_values(ignore_index=True)
) # dedupe and sort
self.classes_ = y
else:
self.classes_ = _classes

self.dtype = y.dtype if y.dtype != cp.dtype("O") else str
self._fitted = True
return self

Expand Down
4 changes: 1 addition & 3 deletions python/cuml/testing/strategies.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,9 +187,7 @@ def create_cuml_array_input(input_type, dtype, shape, order):

input_type = "cupy" if input_type is None else input_type

multidimensional = (
isinstance(shape, tuple) and len([d for d in shape if d > 1]) > 1
)
multidimensional = isinstance(shape, tuple) and len(shape) > 1
assume(
not (
input_type == "series"
Expand Down
2 changes: 1 addition & 1 deletion python/cuml/tests/test_input_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -396,7 +396,7 @@ def get_input(
result = cudf.DataFrame(rand_mat, index=index)

if type == "cudf-series":
result = cudf.Series(rand_mat, index=index)
result = cudf.Series(rand_mat.reshape(nrows), index=index)

if type == "pandas":
result = pdDF(cp.asnumpy(rand_mat), index=index)
Expand Down
3 changes: 1 addition & 2 deletions python/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -61,11 +61,10 @@ dependencies = [
"cupy-cuda11x>=12.0.0",
"dask-cuda==24.2.*",
"dask-cudf==24.2.*",
"dask>=2023.9.2",
"distributed>=2023.9.2",
"joblib>=0.11",
"numba>=0.57",
"raft-dask==24.2.*",
"rapids-dask-dependency==24.2.*",
"scipy>=1.8.0",
"treelite==3.9.1",
"treelite_runtime==3.9.1",
Expand Down