diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
index 6cfbc14a6b..fb5a8f118b 100644
--- a/conda/environments/all_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -24,7 +24,7 @@ dependencies:
 - fmt>=11.0.2,<12
 - gcc_linux-64=11.*
 - graphviz
-- hdbscan>=0.8.38,<0.8.39
+- hdbscan>=0.8.39,<0.8.40
 - hypothesis>=6.0,<7
 - ipykernel
 - ipython
@@ -69,7 +69,6 @@ dependencies:
 - scikit-learn==1.5
 - scipy>=1.8.0
 - seaborn
-- setuptools
 - spdlog>=1.14.1,<1.15
 - sphinx-copybutton
 - sphinx-markdown-tables
diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml
index e6e0d0c726..44a5164ba6 100644
--- a/conda/environments/all_cuda-125_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-125_arch-x86_64.yaml
@@ -26,7 +26,7 @@ dependencies:
 - fmt>=11.0.2,<12
 - gcc_linux-64=11.*
 - graphviz
-- hdbscan>=0.8.38,<0.8.39
+- hdbscan>=0.8.39,<0.8.40
 - hypothesis>=6.0,<7
 - ipykernel
 - ipython
@@ -65,7 +65,6 @@ dependencies:
 - scikit-learn==1.5
 - scipy>=1.8.0
 - seaborn
-- setuptools
 - spdlog>=1.14.1,<1.15
 - sphinx-copybutton
 - sphinx-markdown-tables
diff --git a/dependencies.yaml b/dependencies.yaml
index 8d6a028d90..a53a8c4d1a 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -512,7 +512,7 @@ dependencies:
         packages:
           - *cython
           - dask-ml
-          - hdbscan>=0.8.38,<0.8.39
+          - hdbscan>=0.8.39,<0.8.40
           - hypothesis>=6.0,<7
           - nltk
           - numpydoc
@@ -526,7 +526,6 @@ dependencies:
           - statsmodels
           - umap-learn==0.5.6
           - pynndescent
-          - setuptools # Needed on Python 3.12 for dask-glm, which requires pkg_resources but Python 3.12 doesn't have setuptools by default
   test_notebooks:
     common:
       - output_types: [conda, requirements]
diff --git a/python/cuml/cuml/cluster/hdbscan/prediction.pyx b/python/cuml/cuml/cluster/hdbscan/prediction.pyx
index b3ef5b3587..169b26328b 100644
--- a/python/cuml/cuml/cluster/hdbscan/prediction.pyx
+++ b/python/cuml/cuml/cluster/hdbscan/prediction.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2021-2023, NVIDIA CORPORATION.
+# Copyright (c) 2021-2024, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -166,19 +166,25 @@ def all_points_membership_vectors(clusterer, batch_size=4096):
         # trained on gpu
         if not hasattr(clusterer, "_cpu_model"):
-            # the reference HDBSCAN implementations uses @property
-            # for attributes without setters available for them,
-            # so they can't be transferred from the GPU model
-            # to the CPU model
-            raise ValueError("Inferring on CPU is not supported yet when the "
-                             "model has been trained on GPU")
+            clusterer.import_cpu_model()
+            clusterer.build_cpu_model()
+            clusterer.gpu_to_cpu()
+            # These attributes have to be reassigned to the CPU model
+            # as the raw arrays because the reference HDBSCAN implementation
+            # reconstructs the objects from the raw arrays
+            clusterer._cpu_model.condensed_tree_ = \
+                clusterer.condensed_tree_._raw_tree
+            clusterer._cpu_model.single_linkage_tree_ = \
+                clusterer.single_linkage_tree_._linkage
+            clusterer._cpu_model.minimum_spanning_tree_ = \
+                clusterer.minimum_spanning_tree_._mst

         # this took a long debugging session to figure out, but
         # this method on cpu does not work without this copy for some reason
         clusterer._cpu_model.prediction_data_.raw_data = \
             clusterer._cpu_model.prediction_data_.raw_data.copy()
         return cpu_all_points_membership_vectors(clusterer._cpu_model)
-
+    # gpu infer, cpu/gpu train
     elif device_type == DeviceType.device:
         # trained on cpu
         if hasattr(clusterer, "_cpu_model"):
diff --git a/python/cuml/cuml/tests/test_device_selection.py b/python/cuml/cuml/tests/test_device_selection.py
index 449c032161..31c0f9aed6 100644
--- a/python/cuml/cuml/tests/test_device_selection.py
+++ b/python/cuml/cuml/tests/test_device_selection.py
@@ -932,9 +932,6 @@ def test_nn_methods(train_device, infer_device):
 @pytest.mark.parametrize("train_device", ["cpu", "gpu"])
 @pytest.mark.parametrize("infer_device", ["cpu", "gpu"])
 def test_hdbscan_methods(train_device, infer_device):
-    if train_device == "gpu" and infer_device == "cpu":
-        pytest.skip("Can't transfer attributes to cpu for now")
-
     ref_model = refHDBSCAN(
         prediction_data=True,
         approx_min_span_tree=False,
@@ -951,11 +948,13 @@ def test_hdbscan_methods(train_device, infer_device):
     ref_membership = cpu_all_points_membership_vectors(ref_model)
     ref_labels, ref_probs = cpu_approximate_predict(ref_model, X_test_blob)

+    gen_min_span_tree = train_device == "gpu" and infer_device == "cpu"
     model = HDBSCAN(
         prediction_data=True,
         approx_min_span_tree=False,
         max_cluster_size=0,
         min_cluster_size=30,
+        gen_min_span_tree=gen_min_span_tree,
     )
     with using_device_type(train_device):
         trained_labels = model.fit_predict(X_train_blob)
diff --git a/python/cuml/pyproject.toml b/python/cuml/pyproject.toml
index 1b4bdeca47..f31f5c61b1 100644
--- a/python/cuml/pyproject.toml
+++ b/python/cuml/pyproject.toml
@@ -113,7 +113,7 @@ classifiers = [
 test = [
     "cython>=3.0.0",
     "dask-ml",
-    "hdbscan>=0.8.38,<0.8.39",
+    "hdbscan>=0.8.39,<0.8.40",
     "hypothesis>=6.0,<7",
     "nltk",
     "numpydoc",
@@ -125,7 +125,6 @@ classifiers = [
     "pytest==7.*",
     "scikit-learn==1.5",
     "seaborn",
-    "setuptools",
     "statsmodels",
     "umap-learn==0.5.6",
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
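Not part of the patch itself: a minimal sketch of the train-on-GPU / infer-on-CPU flow that the `prediction.pyx` change enables (this path previously raised a `ValueError`). The blob data, sample counts, and parameter values below are illustrative assumptions, not taken from the patch or the cuML test suite.

```python
# Sketch only: exercise the new GPU-train / CPU-infer path for
# all_points_membership_vectors; data and parameters are illustrative.
from sklearn.datasets import make_blobs

from cuml.cluster.hdbscan import HDBSCAN, all_points_membership_vectors
from cuml.common.device_selection import using_device_type

# Hypothetical toy data standing in for the test's blob fixtures.
X, _ = make_blobs(n_samples=1000, centers=5, random_state=42)

model = HDBSCAN(
    prediction_data=True,      # required for membership-vector prediction
    min_cluster_size=30,
    gen_min_span_tree=True,    # MST must exist so it can be handed to the CPU model
)

with using_device_type("gpu"):
    model.fit(X)

with using_device_type("cpu"):
    # Before this change, a GPU-trained model raised ValueError here; now the
    # estimator builds a reference hdbscan model via gpu_to_cpu() and reassigns
    # the condensed / single-linkage / minimum-spanning trees as raw arrays.
    membership = all_points_membership_vectors(model)
```

This also mirrors why the updated test sets `gen_min_span_tree` only for the GPU-train / CPU-infer combination: the transfer reassigns `minimum_spanning_tree_._mst` to the CPU model, so the spanning tree has to be generated during GPU training for that path.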