Skip to content

Commit

Permalink
Merge branch 'branch-25.02' into feat/raft_logger
Browse files: browse the repository at this point in the history
  • Loading branch information
vyasr authored Dec 31, 2024
2 parents 238c961 + 7731ba2 commit 354501f
Show file tree
Hide file tree
Showing 7 changed files with 21 additions and 7 deletions.
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,9 @@ dependencies:
- scipy>=1.8.0
- seaborn
- spdlog>=1.14.1,<1.15
- sphinx
- sphinx-copybutton
- sphinx-markdown-tables
- sphinx<6
- statsmodels
- sysroot_linux-64==2.17
- treelite==4.3.0
Expand Down
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-125_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,9 @@ dependencies:
- scipy>=1.8.0
- seaborn
- spdlog>=1.14.1,<1.15
- sphinx
- sphinx-copybutton
- sphinx-markdown-tables
- sphinx<6
- statsmodels
- sysroot_linux-64==2.17
- treelite==4.3.0
Expand Down
2 changes: 1 addition & 1 deletion dependencies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -464,7 +464,7 @@ dependencies:
- pydata-sphinx-theme!=0.14.2
- recommonmark
- &scikit_learn scikit-learn==1.5.*
- sphinx<6
- sphinx
- sphinx-copybutton
- sphinx-markdown-tables
- output_types: conda
Expand Down
6 changes: 6 additions & 0 deletions python/cuml/cuml/cluster/hdbscan/hdbscan.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ from cuml.common import input_to_cuml_array
from cuml.common.array_descriptor import CumlArrayDescriptor
from cuml.internals.api_decorators import device_interop_preparation
from cuml.internals.api_decorators import enable_device_interop
from cuml.internals.global_settings import GlobalSettings
from cuml.internals.mixins import ClusterMixin
from cuml.internals.mixins import CMajorInputTagMixin
from cuml.internals.import_utils import has_hdbscan
Expand Down Expand Up @@ -782,6 +783,9 @@ class HDBSCAN(UniversalBase, ClusterMixin, CMajorInputTagMixin):
self.n_rows = n_rows
self.n_cols = n_cols

if GlobalSettings().accelerator_active:
self._raw_data = self.X_m.to_output("numpy")

cdef uintptr_t _input_ptr = X_m.ptr

IF GPUBUILD == 1:
Expand Down Expand Up @@ -1133,6 +1137,8 @@ class HDBSCAN(UniversalBase, ClusterMixin, CMajorInputTagMixin):
self.condensed_tree_._raw_tree
self._cpu_model.single_linkage_tree_ = \
self.single_linkage_tree_._linkage
if hasattr(self, "_raw_data"):
self._cpu_model._raw_data = self._raw_data
if self.gen_min_span_tree:
self._cpu_model.minimum_spanning_tree_ = \
self.minimum_spanning_tree_._mst
Expand Down
6 changes: 5 additions & 1 deletion python/cuml/cuml/cluster/kmeans.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,7 @@ class KMeans(UniversalBase,
Compute k-means clustering with X.

"""
self._n_features_out = self.n_clusters
if self.init == 'preset':
check_cols = self.n_features_in_
check_dtype = self.dtype
Expand All @@ -306,6 +307,8 @@ class KMeans(UniversalBase,
else None),
check_dtype=check_dtype)

self.feature_names_in_ = _X_m.index

IF GPUBUILD == 1:

cdef uintptr_t input_ptr = _X_m.ptr
Expand Down Expand Up @@ -708,4 +711,5 @@ class KMeans(UniversalBase,

def get_attr_names(self):
return ['cluster_centers_', 'labels_', 'inertia_',
'n_iter_', 'n_features_in_', '_n_threads']
'n_iter_', 'n_features_in_', '_n_threads',
"feature_names_in_", "_n_features_out"]
5 changes: 4 additions & 1 deletion python/cuml/cuml/manifold/umap.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -577,11 +577,13 @@ class UMAP(UniversalBase,
convert_format=False)
self.n_rows, self.n_dims = self._raw_data.shape
self.sparse_fit = True
self._sparse_data = True
if self.build_algo == "nn_descent":
raise ValueError("NN Descent does not support sparse inputs")

# Handle dense inputs
else:
self._sparse_data = False
if data_on_host:
convert_to_mem_type = MemoryType.host
else:
Expand Down Expand Up @@ -908,6 +910,7 @@ class UMAP(UniversalBase,
self.metric_kwds, False, self.random_state)

super().gpu_to_cpu()
self._cpu_model._validate_parameters()

@classmethod
def _get_param_names(cls):
Expand Down Expand Up @@ -943,4 +946,4 @@ class UMAP(UniversalBase,
return ['_raw_data', 'embedding_', '_input_hash', '_small_data',
'_knn_dists', '_knn_indices', '_knn_search_index',
'_disconnection_distance', '_n_neighbors', '_a', '_b',
'_initial_alpha']
'_initial_alpha', '_sparse_data']
5 changes: 3 additions & 2 deletions python/cuml/cuml/tests/test_kmeans.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ def test_weighted_kmeans(nrows, ncols, nclusters, max_weight, random_state):
sk_kmeans.fit(cp.asnumpy(X), sample_weight=wt)
sk_score = sk_kmeans.score(cp.asnumpy(X))

assert abs(cu_score - sk_score) <= cluster_std * 1.5
assert cu_score - sk_score <= cluster_std * 1.5


@pytest.mark.parametrize("nrows", [1000, 10000])
Expand Down Expand Up @@ -418,5 +418,6 @@ def test_fit_transform_weighted_kmeans(
sk_transf = sk_kmeans.fit_transform(cp.asnumpy(X), sample_weight=wt)
sk_score = sk_kmeans.score(cp.asnumpy(X))

assert abs(cu_score - sk_score) <= cluster_std * 1.5
# we fail if cuML's score is significantly worse than sklearn's
assert cu_score - sk_score <= cluster_std * 1.5
assert sk_transf.shape == cuml_transf.shape

0 comments on commit 354501f

Please sign in to comment.