diff --git a/python/cuml/cuml/manifold/umap.pyx b/python/cuml/cuml/manifold/umap.pyx
index 86933ab31b..3aad3e76d3 100644
--- a/python/cuml/cuml/manifold/umap.pyx
+++ b/python/cuml/cuml/manifold/umap.pyx
@@ -16,9 +16,11 @@
 
 # distutils: language = c++
 
-from cuml.internals.safe_imports import cpu_only_import
+from cuml.internals.safe_imports import cpu_only_import, safe_import_from
 np = cpu_only_import('numpy')
 pd = cpu_only_import('pandas')
+nearest_neighbors = safe_import_from('umap.umap_', 'nearest_neighbors')
+DISCONNECTION_DISTANCES = safe_import_from('umap.umap_', 'DISCONNECTION_DISTANCES')
 
 import joblib
 import warnings
@@ -627,6 +629,8 @@ class UMAP(UniversalBase,
 
             _knn_dists_ptr = knn_dists.ptr
             _knn_indices_ptr = knn_indices.ptr
+            self._knn_dists = knn_dists
+            self._knn_indices = knn_indices
 
         self.n_neighbors = min(self.n_rows, self.n_neighbors)
 
@@ -853,6 +857,75 @@ class UMAP(UniversalBase,
         del X_m
         return embedding
 
+    # The underscore-prefixed properties below alias existing estimator
+    # attributes. NOTE(review): presumably these are the names the CPU
+    # (umap-learn) model expects during device transfer -- confirm.
+    @property
+    def _n_neighbors(self):
+        return self.n_neighbors
+
+    @_n_neighbors.setter
+    def _n_neighbors(self, value):
+        self.n_neighbors = value
+
+    @property
+    def _a(self):
+        return self.a
+
+    @_a.setter
+    def _a(self, value):
+        self.a = value
+
+    @property
+    def _b(self):
+        return self.b
+
+    @_b.setter
+    def _b(self, value):
+        self.b = value
+
+    @property
+    def _initial_alpha(self):
+        return self.learning_rate
+
+    @_initial_alpha.setter
+    def _initial_alpha(self, value):
+        self.learning_rate = value
+
+    @property
+    def _disconnection_distance(self):
+        # Reading must not overwrite a value stored through the setter, so
+        # only fall back to the metric's default when nothing is stored.
+        value = getattr(self, 'disconnection_distance', None)
+        if value is None:
+            value = DISCONNECTION_DISTANCES.get(self.metric, np.inf)
+        return value
+
+    @_disconnection_distance.setter
+    def _disconnection_distance(self, value):
+        self.disconnection_distance = value
+
+    def gpu_to_cpu(self):
+        """Populate the k-NN attributes, then call the parent gpu_to_cpu.
+
+        Uses ``knn_dists``/``knn_indices`` when both are present; otherwise
+        converts ``_raw_data`` to NumPy and recomputes the neighbors with
+        umap-learn's ``nearest_neighbors``.
+        """
+        # NOTE(review): this change stores ``_knn_dists``/``_knn_indices``;
+        # confirm the un-prefixed attributes checked here are set elsewhere.
+        if hasattr(self, 'knn_dists') and hasattr(self, 'knn_indices'):
+            self._knn_dists = self.knn_dists
+            self._knn_indices = self.knn_indices
+            self._knn_search_index = None
+        elif hasattr(self, '_raw_data'):
+            self._raw_data = self._raw_data.to_output('numpy')
+            self._knn_dists, self._knn_indices, self._knn_search_index = \
+                nearest_neighbors(self._raw_data, self.n_neighbors, self.metric,
+                                  self.metric_kwds, False, self.random_state)
+
+        super().gpu_to_cpu()
+
     def get_param_names(self):
         return super().get_param_names() + [
             "n_neighbors",
@@ -883,4 +956,7 @@ class UMAP(UniversalBase,
         ]
 
     def get_attr_names(self):
-        return ['_raw_data', 'embedding_', '_input_hash', '_small_data']
+        return ['_raw_data', 'embedding_', '_input_hash', '_small_data',
+                '_knn_dists', '_knn_indices', '_knn_search_index',
+                '_disconnection_distance', '_n_neighbors', '_a', '_b',
+                '_initial_alpha']
diff --git a/python/cuml/cuml/tests/test_device_selection.py b/python/cuml/cuml/tests/test_device_selection.py
index 1da3b0738e..6c7d1852c1 100644
--- a/python/cuml/cuml/tests/test_device_selection.py
+++ b/python/cuml/cuml/tests/test_device_selection.py
@@ -596,8 +596,6 @@ def test_train_cpu_infer_cpu(test_data):
 
 def test_train_gpu_infer_cpu(test_data):
     cuEstimator = test_data["cuEstimator"]
-    if cuEstimator is UMAP:
-        pytest.skip("UMAP GPU training CPU inference not yet implemented")
 
     model = cuEstimator(**test_data["kwargs"])
     with using_device_type("gpu"):
@@ -655,8 +653,6 @@ def test_pickle_interop(tmp_path, test_data):
     pickle_filepath = tmp_path / "model.pickle"
 
     cuEstimator = test_data["cuEstimator"]
-    if cuEstimator is UMAP:
-        pytest.skip("UMAP GPU training CPU inference not yet implemented")
     model = cuEstimator(**test_data["kwargs"])
     with using_device_type("gpu"):
         if "y_train" in test_data: