rapidsai · rapids-bot · Jul 29, 2024 · Jul 28, 2024 · Jul 28, 2024 · Jul 28, 2024
@@ -274,7 +274,7 @@ def _pandas_indexing(X, key, key_dtype, axis):
     if hasattr(key, 'shape'):
         # Work-around for indexing with read-only key in pandas
         # FIXME: solved in pandas 0.25
-        key = np.asarray(key)
+        key = key.to_numpy()
         key = key if key.flags.writeable else key.copy()
     elif isinstance(key, tuple):
         key = list(key)

@@ -265,7 +265,9 @@ def inverse_transform(self, y: cudf.Series) -> cudf.Series:
         ord_label = y.unique()
         category_num = len(self.classes_)
         if self.handle_unknown == "error":
-            for ordi in ord_label.values_host:
+            if not isinstance(ord_label, (cp.ndarray, np.ndarray)):
+                ord_label = ord_label.values_host
+            for ordi in ord_label:
                 if ordi < 0 or ordi >= category_num:
                     raise ValueError(
                         "y contains previously unseen label {}".format(ordi)

@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2019-2023, NVIDIA CORPORATION.
+# Copyright (c) 2019-2024, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -315,8 +315,11 @@ def _rename_col(df, col):
             return df.reset_index()
 
         res = []
-        for f in train[self.fold_col].unique().values_host:
-            mask = train[self.fold_col] == f
+        unq_vals = train[self.fold_col].unique()
+        if not isinstance(unq_vals, (cp.ndarray, np.ndarray)):
+            unq_vals = unq_vals.values_host
+        for f in unq_vals:
+            mask = train[self.fold_col].values == f
             dg = train.loc[~mask].groupby(x_cols).agg({self.y_col: self.stat})
             dg = _rename_col(dg, self.out_col)
             res.append(train.loc[mask].merge(dg, on=x_cols, how="left"))

@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2023, NVIDIA CORPORATION.
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -38,6 +38,7 @@
 pd = cpu_only_import("pandas")
 
 cuda = gpu_only_import_from("numba", "cuda")
+cudf_pandas_active = gpu_only_import_from("cudf.pandas", "LOADED")
 
 
 cudf = gpu_only_import("cudf")
@@ -599,15 +600,22 @@ def generate_inputs_from_categories(
         inp_ary = cp.array(ary)
         return inp_ary, ary
     else:
-        df = cudf.DataFrame.from_pandas(pandas_df)
+        if cudf_pandas_active:
+            df = pandas_df
+        else:
+            df = cudf.DataFrame.from_pandas(pandas_df)
         return df, ary
 
 
 def assert_inverse_equal(ours, ref):
     if isinstance(ours, cp.ndarray):
         cp.testing.assert_array_equal(ours, ref)
     else:
-        pd.testing.assert_frame_equal(ours.to_pandas(), ref.to_pandas())
+        if hasattr(ours, "to_pandas"):
+            ours = ours.to_pandas()
+        if hasattr(ref, "to_pandas"):
+            ref = ref.to_pandas()
+        pd.testing.assert_frame_equal(ours, ref)
 
 
 def from_df_to_numpy(df):

@@ -1,4 +1,4 @@
-# Copyright (c) 2021-2023, NVIDIA CORPORATION.
+# Copyright (c) 2021-2024, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -23,6 +23,7 @@
 np = cpu_only_import("numpy")
 pd = cpu_only_import("pandas")
 cuda = gpu_only_import_from("numba", "cuda")
+cudf_pandas_active = gpu_only_import_from("cudf.pandas", "LOADED")
 
 
 @pytest.mark.parametrize(
@@ -64,7 +65,7 @@ def test_kmeans_input(input_type):
     elif input_type == "cudf-series":
         cp.testing.assert_array_equal(summary[0].values.tolist(), [23.0, 52.0])
         assert isinstance(summary[0], cudf.Series)
-    elif input_type == "pandas-series":
+    elif input_type == "pandas-series" and not cudf_pandas_active:
         cp.testing.assert_array_equal(
             summary[0].to_numpy().flatten(), [23.0, 52.0]
         )

@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2020-2023, NVIDIA CORPORATION.
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -19,13 +19,15 @@
 import cuml
 import pytest
 
-from cuml.internals.safe_imports import gpu_only_import
+from cuml.internals.safe_imports import gpu_only_import, gpu_only_import_from
 
 cudf = gpu_only_import("cudf")
 cp = gpu_only_import("cupy")
 np = cpu_only_import("numpy")
 pd = cpu_only_import("pandas")
 
+cudf_pandas_active = gpu_only_import_from("cudf.pandas", "LOADED")
+
 
 ###############################################################################
 #                                    Parameters                               #
@@ -71,7 +73,7 @@ def test_default_global_output_type(input_type):
 
     if input_type == "numba":
         assert is_cuda_array(res)
-    else:
+    elif not (input_type == "pandas" and cudf_pandas_active):
         assert isinstance(res, test_output_types[input_type])
 
 

@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2023, NVIDIA CORPORATION.
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -20,6 +20,7 @@
 from cuml.internals.safe_imports import gpu_only_import_from
 from cuml.preprocessing import OrdinalEncoder
 
+cudf_pandas_active = gpu_only_import_from("cudf.pandas", "LOADED")
 DataFrame = gpu_only_import_from("cudf", "DataFrame")
 
 
@@ -97,7 +98,8 @@ def test_output_type(test_sample) -> None:
     enc = OrdinalEncoder(output_type="cudf").fit(X)
     assert isinstance(enc.transform(X), DataFrame)
     enc = OrdinalEncoder(output_type="pandas").fit(X)
-    assert isinstance(enc.transform(X), pd.DataFrame)
+    if not cudf_pandas_active:
+        assert isinstance(enc.transform(X), pd.DataFrame)
     enc = OrdinalEncoder(output_type="numpy").fit(X)
     assert isinstance(enc.transform(X), np.ndarray)
     # output_type == "input"

@@ -54,6 +54,7 @@
 np = cpu_only_import("numpy")
 
 cuda = gpu_only_import_from("numba", "cuda")
+cudf_pandas_active = gpu_only_import_from("cudf.pandas", "LOADED")
 
 
 pytestmark = pytest.mark.filterwarnings(
@@ -276,6 +277,11 @@ def test_tweedie_convergence(max_depth, split_criterion):
 )
 @pytest.mark.parametrize("datatype", [np.float32, np.float64])
 @pytest.mark.parametrize("max_features", [1.0, "log2", "sqrt"])
+@pytest.mark.skipif(
+    cudf_pandas_active,
+    reason="cudf.pandas causes sklearn RF estimators crashes sometimes. "
+    "Issue: https://github.com/rapidsai/cuml/issues/5991",
+)
 def test_rf_classification(small_clf, datatype, max_samples, max_features):
     use_handle = True
 
@@ -405,6 +411,11 @@ def test_rf_classification_unorder(
         (1.0, 32),
     ],
 )
+@pytest.mark.skipif(
+    cudf_pandas_active,
+    reason="cudf.pandas causes sklearn RF estimators crashes sometimes. "
+    "Issue: https://github.com/rapidsai/cuml/issues/5991",
+)
 def test_rf_regression(
     special_reg, datatype, max_features, max_samples, n_bins
 ):
@@ -510,6 +521,11 @@ def test_rf_classification_seed(small_clf, datatype):
 )
 @pytest.mark.parametrize("convert_dtype", [True, False])
 @pytest.mark.filterwarnings("ignore:To use pickling(.*)::cuml[.*]")
+@pytest.mark.skipif(
+    cudf_pandas_active,
+    reason="cudf.pandas causes sklearn RF estimators crashes sometimes. "
+    "Issue: https://github.com/rapidsai/cuml/issues/5991",
+)
 def test_rf_classification_float64(small_clf, datatype, convert_dtype):
 
     X, y = small_clf
@@ -552,6 +568,11 @@ def test_rf_classification_float64(small_clf, datatype, convert_dtype):
     "datatype", [(np.float64, np.float32), (np.float32, np.float64)]
 )
 @pytest.mark.filterwarnings("ignore:To use pickling(.*)::cuml[.*]")
+@pytest.mark.skipif(
+    cudf_pandas_active,
+    reason="cudf.pandas causes sklearn RF estimators crashes sometimes. "
+    "Issue: https://github.com/rapidsai/cuml/issues/5991",
+)
 def test_rf_regression_float64(large_reg, datatype):
 
     X, y = large_reg
@@ -675,13 +696,23 @@ def rf_classification(
 
 @pytest.mark.parametrize("datatype", [(np.float32, np.float64)])
 @pytest.mark.parametrize("array_type", ["dataframe", "numpy"])
+@pytest.mark.skipif(
+    cudf_pandas_active,
+    reason="cudf.pandas causes sklearn RF estimators crashes sometimes. "
+    "Issue: https://github.com/rapidsai/cuml/issues/5991",
+)
 def test_rf_classification_multi_class(mclass_clf, datatype, array_type):
     rf_classification(datatype, array_type, 1.0, 1.0, mclass_clf)
 
 
 @pytest.mark.parametrize("datatype", [(np.float32, np.float64)])
 @pytest.mark.parametrize("max_samples", [unit_param(1.0), stress_param(0.95)])
 @pytest.mark.parametrize("max_features", [1.0, "log2", "sqrt"])
+@pytest.mark.skipif(
+    cudf_pandas_active,
+    reason="cudf.pandas causes sklearn RF estimators crashes sometimes. "
+    "Issue: https://github.com/rapidsai/cuml/issues/5991",
+)
 def test_rf_classification_proba(
     small_clf, datatype, max_samples, max_features
 ):
@@ -695,6 +726,11 @@ def test_rf_classification_proba(
 @pytest.mark.parametrize(
     "algo", ["auto", "naive", "tree_reorg", "batch_tree_reorg"]
 )
+@pytest.mark.skipif(
+    cudf_pandas_active,
+    reason="cudf.pandas causes sklearn RF estimators crashes sometimes. "
+    "Issue: https://github.com/rapidsai/cuml/issues/5991",
+)
 def test_rf_classification_sparse(
     small_clf, datatype, fil_sparse_format, algo
 ):
@@ -783,6 +819,11 @@ def test_rf_classification_sparse(
 @pytest.mark.parametrize(
     "algo", ["auto", "naive", "tree_reorg", "batch_tree_reorg"]
 )
+@pytest.mark.skipif(
+    cudf_pandas_active,
+    reason="cudf.pandas causes sklearn RF estimators crashes sometimes. "
+    "Issue: https://github.com/rapidsai/cuml/issues/5991",
+)
 def test_rf_regression_sparse(special_reg, datatype, fil_sparse_format, algo):
     use_handle = True
     num_treees = 50

@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2023, NVIDIA CORPORATION.
+# Copyright (c) 2019-2024, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -44,6 +44,8 @@
 cudf = gpu_only_import("cudf")
 scipy_sparse = cpu_only_import("scipy.sparse")
 
+cudf_pandas_active = gpu_only_import_from("cudf.pandas", "LOADED")
+
 IS_ARM = platform.processor() == "aarch64"
 
 
@@ -666,6 +668,10 @@ def test_svm_predict_convert_dtype(train_dtype, test_dtype, classifier):
     reason="Test fails unexpectedly on ARM. "
     "github.com/rapidsai/cuml/issues/5100",
 )
+@pytest.mark.skipif(
+    cudf_pandas_active,
+    reason="cudf.pandas causes small numeric issues in this test only ",
+)
 def test_svm_no_support_vectors():
     n_rows = 10
     n_cols = 3