rapidsai · rapids-bot · May 22, 2024 · May 20, 2024 · May 20, 2024 · May 20, 2024
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2023, NVIDIA CORPORATION.
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -64,14 +64,19 @@ def label_binarize(
 
     cp.cuda.Stream.null.synchronize()
 
+    is_binary = classes.shape[0] == 2
+
     if sparse_output:
         sp = sp.tocsr()
+        if is_binary:
+            sp = sp.getcol(1)  # getcol does not support -1 indexing
         return sp
     else:
 
         arr = sp.toarray().astype(y.dtype)
         arr[arr == 0] = neg_label
-
+        if is_binary:
+            arr = arr[:, -1].reshape((-1, 1))
         return arr
 
 

@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2023, NVIDIA CORPORATION.
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -43,6 +43,7 @@
     quantile_transform as cu_quantile_transform,
     robust_scale as cu_robust_scale,
     scale as cu_scale,
+    label_binarize as cu_label_binarize,
 )
 from sklearn.preprocessing import (
     Binarizer as skBinarizer,
@@ -68,6 +69,7 @@
     quantile_transform as sk_quantile_transform,
     robust_scale as sk_robust_scale,
     scale as sk_scale,
+    label_binarize as sk_label_binarize,
 )
 from sklearn.impute import (
     MissingIndicator as skMissingIndicator,
@@ -1135,6 +1137,36 @@ def test_kernel_centerer():
     assert_allclose(sk_t_X, t_X)
 
 
+def test_label_binarize():  # cu_label_binarize must match sklearn's output
+    cu_bin = cu_label_binarize(  # two classes, dense output
+        cp.array([1, 0, 1, 1]), classes=cp.array([0, 1])
+    )
+    sk_bin = sk_label_binarize([1, 0, 1, 1], classes=[0, 1])
+    assert_allclose(cu_bin, sk_bin)
+
+    cu_bin_sparse = cu_label_binarize(  # two classes, sparse output
+        cp.array([1, 0, 1, 1]), classes=cp.array([0, 1]), sparse_output=True
+    )
+    sk_bin_sparse = sk_label_binarize(
+        [1, 0, 1, 1], classes=[0, 1], sparse_output=True
+    )
+    assert_allclose(cu_bin_sparse, sk_bin_sparse)
+
+    cu_multi = cu_label_binarize(  # four classes (4 never occurs), dense
+        cp.array([1, 6, 3]), classes=cp.array([1, 3, 4, 6])
+    )
+    sk_multi = sk_label_binarize([1, 6, 3], classes=[1, 3, 4, 6])
+    assert_allclose(cu_multi, sk_multi)
+
+    cu_multi_sparse = cu_label_binarize(  # four classes, sparse output
+        cp.array([1, 6, 3]), classes=cp.array([1, 3, 4, 6]), sparse_output=True
+    )
+    sk_multi_sparse = sk_label_binarize(
+        [1, 6, 3], classes=[1, 3, 4, 6], sparse_output=True
+    )
+    assert_allclose(cu_multi_sparse, sk_multi_sparse)
+
+
 def test__repr__():
     assert cuBinarizer().__repr__() == "Binarizer()"
     assert cuFunctionTransformer().__repr__() == "FunctionTransformer()"