From 743f02da224b3192f8ab8ddb786b4b3cd6edf24f Mon Sep 17 00:00:00 2001 From: jinsolp Date: Mon, 20 May 2024 22:41:42 +0000 Subject: [PATCH 1/7] fix binarization for binary classes --- python/cuml/preprocessing/label.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/python/cuml/preprocessing/label.py b/python/cuml/preprocessing/label.py index 78daee603e..3e113c74a3 100644 --- a/python/cuml/preprocessing/label.py +++ b/python/cuml/preprocessing/label.py @@ -64,14 +64,19 @@ def label_binarize( cp.cuda.Stream.null.synchronize() + is_binary = True if classes.shape[0] == 2 else False + if sparse_output: sp = sp.tocsr() + if is_binary: + sp = sp.getcol(1) # getcol does not support -1 indexing return sp else: arr = sp.toarray().astype(y.dtype) arr[arr == 0] = neg_label - + if is_binary: + arr = arr[:, -1].reshape((-1, 1)) return arr From 7c5bf94194ff2678d15217010d2089742c4e2797 Mon Sep 17 00:00:00 2001 From: jinsolp Date: Mon, 20 May 2024 23:17:06 +0000 Subject: [PATCH 2/7] add tests for label_binzarize --- python/cuml/tests/test_preprocessing.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/python/cuml/tests/test_preprocessing.py b/python/cuml/tests/test_preprocessing.py index 332ef4be93..99427a0c48 100644 --- a/python/cuml/tests/test_preprocessing.py +++ b/python/cuml/tests/test_preprocessing.py @@ -43,6 +43,7 @@ quantile_transform as cu_quantile_transform, robust_scale as cu_robust_scale, scale as cu_scale, + label_binarize as cu_label_binarize, ) from sklearn.preprocessing import ( Binarizer as skBinarizer, @@ -68,6 +69,7 @@ quantile_transform as sk_quantile_transform, robust_scale as sk_robust_scale, scale as sk_scale, + label_binarize as sk_label_binarize, ) from sklearn.impute import ( MissingIndicator as skMissingIndicator, @@ -1135,6 +1137,23 @@ def test_kernel_centerer(): assert_allclose(sk_t_X, t_X) +def test_label_binarize(): + cu_bin = cu_label_binarize(cp.array([1, 0, 1, 1]), classes=cp.array([0, 1])) + sk_bin = sk_label_binarize([1, 0, 1, 1], classes=[0, 1]) + assert_allclose(cu_bin, sk_bin) + + cu_bin_sparse = cu_label_binarize(cp.array([1, 0, 1, 1]), classes=cp.array([0, 1]), sparse_output=True) + sk_bin_sparse = sk_label_binarize([1, 0, 1, 1], classes=[0, 1], sparse_output=True) + assert_allclose(cu_bin_sparse, sk_bin_sparse) + + cu_multi = cu_label_binarize(cp.array([1, 6, 3]), classes=cp.array([1, 3, 4, 6])) + sk_multi = sk_label_binarize([1, 6, 3], classes=[1, 3, 4, 6]) + assert_allclose(cu_multi, sk_multi) + + cu_multi_sparse = cu_label_binarize(cp.array([1, 6, 3]), classes=cp.array([1, 3, 4, 6]), sparse_output=True) + sk_multi_sparse = sk_label_binarize([1, 6, 3], classes=[1, 3, 4, 6], sparse_output=True) + assert_allclose(cu_multi_sparse, sk_multi_sparse) + def test__repr__(): assert cuBinarizer().__repr__() == "Binarizer()" assert cuFunctionTransformer().__repr__() == "FunctionTransformer()" From 713f6f60afdfa8156483467a7382bf53d20d8eac Mon Sep 17 00:00:00 2001 From: jinsolp Date: Mon, 20 May 2024 23:54:29 +0000 Subject: [PATCH 3/7] fix copyright date --- python/cuml/preprocessing/label.py | 2 +- python/cuml/tests/test_preprocessing.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cuml/preprocessing/label.py b/python/cuml/preprocessing/label.py index 3e113c74a3..2814225c39 100644 --- a/python/cuml/preprocessing/label.py +++ b/python/cuml/preprocessing/label.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2023=4, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/python/cuml/tests/test_preprocessing.py b/python/cuml/tests/test_preprocessing.py index 99427a0c48..e6985502be 100644 --- a/python/cuml/tests/test_preprocessing.py +++ b/python/cuml/tests/test_preprocessing.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From fe623f9a4e13f42ea28fac5243cc7fa6e3a055cf Mon Sep 17 00:00:00 2001 From: jinsolp Date: Tue, 21 May 2024 16:44:03 +0000 Subject: [PATCH 4/7] update based on review --- python/cuml/preprocessing/label.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cuml/preprocessing/label.py b/python/cuml/preprocessing/label.py index 2814225c39..98123a3f15 100644 --- a/python/cuml/preprocessing/label.py +++ b/python/cuml/preprocessing/label.py @@ -64,7 +64,7 @@ def label_binarize( cp.cuda.Stream.null.synchronize() - is_binary = True if classes.shape[0] == 2 else False + is_binary = classes.shape[0] == 2 if sparse_output: sp = sp.tocsr() From 55b4e83b0b7e8f30d243d1d96739165dc2d6564e Mon Sep 17 00:00:00 2001 From: jinsolp Date: Tue, 21 May 2024 16:45:49 +0000 Subject: [PATCH 5/7] fix typo in copyright date --- python/cuml/preprocessing/label.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cuml/preprocessing/label.py b/python/cuml/preprocessing/label.py index 98123a3f15..0506879ea8 100644 --- a/python/cuml/preprocessing/label.py +++ b/python/cuml/preprocessing/label.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023=4, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From 7bda331a013cf2eb9d24e47e48734f89ce66965c Mon Sep 17 00:00:00 2001 From: jinsolp Date: Tue, 21 May 2024 18:16:46 +0000 Subject: [PATCH 6/7] styling --- python/cuml/tests/test_preprocessing.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/python/cuml/tests/test_preprocessing.py b/python/cuml/tests/test_preprocessing.py index e6985502be..da1dc2ae22 100644 --- a/python/cuml/tests/test_preprocessing.py +++ b/python/cuml/tests/test_preprocessing.py @@ -1142,16 +1142,26 @@ def test_label_binarize(): sk_bin = sk_label_binarize([1, 0, 1, 1], classes=[0, 1]) assert_allclose(cu_bin, sk_bin) - cu_bin_sparse = cu_label_binarize(cp.array([1, 0, 1, 1]), classes=cp.array([0, 1]), sparse_output=True) - sk_bin_sparse = sk_label_binarize([1, 0, 1, 1], classes=[0, 1], sparse_output=True) + cu_bin_sparse = cu_label_binarize( + cp.array([1, 0, 1, 1]), classes=cp.array([0, 1]), sparse_output=True + ) + sk_bin_sparse = sk_label_binarize( + [1, 0, 1, 1], classes=[0, 1], sparse_output=True + ) assert_allclose(cu_bin_sparse, sk_bin_sparse) - cu_multi = cu_label_binarize(cp.array([1, 6, 3]), classes=cp.array([1, 3, 4, 6])) + cu_multi = cu_label_binarize( + cp.array([1, 6, 3]), classes=cp.array([1, 3, 4, 6]) + ) sk_multi = sk_label_binarize([1, 6, 3], classes=[1, 3, 4, 6]) assert_allclose(cu_multi, sk_multi) - cu_multi_sparse = cu_label_binarize(cp.array([1, 6, 3]), classes=cp.array([1, 3, 4, 6]), sparse_output=True) - sk_multi_sparse = sk_label_binarize([1, 6, 3], classes=[1, 3, 4, 6], sparse_output=True) + cu_multi_sparse = cu_label_binarize( + cp.array([1, 6, 3]), classes=cp.array([1, 3, 4, 6]), sparse_output=True + ) + sk_multi_sparse = sk_label_binarize( + [1, 6, 3], classes=[1, 3, 4, 6], sparse_output=True + ) assert_allclose(cu_multi_sparse, sk_multi_sparse) def test__repr__(): From e1c346b29478c4c1425a4ed07563ca5a611533be Mon Sep 17 00:00:00 2001 From: jinsolp Date: Tue, 21 May 2024 18:31:59 +0000 Subject: [PATCH 7/7] fix styling --- python/cuml/preprocessing/label.py | 4 ++-- python/cuml/tests/test_preprocessing.py | 9 ++++++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/python/cuml/preprocessing/label.py b/python/cuml/preprocessing/label.py index 0506879ea8..c34a9d9249 100644 --- a/python/cuml/preprocessing/label.py +++ b/python/cuml/preprocessing/label.py @@ -65,11 +65,11 @@ def label_binarize( cp.cuda.Stream.null.synchronize() is_binary = classes.shape[0] == 2 - + if sparse_output: sp = sp.tocsr() if is_binary: - sp = sp.getcol(1) # getcol does not support -1 indexing + sp = sp.getcol(1) # getcol does not support -1 indexing return sp else: diff --git a/python/cuml/tests/test_preprocessing.py b/python/cuml/tests/test_preprocessing.py index da1dc2ae22..c341fa2a63 100644 --- a/python/cuml/tests/test_preprocessing.py +++ b/python/cuml/tests/test_preprocessing.py @@ -1138,7 +1138,9 @@ def test_kernel_centerer(): def test_label_binarize(): - cu_bin = cu_label_binarize(cp.array([1, 0, 1, 1]), classes=cp.array([0, 1])) + cu_bin = cu_label_binarize( + cp.array([1, 0, 1, 1]), classes=cp.array([0, 1]) + ) sk_bin = sk_label_binarize([1, 0, 1, 1], classes=[0, 1]) assert_allclose(cu_bin, sk_bin) @@ -1149,13 +1151,13 @@ def test_label_binarize(): [1, 0, 1, 1], classes=[0, 1], sparse_output=True ) assert_allclose(cu_bin_sparse, sk_bin_sparse) - + cu_multi = cu_label_binarize( cp.array([1, 6, 3]), classes=cp.array([1, 3, 4, 6]) ) sk_multi = sk_label_binarize([1, 6, 3], classes=[1, 3, 4, 6]) assert_allclose(cu_multi, sk_multi) - + cu_multi_sparse = cu_label_binarize( cp.array([1, 6, 3]), classes=cp.array([1, 3, 4, 6]), sparse_output=True ) @@ -1164,6 +1166,7 @@ def test_label_binarize(): ) assert_allclose(cu_multi_sparse, sk_multi_sparse) + def test__repr__(): assert cuBinarizer().__repr__() == "Binarizer()" assert cuFunctionTransformer().__repr__() == "FunctionTransformer()"