rapidsai · JohnZed · Nov 2, 2020 · Oct 23, 2020 · Oct 23, 2020 · Oct 26, 2020
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -16,6 +16,7 @@
 - PR #3004: Remove Single Process Multi GPU (SPMG) code
 - PR #3044: Move leftover `linalg` and `stats` to RAFT namespaces
 - PR #3074: Reducing dask coordinate descent test runtime
+- PR #3052: Speeding up MNMG KNN classifier and regressor testing
 
 ## Bug Fixes
 - PR #3072: Fusing metrics and score directories in src_prims

@@ -471,7 +471,8 @@ void class_probs(std::vector<float *> &out, const int64_t *knn_indices,
      * Build array of class probability arrays from
      * knn_indices and labels
      */
-    device_buffer<int> y_normalized(allocator, stream, n_index_rows);
+    device_buffer<int> y_normalized(allocator, stream,
+                                    n_index_rows + n_unique_labels);
 
     /*
      * Appending the array of unique labels to the original labels array

@@ -69,11 +69,11 @@ def dataset(request):
         if len(new_x) >= request.param['n_samples']:
             break
     X = X[new_x]
-    noise = np.random.normal(0, 1.2, X.shape)
+    noise = np.random.normal(0, 5., X.shape)
     X += noise
     y = np.array(new_y)
 
-    return train_test_split(X, y, test_size=0.1)
+    return train_test_split(X, y, test_size=0.3)
 
 
 def exact_match(output1, output2):
@@ -108,11 +108,11 @@ def check_probabilities(l_probas, d_probas):
 
 
 @pytest.mark.parametrize("datatype", ['dask_array', 'dask_cudf'])
-@pytest.mark.parametrize("n_neighbors", [1, 3, 8])
-@pytest.mark.parametrize("n_parts", [2, 4, 12])
-@pytest.mark.parametrize("batch_size", [128, 1024])
-def test_predict_and_score(dataset, datatype, n_neighbors,
-                           n_parts, batch_size, client):
+@pytest.mark.parametrize("parameters", [(1, 3, 256),
+                                        (8, 8, 256),
+                                        (9, 3, 128)])
+def test_predict_and_score(dataset, datatype, parameters, client):
+    n_neighbors, n_parts, batch_size = parameters
     X_train, X_test, y_train, y_test = dataset
     np_y_test = y_test
 
@@ -165,11 +165,11 @@ def test_predict_and_score(dataset, datatype, n_neighbors,
 
 
 @pytest.mark.parametrize("datatype", ['dask_array', 'dask_cudf'])
-@pytest.mark.parametrize("n_neighbors", [1, 3, 8])
-@pytest.mark.parametrize("n_parts", [2, 4, 12])
-@pytest.mark.parametrize("batch_size", [128, 1024])
-def test_predict_proba(dataset, datatype, n_neighbors,
-                       n_parts, batch_size, client):
+@pytest.mark.parametrize("parameters", [(1, 3, 256),
+                                        (8, 8, 256),
+                                        (9, 3, 128)])
+def test_predict_proba(dataset, datatype, parameters, client):
+    n_neighbors, n_parts, batch_size = parameters
     X_train, X_test, y_train, y_test = dataset
 
     l_model = lKNNClf(n_neighbors=n_neighbors)

@@ -70,11 +70,11 @@ def dataset(request):
         if len(new_x) >= request.param['n_samples']:
             break
     X = X[new_x]
-    noise = np.random.normal(0, 1.2, X.shape)
+    noise = np.random.normal(0, 5., X.shape)
     X += noise
     y = np.array(new_y, dtype=np.float32)
 
-    return train_test_split(X, y, test_size=0.1)
+    return train_test_split(X, y, test_size=0.3)
 
 
 def exact_match(output1, output2):
@@ -102,11 +102,11 @@ def exact_match(output1, output2):
 
 
 @pytest.mark.parametrize("datatype", ['dask_array', 'dask_cudf'])
-@pytest.mark.parametrize("n_neighbors", [1, 3, 8])
-@pytest.mark.parametrize("n_parts", [2, 4, 12])
-@pytest.mark.parametrize("batch_size", [128, 1024])
-def test_predict_and_score(dataset, datatype, n_neighbors,
-                           n_parts, batch_size, client):
+@pytest.mark.parametrize("parameters", [(1, 3, 256),
+                                        (8, 8, 256),
+                                        (9, 3, 128)])
+def test_predict_and_score(dataset, datatype, parameters, client):
+    n_neighbors, n_parts, batch_size = parameters
     X_train, X_test, y_train, y_test = dataset
     np_y_test = y_test