openml · PGijsbers · Oct 29, 2020 · Oct 28, 2020 · Oct 28, 2020 · Oct 29, 2020
diff --git a/doc/progress.rst b/doc/progress.rst
@@ -8,6 +8,7 @@ Changelog
 
 0.11.1
 ~~~~~~
+* MAINT #671: Improved the performance of ``check_datasets_active`` by querying only the given list of datasets instead of all datasets, and added the ``raise_error_if_not_exist`` parameter to optionally skip dataset ids that do not exist. Modified the corresponding unit test.
 
 0.11.0
 ~~~~~~

diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py
@@ -333,14 +333,23 @@ def _load_features_from_file(features_file: str) -> Dict:
         return xml_dict["oml:data_features"]
 
 
-def check_datasets_active(dataset_ids: List[int]) -> Dict[int, bool]:
+def check_datasets_active(
+    dataset_ids: List[int],
+    raise_error_if_not_exist: bool = True,
+) -> Dict[int, bool]:
     """
     Check if the dataset ids provided are active.
 
+    Raises a ValueError if a dataset_id in the given list of dataset_ids
+    does not exist on the server, unless ``raise_error_if_not_exist`` is False.
+
     Parameters
     ----------
     dataset_ids : List[int]
         A list of integers representing dataset ids.
+    raise_error_if_not_exist : bool (default=True)
+        If True, raise a ValueError when one or more of the given dataset
+        ids do not exist on the server. If False, such ids are skipped.
 
     Returns
     -------
@@ -353,7 +362,8 @@ def check_datasets_active(dataset_ids: List[int]) -> Dict[int, bool]:
     for did in dataset_ids:
         dataset = dataset_list.get(did, None)
         if dataset is None:
-            raise ValueError("Could not find dataset {} in OpenML dataset list.".format(did))
+            if raise_error_if_not_exist:
+                raise ValueError(f'Could not find dataset {did} in OpenML dataset list.')
         else:
             active[did] = dataset["status"] == "active"
 

diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py
@@ -227,9 +227,13 @@ def test_list_datasets_empty(self):
     def test_check_datasets_active(self):
         # Have to test on live because there is no deactivated dataset on the test server.
         openml.config.server = self.production_server
-        active = openml.datasets.check_datasets_active([2, 17])
+        active = openml.datasets.check_datasets_active(
+            [2, 17, 79],
+            raise_error_if_not_exist=False,
+        )
         self.assertTrue(active[2])
         self.assertFalse(active[17])
+        self.assertIsNone(active.get(79))
         self.assertRaisesRegex(
             ValueError,
             "Could not find dataset 79 in OpenML dataset list.",