[pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
openml · Mar 14, 2023 · 46ce916 · 46ce916
1 parent 15353ee
commit 46ce916
Show file tree

Hide file tree

Showing 26 changed files with 14 additions and 81 deletions.
diff --git a/examples/30_extended/fetch_runtimes_tutorial.py b/examples/30_extended/fetch_runtimes_tutorial.py
@@ -79,6 +79,7 @@
     )
 )
 
+
 # Creating utility function
 def print_compare_runtimes(measures):
     for repeat, val1 in measures["usercpu_time_millis_training"].items():

diff --git a/openml/_api_calls.py b/openml/_api_calls.py
@@ -326,7 +326,6 @@ def _send_request(request_method, url, data, files=None, md5_checksum=None):
                 if request_method == "get" and not __is_checksum_equal(
                     response.text.encode("utf-8"), md5_checksum
                 ):
-
                     # -- Check if encoding is not UTF-8 perhaps
                     if __is_checksum_equal(response.content, md5_checksum):
                         raise OpenMLHashException(

diff --git a/openml/datasets/dataset.py b/openml/datasets/dataset.py
@@ -274,7 +274,6 @@ def _get_repr_body_fields(self) -> List[Tuple[str, Union[str, int, List[str]]]]:
         return [(key, fields[key]) for key in order if key in fields]
 
     def __eq__(self, other):
-
         if not isinstance(other, OpenMLDataset):
             return False
 

diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py
@@ -74,7 +74,6 @@ def list_datasets(
     output_format: str = "dict",
     **kwargs,
 ) -> Union[Dict, pd.DataFrame]:
-
     """
     Return a list of all dataset which are on OpenML.
     Supports large amount of results.
@@ -182,7 +181,6 @@ def _list_datasets(data_id: Optional[List] = None, output_format="dict", **kwarg
 
 
 def __list_datasets(api_call, output_format="dict"):
-
     xml_string = openml._api_calls._perform_api_call(api_call, "get")
     datasets_dict = xmltodict.parse(xml_string, force_list=("oml:dataset",))
 

diff --git a/openml/extensions/sklearn/extension.py b/openml/extensions/sklearn/extension.py
@@ -1021,7 +1021,6 @@ def flatten_all(list_):
                     # when deserializing the parameter
                     sub_components_explicit.add(identifier)
                     if isinstance(sub_component, str):
-
                         external_version = self._get_external_version_string(None, {})
                         dependencies = self._get_dependencies()
                         tags = self._get_tags()
@@ -1072,7 +1071,6 @@ def flatten_all(list_):
                 parameters[k] = parameter_json
 
             elif isinstance(rval, OpenMLFlow):
-
                 # A subcomponent, for example the base model in
                 # AdaBoostClassifier
                 sub_components[k] = rval
@@ -1762,7 +1760,6 @@ def _prediction_to_probabilities(
         )
 
         if isinstance(task, (OpenMLClassificationTask, OpenMLLearningCurveTask)):
-
             try:
                 proba_y = model_copy.predict_proba(X_test)
                 proba_y = pd.DataFrame(proba_y, columns=model_classes)  # handles X_test as numpy

diff --git a/openml/flows/functions.py b/openml/flows/functions.py
@@ -120,7 +120,6 @@ def _get_flow_description(flow_id: int) -> OpenMLFlow:
     try:
         return _get_cached_flow(flow_id)
     except OpenMLCacheException:
-
         xml_file = os.path.join(
             openml.utils._create_cache_directory_for_id(FLOWS_CACHE_DIR_NAME, flow_id),
             "flow.xml",
@@ -140,7 +139,6 @@ def list_flows(
     output_format: str = "dict",
     **kwargs
 ) -> Union[Dict, pd.DataFrame]:
-
     """
     Return a list of all flows which are on OpenML.
     (Supports large amount of results)
@@ -329,7 +327,6 @@ def get_flow_id(
 
 
 def __list_flows(api_call: str, output_format: str = "dict") -> Union[Dict, pd.DataFrame]:
-
     xml_string = openml._api_calls._perform_api_call(api_call, "get")
     flows_dict = xmltodict.parse(xml_string, force_list=("oml:flow",))
 

diff --git a/openml/runs/functions.py b/openml/runs/functions.py
@@ -505,7 +505,6 @@ def _calculate_local_measure(sklearn_fn, openml_name):
             user_defined_measures_fold[openml_name] = sklearn_fn(test_y, pred_y)
 
         if isinstance(task, (OpenMLClassificationTask, OpenMLLearningCurveTask)):
-
             for i, tst_idx in enumerate(test_indices):
                 if task.class_labels is not None:
                     prediction = (
@@ -549,7 +548,6 @@ def _calculate_local_measure(sklearn_fn, openml_name):
                 )
 
         elif isinstance(task, OpenMLRegressionTask):
-
             for i, _ in enumerate(test_indices):
                 truth = test_y.iloc[i] if isinstance(test_y, pd.Series) else test_y[i]
                 arff_line = format_prediction(
@@ -570,7 +568,6 @@ def _calculate_local_measure(sklearn_fn, openml_name):
                 )
 
         elif isinstance(task, OpenMLClusteringTask):
-
             for i, _ in enumerate(test_indices):
                 arff_line = [test_indices[i], pred_y[i]]  # row_id, cluster ID
                 arff_datacontent.append(arff_line)
@@ -579,7 +576,6 @@ def _calculate_local_measure(sklearn_fn, openml_name):
             raise TypeError(type(task))
 
         for measure in user_defined_measures_fold:
-
             if measure not in user_defined_measures_per_fold:
                 user_defined_measures_per_fold[measure] = OrderedDict()
             if rep_no not in user_defined_measures_per_fold[measure]:
@@ -674,7 +670,12 @@ def _run_task_get_arffcontent_parallel_helper(
             sample_no,
         )
     )
-    pred_y, proba_y, user_defined_measures_fold, trace, = extension._run_model_on_fold(
+    (
+        pred_y,
+        proba_y,
+        user_defined_measures_fold,
+        trace,
+    ) = extension._run_model_on_fold(
         model=model,
         task=task,
         X_train=train_x,

diff --git a/openml/runs/trace.py b/openml/runs/trace.py
@@ -55,7 +55,7 @@ def get_selected_iteration(self, fold: int, repeat: int) -> int:
             The trace iteration from the given fold and repeat that was
             selected as the best iteration by the search procedure
         """
-        for (r, f, i) in self.trace_iterations:
+        for r, f, i in self.trace_iterations:
             if r == repeat and f == fold and self.trace_iterations[(r, f, i)].selected is True:
                 return i
         raise ValueError(
@@ -345,7 +345,6 @@ def trace_from_xml(cls, xml):
 
     @classmethod
     def merge_traces(cls, traces: List["OpenMLRunTrace"]) -> "OpenMLRunTrace":
-
         merged_trace = (
             OrderedDict()
         )  # type: OrderedDict[Tuple[int, int, int], OpenMLTraceIteration]  # noqa E501

diff --git a/openml/setups/functions.py b/openml/setups/functions.py
@@ -97,7 +97,7 @@ def get_setup(setup_id):
 
     try:
         return _get_cached_setup(setup_id)
-    except (openml.exceptions.OpenMLCacheException):
+    except openml.exceptions.OpenMLCacheException:
         url_suffix = "/setup/%d" % setup_id
         setup_xml = openml._api_calls._perform_api_call(url_suffix, "get")
         with io.open(setup_file, "w", encoding="utf8") as fh:

diff --git a/openml/study/study.py b/openml/study/study.py
@@ -73,7 +73,6 @@ def __init__(
         runs: Optional[List[int]],
         setups: Optional[List[int]],
     ):
-
         self.study_id = study_id
         self.alias = alias
         self.main_entity_type = main_entity_type

diff --git a/openml/tasks/functions.py b/openml/tasks/functions.py
@@ -387,7 +387,6 @@ def get_task(
 
 
 def _get_task_description(task_id):
-
     try:
         return _get_cached_task(task_id)
     except OpenMLCacheException:

diff --git a/openml/tasks/split.py b/openml/tasks/split.py
@@ -70,7 +70,6 @@ def __eq__(self, other):
 
     @classmethod
     def _from_arff_file(cls, filename: str) -> "OpenMLSplit":
-
         repetitions = None
 
         pkl_filename = filename.replace(".arff", ".pkl.py3")

diff --git a/openml/tasks/task.py b/openml/tasks/task.py
@@ -58,7 +58,6 @@ def __init__(
         evaluation_measure: Optional[str] = None,
         data_splits_url: Optional[str] = None,
     ):
-
         self.task_id = int(task_id) if task_id is not None else None
         self.task_type_id = task_type_id
         self.task_type = task_type
@@ -125,7 +124,6 @@ def get_train_test_split_indices(
         repeat: int = 0,
         sample: int = 0,
     ) -> Tuple[np.ndarray, np.ndarray]:
-
         # Replace with retrieve from cache
         if self.split is None:
             self.split = self.download_split()
@@ -165,7 +163,6 @@ def download_split(self) -> OpenMLSplit:
         return split
 
     def get_split_dimensions(self) -> Tuple[int, int, int]:
-
         if self.split is None:
             self.split = self.download_split()
 
@@ -273,7 +270,6 @@ def get_X_and_y(
         return X, y
 
     def _to_dict(self) -> "OrderedDict[str, OrderedDict]":
-
         task_container = super(OpenMLSupervisedTask, self)._to_dict()
         task_dict = task_container["oml:task_inputs"]
 
@@ -285,7 +281,6 @@ def _to_dict(self) -> "OrderedDict[str, OrderedDict]":
 
     @property
     def estimation_parameters(self):
-
         warn(
             "The estimation_parameters attribute will be "
             "deprecated in the future, please use "
@@ -296,7 +291,6 @@ def estimation_parameters(self):
 
     @estimation_parameters.setter
     def estimation_parameters(self, est_parameters):
-
         self.estimation_procedure["parameters"] = est_parameters
 
 
@@ -324,7 +318,6 @@ def __init__(
         class_labels: Optional[List[str]] = None,
         cost_matrix: Optional[np.ndarray] = None,
     ):
-
         super(OpenMLClassificationTask, self).__init__(
             task_id=task_id,
             task_type_id=task_type_id,
@@ -436,7 +429,6 @@ def get_X(
         return data
 
     def _to_dict(self) -> "OrderedDict[str, OrderedDict]":
-
         task_container = super(OpenMLClusteringTask, self)._to_dict()
 
         # Right now, it is not supported as a feature.

diff --git a/tests/test_datasets/test_dataset.py b/tests/test_datasets/test_dataset.py
@@ -176,14 +176,14 @@ def test_get_data_with_rowid(self):
         self.dataset.row_id_attribute = "condition"
         rval, _, categorical, _ = self.dataset.get_data(include_row_id=True)
         self.assertIsInstance(rval, pd.DataFrame)
-        for (dtype, is_cat, col) in zip(rval.dtypes, categorical, rval):
+        for dtype, is_cat, col in zip(rval.dtypes, categorical, rval):
             self._check_expected_type(dtype, is_cat, rval[col])
         self.assertEqual(rval.shape, (898, 39))
         self.assertEqual(len(categorical), 39)
 
         rval, _, categorical, _ = self.dataset.get_data()
         self.assertIsInstance(rval, pd.DataFrame)
-        for (dtype, is_cat, col) in zip(rval.dtypes, categorical, rval):
+        for dtype, is_cat, col in zip(rval.dtypes, categorical, rval):
             self._check_expected_type(dtype, is_cat, rval[col])
         self.assertEqual(rval.shape, (898, 38))
         self.assertEqual(len(categorical), 38)
@@ -202,7 +202,7 @@ def test_get_data_with_target_array(self):
     def test_get_data_with_target_pandas(self):
         X, y, categorical, attribute_names = self.dataset.get_data(target="class")
         self.assertIsInstance(X, pd.DataFrame)
-        for (dtype, is_cat, col) in zip(X.dtypes, categorical, X):
+        for dtype, is_cat, col in zip(X.dtypes, categorical, X):
             self._check_expected_type(dtype, is_cat, X[col])
         self.assertIsInstance(y, pd.Series)
         self.assertEqual(y.dtype.name, "category")
@@ -227,13 +227,13 @@ def test_get_data_rowid_and_ignore_and_target(self):
     def test_get_data_with_ignore_attributes(self):
         self.dataset.ignore_attribute = ["condition"]
         rval, _, categorical, _ = self.dataset.get_data(include_ignore_attribute=True)
-        for (dtype, is_cat, col) in zip(rval.dtypes, categorical, rval):
+        for dtype, is_cat, col in zip(rval.dtypes, categorical, rval):
             self._check_expected_type(dtype, is_cat, rval[col])
         self.assertEqual(rval.shape, (898, 39))
         self.assertEqual(len(categorical), 39)
 
         rval, _, categorical, _ = self.dataset.get_data(include_ignore_attribute=False)
-        for (dtype, is_cat, col) in zip(rval.dtypes, categorical, rval):
+        for dtype, is_cat, col in zip(rval.dtypes, categorical, rval):
             self._check_expected_type(dtype, is_cat, rval[col])
         self.assertEqual(rval.shape, (898, 38))
         self.assertEqual(len(categorical), 38)

diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py
@@ -71,7 +71,6 @@ def _remove_pickle_files(self):
                     pass
 
     def _get_empty_param_for_dataset(self):
-
         return {
             "name": None,
             "description": None,
@@ -602,7 +601,6 @@ def test__retrieve_class_labels(self):
         self.assertEqual(labels, ["C", "H", "G"])
 
     def test_upload_dataset_with_url(self):
-
         dataset = OpenMLDataset(
             "%s-UploadTestWithURL" % self._get_sentinel(),
             "test",
@@ -719,7 +717,6 @@ def test_attributes_arff_from_df_unknown_dtype(self):
                 attributes_arff_from_df(df)
 
     def test_create_dataset_numpy(self):
-
         data = np.array([[1, 2, 3], [1.2, 2.5, 3.8], [2, 5, 8], [0, 1, 0]]).T
 
         attributes = [("col_{}".format(i), "REAL") for i in range(data.shape[1])]
@@ -755,7 +752,6 @@ def test_create_dataset_numpy(self):
         self.assertEqual(_get_online_dataset_format(dataset.id), "arff", "Wrong format for dataset")
 
     def test_create_dataset_list(self):
-
         data = [
             ["a", "sunny", 85.0, 85.0, "FALSE", "no"],
             ["b", "sunny", 80.0, 90.0, "TRUE", "no"],
@@ -812,7 +808,6 @@ def test_create_dataset_list(self):
         self.assertEqual(_get_online_dataset_format(dataset.id), "arff", "Wrong format for dataset")
 
     def test_create_dataset_sparse(self):
-
         # test the scipy.sparse.coo_matrix
         sparse_data = scipy.sparse.coo_matrix(
             ([0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], ([0, 1, 1, 2, 2, 3, 3], [0, 1, 2, 0, 2, 0, 1]))
@@ -890,7 +885,6 @@ def test_create_dataset_sparse(self):
         )
 
     def test_create_invalid_dataset(self):
-
         data = [
             "sunny",
             "overcast",
@@ -954,7 +948,6 @@ def test_topic_api_error(self):
         )
 
     def test_get_online_dataset_format(self):
-
         # Phoneme dataset
         dataset_id = 77
         dataset = openml.datasets.get_dataset(dataset_id, download_data=False)
@@ -1409,7 +1402,6 @@ def test_get_dataset_cache_format_pickle(self):
         self.assertEqual(len(attribute_names), X.shape[1])
 
     def test_get_dataset_cache_format_feather(self):
-
         dataset = openml.datasets.get_dataset(128, cache_format="feather")
         dataset.get_data()
 

diff --git a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py
@@ -117,7 +117,6 @@ def _get_expected_pipeline_description(self, model: Any) -> str:
     def _serialization_test_helper(
         self, model, X, y, subcomponent_parameters, dependencies_mock_call_count=(1, 2)
     ):
-
         # Regex pattern for memory addresses of style 0x7f8e0f31ecf8
         pattern = re.compile("0x[0-9a-f]{12}")
 
@@ -1050,7 +1049,6 @@ def test_serialize_cvobject(self):
 
     @pytest.mark.sklearn
     def test_serialize_simple_parameter_grid(self):
-
         # We cannot easily test for scipy random variables in here, but they
         # should be covered
 
@@ -1568,7 +1566,6 @@ def test_obtain_parameter_values_flow_not_from_server(self):
 
     @pytest.mark.sklearn
     def test_obtain_parameter_values(self):
-
         model = sklearn.model_selection.RandomizedSearchCV(
             estimator=sklearn.ensemble.RandomForestClassifier(n_estimators=5),
             param_distributions={
@@ -2035,7 +2032,6 @@ def test_run_model_on_fold_clustering(self):
 
     @pytest.mark.sklearn
     def test__extract_trace_data(self):
-
         param_grid = {
             "hidden_layer_sizes": [[5, 5], [10, 10], [20, 20]],
             "activation": ["identity", "logistic", "tanh", "relu"],
@@ -2078,7 +2074,6 @@ def test__extract_trace_data(self):
 
             self.assertEqual(len(trace_iteration.parameters), len(param_grid))
             for param in param_grid:
-
                 # Prepend with the "parameter_" prefix
                 param_in_trace = "parameter_%s" % param
                 self.assertIn(param_in_trace, trace_iteration.parameters)
Original file line number | Diff line change
@@ -79,6 +79,7 @@
         )
     )
     # Creating utility function
     def print_compare_runtimes(measures):
         for repeat, val1 in measures["usercpu_time_millis_training"].items():
@@ Expand Down @@