Skip to content

Commit

Permalink
[pre-commit.ci] auto fixes from pre-commit.com hooks
Browse files Browse the repository at this point in the history
for more information, see https://pre-commit.ci
  • Loading branch information
pre-commit-ci[bot] committed Mar 14, 2023
1 parent 15353ee commit 46ce916
Show file tree
Hide file tree
Showing 26 changed files with 14 additions and 81 deletions.
1 change: 1 addition & 0 deletions examples/30_extended/fetch_runtimes_tutorial.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@
)
)


# Creating utility function
def print_compare_runtimes(measures):
for repeat, val1 in measures["usercpu_time_millis_training"].items():
Expand Down
1 change: 0 additions & 1 deletion openml/_api_calls.py
Original file line number Diff line number Diff line change
Expand Up @@ -326,7 +326,6 @@ def _send_request(request_method, url, data, files=None, md5_checksum=None):
if request_method == "get" and not __is_checksum_equal(
response.text.encode("utf-8"), md5_checksum
):

# -- Check if encoding is not UTF-8 perhaps
if __is_checksum_equal(response.content, md5_checksum):
raise OpenMLHashException(
Expand Down
1 change: 0 additions & 1 deletion openml/datasets/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,6 @@ def _get_repr_body_fields(self) -> List[Tuple[str, Union[str, int, List[str]]]]:
return [(key, fields[key]) for key in order if key in fields]

def __eq__(self, other):

if not isinstance(other, OpenMLDataset):
return False

Expand Down
2 changes: 0 additions & 2 deletions openml/datasets/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,6 @@ def list_datasets(
output_format: str = "dict",
**kwargs,
) -> Union[Dict, pd.DataFrame]:

"""
Return a list of all datasets which are on OpenML.
Supports a large number of results.
Expand Down Expand Up @@ -182,7 +181,6 @@ def _list_datasets(data_id: Optional[List] = None, output_format="dict", **kwarg


def __list_datasets(api_call, output_format="dict"):

xml_string = openml._api_calls._perform_api_call(api_call, "get")
datasets_dict = xmltodict.parse(xml_string, force_list=("oml:dataset",))

Expand Down
3 changes: 0 additions & 3 deletions openml/extensions/sklearn/extension.py
Original file line number Diff line number Diff line change
Expand Up @@ -1021,7 +1021,6 @@ def flatten_all(list_):
# when deserializing the parameter
sub_components_explicit.add(identifier)
if isinstance(sub_component, str):

external_version = self._get_external_version_string(None, {})
dependencies = self._get_dependencies()
tags = self._get_tags()
Expand Down Expand Up @@ -1072,7 +1071,6 @@ def flatten_all(list_):
parameters[k] = parameter_json

elif isinstance(rval, OpenMLFlow):

# A subcomponent, for example the base model in
# AdaBoostClassifier
sub_components[k] = rval
Expand Down Expand Up @@ -1762,7 +1760,6 @@ def _prediction_to_probabilities(
)

if isinstance(task, (OpenMLClassificationTask, OpenMLLearningCurveTask)):

try:
proba_y = model_copy.predict_proba(X_test)
proba_y = pd.DataFrame(proba_y, columns=model_classes) # handles X_test as numpy
Expand Down
3 changes: 0 additions & 3 deletions openml/flows/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,6 @@ def _get_flow_description(flow_id: int) -> OpenMLFlow:
try:
return _get_cached_flow(flow_id)
except OpenMLCacheException:

xml_file = os.path.join(
openml.utils._create_cache_directory_for_id(FLOWS_CACHE_DIR_NAME, flow_id),
"flow.xml",
Expand All @@ -140,7 +139,6 @@ def list_flows(
output_format: str = "dict",
**kwargs
) -> Union[Dict, pd.DataFrame]:

"""
Return a list of all flows which are on OpenML.
(Supports a large number of results)
Expand Down Expand Up @@ -329,7 +327,6 @@ def get_flow_id(


def __list_flows(api_call: str, output_format: str = "dict") -> Union[Dict, pd.DataFrame]:

xml_string = openml._api_calls._perform_api_call(api_call, "get")
flows_dict = xmltodict.parse(xml_string, force_list=("oml:flow",))

Expand Down
11 changes: 6 additions & 5 deletions openml/runs/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -505,7 +505,6 @@ def _calculate_local_measure(sklearn_fn, openml_name):
user_defined_measures_fold[openml_name] = sklearn_fn(test_y, pred_y)

if isinstance(task, (OpenMLClassificationTask, OpenMLLearningCurveTask)):

for i, tst_idx in enumerate(test_indices):
if task.class_labels is not None:
prediction = (
Expand Down Expand Up @@ -549,7 +548,6 @@ def _calculate_local_measure(sklearn_fn, openml_name):
)

elif isinstance(task, OpenMLRegressionTask):

for i, _ in enumerate(test_indices):
truth = test_y.iloc[i] if isinstance(test_y, pd.Series) else test_y[i]
arff_line = format_prediction(
Expand All @@ -570,7 +568,6 @@ def _calculate_local_measure(sklearn_fn, openml_name):
)

elif isinstance(task, OpenMLClusteringTask):

for i, _ in enumerate(test_indices):
arff_line = [test_indices[i], pred_y[i]] # row_id, cluster ID
arff_datacontent.append(arff_line)
Expand All @@ -579,7 +576,6 @@ def _calculate_local_measure(sklearn_fn, openml_name):
raise TypeError(type(task))

for measure in user_defined_measures_fold:

if measure not in user_defined_measures_per_fold:
user_defined_measures_per_fold[measure] = OrderedDict()
if rep_no not in user_defined_measures_per_fold[measure]:
Expand Down Expand Up @@ -674,7 +670,12 @@ def _run_task_get_arffcontent_parallel_helper(
sample_no,
)
)
pred_y, proba_y, user_defined_measures_fold, trace, = extension._run_model_on_fold(
(
pred_y,
proba_y,
user_defined_measures_fold,
trace,
) = extension._run_model_on_fold(
model=model,
task=task,
X_train=train_x,
Expand Down
3 changes: 1 addition & 2 deletions openml/runs/trace.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def get_selected_iteration(self, fold: int, repeat: int) -> int:
The trace iteration from the given fold and repeat that was
selected as the best iteration by the search procedure
"""
for (r, f, i) in self.trace_iterations:
for r, f, i in self.trace_iterations:
if r == repeat and f == fold and self.trace_iterations[(r, f, i)].selected is True:
return i
raise ValueError(
Expand Down Expand Up @@ -345,7 +345,6 @@ def trace_from_xml(cls, xml):

@classmethod
def merge_traces(cls, traces: List["OpenMLRunTrace"]) -> "OpenMLRunTrace":

merged_trace = (
OrderedDict()
) # type: OrderedDict[Tuple[int, int, int], OpenMLTraceIteration] # noqa E501
Expand Down
2 changes: 1 addition & 1 deletion openml/setups/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def get_setup(setup_id):

try:
return _get_cached_setup(setup_id)
except (openml.exceptions.OpenMLCacheException):
except openml.exceptions.OpenMLCacheException:
url_suffix = "/setup/%d" % setup_id
setup_xml = openml._api_calls._perform_api_call(url_suffix, "get")
with io.open(setup_file, "w", encoding="utf8") as fh:
Expand Down
1 change: 0 additions & 1 deletion openml/study/study.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,6 @@ def __init__(
runs: Optional[List[int]],
setups: Optional[List[int]],
):

self.study_id = study_id
self.alias = alias
self.main_entity_type = main_entity_type
Expand Down
1 change: 0 additions & 1 deletion openml/tasks/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -387,7 +387,6 @@ def get_task(


def _get_task_description(task_id):

try:
return _get_cached_task(task_id)
except OpenMLCacheException:
Expand Down
1 change: 0 additions & 1 deletion openml/tasks/split.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,6 @@ def __eq__(self, other):

@classmethod
def _from_arff_file(cls, filename: str) -> "OpenMLSplit":

repetitions = None

pkl_filename = filename.replace(".arff", ".pkl.py3")
Expand Down
8 changes: 0 additions & 8 deletions openml/tasks/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,6 @@ def __init__(
evaluation_measure: Optional[str] = None,
data_splits_url: Optional[str] = None,
):

self.task_id = int(task_id) if task_id is not None else None
self.task_type_id = task_type_id
self.task_type = task_type
Expand Down Expand Up @@ -125,7 +124,6 @@ def get_train_test_split_indices(
repeat: int = 0,
sample: int = 0,
) -> Tuple[np.ndarray, np.ndarray]:

# Replace with retrieve from cache
if self.split is None:
self.split = self.download_split()
Expand Down Expand Up @@ -165,7 +163,6 @@ def download_split(self) -> OpenMLSplit:
return split

def get_split_dimensions(self) -> Tuple[int, int, int]:

if self.split is None:
self.split = self.download_split()

Expand Down Expand Up @@ -273,7 +270,6 @@ def get_X_and_y(
return X, y

def _to_dict(self) -> "OrderedDict[str, OrderedDict]":

task_container = super(OpenMLSupervisedTask, self)._to_dict()
task_dict = task_container["oml:task_inputs"]

Expand All @@ -285,7 +281,6 @@ def _to_dict(self) -> "OrderedDict[str, OrderedDict]":

@property
def estimation_parameters(self):

warn(
"The estimation_parameters attribute will be "
"deprecated in the future, please use "
Expand All @@ -296,7 +291,6 @@ def estimation_parameters(self):

@estimation_parameters.setter
def estimation_parameters(self, est_parameters):

self.estimation_procedure["parameters"] = est_parameters


Expand Down Expand Up @@ -324,7 +318,6 @@ def __init__(
class_labels: Optional[List[str]] = None,
cost_matrix: Optional[np.ndarray] = None,
):

super(OpenMLClassificationTask, self).__init__(
task_id=task_id,
task_type_id=task_type_id,
Expand Down Expand Up @@ -436,7 +429,6 @@ def get_X(
return data

def _to_dict(self) -> "OrderedDict[str, OrderedDict]":

task_container = super(OpenMLClusteringTask, self)._to_dict()

# Right now, it is not supported as a feature.
Expand Down
10 changes: 5 additions & 5 deletions tests/test_datasets/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,14 +176,14 @@ def test_get_data_with_rowid(self):
self.dataset.row_id_attribute = "condition"
rval, _, categorical, _ = self.dataset.get_data(include_row_id=True)
self.assertIsInstance(rval, pd.DataFrame)
for (dtype, is_cat, col) in zip(rval.dtypes, categorical, rval):
for dtype, is_cat, col in zip(rval.dtypes, categorical, rval):
self._check_expected_type(dtype, is_cat, rval[col])
self.assertEqual(rval.shape, (898, 39))
self.assertEqual(len(categorical), 39)

rval, _, categorical, _ = self.dataset.get_data()
self.assertIsInstance(rval, pd.DataFrame)
for (dtype, is_cat, col) in zip(rval.dtypes, categorical, rval):
for dtype, is_cat, col in zip(rval.dtypes, categorical, rval):
self._check_expected_type(dtype, is_cat, rval[col])
self.assertEqual(rval.shape, (898, 38))
self.assertEqual(len(categorical), 38)
Expand All @@ -202,7 +202,7 @@ def test_get_data_with_target_array(self):
def test_get_data_with_target_pandas(self):
X, y, categorical, attribute_names = self.dataset.get_data(target="class")
self.assertIsInstance(X, pd.DataFrame)
for (dtype, is_cat, col) in zip(X.dtypes, categorical, X):
for dtype, is_cat, col in zip(X.dtypes, categorical, X):
self._check_expected_type(dtype, is_cat, X[col])
self.assertIsInstance(y, pd.Series)
self.assertEqual(y.dtype.name, "category")
Expand All @@ -227,13 +227,13 @@ def test_get_data_rowid_and_ignore_and_target(self):
def test_get_data_with_ignore_attributes(self):
self.dataset.ignore_attribute = ["condition"]
rval, _, categorical, _ = self.dataset.get_data(include_ignore_attribute=True)
for (dtype, is_cat, col) in zip(rval.dtypes, categorical, rval):
for dtype, is_cat, col in zip(rval.dtypes, categorical, rval):
self._check_expected_type(dtype, is_cat, rval[col])
self.assertEqual(rval.shape, (898, 39))
self.assertEqual(len(categorical), 39)

rval, _, categorical, _ = self.dataset.get_data(include_ignore_attribute=False)
for (dtype, is_cat, col) in zip(rval.dtypes, categorical, rval):
for dtype, is_cat, col in zip(rval.dtypes, categorical, rval):
self._check_expected_type(dtype, is_cat, rval[col])
self.assertEqual(rval.shape, (898, 38))
self.assertEqual(len(categorical), 38)
Expand Down
8 changes: 0 additions & 8 deletions tests/test_datasets/test_dataset_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,6 @@ def _remove_pickle_files(self):
pass

def _get_empty_param_for_dataset(self):

return {
"name": None,
"description": None,
Expand Down Expand Up @@ -602,7 +601,6 @@ def test__retrieve_class_labels(self):
self.assertEqual(labels, ["C", "H", "G"])

def test_upload_dataset_with_url(self):

dataset = OpenMLDataset(
"%s-UploadTestWithURL" % self._get_sentinel(),
"test",
Expand Down Expand Up @@ -719,7 +717,6 @@ def test_attributes_arff_from_df_unknown_dtype(self):
attributes_arff_from_df(df)

def test_create_dataset_numpy(self):

data = np.array([[1, 2, 3], [1.2, 2.5, 3.8], [2, 5, 8], [0, 1, 0]]).T

attributes = [("col_{}".format(i), "REAL") for i in range(data.shape[1])]
Expand Down Expand Up @@ -755,7 +752,6 @@ def test_create_dataset_numpy(self):
self.assertEqual(_get_online_dataset_format(dataset.id), "arff", "Wrong format for dataset")

def test_create_dataset_list(self):

data = [
["a", "sunny", 85.0, 85.0, "FALSE", "no"],
["b", "sunny", 80.0, 90.0, "TRUE", "no"],
Expand Down Expand Up @@ -812,7 +808,6 @@ def test_create_dataset_list(self):
self.assertEqual(_get_online_dataset_format(dataset.id), "arff", "Wrong format for dataset")

def test_create_dataset_sparse(self):

# test the scipy.sparse.coo_matrix
sparse_data = scipy.sparse.coo_matrix(
([0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], ([0, 1, 1, 2, 2, 3, 3], [0, 1, 2, 0, 2, 0, 1]))
Expand Down Expand Up @@ -890,7 +885,6 @@ def test_create_dataset_sparse(self):
)

def test_create_invalid_dataset(self):

data = [
"sunny",
"overcast",
Expand Down Expand Up @@ -954,7 +948,6 @@ def test_topic_api_error(self):
)

def test_get_online_dataset_format(self):

# Phoneme dataset
dataset_id = 77
dataset = openml.datasets.get_dataset(dataset_id, download_data=False)
Expand Down Expand Up @@ -1409,7 +1402,6 @@ def test_get_dataset_cache_format_pickle(self):
self.assertEqual(len(attribute_names), X.shape[1])

def test_get_dataset_cache_format_feather(self):

dataset = openml.datasets.get_dataset(128, cache_format="feather")
dataset.get_data()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,6 @@ def _get_expected_pipeline_description(self, model: Any) -> str:
def _serialization_test_helper(
self, model, X, y, subcomponent_parameters, dependencies_mock_call_count=(1, 2)
):

# Regex pattern for memory addresses of style 0x7f8e0f31ecf8
pattern = re.compile("0x[0-9a-f]{12}")

Expand Down Expand Up @@ -1050,7 +1049,6 @@ def test_serialize_cvobject(self):

@pytest.mark.sklearn
def test_serialize_simple_parameter_grid(self):

# We cannot easily test for scipy random variables in here, but they
# should be covered

Expand Down Expand Up @@ -1568,7 +1566,6 @@ def test_obtain_parameter_values_flow_not_from_server(self):

@pytest.mark.sklearn
def test_obtain_parameter_values(self):

model = sklearn.model_selection.RandomizedSearchCV(
estimator=sklearn.ensemble.RandomForestClassifier(n_estimators=5),
param_distributions={
Expand Down Expand Up @@ -2035,7 +2032,6 @@ def test_run_model_on_fold_clustering(self):

@pytest.mark.sklearn
def test__extract_trace_data(self):

param_grid = {
"hidden_layer_sizes": [[5, 5], [10, 10], [20, 20]],
"activation": ["identity", "logistic", "tanh", "relu"],
Expand Down Expand Up @@ -2078,7 +2074,6 @@ def test__extract_trace_data(self):

self.assertEqual(len(trace_iteration.parameters), len(param_grid))
for param in param_grid:

# Prepend with the "parameter_" prefix
param_in_trace = "parameter_%s" % param
self.assertIn(param_in_trace, trace_iteration.parameters)
Expand Down
Loading

0 comments on commit 46ce916

Please sign in to comment.