[pre-commit.ci] pre-commit autoupdate #1223

Merged 6 commits on Apr 25, 2023
4 changes: 2 additions & 2 deletions .github/workflows/pre-commit.yaml
@@ -7,10 +7,10 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Setup Python 3.7
- name: Setup Python 3.8
uses: actions/setup-python@v4
with:
python-version: 3.7
python-version: 3.8
- name: Install pre-commit
run: |
pip install pre-commit
6 changes: 3 additions & 3 deletions .pre-commit-config.yaml
@@ -1,11 +1,11 @@
repos:
- repo: https://github.com/psf/black
rev: 22.6.0
rev: 23.3.0
hooks:
- id: black
args: [--line-length=100]
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v0.961
rev: v1.2.0
hooks:
- id: mypy
name: mypy openml
@@ -20,7 +20,7 @@ repos:
- types-requests
- types-python-dateutil
- repo: https://github.com/pycqa/flake8
rev: 4.0.1
rev: 6.0.0
hooks:
- id: flake8
name: flake8 openml
3 changes: 2 additions & 1 deletion doc/progress.rst
@@ -9,15 +9,16 @@ Changelog
0.13.1
~~~~~~

* DOC #1241 #1229 #1231: Minor documentation fixes and resolve documentation examples not working.
* ADD #1028: Add functions to delete runs, flows, datasets, and tasks (e.g., ``openml.datasets.delete_dataset``).
* ADD #1144: Add locally computed results to the ``OpenMLRun`` object's representation if the run was created locally and not downloaded from the server.
* ADD #1180: Improve the error message when the checksum of a downloaded dataset does not match the checksum provided by the API.
* ADD #1201: Make ``OpenMLTraceIteration`` a dataclass.
* DOC #1069: Add argument documentation for the ``OpenMLRun`` class.
* DOC #1241 #1229 #1231: Minor documentation fixes and resolve documentation examples not working.
* FIX #1197 #559 #1131: Fix the order of ground truth and predictions in the ``OpenMLRun`` object and in ``format_prediction``.
* FIX #1198: Support numpy 1.24 and higher.
* FIX #1216: Allow unknown task types on the server. This is only relevant when new task types are added to the test server.
* FIX #1223: Fix mypy errors for implicit optional typing.
* MAINT #1155: Add dependabot github action to automatically update other github actions.
* MAINT #1199: Obtain pre-commit's flake8 from github.com instead of gitlab.com.
* MAINT #1215: Support latest numpy version.
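Most of the source changes in this PR follow from the mypy bump above: starting with mypy 0.990, implicit Optional is disabled by default, so a parameter annotated `str` can no longer silently default to `None`. A minimal before/after sketch (the function name here is hypothetical, chosen to mirror the signatures changed below):

```python
from typing import Optional

# Before: rejected by mypy v1.x, because the annotation says `str`
# but the default value is None.
#   def download(md5_checksum: str = None) -> str: ...

# After: the Optional wrapper makes the None default explicit.
def download(md5_checksum: Optional[str] = None) -> str:
    return md5_checksum or "<no checksum>"


print(download())           # "<no checksum>"
print(download("abc123"))   # "abc123"
```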
1 change: 1 addition & 0 deletions examples/30_extended/fetch_runtimes_tutorial.py
@@ -79,6 +79,7 @@
)
)


# Creating utility function
def print_compare_runtimes(measures):
for repeat, val1 in measures["usercpu_time_millis_training"].items():
3 changes: 1 addition & 2 deletions openml/_api_calls.py
@@ -195,7 +195,7 @@ def _download_minio_bucket(
def _download_text_file(
source: str,
output_path: Optional[str] = None,
md5_checksum: str = None,
md5_checksum: Optional[str] = None,
exists_ok: bool = True,
encoding: str = "utf8",
) -> Optional[str]:
@@ -326,7 +326,6 @@ def _send_request(request_method, url, data, files=None, md5_checksum=None):
if request_method == "get" and not __is_checksum_equal(
response.text.encode("utf-8"), md5_checksum
):

# -- Check if encoding is not UTF-8 perhaps
if __is_checksum_equal(response.content, md5_checksum):
raise OpenMLHashException(
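For context on the hunk above: the guard first hashes the UTF-8 encoded response text and, on mismatch, retries against the raw bytes to catch responses served in another encoding. The real helper is the private `__is_checksum_equal`, whose body is not shown in this diff; a plausible sketch, assuming the API returns an MD5 hex digest:

```python
import hashlib
from typing import Optional


def is_checksum_equal(content: bytes, md5_checksum: Optional[str]) -> bool:
    # Assumption: a missing checksum is treated as a pass.
    if md5_checksum is None:
        return True
    return hashlib.md5(content).hexdigest() == md5_checksum


assert is_checksum_equal(b"data", hashlib.md5(b"data").hexdigest())
```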
1 change: 0 additions & 1 deletion openml/datasets/dataset.py
@@ -274,7 +274,6 @@ def _get_repr_body_fields(self) -> List[Tuple[str, Union[str, int, List[str]]]]:
return [(key, fields[key]) for key in order if key in fields]

def __eq__(self, other):

if not isinstance(other, OpenMLDataset):
return False

14 changes: 7 additions & 7 deletions openml/datasets/functions.py
@@ -74,7 +74,6 @@ def list_datasets(
output_format: str = "dict",
**kwargs,
) -> Union[Dict, pd.DataFrame]:

"""
Return a list of all datasets which are on OpenML.
Supports a large number of results.
@@ -182,7 +181,6 @@ def _list_datasets(data_id: Optional[List] = None, output_format="dict", **kwarg


def __list_datasets(api_call, output_format="dict"):

xml_string = openml._api_calls._perform_api_call(api_call, "get")
datasets_dict = xmltodict.parse(xml_string, force_list=("oml:dataset",))

@@ -353,7 +351,7 @@ def get_datasets(
def get_dataset(
dataset_id: Union[int, str],
download_data: bool = True,
version: int = None,
version: Optional[int] = None,
error_if_multiple: bool = False,
cache_format: str = "pickle",
download_qualities: bool = True,
@@ -984,7 +982,7 @@ def _get_dataset_description(did_cache_dir, dataset_id):

def _get_dataset_parquet(
description: Union[Dict, OpenMLDataset],
cache_directory: str = None,
cache_directory: Optional[str] = None,
download_all_files: bool = False,
) -> Optional[str]:
"""Return the path to the local parquet file of the dataset. If is not cached, it is downloaded.
Expand Down Expand Up @@ -1051,7 +1049,9 @@ def _get_dataset_parquet(
return output_file_path


def _get_dataset_arff(description: Union[Dict, OpenMLDataset], cache_directory: str = None) -> str:
def _get_dataset_arff(
description: Union[Dict, OpenMLDataset], cache_directory: Optional[str] = None
) -> str:
"""Return the path to the local arff file of the dataset. If is not cached, it is downloaded.
Checks if the file is in the cache, if yes, return the path to the file.
@@ -1173,8 +1173,8 @@ def _create_dataset_from_description(
description: Dict[str, str],
features_file: str,
qualities_file: str,
arff_file: str = None,
parquet_file: str = None,
arff_file: Optional[str] = None,
parquet_file: Optional[str] = None,
cache_format: str = "pickle",
) -> OpenMLDataset:
"""Create a dataset object from a description dict.
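As a usage note for the `get_dataset` signature shown above: passing `version` pins a named dataset to one concrete upload, and `error_if_multiple` controls what happens when a name alone matches several uploads. A small sketch (dataset name chosen for illustration; requires network access):

```python
import openml

# Pin "iris" to version 1; without `version`, an ambiguous name either
# resolves to one match or, with error_if_multiple=True, raises instead
# of guessing.
dataset = openml.datasets.get_dataset("iris", version=1)
print(dataset.name, dataset.version)
```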
4 changes: 3 additions & 1 deletion openml/exceptions.py
@@ -1,5 +1,7 @@
# License: BSD 3-Clause

from typing import Optional


class PyOpenMLError(Exception):
def __init__(self, message: str):
@@ -20,7 +22,7 @@ class OpenMLServerException(OpenMLServerError):

# Code needs to be optional to allow the exception to be picklable:
# https://stackoverflow.com/questions/16244923/how-to-make-a-custom-exception-class-with-multiple-init-args-pickleable # noqa: E501
def __init__(self, message: str, code: int = None, url: str = None):
def __init__(self, message: str, code: Optional[int] = None, url: Optional[str] = None):
self.message = message
self.code = code
self.url = url
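The picklability comment in the diff above deserves a short illustration. `pickle` reconstructs an exception by calling its class with `self.args`, which only holds what was passed to `Exception.__init__`; an extra required positional parameter therefore breaks unpickling, while a defaulted one keeps it safe. A sketch:

```python
import pickle


class Fragile(Exception):
    def __init__(self, message: str, code: int):  # `code` is required
        super().__init__(message)  # only `message` lands in self.args
        self.code = code


class Picklable(Exception):
    def __init__(self, message: str, code=None):  # default restores picklability
        super().__init__(message)
        self.code = code


try:
    pickle.loads(pickle.dumps(Fragile("boom", 500)))
except TypeError as e:
    print("unpickling failed:", e)  # __init__ re-invoked without `code`

err = pickle.loads(pickle.dumps(Picklable("boom", 500)))
print(type(err).__name__)  # Picklable (note: `code` is not round-tripped)
```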
2 changes: 1 addition & 1 deletion openml/extensions/extension_interface.py
@@ -166,7 +166,7 @@ def _run_model_on_fold(
y_train: Optional[np.ndarray] = None,
X_test: Optional[Union[np.ndarray, scipy.sparse.spmatrix]] = None,
) -> Tuple[np.ndarray, np.ndarray, "OrderedDict[str, float]", Optional["OpenMLRunTrace"]]:
"""Run a model on a repeat,fold,subsample triplet of the task and return prediction information.
"""Run a model on a repeat, fold, subsample triplet of the task.
Returns the data that is necessary to construct the OpenML Run object. Is used by
:func:`openml.runs.run_flow_on_task`.
3 changes: 0 additions & 3 deletions openml/extensions/sklearn/extension.py
@@ -1021,7 +1021,6 @@ def flatten_all(list_):
# when deserializing the parameter
sub_components_explicit.add(identifier)
if isinstance(sub_component, str):

external_version = self._get_external_version_string(None, {})
dependencies = self._get_dependencies()
tags = self._get_tags()
@@ -1072,7 +1071,6 @@ def flatten_all(list_):
parameters[k] = parameter_json

elif isinstance(rval, OpenMLFlow):

# A subcomponent, for example the base model in
# AdaBoostClassifier
sub_components[k] = rval
@@ -1762,7 +1760,6 @@ def _prediction_to_probabilities(
)

if isinstance(task, (OpenMLClassificationTask, OpenMLLearningCurveTask)):

try:
proba_y = model_copy.predict_proba(X_test)
proba_y = pd.DataFrame(proba_y, columns=model_classes) # handles X_test as numpy
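The `try` around `predict_proba` above exists because not every sklearn estimator exposes probability estimates; `_prediction_to_probabilities` is the fallback. A plausible sketch of such a fallback, assuming `model_classes` is a plain list of labels (the real implementation may differ):

```python
import numpy as np
import pandas as pd


def prediction_to_probabilities(pred_y, model_classes):
    # One-hot encode hard predictions so a classifier without
    # predict_proba still yields an (n_samples, n_classes) frame.
    proba = np.zeros((len(pred_y), len(model_classes)))
    for row, label in enumerate(pred_y):
        proba[row, model_classes.index(label)] = 1.0
    return pd.DataFrame(proba, columns=model_classes)


print(prediction_to_probabilities(["b", "a"], ["a", "b"]))
```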
5 changes: 1 addition & 4 deletions openml/flows/functions.py
@@ -120,7 +120,6 @@ def _get_flow_description(flow_id: int) -> OpenMLFlow:
try:
return _get_cached_flow(flow_id)
except OpenMLCacheException:

xml_file = os.path.join(
openml.utils._create_cache_directory_for_id(FLOWS_CACHE_DIR_NAME, flow_id),
"flow.xml",
@@ -140,7 +139,6 @@ def list_flows(
output_format: str = "dict",
**kwargs
) -> Union[Dict, pd.DataFrame]:

"""
Return a list of all flows which are on OpenML.
(Supports a large number of results.)
@@ -329,7 +327,6 @@ def get_flow_id(


def __list_flows(api_call: str, output_format: str = "dict") -> Union[Dict, pd.DataFrame]:

xml_string = openml._api_calls._perform_api_call(api_call, "get")
flows_dict = xmltodict.parse(xml_string, force_list=("oml:flow",))

@@ -377,7 +374,7 @@ def _check_flow_for_server_id(flow: OpenMLFlow) -> None:
def assert_flows_equal(
flow1: OpenMLFlow,
flow2: OpenMLFlow,
ignore_parameter_values_on_older_children: str = None,
ignore_parameter_values_on_older_children: Optional[str] = None,
ignore_parameter_values: bool = False,
ignore_custom_name_if_none: bool = False,
check_description: bool = True,
23 changes: 12 additions & 11 deletions openml/runs/functions.py
@@ -49,8 +49,8 @@ def run_model_on_task(
model: Any,
task: Union[int, str, OpenMLTask],
avoid_duplicate_runs: bool = True,
flow_tags: List[str] = None,
seed: int = None,
flow_tags: Optional[List[str]] = None,
seed: Optional[int] = None,
add_local_measures: bool = True,
upload_flow: bool = False,
return_flow: bool = False,
@@ -148,8 +148,8 @@ def run_flow_on_task(
flow: OpenMLFlow,
task: OpenMLTask,
avoid_duplicate_runs: bool = True,
flow_tags: List[str] = None,
seed: int = None,
flow_tags: Optional[List[str]] = None,
seed: Optional[int] = None,
add_local_measures: bool = True,
upload_flow: bool = False,
dataset_format: str = "dataframe",
@@ -438,7 +438,7 @@ def _run_task_get_arffcontent(
extension: "Extension",
add_local_measures: bool,
dataset_format: str,
n_jobs: int = None,
n_jobs: Optional[int] = None,
) -> Tuple[
List[List],
Optional[OpenMLRunTrace],
@@ -505,7 +505,6 @@ def _calculate_local_measure(sklearn_fn, openml_name):
user_defined_measures_fold[openml_name] = sklearn_fn(test_y, pred_y)

if isinstance(task, (OpenMLClassificationTask, OpenMLLearningCurveTask)):

for i, tst_idx in enumerate(test_indices):
if task.class_labels is not None:
prediction = (
@@ -549,7 +548,6 @@ def _calculate_local_measure(sklearn_fn, openml_name):
)

elif isinstance(task, OpenMLRegressionTask):

for i, _ in enumerate(test_indices):
truth = test_y.iloc[i] if isinstance(test_y, pd.Series) else test_y[i]
arff_line = format_prediction(
@@ -570,7 +568,6 @@ def _calculate_local_measure(sklearn_fn, openml_name):
)

elif isinstance(task, OpenMLClusteringTask):

for i, _ in enumerate(test_indices):
arff_line = [test_indices[i], pred_y[i]] # row_id, cluster ID
arff_datacontent.append(arff_line)
@@ -579,7 +576,6 @@ def _calculate_local_measure(sklearn_fn, openml_name):
raise TypeError(type(task))

for measure in user_defined_measures_fold:

if measure not in user_defined_measures_per_fold:
user_defined_measures_per_fold[measure] = OrderedDict()
if rep_no not in user_defined_measures_per_fold[measure]:
@@ -625,7 +621,7 @@ def _run_task_get_arffcontent_parallel_helper(
sample_no: int,
task: OpenMLTask,
dataset_format: str,
configuration: Dict = None,
configuration: Optional[Dict] = None,
) -> Tuple[
np.ndarray,
Optional[pd.DataFrame],
@@ -674,7 +670,12 @@ def _run_task_get_arffcontent_parallel_helper(
sample_no,
)
)
pred_y, proba_y, user_defined_measures_fold, trace, = extension._run_model_on_fold(
(
pred_y,
proba_y,
user_defined_measures_fold,
trace,
) = extension._run_model_on_fold(
model=model,
task=task,
X_train=train_x,
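The reformatted assignment above is black 23.x at work: when the targets of a tuple unpacking do not fit on one line, black wraps them in parentheses, one target per line. Both spellings are identical at runtime; a sketch with a hypothetical stand-in for `extension._run_model_on_fold(...)`:

```python
def run_fold():
    # Hypothetical stand-in returning a 4-tuple, as in the diff above.
    return [0, 1], None, {}, None


pred_y, proba_y, measures, trace = run_fold()  # compact form
(
    pred_y,
    proba_y,
    measures,
    trace,
) = run_fold()  # black-formatted form; same unpacking
```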
3 changes: 1 addition & 2 deletions openml/runs/trace.py
@@ -55,7 +55,7 @@ def get_selected_iteration(self, fold: int, repeat: int) -> int:
The trace iteration from the given fold and repeat that was
selected as the best iteration by the search procedure
"""
for (r, f, i) in self.trace_iterations:
for r, f, i in self.trace_iterations:
if r == repeat and f == fold and self.trace_iterations[(r, f, i)].selected is True:
return i
raise ValueError(
@@ -345,7 +345,6 @@ def trace_from_xml(cls, xml):

@classmethod
def merge_traces(cls, traces: List["OpenMLRunTrace"]) -> "OpenMLRunTrace":

merged_trace = (
OrderedDict()
) # type: OrderedDict[Tuple[int, int, int], OpenMLTraceIteration] # noqa E501
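For readers unfamiliar with the structure iterated in `get_selected_iteration` above: trace iterations live in a mapping keyed by `(repeat, fold, iteration)` tuples, with the best iteration per fold/repeat flagged as selected by the search procedure. A toy reconstruction, where `Iter` is a hypothetical stand-in for `OpenMLTraceIteration`:

```python
from collections import OrderedDict
from typing import NamedTuple


class Iter(NamedTuple):  # hypothetical stand-in for OpenMLTraceIteration
    selected: bool


trace_iterations = OrderedDict(
    {(0, 0, 0): Iter(selected=False), (0, 0, 1): Iter(selected=True)}
)


def get_selected_iteration(fold: int, repeat: int) -> int:
    # Mirrors the loop in the diff: keys are (repeat, fold, iteration).
    for r, f, i in trace_iterations:
        if r == repeat and f == fold and trace_iterations[(r, f, i)].selected:
            return i
    raise ValueError("no iteration selected for this fold/repeat")


print(get_selected_iteration(fold=0, repeat=0))  # 1
```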
2 changes: 1 addition & 1 deletion openml/setups/functions.py
@@ -97,7 +97,7 @@ def get_setup(setup_id):

try:
return _get_cached_setup(setup_id)
except (openml.exceptions.OpenMLCacheException):
except openml.exceptions.OpenMLCacheException:
url_suffix = "/setup/%d" % setup_id
setup_xml = openml._api_calls._perform_api_call(url_suffix, "get")
with io.open(setup_file, "w", encoding="utf8") as fh:
5 changes: 2 additions & 3 deletions openml/study/study.py
@@ -73,7 +73,6 @@ def __init__(
runs: Optional[List[int]],
setups: Optional[List[int]],
):

self.study_id = study_id
self.alias = alias
self.main_entity_type = main_entity_type
@@ -100,11 +99,11 @@ def id(self) -> Optional[int]:

def _get_repr_body_fields(self) -> List[Tuple[str, Union[str, int, List[str]]]]:
"""Collect all information to display in the __repr__ body."""
fields = {
fields: Dict[str, Any] = {
"Name": self.name,
"Status": self.status,
"Main Entity Type": self.main_entity_type,
} # type: Dict[str, Any]
}
if self.study_id is not None:
fields["ID"] = self.study_id
fields["Study URL"] = self.openml_url
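The `fields` change above swaps a `# type:` comment for a PEP 526 inline annotation; the two are equivalent to mypy, but the inline form keeps the type next to the name. Side by side:

```python
from typing import Any, Dict

# Pre-PEP 526: the annotation rides in a comment.
fields_old = {"Name": "study"}  # type: Dict[str, Any]

# PEP 526: the annotation is part of the assignment itself.
fields_new: Dict[str, Any] = {"Name": "study"}

print(fields_old == fields_new)  # True; only the annotation style differs
```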
1 change: 0 additions & 1 deletion openml/tasks/functions.py
@@ -387,7 +387,6 @@ def get_task(


def _get_task_description(task_id):

try:
return _get_cached_task(task_id)
except OpenMLCacheException:
1 change: 0 additions & 1 deletion openml/tasks/split.py
@@ -70,7 +70,6 @@ def __eq__(self, other):

@classmethod
def _from_arff_file(cls, filename: str) -> "OpenMLSplit":

repetitions = None

pkl_filename = filename.replace(".arff", ".pkl.py3")