Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Minor Documentation Fixes: TaskID for Example Custom Flow; Comment on Homepage; More documentation for components #1243

Merged
merged 8 commits into from
Apr 18, 2023
2 changes: 1 addition & 1 deletion doc/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ Example
('estimator', tree.DecisionTreeClassifier())
]
)
# Download the OpenML task for the german credit card dataset with 10-fold
# Download the OpenML task for the pendigits dataset with 10-fold
# cross-validation.
task = openml.tasks.get_task(32)
# Run the scikit-learn model on the task.
Expand Down
1 change: 1 addition & 0 deletions doc/progress.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ Changelog
0.13.1
~~~~~~

* DOC #1241 #1229 #1231: Minor documentation fixes and resolve documentation examples not working.
* ADD #1028: Add functions to delete runs, flows, datasets, and tasks (e.g., ``openml.datasets.delete_dataset``).
* ADD #1144: Add locally computed results to the ``OpenMLRun`` object's representation if the run was created locally and not downloaded from the server.
* ADD #1180: Improve the error message when the checksum of a downloaded dataset does not match the checksum provided by the API.
Expand Down
4 changes: 2 additions & 2 deletions examples/30_extended/configure_logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@

import logging

openml.config.console_log.setLevel(logging.DEBUG)
openml.config.file_log.setLevel(logging.WARNING)
openml.config.set_console_log_level(logging.DEBUG)
openml.config.set_file_log_level(logging.WARNING)
openml.datasets.get_dataset("iris")

# Now the log level that was previously written to file should also be shown in the console.
Expand Down
6 changes: 5 additions & 1 deletion examples/30_extended/custom_flow_.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,8 @@
# you can use the Random Forest Classifier flow as a *subflow*. It allows for
# all hyperparameters of the Random Forest Classifier flow to also be specified in your pipeline flow.
#
# Note: you can currently only specify one subflow as part of the components.
#
# In this example, the auto-sklearn flow is a subflow: the auto-sklearn flow is entirely executed as part of this flow.
# This allows people to specify auto-sklearn hyperparameters used in this flow.
# In general, using a subflow is not required.
Expand All @@ -87,6 +89,8 @@
autosklearn_flow = openml.flows.get_flow(9313) # auto-sklearn 0.5.1
subflow = dict(
components=OrderedDict(automl_tool=autosklearn_flow),
# If you do not want to reference a subflow, you can use the following:
# components=OrderedDict(),
)

####################################################################################################
Expand Down Expand Up @@ -124,7 +128,7 @@
OrderedDict([("oml:name", "time"), ("oml:value", 120), ("oml:component", flow_id)]),
]

task_id = 1965 # Iris Task
task_id = 1200 # Iris Task
task = openml.tasks.get_task(task_id)
dataset_id = task.get_dataset().dataset_id

Expand Down
2 changes: 1 addition & 1 deletion openml/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,7 @@ def _list_all(listing_call, output_format="dict", *args, **filters):
if len(result) == 0:
result = new_batch
else:
result = result.append(new_batch, ignore_index=True)
result = pd.concat([result, new_batch], ignore_index=True)
else:
# For output_format = 'dict' or 'object'
result.update(new_batch)
Expand Down
17 changes: 17 additions & 0 deletions tests/test_utils/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,23 @@ def mocked_perform_api_call(call, request_method):

def test_list_all(self):
openml.utils._list_all(listing_call=openml.tasks.functions._list_tasks)
openml.utils._list_all(
listing_call=openml.tasks.functions._list_tasks, output_format="dataframe"
)

def test_list_all_with_multiple_batches(self):
res = openml.utils._list_all(
listing_call=openml.tasks.functions._list_tasks, output_format="dict", batch_size=2000
)
# Verify that test server state is still valid for this test to work as intended
# -> If the number of results is less than 2000, the test cannot
# exercise the batching operation.
assert len(res) > 2000
openml.utils._list_all(
listing_call=openml.tasks.functions._list_tasks,
output_format="dataframe",
batch_size=2000,
)

@unittest.mock.patch("openml._api_calls._perform_api_call", side_effect=mocked_perform_api_call)
def test_list_all_few_results_available(self, _perform_api_call):
Expand Down