diff --git a/examples/20_basic/simple_datasets_tutorial.py b/examples/20_basic/simple_datasets_tutorial.py index 35b325fd9..b90d53660 100644 --- a/examples/20_basic/simple_datasets_tutorial.py +++ b/examples/20_basic/simple_datasets_tutorial.py @@ -27,7 +27,7 @@ # ================== # Iris dataset https://www.openml.org/d/61 -dataset = openml.datasets.get_dataset(61) +dataset = openml.datasets.get_dataset(dataset_id="iris", version=1) # Print a summary print( diff --git a/examples/20_basic/simple_flows_and_runs_tutorial.py b/examples/20_basic/simple_flows_and_runs_tutorial.py index 0176328b6..eec6d7e8b 100644 --- a/examples/20_basic/simple_flows_and_runs_tutorial.py +++ b/examples/20_basic/simple_flows_and_runs_tutorial.py @@ -20,8 +20,8 @@ # Train a machine learning model # ============================== -# NOTE: We are using dataset 20 from the test server: https://test.openml.org/d/20 -dataset = openml.datasets.get_dataset(20) +# NOTE: We are using dataset "diabetes" from the test server: https://test.openml.org/d/20 +dataset = openml.datasets.get_dataset(dataset_id="diabetes", version=1) X, y, categorical_indicator, attribute_names = dataset.get_data( target=dataset.default_target_attribute ) diff --git a/examples/20_basic/simple_suites_tutorial.py b/examples/20_basic/simple_suites_tutorial.py index 92dfb3c04..3daf7b992 100644 --- a/examples/20_basic/simple_suites_tutorial.py +++ b/examples/20_basic/simple_suites_tutorial.py @@ -39,7 +39,9 @@ # Downloading benchmark suites # ============================ -suite = openml.study.get_suite(99) +# OpenML Benchmarking Suites and the OpenML-CC18 +# https://www.openml.org/s/99 +suite = openml.study.get_suite("OpenML-CC18") print(suite) #################################################################################################### diff --git a/examples/30_extended/configure_logging.py b/examples/30_extended/configure_logging.py index 3d33f1546..3878b0436 100644 --- a/examples/30_extended/configure_logging.py +++ b/examples/30_extended/configure_logging.py @@ -24,7 +24,7 @@ import openml -openml.datasets.get_dataset("iris") +openml.datasets.get_dataset("iris", version=1) # With default configuration, the above example will show no output to console. # However, in your cache directory you should find a file named 'openml_python.log', @@ -39,7 +39,7 @@ openml.config.set_console_log_level(logging.DEBUG) openml.config.set_file_log_level(logging.WARNING) -openml.datasets.get_dataset("iris") +openml.datasets.get_dataset("iris", version=1) # Now the log level that was previously written to file should also be shown in the console. # The message is now no longer written to file as the `file_log` was set to level `WARNING`. diff --git a/examples/30_extended/datasets_tutorial.py b/examples/30_extended/datasets_tutorial.py index 764cb8f36..606455dd8 100644 --- a/examples/30_extended/datasets_tutorial.py +++ b/examples/30_extended/datasets_tutorial.py @@ -51,7 +51,7 @@ # ================= # This is done based on the dataset ID. -dataset = openml.datasets.get_dataset(1471) +dataset = openml.datasets.get_dataset(dataset_id="eeg-eye-state", version=1) # Print a summary print( @@ -87,8 +87,7 @@ # Starting from 0.15, not downloading data will be the default behavior instead. # The data will be downloading automatically when you try to access it through # openml objects, e.g., using `dataset.features`. -dataset = openml.datasets.get_dataset(1471, download_data=False) - +dataset = openml.datasets.get_dataset(dataset_id="eeg-eye-state", version=1, download_data=False) ############################################################################ # Exercise 2 # ********** diff --git a/examples/30_extended/flows_and_runs_tutorial.py b/examples/30_extended/flows_and_runs_tutorial.py index 3c017087d..b7c000101 100644 --- a/examples/30_extended/flows_and_runs_tutorial.py +++ b/examples/30_extended/flows_and_runs_tutorial.py @@ -25,7 +25,7 @@ # Train a scikit-learn model on the data manually. # NOTE: We are using dataset 68 from the test server: https://test.openml.org/d/68 -dataset = openml.datasets.get_dataset(68) +dataset = openml.datasets.get_dataset(dataset_id="eeg-eye-state", version=1) X, y, categorical_indicator, attribute_names = dataset.get_data( target=dataset.default_target_attribute ) @@ -36,7 +36,7 @@ # You can also ask for meta-data to automatically preprocess the data. # # * e.g. categorical features -> do feature encoding -dataset = openml.datasets.get_dataset(17) +dataset = openml.datasets.get_dataset(dataset_id="credit-g", version=1) X, y, categorical_indicator, attribute_names = dataset.get_data( target=dataset.default_target_attribute ) diff --git a/examples/30_extended/study_tutorial.py b/examples/30_extended/study_tutorial.py index d5bfcd88a..8715dfb4a 100644 --- a/examples/30_extended/study_tutorial.py +++ b/examples/30_extended/study_tutorial.py @@ -79,7 +79,8 @@ tasks = [115, 259, 307] # To verify -suite = openml.study.get_suite(1) +# https://test.openml.org/api/v1/study/1 +suite = openml.study.get_suite("OpenML100") print(all([t_id in suite.tasks for t_id in tasks])) run_ids = [] diff --git a/examples/30_extended/suites_tutorial.py b/examples/30_extended/suites_tutorial.py index ff9902356..935d4c529 100644 --- a/examples/30_extended/suites_tutorial.py +++ b/examples/30_extended/suites_tutorial.py @@ -37,7 +37,8 @@ ############################################################################ # This is done based on the dataset ID. -suite = openml.study.get_suite(99) +# https://www.openml.org/api/v1/study/99 +suite = openml.study.get_suite("OpenML-CC18") print(suite) ############################################################################ diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py index f7eee98d6..0901171d6 100644 --- a/openml/datasets/functions.py +++ b/openml/datasets/functions.py @@ -477,7 +477,7 @@ def get_dataset( # noqa: C901, PLR0912 Parameters ---------- dataset_id : int or str - Dataset ID of the dataset to download + The ID or name of the dataset to download. download_data : bool (default=False) If True, also download the data file. Beware that some datasets are large and it might make the operation noticeably slower. Metadata is also still retrieved.