doc: make all examples use names instead of IDs as reference. (#1367)

Co-authored-by: ArlindKadra <ArlindKadra@users.noreply.github.com>
openml · Oct 16, 2024 · aa0aca0 · aa0aca0
1 parent 8261a87
commit aa0aca0
Show file tree

Hide file tree

Showing 9 changed files with 17 additions and 14 deletions.
diff --git a/examples/20_basic/simple_datasets_tutorial.py b/examples/20_basic/simple_datasets_tutorial.py
@@ -27,7 +27,7 @@
 # ==================
 
 # Iris dataset https://www.openml.org/d/61
-dataset = openml.datasets.get_dataset(61)
+dataset = openml.datasets.get_dataset(dataset_id="iris", version=1)
 
 # Print a summary
 print(

diff --git a/examples/20_basic/simple_flows_and_runs_tutorial.py b/examples/20_basic/simple_flows_and_runs_tutorial.py
@@ -20,8 +20,8 @@
 # Train a machine learning model
 # ==============================
 
-# NOTE: We are using dataset 20 from the test server: https://test.openml.org/d/20
-dataset = openml.datasets.get_dataset(20)
+# NOTE: We are using dataset "diabetes" from the test server: https://test.openml.org/d/20
+dataset = openml.datasets.get_dataset(dataset_id="diabetes", version=1)
 X, y, categorical_indicator, attribute_names = dataset.get_data(
     target=dataset.default_target_attribute
 )

diff --git a/examples/20_basic/simple_suites_tutorial.py b/examples/20_basic/simple_suites_tutorial.py
@@ -39,7 +39,9 @@
 # Downloading benchmark suites
 # ============================
 
-suite = openml.study.get_suite(99)
+# Download the OpenML-CC18 benchmarking suite by its name (suite ID 99):
+# https://www.openml.org/s/99
+suite = openml.study.get_suite("OpenML-CC18")
 print(suite)
 
 ####################################################################################################

diff --git a/examples/30_extended/configure_logging.py b/examples/30_extended/configure_logging.py
@@ -24,7 +24,7 @@
 
 import openml
 
-openml.datasets.get_dataset("iris")
+openml.datasets.get_dataset("iris", version=1)
 
 # With default configuration, the above example will show no output to console.
 # However, in your cache directory you should find a file named 'openml_python.log',
@@ -39,7 +39,7 @@
 
 openml.config.set_console_log_level(logging.DEBUG)
 openml.config.set_file_log_level(logging.WARNING)
-openml.datasets.get_dataset("iris")
+openml.datasets.get_dataset("iris", version=1)
 
 # Now the log level that was previously written to file should also be shown in the console.
 # The message is now no longer written to file as the `file_log` was set to level `WARNING`.

diff --git a/examples/30_extended/datasets_tutorial.py b/examples/30_extended/datasets_tutorial.py
@@ -51,7 +51,7 @@
 # =================
 
 # This is done based on the dataset name and version.
-dataset = openml.datasets.get_dataset(1471)
+dataset = openml.datasets.get_dataset(dataset_id="eeg-eye-state", version=1)
 
 # Print a summary
 print(
@@ -87,8 +87,7 @@
 # Starting from 0.15, not downloading data will be the default behavior instead.
 # The data will be downloaded automatically when you try to access it through
 # openml objects, e.g., using `dataset.features`.
-dataset = openml.datasets.get_dataset(1471, download_data=False)
-
+dataset = openml.datasets.get_dataset(dataset_id="eeg-eye-state", version=1, download_data=False)
 ############################################################################
 # Exercise 2
 # **********

diff --git a/examples/30_extended/flows_and_runs_tutorial.py b/examples/30_extended/flows_and_runs_tutorial.py
@@ -25,7 +25,7 @@
 # Train a scikit-learn model on the data manually.
 
 # NOTE: We are using dataset "eeg-eye-state" from the test server: https://test.openml.org/d/68
-dataset = openml.datasets.get_dataset(68)
+dataset = openml.datasets.get_dataset(dataset_id="eeg-eye-state", version=1)
 X, y, categorical_indicator, attribute_names = dataset.get_data(
     target=dataset.default_target_attribute
 )
@@ -36,7 +36,7 @@
 # You can also ask for meta-data to automatically preprocess the data.
 #
 # * e.g. categorical features -> do feature encoding
-dataset = openml.datasets.get_dataset(17)
+dataset = openml.datasets.get_dataset(dataset_id="credit-g", version=1)
 X, y, categorical_indicator, attribute_names = dataset.get_data(
     target=dataset.default_target_attribute
 )

diff --git a/examples/30_extended/study_tutorial.py b/examples/30_extended/study_tutorial.py
@@ -79,7 +79,8 @@
 tasks = [115, 259, 307]
 
 # To verify
-suite = openml.study.get_suite(1)
+# https://test.openml.org/api/v1/study/1
+suite = openml.study.get_suite("OpenML100")
 print(all([t_id in suite.tasks for t_id in tasks]))
 
 run_ids = []

diff --git a/examples/30_extended/suites_tutorial.py b/examples/30_extended/suites_tutorial.py
@@ -37,7 +37,8 @@
 
 ############################################################################
 # This is done based on the suite name.
-suite = openml.study.get_suite(99)
+# https://www.openml.org/api/v1/study/99
+suite = openml.study.get_suite("OpenML-CC18")
 print(suite)
 
 ############################################################################

diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py
@@ -477,7 +477,7 @@ def get_dataset(  # noqa: C901, PLR0912
     Parameters
     ----------
     dataset_id : int or str
-        Dataset ID of the dataset to download
+        The ID or name of the dataset to download.
     download_data : bool (default=False)
         If True, also download the data file. Beware that some datasets are large and it might
         make the operation noticeably slower. Metadata is also still retrieved.