From b7a8121d8c3a9813452d31c473beeea09001b7bc Mon Sep 17 00:00:00 2001 From: Eric Larson Date: Tue, 18 Apr 2023 13:15:12 -0400 Subject: [PATCH 1/7] MAINT: Report download time and size [circle full] [skip azp] [skip actions] [skip cirrus] --- mne/datasets/_fetch.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/mne/datasets/_fetch.py b/mne/datasets/_fetch.py index 87cd1664534..62a265e2dcb 100644 --- a/mne/datasets/_fetch.py +++ b/mne/datasets/_fetch.py @@ -8,6 +8,7 @@ import os.path as op from pathlib import Path from shutil import rmtree +import time from .. import __version__ as mne_version from ..utils import logger, warn, _safe_input @@ -130,6 +131,7 @@ def fetch_dataset( pass a list of dicts. """ # noqa E501 import pooch + t = time.time() if auth is not None: if len(auth) != 2: @@ -241,8 +243,9 @@ def fetch_dataset( registry[archive_name] = dataset_hash # create the download manager + use_path = final_path if processor is None else Path(path) fetcher = pooch.create( - path=str(final_path) if processor is None else path, + path=str(use_path), base_url="", # Full URLs are given in the `urls` dict. version=None, # Data versioning is decoupled from MNE-Python version. urls=urls, @@ -252,6 +255,7 @@ def fetch_dataset( # use our logger level for pooch's logger too pooch.get_logger().setLevel(logger.getEffectiveLevel()) + sz = 0 for idx in range(len(names)): # fetch and unpack the data @@ -268,9 +272,11 @@ def fetch_dataset( 'the dataset to be downloaded again.') from None else: raise + fname = use_path / archive_name + sz += fname.stat().st_size # after unpacking, remove the archive file if processor is not None: - os.remove(op.join(path, archive_name)) + fname.unlink() # remove version number from "misc" and "testing" datasets folder names if name == "misc": @@ -299,4 +305,13 @@ def fetch_dataset( name=name, current=data_version, newest=mne_version ) ) + t = time.time() - t + fmt = '%Ss' + if t > 60: + fmt = f'%Mm{fmt}' + if t > 3600: + fmt = f'%Hh{fmt}' + sz = sz / 1048576 # 1024 ** 2 + t = time.strftime(fmt, time.gmtime(time.time() - t)) + logger.info(f'Download complete in {t} ({sz:.1f} MB)') return (final_path, data_version) if return_version else final_path From 52492e1d56681132a6e99b6c65613089dd5f614f Mon Sep 17 00:00:00 2001 From: Eric Larson Date: Tue, 18 Apr 2023 13:52:50 -0400 Subject: [PATCH 2/7] FIX: Better logging [circle full] [skip azp] [skip actions] [skip cirrus] --- mne/datasets/_fetch.py | 10 +++- mne/datasets/eegbci/eegbci.py | 20 +++++-- mne/datasets/limo/limo.py | 23 ++++++-- mne/datasets/sleep_physionet/_utils.py | 25 ++++---- mne/datasets/sleep_physionet/age.py | 27 ++++++--- mne/datasets/sleep_physionet/temazepam.py | 27 ++++++--- mne/datasets/utils.py | 69 ++++++++++++----------- mne/utils/config.py | 1 + 8 files changed, 131 insertions(+), 71 deletions(-) diff --git a/mne/datasets/_fetch.py b/mne/datasets/_fetch.py index 62a265e2dcb..5d669553942 100644 --- a/mne/datasets/_fetch.py +++ b/mne/datasets/_fetch.py @@ -131,7 +131,7 @@ def fetch_dataset( pass a list of dicts. 
""" # noqa E501 import pooch - t = time.time() + t0 = time.time() if auth is not None: if len(auth) != 2: @@ -305,7 +305,12 @@ def fetch_dataset( name=name, current=data_version, newest=mne_version ) ) - t = time.time() - t + _log_time_size(t0, sz) + return (final_path, data_version) if return_version else final_path + + +def _log_time_size(t0, sz): + t = time.time() - t0 fmt = '%Ss' if t > 60: fmt = f'%Mm{fmt}' @@ -314,4 +319,3 @@ def fetch_dataset( sz = sz / 1048576 # 1024 ** 2 t = time.strftime(fmt, time.gmtime(time.time() - t)) logger.info(f'Download complete in {t} ({sz:.1f} MB)') - return (final_path, data_version) if return_version else final_path diff --git a/mne/datasets/eegbci/eegbci.py b/mne/datasets/eegbci/eegbci.py index d976425dd7a..4ec9a800b07 100644 --- a/mne/datasets/eegbci/eegbci.py +++ b/mne/datasets/eegbci/eegbci.py @@ -7,9 +7,11 @@ import re from os import path as op from pathlib import Path +import time -from ...utils import _url_to_local_path, verbose +from ...utils import _url_to_local_path, verbose, logger from ..utils import _do_path_update, _get_path +from .._fetch import _log_time_size # TODO: remove try/except when our min version is py 3.9 try: @@ -162,6 +164,7 @@ def load_data(subject, runs, path=None, force_update=False, update_path=None, .. footbibliography:: """ # noqa: E501 import pooch + t0 = time.time() if not hasattr(runs, '__iter__'): runs = [runs] @@ -195,14 +198,23 @@ def load_data(subject, runs, path=None, force_update=False, update_path=None, # fetch the file(s) data_paths = [] + sz = 0 for run in runs: file_part = f'S{subject:03d}/S{subject:03d}R{run:02d}.edf' - destination = op.join(base_path, file_part) - if force_update and op.isfile(destination): - os.remove(destination) + destination = Path(op.join(base_path, file_part)) + if destination.exists(): + if force_update: + destination.unlink() + else: + continue + if sz == 0: # log once + logger.info('Downloading EEGBCI data') data_paths.append(fetcher.fetch(file_part)) # update path in config if desired _do_path_update(path, update_path, config_key, name) + sz += destination.stat().st_size + if sz > 0: + _log_time_size(t0, sz) return data_paths diff --git a/mne/datasets/limo/limo.py b/mne/datasets/limo/limo.py index 143a9dd1162..d06e0cecbc8 100644 --- a/mne/datasets/limo/limo.py +++ b/mne/datasets/limo/limo.py @@ -2,8 +2,9 @@ # # License: BSD-3-Clause -import os import os.path as op +from pathlib import Path +import time import numpy as np @@ -12,6 +13,7 @@ from ...io.meas_info import create_info from ...utils import _check_pandas_installed, verbose from ..utils import _get_path, _do_path_update, logger +from .._fetch import _log_time_size # root url for LIMO files @@ -67,6 +69,7 @@ def data_path(subject, path=None, force_update=False, update_path=None, *, .. 
footbibliography:: """ # noqa: E501 import pooch + t0 = time.time() downloader = pooch.HTTPDownloader(progressbar=True) # use tqdm @@ -168,14 +171,23 @@ def data_path(subject, path=None, force_update=False, update_path=None, *, # use our logger level for pooch's logger too pooch.get_logger().setLevel(logger.getEffectiveLevel()) # fetch the data + sz = 0 for fname in ('LIMO.mat', 'Yr.mat'): - destination = op.join(subject_path, fname) - if force_update and op.isfile(destination): - os.remove(destination) + destination = Path(op.join(subject_path, fname)) + if destination.exists(): + if force_update: + destination.unlink() + else: + continue + if sz == 0: # log once + logger.info('Downloading LIMO data') # fetch the remote file (if local file missing or has hash mismatch) fetcher.fetch(fname=fname, downloader=downloader) + sz += destination.stat().st_size # update path in config if desired _do_path_update(path, update_path, config_key, name) + if sz > 0: + _log_time_size(t0, sz) return base_path @@ -282,7 +294,8 @@ def load_data(subject, path=None, force_update=False, update_path=None, metadata = pd.DataFrame(metadata) # -- 6) Create custom epochs array - epochs = EpochsArray(data, info, events, tmin, event_id, metadata=metadata) + epochs = EpochsArray(data, info, events, tmin, event_id, metadata=metadata, + verbose=False) epochs.info['bads'] = missing_chans # missing channels are marked as bad. return epochs diff --git a/mne/datasets/sleep_physionet/_utils.py b/mne/datasets/sleep_physionet/_utils.py index 0c2c0632857..b496e7859dd 100644 --- a/mne/datasets/sleep_physionet/_utils.py +++ b/mne/datasets/sleep_physionet/_utils.py @@ -30,18 +30,19 @@ def _fetch_one(fname, hashsum, path, force_update, base_url): # Fetch the file url = base_url + '/' + fname destination = op.join(path, fname) - if not op.isfile(destination) or force_update: - if op.isfile(destination): - os.remove(destination) - if not op.isdir(op.dirname(destination)): - os.makedirs(op.dirname(destination)) - pooch.retrieve( - url=url, - known_hash=f"sha1:{hashsum}", - path=path, - fname=fname - ) - return destination + if op.isfile(destination) and not force_update: + return destination, False + if op.isfile(destination): + os.remove(destination) + if not op.isdir(op.dirname(destination)): + os.makedirs(op.dirname(destination)) + pooch.retrieve( + url=url, + known_hash=f"sha1:{hashsum}", + path=path, + fname=fname + ) + return destination, True @verbose diff --git a/mne/datasets/sleep_physionet/age.py b/mne/datasets/sleep_physionet/age.py index 4a0d8456639..f7a09a3e872 100644 --- a/mne/datasets/sleep_physionet/age.py +++ b/mne/datasets/sleep_physionet/age.py @@ -3,9 +3,13 @@ # # License: BSD Style. +import os +import time + import numpy as np from ...utils import verbose +from .._fetch import _log_time_size from ._utils import _fetch_one, _data_path, _on_missing, AGE_SLEEP_RECORDS from ._utils import _check_subjects @@ -79,6 +83,7 @@ def fetch_data(subjects, recording=(1, 2), path=None, force_update=False, ---------- .. 
footbibliography:: """ # noqa: E501 + t0 = time.time() records = np.loadtxt(AGE_SLEEP_RECORDS, skiprows=1, delimiter=',', @@ -107,15 +112,23 @@ def fetch_data(subjects, recording=(1, 2), path=None, force_update=False, _on_missing(on_missing, msg) fnames = [] + sz = 0 for subject in subjects: for idx in np.where(psg_records['subject'] == subject)[0]: if psg_records['record'][idx] in recording: - psg_fname = _fetch_one(psg_records['fname'][idx].decode(), - psg_records['sha'][idx].decode(), - *params) - hyp_fname = _fetch_one(hyp_records['fname'][idx].decode(), - hyp_records['sha'][idx].decode(), - *params) + psg_fname, pdl = _fetch_one( + psg_records['fname'][idx].decode(), + psg_records['sha'][idx].decode(), + *params) + hyp_fname, hdl = _fetch_one( + hyp_records['fname'][idx].decode(), + hyp_records['sha'][idx].decode(), + *params) fnames.append([psg_fname, hyp_fname]) - + if pdl: + sz += os.path.getsize(psg_fname) + if hdl: + sz += os.path.getsize(hyp_fname) + if sz > 0: + _log_time_size(t0, sz) return fnames diff --git a/mne/datasets/sleep_physionet/temazepam.py b/mne/datasets/sleep_physionet/temazepam.py index a18f126ab5f..8c290843f7f 100644 --- a/mne/datasets/sleep_physionet/temazepam.py +++ b/mne/datasets/sleep_physionet/temazepam.py @@ -3,9 +3,13 @@ # # License: BSD Style. +import os +import time + import numpy as np from ...utils import verbose +from .._fetch import _log_time_size from ._utils import _fetch_one, _data_path, TEMAZEPAM_SLEEP_RECORDS from ._utils import _check_subjects @@ -67,6 +71,7 @@ def fetch_data(subjects, path=None, force_update=False, base_url=BASE_URL, *, ---------- .. footbibliography:: """ + t0 = time.time() records = np.loadtxt(TEMAZEPAM_SLEEP_RECORDS, skiprows=1, delimiter=',', @@ -83,15 +88,23 @@ def fetch_data(subjects, path=None, force_update=False, base_url=BASE_URL, *, params = [path, force_update, base_url] fnames = [] + sz = 0 for subject in subjects: # all the subjects are present at this point for idx in np.where(records['subject'] == subject)[0]: if records['record'][idx] == b'Placebo': - psg_fname = _fetch_one(records['psg fname'][idx].decode(), - records['psg sha'][idx].decode(), - *params) - hyp_fname = _fetch_one(records['hyp fname'][idx].decode(), - records['hyp sha'][idx].decode(), - *params) + psg_fname, pdl = _fetch_one( + records['psg fname'][idx].decode(), + records['psg sha'][idx].decode(), + *params) + hyp_fname, hdl = _fetch_one( + records['hyp fname'][idx].decode(), + records['hyp sha'][idx].decode(), + *params) fnames.append([psg_fname, hyp_fname]) - + if pdl: + sz += os.path.getsize(psg_fname) + if hdl: + sz += os.path.getsize(hyp_fname) + if sz > 0: + _log_time_size(t0, sz) return fnames diff --git a/mne/datasets/utils.py b/mne/datasets/utils.py index 50a894bfd7b..0d04f090235 100644 --- a/mne/datasets/utils.py +++ b/mne/datasets/utils.py @@ -299,49 +299,52 @@ def _download_all_example_data(verbose=True): # # verbose=True by default so we get nice status messages. # Consider adding datasets from here to CircleCI for PR-auto-build - from . 
import (sample, testing, misc, spm_face, somato, brainstorm, - eegbci, multimodal, opm, hf_sef, mtrf, fieldtrip_cmc, - kiloword, phantom_4dbti, sleep_physionet, limo, - fnirs_motor, refmeg_noise, fetch_infant_template, - fetch_fsaverage, ssvep, erp_core, epilepsy_ecog, - fetch_phantom, eyelink, ucl_opm_auditory) - sample_path = sample.data_path() - testing.data_path() - misc.data_path() - spm_face.data_path() - somato.data_path() - hf_sef.data_path() - multimodal.data_path() - fnirs_motor.data_path() - opm.data_path() - mtrf.data_path() - fieldtrip_cmc.data_path() - kiloword.data_path() - phantom_4dbti.data_path() - refmeg_noise.data_path() - ssvep.data_path() - epilepsy_ecog.data_path() - ucl_opm_auditory.data_path() - brainstorm.bst_raw.data_path(accept=True) - brainstorm.bst_auditory.data_path(accept=True) - brainstorm.bst_resting.data_path(accept=True) - phantom_path = brainstorm.bst_phantom_elekta.data_path(accept=True) - fetch_phantom('otaniemi', subjects_dir=phantom_path) - eyelink.data_path() - brainstorm.bst_phantom_ctf.data_path(accept=True) + paths = dict() + for kind in ('sample testing misc spm_face somato hf_sef multimodal ' + 'fnirs_motor opm mtrf fieldtrip_cmc kiloword phantom_4dbti ' + 'refmeg_noise ssvep epilepsy_ecog ucl_opm_auditory eyelink ' + 'erp_core brainstorm.bst_raw brainstorm.bst_auditory ' + 'brainstorm.bst_resting brainstorm.bst_phantom_ctf ' + 'brainstorm.bst_phantom_elekta' + ).split(): + mod = importlib.import_module(f'mne.datasets.{kind}') + data_path_func = getattr(mod, 'data_path') + kwargs = dict() + if 'accept' in inspect.getfullargspec(data_path_func).args: + kwargs['accept'] = True + paths[kind] = data_path_func(**kwargs) + logger.info(f'[done {kind}]') + + # Now for the exceptions: + from . import ( + eegbci, sleep_physionet, limo, fetch_fsaverage, fetch_infant_template, + fetch_hcp_mmp_parcellation, fetch_phantom) eegbci.load_data(1, [6, 10, 14], update_path=True) for subj in range(4): eegbci.load_data(subj + 1, runs=[3], update_path=True) + logger.info('[done eegbci]') + sleep_physionet.age.fetch_data(subjects=[0, 1], recording=[1]) + logger.info('[done sleep_physionet]') + # If the user has SUBJECTS_DIR, respect it, if not, set it to the EEG one # (probably on CircleCI, or otherwise advanced user) fetch_fsaverage(None) + logger.info('[done fsaverage]') + fetch_infant_template('6mo') + logger.info('[done infant_template]') + fetch_hcp_mmp_parcellation( - subjects_dir=sample_path / 'subjects', accept=True) - limo.load_data(subject=1, update_path=True) + subjects_dir=paths['sample'] / 'subjects', accept=True) + logger.info('[done hcp_mmp_parcellation]') + + fetch_phantom( + 'otaniemi', subjects_dir=paths['brainstorm.bst_phantom_elekta']) + logger.info('[done phantom]') - erp_core.data_path() + limo.load_data(subject=1, update_path=True) + logger.info('[done limo]') @verbose diff --git a/mne/utils/config.py b/mne/utils/config.py index 5056fcfd18a..09a89fe9a0f 100644 --- a/mne/utils/config.py +++ b/mne/utils/config.py @@ -104,6 +104,7 @@ def set_memmap_min_size(memmap_min_size): 'MNE_DATASETS_BRAINSTORM_PATH', 'MNE_DATASETS_EEGBCI_PATH', 'MNE_DATASETS_EPILEPSY_ECOG_PATH', + 'MNE_DATASETS_EYELINK_PATH', 'MNE_DATASETS_HF_SEF_PATH', 'MNE_DATASETS_MEGSIM_PATH', 'MNE_DATASETS_MISC_PATH', From 0d6446c769868226a1c057c9830de770540569a9 Mon Sep 17 00:00:00 2001 From: Eric Larson Date: Tue, 18 Apr 2023 15:36:18 -0400 Subject: [PATCH 3/7] FIX: Better logging [circle full] [skip azp] [skip actions] [skip cirrus] --- mne/datasets/_fetch.py | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/mne/datasets/_fetch.py b/mne/datasets/_fetch.py index 5d669553942..57c11385b0c 100644 --- a/mne/datasets/_fetch.py +++ b/mne/datasets/_fetch.py @@ -317,5 +317,5 @@ def _log_time_size(t0, sz): if t > 3600: fmt = f'%Hh{fmt}' sz = sz / 1048576 # 1024 ** 2 - t = time.strftime(fmt, time.gmtime(time.time() - t)) + t = time.strftime(fmt, time.gmtime(t)) logger.info(f'Download complete in {t} ({sz:.1f} MB)') From 73208888024010af0aac8d106afefbeec91b326c Mon Sep 17 00:00:00 2001 From: Eric Larson Date: Tue, 18 Apr 2023 15:49:18 -0400 Subject: [PATCH 4/7] FIX: Better logging [circle full] [skip azp] [skip actions] [skip cirrus] --- mne/datasets/_fetch.py | 22 ++------------ mne/datasets/eegbci/eegbci.py | 8 +++-- mne/datasets/limo/limo.py | 8 ++--- mne/datasets/sleep_physionet/_utils.py | 12 ++++++-- mne/datasets/sleep_physionet/age.py | 2 +- mne/datasets/sleep_physionet/temazepam.py | 2 +- mne/datasets/utils.py | 37 +++++++++++++++++++++-- tools/circleci_download.sh | 1 + 8 files changed, 59 insertions(+), 33 deletions(-) diff --git a/mne/datasets/_fetch.py b/mne/datasets/_fetch.py index 57c11385b0c..578c1cf82ed 100644 --- a/mne/datasets/_fetch.py +++ b/mne/datasets/_fetch.py @@ -2,7 +2,6 @@ # # License: BSD Style. -import logging import sys import os import os.path as op @@ -18,7 +17,8 @@ TESTING_VERSIONED, MISC_VERSIONED, ) -from .utils import _dataset_version, _do_path_update, _get_path +from .utils import (_dataset_version, _do_path_update, _get_path, + _log_time_size, _downloader_params) from ..fixes import _compare_version @@ -222,13 +222,9 @@ def fetch_dataset( "You must agree to the license to use this " "dataset" ) # downloader & processors - download_params = dict(progressbar=logger.level <= logging.INFO) + download_params = _downloader_params(auth=auth, token=token) if name == "fake": download_params["progressbar"] = False - if auth is not None: - download_params["auth"] = auth - if token is not None: - download_params["headers"] = {"Authorization": f"token {token}"} downloader = pooch.HTTPDownloader(**download_params) # make mappings from archive names to urls and to checksums @@ -307,15 +303,3 @@ def fetch_dataset( ) _log_time_size(t0, sz) return (final_path, data_version) if return_version else final_path - - -def _log_time_size(t0, sz): - t = time.time() - t0 - fmt = '%Ss' - if t > 60: - fmt = f'%Mm{fmt}' - if t > 3600: - fmt = f'%Hh{fmt}' - sz = sz / 1048576 # 1024 ** 2 - t = time.strftime(fmt, time.gmtime(t)) - logger.info(f'Download complete in {t} ({sz:.1f} MB)') diff --git a/mne/datasets/eegbci/eegbci.py b/mne/datasets/eegbci/eegbci.py index 4ec9a800b07..925f8b772a1 100644 --- a/mne/datasets/eegbci/eegbci.py +++ b/mne/datasets/eegbci/eegbci.py @@ -10,8 +10,8 @@ import time from ...utils import _url_to_local_path, verbose, logger -from ..utils import _do_path_update, _get_path -from .._fetch import _log_time_size +from ..utils import (_do_path_update, _get_path, _log_time_size, + _downloader_params) # TODO: remove try/except when our min version is py 3.9 try: @@ -81,6 +81,7 @@ def data_path(url, path=None, force_update=False, update_path=None, *, destinations = [destination] # Fetch the file + downloader = pooch.HTTPDownloader(**_downloader_params()) if not op.isfile(destination) or force_update: if op.isfile(destination): os.remove(destination) @@ -90,7 +91,8 @@ def data_path(url, path=None, force_update=False, update_path=None, *, # URL to one of Pooch's test files url=url, path=destination, - fname=fname + downloader=downloader, + 
fname=fname, ) # Offer to update the path diff --git a/mne/datasets/limo/limo.py b/mne/datasets/limo/limo.py index d06e0cecbc8..3701fdb4515 100644 --- a/mne/datasets/limo/limo.py +++ b/mne/datasets/limo/limo.py @@ -11,9 +11,9 @@ from ...channels import make_standard_montage from ...epochs import EpochsArray from ...io.meas_info import create_info -from ...utils import _check_pandas_installed, verbose -from ..utils import _get_path, _do_path_update, logger -from .._fetch import _log_time_size +from ...utils import _check_pandas_installed, verbose, logger +from ..utils import (_get_path, _do_path_update, _log_time_size, + _downloader_params) # root url for LIMO files @@ -71,7 +71,7 @@ def data_path(subject, path=None, force_update=False, update_path=None, *, import pooch t0 = time.time() - downloader = pooch.HTTPDownloader(progressbar=True) # use tqdm + downloader = pooch.HTTPDownloader(**_downloader_params()) # local storage patch config_key = 'MNE_DATASETS_LIMO_PATH' diff --git a/mne/datasets/sleep_physionet/_utils.py b/mne/datasets/sleep_physionet/_utils.py index b496e7859dd..50f992e7803 100644 --- a/mne/datasets/sleep_physionet/_utils.py +++ b/mne/datasets/sleep_physionet/_utils.py @@ -10,7 +10,7 @@ from ...utils import (verbose, _TempDir, _check_pandas_installed, _on_missing) -from ..utils import _get_path +from ..utils import _get_path, _downloader_params AGE_SLEEP_RECORDS = op.join(op.dirname(__file__), 'age_records.csv') TEMAZEPAM_SLEEP_RECORDS = op.join(op.dirname(__file__), @@ -36,10 +36,12 @@ def _fetch_one(fname, hashsum, path, force_update, base_url): os.remove(destination) if not op.isdir(op.dirname(destination)): os.makedirs(op.dirname(destination)) + downloader = pooch.HTTPDownloader(**_downloader_params()) pooch.retrieve( url=url, known_hash=f"sha1:{hashsum}", path=path, + downloader=downloader, fname=fname ) return destination, True @@ -88,11 +90,13 @@ def _update_sleep_temazepam_records(fname=TEMAZEPAM_SLEEP_RECORDS): # Download subjects info. subjects_fname = op.join(tmp, 'ST-subjects.xls') + downloader = pooch.HTTPDownloader(**_downloader_params()) pooch.retrieve( url=TEMAZEPAM_RECORDS_URL, known_hash=f"sha1:{TEMAZEPAM_RECORDS_URL_SHA1}", path=tmp, - fname=op.basename(subjects_fname) + downloader=downloader, + fname=op.basename(subjects_fname), ) # Load and Massage the checksums. @@ -147,11 +151,13 @@ def _update_sleep_age_records(fname=AGE_SLEEP_RECORDS): # Download subjects info. subjects_fname = op.join(tmp, 'SC-subjects.xls') + downloader = pooch.HTTPDownloader(**_downloader_params()) pooch.retrieve( url=AGE_RECORDS_URL, known_hash=f"sha1:{AGE_RECORDS_URL_SHA1}", path=tmp, - fname=op.basename(subjects_fname) + downloader=downloader, + fname=op.basename(subjects_fname), ) # Load and Massage the checksums. 
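
Each pooch call in this patch now receives a downloader built from `_downloader_params`, which is defined in `mne/datasets/utils.py` further down in this same patch, so progress bars and authentication are controlled in one place. Below is a minimal standalone sketch of that injection pattern, using only the public pooch API; the URL and file name are placeholders, and the MNE_TQDM config check from the real helper is omitted for brevity.

import logging

import pooch

logger = logging.getLogger(__name__)


def downloader_params(*, auth=None, token=None):
    """Build keyword arguments for pooch.HTTPDownloader (sketch of _downloader_params)."""
    # Show a progress bar only when the logger would actually emit INFO messages
    params = dict(progressbar=logger.getEffectiveLevel() <= logging.INFO)
    if auth is not None:
        params["auth"] = auth  # forwarded by pooch to requests.get
    if token is not None:
        params["headers"] = {"Authorization": f"token {token}"}
    return params


downloader = pooch.HTTPDownloader(**downloader_params())
path = pooch.retrieve(
    url="https://example.com/data.edf",  # placeholder URL, not a real dataset
    known_hash=None,  # no checksum for this sketch; pooch will warn
    fname="data.edf",
    path=pooch.os_cache("sketch"),
    downloader=downloader,
)

Passing the downloader explicitly, rather than letting pooch construct its default, is what lets a single environment check (here the logger level) silence every progress bar at once.
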
diff --git a/mne/datasets/sleep_physionet/age.py b/mne/datasets/sleep_physionet/age.py index f7a09a3e872..106d39d4e32 100644 --- a/mne/datasets/sleep_physionet/age.py +++ b/mne/datasets/sleep_physionet/age.py @@ -9,7 +9,7 @@ import numpy as np from ...utils import verbose -from .._fetch import _log_time_size +from ..utils import _log_time_size from ._utils import _fetch_one, _data_path, _on_missing, AGE_SLEEP_RECORDS from ._utils import _check_subjects diff --git a/mne/datasets/sleep_physionet/temazepam.py b/mne/datasets/sleep_physionet/temazepam.py index 8c290843f7f..841dbe67a7f 100644 --- a/mne/datasets/sleep_physionet/temazepam.py +++ b/mne/datasets/sleep_physionet/temazepam.py @@ -9,7 +9,7 @@ import numpy as np from ...utils import verbose -from .._fetch import _log_time_size +from ..utils import _log_time_size from ._utils import _fetch_one, _data_path, TEMAZEPAM_SLEEP_RECORDS from ._utils import _check_subjects diff --git a/mne/datasets/utils.py b/mne/datasets/utils.py index 0d04f090235..1fba832abb0 100644 --- a/mne/datasets/utils.py +++ b/mne/datasets/utils.py @@ -11,12 +11,14 @@ from collections import OrderedDict import importlib import inspect +import logging import os import os.path as op from pathlib import Path import sys -import zipfile +import time import tempfile +import zipfile import numpy as np @@ -374,6 +376,7 @@ def fetch_aparc_sub_parcellation(subjects_dir=None, verbose=None): rh='https://osf.io/4kxny/download') hashes = dict(lh='9e4d8d6b90242b7e4b0145353436ef77', rh='dd6464db8e7762d969fc1d8087cd211b') + downloader = pooch.HTTPDownloader(**_downloader_params()) for hemi in ('lh', 'rh'): fname = f'{hemi}.aparc_sub.annot' fpath = destination / fname @@ -382,6 +385,7 @@ def fetch_aparc_sub_parcellation(subjects_dir=None, verbose=None): url=urls[hemi], known_hash=f"md5:{hashes[hemi]}", path=destination, + downloader=downloader, fname=fname, ) @@ -433,6 +437,7 @@ def fetch_hcp_mmp_parcellation(subjects_dir=None, combine=True, *, if answer.lower() != 'y': raise RuntimeError('You must agree to the license to use this ' 'dataset') + downloader = pooch.HTTPDownloader(**_downloader_params()) for hemi, fpath in zip(('lh', 'rh'), fnames): if not op.isfile(fpath): fname = fpath.name @@ -440,6 +445,7 @@ def fetch_hcp_mmp_parcellation(subjects_dir=None, combine=True, *, url=urls[hemi], known_hash=f"md5:{hashes[hemi]}", path=destination, + downloader=downloader, fname=fname, ) @@ -556,6 +562,7 @@ def _manifest_check_download(manifest_path, destination, url, hash_): logger.info('%d file%s missing from %s in %s' % (len(need), _pl(need), manifest_path, destination)) if len(need) > 0: + downloader = pooch.HTTPDownloader(**_downloader_params()) with tempfile.TemporaryDirectory() as path: logger.info('Downloading missing files remotely') @@ -564,7 +571,8 @@ def _manifest_check_download(manifest_path, destination, url, hash_): url=url, known_hash=f"md5:{hash_}", path=path, - fname=op.basename(fname_path) + downloader=downloader, + fname=op.basename(fname_path), ) logger.info('Extracting missing file%s' % (_pl(need),)) @@ -578,3 +586,28 @@ def _manifest_check_download(manifest_path, destination, url, hash_): ff.extract(name, path=destination) logger.info('Successfully extracted %d file%s' % (len(need), _pl(need))) + + +def _log_time_size(t0, sz): + t = time.time() - t0 + fmt = '%Ss' + if t > 60: + fmt = f'%Mm{fmt}' + if t > 3600: + fmt = f'%Hh{fmt}' + sz = sz / 1048576 # 1024 ** 2 + t = time.strftime(fmt, time.gmtime(t)) + logger.info(f'Download complete in {t} ({sz:.1f} MB)') + + +def 
_downloader_params(*, auth=None, token=None): + params = dict() + params['progressbar'] = ( + logger.level <= logging.INFO and + get_config('MNE_TQDM', 'tqdm.auto') != 'off' + ) + if auth is not None: + params["auth"] = auth + if token is not None: + params["headers"] = {"Authorization": f"token {token}"} + return params diff --git a/tools/circleci_download.sh b/tools/circleci_download.sh index 421f6f63ec1..cb622cb1860 100755 --- a/tools/circleci_download.sh +++ b/tools/circleci_download.sh @@ -1,6 +1,7 @@ #!/bin/bash -e set -o pipefail +export MNE_TQDM=off if [ "$CIRCLE_BRANCH" == "main" ] || [[ $(cat gitlog.txt) == *"[circle full]"* ]]; then echo "Doing a full dev build"; From f23fca7511a63f6282e45f89d18ed3721ae9a746 Mon Sep 17 00:00:00 2001 From: Eric Larson Date: Tue, 18 Apr 2023 16:19:37 -0400 Subject: [PATCH 5/7] FIX: Zenodo slow-doh [circle full] [skip azp] [skip actions] [skip cirrus] --- .circleci/config.yml | 4 ++++ mne/datasets/config.py | 6 ++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index ab70c684e4d..9cbb54338d4 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -230,6 +230,10 @@ jobs: - data-cache-ucl-opm-auditory - run: name: Get data + # This limit could be increased, but this is helpful for finding slow ones + # (even ~2GB datasets should be downloadable in this time from good + # providers) + no_output_timeout: 10m command: | ./tools/circleci_download.sh - run: diff --git a/mne/datasets/config.py b/mne/datasets/config.py index dc851e9bd2f..e84d63b41c4 100644 --- a/mne/datasets/config.py +++ b/mne/datasets/config.py @@ -320,8 +320,10 @@ MNE_DATASETS['hf_sef_evoked'] = dict( archive_name='hf_sef_evoked.tar.gz', hash='md5:13d34cb5db584e00868677d8fb0aab2b', - url=('https://zenodo.org/record/3523071/files/' - 'hf_sef_evoked.tar.gz'), + # Zenodo can be slow, so we use the OSF mirror + # url=('https://zenodo.org/record/3523071/files/' + # 'hf_sef_evoked.tar.gz'), + url='https://osf.io/25f8d/download?version=2', folder_name='hf_sef', config_key='MNE_DATASETS_HF_SEF_PATH', ) From abbd68a6e8e65087613fbebc4cff6f2edc9442ef Mon Sep 17 00:00:00 2001 From: Eric Larson Date: Tue, 18 Apr 2023 16:43:31 -0400 Subject: [PATCH 6/7] TST: Run CIs [circle front] From 871e4fbab27cdd149d4b36bbf8a89bf2ce5aa3dd Mon Sep 17 00:00:00 2001 From: Eric Larson Date: Tue, 18 Apr 2023 18:38:53 -0400 Subject: [PATCH 7/7] FIX: Tests [circle front] --- mne/datasets/eegbci/eegbci.py | 3 +- mne/datasets/limo/limo.py | 2 +- .../sleep_physionet/tests/test_physionet.py | 48 ++++++++----------- mne/datasets/tests/test_datasets.py | 2 +- 4 files changed, 24 insertions(+), 31 deletions(-) diff --git a/mne/datasets/eegbci/eegbci.py b/mne/datasets/eegbci/eegbci.py index 925f8b772a1..e89ae089fcc 100644 --- a/mne/datasets/eegbci/eegbci.py +++ b/mne/datasets/eegbci/eegbci.py @@ -88,7 +88,6 @@ def data_path(url, path=None, force_update=False, update_path=None, *, if not op.isdir(op.dirname(destination)): os.makedirs(op.dirname(destination)) pooch.retrieve( - # URL to one of Pooch's test files url=url, path=destination, downloader=downloader, @@ -203,7 +202,7 @@ def load_data(subject, runs, path=None, force_update=False, update_path=None, sz = 0 for run in runs: file_part = f'S{subject:03d}/S{subject:03d}R{run:02d}.edf' - destination = Path(op.join(base_path, file_part)) + destination = Path(base_path, file_part) if destination.exists(): if force_update: destination.unlink() diff --git a/mne/datasets/limo/limo.py b/mne/datasets/limo/limo.py 
index 3701fdb4515..e0f1d0f9fa9 100644 --- a/mne/datasets/limo/limo.py +++ b/mne/datasets/limo/limo.py @@ -173,7 +173,7 @@ def data_path(subject, path=None, force_update=False, update_path=None, *, # fetch the data sz = 0 for fname in ('LIMO.mat', 'Yr.mat'): - destination = Path(op.join(subject_path, fname)) + destination = Path(subject_path, fname) if destination.exists(): if force_update: destination.unlink() diff --git a/mne/datasets/sleep_physionet/tests/test_physionet.py b/mne/datasets/sleep_physionet/tests/test_physionet.py index 3f754b863ac..549963cb73f 100644 --- a/mne/datasets/sleep_physionet/tests/test_physionet.py +++ b/mne/datasets/sleep_physionet/tests/test_physionet.py @@ -3,11 +3,9 @@ # # License: BSD Style. -import os.path as op -import numpy as np +from pathlib import Path import pytest -from numpy.testing import assert_array_equal import pooch from mne.utils import requires_good_network @@ -32,14 +30,15 @@ def __init__(self): def __call__(self, *args, **kwargs): self.call_args_list.append((args, kwargs)) + Path(kwargs['path'], kwargs['fname']).write_text('test') @property def call_count(self): return len(self.call_args_list) -def _keep_basename_only(path_structure): - return np.vectorize(op.basename)(np.array(path_structure)) +def _keep_basename_only(paths): + return [Path(p).name for p in paths] def _get_expected_url(name): @@ -49,7 +48,7 @@ def _get_expected_url(name): def _get_expected_path(base, name): - return op.join(base, name) + return Path(base, name) def _check_mocked_function_calls(mocked_func, call_fname_hash_pairs, @@ -62,8 +61,8 @@ def _check_mocked_function_calls(mocked_func, call_fname_hash_pairs, for idx, current in enumerate(call_fname_hash_pairs): _, call_kwargs = mocked_func.call_args_list[idx] hash_type, hash = call_kwargs['known_hash'].split(':') - assert call_kwargs['url'] == _get_expected_url(current['name']) - assert op.join(call_kwargs['path'], call_kwargs['fname']) == \ + assert call_kwargs['url'] == _get_expected_url(current['name']), idx + assert Path(call_kwargs['path'], call_kwargs['fname']) == \ _get_expected_path(base_path, current['name']) assert hash == current['hash'] assert hash_type == 'sha1' @@ -130,26 +129,24 @@ def test_sleep_physionet_age(physionet_tmpdir, monkeypatch, download_is_error): monkeypatch.setattr(pooch, 'retrieve', my_func) paths = age.fetch_data(subjects=[0], recording=[1], path=physionet_tmpdir) - assert_array_equal(_keep_basename_only(paths), - [['SC4001E0-PSG.edf', 'SC4001EC-Hypnogram.edf']]) + assert _keep_basename_only(paths[0]) == \ + ['SC4001E0-PSG.edf', 'SC4001EC-Hypnogram.edf'] paths = age.fetch_data(subjects=[0, 1], recording=[1], path=physionet_tmpdir) - assert_array_equal(_keep_basename_only(paths), - [['SC4001E0-PSG.edf', 'SC4001EC-Hypnogram.edf'], - ['SC4011E0-PSG.edf', 'SC4011EH-Hypnogram.edf']]) + assert _keep_basename_only(paths[0]) == \ + ['SC4001E0-PSG.edf', 'SC4001EC-Hypnogram.edf'] + assert _keep_basename_only(paths[1]) == \ + ['SC4011E0-PSG.edf', 'SC4011EH-Hypnogram.edf'] paths = age.fetch_data(subjects=[0], recording=[1, 2], path=physionet_tmpdir) - assert_array_equal(_keep_basename_only(paths), - [['SC4001E0-PSG.edf', 'SC4001EC-Hypnogram.edf'], - ['SC4002E0-PSG.edf', 'SC4002EC-Hypnogram.edf']]) + assert _keep_basename_only(paths[0]) == \ + ['SC4001E0-PSG.edf', 'SC4001EC-Hypnogram.edf'] + assert _keep_basename_only(paths[1]) == \ + ['SC4002E0-PSG.edf', 'SC4002EC-Hypnogram.edf'] EXPECTED_CALLS = ( - {'name': 'SC4001E0-PSG.edf', - 'hash': 'adabd3b01fc7bb75c523a974f38ee3ae4e57b40f'}, - {'name': 
'SC4001EC-Hypnogram.edf', - 'hash': '21c998eadc8b1e3ea6727d3585186b8f76e7e70b'}, {'name': 'SC4001E0-PSG.edf', 'hash': 'adabd3b01fc7bb75c523a974f38ee3ae4e57b40f'}, {'name': 'SC4001EC-Hypnogram.edf', @@ -158,14 +155,11 @@ def test_sleep_physionet_age(physionet_tmpdir, monkeypatch, download_is_error): 'hash': '4d17451f7847355bcab17584de05e7e1df58c660'}, {'name': 'SC4011EH-Hypnogram.edf', 'hash': 'd582a3cbe2db481a362af890bc5a2f5ca7c878dc'}, - {'name': 'SC4001E0-PSG.edf', - 'hash': 'adabd3b01fc7bb75c523a974f38ee3ae4e57b40f'}, - {'name': 'SC4001EC-Hypnogram.edf', - 'hash': '21c998eadc8b1e3ea6727d3585186b8f76e7e70b'}, {'name': 'SC4002E0-PSG.edf', 'hash': 'c6b6d7a8605cc7e7602b6028ee77f6fbf5f7581d'}, {'name': 'SC4002EC-Hypnogram.edf', - 'hash': '386230188a3552b1fc90bba0fb7476ceaca174b6'}) + 'hash': '386230188a3552b1fc90bba0fb7476ceaca174b6'}, + ) base_path = age.data_path(path=physionet_tmpdir) _check_mocked_function_calls(my_func, EXPECTED_CALLS, base_path) @@ -192,8 +186,8 @@ def test_sleep_physionet_temazepam(physionet_tmpdir, monkeypatch): monkeypatch.setattr(pooch, 'retrieve', my_func) paths = temazepam.fetch_data(subjects=[0], path=physionet_tmpdir) - assert_array_equal(_keep_basename_only(paths), - [['ST7011J0-PSG.edf', 'ST7011JP-Hypnogram.edf']]) + assert _keep_basename_only(paths[0]) == \ + ['ST7011J0-PSG.edf', 'ST7011JP-Hypnogram.edf'] EXPECTED_CALLS = ( {'name': 'ST7011J0-PSG.edf', diff --git a/mne/datasets/tests/test_datasets.py b/mne/datasets/tests/test_datasets.py index 8709b934326..46c1ecd229f 100644 --- a/mne/datasets/tests/test_datasets.py +++ b/mne/datasets/tests/test_datasets.py @@ -189,7 +189,7 @@ def test_fetch_parcellations(tmp_path): _zip_fnames = ['foo/foo.txt', 'foo/bar.txt', 'foo/baz.txt'] -def _fake_zip_fetch(url, path, fname, known_hash): +def _fake_zip_fetch(url, path, fname, *args, **kwargs): fname = op.join(path, fname) with zipfile.ZipFile(fname, 'w') as zipf: with zipf.open('foo/', 'w'):
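
Taken together, the series settles on the small timing helper below: patch 1 introduced it inline in `fetch_dataset`, patch 3 fixed the double subtraction of the start time, and patch 4 moved it into `mne/datasets/utils.py` for reuse by every fetcher. A self-contained sketch of its final behavior follows; the module-level logger setup and the `payload` stand-in for downloaded bytes are illustrative only.

import logging
import time

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def _log_time_size(t0, sz):
    """Log elapsed wall-clock time and downloaded size (final patched form)."""
    t = time.time() - t0
    fmt = '%Ss'
    if t > 60:  # include minutes once past one minute
        fmt = f'%Mm{fmt}'
    if t > 3600:  # include hours once past one hour
        fmt = f'%Hh{fmt}'
    sz = sz / 1048576  # bytes -> MB (1024 ** 2)
    t = time.strftime(fmt, time.gmtime(t))
    logger.info(f'Download complete in {t} ({sz:.1f} MB)')


t0 = time.time()
payload = b'x' * (3 * 1048576)  # stand-in for 3 MB of downloaded data
_log_time_size(t0, len(payload))  # e.g. "Download complete in 00s (3.0 MB)"

Formatting the duration through time.strftime keeps the output compact (e.g. 02m05s, 01h03m20s) with no extra dependency, which is why each caller accumulates `sz` itself and only logs when at least one file was actually downloaded.
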