Skip to content

Commit

Permalink
Progress bars (#258)
Browse files — browse the repository at this point in the history
* Progress bars for downloads using pooch functionality

* Rectification of f-string in PhysionetMI

* Evaluations subject level progress bar

CV test subject level in the case of CrossSubjectEvaluation

* Update poetry.lock

* Update pyproject.toml

* dependencies

* Apply suggestions from code review (mne.utils to tqdm direct)

Co-authored-by: Sylvain Chevallier <sylvain.chevallier@uvsq.fr>

* Update poetry.lock

* tqdm arg

* Update whats_new.rst

* Update mistune dep

Co-authored-by: Sylvain Chevallier <sylvain.chevallier@uvsq.fr>
Branch information:
Div12345 and Sylvain Chevallier authored Feb 2, 2022
1 parent c060a4a commit 3e7fc04
Show file tree
Hide file tree
Showing 10 changed files with 317 additions and 200 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test-devel.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ jobs:
python-version: ${{ matrix.python-version }}

- name: Install Poetry
uses: snok/install-poetry@v1.1.6
uses: snok/install-poetry@v1
with:
virtualenvs-create: true
virtualenvs-in-project: true
Expand Down
1 change: 1 addition & 0 deletions docs/source/whats_new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ Develop branch
Enhancements
~~~~~~~~~~~~

- Progress bars, pooch, tqdm (:gh:`258` by `Divyesh Narayanan`_ and `Sylvain Chevallier`_)
- Adding Test and Example for set_download_dir (:gh:`249` by `Divyesh Narayanan`_)

Bugs
Expand Down
9 changes: 8 additions & 1 deletion moabb/datasets/Weibo2014.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,14 @@ def eeg_data_path(base_path, subject):
def get_subjects(sub_inds, sub_names, ind):
dataname = "data{}".format(ind)
if not os.path.isfile(os.path.join(base_path, dataname + ".zip")):
retrieve(FILES[ind], None, dataname + ".zip", base_path, processor=Unzip())
retrieve(
FILES[ind],
None,
dataname + ".zip",
base_path,
processor=Unzip(),
progressbar=True,
)

for fname in os.listdir(os.path.join(base_path, dataname + ".zip.unzip")):
for ind, prefix in zip(sub_inds, sub_names):
Expand Down
2 changes: 1 addition & 1 deletion moabb/datasets/Zhou2016.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
def local_data_path(base_path, subject):
if not os.path.isdir(os.path.join(base_path, "subject_{}".format(subject))):
if not os.path.isdir(os.path.join(base_path, "data")):
retrieve(DATA_PATH, None, fname="data.zip", path=base_path)
retrieve(DATA_PATH, None, fname="data.zip", path=base_path, progressbar=True)
with z.ZipFile(os.path.join(base_path, "data.zip"), "r") as f:
f.extractall(base_path)
os.remove(os.path.join(base_path, "data.zip"))
Expand Down
2 changes: 2 additions & 0 deletions moabb/datasets/bbci_eeg_fnirs.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ def eeg_data_path(base_path, subject, accept):
None,
fname="EEG.zip",
path=base_path,
progressbar=True,
)
with z.ZipFile(op.join(base_path, "EEG.zip"), "r") as f:
f.extractall(op.join(base_path, "EEG"))
Expand All @@ -65,6 +66,7 @@ def fnirs_data_path(path, subject, accept):
None,
fname="fNIRS.zip",
path=path,
progressbar=True,
)
if not op.isdir(op.join(path, "NIRS")):
os.makedirs(op.join(path, "NIRS"))
Expand Down
6 changes: 5 additions & 1 deletion moabb/datasets/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,11 @@ def data_dl(url, sign, path=None, force_update=False, verbose=None):
else:
known_hash = file_hash(destination)
dlpath = retrieve(
url, known_hash, fname=osp.basename(url), path=osp.dirname(destination)
url,
known_hash,
fname=osp.basename(url),
path=osp.dirname(destination),
progressbar=True,
)
return dlpath

Expand Down
4 changes: 2 additions & 2 deletions moabb/datasets/physionet_mi.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ def _get_single_subject_data(self, subject):
stim[stim == "T1"] = "left_hand"
stim[stim == "T2"] = "right_hand"
raw.annotations.description = stim
data["run_{idx}"] = raw
data[f"run_{idx}"] = raw
idx += 1

# feet runs
Expand All @@ -136,7 +136,7 @@ def _get_single_subject_data(self, subject):
stim[stim == "T1"] = "hands"
stim[stim == "T2"] = "feet"
raw.annotations.description = stim
data["run_{idx}"] = raw
data[f"run_{idx}"] = raw
idx += 1

return {"session_0": data}
Expand Down
18 changes: 14 additions & 4 deletions moabb/evaluations/evaluations.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
)
from sklearn.model_selection._validation import _fit_and_score, _score
from sklearn.preprocessing import LabelEncoder
from tqdm import tqdm

from moabb.evaluations.base import BaseEvaluation

Expand Down Expand Up @@ -122,7 +123,8 @@ def __init__(
super().__init__(**kwargs)

def _evaluate(self, dataset, pipelines):
for subject in dataset.subject_list:
# Progress Bar at subject level
for subject in tqdm(dataset.subject_list, desc=f"{dataset.code}-WithinSession"):
# check if we already have result for this subject/pipeline
# we might need a better granularity, if we query the DB
run_pipes = self.results.not_yet_computed(pipelines, dataset, subject)
Expand Down Expand Up @@ -235,7 +237,8 @@ def score_explicit(self, clf, X_train, y_train, X_test, y_test):
return score, duration

def _evaluate_learning_curve(self, dataset, pipelines):
for subject in dataset.subject_list:
# Progressbar at subject level
for subject in tqdm(dataset.subject_list, desc=f"{dataset.code}-WithinSession"):
# check if we already have result for this subject/pipeline
# we might need a better granularity, if we query the DB
run_pipes = self.results.not_yet_computed(pipelines, dataset, subject)
Expand Down Expand Up @@ -356,7 +359,8 @@ class CrossSessionEvaluation(BaseEvaluation):
def evaluate(self, dataset, pipelines):
if not self.is_valid(dataset):
raise AssertionError("Dataset is not appropriate for evaluation")
for subject in dataset.subject_list:
# Progressbar at subject level
for subject in tqdm(dataset.subject_list, desc=f"{dataset.code}-CrossSession"):
# check if we already have result for this subject/pipeline
# we might need a better granularity, if we query the DB
run_pipes = self.results.not_yet_computed(pipelines, dataset, subject)
Expand Down Expand Up @@ -471,12 +475,18 @@ def evaluate(self, dataset, pipelines):
# extract metadata
groups = metadata.subject.values
sessions = metadata.session.values
n_subjects = len(dataset.subject_list)

scorer = get_scorer(self.paradigm.scoring)

# perform leave one subject out CV
cv = LeaveOneGroupOut()
for train, test in cv.split(X, y, groups):
# Progressbar at subject level
for train, test in tqdm(
cv.split(X, y, groups),
total=n_subjects,
desc=f"{dataset.code}-CrossSubject",
):

subject = groups[test[0]]
# now we can check if this subject has results
Expand Down
Loading

0 comments on commit 3e7fc04

Please sign in to comment.