From 7c08e3b33805ca72c835f95591da44ced9a9b659 Mon Sep 17 00:00:00 2001
From: Eric Larson <larson.eric.d@gmail.com>
Date: Fri, 29 Jul 2022 14:56:32 -0400
Subject: [PATCH 1/5] DOC: Document make_dataset_description (#1029)

---
 examples/convert_mne_sample.py | 67 ++++++++++++++++++++++++++++++++--
 1 file changed, 64 insertions(+), 3 deletions(-)

diff --git a/examples/convert_mne_sample.py b/examples/convert_mne_sample.py
index ddb8efee95..2bcbeea268 100644
--- a/examples/convert_mne_sample.py
+++ b/examples/convert_mne_sample.py
@@ -10,7 +10,10 @@
 In this example we will use MNE-BIDS to organize the MNE sample data according
 to the BIDS standard.
 In a second step we will read the organized dataset using MNE-BIDS.
-"""  # noqa: D400 D205
+
+.. _BIDS dataset_description.json definition: https://bids-specification.readthedocs.io/en/stable/03-modality-agnostic-files.html#dataset-description
+.. _ds000248 dataset_description.json: https://github.com/sappelhoff/bids-examples/blob/master/ds000248/dataset_description.json
+"""  # noqa: D400 D205 E501
 
 # Authors: Mainak Jas <mainak.jas@telecom-paristech.fr>
 #          Alexandre Gramfort <alexandre.gramfort@telecom-paristech.fr>
@@ -24,14 +27,17 @@
 # First we import some basic Python libraries, followed by MNE-Python and its
 # sample data, and then finally the MNE-BIDS functions we need for this example
 
+import json
 import os.path as op
+from pprint import pprint
 import shutil
 
 import mne
 from mne.datasets import sample
 
 from mne_bids import (write_raw_bids, read_raw_bids, write_meg_calibration,
-                      write_meg_crosstalk, BIDSPath, print_dir_tree)
+                      write_meg_crosstalk, BIDSPath, print_dir_tree,
+                      make_dataset_description)
 from mne_bids.stats import count_events
 
 # %%
@@ -82,10 +88,11 @@
 raw.info['line_freq'] = 60
 raw_er.info['line_freq'] = 60
 
+task = 'audiovisual'
 bids_path = BIDSPath(
     subject='01',
     session='01',
-    task='audiovisual',
+    task=task,
     run='1',
     root=output_path
 )
@@ -170,3 +177,57 @@
 with open(readme, 'r', encoding='utf-8-sig') as fid:
     text = fid.read()
 print(text)
+
+# %%
+# It is also generally a good idea to add a description of your dataset,
+# see the `BIDS dataset_description.json definition`_ for more information.
+
+how_to_acknowledge = """\
+If you reference this dataset in a publication, please acknowledge its \
+authors and cite MNE papers: A. Gramfort, M. Luessi, E. Larson, D. Engemann, \
+D. Strohmeier, C. Brodbeck, L. Parkkonen, M. Hämäläinen, \
+MNE software for processing MEG and EEG data, NeuroImage, Volume 86, \
+1 February 2014, Pages 446-460, ISSN 1053-8119 \
+and \
+A. Gramfort, M. Luessi, E. Larson, D. Engemann, D. Strohmeier, C. Brodbeck, \
+R. Goj, M. Jas, T. Brooks, L. Parkkonen, M. Hämäläinen, MEG and EEG data \
+analysis with MNE-Python, Frontiers in Neuroscience, Volume 7, 2013, \
+ISSN 1662-453X"""
+
+make_dataset_description(
+    path=bids_path.root,
+    name=task,
+    authors=["Alexandre Gramfort", "Matti Hämäläinen"],
+    how_to_acknowledge=how_to_acknowledge,
+    acknowledgements="""\
+Alexandre Gramfort, Mainak Jas, and Stefan Appelhoff prepared and updated the \
+data in BIDS format.""",
+    data_license='CC0',
+    ethics_approvals=['Human Subjects Division at the University of Washington'],  # noqa: E501
+    funding=[
+        "NIH 5R01EB009048",
+        "NIH 1R01EB009048",
+        "NIH R01EB006385",
+        "NIH 1R01HD40712",
+        "NIH 1R01NS44319",
+        "NIH 2R01NS37462",
+        "NIH P41EB015896",
+        "ANR-11-IDEX-0003-02",
+        "ERC-StG-263584",
+        "ERC-StG-676943",
+        "ANR-14-NEUC-0002-01"
+    ],
+    references_and_links=[
+        "https://doi.org/10.1016/j.neuroimage.2014.02.017",
+        "https://doi.org/10.3389/fnins.2013.00267",
+        "https://mne.tools/stable/overview/datasets_index.html#sample"
+    ],
+    doi="doi:10.18112/openneuro.ds000248.v1.2.4",
+    overwrite=True
+)
+desc_json_path = bids_path.root / 'dataset_description.json'
+with open(desc_json_path, 'r', encoding='utf-8-sig') as fid:
+    pprint(json.loads(fid.read()))
+
+# %%
+# This should be very similar to the `ds000248 dataset_description.json`_!

From fc28eaeee7f124f141a048fef5bf9badca37b9a1 Mon Sep 17 00:00:00 2001
From: Scott Huberty <52462026+scott-huberty@users.noreply.github.com>
Date: Sat, 30 Jul 2022 04:12:43 -0400
Subject: [PATCH 2/5] amended `_write_electrodes_tsv` to exclude writing stim
 channels to electrodes.tsv (#1023)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* amended mne_bids.dig._write_electrodes_tsv to skip stim channels so they are not written to electrodes.tsv

* added test for properly writing to electrodes.tsv

* Update mne_bids/tests/test_dig.py

Co-authored-by: Richard Höchenberger <richard.hoechenberger@gmail.com>

* Update mne_bids/tests/test_dig.py

Co-authored-by: Richard Höchenberger <richard.hoechenberger@gmail.com>

* Update mne_bids/tests/test_dig.py

Co-authored-by: Richard Höchenberger <richard.hoechenberger@gmail.com>

* removed second test from test_electrodes_io

* updated change log

* updated test_electrodes_io per reviewer comments

* Update mne_bids/tests/test_dig.py

Co-authored-by: Mathieu Scheltienne <mathieu.scheltienne@gmail.com>

* Update doc/whats_new.rst

Co-authored-by: Mathieu Scheltienne <mathieu.scheltienne@gmail.com>

* Update mne_bids/tests/test_dig.py

Co-authored-by: Mathieu Scheltienne <mathieu.scheltienne@gmail.com>

* specifying encoding=utf-8 in open() per reviewer suggestion

* Rework test a little

Co-authored-by: Richard Höchenberger <richard.hoechenberger@gmail.com>
Co-authored-by: Mathieu Scheltienne <mathieu.scheltienne@gmail.com>
---
 doc/whats_new.rst          |  2 ++
 mne_bids/dig.py            |  6 +++++-
 mne_bids/tests/test_dig.py | 25 +++++++++++++++++++++++++
 3 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/doc/whats_new.rst b/doc/whats_new.rst
index 15390f719c..4ad76a9e3d 100644
--- a/doc/whats_new.rst
+++ b/doc/whats_new.rst
@@ -72,6 +72,8 @@ Detailed list of changes
 
 - :func:`~mne_bids.print_dir_tree` now correctly expands ``~`` to the user's home directory, by `Richard Höchenberger`_ (:gh:`1013`)
 
+- :func:`~mne_bids.write_raw_bids` now correctly excludes stim channels when writing to electrodes.tsv, by `Scott Huberty_` (:gh:`1023`)
+
 :doc:`Find out what was new in previous releases <whats_new_previous_releases>`
 
 .. include:: authors.rst
diff --git a/mne_bids/dig.py b/mne_bids/dig.py
index c28309567b..2214ee7927 100644
--- a/mne_bids/dig.py
+++ b/mne_bids/dig.py
@@ -138,7 +138,11 @@ def _write_electrodes_tsv(raw, fname, datatype, overwrite=False):
     # create list of channel coordinates and names
     x, y, z, names = list(), list(), list(), list()
     for ch in raw.info['chs']:
-        if (
+        if ch['kind'] == FIFF.FIFFV_STIM_CH:
+            logger.debug(f"Not writing stim chan {ch['ch_name']} "
+                         f"to electrodes.tsv")
+            continue
+        elif (
             np.isnan(ch['loc'][:3]).any() or
             np.allclose(ch['loc'][:3], 0)
         ):
diff --git a/mne_bids/tests/test_dig.py b/mne_bids/tests/test_dig.py
index da478a9886..63d3b10016 100644
--- a/mne_bids/tests/test_dig.py
+++ b/mne_bids/tests/test_dig.py
@@ -325,3 +325,28 @@ def test_convert_montage():
     assert pos['coord_frame'] == 'mri'
     assert_almost_equal(pos['ch_pos']['EEG 001'],
                         [-0.0313669, 0.0540269, 0.0949191])
+
+
+def test_electrodes_io(tmp_path):
+    """Ensure only electrodes end up in *_electrodes.tsv."""
+    raw = _load_raw()
+    raw.pick_types(eeg=True, stim=True)  # we don't need meg channels
+    bids_root = tmp_path / 'bids1'
+    bids_path = _bids_path.copy().update(root=bids_root, datatype='eeg')
+    write_raw_bids(raw=raw, bids_path=bids_path)
+
+    electrodes_path = (
+        bids_path.copy()
+        .update(
+            task=None,
+            run=None,
+            space='CapTrak',
+            suffix='electrodes',
+            extension='.tsv'
+        )
+    )
+    with open(electrodes_path, encoding='utf-8') as sidecar:
+        n_entries = len([line for line in sidecar
+                         if 'name' not in line])  # don't need the header
+        # only eeg chs w/ electrode pos should be written to electrodes.tsv
+        assert n_entries == len(raw.get_channel_types('eeg'))

From c3527fa9e7db0c8b8e97678eb23662fdda02201c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Richard=20H=C3=B6chenberger?=
 <richard.hoechenberger@gmail.com>
Date: Sat, 30 Jul 2022 13:12:02 +0200
Subject: [PATCH 3/5] MRG: In BIDSPath, don't infer the datatype from the
 suffix (#1030)

* In BIDSPath.update(), don't infer the datatype from the suffix

I needed to set datatype to None to make `find_emptyroom()`
deal with the inheritance principle correctly, but turns out it
never took effect because `BIDSPath.update()` would silently
replace my `datatype=None` with an inferred `datatype='meg'`.

* Update doc/whats_new.rst
---
 doc/whats_new.rst             |  2 ++
 mne_bids/path.py              | 35 ++++++++++++++++-------------------
 mne_bids/sidecar_updates.py   |  3 ++-
 mne_bids/tests/test_path.py   | 18 +++++++++---------
 mne_bids/tests/test_update.py |  7 +------
 mne_bids/tests/test_write.py  |  4 ++--
 6 files changed, 32 insertions(+), 37 deletions(-)

diff --git a/doc/whats_new.rst b/doc/whats_new.rst
index 4ad76a9e3d..f50ae7c18f 100644
--- a/doc/whats_new.rst
+++ b/doc/whats_new.rst
@@ -48,6 +48,8 @@ Detailed list of changes
 🧐 API and behavior changes
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
+- In many places, we used to infer the ``datatype`` of a :class:`~mne_bids.BIDSPath` from the ``suffix``, if not explicitly provided. However, this has led to trouble in certain edge cases. In an effort to reduce the amount of implicit behavior in MNE-BIDS, we now require users to explicitly specify a ``datatype`` whenever the invoked functions or methods expect one, by `Richard Höchenberger`_ (:gh:`1030`)
+
 - :func:`mne_bids.make_dataset_description` now accepts keyword arguments only, and can now also write the following metadata: ``HEDVersion``, ``EthicsApprovals``, ``GeneratedBy``, and ``SourceDatasets``, by `Stefan Appelhoff`_ (:gh:`406`)
 
 - The deprecated function ``mne_bids.mark_bad_channels`` has been removed in favor of :func:`mne_bids.mark_channels`, by `Richard Höchenberger`_ (:gh:`1009`)
diff --git a/mne_bids/path.py b/mne_bids/path.py
index b7a46f592a..3eb01943a3 100644
--- a/mne_bids/path.py
+++ b/mne_bids/path.py
@@ -22,7 +22,7 @@
 from mne_bids.config import (
     ALLOWED_PATH_ENTITIES, ALLOWED_FILENAME_EXTENSIONS,
     ALLOWED_FILENAME_SUFFIX, ALLOWED_PATH_ENTITIES_SHORT,
-    ALLOWED_DATATYPES, SUFFIX_TO_DATATYPE, ALLOWED_DATATYPE_EXTENSIONS,
+    ALLOWED_DATATYPES, ALLOWED_DATATYPE_EXTENSIONS,
     ALLOWED_SPACES,
     reader, ENTITY_VALUE_TYPE)
 from mne_bids.utils import (_check_key_val, _check_empty_room_basename,
@@ -51,10 +51,6 @@ def _find_matched_empty_room(bids_path):
                          'date set. Cannot get matching empty-room file.')
 
     ref_date = raw.info['meas_date']
-    if not isinstance(ref_date, datetime):  # pragma: no cover
-        # for MNE < v0.20
-        ref_date = datetime.fromtimestamp(raw.info['meas_date'][0])
-
     emptyroom_dir = BIDSPath(root=bids_root, subject='emptyroom').directory
 
     if not emptyroom_dir.exists():
@@ -95,6 +91,7 @@ def _find_matched_empty_room(bids_path):
         er_bids_path = get_bids_path_from_fname(er_fname, check=False)
         er_bids_path.subject = 'emptyroom'  # er subject entity is different
         er_bids_path.root = bids_root
+        er_bids_path.datatype = 'meg'
         er_meas_date = None
 
         # Try to extract date from filename.
@@ -229,7 +226,7 @@ class BIDSPath(object):
     Generate a BIDSPath object and inspect it
 
     >>> bids_path = BIDSPath(subject='test', session='two', task='mytask',
-    ...                      suffix='ieeg', extension='.edf')
+    ...                      suffix='ieeg', extension='.edf', datatype='ieeg')
     >>> print(bids_path.basename)
     sub-test_ses-two_task-mytask_ieeg.edf
     >>> bids_path
@@ -733,11 +730,6 @@ def update(self, *, check=None, **kwargs):
                 getattr(self, f'{key}') if hasattr(self, f'_{key}') else None
             setattr(self, f'_{key}', val)
 
-        # infer datatype if suffix is uniquely the datatype
-        if self.datatype is None and \
-                self.suffix in SUFFIX_TO_DATATYPE:
-            self._datatype = SUFFIX_TO_DATATYPE[self.suffix]
-
         # Perform a check of the entities and revert changes if check fails
         try:
             self._check()
@@ -899,7 +891,11 @@ def find_empty_room(self, use_sidecar_only=False, verbose=None):
                              'Please use `bids_path.update(root="<root>")` '
                              'to set the root of the BIDS folder to read.')
 
-        sidecar_fname = _find_matching_sidecar(self, extension='.json')
+        sidecar_fname = _find_matching_sidecar(
+            # needed to deal with inheritance principle
+            self.copy().update(datatype=None),
+            extension='.json'
+        )
         with open(sidecar_fname, 'r', encoding='utf-8') as f:
             sidecar_json = json.load(f)
 
@@ -1237,14 +1233,15 @@ def _parse_ext(raw_fname):
     return fname, ext
 
 
-def _infer_datatype_from_path(fname):
+def _infer_datatype_from_path(fname: Path):
     # get the parent
-    datatype = Path(fname).parent.name
-
-    if any([datatype.startswith(entity) for entity in ['sub', 'ses']]):
-        datatype = None
-
-    if not datatype:
+    if fname.exists():
+        datatype = fname.parent.name
+        if any([datatype.startswith(entity) for entity in ['sub', 'ses']]):
+            datatype = None
+    elif fname.stem.split('_')[-1] in ('meg', 'eeg', 'ieeg'):
+        datatype = fname.stem.split('_')[-1]
+    else:
         datatype = None
 
     return datatype
diff --git a/mne_bids/sidecar_updates.py b/mne_bids/sidecar_updates.py
index ae8e0d405a..45019a59e9 100644
--- a/mne_bids/sidecar_updates.py
+++ b/mne_bids/sidecar_updates.py
@@ -85,7 +85,8 @@ def update_sidecar_json(bids_path, entries, verbose=None):
     >>> from pathlib import Path
     >>> root = Path('./mne_bids/tests/data/tiny_bids').absolute()
     >>> bids_path = BIDSPath(subject='01', task='rest', session='eeg',
-    ...                      suffix='eeg', extension='.json', root=root)
+    ...                      suffix='eeg', extension='.json', datatype='eeg',
+    ...                      root=root)
     >>> entries = {'PowerLineFrequency': 60}
     >>> update_sidecar_json(bids_path, entries, verbose=False)
 
diff --git a/mne_bids/tests/test_path.py b/mne_bids/tests/test_path.py
index 49df3d225e..25ff35d68d 100644
--- a/mne_bids/tests/test_path.py
+++ b/mne_bids/tests/test_path.py
@@ -260,10 +260,8 @@ def test_parse_ext():
 @pytest.mark.parametrize('fname', [
     'sub-01_ses-02_task-test_run-3_split-01_meg.fif',
     'sub-01_ses-02_task-test_run-3_split-01',
-    ('/bids_root/sub-01/ses-02/meg/' +
-     'sub-01_ses-02_task-test_run-3_split-01_meg.fif'),
-    ('sub-01/ses-02/meg/' +
-     'sub-01_ses-02_task-test_run-3_split-01_meg.fif')
+    '/bids_root/sub-01/ses-02/meg/sub-01_ses-02_task-test_run-3_split-01_meg.fif',  # noqa: E501
+    'sub-01/ses-02/meg/sub-01_ses-02_task-test_run-3_split-01_meg.fif'
 ])
 def test_get_bids_path_from_fname(fname):
     bids_path = get_bids_path_from_fname(fname)
@@ -598,7 +596,7 @@ def test_bids_path(return_bids_test_dir):
     # ... but raises an error with check=True
     match = r'space \(foo\) is not valid for datatype \(eeg\)'
     with pytest.raises(ValueError, match=match):
-        BIDSPath(subject=subject_id, space='foo', suffix='eeg')
+        BIDSPath(subject=subject_id, space='foo', suffix='eeg', datatype='eeg')
 
     # error check on space for datatypes that do not support space
     match = 'space entity is not valid for datatype anat'
@@ -612,7 +610,8 @@ def test_bids_path(return_bids_test_dir):
         bids_path_tmpcopy.update(space='CapTrak', check=True)
 
     # making a valid space update works
-    bids_path_tmpcopy.update(suffix='eeg', space="CapTrak", check=True)
+    bids_path_tmpcopy.update(suffix='eeg', datatype='eeg',
+                             space="CapTrak", check=True)
 
     # suffix won't be error checks if initial check was false
     bids_path.update(suffix=suffix)
@@ -628,7 +627,7 @@ def test_bids_path(return_bids_test_dir):
 
     # test repr
     bids_path = BIDSPath(subject='01', session='02',
-                         task='03', suffix='ieeg',
+                         task='03', suffix='ieeg', datatype='ieeg',
                          extension='.edf')
     assert repr(bids_path) == ('BIDSPath(\n'
                                'root: None\n'
@@ -680,7 +679,8 @@ def test_make_filenames():
     # All keys work
     prefix_data = dict(subject='one', session='two', task='three',
                        acquisition='four', run=1, processing='six',
-                       recording='seven', suffix='ieeg', extension='.json')
+                       recording='seven', suffix='ieeg', extension='.json',
+                       datatype='ieeg')
     expected_str = ('sub-one_ses-two_task-three_acq-four_run-01_proc-six_'
                     'rec-seven_ieeg.json')
     assert BIDSPath(**prefix_data).basename == expected_str
@@ -974,7 +974,7 @@ def test_find_emptyroom_ties(tmp_path):
                         'sample_audvis_trunc_raw.fif')
 
     bids_root = str(tmp_path)
-    bids_path = _bids_path.copy().update(root=bids_root)
+    bids_path = _bids_path.copy().update(root=bids_root, datatype='meg')
     session = '20010101'
     er_dir_path = BIDSPath(subject='emptyroom', session=session,
                            datatype='meg', root=bids_root)
diff --git a/mne_bids/tests/test_update.py b/mne_bids/tests/test_update.py
index 87d96949aa..59f78cfd62 100644
--- a/mne_bids/tests/test_update.py
+++ b/mne_bids/tests/test_update.py
@@ -107,7 +107,7 @@ def test_update_sidecar_jsons(_get_bids_test_dir, _bids_validate,
                        ('SEEGChannelCount', None, 0)]
 
     # get the sidecar json
-    sidecar_path = bids_path.copy().update(extension='.json')
+    sidecar_path = bids_path.copy().update(extension='.json', datatype='meg')
     sidecar_fpath = sidecar_path.fpath
     with open(sidecar_fpath, 'r', encoding='utf-8') as fin:
         sidecar_json = json.load(fin)
@@ -212,11 +212,6 @@ def test_update_anat_landmarks(tmp_path):
     update_anat_landmarks(bids_path=bids_path_mri_no_ext,
                           landmarks=landmarks_new)
 
-    # Check without datatytpe provided
-    bids_path_mri_no_datatype = bids_path_mri.copy().update(datatype=None)
-    update_anat_landmarks(bids_path=bids_path_mri_no_datatype,
-                          landmarks=landmarks)
-
     # Check handling of invalid input
     bids_path_invalid = bids_path_mri.copy().update(datatype='meg')
     with pytest.raises(ValueError, match='Can only operate on "anat"'):
diff --git a/mne_bids/tests/test_write.py b/mne_bids/tests/test_write.py
index 6e79a059f3..334e6e0bb6 100644
--- a/mne_bids/tests/test_write.py
+++ b/mne_bids/tests/test_write.py
@@ -446,7 +446,7 @@ def test_line_freq(line_freq, _bids_validate, tmp_path):
     _bids_validate(bids_root)
 
     eeg_json_fpath = (bids_path.copy()
-                      .update(suffix='eeg', extension='.json')
+                      .update(suffix='eeg', datatype='eeg', extension='.json')
                       .fpath)
     with open(eeg_json_fpath, 'r', encoding='utf-8') as fin:
         eeg_json = json.load(fin)
@@ -916,7 +916,7 @@ def test_kit(_bids_validate, tmp_path):
     # ensure the marker file is produced in the right place
     marker_fname = BIDSPath(
         subject=subject_id, session=session_id, task=task, run=run,
-        suffix='markers', extension='.sqd',
+        suffix='markers', extension='.sqd', datatype='meg',
         root=bids_root)
     assert op.exists(marker_fname)
 

From 158df4499a7843b75fc2894cd4f539b367a6e5c8 Mon Sep 17 00:00:00 2001
From: Mathieu Scheltienne <mathieu.scheltienne@gmail.com>
Date: Mon, 1 Aug 2022 07:55:52 +0200
Subject: [PATCH 4/5] [MRG, DOC] Fix whats_new.rst link (#1032)

---
 doc/authors.rst   | 1 +
 doc/whats_new.rst | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/doc/authors.rst b/doc/authors.rst
index 3c97151743..497aaa980c 100644
--- a/doc/authors.rst
+++ b/doc/authors.rst
@@ -33,3 +33,4 @@
 .. _Simon Kern: https://github.com/skjerns
 .. _Yorguin Mantilla: https://github.com/yjmantilla
 .. _Swastika Gupta: https://swastyy.github.io
+.. _Scott Huberty: https://github.com/scott-huberty
diff --git a/doc/whats_new.rst b/doc/whats_new.rst
index f50ae7c18f..4b6554111e 100644
--- a/doc/whats_new.rst
+++ b/doc/whats_new.rst
@@ -74,7 +74,7 @@ Detailed list of changes
 
 - :func:`~mne_bids.print_dir_tree` now correctly expands ``~`` to the user's home directory, by `Richard Höchenberger`_ (:gh:`1013`)
 
-- :func:`~mne_bids.write_raw_bids` now correctly excludes stim channels when writing to electrodes.tsv, by `Scott Huberty_` (:gh:`1023`)
+- :func:`~mne_bids.write_raw_bids` now correctly excludes stim channels when writing to electrodes.tsv, by `Scott Huberty`_ (:gh:`1023`)
 
 :doc:`Find out what was new in previous releases <whats_new_previous_releases>`
 

From b90dde853bba279a9985ec6e565e29fecd248d94 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Richard=20H=C3=B6chenberger?=
 <richard.hoechenberger@gmail.com>
Date: Mon, 1 Aug 2022 08:23:53 +0200
Subject: [PATCH 5/5] MRG: Don't store invalid weight and height in
 info['subject_info']; write weight and height to participants.tsv (#1031)

---
 doc/whats_new.rst           |  4 ++
 mne_bids/read.py            | 23 +++++++---
 mne_bids/tests/test_read.py | 26 ++++++++---
 mne_bids/write.py           | 90 ++++++++++++++++++++++++++++---------
 4 files changed, 108 insertions(+), 35 deletions(-)

diff --git a/doc/whats_new.rst b/doc/whats_new.rst
index 4b6554111e..a5de6ada21 100644
--- a/doc/whats_new.rst
+++ b/doc/whats_new.rst
@@ -45,6 +45,8 @@ Detailed list of changes
 
 - You can now write raw data and an associated empty-room recording with just a single call to :func:`mne_bids.write_raw_bids`: the ``empty_room`` parameter now also accepts an :class:`mne.io.Raw` data object. The empty-room session name will be derived from the recording date automatically, by `Richard Höchenberger`_ (:gh:`998`)
 
+- :func:`~mne_bids.write_raw_bids` now stores participant weight and height in ``participants.tsv``, by `Richard Höchenberger`_ (:gh:`1031`)
+
 🧐 API and behavior changes
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
@@ -76,6 +78,8 @@ Detailed list of changes
 
 - :func:`~mne_bids.write_raw_bids` now correctly excludes stim channels when writing to electrodes.tsv, by `Scott Huberty`_ (:gh:`1023`)
 
+- :func:`~mne_bids.read_raw_bids` no longer populates ``raw.info['subject_info']`` with invalid values, which previously prevented users from writing the data to disk again, by `Richard Höchenberger`_ (:gh:`1031`)
+
 :doc:`Find out what was new in previous releases <whats_new_previous_releases>`
 
 .. include:: authors.rst
diff --git a/mne_bids/read.py b/mne_bids/read.py
index f653dcdf36..32c4ad57f6 100644
--- a/mne_bids/read.py
+++ b/mne_bids/read.py
@@ -194,10 +194,11 @@ def _handle_participants_reading(participants_fname, raw, subject):
     participants_tsv = _from_tsv(participants_fname)
     subjects = participants_tsv['participant_id']
     row_ind = subjects.index(subject)
+    raw.info['subject_info'] = dict()  # start from scratch
 
     # set data from participants tsv into subject_info
     for col_name, value in participants_tsv.items():
-        if col_name == 'sex' or col_name == 'hand':
+        if col_name in ('sex', 'hand'):
             value = _map_options(what=col_name, key=value[row_ind],
                                  fro='bids', to='mne')
             # We don't know how to translate to MNE, so skip.
@@ -206,15 +207,24 @@ def _handle_participants_reading(participants_fname, raw, subject):
                     info_str = 'subject sex'
                 else:
                     info_str = 'subject handedness'
-                warn(f'Unable to map `{col_name}` value to MNE. '
+                warn(f'Unable to map "{col_name}" value "{value}" to MNE. '
                      f'Not setting {info_str}.')
+        elif col_name in ('height', 'weight'):
+            try:
+                value = float(value[row_ind])
+            except ValueError:
+                value = None
         else:
-            value = value[row_ind]
+            if value[row_ind] == 'n/a':
+                value = None
+            else:
+                value = value[row_ind]
+
         # add data into raw.Info
-        if raw.info['subject_info'] is None:
-            raw.info['subject_info'] = dict()
         key = 'his_id' if col_name == 'participant_id' else col_name
-        raw.info['subject_info'][key] = value
+        if value is not None:
+            assert key not in raw.info['subject_info']
+            raw.info['subject_info'][key] = value
 
     return raw
 
@@ -763,6 +773,7 @@ def read_raw_bids(bids_path, extra_params=None, verbose=None):
         )
     else:
         warn(f"participants.tsv file not found for {raw_path}")
+        raw.info['subject_info'] = dict()
 
     assert raw.annotations.orig_time == raw.info['meas_date']
     return raw
diff --git a/mne_bids/tests/test_read.py b/mne_bids/tests/test_read.py
index 3a33e5c697..7d52c78875 100644
--- a/mne_bids/tests/test_read.py
+++ b/mne_bids/tests/test_read.py
@@ -120,13 +120,15 @@ def test_read_participants_data(tmp_path):
     subject_info = {
         'hand': 1,
         'sex': 2,
+        'weight': 70.5,
+        'height': 180.5
     }
     raw.info['subject_info'] = subject_info
     write_raw_bids(raw, bids_path, overwrite=True, verbose=False)
     raw = read_raw_bids(bids_path=bids_path)
-    print(raw.info['subject_info'])
     assert raw.info['subject_info']['hand'] == 1
-    assert raw.info['subject_info']['sex'] == 2
+    assert raw.info['subject_info']['weight'] == 70.5
+    assert raw.info['subject_info']['height'] == 180.5
     assert raw.info['subject_info'].get('birthday', None) is None
     assert raw.info['subject_info']['his_id'] == f'sub-{bids_path.subject}'
     assert 'participant_id' not in raw.info['subject_info']
@@ -139,25 +141,35 @@ def test_read_participants_data(tmp_path):
     raw = read_raw_bids(bids_path=bids_path)
     assert raw.info['subject_info']['hand'] == 0
     assert raw.info['subject_info']['sex'] == 2
+    assert raw.info['subject_info']['weight'] == 70.5
+    assert raw.info['subject_info']['height'] == 180.5
     assert raw.info['subject_info'].get('birthday', None) is None
 
     # make sure things are read even if the entries don't make sense
     participants_tsv = _from_tsv(participants_tsv_fpath)
     participants_tsv['hand'][0] = 'righty'
     participants_tsv['sex'][0] = 'malesy'
+    # 'n/a' and non-numeric values should get omitted
+    participants_tsv['weight'] = ['n/a']
+    participants_tsv['height'] = ['tall']
+
     _to_tsv(participants_tsv, participants_tsv_fpath)
     with pytest.warns(RuntimeWarning, match='Unable to map'):
         raw = read_raw_bids(bids_path=bids_path)
-        assert raw.info['subject_info']['hand'] is None
-        assert raw.info['subject_info']['sex'] is None
 
-    # make sure to read in if no participants file
+    assert 'hand' not in raw.info['subject_info']
+    assert 'sex' not in raw.info['subject_info']
+    assert 'weight' not in raw.info['subject_info']
+    assert 'height' not in raw.info['subject_info']
+
+    # test reading if participants.tsv is missing
     raw = _read_raw_fif(raw_fname, verbose=False)
     write_raw_bids(raw, bids_path, overwrite=True, verbose=False)
-    os.remove(participants_tsv_fpath)
+    participants_tsv_fpath.unlink()
     with pytest.warns(RuntimeWarning, match='participants.tsv file not found'):
         raw = read_raw_bids(bids_path=bids_path)
-        assert raw.info['subject_info'] is None
+
+    assert raw.info['subject_info'] == dict()
 
 
 @pytest.mark.parametrize(
diff --git a/mne_bids/write.py b/mne_bids/write.py
index 4bc51eca0f..5469df1d09 100644
--- a/mne_bids/write.py
+++ b/mne_bids/write.py
@@ -341,10 +341,13 @@ def _participants_tsv(raw, subject_id, fname, overwrite=False):
         False, an error will be raised.
 
     """
-    subject_age = "n/a"
-    sex = "n/a"
+    subject_age = 'n/a'
+    sex = 'n/a'
     hand = 'n/a'
+    weight = 'n/a'
+    height = 'n/a'
     subject_info = raw.info.get('subject_info', None)
+
     if subject_id != 'emptyroom' and subject_info is not None:
         # add sex
         sex = _map_options(what='sex', key=subject_info.get('sex', 0),
@@ -371,18 +374,34 @@ def _participants_tsv(raw, subject_id, fname, overwrite=False):
         else:
             subject_age = "n/a"
 
+        # add weight and height
+        weight = subject_info.get('weight', 'n/a')
+        height = subject_info.get('height', 'n/a')
+
     subject_id = 'sub-' + subject_id
     data = OrderedDict(participant_id=[subject_id])
-    data.update({'age': [subject_age], 'sex': [sex], 'hand': [hand]})
+    data.update({
+        'age': [subject_age],
+        'sex': [sex],
+        'hand': [hand],
+        'weight': [weight],
+        'height': [height]
+    })
 
     if os.path.exists(fname):
         orig_data = _from_tsv(fname)
         # whether the new data exists identically in the previous data
-        exact_included = _contains_row(orig_data,
-                                       {'participant_id': subject_id,
-                                        'age': subject_age,
-                                        'sex': sex,
-                                        'hand': hand})
+        exact_included = _contains_row(
+            data=orig_data,
+            row_data={
+                'participant_id': subject_id,
+                'age': subject_age,
+                'sex': sex,
+                'hand': hand,
+                'weight': weight,
+                'height': height
+            }
+        )
         # whether the subject id is in the previous data
         sid_included = subject_id in orig_data['participant_id']
         # if the subject data provided is different to the currently existing
@@ -440,26 +459,53 @@ def _participants_json(fname, overwrite=False):
         an error will be raised.
 
     """
-    cols = OrderedDict()
-    cols['participant_id'] = {'Description': 'Unique participant identifier'}
-    cols['age'] = {'Description': 'Age of the participant at time of testing',
-                   'Units': 'years'}
-    cols['sex'] = {'Description': 'Biological sex of the participant',
-                   'Levels': {'F': 'female', 'M': 'male'}}
-    cols['hand'] = {'Description': 'Handedness of the participant',
-                    'Levels': {'R': 'right', 'L': 'left', 'A': 'ambidextrous'}}
+    data = {
+        'participant_id': {
+            'Description': 'Unique participant identifier'
+        },
+        'age': {
+            'Description': 'Age of the participant at time of testing',
+            'Units': 'years'
+        },
+        'sex': {
+            'Description': 'Biological sex of the participant',
+            'Levels': {
+                'F': 'female',
+                'M': 'male'
+            }
+        },
+        'hand': {
+            'Description': 'Handedness of the participant',
+            'Levels': {
+                'R': 'right',
+                'L': 'left',
+                'A': 'ambidextrous'
+            }
+        },
+        'weight': {
+            'Description': 'Body weight of the participant',
+            'Units': 'kg'
+        },
+        'height': {
+            'Description': 'Body height of the participant',
+            'Units': 'm'
+        }
+    }
 
     # make sure to append any JSON fields added by the user
     # Note: mne-bids will overwrite age, sex and hand fields
     # if `overwrite` is True
-    if op.exists(fname):
-        with open(fname, 'r', encoding='utf-8-sig') as fin:
-            orig_cols = json.load(fin, object_pairs_hook=OrderedDict)
+    fname = Path(fname)
+    if fname.exists():
+        orig_cols = json.loads(
+            fname.read_text(encoding='utf-8'),
+            object_pairs_hook=OrderedDict
+        )
         for key, val in orig_cols.items():
-            if key not in cols:
-                cols[key] = val
+            if key not in data:
+                data[key] = val
 
-    _write_json(fname, cols, overwrite)
+    _write_json(fname, data, overwrite)
 
 
 def _scans_tsv(raw, raw_fname, fname, keep_source, overwrite=False):