Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MRG: In BIDSPath, don't infer the datatype from the suffix #1030

Merged
merged 2 commits into from
Jul 30, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/whats_new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ Detailed list of changes
🧐 API and behavior changes
^^^^^^^^^^^^^^^^^^^^^^^^^^^

- In many places, we used to infer the ``datatype`` of a :class:`~mne_bids.BIDSPath` from the ``suffix``, if not explicitly provided. However, this has lead to trouble in certain edge cases. In an effort to reduce the amount of implicit behavior in MNE-BIDS, we now require users to explicitly specify a ``datatype`` whenever the invoked functions or methods expect one, by `Richard Höchenberger`_ (:gh:`1030`)

- :func:`mne_bids.make_dataset_description` now accepts keyword arguments only, and can now also write the following metadata: ``HEDVersion``, ``EthicsApprovals``, ``GeneratedBy``, and ``SourceDatasets``, by `Stefan Appelhoff`_ (:gh:`406`)

- The deprecated function ``mne_bids.mark_bad_channels`` has been removed in favor of :func:`mne_bids.mark_channels`, by `Richard Höchenberger`_ (:gh:`1009`)
Expand Down
35 changes: 16 additions & 19 deletions mne_bids/path.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from mne_bids.config import (
ALLOWED_PATH_ENTITIES, ALLOWED_FILENAME_EXTENSIONS,
ALLOWED_FILENAME_SUFFIX, ALLOWED_PATH_ENTITIES_SHORT,
ALLOWED_DATATYPES, SUFFIX_TO_DATATYPE, ALLOWED_DATATYPE_EXTENSIONS,
ALLOWED_DATATYPES, ALLOWED_DATATYPE_EXTENSIONS,
ALLOWED_SPACES,
reader, ENTITY_VALUE_TYPE)
from mne_bids.utils import (_check_key_val, _check_empty_room_basename,
Expand Down Expand Up @@ -51,10 +51,6 @@ def _find_matched_empty_room(bids_path):
'date set. Cannot get matching empty-room file.')

ref_date = raw.info['meas_date']
if not isinstance(ref_date, datetime): # pragma: no cover
# for MNE < v0.20
ref_date = datetime.fromtimestamp(raw.info['meas_date'][0])

emptyroom_dir = BIDSPath(root=bids_root, subject='emptyroom').directory

if not emptyroom_dir.exists():
Expand Down Expand Up @@ -95,6 +91,7 @@ def _find_matched_empty_room(bids_path):
er_bids_path = get_bids_path_from_fname(er_fname, check=False)
er_bids_path.subject = 'emptyroom' # er subject entity is different
er_bids_path.root = bids_root
er_bids_path.datatype = 'meg'
er_meas_date = None

# Try to extract date from filename.
Expand Down Expand Up @@ -229,7 +226,7 @@ class BIDSPath(object):
Generate a BIDSPath object and inspect it

>>> bids_path = BIDSPath(subject='test', session='two', task='mytask',
... suffix='ieeg', extension='.edf')
... suffix='ieeg', extension='.edf', datatype='ieeg')
>>> print(bids_path.basename)
sub-test_ses-two_task-mytask_ieeg.edf
>>> bids_path
Expand Down Expand Up @@ -733,11 +730,6 @@ def update(self, *, check=None, **kwargs):
getattr(self, f'{key}') if hasattr(self, f'_{key}') else None
setattr(self, f'_{key}', val)

# infer datatype if suffix is uniquely the datatype
if self.datatype is None and \
self.suffix in SUFFIX_TO_DATATYPE:
self._datatype = SUFFIX_TO_DATATYPE[self.suffix]

# Perform a check of the entities and revert changes if check fails
try:
self._check()
Expand Down Expand Up @@ -899,7 +891,11 @@ def find_empty_room(self, use_sidecar_only=False, verbose=None):
'Please use `bids_path.update(root="<root>")` '
'to set the root of the BIDS folder to read.')

sidecar_fname = _find_matching_sidecar(self, extension='.json')
sidecar_fname = _find_matching_sidecar(
# needed to deal with inheritance principle
self.copy().update(datatype=None),
extension='.json'
)
with open(sidecar_fname, 'r', encoding='utf-8') as f:
sidecar_json = json.load(f)

Expand Down Expand Up @@ -1237,14 +1233,15 @@ def _parse_ext(raw_fname):
return fname, ext


def _infer_datatype_from_path(fname):
def _infer_datatype_from_path(fname: Path):
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this function is used by get_bids_path_from_fname()

IMHO it's doing too much magic too, but here I only ensured it keeps on working

# get the parent
datatype = Path(fname).parent.name

if any([datatype.startswith(entity) for entity in ['sub', 'ses']]):
datatype = None

if not datatype:
if fname.exists():
datatype = fname.parent.name
if any([datatype.startswith(entity) for entity in ['sub', 'ses']]):
datatype = None
elif fname.stem.split('_')[-1] in ('meg', 'eeg', 'ieeg'):
datatype = fname.stem.split('_')[-1]
else:
datatype = None

return datatype
Expand Down
3 changes: 2 additions & 1 deletion mne_bids/sidecar_updates.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,8 @@ def update_sidecar_json(bids_path, entries, verbose=None):
>>> from pathlib import Path
>>> root = Path('./mne_bids/tests/data/tiny_bids').absolute()
>>> bids_path = BIDSPath(subject='01', task='rest', session='eeg',
... suffix='eeg', extension='.json', root=root)
... suffix='eeg', extension='.json', datatype='eeg',
... root=root)
>>> entries = {'PowerLineFrequency': 60}
>>> update_sidecar_json(bids_path, entries, verbose=False)

Expand Down
18 changes: 9 additions & 9 deletions mne_bids/tests/test_path.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,10 +260,8 @@ def test_parse_ext():
@pytest.mark.parametrize('fname', [
'sub-01_ses-02_task-test_run-3_split-01_meg.fif',
'sub-01_ses-02_task-test_run-3_split-01',
('/bids_root/sub-01/ses-02/meg/' +
'sub-01_ses-02_task-test_run-3_split-01_meg.fif'),
('sub-01/ses-02/meg/' +
'sub-01_ses-02_task-test_run-3_split-01_meg.fif')
'/bids_root/sub-01/ses-02/meg/sub-01_ses-02_task-test_run-3_split-01_meg.fif', # noqa: E501
'sub-01/ses-02/meg/sub-01_ses-02_task-test_run-3_split-01_meg.fif'
])
def test_get_bids_path_from_fname(fname):
bids_path = get_bids_path_from_fname(fname)
Expand Down Expand Up @@ -598,7 +596,7 @@ def test_bids_path(return_bids_test_dir):
# ... but raises an error with check=True
match = r'space \(foo\) is not valid for datatype \(eeg\)'
with pytest.raises(ValueError, match=match):
BIDSPath(subject=subject_id, space='foo', suffix='eeg')
BIDSPath(subject=subject_id, space='foo', suffix='eeg', datatype='eeg')

# error check on space for datatypes that do not support space
match = 'space entity is not valid for datatype anat'
Expand All @@ -612,7 +610,8 @@ def test_bids_path(return_bids_test_dir):
bids_path_tmpcopy.update(space='CapTrak', check=True)

# making a valid space update works
bids_path_tmpcopy.update(suffix='eeg', space="CapTrak", check=True)
bids_path_tmpcopy.update(suffix='eeg', datatype='eeg',
space="CapTrak", check=True)

# suffix won't be error checks if initial check was false
bids_path.update(suffix=suffix)
Expand All @@ -628,7 +627,7 @@ def test_bids_path(return_bids_test_dir):

# test repr
bids_path = BIDSPath(subject='01', session='02',
task='03', suffix='ieeg',
task='03', suffix='ieeg', datatype='ieeg',
extension='.edf')
assert repr(bids_path) == ('BIDSPath(\n'
'root: None\n'
Expand Down Expand Up @@ -680,7 +679,8 @@ def test_make_filenames():
# All keys work
prefix_data = dict(subject='one', session='two', task='three',
acquisition='four', run=1, processing='six',
recording='seven', suffix='ieeg', extension='.json')
recording='seven', suffix='ieeg', extension='.json',
datatype='ieeg')
expected_str = ('sub-one_ses-two_task-three_acq-four_run-01_proc-six_'
'rec-seven_ieeg.json')
assert BIDSPath(**prefix_data).basename == expected_str
Expand Down Expand Up @@ -974,7 +974,7 @@ def test_find_emptyroom_ties(tmp_path):
'sample_audvis_trunc_raw.fif')

bids_root = str(tmp_path)
bids_path = _bids_path.copy().update(root=bids_root)
bids_path = _bids_path.copy().update(root=bids_root, datatype='meg')
session = '20010101'
er_dir_path = BIDSPath(subject='emptyroom', session=session,
datatype='meg', root=bids_root)
Expand Down
7 changes: 1 addition & 6 deletions mne_bids/tests/test_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ def test_update_sidecar_jsons(_get_bids_test_dir, _bids_validate,
('SEEGChannelCount', None, 0)]

# get the sidecar json
sidecar_path = bids_path.copy().update(extension='.json')
sidecar_path = bids_path.copy().update(extension='.json', datatype='meg')
sidecar_fpath = sidecar_path.fpath
with open(sidecar_fpath, 'r', encoding='utf-8') as fin:
sidecar_json = json.load(fin)
Expand Down Expand Up @@ -212,11 +212,6 @@ def test_update_anat_landmarks(tmp_path):
update_anat_landmarks(bids_path=bids_path_mri_no_ext,
landmarks=landmarks_new)

# Check without datatytpe provided
bids_path_mri_no_datatype = bids_path_mri.copy().update(datatype=None)
update_anat_landmarks(bids_path=bids_path_mri_no_datatype,
landmarks=landmarks)

# Check handling of invalid input
bids_path_invalid = bids_path_mri.copy().update(datatype='meg')
with pytest.raises(ValueError, match='Can only operate on "anat"'):
Expand Down
4 changes: 2 additions & 2 deletions mne_bids/tests/test_write.py
Original file line number Diff line number Diff line change
Expand Up @@ -446,7 +446,7 @@ def test_line_freq(line_freq, _bids_validate, tmp_path):
_bids_validate(bids_root)

eeg_json_fpath = (bids_path.copy()
.update(suffix='eeg', extension='.json')
.update(suffix='eeg', datatype='eeg', extension='.json')
.fpath)
with open(eeg_json_fpath, 'r', encoding='utf-8') as fin:
eeg_json = json.load(fin)
Expand Down Expand Up @@ -916,7 +916,7 @@ def test_kit(_bids_validate, tmp_path):
# ensure the marker file is produced in the right place
marker_fname = BIDSPath(
subject=subject_id, session=session_id, task=task, run=run,
suffix='markers', extension='.sqd',
suffix='markers', extension='.sqd', datatype='meg',
root=bids_root)
assert op.exists(marker_fname)

Expand Down