From 64262553f8793a69c7ddb7ec8ad5de3a38fa688b Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 18 Apr 2022 06:04:31 -0400 Subject: [PATCH 1/8] BIDS metadata read-in pilot. --- dandi/cli/cmd_ls.py | 1 + dandi/consts.py | 1 + dandi/metadata.py | 38 +++++++++++++++++++++++++++++++++++++- 3 files changed, 39 insertions(+), 1 deletion(-) diff --git a/dandi/cli/cmd_ls.py b/dandi/cli/cmd_ls.py index bd14f30ec..ce792964e 100644 --- a/dandi/cli/cmd_ls.py +++ b/dandi/cli/cmd_ls.py @@ -368,6 +368,7 @@ def fn(): if ( not op.isdir(path) and "nwb_version" not in rec + and "bids_version" not in rec and (keys and "nwb_version" in keys) ): # Let's at least get that one diff --git a/dandi/consts.py b/dandi/consts.py index ec5e944de..6342b01a6 100644 --- a/dandi/consts.py +++ b/dandi/consts.py @@ -30,6 +30,7 @@ metadata_nwb_dandi_fields = ("cell_id", "slice_id", "tissue_sample_id", "probe_ids") metadata_nwb_computed_fields = ( + "bids_version", "number_of_electrodes", "number_of_units", "nwb_version", diff --git a/dandi/metadata.py b/dandi/metadata.py index 0824f295a..a48192fb9 100644 --- a/dandi/metadata.py +++ b/dandi/metadata.py @@ -25,6 +25,7 @@ from xml.dom.minidom import parseString from dandischema import models +import h5py import requests import tenacity @@ -43,7 +44,27 @@ lgr = get_logger() +# Remove hard-coding when current version fallback is merged. +BIDS_VERSION = "1.7.0+012+dandi001" +BIDS_TO_DANDI = { + "subject": "subject_id", + "session": "session_id", +} + + +def _rename_bids_keys(bids_metadata, conversion=BIDS_TO_DANDI): + """Standardize BIDS metadata field naming to match DANDI.""" + converted_bids_metadata = {} + for key in bids_metadata.keys(): + if key in conversion: + converted_bids_metadata[conversion[key]] = bids_metadata[key] + else: + converted_bids_metadata[key] = bids_metadata[key] + return converted_bids_metadata + + +# Disable this for clean hacking @metadata_cache.memoize_path def get_metadata(path: Union[str, Path]) -> Optional[dict]: """Get selected metadata from a .nwb file or a dandiset directory @@ -71,6 +92,19 @@ def get_metadata(path: Union[str, Path]) -> Optional[dict]: lgr.debug("Failed to get metadata for %s: %s", path, exc) return None + # Pretty fragile way to determine that it's not nwb. + # Ideally can find something better. + try: + h5py.File(path) + except OSError: + from .bids_validator_xs import validate_bids + + meta = validate_bids(path, schema_version=BIDS_VERSION) + meta = meta["match_listing"][0] + meta["bids_version"] = BIDS_VERSION + meta = _rename_bids_keys(meta) + return meta + if nwb_has_external_links(path): raise NotImplementedError( f"NWB files with external links are not supported: {path}" @@ -195,7 +229,9 @@ def _check_decimal_parts(age_parts: List[str]) -> bool: flags=re.I, ) if m is None: - raise ValueError(f"Failed to parse the trailing part of age {age_parts[-1]!r}") + raise ValueError( + f"Failed to parse the trailing part of age {age_parts[-1]!r}" + ) age_parts = age_parts[:-1] + [m[i] for i in range(1, 3) if m[i]] decim_part = ["." in el for el in age_parts] return not (any(decim_part) and any(decim_part[:-1])) From 2d5ab5b44dc502a633d9b436c517b5e25942ebea Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 18 Apr 2022 07:08:29 -0400 Subject: [PATCH 2/8] Separate set for bids metadata --- dandi/consts.py | 7 +++++-- dandi/metadata.py | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/dandi/consts.py b/dandi/consts.py index 6342b01a6..3626aaf45 100644 --- a/dandi/consts.py +++ b/dandi/consts.py @@ -30,13 +30,14 @@ metadata_nwb_dandi_fields = ("cell_id", "slice_id", "tissue_sample_id", "probe_ids") metadata_nwb_computed_fields = ( - "bids_version", "number_of_electrodes", "number_of_units", "nwb_version", "nd_types", ) +metadata_bids_fields = ("bids_version",) + metadata_nwb_fields = ( metadata_nwb_file_fields + metadata_nwb_subject_fields @@ -64,7 +65,9 @@ "number_of_tissue_samples", ) -metadata_all_fields = metadata_nwb_fields + metadata_dandiset_fields +metadata_all_fields = ( + metadata_bids_fields + metadata_nwb_fields + metadata_dandiset_fields +) #: Regular expression for a valid Dandiset identifier. This regex is not #: anchored. diff --git a/dandi/metadata.py b/dandi/metadata.py index a48192fb9..9d6ba1b77 100644 --- a/dandi/metadata.py +++ b/dandi/metadata.py @@ -93,7 +93,7 @@ def get_metadata(path: Union[str, Path]) -> Optional[dict]: return None # Pretty fragile way to determine that it's not nwb. - # Ideally can find something better. + # Ideally can find something better, like `is_nwb` or `is_bids`. try: h5py.File(path) except OSError: From 344b88bb7ed3b8f1c5353876327df799c4f37d41 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Thu, 21 Apr 2022 17:44:52 -0400 Subject: [PATCH 3/8] Returning used BIDS schema version Analogous to: https://github.com/bids-standard/bids-specification/pull/969/commits/e6ed0a3e54ee451a717e0c747f1e501343c73057 --- dandi/bids_validator_xs.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/dandi/bids_validator_xs.py b/dandi/bids_validator_xs.py index 188aa2fa8..e9617d463 100644 --- a/dandi/bids_validator_xs.py +++ b/dandi/bids_validator_xs.py @@ -711,6 +711,16 @@ def validate_bids( regex_schema, debug=debug, ) + # Record schema version. + # Not sure whether to incorporate in validation_result. + if bids_schema_dir == os.path.join( + os.path.abspath(os.path.dirname(__file__)), + "data/schema", + ): + schema_version = 9999 + else: + _, schema_version = os.path.split(bids_schema_dir) + validation_result["bids_schema_version"] = schema_version if report_path: if isinstance(report_path, str): From b9997904279a3a806c4f3c09ff32a7a71c003993 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Thu, 21 Apr 2022 17:51:26 -0400 Subject: [PATCH 4/8] reporting BIDS schema version as used by the validator --- dandi/cli/cmd_ls.py | 2 +- dandi/consts.py | 2 +- dandi/metadata.py | 7 +++---- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/dandi/cli/cmd_ls.py b/dandi/cli/cmd_ls.py index ce792964e..62002044b 100644 --- a/dandi/cli/cmd_ls.py +++ b/dandi/cli/cmd_ls.py @@ -368,7 +368,7 @@ def fn(): if ( not op.isdir(path) and "nwb_version" not in rec - and "bids_version" not in rec + and "bids_schema_version" not in rec and (keys and "nwb_version" in keys) ): # Let's at least get that one diff --git a/dandi/consts.py b/dandi/consts.py index 3626aaf45..0239bb91b 100644 --- a/dandi/consts.py +++ b/dandi/consts.py @@ -36,7 +36,7 @@ "nd_types", ) -metadata_bids_fields = ("bids_version",) +metadata_bids_fields = ("bids_schema_version",) metadata_nwb_fields = ( metadata_nwb_file_fields diff --git a/dandi/metadata.py b/dandi/metadata.py index 9d6ba1b77..fdc53b4ee 100644 --- a/dandi/metadata.py +++ b/dandi/metadata.py @@ -45,7 +45,6 @@ lgr = get_logger() # Remove hard-coding when current version fallback is merged. -BIDS_VERSION = "1.7.0+012+dandi001" BIDS_TO_DANDI = { "subject": "subject_id", @@ -99,9 +98,9 @@ def get_metadata(path: Union[str, Path]) -> Optional[dict]: except OSError: from .bids_validator_xs import validate_bids - meta = validate_bids(path, schema_version=BIDS_VERSION) - meta = meta["match_listing"][0] - meta["bids_version"] = BIDS_VERSION + _meta = validate_bids(path) + meta = _meta["match_listing"][0] + meta["bids_schema_version"] = _meta["bids_schema_version"] meta = _rename_bids_keys(meta) return meta From e4d99cdad34c624079110a4c018491a50d3b77bd Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Mon, 9 May 2022 07:45:50 -0400 Subject: [PATCH 5/8] Improved BIDS file detection And made path more Windows-robust --- dandi/bids_validator_xs.py | 3 ++- dandi/metadata.py | 48 ++++++++++++++++++++++++++++++++++---- 2 files changed, 45 insertions(+), 6 deletions(-) diff --git a/dandi/bids_validator_xs.py b/dandi/bids_validator_xs.py index e9617d463..dbb719b69 100644 --- a/dandi/bids_validator_xs.py +++ b/dandi/bids_validator_xs.py @@ -715,7 +715,8 @@ def validate_bids( # Not sure whether to incorporate in validation_result. if bids_schema_dir == os.path.join( os.path.abspath(os.path.dirname(__file__)), - "data/schema", + "data", + "schema", ): schema_version = 9999 else: diff --git a/dandi/metadata.py b/dandi/metadata.py index fdc53b4ee..9c4e47890 100644 --- a/dandi/metadata.py +++ b/dandi/metadata.py @@ -63,6 +63,43 @@ def _rename_bids_keys(bids_metadata, conversion=BIDS_TO_DANDI): return converted_bids_metadata +def _path_in_bids( + path, root_marker="dataset_description.json", end_marker="dandiset.yaml" +): + """Determine whether a path is a member of a BIDS dataset. + + Parameters + ---------- + path: str or Path + root_marker: str, optional + String giving a filename, the existence of which in a directory will mark it as a + BIDS dataset root directory. + end_marker: str, optional + String giving a filename, the existence of which in a directory will end the + search. + + Returns + ------- + bool + """ + from .utils import find_files + + path = Path(path) + if [i for i in find_files(root_marker, path)]: + return True + search_path = path + while True: + search_path = search_path.parent + bids_marker_candidate = search_path / root_marker + end_marker_candidate = search_path / end_marker + if bids_marker_candidate.is_file(): + return True + if end_marker_candidate.is_file(): + return False + if search_path == Path("/"): + return False + + # Disable this for clean hacking @metadata_cache.memoize_path def get_metadata(path: Union[str, Path]) -> Optional[dict]: @@ -91,11 +128,10 @@ def get_metadata(path: Union[str, Path]) -> Optional[dict]: lgr.debug("Failed to get metadata for %s: %s", path, exc) return None - # Pretty fragile way to determine that it's not nwb. - # Ideally can find something better, like `is_nwb` or `is_bids`. - try: - h5py.File(path) - except OSError: + # Somewhat less fragile search than previous proposals, + # could still be augmented with `_is_nwb` to disambiguate both cases + # at the detection level. + if _path_in_bids(path): from .bids_validator_xs import validate_bids _meta = validate_bids(path) @@ -103,6 +139,8 @@ def get_metadata(path: Union[str, Path]) -> Optional[dict]: meta["bids_schema_version"] = _meta["bids_schema_version"] meta = _rename_bids_keys(meta) return meta + else: + h5py.File(path) if nwb_has_external_links(path): raise NotImplementedError( From 650d630095e4734fcdded8241ea9e87b8f09eafd Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Wed, 11 May 2022 01:23:56 -0400 Subject: [PATCH 6/8] Code style improvements --- dandi/bids_validator_xs.py | 4 +++- dandi/metadata.py | 41 +++++++++++++------------------------- 2 files changed, 17 insertions(+), 28 deletions(-) diff --git a/dandi/bids_validator_xs.py b/dandi/bids_validator_xs.py index dbb719b69..e6c332d28 100644 --- a/dandi/bids_validator_xs.py +++ b/dandi/bids_validator_xs.py @@ -718,7 +718,9 @@ def validate_bids( "data", "schema", ): - schema_version = 9999 + # Declare we are using live version, + # string will evaluate as larger than numbered versions. + schema_version = "live" else: _, schema_version = os.path.split(bids_schema_dir) validation_result["bids_schema_version"] = schema_version diff --git a/dandi/metadata.py b/dandi/metadata.py index 9c4e47890..926ae8d85 100644 --- a/dandi/metadata.py +++ b/dandi/metadata.py @@ -2,6 +2,7 @@ from datetime import datetime, timedelta from functools import lru_cache +import itertools import os import os.path as op from pathlib import Path @@ -52,26 +53,20 @@ } -def _rename_bids_keys(bids_metadata, conversion=BIDS_TO_DANDI): +def _rename_bids_keys(bids_metadata, mapping=BIDS_TO_DANDI): """Standardize BIDS metadata field naming to match DANDI.""" - converted_bids_metadata = {} - for key in bids_metadata.keys(): - if key in conversion: - converted_bids_metadata[conversion[key]] = bids_metadata[key] - else: - converted_bids_metadata[key] = bids_metadata[key] - return converted_bids_metadata + return {mapping.get(k, k): v for k, v in bids_metadata.items()} def _path_in_bids( - path, root_marker="dataset_description.json", end_marker="dandiset.yaml" + check_path, bids_marker="dataset_description.json", end_marker="dandiset.yaml" ): """Determine whether a path is a member of a BIDS dataset. Parameters ---------- - path: str or Path - root_marker: str, optional + check_path: str or Path + bids_marker: str, optional String giving a filename, the existence of which in a directory will mark it as a BIDS dataset root directory. end_marker: str, optional @@ -82,22 +77,15 @@ def _path_in_bids( ------- bool """ - from .utils import find_files - - path = Path(path) - if [i for i in find_files(root_marker, path)]: - return True - search_path = path - while True: - search_path = search_path.parent - bids_marker_candidate = search_path / root_marker - end_marker_candidate = search_path / end_marker - if bids_marker_candidate.is_file(): + check_path = Path(check_path) + for dir_level in itertools.chain([check_path], check_path.parents): + bids_marker_candidate = dir_level / bids_marker + end_marker_candidate = dir_level / end_marker + if bids_marker_candidate.is_file() or bids_marker_candidate.is_symlink(): return True - if end_marker_candidate.is_file(): - return False - if search_path == Path("/"): + if end_marker_candidate.is_file() or end_marker_candidate.is_symlink(): return False + return False # Disable this for clean hacking @@ -139,8 +127,7 @@ def get_metadata(path: Union[str, Path]) -> Optional[dict]: meta["bids_schema_version"] = _meta["bids_schema_version"] meta = _rename_bids_keys(meta) return meta - else: - h5py.File(path) + h5py.File(path) if nwb_has_external_links(path): raise NotImplementedError( From a86d4569fdfb4b70bf3c7137ed36bc6d5d21d365 Mon Sep 17 00:00:00 2001 From: Horea Christian Date: Wed, 11 May 2022 01:43:04 -0400 Subject: [PATCH 7/8] Added test for BIDS ls --- dandi/cli/tests/test_ls.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/dandi/cli/tests/test_ls.py b/dandi/cli/tests/test_ls.py index 35f963ead..b04c8e175 100644 --- a/dandi/cli/tests/test_ls.py +++ b/dandi/cli/tests/test_ls.py @@ -1,4 +1,5 @@ import json +import os from unittest.mock import ANY from click.testing import CliRunner @@ -48,6 +49,17 @@ def load(s): assert metadata[f] == simple1_nwb_metadata[f] +@mark.skipif_no_network +def test_ls_bids_file(bids_examples): + bids_file_path = "asl003/sub-Sub1/anat/sub-Sub1_T1w.nii.gz" + bids_file_path = os.path.join(bids_examples, bids_file_path) + r = CliRunner().invoke(ls, ["-f", "yaml", bids_file_path]) + assert r.exit_code == 0, r.output + data = yaml_load(r.stdout, "safe") + assert len(data) == 1 + assert data[0]["subject_id"] == "Sub1" + + @mark.skipif_no_network def test_ls_dandiset_url(): r = CliRunner().invoke( From 11ebca00c40bbd2a3a193921c0cdf4cb0799fef1 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Wed, 11 May 2022 16:28:04 -0400 Subject: [PATCH 8/8] Use some ridiculously large semver for BIDS if using "live" instance of schema --- dandi/bids_validator_xs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dandi/bids_validator_xs.py b/dandi/bids_validator_xs.py index e6c332d28..9f6303dda 100644 --- a/dandi/bids_validator_xs.py +++ b/dandi/bids_validator_xs.py @@ -720,7 +720,7 @@ def validate_bids( ): # Declare we are using live version, # string will evaluate as larger than numbered versions. - schema_version = "live" + schema_version = "99999.0.0" else: _, schema_version = os.path.split(bids_schema_dir) validation_result["bids_schema_version"] = schema_version