Skip to content

Commit

Permalink
Merge pull request #962 from dandi/bids_metadata
Browse files Browse the repository at this point in the history
BIDS metadata read-in pilot.
  • Loading branch information
yarikoptic authored May 11, 2022
2 parents 231f7f0 + 11ebca0 commit 14030cb
Show file tree
Hide file tree
Showing 5 changed files with 92 additions and 2 deletions.
13 changes: 13 additions & 0 deletions dandi/bids_validator_xs.py
Original file line number Diff line number Diff line change
Expand Up @@ -737,6 +737,19 @@ def validate_bids(
regex_schema,
debug=debug,
)
# Record schema version.
# Not sure whether to incorporate in validation_result.
if bids_schema_dir == os.path.join(
os.path.abspath(os.path.dirname(__file__)),
"data",
"schema",
):
# Declare we are using live version,
# string will evaluate as larger than numbered versions.
schema_version = "99999.0.0"
else:
_, schema_version = os.path.split(bids_schema_dir)
validation_result["bids_schema_version"] = schema_version

if report_path:
if isinstance(report_path, str):
Expand Down
1 change: 1 addition & 0 deletions dandi/cli/cmd_ls.py
Original file line number Diff line number Diff line change
Expand Up @@ -368,6 +368,7 @@ def fn():
if (
not op.isdir(path)
and "nwb_version" not in rec
and "bids_schema_version" not in rec
and (keys and "nwb_version" in keys)
):
# Let's at least get that one
Expand Down
12 changes: 12 additions & 0 deletions dandi/cli/tests/test_ls.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import json
import os
from unittest.mock import ANY

from click.testing import CliRunner
Expand Down Expand Up @@ -48,6 +49,17 @@ def load(s):
assert metadata[f] == simple1_nwb_metadata[f]


@mark.skipif_no_network
def test_ls_bids_file(bids_examples):
    # Resolve a known asset inside the checked-out BIDS examples tree.
    target = os.path.join(
        bids_examples, "asl003/sub-Sub1/anat/sub-Sub1_T1w.nii.gz"
    )
    result = CliRunner().invoke(ls, ["-f", "yaml", target])
    assert result.exit_code == 0, result.output
    # Exactly one record is expected for a single file, carrying the
    # subject id extracted from the BIDS path.
    parsed = yaml_load(result.stdout, "safe")
    assert len(parsed) == 1
    assert parsed[0]["subject_id"] == "Sub1"


@mark.skipif_no_network
def test_ls_dandiset_url():
r = CliRunner().invoke(
Expand Down
6 changes: 5 additions & 1 deletion dandi/consts.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@
"nd_types",
)

metadata_bids_fields = ("bids_schema_version",)

metadata_nwb_fields = (
metadata_nwb_file_fields
+ metadata_nwb_subject_fields
Expand Down Expand Up @@ -63,7 +65,9 @@
"number_of_tissue_samples",
)

metadata_all_fields = metadata_nwb_fields + metadata_dandiset_fields
metadata_all_fields = (
metadata_bids_fields + metadata_nwb_fields + metadata_dandiset_fields
)

#: Regular expression for a valid Dandiset identifier. This regex is not
#: anchored.
Expand Down
62 changes: 61 additions & 1 deletion dandi/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from datetime import datetime, timedelta
from functools import lru_cache
import itertools
import os
import os.path as op
from pathlib import Path
Expand All @@ -25,6 +26,7 @@
from xml.dom.minidom import parseString

from dandischema import models
import h5py
import requests
import tenacity

Expand All @@ -43,7 +45,50 @@

lgr = get_logger()

# TODO: Remove this hard-coding once the current-version fallback is merged.

BIDS_TO_DANDI = {
"subject": "subject_id",
"session": "session_id",
}


def _rename_bids_keys(bids_metadata, mapping=BIDS_TO_DANDI):
"""Standardize BIDS metadata field naming to match DANDI."""
return {mapping.get(k, k): v for k, v in bids_metadata.items()}


def _path_in_bids(
check_path, bids_marker="dataset_description.json", end_marker="dandiset.yaml"
):
"""Determine whether a path is a member of a BIDS dataset.
Parameters
----------
check_path: str or Path
bids_marker: str, optional
String giving a filename, the existence of which in a directory will mark it as a
BIDS dataset root directory.
end_marker: str, optional
String giving a filename, the existence of which in a directory will end the
search.
Returns
-------
bool
"""
check_path = Path(check_path)
for dir_level in itertools.chain([check_path], check_path.parents):
bids_marker_candidate = dir_level / bids_marker
end_marker_candidate = dir_level / end_marker
if bids_marker_candidate.is_file() or bids_marker_candidate.is_symlink():
return True
if end_marker_candidate.is_file() or end_marker_candidate.is_symlink():
return False
return False


# Disable this for clean hacking
@metadata_cache.memoize_path
def get_metadata(path: Union[str, Path]) -> Optional[dict]:
"""Get selected metadata from a .nwb file or a dandiset directory
Expand Down Expand Up @@ -71,6 +116,19 @@ def get_metadata(path: Union[str, Path]) -> Optional[dict]:
lgr.debug("Failed to get metadata for %s: %s", path, exc)
return None

# Somewhat less fragile search than previous proposals,
# could still be augmented with `_is_nwb` to disambiguate both cases
# at the detection level.
if _path_in_bids(path):
from .bids_validator_xs import validate_bids

_meta = validate_bids(path)
meta = _meta["match_listing"][0]
meta["bids_schema_version"] = _meta["bids_schema_version"]
meta = _rename_bids_keys(meta)
return meta
h5py.File(path)

if nwb_has_external_links(path):
raise NotImplementedError(
f"NWB files with external links are not supported: {path}"
Expand Down Expand Up @@ -195,7 +253,9 @@ def _check_decimal_parts(age_parts: List[str]) -> bool:
flags=re.I,
)
if m is None:
raise ValueError(f"Failed to parse the trailing part of age {age_parts[-1]!r}")
raise ValueError(
f"Failed to parse the trailing part of age {age_parts[-1]!r}"
)
age_parts = age_parts[:-1] + [m[i] for i in range(1, 3) if m[i]]
decim_part = ["." in el for el in age_parts]
return not (any(decim_part) and any(decim_part[:-1]))
Expand Down

0 comments on commit 14030cb

Please sign in to comment.