Skip to content

Commit

Permalink
Built-in BIDS support for dandi upload
Browse files Browse the repository at this point in the history
  • Loading branch information
TheChymera committed May 12, 2022
1 parent 14030cb commit 46bcdb1
Show file tree
Hide file tree
Showing 3 changed files with 99 additions and 30 deletions.
30 changes: 1 addition & 29 deletions dandi/cli/cmd_validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import click

from .base import devel_debug_option, devel_option, lgr, map_to_click_exceptions
from ..utils import pluralize


@click.command()
Expand All @@ -31,39 +30,12 @@ def validate_bids(
if report_flag and not report:
report = report_flag

validation_result = validate_bids_(
_ = validate_bids_(
*paths,
report=report,
schema_version=schema,
devel_debug=devel_debug,
)
missing_files = [
pattern["regex"]
for pattern in validation_result["schema_tracking"]
if pattern["mandatory"]
]
error_list = []
if missing_files:
error_substring = (
f"{pluralize(len(missing_files), 'filename pattern')} required "
"by BIDS could not be found"
)
error_list.append(error_substring)
if validation_result["path_tracking"]:
error_substring = (
f"{pluralize(len(validation_result['path_tracking']), 'filename')} "
"did not match any pattern known to BIDS"
)
error_list.append(error_substring)
if error_list:
error_string = " and ".join(error_list)
error_string = f"Summary: {error_string}."
click.secho(
error_string,
bold=True,
fg="red",
)
raise SystemExit(1)


@click.command()
Expand Down
63 changes: 63 additions & 0 deletions dandi/upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,11 @@ class Uploaded(TypedDict):
errors: List[str]


class BIDSValidationError(Exception):
    """Raised when a discovered BIDS dataset fails validation during upload."""


def upload(
paths: Optional[List[Union[str, Path]]] = None,
existing: str = "refresh",
Expand All @@ -55,6 +60,21 @@ def upload(
dandiset_ = Dandiset.find(os.path.commonpath(paths))
else:
dandiset_ = Dandiset.find(None)

# pre-validate BIDS datasets before going for individual
# files etc
bids_datasets = _bids_discover_and_validate(dandiset_.path, paths, validation)

if bids_datasets:
bids_datasets = [str(i) for i in bids_datasets]
if not allow_any_path:
lgr.info(
"Setting allow_any_path to True since we detected %s under: %s",
pluralize(len(bids_datasets), "BIDS dataset"),
", ".join(bids_datasets),
)
allow_any_path = True

if not dandiset_:
raise RuntimeError(
f"Found no {dandiset_metadata_file} anywhere in common ancestor of"
Expand Down Expand Up @@ -383,3 +403,46 @@ def check_replace_asset(

def skip_file(msg: Any) -> Dict[str, str]:
    """Build the standard upload-status record for a skipped file.

    Parameters
    ----------
    msg : Any
        Reason for skipping; rendered with ``str()``.

    Returns
    -------
    Dict[str, str]
        Record with ``status`` set to ``"skipped"`` and ``message`` set to
        the stringified reason.
    """
    reason = str(msg)
    return {"status": "skipped", "message": reason}


def _bids_discover_and_validate(dandiset_path, paths, validation):
    """Temporary implementation for discovery and validation of BIDS datasets

    Discovers BIDS datasets under ``dandiset_path`` by locating
    ``dataset_description.json`` files, and — unless ``validation`` is
    ``"skip"`` — validates every discovered dataset that contains at least
    one of the requested ``paths`` (or all of them when no paths are given).

    Parameters
    ----------
    dandiset_path : str or Path
        Root under which to search for BIDS datasets.
    paths : list of str or Path, optional
        Upload paths used to narrow which datasets get validated; falsy
        means "validate all discovered datasets".
    validation : str
        Validation mode; ``"skip"`` disables validation entirely, and
        ``"ignore"`` runs validation but does not abort on errors.

    Returns
    -------
    list of Path
        The BIDS dataset roots that were considered (sorted when
        validation ran, discovery order otherwise).

    References:
    - unification of validation records: https://github.com/dandi/dandi-cli/issues/943
    - validation "design doc": https://github.com/dandi/dandi-cli/pull/663
    """
    from .utils import find_files
    from .validate import validate_bids

    # Normalize to Path so .is_relative_to() below works even when the
    # caller passed plain strings (upload() accepts Union[str, Path]).
    bids_lookup_paths = {Path(p) for p in paths} if paths else None
    bids_descriptions = map(Path, find_files("dataset_description.json", dandiset_path))
    bids_datasets = [p.parent for p in bids_descriptions]
    if bids_datasets:
        lgr.debug(
            "Detected %d BIDS datasets at following paths: %s",
            len(bids_datasets),
            ", ".join(str(i) for i in bids_datasets),
        )

    if validation == "skip":
        return bids_datasets

    if bids_lookup_paths:
        # Keep only datasets that contain at least one requested path.
        # NOTE: Path.is_relative_to requires Python >= 3.9.
        bids_datasets_to_validate = {
            bd
            for bd in bids_datasets
            if any(p.is_relative_to(bd) for p in bids_lookup_paths)
        }
    else:
        bids_datasets_to_validate = set(bids_datasets)
    bids_datasets_to_validate = sorted(bids_datasets_to_validate)
    for bd in bids_datasets_to_validate:
        # "ignore" mode reports problems but must not abort the upload.
        validate_bids(bd, allow_errors=validation == "ignore")
    return bids_datasets_to_validate
36 changes: 35 additions & 1 deletion dandi/validate.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
from typing import Any, Iterator, List, Optional, Tuple

import click

from .files import find_dandi_files
from .utils import pluralize

# TODO: provide our own "errors" records, which would also include warnings etc

Expand All @@ -10,6 +13,7 @@ def validate_bids(
schema_version: Optional[str] = None,
devel_debug: bool = False,
report: Optional[str] = None,
allow_errors: Optional[bool] = False,
) -> Any:
"""Validate BIDS paths.
Expand All @@ -25,6 +29,8 @@ def validate_bids(
If `True` a log will be written using the standard output path of `.write_report()`.
If string, the string will be used as the output path.
If the variable evaluates as False, no log will be written.
allow_errors : bool, optional
Whether to raise errors on invalid dataset.
Notes
-----
Expand All @@ -33,9 +39,37 @@ def validate_bids(
"""
from .bids_validator_xs import validate_bids as validate_bids_

return validate_bids_(
validation_result = validate_bids_(
paths, schema_version=schema_version, debug=devel_debug, report_path=report
)
missing_files = [
pattern["regex"]
for pattern in validation_result["schema_tracking"]
if pattern["mandatory"]
]
error_list = []
if missing_files:
error_substring = (
f"{pluralize(len(missing_files), 'filename pattern')} required "
"by BIDS could not be found"
)
error_list.append(error_substring)
if validation_result["path_tracking"]:
error_substring = (
f"{pluralize(len(validation_result['path_tracking']), 'filename')} "
"did not match any pattern known to BIDS"
)
error_list.append(error_substring)
if error_list:
error_string = " and ".join(error_list)
error_string = f"Summary: {error_string}."
click.secho(
error_string,
bold=True,
fg="red",
)
if not allow_errors:
raise SystemExit(1)


def validate(
Expand Down

0 comments on commit 46bcdb1

Please sign in to comment.