Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Built-in BIDS support for dandi upload #1011

Merged
merged 30 commits into from
Jul 8, 2022
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
46bcdb1
Built-in BIDS support for `dandi upload`
TheChymera May 12, 2022
d508c7b
fixed logic for None input
TheChymera May 13, 2022
5531d3c
not using return value
TheChymera May 13, 2022
db3379c
Code improvements as suggested by jwodder
TheChymera Jun 6, 2022
0890e0a
Code style and no SystemExit in library function.
TheChymera Jun 7, 2022
7ba78d5
Corrected return type
TheChymera Jun 7, 2022
9ae2218
Fixed typing
TheChymera Jun 8, 2022
d698804
Better user prompt
TheChymera Jun 9, 2022
953f7b4
Validation return typing and docstring
TheChymera Jun 9, 2022
adf2605
Allowing upload of BIDS datasets relative to input path
TheChymera Jun 10, 2022
e10c8ea
No click usage outside of CLI library
TheChymera Jun 20, 2022
0492dbb
Not computing list if errors are suppressed
TheChymera Jun 27, 2022
d079d9c
Typing
TheChymera Jun 27, 2022
540923a
docstring fix
TheChymera Jun 30, 2022
a3b89dc
Separating validation evaluation logic from reporting
TheChymera Jun 30, 2022
c59bae2
fix typing
TheChymera Jun 30, 2022
358d2f2
Improve function naming
TheChymera Jun 30, 2022
62cf170
Testing data
TheChymera Jul 5, 2022
044ee28
Fixed typing
TheChymera Jul 5, 2022
ac37533
testing bids upload
TheChymera Jul 5, 2022
306b2f9
using shutil instead of deprecated distutils
TheChymera Jul 5, 2022
4847cbb
fixing copytree parameter
TheChymera Jul 5, 2022
9f82246
improved bids upload testing
TheChymera Jul 5, 2022
c230d26
Creating changes file
TheChymera Jul 5, 2022
bf3aba7
allowing existing directory
TheChymera Jul 5, 2022
7f58c05
boilerplate copytree
TheChymera Jul 5, 2022
50092ea
Check asset upload
TheChymera Jul 6, 2022
427a136
Testing upload with validation ignore
TheChymera Jul 6, 2022
2864c77
simplified path processing in BIDS lookup
TheChymera Jul 6, 2022
8139966
docstring formatting
TheChymera Jul 6, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 1 addition & 29 deletions dandi/cli/cmd_validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import click

from .base import devel_debug_option, devel_option, lgr, map_to_click_exceptions
from ..utils import pluralize


@click.command()
Expand All @@ -31,39 +30,12 @@ def validate_bids(
if report_flag and not report:
report = report_flag

validation_result = validate_bids_(
validate_bids_(
*paths,
report=report,
schema_version=schema,
devel_debug=devel_debug,
)
missing_files = [
pattern["regex"]
for pattern in validation_result["schema_tracking"]
if pattern["mandatory"]
]
error_list = []
if missing_files:
error_substring = (
f"{pluralize(len(missing_files), 'filename pattern')} required "
"by BIDS could not be found"
)
error_list.append(error_substring)
if validation_result["path_tracking"]:
error_substring = (
f"{pluralize(len(validation_result['path_tracking']), 'filename')} "
"did not match any pattern known to BIDS"
)
error_list.append(error_substring)
if error_list:
error_string = " and ".join(error_list)
error_string = f"Summary: {error_string}."
click.secho(
error_string,
bold=True,
fg="red",
)
raise SystemExit(1)


@click.command()
Expand Down
66 changes: 66 additions & 0 deletions dandi/upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,21 @@ def upload(
" paths. Use 'dandi download' or 'organize' first."
)

# pre-validate BIDS datasets before going for individual
# files etc
bids_datasets = _bids_discover_and_validate(dandiset_.path, paths, validation)

if bids_datasets:
bids_datasets = [str(i) for i in bids_datasets]
TheChymera marked this conversation as resolved.
Show resolved Hide resolved
if not allow_any_path:
lgr.info(
"Setting allow_any_path to True since we detected %s under the "
TheChymera marked this conversation as resolved.
Show resolved Hide resolved
"following paths: %s",
pluralize(len(bids_datasets), "BIDS dataset"),
", ".join(bids_datasets),
)
allow_any_path = True

instance = get_instance(dandi_instance)
assert instance.api is not None
api_url = instance.api
Expand Down Expand Up @@ -383,3 +398,54 @@ def check_replace_asset(

def skip_file(msg: Any) -> Dict[str, str]:
return {"status": "skipped", "message": str(msg)}


def _bids_discover_and_validate(
dandiset_path: str,
paths: Optional[List[Union[str, Path]]] = None,
validation: str = "require",
):
TheChymera marked this conversation as resolved.
Show resolved Hide resolved
"""Temporary implementation for discovery and validation of BIDS datasets

References:
- unification of validation records: https://github.com/dandi/dandi-cli/issues/943
- validation "design doc": https://github.com/dandi/dandi-cli/pull/663
"""
from .utils import find_files
from .validate import validate_bids

if paths:
bids_lookup_paths = set(paths)
TheChymera marked this conversation as resolved.
Show resolved Hide resolved
else:
bids_lookup_paths = None
bids_descriptions = map(
Path, find_files(r"dataset_description\.json$", dandiset_path)
TheChymera marked this conversation as resolved.
Show resolved Hide resolved
)
bids_datasets = [p.parent for p in bids_descriptions]
if bids_datasets:
lgr.debug(
"Detected %s under following paths: %s",
pluralize(len(bids_datasets), "BIDS dataset"),
", ".join(str(i) for i in bids_datasets),
)

if validation != "skip":
if bids_lookup_paths:
bids_datasets_to_validate = list()
for p in bids_lookup_paths:
for bd in bids_datasets:
try:
p.relative_to(bd)
TheChymera marked this conversation as resolved.
Show resolved Hide resolved
except ValueError:
pass
else:
bids_datasets_to_validate.append(bd)
break
else:
bids_datasets_to_validate = bids_datasets
bids_datasets_to_validate = sorted(bids_datasets_to_validate)
TheChymera marked this conversation as resolved.
Show resolved Hide resolved
for bd in bids_datasets_to_validate:
_ = validate_bids(bd, allow_errors=validation == "ignore")
TheChymera marked this conversation as resolved.
Show resolved Hide resolved
return bids_datasets_to_validate
else:
return bids_datasets
36 changes: 35 additions & 1 deletion dandi/validate.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
from typing import Any, Iterator, List, Optional, Tuple

import click

from .files import find_dandi_files
from .utils import pluralize

# TODO: provide our own "errors" records, which would also include warnings etc

Expand All @@ -10,6 +13,7 @@ def validate_bids(
schema_version: Optional[str] = None,
devel_debug: bool = False,
report: Optional[str] = None,
allow_errors: Optional[bool] = False,
) -> Any:
TheChymera marked this conversation as resolved.
Show resolved Hide resolved
"""Validate BIDS paths.

Expand All @@ -25,6 +29,8 @@ def validate_bids(
If `True` a log will be written using the standard output path of `.write_report()`.
If string, the string will be used as the output path.
If the variable evaluates as False, no log will be written.
allow_errors : bool, optional
Whether to raise errors on invalid dataset.

Notes
-----
Expand All @@ -33,9 +39,37 @@ def validate_bids(
"""
from .bids_validator_xs import validate_bids as validate_bids_

return validate_bids_(
validation_result = validate_bids_(
paths, schema_version=schema_version, debug=devel_debug, report_path=report
)
missing_files = [
pattern["regex"]
for pattern in validation_result["schema_tracking"]
if pattern["mandatory"]
]
error_list = []
if missing_files:
error_substring = (
f"{pluralize(len(missing_files), 'filename pattern')} required "
"by BIDS could not be found"
)
error_list.append(error_substring)
if validation_result["path_tracking"]:
error_substring = (
f"{pluralize(len(validation_result['path_tracking']), 'filename')} "
"did not match any pattern known to BIDS"
)
error_list.append(error_substring)
if error_list:
error_string = " and ".join(error_list)
error_string = f"Summary: {error_string}."
click.secho(
TheChymera marked this conversation as resolved.
Show resolved Hide resolved
error_string,
bold=True,
fg="red",
)
if not allow_errors:
raise SystemExit(1)
TheChymera marked this conversation as resolved.
Show resolved Hide resolved


def validate(
Expand Down