From 7908c990b439ddc8c94e0746c162b1957e610146 Mon Sep 17 00:00:00 2001 From: Saksham Sharda Date: Wed, 24 Nov 2021 16:30:50 +0530 Subject: [PATCH 01/67] get images method --- dandi/consts.py | 3 +++ dandi/pynwb_utils.py | 20 ++++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/dandi/consts.py b/dandi/consts.py index d62445990..d5613424d 100644 --- a/dandi/consts.py +++ b/dandi/consts.py @@ -130,3 +130,6 @@ #: HTTP response status codes that should always be retried (until we run out #: of retries) RETRY_STATUSES = (500, 502, 503, 504) + +external_file_types = [".mp4"] +external_file_modules = ["processing", "acquisition"] diff --git a/dandi/pynwb_utils.py b/dandi/pynwb_utils.py index f8fd6cb87..dc8d3a6da 100644 --- a/dandi/pynwb_utils.py +++ b/dandi/pynwb_utils.py @@ -18,7 +18,10 @@ metadata_nwb_computed_fields, metadata_nwb_file_fields, metadata_nwb_subject_fields, + external_file_types, + external_file_modules ) +from pathlib import Path from .utils import get_module_version lgr = get_logger() @@ -227,6 +230,23 @@ def _get_pynwb_metadata(path): key = f[len("number_of_") :] out[f] = len(getattr(nwb, key, []) or []) + # get external_file data: + out["external_file"] = get_image_series(nwb) + + return out + + +def get_image_series(nwb): + out = [] + for module_name in external_file_modules: + module_cont = getattr(nwb, module_name) + for name, ob in module_cont.items(): + if isinstance(ob, pynwb.image.ImageSeries) and ob.external_file is not None: + out_dict = dict(id=ob.object_id, name=ob.name, external_files=[]) + for ext_file in ob.external_file: + if Path(ext_file).suffix in external_file_types: + out_dict['external_files'].append(Path(ext_file)) + out.append(out_dict) return out From ddd48a6ca811883677371d8feb12b10b6100385c Mon Sep 17 00:00:00 2001 From: Saksham Sharda Date: Wed, 24 Nov 2021 18:34:32 +0530 Subject: [PATCH 02/67] rename, symlinks --- dandi/cli/cmd_organize.py | 27 +++++++++++++++++++++++++++ dandi/organize.py | 17 +++++++++++++++++ dandi/pynwb_utils.py | 24 +++++++++++++++++++++++- 3 files changed, 67 insertions(+), 1 deletion(-) diff --git a/dandi/cli/cmd_organize.py b/dandi/cli/cmd_organize.py index 3414ff6f1..765235f15 100644 --- a/dandi/cli/cmd_organize.py +++ b/dandi/cli/cmd_organize.py @@ -5,6 +5,8 @@ from .base import dandiset_path_option, devel_debug_option, lgr, map_to_click_exceptions from ..consts import file_operation_modes +from ..organize import create_external_file_names +from ..pynwb_utils import rename_nwb_external_files @click.command() @@ -212,6 +214,11 @@ def _get_metadata(path): metadata = create_unique_filenames_from_metadata(metadata) + # create video file name and re write nwb file external files: + metadata = create_external_file_names(metadata) + for meta in metadata: + rename_nwb_external_files(meta, dandiset_path) + # Verify first that the target paths do not exist yet, and fail if they do # Note: in "simulate" mode we do early check as well, so this would be # duplicate but shouldn't hurt @@ -303,6 +310,26 @@ def _get_metadata(path): raise NotImplementedError(files_mode) acted_upon.append(e) + for ext_file_dict in e['external_file_objects']: + for no, (name_old, name_new) in enumerate(zip(ext_file_dict['external_files'], + ext_file_dict['external_files_renamed'])): + new_path = dandiset_path/name_new + name_old_str = str(name_old) + if not op.exists(str(new_path.parent)): + os.makedirs(str(new_path.parent)) + new_path_str = str(new_path) + if files_mode == "symlink": + os.symlink(name_old_str, new_path_str) + elif files_mode == "hardlink": + os.link(name_old_str, new_path_str) + elif files_mode == "copy": + copy_file(name_old_str, new_path_str) + elif files_mode == "move": + move_file(name_old_str, new_path_str) + else: + raise NotImplementedError(files_mode) + + if acted_upon and in_place: # We might need to cleanup a bit - e.g. prune empty directories left # by the move in in-place mode diff --git a/dandi/organize.py b/dandi/organize.py index 6b8699fd1..a4b6b740c 100644 --- a/dandi/organize.py +++ b/dandi/organize.py @@ -172,6 +172,23 @@ def create_unique_filenames_from_metadata(metadata): return metadata +def create_external_file_names(metadata): + metadata = deepcopy(metadata) + import uuid + for meta in metadata: + if "dandi_path" not in meta or "external_file_objects" not in meta: + continue + dandi_file_path = Path(meta["dandi_path"]).with_suffix("") + for ext_file_dict in meta['external_file_objects']: + renamed_path_list = [] + uuid_str = ext_file_dict.get(id, str(uuid.uuid4())) + for no, ext_file in enumerate(ext_file_dict['external_files']): + renamed = dandi_file_path/f'{uuid_str}_external_file_{no}{ext_file.suffix}' + renamed_path_list.append(str(renamed)) + ext_file_dict['external_files_renamed'] = renamed_path_list + return metadata + + def _assign_obj_id(metadata, non_unique): msg = "%d out of %d paths are not unique" % (len(non_unique), len(metadata)) diff --git a/dandi/pynwb_utils.py b/dandi/pynwb_utils.py index dc8d3a6da..579a9bde2 100644 --- a/dandi/pynwb_utils.py +++ b/dandi/pynwb_utils.py @@ -231,7 +231,7 @@ def _get_pynwb_metadata(path): out[f] = len(getattr(nwb, key, []) or []) # get external_file data: - out["external_file"] = get_image_series(nwb) + out["external_file_objects"] = get_image_series(nwb) return out @@ -250,6 +250,28 @@ def get_image_series(nwb): return out +def rename_nwb_external_files(meta, dandiset_path): + if not np.all(i in meta for i in ["path", "dandi_path", "external_file_objects"]): + lgr.warning(f'could not rename external files, update metadata' + f'with "path", "dandi_path", "external_file_objects"') + return + with NWBHDF5IO(meta["path"], mode='r', load_namespaces=True) as io: + nwb = io.read() + for ext_file_dict in meta['external_file_objects']: + # retrieve nwb neurodata object of the given object id: + container_list = [child for child in nwb.children() if ext_file_dict['id'] == child.object_id] + if len(container_list) == 0: + continue + else: + container = container_list[0] + # rename all external files: + for no, (name_old, name_new) in enumerate(zip(ext_file_dict['external_files'], + ext_file_dict['external_files_renamed'])): + new_path = dandiset_path/name_new + container.external_file[no] = str(new_path) + + + @validate_cache.memoize_path def validate(path, devel_debug=False): """Run validation on a file and return errors From 0a8c2e06aa0684dfd2169c68a59778675b867489 Mon Sep 17 00:00:00 2001 From: Saksham Sharda Date: Sat, 27 Nov 2021 18:45:27 +0530 Subject: [PATCH 03/67] docstrings, suggested changes --- dandi/consts.py | 2 +- dandi/organize.py | 2 +- dandi/pynwb_utils.py | 15 +++++++++++++++ 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/dandi/consts.py b/dandi/consts.py index d5613424d..1423fe7bb 100644 --- a/dandi/consts.py +++ b/dandi/consts.py @@ -131,5 +131,5 @@ #: of retries) RETRY_STATUSES = (500, 502, 503, 504) -external_file_types = [".mp4"] +external_file_extensions = [".mp4", ".avi", ".wmv", ".mov"] external_file_modules = ["processing", "acquisition"] diff --git a/dandi/organize.py b/dandi/organize.py index a4b6b740c..04c4e0b63 100644 --- a/dandi/organize.py +++ b/dandi/organize.py @@ -9,6 +9,7 @@ import os.path as op from pathlib import Path import re +import uuid import numpy as np @@ -174,7 +175,6 @@ def create_unique_filenames_from_metadata(metadata): def create_external_file_names(metadata): metadata = deepcopy(metadata) - import uuid for meta in metadata: if "dandi_path" not in meta or "external_file_objects" not in meta: continue diff --git a/dandi/pynwb_utils.py b/dandi/pynwb_utils.py index 579a9bde2..29f19aa84 100644 --- a/dandi/pynwb_utils.py +++ b/dandi/pynwb_utils.py @@ -237,6 +237,19 @@ def _get_pynwb_metadata(path): def get_image_series(nwb): + """ + This method supports _get_pynwb_metadata() in retrieving all ImageSeries related metadata from an open nwb file. + Specifically it pulls out the ImageSeries uuid, name and all the externally linked files + named under the argument 'external_file'. + Parameters + ---------- + nwb: pynwb.NWBFile + Returns + ------- + out: list + list of dicts : [{id: , name: , external_files=[ImageSeries.external_file]}] + if no ImageSeries found in the given modules to check, then it returns an empty list. + """ out = [] for module_name in external_file_modules: module_cont = getattr(nwb, module_name) @@ -246,6 +259,8 @@ def get_image_series(nwb): for ext_file in ob.external_file: if Path(ext_file).suffix in external_file_types: out_dict['external_files'].append(Path(ext_file)) + else: + lgr.warning(f'external file {ext_file} should be one of: {external_file_types}') out.append(out_dict) return out From 7dc800e21730da7d9273e6afc55611b3942b50ba Mon Sep 17 00:00:00 2001 From: Saksham Sharda Date: Sat, 27 Nov 2021 18:55:59 +0530 Subject: [PATCH 04/67] docstrings --- dandi/organize.py | 18 ++++++++++++++++++ dandi/pynwb_utils.py | 10 ++++++++++ 2 files changed, 28 insertions(+) diff --git a/dandi/organize.py b/dandi/organize.py index 04c4e0b63..73317db02 100644 --- a/dandi/organize.py +++ b/dandi/organize.py @@ -174,6 +174,24 @@ def create_unique_filenames_from_metadata(metadata): def create_external_file_names(metadata): + """ + Renames the external_file attribute in an ImageSeries according to the rule: + Example, the Initial name of file: + external_file = [name1.mp4] + rename to: + external_file = [dandiset-path-of-nwbfile/ + dandi-renamed-nwbfile_name(folder without extention .nwb)/ + f'{ImageSeries.object_id}_external_file_0.mp4' + This is stored in a new field in the metadata['external_file_objects'][0]['external_files_renamed'] + Parameters + ---------- + metadata: dict + the metadata dict created prior, that will have a new key with the external_file argument. + Returns + ------- + metadata: dict + updated metadata + """ metadata = deepcopy(metadata) for meta in metadata: if "dandi_path" not in meta or "external_file_objects" not in meta: diff --git a/dandi/pynwb_utils.py b/dandi/pynwb_utils.py index 29f19aa84..4f5e116af 100644 --- a/dandi/pynwb_utils.py +++ b/dandi/pynwb_utils.py @@ -266,6 +266,16 @@ def get_image_series(nwb): def rename_nwb_external_files(meta, dandiset_path): + """ + This method, renames the external_file attribute in an ImageSeries datatype in an open nwb file. + + Parameters + ---------- + meta: dict + metadata dict contaiing all the retrieved metadata from an nwb file. + dandiset_path: pathlib.Path + path where the renamed nwb files would go as symlinks (the newly created dandiset location) + """ if not np.all(i in meta for i in ["path", "dandi_path", "external_file_objects"]): lgr.warning(f'could not rename external files, update metadata' f'with "path", "dandi_path", "external_file_objects"') From 2abc92ef4fae313d10678da206f7cd05647c6859 Mon Sep 17 00:00:00 2001 From: Saksham Sharda Date: Mon, 29 Nov 2021 13:00:15 +0530 Subject: [PATCH 05/67] create external-paths option, separate method for organizing files, relative renaming of video files wrt to nwbfiles --- dandi/cli/cmd_organize.py | 42 +++++++++++++++++---------------------- dandi/organize.py | 30 ++++++++++++++++++++++++---- dandi/pynwb_utils.py | 18 +++++++---------- 3 files changed, 51 insertions(+), 39 deletions(-) diff --git a/dandi/cli/cmd_organize.py b/dandi/cli/cmd_organize.py index 765235f15..bf10f68a1 100644 --- a/dandi/cli/cmd_organize.py +++ b/dandi/cli/cmd_organize.py @@ -5,7 +5,7 @@ from .base import dandiset_path_option, devel_debug_option, lgr, map_to_click_exceptions from ..consts import file_operation_modes -from ..organize import create_external_file_names +from ..organize import _create_external_file_names, organize_external_files from ..pynwb_utils import rename_nwb_external_files @@ -36,6 +36,15 @@ default="auto", show_default=True, ) +@click.option( + "-rw", + "--rewrite", + type=click.Choice(["external-paths"]), + default=None, + help="if set to re-write attributes in an nwbfile. For example, if the value " + "is set to external-paths then the external_path attribute of nwbfiles'" + "ImageSeries will be written with renamed video paths according to set rules." +) @click.argument("paths", nargs=-1, type=click.Path(exists=True)) @devel_debug_option() @map_to_click_exceptions @@ -45,6 +54,7 @@ def organize( invalid="fail", files_mode="auto", devel_debug=False, + rewrite=None, ): """(Re)organize files according to the metadata. @@ -215,9 +225,10 @@ def _get_metadata(path): metadata = create_unique_filenames_from_metadata(metadata) # create video file name and re write nwb file external files: - metadata = create_external_file_names(metadata) - for meta in metadata: - rename_nwb_external_files(meta, dandiset_path) + if rewrite == "external-paths": + metadata = _create_external_file_names(metadata) + for meta in metadata: + rename_nwb_external_files(meta) # Verify first that the target paths do not exist yet, and fail if they do # Note: in "simulate" mode we do early check as well, so this would be @@ -310,26 +321,6 @@ def _get_metadata(path): raise NotImplementedError(files_mode) acted_upon.append(e) - for ext_file_dict in e['external_file_objects']: - for no, (name_old, name_new) in enumerate(zip(ext_file_dict['external_files'], - ext_file_dict['external_files_renamed'])): - new_path = dandiset_path/name_new - name_old_str = str(name_old) - if not op.exists(str(new_path.parent)): - os.makedirs(str(new_path.parent)) - new_path_str = str(new_path) - if files_mode == "symlink": - os.symlink(name_old_str, new_path_str) - elif files_mode == "hardlink": - os.link(name_old_str, new_path_str) - elif files_mode == "copy": - copy_file(name_old_str, new_path_str) - elif files_mode == "move": - move_file(name_old_str, new_path_str) - else: - raise NotImplementedError(files_mode) - - if acted_upon and in_place: # We might need to cleanup a bit - e.g. prune empty directories left # by the move in in-place mode @@ -342,6 +333,9 @@ def _get_metadata(path): except Exception as exc: lgr.debug("Failed to remove directory %s: %s", d, exc) + if rewrite == "external-paths": + organize_external_files(metadata, dandiset_path, files_mode) + def msg_(msg, n, cond=None): if hasattr(n, "__len__"): n = len(n) diff --git a/dandi/organize.py b/dandi/organize.py index 73317db02..833f2bdbd 100644 --- a/dandi/organize.py +++ b/dandi/organize.py @@ -16,7 +16,7 @@ from . import get_logger from .exceptions import OrganizeImpossibleError from .pynwb_utils import get_neurodata_types_to_modalities_map, get_object_id -from .utils import ensure_datetime, flattened, yaml_load +from .utils import ensure_datetime, flattened, yaml_load, copy_file, move_file lgr = get_logger() @@ -173,7 +173,7 @@ def create_unique_filenames_from_metadata(metadata): return metadata -def create_external_file_names(metadata): +def _create_external_file_names(metadata): """ Renames the external_file attribute in an ImageSeries according to the rule: Example, the Initial name of file: @@ -196,17 +196,39 @@ def create_external_file_names(metadata): for meta in metadata: if "dandi_path" not in meta or "external_file_objects" not in meta: continue - dandi_file_path = Path(meta["dandi_path"]).with_suffix("") + nwb_folder_name = Path(meta["dandi_path"]).with_suffix("").name for ext_file_dict in meta['external_file_objects']: renamed_path_list = [] uuid_str = ext_file_dict.get(id, str(uuid.uuid4())) for no, ext_file in enumerate(ext_file_dict['external_files']): - renamed = dandi_file_path/f'{uuid_str}_external_file_{no}{ext_file.suffix}' + renamed = nwb_folder_name/f'{uuid_str}_external_file_{no}{ext_file.suffix}' renamed_path_list.append(str(renamed)) ext_file_dict['external_files_renamed'] = renamed_path_list return metadata +def organize_external_files(metadata, dandiset_path, files_mode): + for e in metadata: + for ext_file_dict in e['external_file_objects']: + for no, (name_old, name_new) in enumerate(zip(ext_file_dict['external_files'], + ext_file_dict['external_files_renamed'])): + new_path = dandiset_path/e["dandi_path"].parent/name_new + name_old_str = str(name_old) + if not op.exists(str(new_path.parent)): + os.makedirs(str(new_path.parent)) + new_path_str = str(new_path) + if files_mode == "symlink": + os.symlink(name_old_str, new_path_str) + elif files_mode == "hardlink": + os.link(name_old_str, new_path_str) + elif files_mode == "copy": + copy_file(name_old_str, new_path_str) + elif files_mode == "move": + move_file(name_old_str, new_path_str) + else: + raise NotImplementedError(files_mode) + + def _assign_obj_id(metadata, non_unique): msg = "%d out of %d paths are not unique" % (len(non_unique), len(metadata)) diff --git a/dandi/pynwb_utils.py b/dandi/pynwb_utils.py index 4f5e116af..2b5a2185b 100644 --- a/dandi/pynwb_utils.py +++ b/dandi/pynwb_utils.py @@ -18,7 +18,7 @@ metadata_nwb_computed_fields, metadata_nwb_file_fields, metadata_nwb_subject_fields, - external_file_types, + external_file_extensions, external_file_modules ) from pathlib import Path @@ -231,12 +231,12 @@ def _get_pynwb_metadata(path): out[f] = len(getattr(nwb, key, []) or []) # get external_file data: - out["external_file_objects"] = get_image_series(nwb) + out["external_file_objects"] = _get_image_series(nwb) return out -def get_image_series(nwb): +def _get_image_series(nwb): """ This method supports _get_pynwb_metadata() in retrieving all ImageSeries related metadata from an open nwb file. Specifically it pulls out the ImageSeries uuid, name and all the externally linked files @@ -257,22 +257,20 @@ def get_image_series(nwb): if isinstance(ob, pynwb.image.ImageSeries) and ob.external_file is not None: out_dict = dict(id=ob.object_id, name=ob.name, external_files=[]) for ext_file in ob.external_file: - if Path(ext_file).suffix in external_file_types: + if Path(ext_file).suffix in external_file_extensions: out_dict['external_files'].append(Path(ext_file)) else: - lgr.warning(f'external file {ext_file} should be one of: {external_file_types}') + lgr.warning(f'external file {ext_file} should be one of: {external_file_extensions}') out.append(out_dict) return out -def rename_nwb_external_files(meta, dandiset_path): +def rename_nwb_external_files(meta): """ This method, renames the external_file attribute in an ImageSeries datatype in an open nwb file. Parameters ---------- - meta: dict - metadata dict contaiing all the retrieved metadata from an nwb file. dandiset_path: pathlib.Path path where the renamed nwb files would go as symlinks (the newly created dandiset location) """ @@ -292,9 +290,7 @@ def rename_nwb_external_files(meta, dandiset_path): # rename all external files: for no, (name_old, name_new) in enumerate(zip(ext_file_dict['external_files'], ext_file_dict['external_files_renamed'])): - new_path = dandiset_path/name_new - container.external_file[no] = str(new_path) - + container.external_file[no] = str(name_new) @validate_cache.memoize_path From 41291095e98979991ae0321ca017cf7fd24eb690 Mon Sep 17 00:00:00 2001 From: Saksham Sharda Date: Mon, 29 Nov 2021 17:06:55 +0530 Subject: [PATCH 06/67] add typehints, update docstrings, check external paths as option --- dandi/cli/cmd_organize.py | 9 +++----- dandi/organize.py | 25 ++++++++++++++++------ dandi/pynwb_utils.py | 45 +++++++++++++++++++++------------------ 3 files changed, 46 insertions(+), 33 deletions(-) diff --git a/dandi/cli/cmd_organize.py b/dandi/cli/cmd_organize.py index bf10f68a1..180f1b820 100644 --- a/dandi/cli/cmd_organize.py +++ b/dandi/cli/cmd_organize.py @@ -224,12 +224,6 @@ def _get_metadata(path): metadata = create_unique_filenames_from_metadata(metadata) - # create video file name and re write nwb file external files: - if rewrite == "external-paths": - metadata = _create_external_file_names(metadata) - for meta in metadata: - rename_nwb_external_files(meta) - # Verify first that the target paths do not exist yet, and fail if they do # Note: in "simulate" mode we do early check as well, so this would be # duplicate but shouldn't hurt @@ -333,7 +327,10 @@ def _get_metadata(path): except Exception as exc: lgr.debug("Failed to remove directory %s: %s", d, exc) + # create video file name and re write nwb file external files: if rewrite == "external-paths": + metadata = _create_external_file_names(metadata) + rename_nwb_external_files(metadata) organize_external_files(metadata, dandiset_path, files_mode) def msg_(msg, n, cond=None): diff --git a/dandi/organize.py b/dandi/organize.py index 833f2bdbd..a97865e0e 100644 --- a/dandi/organize.py +++ b/dandi/organize.py @@ -173,7 +173,7 @@ def create_unique_filenames_from_metadata(metadata): return metadata -def _create_external_file_names(metadata): +def _create_external_file_names(metadata: dict): """ Renames the external_file attribute in an ImageSeries according to the rule: Example, the Initial name of file: @@ -185,12 +185,12 @@ def _create_external_file_names(metadata): This is stored in a new field in the metadata['external_file_objects'][0]['external_files_renamed'] Parameters ---------- - metadata: dict - the metadata dict created prior, that will have a new key with the external_file argument. + metadata: list + list of metadata dictionaries created during the call to pynwb_utils._get_pynwb_metadata Returns ------- - metadata: dict - updated metadata + metadata: list + updated list of metadata dictionaries """ metadata = deepcopy(metadata) for meta in metadata: @@ -207,7 +207,20 @@ def _create_external_file_names(metadata): return metadata -def organize_external_files(metadata, dandiset_path, files_mode): +def organize_external_files(metadata: list, dandiset_path: Path, files_mode: str): + """ + Organizes the external_files into the new Dandiset folder structure. + + Parameters + ---------- + metadata: list + list of metadata dictionaries created during the call to pynwb_utils._get_pynwb_metadata + dandiset_path: Path + full path of the main dandiset folder. + files_mode: str + one of "symlink", "copy", "move", "hardlink" + + """ for e in metadata: for ext_file_dict in e['external_file_objects']: for no, (name_old, name_new) in enumerate(zip(ext_file_dict['external_files'], diff --git a/dandi/pynwb_utils.py b/dandi/pynwb_utils.py index 2b5a2185b..da9524a37 100644 --- a/dandi/pynwb_utils.py +++ b/dandi/pynwb_utils.py @@ -236,7 +236,7 @@ def _get_pynwb_metadata(path): return out -def _get_image_series(nwb): +def _get_image_series(nwb: pynwb.NWBFile): """ This method supports _get_pynwb_metadata() in retrieving all ImageSeries related metadata from an open nwb file. Specifically it pulls out the ImageSeries uuid, name and all the externally linked files @@ -265,32 +265,35 @@ def _get_image_series(nwb): return out -def rename_nwb_external_files(meta): +def rename_nwb_external_files(metadata: list): """ This method, renames the external_file attribute in an ImageSeries datatype in an open nwb file. + It pulls information about the ImageSEries objects from metadata: metadata["external_file_objects"] + populated during _get_pynwb_metadata() call. Parameters ---------- - dandiset_path: pathlib.Path - path where the renamed nwb files would go as symlinks (the newly created dandiset location) + metadata: list + list of dictionaries containing the metadata gathered from the nwbfile """ - if not np.all(i in meta for i in ["path", "dandi_path", "external_file_objects"]): - lgr.warning(f'could not rename external files, update metadata' - f'with "path", "dandi_path", "external_file_objects"') - return - with NWBHDF5IO(meta["path"], mode='r', load_namespaces=True) as io: - nwb = io.read() - for ext_file_dict in meta['external_file_objects']: - # retrieve nwb neurodata object of the given object id: - container_list = [child for child in nwb.children() if ext_file_dict['id'] == child.object_id] - if len(container_list) == 0: - continue - else: - container = container_list[0] - # rename all external files: - for no, (name_old, name_new) in enumerate(zip(ext_file_dict['external_files'], - ext_file_dict['external_files_renamed'])): - container.external_file[no] = str(name_new) + for meta in metadata: + if not np.all(i in meta for i in ["path", "dandi_path", "external_file_objects"]): + lgr.warning(f'could not rename external files, update metadata' + f'with "path", "dandi_path", "external_file_objects"') + return + with NWBHDF5IO(meta["path"], mode='r', load_namespaces=True) as io: + nwb = io.read() + for ext_file_dict in meta['external_file_objects']: + # retrieve nwb neurodata object of the given object id: + container_list = [child for child in nwb.children() if ext_file_dict['id'] == child.object_id] + if len(container_list) == 0: + continue + else: + container = container_list[0] + # rename all external files: + for no, (name_old, name_new) in enumerate(zip(ext_file_dict['external_files'], + ext_file_dict['external_files_renamed'])): + container.external_file[no] = str(name_new) @validate_cache.memoize_path From fb572b84ceef9b1e75103795d5aa528a0c342584 Mon Sep 17 00:00:00 2001 From: Saksham Sharda Date: Tue, 30 Nov 2021 09:14:27 +0530 Subject: [PATCH 07/67] fix:open nwbfile in r+ --- dandi/pynwb_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dandi/pynwb_utils.py b/dandi/pynwb_utils.py index da9524a37..8d6798108 100644 --- a/dandi/pynwb_utils.py +++ b/dandi/pynwb_utils.py @@ -281,7 +281,7 @@ def rename_nwb_external_files(metadata: list): lgr.warning(f'could not rename external files, update metadata' f'with "path", "dandi_path", "external_file_objects"') return - with NWBHDF5IO(meta["path"], mode='r', load_namespaces=True) as io: + with NWBHDF5IO(meta["path"], mode='r+', load_namespaces=True) as io: nwb = io.read() for ext_file_dict in meta['external_file_objects']: # retrieve nwb neurodata object of the given object id: From d2eaf03bed9b1f0791e9df71893353c3f5e98ce2 Mon Sep 17 00:00:00 2001 From: Saksham Sharda Date: Tue, 30 Nov 2021 09:25:17 +0530 Subject: [PATCH 08/67] using os.path for handling os paths --- dandi/organize.py | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/dandi/organize.py b/dandi/organize.py index a97865e0e..a8523707d 100644 --- a/dandi/organize.py +++ b/dandi/organize.py @@ -196,18 +196,18 @@ def _create_external_file_names(metadata: dict): for meta in metadata: if "dandi_path" not in meta or "external_file_objects" not in meta: continue - nwb_folder_name = Path(meta["dandi_path"]).with_suffix("").name + nwb_folder_name = os.basename(meta["dandi_path"]).split('.')[0] for ext_file_dict in meta['external_file_objects']: renamed_path_list = [] uuid_str = ext_file_dict.get(id, str(uuid.uuid4())) for no, ext_file in enumerate(ext_file_dict['external_files']): - renamed = nwb_folder_name/f'{uuid_str}_external_file_{no}{ext_file.suffix}' + renamed = op.join(nwb_folder_name,f'{uuid_str}_external_file_{no}{ext_file.suffix}') renamed_path_list.append(str(renamed)) ext_file_dict['external_files_renamed'] = renamed_path_list return metadata -def organize_external_files(metadata: list, dandiset_path: Path, files_mode: str): +def organize_external_files(metadata: list, dandiset_path: str, files_mode: str): """ Organizes the external_files into the new Dandiset folder structure. @@ -215,7 +215,7 @@ def organize_external_files(metadata: list, dandiset_path: Path, files_mode: str ---------- metadata: list list of metadata dictionaries created during the call to pynwb_utils._get_pynwb_metadata - dandiset_path: Path + dandiset_path: str full path of the main dandiset folder. files_mode: str one of "symlink", "copy", "move", "hardlink" @@ -225,19 +225,18 @@ def organize_external_files(metadata: list, dandiset_path: Path, files_mode: str for ext_file_dict in e['external_file_objects']: for no, (name_old, name_new) in enumerate(zip(ext_file_dict['external_files'], ext_file_dict['external_files_renamed'])): - new_path = dandiset_path/e["dandi_path"].parent/name_new + new_path = op.join(dandiset_path, op.dirname(e["dandi_path"]), name_new) name_old_str = str(name_old) - if not op.exists(str(new_path.parent)): - os.makedirs(str(new_path.parent)) - new_path_str = str(new_path) + if not op.exists(op.dirname(new_path)): + os.makedirs(op.dirname(new_path)) if files_mode == "symlink": - os.symlink(name_old_str, new_path_str) + os.symlink(name_old_str, new_path) elif files_mode == "hardlink": - os.link(name_old_str, new_path_str) + os.link(name_old_str, new_path) elif files_mode == "copy": - copy_file(name_old_str, new_path_str) + copy_file(name_old_str, new_path) elif files_mode == "move": - move_file(name_old_str, new_path_str) + move_file(name_old_str, new_path) else: raise NotImplementedError(files_mode) From 8d14488ff1c3c46449ccb211bc350cc8f61ff5cc Mon Sep 17 00:00:00 2001 From: Saksham Sharda Date: Tue, 30 Nov 2021 09:25:49 +0530 Subject: [PATCH 09/67] add external-files-mode, assert files-mode to be copy/move when rewrite --- dandi/cli/cmd_organize.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/dandi/cli/cmd_organize.py b/dandi/cli/cmd_organize.py index 180f1b820..dd3029685 100644 --- a/dandi/cli/cmd_organize.py +++ b/dandi/cli/cmd_organize.py @@ -45,6 +45,13 @@ "is set to external-paths then the external_path attribute of nwbfiles'" "ImageSeries will be written with renamed video paths according to set rules." ) +@click.option( + "-ef", + "--external-files-mode", + type=click.Choice(file_operation_modes), + default=None, + help="check help for --files-mode option" +) @click.argument("paths", nargs=-1, type=click.Path(exists=True)) @devel_debug_option() @map_to_click_exceptions @@ -55,6 +62,7 @@ def organize( files_mode="auto", devel_debug=False, rewrite=None, + external_files_mode=None, ): """(Re)organize files according to the metadata. @@ -114,6 +122,10 @@ def act(func, *args, **kwargs): lgr.debug("%s %s %s", func.__name__, args, kwargs) return func(*args, **kwargs) + if rewrite is not None and files_mode not in ["copy", "move"]: + raise ValueError("files mode need to be one of 'copy/move', for external-file arg to be " + "changed in the nwbfile") + if dandiset_path is None: dandiset = Dandiset.find(os.curdir) if not dandiset: @@ -224,6 +236,11 @@ def _get_metadata(path): metadata = create_unique_filenames_from_metadata(metadata) + # update metadata with external_file information: + if rewrite == "external-paths": + metadata = _create_external_file_names(metadata) + rename_nwb_external_files(metadata) + # Verify first that the target paths do not exist yet, and fail if they do # Note: in "simulate" mode we do early check as well, so this would be # duplicate but shouldn't hurt @@ -329,9 +346,8 @@ def _get_metadata(path): # create video file name and re write nwb file external files: if rewrite == "external-paths": - metadata = _create_external_file_names(metadata) - rename_nwb_external_files(metadata) - organize_external_files(metadata, dandiset_path, files_mode) + files_mode_external = files_mode if external_files_mode is None else external_files_mode + organize_external_files(metadata, dandiset_path, files_mode_external) def msg_(msg, n, cond=None): if hasattr(n, "__len__"): From ab8c7a1fcc110340ba1e40fe71e7d978cbac8887 Mon Sep 17 00:00:00 2001 From: Saksham Sharda Date: Fri, 3 Dec 2021 12:22:52 +0530 Subject: [PATCH 10/67] rewrite option change to "external-file" --- dandi/cli/cmd_organize.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/dandi/cli/cmd_organize.py b/dandi/cli/cmd_organize.py index dd3029685..0750148a5 100644 --- a/dandi/cli/cmd_organize.py +++ b/dandi/cli/cmd_organize.py @@ -39,7 +39,7 @@ @click.option( "-rw", "--rewrite", - type=click.Choice(["external-paths"]), + type=click.Choice(["external-file"]), default=None, help="if set to re-write attributes in an nwbfile. For example, if the value " "is set to external-paths then the external_path attribute of nwbfiles'" @@ -123,8 +123,7 @@ def act(func, *args, **kwargs): return func(*args, **kwargs) if rewrite is not None and files_mode not in ["copy", "move"]: - raise ValueError("files mode need to be one of 'copy/move', for external-file arg to be " - "changed in the nwbfile") + raise ValueError("files_mode needs to be one of 'copy/move' for the rewrite option to work") if dandiset_path is None: dandiset = Dandiset.find(os.curdir) @@ -237,7 +236,7 @@ def _get_metadata(path): metadata = create_unique_filenames_from_metadata(metadata) # update metadata with external_file information: - if rewrite == "external-paths": + if rewrite == "external-file": metadata = _create_external_file_names(metadata) rename_nwb_external_files(metadata) @@ -345,7 +344,7 @@ def _get_metadata(path): lgr.debug("Failed to remove directory %s: %s", d, exc) # create video file name and re write nwb file external files: - if rewrite == "external-paths": + if rewrite == "external-file": files_mode_external = files_mode if external_files_mode is None else external_files_mode organize_external_files(metadata, dandiset_path, files_mode_external) From eccc1d339fb2c0dee1c6b1f96fae357c19a691db Mon Sep 17 00:00:00 2001 From: Saksham Sharda Date: Fri, 3 Dec 2021 12:23:04 +0530 Subject: [PATCH 11/67] using logger for warnings --- dandi/cli/cmd_organize.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/dandi/cli/cmd_organize.py b/dandi/cli/cmd_organize.py index 0750148a5..64b4ea14d 100644 --- a/dandi/cli/cmd_organize.py +++ b/dandi/cli/cmd_organize.py @@ -7,6 +7,10 @@ from ..consts import file_operation_modes from ..organize import _create_external_file_names, organize_external_files from ..pynwb_utils import rename_nwb_external_files +from .. import get_logger + + +lgr = get_logger() @click.command() @@ -346,6 +350,8 @@ def _get_metadata(path): # create video file name and re write nwb file external files: if rewrite == "external-file": files_mode_external = files_mode if external_files_mode is None else external_files_mode + lgr.warn('external-files-mode option not specified, assuming the same value' + f'set for files-mode : {files_mode}') organize_external_files(metadata, dandiset_path, files_mode_external) def msg_(msg, n, cond=None): From 71fbb0bbc1e2ceec31a2261cecf0e8812a16264c Mon Sep 17 00:00:00 2001 From: Saksham Sharda Date: Sat, 4 Dec 2021 17:59:12 +0530 Subject: [PATCH 12/67] assert external_files_mode as one of 'move/copy' --- dandi/cli/cmd_organize.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/dandi/cli/cmd_organize.py b/dandi/cli/cmd_organize.py index 64b4ea14d..dfac0067b 100644 --- a/dandi/cli/cmd_organize.py +++ b/dandi/cli/cmd_organize.py @@ -240,7 +240,20 @@ def _get_metadata(path): metadata = create_unique_filenames_from_metadata(metadata) # update metadata with external_file information: - if rewrite == "external-file": + if len(metadata["external_files"]) == 0 and rewrite == "external_file": + lgr.warning("rewrite option specified as 'external_file' but no external_files found" + "linked to the nwbfile") + elif len(metadata["external_files"]) > 0 and rewrite != "external_file": + raise ValueError("rewrite option not specified but found external video files linked to" + "the nwbfile, change option to 'external_file'") + + if rewrite == "external_file": + if external_files_mode is None: + external_files_mode = "move" + lgr.warning("external_files_mode not specified, setting to recommended mode: 'move' ") + elif external_files_mode not in ["move", "copy"]: + raise ValueError("external_files mode should be either of 'move/copy' to " + "overwrite the external_file in the nwbfile") metadata = _create_external_file_names(metadata) rename_nwb_external_files(metadata) @@ -349,10 +362,7 @@ def _get_metadata(path): # create video file name and re write nwb file external files: if rewrite == "external-file": - files_mode_external = files_mode if external_files_mode is None else external_files_mode - lgr.warn('external-files-mode option not specified, assuming the same value' - f'set for files-mode : {files_mode}') - organize_external_files(metadata, dandiset_path, files_mode_external) + organize_external_files(metadata, dandiset_path, external_files_mode) def msg_(msg, n, cond=None): if hasattr(n, "__len__"): From 2dd53b8c19f3378eac435876ceefac8f541bee74 Mon Sep 17 00:00:00 2001 From: Saksham Sharda Date: Mon, 6 Dec 2021 17:07:22 +0530 Subject: [PATCH 13/67] testing local files pass --- dandi/cli/cmd_organize.py | 15 +++++++++------ dandi/organize.py | 2 +- dandi/pynwb_utils.py | 7 ++++--- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/dandi/cli/cmd_organize.py b/dandi/cli/cmd_organize.py index dfac0067b..8a3a9ee65 100644 --- a/dandi/cli/cmd_organize.py +++ b/dandi/cli/cmd_organize.py @@ -240,14 +240,17 @@ def _get_metadata(path): metadata = create_unique_filenames_from_metadata(metadata) # update metadata with external_file information: - if len(metadata["external_files"]) == 0 and rewrite == "external_file": + external_files_missing_metadata_bool = [len(m["external_file_objects"]) == 0 for m in metadata] + if all(external_files_missing_metadata_bool) and rewrite == "external-file": lgr.warning("rewrite option specified as 'external_file' but no external_files found" - "linked to the nwbfile") - elif len(metadata["external_files"]) > 0 and rewrite != "external_file": + f"linked to any nwbfile found in {paths}") + + elif not all(external_files_missing_metadata_bool) and rewrite != "external-file": raise ValueError("rewrite option not specified but found external video files linked to" - "the nwbfile, change option to 'external_file'") + f"the nwbfiles {[metadata[no]['path'] for no, a in enumerate(external_files_missing_metadata_bool) if not a]}, " + f"change option to 'external_file'") - if rewrite == "external_file": + if rewrite == "external-file": if external_files_mode is None: external_files_mode = "move" lgr.warning("external_files_mode not specified, setting to recommended mode: 'move' ") @@ -255,7 +258,6 @@ def _get_metadata(path): raise ValueError("external_files mode should be either of 'move/copy' to " "overwrite the external_file in the nwbfile") metadata = _create_external_file_names(metadata) - rename_nwb_external_files(metadata) # Verify first that the target paths do not exist yet, and fail if they do # Note: in "simulate" mode we do early check as well, so this would be @@ -362,6 +364,7 @@ def _get_metadata(path): # create video file name and re write nwb file external files: if rewrite == "external-file": + rename_nwb_external_files(metadata, dandiset_path) organize_external_files(metadata, dandiset_path, external_files_mode) def msg_(msg, n, cond=None): diff --git a/dandi/organize.py b/dandi/organize.py index a8523707d..d360e6e4b 100644 --- a/dandi/organize.py +++ b/dandi/organize.py @@ -196,7 +196,7 @@ def _create_external_file_names(metadata: dict): for meta in metadata: if "dandi_path" not in meta or "external_file_objects" not in meta: continue - nwb_folder_name = os.basename(meta["dandi_path"]).split('.')[0] + nwb_folder_name = op.basename(meta["dandi_path"]).split('.')[0] for ext_file_dict in meta['external_file_objects']: renamed_path_list = [] uuid_str = ext_file_dict.get(id, str(uuid.uuid4())) diff --git a/dandi/pynwb_utils.py b/dandi/pynwb_utils.py index 8d6798108..35895abf1 100644 --- a/dandi/pynwb_utils.py +++ b/dandi/pynwb_utils.py @@ -265,7 +265,7 @@ def _get_image_series(nwb: pynwb.NWBFile): return out -def rename_nwb_external_files(metadata: list): +def rename_nwb_external_files(metadata: list, dandiset_path: str): """ This method, renames the external_file attribute in an ImageSeries datatype in an open nwb file. It pulls information about the ImageSEries objects from metadata: metadata["external_file_objects"] @@ -281,11 +281,12 @@ def rename_nwb_external_files(metadata: list): lgr.warning(f'could not rename external files, update metadata' f'with "path", "dandi_path", "external_file_objects"') return - with NWBHDF5IO(meta["path"], mode='r+', load_namespaces=True) as io: + dandiset_nwbfile_path = op.join(dandiset_path, meta["dandi_path"]) + with NWBHDF5IO(dandiset_nwbfile_path, mode='r+', load_namespaces=True) as io: nwb = io.read() for ext_file_dict in meta['external_file_objects']: # retrieve nwb neurodata object of the given object id: - container_list = [child for child in nwb.children() if ext_file_dict['id'] == child.object_id] + container_list = [child for child in nwb.children if ext_file_dict['id'] == child.object_id] if len(container_list) == 0: continue else: From 4ae10bb4472ef0177fb82e4e13b8e60c56e3b5a8 Mon Sep 17 00:00:00 2001 From: Saksham Sharda Date: Mon, 6 Dec 2021 17:11:36 +0530 Subject: [PATCH 14/67] error string fix --- dandi/cli/cmd_organize.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dandi/cli/cmd_organize.py b/dandi/cli/cmd_organize.py index 8a3a9ee65..99551b0dd 100644 --- a/dandi/cli/cmd_organize.py +++ b/dandi/cli/cmd_organize.py @@ -246,9 +246,9 @@ def _get_metadata(path): f"linked to any nwbfile found in {paths}") elif not all(external_files_missing_metadata_bool) and rewrite != "external-file": - raise ValueError("rewrite option not specified but found external video files linked to" + raise ValueError("rewrite option not specified but found external video files linked to " f"the nwbfiles {[metadata[no]['path'] for no, a in enumerate(external_files_missing_metadata_bool) if not a]}, " - f"change option to 'external_file'") + f"change option: -rewrite 'external_file'") if rewrite == "external-file": if external_files_mode is None: From d0c754fa0ab162bfaeb9e37a4b15e3f14f43d595 Mon Sep 17 00:00:00 2001 From: Saksham Sharda Date: Sun, 12 Dec 2021 22:35:58 +0530 Subject: [PATCH 15/67] add tests --- dandi/consts.py | 2 +- dandi/tests/fixtures.py | 47 ++++++++++++++++++++++++++++++++++++ dandi/tests/test_organize.py | 10 ++++++++ 3 files changed, 58 insertions(+), 1 deletion(-) diff --git a/dandi/consts.py b/dandi/consts.py index 1423fe7bb..c4d0dfc89 100644 --- a/dandi/consts.py +++ b/dandi/consts.py @@ -131,5 +131,5 @@ #: of retries) RETRY_STATUSES = (500, 502, 503, 504) -external_file_extensions = [".mp4", ".avi", ".wmv", ".mov"] +external_file_extensions = [".mp4", ".avi", ".wmv", ".mov", ".flv"] external_file_modules = ["processing", "acquisition"] diff --git a/dandi/tests/fixtures.py b/dandi/tests/fixtures.py index d667b750b..3d7cf6703 100644 --- a/dandi/tests/fixtures.py +++ b/dandi/tests/fixtures.py @@ -8,10 +8,18 @@ import tempfile from time import sleep from uuid import uuid4 +from datalad.api import install, Dataset from dandischema.consts import DANDI_SCHEMA_VERSION from dateutil.tz import tzutc import pynwb +import pynwb.image +from pynwb import NWBHDF5IO, NWBFile +from pynwb.device import Device +from pynwb.file import Subject +from pynwb.ophys import ImageSeries + +from dateutil.tz import tzlocal import pytest import requests @@ -312,3 +320,42 @@ def upload_dandiset(paths=None, **kwargs): "dandiset_id": dandiset_id, "reupload": upload_dandiset, } + + +@pytest.fixture +def create_video_nwbfiles(tmp_path): + base_nwb_path = tmp_path/'nwbfiles' + if not base_nwb_path.exists(): + pt = Path.cwd()/"ophys_testing_data" + print(pt) + if pt.exists(): + dataset = Dataset(pt) + else: + dataset = install("https://gin.g-node.org/CatalystNeuro/ophys_testing_data") + resp = dataset.get("video_files") + base_vid_path = Path(resp[0]["path"]) + + for no, vid_path in enumerate(base_vid_path.iterdir()): + subject_id = f'mouse{no}' + session_id = f'sessionid{no}' + subject = Subject(subject_id=subject_id, species="Mus musculus", sex="M", + description='lab mouse ') + device = Device(f'imaging_device_{no}') + name = vid_path.name + nwbfile = NWBFile(f'{name}{no}', 'desc: contains movie for dandi .mp4 storage as external', + datetime.now(tzlocal()), + experimenter='Experimenter name', + session_id=session_id, + subject=subject, + devices=[device]) + + image_series = ImageSeries(name=f'MouseWhiskers{no}', + format='external', + external_file=[str(vid_path)], + starting_frame=[0], starting_time=0.0, rate=150.0) + nwbfile.add_acquisition(image_series) + + nwbfile_path = base_nwb_path/f'{name}_{no}.nwb' + with NWBHDF5IO(str(nwbfile_path), 'w') as io: + io.write(nwbfile) + return base_nwb_path \ No newline at end of file diff --git a/dandi/tests/test_organize.py b/dandi/tests/test_organize.py index d64f0ed34..85cbeb8a6 100644 --- a/dandi/tests/test_organize.py +++ b/dandi/tests/test_organize.py @@ -248,3 +248,13 @@ def error_link(src, dest): monkeypatch.setattr(os, "symlink", succeed_link if sym_success else error_link) monkeypatch.setattr(os, "link", succeed_link if hard_success else error_link) assert detect_link_type(tmp_path) == result + + +@pytest.mark.parametrize("mode", ["copy","move"]) +@pytest.mark.parametrize("video_mode", ["copy","move",None]) +def test_video_organize(video_mode, mode, create_video_nwbfiles, tmp_path, clirunner): + pt = tmp_path / "video_nwbfiles" + pt.mkdir(parents=True, exist_ok=True) + cmd = ["--files-mode", mode, "-rw", "external-file", "-ef", video_mode] + r = clirunner.invoke(organize, cmd) + assert r.exit_code == 0 From e23dca8fd3ef921a224be3fb0239ec9fb2e638ff Mon Sep 17 00:00:00 2001 From: Saksham Sharda Date: Mon, 13 Dec 2021 12:59:06 +0530 Subject: [PATCH 16/67] video test cli fixes --- dandi/tests/fixtures.py | 79 +++++++++++++++++++----------------- dandi/tests/test_organize.py | 9 ++-- 2 files changed, 46 insertions(+), 42 deletions(-) diff --git a/dandi/tests/fixtures.py b/dandi/tests/fixtures.py index 3d7cf6703..d3a1835d3 100644 --- a/dandi/tests/fixtures.py +++ b/dandi/tests/fixtures.py @@ -8,7 +8,7 @@ import tempfile from time import sleep from uuid import uuid4 -from datalad.api import install, Dataset + from dandischema.consts import DANDI_SCHEMA_VERSION from dateutil.tz import tzutc @@ -322,40 +322,43 @@ def upload_dandiset(paths=None, **kwargs): } -@pytest.fixture -def create_video_nwbfiles(tmp_path): - base_nwb_path = tmp_path/'nwbfiles' - if not base_nwb_path.exists(): - pt = Path.cwd()/"ophys_testing_data" - print(pt) - if pt.exists(): - dataset = Dataset(pt) - else: - dataset = install("https://gin.g-node.org/CatalystNeuro/ophys_testing_data") - resp = dataset.get("video_files") - base_vid_path = Path(resp[0]["path"]) - - for no, vid_path in enumerate(base_vid_path.iterdir()): - subject_id = f'mouse{no}' - session_id = f'sessionid{no}' - subject = Subject(subject_id=subject_id, species="Mus musculus", sex="M", - description='lab mouse ') - device = Device(f'imaging_device_{no}') - name = vid_path.name - nwbfile = NWBFile(f'{name}{no}', 'desc: contains movie for dandi .mp4 storage as external', - datetime.now(tzlocal()), - experimenter='Experimenter name', - session_id=session_id, - subject=subject, - devices=[device]) - - image_series = ImageSeries(name=f'MouseWhiskers{no}', - format='external', - external_file=[str(vid_path)], - starting_frame=[0], starting_time=0.0, rate=150.0) - nwbfile.add_acquisition(image_series) - - nwbfile_path = base_nwb_path/f'{name}_{no}.nwb' - with NWBHDF5IO(str(nwbfile_path), 'w') as io: - io.write(nwbfile) - return base_nwb_path \ No newline at end of file +@pytest.fixture(scope="session") +def create_video_nwbfiles(): + from datalad.api import install, Dataset + base_nwb_path = Path(tempfile.mktemp())/'nwbfiles' + base_nwb_path.mkdir(parents=True, exist_ok=True) + pt = Path.cwd()/"ophys_testing_data" + print(pt) + if pt.exists(): + dataset = Dataset(pt) + else: + dataset = install("https://gin.g-node.org/CatalystNeuro/ophys_testing_data") + resp = dataset.get("video_files") + base_vid_path = Path(resp[0]["path"]) + video_files_list = [i for i in base_vid_path.iterdir()] + for no in range(0,len(video_files_list),2): + vid_1 = video_files_list[no] + vid_2 = video_files_list[no+1] + subject_id = f'mouse{no}' + session_id = f'sessionid{no}' + subject = Subject(subject_id=subject_id, species="Mus musculus", sex="M", + description='lab mouse ') + device = Device(f'imaging_device_{no}') + name = f'{vid_1.stem}_{no}' + nwbfile = NWBFile(f'{name}{no}', 'desc: contains movie for dandi .mp4 storage as external', + datetime.now(tzlocal()), + experimenter='Experimenter name', + session_id=session_id, + subject=subject, + devices=[device]) + + image_series = ImageSeries(name=f'MouseWhiskers{no}', + format='external', + external_file=[str(vid_1), str(vid_2)], + starting_frame=[0], starting_time=0.0, rate=150.0) + nwbfile.add_acquisition(image_series) + + nwbfile_path = base_nwb_path/f'{name}.nwb' + with NWBHDF5IO(str(nwbfile_path), 'w') as io: + io.write(nwbfile) + return str(base_nwb_path) \ No newline at end of file diff --git a/dandi/tests/test_organize.py b/dandi/tests/test_organize.py index 85cbeb8a6..5953ee364 100644 --- a/dandi/tests/test_organize.py +++ b/dandi/tests/test_organize.py @@ -252,9 +252,10 @@ def error_link(src, dest): @pytest.mark.parametrize("mode", ["copy","move"]) @pytest.mark.parametrize("video_mode", ["copy","move",None]) -def test_video_organize(video_mode, mode, create_video_nwbfiles, tmp_path, clirunner): - pt = tmp_path / "video_nwbfiles" - pt.mkdir(parents=True, exist_ok=True) - cmd = ["--files-mode", mode, "-rw", "external-file", "-ef", video_mode] +def test_video_organize(video_mode, mode, create_video_nwbfiles, clirunner, tmp_path): + dandi_organize_path = str(tmp_path / "organized") + cmd = [create_video_nwbfiles, + "--files-mode", mode, "-rw", "external-file", "-ef", video_mode, + "-d", dandi_organize_path] r = clirunner.invoke(organize, cmd) assert r.exit_code == 0 From a3ca6909cb60383093723b9235df94317f08b5d7 Mon Sep 17 00:00:00 2001 From: Saksham Sharda Date: Mon, 13 Dec 2021 17:36:36 +0530 Subject: [PATCH 17/67] add validation for specific video files on upload --- dandi/validate.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/dandi/validate.py b/dandi/validate.py index 16d057442..056c0259e 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -1,7 +1,8 @@ +import os.path import os.path as op from . import get_logger -from .consts import dandiset_metadata_file +from .consts import dandiset_metadata_file, external_file_extensions from .metadata import get_metadata from .pynwb_utils import validate as pynwb_validate from .pynwb_utils import validate_cache @@ -41,6 +42,8 @@ def validate_file(filepath, schema_version=None, devel_debug=False): return validate_dandiset_yaml( filepath, schema_version=None, devel_debug=devel_debug ) + elif os.path.splitext(filepath)[-1] in external_file_extensions: + return [] else: return pynwb_validate(filepath, devel_debug=devel_debug) + validate_asset_file( filepath, schema_version=schema_version, devel_debug=devel_debug From 4c854e56815dd216dd155338a6175fd416bd4143 Mon Sep 17 00:00:00 2001 From: Saksham Sharda Date: Mon, 13 Dec 2021 18:35:42 +0530 Subject: [PATCH 18/67] open nwbfiles and test --- dandi/tests/fixtures.py | 24 ++++++++++++++++-------- dandi/tests/test_organize.py | 32 +++++++++++++++++++++++++++----- 2 files changed, 43 insertions(+), 13 deletions(-) diff --git a/dandi/tests/fixtures.py b/dandi/tests/fixtures.py index d3a1835d3..3e45bcb9c 100644 --- a/dandi/tests/fixtures.py +++ b/dandi/tests/fixtures.py @@ -323,18 +323,26 @@ def upload_dandiset(paths=None, **kwargs): @pytest.fixture(scope="session") -def create_video_nwbfiles(): - from datalad.api import install, Dataset - base_nwb_path = Path(tempfile.mktemp())/'nwbfiles' - base_nwb_path.mkdir(parents=True, exist_ok=True) - pt = Path.cwd()/"ophys_testing_data" - print(pt) +def download_video_files(): + from datalad.api import Dataset, install + pt = Path(tempfile.mkdtemp())/"ophys_testing_data" if pt.exists(): dataset = Dataset(pt) else: dataset = install("https://gin.g-node.org/CatalystNeuro/ophys_testing_data") resp = dataset.get("video_files") - base_vid_path = Path(resp[0]["path"]) + return Path(resp[0]["path"]) + + +@pytest.fixture() +def create_video_nwbfiles(download_video_files): + basedir = Path(tempfile.mkdtemp()) + base_nwb_path = basedir/'nwbfiles' + base_nwb_path.mkdir(parents=True, exist_ok=True) + base_vid_path = basedir/'video_files' + base_vid_path.mkdir(parents=True, exist_ok=True) + shutil.copytree(download_video_files,str(base_vid_path)) + video_files_list = [i for i in base_vid_path.iterdir()] for no in range(0,len(video_files_list),2): vid_1 = video_files_list[no] @@ -361,4 +369,4 @@ def create_video_nwbfiles(): nwbfile_path = base_nwb_path/f'{name}.nwb' with NWBHDF5IO(str(nwbfile_path), 'w') as io: io.write(nwbfile) - return str(base_nwb_path) \ No newline at end of file + return base_nwb_path \ No newline at end of file diff --git a/dandi/tests/test_organize.py b/dandi/tests/test_organize.py index 5953ee364..9ca336124 100644 --- a/dandi/tests/test_organize.py +++ b/dandi/tests/test_organize.py @@ -1,3 +1,4 @@ +import shutil from glob import glob import os import os.path as op @@ -16,8 +17,9 @@ get_obj_id, populate_dataset_yml, ) -from ..pynwb_utils import copy_nwb_file, get_object_id +from ..pynwb_utils import copy_nwb_file, get_object_id, _get_image_series from ..utils import find_files, on_windows, yaml_load +from pynwb import NWBHDF5IO def test_sanitize_value(): @@ -253,9 +255,29 @@ def error_link(src, dest): @pytest.mark.parametrize("mode", ["copy","move"]) @pytest.mark.parametrize("video_mode", ["copy","move",None]) def test_video_organize(video_mode, mode, create_video_nwbfiles, clirunner, tmp_path): - dandi_organize_path = str(tmp_path / "organized") - cmd = [create_video_nwbfiles, - "--files-mode", mode, "-rw", "external-file", "-ef", video_mode, - "-d", dandi_organize_path] + dandi_organize_path = create_video_nwbfiles.parent/'dandi_organized' + dandi_organize_path.mkdir(parents=True, exist_ok=True) + cmd = ["--files-mode", mode, "-rw", "external-file", "-ef", video_mode, + "-d", str(dandi_organize_path), str(create_video_nwbfiles)] + video_files_list = [i for i in (create_video_nwbfiles.parent/'video_files').iterdir()] + video_files_organized = [] r = clirunner.invoke(organize, cmd) assert r.exit_code == 0 + for nwbfile_name in dandi_organize_path.glob("**/*.nwb"): + vid_folder = nwbfile_name.with_suffix("") + assert vid_folder.exists() + with NWBHDF5IO(str(nwbfile_name),'r',load_namespaces=True) as io: + nwbfile = io.read() + # get iamgeseries objects as dict(id=object_id, external_files=[]) + ext_file_objects = _get_image_series(nwbfile) + for ext_file_ob in ext_file_objects: + filename = vid_folder.name+f'/{ext_file_ob["id"]}_external_file_' + for name in ext_file_ob["external_files"]: + video_files_organized.append(name) + # check if external_file arguments are correctly named according to convention: + assert filename in str(name) + # check if the files exist( both in case of move/copy): + assert (vid_folder.parent/name).exists() + #check all video files are organized: + assert len(video_files_list)==len(video_files_organized) + From a6f424285f32d80278dea603eabfb648fc708b65 Mon Sep 17 00:00:00 2001 From: Saksham Sharda Date: Mon, 13 Dec 2021 18:45:31 +0530 Subject: [PATCH 19/67] imageseries uuid copy bug fix --- dandi/organize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dandi/organize.py b/dandi/organize.py index d360e6e4b..c07f27c67 100644 --- a/dandi/organize.py +++ b/dandi/organize.py @@ -199,7 +199,7 @@ def _create_external_file_names(metadata: dict): nwb_folder_name = op.basename(meta["dandi_path"]).split('.')[0] for ext_file_dict in meta['external_file_objects']: renamed_path_list = [] - uuid_str = ext_file_dict.get(id, str(uuid.uuid4())) + uuid_str = ext_file_dict.get("id", str(uuid.uuid4())) for no, ext_file in enumerate(ext_file_dict['external_files']): renamed = op.join(nwb_folder_name,f'{uuid_str}_external_file_{no}{ext_file.suffix}') renamed_path_list.append(str(renamed)) From 414544532b20fb8b7bf279aaf82ca149028b8a5b Mon Sep 17 00:00:00 2001 From: Saksham Sharda Date: Mon, 13 Dec 2021 19:00:54 +0530 Subject: [PATCH 20/67] update setup to include datalad --- setup.cfg | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.cfg b/setup.cfg index 3c5e7ca03..54adea2f3 100644 --- a/setup.cfg +++ b/setup.cfg @@ -55,6 +55,7 @@ install_requires = semantic-version tenacity tqdm + datalad zip_safe = False packages = find: include_package_data = True From 6ec8e929f0eb6af5e0e3b2a79433923aac0be6f1 Mon Sep 17 00:00:00 2001 From: Saksham Sharda Date: Mon, 13 Dec 2021 22:11:31 +0530 Subject: [PATCH 21/67] datalad as test dependency --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 54adea2f3..497cfcb14 100644 --- a/setup.cfg +++ b/setup.cfg @@ -55,7 +55,6 @@ install_requires = semantic-version tenacity tqdm - datalad zip_safe = False packages = find: include_package_data = True @@ -83,6 +82,7 @@ test = pytest-cov pytest-mock responses + datalad tools= boto3 all = From 80a0616595304364e08ba8e2d5e8ed57ef8792bb Mon Sep 17 00:00:00 2001 From: Saksham Sharda Date: Mon, 13 Dec 2021 22:12:39 +0530 Subject: [PATCH 22/67] keep only double dash for video options --- dandi/cli/cmd_organize.py | 6 ++---- dandi/tests/test_organize.py | 3 ++- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/dandi/cli/cmd_organize.py b/dandi/cli/cmd_organize.py index 99551b0dd..781a534e0 100644 --- a/dandi/cli/cmd_organize.py +++ b/dandi/cli/cmd_organize.py @@ -41,16 +41,14 @@ show_default=True, ) @click.option( - "-rw", "--rewrite", type=click.Choice(["external-file"]), default=None, - help="if set to re-write attributes in an nwbfile. For example, if the value " - "is set to external-paths then the external_path attribute of nwbfiles'" + help="Use to re-write attributes in an nwbfile. For example, if the value " + "is set to 'external-paths' then the external_path attribute of nwbfiles' " "ImageSeries will be written with renamed video paths according to set rules." ) @click.option( - "-ef", "--external-files-mode", type=click.Choice(file_operation_modes), default=None, diff --git a/dandi/tests/test_organize.py b/dandi/tests/test_organize.py index 9ca336124..565217ef4 100644 --- a/dandi/tests/test_organize.py +++ b/dandi/tests/test_organize.py @@ -257,7 +257,8 @@ def error_link(src, dest): def test_video_organize(video_mode, mode, create_video_nwbfiles, clirunner, tmp_path): dandi_organize_path = create_video_nwbfiles.parent/'dandi_organized' dandi_organize_path.mkdir(parents=True, exist_ok=True) - cmd = ["--files-mode", mode, "-rw", "external-file", "-ef", video_mode, + cmd = ["--files-mode", mode, "--rewrite", "external-file", + "--external-files-mode", video_mode, "-d", str(dandi_organize_path), str(create_video_nwbfiles)] video_files_list = [i for i in (create_video_nwbfiles.parent/'video_files').iterdir()] video_files_organized = [] From dd2b787f1d9c2e059b74f6e40f11658aef227e4a Mon Sep 17 00:00:00 2001 From: Saksham Sharda Date: Mon, 13 Dec 2021 22:13:48 +0530 Subject: [PATCH 23/67] logger use %s formatting --- dandi/cli/cmd_organize.py | 6 +++--- dandi/pynwb_utils.py | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/dandi/cli/cmd_organize.py b/dandi/cli/cmd_organize.py index 781a534e0..3ec1d913b 100644 --- a/dandi/cli/cmd_organize.py +++ b/dandi/cli/cmd_organize.py @@ -163,7 +163,7 @@ def act(func, *args, **kwargs): "Only 'dry' or 'move' mode could be used to operate in-place " "within a dandiset (no paths were provided)" ) - lgr.info(f"We will organize {dandiset_path} in-place") + lgr.info(f"We will organize %s in-place", dandiset_path) in_place = True paths = dandiset_path @@ -241,7 +241,7 @@ def _get_metadata(path): external_files_missing_metadata_bool = [len(m["external_file_objects"]) == 0 for m in metadata] if all(external_files_missing_metadata_bool) and rewrite == "external-file": lgr.warning("rewrite option specified as 'external_file' but no external_files found" - f"linked to any nwbfile found in {paths}") + f"linked to any nwbfile found in %s", paths) elif not all(external_files_missing_metadata_bool) and rewrite != "external-file": raise ValueError("rewrite option not specified but found external video files linked to " @@ -356,7 +356,7 @@ def _get_metadata(path): if op.exists(d): try: os.rmdir(d) - lgr.info(f"Removed empty directory {d}") + lgr.info("Removed empty directory %s", d) except Exception as exc: lgr.debug("Failed to remove directory %s: %s", d, exc) diff --git a/dandi/pynwb_utils.py b/dandi/pynwb_utils.py index 35895abf1..260cda1b5 100644 --- a/dandi/pynwb_utils.py +++ b/dandi/pynwb_utils.py @@ -260,7 +260,8 @@ def _get_image_series(nwb: pynwb.NWBFile): if Path(ext_file).suffix in external_file_extensions: out_dict['external_files'].append(Path(ext_file)) else: - lgr.warning(f'external file {ext_file} should be one of: {external_file_extensions}') + lgr.warning("external file %s should be one of: %s}", + ext_file, external_file_extensions) out.append(out_dict) return out From 5ae93ef5c7fe0faad3426090d8c2e0f4a59e46c1 Mon Sep 17 00:00:00 2001 From: Saksham Sharda Date: Mon, 13 Dec 2021 22:14:12 +0530 Subject: [PATCH 24/67] typehints, docstrings update --- dandi/pynwb_utils.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/dandi/pynwb_utils.py b/dandi/pynwb_utils.py index 260cda1b5..3902abfda 100644 --- a/dandi/pynwb_utils.py +++ b/dandi/pynwb_utils.py @@ -246,7 +246,7 @@ def _get_image_series(nwb: pynwb.NWBFile): nwb: pynwb.NWBFile Returns ------- - out: list + out: List[dict] list of dicts : [{id: , name: , external_files=[ImageSeries.external_file]}] if no ImageSeries found in the given modules to check, then it returns an empty list. """ @@ -266,7 +266,7 @@ def _get_image_series(nwb: pynwb.NWBFile): return out -def rename_nwb_external_files(metadata: list, dandiset_path: str): +def rename_nwb_external_files(metadata: list, dandiset_path: str) -> None: """ This method, renames the external_file attribute in an ImageSeries datatype in an open nwb file. It pulls information about the ImageSEries objects from metadata: metadata["external_file_objects"] @@ -274,8 +274,10 @@ def rename_nwb_external_files(metadata: list, dandiset_path: str): Parameters ---------- - metadata: list + metadata: List[dict] list of dictionaries containing the metadata gathered from the nwbfile + dandiset_path: str + base path of dandiset """ for meta in metadata: if not np.all(i in meta for i in ["path", "dandi_path", "external_file_objects"]): From 3f3f0952dfc8199f5758f0b2f56d649c00b5ae74 Mon Sep 17 00:00:00 2001 From: Saksham Sharda Date: Mon, 13 Dec 2021 22:14:28 +0530 Subject: [PATCH 25/67] using tmp_path fixtures --- dandi/tests/fixtures.py | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/dandi/tests/fixtures.py b/dandi/tests/fixtures.py index 3e45bcb9c..001798714 100644 --- a/dandi/tests/fixtures.py +++ b/dandi/tests/fixtures.py @@ -323,23 +323,19 @@ def upload_dandiset(paths=None, **kwargs): @pytest.fixture(scope="session") -def download_video_files(): - from datalad.api import Dataset, install - pt = Path(tempfile.mkdtemp())/"ophys_testing_data" - if pt.exists(): - dataset = Dataset(pt) - else: - dataset = install("https://gin.g-node.org/CatalystNeuro/ophys_testing_data") +def download_video_files(tmp_path_factory): + from datalad.api import install + pt = tmp_path_factory.mktemp('ophys_testing_data') + dataset = install(path=pt, source="https://gin.g-node.org/CatalystNeuro/ophys_testing_data") resp = dataset.get("video_files") return Path(resp[0]["path"]) @pytest.fixture() -def create_video_nwbfiles(download_video_files): - basedir = Path(tempfile.mkdtemp()) - base_nwb_path = basedir/'nwbfiles' +def create_video_nwbfiles(download_video_files, tmp_path): + base_nwb_path = tmp_path/'nwbfiles' base_nwb_path.mkdir(parents=True, exist_ok=True) - base_vid_path = basedir/'video_files' + base_vid_path = tmp_path/'video_files' base_vid_path.mkdir(parents=True, exist_ok=True) shutil.copytree(download_video_files,str(base_vid_path)) From 3e14aa9315777f7efdf6b17aa7a3c7313b8ac399 Mon Sep 17 00:00:00 2001 From: Saksham Sharda Date: Tue, 14 Dec 2021 23:28:21 +0530 Subject: [PATCH 26/67] filter future warning from datalad --- tox.ini | 1 + 1 file changed, 1 insertion(+) diff --git a/tox.ini b/tox.ini index 5393d761d..cf4c893d3 100644 --- a/tox.ini +++ b/tox.ini @@ -49,6 +49,7 @@ filterwarnings = ignore:\s*safe_load will be removed.*:PendingDeprecationWarning:hdmf ignore:\s*load will be removed.*:PendingDeprecationWarning:ruamel.yaml ignore:Passing None into shape arguments.*:DeprecationWarning:h5py + ignore:the imp module is deprecated:DeprecationWarning [coverage:run] parallel = True From 69408b2d17d3203033ab21a910900cabd5a94bc6 Mon Sep 17 00:00:00 2001 From: Saksham Sharda Date: Wed, 15 Dec 2021 16:45:58 +0530 Subject: [PATCH 27/67] run pre-commit --- dandi/cli/cmd_organize.py | 48 ++++++++++++++--------- dandi/organize.py | 29 ++++++++------ dandi/pynwb_utils.py | 55 +++++++++++++++++--------- dandi/tests/fixtures.py | 75 +++++++++++++++++++++--------------- dandi/tests/test_organize.py | 41 ++++++++++++-------- 5 files changed, 153 insertions(+), 95 deletions(-) diff --git a/dandi/cli/cmd_organize.py b/dandi/cli/cmd_organize.py index 3ec1d913b..6e540b59c 100644 --- a/dandi/cli/cmd_organize.py +++ b/dandi/cli/cmd_organize.py @@ -7,10 +7,6 @@ from ..consts import file_operation_modes from ..organize import _create_external_file_names, organize_external_files from ..pynwb_utils import rename_nwb_external_files -from .. import get_logger - - -lgr = get_logger() @click.command() @@ -45,14 +41,14 @@ type=click.Choice(["external-file"]), default=None, help="Use to re-write attributes in an nwbfile. For example, if the value " - "is set to 'external-paths' then the external_path attribute of nwbfiles' " - "ImageSeries will be written with renamed video paths according to set rules." + "is set to 'external-paths' then the external_path attribute of nwbfiles' " + "ImageSeries will be written with renamed video paths according to set rules.", ) @click.option( "--external-files-mode", type=click.Choice(file_operation_modes), default=None, - help="check help for --files-mode option" + help="check help for --files-mode option", ) @click.argument("paths", nargs=-1, type=click.Path(exists=True)) @devel_debug_option() @@ -125,7 +121,9 @@ def act(func, *args, **kwargs): return func(*args, **kwargs) if rewrite is not None and files_mode not in ["copy", "move"]: - raise ValueError("files_mode needs to be one of 'copy/move' for the rewrite option to work") + raise ValueError( + "files_mode needs to be one of 'copy/move' for the rewrite option to work" + ) if dandiset_path is None: dandiset = Dandiset.find(os.curdir) @@ -163,7 +161,7 @@ def act(func, *args, **kwargs): "Only 'dry' or 'move' mode could be used to operate in-place " "within a dandiset (no paths were provided)" ) - lgr.info(f"We will organize %s in-place", dandiset_path) + lgr.info("We will organize %s in-place", dandiset_path) in_place = True paths = dandiset_path @@ -238,23 +236,37 @@ def _get_metadata(path): metadata = create_unique_filenames_from_metadata(metadata) # update metadata with external_file information: - external_files_missing_metadata_bool = [len(m["external_file_objects"]) == 0 for m in metadata] + external_files_missing_metadata_bool = [ + len(m["external_file_objects"]) == 0 for m in metadata + ] if all(external_files_missing_metadata_bool) and rewrite == "external-file": - lgr.warning("rewrite option specified as 'external_file' but no external_files found" - f"linked to any nwbfile found in %s", paths) + lgr.warning( + "rewrite option specified as 'external_file' but no external_files found" + "linked to any nwbfile found in %s", + paths, + ) elif not all(external_files_missing_metadata_bool) and rewrite != "external-file": - raise ValueError("rewrite option not specified but found external video files linked to " - f"the nwbfiles {[metadata[no]['path'] for no, a in enumerate(external_files_missing_metadata_bool) if not a]}, " - f"change option: -rewrite 'external_file'") + raise ValueError( + "rewrite option not specified but found external video files linked to " + f"the nwbfiles " + f"""{[metadata[no]['path'] + for no, a in enumerate(external_files_missing_metadata_bool) + if not a]}, """ + f"change option: -rewrite 'external_file'" + ) if rewrite == "external-file": if external_files_mode is None: external_files_mode = "move" - lgr.warning("external_files_mode not specified, setting to recommended mode: 'move' ") + lgr.warning( + "external_files_mode not specified, setting to recommended mode: 'move' " + ) elif external_files_mode not in ["move", "copy"]: - raise ValueError("external_files mode should be either of 'move/copy' to " - "overwrite the external_file in the nwbfile") + raise ValueError( + "external_files mode should be either of 'move/copy' to " + "overwrite the external_file in the nwbfile" + ) metadata = _create_external_file_names(metadata) # Verify first that the target paths do not exist yet, and fail if they do diff --git a/dandi/organize.py b/dandi/organize.py index c07f27c67..ecb559c0c 100644 --- a/dandi/organize.py +++ b/dandi/organize.py @@ -16,7 +16,7 @@ from . import get_logger from .exceptions import OrganizeImpossibleError from .pynwb_utils import get_neurodata_types_to_modalities_map, get_object_id -from .utils import ensure_datetime, flattened, yaml_load, copy_file, move_file +from .utils import copy_file, ensure_datetime, flattened, move_file, yaml_load lgr = get_logger() @@ -180,9 +180,10 @@ def _create_external_file_names(metadata: dict): external_file = [name1.mp4] rename to: external_file = [dandiset-path-of-nwbfile/ - dandi-renamed-nwbfile_name(folder without extention .nwb)/ + dandi-renamed-nwbfile_name(folder without extension .nwb)/ f'{ImageSeries.object_id}_external_file_0.mp4' - This is stored in a new field in the metadata['external_file_objects'][0]['external_files_renamed'] + This is stored in a new field in the + metadata['external_file_objects'][0]['external_files_renamed'] Parameters ---------- metadata: list @@ -196,14 +197,16 @@ def _create_external_file_names(metadata: dict): for meta in metadata: if "dandi_path" not in meta or "external_file_objects" not in meta: continue - nwb_folder_name = op.basename(meta["dandi_path"]).split('.')[0] - for ext_file_dict in meta['external_file_objects']: + nwb_folder_name = op.basename(meta["dandi_path"]).split(".")[0] + for ext_file_dict in meta["external_file_objects"]: renamed_path_list = [] uuid_str = ext_file_dict.get("id", str(uuid.uuid4())) - for no, ext_file in enumerate(ext_file_dict['external_files']): - renamed = op.join(nwb_folder_name,f'{uuid_str}_external_file_{no}{ext_file.suffix}') + for no, ext_file in enumerate(ext_file_dict["external_files"]): + renamed = op.join( + nwb_folder_name, f"{uuid_str}_external_file_{no}{ext_file.suffix}" + ) renamed_path_list.append(str(renamed)) - ext_file_dict['external_files_renamed'] = renamed_path_list + ext_file_dict["external_files_renamed"] = renamed_path_list return metadata @@ -222,9 +225,13 @@ def organize_external_files(metadata: list, dandiset_path: str, files_mode: str) """ for e in metadata: - for ext_file_dict in e['external_file_objects']: - for no, (name_old, name_new) in enumerate(zip(ext_file_dict['external_files'], - ext_file_dict['external_files_renamed'])): + for ext_file_dict in e["external_file_objects"]: + for no, (name_old, name_new) in enumerate( + zip( + ext_file_dict["external_files"], + ext_file_dict["external_files_renamed"], + ) + ): new_path = op.join(dandiset_path, op.dirname(e["dandi_path"]), name_new) name_old_str = str(name_old) if not op.exists(op.dirname(new_path)): diff --git a/dandi/pynwb_utils.py b/dandi/pynwb_utils.py index e5dfd46e7..7d213707c 100644 --- a/dandi/pynwb_utils.py +++ b/dandi/pynwb_utils.py @@ -1,6 +1,7 @@ from collections import Counter import os import os.path as op +from pathlib import Path import re import warnings @@ -15,13 +16,12 @@ from . import __version__, get_logger from .consts import ( + external_file_extensions, + external_file_modules, metadata_nwb_computed_fields, metadata_nwb_file_fields, metadata_nwb_subject_fields, - external_file_extensions, - external_file_modules ) -from pathlib import Path from .utils import get_module_version lgr = get_logger() @@ -238,8 +238,10 @@ def _get_pynwb_metadata(path): def _get_image_series(nwb: pynwb.NWBFile): """ - This method supports _get_pynwb_metadata() in retrieving all ImageSeries related metadata from an open nwb file. - Specifically it pulls out the ImageSeries uuid, name and all the externally linked files + This method supports _get_pynwb_metadata() in retrieving all ImageSeries + related metadata from an open nwb file. + Specifically it pulls out the ImageSeries uuid, name and all the + externally linked files named under the argument 'external_file'. Parameters ---------- @@ -247,7 +249,8 @@ def _get_image_series(nwb: pynwb.NWBFile): Returns ------- out: List[dict] - list of dicts : [{id: , name: , external_files=[ImageSeries.external_file]}] + list of dicts : [{id: , name: , + external_files=[ImageSeries.external_file]}] if no ImageSeries found in the given modules to check, then it returns an empty list. """ out = [] @@ -258,10 +261,13 @@ def _get_image_series(nwb: pynwb.NWBFile): out_dict = dict(id=ob.object_id, name=ob.name, external_files=[]) for ext_file in ob.external_file: if Path(ext_file).suffix in external_file_extensions: - out_dict['external_files'].append(Path(ext_file)) + out_dict["external_files"].append(Path(ext_file)) else: - lgr.warning("external file %s should be one of: %s}", - ext_file, external_file_extensions) + lgr.warning( + "external file %s should be one of: %s}", + ext_file, + external_file_extensions, + ) out.append(out_dict) return out @@ -269,7 +275,8 @@ def _get_image_series(nwb: pynwb.NWBFile): def rename_nwb_external_files(metadata: list, dandiset_path: str) -> None: """ This method, renames the external_file attribute in an ImageSeries datatype in an open nwb file. - It pulls information about the ImageSEries objects from metadata: metadata["external_file_objects"] + It pulls information about the ImageSEries objects + from metadata: metadata["external_file_objects"] populated during _get_pynwb_metadata() call. Parameters @@ -280,23 +287,35 @@ def rename_nwb_external_files(metadata: list, dandiset_path: str) -> None: base path of dandiset """ for meta in metadata: - if not np.all(i in meta for i in ["path", "dandi_path", "external_file_objects"]): - lgr.warning(f'could not rename external files, update metadata' - f'with "path", "dandi_path", "external_file_objects"') + if not np.all( + i in meta for i in ["path", "dandi_path", "external_file_objects"] + ): + lgr.warning( + "could not rename external files, update metadata" + 'with "path", "dandi_path", "external_file_objects"' + ) return dandiset_nwbfile_path = op.join(dandiset_path, meta["dandi_path"]) - with NWBHDF5IO(dandiset_nwbfile_path, mode='r+', load_namespaces=True) as io: + with NWBHDF5IO(dandiset_nwbfile_path, mode="r+", load_namespaces=True) as io: nwb = io.read() - for ext_file_dict in meta['external_file_objects']: + for ext_file_dict in meta["external_file_objects"]: # retrieve nwb neurodata object of the given object id: - container_list = [child for child in nwb.children if ext_file_dict['id'] == child.object_id] + container_list = [ + child + for child in nwb.children + if ext_file_dict["id"] == child.object_id + ] if len(container_list) == 0: continue else: container = container_list[0] # rename all external files: - for no, (name_old, name_new) in enumerate(zip(ext_file_dict['external_files'], - ext_file_dict['external_files_renamed'])): + for no, (name_old, name_new) in enumerate( + zip( + ext_file_dict["external_files"], + ext_file_dict["external_files_renamed"], + ) + ): container.external_file[no] = str(name_new) diff --git a/dandi/tests/fixtures.py b/dandi/tests/fixtures.py index 001798714..f34cec6c1 100644 --- a/dandi/tests/fixtures.py +++ b/dandi/tests/fixtures.py @@ -9,17 +9,14 @@ from time import sleep from uuid import uuid4 - from dandischema.consts import DANDI_SCHEMA_VERSION -from dateutil.tz import tzutc +from dateutil.tz import tzlocal, tzutc import pynwb -import pynwb.image from pynwb import NWBHDF5IO, NWBFile from pynwb.device import Device from pynwb.file import Subject +import pynwb.image from pynwb.ophys import ImageSeries - -from dateutil.tz import tzlocal import pytest import requests @@ -325,44 +322,58 @@ def upload_dandiset(paths=None, **kwargs): @pytest.fixture(scope="session") def download_video_files(tmp_path_factory): from datalad.api import install - pt = tmp_path_factory.mktemp('ophys_testing_data') - dataset = install(path=pt, source="https://gin.g-node.org/CatalystNeuro/ophys_testing_data") + + pt = tmp_path_factory.mktemp("ophys_testing_data") + dataset = install( + path=pt, source="https://gin.g-node.org/CatalystNeuro/ophys_testing_data" + ) resp = dataset.get("video_files") return Path(resp[0]["path"]) @pytest.fixture() def create_video_nwbfiles(download_video_files, tmp_path): - base_nwb_path = tmp_path/'nwbfiles' + base_nwb_path = tmp_path / "nwbfiles" base_nwb_path.mkdir(parents=True, exist_ok=True) - base_vid_path = tmp_path/'video_files' + base_vid_path = tmp_path / "video_files" base_vid_path.mkdir(parents=True, exist_ok=True) - shutil.copytree(download_video_files,str(base_vid_path)) + shutil.copytree(download_video_files, str(base_vid_path)) video_files_list = [i for i in base_vid_path.iterdir()] - for no in range(0,len(video_files_list),2): + for no in range(0, len(video_files_list), 2): vid_1 = video_files_list[no] - vid_2 = video_files_list[no+1] - subject_id = f'mouse{no}' - session_id = f'sessionid{no}' - subject = Subject(subject_id=subject_id, species="Mus musculus", sex="M", - description='lab mouse ') - device = Device(f'imaging_device_{no}') - name = f'{vid_1.stem}_{no}' - nwbfile = NWBFile(f'{name}{no}', 'desc: contains movie for dandi .mp4 storage as external', - datetime.now(tzlocal()), - experimenter='Experimenter name', - session_id=session_id, - subject=subject, - devices=[device]) - - image_series = ImageSeries(name=f'MouseWhiskers{no}', - format='external', - external_file=[str(vid_1), str(vid_2)], - starting_frame=[0], starting_time=0.0, rate=150.0) + vid_2 = video_files_list[no + 1] + subject_id = f"mouse{no}" + session_id = f"sessionid{no}" + subject = Subject( + subject_id=subject_id, + species="Mus musculus", + sex="M", + description="lab mouse ", + ) + device = Device(f"imaging_device_{no}") + name = f"{vid_1.stem}_{no}" + nwbfile = NWBFile( + f"{name}{no}", + "desc: contains movie for dandi .mp4 storage as external", + datetime.now(tzlocal()), + experimenter="Experimenter name", + session_id=session_id, + subject=subject, + devices=[device], + ) + + image_series = ImageSeries( + name=f"MouseWhiskers{no}", + format="external", + external_file=[str(vid_1), str(vid_2)], + starting_frame=[0], + starting_time=0.0, + rate=150.0, + ) nwbfile.add_acquisition(image_series) - nwbfile_path = base_nwb_path/f'{name}.nwb' - with NWBHDF5IO(str(nwbfile_path), 'w') as io: + nwbfile_path = base_nwb_path / f"{name}.nwb" + with NWBHDF5IO(str(nwbfile_path), "w") as io: io.write(nwbfile) - return base_nwb_path \ No newline at end of file + return base_nwb_path diff --git a/dandi/tests/test_organize.py b/dandi/tests/test_organize.py index 565217ef4..67f0b300a 100644 --- a/dandi/tests/test_organize.py +++ b/dandi/tests/test_organize.py @@ -1,9 +1,9 @@ -import shutil from glob import glob import os import os.path as op from pathlib import Path +from pynwb import NWBHDF5IO import pytest import ruamel.yaml @@ -17,9 +17,8 @@ get_obj_id, populate_dataset_yml, ) -from ..pynwb_utils import copy_nwb_file, get_object_id, _get_image_series +from ..pynwb_utils import _get_image_series, copy_nwb_file, get_object_id from ..utils import find_files, on_windows, yaml_load -from pynwb import NWBHDF5IO def test_sanitize_value(): @@ -252,33 +251,43 @@ def error_link(src, dest): assert detect_link_type(tmp_path) == result -@pytest.mark.parametrize("mode", ["copy","move"]) -@pytest.mark.parametrize("video_mode", ["copy","move",None]) +@pytest.mark.filterwarnings("ignore:the imp module is deprecated:DeprecationWarning") +@pytest.mark.parametrize("mode", ["copy", "move"]) +@pytest.mark.parametrize("video_mode", ["copy", "move", None]) def test_video_organize(video_mode, mode, create_video_nwbfiles, clirunner, tmp_path): - dandi_organize_path = create_video_nwbfiles.parent/'dandi_organized' + dandi_organize_path = create_video_nwbfiles.parent / "dandi_organized" dandi_organize_path.mkdir(parents=True, exist_ok=True) - cmd = ["--files-mode", mode, "--rewrite", "external-file", - "--external-files-mode", video_mode, - "-d", str(dandi_organize_path), str(create_video_nwbfiles)] - video_files_list = [i for i in (create_video_nwbfiles.parent/'video_files').iterdir()] + cmd = [ + "--files-mode", + mode, + "--rewrite", + "external-file", + "--external-files-mode", + video_mode, + "-d", + str(dandi_organize_path), + str(create_video_nwbfiles), + ] + video_files_list = [ + i for i in (create_video_nwbfiles.parent / "video_files").iterdir() + ] video_files_organized = [] r = clirunner.invoke(organize, cmd) assert r.exit_code == 0 for nwbfile_name in dandi_organize_path.glob("**/*.nwb"): vid_folder = nwbfile_name.with_suffix("") assert vid_folder.exists() - with NWBHDF5IO(str(nwbfile_name),'r',load_namespaces=True) as io: + with NWBHDF5IO(str(nwbfile_name), "r", load_namespaces=True) as io: nwbfile = io.read() # get iamgeseries objects as dict(id=object_id, external_files=[]) ext_file_objects = _get_image_series(nwbfile) for ext_file_ob in ext_file_objects: - filename = vid_folder.name+f'/{ext_file_ob["id"]}_external_file_' + filename = vid_folder.name + f'/{ext_file_ob["id"]}_external_file_' for name in ext_file_ob["external_files"]: video_files_organized.append(name) # check if external_file arguments are correctly named according to convention: assert filename in str(name) # check if the files exist( both in case of move/copy): - assert (vid_folder.parent/name).exists() - #check all video files are organized: - assert len(video_files_list)==len(video_files_organized) - + assert (vid_folder.parent / name).exists() + # check all video files are organized: + assert len(video_files_list) == len(video_files_organized) From fe3c33a59ab3af07fbac85545f36087d0e931b79 Mon Sep 17 00:00:00 2001 From: Saksham Sharda Date: Thu, 16 Dec 2021 15:28:52 +0530 Subject: [PATCH 28/67] code review suggestions --- dandi/cli/cmd_organize.py | 2 +- dandi/organize.py | 6 +++--- dandi/pynwb_utils.py | 6 +++--- dandi/tests/fixtures.py | 1 - dandi/tests/test_organize.py | 1 - setup.cfg | 2 +- 6 files changed, 8 insertions(+), 10 deletions(-) diff --git a/dandi/cli/cmd_organize.py b/dandi/cli/cmd_organize.py index 6e540b59c..ffaf1ccc4 100644 --- a/dandi/cli/cmd_organize.py +++ b/dandi/cli/cmd_organize.py @@ -253,7 +253,7 @@ def _get_metadata(path): f"""{[metadata[no]['path'] for no, a in enumerate(external_files_missing_metadata_bool) if not a]}, """ - f"change option: -rewrite 'external_file'" + f"change option: --rewrite 'external_file'" ) if rewrite == "external-file": diff --git a/dandi/organize.py b/dandi/organize.py index ecb559c0c..cab09cf35 100644 --- a/dandi/organize.py +++ b/dandi/organize.py @@ -10,7 +10,7 @@ from pathlib import Path import re import uuid - +from typing import List import numpy as np from . import get_logger @@ -173,7 +173,7 @@ def create_unique_filenames_from_metadata(metadata): return metadata -def _create_external_file_names(metadata: dict): +def _create_external_file_names(metadata: list) -> List[dict]: """ Renames the external_file attribute in an ImageSeries according to the rule: Example, the Initial name of file: @@ -210,7 +210,7 @@ def _create_external_file_names(metadata: dict): return metadata -def organize_external_files(metadata: list, dandiset_path: str, files_mode: str): +def organize_external_files(metadata: list, dandiset_path: str, files_mode: str) -> None: """ Organizes the external_files into the new Dandiset folder structure. diff --git a/dandi/pynwb_utils.py b/dandi/pynwb_utils.py index 7d213707c..9cc4e0662 100644 --- a/dandi/pynwb_utils.py +++ b/dandi/pynwb_utils.py @@ -4,7 +4,7 @@ from pathlib import Path import re import warnings - +from typing import List import dandischema from fscacher import PersistentCache import h5py @@ -236,7 +236,7 @@ def _get_pynwb_metadata(path): return out -def _get_image_series(nwb: pynwb.NWBFile): +def _get_image_series(nwb: pynwb.NWBFile) -> List[dict]: """ This method supports _get_pynwb_metadata() in retrieving all ImageSeries related metadata from an open nwb file. @@ -264,7 +264,7 @@ def _get_image_series(nwb: pynwb.NWBFile): out_dict["external_files"].append(Path(ext_file)) else: lgr.warning( - "external file %s should be one of: %s}", + "external file %s should be one of: %s", ext_file, external_file_extensions, ) diff --git a/dandi/tests/fixtures.py b/dandi/tests/fixtures.py index f34cec6c1..c3657593a 100644 --- a/dandi/tests/fixtures.py +++ b/dandi/tests/fixtures.py @@ -336,7 +336,6 @@ def create_video_nwbfiles(download_video_files, tmp_path): base_nwb_path = tmp_path / "nwbfiles" base_nwb_path.mkdir(parents=True, exist_ok=True) base_vid_path = tmp_path / "video_files" - base_vid_path.mkdir(parents=True, exist_ok=True) shutil.copytree(download_video_files, str(base_vid_path)) video_files_list = [i for i in base_vid_path.iterdir()] diff --git a/dandi/tests/test_organize.py b/dandi/tests/test_organize.py index 67f0b300a..8795d9393 100644 --- a/dandi/tests/test_organize.py +++ b/dandi/tests/test_organize.py @@ -251,7 +251,6 @@ def error_link(src, dest): assert detect_link_type(tmp_path) == result -@pytest.mark.filterwarnings("ignore:the imp module is deprecated:DeprecationWarning") @pytest.mark.parametrize("mode", ["copy", "move"]) @pytest.mark.parametrize("video_mode", ["copy", "move", None]) def test_video_organize(video_mode, mode, create_video_nwbfiles, clirunner, tmp_path): diff --git a/setup.cfg b/setup.cfg index 5c50b7bcb..2185c8f55 100644 --- a/setup.cfg +++ b/setup.cfg @@ -78,12 +78,12 @@ style = test = anys ~= 0.2 coverage + datalad pyfakefs ~= 4.0 pytest pytest-cov pytest-mock responses - datalad tools= boto3 all = From d16b93247672c8f8bc9432088ef9b2ab36465c86 Mon Sep 17 00:00:00 2001 From: Saksham Sharda Date: Thu, 16 Dec 2021 19:22:02 +0530 Subject: [PATCH 29/67] run pre-commit --- dandi/organize.py | 7 +++++-- dandi/pynwb_utils.py | 3 ++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/dandi/organize.py b/dandi/organize.py index cab09cf35..e51f68880 100644 --- a/dandi/organize.py +++ b/dandi/organize.py @@ -9,8 +9,9 @@ import os.path as op from pathlib import Path import re -import uuid from typing import List +import uuid + import numpy as np from . import get_logger @@ -210,7 +211,9 @@ def _create_external_file_names(metadata: list) -> List[dict]: return metadata -def organize_external_files(metadata: list, dandiset_path: str, files_mode: str) -> None: +def organize_external_files( + metadata: list, dandiset_path: str, files_mode: str +) -> None: """ Organizes the external_files into the new Dandiset folder structure. diff --git a/dandi/pynwb_utils.py b/dandi/pynwb_utils.py index 9cc4e0662..a651e7dc1 100644 --- a/dandi/pynwb_utils.py +++ b/dandi/pynwb_utils.py @@ -3,8 +3,9 @@ import os.path as op from pathlib import Path import re -import warnings from typing import List +import warnings + import dandischema from fscacher import PersistentCache import h5py From e7c27b7b6d66ffac3c01b0372ad7bf5afe12d896 Mon Sep 17 00:00:00 2001 From: Saksham Sharda Date: Sat, 15 Jan 2022 13:25:09 +0530 Subject: [PATCH 30/67] fixing test_metadata.test_get_metadata --- dandi/tests/test_metadata.py | 1 + 1 file changed, 1 insertion(+) diff --git a/dandi/tests/test_metadata.py b/dandi/tests/test_metadata.py index 4042132cc..d9565c136 100644 --- a/dandi/tests/test_metadata.py +++ b/dandi/tests/test_metadata.py @@ -33,6 +33,7 @@ def test_get_metadata(simple1_nwb, simple1_nwb_metadata): target_metadata["number_of_units"] = 0 # We also populate with nd_types now, although here they would be empty target_metadata["nd_types"] = [] + target_metadata["external_file_objects"] = [] # we do not populate any subject fields in our simple1_nwb for f in metadata_nwb_subject_fields: target_metadata[f] = None From 2b3c97b711502ada4b2600a4c032df0e44225820 Mon Sep 17 00:00:00 2001 From: Saksham Sharda <11567561+Saksham20@users.noreply.github.com> Date: Sun, 16 Jan 2022 21:55:17 +0530 Subject: [PATCH 31/67] update test.yml --- .github/workflows/test.yml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 402dbadaf..30fbacff7 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -108,6 +108,22 @@ jobs: -f dandi/tests/data/dandiarchive-docker/docker-compose.yml \ down -v + - name: Install git-annex (ubuntu) + if: startsWith(matrix.os, 'ubuntu') + run: | + apt-get install git-annex + + - name: Install git-annex (macOS) + if: startsWith(matrix.os, 'macos') + run: | + brew install git-annex + + - name: Install git-annex (windows) + if: startsWith(matrix.os, 'windows') + run: | + pip install datalad-installer + datalad-installer git-annex -m datalad/packages + - name: Upload coverage to Codecov uses: codecov/codecov-action@v2 with: From 1c3d55e1da141f16ae1c813ab27f93777349c3eb Mon Sep 17 00:00:00 2001 From: Saksham Sharda Date: Sun, 16 Jan 2022 22:10:28 +0530 Subject: [PATCH 32/67] manual precommit --- dandi/tests/fixtures.py | 1 + dandi/tests/test_organize.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/dandi/tests/fixtures.py b/dandi/tests/fixtures.py index 08074041e..1f786d8d2 100644 --- a/dandi/tests/fixtures.py +++ b/dandi/tests/fixtures.py @@ -370,6 +370,7 @@ def create_video_nwbfiles(download_video_files, tmp_path): io.write(nwbfile) return base_nwb_path + @pytest.fixture() def tmp_home( monkeypatch: pytest.MonkeyPatch, tmp_path_factory: pytest.TempPathFactory diff --git a/dandi/tests/test_organize.py b/dandi/tests/test_organize.py index 3b5f4c86e..8805bafc9 100644 --- a/dandi/tests/test_organize.py +++ b/dandi/tests/test_organize.py @@ -3,8 +3,8 @@ import os.path as op from pathlib import Path -from pynwb import NWBHDF5IO from click.testing import CliRunner +from pynwb import NWBHDF5IO import pytest import ruamel.yaml From 0832f3f79e892d55923614cd123897e7fe26e07a Mon Sep 17 00:00:00 2001 From: Saksham Sharda Date: Tue, 18 Jan 2022 11:35:52 +0530 Subject: [PATCH 33/67] external file extensions const upper case --- dandi/consts.py | 4 ++-- dandi/pynwb_utils.py | 10 +++++----- dandi/validate.py | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/dandi/consts.py b/dandi/consts.py index aa3c1c8a4..045e11955 100644 --- a/dandi/consts.py +++ b/dandi/consts.py @@ -143,5 +143,5 @@ class EmbargoStatus(Enum): #: of retries) RETRY_STATUSES = (500, 502, 503, 504) -external_file_extensions = [".mp4", ".avi", ".wmv", ".mov", ".flv"] -external_file_modules = ["processing", "acquisition"] +EXTERNAL_FILE_EXTENSIONS = [".mp4", ".avi", ".wmv", ".mov", ".flv"] +EXTERNAL_FILE_MODULES = ["processing", "acquisition"] diff --git a/dandi/pynwb_utils.py b/dandi/pynwb_utils.py index a651e7dc1..051334193 100644 --- a/dandi/pynwb_utils.py +++ b/dandi/pynwb_utils.py @@ -17,8 +17,8 @@ from . import __version__, get_logger from .consts import ( - external_file_extensions, - external_file_modules, + EXTERNAL_FILE_EXTENSIONS, + EXTERNAL_FILE_MODULES, metadata_nwb_computed_fields, metadata_nwb_file_fields, metadata_nwb_subject_fields, @@ -255,19 +255,19 @@ def _get_image_series(nwb: pynwb.NWBFile) -> List[dict]: if no ImageSeries found in the given modules to check, then it returns an empty list. """ out = [] - for module_name in external_file_modules: + for module_name in EXTERNAL_FILE_MODULES: module_cont = getattr(nwb, module_name) for name, ob in module_cont.items(): if isinstance(ob, pynwb.image.ImageSeries) and ob.external_file is not None: out_dict = dict(id=ob.object_id, name=ob.name, external_files=[]) for ext_file in ob.external_file: - if Path(ext_file).suffix in external_file_extensions: + if Path(ext_file).suffix in EXTERNAL_FILE_EXTENSIONS: out_dict["external_files"].append(Path(ext_file)) else: lgr.warning( "external file %s should be one of: %s", ext_file, - external_file_extensions, + EXTERNAL_FILE_EXTENSIONS, ) out.append(out_dict) return out diff --git a/dandi/validate.py b/dandi/validate.py index 056c0259e..06246ab6c 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -2,7 +2,7 @@ import os.path as op from . import get_logger -from .consts import dandiset_metadata_file, external_file_extensions +from .consts import EXTERNAL_FILE_EXTENSIONS, dandiset_metadata_file from .metadata import get_metadata from .pynwb_utils import validate as pynwb_validate from .pynwb_utils import validate_cache @@ -42,7 +42,7 @@ def validate_file(filepath, schema_version=None, devel_debug=False): return validate_dandiset_yaml( filepath, schema_version=None, devel_debug=devel_debug ) - elif os.path.splitext(filepath)[-1] in external_file_extensions: + elif os.path.splitext(filepath)[-1] in EXTERNAL_FILE_EXTENSIONS: return [] else: return pynwb_validate(filepath, devel_debug=devel_debug) + validate_asset_file( From effc81522049c23a4ca838eadd059bc45a84acdd Mon Sep 17 00:00:00 2001 From: Saksham Sharda <11567561+Saksham20@users.noreply.github.com> Date: Wed, 19 Jan 2022 13:16:22 +0530 Subject: [PATCH 34/67] add dated wtf to debug --- .github/workflows/test.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 30fbacff7..662bd7015 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -112,17 +112,20 @@ jobs: if: startsWith(matrix.os, 'ubuntu') run: | apt-get install git-annex + datalad wtf - name: Install git-annex (macOS) if: startsWith(matrix.os, 'macos') run: | brew install git-annex + datalad wtf - name: Install git-annex (windows) if: startsWith(matrix.os, 'windows') run: | pip install datalad-installer datalad-installer git-annex -m datalad/packages + datalad wtf - name: Upload coverage to Codecov uses: codecov/codecov-action@v2 From dff2e86d87b9cc0879b57d48825efacc297c986c Mon Sep 17 00:00:00 2001 From: Saksham Sharda <11567561+Saksham20@users.noreply.github.com> Date: Wed, 19 Jan 2022 16:08:45 +0530 Subject: [PATCH 35/67] Apply suggestions from code review Co-authored-by: Yaroslav Halchenko --- dandi/organize.py | 2 +- dandi/tests/test_organize.py | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/dandi/organize.py b/dandi/organize.py index e51f68880..aec166e01 100644 --- a/dandi/organize.py +++ b/dandi/organize.py @@ -198,7 +198,7 @@ def _create_external_file_names(metadata: list) -> List[dict]: for meta in metadata: if "dandi_path" not in meta or "external_file_objects" not in meta: continue - nwb_folder_name = op.basename(meta["dandi_path"]).split(".")[0] + nwb_folder_name = op.splitext(op.basename(meta["dandi_path"]))[0] for ext_file_dict in meta["external_file_objects"]: renamed_path_list = [] uuid_str = ext_file_dict.get("id", str(uuid.uuid4())) diff --git a/dandi/tests/test_organize.py b/dandi/tests/test_organize.py index 8805bafc9..a4cce5fca 100644 --- a/dandi/tests/test_organize.py +++ b/dandi/tests/test_organize.py @@ -268,9 +268,7 @@ def test_video_organize(video_mode, mode, create_video_nwbfiles, clirunner, tmp_ str(dandi_organize_path), str(create_video_nwbfiles), ] - video_files_list = [ - i for i in (create_video_nwbfiles.parent / "video_files").iterdir() - ] + video_files_list = list((create_video_nwbfiles.parent / "video_files").iterdir()) video_files_organized = [] r = clirunner.invoke(organize, cmd) assert r.exit_code == 0 From 5a49a8a341b9204a46a74a2b951b5b92936a5a5d Mon Sep 17 00:00:00 2001 From: Saksham Sharda Date: Wed, 19 Jan 2022 16:27:20 +0530 Subject: [PATCH 36/67] actively assert pattern in external_files --- dandi/tests/test_organize.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/dandi/tests/test_organize.py b/dandi/tests/test_organize.py index a4cce5fca..af24c0e59 100644 --- a/dandi/tests/test_organize.py +++ b/dandi/tests/test_organize.py @@ -280,11 +280,13 @@ def test_video_organize(video_mode, mode, create_video_nwbfiles, clirunner, tmp_ # get iamgeseries objects as dict(id=object_id, external_files=[]) ext_file_objects = _get_image_series(nwbfile) for ext_file_ob in ext_file_objects: - filename = vid_folder.name + f'/{ext_file_ob["id"]}_external_file_' - for name in ext_file_ob["external_files"]: + for no, name in enumerate(ext_file_ob["external_files"]): video_files_organized.append(name) # check if external_file arguments are correctly named according to convention: - assert filename in str(name) + filename = ( + f"{vid_folder.name}/{ext_file_ob['id']}_external_file_{no}" + ) + assert filename == Path(name).name # check if the files exist( both in case of move/copy): assert (vid_folder.parent / name).exists() # check all video files are organized: From 3b435b3a1c41bbea36b0abadcb0b2fbef5daa61c Mon Sep 17 00:00:00 2001 From: Saksham Sharda <11567561+Saksham20@users.noreply.github.com> Date: Wed, 19 Jan 2022 16:29:15 +0530 Subject: [PATCH 37/67] Update dandi/cli/cmd_organize.py Co-authored-by: Yaroslav Halchenko --- dandi/cli/cmd_organize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dandi/cli/cmd_organize.py b/dandi/cli/cmd_organize.py index 0e14c3f76..470028227 100644 --- a/dandi/cli/cmd_organize.py +++ b/dandi/cli/cmd_organize.py @@ -243,7 +243,7 @@ def _get_metadata(path): ] if all(external_files_missing_metadata_bool) and rewrite == "external-file": lgr.warning( - "rewrite option specified as 'external_file' but no external_files found" + "rewrite option specified as 'external_file' but no external_files found " "linked to any nwbfile found in %s", paths, ) From 2a8c99276963b414f42888fec2b5bbed0a020b02 Mon Sep 17 00:00:00 2001 From: Saksham Sharda <11567561+Saksham20@users.noreply.github.com> Date: Wed, 19 Jan 2022 20:02:49 +0530 Subject: [PATCH 38/67] add git-annex before run all tests --- .github/workflows/test.yml | 35 ++++++++++++++++------------------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 662bd7015..d3bd03366 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -79,6 +79,22 @@ jobs: run: | pip install git+https://github.com/NeurodataWithoutBorders/pynwb + - name: Install git-annex (ubuntu) + if: startsWith(matrix.os, 'ubuntu') + run: | + sudo apt-get install git-annex + + - name: Install git-annex (macOS) + if: startsWith(matrix.os, 'macos') + run: | + brew install git-annex + + - name: Install git-annex (windows) + if: startsWith(matrix.os, 'windows') + run: | + pip install datalad-installer + datalad-installer git-annex -m datalad/packages + - name: Run all tests if: matrix.mode != 'dandi-api' && github.event_name != 'schedule' run: | @@ -108,25 +124,6 @@ jobs: -f dandi/tests/data/dandiarchive-docker/docker-compose.yml \ down -v - - name: Install git-annex (ubuntu) - if: startsWith(matrix.os, 'ubuntu') - run: | - apt-get install git-annex - datalad wtf - - - name: Install git-annex (macOS) - if: startsWith(matrix.os, 'macos') - run: | - brew install git-annex - datalad wtf - - - name: Install git-annex (windows) - if: startsWith(matrix.os, 'windows') - run: | - pip install datalad-installer - datalad-installer git-annex -m datalad/packages - datalad wtf - - name: Upload coverage to Codecov uses: codecov/codecov-action@v2 with: From 5d60bfab01f9f63614cb68ed577265495b48fb20 Mon Sep 17 00:00:00 2001 From: Saksham Sharda Date: Wed, 19 Jan 2022 20:04:35 +0530 Subject: [PATCH 39/67] fix clirunner error --- dandi/tests/test_organize.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dandi/tests/test_organize.py b/dandi/tests/test_organize.py index af24c0e59..bcc6ee5ea 100644 --- a/dandi/tests/test_organize.py +++ b/dandi/tests/test_organize.py @@ -254,7 +254,7 @@ def error_link(src, dest): @pytest.mark.parametrize("mode", ["copy", "move"]) @pytest.mark.parametrize("video_mode", ["copy", "move", None]) -def test_video_organize(video_mode, mode, create_video_nwbfiles, clirunner, tmp_path): +def test_video_organize(video_mode, mode, create_video_nwbfiles): dandi_organize_path = create_video_nwbfiles.parent / "dandi_organized" dandi_organize_path.mkdir(parents=True, exist_ok=True) cmd = [ @@ -270,7 +270,7 @@ def test_video_organize(video_mode, mode, create_video_nwbfiles, clirunner, tmp_ ] video_files_list = list((create_video_nwbfiles.parent / "video_files").iterdir()) video_files_organized = [] - r = clirunner.invoke(organize, cmd) + r = CliRunner().invoke(organize, cmd) assert r.exit_code == 0 for nwbfile_name in dandi_organize_path.glob("**/*.nwb"): vid_folder = nwbfile_name.with_suffix("") From 221b48b972ee9feb2c730ebbe2f2538c04926674 Mon Sep 17 00:00:00 2001 From: Saksham Sharda Date: Wed, 19 Jan 2022 20:05:01 +0530 Subject: [PATCH 40/67] external file name assertion fix --- dandi/tests/test_organize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dandi/tests/test_organize.py b/dandi/tests/test_organize.py index bcc6ee5ea..a38a5778a 100644 --- a/dandi/tests/test_organize.py +++ b/dandi/tests/test_organize.py @@ -286,7 +286,7 @@ def test_video_organize(video_mode, mode, create_video_nwbfiles): filename = ( f"{vid_folder.name}/{ext_file_ob['id']}_external_file_{no}" ) - assert filename == Path(name).name + assert filename == str(Path(name).with_suffix("")) # check if the files exist( both in case of move/copy): assert (vid_folder.parent / name).exists() # check all video files are organized: From 4cc47d65782584c928722f271ec1c505b6256920 Mon Sep 17 00:00:00 2001 From: Saksham Sharda Date: Fri, 21 Jan 2022 17:52:38 +0530 Subject: [PATCH 41/67] remove pyfakefs --- setup.cfg | 1 - 1 file changed, 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 2185c8f55..3ed2c5990 100644 --- a/setup.cfg +++ b/setup.cfg @@ -79,7 +79,6 @@ test = anys ~= 0.2 coverage datalad - pyfakefs ~= 4.0 pytest pytest-cov pytest-mock From 90978c7067e1c9c0d4f22789c117e5c1f11c8d64 Mon Sep 17 00:00:00 2001 From: Saksham Sharda Date: Fri, 21 Jan 2022 18:00:27 +0530 Subject: [PATCH 42/67] pip installation of datalad-installer in ubuntu --- .github/workflows/test.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index d3bd03366..6c4b6f9ae 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -82,7 +82,8 @@ jobs: - name: Install git-annex (ubuntu) if: startsWith(matrix.os, 'ubuntu') run: | - sudo apt-get install git-annex + pip install datalad-installer + datalad-installer --sudo ok neurodebian git-annex -m neurodebian - name: Install git-annex (macOS) if: startsWith(matrix.os, 'macos') @@ -94,7 +95,7 @@ jobs: run: | pip install datalad-installer datalad-installer git-annex -m datalad/packages - + - name: Run all tests if: matrix.mode != 'dandi-api' && github.event_name != 'schedule' run: | From 37e951e6f02fd159ebbedee2c5d759603fce0a7e Mon Sep 17 00:00:00 2001 From: Saksham Sharda Date: Fri, 21 Jan 2022 18:01:12 +0530 Subject: [PATCH 43/67] external files mode supplied only when video_mode is not None --- dandi/tests/test_organize.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dandi/tests/test_organize.py b/dandi/tests/test_organize.py index a38a5778a..68dda811a 100644 --- a/dandi/tests/test_organize.py +++ b/dandi/tests/test_organize.py @@ -262,12 +262,12 @@ def test_video_organize(video_mode, mode, create_video_nwbfiles): mode, "--rewrite", "external-file", - "--external-files-mode", - video_mode, "-d", str(dandi_organize_path), str(create_video_nwbfiles), ] + if video_mode is not None: + cmd.extend(["--external-files-mode", video_mode]) video_files_list = list((create_video_nwbfiles.parent / "video_files").iterdir()) video_files_organized = [] r = CliRunner().invoke(organize, cmd) From d37a9758a8600c2bcd3f0370850a99133c0a5c32 Mon Sep 17 00:00:00 2001 From: Saksham Sharda Date: Fri, 21 Jan 2022 18:01:43 +0530 Subject: [PATCH 44/67] improve doc for external-files-mode cmd option --- dandi/cli/cmd_organize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dandi/cli/cmd_organize.py b/dandi/cli/cmd_organize.py index 470028227..2ed70772f 100644 --- a/dandi/cli/cmd_organize.py +++ b/dandi/cli/cmd_organize.py @@ -48,7 +48,7 @@ "--external-files-mode", type=click.Choice(file_operation_modes), default=None, - help="check help for --files-mode option", + help="Like --files-mode option, supply one of 'copy' or 'move'", ) @click.argument("paths", nargs=-1, type=click.Path(exists=True)) @devel_debug_option() From 5a3a64407079ffc4215d73a8710c0b499412d1a3 Mon Sep 17 00:00:00 2001 From: Saksham Sharda Date: Mon, 24 Jan 2022 15:58:15 +0530 Subject: [PATCH 45/67] rename extentions supported to VIDEO_FILE_EXTENSIONS --- dandi/consts.py | 4 ++-- dandi/pynwb_utils.py | 10 +++++----- dandi/validate.py | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/dandi/consts.py b/dandi/consts.py index 045e11955..8fd38a5f7 100644 --- a/dandi/consts.py +++ b/dandi/consts.py @@ -143,5 +143,5 @@ class EmbargoStatus(Enum): #: of retries) RETRY_STATUSES = (500, 502, 503, 504) -EXTERNAL_FILE_EXTENSIONS = [".mp4", ".avi", ".wmv", ".mov", ".flv"] -EXTERNAL_FILE_MODULES = ["processing", "acquisition"] +VIDEO_FILE_EXTENSIONS = [".mp4", ".avi", ".wmv", ".mov", ".flv"] +VIDEO_FILE_MODULES = ["processing", "acquisition"] diff --git a/dandi/pynwb_utils.py b/dandi/pynwb_utils.py index 051334193..9bed7ec6a 100644 --- a/dandi/pynwb_utils.py +++ b/dandi/pynwb_utils.py @@ -17,8 +17,8 @@ from . import __version__, get_logger from .consts import ( - EXTERNAL_FILE_EXTENSIONS, - EXTERNAL_FILE_MODULES, + VIDEO_FILE_EXTENSIONS, + VIDEO_FILE_MODULES, metadata_nwb_computed_fields, metadata_nwb_file_fields, metadata_nwb_subject_fields, @@ -255,19 +255,19 @@ def _get_image_series(nwb: pynwb.NWBFile) -> List[dict]: if no ImageSeries found in the given modules to check, then it returns an empty list. """ out = [] - for module_name in EXTERNAL_FILE_MODULES: + for module_name in VIDEO_FILE_MODULES: module_cont = getattr(nwb, module_name) for name, ob in module_cont.items(): if isinstance(ob, pynwb.image.ImageSeries) and ob.external_file is not None: out_dict = dict(id=ob.object_id, name=ob.name, external_files=[]) for ext_file in ob.external_file: - if Path(ext_file).suffix in EXTERNAL_FILE_EXTENSIONS: + if Path(ext_file).suffix in VIDEO_FILE_EXTENSIONS: out_dict["external_files"].append(Path(ext_file)) else: lgr.warning( "external file %s should be one of: %s", ext_file, - EXTERNAL_FILE_EXTENSIONS, + VIDEO_FILE_EXTENSIONS, ) out.append(out_dict) return out diff --git a/dandi/validate.py b/dandi/validate.py index 06246ab6c..03516b1aa 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -2,7 +2,7 @@ import os.path as op from . import get_logger -from .consts import EXTERNAL_FILE_EXTENSIONS, dandiset_metadata_file +from .consts import VIDEO_FILE_EXTENSIONS, dandiset_metadata_file from .metadata import get_metadata from .pynwb_utils import validate as pynwb_validate from .pynwb_utils import validate_cache @@ -42,7 +42,7 @@ def validate_file(filepath, schema_version=None, devel_debug=False): return validate_dandiset_yaml( filepath, schema_version=None, devel_debug=devel_debug ) - elif os.path.splitext(filepath)[-1] in EXTERNAL_FILE_EXTENSIONS: + elif os.path.splitext(filepath)[-1] in VIDEO_FILE_EXTENSIONS: return [] else: return pynwb_validate(filepath, devel_debug=devel_debug) + validate_asset_file( From c4a2bb47e32ba302be98f83571bfa2be87034a5f Mon Sep 17 00:00:00 2001 From: Saksham Sharda Date: Mon, 24 Jan 2022 15:59:17 +0530 Subject: [PATCH 46/67] separate method for video files validation --- dandi/validate.py | 37 ++++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/dandi/validate.py b/dandi/validate.py index 03516b1aa..5229a1d35 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -43,13 +43,48 @@ def validate_file(filepath, schema_version=None, devel_debug=False): filepath, schema_version=None, devel_debug=devel_debug ) elif os.path.splitext(filepath)[-1] in VIDEO_FILE_EXTENSIONS: - return [] + return validate_movie_file(filepath, devel_debug=devel_debug) else: return pynwb_validate(filepath, devel_debug=devel_debug) + validate_asset_file( filepath, schema_version=schema_version, devel_debug=devel_debug ) +def validate_movie_file(filepath, devel_debug=False): + try: + import cv2 + except ImportError: + lgr.error("could not validate video file as opencv is not installed") + raise Exception( + "do 'pip install opencv-python' to validate assisting video files" + ) + + if os.path.splitext(filepath)[-1] not in VIDEO_FILE_EXTENSIONS: + msg = f"file ext must be one of supported types: {filepath}" + if devel_debug: + raise Exception(msg) + lgr.warning(msg) + + try: + cap = cv2.VideoCapture(filepath) + if not cap.isOpened(): + msg = f"could not open video file {filepath} to validate" + if devel_debug: + raise Exception(msg) + lgr.warning(msg) + return [msg] + except Exception as e: + if devel_debug: + raise + lgr.warning("validation error %s for %s", e, filepath) + return [str(e)] + success, frame = cap.read() + if success: + return [] + else: + return [f"no frames in video file {filepath}"] + + @validate_cache.memoize_path def validate_dandiset_yaml(filepath, schema_version=None, devel_debug=False): """Validate dandiset.yaml""" From d6f65220ef45180b18445bd5c4719bb01bc6c7ca Mon Sep 17 00:00:00 2001 From: Saksham Sharda Date: Mon, 24 Jan 2022 16:01:06 +0530 Subject: [PATCH 47/67] use one option to rewrite external files --- dandi/cli/cmd_organize.py | 65 ++++++++++++++++++--------------------- 1 file changed, 30 insertions(+), 35 deletions(-) diff --git a/dandi/cli/cmd_organize.py b/dandi/cli/cmd_organize.py index 2ed70772f..ddd4b6088 100644 --- a/dandi/cli/cmd_organize.py +++ b/dandi/cli/cmd_organize.py @@ -37,18 +37,16 @@ show_default=True, ) @click.option( - "--rewrite", - type=click.Choice(["external-file"]), + "--rewrite-external-files", + type=click.Choice(["copy", "move"]), default=None, - help="Use to re-write attributes in an nwbfile. For example, if the value " - "is set to 'external-paths' then the external_path attribute of nwbfiles' " - "ImageSeries will be written with renamed video paths according to set rules.", -) -@click.option( - "--external-files-mode", - type=click.Choice(file_operation_modes), - default=None, - help="Like --files-mode option, supply one of 'copy' or 'move'", + help="using this options will rewrite the 'external_file' argument of the " + "ImageSeries in the nwb file. The value written will correspond to the" + " new location of the video files after being organized. This option will " + "work only if --files-mode is copy/move since the original nwb file is " + "modified Supply one of 'copy' or 'move' as the value. Copy will copy the " + "video file in the organized dandiset " + "(Copy option is not recommended for large video files)", ) @click.argument("paths", nargs=-1, type=click.Path(exists=True)) @devel_debug_option() @@ -59,8 +57,7 @@ def organize( invalid="fail", files_mode="auto", devel_debug=False, - rewrite=None, - external_files_mode=None, + rewrite_external_files=None, ): """(Re)organize files according to the metadata. @@ -122,7 +119,7 @@ def act(func, *args, **kwargs): lgr.debug("%s %s %s", func.__name__, args, kwargs) return func(*args, **kwargs) - if rewrite is not None and files_mode not in ["copy", "move"]: + if rewrite_external_files is not None and files_mode not in ["copy", "move"]: raise ValueError( "files_mode needs to be one of 'copy/move' for the rewrite option to work" ) @@ -241,35 +238,33 @@ def _get_metadata(path): external_files_missing_metadata_bool = [ len(m["external_file_objects"]) == 0 for m in metadata ] - if all(external_files_missing_metadata_bool) and rewrite == "external-file": + + if all(external_files_missing_metadata_bool) and rewrite_external_files is not None: lgr.warning( - "rewrite option specified as 'external_file' but no external_files found " + "rewrite_external_files specified but no external_files found " "linked to any nwbfile found in %s", paths, ) - - elif not all(external_files_missing_metadata_bool) and rewrite != "external-file": + elif ( + not all(external_files_missing_metadata_bool) and rewrite_external_files is None + ): raise ValueError( - "rewrite option not specified but found external video files linked to " + "rewrite_external_files option not specified but found external video files linked to " f"the nwbfiles " f"""{[metadata[no]['path'] for no, a in enumerate(external_files_missing_metadata_bool) - if not a]}, """ - f"change option: --rewrite 'external_file'" + if not a]}""" ) - if rewrite == "external-file": - if external_files_mode is None: - external_files_mode = "move" - lgr.warning( - "external_files_mode not specified, setting to recommended mode: 'move' " - ) - elif external_files_mode not in ["move", "copy"]: - raise ValueError( - "external_files mode should be either of 'move/copy' to " - "overwrite the external_file in the nwbfile" - ) - metadata = _create_external_file_names(metadata) + if rewrite_external_files is not None and rewrite_external_files not in [ + "move", + "copy", + ]: + raise ValueError( + "external_files mode should be either of 'move/copy' to " + "overwrite the external_file attribute of ImageSeries in the nwb file" + ) + metadata = _create_external_file_names(metadata) # Verify first that the target paths do not exist yet, and fail if they do # Note: in "simulate" mode we do early check as well, so this would be @@ -375,9 +370,9 @@ def _get_metadata(path): lgr.debug("Failed to remove directory %s: %s", d, exc) # create video file name and re write nwb file external files: - if rewrite == "external-file": + if rewrite_external_files is not None: rename_nwb_external_files(metadata, dandiset_path) - organize_external_files(metadata, dandiset_path, external_files_mode) + organize_external_files(metadata, dandiset_path, rewrite_external_files) def msg_(msg, n, cond=None): if hasattr(n, "__len__"): From 602098b4aceae47a1e0e5a243718a22d085e3c86 Mon Sep 17 00:00:00 2001 From: Saksham Sharda Date: Mon, 24 Jan 2022 16:03:36 +0530 Subject: [PATCH 48/67] update tests with new rewroting options --- dandi/tests/test_organize.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/dandi/tests/test_organize.py b/dandi/tests/test_organize.py index 68dda811a..e0f9914ab 100644 --- a/dandi/tests/test_organize.py +++ b/dandi/tests/test_organize.py @@ -260,14 +260,12 @@ def test_video_organize(video_mode, mode, create_video_nwbfiles): cmd = [ "--files-mode", mode, - "--rewrite", - "external-file", "-d", str(dandi_organize_path), str(create_video_nwbfiles), ] if video_mode is not None: - cmd.extend(["--external-files-mode", video_mode]) + cmd.extend(["--rewrite-external-files", video_mode]) video_files_list = list((create_video_nwbfiles.parent / "video_files").iterdir()) video_files_organized = [] r = CliRunner().invoke(organize, cmd) From 5a2e238f436889c6c37d22dd9a2aaf3877a78f3e Mon Sep 17 00:00:00 2001 From: Saksham Sharda Date: Mon, 24 Jan 2022 18:48:54 +0530 Subject: [PATCH 49/67] create video files using opencv --- .github/workflows/test.yml | 17 ----------- dandi/tests/fixtures.py | 59 +++++++++++++++++++++++++----------- dandi/tests/test_organize.py | 2 +- setup.cfg | 2 +- 4 files changed, 43 insertions(+), 37 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 6c4b6f9ae..402dbadaf 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -79,23 +79,6 @@ jobs: run: | pip install git+https://github.com/NeurodataWithoutBorders/pynwb - - name: Install git-annex (ubuntu) - if: startsWith(matrix.os, 'ubuntu') - run: | - pip install datalad-installer - datalad-installer --sudo ok neurodebian git-annex -m neurodebian - - - name: Install git-annex (macOS) - if: startsWith(matrix.os, 'macos') - run: | - brew install git-annex - - - name: Install git-annex (windows) - if: startsWith(matrix.os, 'windows') - run: | - pip install datalad-installer - datalad-installer git-annex -m datalad/packages - - name: Run all tests if: matrix.mode != 'dandi-api' && github.event_name != 'schedule' run: | diff --git a/dandi/tests/fixtures.py b/dandi/tests/fixtures.py index 1f786d8d2..5b2aadf35 100644 --- a/dandi/tests/fixtures.py +++ b/dandi/tests/fixtures.py @@ -12,6 +12,7 @@ from click.testing import CliRunner from dandischema.consts import DANDI_SCHEMA_VERSION from dateutil.tz import tzlocal, tzutc +import numpy as np import pynwb from pynwb import NWBHDF5IO, NWBFile from pynwb.device import Device @@ -312,29 +313,51 @@ def upload_dandiset(paths=None, **kwargs): } -@pytest.fixture(scope="session") -def download_video_files(tmp_path_factory): - from datalad.api import install - - pt = tmp_path_factory.mktemp("ophys_testing_data") - dataset = install( - path=pt, source="https://gin.g-node.org/CatalystNeuro/ophys_testing_data" - ) - resp = dataset.get("video_files") - return Path(resp[0]["path"]) +@pytest.fixture() +def create_video_files(tmp_path): + video_paths = [] + import cv2 + + video_path = tmp_path / "video_files" + video_path.mkdir() + for no, ext in enumerate([".avi", ".avi"]): + movie_file1 = video_path / f"test1_{no}{ext}" + movie_file2 = video_path / f"test2_{no}{ext}" + (nf, nx, ny) = (5, 10, 20) + writer1 = cv2.VideoWriter( + filename=str(movie_file1), + apiPreference=None, + fourcc=cv2.VideoWriter_fourcc(*"DIVX"), + fps=25, + frameSize=(ny, nx), + params=None, + ) + writer2 = cv2.VideoWriter( + filename=str(movie_file2), + apiPreference=None, + fourcc=cv2.VideoWriter_fourcc(*"DIVX"), + fps=25, + frameSize=(ny, nx), + params=None, + ) + for k in range(nf): + writer1.write(np.random.randint(0, 255, (nx, ny, 3)).astype("uint8")) + writer2.write(np.random.randint(0, 255, (nx, ny, 3)).astype("uint8")) + writer1.release() + writer2.release() + video_paths.append((movie_file1, movie_file1)) + return video_paths @pytest.fixture() -def create_video_nwbfiles(download_video_files, tmp_path): - base_nwb_path = tmp_path / "nwbfiles" +def create_video_nwbfiles(create_video_files): + parent_folder = create_video_files[0][0].parent.parent + base_nwb_path = parent_folder / "nwbfiles" base_nwb_path.mkdir(parents=True, exist_ok=True) - base_vid_path = tmp_path / "video_files" - shutil.copytree(download_video_files, str(base_vid_path)) - video_files_list = [i for i in base_vid_path.iterdir()] - for no in range(0, len(video_files_list), 2): - vid_1 = video_files_list[no] - vid_2 = video_files_list[no + 1] + for no, vid_loc in enumerate(create_video_files): + vid_1 = vid_loc[0] + vid_2 = vid_loc[1] subject_id = f"mouse{no}" session_id = f"sessionid{no}" subject = Subject( diff --git a/dandi/tests/test_organize.py b/dandi/tests/test_organize.py index e0f9914ab..b7ef66624 100644 --- a/dandi/tests/test_organize.py +++ b/dandi/tests/test_organize.py @@ -265,7 +265,7 @@ def test_video_organize(video_mode, mode, create_video_nwbfiles): str(create_video_nwbfiles), ] if video_mode is not None: - cmd.extend(["--rewrite-external-files", video_mode]) + cmd = ["--rewrite-external-files", video_mode] + cmd video_files_list = list((create_video_nwbfiles.parent / "video_files").iterdir()) video_files_organized = [] r = CliRunner().invoke(organize, cmd) diff --git a/setup.cfg b/setup.cfg index 3ed2c5990..416d96eb3 100644 --- a/setup.cfg +++ b/setup.cfg @@ -78,7 +78,7 @@ style = test = anys ~= 0.2 coverage - datalad + opencv-python pytest pytest-cov pytest-mock From 52e26cfeff86298291cad2540f9fa0cd171572b9 Mon Sep 17 00:00:00 2001 From: Saksham Sharda Date: Tue, 25 Jan 2022 14:12:12 +0530 Subject: [PATCH 50/67] tests bug fix --- dandi/tests/fixtures.py | 2 +- dandi/tests/test_organize.py | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/dandi/tests/fixtures.py b/dandi/tests/fixtures.py index 5b2aadf35..9b83f8396 100644 --- a/dandi/tests/fixtures.py +++ b/dandi/tests/fixtures.py @@ -345,7 +345,7 @@ def create_video_files(tmp_path): writer2.write(np.random.randint(0, 255, (nx, ny, 3)).astype("uint8")) writer1.release() writer2.release() - video_paths.append((movie_file1, movie_file1)) + video_paths.append((movie_file1, movie_file2)) return video_paths diff --git a/dandi/tests/test_organize.py b/dandi/tests/test_organize.py index b7ef66624..d303832cd 100644 --- a/dandi/tests/test_organize.py +++ b/dandi/tests/test_organize.py @@ -253,19 +253,18 @@ def error_link(src, dest): @pytest.mark.parametrize("mode", ["copy", "move"]) -@pytest.mark.parametrize("video_mode", ["copy", "move", None]) +@pytest.mark.parametrize("video_mode", ["copy", "move"]) def test_video_organize(video_mode, mode, create_video_nwbfiles): dandi_organize_path = create_video_nwbfiles.parent / "dandi_organized" - dandi_organize_path.mkdir(parents=True, exist_ok=True) cmd = [ "--files-mode", mode, + "--rewrite-external-files", + video_mode, "-d", str(dandi_organize_path), str(create_video_nwbfiles), ] - if video_mode is not None: - cmd = ["--rewrite-external-files", video_mode] + cmd video_files_list = list((create_video_nwbfiles.parent / "video_files").iterdir()) video_files_organized = [] r = CliRunner().invoke(organize, cmd) From fdde52ed9747e2e66d030f6deb955106ed7993b7 Mon Sep 17 00:00:00 2001 From: Saksham Sharda Date: Tue, 25 Jan 2022 14:21:45 +0530 Subject: [PATCH 51/67] platform independent file loc --- dandi/tests/test_organize.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dandi/tests/test_organize.py b/dandi/tests/test_organize.py index d303832cd..4aa8b2b7c 100644 --- a/dandi/tests/test_organize.py +++ b/dandi/tests/test_organize.py @@ -280,10 +280,10 @@ def test_video_organize(video_mode, mode, create_video_nwbfiles): for no, name in enumerate(ext_file_ob["external_files"]): video_files_organized.append(name) # check if external_file arguments are correctly named according to convention: - filename = ( + filename = Path( f"{vid_folder.name}/{ext_file_ob['id']}_external_file_{no}" ) - assert filename == str(Path(name).with_suffix("")) + assert str(filename) == str(Path(name).with_suffix("")) # check if the files exist( both in case of move/copy): assert (vid_folder.parent / name).exists() # check all video files are organized: From 0ed3fa363e122ccd06bd292c95514bfe6950278c Mon Sep 17 00:00:00 2001 From: Saksham Sharda Date: Tue, 25 Jan 2022 17:51:09 +0530 Subject: [PATCH 52/67] validation tests --- dandi/tests/test_validate.py | 25 +++++++++++++++++++++++++ dandi/validate.py | 2 +- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/dandi/tests/test_validate.py b/dandi/tests/test_validate.py index 66afdeb07..412d1bab6 100644 --- a/dandi/tests/test_validate.py +++ b/dandi/tests/test_validate.py @@ -1,4 +1,7 @@ +import cv2 from dandischema.models import get_schema_version +import numpy as np +import pytest from ..validate import validate_file @@ -31,3 +34,25 @@ def test_validate_bogus(tmp_path): # ATM we would get 2 errors -- since could not be open in two places, # but that would be too rigid to test. Let's just see that we have expected errors assert any(e.startswith("Failed to read metadata") for e in errors) + + +@pytest.mark.parametrize("no_frames", [10, 0]) +def test_validate_movie(tmp_path, no_frames): + frame_size = (10, 10) + movie_loc = tmp_path / "movie.avi" + writer1 = cv2.VideoWriter( + filename=str(movie_loc), + apiPreference=None, + fourcc=cv2.VideoWriter_fourcc(*"DIVX"), + fps=25, + frameSize=frame_size, + params=None, + ) + for i in range(no_frames): + writer1.write(np.random.randint(0, 255, (*frame_size, 3)).astype("uint8")) + writer1.release() + errors = validate_file(movie_loc) + if no_frames == 0: + assert errors[0] == f"no frames in video file {movie_loc}" + else: + assert not errors diff --git a/dandi/validate.py b/dandi/validate.py index 5229a1d35..6055c62a9 100644 --- a/dandi/validate.py +++ b/dandi/validate.py @@ -66,7 +66,7 @@ def validate_movie_file(filepath, devel_debug=False): lgr.warning(msg) try: - cap = cv2.VideoCapture(filepath) + cap = cv2.VideoCapture(str(filepath)) if not cap.isOpened(): msg = f"could not open video file {filepath} to validate" if devel_debug: From 5b13d32870f9cb400cac6ee0ec3a653009aa1145 Mon Sep 17 00:00:00 2001 From: Saksham Sharda Date: Thu, 27 Jan 2022 20:11:35 +0530 Subject: [PATCH 53/67] implementing intuitive naming of options, using symlinks/hardlinks for video files --- dandi/cli/cmd_organize.py | 51 +++++++++++++++++++----------------- dandi/tests/test_organize.py | 5 ++-- 2 files changed, 30 insertions(+), 26 deletions(-) diff --git a/dandi/cli/cmd_organize.py b/dandi/cli/cmd_organize.py index ddd4b6088..a329d8431 100644 --- a/dandi/cli/cmd_organize.py +++ b/dandi/cli/cmd_organize.py @@ -37,16 +37,20 @@ show_default=True, ) @click.option( - "--rewrite-external-files", - type=click.Choice(["copy", "move"]), - default=None, + "--modify-external-file-fields", + is_flag=True, + default=False, help="using this options will rewrite the 'external_file' argument of the " "ImageSeries in the nwb file. The value written will correspond to the" " new location of the video files after being organized. This option will " "work only if --files-mode is copy/move since the original nwb file is " - "modified Supply one of 'copy' or 'move' as the value. Copy will copy the " - "video file in the organized dandiset " - "(Copy option is not recommended for large video files)", + "modified", +) +@click.option( + "--media-files-mode", + type=click.Choice(["copy", "move", "symlink", "hardlink"]), + default=None, + help="Supply one of copy/move/symlink/hardlink as the value", ) @click.argument("paths", nargs=-1, type=click.Path(exists=True)) @devel_debug_option() @@ -57,7 +61,8 @@ def organize( invalid="fail", files_mode="auto", devel_debug=False, - rewrite_external_files=None, + modify_external_file_fields=False, + media_files_mode=None, ): """(Re)organize files according to the metadata. @@ -119,7 +124,7 @@ def act(func, *args, **kwargs): lgr.debug("%s %s %s", func.__name__, args, kwargs) return func(*args, **kwargs) - if rewrite_external_files is not None and files_mode not in ["copy", "move"]: + if modify_external_file_fields and files_mode not in ["copy", "move"]: raise ValueError( "files_mode needs to be one of 'copy/move' for the rewrite option to work" ) @@ -235,35 +240,33 @@ def _get_metadata(path): metadata = create_unique_filenames_from_metadata(metadata) # update metadata with external_file information: - external_files_missing_metadata_bool = [ + external_files_missing_in_nwbfiles = [ len(m["external_file_objects"]) == 0 for m in metadata ] - if all(external_files_missing_metadata_bool) and rewrite_external_files is not None: + if all(external_files_missing_in_nwbfiles) and modify_external_file_fields: lgr.warning( - "rewrite_external_files specified but no external_files found " + "modify_external_file_fields specified but no external_files found " "linked to any nwbfile found in %s", paths, ) elif ( - not all(external_files_missing_metadata_bool) and rewrite_external_files is None + not all(external_files_missing_in_nwbfiles) and not modify_external_file_fields ): raise ValueError( - "rewrite_external_files option not specified but found external video files linked to " - f"the nwbfiles " + "modify_external_file_fields option not specified but found " + "external video files linked to the nwbfiles " f"""{[metadata[no]['path'] - for no, a in enumerate(external_files_missing_metadata_bool) + for no, a in enumerate(external_files_missing_in_nwbfiles) if not a]}""" ) - if rewrite_external_files is not None and rewrite_external_files not in [ - "move", - "copy", - ]: - raise ValueError( - "external_files mode should be either of 'move/copy' to " - "overwrite the external_file attribute of ImageSeries in the nwb file" + if modify_external_file_fields and media_files_mode is None: + media_files_mode = "symlink" + lgr.warning( + "media_files_mode not specified, setting to recommended mode: 'symlink' " ) + metadata = _create_external_file_names(metadata) # Verify first that the target paths do not exist yet, and fail if they do @@ -370,9 +373,9 @@ def _get_metadata(path): lgr.debug("Failed to remove directory %s: %s", d, exc) # create video file name and re write nwb file external files: - if rewrite_external_files is not None: + if modify_external_file_fields: rename_nwb_external_files(metadata, dandiset_path) - organize_external_files(metadata, dandiset_path, rewrite_external_files) + organize_external_files(metadata, dandiset_path, media_files_mode) def msg_(msg, n, cond=None): if hasattr(n, "__len__"): diff --git a/dandi/tests/test_organize.py b/dandi/tests/test_organize.py index 4aa8b2b7c..db77d68a1 100644 --- a/dandi/tests/test_organize.py +++ b/dandi/tests/test_organize.py @@ -253,13 +253,14 @@ def error_link(src, dest): @pytest.mark.parametrize("mode", ["copy", "move"]) -@pytest.mark.parametrize("video_mode", ["copy", "move"]) +@pytest.mark.parametrize("video_mode", ["copy", "move", "symlink", "hardlink"]) def test_video_organize(video_mode, mode, create_video_nwbfiles): dandi_organize_path = create_video_nwbfiles.parent / "dandi_organized" cmd = [ "--files-mode", mode, - "--rewrite-external-files", + "--modify-external-file-fields", + "--media-files-mode", video_mode, "-d", str(dandi_organize_path), From 4bb0f96449c7ccd81412629938b56a0ca8dbb73c Mon Sep 17 00:00:00 2001 From: Saksham Sharda Date: Mon, 31 Jan 2022 22:16:24 +0530 Subject: [PATCH 54/67] remove validation test --- dandi/tests/test_validate.py | 27 --------------------------- 1 file changed, 27 deletions(-) delete mode 100644 dandi/tests/test_validate.py diff --git a/dandi/tests/test_validate.py b/dandi/tests/test_validate.py deleted file mode 100644 index ddd33c497..000000000 --- a/dandi/tests/test_validate.py +++ /dev/null @@ -1,27 +0,0 @@ -import cv2 -import numpy as np -import pytest - -from ..validate import validate_file - - -@pytest.mark.parametrize("no_frames", [10, 0]) -def test_validate_movie(tmp_path, no_frames): - frame_size = (10, 10) - movie_loc = tmp_path / "movie.avi" - writer1 = cv2.VideoWriter( - filename=str(movie_loc), - apiPreference=None, - fourcc=cv2.VideoWriter_fourcc(*"DIVX"), - fps=25, - frameSize=frame_size, - params=None, - ) - for i in range(no_frames): - writer1.write(np.random.randint(0, 255, (*frame_size, 3)).astype("uint8")) - writer1.release() - errors = validate_file(movie_loc) - if no_frames == 0: - assert errors[0] == f"no frames in video file {movie_loc}" - else: - assert not errors From 49375d926913cee3ef40a8cb5e4765cd0677a344 Mon Sep 17 00:00:00 2001 From: Saksham Sharda Date: Tue, 1 Feb 2022 20:39:29 +0530 Subject: [PATCH 55/67] optimize imports in command line commands --- dandi/cli/cmd_organize.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dandi/cli/cmd_organize.py b/dandi/cli/cmd_organize.py index a329d8431..054b9b40a 100644 --- a/dandi/cli/cmd_organize.py +++ b/dandi/cli/cmd_organize.py @@ -5,8 +5,6 @@ from .base import dandiset_path_option, devel_debug_option, lgr, map_to_click_exceptions from ..consts import file_operation_modes -from ..organize import _create_external_file_names, organize_external_files -from ..pynwb_utils import rename_nwb_external_files @click.command() @@ -100,11 +98,13 @@ def organize( from ..dandiset import Dandiset from ..metadata import get_metadata from ..organize import ( + _create_external_file_names, create_unique_filenames_from_metadata, detect_link_type, filter_invalid_metadata_rows, + organize_external_files, ) - from ..pynwb_utils import ignore_benign_pynwb_warnings + from ..pynwb_utils import ignore_benign_pynwb_warnings, rename_nwb_external_files from ..utils import Parallel, copy_file, delayed, find_files, load_jsonl, move_file in_place = False # If we deduce that we are organizing in-place From 987787059357acc4f6792dcad52f7f845bfb492c Mon Sep 17 00:00:00 2001 From: Saksham Sharda Date: Wed, 2 Feb 2022 16:23:07 +0530 Subject: [PATCH 56/67] bug fix --- dandi/pynwb_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dandi/pynwb_utils.py b/dandi/pynwb_utils.py index 1161f4691..9f8fe7975 100644 --- a/dandi/pynwb_utils.py +++ b/dandi/pynwb_utils.py @@ -290,7 +290,7 @@ def rename_nwb_external_files(metadata: list, dandiset_path: str) -> None: """ for meta in metadata: if not np.all( - i in meta for i in ["path", "dandi_path", "external_file_objects"] + [i in meta for i in ["path", "dandi_path", "external_file_objects"]] ): lgr.warning( "could not rename external files, update metadata" From 525534d5a08dd43fa370c09e14229506265a77e7 Mon Sep 17 00:00:00 2001 From: Saksham Sharda Date: Thu, 3 Feb 2022 13:19:01 +0530 Subject: [PATCH 57/67] np all remove --- dandi/pynwb_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dandi/pynwb_utils.py b/dandi/pynwb_utils.py index 9f8fe7975..0c104017e 100644 --- a/dandi/pynwb_utils.py +++ b/dandi/pynwb_utils.py @@ -289,7 +289,7 @@ def rename_nwb_external_files(metadata: list, dandiset_path: str) -> None: base path of dandiset """ for meta in metadata: - if not np.all( + if not all( [i in meta for i in ["path", "dandi_path", "external_file_objects"]] ): lgr.warning( From 5ec96a472234cd0d5e7d6deb5d2cffc98884e578 Mon Sep 17 00:00:00 2001 From: Saksham Sharda Date: Thu, 3 Feb 2022 13:41:25 +0530 Subject: [PATCH 58/67] np all remove --- dandi/pynwb_utils.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/dandi/pynwb_utils.py b/dandi/pynwb_utils.py index 0c104017e..192a4d3a0 100644 --- a/dandi/pynwb_utils.py +++ b/dandi/pynwb_utils.py @@ -289,9 +289,7 @@ def rename_nwb_external_files(metadata: list, dandiset_path: str) -> None: base path of dandiset """ for meta in metadata: - if not all( - [i in meta for i in ["path", "dandi_path", "external_file_objects"]] - ): + if not all(i in meta for i in ["path", "dandi_path", "external_file_objects"]): lgr.warning( "could not rename external files, update metadata" 'with "path", "dandi_path", "external_file_objects"' From 4241edca5a9a434ab9d75a071b60c89af8f34bb5 Mon Sep 17 00:00:00 2001 From: Saksham Sharda Date: Sat, 5 Feb 2022 11:01:18 +0530 Subject: [PATCH 59/67] update click option: --update-external-file-paths and comments --- dandi/cli/cmd_organize.py | 36 +++++++++++++++++------------------- dandi/tests/test_organize.py | 2 +- 2 files changed, 18 insertions(+), 20 deletions(-) diff --git a/dandi/cli/cmd_organize.py b/dandi/cli/cmd_organize.py index 054b9b40a..7be76baee 100644 --- a/dandi/cli/cmd_organize.py +++ b/dandi/cli/cmd_organize.py @@ -35,20 +35,20 @@ show_default=True, ) @click.option( - "--modify-external-file-fields", + "--update-external-file-paths", is_flag=True, default=False, - help="using this options will rewrite the 'external_file' argument of the " - "ImageSeries in the nwb file. The value written will correspond to the" - " new location of the video files after being organized. This option will " - "work only if --files-mode is copy/move since the original nwb file is " - "modified", + help="Rewrite the 'external_file' arguments of ImageSeries in NWB files. " + "The new values will correspond to the new locations of the video files " + "after being organized. " + "This option requires --files-mode to be 'copy' or 'move'", ) @click.option( "--media-files-mode", type=click.Choice(["copy", "move", "symlink", "hardlink"]), default=None, - help="Supply one of copy/move/symlink/hardlink as the value", + help="This option works on the video files on disc while being organized " + "along side nwb files.", ) @click.argument("paths", nargs=-1, type=click.Path(exists=True)) @devel_debug_option() @@ -59,7 +59,7 @@ def organize( invalid="fail", files_mode="auto", devel_debug=False, - modify_external_file_fields=False, + update_external_file_paths=False, media_files_mode=None, ): """(Re)organize files according to the metadata. @@ -124,9 +124,9 @@ def act(func, *args, **kwargs): lgr.debug("%s %s %s", func.__name__, args, kwargs) return func(*args, **kwargs) - if modify_external_file_fields and files_mode not in ["copy", "move"]: + if update_external_file_paths and files_mode not in ["copy", "move"]: raise ValueError( - "files_mode needs to be one of 'copy/move' for the rewrite option to work" + "--files-mode needs to be one of 'copy/move' for the rewrite option to work" ) if dandiset_path is None: @@ -244,27 +244,25 @@ def _get_metadata(path): len(m["external_file_objects"]) == 0 for m in metadata ] - if all(external_files_missing_in_nwbfiles) and modify_external_file_fields: + if all(external_files_missing_in_nwbfiles) and update_external_file_paths: lgr.warning( - "modify_external_file_fields specified but no external_files found " + "--update-external-file-paths specified but no external_files found " "linked to any nwbfile found in %s", paths, ) - elif ( - not all(external_files_missing_in_nwbfiles) and not modify_external_file_fields - ): + elif not all(external_files_missing_in_nwbfiles) and not update_external_file_paths: raise ValueError( - "modify_external_file_fields option not specified but found " + "--update-external-file-paths option not specified but found " "external video files linked to the nwbfiles " f"""{[metadata[no]['path'] for no, a in enumerate(external_files_missing_in_nwbfiles) if not a]}""" ) - if modify_external_file_fields and media_files_mode is None: + if update_external_file_paths and media_files_mode is None: media_files_mode = "symlink" lgr.warning( - "media_files_mode not specified, setting to recommended mode: 'symlink' " + "--media-files-mode not specified, setting to recommended mode: 'symlink' " ) metadata = _create_external_file_names(metadata) @@ -373,7 +371,7 @@ def _get_metadata(path): lgr.debug("Failed to remove directory %s: %s", d, exc) # create video file name and re write nwb file external files: - if modify_external_file_fields: + if update_external_file_paths: rename_nwb_external_files(metadata, dandiset_path) organize_external_files(metadata, dandiset_path, media_files_mode) diff --git a/dandi/tests/test_organize.py b/dandi/tests/test_organize.py index 3e3ed2720..6be9660cb 100644 --- a/dandi/tests/test_organize.py +++ b/dandi/tests/test_organize.py @@ -266,7 +266,7 @@ def test_video_organize(video_mode, mode, create_video_nwbfiles): cmd = [ "--files-mode", mode, - "--modify-external-file-fields", + "--update-external-file-paths", "--media-files-mode", video_mode, "-d", From ba0b74ebcc568362281ea193806ce892fa5e747a Mon Sep 17 00:00:00 2001 From: Saksham Sharda <11567561+Saksham20@users.noreply.github.com> Date: Sat, 5 Feb 2022 11:04:52 +0530 Subject: [PATCH 60/67] raise exception using click.UsageError Co-authored-by: John T. Wodder II --- dandi/cli/cmd_organize.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dandi/cli/cmd_organize.py b/dandi/cli/cmd_organize.py index 7be76baee..a0195e40f 100644 --- a/dandi/cli/cmd_organize.py +++ b/dandi/cli/cmd_organize.py @@ -125,7 +125,7 @@ def act(func, *args, **kwargs): return func(*args, **kwargs) if update_external_file_paths and files_mode not in ["copy", "move"]: - raise ValueError( + raise click.UsageError( "--files-mode needs to be one of 'copy/move' for the rewrite option to work" ) @@ -251,7 +251,7 @@ def _get_metadata(path): paths, ) elif not all(external_files_missing_in_nwbfiles) and not update_external_file_paths: - raise ValueError( + raise click.UsageError( "--update-external-file-paths option not specified but found " "external video files linked to the nwbfiles " f"""{[metadata[no]['path'] From 9d95d1d7caa7adafbf06167ae61ebaff2d8daed8 Mon Sep 17 00:00:00 2001 From: Saksham Sharda <11567561+Saksham20@users.noreply.github.com> Date: Sat, 5 Feb 2022 11:07:17 +0530 Subject: [PATCH 61/67] explicit type hints for list of dicts Co-authored-by: John T. Wodder II --- dandi/organize.py | 4 ++-- dandi/pynwb_utils.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dandi/organize.py b/dandi/organize.py index aec166e01..3817943d8 100644 --- a/dandi/organize.py +++ b/dandi/organize.py @@ -174,7 +174,7 @@ def create_unique_filenames_from_metadata(metadata): return metadata -def _create_external_file_names(metadata: list) -> List[dict]: +def _create_external_file_names(metadata: List[dict]) -> List[dict]: """ Renames the external_file attribute in an ImageSeries according to the rule: Example, the Initial name of file: @@ -212,7 +212,7 @@ def _create_external_file_names(metadata: list) -> List[dict]: def organize_external_files( - metadata: list, dandiset_path: str, files_mode: str + metadata: List[dict], dandiset_path: str, files_mode: str ) -> None: """ Organizes the external_files into the new Dandiset folder structure. diff --git a/dandi/pynwb_utils.py b/dandi/pynwb_utils.py index 192a4d3a0..25521053b 100644 --- a/dandi/pynwb_utils.py +++ b/dandi/pynwb_utils.py @@ -274,7 +274,7 @@ def _get_image_series(nwb: pynwb.NWBFile) -> List[dict]: return out -def rename_nwb_external_files(metadata: list, dandiset_path: str) -> None: +def rename_nwb_external_files(metadata: List[dict], dandiset_path: str) -> None: """ This method, renames the external_file attribute in an ImageSeries datatype in an open nwb file. It pulls information about the ImageSEries objects From 13b99276dc011c321ee26714ed5ffdeff03ef745 Mon Sep 17 00:00:00 2001 From: Saksham Sharda Date: Sat, 5 Feb 2022 11:18:25 +0530 Subject: [PATCH 62/67] code review suggestions --- dandi/organize.py | 6 +++--- dandi/pynwb_utils.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/dandi/organize.py b/dandi/organize.py index aec166e01..2edd689d9 100644 --- a/dandi/organize.py +++ b/dandi/organize.py @@ -177,6 +177,7 @@ def create_unique_filenames_from_metadata(metadata): def _create_external_file_names(metadata: list) -> List[dict]: """ Renames the external_file attribute in an ImageSeries according to the rule: + /_external_file_<.ext> Example, the Initial name of file: external_file = [name1.mp4] rename to: @@ -206,7 +207,7 @@ def _create_external_file_names(metadata: list) -> List[dict]: renamed = op.join( nwb_folder_name, f"{uuid_str}_external_file_{no}{ext_file.suffix}" ) - renamed_path_list.append(str(renamed)) + renamed_path_list.append(renamed) ext_file_dict["external_files_renamed"] = renamed_path_list return metadata @@ -237,8 +238,7 @@ def organize_external_files( ): new_path = op.join(dandiset_path, op.dirname(e["dandi_path"]), name_new) name_old_str = str(name_old) - if not op.exists(op.dirname(new_path)): - os.makedirs(op.dirname(new_path)) + os.makedirs(op.dirname(new_path), exist_ok=True) if files_mode == "symlink": os.symlink(name_old_str, new_path) elif files_mode == "hardlink": diff --git a/dandi/pynwb_utils.py b/dandi/pynwb_utils.py index 192a4d3a0..a96db9d5c 100644 --- a/dandi/pynwb_utils.py +++ b/dandi/pynwb_utils.py @@ -268,7 +268,7 @@ def _get_image_series(nwb: pynwb.NWBFile) -> List[dict]: lgr.warning( "external file %s should be one of: %s", ext_file, - VIDEO_FILE_EXTENSIONS, + ",".join(VIDEO_FILE_EXTENSIONS), ) out.append(out_dict) return out From a43c81a7aa990ddb66def6b9f93633c410b6910c Mon Sep 17 00:00:00 2001 From: Saksham Sharda Date: Sat, 5 Feb 2022 11:23:28 +0530 Subject: [PATCH 63/67] code review suggestions, remane fixture names to nouns --- dandi/tests/fixtures.py | 17 ++++++----------- dandi/tests/test_organize.py | 8 ++++---- 2 files changed, 10 insertions(+), 15 deletions(-) diff --git a/dandi/tests/fixtures.py b/dandi/tests/fixtures.py index 577bdc9eb..a2dcb2c7d 100644 --- a/dandi/tests/fixtures.py +++ b/dandi/tests/fixtures.py @@ -396,15 +396,15 @@ def zarr_dandiset(new_dandiset: SampleDandiset) -> SampleDandiset: @pytest.fixture() -def create_video_files(tmp_path): +def video_nwbfiles(tmp_path): video_paths = [] import cv2 video_path = tmp_path / "video_files" video_path.mkdir() - for no, ext in enumerate([".avi", ".avi"]): - movie_file1 = video_path / f"test1_{no}{ext}" - movie_file2 = video_path / f"test2_{no}{ext}" + for no in range(2): + movie_file1 = video_path / f"test1_{no}.avi" + movie_file2 = video_path / f"test2_{no}.avi" (nf, nx, ny) = (5, 10, 20) writer1 = cv2.VideoWriter( filename=str(movie_file1), @@ -428,16 +428,11 @@ def create_video_files(tmp_path): writer1.release() writer2.release() video_paths.append((movie_file1, movie_file2)) - return video_paths - - -@pytest.fixture() -def create_video_nwbfiles(create_video_files): - parent_folder = create_video_files[0][0].parent.parent + parent_folder = video_paths[0][0].parent.parent base_nwb_path = parent_folder / "nwbfiles" base_nwb_path.mkdir(parents=True, exist_ok=True) - for no, vid_loc in enumerate(create_video_files): + for no, vid_loc in enumerate(video_paths): vid_1 = vid_loc[0] vid_2 = vid_loc[1] subject_id = f"mouse{no}" diff --git a/dandi/tests/test_organize.py b/dandi/tests/test_organize.py index 6be9660cb..f23bac4d4 100644 --- a/dandi/tests/test_organize.py +++ b/dandi/tests/test_organize.py @@ -261,8 +261,8 @@ def error_link(src: Any, dest: Any) -> NoReturn: @pytest.mark.parametrize("mode", ["copy", "move"]) @pytest.mark.parametrize("video_mode", ["copy", "move", "symlink", "hardlink"]) -def test_video_organize(video_mode, mode, create_video_nwbfiles): - dandi_organize_path = create_video_nwbfiles.parent / "dandi_organized" +def test_video_organize(video_mode, mode, video_nwbfiles): + dandi_organize_path = video_nwbfiles.parent / "dandi_organized" cmd = [ "--files-mode", mode, @@ -271,9 +271,9 @@ def test_video_organize(video_mode, mode, create_video_nwbfiles): video_mode, "-d", str(dandi_organize_path), - str(create_video_nwbfiles), + str(video_nwbfiles), ] - video_files_list = list((create_video_nwbfiles.parent / "video_files").iterdir()) + video_files_list = list((video_nwbfiles.parent / "video_files").iterdir()) video_files_organized = [] r = CliRunner().invoke(organize, cmd) assert r.exit_code == 0 From af91844120a1e1ea005389e547d27d60a571a238 Mon Sep 17 00:00:00 2001 From: Saksham Sharda Date: Sat, 5 Feb 2022 11:45:52 +0530 Subject: [PATCH 64/67] fixture fix --- dandi/tests/fixtures.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dandi/tests/fixtures.py b/dandi/tests/fixtures.py index a2dcb2c7d..7e9e6e50f 100644 --- a/dandi/tests/fixtures.py +++ b/dandi/tests/fixtures.py @@ -428,8 +428,7 @@ def video_nwbfiles(tmp_path): writer1.release() writer2.release() video_paths.append((movie_file1, movie_file2)) - parent_folder = video_paths[0][0].parent.parent - base_nwb_path = parent_folder / "nwbfiles" + base_nwb_path = tmp_path / "nwbfiles" base_nwb_path.mkdir(parents=True, exist_ok=True) for no, vid_loc in enumerate(video_paths): From 58c9de3e2e9b73be4469974247ed5bd6f3ea22ba Mon Sep 17 00:00:00 2001 From: Saksham Sharda <11567561+Saksham20@users.noreply.github.com> Date: Tue, 8 Feb 2022 13:39:35 +0530 Subject: [PATCH 65/67] Apply suggestions from code review Co-authored-by: Yaroslav Halchenko Co-authored-by: John T. Wodder II --- dandi/pynwb_utils.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/dandi/pynwb_utils.py b/dandi/pynwb_utils.py index 1c12cfdec..720393825 100644 --- a/dandi/pynwb_utils.py +++ b/dandi/pynwb_utils.py @@ -245,9 +245,11 @@ def _get_image_series(nwb: pynwb.NWBFile) -> List[dict]: Specifically it pulls out the ImageSeries uuid, name and all the externally linked files named under the argument 'external_file'. + Parameters ---------- nwb: pynwb.NWBFile + Returns ------- out: List[dict] @@ -268,7 +270,7 @@ def _get_image_series(nwb: pynwb.NWBFile) -> List[dict]: lgr.warning( "external file %s should be one of: %s", ext_file, - ",".join(VIDEO_FILE_EXTENSIONS), + ", ".join(VIDEO_FILE_EXTENSIONS), ) out.append(out_dict) return out @@ -291,7 +293,7 @@ def rename_nwb_external_files(metadata: List[dict], dandiset_path: str) -> None: for meta in metadata: if not all(i in meta for i in ["path", "dandi_path", "external_file_objects"]): lgr.warning( - "could not rename external files, update metadata" + 'could not rename external files, update metadata ' 'with "path", "dandi_path", "external_file_objects"' ) return From 4c894a84faa3e80c08b3a9dae1941c79068d07ad Mon Sep 17 00:00:00 2001 From: Saksham Sharda Date: Tue, 8 Feb 2022 13:47:42 +0530 Subject: [PATCH 66/67] PEP 257 --- dandi/organize.py | 11 ++++++----- dandi/pynwb_utils.py | 21 +++++++++------------ 2 files changed, 15 insertions(+), 17 deletions(-) diff --git a/dandi/organize.py b/dandi/organize.py index 528af4a80..95a8342a5 100644 --- a/dandi/organize.py +++ b/dandi/organize.py @@ -175,7 +175,8 @@ def create_unique_filenames_from_metadata(metadata): def _create_external_file_names(metadata: List[dict]) -> List[dict]: - """ + """Updates the metadata dict with renamed external files. + Renames the external_file attribute in an ImageSeries according to the rule: /_external_file_<.ext> Example, the Initial name of file: @@ -184,8 +185,9 @@ def _create_external_file_names(metadata: List[dict]) -> List[dict]: external_file = [dandiset-path-of-nwbfile/ dandi-renamed-nwbfile_name(folder without extension .nwb)/ f'{ImageSeries.object_id}_external_file_0.mp4' - This is stored in a new field in the - metadata['external_file_objects'][0]['external_files_renamed'] + This is stored in a new field in the metadata: + metadata['external_file_objects'][0]['external_files_renamed'] = + Parameters ---------- metadata: list @@ -215,8 +217,7 @@ def _create_external_file_names(metadata: List[dict]) -> List[dict]: def organize_external_files( metadata: List[dict], dandiset_path: str, files_mode: str ) -> None: - """ - Organizes the external_files into the new Dandiset folder structure. + """Organizes the external_files into the new Dandiset folder structure. Parameters ---------- diff --git a/dandi/pynwb_utils.py b/dandi/pynwb_utils.py index 720393825..b1efed435 100644 --- a/dandi/pynwb_utils.py +++ b/dandi/pynwb_utils.py @@ -239,13 +239,11 @@ def _get_pynwb_metadata(path: Union[str, Path]) -> Dict[str, Any]: def _get_image_series(nwb: pynwb.NWBFile) -> List[dict]: - """ - This method supports _get_pynwb_metadata() in retrieving all ImageSeries - related metadata from an open nwb file. + """Retrieves all ImageSeries related metadata from an open nwb file. + Specifically it pulls out the ImageSeries uuid, name and all the - externally linked files - named under the argument 'external_file'. - + externally linked files named under the argument 'external_file'. + Parameters ---------- nwb: pynwb.NWBFile @@ -277,11 +275,10 @@ def _get_image_series(nwb: pynwb.NWBFile) -> List[dict]: def rename_nwb_external_files(metadata: List[dict], dandiset_path: str) -> None: - """ - This method, renames the external_file attribute in an ImageSeries datatype in an open nwb file. - It pulls information about the ImageSEries objects - from metadata: metadata["external_file_objects"] - populated during _get_pynwb_metadata() call. + """Renames the external_file attribute in an ImageSeries datatype in an open nwb file. + + It pulls information about the ImageSeries objects from metadata: + metadata["external_file_objects"] populated during _get_pynwb_metadata() call. Parameters ---------- @@ -293,7 +290,7 @@ def rename_nwb_external_files(metadata: List[dict], dandiset_path: str) -> None: for meta in metadata: if not all(i in meta for i in ["path", "dandi_path", "external_file_objects"]): lgr.warning( - 'could not rename external files, update metadata ' + "could not rename external files, update metadata " 'with "path", "dandi_path", "external_file_objects"' ) return From d1a71e584e6be75c38b8e2dfbae93a5497c3a604 Mon Sep 17 00:00:00 2001 From: Saksham Sharda Date: Tue, 8 Feb 2022 13:52:03 +0530 Subject: [PATCH 67/67] stringifying list --- dandi/cli/cmd_organize.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/dandi/cli/cmd_organize.py b/dandi/cli/cmd_organize.py index a0195e40f..e6be46bf4 100644 --- a/dandi/cli/cmd_organize.py +++ b/dandi/cli/cmd_organize.py @@ -251,12 +251,15 @@ def _get_metadata(path): paths, ) elif not all(external_files_missing_in_nwbfiles) and not update_external_file_paths: + files_list = [ + metadata[no]["path"] + for no, a in enumerate(external_files_missing_in_nwbfiles) + if not a + ] raise click.UsageError( "--update-external-file-paths option not specified but found " "external video files linked to the nwbfiles " - f"""{[metadata[no]['path'] - for no, a in enumerate(external_files_missing_in_nwbfiles) - if not a]}""" + f"{', '.join(files_list)}" ) if update_external_file_paths and media_files_mode is None: