
Commit f15ff93

3482 Add schema verification for metadata (#3865)
* [DLMED] complete temp test schema
* [DLMED] add cmd line API
* [DLMED] fix typo
* [DLMED] add more tests
* [DLMED] add optional import
* [DLMED] test with download link
* [DLMED] enhance doc-string
* [DLMED] fix mypy
* [DLMED] remove DS_Store
* [DLMED] update according to comments
* [DLMED] update according to comments
* [DLMED] add more tests
* [DLMED] change "target_id" to "runner_id"
* [DLMED] simplify command line
* [DLMED] fix print summary logic
* [DLMED] update log print to logging
* [DLMED] compact commands
* [DLMED] unify args update
* [DLMED] update according to comments
* [DLMED] skip windows
* [DLMED] fix typo
* [DLMED] optimize error log
* [DLMED] update according to comments
* [DLMED] update according to comments

Signed-off-by: Nic Ma <nma@nvidia.com>
1 parent 7587e71 commit f15ff93

16 files changed (+270 −38 lines)

.gitignore

Lines changed: 1 addition & 0 deletions
@@ -130,6 +130,7 @@ temp/
 tests/testing_data/MedNIST*
 tests/testing_data/*Hippocampus*
 tests/testing_data/*.tiff
+tests/testing_data/schema.json
 
 # clang format tool
 .clang-format-bin/

docs/requirements.txt

Lines changed: 1 addition & 0 deletions
@@ -27,3 +27,4 @@ imagecodecs; platform_system == "Linux"
 tifffile; platform_system == "Linux"
 pyyaml
 fire
+jsonschema

docs/source/bundle.rst

Lines changed: 1 addition & 0 deletions
@@ -36,3 +36,4 @@ Model Bundle
 `Scripts`
 ---------
 .. autofunction:: run
+.. autofunction:: verify_metadata

docs/source/installation.md

Lines changed: 2 additions & 2 deletions
@@ -190,9 +190,9 @@ Since MONAI v0.2.0, the extras syntax such as `pip install 'monai[nibabel]'` is
 
 - The options are
 ```
-[nibabel, skimage, pillow, tensorboard, gdown, ignite, torchvision, itk, tqdm, lmdb, psutil, cucim, openslide, pandas, einops, transformers, mlflow, matplotlib, tensorboardX, tifffile, imagecodecs, pyyaml, fire]
+[nibabel, skimage, pillow, tensorboard, gdown, ignite, torchvision, itk, tqdm, lmdb, psutil, cucim, openslide, pandas, einops, transformers, mlflow, matplotlib, tensorboardX, tifffile, imagecodecs, pyyaml, fire, jsonschema]
 ```
 which correspond to `nibabel`, `scikit-image`, `pillow`, `tensorboard`,
-`gdown`, `pytorch-ignite`, `torchvision`, `itk`, `tqdm`, `lmdb`, `psutil`, `cucim`, `openslide-python`, `pandas`, `einops`, `transformers`, `mlflow`, `matplotlib`, `tensorboardX`, `tifffile`, `imagecodecs`, `pyyaml`, `fire`, respectively.
+`gdown`, `pytorch-ignite`, `torchvision`, `itk`, `tqdm`, `lmdb`, `psutil`, `cucim`, `openslide-python`, `pandas`, `einops`, `transformers`, `mlflow`, `matplotlib`, `tensorboardX`, `tifffile`, `imagecodecs`, `pyyaml`, `fire`, `jsonschema`, respectively.
 
 - `pip install 'monai[all]'` installs all the optional dependencies.
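Note: `jsonschema` joins the optional-dependency list because the new `verify_metadata` script imports it lazily through `monai.utils.optional_import` (see the `monai/bundle/scripts.py` diff below). A minimal sketch of how a caller could test for its availability before validating anything; the instance and schema dicts here are illustrative only:

```python
from monai.utils import optional_import

# optional_import returns the requested attribute plus a bool flag for availability
validate, has_jsonschema = optional_import("jsonschema", name="validate")

if has_jsonschema:
    # installed e.g. via `pip install "monai[jsonschema]"`; safe to call validate
    validate(instance={"name": "demo"}, schema={"type": "object"})
else:
    print("jsonschema is not installed; bundle metadata verification is unavailable")
```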

environment-dev.yml

Lines changed: 1 addition & 0 deletions
@@ -44,6 +44,7 @@ dependencies:
   - tensorboardX
   - pyyaml
   - fire
+  - jsonschema
   - pip
   - pip:
     # pip for itk as conda-forge version only up to v5.1

monai/bundle/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -12,5 +12,5 @@
 from .config_item import ComponentLocator, ConfigComponent, ConfigExpression, ConfigItem, Instantiable
 from .config_parser import ConfigParser
 from .reference_resolver import ReferenceResolver
-from .scripts import run
+from .scripts import run, verify_metadata
 from .utils import EXPR_KEY, ID_REF_KEY, ID_SEP_KEY, MACRO_KEY

monai/bundle/__main__.py

Lines changed: 1 addition & 1 deletion
@@ -10,7 +10,7 @@
 # limitations under the License.
 
 
-from monai.bundle.scripts import run
+from monai.bundle.scripts import run, verify_metadata
 
 if __name__ == "__main__":
     from monai.utils import optional_import
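The hunk above only shows the changed import; the rest of `__main__.py` is unchanged and not displayed. For context, a fire-based entry point along these lines (an assumed sketch, not the verbatim file) is what makes both subcommands reachable from the command line:

```python
# assumed sketch of a fire-based CLI entry; exact body not shown in this diff
from monai.bundle.scripts import run, verify_metadata  # noqa: F401
from monai.utils import optional_import

if __name__ == "__main__":
    fire, _ = optional_import("fire")
    # fire.Fire() exposes the imported module-level callables as subcommands, e.g.
    #   python -m monai.bundle run trainer --meta_file <meta path> --config_file <config path>
    #   python -m monai.bundle verify_metadata --meta_file <meta path> --filepath schema.json
    fire.Fire()
```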

monai/bundle/scripts.py

Lines changed: 87 additions & 25 deletions
@@ -10,18 +10,28 @@
 # limitations under the License.
 
 import pprint
+import re
 from typing import Dict, Optional, Sequence, Union
 
+from monai.apps.utils import download_url, get_logger
 from monai.bundle.config_parser import ConfigParser
+from monai.config import PathLike
+from monai.utils import check_parent_dir, optional_import
 
+validate, _ = optional_import("jsonschema", name="validate")
+ValidationError, _ = optional_import("jsonschema.exceptions", name="ValidationError")
 
-def _update_default_args(args: Optional[Union[str, Dict]] = None, **kwargs) -> Dict:
+logger = get_logger(module_name=__name__)
+
+
+def _update_args(args: Optional[Union[str, Dict]] = None, ignore_none: bool = True, **kwargs) -> Dict:
     """
     Update the `args` with the input `kwargs`.
     For dict data, recursively update the content based on the keys.
 
     Args:
         args: source args to update.
+        ignore_none: whether to ignore input args with None value, default to `True`.
         kwargs: destination args to update.
 
     """
@@ -32,14 +42,26 @@ def _update_default_args(args: Optional[Union[str, Dict]] = None, **kwargs) -> D
 
     # recursively update the default args with new args
     for k, v in kwargs.items():
-        args_[k] = _update_default_args(args_[k], **v) if isinstance(v, dict) and isinstance(args_.get(k), dict) else v
+        if ignore_none and v is None:
+            continue
+        if isinstance(v, dict) and isinstance(args_.get(k), dict):
+            args_[k] = _update_args(args_[k], ignore_none, **v)
+        else:
+            args_[k] = v
     return args_
 
 
+def _log_input_summary(tag: str, args: Dict):
+    logger.info(f"\n--- input summary of monai.bundle.scripts.{tag} ---")
+    for name, val in args.items():
+        logger.info(f"> {name}: {pprint.pformat(val)}")
+    logger.info("---\n\n")
+
+
 def run(
+    runner_id: Optional[str] = None,
     meta_file: Optional[Union[str, Sequence[str]]] = None,
     config_file: Optional[Union[str, Sequence[str]]] = None,
-    target_id: Optional[str] = None,
     args_file: Optional[str] = None,
     **override,
 ):
@@ -51,62 +73,102 @@ def run(
     .. code-block:: bash
 
         # Execute this module as a CLI entry:
-        python -m monai.bundle run --meta_file <meta path> --config_file <config path> --target_id trainer
+        python -m monai.bundle run trainer --meta_file <meta path> --config_file <config path>
 
         # Override config values at runtime by specifying the component id and its new value:
-        python -m monai.bundle run --net#input_chns 1 ...
+        python -m monai.bundle run trainer --net#input_chns 1 ...
 
         # Override config values with another config file `/path/to/another.json`:
-        python -m monai.bundle run --net %/path/to/another.json ...
+        python -m monai.bundle run evaluator --net %/path/to/another.json ...
 
         # Override config values with part content of another config file:
-        python -m monai.bundle run --net %/data/other.json#net_arg ...
+        python -m monai.bundle run trainer --net %/data/other.json#net_arg ...
 
         # Set default args of `run` in a JSON / YAML file, help to record and simplify the command line.
         # Other args still can override the default args at runtime:
         python -m monai.bundle run --args_file "/workspace/data/args.json" --config_file <config path>
 
     Args:
+        runner_id: ID name of the runner component or workflow, it must have a `run` method.
         meta_file: filepath of the metadata file, if `None`, must be provided in `args_file`.
             if it is a list of file paths, the content of them will be merged.
         config_file: filepath of the config file, if `None`, must be provided in `args_file`.
             if it is a list of file paths, the content of them will be merged.
-        target_id: ID name of the target component or workflow, it must have a `run` method.
         args_file: a JSON or YAML file to provide default values for `meta_file`, `config_file`,
-            `target_id` and override pairs. so that the command line inputs can be simplified.
+            `runner_id` and override pairs. so that the command line inputs can be simplified.
         override: id-value pairs to override or add the corresponding config content.
             e.g. ``--net#input_chns 42``.
 
     """
-    k_v = zip(["meta_file", "config_file", "target_id"], [meta_file, config_file, target_id])
-    for k, v in k_v:
-        if v is not None:
-            override[k] = v
-
-    full_kv = zip(
-        ("meta_file", "config_file", "target_id", "args_file", "override"),
-        (meta_file, config_file, target_id, args_file, override),
-    )
-    print("\n--- input summary of monai.bundle.scripts.run ---")
-    for name, val in full_kv:
-        print(f"> {name}: {pprint.pformat(val)}")
-    print("---\n\n")
 
-    _args = _update_default_args(args=args_file, **override)
+    _args = _update_args(args=args_file, runner_id=runner_id, meta_file=meta_file, config_file=config_file, **override)
     for k in ("meta_file", "config_file"):
         if k not in _args:
             raise ValueError(f"{k} is required for 'monai.bundle run'.\n{run.__doc__}")
+    _log_input_summary(tag="run", args=_args)
 
     parser = ConfigParser()
     parser.read_config(f=_args.pop("config_file"))
     parser.read_meta(f=_args.pop("meta_file"))
-    id = _args.pop("target_id", "")
+    id = _args.pop("runner_id", "")
 
-    # the rest key-values in the args are to override config content
+    # the rest key-values in the _args are to override config content
     for k, v in _args.items():
         parser[k] = v
 
     workflow = parser.get_parsed_content(id=id)
     if not hasattr(workflow, "run"):
         raise ValueError(f"The parsed workflow {type(workflow)} does not have a `run` method.\n{run.__doc__}")
     workflow.run()
+
+
+def verify_metadata(
+    meta_file: Optional[Union[str, Sequence[str]]] = None,
+    filepath: Optional[PathLike] = None,
+    create_dir: Optional[bool] = None,
+    hash_val: Optional[str] = None,
+    args_file: Optional[str] = None,
+    **kwargs,
+):
+    """
+    Verify the provided `metadata` file based on the predefined `schema`.
+    `metadata` content must contain the `schema` field for the URL of the schema file to download.
+    The schema standard follows: http://json-schema.org/.
+
+    Args:
+        meta_file: filepath of the metadata file to verify, if `None`, must be provided in `args_file`.
+            if it is a list of file paths, the content of them will be merged.
+        filepath: file path to store the downloaded schema.
+        create_dir: whether to create directories if not existing, default to `True`.
+        hash_val: if not None, define the hash value to verify the downloaded schema file.
+        args_file: a JSON or YAML file to provide default values for all the args in this function.
+            so that the command line inputs can be simplified.
+        kwargs: other arguments for `jsonschema.validate()`. for more details:
+            https://python-jsonschema.readthedocs.io/en/stable/validate/#jsonschema.validate.
+
+    """
+
+    _args = _update_args(
+        args=args_file, meta_file=meta_file, filepath=filepath, create_dir=create_dir, hash_val=hash_val, **kwargs
+    )
+    _log_input_summary(tag="verify_metadata", args=_args)
+
+    filepath_ = _args.pop("filepath")
+    create_dir_ = _args.pop("create_dir", True)
+    check_parent_dir(path=filepath_, create_dir=create_dir_)
+
+    metadata = ConfigParser.load_config_files(files=_args.pop("meta_file"))
+    url = metadata.get("schema")
+    if url is None:
+        raise ValueError("must provide the `schema` field in the metadata for the URL of schema file.")
+    download_url(url=url, filepath=filepath_, hash_val=_args.pop("hash_val", None), hash_type="md5", progress=True)
+    schema = ConfigParser.load_config_file(filepath=filepath_)
+
+    try:
+        # the rest key-values in the _args are for `validate` API
+        validate(instance=metadata, schema=schema, **_args)
+    except ValidationError as e:
+        # as the error message is very long, only extract the key information
+        logger.info(re.compile(r".*Failed validating", re.S).findall(str(e))[0] + f" against schema `{url}`.")
+        return
+    logger.info("metadata is verified with no error.")

monai/utils/__init__.py

Lines changed: 1 addition & 0 deletions
@@ -41,6 +41,7 @@
 from .misc import (
     MAX_SEED,
     ImageMetaKey,
+    check_parent_dir,
     copy_to_device,
     ensure_tuple,
     ensure_tuple_rep,

monai/utils/misc.py

Lines changed: 21 additions & 6 deletions
@@ -50,6 +50,7 @@
     "is_module_ver_at_least",
     "has_option",
     "sample_slices",
+    "check_parent_dir",
     "save_obj",
 ]
 
@@ -400,6 +401,25 @@ def sample_slices(data: NdarrayOrTensor, dim: int = 1, as_indices: bool = True,
     return data[tuple(slices)]
 
 
+def check_parent_dir(path: PathLike, create_dir: bool = True):
+    """
+    Utility to check whether the parent directory of the `path` exists.
+
+    Args:
+        path: input path to check the parent directory.
+        create_dir: if True, when the parent directory doesn't exist, create the directory,
+            otherwise, raise exception.
+
+    """
+    path = Path(path)
+    path_dir = path.parent
+    if not path_dir.exists():
+        if create_dir:
+            path_dir.mkdir(parents=True)
+        else:
+            raise ValueError(f"the directory of specified path does not exist: `{path_dir}`.")
+
+
 def save_obj(
     obj, path: PathLike, create_dir: bool = True, atomic: bool = True, func: Optional[Callable] = None, **kwargs
 ):
@@ -421,12 +441,7 @@ def save_obj(
 
     """
     path = Path(path)
-    path_dir = path.parent
-    if not path_dir.exists():
-        if create_dir:
-            path_dir.mkdir(parents=True)
-        else:
-            raise ValueError(f"the directory of specified path is not existing: {path_dir}.")
+    check_parent_dir(path=path, create_dir=create_dir)
     if path.exists():
         # remove the existing file
         os.remove(path)
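The extracted `check_parent_dir` helper is now shared by `save_obj` and the new `verify_metadata` script. A small usage sketch of the two modes, using a temporary directory so it is self-contained:

```python
import os
import tempfile
from pathlib import Path

from monai.utils import check_parent_dir

root = tempfile.mkdtemp()
target = os.path.join(root, "nested", "dir", "schema.json")

# create_dir=True (the default): missing parent directories are created
check_parent_dir(path=target, create_dir=True)
assert Path(target).parent.is_dir()

# create_dir=False: a missing parent raises ValueError instead
try:
    check_parent_dir(path=os.path.join(root, "other", "out.json"), create_dir=False)
except ValueError as e:
    print(e)  # the directory of specified path does not exist: `...`
```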

requirements-dev.txt

Lines changed: 1 addition & 0 deletions
@@ -46,3 +46,4 @@ tensorboardX
 types-PyYAML
 pyyaml
 fire
+jsonschema

setup.cfg

Lines changed: 3 additions & 0 deletions
@@ -52,6 +52,7 @@ all =
     tensorboardX
     pyyaml
     fire
+    jsonschema
 nibabel =
     nibabel
 skimage =
@@ -98,6 +99,8 @@ pyyaml =
     pyyaml
 fire =
     fire
+jsonschema =
+    jsonschema
 
 [flake8]
 select = B,C,E,F,N,P,T4,W,B9

tests/min_tests.py

Lines changed: 1 addition & 0 deletions
@@ -160,6 +160,7 @@ def run_testsuit():
         "test_prepare_batch_default_dist",
         "test_parallel_execution_dist",
         "test_bundle_run",
+        "test_bundle_verify_metadata",
     ]
     assert sorted(exclude_cases) == sorted(set(exclude_cases)), f"Duplicated items in {exclude_cases}"
 
tests/test_bundle_run.py

Lines changed: 2 additions & 3 deletions
@@ -64,15 +64,14 @@ def test_shape(self, config_file, expected_shape):
         else:
             override = f"--network %{overridefile1}#move_net --dataset#_target_ %{overridefile2}"
         # test with `monai.bundle` as CLI entry directly
-        cmd = "-m monai.bundle run --target_id evaluator"
-        cmd += f" --postprocessing#transforms#2#output_postfix seg {override}"
+        cmd = f"-m monai.bundle run evaluator --postprocessing#transforms#2#output_postfix seg {override}"
         la = [f"{sys.executable}"] + cmd.split(" ") + ["--meta_file", meta_file] + ["--config_file", config_file]
         ret = subprocess.check_call(la + ["--args_file", def_args_file])
         self.assertEqual(ret, 0)
         self.assertTupleEqual(saver(os.path.join(tempdir, "image", "image_seg.nii.gz")).shape, expected_shape)
 
         # here test the script with `google fire` tool as CLI
-        cmd = "-m fire monai.bundle.scripts run --target_id evaluator"
+        cmd = "-m fire monai.bundle.scripts run --runner_id evaluator"
         cmd += f" --evaluator#amp False {override}"
         la = [f"{sys.executable}"] + cmd.split(" ") + ["--meta_file", meta_file] + ["--config_file", config_file]
         ret = subprocess.check_call(la)