diff --git a/datumaro/cli/contexts/project/__init__.py b/datumaro/cli/contexts/project/__init__.py index bab5da6fb5c9..8e805e9da4ff 100644 --- a/datumaro/cli/contexts/project/__init__.py +++ b/datumaro/cli/contexts/project/__init__.py @@ -447,7 +447,8 @@ def filter_command(args): if not args.filter: raise CliException("Expected a filter expression ('-e' argument)") - dataset.filter_project(save_dir=dst_dir, expr=args.filter, **filter_args) + dataset.filter_project(save_dir=dst_dir, + filter_expr=args.filter, **filter_args) log.info("Subproject has been extracted to '%s'" % dst_dir) @@ -565,6 +566,8 @@ def diff_command(args): return 0 +_ediff_default_if = ['id', 'group'] # avoid https://bugs.python.org/issue16399 + def build_ediff_parser(parser_ctor=argparse.ArgumentParser): parser = parser_ctor(help="Compare projects for equality", description=""" @@ -583,9 +586,9 @@ def build_ediff_parser(parser_ctor=argparse.ArgumentParser): help="Ignore item attribute (repeatable)") parser.add_argument('-ia', '--ignore-attr', action='append', help="Ignore annotation attribute (repeatable)") - parser.add_argument('-if', '--ignore-field', - action='append', default=['id', 'group'], - help="Ignore annotation field (repeatable, default: %(default)s)") + parser.add_argument('-if', '--ignore-field', action='append', + help="Ignore annotation field (repeatable, default: %s)" % \ + _ediff_default_if) parser.add_argument('--match-images', action='store_true', help='Match dataset items by images instead of ids') parser.add_argument('--all', action='store_true', @@ -600,6 +603,8 @@ def ediff_command(args): first_project = load_project(args.project_dir) second_project = load_project(args.other_project_dir) + if not args.ignore_field: + args.ignore_field = _ediff_default_if comparator = ExactComparator( match_images=args.match_images, ignored_fields=args.ignore_field, diff --git a/datumaro/plugins/image_dir.py b/datumaro/plugins/image_dir.py index 062387e10c96..c77fac39c99c 100644 --- 
a/datumaro/plugins/image_dir.py +++ b/datumaro/plugins/image_dir.py @@ -42,8 +42,8 @@ def __init__(self, url): for dirpath, _, filenames in os.walk(url): for name in filenames: path = osp.join(dirpath, name) + image = Image(path=path) try: - image = Image(path) # force loading image.data # pylint: disable=pointless-statement except Exception: diff --git a/datumaro/plugins/transforms.py b/datumaro/plugins/transforms.py index 7e7cea8badbb..10a1c50dfb51 100644 --- a/datumaro/plugins/transforms.py +++ b/datumaro/plugins/transforms.py @@ -308,6 +308,9 @@ class RandomSplit(Transform, CliPlugin): |s|s%(prog)s --subset train:.67 --subset test:.33 """ + # avoid https://bugs.python.org/issue16399 + _default_split = [('train', 0.67), ('test', 0.33)] + @staticmethod def _split_arg(s): parts = s.split(':') @@ -321,14 +324,17 @@ def build_cmdline_parser(cls, **kwargs): parser = super().build_cmdline_parser(**kwargs) parser.add_argument('-s', '--subset', action='append', type=cls._split_arg, dest='splits', - default=[('train', 0.67), ('test', 0.33)], - help="Subsets in the form of: '<subset>:<ratio>' (repeatable)") + help="Subsets in the form: '<subset>:<ratio>' " + "(repeatable, default: %s)" % dict(cls._default_split)) parser.add_argument('--seed', type=int, help="Random seed") return parser def __init__(self, extractor, splits, seed=None): super().__init__(extractor) + if splits is None: + splits = self._default_split + assert 0 < len(splits), "Expected at least one split" assert all(0.0 <= r and r <= 1.0 for _, r in splits), \ "Ratios are expected to be in the range [0; 1], but got %s" % splits diff --git a/datumaro/util/image.py b/datumaro/util/image.py index 626d8499769e..860c7f2d505f 100644 --- a/datumaro/util/image.py +++ b/datumaro/util/image.py @@ -41,6 +41,8 @@ def load_image(path): else: raise NotImplementedError() + if image is None: + raise ValueError("Can't open image '%s'" % path) assert len(image.shape) in {2, 3} if len(image.shape) == 3: assert image.shape[2] in {3, 4} @@ -206,6 +208,8 @@ 
def __init__(self, data=None, path=None, loader=None, cache=None, self._path = path assert data is not None or path or loader, "Image can not be empty" + if data is not None: + assert callable(data) or isinstance(data, np.ndarray), type(data) if data is None and (path or loader): if osp.isfile(path) or loader: data = lazy_image(path, loader=loader, cache=cache)