cvat-ai · nmanovic · Sep 2, 2020 · Aug 18, 2020 · Aug 18, 2020 · Aug 18, 2020
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Added password reset functionality (<https://github.com/opencv/cvat/pull/2058>)
 - Ability to work with data on the fly (https://github.com/opencv/cvat/pull/2007)
 - Annotation in process outline color wheel (<https://github.com/opencv/cvat/pull/2084>)
+- [Datumaro] CLI command for dataset equality comparison (<https://github.com/opencv/cvat/pull/1989>)
 
 ### Changed
 - UI models (like DEXTR) were redesigned to be more interactive (<https://github.com/opencv/cvat/pull/2054>)

@@ -4,25 +4,26 @@
 # SPDX-License-Identifier: MIT
 
 import argparse
-from enum import Enum
 import json
 import logging as log
 import os
 import os.path as osp
 import shutil
+from enum import Enum
 
-from datumaro.components.project import Project, Environment, \
- PROJECT_DEFAULT_CONFIG as DEFAULT_CONFIG
-from datumaro.components.comparator import Comparator
+from datumaro.components.cli_plugin import CliPlugin
 from datumaro.components.dataset_filter import DatasetItemEncoder
 from datumaro.components.extractor import AnnotationType
-from datumaro.components.cli_plugin import CliPlugin
-from datumaro.components.operations import \
- compute_image_statistics, compute_ann_statistics
+from datumaro.components.operations import (DistanceComparator,
+ ExactComparator, compute_ann_statistics, compute_image_statistics, mean_std)
+from datumaro.components.project import \
+ PROJECT_DEFAULT_CONFIG as DEFAULT_CONFIG
+from datumaro.components.project import Environment, Project
+
+from ...util import (CliException, MultilineFormatter, add_subparser,
+ make_file_name)
+from ...util.project import generate_next_file_name, load_project
 from .diff import DiffVisualizer
-from ...util import add_subparser, CliException, MultilineFormatter, \
- make_file_name
-from ...util.project import load_project, generate_next_file_name
 
 
 def build_create_parser(parser_ctor=argparse.ArgumentParser):
@@ -503,20 +504,20 @@ def merge_command(args):
 def build_diff_parser(parser_ctor=argparse.ArgumentParser):
  parser = parser_ctor(help="Compare projects",
  description="""
- Compares two projects.|n
+ Compares two projects, match annotations by distance.|n
  |n
  Examples:|n
- - Compare two projects, consider bboxes matching if their IoU > 0.7,|n
+ - Compare two projects, match boxes if IoU > 0.7,|n
  |s|s|s|sprint results to Tensorboard:
- |s|sdiff path/to/other/project -o diff/ -f tensorboard --iou-thresh 0.7
+ |s|sdiff path/to/other/project -o diff/ -v tensorboard --iou-thresh 0.7
  """,
  formatter_class=MultilineFormatter)
 
  parser.add_argument('other_project_dir',
  help="Directory of the second project to be compared")
  parser.add_argument('-o', '--output-dir', dest='dst_dir', default=None,
  help="Directory to save comparison results (default: do not save)")
- parser.add_argument('-f', '--format',
+ parser.add_argument('-v', '--visualizer',
  default=DiffVisualizer.DEFAULT_FORMAT,
  choices=[f.name for f in DiffVisualizer.Format],
  help="Output format (default: %(default)s)")
@@ -536,9 +537,7 @@ def diff_command(args):
  first_project = load_project(args.project_dir)
  second_project = load_project(args.other_project_dir)
 
- comparator = Comparator(
- iou_threshold=args.iou_thresh,
- conf_threshold=args.conf_thresh)
+ comparator = DistanceComparator(iou_threshold=args.iou_thresh)
 
  dst_dir = args.dst_dir
  if dst_dir:
@@ -556,7 +555,7 @@ def diff_command(args):
  dst_dir_existed = osp.exists(dst_dir)
  try:
  visualizer = DiffVisualizer(save_dir=dst_dir, comparator=comparator,
- output_format=args.format)
+ output_format=args.visualizer)
  visualizer.save_dataset_diff(
  first_project.make_dataset(),
  second_project.make_dataset())
@@ -567,6 +566,73 @@ def diff_command(args):
 
  return 0
 
+def build_ediff_parser(parser_ctor=argparse.ArgumentParser):
+    """Create the argument parser for the 'ediff' command.
+
+    parser_ctor is the callable used to instantiate the parser (by default
+    argparse.ArgumentParser). The returned parser has 'ediff_command'
+    registered as its 'command' default.
+    """
+    description = """
+ Compares two projects for equality.|n
+ |n
+ Examples:|n
+ - Compare two projects, exclude annotation group |n
+ |s|s|sand the 'is_crowd' attribute from comparison:|n
+ |s|sediff other/project/ -if group -ia is_crowd
+ """
+    ediff_parser = parser_ctor(
+        help="Compare projects for equality",
+        description=description,
+        formatter_class=MultilineFormatter)
+
+    # The project to compare against is the only positional argument
+    ediff_parser.add_argument(
+        'other_project_dir',
+        help="Directory of the second project to be compared")
+
+    # Repeatable options that exclude parts of items / annotations
+    ediff_parser.add_argument(
+        '-iia', '--ignore-item-attr',
+        action='append',
+        help="Ignore item attribute (repeatable)")
+    ediff_parser.add_argument(
+        '-ia', '--ignore-attr',
+        action='append',
+        help="Ignore annotation attribute (repeatable)")
+    # NOTE: with action='append' argparse adds command line values to a
+    # non-empty default list, it does not substitute them for it
+    ediff_parser.add_argument(
+        '-if', '--ignore-field',
+        action='append',
+        default=['id', 'group'],
+        help="Ignore annotation field (repeatable, default: %(default)s)")
+
+    # Boolean switches
+    ediff_parser.add_argument(
+        '--match-images',
+        action='store_true',
+        help='Match dataset items by images instead of ids')
+    ediff_parser.add_argument(
+        '--all',
+        action='store_true',
+        help="Include matches in the output")
+
+    ediff_parser.add_argument(
+        '-p', '--project',
+        dest='project_dir',
+        default='.',
+        help="Directory of the first project to be compared (default: current dir)")
+    ediff_parser.set_defaults(command=ediff_command)
+
+    return ediff_parser
+
+def ediff_command(args):
+    """Compare two projects for equality and save the result as JSON.
+
+    Loads the projects from args.project_dir and args.other_project_dir,
+    compares their datasets with ExactComparator (configured from
+    args.match_images, args.ignore_field, args.ignore_attr and
+    args.ignore_item_attr), writes the comparison result to the file
+    named by generate_next_file_name('diff', ext='.json') and prints a
+    short summary to stdout.
+
+    Returns 0.
+    """
+    first_project = load_project(args.project_dir)
+    second_project = load_project(args.other_project_dir)
+
+    comparator = ExactComparator(
+        match_images=args.match_images,
+        ignored_fields=args.ignore_field,
+        ignored_attrs=args.ignore_attr,
+        ignored_item_attrs=args.ignore_item_attr)
+    matches, mismatches, a_extra, b_extra, errors = \
+        comparator.compare_datasets(
+            first_project.make_dataset(), second_project.make_dataset())
+
+    output = {
+        "mismatches": mismatches,
+        "a_extra_items": sorted(a_extra),
+        "b_extra_items": sorted(b_extra),
+        "errors": errors,
+    }
+    # Matches are put into the report only when requested with --all
+    if args.all:
+        output["matches"] = matches
+
+    output_file = generate_next_file_name('diff', ext='.json')
+    # Explicit encoding keeps the report independent of the locale settings
+    with open(output_file, 'w', encoding='utf-8') as f:
+        json.dump(output, f, indent=4, sort_keys=True)
+
+    print("Found:")
+    print("The first project has %s unmatched items" % len(a_extra))
+    print("The second project has %s unmatched items" % len(b_extra))
+    print("%s item conflicts" % len(errors))
+    print("%s matching annotations" % len(matches))
+    print("%s mismatching annotations" % len(mismatches))
+
+    # Pass the argument to logging instead of formatting eagerly, so the
+    # message is built only if the record is actually emitted
+    log.info("Output has been saved to '%s'", output_file)
+
+    return 0
+
 def build_transform_parser(parser_ctor=argparse.ArgumentParser):
  builtins = sorted(Environment().transforms.items)
 
@@ -753,6 +819,7 @@ def build_parser(parser_ctor=argparse.ArgumentParser):
  add_subparser(subparsers, 'extract', build_extract_parser)
  add_subparser(subparsers, 'merge', build_merge_parser)
  add_subparser(subparsers, 'diff', build_diff_parser)
+ add_subparser(subparsers, 'ediff', build_ediff_parser)
  add_subparser(subparsers, 'transform', build_transform_parser)
  add_subparser(subparsers, 'info', build_info_parser)
  add_subparser(subparsers, 'stats', build_stats_parser)

@@ -217,7 +217,7 @@ def save_item_bbox_diff(self, item_a, item_b, diff):
  _, mispred, a_unmatched, b_unmatched = diff
 
  if 0 < len(a_unmatched) + len(b_unmatched) + len(mispred):
- img_a = item_a.image.copy()
+ img_a = item_a.image.data.copy()
  img_b = img_a.copy()
  for a_bbox, b_bbox in mispred:
  self.draw_bbox(img_a, a_bbox, (0, 255, 0))

@@ -46,7 +46,7 @@ def wrap(item, **kwargs):
 @attrs
 class Categories:
  attributes = attrib(factory=set, validator=default_if_none(set),
- kw_only=True)
+ kw_only=True, eq=False)
 
 @attrs
 class LabelCategories(Categories):
@@ -137,6 +137,8 @@ def inverse_colormap(self):
  def __eq__(self, other):
  if not super().__eq__(other):
  return False
+ if not isinstance(other, __class__):
+ return False
  for label_id, my_color in self.colormap.items():
  other_color = other.colormap.get(label_id)
  if not np.array_equal(my_color, other_color):
@@ -179,6 +181,8 @@ def paint(self, colormap):
  def __eq__(self, other):
  if not super().__eq__(other):
  return False
+ if not isinstance(other, __class__):
+ return False
  return \
  (self.label == other.label) and \
  (self.z_order == other.z_order) and \