Skip to content

Commit

Permalink
[Datumaro] Diff with exact annotation matching (#1989)
Browse files Browse the repository at this point in the history
* Add exact diff command

* Update changelog

* fix

* fix merge

* Add image matching, add test

* Add point matching test

* linter

* Update CHANGELOG.md

Co-authored-by: Nikita Manovich <nikita.manovich@intel.com>
  • Loading branch information
zhiltsov-max and Nikita Manovich authored Sep 2, 2020
1 parent ae6ec40 commit 98c06a3
Show file tree
Hide file tree
Showing 9 changed files with 611 additions and 201 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Added password reset functionality (<https://github.com/opencv/cvat/pull/2058>)
- Ability to work with data on the fly (<https://github.com/opencv/cvat/pull/2007>)
- Annotation in process outline color wheel (<https://github.com/opencv/cvat/pull/2084>)
- [Datumaro] CLI command for dataset equality comparison (<https://github.com/opencv/cvat/pull/1989>)

### Changed
- UI models (like DEXTR) were redesigned to be more interactive (<https://github.com/opencv/cvat/pull/2054>)
Expand Down
103 changes: 85 additions & 18 deletions datumaro/datumaro/cli/contexts/project/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,25 +4,26 @@
# SPDX-License-Identifier: MIT

import argparse
from enum import Enum
import json
import logging as log
import os
import os.path as osp
import shutil
from enum import Enum

from datumaro.components.project import Project, Environment, \
PROJECT_DEFAULT_CONFIG as DEFAULT_CONFIG
from datumaro.components.comparator import Comparator
from datumaro.components.cli_plugin import CliPlugin
from datumaro.components.dataset_filter import DatasetItemEncoder
from datumaro.components.extractor import AnnotationType
from datumaro.components.cli_plugin import CliPlugin
from datumaro.components.operations import \
compute_image_statistics, compute_ann_statistics
from datumaro.components.operations import (DistanceComparator,
ExactComparator, compute_ann_statistics, compute_image_statistics, mean_std)
from datumaro.components.project import \
PROJECT_DEFAULT_CONFIG as DEFAULT_CONFIG
from datumaro.components.project import Environment, Project

from ...util import (CliException, MultilineFormatter, add_subparser,
make_file_name)
from ...util.project import generate_next_file_name, load_project
from .diff import DiffVisualizer
from ...util import add_subparser, CliException, MultilineFormatter, \
make_file_name
from ...util.project import load_project, generate_next_file_name


def build_create_parser(parser_ctor=argparse.ArgumentParser):
Expand Down Expand Up @@ -503,20 +504,20 @@ def merge_command(args):
def build_diff_parser(parser_ctor=argparse.ArgumentParser):
parser = parser_ctor(help="Compare projects",
description="""
Compares two projects.|n
Compares two projects, match annotations by distance.|n
|n
Examples:|n
- Compare two projects, consider bboxes matching if their IoU > 0.7,|n
- Compare two projects, match boxes if IoU > 0.7,|n
|s|s|s|sprint results to Tensorboard:
|s|sdiff path/to/other/project -o diff/ -f tensorboard --iou-thresh 0.7
|s|sdiff path/to/other/project -o diff/ -v tensorboard --iou-thresh 0.7
""",
formatter_class=MultilineFormatter)

parser.add_argument('other_project_dir',
help="Directory of the second project to be compared")
parser.add_argument('-o', '--output-dir', dest='dst_dir', default=None,
help="Directory to save comparison results (default: do not save)")
parser.add_argument('-f', '--format',
parser.add_argument('-v', '--visualizer',
default=DiffVisualizer.DEFAULT_FORMAT,
choices=[f.name for f in DiffVisualizer.Format],
help="Output format (default: %(default)s)")
Expand All @@ -536,9 +537,7 @@ def diff_command(args):
first_project = load_project(args.project_dir)
second_project = load_project(args.other_project_dir)

comparator = Comparator(
iou_threshold=args.iou_thresh,
conf_threshold=args.conf_thresh)
comparator = DistanceComparator(iou_threshold=args.iou_thresh)

dst_dir = args.dst_dir
if dst_dir:
Expand All @@ -556,7 +555,7 @@ def diff_command(args):
dst_dir_existed = osp.exists(dst_dir)
try:
visualizer = DiffVisualizer(save_dir=dst_dir, comparator=comparator,
output_format=args.format)
output_format=args.visualizer)
visualizer.save_dataset_diff(
first_project.make_dataset(),
second_project.make_dataset())
Expand All @@ -567,6 +566,73 @@ def diff_command(args):

return 0

def build_ediff_parser(parser_ctor=argparse.ArgumentParser):
    """Create the argument parser for the 'ediff' (exact diff) command.

    Args:
        parser_ctor: callable used to construct the parser; it receives
            the help text, the description and the formatter class.

    Returns:
        The configured parser, with ``command`` defaulting to
        ``ediff_command``.

    Note:
        '--ignore-field' combines ``action='append'`` with a non-empty
        default, so (per argparse semantics) values given on the command
        line are added after 'id' and 'group' instead of replacing them.
    """
    parser = parser_ctor(
        help="Compare projects for equality",
        description="""
        Compares two projects for equality.|n
        |n
        Examples:|n
        - Compare two projects, exclude annotation group |n
        |s|s|sand the 'is_crowd' attribute from comparison:|n
        |s|sediff other/project/ -if group -ia is_crowd
        """,
        formatter_class=MultilineFormatter)

    # Registration order is kept as-is: it determines the order of
    # the options in the generated help output.
    argument_specs = (
        (('other_project_dir',), dict(
            help="Directory of the second project to be compared")),
        (('-iia', '--ignore-item-attr'), dict(
            action='append',
            help="Ignore item attribute (repeatable)")),
        (('-ia', '--ignore-attr'), dict(
            action='append',
            help="Ignore annotation attribute (repeatable)")),
        (('-if', '--ignore-field'), dict(
            action='append', default=['id', 'group'],
            help="Ignore annotation field (repeatable, default: %(default)s)")),
        (('--match-images',), dict(
            action='store_true',
            help='Match dataset items by images instead of ids')),
        (('--all',), dict(
            action='store_true',
            help="Include matches in the output")),
        (('-p', '--project'), dict(
            dest='project_dir', default='.',
            help="Directory of the first project to be compared (default: current dir)")),
    )
    for flags, options in argument_specs:
        parser.add_argument(*flags, **options)

    parser.set_defaults(command=ediff_command)

    return parser

def ediff_command(args):
    """Compare two projects for equality and save the result as JSON.

    Both projects are loaded, their datasets are compared with an
    ExactComparator configured from the command line options, and the
    outcome is dumped to a '.json' file whose name is produced by
    ``generate_next_file_name('diff', ext='.json')``. A short summary
    is printed afterwards.

    Args:
        args: parsed command line arguments; the attributes read are
            project_dir, other_project_dir, match_images, ignore_field,
            ignore_attr, ignore_item_attr and all.

    Returns:
        0 once the report is written and the summary is printed.
    """
    project_a = load_project(args.project_dir)
    project_b = load_project(args.other_project_dir)

    comparator = ExactComparator(
        match_images=args.match_images,
        ignored_fields=args.ignore_field,
        ignored_attrs=args.ignore_attr,
        ignored_item_attrs=args.ignore_item_attr)
    comparison = comparator.compare_datasets(
        project_a.make_dataset(), project_b.make_dataset())
    matches, mismatches, a_extra, b_extra, errors = comparison

    report = dict(
        mismatches=mismatches,
        a_extra_items=sorted(a_extra),
        b_extra_items=sorted(b_extra),
        errors=errors,
    )
    # Matches are only reported on explicit request ('--all')
    if args.all:
        report["matches"] = matches

    report_path = generate_next_file_name('diff', ext='.json')
    with open(report_path, 'w') as report_file:
        json.dump(report, report_file, indent=4, sort_keys=True)

    summary_lines = (
        ("The first project has %s unmatched items", a_extra),
        ("The second project has %s unmatched items", b_extra),
        ("%s item conflicts", errors),
        ("%s matching annotations", matches),
        ("%s mismatching annotations", mismatches),
    )
    print("Found:")
    for template, found in summary_lines:
        print(template % len(found))

    log.info("Output has been saved to '%s'" % report_path)

    return 0

def build_transform_parser(parser_ctor=argparse.ArgumentParser):
builtins = sorted(Environment().transforms.items)

Expand Down Expand Up @@ -753,6 +819,7 @@ def build_parser(parser_ctor=argparse.ArgumentParser):
add_subparser(subparsers, 'extract', build_extract_parser)
add_subparser(subparsers, 'merge', build_merge_parser)
add_subparser(subparsers, 'diff', build_diff_parser)
add_subparser(subparsers, 'ediff', build_ediff_parser)
add_subparser(subparsers, 'transform', build_transform_parser)
add_subparser(subparsers, 'info', build_info_parser)
add_subparser(subparsers, 'stats', build_stats_parser)
Expand Down
2 changes: 1 addition & 1 deletion datumaro/datumaro/cli/contexts/project/diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ def save_item_bbox_diff(self, item_a, item_b, diff):
_, mispred, a_unmatched, b_unmatched = diff

if 0 < len(a_unmatched) + len(b_unmatched) + len(mispred):
img_a = item_a.image.copy()
img_a = item_a.image.data.copy()
img_b = img_a.copy()
for a_bbox, b_bbox in mispred:
self.draw_bbox(img_a, a_bbox, (0, 255, 0))
Expand Down
113 changes: 0 additions & 113 deletions datumaro/datumaro/components/comparator.py

This file was deleted.

6 changes: 5 additions & 1 deletion datumaro/datumaro/components/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def wrap(item, **kwargs):
@attrs
class Categories:
attributes = attrib(factory=set, validator=default_if_none(set),
kw_only=True)
kw_only=True, eq=False)

@attrs
class LabelCategories(Categories):
Expand Down Expand Up @@ -137,6 +137,8 @@ def inverse_colormap(self):
def __eq__(self, other):
if not super().__eq__(other):
return False
if not isinstance(other, __class__):
return False
for label_id, my_color in self.colormap.items():
other_color = other.colormap.get(label_id)
if not np.array_equal(my_color, other_color):
Expand Down Expand Up @@ -179,6 +181,8 @@ def paint(self, colormap):
def __eq__(self, other):
if not super().__eq__(other):
return False
if not isinstance(other, __class__):
return False
return \
(self.label == other.label) and \
(self.z_order == other.z_order) and \
Expand Down
Loading

0 comments on commit 98c06a3

Please sign in to comment.