Merge back 1.2.0rc2 (#942)
vinnamkim authored Apr 17, 2023
2 parents ed3cf05 + 08425db commit f57b37c
Showing 53 changed files with 2,819 additions and 1,114 deletions.
34 changes: 34 additions & 0 deletions .github/workflows/docs_stable.yml
@@ -0,0 +1,34 @@
name: Build Docs for releases

on:
workflow_dispatch: # run on request (no need for PR)
release:
types: [published]

jobs:
Build-Docs:
runs-on: ubuntu-20.04
permissions:
contents: write
steps:
- name: Checkout repository
uses: actions/checkout@v3
with:
fetch-depth: 0 # otherwise, you will fail to push refs to the dest repo
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.10"
- name: Install dependencies
run: python -m pip install tox
- name: Build-Docs
run: |
echo RELEASE_VERSION=${GITHUB_REF#refs/*/} >> $GITHUB_ENV
tox -e build-doc
# - name: Deploy
# uses: peaceiris/actions-gh-pages@v3
# with:
# github_token: ${{ secrets.GITHUB_TOKEN }}
# publish_dir: ./public
# destination_dir: ${{ env.RELEASE_VERSION }}
# force_orphan: true
2 changes: 1 addition & 1 deletion CHANGELOG.md
@@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## \[Unreleased\]

## 14/04/2023 - Release 1.2.0rc1
## 17/04/2023 - Release 1.2.0rc2
### New features
- Add LossDynamicsAnalyzer for noisy label detection
(<https://github.com/openvinotoolkit/datumaro/pull/928>)
File renamed without changes.
168 changes: 89 additions & 79 deletions datumaro/cli/commands/merge.py
@@ -1,19 +1,18 @@
# Copyright (C) 2020-2022 Intel Corporation
# Copyright (C) 2020-2023 Intel Corporation
#
# SPDX-License-Identifier: MIT

import argparse
import logging as log
import os
import os.path as osp
from collections import OrderedDict

from datumaro.components.dataset import DEFAULT_FORMAT
from datumaro.components.environment import Environment
from datumaro.components.errors import DatasetMergeError, DatasetQualityError, ProjectNotFoundError
from datumaro.components.errors import ProjectNotFoundError
from datumaro.components.hl_ops import HLOps
from datumaro.components.merge.intersect_merge import IntersectMerge
from datumaro.components.project import ProjectBuildTargets
from datumaro.util import dump_json_file
from datumaro.util.scope import scope_add, scoped

from ..util import MultilineFormatter, join_cli_args
@@ -23,18 +22,35 @@

def build_parser(parser_ctor=argparse.ArgumentParser):
parser = parser_ctor(
help="Merge few projects",
help="Merge few datasets or projects",
description="""
Merges multiple datasets into one and produces a new dataset.
The command can be useful if you have few annotations and wish
to merge them, taking into consideration potential overlaps and
conflicts. This command can try to find common ground by voting or
return a list of conflicts.|n
The command can be useful if you want to merge several datasets
which have homogeneous or heterogeneous label categories.
We provide three merge policies: 1) "exact" for homogeneous datasets;
2) "union" or 3) "intersect" for heterogeneous datasets. The default merge
policy is "union".
|n
|n
In simple cases, when dataset images do not intersect and new
labels are not added, the recommended way of merging is using the
"patch" command. It will offer better performance and provide the same
results.|n
labels are not added, you can use the "exact" merge policy.
|n
|n
On the other hand, when two datasets have different label categories or
share the same (id, subset) pairs in their dataset items, you have to use
the "union" or "intersect" merge.
|n
The "union" merge is the simpler way to merge them.
It takes the union of the source label categories and merges
their dataset items. If the same (id, subset) pair occurs in several sources, it appends
a suffix to each item's id to avoid duplicate (id, subset) pairs in the merged dataset.
|n
The "intersect" policy is a more sophisticated way to merge heterogeneous datasets.
Use it when you want to merge annotations for the same (id, subset) pairs
coming from different datasets, taking into consideration potential overlaps
and conflicts between the annotations. The "intersect" merge policy can try to find
common ground by voting or return a list of conflicts.
|n
|n
This command has multiple forms:|n
1) %(prog)s <revpath>|n
@@ -64,12 +80,15 @@ def build_parser(parser_ctor=argparse.ArgumentParser):
options, which are passed after the '--' separator (see examples),
pass '-- -h' for more info. If not stated otherwise, by default
only annotations are exported; to include images pass
'--save-images' parameter.|n
'--save-media' parameter.|n
|n
Examples:|n
- Merge annotations from 3 (or more) annotators:|n
|s|s%(prog)s project1/ project2/ project3/|n
|n
- Merge datasets with varying merge policies:|n
|s|s%(prog)s project1/ project2/ -m <union|intersect|exact>|n
|n
- Check groups of the merged dataset for consistency:|n
|s|s|slook for groups consisting of 'person', 'hand', 'head', 'foot'|n
|s|s%(prog)s project1/ project2/ -g 'person,hand?,head,foot?'|n
@@ -84,7 +103,7 @@ def build_parser(parser_ctor=argparse.ArgumentParser):
|s|s%(prog)s HEAD~2:source-2 path/to/dataset2:yolo
|n
- Merge datasets and save in different format:|n
|s|s%(prog)s -f voc dataset1/:yolo path2/:coco -- --save-images
|s|s%(prog)s -f voc dataset1/:yolo path2/:coco -- --save-media
""",
formatter_class=MultilineFormatter,
)
@@ -97,34 +116,11 @@ def _group(s):
) # workaround for -- eaten by positionals
parser.add_argument("targets", nargs="+", help="Target dataset revpaths (repeatable)")
parser.add_argument(
"-iou",
"--iou-thresh",
default=0.25,
type=float,
help="IoU match threshold for segments (default: %(default)s)",
)
parser.add_argument(
"-oconf",
"--output-conf-thresh",
default=0.0,
type=float,
help="Confidence threshold for output " "annotations (default: %(default)s)",
)
parser.add_argument(
"--quorum",
default=0,
type=int,
help="Minimum count for a label and attribute voting "
"results to be counted (default: %(default)s)",
)
parser.add_argument(
"-g",
"--groups",
action="append",
type=_group,
help="A comma-separated list of labels in "
"annotation groups to check. '?' postfix can be added to a label to "
"make it optional in the group (repeatable)",
"-m",
"--merge-policy",
default="union",
type=str,
help="Policy for how to merge datasets (default: %(default)s)",
)
parser.add_argument(
"-o",
@@ -152,6 +148,42 @@ def _group(s):
"Must be specified after the main command arguments and after "
"the '--' separator",
)

intersect_args = parser.add_argument_group(
title="intersect policy arguments",
description="These parameters are optional for the intersect policy only.",
)
intersect_args.add_argument(
"-iou",
"--iou-thresh",
default=0.25,
type=float,
help="IoU match threshold for segments (default: %(default)s)",
)
intersect_args.add_argument(
"-oconf",
"--output-conf-thresh",
default=0.0,
type=float,
help="Confidence threshold for output " "annotations (default: %(default)s)",
)
intersect_args.add_argument(
"--quorum",
default=0,
type=int,
help="Minimum count for a label and attribute voting "
"results to be counted (default: %(default)s)",
)
intersect_args.add_argument(
"-g",
"--groups",
action="append",
type=_group,
help="A comma-separated list of labels in "
"annotation groups to check. '?' postfix can be added to a label to "
"make it optional in the group (repeatable)",
)
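
# Illustrative note (not part of this commit): the intersect-specific options
# above take effect only when the user selects '-m intersect', e.g.
#   datum merge project1/ project2/ -m intersect -iou 0.5 --quorum 2 -g 'person,hand?'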

parser.set_defaults(command=merge_command)

return parser
@@ -222,48 +254,26 @@ def merge_command(args):
except Exception as e:
raise CliException(str(e))

merger = IntersectMerge(
conf=IntersectMerge.Conf(
pairwise_dist=args.iou_thresh,
groups=args.groups or [],
output_conf_thresh=args.output_conf_thresh,
quorum=args.quorum,
)
report_path = osp.join(dst_dir, "merge_report.json")
options = (
{
"conf": IntersectMerge.Conf(
pairwise_dist=args.iou_thresh,
groups=args.groups or [],
output_conf_thresh=args.output_conf_thresh,
quorum=args.quorum,
)
}
if args.merge_policy == "intersect"
else {}
)
merged_dataset = HLOps.merge(
*source_datasets, merge_policy=args.merge_policy, report_path=report_path, **options
)
merged_dataset = merger.merge(source_datasets)

merged_dataset.export(save_dir=dst_dir, format=exporter, **export_args)

report_path = osp.join(dst_dir, "merge_report.json")
save_merge_report(merger, report_path)

log.info("Merge results have been saved to '%s'" % dst_dir)
log.info("Report has been saved to '%s'" % report_path)

return 0


def save_merge_report(merger, path):
item_errors = OrderedDict()
source_errors = OrderedDict()
all_errors = []

for e in merger.errors:
if isinstance(e, DatasetQualityError):
item_errors[str(e.item_id)] = item_errors.get(str(e.item_id), 0) + 1
elif isinstance(e, DatasetMergeError):
for s in e.sources:
source_errors[str(s)] = source_errors.get(s, 0) + 1
item_errors[str(e.item_id)] = item_errors.get(str(e.item_id), 0) + 1

all_errors.append(str(e))

errors = OrderedDict(
[
("Item errors", item_errors),
("Source errors", source_errors),
("All errors", all_errors),
]
)

dump_json_file(path, errors, indent=True)
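
The merge policies described in the command help above feed the new HLOps.merge call in merge_command. Below is a minimal, illustrative sketch of driving the same flow from Python; it assumes HLOps.merge keeps the signature shown in this diff (merge_policy and report_path keyword arguments), and the dataset paths and format names ("dataset_a/", "coco", etc.) are placeholders.

from datumaro.components.dataset import Dataset
from datumaro.components.hl_ops import HLOps

# Import two source datasets; paths and formats are placeholders.
ds_a = Dataset.import_from("dataset_a/", "coco")
ds_b = Dataset.import_from("dataset_b/", "voc")

# "union" is the default policy; "exact" and "intersect" are the alternatives.
# The IoU/quorum/group options from the CLI are only relevant for "intersect".
merged = HLOps.merge(
    ds_a,
    ds_b,
    merge_policy="union",
    report_path="merge_report.json",
)

# Export as merge_command does; save_media mirrors the '--save-media' flag.
merged.export(save_dir="merged/", format="datumaro", save_media=True)
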
2 changes: 1 addition & 1 deletion datumaro/cli/util/project.py
@@ -153,7 +153,7 @@ def split_local_revpath(revpath: str) -> Tuple[Revision, str]:
A local revpath is a path to a revision within the current project.
The syntax is:
- [ <revision> : ] [ <target> ]
- [ <revision> : ] [ <target> ]
At least one part must be present.
Returns: (revision, build target)
5 changes: 5 additions & 0 deletions datumaro/components/abstracts/__init__.py
@@ -0,0 +1,5 @@
# Copyright (C) 2023 Intel Corporation
#
# SPDX-License-Identifier: MIT

from .merger import *
37 changes: 37 additions & 0 deletions datumaro/components/abstracts/merger.py
@@ -0,0 +1,37 @@
# Copyright (C) 2023 Intel Corporation
#
# SPDX-License-Identifier: MIT

from abc import ABC, abstractmethod
from typing import Dict, Optional, Sequence, Type

from datumaro.components.dataset_base import IDataset
from datumaro.components.dataset_item_storage import (
DatasetItemStorage,
DatasetItemStorageDatasetView,
)
from datumaro.components.media import MediaElement

__all__ = ["IMerger"]


class IMerger(ABC):
@abstractmethod
def merge_infos(self, sources: Sequence[IDataset]) -> Dict:
raise NotImplementedError

@abstractmethod
def merge_categories(self, sources: Sequence[IDataset]) -> Dict:
raise NotImplementedError

@abstractmethod
def merge_media_types(self, sources: Sequence[IDataset]) -> Optional[Type[MediaElement]]:
raise NotImplementedError

@abstractmethod
def merge(self, sources: Sequence[IDataset]) -> DatasetItemStorage:
raise NotImplementedError

@abstractmethod
def __call__(self, *datasets: IDataset) -> DatasetItemStorageDatasetView:
raise NotImplementedError
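
The IMerger interface above only fixes the merging contract; the concrete policies live under datumaro.components.merge. As a rough illustration of that contract, here is a hypothetical, deliberately naive subclass. The class name ConcatMerger and its behaviour are invented for this sketch, which also assumes that IDataset exposes infos(), categories() and media_type() accessors and that DatasetItemStorageDatasetView accepts the keyword arguments shown.

from typing import Dict, Optional, Sequence, Type

from datumaro.components.abstracts import IMerger
from datumaro.components.dataset_base import IDataset
from datumaro.components.dataset_item_storage import (
    DatasetItemStorage,
    DatasetItemStorageDatasetView,
)
from datumaro.components.media import MediaElement


class ConcatMerger(IMerger):
    """Hypothetical merger that simply concatenates all source items."""

    def merge_infos(self, sources: Sequence[IDataset]) -> Dict:
        infos: Dict = {}
        for source in sources:
            infos.update(source.infos())
        return infos

    def merge_categories(self, sources: Sequence[IDataset]) -> Dict:
        # Assumes every source shares identical label categories.
        return dict(sources[0].categories()) if sources else {}

    def merge_media_types(self, sources: Sequence[IDataset]) -> Optional[Type[MediaElement]]:
        return sources[0].media_type() if sources else None

    def merge(self, sources: Sequence[IDataset]) -> DatasetItemStorage:
        storage = DatasetItemStorage()
        for source in sources:
            for item in source:
                # Items with a duplicate (id, subset) simply replace earlier ones here.
                storage.put(item)
        return storage

    def __call__(self, *datasets: IDataset) -> DatasetItemStorageDatasetView:
        # The keyword names below are an assumption about the view's constructor.
        return DatasetItemStorageDatasetView(
            parent=self.merge(datasets),
            infos=self.merge_infos(datasets),
            categories=self.merge_categories(datasets),
            media_type=self.merge_media_types(datasets),
        )
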
4 changes: 2 additions & 2 deletions datumaro/components/annotations/matcher.py
@@ -7,7 +7,7 @@
import numpy as np
from attr import attrib, attrs

from datumaro.components.merge.base import Merger
from datumaro.components.abstracts import IMerger
from datumaro.util.annotation_util import (
OKS,
approximate_line,
@@ -97,7 +97,7 @@ def match_segments(

@attrs(kw_only=True)
class AnnotationMatcher:
_context: Optional[Merger] = attrib(default=None)
_context: Optional[IMerger] = attrib(default=None)

def match_annotations(self, sources):
raise NotImplementedError()