Skip to content

Commit

Permalink
[Datumaro] Dataset annotations filter (cvat-ai#1053)
Browse files: browse the repository at this point in the history
* Fix deprecation message

* Update launcher interface

* Add dataset entity and annotation filter; remove filter from project config; update transform

* Update project and source cli

* Fix help message

* Refactor tests
  • Loading branch information
zhiltsov-max authored and Chris Lee-Messer committed Mar 5, 2020
1 parent d38281f commit 30eeddb
Show file tree
Hide file tree
Showing 9 changed files with 514 additions and 264 deletions.
46 changes: 40 additions & 6 deletions datumaro/datumaro/cli/project/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

from datumaro.components.project import Project
from datumaro.components.comparator import Comparator
from datumaro.components.dataset_filter import DatasetItemEncoder
from .diff import DiffVisualizer
from ..util.project import make_project_path, load_project

Expand Down Expand Up @@ -131,7 +132,12 @@ def build_export_parser(parser):
"'/item[image/width < image/height]'; "
"extract images with large-area bboxes: "
"'/item[annotation/type=\"bbox\" and annotation/area>2000]'"
"filter out irrelevant annotations from items: "
"'/item/annotation[label = \"person\"]'"
)
parser.add_argument('-a', '--filter-annotations', action='store_true',
help="Filter annotations instead of dataset "
"items (default: %(default)s)")
parser.add_argument('-d', '--dest', dest='dst_dir', required=True,
help="Directory to save output")
parser.add_argument('-f', '--output-format', required=True,
Expand All @@ -158,10 +164,11 @@ def export_command(args):
dataset = project.make_dataset()

log.info("Exporting the project...")
dataset.export(
dataset.export_project(
save_dir=dst_dir,
output_format=args.output_format,
filter_expr=args.filter,
filter_annotations=args.filter_annotations,
cmdline_args=args.extra_args)
log.info("Project exported to '%s' as '%s'" % \
(dst_dir, args.output_format))
Expand All @@ -177,12 +184,21 @@ def build_docs_parser(parser):

def build_extract_parser(parser):
parser.add_argument('-e', '--filter', default=None,
help="Filter expression for dataset items. Examples: "
help="XML XPath filter expression for dataset items. Examples: "
"extract images with width < height: "
"'/item[image/width < image/height]'; "
"extract images with large-area bboxes: "
"'/item[annotation/type=\"bbox\" and annotation/area>2000]'"
"'/item[annotation/type=\"bbox\" and annotation/area>2000]' "
"filter out irrelevant annotations from items: "
"'/item/annotation[label = \"person\"]'"
)
parser.add_argument('-a', '--filter-annotations', action='store_true',
help="Filter annotations instead of dataset "
"items (default: %(default)s)")
parser.add_argument('--remove-empty', action='store_true',
help="Remove an item if there are no annotations left after filtration")
parser.add_argument('--dry-run', action='store_true',
help="Print XML representations to be filtered and exit")
parser.add_argument('-d', '--dest', dest='dst_dir', required=True,
help="Output directory")
parser.add_argument('-p', '--project', dest='project_dir', default='.',
Expand All @@ -193,9 +209,27 @@ def extract_command(args):
project = load_project(args.project_dir)

dst_dir = osp.abspath(args.dst_dir)
os.makedirs(dst_dir, exist_ok=False)
if not args.dry_run:
os.makedirs(dst_dir, exist_ok=False)

dataset = project.make_dataset()

kwargs = {}
if args.filter_annotations:
kwargs['remove_empty'] = args.remove_empty

if args.dry_run:
dataset = dataset.extract(filter_expr=args.filter,
filter_annotations=args.filter_annotations, **kwargs)
for item in dataset:
encoded_item = DatasetItemEncoder.encode(item, dataset.categories())
xml_item = DatasetItemEncoder.to_string(encoded_item)
print(xml_item)
return 0

dataset.extract_project(save_dir=dst_dir, filter_expr=args.filter,
filter_annotations=args.filter_annotations, **kwargs)

project.make_dataset().extract(filter_expr=args.filter, save_dir=dst_dir)
log.info("Subproject extracted to '%s'" % (dst_dir))

return 0
Expand Down Expand Up @@ -279,7 +313,7 @@ def transform_command(args):

dst_dir = osp.abspath(args.dst_dir)
os.makedirs(dst_dir, exist_ok=False)
project.make_dataset().transform(
project.make_dataset().apply_model(
save_dir=dst_dir,
model_name=args.model_name)

Expand Down
6 changes: 5 additions & 1 deletion datumaro/datumaro/cli/source/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,9 @@ def build_export_parser(parser):
"extract images with large-area bboxes: "
"'/item[annotation/type=\"bbox\" and annotation/area>2000]'"
)
parser.add_argument('-a', '--filter-annotations', action='store_true',
help="Filter annotations instead of dataset "
"items (default: %(default)s)")
parser.add_argument('-d', '--dest', dest='dst_dir', required=True,
help="Directory to save output")
parser.add_argument('-f', '--output-format', required=True,
Expand Down Expand Up @@ -215,10 +218,11 @@ def export_command(args):
dataset = source_project.make_dataset()

log.info("Exporting the project...")
dataset.export(
dataset.export_project(
save_dir=dst_dir,
output_format=args.output_format,
filter_expr=args.filter,
filter_annotations=args.filter_annotations,
cmdline_args=args.extra_args)
log.info("Source '%s' exported to '%s' as '%s'" % \
(args.name, dst_dir, args.output_format))
Expand Down
1 change: 0 additions & 1 deletion datumaro/datumaro/components/config_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,6 @@ def __init__(self, config=None):
.add('subsets', list) \
.add('sources', lambda: _DefaultConfig(
lambda v=None: Source(v))) \
.add('filter', str) \
\
.add('project_filename', str, internal=True) \
.add('project_dir', str, internal=True) \
Expand Down
2 changes: 1 addition & 1 deletion datumaro/datumaro/components/converters/voc.py
Original file line number Diff line number Diff line change
Expand Up @@ -462,7 +462,7 @@ def _make_label_id_map(self):
void_labels = [src_label for src_id, src_label in source_labels.items()
if src_label not in target_labels]
if void_labels:
log.warn("The following labels are remapped to background: %s" %
log.warning("The following labels are remapped to background: %s" %
', '.join(void_labels))

def map_id(src_id):
Expand Down
Loading

0 comments on commit 30eeddb

Please sign in to comment.