Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Datumaro] Dataset annotations filter #1053

Merged
merged 6 commits
Jan 14, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 40 additions & 6 deletions datumaro/datumaro/cli/project/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

from datumaro.components.project import Project
from datumaro.components.comparator import Comparator
from datumaro.components.dataset_filter import DatasetItemEncoder
from .diff import DiffVisualizer
from ..util.project import make_project_path, load_project

Expand Down Expand Up @@ -131,7 +132,12 @@ def build_export_parser(parser):
"'/item[image/width < image/height]'; "
"extract images with large-area bboxes: "
"'/item[annotation/type=\"bbox\" and annotation/area>2000]'"
"filter out irrelevant annotations from items: "
"'/item/annotation[label = \"person\"]'"
)
parser.add_argument('-a', '--filter-annotations', action='store_true',
help="Filter annotations instead of dataset "
"items (default: %(default)s)")
parser.add_argument('-d', '--dest', dest='dst_dir', required=True,
help="Directory to save output")
parser.add_argument('-f', '--output-format', required=True,
Expand All @@ -158,10 +164,11 @@ def export_command(args):
dataset = project.make_dataset()

log.info("Exporting the project...")
dataset.export(
dataset.export_project(
save_dir=dst_dir,
output_format=args.output_format,
filter_expr=args.filter,
filter_annotations=args.filter_annotations,
cmdline_args=args.extra_args)
log.info("Project exported to '%s' as '%s'" % \
(dst_dir, args.output_format))
Expand All @@ -177,12 +184,21 @@ def build_docs_parser(parser):

def build_extract_parser(parser):
parser.add_argument('-e', '--filter', default=None,
help="Filter expression for dataset items. Examples: "
help="XML XPath filter expression for dataset items. Examples: "
"extract images with width < height: "
"'/item[image/width < image/height]'; "
"extract images with large-area bboxes: "
"'/item[annotation/type=\"bbox\" and annotation/area>2000]'"
"'/item[annotation/type=\"bbox\" and annotation/area>2000]' "
"filter out irrelevant annotations from items: "
"'/item/annotation[label = \"person\"]'"
)
parser.add_argument('-a', '--filter-annotations', action='store_true',
help="Filter annotations instead of dataset "
"items (default: %(default)s)")
parser.add_argument('--remove-empty', action='store_true',
help="Remove an item if there are no annotations left after filtration")
parser.add_argument('--dry-run', action='store_true',
help="Print XML representations to be filtered and exit")
parser.add_argument('-d', '--dest', dest='dst_dir', required=True,
help="Output directory")
parser.add_argument('-p', '--project', dest='project_dir', default='.',
Expand All @@ -193,9 +209,27 @@ def extract_command(args):
project = load_project(args.project_dir)

dst_dir = osp.abspath(args.dst_dir)
os.makedirs(dst_dir, exist_ok=False)
if not args.dry_run:
os.makedirs(dst_dir, exist_ok=False)

dataset = project.make_dataset()

kwargs = {}
if args.filter_annotations:
kwargs['remove_empty'] = args.remove_empty

if args.dry_run:
dataset = dataset.extract(filter_expr=args.filter,
filter_annotations=args.filter_annotations, **kwargs)
for item in dataset:
encoded_item = DatasetItemEncoder.encode(item, dataset.categories())
xml_item = DatasetItemEncoder.to_string(encoded_item)
print(xml_item)
return 0

dataset.extract_project(save_dir=dst_dir, filter_expr=args.filter,
filter_annotations=args.filter_annotations, **kwargs)

project.make_dataset().extract(filter_expr=args.filter, save_dir=dst_dir)
log.info("Subproject extracted to '%s'" % (dst_dir))

return 0
Expand Down Expand Up @@ -279,7 +313,7 @@ def transform_command(args):

dst_dir = osp.abspath(args.dst_dir)
os.makedirs(dst_dir, exist_ok=False)
project.make_dataset().transform(
project.make_dataset().apply_model(
save_dir=dst_dir,
model_name=args.model_name)

Expand Down
6 changes: 5 additions & 1 deletion datumaro/datumaro/cli/source/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,9 @@ def build_export_parser(parser):
"extract images with large-area bboxes: "
"'/item[annotation/type=\"bbox\" and annotation/area>2000]'"
)
parser.add_argument('-a', '--filter-annotations', action='store_true',
help="Filter annotations instead of dataset "
"items (default: %(default)s)")
parser.add_argument('-d', '--dest', dest='dst_dir', required=True,
help="Directory to save output")
parser.add_argument('-f', '--output-format', required=True,
Expand Down Expand Up @@ -215,10 +218,11 @@ def export_command(args):
dataset = source_project.make_dataset()

log.info("Exporting the project...")
dataset.export(
dataset.export_project(
save_dir=dst_dir,
output_format=args.output_format,
filter_expr=args.filter,
filter_annotations=args.filter_annotations,
cmdline_args=args.extra_args)
log.info("Source '%s' exported to '%s' as '%s'" % \
(args.name, dst_dir, args.output_format))
Expand Down
1 change: 0 additions & 1 deletion datumaro/datumaro/components/config_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,6 @@ def __init__(self, config=None):
.add('subsets', list) \
.add('sources', lambda: _DefaultConfig(
lambda v=None: Source(v))) \
.add('filter', str) \
\
.add('project_filename', str, internal=True) \
.add('project_dir', str, internal=True) \
Expand Down
2 changes: 1 addition & 1 deletion datumaro/datumaro/components/converters/voc.py
Original file line number Diff line number Diff line change
Expand Up @@ -462,7 +462,7 @@ def _make_label_id_map(self):
void_labels = [src_label for src_id, src_label in source_labels.items()
if src_label not in target_labels]
if void_labels:
log.warn("The following labels are remapped to background: %s" %
log.warning("The following labels are remapped to background: %s" %
', '.join(void_labels))

def map_id(src_id):
Expand Down
Loading