Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add YOLO Loose format #856

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
(<https://github.com/openvinotoolkit/datumaro/pull/842>)
- Add Ava action data format support
(<https://github.com/openvinotoolkit/datumaro/pull/847>)
- Add YOLO Loose format
(<https://github.com/openvinotoolkit/datumaro/pull/856>)

### Changed
- Refactor Datumaro format code and test code
Expand Down
3 changes: 1 addition & 2 deletions datumaro/components/dataset_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,7 @@
from datumaro.components.importer import ImportContext, NullImportContext
from datumaro.components.media import Image, MediaElement, PointCloud
from datumaro.util.attrs_util import default_if_none, not_empty

DEFAULT_SUBSET_NAME = "default"
from datumaro.util.definitions import DEFAULT_SUBSET_NAME

T = TypeVar("T", bound=MediaElement)

Expand Down
2 changes: 1 addition & 1 deletion datumaro/components/format_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,7 @@ def _require_files_iter(
@contextlib.contextmanager
def probe_text_file(
self, path: str, requirement_desc: str, is_binary_file: bool = False
) -> Iterator[Union[BufferedReader, TextIO]]:
) -> Union[BufferedReader, TextIO]:
"""
Returns a context manager that can be used to place a requirement on
the contents of the file referred to by `path`. To do so, you must
Expand Down
7 changes: 6 additions & 1 deletion datumaro/components/importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ def _find_sources_recursive(
dirname: str = "",
file_filter: Optional[Callable[[str], bool]] = None,
max_depth: int = 3,
recursive: bool = False,
):
"""
Finds sources in the specified location, using the matching pattern
Expand All @@ -151,6 +152,8 @@ def _find_sources_recursive(
dirname: a glob pattern for filename prefixes
file_filter: a callable (abspath: str) -> bool, to filter paths found
max_depth: the maximum depth for recursive search.
recursive: If recursive is true, the pattern '**' will match any files and
zero or more directories and subdirectories.

Returns: a list of source configurations
(i.e. Extractor type names and c-tor parameters)
Expand All @@ -174,7 +177,9 @@ def _find_sources_recursive(
for d in range(max_depth + 1):
sources.extend(
{"url": p, "format": extractor_name}
for p in iglob(osp.join(path, *("*" * d), dirname, filename + ext))
for p in iglob(
osp.join(path, *("*" * d), dirname, filename + ext), recursive=recursive
)
if (callable(file_filter) and file_filter(p)) or (not callable(file_filter))
)
if sources:
Expand Down
3 changes: 1 addition & 2 deletions datumaro/components/media.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,9 @@
import numpy as np

from datumaro.components.errors import MediaShapeError
from datumaro.util.definitions import BboxIntCoords
from datumaro.util.image import _image_loading_errors, decode_image, lazy_image, save_image

BboxIntCoords = Tuple[int, int, int, int] # (x, y, w, h)


class MediaType(IntEnum):
NONE = 0
Expand Down
175 changes: 115 additions & 60 deletions datumaro/plugins/data_formats/yolo/base.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (C) 2019-2022 Intel Corporation
# Copyright (C) 2019-2023 Intel Corporation
#
# SPDX-License-Identifier: MIT

Expand All @@ -7,7 +7,7 @@
import os.path as osp
import re
from collections import OrderedDict
from typing import Dict, List, Optional, Tuple, Type, TypeVar, Union
from typing import Dict, List, Optional, Type, TypeVar, Union

from datumaro.components.annotation import Annotation, AnnotationType, Bbox, LabelCategories
from datumaro.components.dataset_base import DatasetBase, DatasetItem, SubsetBase
Expand All @@ -16,21 +16,24 @@
InvalidAnnotationError,
UndeclaredLabelError,
)
from datumaro.components.format_detection import FormatDetectionContext
from datumaro.components.importer import Importer
from datumaro.components.media import Image
from datumaro.util.image import DEFAULT_IMAGE_META_FILE_NAME, ImageMeta, load_image_meta_file
from datumaro.util.image import (
DEFAULT_IMAGE_META_FILE_NAME,
IMAGE_EXTENSIONS,
ImageMeta,
load_image_meta_file,
)
from datumaro.util.meta_file_util import has_meta_file, parse_meta_file
from datumaro.util.os_util import split_path
from datumaro.util.os_util import extract_subset_name_from_parent, find_files, split_path

from .format import YoloPath
from .format import YoloLoosePath, YoloPath

T = TypeVar("T")


class YoloBase(SubsetBase):
class Subset(DatasetBase):
def __init__(self, name: str, parent: YoloBase):
class YoloStrictBase(SubsetBase):
class _Subset(DatasetBase):
def __init__(self, name: str, parent: YoloStrictBase):
super().__init__()
self._name = name
self._parent = parent
Expand All @@ -54,23 +57,15 @@ def __init__(
image_info: Union[None, str, ImageMeta] = None,
**kwargs,
) -> None:
super().__init__(**kwargs)

if not osp.isfile(config_path):
raise DatasetImportError(f"Can't read dataset descriptor file '{config_path}'")

super().__init__(**kwargs)

rootpath = osp.dirname(config_path)
self._path = rootpath

assert image_info is None or isinstance(image_info, (str, dict))
if image_info is None:
image_info = osp.join(rootpath, DEFAULT_IMAGE_META_FILE_NAME)
if not osp.isfile(image_info):
image_info = {}
if isinstance(image_info, str):
image_info = load_image_meta_file(image_info)

self._image_info = image_info
self._image_info = self.parse_image_info(rootpath, image_info)

config = self._parse_config(config_path)

Expand All @@ -95,21 +90,33 @@ def __init__(
if not osp.isfile(list_path):
raise InvalidAnnotationError(f"Can't find '{subset_name}' subset list file")

subset = YoloBase.Subset(subset_name, self)
subset = self._Subset(subset_name, self)
with open(list_path, "r", encoding="utf-8") as f:
subset.items = OrderedDict(
(self.name_from_path(p), self.localize_path(p)) for p in f if p.strip()
)
subsets[subset_name] = subset

self._subsets: Dict[str, YoloBase.Subset] = subsets
self._subsets: Dict[str, self._Subset] = subsets

self._categories = {
AnnotationType.label: self._load_categories(
osp.join(self._path, self.localize_path(names_path))
)
}

@staticmethod
def parse_image_info(rootpath: str, image_info: Optional[Union[str, ImageMeta]] = None):
    """Resolve the `image_info` argument into loaded image meta information.

    Args:
        rootpath: Dataset root directory; only used to locate the default
            meta file when `image_info` is None.
        image_info: One of
            - None: the file `DEFAULT_IMAGE_META_FILE_NAME` under `rootpath`
              is loaded if it exists, otherwise an empty dict is returned;
            - str: treated as a path and passed to `load_image_meta_file`;
            - dict: returned unchanged.

    Returns:
        The dict passed in, an empty dict, or whatever
        `load_image_meta_file` returns for the resolved path.
    """
    assert image_info is None or isinstance(image_info, (str, dict))

    if image_info is None:
        # No explicit source given: fall back to the conventional meta file
        # next to the dataset, if there is one.
        default_meta_path = osp.join(rootpath, DEFAULT_IMAGE_META_FILE_NAME)
        if not osp.isfile(default_meta_path):
            return {}
        return load_image_meta_file(default_meta_path)

    if isinstance(image_info, str):
        return load_image_meta_file(image_info)

    # Already-parsed meta information is handed back as is.
    return image_info

@staticmethod
def _parse_config(path: str) -> Dict[str, str]:
with open(path, "r", encoding="utf-8") as f:
Expand Down Expand Up @@ -168,13 +175,19 @@ def _get(self, item_id: str, subset_name: str) -> Optional[DatasetItem]:

anno_path = osp.splitext(image.path)[0] + ".txt"
annotations = self._parse_annotations(
anno_path, image, item_id=(item_id, subset_name)
anno_path,
image,
label_categories=self._categories[AnnotationType.label],
)

item = DatasetItem(
id=item_id, subset=subset_name, media=image, annotations=annotations
)
subset.items[item_id] = item
except (UndeclaredLabelError, InvalidAnnotationError) as e:
self._ctx.error_policy.report_annotation_error(e, item_id=(item_id, subset_name))
subset.items.pop(item_id)
item = None
except Exception as e:
self._ctx.error_policy.report_item_error(e, item_id=(item_id, subset_name))
subset.items.pop(item_id)
Expand All @@ -191,8 +204,13 @@ def _parse_field(value: str, cls: Type[T], field_name: str) -> T:
f"Can't parse {field_name} from '{value}'. Expected {cls}"
) from e

@classmethod
def _parse_annotations(
self, anno_path: str, image: Image, *, item_id: Tuple[str, str]
cls,
anno_path: str,
image: Image,
*,
label_categories: LabelCategories,
) -> List[Annotation]:
lines = []
with open(anno_path, "r", encoding="utf-8") as f:
Expand All @@ -207,39 +225,36 @@ def _parse_annotations(
# Use image info as late as possible to avoid unnecessary image loading
if image.size is None:
raise DatasetImportError(
f"Can't find image info for '{self.localize_path(image.path)}'"
f"Can't find image info for '{cls.localize_path(image.path)}'"
)
image_height, image_width = image.size

for line in lines:
try:
parts = line.split()
if len(parts) != 5:
raise InvalidAnnotationError(
f"Unexpected field count {len(parts)} in the bbox description. "
"Expected 5 fields (label, xc, yc, w, h)."
)
label_id, xc, yc, w, h = parts

label_id = self._parse_field(label_id, int, "bbox label id")
if label_id not in self._categories[AnnotationType.label]:
raise UndeclaredLabelError(str(label_id))

w = self._parse_field(w, float, "bbox width")
h = self._parse_field(h, float, "bbox height")
x = self._parse_field(xc, float, "bbox center x") - w * 0.5
y = self._parse_field(yc, float, "bbox center y") - h * 0.5
annotations.append(
Bbox(
x * image_width,
y * image_height,
w * image_width,
h * image_height,
label=label_id,
)
parts = line.split()
if len(parts) != 5:
raise InvalidAnnotationError(
f"Unexpected field count {len(parts)} in the bbox description. "
"Expected 5 fields (label, xc, yc, w, h)."
)
except Exception as e:
self._ctx.error_policy.report_annotation_error(e, item_id=item_id)
label_id, xc, yc, w, h = parts

label_id = cls._parse_field(label_id, int, "bbox label id")
if label_id not in label_categories:
raise UndeclaredLabelError(str(label_id))

w = cls._parse_field(w, float, "bbox width")
h = cls._parse_field(h, float, "bbox height")
x = cls._parse_field(xc, float, "bbox center x") - w * 0.5
y = cls._parse_field(yc, float, "bbox center y") - h * 0.5
annotations.append(
Bbox(
x * image_width,
y * image_height,
w * image_width,
h * image_height,
label=label_id,
)
)

return annotations

Expand Down Expand Up @@ -272,11 +287,51 @@ def get_subset(self, name):
return self._subsets[name]


class YoloImporter(Importer):
    """Importer that recognizes a dataset by the presence of `obj.data`."""

    @classmethod
    def detect(cls, context: FormatDetectionContext) -> None:
        context.require_file("obj.data")

    @classmethod
    def find_sources(cls, path):
        return cls._find_sources_recursive(path, ".data", "yolo")


class YoloLooseBase(SubsetBase):
    """Reader for a YOLO dataset given as a directory rather than a config file.

    Label names are read from `YoloLoosePath.NAMES_FILE` in the dataset root.
    Each annotation file listed in `urls` is paired with the image that has
    the same file stem.
    """

    def __init__(
        self,
        config_path: str,
        image_info: Union[None, str, ImageMeta] = None,
        urls: Optional[List[str]] = None,
        **kwargs,
    ) -> None:
        """Build the items of one subset.

        Args:
            config_path: Dataset root directory.
            image_info: Forwarded to `YoloStrictBase.parse_image_info`; the
                result is stored on the instance.
            urls: Paths of the annotation files to load. `None` (the
                default) is treated as "no annotation files".
            **kwargs: Forwarded to `SubsetBase.__init__`.

        Raises:
            DatasetImportError: If `config_path` is not a directory.
        """
        super().__init__(**kwargs)

        if not osp.isdir(config_path):
            raise DatasetImportError(f"{config_path} should be a directory.")

        rootpath = config_path
        self._path = rootpath

        self._image_info = YoloStrictBase.parse_image_info(rootpath, image_info)

        # Init label categories
        label_categories = YoloStrictBase._load_categories(
            osp.join(rootpath, YoloLoosePath.NAMES_FILE)
        )
        self._categories = {AnnotationType.label: label_categories}

        # Parse dataset items
        def _get_fname(fpath: str) -> str:
            # File stem: basename without its extension.
            return osp.splitext(osp.basename(fpath))[0]

        # Map image stem -> image path, keeping only images attributed to this
        # subset.
        # NOTE(review): presumably the subset name is taken from the image's
        # parent directory - confirm against extract_subset_name_from_parent.
        img_files = {
            _get_fname(img_file): img_file
            for img_file in find_files(rootpath, IMAGE_EXTENSIONS, recursive=True, max_depth=2)
            if extract_subset_name_from_parent(img_file, rootpath) == self._subset
        }

        # `urls` defaults to None, so guard the iteration instead of raising
        # TypeError when no annotation files were supplied.
        for url in urls or ():
            # Bind the item id before entering the try block so that the
            # error report below can never see an unbound or stale name.
            fname = _get_fname(url)
            try:
                img = Image(path=img_files[fname])
                anns = YoloStrictBase._parse_annotations(
                    url,
                    img,
                    label_categories=label_categories,
                )
                self._items.append(
                    DatasetItem(id=fname, subset=self._subset, media=img, annotations=anns)
                )
            except Exception as e:
                # Deliberately broad: any per-item failure (missing image,
                # malformed annotation, ...) is routed to the error policy
                # so one bad item does not abort the whole import.
                self._ctx.error_policy.report_item_error(e, item_id=(fname, self._subset))
11 changes: 11 additions & 0 deletions datumaro/plugins/data_formats/yolo/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,19 @@
#
# SPDX-License-Identifier: MIT

from enum import IntEnum


class YoloPath:
    """Naming constants for the YOLO dataset layout."""

    # Subset name used when none is specified.
    DEFAULT_SUBSET_NAME = "train"
    # Recognized subset names.
    SUBSET_NAMES = ["train", "valid"]
    # NOTE(review): presumably descriptor-file keys that must not be read as
    # subset names - confirm against the config parser.
    RESERVED_CONFIG_KEYS = ["backup", "classes", "names"]


class YoloLoosePath:
    """Naming constants for the loose (directory-based) YOLO layout."""

    # File in the dataset root that holds the label names.
    NAMES_FILE = "obj.names"


class YoloFormatType(IntEnum):
    """Identifies which YOLO dataset layout is in use."""

    yolo_strict = 0
    yolo_loose = 1
Loading