Skip to content

Commit

Permalink
Add Ultralytics YOLO format (#859)
Browse files Browse the repository at this point in the history
* Refactor YoloExporter and Add YoloUltralyticsExporter
* Add YoloUltralyticsImporter and YoloUltralyticsBase
* Add 08_e2e_example_yolo_ultralytics_trainer.ipynb
* Add documentation

Signed-off-by: Kim, Vinnam <vinnam.kim@intel.com>
  • Loading branch information
vinnamkim authored Mar 17, 2023
1 parent f1b0f27 commit 6a739e3
Show file tree
Hide file tree
Showing 12 changed files with 1,257 additions and 44 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
(<https://github.com/openvinotoolkit/datumaro/pull/855>)
- Add YOLO Loose format
(<https://github.com/openvinotoolkit/datumaro/pull/856>)
- Add Ultralytics YOLO format
(<https://github.com/openvinotoolkit/datumaro/pull/859>)

### Changed
- Refactor Datumaro format code and test code
Expand All @@ -39,6 +41,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
(<https://github.com/openvinotoolkit/datumaro/pull/843>)
- Fix wrong `__len__()` of Subset when the item is removed
(<https://github.com/openvinotoolkit/datumaro/pull/854>)
- Fix mask visualization bug
(<https://github.com/openvinotoolkit/datumaro/pull/860>)

## 24/02/2023 - Release v1.0.0
### Added
Expand Down
39 changes: 35 additions & 4 deletions datumaro/plugins/data_formats/yolo/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
from collections import OrderedDict
from typing import Dict, List, Optional, Type, TypeVar, Union

import yaml

from datumaro.components.annotation import Annotation, AnnotationType, Bbox, LabelCategories
from datumaro.components.dataset_base import DatasetBase, DatasetItem, SubsetBase
from datumaro.components.errors import (
Expand All @@ -26,7 +28,7 @@
from datumaro.util.meta_file_util import has_meta_file, parse_meta_file
from datumaro.util.os_util import extract_subset_name_from_parent, find_files, split_path

from .format import YoloLoosePath, YoloPath
from .format import YoloLoosePath, YoloPath, YoloUltralyticsPath

T = TypeVar("T")

Expand Down Expand Up @@ -288,6 +290,8 @@ def get_subset(self, name):


class YoloLooseBase(SubsetBase):
META_FILE = YoloLoosePath.NAMES_FILE

def __init__(
self,
config_path: str,
Expand All @@ -306,9 +310,7 @@ def __init__(
self._image_info = YoloStrictBase.parse_image_info(rootpath, image_info)

# Init label categories
label_categories = YoloStrictBase._load_categories(
osp.join(rootpath, YoloLoosePath.NAMES_FILE)
)
label_categories = self._load_categories(osp.join(rootpath, self.META_FILE))
self._categories = {AnnotationType.label: label_categories}

# Parse dataset items
Expand All @@ -335,3 +337,32 @@ def _get_fname(fpath: str) -> str:
)
except Exception as e:
self._ctx.error_policy.report_item_error(e, item_id=(fname, self._subset))

def _load_categories(self, names_path: str) -> LabelCategories:
    """Load the label categories of the dataset from ``names_path``.

    This is an overridable hook: ``__init__`` calls it with the path of
    ``META_FILE`` under the dataset root. The default implementation
    delegates the parsing to ``YoloStrictBase._load_categories``.

    Args:
        names_path: Path to the file which lists the label names.

    Returns:
        The label categories produced by ``YoloStrictBase._load_categories``.
    """
    label_categories = YoloStrictBase._load_categories(names_path)
    return label_categories


class YoloUltralyticsBase(YoloLooseBase):
    """Dataset base for the Ultralytics YOLO format.

    It behaves like ``YoloLooseBase`` except that the label names are read
    from the ``names`` entry of the YAML meta file
    (``YoloUltralyticsPath.META_FILE``).
    """

    META_FILE = YoloUltralyticsPath.META_FILE

    def __init__(
        self,
        config_path: str,
        image_info: Union[None, str, ImageMeta] = None,
        urls: Optional[List[str]] = None,
        **kwargs,
    ) -> None:
        super().__init__(config_path, image_info, urls, **kwargs)

    def _load_categories(self, names_path: str) -> LabelCategories:
        """Build the label categories for the dataset.

        If a dataset meta file is found in the directory of ``names_path``,
        its labels are used. Otherwise the ``names`` entry of the YAML file
        at ``names_path`` is read.

        Args:
            names_path: Path to the YAML meta file of the dataset.

        Returns:
            Label categories, in class index order.

        Raises:
            KeyError: If the YAML file has no ``names`` entry.
        """
        dataset_dir = osp.dirname(names_path)
        if has_meta_file(dataset_dir):
            return LabelCategories.from_iterable(parse_meta_file(dataset_dir).keys())

        # YAML meta files are written as UTF-8, so do not rely on the locale default.
        with open(names_path, "r", encoding="utf-8") as fp:
            loaded = yaml.safe_load(fp)

        names = loaded["names"]
        if isinstance(names, dict):
            # Mapping form ({class index: name}): order by class index rather than
            # by position in the file, so label ids match the ids in the annotations.
            label_names = [name for _, name in sorted(names.items())]
        else:
            # Sequence form (["name0", "name1", ...]): the position is the class index.
            label_names = list(names)

        label_categories = LabelCategories()
        for label_name in label_names:
            label_categories.add(label_name)

        return label_categories
185 changes: 157 additions & 28 deletions datumaro/plugins/data_formats/yolo/exporter.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,17 @@
# Copyright (C) 2019-2022 Intel Corporation
#
# SPDX-License-Identifier: MIT

import logging as log
import os
import os.path as osp
from collections import OrderedDict
from collections import OrderedDict, defaultdict

import yaml

from datumaro.components.annotation import AnnotationType, Bbox
from datumaro.components.dataset import ItemStatus
from datumaro.components.dataset_base import DEFAULT_SUBSET_NAME, DatasetItem, IDataset
from datumaro.components.errors import DatasetExportError, MediaTypeError
from datumaro.components.errors import DatasetExportError, DatumaroError, MediaTypeError
from datumaro.components.exporter import Exporter
from datumaro.components.media import Image
from datumaro.util import str_to_bool
Expand Down Expand Up @@ -86,26 +87,14 @@ def apply(self):
image_paths = OrderedDict()
for item in pbar.iter(subset, desc=f"Exporting '{subset_name}'"):
try:
if not item.media or not (item.media.has_data or item.media.has_size):
raise Exception(
"Failed to export item '%s': " "item has no image info" % item.id
)

image_name = self._make_image_filename(item)
if self._save_media:
if item.media:
self._save_image(item, osp.join(subset_dir, image_name))
else:
log.warning("Item '%s' has no image" % item.id)
image_fpath = self._export_media(item, subset_dir)
image_name = osp.relpath(image_fpath, subset_dir)
image_paths[item.id] = osp.join(
self._prefix, osp.basename(subset_dir), image_name
)

yolo_annotation = self._export_item_annotation(item)
annotation_path = osp.join(subset_dir, "%s.txt" % item.id)
os.makedirs(osp.dirname(annotation_path), exist_ok=True)
with open(annotation_path, "w", encoding="utf-8") as f:
f.write(yolo_annotation)
self._export_item_annotation(item, subset_dir)

except Exception as e:
self._ctx.error_policy.report_item_error(e, item_id=(item.id, item.subset))

Expand All @@ -132,20 +121,160 @@ def apply(self):
f.write("names = %s\n" % osp.join(self._prefix, "obj.names"))
f.write("backup = backup/\n")

def _export_item_annotation(self, item):
height, width = item.media.size
def _export_media(self, item: DatasetItem, subset_img_dir: str) -> str:
    """Resolve the image path of ``item`` and save the image if requested.

    Args:
        item: Dataset item whose media is exported.
        subset_img_dir: Directory the image file name is joined to.

    Returns:
        The image file path inside ``subset_img_dir``. Any exception raised
        here is handed to the error policy instead; if that call returns,
        this method falls through and implicitly returns ``None``.
    """
    try:
        media = item.media
        if media and (media.has_data or media.has_size):
            image_fpath = osp.join(subset_img_dir, self._make_image_filename(item))

            # The path is returned even when the image itself is not written.
            if self._save_media:
                self._save_image(item, image_fpath)

            return image_fpath

        raise Exception("Failed to export item '%s': " "item has no image info" % item.id)

    except Exception as e:
        self._ctx.error_policy.report_item_error(e, item_id=(item.id, item.subset))

def _export_item_annotation(self, item: DatasetItem, subset_dir: str) -> None:
    """Write the bounding boxes of ``item`` to ``<subset_dir>/<item.id>.txt``.

    One line is written per labeled ``Bbox`` annotation: the label followed
    by the values returned by ``_make_yolo_bbox``, formatted with six
    decimals. Annotations of other types and boxes without a label are
    skipped. Any exception is handed to the error policy.

    Args:
        item: Dataset item whose annotations are exported.
        subset_dir: Directory which receives the annotation file.
    """
    try:
        height, width = item.media.size
        image_size = (width, height)

        lines = []
        for ann in item.annotations:
            if isinstance(ann, Bbox) and ann.label is not None:
                coords = " ".join("%.6f" % p for p in _make_yolo_bbox(image_size, ann.points))
                lines.append("%s %s\n" % (ann.label, coords))

        annotation_path = osp.join(subset_dir, "%s.txt" % item.id)
        # The item id may contain sub-directories, so create them first.
        os.makedirs(osp.dirname(annotation_path), exist_ok=True)

        with open(annotation_path, "w", encoding="utf-8") as f:
            f.write("".join(lines))

    except Exception as e:
        self._ctx.error_policy.report_item_error(e, item_id=(item.id, item.subset))

@classmethod
def patch(cls, dataset, patch, save_dir, **kwargs):
    """Export ``dataset`` into ``save_dir`` and delete files of stale items.

    After the export, the image and annotation files of every updated item
    that was removed, or that has no media, are deleted from its
    ``obj_<subset>_data`` directory if they exist.

    Args:
        dataset: Dataset to export.
        patch: Patch object providing ``updated_items`` and ``data``.
        save_dir: Directory of the exported dataset.
        **kwargs: Extra options forwarded to the exporter constructor.
    """
    conv = cls(dataset, save_dir=save_dir, **kwargs)
    conv._patch = patch
    conv.apply()

    for (item_id, subset), status in patch.updated_items.items():
        removed = status == ItemStatus.removed
        if removed:
            # A removed item is no longer in the patch data; use a stand-in.
            item = DatasetItem(item_id, subset=subset)
        else:
            item = patch.data.get(item_id, subset)

        # Only removed items and items without media need a cleanup.
        if not removed and item.media:
            continue

        subset_name = YoloPath.DEFAULT_SUBSET_NAME if subset == DEFAULT_SUBSET_NAME else subset
        subset_dir = osp.join(save_dir, "obj_%s_data" % subset_name)

        stale_paths = (
            osp.join(subset_dir, conv._make_image_filename(item)),
            osp.join(subset_dir, "%s.txt" % item.id),
        )
        for stale_path in stale_paths:
            if osp.isfile(stale_path):
                os.remove(stale_path)


class YoloUltralyticsExporter(YoloExporter):
allowed_subset_names = {"train", "val", "test"}
must_subset_names = {"train", "val"}

def __init__(self, extractor: IDataset, save_dir: str, **kwargs) -> None:
    """Create the exporter and warn when media saving is turned off.

    Args:
        extractor: Dataset to export.
        save_dir: Directory which receives the exported dataset.
        **kwargs: Extra options forwarded to the parent exporter.
    """
    super().__init__(extractor, save_dir, **kwargs)

    if self._save_media is not False:
        return

    log.warning(
        "It is recommended to turn on `save_media=True` when export to `yolo_ultralytics` format. "
        "If not, you will need to copy your image files and paste them into the appropriate directories."
    )

def _check_dataset(self):
    """Validate that the dataset can be exported in this format.

    Raises:
        MediaTypeError: If the dataset declares a media type which is not
            a subclass of ``Image``.
        DatasetExportError: If a subset name is not in
            ``allowed_subset_names``, or a name in ``must_subset_names``
            is missing from the dataset.
    """
    media_type = self._extractor.media_type()
    if media_type and not issubclass(media_type, Image):
        raise MediaTypeError("Media type is not an image")

    subset_names = set(self._extractor.subsets().keys())

    # First reject any subset this format has no place for ...
    for subset in subset_names:
        if subset in self.allowed_subset_names:
            continue
        raise DatasetExportError(
            f"The allowed subset name is in {self.allowed_subset_names}, "
            f'so that subset "{subset}" is not allowed.'
        )

    # ... then make sure every mandatory subset is present.
    for must_name in self.must_subset_names:
        if must_name in subset_names:
            continue
        raise DatasetExportError(
            f'Subset "{must_name}" is not in {subset_names}, '
            "but YoloUltralytics requires both of them."
        )

def apply(self):
    """Export the dataset into ``self._save_dir``.

    For every subset, images go to ``images/<subset>``, annotation files go
    to ``labels/<subset>``, and ``<subset>.txt`` lists the exported image
    paths relative to the save directory. Finally ``data.yaml`` is written
    with one entry per subset (the name of its list file) and a ``names``
    mapping from label index to label name.

    An item whose media cannot be exported is skipped: ``_export_media``
    has already reported the problem to the error policy and returned
    ``None``, so there is no image path to list for it.
    """
    extractor = self._extractor
    save_dir = self._save_dir

    os.makedirs(save_dir, exist_ok=True)

    try:
        self._check_dataset()
    except DatumaroError as e:
        self._ctx.error_policy.fail(e)

    if self._save_dataset_meta:
        self._save_meta_file(save_dir)

    yaml_dict = {}

    subsets = extractor.subsets()
    pbars = self._ctx.progress_reporter.split(len(subsets))

    for (subset_name, subset), pbar in zip(subsets.items(), pbars):
        subset_img_dir = osp.join(save_dir, "images", subset_name)
        os.makedirs(subset_img_dir, exist_ok=True)

        subset_label_dir = osp.join(save_dir, "labels", subset_name)
        os.makedirs(subset_label_dir, exist_ok=True)

        image_fpaths = []
        for item in pbar.iter(subset, desc=f"Exporting '{subset_name}'"):
            image_fpath = self._export_media(item, subset_img_dir)
            if image_fpath is None:
                # The failure was already reported by _export_media; passing
                # None to osp.relpath() below would raise a TypeError.
                continue

            self._export_item_annotation(item, subset_label_dir)
            image_fpaths.append(osp.relpath(image_fpath, save_dir))

        # The list file is written for every subset, so that the entry in
        # data.yaml never points to a file which does not exist.
        subset_fname = subset_name + ".txt"
        with open(osp.join(save_dir, subset_fname), "w", encoding="utf-8") as fp:
            # Prefix (os.curdir + os.sep) is required by Ultralytics
            # Please see https://github.com/ultralytics/ultralytics/blob/30fc4b537ff1d9b115bc1558884f6bc2696a282c/ultralytics/yolo/data/utils.py#L40-L43
            fp.writelines(os.curdir + os.sep + img_fpath + "\n" for img_fpath in image_fpaths)
        yaml_dict[subset_name] = subset_fname

    label_categories = extractor.categories()[AnnotationType.label]
    yaml_dict["names"] = {idx: label.name for idx, label in enumerate(label_categories.items)}

    # allow_unicode=True emits non-ASCII label names verbatim, so the file
    # encoding must not depend on the platform default.
    with open(osp.join(save_dir, "data.yaml"), "w", encoding="utf-8") as fp:
        yaml.safe_dump(yaml_dict, fp, sort_keys=False, allow_unicode=True)

@classmethod
def patch(cls, dataset, patch, save_dir, **kwargs):
Expand Down
5 changes: 5 additions & 0 deletions datumaro/plugins/data_formats/yolo/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,11 @@ class YoloLoosePath:
NAMES_FILE = "obj.names"


class YoloUltralyticsPath:
    """File names used by the Ultralytics YOLO format."""

    # Name of the YAML meta file of the dataset.
    META_FILE = "data.yaml"


class YoloFormatType(IntEnum):
    """Flavors of the YOLO dataset format; member names are used as format names."""

    yolo_strict = 0
    yolo_loose = 1
    yolo_ultralytics = 2
17 changes: 13 additions & 4 deletions datumaro/plugins/data_formats/yolo/importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from datumaro.components.importer import Importer
from datumaro.util.os_util import extract_subset_name_from_parent

from .format import YoloFormatType, YoloLoosePath
from .format import YoloFormatType, YoloLoosePath, YoloUltralyticsPath


class _YoloStrictImporter(Importer):
Expand All @@ -26,9 +26,12 @@ def find_sources(cls, path):


class _YoloLooseImporter(Importer):
META_FILE = YoloLoosePath.NAMES_FILE
FORMAT = YoloFormatType.yolo_loose.name

@classmethod
def detect(cls, context: FormatDetectionContext) -> FormatDetectionConfidence:
context.require_file(YoloLoosePath.NAMES_FILE)
context.require_file(cls.META_FILE)

with context.require_any():
with context.alternative():
Expand Down Expand Up @@ -94,7 +97,7 @@ def _filter_ann_file(fpath: str):
sources = [
{
"url": osp.join(path),
"format": YoloFormatType.yolo_loose.name,
"format": cls.FORMAT,
"options": {
"subset": subset,
"urls": urls,
Expand All @@ -107,7 +110,7 @@ def _filter_ann_file(fpath: str):
@classmethod
def find_sources(cls, path: str) -> List[Dict[str, Any]]:
# Check obj.names first
filename, ext = osp.splitext(YoloLoosePath.NAMES_FILE)
filename, ext = osp.splitext(cls.META_FILE)
sources = cls._find_sources_recursive(
path,
ext=ext,
Expand All @@ -133,10 +136,16 @@ def find_sources(cls, path: str) -> List[Dict[str, Any]]:
return []


class _YoloUltralyticsImporter(_YoloLooseImporter):
    """Importer for the Ultralytics YOLO format.

    It reuses the logic of ``_YoloLooseImporter`` and only swaps the meta
    file to look for and the format name reported for the found sources.
    """

    META_FILE = YoloUltralyticsPath.META_FILE
    FORMAT = YoloFormatType.yolo_ultralytics.name


class YoloImporter(Importer):
SUB_IMPORTERS: Dict[YoloFormatType, Importer] = {
YoloFormatType.yolo_strict: _YoloStrictImporter,
YoloFormatType.yolo_loose: _YoloLooseImporter,
YoloFormatType.yolo_ultralytics: _YoloUltralyticsImporter,
}

@classmethod
Expand Down
Loading

0 comments on commit 6a739e3

Please sign in to comment.