Skip to content

Commit

Permalink
Add progress reporters for Datumaro, COCO, VOC, YOLO (#1100)
Browse files Browse the repository at this point in the history
<!-- Contributing guide:
https://github.com/openvinotoolkit/datumaro/blob/develop/CONTRIBUTING.md
-->

### Summary

<!--
Resolves #111 and #222.
Depends on #1000 (for series of dependent commits).

This PR introduces this capability to make the project better in this
and that.

- Added this feature
- Removed that feature
- Fixed the problem #1234
-->

### How to test
<!-- Describe the testing procedure for reviewers, if changes are
not fully covered by unit tests or manual testing can be complicated.
-->

### Checklist
<!-- Put an 'x' in all the boxes that apply -->
- [ ] I have added unit tests to cover my changes.
- [ ] I have added integration tests to cover my changes.
- [x] I have added the description of my changes into
[CHANGELOG](https://github.com/openvinotoolkit/datumaro/blob/develop/CHANGELOG.md).
- [ ] I have updated the
[documentation](https://github.com/openvinotoolkit/datumaro/tree/develop/docs)
accordingly.

### License

- [ ] I submit _my code changes_ under the same [MIT
License](https://github.com/openvinotoolkit/datumaro/blob/develop/LICENSE)
that covers the project.
  Feel free to contact the maintainers if that's a concern.
- [ ] I have updated the license header for each file (see an example
below).

```python
# Copyright (C) 2023 Intel Corporation
#
# SPDX-License-Identifier: MIT
```
  • Loading branch information
wonjuleee authored Jul 18, 2023
1 parent 144489e commit 7d6e9f7
Show file tree
Hide file tree
Showing 7 changed files with 22 additions and 21 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Enhancements
- Give notice that the deprecation works will be done in datumaro==1.5.0
(<https://github.com/openvinotoolkit/datumaro/pull/1085>)
- Unify COCO, Datumaro, VOC, YOLO importer/exporter progress reporter descriptions
(<https://github.com/openvinotoolkit/datumaro/pull/1100>)

### Bug fixes
- Create cache dir under only writable filesystem
Expand Down
14 changes: 5 additions & 9 deletions src/datumaro/plugins/data_formats/coco/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,7 @@ def _stream_items(self) -> Iterator[DatasetItem]:

for img_info, ann_infos in pbars.iter(
self._page_mapper,
desc=f"Parsing image info in '{osp.basename(self._path)}'",
desc=f"Importing '{self._subset}'",
):
parsed = self._parse_item(img_info)
if parsed is None:
Expand All @@ -289,7 +289,7 @@ def _parse_anns(self, img_info, ann_info, item):
self._load_panoptic_ann(ann_info, parsed_annotations=item.annotations)

def _load_items(self, json_data):
pbars = self._ctx.progress_reporter.split(2)
pbar = self._ctx.progress_reporter

def _gen_ann(info_lists):
while info_lists:
Expand All @@ -298,11 +298,7 @@ def _gen_ann(info_lists):
items = {}
img_infos = {}
img_lists = self._parse_field(json_data, "images", list)
for img_info in pbars[0].iter(
_gen_ann(img_lists),
desc=f"Parsing image info in '{osp.basename(self._path)}'",
total=len(img_lists),
):
for img_info in _gen_ann(img_lists):
parsed = self._parse_item(img_info)
if parsed is None:
continue
Expand All @@ -315,9 +311,9 @@ def _gen_ann(info_lists):

ann_lists = self._parse_field(json_data, "annotations", list)

for ann_info in pbars[1].iter(
for ann_info in pbar.iter(
_gen_ann(ann_lists),
desc=f"Parsing annotations in '{osp.basename(self._path)}'",
desc=f"Importing '{self._subset}'",
total=len(ann_lists),
):
try:
Expand Down
2 changes: 1 addition & 1 deletion src/datumaro/plugins/data_formats/coco/exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -731,7 +731,7 @@ def _apply_impl(self):
if CocoTask.panoptic in task_converters:
self._make_segmentation_dir(subset_name)

for item in pbar.iter(subset, desc=f"Exporting {subset_name}"):
for item in pbar.iter(subset, desc=f"Exporting '{subset_name}'"):
try:
if self._save_media:
if item.media:
Expand Down
7 changes: 4 additions & 3 deletions src/datumaro/plugins/data_formats/datumaro/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,16 +111,17 @@ def _load_categories(parsed) -> Dict:
return categories

def _load_items(self, parsed) -> List:
items = []

item_descs = parsed["items"]
pbar = self._ctx.progress_reporter

def _gen():
while item_descs:
yield item_descs.pop()

for item_desc in pbar.iter(_gen(), total=len(item_descs)):
items = []
for item_desc in pbar.iter(
_gen(), desc=f"Importing '{self._subset}'", total=len(item_descs)
):
item = self._parse_item(item_desc)
items.append(item)

Expand Down
11 changes: 6 additions & 5 deletions src/datumaro/plugins/data_formats/datumaro/exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
_Shape,
)
from datumaro.components.crypter import NULL_CRYPTER
from datumaro.components.dataset_base import DEFAULT_SUBSET_NAME, DatasetItem
from datumaro.components.dataset_base import DatasetItem
from datumaro.components.dataset_item_storage import ItemStatus
from datumaro.components.exporter import ExportContextComponent, Exporter
from datumaro.components.media import Image, MediaElement, PointCloud
Expand Down Expand Up @@ -419,11 +419,12 @@ def _apply_impl(self, pool: Optional[Pool] = None, *args, **kwargs):
writer.add_infos(self._extractor.infos())
writer.add_categories(self._extractor.categories())

for item in self._extractor:
subset = item.subset or DEFAULT_SUBSET_NAME
writers[subset].add_item(item, pool)
pbar = self._ctx.progress_reporter
for subset_name, subset in self._extractor.subsets().items():
for item in pbar.iter(subset, desc=f"Exporting '{subset_name}'"):
writers[subset_name].add_item(item, pool)

self._check_hash_key_existence(item)
self._check_hash_key_existence(item)

for subset, writer in writers.items():
if self._patch and subset in self._patch.updated_subsets and writer.is_empty():
Expand Down
2 changes: 1 addition & 1 deletion src/datumaro/plugins/data_formats/voc/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ def __iter__(self):
)

for item_id in self._ctx.progress_reporter.iter(
self._items, desc=f"Parsing boxes in '{self._subset}'"
self._items, desc=f"Importing '{self._subset}'"
):
log.debug("Reading item '%s'" % item_id)
size = None
Expand Down
5 changes: 3 additions & 2 deletions src/datumaro/plugins/data_formats/yolo/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,7 @@ def __iter__(self) -> Iterator[DatasetItem]:
subsets = self._subsets
pbars = self._ctx.progress_reporter.split(len(subsets))
for pbar, (subset_name, subset) in zip(pbars, subsets.items()):
for item in pbar.iter(subset, desc=f"Parsing '{subset_name}'"):
for item in pbar.iter(subset, desc=f"Importing '{subset_name}'"):
yield item

def __len__(self):
Expand Down Expand Up @@ -332,7 +332,8 @@ def __iter__(self) -> Iterator[DatasetItem]:
if label_categories is None:
raise DatasetImportError("label_categories should be not None.")

for url in self._urls:
pbar = self._ctx.progress_reporter
for url in pbar.iter(self._urls, desc=f"Importing '{self._subset}'"):
try:
fname = self._get_fname(url)
img = Image.from_file(path=self._img_files[fname])
Expand Down

0 comments on commit 7d6e9f7

Please sign in to comment.