Skip to content

Commit

Permalink
feat(opendataset): add dataloader of UrbanObjectDetection dataset
Browse files Browse the repository at this point in the history
PR Closed: #1172
  • Loading branch information
wangyuqing0424 committed Dec 21, 2021
1 parent 27f0aa4 commit 6d0f6d9
Show file tree
Hide file tree
Showing 8 changed files with 180 additions and 50 deletions.
1 change: 1 addition & 0 deletions docs/source/reference/api/opendataset.rst
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ tensorbay.opendataset
THUCNews
TLR
UAVDT
UrbanObjectDetection
VOC2012ActionClassification
VOC2012Detection
VOC2012Segmentation
Expand Down
11 changes: 11 additions & 0 deletions tensorbay/opendataset/UrbanObjectDetection/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/usr/bin/env python3
#
# Copyright 2021 Graviti. Licensed under MIT License.
#
# pylint: disable=invalid-name

"""Dataloader of UrbanObjectDetection."""

from tensorbay.opendataset.UrbanObjectDetection.loader import UrbanObjectDetection

__all__ = ["UrbanObjectDetection"]
21 changes: 21 additions & 0 deletions tensorbay/opendataset/UrbanObjectDetection/catalog.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{
"BOX2D": {
"attributes": [
{
"name": "pose",
"enum": ["Rear", "Left", "Right", "Frontal", "Unspecified", "none", null]
},
{ "name": "truncated", "type": "boolean" },
{ "name": "difficult", "type": "boolean" }
],
"categories": [
{ "name": "bicycle" },
{ "name": "bus" },
{ "name": "car" },
{ "name": "motorbike" },
{ "name": "person" },
{ "name": "trafficlight" },
{ "name": "trafficsignal" }
]
}
}
60 changes: 60 additions & 0 deletions tensorbay/opendataset/UrbanObjectDetection/loader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#!/usr/bin/env python3
#
# Copyright 2021 Graviti. Licensed under MIT License.
#
# pylint: disable=invalid-name, missing-module-docstring

"""Dataloader of UrbanObjectDetection dataset."""

import os

from tensorbay.dataset import Dataset
from tensorbay.opendataset._utility import get_boolean_attributes, get_voc_detection_data

DATASET_NAME = "UrbanObjectDetection"
_SEGMENT_NAMES = ("train", "val", "test")


def UrbanObjectDetection(path: str) -> Dataset:
    """`UrbanObjectDetection <http://www.rovit.ua.es/dataset/traffic/>`_ dataset.

    The file structure should be like::

        <path>
            Annotations/
                <image_name>.xml
                ...
            JPEGImages/
                <image_name>.jpg
                ...
            ImageSets/
                train.txt
                val.txt
                test.txt

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    """
    dataset_root = os.path.abspath(os.path.expanduser(path))
    xml_dir = os.path.join(dataset_root, "Annotations")
    jpg_dir = os.path.join(dataset_root, "JPEGImages")
    split_dir = os.path.join(dataset_root, "ImageSets")

    # The catalog is shipped next to this loader module.
    catalog_file = os.path.join(os.path.dirname(__file__), "catalog.json")
    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(catalog_file)
    bool_names = get_boolean_attributes(dataset.catalog.box2d)

    for split in _SEGMENT_NAMES:
        segment = dataset.create_segment(split)
        split_file = os.path.join(split_dir, f"{split}.txt")
        with open(split_file, encoding="utf-8") as listing:
            # Every line of "<split>.txt" is used as the stem shared by one
            # image file and its annotation file.
            for line in listing:
                datum = get_voc_detection_data(line.rstrip(), jpg_dir, xml_dir, bool_names)
                segment.append(datum)
    return dataset
58 changes: 9 additions & 49 deletions tensorbay/opendataset/VOC2012Detection/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,12 @@
"""Dataloader of VOC2012Detection dataset."""

import os
from typing import Any

from tensorbay.dataset import Data, Dataset
from tensorbay.label import LabeledBox2D
from tensorbay.dataset import Dataset
from tensorbay.opendataset._utility import get_boolean_attributes, get_voc_detection_data

try:
import xmltodict
except ModuleNotFoundError:
from tensorbay.opendataset._utility.mocker import xmltodict # pylint:disable=ungrouped-imports

_SEGMENT_NAMES = ("train", "val")
_BOOLEAN_ATTRIBUTES = {"occluded", "difficult", "truncated"}
DATASET_NAME = "VOC2012Detection"
_SEGMENT_NAMES = ("train", "val")


def VOC2012Detection(path: str) -> Dataset:
Expand Down Expand Up @@ -56,48 +49,15 @@ def VOC2012Detection(path: str) -> Dataset:

dataset = Dataset(DATASET_NAME)
dataset.load_catalog(os.path.join(os.path.dirname(__file__), "catalog.json"))
boolean_attributes = get_boolean_attributes(dataset.catalog.box2d)

for segment_name in _SEGMENT_NAMES:
segment = dataset.create_segment(segment_name)
with open(os.path.join(main_path, f"{segment_name}.txt"), encoding="utf-8") as fp:
for stem in fp:
segment.append(_get_data(stem.rstrip(), image_path, annotation_path))
segment.append(
get_voc_detection_data(
stem.rstrip(), image_path, annotation_path, boolean_attributes
)
)
return dataset


def _get_data(stem: str, image_path: str, annotation_path: str) -> Data:
    """Build the data of one image together with its 2D box labels.

    Arguments:
        stem: The filename without extension shared by the image and its annotation.
        image_path: The path of the image directory.
        annotation_path: The path of the annotation directory.

    Returns:
        Data: class: `~tensorbay.dataset.data.Data` instance.

    """
    data = Data(os.path.join(image_path, f"{stem}.jpg"))
    with open(os.path.join(annotation_path, f"{stem}.xml"), encoding="utf-8") as xml_file:
        parsed: Any = xmltodict.parse(xml_file.read())

    found = parsed["annotation"]["object"]
    # A lone <object> element is not wrapped in a list, so normalize it here.
    entries = found if isinstance(found, list) else [found]

    boxes = []
    for entry in entries:
        # The flags are stored as "0"/"1" text in the annotation file.
        attributes = {key: bool(int(entry[key])) for key in _BOOLEAN_ATTRIBUTES}
        attributes["pose"] = entry["pose"]
        corners = entry["bndbox"]
        boxes.append(
            LabeledBox2D(
                *(float(corners[side]) for side in ("xmin", "ymin", "xmax", "ymax")),
                category=entry["name"],
                attributes=attributes,
            )
        )
    data.label.box2d = boxes
    return data
2 changes: 2 additions & 0 deletions tensorbay/opendataset/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
from tensorbay.opendataset.THUCNews import THUCNews
from tensorbay.opendataset.TLR import TLR
from tensorbay.opendataset.UAVDT import UAVDT
from tensorbay.opendataset.UrbanObjectDetection import UrbanObjectDetection
from tensorbay.opendataset.VOC2012ActionClassification import VOC2012ActionClassification
from tensorbay.opendataset.VOC2012Detection import VOC2012Detection
from tensorbay.opendataset.VOC2012Segmentation import VOC2012Segmentation
Expand Down Expand Up @@ -128,6 +129,7 @@
"THUCNews",
"TLR",
"UAVDT",
"UrbanObjectDetection",
"WIDER_FACE",
"COVID_CT",
"VOC2012Detection",
Expand Down
3 changes: 2 additions & 1 deletion tensorbay/opendataset/_utility/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,6 @@

from tensorbay.opendataset._utility.coco import coco
from tensorbay.opendataset._utility.glob import glob
from tensorbay.opendataset._utility.voc import get_boolean_attributes, get_voc_detection_data

__all__ = ["coco", "glob"]
__all__ = ["coco", "glob", "get_voc_detection_data", "get_boolean_attributes"]
74 changes: 74 additions & 0 deletions tensorbay/opendataset/_utility/voc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
#!/usr/bin/env python3
#
# Copyright 2021 Graviti. Licensed under MIT License.
#

"""Common methods for loading VOC formatted datasets."""

import os
from typing import Any, List

from tensorbay.dataset import Data
from tensorbay.label import Box2DSubcatalog, LabeledBox2D

try:
import xmltodict
except ModuleNotFoundError:
from tensorbay.opendataset._utility.mocker import xmltodict # pylint:disable=ungrouped-imports


def get_voc_detection_data(
    stem: str, image_path: str, annotation_path: str, boolean_attributes: List[str]
) -> Data:
    """Get all information of the datum corresponding to voc-like label files.

    An annotation file that contains no ``<object>`` element produces a data
    whose ``box2d`` label is an empty list.

    Arguments:
        stem: The filename without extension of the data.
        image_path: The path of the image directory.
        annotation_path: The path of the annotation directory.
        boolean_attributes: The list of boolean attribute.

    Returns:
        Data: class:`~tensorbay.dataset.data.Data` instance.

    """
    data = Data(os.path.join(image_path, f"{stem}.jpg"))
    box2d = []
    with open(os.path.join(annotation_path, f"{stem}.xml"), encoding="utf-8") as fp:
        labels: Any = xmltodict.parse(fp.read())

    # xmltodict omits the key of an absent element, so an image without any
    # labeled object has no "object" entry; treat that as "no boxes" instead
    # of failing with a KeyError.
    objects = labels["annotation"].get("object", [])

    # A single <object> element is parsed as one mapping rather than a list.
    if not isinstance(objects, list):
        objects = [objects]
    for obj in objects:
        # Boolean flags are stored as "0"/"1" text in the annotation file.
        attributes = {attribute: bool(int(obj[attribute])) for attribute in boolean_attributes}
        attributes["pose"] = obj["pose"]
        bndbox = obj["bndbox"]
        box2d.append(
            LabeledBox2D(
                float(bndbox["xmin"]),
                float(bndbox["ymin"]),
                float(bndbox["xmax"]),
                float(bndbox["ymax"]),
                category=obj["name"],
                attributes=attributes,
            )
        )
    data.label.box2d = box2d
    return data


def get_boolean_attributes(box2d: Box2DSubcatalog) -> List[str]:
    """Collect the names of the boolean attributes of a box2d subcatalog.

    Arguments:
        box2d: The Box2DSubcatalog.

    Returns:
        The list of the names of the attributes whose ``type`` is ``"boolean"``,
        in the order they appear in ``box2d.attributes``.

    """
    boolean_names: List[str] = []
    for info in box2d.attributes:
        # Attributes that carry no "type" at all are skipped as well.
        if getattr(info, "type", None) != "boolean":
            continue
        boolean_names.append(info.name)
    return boolean_names

0 comments on commit 6d0f6d9

Please sign in to comment.