feat(opendataset): add dataloader for VOC2012Detection dataset

PR Closed: #830
Graviti-AI · Jul 16, 2021 · 11c3a8f · 11c3a8f
1 parent 2dac499
commit 11c3a8f
Show file tree

Hide file tree

Showing 4 changed files with 149 additions and 0 deletions.
diff --git a/tensorbay/opendataset/VOC2012Detection/__init__.py b/tensorbay/opendataset/VOC2012Detection/__init__.py
@@ -0,0 +1,11 @@
+#!/usr/bin/env python3
+#
+# Copyright 2021 Graviti. Licensed under MIT License.
+#
+# pylint: disable=invalid-name
+
+"""Dataloader of VOC2012Detection."""
+
+from .loader import VOC2012Detection
+
+__all__ = ["VOC2012Detection"]
diff --git a/tensorbay/opendataset/VOC2012Detection/catalog.json b/tensorbay/opendataset/VOC2012Detection/catalog.json
@@ -0,0 +1,32 @@
+{
+    "BOX2D": {
+        "categories": [
+            { "name": "aeroplane" },
+            { "name": "bicycle" },
+            { "name": "bird" },
+            { "name": "boat" },
+            { "name": "bottle" },
+            { "name": "bus" },
+            { "name": "car" },
+            { "name": "cat" },
+            { "name": "chair" },
+            { "name": "cow" },
+            { "name": "diningtable" },
+            { "name": "dog" },
+            { "name": "horse" },
+            { "name": "motorbike" },
+            { "name": "person" },
+            { "name": "pottedplant" },
+            { "name": "sheep" },
+            { "name": "sofa" },
+            { "name": "train" },
+            { "name": "tvmonitor" }
+        ],
+        "attributes": [
+            { "name": "difficult", "type": "boolean" },
+            { "name": "occluded", "type": "boolean" },
+            { "name": "pose", "enum": ["Frontal", "Left", "Rear", "Right", "Unspecified"] },
+            { "name": "truncated", "type": "boolean" }
+        ]
+    }
+}
diff --git a/tensorbay/opendataset/VOC2012Detection/loader.py b/tensorbay/opendataset/VOC2012Detection/loader.py
@@ -0,0 +1,104 @@
+#!/usr/bin/env python3
+#
+# Copyright 2021 Graviti. Licensed under MIT License.
+#
+# pylint: disable=invalid-name, missing-module-docstring
+
+import os
+import typing
+from xml.etree import ElementTree
+
+from ...dataset import Data, Dataset
+from ...label import LabeledBox2D
+
+# Names of the image-set files (without ".txt") under "ImageSets/Main"; one segment
+# is created per name.
+_SEGMENT_NAMES = (
+    "train",
+    "trainval",
+    "val",
+)
+# Object-level tags whose integer text is converted to a boolean attribute.
+# "pose" is an enum that the parser handles in its own branch, so it does not
+# belong here; "difficult" is declared as a boolean attribute in catalog.json
+# and has to be listed, otherwise it is never read from the annotations.
+_BOOLEAN_ATTRIBUTES = {"difficult", "occluded", "truncated"}
+DATASET_NAME = "VOC2012Detection"
+
+
+def VOC2012Detection(path: str) -> Dataset:
+    """Dataloader of the `VOC2012Detection`_ dataset.
+
+    .. _VOC2012Detection: http://host.robots.ox.ac.uk/pascal/VOC/voc2012/
+
+    The file structure should be like::
+
+        <path>
+            Annotations/
+                <image_name>.xml
+                ...
+            JPEGImages/
+                <image_name>.jpg
+                ...
+            ImageSets/
+                Main/
+                    train.txt
+                    trainval.txt
+                    val.txt
+                    ...
+                ...
+            ...
+
+    Arguments:
+        path: The root directory of the dataset.
+
+    Returns:
+        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.
+
+    """
+    paths = {
+        "Annotations": os.path.join(path, "Annotations"),
+        "Image": os.path.join(path, "JPEGImages"),
+        "Main": os.path.join(path, "ImageSets", "Main"),
+    }
+
+    dataset = Dataset(DATASET_NAME)
+    # The catalog ships next to this module; resolve it from the module location so
+    # loading does not depend on the caller's current working directory.
+    dataset.load_catalog(os.path.join(os.path.dirname(__file__), "catalog.json"))
+
+    for segment_name in _SEGMENT_NAMES:
+        segment = dataset.create_segment(segment_name)
+        split_file = os.path.join(paths["Main"], f"{segment_name}.txt")
+        with open(split_file, encoding="utf-8") as fp:
+            for line in fp:
+                filename = line.strip()
+                # Skip blank lines (e.g. a trailing newline at the end of the file),
+                # which would otherwise be looked up as ".jpg" / ".xml".
+                if not filename:
+                    continue
+                segment.append(_get_data(filename, paths))
+    return dataset
+
+
+def _parse_coordinate(text: typing.Optional[str]) -> typing.Union[int, float]:
+    """Convert the text of a bounding box coordinate into a number.
+
+    Arguments:
+        text: The text content of a coordinate tag such as ``xmin``.
+
+    Returns:
+        An ``int`` when the text is an integer literal, otherwise a ``float``.
+
+    """
+    try:
+        return int(text)  # type:ignore[arg-type]
+    except ValueError:
+        # Tolerate decimal-formatted coordinates instead of aborting the whole load.
+        return float(text)  # type:ignore[arg-type]
+
+
+def _get_data(filename: str, paths: typing.Dict[str, str]) -> Data:
+    """Get all information of the datum corresponding to filename.
+
+    Arguments:
+        filename: The filename of the data, without extension.
+        paths: The dictionary of directories, read with the keys
+            ``"Image"`` and ``"Annotations"``.
+
+    Returns:
+        The :class:`~tensorbay.dataset.data.Data` instance with one
+        ``LabeledBox2D`` per ``object`` element of the annotation file.
+
+    Raises:
+        ValueError: When an ``object`` element has no ``bndbox`` child.
+
+    """
+    data = Data(os.path.join(paths["Image"], f"{filename}.jpg"))
+    data.label.box2d = []
+    annotation_path = os.path.join(paths["Annotations"], f"{filename}.xml")
+    tree = ElementTree.parse(annotation_path)
+    for obj in tree.findall("object"):
+        # Look up the name and box of *this* object explicitly, so that a missing
+        # tag can never silently reuse the values parsed from a previous object.
+        category = obj.findtext("name")
+        bndbox = obj.find("bndbox")
+        if bndbox is None:
+            raise ValueError(f'Object without "bndbox" tag found in "{annotation_path}"')
+        box = tuple(
+            _parse_coordinate(bndbox.findtext(key)) for key in ("xmin", "ymin", "xmax", "ymax")
+        )
+
+        attributes: typing.Dict[str, typing.Union[bool, str, None]] = {}
+        # Only direct children are inspected; nested elements are not visited.
+        for child in obj:
+            if child.tag == "pose":
+                attributes[child.tag] = child.text
+            elif child.tag in _BOOLEAN_ATTRIBUTES:
+                attributes[child.tag] = bool(int(child.text))  # type:ignore[arg-type]
+        data.label.box2d.append(LabeledBox2D(*box, category=category, attributes=attributes))
+    return data
diff --git a/tensorbay/opendataset/__init__.py b/tensorbay/opendataset/__init__.py
@@ -44,6 +44,7 @@
 from .THUCNews import THUCNews
 from .TLR import TLR
 from .UAVDT import UAVDT
+from .VOC2012Detection import VOC2012Detection
 from .WIDER_FACE import WIDER_FACE
 
 __all__ = [
@@ -91,4 +92,5 @@
     "UAVDT",
     "WIDER_FACE",
     "COVID_CT",
+    "VOC2012Detection",
 ]