Skip to content

Commit

Permalink
feat(opendataset): add dataloader for SVHN dataset
Browse files Browse the repository at this point in the history
  • Loading branch information
graczhual committed Aug 6, 2021
1 parent 279c26f commit 2dacc22
Show file tree
Hide file tree
Showing 4 changed files with 143 additions and 0 deletions.
11 changes: 11 additions & 0 deletions tensorbay/opendataset/SVHN/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/usr/bin/env python3
#
# Copyright 2021 Graviti. Licensed under MIT License.
#
# pylint: disable=invalid-name

"""Dataloader of SVHN."""

from .loader import SVHN

__all__ = ["SVHN"]
36 changes: 36 additions & 0 deletions tensorbay/opendataset/SVHN/catalog.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
{
"BOX2D": {
"categories": [
{
"name": "0"
},
{
"name": "1"
},
{
"name": "2"
},
{
"name": "3"
},
{
"name": "4"
},
{
"name": "5"
},
{
"name": "6"
},
{
"name": "7"
},
{
"name": "8"
},
{
"name": "9"
}
]
}
}
94 changes: 94 additions & 0 deletions tensorbay/opendataset/SVHN/loader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
#!/usr/bin/env python3
#
# Copyright 2021 Graviti. Licensed under MIT License.
#
# pylint: disable=invalid-name, missing-module-docstring

import os
from typing import Dict, List

from ...dataset import Data, Dataset
from ...exception import ModuleImportError
from ...label import LabeledBox2D

# Sub-directory names under "FullNumbers"; the loader creates one segment
# ("FullNumbers-<name>") for each of them.
_SVHN_SEGMENTS = ("extra", "test", "train")

# Name given to the Dataset instance built by the loader.
DATASET_NAME = "SVHN"


def SVHN(path: str) -> Dataset:
    """Dataloader of SVHN open dataset.

    .. _SVHN: http://ufldl.stanford.edu/housenumbers/

    The file structure should be like::

        <path>
            Cropped/
                extra_32x32.mat
                test_32x32.mat
                train_32x32.mat
            FullNumbers/
                extra/
                    116507.png
                    116508.png
                    ...
                    digitStruct.mat
                    see_bboxes.m
                test/
                train/

    Only the ``FullNumbers`` part is loaded: every entry of each
    ``digitStruct.mat`` becomes one data in the ``FullNumbers-<segment>`` segment.

    Arguments:
        path: The root directory of the dataset.

    Returns:
        Loaded :class:`~tensorbay.dataset.dataset.Dataset` instance.

    Raises:
        ModuleImportError: When the ``h5py`` module cannot be imported.

    """
    try:
        # Imported lazily so the package can be imported without h5py installed.
        from h5py import Dataset as h5Dataset  # pylint: disable=import-outside-toplevel
        from h5py import File  # pylint: disable=import-outside-toplevel
    except ModuleNotFoundError as error:
        raise ModuleImportError(error.name) from error  # type: ignore[arg-type]

    root_path = os.path.abspath(os.path.expanduser(path))
    dataset = Dataset(DATASET_NAME)
    dataset.load_catalog(os.path.join(os.path.dirname(__file__), "catalog.json"))

    for segment_name in _SVHN_SEGMENTS:
        segment = dataset.create_segment(f"FullNumbers-{segment_name}")
        mat_path = os.path.join(root_path, "FullNumbers", segment_name, "digitStruct.mat")
        # Open read-only (never create or modify the annotation file) and make
        # sure the HDF5 handle is released once the segment has been filled.
        with File(mat_path, "r") as mat_file:
            digit_struct = mat_file["digitStruct"]
            bbox = digit_struct["bbox"]
            name = digit_struct["name"]
            for index in range(bbox.shape[0]):
                segment.append(_get_fullnumbers_data(mat_file, name, bbox, index))
    return dataset


def _get_fullnumbers_data(
    mat_file: "File", name: "h5Dataset", bbox: "h5Dataset", index: int
) -> Data:
    """Build the data of one "FullNumbers" image from a ``digitStruct.mat`` file.

    Arguments:
        mat_file: The opened ``digitStruct.mat`` file, used to dereference the
            object references stored in ``name`` and ``bbox``.
        name: The ``digitStruct/name`` dataset; entry ``index`` references the
            character codes of the image file name.
        bbox: The ``digitStruct/bbox`` dataset; entry ``index`` references the group
            holding the "label", "left", "top", "width" and "height" fields.
        index: The index of the image inside ``name`` and ``bbox``.

    Returns:
        The :class:`Data` of the image with one :class:`LabeledBox2D` per digit.

    """
    # The file name is stored as a column of character codes.
    image_name = "".join(chr(code[0]) for code in mat_file[name[index][0]])
    # The images sit next to "digitStruct.mat", so anchor the bare file name to
    # that directory; otherwise the path only resolves when the current working
    # directory happens to be the segment folder.
    data = Data(os.path.join(os.path.dirname(mat_file.filename), image_name))

    mat_bbox = mat_file[bbox[index][0]]
    length = mat_bbox["label"].shape[0]

    meta_data: Dict[str, List[float]] = {}
    for field, values in mat_bbox.items():
        if length == 1:
            # A single digit: the value is stored inline in the field.
            meta_data[field] = [values[0][0]]
        else:
            # Several digits: every entry is a reference to the actual value.
            meta_data[field] = [mat_file[values[i][0]][0][0] for i in range(length)]

    data.label.box2d = [
        LabeledBox2D.from_xywh(
            x=meta_data["left"][i],
            y=meta_data["top"][i],
            width=meta_data["width"][i],
            height=meta_data["height"][i],
            # SVHN stores the digit "0" as label 10, while the catalog names
            # the categories "0" to "9".
            category=str(int(meta_data["label"][i]) % 10),
        )
        for i in range(length)
    ]
    return data
2 changes: 2 additions & 0 deletions tensorbay/opendataset/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
from .NightOwls import NightOwls
from .nuScenes import nuScenes
from .RP2K import RP2K
from .SVHN import SVHN
from .THCHS30 import THCHS30
from .THUCNews import THUCNews
from .TLR import TLR
Expand Down Expand Up @@ -90,6 +91,7 @@
"Newsgroups20",
"NightOwls",
"RP2K",
"SVHN",
"THCHS30",
"THUCNews",
"TLR",
Expand Down

0 comments on commit 2dacc22

Please sign in to comment.