diff --git a/.gitignore b/.gitignore
index 5b21ae1..538d696 100644
--- a/.gitignore
+++ b/.gitignore
@@ -131,3 +131,7 @@ dmypy.json
 # IDE
 .idea/
 .vscode/
+
+# fixtures for annotation file conversion testing
+testing_data_coco
+testing_data_images
\ No newline at end of file
diff --git a/README.md b/README.md
index 771288c..298d691 100644
--- a/README.md
+++ b/README.md
@@ -183,6 +183,96 @@ pbf.compute_area(coco_bbox, bbox_type="coco") # 12
 pbf.compute_area(voc_bbox, bbox_type="voc") # 4
 ```
 
+## Annotation file conversion
+`pybboxes` now supports converting annotation file(s) across different annotation formats (yolo, voc and coco are currently supported).
+
+This is a 3-step process.
+
+### 1. Instantiate the Annotations class
+```python
+from pybboxes.annotations import Annotations
+
+anns = Annotations(annotation_type='yolo')
+```
+
+**Important:** you have to declare `annotation_type` explicitly up front. After declaration, *you will only be able to load annotations in the declared format*, but you will still be able to export to the other annotation formats.
+
+### 2. Load the annotations file
+After you have instantiated the `Annotations` class with the declared `annotation_type`, load the annotations using the matching method of the `Annotations` class.
+
+#### 2.1 Load from yolo
+```python
+from pybboxes.annotations import Annotations
+
+anns = Annotations(annotation_type='yolo')
+
+anns.load_from_yolo(labels_dir='./labels', images_dir='./images', classes_file='./classes.txt')
+```
+
+As yolo stores normalized bounding box coordinates, the path to the corresponding images directory must be provided (via `images_dir`) so that the pixel dimensions of each image can be inferred.
+
+Also, the path to `classes_file` (usually classes.txt), which lists all the class labels used in the annotations, must be provided. Without it, `pybboxes` will fail to assign the appropriate class labels when converting across annotation formats.
+
+#### 2.2 Load from voc
+```python
+from pybboxes.annotations import Annotations
+
+anns = Annotations(annotation_type='voc')
+
+anns.load_from_voc(labels_dir='./labels')
+```
+
+#### 2.3 Load from coco
+```python
+from pybboxes.annotations import Annotations
+
+anns = Annotations(annotation_type='coco')
+
+anns.load_from_coco(json_path='./validation.json')
+```
+
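+Once loaded, the parsed class labels can be inspected before exporting, for example via `names_mapping`, `label2id` and `id2label`. This is a quick illustrative sketch: the mapping shown below assumes the two-class raccoon dataset used in the test fixtures and will differ for your own data.
+
+```python
+anns.names_mapping        # e.g. {'raccoons': 0, 'raccoon': 1}
+anns.label2id('raccoon')  # e.g. 1
+anns.id2label(0)          # e.g. 'raccoons'
+```
+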
+### 3. Saving annotations to a different format
+#### 3.1 Saving annotations to yolo format
+As every image has its own corresponding annotation file in yolo format, you have to provide the path to an `export_dir` where all the annotation files will be written.
+
+```python
+from pybboxes.annotations import Annotations
+
+anns = Annotations(annotation_type='coco')  # just for demonstration purposes
+
+anns.load_from_coco(json_path='./validation.json')  # we could have loaded the annotation data from another format as well
+
+anns.save_as_yolo(export_dir='./labels')
+```
+This will create all the required annotation files (in yolo format) in the given directory. Additionally, it will create a `classes.txt` file in the same folder that lists all the class labels used for the annotations.
+
+#### 3.2 Saving annotations to voc format
+Just like the yolo format, the voc format keeps a separate annotation file for every image. So, you have to provide the path to an `export_dir` where all the annotation files will be written.
+
+```python
+from pybboxes.annotations import Annotations
+
+anns = Annotations(annotation_type='coco')  # just for demonstration purposes
+
+anns.load_from_coco(json_path='./validation.json')  # we could have loaded the annotation data from another format as well
+
+anns.save_as_voc(export_dir='./labels')
+```
+
+
+#### 3.3 Saving annotations to coco format
+To export annotations in coco format, you just have to provide the name (or path) of the output file (in json format) via `export_file`.
+
+```python
+from pybboxes.annotations import Annotations
+
+anns = Annotations(annotation_type='voc')  # just for demonstration purposes
+
+anns.load_from_voc(labels_dir='./labels')  # we could have loaded the annotation data from another format as well
+
+anns.save_as_coco(export_file='./validation.json')
+```
+
 ## Contributing
 
 ### Installation
diff --git a/pybboxes/annotations/__init__.py b/pybboxes/annotations/__init__.py
new file mode 100644
index 0000000..e77e7ba
--- /dev/null
+++ b/pybboxes/annotations/__init__.py
@@ -0,0 +1,5 @@
+"""
+https://medium.com/red-buffer/converting-a-custom-dataset-from-coco-format-to-yolo-format-6d98a4fd43fc
+https://blog.roboflow.com/train-yolov7-instance-segmentation-on-custom-data/
+"""
+from pybboxes.annotations.base import Annotations
diff --git a/pybboxes/annotations/base.py b/pybboxes/annotations/base.py
new file mode 100644
index 0000000..4c280f4
--- /dev/null
+++ b/pybboxes/annotations/base.py
@@ -0,0 +1,383 @@
+import json
+import os
+import xml.etree.ElementTree as ET
+from dataclasses import dataclass
+from typing import Dict, List
+
+from pycocotools.coco import COCO
+
+from pybboxes.boxes import BoundingBox
+from pybboxes.utils.io import get_image_size
+
+
+@dataclass
+class Annotation:
+    # This is the format in which the 'Annotations' class stores the bounding box details internally
+    # A single instance stores the info of only one bounding box
+    # https://www.immersivelimit.com/tutorials/create-coco-annotations-from-scratch
+    box: BoundingBox
+    label_id: int
+    label_name: str = None
+    annotation_id: int = None
+    annotation_type: str = None
+    segmentations: List[int] = None
+    image_width: int = None
+    image_height: int = None
+
+
+class Annotations:
+    def __init__(self, annotation_type: str):
+        """Initializes an Annotations container for the given annotation format
+
+        Parameters
+        ----------
+        annotation_type : str
+            must be one of: yolo, coco, voc, albumentations, fiftyone
+
+        Raises
+        ------
+        ValueError
+            if annotation_type is not a supported type
+        """
+        valid_types = ("yolo", "coco", "voc", "albumentations", "fiftyone")
+        if annotation_type not in valid_types:
+            raise ValueError(f"Annotation type should be one of: {valid_types}")
+
+        self._annotation_type = annotation_type
+        self._class_names: List[str] = []
+        self._objects: Dict[str, List[Annotation]] = dict()
+
+    @property
+    def names_mapping(self):
+        """maps all the class names to their class ids
+
+        Returns
+        -------
+        dict
+            in {class_name: class_id, ...} format
+        """
+        return {name: id_ for id_, name in enumerate(self._class_names)}
+
+    # def __getitem__(self, subscript: Union[int, List[int], slice]) -> Union[Annotation, List[Annotation]]:
+    #     if isinstance(subscript, list):
+    #         return [self[i] for i in subscript]
+    #     else:
+    #         return self._objects[subscript]
+
+    def label2id(self, name: str):
+        """returns the class id for the given class name
+
+        Parameters
+        ----------
+        name : str
+
+        Returns
+        -------
+        int
+        """
+        return self.names_mapping[name]
+
+    def id2label(self, label_id: int):
+        """returns the class name for the given label id
+
+        Parameters
+        ----------
+        label_id : int
+
+        Returns
+        -------
+        str
+        """
+        return self._class_names[label_id]
+
+    def load_from_albumentations(self):
+        raise NotImplementedError
+
+    def load_from_fiftyone(self):
+        raise NotImplementedError
+
+    def load_from_voc(self, labels_dir: str):
+        """
+        initializes Annotations from xml annotations in pascal voc format
+
+        Parameters
+        ----------
+        labels_dir : str
+            path to the directory that houses the xml annotations in pascal voc format
+        """
+        if self._annotation_type != "voc":
+            raise TypeError(f"this instance of Annotations can only process {self._annotation_type} annotation file(s)")
+
+        if not os.path.exists(labels_dir):
+            raise FileNotFoundError(f"{labels_dir} doesn't exist")
+
+        for filename in os.listdir(labels_dir):
+            if filename.endswith(".xml"):
+                tree = ET.parse(os.path.join(labels_dir, filename))
+                root = tree.getroot()
+
+                image_name = root.find("filename").text
+                size = root.find("size")
+                img_w = int(size.find("width").text)
+                img_h = int(size.find("height").text)
+
+                for obj in root.findall("object"):
+                    label_name = obj.find("name").text
+                    if label_name not in self._class_names:
+                        self._class_names.append(label_name)
+                    label_id = self.label2id(label_name)
+
+                    bbox = obj.find("bndbox")
+                    xmin = float(bbox.find("xmin").text)
+                    ymin = float(bbox.find("ymin").text)
+                    xmax = float(bbox.find("xmax").text)
+                    ymax = float(bbox.find("ymax").text)
+
+                    bbox = BoundingBox.from_voc(xmin, ymin, xmax, ymax, image_size=(img_w, img_h))
+
+                    annotation = Annotation(
+                        box=bbox,
+                        label_id=label_id,
+                        label_name=label_name,
+                        annotation_type="voc",
+                        image_width=img_w,
+                        image_height=img_h,
+                    )
+
+                    if image_name in self._objects:
+                        self._objects[image_name].append(annotation)
+                    else:
+                        self._objects[image_name] = [annotation]
+
+    def load_from_coco(self, json_path: str):
+        """
+        initializes Annotations from a coco annotation file (json format)
+
+        Parameters
+        ----------
+        json_path : str
+            path to the coco annotation file in json format
+        """
+        if self._annotation_type != "coco":
+            raise TypeError(f"this instance of Annotations can only process {self._annotation_type} annotation file(s)")
+
+        if not os.path.exists(json_path):
+            raise FileNotFoundError(f"{json_path} doesn't exist")
+
+        coco = COCO(json_path)
+
+        categories = coco.loadCats(coco.getCatIds())
+        self._class_names = [category["name"] for category in categories]  # we just need the names
+
+        for ann_id in coco.getAnnIds():
+            ann = coco.loadAnns(ann_id)[0]
+            img = coco.loadImgs(ann["image_id"])[0]
+            associated_img_filename = img["file_name"]
+
+            bbox = BoundingBox.from_coco(*ann["bbox"], image_size=(img["width"], img["height"]))
+
+            annotation = Annotation(
+                box=bbox,
+                label_id=ann["category_id"],
+                label_name=self.id2label(ann["category_id"]),
+                annotation_type="coco",
+                annotation_id=ann["id"],
+                image_width=img["width"],
+                image_height=img["height"],
+            )
+
+            # if the entry for this file already exists
+            if associated_img_filename in self._objects:
+                self._objects[associated_img_filename].append(annotation)
+            else:
+                self._objects[associated_img_filename] = [annotation]
+
+    def load_from_yolo(self, labels_dir: str, images_dir: str, classes_file: str):
+        """loads annotations in yolo format
+
+        Parameters
+        ----------
+        labels_dir : str
+            immediate parent directory that houses all the image annotations
+        images_dir : str
+            immediate parent directory that houses all the images (the images corresponding to the labels are needed to extract image dimensions)
+        classes_file : str
+            path to classes.txt that lists all the class labels used in the annotations
+        """
+
+        if self._annotation_type != "yolo":
+            raise TypeError(f"this instance of Annotations can only process {self._annotation_type} annotation file(s)")
+
+        if not os.path.exists(classes_file):
+            raise FileNotFoundError(f"{classes_file} doesn't exist")
+
+        if not os.path.exists(labels_dir):
+            raise NotADirectoryError(f"{labels_dir} is not a valid directory")
+
+        if not os.path.exists(images_dir):
+            raise NotADirectoryError(f"{images_dir} is not a valid directory")
+
+        with open(classes_file, "r") as f:
+            self._class_names = [line.strip() for line in f.readlines()]
+
+        for filename in os.listdir(labels_dir):
+            if filename.endswith(".txt"):
+                if "classes" in filename:
+                    continue  # skip the classes file as it does not contain bounding box data
+
+                image_name = filename.replace(".txt", ".jpg")  # we are assuming the jpg extension
+                if not os.path.exists(os.path.join(images_dir, image_name)):
+                    image_name = filename.replace(".txt", ".jpeg")  # see if an image with the jpeg extension exists
+                    if not os.path.exists(os.path.join(images_dir, image_name)):
+                        raise FileNotFoundError(f"{image_name} not found in images directory")
+
+                image_size = get_image_size(os.path.join(images_dir, image_name))  # needed to denormalize yolo boxes
+
+                with open(os.path.join(labels_dir, filename), "r") as f:
+                    for line in f:
+                        parts = line.strip().split()
+                        label_id = int(parts[0])  # extract the class/label id
+                        x_c, y_c, w, h = map(float, parts[1:5])
+
+                        bbox = BoundingBox.from_yolo(x_c, y_c, w, h, image_size=image_size)
+
+                        annotation = Annotation(
+                            box=bbox,
+                            label_id=label_id,
+                            label_name=self.id2label(label_id),
+                            annotation_type="yolo",
+                            image_width=image_size[0],
+                            image_height=image_size[1],
+                        )
+
+                        if image_name not in self._objects.keys():
+                            self._objects[image_name] = [annotation]
+                        else:
+                            self._objects[image_name].append(annotation)
+
+    def save_as_yolo(self, export_dir: str):
+        """writes the loaded annotations in yolo format
+
+        Parameters
+        ----------
+        export_dir : str
+            path to the directory where all the annotation files should be written
+
+        this will write annotation files for all the corresponding images and also a 'classes.txt'
+        that lists all the classes used for the annotation
+        """
+        os.makedirs(export_dir, exist_ok=True)
+
+        # write class file
+        with open(os.path.join(export_dir, "classes.txt"), "w") as f:
+            for cls in self._class_names:
+                f.write(f"{cls}\n")
+
+        for image_name in self._objects.keys():
+
+            filename = f"{os.path.splitext(image_name)[0]}.txt"
+            filepath = os.path.join(export_dir, filename)
+
+            with open(filepath, mode="a") as f:
+                for annotation in self._objects[image_name]:
+                    yolo_box = annotation.box.to_yolo().raw_values
+                    yolo_box = [f"{x:.4f}" for x in yolo_box]
+                    yolo_box.insert(0, str(annotation.label_id))  # prepend the class/label id
+                    f.write(f"{' '.join(yolo_box)}\n")
+
+    def save_as_voc(self, export_dir: str, n_channels: int = 3):
+        """writes the loaded annotations in voc format
+
+        Parameters
+        ----------
+        export_dir : str
+            path to the directory where all the annotation files should be written
+        """
+        os.makedirs(export_dir, exist_ok=True)
+        for image_name in self._objects.keys():
+            filename = os.path.splitext(image_name)[0] + ".xml"
+            filepath = os.path.join(export_dir, filename)
+
+            root = ET.Element("annotation")
+            ET.SubElement(root, "filename").text = image_name
+            size = ET.SubElement(root, "size")
+
+            if len(self._objects[image_name]) == 0:
+                raise ValueError(f"no associated annotations for {image_name}")
+
+ # get the first sample from list because it contains image dimensions + sample_annotation = self._objects[image_name][0] + ET.SubElement(size, "width").text = str(sample_annotation.image_width) + ET.SubElement(size, "height").text = str(sample_annotation.image_height) + ET.SubElement(size, "depth").text = str(n_channels) + del sample_annotation # after we have extracted image width and height, we donot need it anymore + + for annotation in self._objects[image_name]: + obj = ET.SubElement(root, "object") + ET.SubElement(obj, "name").text = annotation.label_name + ET.SubElement(obj, "pose").text = "Unspecified" + ET.SubElement(obj, "truncated").text = "0" + ET.SubElement(obj, "difficult").text = "0" + + bbox = ET.SubElement(obj, "bndbox") + voc_box = annotation.box.to_voc().raw_values + ET.SubElement(bbox, "xmin").text = str(int(voc_box[0])) + ET.SubElement(bbox, "ymin").text = str(int(voc_box[1])) + ET.SubElement(bbox, "xmax").text = str(int(voc_box[2])) + ET.SubElement(bbox, "ymax").text = str(int(voc_box[3])) + + tree = ET.ElementTree(root) + tree.write(filepath) + + def save_as_coco(self, export_file: str): + """writes loaded annotation in coco format (json format) + + Parameters + ---------- + export_file : str + name (or path) for the annotation file + """ + coco_data = {"images": [], "categories": [], "annotations": []} + + # embed categorical information + for i, name in enumerate(self._class_names): + coco_data["categories"].append({"id": i, "name": name, "supercategory": "none"}) + + image_id = 0 + ann_id = 0 + for image_name in self._objects.keys(): + for annotation in self._objects[image_name]: + # embed image metadata + coco_data["images"].append( + { + "id": image_id, + "file_name": image_name, + "width": annotation.image_width, + "height": annotation.image_height, + } + ) + + # embed annotation metadata + coco_box = annotation.box.to_coco().raw_values + coco_data["annotations"].append( + { + "id": ann_id, + "image_id": image_id, + "category_id": annotation.label_id, + "bbox": coco_box, + "area": coco_box[2] * coco_box[3], + "iscrowd": 0, + } + ) + + ann_id += 1 + image_id += 1 + + with open(export_file, "w", encoding="utf-8") as f: + json.dump(coco_data, f) + + def save_as_albumentations(self): + raise NotImplementedError + + def save_as_fiftyone(self): + raise NotImplementedError diff --git a/pybboxes/boxes/base.py b/pybboxes/boxes/base.py index 00079a5..37aac47 100644 --- a/pybboxes/boxes/base.py +++ b/pybboxes/boxes/base.py @@ -1,61 +1,13 @@ -import warnings from abc import ABC, abstractmethod from typing import List, Tuple, Union import numpy as np -from pybboxes.utils import find_stack_level +from pybboxes.types.box_2d import Box NORMALIZED_BOXES = ["albumentations", "fiftyone", "yolo"] -class Box: - def __init__(self, x_tl: int, y_tl: int, x_br: int, y_br: int): - self.x_tl = x_tl - self.y_tl = y_tl - self.x_br = x_br - self.y_br = y_br - - def __add__(self, other: "Box") -> int: - return self.union(other) - - def __sub__(self, other: "Box") -> int: - return int(self.area - self.intersection(other)) - - def __mul__(self, other: "Box") -> int: - return self.intersection(other) - - def __truediv__(self, other: "Box") -> float: - return self.area / other.area - - @property - def area(self) -> int: - return self.width * self.height - - @property - def height(self) -> int: - return int(self.y_br - self.y_tl) - - @property - def width(self) -> int: - return int(self.x_br - self.x_tl) - - def intersection(self, other: "Box") -> int: - x_tl, y_tl = np.maximum((self.x_tl, 
self.y_tl), (other.x_tl, other.y_tl)) - x_br, y_br = np.minimum((self.x_br, self.y_br), (other.x_br, other.y_br)) - if x_tl >= x_br or y_tl >= y_br: - return 0 - intersection_width = x_br - x_tl - intersection_height = y_br - y_tl - return int(intersection_width * intersection_height) - - def union(self, other: "Box") -> int: - return int(self.area + other.area - self.intersection(other)) - - def iou(self, other: "Box") -> float: - return self.intersection(other) / self.union(other) - - class BaseBoundingBox(Box, ABC): def __init__( self, diff --git a/pybboxes/types/__init__.py b/pybboxes/types/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pybboxes/types/box_2d.py b/pybboxes/types/box_2d.py new file mode 100644 index 0000000..5a75ee1 --- /dev/null +++ b/pybboxes/types/box_2d.py @@ -0,0 +1,53 @@ +from typing import Sequence, Union + +import numpy as np + +IntegerBox: Union[Sequence[int], Sequence[Sequence[int]]] +FloatBox: Union[Sequence[float], Sequence[Sequence[float]]] + + +class Box: + def __init__(self, x_tl: int, y_tl: int, x_br: int, y_br: int): + self.x_tl = x_tl + self.y_tl = y_tl + self.x_br = x_br + self.y_br = y_br + + def __add__(self, other: "Box") -> int: + return self.union(other) + + def __sub__(self, other: "Box") -> int: + return int(self.area - self.intersection(other)) + + def __mul__(self, other: "Box") -> int: + return self.intersection(other) + + def __truediv__(self, other: "Box") -> float: + return self.area / other.area + + @property + def area(self) -> int: + return self.width * self.height + + @property + def height(self) -> int: + return int(self.y_br - self.y_tl) + + @property + def width(self) -> int: + return int(self.x_br - self.x_tl) + + def intersection(self, other: "Box") -> int: + x_tl, y_tl = np.maximum((self.x_tl, self.y_tl), (other.x_tl, other.y_tl)) + x_br, y_br = np.minimum((self.x_br, self.y_br), (other.x_br, other.y_br)) + if x_tl >= x_br or y_tl >= y_br: + return 0 + intersection_width = x_br - x_tl + intersection_height = y_br - y_tl + return int(intersection_width * intersection_height) + + def union(self, other: "Box") -> int: + return int(self.area + other.area - self.intersection(other)) + + def iou(self, other: "Box") -> float: + return self.intersection(other) / self.union(other) diff --git a/pybboxes/types/polygon.py b/pybboxes/types/polygon.py new file mode 100644 index 0000000..992eab8 --- /dev/null +++ b/pybboxes/types/polygon.py @@ -0,0 +1,18 @@ +from typing import Union + +import numpy as np + +from pybboxes.types.box_2d import FloatBox, IntegerBox + + +class Polygon: + def __init__(self): + self._points = [] + + @property + def points(self): + return self._points + + def add(self, point: Union[IntegerBox, FloatBox]) -> None: + point = np.array(point, dtype=float) + self.points.append(point.tolist()) diff --git a/pybboxes/utils.py b/pybboxes/utils.py index 5a42503..4f4e28f 100644 --- a/pybboxes/utils.py +++ b/pybboxes/utils.py @@ -1,6 +1,4 @@ import importlib.util -import inspect -import os.path from pathlib import Path from typing import Union @@ -10,28 +8,3 @@ def import_module(module_name: str, filepath: Union[str, Path]): module = importlib.util.module_from_spec(spec) spec.loader.exec_module(module) return module - - -def find_stack_level() -> int: - """ - Taken and adapted from pandas exception utility module. 
- ref: - https://github.com/pandas-dev/pandas/blob/22cb3793b47ed5b1f98156b58e0bfc109acebdc9/pandas/util/_exceptions.py#L27 - """ - - import pybboxes as pbx - - pkg_dir = os.path.dirname(pbx.__file__) - test_dir = os.path.join(pkg_dir, "tests") - - # https://stackoverflow.com/questions/17407119/python-inspect-stack-is-slow - frame = inspect.currentframe() - n = 0 - while frame: - fname = inspect.getfile(frame) - if fname.startswith(pkg_dir) and not fname.startswith(test_dir): - frame = frame.f_back - n += 1 - else: - break - return n diff --git a/pybboxes/utils/__init__.py b/pybboxes/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pybboxes/utils/io.py b/pybboxes/utils/io.py new file mode 100644 index 0000000..1842fde --- /dev/null +++ b/pybboxes/utils/io.py @@ -0,0 +1,124 @@ +import json +import os +import struct +from typing import Dict, List, Optional, Union + +import yaml + + +def get_image_size(file_path: str): + """ + Return (width, height) for a given img file content - no external + dependencies except the os and struct modules from Python core + """ + with open(file_path, "rb") as fhandle: + head = fhandle.read(24) + if len(head) != 24: + return None + if head.startswith(b"\x89PNG\r\n\x1a\n"): + check = struct.unpack(">i", head[16:20])[0] + if check != 0x0D0A1A0A: + return None + width, height = struct.unpack(">ii", head[16:24]) + elif head[:2] == b"\xff\xd8": + try: + fhandle.seek(0) # Read 0xff next + size = 2 + ftype = 0 + while not 0xC0 <= ftype <= 0xCF: + fhandle.seek(size, 1) + byte = fhandle.read(1) + while ord(byte) == 0xFF: + byte = fhandle.read(1) + ftype = ord(byte) + size = struct.unpack(">H", fhandle.read(2))[0] - 2 + # We are at a SOFn block + fhandle.seek(1, 1) # Skip `precision' byte. + height, width = struct.unpack(">HH", fhandle.read(4)) + except Exception: + return None + else: + return None + return width, height + + +class IndentfulDumper(yaml.Dumper): + def increase_indent(self, flow=False, indentless=False): + return super(IndentfulDumper, self).increase_indent(flow, False) + + +def assure_overridable(f): + """ + Wrapper allowing easy use of overwrite-safe functionality. All of the write-helpers + use this wrapper. In case of a conflict, it raises an exception. + """ + + def wrapper(obj, fp, **kwargs): + overwrite = kwargs.get("overwrite", True) + if os.path.exists(fp) and not overwrite: + raise ValueError(f"Path {fp} already exists. To overwrite, use `overwrite=True`.") + return f(obj, fp, **kwargs) + + return wrapper + + +def read_json(fp: str, **kwargs) -> Union[Dict, List]: + """ + Reads a JSON file given path. + + Args: + fp: (str) File path. + + Return: + Dictionary or List of dictionaries depending on the content. + """ + with open(fp, "r") as fd_in: + data = json.load(fd_in, **kwargs) + return data + + +def read_yaml(fp: str) -> Union[Dict, List]: + """ + Reads a YAML file given path. + + Args: + fp: (str) File path. + + Return: + Generic Python object. + """ + with open(fp, "r") as fd_in: + data = yaml.safe_load(fd_in) + return data + + +@assure_overridable +def write_json(obj: Union[Dict, List], fp: str, encoding: Optional[str] = None, **kwargs) -> None: + """ + Writes a Python dictionary or list object to the given path in JSON format. + + Args: + obj: (dict, list) Python dictionary or list object. + fp: (str) Path of the output file. + encoding: (Optional(str)) Encoding for writing. 
+ """ + with open(fp, "w", encoding=encoding) as fd_out: + json.dump(obj, fd_out, **kwargs) + + +@assure_overridable +def write_yaml(obj: Dict, fp: str, indent_blocks: bool = True, **kwargs) -> None: + """ + Writes a Python dictionary to the given path in YAML format. + + Args: + obj: (any) Serializable Python object. + fp: (str) Path of the output file. + indent_blocks: (bool) Whether dump with indents. + """ + with open(fp, "w") as fd_out: + if indent_blocks: + d = yaml.dump(obj, Dumper=IndentfulDumper, **kwargs) + fd_out.write(d) + else: + yaml.safe_dump(obj, fd_out, **kwargs) diff --git a/requirements.txt b/requirements.txt index 6ea33a7..119d00d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,3 @@ -numpy==1.24.2 \ No newline at end of file +numpy==1.24.2 +pycocotools==2.0.6 +pyyaml==6.0 \ No newline at end of file diff --git a/setup.py b/setup.py index 5ef1447..2070f78 100644 --- a/setup.py +++ b/setup.py @@ -33,6 +33,8 @@ def get_version(): "pytest>=7.0.1", "pytest-cov>=3.0.0", "pytest-timeout>=2.1.0", + "pytest-depends>=1.0.1", + "huggingface-hub>=0.25.0", ] extras = { diff --git a/tests/pybboxes/annotations/test_annotations_conversion.py b/tests/pybboxes/annotations/test_annotations_conversion.py new file mode 100644 index 0000000..6f2a24a --- /dev/null +++ b/tests/pybboxes/annotations/test_annotations_conversion.py @@ -0,0 +1,180 @@ +import glob +import os +import shutil +from collections import Counter +from concurrent.futures import ThreadPoolExecutor, as_completed +from functools import partial + +import pytest +from huggingface_hub import HfApi, hf_hub_download +from pycocotools.coco import COCO +from tqdm import tqdm + +from pybboxes.annotations import Annotations + +# hugging face repo from where we will be downloading our fixture for unit testing +repo_id = "gauravparajuli/coco_test_set_pybboxes" + +sample_yolo_dataset_path = str(os.path.join("tests", "pybboxes", "annotations", "testing_data_yolo")) +sample_voc_dataset_path = str(os.path.join("tests", "pybboxes", "annotations", "testing_data_voc")) +sample_coco_dataset_path = str( + os.path.join("tests", "pybboxes", "annotations", "testing_data_coco", "annotations_coco.json") +) # source +persist_coco_test_path = str( + os.path.join("tests", "pybboxes", "annotations", "persist_as_coco_test.json") +) # file generated during test_persist_as_coco + +sample_images = str(os.path.join("tests", "pybboxes", "annotations", "testing_data_images")) + + +def downloadfile(filename, local_dir): + hf_hub_download( + repo_id=repo_id, + repo_type="dataset", + filename=filename, + local_dir=local_dir, + ) + + +def count_files(directory, extensions): + all_files = [] + for ext in extensions: + all_files.extend(glob.glob(f"{directory}/*{ext}")) + return Counter(file.split(".")[-1] for file in all_files) + + +sample_coco_dataset = Annotations(annotation_type="coco") + + +def test_import_from_fiftyone(): + anns = Annotations(annotation_type="fiftyone") + with pytest.raises(NotImplementedError): + anns.load_from_fiftyone() + + +def test_import_from_albumentations(): + anns = Annotations(annotation_type="albumentations") + with pytest.raises(NotImplementedError): + anns.load_from_albumentations() + + +def test_save_as_fiftyone(): + anns = Annotations(annotation_type="albumentations") + with pytest.raises(NotImplementedError): + anns.save_as_fiftyone() + + +def test_save_as_albumentations(): + anns = Annotations(annotation_type="fiftyone") + with pytest.raises(NotImplementedError): + anns.save_as_albumentations() + + +def 
test_annotations_initialization(): + # annotation_type should be either: yolo, coco, voc, albumentations or fiftyone + with pytest.raises(ValueError): + anns = Annotations(annotation_type="not_this_type") + + +def test_annotations_only_appropriate_loading_method_allowed(): + # tests if unappropriate method is used to load annotations + anns = Annotations("yolo") + with pytest.raises(TypeError): + anns.load_from_voc(labels_dir="./labels") + with pytest.raises(TypeError): + anns.load_from_coco(json_path="./sample.json") + + anns = Annotations("coco") + with pytest.raises(TypeError): + anns.load_from_yolo(labels_dir="./labels", images_dir="./images", classes_file="./classes.txt") + + +def test_import_from_coco(): + anns = sample_coco_dataset + anns.load_from_coco(sample_coco_dataset_path) + + assert (type(anns.names_mapping)) == dict + assert anns.names_mapping == dict(raccoons=0, raccoon=1) + + # randomly test the accuracy of annotations here + + +@pytest.mark.depends(on=["test_save_as_yolo"]) +def test_import_from_yolo(): + anns = Annotations(annotation_type="yolo") + anns.load_from_yolo( + labels_dir=sample_yolo_dataset_path, + images_dir=sample_images, + classes_file=str(os.path.join(sample_yolo_dataset_path, "classes.txt")), + ) + + assert (type(anns.names_mapping)) == dict + assert anns.names_mapping == dict(raccoons=0, raccoon=1) + + +@pytest.mark.depends(on=["test_save_as_voc"]) +def test_import_from_voc(): + anns = Annotations(annotation_type="voc") + anns.load_from_voc(labels_dir=sample_voc_dataset_path) + + assert (type(anns.names_mapping)) == dict + assert anns.names_mapping == dict(raccoon=0) # as raccoons label was not used in any bounding boxes, + # plus there is not a file that lists all the available class in voc format + # there was a loss of information + # when converting from coco format to voc format + + +@pytest.mark.depends(on=["test_import_from_coco"]) +def test_save_as_coco(): + persist_coco_path = str(os.path.join("tests", "pybboxes", "annotations", "persist_as_coco_test.json")) + sample_coco_dataset.save_as_coco(export_file=persist_coco_path) + + coco = COCO(persist_coco_path) + + assert len(coco.getImgIds()) == 196 + assert len(coco.getCatIds()) == 2 + + +@pytest.mark.depends(on=["test_import_from_coco"]) +def test_save_as_yolo(): + sample_coco_dataset.save_as_yolo(sample_yolo_dataset_path) + + assert ( + count_files(sample_yolo_dataset_path, extensions=[".txt"])["txt"] == 197 + ) # 196 annotation files, 1 classes.txt file + + +@pytest.mark.depends(on=["test_import_from_coco"]) +def test_save_as_voc(): + sample_coco_dataset.save_as_voc(sample_voc_dataset_path) + + assert count_files(sample_voc_dataset_path, extensions=[".xml"])["xml"] == 196 # 196 annotation files + + +@pytest.fixture(scope="session", autouse=True) +def cleanup(): + # setup code here + api = HfApi() + files = api.list_repo_files(repo_id=repo_id, repo_type="dataset") + files = [file for file in files if (".json" in file or ".jpg" in file)] # filter .gitattributes and README.md + + annotationfilename = files.pop(0) # annotations_coco.json + downloadfile( + annotationfilename, local_dir=os.path.dirname(sample_coco_dataset_path) + ) # download annotation file in a separate folder + + # now download test dataset images + with ThreadPoolExecutor() as executor: + partial_downloadfile = partial(downloadfile, local_dir=sample_images) + futures = [executor.submit(partial_downloadfile, filename) for filename in files] + with tqdm(total=len(futures), desc="downloading test set for unit testing", unit="file") 
as pbar: + for future in as_completed(futures): + future.result() # surface any exception raised by the download worker + pbar.update(1) # update the progress bar for each completed download + + yield + + # clean up the folders that we created after all the tests have run + shutil.rmtree(sample_voc_dataset_path) + shutil.rmtree(sample_yolo_dataset_path) + os.remove(persist_coco_test_path) # remove the generated test file
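Taken together, the changes above give an end-to-end conversion path, and the new tests exercise essentially this coco to yolo/voc round trip. The sketch below condenses that flow outside of pytest; all paths are placeholders and should point at your own dataset layout.

```python
from pybboxes.annotations import Annotations

# Convert a COCO annotation file to YOLO and VOC (paths are placeholders).
anns = Annotations(annotation_type="coco")
anns.load_from_coco(json_path="annotations_coco.json")
anns.save_as_yolo(export_dir="labels_yolo")  # one .txt per image, plus classes.txt
anns.save_as_voc(export_dir="labels_voc")    # one .xml per image

# Reload the generated YOLO labels (the images are needed to recover pixel sizes)
# and export them back to a single COCO json file.
anns_yolo = Annotations(annotation_type="yolo")
anns_yolo.load_from_yolo(
    labels_dir="labels_yolo",
    images_dir="images",
    classes_file="labels_yolo/classes.txt",
)
anns_yolo.save_as_coco(export_file="roundtrip_coco.json")
```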