microsoft · adamjstewart · Apr 2, 2022 · Mar 10, 2022 · Mar 11, 2022 · Mar 11, 2022
diff --git a/docs/api/datasets.rst b/docs/api/datasets.rst
@@ -157,6 +157,11 @@ FAIR1M (Fine-grAined object recognItion in high-Resolution imagery)
 
 .. autoclass:: FAIR1M
 
+Forest Damage
+^^^^^^^^^^^^^
+
+.. autoclass:: ForestDamage
+
 GID-15 (Gaofen Image Dataset)
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 

diff --git a/tests/data/forestdamage/Data_Set_Larch_Casebearer.zip b/tests/data/forestdamage/Data_Set_Larch_Casebearer.zip
diff --git a/tests/data/forestdamage/Data_Set_Larch_Casebearer/Bebehojd_20190527/Annotations/B01_0004.xml b/tests/data/forestdamage/Data_Set_Larch_Casebearer/Bebehojd_20190527/Annotations/B01_0004.xml
@@ -0,0 +1 @@
+<annotation><filename>B01_0004.xml</filename><size><width>32</width><height>32</height><depth>3</depth></size><object><damage>other</damage><bndbox><xmin>8</xmin><ymin>8</ymin><xmax>24</xmax><ymax>24</ymax></bndbox></object></annotation>
diff --git a/tests/data/forestdamage/Data_Set_Larch_Casebearer/Bebehojd_20190527/Annotations/B01_0005.xml b/tests/data/forestdamage/Data_Set_Larch_Casebearer/Bebehojd_20190527/Annotations/B01_0005.xml
@@ -0,0 +1 @@
+<annotation><filename>B01_0005.xml</filename><size><width>32</width><height>32</height><depth>3</depth></size><object><damage>other</damage><bndbox><xmin>8</xmin><ymin>8</ymin><xmax>24</xmax><ymax>24</ymax></bndbox></object></annotation>
diff --git a/...ta/forestdamage/Data_Set_Larch_Casebearer/Bebehojd_20190527/Images/B01_0004.JPG b/...ta/forestdamage/Data_Set_Larch_Casebearer/Bebehojd_20190527/Images/B01_0004.JPG
diff --git a/...ta/forestdamage/Data_Set_Larch_Casebearer/Bebehojd_20190527/Images/B01_0005.JPG b/...ta/forestdamage/Data_Set_Larch_Casebearer/Bebehojd_20190527/Images/B01_0005.JPG
diff --git a/tests/data/forestdamage/data.py b/tests/data/forestdamage/data.py
@@ -0,0 +1,80 @@
+import hashlib
+import os
+import random
+import shutil
+import xml.etree.ElementTree as ET
+
+import numpy as np
+from PIL import Image
+
+SIZE = 32
+
+np.random.seed(0)
+random.seed(0)
+
+PATHS = {
+    "images": [
+        "Bebehojd_20190527/Images/B01_0004.JPG",
+        "Bebehojd_20190527/Images/B01_0005.JPG",
+    ],
+    "annotations": [
+        "Bebehojd_20190527/Annotations/B01_0004.xml",
+        "Bebehojd_20190527/Annotations/B01_0005.xml",
+    ],
+}
+
+
+def create_annotation(path: str) -> None:
+    root = ET.Element("annotation")
+
+    ET.SubElement(root, "filename").text = os.path.basename(path)
+
+    size = ET.SubElement(root, "size")
+
+    ET.SubElement(size, "width").text = str(SIZE)
+    ET.SubElement(size, "height").text = str(SIZE)
+    ET.SubElement(size, "depth").text = str(3)
+
+    annotation = ET.SubElement(root, "object")
+
+    ET.SubElement(annotation, "damage").text = "other"
+
+    bbox = ET.SubElement(annotation, "bndbox")
+    ET.SubElement(bbox, "xmin").text = str(0 + int(SIZE / 4))
+    ET.SubElement(bbox, "ymin").text = str(0 + int(SIZE / 4))
+    ET.SubElement(bbox, "xmax").text = str(SIZE - int(SIZE / 4))
+    ET.SubElement(bbox, "ymax").text = str(SIZE - int(SIZE / 4))
+
+    tree = ET.ElementTree(root)
+    tree.write(path)
+
+
+def create_file(path: str) -> None:
+    Z = np.random.rand(SIZE, SIZE, 3) * 255
+    img = Image.fromarray(Z.astype("uint8")).convert("RGB")
+    img.save(path)
+
+
+if __name__ == "__main__":
+    data_root = "Data_Set_Larch_Casebearer"
+    # remove old data
+    if os.path.isdir(data_root):
+        shutil.rmtree(data_root)
+    else:
+        os.makedirs(data_root)
+
+    for path in PATHS["images"]:
+        os.makedirs(os.path.join(data_root, os.path.dirname(path)), exist_ok=True)
+        create_file(os.path.join(data_root, path))
+
+    for path in PATHS["annotations"]:
+        os.makedirs(os.path.join(data_root, os.path.dirname(path)), exist_ok=True)
+        create_annotation(os.path.join(data_root, path))
+
+    # compress data
+    shutil.make_archive(data_root, "zip", ".", data_root)
+
+    # Compute checksums
+    with open(data_root + ".zip", "rb") as f:
+        md5 = hashlib.md5(f.read()).hexdigest()
+        print(f"{data_root}: {md5}")
diff --git a/tests/datasets/test_forestdamage.py b/tests/datasets/test_forestdamage.py
@@ -0,0 +1,81 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+import os
+import shutil
+from pathlib import Path
+
+import matplotlib.pyplot as plt
+import pytest
+import torch
+import torch.nn as nn
+from _pytest.monkeypatch import MonkeyPatch
+
+import torchgeo.datasets.utils
+from torchgeo.datasets import ForestDamage
+
+
+def download_url(url: str, root: str, *args: str) -> None:
+    """Copy the local file ``url`` to ``root`` instead of downloading it.
+
+    Any extra positional arguments are accepted and ignored.
+    """
+    shutil.copy(url, root)
+
+
+class TestForestDamage:
+    @pytest.fixture
+    def dataset(self, monkeypatch: MonkeyPatch, tmp_path: Path) -> ForestDamage:
+        monkeypatch.setattr(torchgeo.datasets.utils, "download_url", download_url)
+        data_dir = os.path.join("tests", "data", "forestdamage")
+
+        url = os.path.join(data_dir, "Data_Set_Larch_Casebearer.zip")
+
+        md5 = "a6adc19879c1021cc1ba8d424e19c9e0"
+
+        monkeypatch.setattr(ForestDamage, "url", url)
+        monkeypatch.setattr(ForestDamage, "md5", md5)
+        root = str(tmp_path)
+        transforms = nn.Identity()  # type: ignore[no-untyped-call]
+        return ForestDamage(
+            root=root, transforms=transforms, download=True, checksum=True
+        )
+
+    def test_already_downloaded(self, dataset: ForestDamage) -> None:
+        ForestDamage(root=dataset.root, download=True)
+
+    def test_getitem(self, dataset: ForestDamage) -> None:
+        x = dataset[0]
+        assert isinstance(x, dict)
+        assert isinstance(x["image"], torch.Tensor)
+        assert isinstance(x["label"], torch.Tensor)
+        assert isinstance(x["boxes"], torch.Tensor)
+        assert x["image"].shape[0] == 3
+        assert x["image"].ndim == 3
+
+    def test_len(self, dataset: ForestDamage) -> None:
+        assert len(dataset) == 2
+
+    def test_not_extracted(self, tmp_path: Path) -> None:
+        url = os.path.join(
+            "tests", "data", "forestdamage", "Data_Set_Larch_Casebearer.zip"
+        )
+        shutil.copy(url, tmp_path)
+        ForestDamage(root=str(tmp_path))
+
+    def test_corrupted(self, tmp_path: Path) -> None:
+        with open(os.path.join(tmp_path, "Data_Set_Larch_Casebearer.zip"), "w") as f:
+            f.write("bad")
+        with pytest.raises(RuntimeError, match="Dataset found, but corrupted."):
+            ForestDamage(root=str(tmp_path), checksum=True)
+
+    def test_not_found(self, tmp_path: Path) -> None:
+        with pytest.raises(RuntimeError, match="Dataset not found in."):
+            ForestDamage(str(tmp_path))
+
+    def test_plot(self, dataset: ForestDamage) -> None:
+        x = dataset[0].copy()
+        dataset.plot(x, suptitle="Test")
+        plt.close()
+
+    def test_plot_prediction(self, dataset: ForestDamage) -> None:
+        x = dataset[0].copy()
+        x["prediction_boxes"] = x["boxes"].clone()
+        dataset.plot(x, suptitle="Prediction")
+        plt.close()
diff --git a/torchgeo/datasets/__init__.py b/torchgeo/datasets/__init__.py
@@ -34,6 +34,7 @@
 from .eudem import EUDEM
 from .eurosat import EuroSAT
 from .fair1m import FAIR1M
+from .forestdamage import ForestDamage
 from .geo import (
     GeoDataset,
     IntersectionDataset,
@@ -138,6 +139,7 @@
     "ETCI2021",
     "EuroSAT",
     "FAIR1M",
+    "ForestDamage",
     "GID15",
     "IDTReeS",
     "InriaAerialImageLabeling",
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		<annotation><filename>B01_0004.xml</filename><size><width>32</width><height>32</height><depth>3</depth></size><object><damage>other</damage><bndbox><xmin>8</xmin><ymin>8</ymin><xmax>24</xmax><ymax>24</ymax></bndbox></object></annotation>
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		<annotation><filename>B01_0005.xml</filename><size><width>32</width><height>32</height><depth>3</depth></size><object><damage>other</damage><bndbox><xmin>8</xmin><ymin>8</ymin><xmax>24</xmax><ymax>24</ymax></bndbox></object></annotation>