Skip to content

Commit

Permalink
Add CMS Global Mangrove Canopy dataset (#391)
Browse files Browse the repository at this point in the history
* CMS dataset

* dynamically set filename

* add warning in documentation

* requested changes and data.py

* single zip file and camel case

* md5 check added

* correct error messages

* compression smaller test file

Co-authored-by: Caleb Robinson <calebrob6@gmail.com>
  • Loading branch information
nilsleh and calebrob6 authored Feb 20, 2022
1 parent 89277dc commit 9cf36fa
Show file tree
Hide file tree
Showing 9 changed files with 419 additions and 0 deletions.
5 changes: 5 additions & 0 deletions docs/api/datasets.rst
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,11 @@ Chesapeake Bay High-Resolution Land Cover Project
.. autoclass:: ChesapeakeWV
.. autoclass:: ChesapeakeCVPR

CMS Global Mangrove Canopy Dataset
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

.. autoclass:: CMSGlobalMangroveCanopy

Cropland Data Layer (CDL)
^^^^^^^^^^^^^^^^^^^^^^^^^

Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
68 changes: 68 additions & 0 deletions tests/data/cms_mangrove_canopy/data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
#!/usr/bin/env python3

# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

import hashlib
import os
import random
import shutil

import numpy as np
import rasterio

# Seed both random number generators with a fixed value so the generated
# pixel values are the same on every run.
random.seed(0)
np.random.seed(0)

# Height and width, in pixels, of every generated raster.
SIZE = 64

# Names of the rasters that the script writes.
_IMAGE_NAMES = (
    "Mangrove_agb_Angola.tif",
    "Mangrove_hba95_Angola.tif",
    "Mangrove_hmax95_Angola.tif",
)

# One record per raster; the file name is stored under the "image" key.
files = [{"image": name} for name in _IMAGE_NAMES]


def create_file(path: str, dtype: str, num_channels: int) -> None:
    """Write a small, LZW-compressed GeoTIFF of random integers to ``path``.

    Args:
        path: where to write the GeoTIFF
        dtype: name of an integer NumPy dtype; used both for the raster
            profile and for the random data
        num_channels: number of bands to write
    """
    profile = {
        "driver": "GTiff",
        "dtype": dtype,
        "count": num_channels,
        "crs": "epsg:4326",
        # NOTE(review): the transform is derived for a 1x1 grid although the
        # raster is SIZE x SIZE; left untouched so the generated fixtures (and
        # the checksums derived from them) do not change -- confirm intent.
        "transform": rasterio.transform.from_bounds(0, 0, 1, 1, 1, 1),
        "height": SIZE,
        "width": SIZE,
        "compress": "lzw",
        "predictor": 2,
    }

    # One (SIZE, SIZE) plane per band, so the array always matches the
    # "count" declared in the profile. Values are drawn from [0, dtype max).
    Z = np.random.randint(
        np.iinfo(dtype).max, size=(num_channels, SIZE, SIZE), dtype=dtype
    )

    # The context manager closes the dataset deterministically, which is what
    # flushes the data to disk; the handle is no longer left to the garbage
    # collector.
    with rasterio.open(path, "w", **profile) as dst:
        dst.write(Z)


if __name__ == "__main__":
    directory = "CMS_Global_Map_Mangrove_Canopy_1665"
    data_dir = os.path.join(directory, "data")

    # Remove any previous output so stale files never end up in the archive
    if os.path.isdir(directory):
        shutil.rmtree(directory)
    os.makedirs(data_dir, exist_ok=True)

    # Write one single-band int32 raster per entry in ``files``
    for entry in files:
        target = os.path.join(data_dir, entry["image"])
        create_file(target, dtype="int32", num_channels=1)

    # Compress data: zip the whole directory, relative to the current
    # working directory, into "<directory>.zip"
    shutil.make_archive(directory, "zip", ".", directory)

    # Compute and print the checksum of the archive
    with open(directory + ".zip", "rb") as f:
        hasher = hashlib.md5()
        hasher.update(f.read())
        md5 = hasher.hexdigest()
        print(f"{directory}: {md5}")
93 changes: 93 additions & 0 deletions tests/datasets/test_cms_mangrove_canopy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

import os
import shutil
from pathlib import Path
from typing import Generator

import pytest
import torch
import torch.nn as nn
from _pytest.monkeypatch import MonkeyPatch
from rasterio.crs import CRS

from torchgeo.datasets import CMSGlobalMangroveCanopy, IntersectionDataset, UnionDataset


def download_url(url: str, root: str, *args: str, **kwargs: str) -> None:
    """Copy the local file ``url`` to ``root``.

    Args:
        url: path of the file to copy
        root: destination directory or file path
        *args: accepted but ignored
        **kwargs: accepted but ignored
    """
    shutil.copy(src=url, dst=root)


class TestCMSGlobalMangroveCanopy:
    """Tests for the ``CMSGlobalMangroveCanopy`` dataset."""

    @pytest.fixture
    def dataset(
        self, monkeypatch: Generator[MonkeyPatch, None, None], tmp_path: Path
    ) -> CMSGlobalMangroveCanopy:
        """Return a dataset for Angola built from the ``tests/data`` fixtures.

        The class attributes ``zipfile`` and ``md5`` are patched first, so the
        dataset is constructed (with checksum verification enabled) against
        the values set here.
        """
        # NOTE(review): ``monkeypatch`` is annotated as a Generator, which is
        # presumably why the ``setattr`` call carries a type-ignore -- confirm.
        patched = {
            "zipfile": "CMS_Global_Map_Mangrove_Canopy_1665.zip",
            "md5": "d6894fa6293cc9c0f3f95a810e842de5",
        }
        for attr_name, attr_value in patched.items():
            monkeypatch.setattr(  # type: ignore[attr-defined]
                CMSGlobalMangroveCanopy, attr_name, attr_value
            )

        return CMSGlobalMangroveCanopy(
            os.path.join("tests", "data", "cms_mangrove_canopy"),
            country="Angola",
            transforms=nn.Identity(),  # type: ignore[attr-defined]
            checksum=True,
        )

    def test_getitem(self, dataset: CMSGlobalMangroveCanopy) -> None:
        """Indexing by the full bounds yields a dict with a CRS and a mask."""
        sample = dataset[dataset.bounds]
        assert isinstance(sample, dict)
        assert isinstance(sample["crs"], CRS)
        assert isinstance(sample["mask"], torch.Tensor)

    def test_no_dataset(self) -> None:
        """A root without any data raises ``RuntimeError``."""
        with pytest.raises(RuntimeError, match="Dataset not found in."):
            CMSGlobalMangroveCanopy(root="/test")

    def test_already_downloaded(self, tmp_path: Path) -> None:
        """The dataset can be constructed when only the zip file is in root."""
        fixture_dir = os.path.join("tests", "data", "cms_mangrove_canopy")
        archive = os.path.join(fixture_dir, "CMS_Global_Map_Mangrove_Canopy_1665.zip")
        destination = str(tmp_path)
        shutil.copy(archive, destination)
        CMSGlobalMangroveCanopy(destination, country="Angola")

    def test_corrupted(self, tmp_path: Path) -> None:
        """A zip file with bogus content raises when checksums are verified."""
        bad_zip = os.path.join(tmp_path, "CMS_Global_Map_Mangrove_Canopy_1665.zip")
        with open(bad_zip, "w") as f:
            f.write("bad")
        with pytest.raises(RuntimeError, match="Dataset found, but corrupted."):
            CMSGlobalMangroveCanopy(root=str(tmp_path), country="Angola", checksum=True)

    def test_invalid_country(self) -> None:
        """An unknown country raises ``AssertionError``."""
        with pytest.raises(AssertionError):
            CMSGlobalMangroveCanopy(country="fakeCountry")

    def test_invalid_measurement(self) -> None:
        """An unknown measurement raises ``AssertionError``."""
        with pytest.raises(AssertionError):
            CMSGlobalMangroveCanopy(measurement="wrongMeasurement")

    def test_and(self, dataset: CMSGlobalMangroveCanopy) -> None:
        """``&`` produces an ``IntersectionDataset``."""
        combined = dataset & dataset
        assert isinstance(combined, IntersectionDataset)

    def test_or(self, dataset: CMSGlobalMangroveCanopy) -> None:
        """``|`` produces a ``UnionDataset``."""
        combined = dataset | dataset
        assert isinstance(combined, UnionDataset)

    def test_plot(self, dataset: CMSGlobalMangroveCanopy) -> None:
        """Plotting the mask of a full-bounds sample runs without raising."""
        sample = dataset[dataset.bounds]
        dataset.plot(sample["mask"])
2 changes: 2 additions & 0 deletions torchgeo/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
ChesapeakeVA,
ChesapeakeWV,
)
from .cms_mangrove_canopy import CMSGlobalMangroveCanopy
from .cowc import COWC, COWCCounting, COWCDetection
from .cv4a_kenya_crop_type import CV4AKenyaCropType
from .cyclone import TropicalCycloneWindEstimation
Expand Down Expand Up @@ -97,6 +98,7 @@
"ChesapeakeVA",
"ChesapeakeWV",
"ChesapeakeCVPR",
"CMSGlobalMangroveCanopy",
"Esri2020",
"Landsat",
"Landsat1",
Expand Down
Loading

0 comments on commit 9cf36fa

Please sign in to comment.