From e975e911a5d46ac0e0b11159b5fde0a7ed538879 Mon Sep 17 00:00:00 2001 From: Isaac Corley <22203655+isaaccorley@users.noreply.github.com> Date: Thu, 9 Sep 2021 21:41:42 -0500 Subject: [PATCH 1/9] add dataset to docs --- docs/api/datasets.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docs/api/datasets.rst b/docs/api/datasets.rst index 034f3c7d463..842d995613f 100644 --- a/docs/api/datasets.rst +++ b/docs/api/datasets.rst @@ -87,6 +87,11 @@ CV4A Kenya Crop Type Competition .. autoclass:: CV4AKenyaCropType +ETCI2021 Flood Detection +^^^^^^^^^^^^^^^^^^^^^^^^ + +.. autoclass:: ETCI2021 + LandCover.ai (Land Cover from Aerial Imagery) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ From f38f7eb5406e5c92f5e1669974fe1972eef2097c Mon Sep 17 00:00:00 2001 From: Isaac Corley <22203655+isaaccorley@users.noreply.github.com> Date: Thu, 9 Sep 2021 21:42:14 -0500 Subject: [PATCH 2/9] add sample test data --- tests/data/etci2021/test_without_ref_labels.zip | Bin 0 -> 4874 bytes tests/data/etci2021/train.zip | Bin 0 -> 5610 bytes tests/data/etci2021/val_with_ref_labels.zip | Bin 0 -> 5512 bytes 3 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 tests/data/etci2021/test_without_ref_labels.zip create mode 100644 tests/data/etci2021/train.zip create mode 100644 tests/data/etci2021/val_with_ref_labels.zip diff --git a/tests/data/etci2021/test_without_ref_labels.zip b/tests/data/etci2021/test_without_ref_labels.zip new file mode 100644 index 0000000000000000000000000000000000000000..0f94a1e4ce5b827f3efe7991c2c526a20d4a11a2 GIT binary patch literal 4874 zcmc(iO=uHA6vroDZDVcGR)Z9g3fdypW)tdHDWZrV#6$4oyxq{nL}F5tb#3vWq8C9t zlwQ0jf>5*~coMu>p$HYho)x7a6-Dr%ci&_`Gc&t88Q0kILJDF2Z-4XNzyIWOvG@jr ze^wXwoN2r$$OR#D37b7zw%mU}&};JQ51jUw@{F@zc~dHbBJ#5p6> zIW={vR?^|jYE3K3St)Z^%47~`l9bJk9W+!PF9oRdJZm@u5F3%?# zBDD4kY3aHlw+>Rc7;;+xeHX$2?_E%@9f7}2Kq;bfzMSQmeUdy2rR$^9mGh0e#nf>T zt$pA4v%daiZmlpm`T6&+>11^E&fwcu@m<67ou6W-C+?5l-MO_s*WcH7eInJdTwO~& zJes<>u=rqQeBss*awelc9F9eU<1#SBMKp`$LgiRYb8;-Ui{l`{@wm`{Tv1@HH+g$0Up&Fa6lut;=A%q!1rGn3sW^y3(QOQySo zk?tsN&Y83^y3agGoykJtPI{oo$Q${bqe$mGx8k!F6)npIU1s8Qbm`3J)*XnD^!%UH z+(VoIL7JKVu(X$XTG9wTx^Y`n+P6c;0?afRd^2>Kv)aV-Rfq}^!o~>F%v{Gy=)I^k zCn3`$g9*_&%}wYDe06HxKwo`t%Y#OkiOri=8Ci|si5iZF(ui;O)_fWE7mUu|Xu1>`;8~zEmz&V;PN6nUqhY2m@5ND?7H3{mnv)Q>(a>qiP3U5m2q8wcf~#WY gDsNuRM)NJ4M!4wt8%+Xc8==kcJCsD|9TY-;0F4YS0ssI2 literal 0 HcmV?d00001 diff --git a/tests/data/etci2021/train.zip b/tests/data/etci2021/train.zip new file mode 100644 index 0000000000000000000000000000000000000000..5cba717d7a1083433c852e30087e86ed24ad6373 GIT binary patch literal 5610 zcmd6rO-vI(7={Ph@*^f16ym{%L=k`KE+_&a1`nP%K&(B`1MN1g6m42UYXuXdMm=fJ z5EDfbjYjPO6TEpQQ7&@wphuC!XfP%o^dRxzH{GA^?#!?=ixwf%v zJ>Crn|4cvM7bv{ZUMkRblpR%uQ~uzoYUFwSs!N%Nac-;zm3hB-5GuU%8_!2<&o>&+ zN0ro2Qi;VgiEyhVAC=nV!&zBA(k^wXr2|m*f=KCRp3>RjWIR(8z#`1K->}VnKfgdO zr)oTd`gyv>Rv=o1FrTS2@k__x(+L;>Aq_Ue@Yo?KJPsq5YZ*xm6`F+?OIh&Ojo&k$ zKTR%0Bay|QKSrv{-`?E0Fz4Ofe5dk*=VbSTmWjr?%;c8Z+RNSb74zw(`bRzWSMxIu z-*n}#?Loy>-(CZ*go$(kB2Y=_!E7|yHIGT=)hm%S)o>bvY z0l51j@CMhQ=!~*d95329w(+m(bTXZenGsF^K0GLvKtHYn?!^8ddUk(qFcy@GM@N;+ zSw(ecS?b72QmZVt7R^)0_?w&yi+J-(Ti5sUme-y67qodrq}6=_3M_W(;L5YEL1~`6 z43O~PJ`?<)5`Je;CG+DM8>n{Pe$(CfBOt?r8(YY=o)g4UIq_2)L78_&gb%+1aPLLn z1>H!kH@C9#9UfN7-lpa4&FI^4{8rE0J~eX_yQ)~r?&yukT&u^Z#lC~U;X#Iy6#IMG zGR+p5X&xEf@14cSByL{QZ+@|Y`-2|NG6Qj+};@knV!ucLhwaYJuz%izuF;+dxJX8(gIip zE&F3(4eIRq>bpgCb7w+iE;bW-?Byn8b!QN!CS>dk^use?fwRfyO8D<_vHN3#F+J?H zi^lX=w}UHpY_f=SR_F|vBK+P+CSh}*@9|3M&WLldz|QY87KWX1ML>SV?>Bbck?q@B||k(zed``e0OR~5gSGXicuB-5)I!7}fp=x#{Ov1}kA%bo~Ta{?i> zo$&BFA(>0fYz{&MQ|}6vo_13ZK8RDQz)=ZWu-tVc%bKBnW>zQSgVTvn9sG`fc&rA0 F`UBzHrgZ=S literal 0 HcmV?d00001 diff --git a/tests/data/etci2021/val_with_ref_labels.zip b/tests/data/etci2021/val_with_ref_labels.zip new file mode 100644 index 0000000000000000000000000000000000000000..acd6d76c332619c4d33494bb8400e51489cbdb5b GIT binary patch literal 5512 zcmd6r!D|yi6vih_(l%B>s|_AXQ3wV>n@!R{3B_7O5DFz|6e?(%*fmi?Q<^ol2eDEQ z9*Rg2#fu^cMZI|tL_~Y>(jJQcfTbV`Qs}`V9t7v@?(A-MW_G9BhT1YAf$V&5e)Hy= zugwhl1G^CXJa~2Voccz838GG<$WEqg zzgn(L$YWRJTtbTXN~yS{B$DxDPg0wAWwY}}T<2A;mF22AJ17WO9Tl9tR&y0NfH}4z z!1Cm}eES6cbQ+H#aRDB<+~jd7H-$&7);&H}RJR8f$yn&m-Rsrw-{w{eBO@ytzs5sN zUuF+2Ed(Obhb>?IXZxOZ-#^?@o!j5uKHV2?{#;oNKR+3snScFkF+G3x2;y(9Fs4$Y zF_mKOm`Xc^DJ8hsH}i@-kt#vM%30>BM(*Htj z>1Dlf^PD%4(44nNYp>~#={yXsH(s^w`c&rY28(n30a z84rel=Dd3ZotGhC+galqEa&}$5i0aTx665#y<*axw}=tJ;9?r`iivVw1}4*a0Z{`l zsP{HZJm)oPjA^~fcHRLD3 zxaiK<(D9ttNlBPm2~%UZ&{hwpuc^z=3(W`h_`Dab4ZvGHLI<@7O<^&oq64&bs(vz+ zZSB@LjWrl8k{RS{M37bEv8x6#_{}Rt>y$PGl~l8L661wzvMhvN+_~V>cY%veTNW1M zeS6+Ex(+yn|AJJH22YW1&`#29 z9K(h*^ursk(!5ehcW7qqR34j*WRwS{F%u9YgR8NGUME#Hl&ql&P}V%CZWj+az>?z& zJl+M_Jj@G1Jt52}YdRktX000gn%y)k&d0b=tR71TTdPt

DDn#9Zd3p=~N1a)}sh z>aw_)Z7&VDq_>TKHsD5s$jhurYi~NTlA2&UAGL^~uIbLl46m@S8UQ-J5-|YqTU^W? zfa!dUyl?FQI*gK<0KkV Date: Thu, 9 Sep 2021 21:42:29 -0500 Subject: [PATCH 3/9] add dataset unit tests --- tests/datasets/test_etci2021.py | 77 +++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 tests/datasets/test_etci2021.py diff --git a/tests/datasets/test_etci2021.py b/tests/datasets/test_etci2021.py new file mode 100644 index 00000000000..6f77717d2d0 --- /dev/null +++ b/tests/datasets/test_etci2021.py @@ -0,0 +1,77 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +import os +import shutil +from pathlib import Path +from typing import Generator + +import pytest +import torch +from _pytest.fixtures import SubRequest +from _pytest.monkeypatch import MonkeyPatch + +import torchgeo.datasets.utils +from torchgeo.datasets import ETCI2021 +from torchgeo.transforms import Identity + + +def download_url(url: str, root: str, *args: str) -> None: + shutil.copy(url, root) + + +class TestETCI2021: + @pytest.fixture(params=["train", "val", "test"]) + def dataset( + self, + monkeypatch: Generator[MonkeyPatch, None, None], + tmp_path: Path, + request: SubRequest, + ) -> ETCI2021: + monkeypatch.setattr( # type: ignore[attr-defined] + torchgeo.datasets.utils, "download_url", download_url + ) + md5s = [ + "50c10eb07d6db9aee3ba36401e4a2c45", + "3e8b5a3cb95e6029e0e2c2d4b4ec6fba", + "c8ee1e5d3e478761cd00ebc6f28b0ae7", + ] + data_dir = os.path.join("tests", "data", "etci2021") + urls = [ + os.path.join(data_dir, "train.zip"), + os.path.join(data_dir, "val_with_ref_labels.zip"), + os.path.join(data_dir, "test_without_ref_labels.zip"), + ] + monkeypatch.setattr(ETCI2021, "md5s", md5s) # type: ignore[attr-defined] + monkeypatch.setattr(ETCI2021, "urls", urls) # type: ignore[attr-defined] + root = str(tmp_path) + split = request.param + transforms = Identity() + return ETCI2021(root, split, transforms, download=True, checksum=True) + + def test_getitem(self, dataset: ETCI2021) -> None: + x = dataset[0] + assert isinstance(x, dict) + assert isinstance(x["image"], torch.Tensor) + assert isinstance(x["mask"], torch.Tensor) + assert x["image"].shape[0] == 6 + assert x["image"].shape[-2:] == x["mask"].shape[-2:] + + if dataset.split != "test": + assert x["mask"].ndim == 3 + else: + assert x["mask"].ndim == 2 + + def test_len(self, dataset: ETCI2021) -> None: + assert len(dataset) == 2 + + def test_already_downloaded(self, dataset: ETCI2021) -> None: + ETCI2021(root=dataset.root, download=True) + + def test_invalid_split(self) -> None: + with pytest.raises(AssertionError): + ETCI2021(split="foo") + + def test_not_downloaded(self, tmp_path: Path) -> None: + with pytest.raises(RuntimeError, match="Dataset not found or corrupted."): + ETCI2021(str(tmp_path)) From 986be4fbfa96f594571b024517602e361e4c1942 Mon Sep 17 00:00:00 2001 From: Isaac Corley <22203655+isaaccorley@users.noreply.github.com> Date: Thu, 9 Sep 2021 22:06:48 -0500 Subject: [PATCH 4/9] add etci2021 dataset --- torchgeo/datasets/__init__.py | 2 + torchgeo/datasets/etci2021.py | 243 ++++++++++++++++++++++++++++++++++ 2 files changed, 245 insertions(+) create mode 100644 torchgeo/datasets/etci2021.py diff --git a/torchgeo/datasets/__init__.py b/torchgeo/datasets/__init__.py index d249b6072b4..d027012b603 100644 --- a/torchgeo/datasets/__init__.py +++ b/torchgeo/datasets/__init__.py @@ -22,6 +22,7 @@ from .cowc import COWC, COWCCounting, COWCDetection from .cv4a_kenya_crop_type import CV4AKenyaCropType from .cyclone import TropicalCycloneWindEstimation +from .etci2021 import ETCI2021 from .geo import GeoDataset, RasterDataset, VectorDataset, VisionDataset, ZipDataset from .landcoverai import LandCoverAI from .landsat import ( @@ -81,6 +82,7 @@ "COWCCounting", "COWCDetection", "CV4AKenyaCropType", + "ETCI2021", "LandCoverAI", "LEVIRCDPlus", "PatternNet", diff --git a/torchgeo/datasets/etci2021.py b/torchgeo/datasets/etci2021.py new file mode 100644 index 00000000000..7326fe6f8f7 --- /dev/null +++ b/torchgeo/datasets/etci2021.py @@ -0,0 +1,243 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +"""ETCI 2021 dataset.""" + +import glob +import os +import shutil +from typing import Callable, Dict, List, Optional + +import numpy as np +import torch +from PIL import Image +from torch import Tensor + +from .geo import VisionDataset +from .utils import download_and_extract_archive + + +class ETCI2021(VisionDataset): + """ETCI 2021 Flood Detection dataset. + + The `ETCI2021 `_ + dataset is a dataset for flood detection + + Dataset features: + * 33,405 VV & VH Sentinel-1 Synthetic Aperture Radar (SAR) images + * 2 binary masks per image representing water body & flood, respectively + * 2 polarization band images (VV, VH) of 3 RGB channels per band + * 3 RGB channels per band generated by the Hybrid Pluggable Processing Pipeline 'hyp3' + * Images with 5x20m per pixel resolution (256x256) px) taken in Interferometric Wide Swath acquisition mode + * Flood events from 5 different regions + + Dataset format: + * VV band three-channel png + * VH band three-channel png + * water body mask single-channel png where no water body = 0, water body = 255 + * flood mask single-channel png where no flood = 0, flood = 255 + + Dataset classes: + 1. no flood/water + 2. flood/water + + If you use this dataset in your research, please add the following to your + acknowledgements section: + + 'The authors would like to thank the NASA Earth Science Data Systems Program, + NASA Digital Transformation AI/ML thrust, and IEEE GRSS for organizing the ETCI competition'. + """ # noqa: E501 + + urls = [ + "https://drive.google.com/file/d/14HqNW5uWLS92n7KrxKgDwUTsSEST6LCr", + "https://drive.google.com/file/d/19sriKPHCZLfJn_Jmk3Z_0b3VaCBVRVyn", + "https://drive.google.com/file/d/1rpMVluASnSHBfm2FhpPDio0GyCPOqg7E", + ] + md5s = [ + "1e95792fe0f6e3c9000abdeab2a8ab0f", + "fd18cecb318efc69f8319f90c3771bdf", + "da9fa69e1498bd49d5c766338c6dac3d", + ] + filenames = ["train.zip", "val_with_ref_labels.zip", "test_without_ref_labels.zip"] + directories = ["train", "test", "test_internal"] + splits = ["train", "val", "test"] + bands = ["VV", "VH"] + masks = ["flood", "water_body"] + split_to_folder = dict(train="train", val="test", test="test_internal") + + def __init__( + self, + root: str = "data", + split: str = "train", + transforms: Optional[Callable[[Dict[str, Tensor]], Dict[str, Tensor]]] = None, + download: bool = False, + checksum: bool = False, + ) -> None: + """Initialize a new ETCI 2021 dataset instance. + + Args: + root: root directory where dataset can be found + split: one of "train", "val", or "test" + transforms: a function/transform that takes input sample and its target as + entry and returns a transformed version + download: if True, download dataset and store it in the root directory + checksum: if True, check the MD5 of the downloaded files (may be slow) + + Raises: + AssertionError: if ``split`` argument is invalid + RuntimeError: if ``download=False`` and data is not found, or checksums + don't match + """ + assert split in self.splits + + self.root = root + self.split = split + self.transforms = transforms + self.checksum = checksum + + if download: + self._download() + + if not self._check_integrity(): + raise RuntimeError( + "Dataset not found or corrupted. " + + "You can use download=True to download it" + ) + + self.files = self._load_files(self.root, self.split) + + def __getitem__(self, index: int) -> Dict[str, Tensor]: + """Return an index within the dataset. + + Args: + index: index to return + + Returns: + data and label at that index + """ + files = self.files[index] + vv = self._load_image(files["vv"]) + vh = self._load_image(files["vh"]) + water_mask = self._load_target(files["water_mask"]) + + if self.split != "test": + flood_mask = self._load_target(files["flood_mask"]) + mask = torch.stack(tensors=[water_mask, flood_mask], dim=0) + else: + mask = water_mask + + image = torch.cat(tensors=[vv, vh], dim=0) # type: ignore[attr-defined] + sample = {"image": image, "mask": mask} + + if self.transforms is not None: + sample = self.transforms(sample) + + return sample + + def __len__(self) -> int: + """Return the number of data points in the dataset. + + Returns: + length of the dataset + """ + return len(self.files) + + def _load_files(self, root: str, split: str) -> List[Dict[str, str]]: + """Return the paths of the files in the dataset. + + Args: + root: root dir of dataset + split: subset of dataset, one of [train, val, test] + + Returns: + list of dicts containing paths for each pair of vv, vh, + water body mask, flood mask (train/val only) + """ + files = [] + directory = self.split_to_folder[split] + folders = sorted(glob.glob(os.path.join(root, directory, "*"))) + folders = [os.path.join(folder, "tiles") for folder in folders] + for folder in folders: + vvs = glob.glob(os.path.join(folder, "vv", "*.png")) + vhs = glob.glob(os.path.join(folder, "vh", "*.png")) + water_masks = glob.glob(os.path.join(folder, "water_body_label", "*.png")) + + if split == "test": + flood_masks = [""] * len(water_masks) + else: + flood_masks = glob.glob(os.path.join(folder, "flood_label", "*.png")) + + for vv, vh, flood_mask, water_mask in zip( + vvs, vhs, flood_masks, water_masks + ): + files.append( + dict(vv=vv, vh=vh, flood_mask=flood_mask, water_mask=water_mask) + ) + return files + + def _load_image(self, path: str) -> Tensor: + """Load a single image. + + Args: + path: path to the image + + Returns: + the image + """ + filename = os.path.join(path) + with Image.open(filename) as img: + array = np.array(img.convert("RGB")) + tensor: Tensor = torch.from_numpy(array) # type: ignore[attr-defined] + # Convert from HxWxC to CxHxW + tensor = tensor.permute((2, 0, 1)) + return tensor + + def _load_target(self, path: str) -> Tensor: + """Load the target mask for a single image. + + Args: + path: path to the image + + Returns: + the target mask + """ + filename = os.path.join(path) + with Image.open(filename) as img: + array = np.array(img.convert("L")) + tensor: Tensor = torch.from_numpy(array) # type: ignore[attr-defined] + tensor = torch.clip(tensor, min=0, max=1) # type: ignore[attr-defined] + tensor = tensor.to(torch.long) # type: ignore[attr-defined] + return tensor + + def _check_integrity(self) -> bool: + """Checks the integrity of the dataset structure. + + Returns: + True if the dataset directories and split files are found, else False + """ + for directory in self.directories: + dirpath = os.path.join(self.root, directory) + if not os.path.exists(dirpath): + return False + return True + + def _download(self) -> None: + """Download the dataset and extract it. + + Raises: + AssertionError: if the checksum of split.py does not match + """ + if self._check_integrity(): + print("Files already downloaded and verified") + return + + for url, filename, md5 in zip(self.urls, self.filenames, self.md5s): + download_and_extract_archive( + url, + self.root, + filename=filename, + md5=md5 if self.checksum else None, + ) + + if os.path.exists(os.path.join(self.root, "__MACOSX")): + shutil.rmtree(os.path.join(self.root, "__MACOSX")) From 539a2ac7de6f66788b453b9bae3b113f8b5ce552 Mon Sep 17 00:00:00 2001 From: Isaac Corley <22203655+isaaccorley@users.noreply.github.com> Date: Thu, 9 Sep 2021 22:58:56 -0500 Subject: [PATCH 5/9] updated tests --- tests/datasets/test_etci2021.py | 37 ++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/tests/datasets/test_etci2021.py b/tests/datasets/test_etci2021.py index 6f77717d2d0..de440dd7e3e 100644 --- a/tests/datasets/test_etci2021.py +++ b/tests/datasets/test_etci2021.py @@ -31,19 +31,28 @@ def dataset( monkeypatch.setattr( # type: ignore[attr-defined] torchgeo.datasets.utils, "download_url", download_url ) - md5s = [ - "50c10eb07d6db9aee3ba36401e4a2c45", - "3e8b5a3cb95e6029e0e2c2d4b4ec6fba", - "c8ee1e5d3e478761cd00ebc6f28b0ae7", - ] data_dir = os.path.join("tests", "data", "etci2021") - urls = [ - os.path.join(data_dir, "train.zip"), - os.path.join(data_dir, "val_with_ref_labels.zip"), - os.path.join(data_dir, "test_without_ref_labels.zip"), - ] - monkeypatch.setattr(ETCI2021, "md5s", md5s) # type: ignore[attr-defined] - monkeypatch.setattr(ETCI2021, "urls", urls) # type: ignore[attr-defined] + metadata = { + "train": { + "filename": "train.zip", + "md5": "50c10eb07d6db9aee3ba36401e4a2c45", + "directory": "train", + "url": os.path.join(data_dir, "train.zip"), + }, + "val": { + "filename": "val_with_ref_labels.zip", + "md5": "3e8b5a3cb95e6029e0e2c2d4b4ec6fba", + "directory": "test", + "url": os.path.join(data_dir, "val_with_ref_labels.zip"), + }, + "test": { + "filename": "test_without_ref_labels.zip", + "md5": "c8ee1e5d3e478761cd00ebc6f28b0ae7", + "directory": "test_internal", + "url": os.path.join(data_dir, "test_without_ref_labels.zip"), + }, + } + monkeypatch.setattr(ETCI2021, "metadata", metadata) # type: ignore[attr-defined] # noqa: E501 root = str(tmp_path) split = request.param transforms = Identity() @@ -58,9 +67,9 @@ def test_getitem(self, dataset: ETCI2021) -> None: assert x["image"].shape[-2:] == x["mask"].shape[-2:] if dataset.split != "test": - assert x["mask"].ndim == 3 + assert x["mask"].shape[0] == 2 else: - assert x["mask"].ndim == 2 + assert x["mask"].shape[0] == 1 def test_len(self, dataset: ETCI2021) -> None: assert len(dataset) == 2 From 52ff79b2aee5b19c00d526d3447a44175a7f2c91 Mon Sep 17 00:00:00 2001 From: Isaac Corley <22203655+isaaccorley@users.noreply.github.com> Date: Thu, 9 Sep 2021 22:59:09 -0500 Subject: [PATCH 6/9] updated dataset to download only desired split file --- torchgeo/datasets/etci2021.py | 58 +++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 26 deletions(-) diff --git a/torchgeo/datasets/etci2021.py b/torchgeo/datasets/etci2021.py index 7326fe6f8f7..e740ab23ae9 100644 --- a/torchgeo/datasets/etci2021.py +++ b/torchgeo/datasets/etci2021.py @@ -48,22 +48,29 @@ class ETCI2021(VisionDataset): NASA Digital Transformation AI/ML thrust, and IEEE GRSS for organizing the ETCI competition'. """ # noqa: E501 - urls = [ - "https://drive.google.com/file/d/14HqNW5uWLS92n7KrxKgDwUTsSEST6LCr", - "https://drive.google.com/file/d/19sriKPHCZLfJn_Jmk3Z_0b3VaCBVRVyn", - "https://drive.google.com/file/d/1rpMVluASnSHBfm2FhpPDio0GyCPOqg7E", - ] - md5s = [ - "1e95792fe0f6e3c9000abdeab2a8ab0f", - "fd18cecb318efc69f8319f90c3771bdf", - "da9fa69e1498bd49d5c766338c6dac3d", - ] - filenames = ["train.zip", "val_with_ref_labels.zip", "test_without_ref_labels.zip"] - directories = ["train", "test", "test_internal"] splits = ["train", "val", "test"] bands = ["VV", "VH"] masks = ["flood", "water_body"] - split_to_folder = dict(train="train", val="test", test="test_internal") + metadata = { + "train": { + "filename": "train.zip", + "md5": "1e95792fe0f6e3c9000abdeab2a8ab0f", + "directory": "train", + "url": "https://drive.google.com/file/d/14HqNW5uWLS92n7KrxKgDwUTsSEST6LCr", + }, + "val": { + "filename": "val_with_ref_labels.zip", + "md5": "fd18cecb318efc69f8319f90c3771bdf", + "directory": "test", + "url": "https://drive.google.com/file/d/19sriKPHCZLfJn_Jmk3Z_0b3VaCBVRVyn", + }, + "test": { + "filename": "test_without_ref_labels.zip", + "md5": "da9fa69e1498bd49d5c766338c6dac3d", + "directory": "test_internal", + "url": "https://drive.google.com/file/d/1rpMVluASnSHBfm2FhpPDio0GyCPOqg7E", + }, + } def __init__( self, @@ -124,7 +131,7 @@ def __getitem__(self, index: int) -> Dict[str, Tensor]: flood_mask = self._load_target(files["flood_mask"]) mask = torch.stack(tensors=[water_mask, flood_mask], dim=0) else: - mask = water_mask + mask = water_mask.unsqueeze(0) image = torch.cat(tensors=[vv, vh], dim=0) # type: ignore[attr-defined] sample = {"image": image, "mask": mask} @@ -154,7 +161,7 @@ def _load_files(self, root: str, split: str) -> List[Dict[str, str]]: water body mask, flood mask (train/val only) """ files = [] - directory = self.split_to_folder[split] + directory = self.metadata[split]["directory"] folders = sorted(glob.glob(os.path.join(root, directory, "*"))) folders = [os.path.join(folder, "tiles") for folder in folders] for folder in folders: @@ -215,10 +222,10 @@ def _check_integrity(self) -> bool: Returns: True if the dataset directories and split files are found, else False """ - for directory in self.directories: - dirpath = os.path.join(self.root, directory) - if not os.path.exists(dirpath): - return False + directory = self.metadata[self.split]["directory"] + dirpath = os.path.join(self.root, directory) + if not os.path.exists(dirpath): + return False return True def _download(self) -> None: @@ -231,13 +238,12 @@ def _download(self) -> None: print("Files already downloaded and verified") return - for url, filename, md5 in zip(self.urls, self.filenames, self.md5s): - download_and_extract_archive( - url, - self.root, - filename=filename, - md5=md5 if self.checksum else None, - ) + download_and_extract_archive( + self.metadata[self.split]["url"], + self.root, + filename=self.metadata[self.split]["filename"], + md5=self.metadata[self.split]["md5"] if self.checksum else None, + ) if os.path.exists(os.path.join(self.root, "__MACOSX")): shutil.rmtree(os.path.join(self.root, "__MACOSX")) From 8914eec6a1d34289df7bf903fb3ea3a2e35cff9b Mon Sep 17 00:00:00 2001 From: Isaac Corley <22203655+isaaccorley@users.noreply.github.com> Date: Fri, 10 Sep 2021 14:43:34 -0500 Subject: [PATCH 7/9] removed flood mask from file list for test set and other formatting --- torchgeo/datasets/etci2021.py | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/torchgeo/datasets/etci2021.py b/torchgeo/datasets/etci2021.py index e740ab23ae9..e70bdfd9a85 100644 --- a/torchgeo/datasets/etci2021.py +++ b/torchgeo/datasets/etci2021.py @@ -27,8 +27,10 @@ class ETCI2021(VisionDataset): * 33,405 VV & VH Sentinel-1 Synthetic Aperture Radar (SAR) images * 2 binary masks per image representing water body & flood, respectively * 2 polarization band images (VV, VH) of 3 RGB channels per band - * 3 RGB channels per band generated by the Hybrid Pluggable Processing Pipeline 'hyp3' - * Images with 5x20m per pixel resolution (256x256) px) taken in Interferometric Wide Swath acquisition mode + * 3 RGB channels per band generated by the Hybrid Pluggable + Processing Pipeline 'hyp3' + * Images with 5x20m per pixel resolution (256x256) px) taken in + Interferometric Wide Swath acquisition mode * Flood events from 5 different regions Dataset format: @@ -45,10 +47,10 @@ class ETCI2021(VisionDataset): acknowledgements section: 'The authors would like to thank the NASA Earth Science Data Systems Program, - NASA Digital Transformation AI/ML thrust, and IEEE GRSS for organizing the ETCI competition'. - """ # noqa: E501 + NASA Digital Transformation AI/ML thrust, and IEEE GRSS for organizing + the ETCI competition'. + """ - splits = ["train", "val", "test"] bands = ["VV", "VH"] masks = ["flood", "water_body"] metadata = { @@ -95,7 +97,7 @@ def __init__( RuntimeError: if ``download=False`` and data is not found, or checksums don't match """ - assert split in self.splits + assert split in self.metadata.keys() self.root = root self.split = split @@ -169,17 +171,19 @@ def _load_files(self, root: str, split: str) -> List[Dict[str, str]]: vhs = glob.glob(os.path.join(folder, "vh", "*.png")) water_masks = glob.glob(os.path.join(folder, "water_body_label", "*.png")) - if split == "test": - flood_masks = [""] * len(water_masks) - else: + if split != "test": flood_masks = glob.glob(os.path.join(folder, "flood_label", "*.png")) - for vv, vh, flood_mask, water_mask in zip( - vvs, vhs, flood_masks, water_masks - ): - files.append( - dict(vv=vv, vh=vh, flood_mask=flood_mask, water_mask=water_mask) - ) + for vv, vh, flood_mask, water_mask in zip( + vvs, vhs, flood_masks, water_masks + ): + files.append( + dict(vv=vv, vh=vh, flood_mask=flood_mask, water_mask=water_mask) + ) + else: + for vv, vh, water_mask in zip(vvs, vhs, water_masks): + files.append(dict(vv=vv, vh=vh, water_mask=water_mask)) + return files def _load_image(self, path: str) -> Tensor: From cb30cba5ff9f28965cd4223285674e4bb173fc11 Mon Sep 17 00:00:00 2001 From: isaac <22203655+isaaccorley@users.noreply.github.com> Date: Fri, 10 Sep 2021 16:19:25 -0500 Subject: [PATCH 8/9] Update torchgeo/datasets/etci2021.py Co-authored-by: Adam J. Stewart --- torchgeo/datasets/etci2021.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchgeo/datasets/etci2021.py b/torchgeo/datasets/etci2021.py index a0d0317c963..881a808b22d 100644 --- a/torchgeo/datasets/etci2021.py +++ b/torchgeo/datasets/etci2021.py @@ -44,7 +44,7 @@ class ETCI2021(VisionDataset): 2. flood/water If you use this dataset in your research, please add the following to your - acknowledgements section: + acknowledgements section:: The authors would like to thank the NASA Earth Science Data Systems Program, NASA Digital Transformation AI/ML thrust, and IEEE GRSS for organizing From da27c7de6c91864fd2a4b64763f31f5d1d0514f7 Mon Sep 17 00:00:00 2001 From: Isaac Corley <22203655+isaaccorley@users.noreply.github.com> Date: Fri, 10 Sep 2021 20:04:39 -0500 Subject: [PATCH 9/9] fixed doc formatting --- torchgeo/datasets/etci2021.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/torchgeo/datasets/etci2021.py b/torchgeo/datasets/etci2021.py index 881a808b22d..f61cc604046 100644 --- a/torchgeo/datasets/etci2021.py +++ b/torchgeo/datasets/etci2021.py @@ -24,22 +24,25 @@ class ETCI2021(VisionDataset): dataset is a dataset for flood detection Dataset features: + * 33,405 VV & VH Sentinel-1 Synthetic Aperture Radar (SAR) images * 2 binary masks per image representing water body & flood, respectively * 2 polarization band images (VV, VH) of 3 RGB channels per band - * 3 RGB channels per band generated by the Hybrid Pluggable - Processing Pipeline 'hyp3' + * 3 RGB channels per band generated by the Hybrid Pluggable Processing + Pipeline (hyp3) * Images with 5x20m per pixel resolution (256x256) px) taken in Interferometric Wide Swath acquisition mode * Flood events from 5 different regions Dataset format: + * VV band three-channel png * VH band three-channel png * water body mask single-channel png where no water body = 0, water body = 255 * flood mask single-channel png where no flood = 0, flood = 255 Dataset classes: + 1. no flood/water 2. flood/water