From e975e911a5d46ac0e0b11159b5fde0a7ed538879 Mon Sep 17 00:00:00 2001
From: Isaac Corley <22203655+isaaccorley@users.noreply.github.com>
Date: Thu, 9 Sep 2021 21:41:42 -0500
Subject: [PATCH 1/9] add dataset to docs

---
 docs/api/datasets.rst | 5 +++++
 1 file changed, 5 insertions(+)
diff --git a/docs/api/datasets.rst b/docs/api/datasets.rst
index 034f3c7d463..842d995613f 100644
--- a/docs/api/datasets.rst
+++ b/docs/api/datasets.rst
@@ -87,6 +87,11 @@ CV4A Kenya Crop Type Competition
 
 .. autoclass:: CV4AKenyaCropType
 
+ETCI2021 Flood Detection
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. autoclass:: ETCI2021
+
 LandCover.ai (Land Cover from Aerial Imagery)
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 

From f38f7eb5406e5c92f5e1669974fe1972eef2097c Mon Sep 17 00:00:00 2001
From: Isaac Corley <22203655+isaaccorley@users.noreply.github.com>
Date: Thu, 9 Sep 2021 21:42:14 -0500
Subject: [PATCH 2/9] add sample test data

---
 tests/data/etci2021/test_without_ref_labels.zip | Bin 0 -> 4874 bytes
 tests/data/etci2021/train.zip                   | Bin 0 -> 5610 bytes
 tests/data/etci2021/val_with_ref_labels.zip     | Bin 0 -> 5512 bytes
 3 files changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 tests/data/etci2021/test_without_ref_labels.zip
 create mode 100644 tests/data/etci2021/train.zip
 create mode 100644 tests/data/etci2021/val_with_ref_labels.zip

diff --git a/tests/data/etci2021/test_without_ref_labels.zip b/tests/data/etci2021/test_without_ref_labels.zip
new file mode 100644
index 0000000000000000000000000000000000000000..0f94a1e4ce5b827f3efe7991c2c526a20d4a11a2
GIT binary patch
literal 4874
zcmc(iO=uHA6vroDZDVcGR)Z9g3fdypW)tdHDWZrV#6$4oyxq{nL}F5tb#3vWq8C9t
zlwQ0jf>5*~coMu>p$HYho)x7a6-Dr%ci&_`Gc&t88Q0kILJDF2Z-4XNzyIWOvG@jr
ze^wXwoN2r<UwYAYq^WgHE>$$OR#D37b7zw%mU}&};JQ51jUw@{F@zc~dHbBJ#5p6>
zIW={vR?^|jYE3K3St)Z^%47~`l9bJk9W+!PF9oRdJZ<ha+oFnADywxv>m@u5F3%?#
zBDD4kY3aHlw+>Rc7;;+xeHX$2?_E%@9f7}2Kq;bfzMSQmeUdy2rR$^9mGh0e#nf>T
zt$pA4v%daiZmlpm`T6&+>11^E&fwcu@m<67ou6W-C+?5l-MO_s*WcH7eInJdTwO~&
zJes<>u=rqQeBss*awelc9F9eU<1#SBMKp`$LgiRYb8;-Ui{l`{@wm`{Tv1@H<wA98
zRxT?A)m-lnKol1@)p9uET#VZVSj@ttG0MVL=w))&;W=THd=}=YxGby(ZmgGmYlgWm
z5n6l}Mrv6Wwil#E*m8ffdJH+%SlA!?4;J<!9FC@i4FE%2+{?!IufPVBHbz8eTbTVZ
zhhy6;><z(@&%*4E%?D#!*dZW_i<>H+g$0Up&Fa6lut;=A%q!1rGn3sW^y3(QOQySo
zk?tsN&Y83^y3agGoykJtPI{oo$Q${bqe$mGx8k!F6)npIU1s8Qbm`3J)*XnD^!%UH
z+(VoIL7JKVu(X$XTG9wTx^Y`n+P6c;0?afRd^2>Kv)aV-Rfq}^!o~>F%v{Gy=)I^k
zCn3`$g9*_&%}wYDe06HxKwo`t%Y#OkiOri=8Ci|si5iZF(ui;O)_fWE7mUu|Xu1<T
z9Xp2In5h7xq0^Yf2#@on78PAagR_~BrP9gFt@}|(w~^BTGR|pCX&$3F)zKPc+v8Y+
zVBdu`2=>>`;8~zEmz&V;PN6nUqhY2m@5ND?7H3{mnv)Q>(a>qiP3U5m2q8wcf~#WY
gDsNuRM)NJ4M!4wt8%+Xc8==kcJCsD|9TY-;0F4YS0ssI2

literal 0
HcmV?d00001

diff --git a/tests/data/etci2021/train.zip b/tests/data/etci2021/train.zip
new file mode 100644
index 0000000000000000000000000000000000000000..5cba717d7a1083433c852e30087e86ed24ad6373
GIT binary patch
literal 5610
zcmd6rO-vI(7={Ph@*^f16ym{%L=k`KE+_&a1`nP%K&(B`1MN1g6m42UYXuXdMm=fJ
z5EDfbjYjPO6TEpQQ7&@wphuC!XfP%o^dRxzH{GA^?#!?=ixwf%v<dsYGtW2k&RhC>
zJ>Crn|4cvM7bv{ZUMkRblpR%uQ~uzoYUFwSs!N%Nac-;zm3hB-5GuU%8_!2<&o>&+
zN0ro2Qi;VgiEyhVAC=nV!&zBA(k^wXr2|m*f=KCRp3>RjWIR(8z#`1K->}VnKfgdO
zr)oTd`gyv>Rv=o1FrTS2@k__x(+L;>Aq_Ue@Yo?KJPsq5YZ*xm6`F+?OIh&Ojo&k$
zKTR%0Bay|QKSrv{-`?E0Fz4Ofe5dk*=VbSTmWjr?%;c8Z+RNSb74zw(`bRzWSMxIu
z-*n}#?Loy>-(CZ*go<kxX87V?;Y#><a3xBG>$(kB2Y=_!E7|yHIGT=)hm%S)o>bvY
z0l51j@CMhQ=!~*d95329w(+m(bTXZenGsF^K0GLvKtHYn?!^8ddUk(qFcy@GM@N;+
zSw(ecS?b72QmZVt7R^)0_?w&yi+J-(Ti5sUme-y67qodrq}6=_3M_W(;L5YEL1~`6
z43O~PJ`?<)5`Je;CG+DM8>n{Pe$(CfBOt?r8(YY=o)g4UIq_2)L78_&gb%+1aPLLn
z1>H!kH@C9#9UfN7-lpa4&FI^4{8rE0J~eX_yQ)~r?&yukT&u^Z#lC~U;X#Iy6#IMG
zGR+p5X&xEf@14cSByL{QZ+@|Y`-2|N<y40BYd&Bs4xuMkB4v6%R$-_v4)oZ6L)-?=
zAjphu263RAe`46(K@TTx*C1{Q>G6Qj+};@knV!ucLhwaYJuz%izuF;+dxJX8(gIip
zE&F3(4eIRq>bpgCb7w+iE;bW-?Byn8b!QN!CS>dk^use?fwRfyO8D<_vHN3#F+J?H
zi^lX=w}UHpY_f=SR_F|vBK+P+CSh}*@9|3M&WLldz|QY87KWX1ML><UbbX&2|G9A9
z>SV?>Bbck?q@B||k(zed``e0OR~5gSGXicuB-5)I!7}fp=x#{Ov1}kA%bo~Ta{?i>
zo$&BFA(>0fYz{&MQ|}6vo_13ZK8RDQz)=ZWu-tVc%bKBnW>zQSgVTvn9sG`fc&rA0
F`UBzHrgZ=S

literal 0
HcmV?d00001

diff --git a/tests/data/etci2021/val_with_ref_labels.zip b/tests/data/etci2021/val_with_ref_labels.zip
new file mode 100644
index 0000000000000000000000000000000000000000..acd6d76c332619c4d33494bb8400e51489cbdb5b
GIT binary patch
literal 5512
zcmd6r!D|yi6vih_(l%B>s|_AXQ3wV>n@!R{3B_7O5DFz|6e?(%*fmi?Q<^ol2eDEQ
z9*Rg2#fu^cMZI|tL_~Y>(jJQcfTbV`Qs}`V9t7v@?(A-MW_G9BhT1YAf$V&5e)Hy=
zugwhl1G^CXJa~2Voccz838GG<$W<kFJ{v;*^-t+Mj2n|}$QStGM@W6i7?0nxJ>Eqg
zzgn(L$YWRJTtbTXN~yS{B$DxDPg0wAWwY}}T<2A;mF22AJ17WO9Tl9tR&y0NfH}4z
z!1Cm}eES6cbQ+H#aRDB<+~jd7H-$&7);&H}RJR8f$yn&m-Rsrw-{w{eBO@ytzs5sN
zUuF+2Ed(Obhb>?IXZxOZ-#^?@o!j5uKHV2?{#;oNKR+3snScFkF+G3x2;y(9Fs4$Y
zF_mKOm`Xc^DJ8hsH}i@-kt<Y2r*h?dK`wiQHtbc-4dG7llLQxp>#vM%30>BM(*Htj
z>1Dlf^PD%4(44nNYp>~#={yXsH(s<kI?s8{c`eR6iwy||clJfw>^w`c&rY28(n30a
z84rel=Dd3ZotGhC+galqEa&}$5i0aTx665#y<*axw}=tJ;9?r`iivVw1}4*a0Z{`l
zsP{HZJm)oPjA^~fcHRLD3<eh(91ZKO&U0Q@Ik!$ToEOeg1NptR@#8tKeL<A-_F`->
zxaiK<(D9ttNlBPm2~%UZ&{hwpuc^z=3(W`h_`Dab4ZvGHLI<@7O<^&oq64&bs(vz+
zZSB@LjWrl8k{RS{M37bEv8x6#_{}Rt>y$PGl~l8L661wzvMhvN+_~V>cY%veTNW1M
zeS<Q7r<%0Ruy1EQfX*htn!v`L#6$qx5*Kp^pj*0AO>6+Ex(+yn|AJJH22YW1&`#29
z9K(h*^ursk(!5ehcW7qqR34j*WRwS{F%u9YgR8NGUME#Hl&ql&P}V%CZWj+az>?z&
zJl+M_Jj@G1Jt52}YdRktX000gn%y)k&d0b=tR71TTdPt<P>DDn#9Zd3p=~N1a)}sh
z>aw_)Z7&VDq_>TKHsD5s$jhurYi~NTlA2&UAGL^~uIbLl46m@S8UQ-J5-|YqTU^W?
zfa!dUyl?FQI*gK<0KkV<F#wEcirF(`sfmLQrbHTW91D4rc7tO$pUc^xV<ssN$ov&7
z4_8y~e9XXULZKrkQ3WVP+_kKm$T$M}O=MLS<^cheIx0bYED`F!zx`M{*#@8f0pZh+
AS^xk5

literal 0
HcmV?d00001


From d28c831e6d42a85ffd08539217799a696cf3143a Mon Sep 17 00:00:00 2001
From: Isaac Corley <22203655+isaaccorley@users.noreply.github.com>
Date: Thu, 9 Sep 2021 21:42:29 -0500
Subject: [PATCH 3/9] add dataset unit tests

---
 tests/datasets/test_etci2021.py | 77 +++++++++++++++++++++++++++++++++
 1 file changed, 77 insertions(+)
 create mode 100644 tests/datasets/test_etci2021.py

diff --git a/tests/datasets/test_etci2021.py b/tests/datasets/test_etci2021.py
new file mode 100644
index 00000000000..6f77717d2d0
--- /dev/null
+++ b/tests/datasets/test_etci2021.py
@@ -0,0 +1,77 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+import os
+import shutil
+from pathlib import Path
+from typing import Generator
+
+import pytest
+import torch
+from _pytest.fixtures import SubRequest
+from _pytest.monkeypatch import MonkeyPatch
+
+import torchgeo.datasets.utils
+from torchgeo.datasets import ETCI2021
+from torchgeo.transforms import Identity
+
+
+def download_url(url: str, root: str, *args: str) -> None:
+    shutil.copy(url, root)
+
+
+class TestETCI2021:
+    @pytest.fixture(params=["train", "val", "test"])
+    def dataset(
+        self,
+        monkeypatch: Generator[MonkeyPatch, None, None],
+        tmp_path: Path,
+        request: SubRequest,
+    ) -> ETCI2021:
+        monkeypatch.setattr(  # type: ignore[attr-defined]
+            torchgeo.datasets.utils, "download_url", download_url
+        )
+        md5s = [
+            "50c10eb07d6db9aee3ba36401e4a2c45",
+            "3e8b5a3cb95e6029e0e2c2d4b4ec6fba",
+            "c8ee1e5d3e478761cd00ebc6f28b0ae7",
+        ]
+        data_dir = os.path.join("tests", "data", "etci2021")
+        urls = [
+            os.path.join(data_dir, "train.zip"),
+            os.path.join(data_dir, "val_with_ref_labels.zip"),
+            os.path.join(data_dir, "test_without_ref_labels.zip"),
+        ]
+        monkeypatch.setattr(ETCI2021, "md5s", md5s)  # type: ignore[attr-defined]
+        monkeypatch.setattr(ETCI2021, "urls", urls)  # type: ignore[attr-defined]
+        root = str(tmp_path)
+        split = request.param
+        transforms = Identity()
+        return ETCI2021(root, split, transforms, download=True, checksum=True)
+
+    def test_getitem(self, dataset: ETCI2021) -> None:
+        x = dataset[0]
+        assert isinstance(x, dict)
+        assert isinstance(x["image"], torch.Tensor)
+        assert isinstance(x["mask"], torch.Tensor)
+        assert x["image"].shape[0] == 6
+        assert x["image"].shape[-2:] == x["mask"].shape[-2:]
+
+        if dataset.split != "test":
+            assert x["mask"].ndim == 3
+        else:
+            assert x["mask"].ndim == 2
+
+    def test_len(self, dataset: ETCI2021) -> None:
+        assert len(dataset) == 2
+
+    def test_already_downloaded(self, dataset: ETCI2021) -> None:
+        ETCI2021(root=dataset.root, download=True)
+
+    def test_invalid_split(self) -> None:
+        with pytest.raises(AssertionError):
+            ETCI2021(split="foo")
+
+    def test_not_downloaded(self, tmp_path: Path) -> None:
+        with pytest.raises(RuntimeError, match="Dataset not found or corrupted."):
+            ETCI2021(str(tmp_path))

From 986be4fbfa96f594571b024517602e361e4c1942 Mon Sep 17 00:00:00 2001
From: Isaac Corley <22203655+isaaccorley@users.noreply.github.com>
Date: Thu, 9 Sep 2021 22:06:48 -0500
Subject: [PATCH 4/9] add etci2021 dataset

---
 torchgeo/datasets/__init__.py |   2 +
 torchgeo/datasets/etci2021.py | 243 ++++++++++++++++++++++++++++++++++
 2 files changed, 245 insertions(+)
 create mode 100644 torchgeo/datasets/etci2021.py

diff --git a/torchgeo/datasets/__init__.py b/torchgeo/datasets/__init__.py
index d249b6072b4..d027012b603 100644
--- a/torchgeo/datasets/__init__.py
+++ b/torchgeo/datasets/__init__.py
@@ -22,6 +22,7 @@
 from .cowc import COWC, COWCCounting, COWCDetection
 from .cv4a_kenya_crop_type import CV4AKenyaCropType
 from .cyclone import TropicalCycloneWindEstimation
+from .etci2021 import ETCI2021
 from .geo import GeoDataset, RasterDataset, VectorDataset, VisionDataset, ZipDataset
 from .landcoverai import LandCoverAI
 from .landsat import (
@@ -81,6 +82,7 @@
     "COWCCounting",
     "COWCDetection",
     "CV4AKenyaCropType",
+    "ETCI2021",
     "LandCoverAI",
     "LEVIRCDPlus",
     "PatternNet",
diff --git a/torchgeo/datasets/etci2021.py b/torchgeo/datasets/etci2021.py
new file mode 100644
index 00000000000..7326fe6f8f7
--- /dev/null
+++ b/torchgeo/datasets/etci2021.py
@@ -0,0 +1,243 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+"""ETCI 2021 dataset."""
+
+import glob
+import os
+import shutil
+from typing import Callable, Dict, List, Optional
+
+import numpy as np
+import torch
+from PIL import Image
+from torch import Tensor
+
+from .geo import VisionDataset
+from .utils import download_and_extract_archive
+
+
+class ETCI2021(VisionDataset):
+    """ETCI 2021 Flood Detection dataset.
+
+    The `ETCI2021 <https://nasa-impact.github.io/etci2021/>`_
+    dataset is a dataset for flood detection
+
+    Dataset features:
+    * 33,405 VV & VH Sentinel-1 Synthetic Aperture Radar (SAR) images
+    * 2 binary masks per image representing water body & flood, respectively
+    * 2 polarization band images (VV, VH) of 3 RGB channels per band
+    * 3 RGB channels per band generated by the Hybrid Pluggable Processing Pipeline 'hyp3'
+    * Images with 5x20m per pixel resolution (256x256) px) taken in Interferometric Wide Swath acquisition mode
+    * Flood events from 5 different regions
+
+    Dataset format:
+    * VV band three-channel png
+    * VH band three-channel png
+    * water body mask single-channel png where no water body = 0, water body = 255
+    * flood mask single-channel png where no flood = 0, flood = 255
+
+    Dataset classes:
+    1. no flood/water
+    2. flood/water
+
+    If you use this dataset in your research, please add the following to your
+    acknowledgements section:
+
+    'The authors would like to thank the NASA Earth Science Data Systems Program,
+    NASA Digital Transformation AI/ML thrust, and IEEE GRSS for organizing the ETCI competition'.
+    """  # noqa: E501
+
+    urls = [
+        "https://drive.google.com/file/d/14HqNW5uWLS92n7KrxKgDwUTsSEST6LCr",
+        "https://drive.google.com/file/d/19sriKPHCZLfJn_Jmk3Z_0b3VaCBVRVyn",
+        "https://drive.google.com/file/d/1rpMVluASnSHBfm2FhpPDio0GyCPOqg7E",
+    ]
+    md5s = [
+        "1e95792fe0f6e3c9000abdeab2a8ab0f",
+        "fd18cecb318efc69f8319f90c3771bdf",
+        "da9fa69e1498bd49d5c766338c6dac3d",
+    ]
+    filenames = ["train.zip", "val_with_ref_labels.zip", "test_without_ref_labels.zip"]
+    directories = ["train", "test", "test_internal"]
+    splits = ["train", "val", "test"]
+    bands = ["VV", "VH"]
+    masks = ["flood", "water_body"]
+    split_to_folder = dict(train="train", val="test", test="test_internal")
+
+    def __init__(
+        self,
+        root: str = "data",
+        split: str = "train",
+        transforms: Optional[Callable[[Dict[str, Tensor]], Dict[str, Tensor]]] = None,
+        download: bool = False,
+        checksum: bool = False,
+    ) -> None:
+        """Initialize a new ETCI 2021 dataset instance.
+
+        Args:
+            root: root directory where dataset can be found
+            split: one of "train", "val", or "test"
+            transforms: a function/transform that takes input sample and its target as
+                entry and returns a transformed version
+            download: if True, download dataset and store it in the root directory
+            checksum: if True, check the MD5 of the downloaded files (may be slow)
+
+        Raises:
+            AssertionError: if ``split`` argument is invalid
+            RuntimeError: if ``download=False`` and data is not found, or checksums
+                don't match
+        """
+        assert split in self.splits
+
+        self.root = root
+        self.split = split
+        self.transforms = transforms
+        self.checksum = checksum
+
+        if download:
+            self._download()
+
+        if not self._check_integrity():
+            raise RuntimeError(
+                "Dataset not found or corrupted. "
+                + "You can use download=True to download it"
+            )
+
+        self.files = self._load_files(self.root, self.split)
+
+    def __getitem__(self, index: int) -> Dict[str, Tensor]:
+        """Return the sample at the given index.
+
+        Args:
+            index: index of the sample to return
+
+        Returns:
+            dict with the "image" and "mask" tensors at that index
+        """
+        files = self.files[index]
+        vv = self._load_image(files["vv"])
+        vh = self._load_image(files["vh"])
+        water_mask = self._load_target(files["water_mask"])
+
+        if self.split != "test":
+            flood_mask = self._load_target(files["flood_mask"])
+            mask = torch.stack(tensors=[water_mask, flood_mask], dim=0)
+        else:
+            mask = water_mask
+
+        image = torch.cat(tensors=[vv, vh], dim=0)  # type: ignore[attr-defined]
+        sample = {"image": image, "mask": mask}
+
+        if self.transforms is not None:
+            sample = self.transforms(sample)
+
+        return sample
+
+    def __len__(self) -> int:
+        """Return the number of data points in the dataset.
+
+        Returns:
+            length of the dataset
+        """
+        return len(self.files)
+
+    def _load_files(self, root: str, split: str) -> List[Dict[str, str]]:
+        """Return the paths of the files in the dataset.
+
+        Args:
+            root: root dir of dataset
+            split: subset of dataset, one of [train, val, test]
+
+        Returns:
+            list of dicts containing paths for each pair of vv, vh,
+            water body mask, flood mask (train/val only)
+        """
+        files = []
+        directory = self.split_to_folder[split]
+        folders = sorted(glob.glob(os.path.join(root, directory, "*")))
+        folders = [os.path.join(folder, "tiles") for folder in folders]
+        for folder in folders:
+            vvs = glob.glob(os.path.join(folder, "vv", "*.png"))
+            vhs = glob.glob(os.path.join(folder, "vh", "*.png"))
+            water_masks = glob.glob(os.path.join(folder, "water_body_label", "*.png"))
+
+            if split == "test":
+                flood_masks = [""] * len(water_masks)
+            else:
+                flood_masks = glob.glob(os.path.join(folder, "flood_label", "*.png"))
+
+            for vv, vh, flood_mask, water_mask in zip(
+                vvs, vhs, flood_masks, water_masks
+            ):
+                files.append(
+                    dict(vv=vv, vh=vh, flood_mask=flood_mask, water_mask=water_mask)
+                )
+        return files
+
+    def _load_image(self, path: str) -> Tensor:
+        """Load a single image.
+
+        Args:
+            path: path to the image
+
+        Returns:
+            the image
+        """
+        filename = os.path.join(path)
+        with Image.open(filename) as img:
+            array = np.array(img.convert("RGB"))
+            tensor: Tensor = torch.from_numpy(array)  # type: ignore[attr-defined]
+            # Convert from HxWxC to CxHxW
+            tensor = tensor.permute((2, 0, 1))
+            return tensor
+
+    def _load_target(self, path: str) -> Tensor:
+        """Load the target mask for a single image.
+
+        Args:
+            path: path to the image
+
+        Returns:
+            the target mask
+        """
+        filename = os.path.join(path)
+        with Image.open(filename) as img:
+            array = np.array(img.convert("L"))
+            tensor: Tensor = torch.from_numpy(array)  # type: ignore[attr-defined]
+            tensor = torch.clip(tensor, min=0, max=1)  # type: ignore[attr-defined]
+            tensor = tensor.to(torch.long)  # type: ignore[attr-defined]
+            return tensor
+
+    def _check_integrity(self) -> bool:
+        """Checks the integrity of the dataset structure.
+
+        Returns:
+            True if the dataset directories and split files are found, else False
+        """
+        for directory in self.directories:
+            dirpath = os.path.join(self.root, directory)
+            if not os.path.exists(dirpath):
+                return False
+        return True
+
+    def _download(self) -> None:
+        """Download the dataset and extract it.
+
+        Raises:
+            AssertionError: if checksum is enabled and the archive MD5 does not match
+        """
+        if self._check_integrity():
+            print("Files already downloaded and verified")
+            return
+
+        for url, filename, md5 in zip(self.urls, self.filenames, self.md5s):
+            download_and_extract_archive(
+                url,
+                self.root,
+                filename=filename,
+                md5=md5 if self.checksum else None,
+            )
+
+        if os.path.exists(os.path.join(self.root, "__MACOSX")):
+            shutil.rmtree(os.path.join(self.root, "__MACOSX"))

From 539a2ac7de6f66788b453b9bae3b113f8b5ce552 Mon Sep 17 00:00:00 2001
From: Isaac Corley <22203655+isaaccorley@users.noreply.github.com>
Date: Thu, 9 Sep 2021 22:58:56 -0500
Subject: [PATCH 5/9] updated tests

---
 tests/datasets/test_etci2021.py | 37 ++++++++++++++++++++-------------
 1 file changed, 23 insertions(+), 14 deletions(-)

diff --git a/tests/datasets/test_etci2021.py b/tests/datasets/test_etci2021.py
index 6f77717d2d0..de440dd7e3e 100644
--- a/tests/datasets/test_etci2021.py
+++ b/tests/datasets/test_etci2021.py
@@ -31,19 +31,28 @@ def dataset(
         monkeypatch.setattr(  # type: ignore[attr-defined]
             torchgeo.datasets.utils, "download_url", download_url
         )
-        md5s = [
-            "50c10eb07d6db9aee3ba36401e4a2c45",
-            "3e8b5a3cb95e6029e0e2c2d4b4ec6fba",
-            "c8ee1e5d3e478761cd00ebc6f28b0ae7",
-        ]
         data_dir = os.path.join("tests", "data", "etci2021")
-        urls = [
-            os.path.join(data_dir, "train.zip"),
-            os.path.join(data_dir, "val_with_ref_labels.zip"),
-            os.path.join(data_dir, "test_without_ref_labels.zip"),
-        ]
-        monkeypatch.setattr(ETCI2021, "md5s", md5s)  # type: ignore[attr-defined]
-        monkeypatch.setattr(ETCI2021, "urls", urls)  # type: ignore[attr-defined]
+        metadata = {
+            "train": {
+                "filename": "train.zip",
+                "md5": "50c10eb07d6db9aee3ba36401e4a2c45",
+                "directory": "train",
+                "url": os.path.join(data_dir, "train.zip"),
+            },
+            "val": {
+                "filename": "val_with_ref_labels.zip",
+                "md5": "3e8b5a3cb95e6029e0e2c2d4b4ec6fba",
+                "directory": "test",
+                "url": os.path.join(data_dir, "val_with_ref_labels.zip"),
+            },
+            "test": {
+                "filename": "test_without_ref_labels.zip",
+                "md5": "c8ee1e5d3e478761cd00ebc6f28b0ae7",
+                "directory": "test_internal",
+                "url": os.path.join(data_dir, "test_without_ref_labels.zip"),
+            },
+        }
+        monkeypatch.setattr(ETCI2021, "metadata", metadata)  # type: ignore[attr-defined]   # noqa: E501
         root = str(tmp_path)
         split = request.param
         transforms = Identity()
@@ -58,9 +67,9 @@ def test_getitem(self, dataset: ETCI2021) -> None:
         assert x["image"].shape[-2:] == x["mask"].shape[-2:]
 
         if dataset.split != "test":
-            assert x["mask"].ndim == 3
+            assert x["mask"].shape[0] == 2
         else:
-            assert x["mask"].ndim == 2
+            assert x["mask"].shape[0] == 1
 
     def test_len(self, dataset: ETCI2021) -> None:
         assert len(dataset) == 2

From 52ff79b2aee5b19c00d526d3447a44175a7f2c91 Mon Sep 17 00:00:00 2001
From: Isaac Corley <22203655+isaaccorley@users.noreply.github.com>
Date: Thu, 9 Sep 2021 22:59:09 -0500
Subject: [PATCH 6/9] updated dataset to download only desired split file

---
 torchgeo/datasets/etci2021.py | 58 +++++++++++++++++++----------------
 1 file changed, 32 insertions(+), 26 deletions(-)

diff --git a/torchgeo/datasets/etci2021.py b/torchgeo/datasets/etci2021.py
index 7326fe6f8f7..e740ab23ae9 100644
--- a/torchgeo/datasets/etci2021.py
+++ b/torchgeo/datasets/etci2021.py
@@ -48,22 +48,29 @@ class ETCI2021(VisionDataset):
     NASA Digital Transformation AI/ML thrust, and IEEE GRSS for organizing the ETCI competition'.
     """  # noqa: E501
 
-    urls = [
-        "https://drive.google.com/file/d/14HqNW5uWLS92n7KrxKgDwUTsSEST6LCr",
-        "https://drive.google.com/file/d/19sriKPHCZLfJn_Jmk3Z_0b3VaCBVRVyn",
-        "https://drive.google.com/file/d/1rpMVluASnSHBfm2FhpPDio0GyCPOqg7E",
-    ]
-    md5s = [
-        "1e95792fe0f6e3c9000abdeab2a8ab0f",
-        "fd18cecb318efc69f8319f90c3771bdf",
-        "da9fa69e1498bd49d5c766338c6dac3d",
-    ]
-    filenames = ["train.zip", "val_with_ref_labels.zip", "test_without_ref_labels.zip"]
-    directories = ["train", "test", "test_internal"]
     splits = ["train", "val", "test"]
     bands = ["VV", "VH"]
     masks = ["flood", "water_body"]
-    split_to_folder = dict(train="train", val="test", test="test_internal")
+    metadata = {
+        "train": {
+            "filename": "train.zip",
+            "md5": "1e95792fe0f6e3c9000abdeab2a8ab0f",
+            "directory": "train",
+            "url": "https://drive.google.com/file/d/14HqNW5uWLS92n7KrxKgDwUTsSEST6LCr",
+        },
+        "val": {
+            "filename": "val_with_ref_labels.zip",
+            "md5": "fd18cecb318efc69f8319f90c3771bdf",
+            "directory": "test",
+            "url": "https://drive.google.com/file/d/19sriKPHCZLfJn_Jmk3Z_0b3VaCBVRVyn",
+        },
+        "test": {
+            "filename": "test_without_ref_labels.zip",
+            "md5": "da9fa69e1498bd49d5c766338c6dac3d",
+            "directory": "test_internal",
+            "url": "https://drive.google.com/file/d/1rpMVluASnSHBfm2FhpPDio0GyCPOqg7E",
+        },
+    }
 
     def __init__(
         self,
@@ -124,7 +131,7 @@ def __getitem__(self, index: int) -> Dict[str, Tensor]:
             flood_mask = self._load_target(files["flood_mask"])
             mask = torch.stack(tensors=[water_mask, flood_mask], dim=0)
         else:
-            mask = water_mask
+            mask = water_mask.unsqueeze(0)
 
         image = torch.cat(tensors=[vv, vh], dim=0)  # type: ignore[attr-defined]
         sample = {"image": image, "mask": mask}
@@ -154,7 +161,7 @@ def _load_files(self, root: str, split: str) -> List[Dict[str, str]]:
             water body mask, flood mask (train/val only)
         """
         files = []
-        directory = self.split_to_folder[split]
+        directory = self.metadata[split]["directory"]
         folders = sorted(glob.glob(os.path.join(root, directory, "*")))
         folders = [os.path.join(folder, "tiles") for folder in folders]
         for folder in folders:
@@ -215,10 +222,10 @@ def _check_integrity(self) -> bool:
         Returns:
             True if the dataset directories and split files are found, else False
         """
-        for directory in self.directories:
-            dirpath = os.path.join(self.root, directory)
-            if not os.path.exists(dirpath):
-                return False
+        directory = self.metadata[self.split]["directory"]
+        dirpath = os.path.join(self.root, directory)
+        if not os.path.exists(dirpath):
+            return False
         return True
 
     def _download(self) -> None:
@@ -231,13 +238,12 @@ def _download(self) -> None:
             print("Files already downloaded and verified")
             return
 
-        for url, filename, md5 in zip(self.urls, self.filenames, self.md5s):
-            download_and_extract_archive(
-                url,
-                self.root,
-                filename=filename,
-                md5=md5 if self.checksum else None,
-            )
+        download_and_extract_archive(
+            self.metadata[self.split]["url"],
+            self.root,
+            filename=self.metadata[self.split]["filename"],
+            md5=self.metadata[self.split]["md5"] if self.checksum else None,
+        )
 
         if os.path.exists(os.path.join(self.root, "__MACOSX")):
             shutil.rmtree(os.path.join(self.root, "__MACOSX"))

From 8914eec6a1d34289df7bf903fb3ea3a2e35cff9b Mon Sep 17 00:00:00 2001
From: Isaac Corley <22203655+isaaccorley@users.noreply.github.com>
Date: Fri, 10 Sep 2021 14:43:34 -0500
Subject: [PATCH 7/9] removed flood mask from file list for test set and other
 formatting

---
 torchgeo/datasets/etci2021.py | 34 +++++++++++++++++++---------------
 1 file changed, 19 insertions(+), 15 deletions(-)

diff --git a/torchgeo/datasets/etci2021.py b/torchgeo/datasets/etci2021.py
index e740ab23ae9..e70bdfd9a85 100644
--- a/torchgeo/datasets/etci2021.py
+++ b/torchgeo/datasets/etci2021.py
@@ -27,8 +27,10 @@ class ETCI2021(VisionDataset):
     * 33,405 VV & VH Sentinel-1 Synthetic Aperture Radar (SAR) images
     * 2 binary masks per image representing water body & flood, respectively
     * 2 polarization band images (VV, VH) of 3 RGB channels per band
-    * 3 RGB channels per band generated by the Hybrid Pluggable Processing Pipeline 'hyp3'
-    * Images with 5x20m per pixel resolution (256x256) px) taken in Interferometric Wide Swath acquisition mode
+    * 3 RGB channels per band generated by the Hybrid Pluggable
+    Processing Pipeline 'hyp3'
+    * Images with 5x20m per pixel resolution (256x256) px) taken in
+    Interferometric Wide Swath acquisition mode
     * Flood events from 5 different regions
 
     Dataset format:
@@ -45,10 +47,10 @@ class ETCI2021(VisionDataset):
     acknowledgements section:
 
     'The authors would like to thank the NASA Earth Science Data Systems Program,
-    NASA Digital Transformation AI/ML thrust, and IEEE GRSS for organizing the ETCI competition'.
-    """  # noqa: E501
+    NASA Digital Transformation AI/ML thrust, and IEEE GRSS for organizing
+    the ETCI competition'.
+    """
 
-    splits = ["train", "val", "test"]
     bands = ["VV", "VH"]
     masks = ["flood", "water_body"]
     metadata = {
@@ -95,7 +97,7 @@ def __init__(
             RuntimeError: if ``download=False`` and data is not found, or checksums
                 don't match
         """
-        assert split in self.splits
+        assert split in self.metadata.keys()
 
         self.root = root
         self.split = split
@@ -169,17 +171,19 @@ def _load_files(self, root: str, split: str) -> List[Dict[str, str]]:
             vhs = glob.glob(os.path.join(folder, "vh", "*.png"))
             water_masks = glob.glob(os.path.join(folder, "water_body_label", "*.png"))
 
-            if split == "test":
-                flood_masks = [""] * len(water_masks)
-            else:
+            if split != "test":
                 flood_masks = glob.glob(os.path.join(folder, "flood_label", "*.png"))
 
-            for vv, vh, flood_mask, water_mask in zip(
-                vvs, vhs, flood_masks, water_masks
-            ):
-                files.append(
-                    dict(vv=vv, vh=vh, flood_mask=flood_mask, water_mask=water_mask)
-                )
+                for vv, vh, flood_mask, water_mask in zip(
+                    vvs, vhs, flood_masks, water_masks
+                ):
+                    files.append(
+                        dict(vv=vv, vh=vh, flood_mask=flood_mask, water_mask=water_mask)
+                    )
+            else:
+                for vv, vh, water_mask in zip(vvs, vhs, water_masks):
+                    files.append(dict(vv=vv, vh=vh, water_mask=water_mask))
+
         return files
 
     def _load_image(self, path: str) -> Tensor:

From cb30cba5ff9f28965cd4223285674e4bb173fc11 Mon Sep 17 00:00:00 2001
From: isaac <22203655+isaaccorley@users.noreply.github.com>
Date: Fri, 10 Sep 2021 16:19:25 -0500
Subject: [PATCH 8/9] Update torchgeo/datasets/etci2021.py

Co-authored-by: Adam J. Stewart <ajstewart426@gmail.com>
---
 torchgeo/datasets/etci2021.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/torchgeo/datasets/etci2021.py b/torchgeo/datasets/etci2021.py
index a0d0317c963..881a808b22d 100644
--- a/torchgeo/datasets/etci2021.py
+++ b/torchgeo/datasets/etci2021.py
@@ -44,7 +44,7 @@ class ETCI2021(VisionDataset):
     2. flood/water
 
     If you use this dataset in your research, please add the following to your
-    acknowledgements section:
+    acknowledgements section::
 
         The authors would like to thank the NASA Earth Science Data Systems Program,
         NASA Digital Transformation AI/ML thrust, and IEEE GRSS for organizing

From da27c7de6c91864fd2a4b64763f31f5d1d0514f7 Mon Sep 17 00:00:00 2001
From: Isaac Corley <22203655+isaaccorley@users.noreply.github.com>
Date: Fri, 10 Sep 2021 20:04:39 -0500
Subject: [PATCH 9/9] fixed doc formatting

---
 torchgeo/datasets/etci2021.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/torchgeo/datasets/etci2021.py b/torchgeo/datasets/etci2021.py
index 881a808b22d..f61cc604046 100644
--- a/torchgeo/datasets/etci2021.py
+++ b/torchgeo/datasets/etci2021.py
@@ -24,22 +24,25 @@ class ETCI2021(VisionDataset):
     dataset is a dataset for flood detection
 
     Dataset features:
+
     * 33,405 VV & VH Sentinel-1 Synthetic Aperture Radar (SAR) images
     * 2 binary masks per image representing water body & flood, respectively
     * 2 polarization band images (VV, VH) of 3 RGB channels per band
-    * 3 RGB channels per band generated by the Hybrid Pluggable
-      Processing Pipeline 'hyp3'
+    * 3 RGB channels per band generated by the Hybrid Pluggable Processing
+      Pipeline (hyp3)
     * Images with 5x20m per pixel resolution (256x256) px) taken in
       Interferometric Wide Swath acquisition mode
     * Flood events from 5 different regions
 
     Dataset format:
+
     * VV band three-channel png
     * VH band three-channel png
     * water body mask single-channel png where no water body = 0, water body = 255
     * flood mask single-channel png where no flood = 0, flood = 255
 
     Dataset classes:
+
     1. no flood/water
     2. flood/water