diff --git a/docs/api/datamodules.rst b/docs/api/datamodules.rst index 1c07c04737f..ab66bb4f2c1 100644 --- a/docs/api/datamodules.rst +++ b/docs/api/datamodules.rst @@ -11,6 +11,11 @@ Chesapeake Land Cover .. autoclass:: ChesapeakeCVPRDataModule +L7 Irish +^^^^^^^^ + +.. autoclass:: L7IrishDataModule + L8 Biome ^^^^^^^^ diff --git a/docs/api/datasets.rst b/docs/api/datasets.rst index 362f3b2a375..88865f61db8 100644 --- a/docs/api/datasets.rst +++ b/docs/api/datasets.rst @@ -93,6 +93,11 @@ iNaturalist .. autoclass:: INaturalist +L7 Irish +^^^^^^^^ + +.. autoclass:: L7Irish + L8 Biome ^^^^^^^^ diff --git a/docs/api/geo_datasets.csv b/docs/api/geo_datasets.csv index 3c5a8e75e41..ebde2c6dcb6 100644 --- a/docs/api/geo_datasets.csv +++ b/docs/api/geo_datasets.csv @@ -12,9 +12,10 @@ Dataset,Type,Source,Size (px),Resolution (m) `GBIF`_,Points,Citizen Scientists,-,- `GlobBiomass`_,Masks,Landsat,"45,000x45,000",100 `iNaturalist`_,Points,Citizen Scientists,-,- +`L7 Irish`_,"Imagery, Masks",Landsat,"8,400x7,500","15, 30" +`L8 Biome`_,"Imagery, Masks",Landsat,"8,900x8,900","15, 30" `LandCover.ai Geo`_,"Imagery, Masks",Aerial,"4,200--9,500",0.25--0.5 `Landsat`_,Imagery,Landsat,"8,900x8,900",30 -`L8 Biome`_,"Imagery, Masks",Landsat,"8,900x8,900","15, 30" `NAIP`_,Imagery,Aerial,"6,100x7,600",1 `Open Buildings`_,Geometries,"Maxar, CNES/Airbus",-,- `Sentinel`_,Imagery,Sentinel,"10,000x10,000",10 diff --git a/tests/conf/l7irish.yaml b/tests/conf/l7irish.yaml new file mode 100644 index 00000000000..1946e80ce2d --- /dev/null +++ b/tests/conf/l7irish.yaml @@ -0,0 +1,21 @@ +experiment: + task: "l7irish" + module: + loss: "ce" + model: "unet" + backbone: "resnet18" + weights: null + learning_rate: 1e-3 + learning_rate_schedule_patience: 6 + verbose: false + in_channels: 9 + num_classes: 5 + num_filters: 1 + ignore_index: 0 + datamodule: + root: "tests/data/l7irish" + download: true + batch_size: 1 + patch_size: 32 + length: 5 + num_workers: 0 \ No newline at end of file diff --git 
a/tests/data/l7irish/austral.tar.gz b/tests/data/l7irish/austral.tar.gz new file mode 100644 index 00000000000..0cb7fd57cb7 Binary files /dev/null and b/tests/data/l7irish/austral.tar.gz differ diff --git a/tests/data/l7irish/austral/p226_r98/L71226098_09820011112_B10.TIF b/tests/data/l7irish/austral/p226_r98/L71226098_09820011112_B10.TIF new file mode 100644 index 00000000000..6596dd15706 Binary files /dev/null and b/tests/data/l7irish/austral/p226_r98/L71226098_09820011112_B10.TIF differ diff --git a/tests/data/l7irish/austral/p226_r98/L71226098_09820011112_B20.TIF b/tests/data/l7irish/austral/p226_r98/L71226098_09820011112_B20.TIF new file mode 100644 index 00000000000..5eb21341fd6 Binary files /dev/null and b/tests/data/l7irish/austral/p226_r98/L71226098_09820011112_B20.TIF differ diff --git a/tests/data/l7irish/austral/p226_r98/L71226098_09820011112_B30.TIF b/tests/data/l7irish/austral/p226_r98/L71226098_09820011112_B30.TIF new file mode 100644 index 00000000000..3abf1c03c61 Binary files /dev/null and b/tests/data/l7irish/austral/p226_r98/L71226098_09820011112_B30.TIF differ diff --git a/tests/data/l7irish/austral/p226_r98/L71226098_09820011112_B40.TIF b/tests/data/l7irish/austral/p226_r98/L71226098_09820011112_B40.TIF new file mode 100644 index 00000000000..de99b5f71c4 Binary files /dev/null and b/tests/data/l7irish/austral/p226_r98/L71226098_09820011112_B40.TIF differ diff --git a/tests/data/l7irish/austral/p226_r98/L71226098_09820011112_B50.TIF b/tests/data/l7irish/austral/p226_r98/L71226098_09820011112_B50.TIF new file mode 100644 index 00000000000..a02ade22fb7 Binary files /dev/null and b/tests/data/l7irish/austral/p226_r98/L71226098_09820011112_B50.TIF differ diff --git a/tests/data/l7irish/austral/p226_r98/L71226098_09820011112_B61.TIF b/tests/data/l7irish/austral/p226_r98/L71226098_09820011112_B61.TIF new file mode 100644 index 00000000000..71e668da931 Binary files /dev/null and b/tests/data/l7irish/austral/p226_r98/L71226098_09820011112_B61.TIF differ 
diff --git a/tests/data/l7irish/austral/p226_r98/L72226098_09820011112_B62.TIF b/tests/data/l7irish/austral/p226_r98/L72226098_09820011112_B62.TIF new file mode 100644 index 00000000000..116566136f6 Binary files /dev/null and b/tests/data/l7irish/austral/p226_r98/L72226098_09820011112_B62.TIF differ diff --git a/tests/data/l7irish/austral/p226_r98/L72226098_09820011112_B70.TIF b/tests/data/l7irish/austral/p226_r98/L72226098_09820011112_B70.TIF new file mode 100644 index 00000000000..436f832c50e Binary files /dev/null and b/tests/data/l7irish/austral/p226_r98/L72226098_09820011112_B70.TIF differ diff --git a/tests/data/l7irish/austral/p226_r98/L72226098_09820011112_B80.TIF b/tests/data/l7irish/austral/p226_r98/L72226098_09820011112_B80.TIF new file mode 100644 index 00000000000..55a332854e0 Binary files /dev/null and b/tests/data/l7irish/austral/p226_r98/L72226098_09820011112_B80.TIF differ diff --git a/tests/data/l7irish/austral/p226_r98/L7_p226_r98_newmask2015.TIF b/tests/data/l7irish/austral/p226_r98/L7_p226_r98_newmask2015.TIF new file mode 100644 index 00000000000..819fdf1b84c Binary files /dev/null and b/tests/data/l7irish/austral/p226_r98/L7_p226_r98_newmask2015.TIF differ diff --git a/tests/data/l7irish/austral/p227_r98/L71227098_09820011103_B10.TIF b/tests/data/l7irish/austral/p227_r98/L71227098_09820011103_B10.TIF new file mode 100644 index 00000000000..90b2d3821a7 Binary files /dev/null and b/tests/data/l7irish/austral/p227_r98/L71227098_09820011103_B10.TIF differ diff --git a/tests/data/l7irish/austral/p227_r98/L71227098_09820011103_B20.TIF b/tests/data/l7irish/austral/p227_r98/L71227098_09820011103_B20.TIF new file mode 100644 index 00000000000..74e7b6e58cd Binary files /dev/null and b/tests/data/l7irish/austral/p227_r98/L71227098_09820011103_B20.TIF differ diff --git a/tests/data/l7irish/austral/p227_r98/L71227098_09820011103_B30.TIF b/tests/data/l7irish/austral/p227_r98/L71227098_09820011103_B30.TIF new file mode 100644 index 00000000000..63996dd7f2c 
Binary files /dev/null and b/tests/data/l7irish/austral/p227_r98/L71227098_09820011103_B30.TIF differ diff --git a/tests/data/l7irish/austral/p227_r98/L71227098_09820011103_B40.TIF b/tests/data/l7irish/austral/p227_r98/L71227098_09820011103_B40.TIF new file mode 100644 index 00000000000..3bdc0261cbd Binary files /dev/null and b/tests/data/l7irish/austral/p227_r98/L71227098_09820011103_B40.TIF differ diff --git a/tests/data/l7irish/austral/p227_r98/L71227098_09820011103_B50.TIF b/tests/data/l7irish/austral/p227_r98/L71227098_09820011103_B50.TIF new file mode 100644 index 00000000000..c71eed70e26 Binary files /dev/null and b/tests/data/l7irish/austral/p227_r98/L71227098_09820011103_B50.TIF differ diff --git a/tests/data/l7irish/austral/p227_r98/L71227098_09820011103_B61.TIF b/tests/data/l7irish/austral/p227_r98/L71227098_09820011103_B61.TIF new file mode 100644 index 00000000000..beb2b9039e1 Binary files /dev/null and b/tests/data/l7irish/austral/p227_r98/L71227098_09820011103_B61.TIF differ diff --git a/tests/data/l7irish/austral/p227_r98/L72227098_09820011103_B62.TIF b/tests/data/l7irish/austral/p227_r98/L72227098_09820011103_B62.TIF new file mode 100644 index 00000000000..0a62e3bb17f Binary files /dev/null and b/tests/data/l7irish/austral/p227_r98/L72227098_09820011103_B62.TIF differ diff --git a/tests/data/l7irish/austral/p227_r98/L72227098_09820011103_B70.TIF b/tests/data/l7irish/austral/p227_r98/L72227098_09820011103_B70.TIF new file mode 100644 index 00000000000..5c34ed4d072 Binary files /dev/null and b/tests/data/l7irish/austral/p227_r98/L72227098_09820011103_B70.TIF differ diff --git a/tests/data/l7irish/austral/p227_r98/L72227098_09820011103_B80.TIF b/tests/data/l7irish/austral/p227_r98/L72227098_09820011103_B80.TIF new file mode 100644 index 00000000000..94941c87193 Binary files /dev/null and b/tests/data/l7irish/austral/p227_r98/L72227098_09820011103_B80.TIF differ diff --git a/tests/data/l7irish/austral/p227_r98/L7_p227_r98_newmask2015.TIF 
b/tests/data/l7irish/austral/p227_r98/L7_p227_r98_newmask2015.TIF new file mode 100644 index 00000000000..52679d35d11 Binary files /dev/null and b/tests/data/l7irish/austral/p227_r98/L7_p227_r98_newmask2015.TIF differ diff --git a/tests/data/l7irish/austral/p231_r93_2/L71231093_09320010507_B10.TIF b/tests/data/l7irish/austral/p231_r93_2/L71231093_09320010507_B10.TIF new file mode 100644 index 00000000000..05e03fa8fac Binary files /dev/null and b/tests/data/l7irish/austral/p231_r93_2/L71231093_09320010507_B10.TIF differ diff --git a/tests/data/l7irish/austral/p231_r93_2/L71231093_09320010507_B20.TIF b/tests/data/l7irish/austral/p231_r93_2/L71231093_09320010507_B20.TIF new file mode 100644 index 00000000000..54218861ba3 Binary files /dev/null and b/tests/data/l7irish/austral/p231_r93_2/L71231093_09320010507_B20.TIF differ diff --git a/tests/data/l7irish/austral/p231_r93_2/L71231093_09320010507_B30.TIF b/tests/data/l7irish/austral/p231_r93_2/L71231093_09320010507_B30.TIF new file mode 100644 index 00000000000..750bb90670a Binary files /dev/null and b/tests/data/l7irish/austral/p231_r93_2/L71231093_09320010507_B30.TIF differ diff --git a/tests/data/l7irish/austral/p231_r93_2/L71231093_09320010507_B40.TIF b/tests/data/l7irish/austral/p231_r93_2/L71231093_09320010507_B40.TIF new file mode 100644 index 00000000000..ce913fb49d6 Binary files /dev/null and b/tests/data/l7irish/austral/p231_r93_2/L71231093_09320010507_B40.TIF differ diff --git a/tests/data/l7irish/austral/p231_r93_2/L71231093_09320010507_B50.TIF b/tests/data/l7irish/austral/p231_r93_2/L71231093_09320010507_B50.TIF new file mode 100644 index 00000000000..86ee5319bea Binary files /dev/null and b/tests/data/l7irish/austral/p231_r93_2/L71231093_09320010507_B50.TIF differ diff --git a/tests/data/l7irish/austral/p231_r93_2/L71231093_09320010507_B61.TIF b/tests/data/l7irish/austral/p231_r93_2/L71231093_09320010507_B61.TIF new file mode 100644 index 00000000000..e987a363d94 Binary files /dev/null and 
b/tests/data/l7irish/austral/p231_r93_2/L71231093_09320010507_B61.TIF differ diff --git a/tests/data/l7irish/austral/p231_r93_2/L72231093_09320010507_B62.TIF b/tests/data/l7irish/austral/p231_r93_2/L72231093_09320010507_B62.TIF new file mode 100644 index 00000000000..b87a4b26a29 Binary files /dev/null and b/tests/data/l7irish/austral/p231_r93_2/L72231093_09320010507_B62.TIF differ diff --git a/tests/data/l7irish/austral/p231_r93_2/L72231093_09320010507_B70.TIF b/tests/data/l7irish/austral/p231_r93_2/L72231093_09320010507_B70.TIF new file mode 100644 index 00000000000..3c753cf93e4 Binary files /dev/null and b/tests/data/l7irish/austral/p231_r93_2/L72231093_09320010507_B70.TIF differ diff --git a/tests/data/l7irish/austral/p231_r93_2/L72231093_09320010507_B80.TIF b/tests/data/l7irish/austral/p231_r93_2/L72231093_09320010507_B80.TIF new file mode 100644 index 00000000000..af52189344f Binary files /dev/null and b/tests/data/l7irish/austral/p231_r93_2/L72231093_09320010507_B80.TIF differ diff --git a/tests/data/l7irish/austral/p231_r93_2/L7_p231_r93_newmask2015.TIF b/tests/data/l7irish/austral/p231_r93_2/L7_p231_r93_newmask2015.TIF new file mode 100644 index 00000000000..8da86a965db Binary files /dev/null and b/tests/data/l7irish/austral/p231_r93_2/L7_p231_r93_newmask2015.TIF differ diff --git a/tests/data/l7irish/boreal.tar.gz b/tests/data/l7irish/boreal.tar.gz new file mode 100644 index 00000000000..c5a8463b363 Binary files /dev/null and b/tests/data/l7irish/boreal.tar.gz differ diff --git a/tests/data/l7irish/boreal/p143_r21_3/L71143021_02120010803_B10.TIF b/tests/data/l7irish/boreal/p143_r21_3/L71143021_02120010803_B10.TIF new file mode 100644 index 00000000000..a502f02aaeb Binary files /dev/null and b/tests/data/l7irish/boreal/p143_r21_3/L71143021_02120010803_B10.TIF differ diff --git a/tests/data/l7irish/boreal/p143_r21_3/L71143021_02120010803_B20.TIF b/tests/data/l7irish/boreal/p143_r21_3/L71143021_02120010803_B20.TIF new file mode 100644 index 
00000000000..c6994defb64 Binary files /dev/null and b/tests/data/l7irish/boreal/p143_r21_3/L71143021_02120010803_B20.TIF differ diff --git a/tests/data/l7irish/boreal/p143_r21_3/L71143021_02120010803_B30.TIF b/tests/data/l7irish/boreal/p143_r21_3/L71143021_02120010803_B30.TIF new file mode 100644 index 00000000000..ec4d9c74907 Binary files /dev/null and b/tests/data/l7irish/boreal/p143_r21_3/L71143021_02120010803_B30.TIF differ diff --git a/tests/data/l7irish/boreal/p143_r21_3/L71143021_02120010803_B40.TIF b/tests/data/l7irish/boreal/p143_r21_3/L71143021_02120010803_B40.TIF new file mode 100644 index 00000000000..c7e566a5944 Binary files /dev/null and b/tests/data/l7irish/boreal/p143_r21_3/L71143021_02120010803_B40.TIF differ diff --git a/tests/data/l7irish/boreal/p143_r21_3/L71143021_02120010803_B50.TIF b/tests/data/l7irish/boreal/p143_r21_3/L71143021_02120010803_B50.TIF new file mode 100644 index 00000000000..a21ba9cf82e Binary files /dev/null and b/tests/data/l7irish/boreal/p143_r21_3/L71143021_02120010803_B50.TIF differ diff --git a/tests/data/l7irish/boreal/p143_r21_3/L71143021_02120010803_B61.TIF b/tests/data/l7irish/boreal/p143_r21_3/L71143021_02120010803_B61.TIF new file mode 100644 index 00000000000..f57cf09f1ec Binary files /dev/null and b/tests/data/l7irish/boreal/p143_r21_3/L71143021_02120010803_B61.TIF differ diff --git a/tests/data/l7irish/boreal/p143_r21_3/L72143021_02120010803_B62.TIF b/tests/data/l7irish/boreal/p143_r21_3/L72143021_02120010803_B62.TIF new file mode 100644 index 00000000000..338e46ad7a1 Binary files /dev/null and b/tests/data/l7irish/boreal/p143_r21_3/L72143021_02120010803_B62.TIF differ diff --git a/tests/data/l7irish/boreal/p143_r21_3/L72143021_02120010803_B70.TIF b/tests/data/l7irish/boreal/p143_r21_3/L72143021_02120010803_B70.TIF new file mode 100644 index 00000000000..e6cf8645043 Binary files /dev/null and b/tests/data/l7irish/boreal/p143_r21_3/L72143021_02120010803_B70.TIF differ diff --git 
a/tests/data/l7irish/boreal/p143_r21_3/L72143021_02120010803_B80.TIF b/tests/data/l7irish/boreal/p143_r21_3/L72143021_02120010803_B80.TIF new file mode 100644 index 00000000000..3bbed22a7fb Binary files /dev/null and b/tests/data/l7irish/boreal/p143_r21_3/L72143021_02120010803_B80.TIF differ diff --git a/tests/data/l7irish/boreal/p143_r21_3/L7_p143_r21_newmask2015.TIF b/tests/data/l7irish/boreal/p143_r21_3/L7_p143_r21_newmask2015.TIF new file mode 100644 index 00000000000..6f48096b95e Binary files /dev/null and b/tests/data/l7irish/boreal/p143_r21_3/L7_p143_r21_newmask2015.TIF differ diff --git a/tests/data/l7irish/boreal/p2_r27/L71002027_02720010604_B10.TIF b/tests/data/l7irish/boreal/p2_r27/L71002027_02720010604_B10.TIF new file mode 100644 index 00000000000..11fcd824ea5 Binary files /dev/null and b/tests/data/l7irish/boreal/p2_r27/L71002027_02720010604_B10.TIF differ diff --git a/tests/data/l7irish/boreal/p2_r27/L71002027_02720010604_B20.TIF b/tests/data/l7irish/boreal/p2_r27/L71002027_02720010604_B20.TIF new file mode 100644 index 00000000000..b9de6aa42e1 Binary files /dev/null and b/tests/data/l7irish/boreal/p2_r27/L71002027_02720010604_B20.TIF differ diff --git a/tests/data/l7irish/boreal/p2_r27/L71002027_02720010604_B30.TIF b/tests/data/l7irish/boreal/p2_r27/L71002027_02720010604_B30.TIF new file mode 100644 index 00000000000..f0f7175d9a4 Binary files /dev/null and b/tests/data/l7irish/boreal/p2_r27/L71002027_02720010604_B30.TIF differ diff --git a/tests/data/l7irish/boreal/p2_r27/L71002027_02720010604_B40.TIF b/tests/data/l7irish/boreal/p2_r27/L71002027_02720010604_B40.TIF new file mode 100644 index 00000000000..378fd1cca3b Binary files /dev/null and b/tests/data/l7irish/boreal/p2_r27/L71002027_02720010604_B40.TIF differ diff --git a/tests/data/l7irish/boreal/p2_r27/L71002027_02720010604_B50.TIF b/tests/data/l7irish/boreal/p2_r27/L71002027_02720010604_B50.TIF new file mode 100644 index 00000000000..7fe0b2f3ad5 Binary files /dev/null and 
b/tests/data/l7irish/boreal/p2_r27/L71002027_02720010604_B50.TIF differ diff --git a/tests/data/l7irish/boreal/p2_r27/L71002027_02720010604_B61.TIF b/tests/data/l7irish/boreal/p2_r27/L71002027_02720010604_B61.TIF new file mode 100644 index 00000000000..ea37448a086 Binary files /dev/null and b/tests/data/l7irish/boreal/p2_r27/L71002027_02720010604_B61.TIF differ diff --git a/tests/data/l7irish/boreal/p2_r27/L72002027_02720010604_B62.TIF b/tests/data/l7irish/boreal/p2_r27/L72002027_02720010604_B62.TIF new file mode 100644 index 00000000000..d9ae659b646 Binary files /dev/null and b/tests/data/l7irish/boreal/p2_r27/L72002027_02720010604_B62.TIF differ diff --git a/tests/data/l7irish/boreal/p2_r27/L72002027_02720010604_B70.TIF b/tests/data/l7irish/boreal/p2_r27/L72002027_02720010604_B70.TIF new file mode 100644 index 00000000000..3855426670d Binary files /dev/null and b/tests/data/l7irish/boreal/p2_r27/L72002027_02720010604_B70.TIF differ diff --git a/tests/data/l7irish/boreal/p2_r27/L72002027_02720010604_B80.TIF b/tests/data/l7irish/boreal/p2_r27/L72002027_02720010604_B80.TIF new file mode 100644 index 00000000000..ebd578cbcda Binary files /dev/null and b/tests/data/l7irish/boreal/p2_r27/L72002027_02720010604_B80.TIF differ diff --git a/tests/data/l7irish/boreal/p2_r27/L7_p2_r27_newmask2015.TIF b/tests/data/l7irish/boreal/p2_r27/L7_p2_r27_newmask2015.TIF new file mode 100644 index 00000000000..ae99c500e75 Binary files /dev/null and b/tests/data/l7irish/boreal/p2_r27/L7_p2_r27_newmask2015.TIF differ diff --git a/tests/data/l7irish/data.py b/tests/data/l7irish/data.py new file mode 100755 index 00000000000..9c7869d01b0 --- /dev/null +++ b/tests/data/l7irish/data.py @@ -0,0 +1,117 @@ +#!/usr/bin/env python3 + +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+ +import hashlib +import os +import shutil +from typing import Dict, List, Union + +import numpy as np +import rasterio +from rasterio import Affine +from rasterio.crs import CRS + +SIZE = 36 + +np.random.seed(0) + +FILENAME_HIERARCHY = Union[Dict[str, "FILENAME_HIERARCHY"], List[str]] + +bands = [ + "B10.TIF", + "B20.TIF", + "B30.TIF", + "B40.TIF", + "B50.TIF", + "B61.TIF", + "B62.TIF", + "B70.TIF", + "B80.TIF", +] + +filenames: FILENAME_HIERARCHY = { + "austral": {"p226_r98": [], "p227_r98": [], "p231_r93_2": []}, + "boreal": {"p2_r27": [], "p143_r21_3": []}, +} +prefixes = [ + "L71226098_09820011112", + "L71227098_09820011103", + "L71231093_09320010507", + "L71002027_02720010604", + "L71143021_02120010803", +] + +for land_type, patches in filenames.items(): + for patch in patches: + path, row = patch.split("_")[:2] + key = path[1:].zfill(3) + row[1:].zfill(3) + for prefix in prefixes: + if key in prefix: + for band in bands: + if band in ["B62.TIF", "B70.TIF", "B80.TIF"]: + prefix = prefix.replace("L71", "L72") + filenames[land_type][patch].append(f"{prefix}_{band}") + + filenames[land_type][patch].append(f"L7_{path}_{row}_newmask2015.TIF") + + +def create_file(path: str) -> None: + dtype = "uint8" + profile = { + "driver": "GTiff", + "dtype": dtype, + "width": SIZE, + "height": SIZE, + "count": 1, + "crs": CRS.from_epsg(32719), + "transform": Affine(30.0, 0.0, 462884.99999999994, 0.0, -30.0, 4071915.0), + } + + if path.endswith("B80.TIF"): + profile["transform"] = Affine( + 15.0, 0.0, 462892.49999999994, 0.0, -15.0, 4071907.5 + ) + profile["width"] = profile["height"] = SIZE * 2 + + if path.endswith("_newmask2015.TIF"): + Z = np.random.choice( + np.array([0, 64, 128, 191, 255], dtype=dtype), size=(SIZE, SIZE) + ) + + else: + Z = np.random.randn(SIZE, SIZE).astype(profile["dtype"]) + + with rasterio.open(path, "w", **profile) as src: + src.write(Z, 1) + + +def create_directory(directory: str, hierarchy: FILENAME_HIERARCHY) -> None: + if isinstance(hierarchy, 
dict): + # Recursive case + for key, value in hierarchy.items(): + path = os.path.join(directory, key) + os.makedirs(path, exist_ok=True) + create_directory(path, value) + else: + # Base case + for value in hierarchy: + path = os.path.join(directory, value) + create_file(path) + + +if __name__ == "__main__": + create_directory(".", filenames) + + directories = ["austral", "boreal"] + for directory in directories: + filename = str(directory) + + # Create tarballs + shutil.make_archive(filename, "gztar", ".", directory) + + # # Compute checksums + with open(f"{filename}.tar.gz", "rb") as f: + md5 = hashlib.md5(f.read()).hexdigest() + print(filename, md5) diff --git a/tests/datasets/test_l7irish.py b/tests/datasets/test_l7irish.py new file mode 100644 index 00000000000..b89f4819ba7 --- /dev/null +++ b/tests/datasets/test_l7irish.py @@ -0,0 +1,94 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +import glob +import os +import shutil +from pathlib import Path + +import matplotlib.pyplot as plt +import pytest +import torch +import torch.nn as nn +from _pytest.monkeypatch import MonkeyPatch +from rasterio.crs import CRS + +import torchgeo.datasets.utils +from torchgeo.datasets import BoundingBox, IntersectionDataset, L7Irish, UnionDataset + + +def download_url(url: str, root: str, *args: str, **kwargs: str) -> None: + shutil.copy(url, root) + + +class TestL7Irish: + @pytest.fixture + def dataset(self, monkeypatch: MonkeyPatch, tmp_path: Path) -> L7Irish: + monkeypatch.setattr(torchgeo.datasets.l7irish, "download_url", download_url) + md5s = { + "austral": "c06147330141517f7eee55ea931c4787", + "boreal": "4b598e55f0d6d33da3672190ebf96268", + } + + url = os.path.join("tests", "data", "l7irish", "{}.tar.gz") + monkeypatch.setattr(L7Irish, "url", url) + monkeypatch.setattr(L7Irish, "md5s", md5s) + root = str(tmp_path) + transforms = nn.Identity() + return L7Irish(root, transforms=transforms, download=True, checksum=True) + + 
def test_getitem(self, dataset: L7Irish) -> None: + x = dataset[dataset.bounds] + assert isinstance(x, dict) + assert isinstance(x["crs"], CRS) + assert isinstance(x["image"], torch.Tensor) + assert isinstance(x["mask"], torch.Tensor) + + def test_and(self, dataset: L7Irish) -> None: + ds = dataset & dataset + assert isinstance(ds, IntersectionDataset) + + def test_or(self, dataset: L7Irish) -> None: + ds = dataset | dataset + assert isinstance(ds, UnionDataset) + + def test_plot(self, dataset: L7Irish) -> None: + x = dataset[dataset.bounds] + dataset.plot(x, suptitle="Test") + plt.close() + + def test_already_extracted(self, dataset: L7Irish) -> None: + L7Irish(root=dataset.root, download=True) + + def test_already_downloaded(self, tmp_path: Path) -> None: + pathname = os.path.join("tests", "data", "l7irish", "*.tar.gz") + root = str(tmp_path) + for tarfile in glob.iglob(pathname): + shutil.copy(tarfile, root) + L7Irish(root) + + def test_not_downloaded(self, tmp_path: Path) -> None: + with pytest.raises(RuntimeError, match="Dataset not found"): + L7Irish(str(tmp_path)) + + def test_plot_prediction(self, dataset: L7Irish) -> None: + x = dataset[dataset.bounds] + x["prediction"] = x["mask"].clone() + dataset.plot(x, suptitle="Prediction") + plt.close() + + def test_invalid_query(self, dataset: L7Irish) -> None: + query = BoundingBox(0, 0, 0, 0, 0, 0) + with pytest.raises( + IndexError, match="query: .* not found in index with bounds:" + ): + dataset[query] + + def test_rgb_bands_absent_plot(self, dataset: L7Irish) -> None: + with pytest.raises( + ValueError, match="Dataset doesn't contain some of the RGB bands" + ): + ds = L7Irish(root=dataset.root, bands=["B1", "B2", "B5"]) + x = ds[ds.bounds] + ds.plot(x, suptitle="Test") + plt.close() diff --git a/tests/trainers/test_segmentation.py b/tests/trainers/test_segmentation.py index 9256ed6debf..0a6a7f7aa07 100644 --- a/tests/trainers/test_segmentation.py +++ b/tests/trainers/test_segmentation.py @@ -19,6 +19,7 @@ 
ETCI2021DataModule, GID15DataModule, InriaAerialImageLabelingDataModule, + L7IrishDataModule, L8BiomeDataModule, LandCoverAIDataModule, LoveDADataModule, @@ -64,6 +65,7 @@ class TestSemanticSegmentationTask: ("etci2021", ETCI2021DataModule), ("gid15", GID15DataModule), ("inria", InriaAerialImageLabelingDataModule), + ("l7irish", L7IrishDataModule), ("l8biome", L8BiomeDataModule), ("landcoverai", LandCoverAIDataModule), ("loveda", LoveDADataModule), diff --git a/torchgeo/datamodules/__init__.py b/torchgeo/datamodules/__init__.py index 719ad1fb82b..57699c4291a 100644 --- a/torchgeo/datamodules/__init__.py +++ b/torchgeo/datamodules/__init__.py @@ -14,6 +14,7 @@ from .geo import GeoDataModule, NonGeoDataModule from .gid15 import GID15DataModule from .inria import InriaAerialImageLabelingDataModule +from .l7irish import L7IrishDataModule from .l8biome import L8BiomeDataModule from .landcoverai import LandCoverAIDataModule from .loveda import LoveDADataModule @@ -35,6 +36,7 @@ __all__ = ( # GeoDataset "ChesapeakeCVPRDataModule", + "L7IrishDataModule", "L8BiomeDataModule", "NAIPChesapeakeDataModule", # NonGeoDataset diff --git a/torchgeo/datamodules/l7irish.py b/torchgeo/datamodules/l7irish.py new file mode 100644 index 00000000000..5e8c89f29a0 --- /dev/null +++ b/torchgeo/datamodules/l7irish.py @@ -0,0 +1,76 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +"""L7 Irish datamodule.""" + +from typing import Any, Tuple, Union + +import torch + +from ..datasets import L7Irish, random_bbox_assignment +from ..samplers import GridGeoSampler, RandomBatchGeoSampler +from .geo import GeoDataModule + + +class L7IrishDataModule(GeoDataModule): + """LightningDataModule implementation for the L7 Irish dataset. + + .. 
versionadded:: 0.5 + """ + + mean = torch.tensor(0) + std = torch.tensor(10000) + + def __init__( + self, + batch_size: int = 1, + patch_size: Union[int, Tuple[int, int]] = 32, + length: int = 5, + num_workers: int = 0, + **kwargs: Any, + ) -> None: + """Initialize a new L7IrishDataModule instance. + + Args: + batch_size: Size of each mini-batch. + patch_size: Size of each patch, either ``size`` or ``(height, width)``. + length: Length of each training epoch. + num_workers: Number of workers for parallel data loading. + **kwargs: Additional keyword arguments passed to + :class:`~torchgeo.datasets.L7Irish`. + """ + super().__init__( + L7Irish, + batch_size=batch_size, + patch_size=patch_size, + length=length, + num_workers=num_workers, + **kwargs, + ) + + def setup(self, stage: str) -> None: + """Set up datasets. + + Args: + stage: Either 'fit', 'validate', 'test', or 'predict'. + """ + dataset = L7Irish(**self.kwargs) + generator = torch.Generator().manual_seed(0) + ( + self.train_dataset, + self.val_dataset, + self.test_dataset, + ) = random_bbox_assignment(dataset, [0.6, 0.2, 0.2], generator) + + if stage in ["fit"]: + self.train_batch_sampler = RandomBatchGeoSampler( + self.train_dataset, self.patch_size, self.batch_size, self.length + ) + if stage in ["fit", "validate"]: + self.val_sampler = GridGeoSampler( + self.val_dataset, self.patch_size, self.patch_size + ) + if stage in ["test"]: + self.test_sampler = GridGeoSampler( + self.test_dataset, self.patch_size, self.patch_size + ) diff --git a/torchgeo/datasets/__init__.py b/torchgeo/datasets/__init__.py index 2eff64d8696..55ade316c7b 100644 --- a/torchgeo/datasets/__init__.py +++ b/torchgeo/datasets/__init__.py @@ -53,6 +53,7 @@ from .idtrees import IDTReeS from .inaturalist import INaturalist from .inria import InriaAerialImageLabeling +from .l7irish import L7Irish from .l8biome import L8Biome from .landcoverai import LandCoverAI, LandCoverAIBase, LandCoverAIGeo from .landsat import ( @@ -138,6 +139,7 @@ 
"GBIF", "GlobBiomass", "INaturalist", + "L7Irish", "L8Biome", "LandCoverAIBase", "LandCoverAIGeo", diff --git a/torchgeo/datasets/l7irish.py b/torchgeo/datasets/l7irish.py new file mode 100644 index 00000000000..9c6c55c6698 --- /dev/null +++ b/torchgeo/datasets/l7irish.py @@ -0,0 +1,294 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +"""L7 Irish dataset.""" + +import glob +import os +import re +from typing import Any, Callable, Dict, List, Optional, Sequence, cast + +import matplotlib.pyplot as plt +import torch +from rasterio.crs import CRS +from torch import Tensor + +from .geo import RasterDataset +from .utils import BoundingBox, download_url, extract_archive + + +class L7Irish(RasterDataset): + """L7 Irish dataset. + + The `L7 Irish `__ dataset + is based on Landsat 7 Enhanced Thematic Mapper Plus (ETM+) Level-1G scenes. + Manually generated cloud masks are used to train and validate cloud cover assessment + algorithms, which in turn are intended to compute the percentage of cloud cover in each + scene. + + Dataset features: + + * 206 scenes from Landsat-7 ETM+ tiles + * Imagery from global tiles between June 2000--December 2001 + * 9 Level-1 spectral bands with 15 and 30 m per pixel resolution + + Dataset format: + + * Images are composed of multiple single channel geotiffs + * Labels are multiclass, stored in a single geotiffs file per image + * Level-1 metadata (MTL.txt file) + * Landsat-7 ETM+ bands: (B10, B20, B30, B40, B50, B61, B62, B70, B80) + + Dataset classes (5): + + 0. Fill + 1. Cloud Shadow + 2. Clear + 3. Thin Cloud + 4. Cloud + + If you use this dataset in your research, please cite the following: + + * https://doi.org/10.5066/F7XD0ZWC + * https://doi.org/10.1109/TGRS.2011.2164087 + * https://www.sciencebase.gov/catalog/item/573ccf18e4b0dae0d5e4b109 + + .. 
versionadded:: 0.5 + """ # noqa: E501 + + url = "https://huggingface.co/datasets/torchgeo/l7irish/resolve/main/{}.tar.gz" # noqa: E501 + + md5s = { + "austral": "9c2629884c1e7251e24953e1e5f880de", + "boreal": "0a9f50998c0fb47c0cc226faf479f883", + "mid_latitude_north": "0860e218403d949f4b38e4f9f70e0087", + "mid_latitude_south": "c66bbeaa6dbf0ba2cd26b9eea89eb3a4", + "polar_north": "18a6b9b4684ae91bfdcc7b78ea1f42ee", + "polar_south": "a12e4d7fddaa377259328190f10a1c17", + "subtropical_north": "ebdfaee37ffc5ba1bd4763f7f72df97f", + "subtropical_south": "3670c9490753efe3d36927329bb87e2f", + "tropical": "f60c93d8609c72ac86e858105b6272f2", + } + + classes = ["Fill", "Cloud Shadow", "Clear", "Thin Cloud", "Cloud"] + + # https://landsat.usgs.gov/cloud-validation/cca_irish_2015/L7_Irish_Cloud_Validation_Masks.xml + filename_glob = "L7*_B10.TIF" + filename_regex = r""" + ^L7[12] + (?P<wrs_path>\d{3}) + (?P<wrs_row>\d{3}) + _(?P=wrs_row) + (?P<date>\d{8}) + _(?P<band>B\d{2}) + \.TIF$ + """ + date_format = "%Y%m%d" + + separate_files = True + rgb_bands = ["B30", "B20", "B10"] + all_bands = ["B10", "B20", "B30", "B40", "B50", "B61", "B62", "B70", "B80"] + + def __init__( + self, + root: str = "data", + crs: Optional[CRS] = None, + res: Optional[float] = None, + bands: Sequence[str] = all_bands, + transforms: Optional[Callable[[Dict[str, Any]], Dict[str, Any]]] = None, + cache: bool = True, + download: bool = False, + checksum: bool = False, + ) -> None: + """Initialize a new L7Irish instance.
+ + Args: + root: root directory where dataset can be found + crs: :term:`coordinate reference system (CRS)` to warp to + (defaults to the CRS of the first file found) + res: resolution of the dataset in units of CRS + (defaults to the resolution of the first file found) + bands: bands to return (defaults to all bands) + transforms: a function/transform that takes an input sample + and returns a transformed version + cache: if True, cache file handle to speed up repeated sampling + download: if True, download dataset and store it in the root directory + checksum: if True, check the MD5 of the downloaded files (may be slow) + + Raises: + RuntimeError: if ``download=False`` and data is not found, or checksums + don't match + """ + self.root = root + self.download = download + self.checksum = checksum + + self._verify() + + super().__init__( + root, crs=crs, res=res, bands=bands, transforms=transforms, cache=cache + ) + + def _verify(self) -> None: + """Verify the integrity of the dataset. + + Raises: + RuntimeError: if ``download=False`` but dataset is missing or checksum fails + """ + # Check if the extracted files already exist + pathname = os.path.join(self.root, "**", self.filename_glob) + for fname in glob.iglob(pathname, recursive=True): + return + + # Check if the tar files have already been downloaded + pathname = os.path.join(self.root, "*.tar.gz") + if glob.glob(pathname): + self._extract() + return + + # Check if the user requested to download the dataset + if not self.download: + raise RuntimeError( + f"Dataset not found in `root={self.root}` and `download=False`, " + "either specify a different `root` directory or use `download=True` " + "to automatically download the dataset." 
+ ) + + # Download the dataset + self._download() + self._extract() + + def _download(self) -> None: + """Download the dataset.""" + for biome, md5 in self.md5s.items(): + download_url( + self.url.format(biome), self.root, md5=md5 if self.checksum else None + ) + + def _extract(self) -> None: + """Extract the dataset.""" + pathname = os.path.join(self.root, "*.tar.gz") + for tarfile in glob.iglob(pathname): + extract_archive(tarfile) + + def __getitem__(self, query: BoundingBox) -> Dict[str, Any]: + """Retrieve image/mask and metadata indexed by query. + + Args: + query: (minx, maxx, miny, maxy, mint, maxt) coordinates to index + + Returns: + sample of image, mask and metadata at that index + + Raises: + IndexError: if query is not found in the index + """ + hits = self.index.intersection(tuple(query), objects=True) + filepaths = cast(List[str], [hit.object for hit in hits]) + + if not filepaths: + raise IndexError( + f"query: {query} not found in index with bounds: {self.bounds}" + ) + + image_list: List[Tensor] = [] + filename_regex = re.compile(self.filename_regex, re.VERBOSE) + for band in self.all_bands: + band_filepaths = [] + for filepath in filepaths: + filename = os.path.basename(filepath) + directory = os.path.dirname(filepath) + match = re.match(filename_regex, filename) + if match: + if "date" in match.groupdict(): + start = match.start("band") + end = match.end("band") + filename = filename[:start] + band + filename[end:] + if band in ["B62", "B70", "B80"]: + filename = filename.replace("L71", "L72") + filepath = os.path.join(directory, filename) + band_filepaths.append(filepath) + image_list.append(self._merge_files(band_filepaths, query)) + image = torch.cat(image_list) + + mask_filepaths = [] + for filepath in filepaths: + path, row = os.path.basename(os.path.dirname(filepath)).split("_")[:2] + mask_filepath = filepath.replace( + os.path.basename(filepath), f"L7_{path}_{row}_newmask2015.TIF" + ) + mask_filepaths.append(mask_filepath) + + mask = 
self._merge_files(mask_filepaths, query) + mask_mapping = {64: 1, 128: 2, 191: 3, 255: 4} + + for k, v in mask_mapping.items(): + mask[mask == k] = v + + sample = { + "crs": self.crs, + "bbox": query, + "image": image.float(), + "mask": mask.long(), + } + + if self.transforms is not None: + sample = self.transforms(sample) + + return sample + + def plot( + self, + sample: Dict[str, Tensor], + show_titles: bool = True, + suptitle: Optional[str] = None, + ) -> plt.Figure: + """Plot a sample from the dataset. + + Args: + sample: a sample returned by :meth:`__getitem__` + show_titles: flag indicating whether to show titles above each panel + suptitle: optional string to use as a suptitle + + Returns: + a matplotlib Figure with the rendered sample + """ + rgb_indices = [] + for band in self.rgb_bands: + if band in self.bands: + rgb_indices.append(self.bands.index(band)) + else: + raise ValueError("Dataset doesn't contain some of the RGB bands") + + image = sample["image"][rgb_indices].permute(1, 2, 0) + + # Stretch to the full range + image = (image - image.min()) / (image.max() - image.min()) + + mask = sample["mask"].numpy().astype("uint8").squeeze() + + num_panels = 2 + showing_predictions = "prediction" in sample + if showing_predictions: + predictions = sample["prediction"].numpy().astype("uint8").squeeze() + num_panels += 1 + + fig, axs = plt.subplots(1, num_panels, figsize=(num_panels * 4, 5)) + axs[0].imshow(image) + axs[0].axis("off") + axs[1].imshow(mask, vmin=0, vmax=4, cmap="gray") + axs[1].axis("off") + if show_titles: + axs[0].set_title("Image") + axs[1].set_title("Mask") + + if showing_predictions: + axs[2].imshow(predictions, vmin=0, vmax=4, cmap="gray") + axs[2].axis("off") + if show_titles: + axs[2].set_title("Predictions") + + if suptitle is not None: + plt.suptitle(suptitle) + + return fig