Skip to content

Commit

Permalink
🔀 Merge branch 'main' into extra-data-and-pre-batch-shuffle
Browse files Browse the repository at this point in the history
Commented out the extra california_*.hdf5 data for now.
  • Loading branch information
weiji14 committed Jun 1, 2023
2 parents 8efb5f2 + 4f2f232 commit e9b7255
Show file tree
Hide file tree
Showing 14 changed files with 3,075 additions and 2,011 deletions.
8 changes: 8 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,14 @@ chabud2023/
checkpoints/*.ckpt
lightning_logs/
wandb/
logs/

# Unit test / coverage reports
.pytest_cache/

# Notebooks
.ipynb_checkpoints
nbs/

# vscode
.vscode/
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ Finally, double-check that the libraries have been installed.
This is for those who want full reproducibility of the virtual environment.
Create a virtual environment with just Python and conda-lock installed first.

mamba create --name chabud python=3.11 conda-lock=1.4.0
mamba create --name chabud python=3.11 conda-lock=2.0.0
mamba activate chabud

Generate a unified [`conda-lock.yml`](https://github.com/conda/conda-lock) file
Expand Down
6 changes: 4 additions & 2 deletions chabud/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,7 @@ scripts. To ensure high standards of reproducibility, the code is structured
using the [Lightning](https://lightning.ai/pytorch-lightning) framework and
based on https://github.com/Lightning-AI/deep-learning-project-template.

- :bricks: datapipe.py - Data pipeline to load Sentinel-2 optical imagery from HDF5 files and perform pre-processing
- :spider_web: model.py - Code containing Neural Network model architecture
- :cloud_with_lightning: datapipe.py - Data pipeline to load Sentinel-2 optical imagery from HDF5 files and perform pre-processing
- :building_construction: model.py - Code containing the base ChaBuD LightningModule that handles fit/validation/test steps
- :bricks: layers.py - Modular block components for TinyCD model
- :house: tinycd_model.py - TinyCD neural network model architecture from Codegoni et al. 2022.
63 changes: 63 additions & 0 deletions chabud/callbacks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
from lightning.pytorch.callbacks import Callback
import torch
import torch.nn.functional as F
import wandb


class LogIntermediatePredictions(Callback):
    """Log validation images with predicted and true burn masks to wandb.

    Only the first validation batch of each validation run is logged, so
    the same samples can be compared as training progresses.
    """

    def __init__(self, logger):
        """Instantiates with wandb-logger.

        Args:
            logger : wandb-logger instance.
        """
        super().__init__()
        self.logger = logger

    def on_validation_batch_end(
        self,
        trainer,
        pl_module,
        outputs,
        batch,
        batch_idx,
        dataloader_idx=0,
    ):
        """Called when the validation batch ends.

        For the first validation batch only (``batch_idx == 0``), runs the
        model on the batch and logs every post-event image to wandb with
        two mask overlays: the thresholded model prediction and the
        ground truth mask, so humans can interpret how the model evolves
        over time.

        Args:
            trainer: Lightning trainer (unused).
            pl_module: Model, called as ``pl_module(x1=pre_img, x2=post_img)``.
            outputs: Outputs of the validation step (unused).
            batch: Tuple of ``(pre_img, post_img, mask, metadata)``.
            batch_idx: Index of the batch within the validation loop.
            dataloader_idx: Index of the validation dataloader (unused).
        """
        if batch_idx != 0:
            return

        # Take a small sample size for logging
        id2label = {0: "ok", 1: "burn"}

        with torch.no_grad():
            pre_img, post_img, mask, metadata = batch
            batch_size = mask.shape[0]

            # Pass the image through neural network model to get predicted images
            logits: torch.Tensor = pl_module(x1=pre_img, x2=post_img).squeeze()
            if batch_size == 1:
                # squeeze() also removed the singleton batch axis; restore it
                # so that y_pred[i] below selects a sample, not an image row.
                logits = logits.unsqueeze(dim=0)
            y_pred: torch.Tensor = torch.sigmoid(logits)
            y_pred = (y_pred > 0.5).int().detach().cpu().numpy()

        log_list = []
        for i in range(batch_size):
            log_image = wandb.Image(
                # Channel-first -> channel-last for display. The 6000 divisor
                # presumably rescales raw pixel values to roughly [0, 1]
                # -- TODO confirm against the data pipeline.
                post_img[i].permute(1, 2, 0).detach().cpu().numpy() / 6000,
                masks={
                    # Overlay names must match their contents: the model
                    # output goes under "prediction", the label mask under
                    # "ground_truth".
                    "prediction": {
                        "mask_data": y_pred[i],
                        "class_labels": id2label,
                    },
                    "ground_truth": {
                        "mask_data": mask[i].detach().cpu().numpy(),
                        "class_labels": id2label,
                    },
                },
            )
            log_list.append(log_image)

        wandb.log({"predictions": log_list})
26 changes: 15 additions & 11 deletions chabud/datapipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import datatree
import lightning as L
import numpy as np
import torch
import torchdata
import torchdata.dataloader2
Expand Down Expand Up @@ -87,10 +88,15 @@ def _pre_post_mask_tuple(
mask image, and a Python dict containing metadata (e.g. filename, UUID,
fold, comments).
"""
# return just the RGB bands for now
pre = dataset.pre_fire.data[[3, 2, 1], ...].astype(dtype="float32")
post = dataset.post_fire.data[[3, 2, 1], ...].astype(dtype="float32")
mask = dataset.mask.data.astype(dtype="uint8")

return (
torch.as_tensor(data=dataset.pre_fire.astype(dtype="int16").data),
torch.as_tensor(data=dataset.post_fire.astype(dtype="int16").data),
torch.as_tensor(data=dataset.mask.astype(dtype="uint8").data),
torch.as_tensor(data=pre),
torch.as_tensor(data=post),
torch.as_tensor(data=mask),
{
"filename": os.path.basename(dataset.encoding["source"]),
**dataset.attrs,
Expand All @@ -113,7 +119,6 @@ def _stack_tensor_collate_fn(
return pre_tensor, post_tensor, mask_tensor, metadata


# %%
class ChaBuDDataPipeModule(L.LightningDataModule):
"""
Lightning DataModule for loading Hierarchical Data Format 5 (HDF5) files
Expand All @@ -134,13 +139,13 @@ def __init__(
# From https://huggingface.co/datasets/chabud-team/chabud-ecml-pkdd2023/tree/main
"https://huggingface.co/datasets/chabud-team/chabud-ecml-pkdd2023/resolve/main/train_eval.hdf5",
# From https://huggingface.co/datasets/chabud-team/chabud-extra/tree/main
"https://huggingface.co/datasets/chabud-team/chabud-extra/resolve/main/california_0.hdf5",
"https://huggingface.co/datasets/chabud-team/chabud-extra/resolve/main/california_1.hdf5",
"https://huggingface.co/datasets/chabud-team/chabud-extra/resolve/main/california_2.hdf5",
"https://huggingface.co/datasets/chabud-team/chabud-extra/resolve/main/california_3.hdf5",
"https://huggingface.co/datasets/chabud-team/chabud-extra/resolve/main/california_4.hdf5",
# "https://huggingface.co/datasets/chabud-team/chabud-extra/resolve/main/california_0.hdf5",
# "https://huggingface.co/datasets/chabud-team/chabud-extra/resolve/main/california_1.hdf5",
# "https://huggingface.co/datasets/chabud-team/chabud-extra/resolve/main/california_2.hdf5",
# "https://huggingface.co/datasets/chabud-team/chabud-extra/resolve/main/california_3.hdf5",
# "https://huggingface.co/datasets/chabud-team/chabud-extra/resolve/main/california_4.hdf5",
],
batch_size: int = 32,
batch_size: int = 8,
):
"""
Go from multiple HDF5 files to 512x512 chips!
Expand Down Expand Up @@ -184,7 +189,6 @@ def setup(
dp_urls: torchdata.datapipes.iter.IterDataPipe = (
torchdata.datapipes.iter.IterableWrapper(iterable=self.hdf5_urls)
)

# Step 1 - Download and cache HDF5 files to the data/ folder
# Also includes sha256 checksum verification
dp_cache: torchdata.datapipes.iter.IterDataPipe = dp_urls.on_disk_cache(
Expand Down
119 changes: 119 additions & 0 deletions chabud/layers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
"""
Modular block layers of the TinyCD model.
Reference:
- https://github.com/AndreaCodegoni/Tiny_model_4_CD/blob/main/models/layers.py
- Codegoni, A., Lombardi, G., & Ferrari, A. (2022). TINYCD: A (Not So) Deep
Learning Model For Change Detection (arXiv:2207.13159). arXiv.
https://doi.org/10.48550/arXiv.2207.13159
"""
from typing import List, Optional

from torch import Tensor, reshape, stack
from torch.nn import Conv2d, InstanceNorm2d, Module, PReLU, Sequential, Upsample


class PixelwiseLinear(Module):
    """Chain of 1x1 convolutions, each followed by an activation.

    Stage ``i`` maps ``fin[i]`` channels to ``fout[i]`` channels with a
    kernel of size 1, so every pixel is transformed independently of its
    neighbours.

    Args:
        fin: Input channel count of each stage.
        fout: Output channel count of each stage; must have the same
            length as ``fin``.
        last_activation: Activation module used after the final stage.
            When ``None``, the final stage uses ``PReLU`` like all others.
    """

    def __init__(
        self,
        fin: List[int],
        fout: List[int],
        last_activation: Module = None,
    ) -> None:
        assert len(fout) == len(fin)
        super().__init__()

        final_index = len(fin) - 1
        stages = []
        for index, channels_in in enumerate(fin):
            projection = Conv2d(channels_in, fout[index], kernel_size=1, bias=True)
            # Only the very last stage may swap PReLU for a custom activation.
            if index == final_index and last_activation is not None:
                activation = last_activation
            else:
                activation = PReLU()
            stages.append(Sequential(projection, activation))

        self._linears = Sequential(*stages)

    def forward(self, x: Tensor) -> Tensor:
        """Run ``x`` through every stage in order and return the result."""
        return self._linears(x)


class MixingBlock(Module):
    """Interleave the channels of two feature maps and mix them.

    The two inputs are merged so that their channels alternate
    (``x0, y0, x1, y1, ...``), then passed through a 3x3 grouped
    convolution (``groups=ch_out``), a ``PReLU`` and an ``InstanceNorm2d``.

    Args:
        ch_in: Channel count of the interleaved tensor fed to the
            convolution.
        ch_out: Channel count produced by the convolution; also used as
            the number of convolution groups.
    """

    def __init__(
        self,
        ch_in: int,
        ch_out: int,
    ):
        super().__init__()
        grouped_conv = Conv2d(ch_in, ch_out, kernel_size=3, padding=1, groups=ch_out)
        self._convmix = Sequential(grouped_conv, PReLU(), InstanceNorm2d(ch_out))

    def forward(self, x: Tensor, y: Tensor) -> Tensor:
        """Return the mixed features of ``x`` and ``y``.

        Both inputs are indexed as 4-D tensors; axis 0, 2 and 3 of ``x``
        are kept and the paired channels are folded into axis 1.
        """
        # Pair channel k of x with channel k of y on a new axis right after
        # the channel axis, then fold that axis back into the channels.
        paired = stack((x, y), dim=2)
        target_shape = (x.shape[0], -1, x.shape[2], x.shape[3])
        interleaved = reshape(paired, target_shape)

        return self._convmix(interleaved)


class MixingMaskAttentionBlock(Module):
    """Use grouped convolutions to build a sort of attention over two inputs.

    The inputs are mixed by a ``MixingBlock`` and reduced per pixel by a
    ``PixelwiseLinear``. With ``generate_masked=True`` that result
    multiplies a second, independent ``MixingBlock`` of the same inputs
    and the product is instance-normalized.

    Args:
        ch_in: Input channel count passed to each ``MixingBlock``.
        ch_out: Output channel count passed to each ``MixingBlock`` and to
            the final normalization.
        fin: Per-stage input channels of the ``PixelwiseLinear``.
        fout: Per-stage output channels of the ``PixelwiseLinear``.
        generate_masked: If ``True``, return the normalized masked
            features; otherwise return the per-pixel output itself.
    """

    def __init__(
        self,
        ch_in: int,
        ch_out: int,
        fin: List[int],
        fout: List[int],
        generate_masked: bool = False,
    ):
        super().__init__()
        self._mixing = MixingBlock(ch_in, ch_out)
        self._linear = PixelwiseLinear(fin, fout)
        if generate_masked:
            self._final_normalization = InstanceNorm2d(ch_out)
            self._mixing_out = MixingBlock(ch_in, ch_out)
        else:
            self._final_normalization = None
            self._mixing_out = None

    def forward(self, x: Tensor, y: Tensor) -> Tensor:
        """Return the per-pixel output, or the masked features if enabled."""
        attention = self._linear(self._mixing(x, y))

        if self._mixing_out is None:
            features = 0
        else:
            features = self._mixing_out(x, y)

        if self._final_normalization is None:
            return attention
        return self._final_normalization(features * attention)


class UpMask(Module):
    """Upsample a feature map, optionally gate it, then refine it.

    The input is bilinearly upsampled (``align_corners=True``), optionally
    multiplied element-wise by a second tensor, and passed through a 3x3
    depthwise convolution followed by a 1x1 convolution, each with a
    ``PReLU`` and an ``InstanceNorm2d``.

    Args:
        scale_factor: Spatial scale factor given to ``Upsample``.
        nin: Channel count of the input.
        nout: Channel count of the output.
    """

    def __init__(
        self,
        scale_factor: float,
        nin: int,
        nout: int,
    ):
        super().__init__()
        self._upsample = Upsample(
            scale_factor=scale_factor, mode="bilinear", align_corners=True
        )
        refinement = [
            # Depthwise 3x3: one group per input channel.
            Conv2d(nin, nin, kernel_size=3, stride=1, padding=1, groups=nin),
            PReLU(),
            InstanceNorm2d(nin),
            # 1x1 projection from nin to nout channels.
            Conv2d(nin, nout, kernel_size=1, stride=1),
            PReLU(),
            InstanceNorm2d(nout),
        ]
        self._convolution = Sequential(*refinement)

    def forward(self, x: Tensor, y: Optional[Tensor] = None) -> Tensor:
        """Upsample ``x``, multiply by ``y`` when given, and refine.

        Args:
            x: Tensor to upsample.
            y: Optional tensor multiplied element-wise with the upsampled
                ``x`` before the convolutions.
        """
        upsampled = self._upsample(x)
        gated = upsampled if y is None else upsampled * y
        return self._convolution(gated)
Loading

0 comments on commit e9b7255

Please sign in to comment.