
Commit dc816ee

ziw-liu and edyoshikun committed
Masked autoencoder pre-training for virtual staining models (#67)
* refactor data loading into its own module
* update type annotations
* move the logging module out
* move old logging into utils
* rename tests to match module name
* bump torch
* draft fcmae encoder
* add stem to the encoder
* wip: masked stem layernorm
* wip: patchify masked features for linear
* use mlp from timm
* hack: POC training script for FCMAE
* fix mask for fitting
* remove training script
* default architecture
* fine-tuning options
* fix CLI for fine-tuning
* draft combined data module
* fix import
* manual validation loss reduction
* update linting (new Black version has different rules)
* update development guide
* update type hints
* bump iohub
* draft CTMC v1 dataset
* update tests
* move test_data
* remove path conversion
* configurable normalizations (#68)
  * initial commit adding the normalization
  * adding dataset_statistics to each FOV to facilitate the configurable augmentations
  * fix indentation
  * ruff
  * test preprocessing
  * remove redundant field
  * cleanup

  Co-authored-by: Ziwen Liu <ziwen.liu@czbiohub.org>
* fix CTMC dataloading
* add example CTMC v1 loading script
* change the normalization and augmentations default from None to an empty list
* invert intensity transform
* concatenated data module
* subsample videos
* livecell dataset
* all sample fields are optional
* fix multi-dataloader validation
* lint
* fix preprocessing for varying array shapes (i.e. the AICS dataset)
* update loading scripts
* fix CombineMode
* compose normalizations for predict and test stages
* black
* fix normalization in example config
* fix collate when multi-sample transform is not used
* DDP caching fixes
* fix caching when using combined loader
* move log values to GPU before syncing (Lightning-AI/pytorch-lightning#18803)
* remove normalize_source from configs
* typing fixes
* fix test data path
* fix test dataset
* add docstring for ConcatDataModule
* format

Co-authored-by: Eduardo Hirata-Miyasaki <edhiratam@gmail.com>
1 parent 435f659 commit dc816ee

36 files changed: +1527 −264 lines
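The headline change is a masked autoencoder (FCMAE) path for pre-training the virtual staining encoder, exposed through a new `FcmaeUNet` Lightning module. A minimal construction sketch, mirroring the test added in `tests/light/test_engine.py` later in this diff; the channel counts and the 0.6 mask ratio are illustrative values taken from that test, and reading `fit_mask_ratio` as the fraction of masked patches during fitting is an assumption:

```python
from viscy.light.engine import FcmaeUNet

# Construct the FCMAE variant of the virtual staining UNet for masked pre-training.
# Values mirror tests/light/test_engine.py; fit_mask_ratio is assumed to be the
# fraction of input patches that are masked while fitting.
model = FcmaeUNet(
    model_config=dict(in_channels=3, out_channels=1),
    fit_mask_ratio=0.6,
)
```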

CONTRIBUTING.md

+13-1
@@ -10,7 +10,19 @@ then make an editable installation with all the optional dependencies:
 pip install -e ".[dev,visual,metrics]"
 ```
 
-## Testing
+## CI requirements
+
+Lint with Ruff:
+
+```sh
+ruff check viscy
+```
+
+Format the code with Black:
+
+```sh
+black viscy
+```
 
 Run tests with `pytest`:

examples/configs/fit_example.yml

+14-2
@@ -37,6 +37,19 @@ data:
     batch_size: 32
     num_workers: 16
     yx_patch_size: [256, 256]
+    normalizations:
+      - class_path: viscy.transforms.NormalizeSampled
+        init_args:
+          keys: [source]
+          level: "fov_statistics"
+          subtrahend: "mean"
+          divisor: "std"
+      - class_path: viscy.transforms.NormalizeSampled
+        init_args:
+          keys: [target_1]
+          level: "fov_statistics"
+          subtrahend: "median"
+          divisor: "iqr"
     augmentations:
      - class_path: viscy.transforms.RandWeightedCropd
        init_args:
@@ -74,5 +87,4 @@ data:
          sigma_z: [0.25, 1.5]
          sigma_y: [0.25, 1.5]
          sigma_x: [0.25, 1.5]
-    caching: false
-    normalize_source: true
+    caching: false
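In Python, the two transforms above correspond to `viscy.transforms.NormalizeSampled` instances passed to the data module. A rough sketch, assuming `NormalizeSampled` accepts the same keyword arguments as the `init_args` shown in the YAML and that `normalizations` is an `HCSDataModule` argument (as the config layout suggests); the path and channel names are placeholders:

```python
from viscy.data.hcs import HCSDataModule
from viscy.transforms import NormalizeSampled

# Per-channel normalization transforms, matching the YAML block above.
normalizations = [
    NormalizeSampled(
        keys=["source"], level="fov_statistics", subtrahend="mean", divisor="std"
    ),
    NormalizeSampled(
        keys=["target_1"], level="fov_statistics", subtrahend="median", divisor="iqr"
    ),
]

# Placeholder data module wiring; "plate.zarr" and the channel names are hypothetical.
dm = HCSDataModule(
    data_path="plate.zarr",
    source_channel=["source"],
    target_channel=["target_1"],
    z_window_size=5,
    batch_size=32,
    num_workers=16,
    yx_patch_size=[256, 256],
    normalizations=normalizations,
    augmentations=[],
)
```

Per the commit message, the same list is composed into the test and predict pipelines, which is why the `normalize_source` flag is dropped from the predict and test configs below.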

examples/configs/predict_example.yml

-1
@@ -62,7 +62,6 @@ predict:
        - 256
        - 256
      caching: false
-      normalize_source: false
      predict_scale_source: null
  return_predictions: false
  ckpt_path: null

examples/configs/test_example.yml

-1
@@ -61,7 +61,6 @@ data:
      - 256
      - 256
    caching: false
-    normalize_source: false
    ground_truth_masks: null
 ckpt_path: null
 verbose: true

examples/demo_dlmbl/debug_log_graph.py

+1-1
@@ -19,7 +19,7 @@
 from torch.utils.tensorboard import SummaryWriter  # for logging to tensorboard
 
 # HCSDataModule makes it easy to load data during training.
-from viscy.light.data import HCSDataModule
+from viscy.data.hcs import HCSDataModule
 
 # Trainer class and UNet.
 from viscy.light.engine import VSUNet

examples/demo_dlmbl/solution.py

+1-1
@@ -83,7 +83,7 @@
 from torch.utils.tensorboard import SummaryWriter  # for logging to tensorboard
 
 # HCSDataModule makes it easy to load data during training.
-from viscy.light.data import HCSDataModule
+from viscy.data.hcs import HCSDataModule
 
 # training augmentations
 from viscy.transforms import (
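Existing scripts and notebooks that imported the data module from `viscy.light.data` only need the import path updated after the data loading refactor; the engine imports are unchanged, as shown in the two diffs above:

```python
# Old location (removed in this commit):
# from viscy.light.data import HCSDataModule

# New location after the data loading refactor:
from viscy.data.hcs import HCSDataModule

# Model and trainer imports are unchanged:
from viscy.light.engine import VSUNet
```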

pyproject.toml

+13-8
@@ -10,8 +10,8 @@ requires-python = ">=3.10"
 license = { file = "LICENSE" }
 authors = [{ name = "CZ Biohub SF", email = "compmicro@czbiohub.org" }]
 dependencies = [
-    "iohub==0.1.0rc0",
-    "torch>=2.0.0",
+    "iohub==0.1.0",
+    "torch>=2.1.2",
     "timm>=0.9.5",
     "tensorboard>=2.13.0",
     "lightning>=2.0.1",
@@ -30,7 +30,15 @@ metrics = [
     "ptflops>=0.7",
 ]
 visual = ["ipykernel", "graphviz", "torchview"]
-dev = ["pytest", "pytest-cov", "hypothesis", "profilehooks", "onnxruntime"]
+dev = [
+    "pytest",
+    "pytest-cov",
+    "hypothesis",
+    "ruff",
+    "black",
+    "profilehooks",
+    "onnxruntime",
+]
 
 [project.scripts]
 viscy = "viscy.cli.cli:main"
@@ -39,12 +47,9 @@ viscy = "viscy.cli.cli:main"
 write_to = "viscy/_version.py"
 
 [tool.black]
-src = ["viscy"]
 line-length = 88
 
 [tool.ruff]
 src = ["viscy", "tests"]
-extend-select = ["I001"]
-
-[tool.ruff.isort]
-known-first-party = ["viscy"]
+lint.extend-select = ["I001"]
+lint.isort.known-first-party = ["viscy"]

tests/conftest.py

+2
@@ -36,6 +36,8 @@ def preprocessed_hcs_dataset(tmp_path_factory: TempPathFactory) -> Path:
     norm_meta = {channel: {"dataset_statistics": expected} for channel in channel_names}
     with open_ome_zarr(dataset_path, mode="r+") as dataset:
         dataset.zattrs["normalization"] = norm_meta
+        for _, fov in dataset.positions():
+            fov.zattrs["normalization"] = norm_meta
     return dataset_path
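The fixture now writes the normalization metadata to every FOV as well as the plate, matching what the preprocessing step produces (see `test_preprocess` below). A hypothetical sketch of inspecting that layout with iohub; `"plate.zarr"` and the `"Phase"` channel name are placeholders:

```python
from iohub import open_ome_zarr

with open_ome_zarr("plate.zarr") as dataset:
    # dataset-level statistics stored in the plate metadata
    dataset_stats = dataset.zattrs["normalization"]["Phase"]["dataset_statistics"]
    # per-FOV statistics are now also stored on each position
    for name, fov in dataset.positions():
        fov_stats = fov.zattrs["normalization"]["Phase"]["fov_statistics"]
        print(name, fov_stats)
```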

tests/data/__init__.py

Whitespace-only changes.

tests/data/test_data.py

+105
from pathlib import Path

from iohub import open_ome_zarr
from monai.transforms import RandSpatialCropSamplesd
from pytest import mark

from viscy.data.hcs import HCSDataModule
from viscy.light.trainer import VSTrainer


@mark.parametrize("default_channels", [True, False])
def test_preprocess(small_hcs_dataset: Path, default_channels: bool):
    data_path = small_hcs_dataset
    if default_channels:
        channel_names = -1
    else:
        with open_ome_zarr(data_path) as dataset:
            channel_names = dataset.channel_names
    trainer = VSTrainer(accelerator="cpu")
    trainer.preprocess(data_path, channel_names=channel_names, num_workers=2)
    with open_ome_zarr(data_path) as dataset:
        channel_names = dataset.channel_names
        for channel in channel_names:
            assert "dataset_statistics" in dataset.zattrs["normalization"][channel]
        for _, fov in dataset.positions():
            norm_metadata = fov.zattrs["normalization"]
            for channel in channel_names:
                assert channel in norm_metadata
                assert "dataset_statistics" in norm_metadata[channel]
                assert "fov_statistics" in norm_metadata[channel]


@mark.parametrize("multi_sample_augmentation", [True, False])
def test_datamodule_setup_fit(preprocessed_hcs_dataset, multi_sample_augmentation):
    data_path = preprocessed_hcs_dataset
    z_window_size = 5
    channel_split = 2
    split_ratio = 0.8
    yx_patch_size = [128, 96]
    batch_size = 4
    with open_ome_zarr(data_path) as dataset:
        channel_names = dataset.channel_names
    if multi_sample_augmentation:
        transforms = [
            RandSpatialCropSamplesd(
                keys=channel_names,
                roi_size=[z_window_size, *yx_patch_size],
                num_samples=2,
            )
        ]
    else:
        transforms = []
    dm = HCSDataModule(
        data_path=data_path,
        source_channel=channel_names[:channel_split],
        target_channel=channel_names[channel_split:],
        z_window_size=z_window_size,
        batch_size=batch_size,
        num_workers=0,
        augmentations=transforms,
        architecture="3D",
        split_ratio=split_ratio,
        yx_patch_size=yx_patch_size,
    )
    dm.setup(stage="fit")
    for batch in dm.train_dataloader():
        assert batch["source"].shape == (
            batch_size,
            channel_split,
            z_window_size,
            *yx_patch_size,
        )
        assert batch["target"].shape == (
            batch_size,
            len(channel_names) - channel_split,
            z_window_size,
            *yx_patch_size,
        )


def test_datamodule_setup_predict(preprocessed_hcs_dataset):
    data_path = preprocessed_hcs_dataset
    z_window_size = 5
    channel_split = 2
    with open_ome_zarr(data_path) as dataset:
        channel_names = dataset.channel_names
        img = next(dataset.positions())[1][0]
        total_p = len(list(dataset.positions()))
    dm = HCSDataModule(
        data_path=data_path,
        source_channel=channel_names[:channel_split],
        target_channel=channel_names[channel_split:],
        z_window_size=z_window_size,
        batch_size=2,
        num_workers=0,
    )
    dm.setup(stage="predict")
    dataset = dm.predict_dataset
    assert len(dataset) == total_p * 2 * (img.slices - z_window_size + 1)
    assert dataset[0]["source"].shape == (
        channel_split,
        z_window_size,
        img.height,
        img.width,
    )

tests/light/test_data.py

-70
This file was deleted.

tests/light/test_engine.py

+7
from viscy.light.engine import FcmaeUNet


def test_fcmae_vsunet() -> None:
    model = FcmaeUNet(
        model_config=dict(in_channels=3, out_channels=1), fit_mask_ratio=0.6
    )

tests/unet/__init__.py

Whitespace-only changes.
File renamed without changes.
