
update to accommodate differing numbers of stains/channels (#39)
- update to accommodate differing numbers of stains/channels; `stain_#` columns can now be added or removed
  - can also use `n/a` or empty cells to indicate no stain (README is updated)
- also fixes a type conversion bug in pixel size
- exposes more BigStitcher options for global optimization
akhanf authored May 23, 2024
1 parent 58add2e commit d446306
Showing 9 changed files with 71 additions and 68 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -37,7 +37,7 @@ python3.11 -m venv venv
source venv/bin/activate
```

- 3. Update the `config/datasets.tsv` spreadsheet to point to your dataset(s). Each dataset's tif files should be in its own folder or tar file, with no other tif files. Enter the path to each dataset in the `dataset_path` column. The first three columns identify the subject, sample, and acquisition, which become part of the resulting filenames (BIDS naming). The `stain_0` and `stain_1` columns identify which stains were used for each channel. Use `autof` to indicate the autofluorescence channel, as this is used for registration.
+ 3. Update the `config/datasets.tsv` spreadsheet to point to your dataset(s). Each dataset's tif files should be in its own folder or tar file, with no other tif files. Enter the path to each dataset in the `dataset_path` column. The first three columns identify the subject, sample, and acquisition, which become part of the resulting filenames (BIDS naming). The `stain_0` and `stain_1` columns identify which stains were used for each channel. Use `autof` to indicate the autofluorescence channel. If your data has a different number of channels, add or remove `stain_#` columns accordingly. If individual samples have fewer stains, leave the extra cells blank or use `n/a`.

Note: The acquisition value must contain either `blaze` or `prestitched`, and defines which workflow will be used. E.g. for LifeCanvas data that is already stitched, you need to include `prestitched` in the acquisition flag.
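For illustration, a hypothetical `config/datasets.tsv` under the new scheme might look like the sketch below (tab-separated in the real file; the paths and the non-`autof` stain names are invented, and `num_tiles` is the tile-count column the workflow also reads):

```
subject   sample   acq     dataset_path        stain_0   stain_1   stain_2   num_tiles
mouse01   brain    blaze   /data/mouse01.tar   autof     PI        GFP       4
mouse02   brain    blaze   /data/mouse02.tar   autof     PI        n/a       4
```

Here `mouse02` has no third stain, so its `stain_2` cell is `n/a`.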

31 changes: 15 additions & 16 deletions config/config.yml
@@ -32,8 +32,23 @@ bigstitcher:
    downsample_in_x: 4
    downsample_in_y: 4
    downsample_in_z: 1
+     method: "phase_corr"
+     methods:
+       phase_corr: "Phase Correlation"
+       optical_flow: "Lucas-Kanade"
+   filter_pairwise_shifts:
+     enabled: 1
+     min_r: 0.7
+   global_optimization:
+     enabled: 1
+     strategy: two_round
+     strategies:
+       one_round: "One-Round"
+       one_round_iterative: "One-Round with iterative dropping of bad links"
+       two_round: "Two-Round using metadata to align unconnected Tiles"
+       two_round_iterative: "Two-Round using Metadata to align unconnected Tiles and iterative dropping of bad links"


  fuse_dataset:
    downsampling: 1
    block_size_x: 4096 # for storage
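With the new keys exposed, switching algorithms is a one-line config edit; a sketch (using only the keys introduced above) that selects optical flow and one-round optimization with link dropping:

```
bigstitcher:
  calc_pairwise_shifts:
    method: "optical_flow"          # resolved to "Lucas-Kanade" via the methods map
  global_optimization:
    enabled: 1
    strategy: one_round_iterative   # resolved via the strategies map
```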
@@ -113,23 +128,7 @@ bids:



- templates:
-   - ABAv3
-
- atlases:
-   ABAv3:
-     anat: 'resources/ABAv3/P56_Atlas.nii.gz'
-     dseg: 'resources/ABAv3/P56_Annotation.nii.gz'
-     lut: 'resources/ABAv3/labelmapper_ABAv3_to_all.json'
-
- atlasreg:
-   stain: autof
-   level: 4
-   desc: stitchedflatcorr
-
-
-
containers:
  spimprep: 'docker://khanlab/spimprep-deps:main'
  miracl: 'docker://mgoubran/miracl:2.2.5'

15 changes: 3 additions & 12 deletions workflow/Snakefile
@@ -2,6 +2,7 @@ import json
from pathlib import Path
from snakebids import bids, set_bids_spec
import pandas as pd
+ from collections import defaultdict
import os


@@ -20,18 +21,8 @@ resampled = Path(root) / "derivatives" / "resampled"
set_bids_spec("v0_11_0")

# read datasets tsv
- datasets = pd.read_csv(
-     config["datasets"],
-     sep="\t",
-     dtype={
-         "subject": str,
-         "sample": str,
-         "acq": str,
-         "stain_0": str,
-         "stain_1": str,
-         "num_tiles": int,
-     },
- )
+ dtype = defaultdict(lambda: str, num_tiles=int)
+ datasets = pd.read_csv(config["datasets"], sep="\t", dtype=dtype)


include: "rules/common.smk"
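The `defaultdict` reads every column not listed explicitly as `str`, so any number of `stain_#` columns is accepted without enumerating them, while `num_tiles` is still parsed as `int`. A minimal sketch of the behaviour (toy data, not the real spreadsheet; passing a `defaultdict` as `dtype` needs pandas >= 1.5):

```python
from collections import defaultdict
from io import StringIO

import pandas as pd

# Columns not listed explicitly fall back to str; num_tiles alone is int.
dtype = defaultdict(lambda: str, num_tiles=int)

tsv = StringIO("subject\tstain_0\tstain_1\tnum_tiles\n01\tautof\tPI\t4\n")
df = pd.read_csv(tsv, sep="\t", dtype=dtype)
print(df.dtypes)  # subject, stain_0, stain_1 -> object; num_tiles -> int64
```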
24 changes: 17 additions & 7 deletions workflow/macros/AutostitchMacro.ijm
@@ -2,10 +2,14 @@ args = getArgument()
args = split(args, " ");

dataset_xml = args[0]
- ds_x = args[1]
- ds_y = args[2]
- ds_z = args[3]
- min_r = args[4]
+ pairwise_method = args[1]
+ ds_x = args[2]
+ ds_y = args[3]
+ ds_z = args[4]
+ do_filter = args[5]
+ min_r = args[6]
+ do_global = args[7]
+ global_strategy = args[8]

run("Calculate pairwise shifts ...",
"select=" + dataset_xml +
@@ -14,12 +18,15 @@ run("Calculate pairwise shifts ...",
" process_illumination=[All illuminations] " +
" process_tile=[All tiles] " +
" process_timepoint=[All Timepoints] " +
" method=[Phase Correlation] " +
" method=["+ pairwise_method +"] " +
" channels=[Average Channels] " +
" downsample_in_x=" + ds_x +
" downsample_in_y=" + ds_y +
" downsample_in_z=" + ds_z );


+ if ( do_filter == 1 ){

run("Filter pairwise shifts ...",
"select=" + dataset_xml +
" min_r=" + min_r +
@@ -28,8 +35,10 @@ run("Filter pairwise shifts ...",
" max_shift_in_y=0 " +
" max_shift_in_z=0 " +
" max_displacement=0");
+ }

+ if ( do_global == 1 ){


run("Optimize globally and apply shifts ...",
"select=" + dataset_xml +
" process_angle=[All angles] " +
@@ -39,8 +48,9 @@ run("Optimize globally and apply shifts ...",
" process_timepoint=[All Timepoints] " +
" relative=2.500 " +
" absolute=3.500 " +
" global_optimization_strategy=[Two-Round using Metadata to align unconnected Tiles and iterative dropping of bad links] " +
" global_optimization_strategy=["+global_strategy+"] " +
" fix_group_0-0,");
+ }

// quit after we are finished
eval("script", "System.exit(0);");
4 changes: 2 additions & 2 deletions workflow/rules/bids.smk
@@ -52,8 +52,8 @@ rule bids_samples_json:


rule create_samples_tsv:
-     input:
-         tsv=config["datasets"],
+     params:
+         datasets_df=datasets,
output:
tsv=Path(root) / "samples.tsv",
log:
29 changes: 23 additions & 6 deletions workflow/rules/common.smk
@@ -39,7 +39,7 @@ def get_all_targets():
sample=datasets.loc[i, "sample"],
acq=datasets.loc[i, "acq"],
level=config["nifti"]["levels"],
- stain=[datasets.loc[i, "stain_0"], datasets.loc[i, "stain_1"]],
+ stain=get_stains_by_row(i),
)
)

@@ -108,15 +108,24 @@ def get_dataset_path(wildcards):
return df.dataset_path.to_list()[0]


+ def get_stains_by_row(i):
+
+     # Select columns that match the pattern 'stain_'
+     stain_columns = datasets.filter(like="stain_").columns
+
+     # Select values for a given row
+     return datasets.loc[i, stain_columns].dropna().tolist()


def get_stains(wildcards):
df = datasets.query(
f"subject=='{wildcards.subject}' and sample=='{wildcards.sample}' and acq=='{wildcards.acq}'"
)

- return [
-     df.stain_0.to_list()[0],
-     df.stain_1.to_list()[0],
- ]
+ # Select columns that match the pattern 'stain_'
+ stain_columns = df.filter(like="stain_").columns
+
+ return df.iloc[0][stain_columns].dropna().tolist()


# bids
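Both helpers now share the same pandas idiom: select every `stain_*` column, then drop missing values so samples with fewer stains simply yield shorter lists. A self-contained sketch with invented rows (pandas parses `n/a` and empty cells as NaN by default, which `dropna()` then removes):

```python
import pandas as pd

datasets = pd.DataFrame(
    {
        "subject": ["01", "02"],
        "stain_0": ["autof", "autof"],
        "stain_1": ["PI", None],  # None stands in for an n/a / empty cell
    }
)

# Select columns whose name contains 'stain_'
stain_columns = datasets.filter(like="stain_").columns

print(datasets.loc[0, stain_columns].dropna().tolist())  # ['autof', 'PI']
print(datasets.loc[1, stain_columns].dropna().tolist())  # ['autof']
```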
@@ -140,12 +149,20 @@ def get_fiji_launcher_cmd(wildcards, output, threads, resources):


def get_macro_args_bigstitcher(wildcards, input, output):
return "{dataset_xml} {ds_x} {ds_y} {ds_z} {min_r}".format(
return "{dataset_xml} {pairwise_method} {ds_x} {ds_y} {ds_z} {do_filter} {min_r} {do_global} {global_strategy}".format(
dataset_xml=output.dataset_xml,
pairwise_method=config["bigstitcher"]["calc_pairwise_shifts"]["methods"][
config["bigstitcher"]["calc_pairwise_shifts"]["method"]
],
ds_x=config["bigstitcher"]["calc_pairwise_shifts"]["downsample_in_x"],
ds_y=config["bigstitcher"]["calc_pairwise_shifts"]["downsample_in_y"],
ds_z=config["bigstitcher"]["calc_pairwise_shifts"]["downsample_in_z"],
do_filter=config["bigstitcher"]["filter_pairwise_shifts"]["enabled"],
min_r=config["bigstitcher"]["filter_pairwise_shifts"]["min_r"],
do_global=config["bigstitcher"]["global_optimization"]["enabled"],
global_strategy=config["bigstitcher"]["global_optimization"]["strategies"][
config["bigstitcher"]["global_optimization"]["strategy"]
],
)


17 changes: 8 additions & 9 deletions workflow/scripts/blaze_to_metadata.py
@@ -29,10 +29,6 @@
axes = raw_tif.series[0].get_axes()
shape = raw_tif.series[0].get_shape()

- #axes normally should be CZYX - if not, then we fail out
- if not axes == 'CZYX':
-     print('ERROR: cannot deal with axes other than CZYX')
-     #sys.exit(1)

ome_dict = xmltodict.parse(raw_tif.ome_metadata)
physical_size_x = ome_dict['OME']['Image']['Pixels']['@PhysicalSizeX']
@@ -43,8 +39,11 @@


#read tile configuration from the microscope metadata
+ if axes == 'CZYX':
+     tile_config_pattern=r"Blaze\[(?P<tilex>[0-9]+) x (?P<tiley>[0-9]+)\]_C(?P<channel>[0-9]+)_xyz-Table Z(?P<zslice>[0-9]+).ome.tif;;\((?P<x>\S+), (?P<y>\S+),(?P<chan>\S+), (?P<z>\S+)\)"
+ elif axes == 'ZYX':
+     tile_config_pattern=r"Blaze\[(?P<tilex>[0-9]+) x (?P<tiley>[0-9]+)\]_C(?P<channel>[0-9]+)_xyz-Table Z(?P<zslice>[0-9]+).ome.tif;;\((?P<x>\S+), (?P<y>\S+), (?P<z>\S+)\)"

- tile_config_pattern=r"Blaze\[(?P<tilex>[0-9]+) x (?P<tiley>[0-9]+)\]_C(?P<channel>[0-9]+)_xyz-Table Z(?P<zslice>[0-9]+).ome.tif;;\((?P<x>\S+), (?P<y>\S+),(?P<chan>\S+), (?P<z>\S+)\)"
tile_pattern = re.compile(tile_config_pattern)

#put it in 3 maps, one for each coord, indexed by tilex, tiley, channel, and aslice
@@ -80,14 +79,14 @@
metadata['chunks'] = chunks
metadata['axes'] = axes
metadata['shape'] = shape
- metadata['physical_size_x'] = physical_size_x
- metadata['physical_size_y'] = physical_size_y
- metadata['physical_size_z'] = physical_size_z
+ metadata['physical_size_x'] = float(physical_size_x)
+ metadata['physical_size_y'] = float(physical_size_y)
+ metadata['physical_size_z'] = float(physical_size_z)
metadata['lookup_tile_offset_x'] = map_x
metadata['lookup_tile_offset_y'] = map_y
metadata['lookup_tile_offset_z'] = map_z
metadata['ome_full_metadata'] = ome_dict
- metadata['PixelSize'] = [ float(physical_size_z/1000.0), float(physical_size_y/1000.0), float(physical_size_x/1000.0) ] #zyx since OME-Zarr is ZYX
+ metadata['PixelSize'] = [ metadata['physical_size_z']/1000.0, metadata['physical_size_y']/1000.0, metadata['physical_size_x']/1000.0 ] #zyx since OME-Zarr is ZYX
metadata['PixelSizeUnits'] = 'mm'

#write metadata to json
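A sketch of the new `ZYX` branch in action on an invented tile-configuration entry (real entries come from the Blaze OME metadata; note the trailing tuple has no channel coordinate):

```python
import re

zyx_pattern = (
    r"Blaze\[(?P<tilex>[0-9]+) x (?P<tiley>[0-9]+)\]_C(?P<channel>[0-9]+)"
    r"_xyz-Table Z(?P<zslice>[0-9]+).ome.tif;;"
    r"\((?P<x>\S+), (?P<y>\S+), (?P<z>\S+)\)"
)

# Hypothetical tile-configuration line for a ZYX acquisition
line = "Blaze[00 x 01]_C00_xyz-Table Z0000.ome.tif;;(0.0, 1250.5, 0.0)"
m = re.search(zyx_pattern, line)
print(m.groupdict())
# {'tilex': '00', 'tiley': '01', 'channel': '00', 'zslice': '0000',
#  'x': '0.0', 'y': '1250.5', 'z': '0.0'}
```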
14 changes: 1 addition & 13 deletions workflow/scripts/create_samples_tsv.py
@@ -1,18 +1,6 @@
import pandas as pd

- # read datasets tsv
- df = pd.read_csv(
-     snakemake.input.tsv,
-     sep="\t",
-     dtype={
-         "subject": str,
-         "sample": str,
-         "acq": str,
-         "stain_0": str,
-         "stain_1": str,
-         "num_tiles": int,
-     },
- )
+ df = snakemake.params.datasets_df

df['participant_id'] = 'sub-' + df['subject']
df['sample_id'] = 'sample-' + df['sample']
3 changes: 1 addition & 2 deletions workflow/scripts/zarr_to_n5_bdv.py
@@ -66,8 +66,7 @@ def update_xml_h5_to_n5(in_xml,out_xml,in_n5):
affine[1,3] = -metadata['lookup_tile_offset_y'][key] / float(metadata['physical_size_y'])
affine[2,3] = -metadata['lookup_tile_offset_z'][key] / float(metadata['physical_size_z'])
bdv_writer.append_view(stack=None,
- #virtual_stack_dim=(shape[3],shape[2],shape[1]),
- virtual_stack_dim=(metadata['shape'][1],metadata['shape'][2],metadata['shape'][3]),
+ virtual_stack_dim=(n_z,n_y,n_x),
tile=i_tile,channel=i_chan,
voxel_units='μm',
voxel_size_xyz=(metadata['physical_size_x'],
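The affine entries above convert stage offsets in µm into voxel units by dividing by the physical voxel size; a toy check with invented numbers:

```python
# Hypothetical tile offset (um) and voxel size (um); real values come from
# the metadata JSON written by blaze_to_metadata.py
offset_y_um = 1250.5
physical_size_y_um = 1.8

print(-offset_y_um / physical_size_y_um)  # y translation in voxels: ~-694.72
```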

