From d4463064695ad84761bfaca1166bf305f973260d Mon Sep 17 00:00:00 2001
From: Ali Khan
Date: Thu, 23 May 2024 09:27:54 -0400
Subject: [PATCH] update to accommodate differing numbers of stains/channels (#39)

- update to accommodate differing numbers of stains/channels; stain_# columns can now be added or removed.
- can also use n/a or empty cells to indicate no stain (README is updated)
- also fixes a type conversion bug in pixel size
- expose more bigstitcher options for global optimization
---
 README.md                              |  2 +-
 config/config.yml                      | 31 +++++++++++++-------------
 workflow/Snakefile                     | 15 +++----------
 workflow/macros/AutostitchMacro.ijm    | 24 ++++++++++++++------
 workflow/rules/bids.smk                |  4 ++--
 workflow/rules/common.smk              | 29 +++++++++++++++++++-----
 workflow/scripts/blaze_to_metadata.py  | 17 +++++++-------
 workflow/scripts/create_samples_tsv.py | 14 +------------
 workflow/scripts/zarr_to_n5_bdv.py     |  3 +--
 9 files changed, 71 insertions(+), 68 deletions(-)

diff --git a/README.md b/README.md
index 1b982a2..cb0d045 100644
--- a/README.md
+++ b/README.md
@@ -37,7 +37,7 @@ python3.11 -m venv venv
 source venv/bin/activate
 ```
 
-3. Update the `config/datasets.tsv` spreadsheet to point to your dataset(s). Each dataset's tif files should be in it's own folder or tar file, with no other tif files. Enter the path to each dataset in the `dataset_path` column. The first three columns identify the subject, sample, acquisition, which become part of the resulting filenames (BIDS naming). The `stain_0` and `stain_1` identify what stains were used for each channel. Use `autof` to indicate the autofluorescence channel, as this is used for registration.
+3. Update the `config/datasets.tsv` spreadsheet to point to your dataset(s). Each dataset's tif files should be in its own folder or tar file, with no other tif files. Enter the path to each dataset in the `dataset_path` column. The first three columns identify the subject, sample, and acquisition, which become part of the resulting filenames (BIDS naming). The `stain_0` and `stain_1` columns identify which stains were used for each channel. Use `autof` to indicate the autofluorescence channel. If you have a different number of stains, you can add or remove these columns. If your samples have different numbers of stains, you can leave values blank or use `n/a` to indicate that a sample does not have a particular stain.
 
 Note: The acquisition value must contain either `blaze` or `prestitched`, and defines which workflow will be used. E.g. for LifeCanvas data that is already stitched, you need to include `prestitched` in the acquisition flag.
 
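The new stain handling can be sketched end to end in a few lines of pandas. This is an illustrative, self-contained example, not part of the patch: the subject, path, and stain values are invented, and it assumes a pandas version (>= 1.5) that accepts a `defaultdict` for `dtype`, as the updated Snakefile below does. Empty cells and `n/a` are both in pandas' default NA values, so they parse to `NaN` and drop out per row:

```python
import io
from collections import defaultdict

import pandas as pd

# Invented datasets.tsv: the second sample has only one stain (an n/a cell;
# an empty cell would behave identically).
tsv = io.StringIO(
    "subject\tsample\tacq\tdataset_path\tstain_0\tstain_1\tnum_tiles\n"
    "mouse01\tbrain\tblaze\t/data/ds1\tautof\tPI\t12\n"
    "mouse02\tbrain\tblaze\t/data/ds2\tautof\tn/a\t12\n"
)

# Same dtype trick as the Snakefile: every column is read as str,
# except num_tiles, which stays int.
dtype = defaultdict(lambda: str, num_tiles=int)
datasets = pd.read_csv(tsv, sep="\t", dtype=dtype)

# Same logic as get_stains_by_row() in common.smk: n/a and blank cells
# are NaN, so dropna() keeps only the stains each sample actually has.
stain_columns = datasets.filter(like="stain_").columns
for i in datasets.index:
    print(datasets.loc[i, stain_columns].dropna().tolist())
# -> ['autof', 'PI']
# -> ['autof']
```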
diff --git a/config/config.yml b/config/config.yml
index 5e7c835..7b09a08 100644
--- a/config/config.yml
+++ b/config/config.yml
@@ -32,8 +32,23 @@ bigstitcher:
     downsample_in_x: 4
     downsample_in_y: 4
     downsample_in_z: 1
+    method: "phase_corr"
+    methods:
+      phase_corr: "Phase Correlation"
+      optical_flow: "Lucas-Kanade"
   filter_pairwise_shifts:
+    enabled: 1
     min_r: 0.7
+  global_optimization:
+    enabled: 1
+    strategy: two_round
+    strategies:
+      one_round: "One-Round"
+      one_round_iterative: "One-Round with iterative dropping of bad links"
+      two_round: "Two-Round using metadata to align unconnected Tiles"
+      two_round_iterative: "Two-Round using Metadata to align unconnected Tiles and iterative dropping of bad links"
+
+
   fuse_dataset:
     downsampling: 1
     block_size_x: 4096 # for storage
@@ -113,23 +128,7 @@ bids:
 
 
 
-templates:
-  - ABAv3
-
-atlases:
-  ABAv3:
-    anat: 'resources/ABAv3/P56_Atlas.nii.gz'
-    dseg: 'resources/ABAv3/P56_Annotation.nii.gz'
-    lut: 'resources/ABAv3/labelmapper_ABAv3_to_all.json'
-
-atlasreg:
-  stain: autof
-  level: 4
-  desc: stitchedflatcorr
-
-
 containers:
   spimprep: 'docker://khanlab/spimprep-deps:main'
-  miracl: 'docker://mgoubran/miracl:2.2.5'
diff --git a/workflow/Snakefile b/workflow/Snakefile
index 9b36740..c6dbd76 100644
--- a/workflow/Snakefile
+++ b/workflow/Snakefile
@@ -2,6 +2,7 @@ import json
 from pathlib import Path
 from snakebids import bids, set_bids_spec
 import pandas as pd
+from collections import defaultdict
 import os
 
 
@@ -20,18 +21,8 @@ resampled = Path(root) / "derivatives" / "resampled"
 set_bids_spec("v0_11_0")
 
 # read datasets tsv
-datasets = pd.read_csv(
-    config["datasets"],
-    sep="\t",
-    dtype={
-        "subject": str,
-        "sample": str,
-        "acq": str,
-        "stain_0": str,
-        "stain_1": str,
-        "num_tiles": int,
-    },
-)
+dtype = defaultdict(lambda: str, num_tiles=int)
+datasets = pd.read_csv(config["datasets"], sep="\t", dtype=dtype)
 
 
 include: "rules/common.smk"
diff --git a/workflow/macros/AutostitchMacro.ijm b/workflow/macros/AutostitchMacro.ijm
index 565559e..e9df4df 100644
--- a/workflow/macros/AutostitchMacro.ijm
+++ b/workflow/macros/AutostitchMacro.ijm
@@ -2,10 +2,14 @@ args = getArgument()
 args = split(args, " ");
 
 dataset_xml = args[0]
-ds_x = args[1]
-ds_y = args[2]
-ds_z = args[3]
-min_r = args[4]
+pairwise_method = args[1]
+ds_x = args[2]
+ds_y = args[3]
+ds_z = args[4]
+do_filter = args[5]
+min_r = args[6]
+do_global = args[7]
+global_strategy = args[8]
 
 run("Calculate pairwise shifts ...",
     "select=" + dataset_xml +
@@ -14,12 +18,15 @@ run("Calculate pairwise shifts ...",
     " process_illumination=[All illuminations] " +
     " process_tile=[All tiles] " +
     " process_timepoint=[All Timepoints] " +
-    " method=[Phase Correlation] " +
+    " method=["+ pairwise_method +"] " +
    " channels=[Average Channels] " +
    " downsample_in_x=" + ds_x +
    " downsample_in_y=" + ds_y +
    " downsample_in_z=" + ds_z );
 
+
+if ( do_filter == 1 ){
+
 run("Filter pairwise shifts ...",
    "select=" + dataset_xml +
    " min_r=" + min_r +
@@ -28,8 +35,10 @@ run("Filter pairwise shifts ...",
    " max_shift_in_x=0 " +
    " max_shift_in_y=0 " +
    " max_shift_in_z=0 " +
    " max_displacement=0");
+}
+
+if ( do_global == 1 ){
-
 run("Optimize globally and apply shifts ...",
    "select=" + dataset_xml +
    " process_angle=[All angles] " +
@@ -39,8 +48,9 @@ run("Optimize globally and apply shifts ...",
    " process_timepoint=[All Timepoints] " +
    " relative=2.500 " +
    " absolute=3.500 " +
-    " global_optimization_strategy=[Two-Round using Metadata to align unconnected Tiles and iterative dropping of bad links] " +
+    " global_optimization_strategy=["+global_strategy+"] " +
" fix_group_0-0,"); +} // quit after we are finished eval("script", "System.exit(0);"); diff --git a/workflow/rules/bids.smk b/workflow/rules/bids.smk index aede06f..da708cb 100644 --- a/workflow/rules/bids.smk +++ b/workflow/rules/bids.smk @@ -52,8 +52,8 @@ rule bids_samples_json: rule create_samples_tsv: - input: - tsv=config["datasets"], + params: + datasets_df=datasets, output: tsv=Path(root) / "samples.tsv", log: diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index f7a65a0..e5e7b08 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -39,7 +39,7 @@ def get_all_targets(): sample=datasets.loc[i, "sample"], acq=datasets.loc[i, "acq"], level=config["nifti"]["levels"], - stain=[datasets.loc[i, "stain_0"], datasets.loc[i, "stain_1"]], + stain=get_stains_by_row(i), ) ) @@ -108,15 +108,24 @@ def get_dataset_path(wildcards): return df.dataset_path.to_list()[0] +def get_stains_by_row(i): + + # Select columns that match the pattern 'stain_' + stain_columns = datasets.filter(like="stain_").columns + + # Select values for a given row + return datasets.loc[i, stain_columns].dropna().tolist() + + def get_stains(wildcards): df = datasets.query( f"subject=='{wildcards.subject}' and sample=='{wildcards.sample}' and acq=='{wildcards.acq}'" ) - return [ - df.stain_0.to_list()[0], - df.stain_1.to_list()[0], - ] + # Select columns that match the pattern 'stain_' + stain_columns = df.filter(like="stain_").columns + + return df.iloc[0][stain_columns].dropna().tolist() # bids @@ -140,12 +149,20 @@ def get_fiji_launcher_cmd(wildcards, output, threads, resources): def get_macro_args_bigstitcher(wildcards, input, output): - return "{dataset_xml} {ds_x} {ds_y} {ds_z} {min_r}".format( + return "{dataset_xml} {pairwise_method} {ds_x} {ds_y} {ds_z} {do_filter} {min_r} {do_global} {global_strategy}".format( dataset_xml=output.dataset_xml, + pairwise_method=config["bigstitcher"]["calc_pairwise_shifts"]["methods"][ + config["bigstitcher"]["calc_pairwise_shifts"]["method"] + ], ds_x=config["bigstitcher"]["calc_pairwise_shifts"]["downsample_in_x"], ds_y=config["bigstitcher"]["calc_pairwise_shifts"]["downsample_in_y"], ds_z=config["bigstitcher"]["calc_pairwise_shifts"]["downsample_in_z"], + do_filter=config["bigstitcher"]["filter_pairwise_shifts"]["enabled"], min_r=config["bigstitcher"]["filter_pairwise_shifts"]["min_r"], + do_global=config["bigstitcher"]["global_optimization"]["enabled"], + global_strategy=config["bigstitcher"]["global_optimization"]["strategies"][ + config["bigstitcher"]["global_optimization"]["strategy"] + ], ) diff --git a/workflow/scripts/blaze_to_metadata.py b/workflow/scripts/blaze_to_metadata.py index da3e511..608158e 100644 --- a/workflow/scripts/blaze_to_metadata.py +++ b/workflow/scripts/blaze_to_metadata.py @@ -29,10 +29,6 @@ axes = raw_tif.series[0].get_axes() shape = raw_tif.series[0].get_shape() -#axes normally should be CZYX - if not, then we fail out -if not axes == 'CZYX': - print('ERROR: cannot deal with axes other than CZYX') - #sys.exit(1) ome_dict = xmltodict.parse(raw_tif.ome_metadata) physical_size_x = ome_dict['OME']['Image']['Pixels']['@PhysicalSizeX'] @@ -43,8 +39,11 @@ #read tile configuration from the microscope metadata +if axes == 'CZYX': + tile_config_pattern=r"Blaze\[(?P[0-9]+) x (?P[0-9]+)\]_C(?P[0-9]+)_xyz-Table Z(?P[0-9]+).ome.tif;;\((?P\S+), (?P\S+),(?P\S+), (?P\S+)\)" +elif axes == 'ZYX': + tile_config_pattern=r"Blaze\[(?P[0-9]+) x (?P[0-9]+)\]_C(?P[0-9]+)_xyz-Table Z(?P[0-9]+).ome.tif;;\((?P\S+), (?P\S+), (?P\S+)\)" 
-tile_config_pattern=r"Blaze\[(?P[0-9]+) x (?P[0-9]+)\]_C(?P[0-9]+)_xyz-Table Z(?P[0-9]+).ome.tif;;\((?P\S+), (?P\S+),(?P\S+), (?P\S+)\)" tile_pattern = re.compile(tile_config_pattern) #put it in 3 maps, one for each coord, indexed by tilex, tiley, channel, and aslice @@ -80,14 +79,14 @@ metadata['chunks'] = chunks metadata['axes'] = axes metadata['shape'] = shape -metadata['physical_size_x'] = physical_size_x -metadata['physical_size_y'] = physical_size_y -metadata['physical_size_z'] = physical_size_z +metadata['physical_size_x'] = float(physical_size_x) +metadata['physical_size_y'] = float(physical_size_y) +metadata['physical_size_z'] = float(physical_size_z) metadata['lookup_tile_offset_x'] = map_x metadata['lookup_tile_offset_y'] = map_y metadata['lookup_tile_offset_z'] = map_z metadata['ome_full_metadata'] = ome_dict -metadata['PixelSize'] = [ float(physical_size_z/1000.0), float(physical_size_y/1000.0), float(physical_size_x/1000.0) ] #zyx since OME-Zarr is ZYX +metadata['PixelSize'] = [ metadata['physical_size_z']/1000.0, metadata['physical_size_y']/1000.0, metadata['physical_size_x']/1000.0 ] #zyx since OME-Zarr is ZYX metadata['PixelSizeUnits'] = 'mm' #write metadata to json diff --git a/workflow/scripts/create_samples_tsv.py b/workflow/scripts/create_samples_tsv.py index 87f4d58..28b9a53 100644 --- a/workflow/scripts/create_samples_tsv.py +++ b/workflow/scripts/create_samples_tsv.py @@ -1,18 +1,6 @@ import pandas as pd -# read datasets tsv -df = pd.read_csv( - snakemake.input.tsv, - sep="\t", - dtype={ - "subject": str, - "sample": str, - "acq": str, - "stain_0": str, - "stain_1": str, - "num_tiles": int, - }, -) +df = snakemake.params.datasets_df df['participant_id'] = 'sub-' + df['subject'] df['sample_id'] = 'sample-' + df['sample'] diff --git a/workflow/scripts/zarr_to_n5_bdv.py b/workflow/scripts/zarr_to_n5_bdv.py index 14a40e2..3869983 100644 --- a/workflow/scripts/zarr_to_n5_bdv.py +++ b/workflow/scripts/zarr_to_n5_bdv.py @@ -66,8 +66,7 @@ def update_xml_h5_to_n5(in_xml,out_xml,in_n5): affine[1,3] = -metadata['lookup_tile_offset_y'][key] / float(metadata['physical_size_y']) affine[2,3] = -metadata['lookup_tile_offset_z'][key] / float(metadata['physical_size_z']) bdv_writer.append_view(stack=None, - #virtual_stack_dim=(shape[3],shape[2],shape[1]), - virtual_stack_dim=(metadata['shape'][1],metadata['shape'][2],metadata['shape'][3]), + virtual_stack_dim=(n_z,n_y,n_x), tile=i_tile,channel=i_chan, voxel_units='μm', voxel_size_xyz=(metadata['physical_size_x'],