Skip to content

Commit

Permalink
Tune-up chunking (#47)
Browse files Browse the repository at this point in the history
* update chunking strategy
  - smaller 3d chunks for bigstitcher
  - moves rechunking from zarr2bdv to apply_flatfield_corr
* make logging visible for bigstitcher (for coiled debugging)
  • Loading branch information
akhanf authored Sep 12, 2024
1 parent c2810e4 commit e32ec07
Show file tree
Hide file tree
Showing 11 changed files with 48 additions and 28 deletions.
16 changes: 10 additions & 6 deletions config/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,17 +55,17 @@ bigstitcher:

fuse_dataset:
downsampling: 1
block_size_x: 4096 # for storage
block_size_y: 4096
block_size_x: 256 # for storage
block_size_y: 256
block_size_z: 1
block_size_factor_x: 2 #e.g. 2 will use 2*block_size for computation
block_size_factor_y: 2
block_size_factor_z: 1
block_size_factor_x: 1 #e.g. 2 will use 2*block_size for computation
block_size_factor_y: 1
block_size_factor_z: 256

ome_zarr:
desc: stitchedflatcorr
max_downsampling_layers: 5 # e.g. 4 levels: { 0: orig, 1: ds2, 2: ds4, 3: ds8, 4: ds16}
rechunk_size: #z, y, x
rechunk_size: #z, y, x
- 1
- 4096
- 4096
Expand All @@ -76,7 +76,11 @@ ome_zarr:
default_color: 'FFFFFF'
color_mapping:
autof: 'FFFFFF'
AutoF: 'FFFFFF'
abeta: '00FF00'
Abeta: '00FF00'
PI: 'FFFFFF'
AlphaSynuclein: '00FF00'
defaults:
active: True
coefficient: 1.0
Expand Down
11 changes: 11 additions & 0 deletions spimprep_run
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/bin/bash
#
# Convenience wrapper: run snakemake inside the spimprep-deps container via
# Singularity, forwarding any extra command-line arguments to snakemake.

# Fail early with a clear message if singularity is unavailable.
if ! command -v singularity >/dev/null 2>&1; then
    echo "Error: 'singularity' is not installed or not in the PATH." >&2
    exit 1
fi

container='docker://khanlab/spimprep-deps:main'

# Clear any snakemake profile inherited from the calling environment
# (presumably so a host-side profile does not affect the containerized run).
unset SNAKEMAKE_PROFILE

# "$@" is quoted so that each caller-supplied argument is passed through as a
# single word, even when it contains spaces or glob characters; unquoted $@
# would be re-split and pathname-expanded by the shell.
singularity exec "${container}" snakemake --config --set-resources bigstitcher:mem_mb=30000 fuse_dataset:mem_mb=30000 -pr "$@"

2 changes: 1 addition & 1 deletion workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ rule all:
input:
get_all_targets(),
get_bids_toplevel_targets(),
get_qc_targets(),
# get_qc_targets(), #need to skip this if using prestitched
localrule: True


Expand Down
4 changes: 2 additions & 2 deletions workflow/rules/bigstitcher.smk
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ rule bigstitcher:
"cp {input.dataset_xml} {output.dataset_xml} && "
" {params.fiji_launcher_cmd} && "
" echo ' -macro {input.ijm} \"{params.macro_args}\"' >> {output.launcher} "
" && {output.launcher} &> {log} && {params.rm_old_xml}"
" && {output.launcher} |& tee {log} && {params.rm_old_xml}"


rule fuse_dataset:
Expand Down Expand Up @@ -245,7 +245,7 @@ rule fuse_dataset:
shell:
" {params.fiji_launcher_cmd} && "
" echo ' -macro {input.ijm} \"{params.macro_args}\"' >> {output.launcher} "
" && {output.launcher} &> {log}"
" && {output.launcher} |& tee {log}"


rule fuse_dataset_spark:
Expand Down
2 changes: 2 additions & 0 deletions workflow/rules/flatfield_corr.smk
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@ rule apply_basic_flatfield_corr:
stain=get_stains(wildcards),
allow_missing=True,
),
params:
out_chunks=config["ome_zarr"]["rechunk_size"],
output:
zarr=temp(
directory(
Expand Down
2 changes: 1 addition & 1 deletion workflow/rules/ome_zarr.smk
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ rule tif_stacks_to_ome_zarr:
config["containers"]["spimprep"]
group:
"preproc"
threads: 8
threads: config["cores_per_rule"]
resources:
runtime=360,
mem_mb=32000,
Expand Down
2 changes: 1 addition & 1 deletion workflow/scripts/apply_basic_flatfield_corr_zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def apply_basic_parallel(x):
chan_arr_list.append(arr_corr)

#stack along chans
arr_stacked = da.stack(chan_arr_list,axis=1)
arr_stacked = da.stack(chan_arr_list,axis=1).rechunk([1,1] + snakemake.params.out_chunks)

with ProgressBar():
da.to_zarr(arr_stacked,snakemake.output.zarr,overwrite=True,dimension_separator='/')
8 changes: 5 additions & 3 deletions workflow/scripts/ome_zarr_to_nii.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from lib.cloud_io import get_fsspec, is_remote

uri = snakemake.params.uri

in_zarr = snakemake.input.zarr
channel_index = snakemake.params.channel_index

Expand Down Expand Up @@ -38,8 +37,11 @@
affine[1,1]=-transforms[0]['scale'][2] #y
affine[2,2]=-transforms[0]['scale'][1] #z

#grab the channel index corresponding to the stain
darr = da.from_zarr(store,component=f'/{level}')[channel_index,:,:,:].squeeze()
if is_remote(uri):
#grab the channel index corresponding to the stain
darr = da.from_zarr(store,component=f'/{level}')[channel_index,:,:,:].squeeze()
else:
darr = da.from_zarr(in_zarr,component=f'/{level}')[channel_index,:,:,:].squeeze()

#input array axes are ZYX
#writing to nifti we want XYZ
Expand Down
23 changes: 11 additions & 12 deletions workflow/scripts/tif_stacks_to_ome_zarr.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,16 @@
import json
import zarr
import dask.array as da
from dask.array.image import imread as dask_imread
from dask.array.image import imread as dask_imread
from ome_zarr.io import parse_url
from ome_zarr.writer import write_image
from ome_zarr.format import format_from_version
from ome_zarr.scale import Scaler
from dask.diagnostics import ProgressBar
from upath import UPath as Path
from lib.cloud_io import get_fsspec, is_remote
from dask.distributed import Client, LocalCluster

in_tif_glob = snakemake.params.in_tif_glob

metadata_json=snakemake.input.metadata_json
downsampling=snakemake.params.downsampling
max_layer=snakemake.params.max_downsampling_layers #number of downsamplings by 2 to include in zarr
Expand All @@ -21,6 +20,9 @@
scaling_method=snakemake.params.scaling_method
uri = snakemake.params.uri

cluster = LocalCluster(processes=False)
client = Client(cluster)
print(client.dashboard_link)

# prepare metadata for ome-zarr
with open(metadata_json) as fp:
Expand Down Expand Up @@ -48,6 +50,8 @@
omero={key:val for key,val in snakemake.config['ome_zarr']['omero_metadata']['defaults'].items()}
omero['channels']=[]



darr_list=[]
for i,stain in enumerate(stains):

Expand All @@ -63,7 +67,6 @@
channel_metadata['color'] = color
omero['channels'].append(channel_metadata)


darr_channels = da.stack(darr_list)


Expand All @@ -72,23 +75,19 @@
fs = get_fsspec(uri,**fs_args)
store = zarr.storage.FSStore(Path(uri).path,fs=fs,dimension_separator='/',mode='w')
else:
store = zarr.DirectoryStore(out_zarr)


store = zarr.DirectoryStore(out_zarr,dimension_separator='/')



group = zarr.group(store,overwrite=True)
scaler = Scaler(max_layer=max_layer,method=scaling_method)


with ProgressBar():
write_image(image=darr_channels,
delayed = write_image(image=darr_channels,
group=group,
scaler=scaler,
coordinate_transformations=coordinate_transformations,
axes=axes,
metadata={'omero':omero}
metadata={'omero':omero},
compute=True
)


3 changes: 2 additions & 1 deletion workflow/scripts/zarr_to_n5_bdv.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ def update_xml_h5_to_n5(in_xml,out_xml,in_n5):
print('removing empty bdv h5/xml')
rmtree(temp_bdv_dir)


print('writing data to n5')
n5_store = zarr.n5.N5Store(snakemake.output.bdv_n5)

Expand All @@ -98,7 +99,7 @@ def update_xml_h5_to_n5(in_xml,out_xml,in_n5):
ds_list=[] #for setup-level attrs
for ds in range(max_downsampling_layers):
step=2**ds #1,2,4,8..
zstack = da.squeeze(darr[tile_i,chan_i,:,::step,::step])
zstack = da.squeeze(darr[tile_i,chan_i,:,::step,::step]).astype(np.int16)
print(f'writing to setup{setup_i}/timepoint0/s{ds}')
with ProgressBar():
zstack.to_zarr(n5_store,component=f'setup{setup_i}/timepoint0/s{ds}',overwrite=True,compute=True)
Expand Down
3 changes: 2 additions & 1 deletion workflow/scripts/zarr_to_ome_zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@
fs = get_fsspec(uri,**fs_args)
store = zarr.storage.FSStore(Path(uri).path,fs=fs,dimension_separator='/',mode='w')
else:
store = zarr.DirectoryStore(out_zarr)
store = zarr.DirectoryStore(out_zarr,dimension_separator='/')



Expand All @@ -68,6 +68,7 @@

darr_list.append(da.from_zarr(in_zarr,component=f'{group_name}/s0',chunks=rechunk_size))


#append to omero metadata
channel_metadata={key:val for key,val in snakemake.config['ome_zarr']['omero_metadata']['channels']['defaults'].items()}
channel_name=stains[zarr_i]
Expand Down

0 comments on commit e32ec07

Please sign in to comment.