Skip to content

Commit

Permalink
First version of metadata parsing in create_zarr_structure (ref #112)
Browse files Browse the repository at this point in the history
  • Loading branch information
tcompa committed Jul 20, 2022
1 parent f3b28e8 commit d05b262
Show file tree
Hide file tree
Showing 2 changed files with 82 additions and 24 deletions.
81 changes: 57 additions & 24 deletions fractal/tasks/create_zarr_structure.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,12 @@
import os
from glob import glob

import pandas as pd
import zarr
from anndata.experimental import write_elem

from fractal.tasks.lib_parse_filename_metadata import parse_metadata
from fractal.tasks.lib_regions_of_interest import prepare_ROIs_table
from fractal.tasks.metadata_parsing import parse_yokogawa_metadata


Expand All @@ -28,6 +31,7 @@ def create_zarr_structure(
path_dict_channels=None,
num_levels=None,
coarsening_xy=None,
metadata_table="mlf_mrf",
):

"""
Expand Down Expand Up @@ -64,6 +68,21 @@ def create_zarr_structure(
"ERROR in create_zarr_structure_multifov: in_paths is None"
)

# Preliminary checks on metadata_table
if metadata_table != "mlf_mrf" and not isinstance(
metadata_table, pd.core.frame.DataFrame
):
raise Exception(
"ERROR: metadata_table must be a known string or a "
"pandas DataFrame}"
)
if metadata_table != "mlf_mrf":
raise NotImplementedError(
"We currently only support "
'metadata_table="mlf_mrf", '
f"and not {metadata_table}"
)

# Identify all plates and all channels, across all input folders
plates = []
channels = None
Expand Down Expand Up @@ -116,6 +135,9 @@ def create_zarr_structure(
else:
plates.append(plate)

# Update dict_plate_paths
dict_plate_paths[plate] = in_path

# Check that channels are the same as in previous plates
if channels is None:
channels = tmp_channels[:]
Expand All @@ -125,9 +147,6 @@ def create_zarr_structure(
f"ERROR\n{info}\nERROR: expected channels " "{channels}"
)

# Update dict_plate_paths
dict_plate_paths[plate] = in_path

# Check that all channels are in the allowed_channels
if not set(channels).issubset(set(dict_channels.keys())):
msg = "ERROR in create_zarr_structure\n"
Expand All @@ -143,36 +162,42 @@ def create_zarr_structure(
print(f"actual_channels: {actual_channels}")

zarrurls = {"plate": [], "well": []}
# zarrurls_in_paths = {}

# PARSE METADATA
# FIXME: hard-coded paths
root = (
"/data/active/fractal/3D/PelkmansLab/"
"CardiacMultiplexing/Cycle1_testSubset/"
)
mrf_path = root + "MeasurementDetail.mrf"
mlf_path = root + "MeasurementData.mlf"

site_metadata, total_files = parse_yokogawa_metadata(
mrf_path=mrf_path, mlf_path=mlf_path
)

# PIXEL SIZES
pixel_size_z = site_metadata["pixel_size_z"][0]
pixel_size_y = site_metadata["pixel_size_y"][0]
pixel_size_x = site_metadata["pixel_size_x"][0]

# Sanitize out_path
if not out_path.endswith("/"):
out_path += "/"

# Loop over plates
for plate in plates:

# Retrieve path corresponding to this plate
in_path = dict_plate_paths[plate]

# Define plate zarr
zarrurl = f"{out_path}{plate}.zarr"
print(f"Creating {zarrurl}")
group_plate = zarr.group(zarrurl)
zarrurls["plate"].append(zarrurl)
# zarrurls_in_paths[zarrurl] = dict_plate_paths[plate]

# Obtain FOV-metadata dataframe
if metadata_table == "mlf_mrf":
mrf_path = f"{in_path}MeasurementDetail.mrf"
mlf_path = f"{in_path}MeasurementData.mlf"
site_metadata, total_files = parse_yokogawa_metadata(
mrf_path=mrf_path, mlf_path=mlf_path
)
# FIXME: hardcoded
image_size = {"x": 2560, "y": 2160}

# Extract pixel sizes
pixel_size_z = site_metadata["pixel_size_z"][0]
pixel_size_y = site_metadata["pixel_size_y"][0]
pixel_size_x = site_metadata["pixel_size_x"][0]

# Extract bit_depth #FIXME
# bit_depth = site_metadata["bit_depth"][0]
# if bit_depth == 8:
# dtype

# Identify all wells
plate_prefix = dict_plate_prefixes[plate]
Expand Down Expand Up @@ -291,7 +316,7 @@ def create_zarr_structure(
}
for ind_level in range(num_levels)
],
# Global rescaling to physiacl units
# Global rescaling to physical units
"coordinateTransformations": [
{
"type": "scale",
Expand Down Expand Up @@ -328,6 +353,13 @@ def create_zarr_structure(
],
}

# Prepare and write anndata table of FOV ROIs
FOV_ROIs_table = prepare_ROIs_table(
site_metadata.loc[f"{row+column}"], image_size=image_size
)
group_tables = group_field.create_group("tables/") # noqa: F841
write_elem(group_tables, "FOV_ROI_table", FOV_ROIs_table)

return zarrurls, actual_channels


Expand Down Expand Up @@ -375,4 +407,5 @@ def create_zarr_structure(
num_levels=args.num_levels,
coarsening_xy=args.coarsening_xy,
path_dict_channels=args.path_dict_channels,
# metadata_table=args.metadata_table, #FIXME
)
25 changes: 25 additions & 0 deletions fractal/tasks/lib_regions_of_interest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import anndata as ad
import numpy as np


def prepare_ROIs_table(df, image_size=None):
    """
    Build an AnnData table describing field-of-view (FOV) regions of interest.

    The input dataframe is NOT modified: all the transformations below are
    applied to a private copy, so that callers can safely pass a slice of a
    larger metadata table (e.g. ``site_metadata.loc[well_id]``).

    :param df: dataframe with one row per FOV; it must contain the columns
               ``x_micrometer``, ``y_micrometer``, ``z_micrometer``,
               ``pixel_size_x``, ``pixel_size_y``, ``pixel_size_z`` and
               ``bit_depth``. All columns other than ``bit_depth`` must be
               castable to float32.
    :param image_size: mapping with keys ``"x"`` and ``"y"``, giving the
                       image size along each axis (presumably in pixels,
                       since it is multiplied by the pixel sizes -- TODO
                       confirm against the caller).
    :returns: an ``anndata.AnnData`` object with float32 data, one
              observation per row of ``df`` (named ``FOV_1``, ``FOV_2``,
              ...) and one variable per remaining column.
    :raises ValueError: if ``image_size`` is None.
    :raises KeyError: if one of the required columns is missing.
    """
    if image_size is None:
        raise ValueError("Missing image_size arg in prepare_ROIs_table")

    # Work on a copy: the in-place updates below would otherwise leak into
    # the caller's dataframe (or act on a view of it, triggering pandas
    # SettingWithCopy warnings).
    df = df.copy()

    # Shift positions so that the minimum along each axis becomes zero
    for axis in ("x", "y", "z"):
        column = f"{axis}_micrometer"
        df[column] -= df[column].min()

    # ROI extent along each axis; the z extent is set to pixel_size_z
    df["len_x_micrometer"] = image_size["x"] * df["pixel_size_x"]
    df["len_y_micrometer"] = image_size["y"] * df["pixel_size_y"]
    df["len_z_micrometer"] = df["pixel_size_z"]

    # bit_depth is not part of the ROI table; all other columns are stored
    # as float32
    df = df.drop(columns="bit_depth").astype(np.float32)

    adata = ad.AnnData(X=df, dtype=np.float32)
    # Observations are labeled with 1-based FOV indices
    adata.obs_names = [f"FOV_{ind:d}" for ind in range(1, len(df.index) + 1)]
    adata.var_names = df.columns

    return adata

0 comments on commit d05b262

Please sign in to comment.