Commit 71112c9

feat: main public API to limits tuning

seankmartin committed Oct 17, 2024
1 parent bcc2c14 commit 71112c9
Showing 3 changed files with 151 additions and 21 deletions.
29 changes: 17 additions & 12 deletions cryoet_data_portal_neuroglancer/precompute/contrast_limits.py
@@ -18,8 +18,8 @@
def compute_contrast_limits(
data: da.Array | np.ndarray,
method: Literal["gmm", "cdf"] = "gmm",
z_radius: int | None | Literal["auto"] = 5,
num_samples: int | None = 100_000,
z_radius: int | None | Literal["auto", "computed"] = "auto",
downsampling_ratio: float | None = 0.3,
) -> tuple[float, float]:
"""Compute the contrast limits for the given data.
@@ -32,13 +32,15 @@ def compute_contrast_limits(
By default "gmm". Other option is "cdf".
z_radius: int or None or "auto", optional.
The number of z-slices to include above and below the central z-slice.
- By default 5.
- Disable by setting to None. Auto compute by setting to "auto".
- Auto compute can currently be problematic for large volumes.
- num_samples: int or None, optional.
- The number of samples to take from the volume.
- By default 100_000.
- Disable sampling and use the whole volume by setting to None.
+ None means the whole volume is used.
+ If "auto", the z-radius mode is picked based on the method.
+ "computed" attempts to estimate a good z-slicing, but
+ can currently be problematic for large volumes.
+ Default is "auto".
+ downsampling_ratio: float or None, optional.
+ The downsampling ratio for the volume. By default 0.3.
+ If None, no downsampling is performed (same as 1.0).
+ This is particularly useful if the z_radius is set to None.
Returns
-------
@@ -48,10 +48,13 @@
calculator = GMMContrastLimitCalculator(data) if method == "gmm" else CDFContrastLimitCalculator(data)
if z_radius is not None:
if z_radius == "auto":
- z_radius = None
+ z_radius = None if method == "gmm" else 5
calculator.trim_volume_around_central_zslice(z_radius=z_radius)
- if num_samples is not None:
- calculator.take_random_samples_from_volume(num_samples=num_samples)
+ if downsampling_ratio is not None:
+ total_size = np.prod(data.shape)
+ calculator.take_random_samples_from_volume(
+ num_samples=int(total_size * downsampling_ratio),
+ )
return calculator.compute_contrast_limit()


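For orientation, a minimal usage sketch of the updated public API (illustrative only, not part of the commit). The path "example.zarr" is a placeholder, and the loader call mirrors the manual tests below; parameter behaviour follows the docstring above.

from cryoet_data_portal_neuroglancer.io import load_omezarr_data
from cryoet_data_portal_neuroglancer.precompute.contrast_limits import compute_contrast_limits

# "example.zarr" is a placeholder for any OME-Zarr tomogram on disk.
data = load_omezarr_data("example.zarr", resolution_level=-1, persist=False)

# Defaults: GMM-based limits, z_radius="auto" (the z-trimming mode is picked per method),
# and a random sample of roughly 30% of the volume's voxels (downsampling_ratio=0.3).
gmm_low, gmm_high = compute_contrast_limits(data, method="gmm")

# Explicit settings: CDF-based limits, trim to 5 z-slices above and below the central slice,
# and disable sampling (downsampling_ratio=None), so the whole trimmed volume is used.
cdf_low, cdf_high = compute_contrast_limits(
    data,
    method="cdf",
    z_radius=5,
    downsampling_ratio=None,
)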
23 changes: 14 additions & 9 deletions manual_tests/tuning_contrast_limits_from_api.py
@@ -24,7 +24,6 @@

OUTPUT_FOLDER = "/media/starfish/LargeSSD/data/cryoET/data/FromAPI"

- # These are the examples that were human chosen
id_to_path_map = {
1000: "1000/16.zarr",
706: "706/Position_161.zarr",
@@ -33,8 +32,6 @@
4279: "4279/dga2018-08-27-600.zarr",
}

- # These are some random examples that can be used instead
-
id_to_human_contrast_limits = {
1000: {
"slice": [-0.2, 0.15],
Expand Down Expand Up @@ -111,7 +108,7 @@ def run_contrast_limit_calculations_from_api(
input_data_path,
output_path,
z_radius=5,
- num_samples=20000,
+ ds_factor=0.3,
):
output_path.mkdir(parents=True, exist_ok=True)
human_contrast = id_to_human_contrast_limits[id_]
@@ -126,13 +123,13 @@
data,
method="gmm",
z_radius=z_radius,
- num_samples=num_samples,
+ downsampling_ratio=ds_factor,
)
cdf_limits = compute_contrast_limits(
data,
method="cdf",
z_radius=z_radius,
- num_samples=num_samples,
+ downsampling_ratio=ds_factor,
)
limits_dict["gmm"] = gmm_limits
limits_dict["cdf"] = cdf_limits
@@ -152,7 +149,7 @@ def run_contrast_limit_calculations_from_api(
limits_dict = {k: limits_dict[k] for k in sorted(limits_dict, key=closeness_ordering.get)}
limits_dict_string = "".join(f"{k} : {v[0]:.5f} - {v[1]:.5f}\n" for k, v in limits_dict.items())
print(
f"Closest method to human contrast limits of {volume_limit} was {closest_method}:\n{limits_dict_string}Details saved to {output_path}.",
f"----------------\nClosest method to human contrast limits of {volume_limit} was {closest_method}:\n{limits_dict_string}Details saved to {output_path}.",
)
# For state generation
limits_dict["closest_method"] = closest_method
@@ -256,7 +253,7 @@ def create_state(id_, contrast_limit_dict, output_folder):
for key, limits in contrast_limit_dict.items():
if key in ignored_keys:
continue
- visible = key == contrast_limit_dict["closest_method"]
+ visible = key == "gmm"
closeness = contrast_limit_dict["distance_to_human"].get(key, 0)
gain = id_to_human_contrast_limits[id_]["gain"] if key == "human" else -7.8
twodee_limits = contrast_limit_dict["2d"] if key != "human" else id_to_human_contrast_limits[id_]["slice"]
Expand Down Expand Up @@ -286,7 +283,13 @@ def create_state(id_, contrast_limit_dict, output_folder):
return json_state


- def main(output_folder, take_screenshots=False, wait_for=60 * 1000):
+ def main(
+ output_folder,
+ take_screenshots=False,
+ wait_for=60 * 1000,
+ z_radius=5,
+ ds_factor=0.3,
+ ):
url_list = []
for id_, path in id_to_path_map.items():
path = Path(output_folder) / path
@@ -295,6 +298,8 @@ def main(output_folder, take_screenshots=False, wait_for=60 * 1000):
id_,
path,
Path(output_folder) / "results",
+ z_radius=z_radius,
+ ds_factor=ds_factor,
)
# For tuning hyperparameters
# limits = run_all_contrast_limit_calculations(
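As a side note, here is a hypothetical snippet (not part of the commit) showing how the new keyword arguments on main() could drive a quick sensitivity sweep from within this script; the ratio values are arbitrary examples.

# Hypothetical sweep over the new ds_factor knob; z_radius is held at its default of 5.
for ds_factor in (0.1, 0.3, 1.0):
    main(OUTPUT_FOLDER, take_screenshots=False, z_radius=5, ds_factor=ds_factor)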
120 changes: 120 additions & 0 deletions manual_tests/volume_contrast_limits.py
@@ -0,0 +1,120 @@
import argparse
import json
from pathlib import Path

from cryoet_data_portal import Client, Tomogram
from neuroglancer import viewer_state
from neuroglancer.url_state import to_url
from scipy.spatial.transform import Rotation

from cryoet_data_portal_neuroglancer.io import load_omezarr_data
from cryoet_data_portal_neuroglancer.precompute.contrast_limits import (
    compute_contrast_limits,
)
from cryoet_data_portal_neuroglancer.state_generator import combine_json_layers, generate_image_layer

# Set up logging - level is info
# logging.basicConfig(level=logging.INFO, force=True)

OUTPUT_FOLDER = "/media/starfish/LargeSSD/data/cryoET/data/FromAPI"

id_to_path_map = {
    773: "773/Position_513.zarr",
}

id_to_source_map = {
    773: (
        "https://files.cryoetdataportal.cziscience.com/10004/Position_513/Tomograms/VoxelSpacing7.560/CanonicalTomogram/Position_513.zarr",
        (756e-10, 756e-10, 756e-10),
    ),
}


def grab_tomogram(id_: int, zarr_path: Path):
    client = Client()
    if not zarr_path.exists():
        zarr_path.mkdir(parents=True, exist_ok=True)
        tomogram = Tomogram.get_by_id(client, id_)
        tomogram.download_omezarr(str(zarr_path.parent.resolve()))


def run_contrast_limit_calculations_from_api(input_data_path):
    data = load_omezarr_data(input_data_path, resolution_level=-1, persist=False)
    data_shape = data.shape
    data_size_dict = {"z": data_shape[0], "y": data_shape[1], "x": data_shape[2]}

    limits_dict = {}
    # Ensure the main public API is working
    gmm_limits = compute_contrast_limits(
        data,
        method="gmm",
    )
    cdf_limits = compute_contrast_limits(
        data,
        method="cdf",
    )
    limits_dict["gmm"] = gmm_limits
    limits_dict["cdf"] = cdf_limits
    limits_dict["2d"] = gmm_limits
    limits_dict["size"] = data_size_dict
    return limits_dict


def create_state(id_, contrast_limit_dict, output_folder):
    source, scale = id_to_source_map[id_]
    # One state layer for each contrast limit method
    layers_list = []
    ignored_keys = ["size", "closest_method", "distance_to_human", "2d"]

    # set the most promising contrast limits as visible, rest not
    for key, limits in contrast_limit_dict.items():
        if key in ignored_keys:
            continue
        is_visible = key == "gmm"
        layer_info = generate_image_layer(
            source=source,
            scale=scale,
            size=contrast_limit_dict["size"],
            name=f"{id_}_{key}",
            twodee_contrast_limits=limits,
            threedee_contrast_limits=limits,
            has_volume_rendering_shader=True,
            volume_rendering_is_visible=True,
            is_visible=is_visible,
        )
        layer_info["_projectionScale"] = 2000
        layers_list.append(layer_info)
    json_state = combine_json_layers(
        layers_list,
        scale,
        projection_quaternion=Rotation.from_euler(seq="xyz", angles=(0, 0, 0), degrees=True).as_quat(),
        show_axis_lines=False,
    )
    with open(output_folder / f"{id_}_state.json", "w") as f:
        json.dump(json_state, f, indent=4)
    return json_state


def main(output_folder):
    url_list = []
    for id_, path in id_to_path_map.items():
        path = Path(output_folder) / path
        grab_tomogram(id_, path)
        limits = run_contrast_limit_calculations_from_api(path)
        state = create_state(id_, limits, Path(output_folder) / "results")
        viewer_state_obj = viewer_state.ViewerState(state)
        url_from_json = to_url(
            viewer_state_obj,
            prefix="https://neuroglancer-demo.appspot.com/",
        )
        url_list.append(url_from_json)
    print(f"Wrote {len(url_list)} urls to {Path(output_folder) / 'urls.txt'}")
    with open(Path(output_folder) / "urls.txt", "w") as f:
        f.write("\n\n\n".join(url_list))


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--output_folder", type=str, default=OUTPUT_FOLDER)
    args, _ = parser.parse_known_args()
    main(args.output_folder)
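To exercise this manual test, run python manual_tests/volume_contrast_limits.py --output_folder <folder>, where <folder> is a placeholder that defaults to OUTPUT_FOLDER above. On first run the script downloads the tomogram for ID 773, writes a Neuroglancer state to <folder>/results/773_state.json, and writes a shareable neuroglancer-demo URL to <folder>/urls.txt. Note that the results subfolder may need to exist beforehand, since this script, unlike the tuning script above, does not create it.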
