diff --git a/src/ark/segmentation/marker_quantification.py b/src/ark/segmentation/marker_quantification.py
index 002cb754c..feb6b6838 100644
--- a/src/ark/segmentation/marker_quantification.py
+++ b/src/ark/segmentation/marker_quantification.py
@@ -454,7 +454,8 @@ def create_marker_count_matrices(segmentation_labels, image_data, nuclear_counts
 
 def generate_cell_table(segmentation_dir, tiff_dir, img_sub_folder="TIFs",
                         is_mibitiff=False, fovs=None, extraction='total_intensity',
-                        nuclear_counts=False, fast_extraction=False, mask_types=['whole_cell'],
+                        nuclear_counts=False, split_large_nuclei=False,
+                        fast_extraction=False, mask_types=['whole_cell'],
                         add_underscore=True, **kwargs):
     """This function takes the segmented data and computes the expression matrices batch-wise
     while also validating inputs
@@ -476,6 +477,9 @@ def generate_cell_table(segmentation_dir, tiff_dir, img_sub_folder="TIFs",
         nuclear_counts (bool):
             boolean flag to determine whether nuclear counts are returned, note that if
             set to True, the compartments coordinate in segmentation_labels must contain 'nuclear'
+        split_large_nuclei (bool):
+            boolean flag to determine whether nuclei which are larger than their assigned cell
+            will get split into two different nuclear objects
         fast_extraction (bool):
             if set, skips the custom regionprops and expensive base regionprops extraction steps
         mask_types (list):
@@ -570,6 +574,7 @@ def generate_cell_table(segmentation_dir, tiff_dir, img_sub_folder="TIFs",
             image_data=image_data,
             extraction=extraction,
             nuclear_counts=nuclear_counts,
+            split_large_nuclei=split_large_nuclei,
             fast_extraction=fast_extraction,
             **kwargs
         )
diff --git a/templates/1_Segment_Image_Data.ipynb b/templates/1_Segment_Image_Data.ipynb
index 7cadf662a..d433d3229 100644
--- a/templates/1_Segment_Image_Data.ipynb
+++ b/templates/1_Segment_Image_Data.ipynb
@@ -331,6 +331,10 @@
     "# set to True to add nuclear cell properties to the expression matrix\n",
     "nuclear_counts = False\n",
     "\n",
+    "# set to True to split nuclei which are larger than their assigned cell\n",
+    "# into two separate nuclear objects\n",
+    "split_large_nuclei = False\n",
+    "\n",
     "# set to True to bypass expensive cell property calculations\n",
     "# only cell label, size, and centroid will be extracted if True\n",
     "fast_extraction = False"
@@ -363,6 +367,7 @@
     "        fovs=fovs,\n",
     "        batch_size=5,\n",
     "        nuclear_counts=nuclear_counts,\n",
+    "        split_large_nuclei=split_large_nuclei,\n",
     "        fast_extraction=fast_extraction)"
    ]
   },
@@ -406,7 +411,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.11"
+   "version": "3.10.14"
   },
   "vscode": {
    "interpreter": {
diff --git a/tests/segmentation/segmentation_utils_test.py b/tests/segmentation/segmentation_utils_test.py
index 4b69ff374..fb10683bf 100644
--- a/tests/segmentation/segmentation_utils_test.py
+++ b/tests/segmentation/segmentation_utils_test.py
@@ -189,6 +189,13 @@ def test_transform_expression_matrix_multiple_compartments():
     )
     assert np.array_equal(normalized_data.loc['whole_cell', cell, modified_cols].values,
                           normalized_vals)
+    if cell_data.loc['nuclear', cell, settings.CELL_SIZE] != 0:
+        normalized_vals = np.divide(
+            cell_data.loc['nuclear', cell, modified_cols].values,
+            cell_data.loc['nuclear', cell, settings.CELL_SIZE].values
+        )
+        assert np.array_equal(normalized_data.loc['nuclear', cell, modified_cols].values,
+                              normalized_vals)
 
     # test arcsinh transform
     transform_kwargs = {'linear_factor': 1}
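
For reviewers, a minimal usage sketch of the new keyword outside the notebook (not part of the diff). The paths and FOV names below are placeholders, and `split_large_nuclei` is shown alongside `nuclear_counts=True` because the flag acts on nuclear objects, although the diff itself does not state that the two options must be combined.

```python
from ark.segmentation import marker_quantification

# placeholder paths -- substitute your own dataset locations
segmentation_dir = "../data/example_dataset/segmentation/deepcell_output"
tiff_dir = "../data/example_dataset/image_data"

cell_tables = marker_quantification.generate_cell_table(
    segmentation_dir=segmentation_dir,
    tiff_dir=tiff_dir,
    img_sub_folder="TIFs",
    fovs=["fov0", "fov1"],       # placeholder FOV names
    nuclear_counts=True,         # add nuclear compartment measurements
    split_large_nuclei=True,     # new flag: split nuclei larger than their assigned cell
    fast_extraction=False,
)
```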