Skip to content

Commit

Permalink
Update way how to export annotations
Browse files Browse the repository at this point in the history
  • Loading branch information
jonasteuwen committed Nov 28, 2023
1 parent 3527130 commit 0853021
Show file tree
Hide file tree
Showing 3 changed files with 99 additions and 67 deletions.
109 changes: 69 additions & 40 deletions dlup/annotations.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,35 @@ def shape(coordinates: CoordinatesDict, label: str, multiplier: float = 1.0) ->
}


def _geometry_to_geojson(geometry: Polygon | Point, label: str) -> dict[str, Any]:
    """Wrap a geometry in a GeoJSON ``Feature`` dictionary.

    Parameters
    ----------
    geometry : Polygon | Point
        The geometry to serialize; it is handed to `shapely.geometry.mapping` to obtain the
        ``geometry`` member of the feature.
    label : str
        Name stored under ``properties.classification.name``.

    Returns
    -------
    dict[str, Any]
        A GeoJSON feature with ``type``, ``properties`` and ``geometry`` keys. The classification
        color is always ``None``.
    """
    classification = {"name": label, "color": None}
    return {
        "type": "Feature",
        "properties": {"classification": classification},
        "geometry": shapely.geometry.mapping(geometry),
    }


class SingleAnnotationWrapper:
"""Class to hold the annotations of one specific label (class) for a whole slide image"""

Expand Down Expand Up @@ -298,19 +327,7 @@ def as_json(self) -> list[Any]:
-------
dict
"""
data = [
{
"type": "Feature",
"properties": {
"classification": {
"name": _.label,
"color": None,
},
},
"geometry": shapely.geometry.mapping(_),
}
for _ in self._annotation
]
data = [_geometry_to_geojson(_, label=_.label) for _ in self._annotation]
return data

@staticmethod
Expand Down Expand Up @@ -729,41 +746,53 @@ def from_darwin_json(
def __getitem__(self, a_cls: AnnotationClass) -> SingleAnnotationWrapper:
return self._annotations[a_cls]

def as_geojson(self) -> GeoJsonDict:
    """Export all annotations as a single GeoJSON ``FeatureCollection``.

    The annotations are collected with `read_region`, called at scaling 1.0 over the area that
    spans from the origin to the far corner of `bounding_box`. The output therefore follows the
    order in which `read_region` yields them (according to the original notes, the order of the
    `AnnotationSorting` selected for the annotations).

    Consecutive annotations that share a label are merged into one feature: a lone annotation is
    exported unchanged, several are combined into a `MultiPolygon` when the first one is a box or
    polygon and into a `MultiPoint` otherwise. Annotations with the same label that are not
    adjacent in the sorted output end up in separate features.

    Returns
    -------
    GeoJsonDict
        A feature collection whose features carry consecutive string ids starting at ``"0"``.
    """
    # Imported locally so this method does not depend on the module's import block.
    import itertools

    coordinates, size = self.bounding_box
    region_size = (coordinates[0] + size[0], coordinates[1] + size[1])
    all_annotations = self.read_region((0, 0), 1.0, region_size)

    data: GeoJsonDict = {"type": "FeatureCollection", "features": [], "id": None}

    # groupby only merges *adjacent* items with an equal key, which is exactly the grouping we
    # want. It also handles falsy labels (e.g. "") correctly, unlike a `not previous_label` check.
    grouped_annotations = itertools.groupby(all_annotations, key=lambda annotation: annotation.label)
    for idx, (label, members) in enumerate(grouped_annotations):
        annotation_list = list(members)
        if len(annotation_list) == 1:
            geometry = annotation_list[0]
        elif annotation_list[0].type in (AnnotationType.BOX, AnnotationType.POLYGON):
            # NOTE(review): assumes every annotation sharing a label has the same type as the first.
            geometry = shapely.geometry.MultiPolygon(annotation_list)
        else:
            geometry = shapely.geometry.MultiPoint(annotation_list)

        json_dict = _geometry_to_geojson(geometry, label=label)
        json_dict["id"] = str(idx)
        data["features"].append(json_dict)

    return data

def simplify(self, tolerance: float, *, preserve_topology: bool = True) -> None:
Expand Down
24 changes: 3 additions & 21 deletions dlup/cli/mask.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
from dlup.annotations import (
AnnotationClass,
AnnotationType,
GeoJsonDict,
Polygon,
SingleAnnotationWrapper,
WsiAnnotations,
Expand Down Expand Up @@ -88,20 +87,8 @@ def mask_to_polygon(args: argparse.Namespace) -> None:
if args.simplify is not None:
slide_annotations.simplify(tolerance=args.simplify)

if not args.separate:
with open(output_filename, "w") as f:
json.dump(slide_annotations.as_geojson(split_per_label=False), f, indent=2)
else:
jsons = slide_annotations.as_geojson(split_per_label=True)
if not type(jsons) == list[tuple[str, GeoJsonDict]]: # noqa
raise ValueError("Expected a list of tuples")
for label, json_dict in jsons:
suffix = output_filename.suffix
name = output_filename.with_suffix("").name
new_name = name + "-" + label
new_filename = (output_filename.parent / new_name).with_suffix(suffix)
with open(new_filename, "w") as f:
json.dump(json_dict, f, indent=2)
with open(output_filename, "w") as f:
json.dump(slide_annotations.as_geojson(), f, indent=2)


def register_parser(parser: argparse._SubParsersAction) -> None: # type: ignore
Expand All @@ -118,7 +105,7 @@ def register_parser(parser: argparse._SubParsersAction) -> None: # type: ignore
mask_parser.add_argument(
"MASK_FILENAME",
type=file_path,
help="Filename of the mask. If `--separate` is set, will create a label <MASK_FILENAME>-<label>.json",
help="Filename of the mask.",
)
mask_parser.add_argument(
"OUTPUT_FN",
Expand All @@ -135,11 +122,6 @@ def register_parser(parser: argparse._SubParsersAction) -> None: # type: ignore
action="store_true",
help="If set, will not show progress bar.",
)
mask_parser.add_argument(
"--separate",
action="store_true",
help="If set, save labels separately.",
)
mask_parser.add_argument(
"--tile-size",
type=int,
Expand Down
33 changes: 27 additions & 6 deletions tests/test_annotations.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,9 @@

import numpy as np
import pytest
import shapely.geometry

from dlup.annotations import AnnotationClass, AnnotationType, Polygon, WsiAnnotations
from dlup.annotations import AnnotationClass, AnnotationType, Polygon, WsiAnnotations, shape
from dlup.utils.imports import DARWIN_SDK_AVAILABLE

ASAP_XML_EXAMPLE = b"""<?xml version="1.0"?>
Expand Down Expand Up @@ -45,7 +46,7 @@ class TestAnnotations:
asap_annotations = WsiAnnotations.from_asap_xml(pathlib.Path(asap_file.name))

with tempfile.NamedTemporaryFile(suffix=".json") as geojson_out:
asap_geojson = asap_annotations.as_geojson(split_per_label=False)
asap_geojson = asap_annotations.as_geojson()
geojson_out.write(json.dumps(asap_geojson).encode("utf-8"))
geojson_out.flush()

Expand All @@ -60,10 +61,30 @@ def v7_annotations(self):
self._v7_annotations = WsiAnnotations.from_darwin_json("tests/files/103S.json")
return self._v7_annotations

def test_asap_to_geojson(self):
    """Check that the ASAP and GeoJSON fixtures export to equivalent GeoJSON features.

    Features are compared pairwise on type, properties and id. Geometries are compared through
    shapely's `equals`, because the sorting may order the individual polygons differently.
    """
    # TODO: Make sure that the annotations hit the border of the region.
    asap_features = self.asap_annotations.as_geojson()["features"]
    geojson_features = self.geojson_annotations.as_geojson()["features"]

    # zip() stops at the shorter input, so without this check a missing or extra feature in one
    # of the exports would go unnoticed.
    assert len(asap_features) == len(geojson_features)

    # TODO: Collect the geometries together per name and compare
    for elem0, elem1 in zip(asap_features, geojson_features):
        assert elem0["type"] == elem1["type"]
        assert elem0["properties"] == elem1["properties"]
        assert elem0["id"] == elem1["id"]

        # Now we need to compare the geometries, given the sorting they could become different
        shape0 = shape(elem0["geometry"], label="")
        shape1 = shape(elem1["geometry"], label="")
        assert len({_.label for _ in shape0}) == 1
        assert len({_.label for _ in shape1}) == 1
        if not isinstance(shape0[0], Polygon):
            raise NotImplementedError("Different shape types not implemented yet.")
        complete_shape0 = shapely.geometry.MultiPolygon(shape0)
        complete_shape1 = shapely.geometry.MultiPolygon(shape1)

        # TODO: Find how two areas compare.
        assert complete_shape0.equals(complete_shape1)

@pytest.mark.parametrize("region", [((10000, 10000), (5000, 5000), 3756.0), ((0, 0), (5000, 5000), None)])
def test_read_region(self, region):
Expand Down

0 comments on commit 0853021

Please sign in to comment.