Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve logic of limit_bounds together with rois #201

Merged
merged 2 commits into from
Nov 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 22 additions & 3 deletions dlup/_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -403,15 +403,15 @@ def read_region(
region = region.resize(size, resample=self._interpolator, box=box)
return region

def get_scaled_size(self, scaling: GenericNumber, use_limit_bounds: Optional[bool] = False) -> tuple[int, int]:
def get_scaled_size(self, scaling: GenericNumber, limit_bounds: Optional[bool] = False) -> tuple[int, int]:
"""Compute slide image size at specific scaling.

Parameters
-----------
scaling: GenericNumber
The factor by which the image needs to be scaled.

use_limit_bounds: Optional[bool]
limit_bounds: Optional[bool]
If True, the scaled size will be calculated using the slide bounds of the whole slide image.
This is generally the specific area within a whole slide image where we can find the tissue specimen.

Expand All @@ -420,7 +420,7 @@ def get_scaled_size(self, scaling: GenericNumber, use_limit_bounds: Optional[boo
size: tuple[int, int]
The scaled size of the image.
"""
if use_limit_bounds:
if limit_bounds:
_, bounded_size = self.slide_bounds
size = int(bounded_size[0] * scaling), int(bounded_size[1] * scaling)
else:
Expand Down Expand Up @@ -525,6 +525,25 @@ def slide_bounds(self) -> tuple[tuple[int, int], tuple[int, int]]:
"""
return self._wsi.slide_bounds

def get_scaled_slide_bounds(self, scaling: float) -> tuple[tuple[int, int], tuple[int, int]]:
    """Return the slide bounds rescaled by a given scaling factor.

    The ``(offset, size)`` pair exposed by the ``slide_bounds`` property is multiplied
    component-wise by ``scaling``. Every component is then converted with ``int()``,
    so fractional parts are truncated rather than rounded.

    Parameters
    ----------
    scaling : float
        The scaling factor to apply to both the offset and the size of the slide bounds.

    Returns
    -------
    tuple[tuple[int, int], tuple[int, int]]
        The scaled ``(offset, size)`` of the slide bounds, each as a pair of integers.
    """
    (offset_first, offset_second), (size_first, size_second) = self.slide_bounds
    # int() truncates, matching how get_scaled_size converts scaled dimensions.
    scaled_offset = (int(scaling * offset_first), int(scaling * offset_second))
    scaled_size = (int(scaling * size_first), int(scaling * size_second))
    return scaled_offset, scaled_size

def __repr__(self) -> str:
"""Returns the SlideImage representation and some of its properties."""
props = ("identifier", "vendor", "mpp", "magnification", "size")
Expand Down
65 changes: 58 additions & 7 deletions dlup/annotations.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,11 @@
- The type of object (point, box, polygon) is fixed per label.
- The mpp is fixed per label.

Also the ASAP XML data format is supported.
Supported file formats:
- ASAP XML
- Darwin V7 JSON
- GeoJSON
- HaloXML
"""
from __future__ import annotations

Expand Down Expand Up @@ -55,6 +59,16 @@ class AnnotationType(Enum):


class AnnotationSorting(Enum):
"""The ways to sort the annotations. This is used in the constructors of the `WsiAnnotations` class, and applied
to the output of `WsiAnnotations.read_region()`.

- REVERSE: Sort the output in reverse order.
- BY_AREA: Often when the annotation tools do not properly support hierarchical order, one would annotate in a way
that the smaller objects are on top of the larger objects. This option sorts the output by area, so that the
larger objects appear first in the output and then the smaller objects.
- NONE: Do not apply any sorting and output as is presented in the input file.
"""

REVERSE = "reverse"
BY_AREA = "by_area"
NONE = "none"
Expand Down Expand Up @@ -447,6 +461,7 @@ def from_geojson(
cls: Type[_TWsiAnnotations],
geojsons: PathLike | Iterable[PathLike],
scaling: float | None = None,
sorting: AnnotationSorting = AnnotationSorting.BY_AREA,
) -> _TWsiAnnotations:
"""
Constructs a WsiAnnotations object from geojson.
Expand All @@ -458,6 +473,9 @@ def from_geojson(
object.
scaling : float, optional
The scaling to apply to the annotations.
sorting: AnnotationSorting
The sorting to apply to the annotations. Check the `AnnotationSorting` enum for more information.
By default, the annotations are sorted by area.

Returns
-------
Expand Down Expand Up @@ -488,13 +506,14 @@ def from_geojson(
SingleAnnotationWrapper(a_cls=data[k][0].annotation_class, annotation=data[k]) for k in data.keys()
]

return cls(_annotations, sorting=AnnotationSorting.BY_AREA)
return cls(_annotations, sorting=sorting)

@classmethod
def from_asap_xml(
cls,
asap_xml: PathLike,
scaling: float | None = None,
sorting: AnnotationSorting = AnnotationSorting.BY_AREA,
) -> WsiAnnotations:
"""
Read annotations as an ASAP [1] XML file. ASAP is a tool for viewing and annotating whole slide images.
Expand All @@ -504,6 +523,9 @@ def from_asap_xml(
asap_xml : PathLike
Path to ASAP XML annotation file.
scaling : float, optional
sorting: AnnotationSorting
The sorting to apply to the annotations. Check the `AnnotationSorting` enum for more information.
By default, the annotations are sorted by area.

References
----------
Expand Down Expand Up @@ -575,10 +597,12 @@ def from_asap_xml(

opened_annotations += 1

return cls(list(annotations.values()), sorting=AnnotationSorting.BY_AREA)
return cls(list(annotations.values()), sorting=sorting)

@classmethod
def from_halo_xml(cls, halo_xml: PathLike, scaling: float | None = None) -> WsiAnnotations:
def from_halo_xml(
cls, halo_xml: PathLike, scaling: float | None = None, sorting: AnnotationSorting = AnnotationSorting.NONE
) -> WsiAnnotations:
"""
Read annotations as a Halo [1] XML file.
This function requires `pyhaloxml` [2] to be installed.
Expand All @@ -589,6 +613,9 @@ def from_halo_xml(cls, halo_xml: PathLike, scaling: float | None = None) -> WsiA
Path to the Halo XML file.
scaling : float, optional
The scaling to apply to the annotations.
sorting: AnnotationSorting
The sorting to apply to the annotations. Check the `AnnotationSorting` enum for more information. By default
the annotations are not sorted as HALO supports hierarchical annotations.

References
----------
Expand Down Expand Up @@ -622,10 +649,34 @@ def from_halo_xml(cls, halo_xml: PathLike, scaling: float | None = None) -> WsiA
)
)

return cls(annotations, sorting=AnnotationSorting.NONE)
return cls(annotations, sorting=sorting)

@classmethod
def from_darwin_json(cls, darwin_json: PathLike, scaling: float | None = None) -> WsiAnnotations:
def from_darwin_json(
cls, darwin_json: PathLike, scaling: float | None = None, sorting: AnnotationSorting = AnnotationSorting.NONE
) -> WsiAnnotations:
"""
Read annotations as a V7 Darwin [1] JSON file.

Parameters
----------
darwin_json : PathLike
Path to the Darwin JSON file.
scaling : float, optional
The scaling to apply to the annotations.
sorting: AnnotationSorting
The sorting to apply to the annotations. Check the `AnnotationSorting` enum for more information.
By default, the annotations are not sorted as V7 Darwin supports hierarchical annotations.

References
----------
.. [1] https://darwin.v7labs.com/

Returns
-------
WsiAnnotations

"""
if not DARWIN_SDK_AVAILABLE:
raise RuntimeError("`darwin` is not available. Install using `python -m pip install darwin-py`.")
import darwin
Expand Down Expand Up @@ -673,7 +724,7 @@ def from_darwin_json(cls, darwin_json: PathLike, scaling: float | None = None) -
output = []
for an_cls, _annotation in all_annotations.items():
output.append(SingleAnnotationWrapper(a_cls=an_cls, annotation=_annotation))
return cls(output, sorting=AnnotationSorting.NONE)
return cls(output, sorting=sorting)

def __getitem__(self, a_cls: AnnotationClass) -> SingleAnnotationWrapper:
return self._annotations[a_cls]
Expand Down
22 changes: 8 additions & 14 deletions dlup/data/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -488,7 +488,7 @@ def from_standard_tiling(
Backend to use to read the whole slide image.
limit_bounds : bool
If the bounds of the grid should be limited to the bounds of the slide given in the `slide_bounds` property
of the `SlideImage` class.
of the `SlideImage` class. If ROIs are given, this parameter is ignored.
**kwargs :
Gets passed to the SlideImage constructor.

Expand All @@ -506,25 +506,19 @@ def from_standard_tiling(
scaling = slide_image.get_scaling(mpp)
slide_mpp = slide_image.mpp

if limit_bounds:
if rois is not None:
raise ValueError("Cannot use both `rois` and `limit_bounds` at the same time.")
if rois is not None:
slide_level_size = slide_image.get_scaled_size(scaling, limit_bounds=False)
_rois = parse_rois(rois, slide_level_size, scaling=scaling)
elif limit_bounds:
if backend == ImageBackend.AUTODETECT or backend == "AUTODETECT":
raise ValueError(
"Cannot use AutoDetect as backend and use limit_bounds at the same time. "
"This is related to issue #151. See https://github.com/NKI-AI/dlup/issues/151"
)

offset, bounds = slide_image.slide_bounds
offset = (int(scaling * offset[0]), int(scaling * offset[1]))
size = int(bounds[0] * scaling), int(bounds[1] * scaling)
_rois = [
(offset, size),
]

_rois = [slide_image.get_scaled_slide_bounds(scaling=scaling)]
else:
slide_level_size = slide_image.get_scaled_size(scaling)
_rois = parse_rois(rois, slide_level_size, scaling=slide_mpp / mpp if mpp else 1.0)
slide_level_size = slide_image.get_scaled_size(scaling, limit_bounds=False)
_rois = [((0, 0), slide_level_size)]

grid_mpp = mpp if mpp is not None else slide_mpp
grids = []
Expand Down