diff --git a/dlup/_image.py b/dlup/_image.py index 54957ffe..d561de37 100644 --- a/dlup/_image.py +++ b/dlup/_image.py @@ -403,7 +403,7 @@ def read_region( region = region.resize(size, resample=self._interpolator, box=box) return region - def get_scaled_size(self, scaling: GenericNumber, use_limit_bounds: Optional[bool] = False) -> tuple[int, int]: + def get_scaled_size(self, scaling: GenericNumber, limit_bounds: Optional[bool] = False) -> tuple[int, int]: """Compute slide image size at specific scaling. Parameters @@ -411,7 +411,7 @@ def get_scaled_size(self, scaling: GenericNumber, use_limit_bounds: Optional[boo scaling: GenericNumber The factor by which the image needs to be scaled. - use_limit_bounds: Optional[bool] + limit_bounds: Optional[bool] If True, the scaled size will be calculated using the slide bounds of the whole slide image. This is generally the specific area within a whole slide image where we can find the tissue specimen. @@ -420,7 +420,7 @@ def get_scaled_size(self, scaling: GenericNumber, use_limit_bounds: Optional[boo size: tuple[int, int] The scaled size of the image. """ - if use_limit_bounds: + if limit_bounds: _, bounded_size = self.slide_bounds size = int(bounded_size[0] * scaling), int(bounded_size[1] * scaling) else: @@ -525,6 +525,25 @@ def slide_bounds(self) -> tuple[tuple[int, int], tuple[int, int]]: """ return self._wsi.slide_bounds + def get_scaled_slide_bounds(self, scaling: float) -> tuple[tuple[int, int], tuple[int, int]]: + """Returns the slide bounds (offset and size) scaled by the given factor. Each coordinate is multiplied by + `scaling` and truncated to an integer. + + Parameters + ---------- + scaling : float + The scaling factor to apply to the slide bounds. + + Returns + ------- + tuple[tuple[int, int], tuple[int, int]] + The (offset, size) of the slide bounds after scaling. 
+ """ + offset, size = self.slide_bounds + offset = (int(scaling * offset[0]), int(scaling * offset[1])) + size = (int(scaling * size[0]), int(scaling * size[1])) + return offset, size + def __repr__(self) -> str: """Returns the SlideImage representation and some of its properties.""" props = ("identifier", "vendor", "mpp", "magnification", "size") diff --git a/dlup/annotations.py b/dlup/annotations.py index d15aacba..9dc89c1e 100644 --- a/dlup/annotations.py +++ b/dlup/annotations.py @@ -16,7 +16,11 @@ - The type of object (point, box, polygon) is fixed per label. - The mpp is fixed per label. -Also the ASAP XML data format is supported. +Supported file formats: +- ASAP XML +- Darwin V7 JSON +- GeoJSON +- Halo XML """ from __future__ import annotations @@ -55,6 +59,16 @@ class AnnotationType(Enum): class AnnotationSorting(Enum): + """The ways to sort the annotations. This is used in the constructors of the `WsiAnnotations` class, and applied + to the output of `WsiAnnotations.read_region()`. + + - REVERSE: Sort the output in reverse order. + - BY_AREA: Often when the annotation tools do not properly support hierarchical order, one would annotate in a way + that the smaller objects are on top of the larger objects. This option sorts the output by area, so that the + larger objects appear first in the output and then the smaller objects. + - NONE: Do not apply any sorting; output the annotations in the order they appear in the input file. + """ + REVERSE = "reverse" BY_AREA = "by_area" NONE = "none" @@ -447,6 +461,7 @@ def from_geojson( cls: Type[_TWsiAnnotations], geojsons: PathLike | Iterable[PathLike], scaling: float | None = None, + sorting: AnnotationSorting = AnnotationSorting.BY_AREA, ) -> _TWsiAnnotations: """ Constructs an WsiAnnotations object from geojson. @@ -458,6 +473,9 @@ def from_geojson( object. scaling : float, optional The scaling to apply to the annotations. + sorting: AnnotationSorting + The sorting to apply to the annotations. 
Check the `AnnotationSorting` enum for more information. + By default, the annotations are sorted by area. Returns ------- @@ -488,13 +506,14 @@ def from_geojson( SingleAnnotationWrapper(a_cls=data[k][0].annotation_class, annotation=data[k]) for k in data.keys() ] - return cls(_annotations, sorting=AnnotationSorting.BY_AREA) + return cls(_annotations, sorting=sorting) @classmethod def from_asap_xml( cls, asap_xml: PathLike, scaling: float | None = None, + sorting: AnnotationSorting = AnnotationSorting.BY_AREA, ) -> WsiAnnotations: """ Read annotations as an ASAP [1] XML file. ASAP is a tool for viewing and annotating whole slide images. @@ -504,6 +523,9 @@ def from_asap_xml( asap_xml : PathLike Path to ASAP XML annotation file. scaling : float, optional + sorting: AnnotationSorting + The sorting to apply to the annotations. Check the `AnnotationSorting` enum for more information. + By default, the annotations are sorted by area. References ---------- @@ -575,10 +597,12 @@ def from_asap_xml( opened_annotations += 1 - return cls(list(annotations.values()), sorting=AnnotationSorting.BY_AREA) + return cls(list(annotations.values()), sorting=sorting) @classmethod - def from_halo_xml(cls, halo_xml: PathLike, scaling: float | None = None) -> WsiAnnotations: + def from_halo_xml( + cls, halo_xml: PathLike, scaling: float | None = None, sorting: AnnotationSorting = AnnotationSorting.NONE + ) -> WsiAnnotations: """ Read annotations as a Halo [1] XML file. This function requires `pyhaloxml` [2] to be installed. @@ -589,6 +613,9 @@ def from_halo_xml(cls, halo_xml: PathLike, scaling: float | None = None) -> WsiA Path to the Halo XML file. scaling : float, optional The scaling to apply to the annotations. + sorting: AnnotationSorting + The sorting to apply to the annotations. Check the `AnnotationSorting` enum for more information. By default + the annotations are not sorted as HALO supports hierarchical annotations. 
References ---------- @@ -622,10 +649,34 @@ def from_halo_xml(cls, halo_xml: PathLike, scaling: float | None = None) -> WsiA ) ) - return cls(annotations, sorting=AnnotationSorting.NONE) + return cls(annotations, sorting=sorting) @classmethod - def from_darwin_json(cls, darwin_json: PathLike, scaling: float | None = None) -> WsiAnnotations: + def from_darwin_json( + cls, darwin_json: PathLike, scaling: float | None = None, sorting: AnnotationSorting = AnnotationSorting.NONE + ) -> WsiAnnotations: + """ + Read annotations as a V7 Darwin [1] JSON file. + + Parameters + ---------- + darwin_json : PathLike + Path to the Darwin JSON file. + scaling : float, optional + The scaling to apply to the annotations. + sorting: AnnotationSorting + The sorting to apply to the annotations. Check the `AnnotationSorting` enum for more information. + By default, the annotations are not sorted as V7 Darwin supports hierarchical annotations. + + References + ---------- + .. [1] https://darwin.v7labs.com/ + + Returns + ------- + WsiAnnotations + + """ if not DARWIN_SDK_AVAILABLE: raise RuntimeError("`darwin` is not available. Install using `python -m pip install darwin-py`.") import darwin @@ -673,7 +724,7 @@ def from_darwin_json(cls, darwin_json: PathLike, scaling: float | None = None) - output = [] for an_cls, _annotation in all_annotations.items(): output.append(SingleAnnotationWrapper(a_cls=an_cls, annotation=_annotation)) - return cls(output, sorting=AnnotationSorting.NONE) + return cls(output, sorting=sorting) def __getitem__(self, a_cls: AnnotationClass) -> SingleAnnotationWrapper: return self._annotations[a_cls] diff --git a/dlup/data/dataset.py b/dlup/data/dataset.py index be12323f..8b83ac5a 100644 --- a/dlup/data/dataset.py +++ b/dlup/data/dataset.py @@ -488,7 +488,7 @@ def from_standard_tiling( Backend to use to read the whole slide image. 
limit_bounds : bool If the bounds of the grid should be limited to the bounds of the slide given in the `slide_bounds` property - of the `SlideImage` class. + of the `SlideImage` class. If ROIs are given, this parameter is ignored. **kwargs : Gets passed to the SlideImage constructor. @@ -506,25 +506,19 @@ def from_standard_tiling( scaling = slide_image.get_scaling(mpp) slide_mpp = slide_image.mpp - if limit_bounds: - if rois is not None: - raise ValueError("Cannot use both `rois` and `limit_bounds` at the same time.") + if rois is not None: + slide_level_size = slide_image.get_scaled_size(scaling, limit_bounds=False) + _rois = parse_rois(rois, slide_level_size, scaling=scaling) + elif limit_bounds: if backend == ImageBackend.AUTODETECT or backend == "AUTODETECT": raise ValueError( "Cannot use AutoDetect as backend and use limit_bounds at the same time. " "This is related to issue #151. See https://github.com/NKI-AI/dlup/issues/151" ) - - offset, bounds = slide_image.slide_bounds - offset = (int(scaling * offset[0]), int(scaling * offset[1])) - size = int(bounds[0] * scaling), int(bounds[1] * scaling) - _rois = [ - (offset, size), - ] - + _rois = [slide_image.get_scaled_slide_bounds(scaling=scaling)] else: - slide_level_size = slide_image.get_scaled_size(scaling) - _rois = parse_rois(rois, slide_level_size, scaling=slide_mpp / mpp if mpp else 1.0) + slide_level_size = slide_image.get_scaled_size(scaling, limit_bounds=False) + _rois = [((0, 0), slide_level_size)] grid_mpp = mpp if mpp is not None else slide_mpp grids = []