Skip to content

Commit 9e2d467

Browse files
committed
fix methods to make Brectangle retro compatible
1 parent 6413bc0 commit 9e2d467

File tree

11 files changed

+2231
-60
lines changed

11 files changed

+2231
-60
lines changed

docling_core/transforms/serializer/doctags.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
DocSerializer,
2626
create_ser_result,
2727
)
28-
from docling_core.types.doc.base import BoundingBox
28+
from docling_core.types.doc.base import BoundingBox, BoundingRectangle
2929
from docling_core.types.doc.document import (
3030
CodeItem,
3131
DocItem,
@@ -423,7 +423,7 @@ def _get_inline_location_tags(
423423
) -> SerializationResult:
424424

425425
prov: Optional[ProvenanceItem] = None
426-
boxes: list[BoundingBox] = []
426+
boxes: list[Union[BoundingBox, BoundingRectangle]] = []
427427
doc_items: list[DocItem] = []
428428
for it, _ in doc.iterate_items(root=item):
429429
if isinstance(it, DocItem):

docling_core/types/doc/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,6 @@
7070
TableCellLabel,
7171
)
7272
from .page import (
73-
BoundingRectangle,
7473
ColorMixin,
7574
ColorRGBA,
7675
Coord2D,

docling_core/types/doc/base.py

Lines changed: 128 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
"""Models for the base data types."""
22

3-
from enum import Enum
4-
from typing import List, Tuple, NamedTuple
5-
import numpy as np
63
import math
7-
from shapely.geometry import Polygon
4+
from enum import Enum
5+
from typing import List, NamedTuple, Tuple, Union
86

7+
import numpy as np
98
from pydantic import BaseModel
9+
from shapely.geometry import Polygon
1010

1111

1212
class ImageRefMode(str, Enum):
@@ -373,7 +373,9 @@ def is_horizontally_connected(
373373
return False
374374

375375
@classmethod
376-
def enclosing_bbox(cls, boxes: List["BoundingBox"]) -> "BoundingBox":
376+
def enclosing_bbox(
377+
cls, boxes: List[Union["BoundingBox", "BoundingRectangle"]]
378+
) -> "BoundingBox":
377379
"""Create a bounding box that covers all of the given boxes."""
378380
if not boxes:
379381
raise ValueError("No bounding boxes provided for union.")
@@ -385,15 +387,21 @@ def enclosing_bbox(cls, boxes: List["BoundingBox"]) -> "BoundingBox":
385387
CoordOrigin to compute their union."
386388
)
387389

388-
left = min(box.l for box in boxes)
389-
right = max(box.r for box in boxes)
390+
# convert every BoundingRectangle into its enclosing BoundingBox
391+
boxes_post = [
392+
box.to_bounding_box() if isinstance(box, BoundingRectangle) else box
393+
for box in boxes
394+
]
395+
396+
left = min(box.l for box in boxes_post)
397+
right = max(box.r for box in boxes_post)
390398

391399
if origin == CoordOrigin.TOPLEFT:
392-
top = min(box.t for box in boxes)
393-
bottom = max(box.b for box in boxes)
400+
top = min(box.t for box in boxes_post)
401+
bottom = max(box.b for box in boxes_post)
394402
elif origin == CoordOrigin.BOTTOMLEFT:
395-
top = max(box.t for box in boxes)
396-
bottom = min(box.b for box in boxes)
403+
top = max(box.t for box in boxes_post)
404+
bottom = min(box.b for box in boxes_post)
397405
else:
398406
raise ValueError("BoundingBoxes have different CoordOrigin")
399407

@@ -437,6 +445,7 @@ def y_union_with(self, other: "BoundingBox") -> float:
437445
return max(0.0, max(self.t, other.t) - min(self.b, other.b))
438446
raise ValueError("Unsupported CoordOrigin")
439447

448+
440449
class Coord2D(NamedTuple):
441450
"""A 2D coordinate with x and y components."""
442451

@@ -503,6 +512,82 @@ def centre(self):
503512
self.r_y0 + self.r_y1 + self.r_y2 + self.r_y3
504513
) / 4.0
505514

515+
@property
516+
def l(self): # noqa: E743
517+
"""Left value of the enclosing rectangle."""
518+
return min([self.r_x0, self.r_x1, self.r_x2, self.r_x3])
519+
520+
@property
521+
def r(self):
522+
"""Right value of the enclosing rectangle."""
523+
return max([self.r_x0, self.r_x1, self.r_x2, self.r_x3])
524+
525+
@property
526+
def t(self):
527+
"""Top value of the enclosing rectangle."""
528+
if self.coord_origin == CoordOrigin.BOTTOMLEFT:
529+
top = max([self.r_y0, self.r_y1, self.r_y2, self.r_y3])
530+
else:
531+
top = min([self.r_y0, self.r_y1, self.r_y2, self.r_y3])
532+
return top
533+
534+
@property
535+
def b(self):
536+
"""Bottom value of the enclosing rectangle."""
537+
if self.coord_origin == CoordOrigin.BOTTOMLEFT:
538+
bottom = min([self.r_y0, self.r_y1, self.r_y2, self.r_y3])
539+
else:
540+
bottom = max([self.r_y0, self.r_y1, self.r_y2, self.r_y3])
541+
return bottom
542+
543+
def resize_by_scale(self, x_scale: float, y_scale: float):
544+
"""resize_by_scale."""
545+
rect_to_bbox = self.to_bounding_box()
546+
return BoundingBox(
547+
l=rect_to_bbox.l * x_scale,
548+
r=rect_to_bbox.r * x_scale,
549+
t=rect_to_bbox.t * y_scale,
550+
b=rect_to_bbox.b * y_scale,
551+
coord_origin=self.coord_origin,
552+
)
553+
554+
def scale_to_size(self, old_size: Size, new_size: Size):
555+
"""scale_to_size."""
556+
return self.resize_by_scale(
557+
x_scale=new_size.width / old_size.width,
558+
y_scale=new_size.height / old_size.height,
559+
)
560+
561+
def scaled(self, scale: float):
562+
"""scaled."""
563+
return self.resize_by_scale(x_scale=scale, y_scale=scale)
564+
565+
def normalized(self, page_size: Size):
566+
"""normalized."""
567+
return self.scale_to_size(
568+
old_size=page_size, new_size=Size(height=1.0, width=1.0)
569+
)
570+
571+
def expand_by_scale(self, x_scale: float, y_scale: float) -> "BoundingBox":
572+
"""Grow the enclosing box on each side by x_scale * width and y_scale * height."""
573+
rect_to_bbox = self.to_bounding_box()
574+
if self.coord_origin == CoordOrigin.TOPLEFT:
575+
return BoundingBox(
576+
l=rect_to_bbox.l - rect_to_bbox.width * x_scale,
577+
r=rect_to_bbox.r + rect_to_bbox.width * x_scale,
578+
t=rect_to_bbox.t - rect_to_bbox.height * y_scale,
579+
b=rect_to_bbox.b + rect_to_bbox.height * y_scale,
580+
coord_origin=self.coord_origin,
581+
)
582+
elif self.coord_origin == CoordOrigin.BOTTOMLEFT:
583+
return BoundingBox(
584+
l=rect_to_bbox.l - rect_to_bbox.width * x_scale,
585+
r=rect_to_bbox.r + rect_to_bbox.width * x_scale,
586+
t=rect_to_bbox.t + rect_to_bbox.height * y_scale,
587+
b=rect_to_bbox.b - rect_to_bbox.height * y_scale,
588+
coord_origin=self.coord_origin,
589+
)
590+
506591
def to_bounding_box(self) -> BoundingBox:
507592
"""Convert to a BoundingBox representation."""
508593
if self.coord_origin == CoordOrigin.BOTTOMLEFT:
@@ -524,8 +609,12 @@ def to_bounding_box(self) -> BoundingBox:
524609
)
525610

526611
@classmethod
527-
def from_bounding_box(cls, bbox: BoundingBox) -> "BoundingRectangle":
612+
def from_bounding_box(
613+
cls, bbox: Union["BoundingRectangle", BoundingBox]
614+
) -> "BoundingRectangle":
528615
"""Convert a BoundingBox into a BoundingRectangle."""
616+
if isinstance(bbox, BoundingRectangle):
617+
return bbox
529618
return cls(
530619
r_x0=bbox.l,
531620
r_y0=bbox.b,
@@ -546,7 +635,7 @@ def to_polygon(self) -> List[Coord2D]:
546635
Coord2D(self.r_x2, self.r_y2),
547636
Coord2D(self.r_x3, self.r_y3),
548637
]
549-
638+
550639
def to_list(self) -> List[Tuple]:
551640
"""Convert to a list of tuple point coordinates."""
552641
return [
@@ -555,14 +644,30 @@ def to_list(self) -> List[Tuple]:
555644
(self.r_x2, self.r_y2),
556645
(self.r_x3, self.r_y3),
557646
]
558-
647+
648+
def as_tuple(self) -> Tuple[float, float, float, float, float, float, float, float]:
649+
"""as_tuple."""
650+
return (
651+
self.r_x0,
652+
self.r_y0,
653+
self.r_x1,
654+
self.r_y1,
655+
self.r_x2,
656+
self.r_y2,
657+
self.r_x3,
658+
self.r_y3,
659+
)
660+
559661
def to_shapely_polygon(self) -> Polygon:
560-
return Polygon([
561-
(self.r_x0, self.r_y0),
562-
(self.r_x1, self.r_y1),
563-
(self.r_x2, self.r_y2),
564-
(self.r_x3, self.r_y3),
565-
])
662+
"""To shapely polygon."""
663+
return Polygon(
664+
[
665+
(self.r_x0, self.r_y0),
666+
(self.r_x1, self.r_y1),
667+
(self.r_x2, self.r_y2),
668+
(self.r_x3, self.r_y3),
669+
]
670+
)
566671

567672
def to_bottom_left_origin(self, page_height: float) -> "BoundingRectangle":
568673
"""Convert coordinates to use bottom-left origin.
@@ -615,14 +720,11 @@ def to_top_left_origin(self, page_height: float) -> "BoundingRectangle":
615720
def intersection_over_union(
616721
self, other: "BoundingRectangle", eps: float = 1.0e-6
617722
) -> float:
618-
"""intersection_over_union."""
619-
723+
"""Intersection_over_union."""
620724
polygon_other = other.to_shapely_polygon()
621725
current_polygon = self.to_shapely_polygon()
622-
726+
623727
intersection_area = current_polygon.intersection(polygon_other).area
624728
union_area = current_polygon.union(polygon_other).area
625-
626-
return intersection_area / (union_area + eps)
627-
628729

730+
return intersection_area / (union_area + eps)

docling_core/types/doc/document.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -394,7 +394,9 @@ def get_row_bounding_boxes(self) -> dict[int, BoundingBox]:
394394
row_bboxes: dict[int, BoundingBox] = {}
395395

396396
for row_idx in range(self.num_rows):
397-
row_cells_with_bbox: dict[int, list[BoundingBox]] = {}
397+
row_cells_with_bbox: dict[
398+
int, list[Union["BoundingBox", "BoundingRectangle"]]
399+
] = {}
398400

399401
# Collect all cells in this row that have bounding boxes
400402
for cell in self.table_cells:
@@ -448,7 +450,9 @@ def get_column_bounding_boxes(self) -> dict[int, BoundingBox]:
448450
col_bboxes: dict[int, BoundingBox] = {}
449451

450452
for col_idx in range(self.num_cols):
451-
col_cells_with_bbox: dict[int, list[BoundingBox]] = {}
453+
col_cells_with_bbox: dict[
454+
int, list[Union["BoundingBox", "BoundingRectangle"]]
455+
] = {}
452456

453457
# Collect all cells in this column that have bounding boxes
454458
for cell in self.table_cells:

docling_core/types/doc/page.py

Lines changed: 8 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3,31 +3,23 @@
33
import copy
44
import json
55
import logging
6-
import math
76
import re
87
import typing
98
from enum import Enum
109
from pathlib import Path
11-
from typing import (
12-
Annotated,
13-
Any,
14-
Dict,
15-
Iterator,
16-
List,
17-
Literal,
18-
NamedTuple,
19-
Optional,
20-
Tuple,
21-
Union,
22-
)
10+
from typing import Annotated, Any, Dict, Iterator, List, Literal, Optional, Tuple, Union
2311

24-
import numpy as np
2512
from PIL import Image as PILImage
2613
from PIL import ImageColor, ImageDraw, ImageFont
2714
from PIL.ImageFont import FreeTypeFont
2815
from pydantic import AnyUrl, BaseModel, Field, model_validator
2916

30-
from docling_core.types.doc.base import BoundingBox, BoundingRectangle, CoordOrigin, Coord2D
17+
from docling_core.types.doc.base import (
18+
BoundingBox,
19+
BoundingRectangle,
20+
Coord2D,
21+
CoordOrigin,
22+
)
3123
from docling_core.types.doc.document import ImageRef
3224

3325
_logger = logging.getLogger(__name__)
@@ -80,6 +72,7 @@ def __iter__(self):
8072
"""Yield the color components for iteration."""
8173
yield from (self.r, self.g, self.b, self.a)
8274

75+
8376
class OrderedElement(BaseModel):
8477
"""Base model for elements that have an ordering index."""
8578

docling_core/types/doc/tokens.py

Lines changed: 22 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@
88
from enum import Enum
99
from typing import Tuple, Union
1010

11-
from docling_core.types.doc.labels import DocItemLabel
1211
from docling_core.types.doc.base import BoundingBox, BoundingRectangle
12+
from docling_core.types.doc.labels import DocItemLabel
1313

1414

1515
class TableToken(str, Enum):
@@ -263,7 +263,9 @@ def get_code_language_token(code_language: str) -> str:
263263
return _CodeLanguageToken(f"<_{code_language}_>").value
264264

265265
@staticmethod
266-
def get_location_token(val: float, rnorm: int = 500, prefix=_LOC_PREFIX): # TODO review
266+
def get_location_token(
267+
val: float, rnorm: int = 500, prefix=_LOC_PREFIX
268+
): # TODO review
267269
"""Function to get location tokens."""
268270
val_ = round(rnorm * val)
269271
val_ = max(val_, 0)
@@ -280,11 +282,11 @@ def get_location(
280282
):
281283
"""Get the location string given bbox and page-dim."""
282284
if isinstance(bbox, BoundingBox):
283-
#the transform this case into the old case
285+
# then transform this case into the old (tuple) case
284286
bbox = bbox.to_top_left_origin(page_h).as_tuple()
285287

286288
if isinstance(bbox, tuple):
287-
#old case
289+
# old case
288290
assert bbox[0] <= bbox[2], f"bbox[0]<=bbox[2] => {bbox[0]}<={bbox[2]}"
289291
assert bbox[1] <= bbox[3], f"bbox[1]<=bbox[3] => {bbox[1]}<={bbox[3]}"
290292

@@ -299,25 +301,33 @@ def get_location(
299301
y1_tok = DocumentToken.get_location_token(val=max(y0, y1), rnorm=ysize)
300302

301303
loc_str = f"{x0_tok}{y0_tok}{x1_tok}{y1_tok}"
302-
304+
303305
elif isinstance(bbox, BoundingRectangle):
304-
#use the prefix rec, 4 elements required
306+
# use the prefix rec, 4 elements required
305307
vertices = bbox.to_top_left_origin(page_h).to_polygon()
306308
# Convention for the rectangle vertices:
307309
# 3 +-------+ 2
308310
# | hello |
309311
# 0 +-------+ 1
310312
# 0->1 line must be the baseline for the words
311-
313+
312314
vertices_tok = []
313315
for vertex in vertices:
314-
vertex_x_norm = vertex.x/ page_w
315-
vertices_tok.append(DocumentToken.get_location_token(val= vertex_x_norm, rnorm=xsize, prefix=_REC_PREFIX))
316-
vertex_y_norm = vertex.y/ page_h
317-
vertices_tok.append(DocumentToken.get_location_token(val=vertex_y_norm, rnorm=ysize, prefix=_REC_PREFIX))
316+
vertex_x_norm = vertex.x / page_w
317+
vertices_tok.append(
318+
DocumentToken.get_location_token(
319+
val=vertex_x_norm, rnorm=xsize, prefix=_REC_PREFIX
320+
)
321+
)
322+
vertex_y_norm = vertex.y / page_h
323+
vertices_tok.append(
324+
DocumentToken.get_location_token(
325+
val=vertex_y_norm, rnorm=ysize, prefix=_REC_PREFIX
326+
)
327+
)
318328

319329
loc_str = ""
320330
for vertex_tok in vertices_tok:
321-
loc_str += f'{vertex_tok}'
331+
loc_str += f"{vertex_tok}"
322332

323333
return loc_str

0 commit comments

Comments
 (0)