Skip to content

Commit 84917db

Browse files
committed
feat: addition of testing for the BoundingRectangle
1 parent 9e2d467 commit 84917db

File tree

3 files changed

+579
-0
lines changed

3 files changed

+579
-0
lines changed

test/data/doc/rect_doc_test.gt.txt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
<doctag><caption><rec_281><rec_81><rec_454><rec_168><rec_457><rec_164><rec_284><rec_77>Figure 2: New car sales by year as modelled by the TCM LS model run</caption>
2+
<picture><rec_180><rec_185><rec_378><rec_289><rec_464><rec_183><rec_261><rec_80></picture>
3+
<section_header_level_2><rec_175><rec_192><rec_186><rec_197><rec_189><rec_194><rec_177><rec_188>Notes:</section_header_level_2>
4+
<unordered_list><list_item><rec_154><rec_218><rec_348><rec_321><rec_364><rec_301><rec_170><rec_198> ICV = internal combustion vehicle, HEV = hybrid electric vehicle, PHEV = plug-in hybrid electric vehicle, EV = electric vehicle, FCV = fuel cell vehicle, E85 = 85% bio-ethanol plus 15% petrol blend, Biodiesel (2nd gen) ICV = pure (100%) second generation biodiesel.</list_item>
5+
<list_item><rec_147><rec_228><rec_313><rec_317><rec_316><rec_314><rec_149><rec_225> The peak and subsequent drop in new car sales in 2005-07 is based on observed data.</list_item>
6+
</unordered_list>
7+
<text><rec_68><rec_329><rec_259><rec_436><rec_329><rec_349><rec_134><rec_244>Two further lifestyle changes were simulated for cars. First, car buyers - whether private, fleet or business - are assumed to choose smaller cars instead of larger ones. This is simulated in UKTCM by phasing out the sale of new large cars (engine size >2.0 litres) by 2020 - starting in 2010, with linear interpolation between 2010 and 2020. Secondly, the tendency towards less overall car use and the increased membership of car clubs for use of a variety of types of cars for longer distance journeys is modelled endogenously in UKTCM by assuming significantly lower levels of maximum car ownership per household in urban and non-urban areas - about half of the reference value (TCM REF) for households owning ‘at least 2 cars’ and ‘at least</text>
8+
<page_footer><rec_149><rec_393><rec_155><rec_396><rec_158><rec_393><rec_152><rec_390>25</page_footer>
9+
</doctag>

test/data/doc/rect_doc_test.json

Lines changed: 315 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,315 @@
1+
{
2+
"schema_name": "DoclingDocument",
3+
"version": "1.3.0",
4+
"name": "00e80479997ace55a2e7cc4cb291411b81b2b4111e5be295b8e827436e917530",
5+
"furniture": {
6+
"self_ref": "#/furniture",
7+
"children": [],
8+
"content_layer": "furniture",
9+
"name": "_root_",
10+
"label": "unspecified"
11+
},
12+
"body": {
13+
"self_ref": "#/body",
14+
"children": [
15+
{
16+
"$ref": "#/texts/0"
17+
},
18+
{
19+
"$ref": "#/pictures/0"
20+
},
21+
{
22+
"$ref": "#/texts/1"
23+
},
24+
{
25+
"$ref": "#/groups/0"
26+
},
27+
{
28+
"$ref": "#/texts/4"
29+
},
30+
{
31+
"$ref": "#/texts/5"
32+
}
33+
],
34+
"content_layer": "body",
35+
"name": "_root_",
36+
"label": "unspecified"
37+
},
38+
"groups": [
39+
{
40+
"self_ref": "#/groups/0",
41+
"parent": {
42+
"$ref": "#/body"
43+
},
44+
"children": [
45+
{
46+
"$ref": "#/texts/2"
47+
},
48+
{
49+
"$ref": "#/texts/3"
50+
}
51+
],
52+
"content_layer": "body",
53+
"name": "list",
54+
"label": "list"
55+
}
56+
],
57+
"texts": [
58+
{
59+
"self_ref": "#/texts/0",
60+
"parent": {
61+
"$ref": "#/body"
62+
},
63+
"children": [],
64+
"content_layer": "body",
65+
"label": "caption",
66+
"prov": [
67+
{
68+
"page_no": 1,
69+
"bbox": {
70+
"r_x0": 509.66453018188474,
71+
"r_y0": 195.2655132293701,
72+
"r_x1": 823.4733366966248,
73+
"r_y1": 405.85461616516113,
74+
"r_x2": 828.5433065414428,
75+
"r_y2": 397.4555854797363,
76+
"r_x3": 514.4958744049072,
77+
"r_y3": 186.9875347137451,
78+
"coord_origin": "TOPLEFT"
79+
},
80+
"charspan": [
81+
0,
82+
0
83+
]
84+
}
85+
],
86+
"orig": "Figure 2: New car sales by year as modelled by the TCM LS model run",
87+
"text": "Figure 2: New car sales by year as modelled by the TCM LS model run"
88+
},
89+
{
90+
"self_ref": "#/texts/1",
91+
"parent": {
92+
"$ref": "#/body"
93+
},
94+
"children": [],
95+
"content_layer": "body",
96+
"label": "section_header",
97+
"prov": [
98+
{
99+
"page_no": 1,
100+
"bbox": {
101+
"r_x0": 317.1496463298798,
102+
"r_y0": 463.0926097869873,
103+
"r_x1": 337.6816155910492,
104+
"r_y1": 477.51476097106934,
105+
"r_x2": 342.38450860977173,
106+
"r_y2": 469.4521385192871,
107+
"r_x3": 321.8373177051544,
108+
"r_y3": 455.03806228637694,
109+
"coord_origin": "TOPLEFT"
110+
},
111+
"charspan": [
112+
0,
113+
0
114+
]
115+
}
116+
],
117+
"orig": "Notes:",
118+
"text": "Notes:",
119+
"level": 2
120+
},
121+
{
122+
"self_ref": "#/texts/2",
123+
"parent": {
124+
"$ref": "#/groups/0"
125+
},
126+
"children": [],
127+
"content_layer": "body",
128+
"label": "list_item",
129+
"prov": [
130+
{
131+
"page_no": 1,
132+
"bbox": {
133+
"r_x0": 279.7723736286163,
134+
"r_y0": 527.31781539917,
135+
"r_x1": 630.5829946517944,
136+
"r_y1": 776.2454526901245,
137+
"r_x2": 660.1747346878052,
138+
"r_y2": 727.3397632598877,
139+
"r_x3": 307.7846315860748,
140+
"r_y3": 479.18461761474606,
141+
"coord_origin": "TOPLEFT"
142+
},
143+
"charspan": [
144+
0,
145+
0
146+
]
147+
}
148+
],
149+
"orig": "\uf0b7 ICV = internal combustion vehicle, HEV = hybrid electric vehicle, PHEV = plug-in hybrid electric vehicle, EV = electric vehicle, FCV = fuel cell vehicle, E85 = 85% bio-ethanol plus 15% petrol blend, Biodiesel (2nd gen) ICV = pure (100%) second generation biodiesel.",
150+
"text": "\uf0b7 ICV = internal combustion vehicle, HEV = hybrid electric vehicle, PHEV = plug-in hybrid electric vehicle, EV = electric vehicle, FCV = fuel cell vehicle, E85 = 85% bio-ethanol plus 15% petrol blend, Biodiesel (2nd gen) ICV = pure (100%) second generation biodiesel.",
151+
"enumerated": false,
152+
"marker": "-"
153+
},
154+
{
155+
"self_ref": "#/texts/3",
156+
"parent": {
157+
"$ref": "#/groups/0"
158+
},
159+
"children": [],
160+
"content_layer": "body",
161+
"label": "list_item",
162+
"prov": [
163+
{
164+
"page_no": 1,
165+
"bbox": {
166+
"r_x0": 265.81266660690306,
167+
"r_y0": 551.3047462463379,
168+
"r_x1": 567.6941934585572,
169+
"r_y1": 766.3243297576904,
170+
"r_x2": 572.5692830085754,
171+
"r_y2": 758.2244688034058,
172+
"r_x3": 270.4625407218933,
173+
"r_y3": 543.3150146484375,
174+
"coord_origin": "TOPLEFT"
175+
},
176+
"charspan": [
177+
0,
178+
0
179+
]
180+
}
181+
],
182+
"orig": "\uf0b7 The peak and subsequent drop in new car sales in 2005-07 is based on observed data.",
183+
"text": "\uf0b7 The peak and subsequent drop in new car sales in 2005-07 is based on observed data.",
184+
"enumerated": false,
185+
"marker": "-"
186+
},
187+
{
188+
"self_ref": "#/texts/4",
189+
"parent": {
190+
"$ref": "#/body"
191+
},
192+
"children": [],
193+
"content_layer": "body",
194+
"label": "text",
195+
"prov": [
196+
{
197+
"page_no": 1,
198+
"bbox": {
199+
"r_x0": 123.34899322986602,
200+
"r_y0": 796.0989818572998,
201+
"r_x1": 470.6539724349975,
202+
"r_y1": 1053.1127815246582,
203+
"r_x2": 596.6871794700622,
204+
"r_y2": 844.9434242248535,
205+
"r_x3": 242.61499013900755,
206+
"r_y3": 591.1650844573975,
207+
"coord_origin": "TOPLEFT"
208+
},
209+
"charspan": [
210+
0,
211+
0
212+
]
213+
}
214+
],
215+
"orig": "Two further lifestyle changes were simulated for cars. First, car buyers - whether private, fleet or business - are assumed to choose smaller cars instead of larger ones. This is simulated in UKTCM by phasing out the sale of new large cars (engine size >2.0 litres) by 2020 - starting in 2010, with linear interpolation between 2010 and 2020. Secondly, the tendency towards less overall car use and the increased membership of car clubs for use of a variety of types of cars for longer distance journeys is modelled endogenously in UKTCM by assuming significantly lower levels of maximum car ownership per household in urban and non-urban areas - about half of the reference value (TCM REF) for households owning \u2018at least 2 cars\u2019 and \u2018at least",
216+
"text": "Two further lifestyle changes were simulated for cars. First, car buyers - whether private, fleet or business - are assumed to choose smaller cars instead of larger ones. This is simulated in UKTCM by phasing out the sale of new large cars (engine size >2.0 litres) by 2020 - starting in 2010, with linear interpolation between 2010 and 2020. Secondly, the tendency towards less overall car use and the increased membership of car clubs for use of a variety of types of cars for longer distance journeys is modelled endogenously in UKTCM by assuming significantly lower levels of maximum car ownership per household in urban and non-urban areas - about half of the reference value (TCM REF) for households owning \u2018at least 2 cars\u2019 and \u2018at least"
217+
},
218+
{
219+
"self_ref": "#/texts/5",
220+
"parent": {
221+
"$ref": "#/body"
222+
},
223+
"children": [],
224+
"content_layer": "body",
225+
"label": "page_footer",
226+
"prov": [
227+
{
228+
"page_no": 1,
229+
"bbox": {
230+
"r_x0": 270.87814836502076,
231+
"r_y0": 950.4819365501403,
232+
"r_x1": 281.0608870983124,
233+
"r_y1": 958.0538795471191,
234+
"r_x2": 285.7220084667206,
235+
"r_y2": 950.2045403480529,
236+
"r_x3": 275.53167243003844,
237+
"r_y3": 942.6361121177673,
238+
"coord_origin": "TOPLEFT"
239+
},
240+
"charspan": [
241+
0,
242+
0
243+
]
244+
}
245+
],
246+
"orig": "25",
247+
"text": "25"
248+
}
249+
],
250+
"pictures": [
251+
{
252+
"self_ref": "#/pictures/0",
253+
"parent": {
254+
"$ref": "#/body"
255+
},
256+
"children": [],
257+
"content_layer": "body",
258+
"label": "picture",
259+
"prov": [
260+
{
261+
"page_no": 1,
262+
"bbox": {
263+
"r_x0": 326.52861199378964,
264+
"r_y0": 446.9771701812744,
265+
"r_x1": 685.4023781776428,
266+
"r_y1": 698.4126926422119,
267+
"r_x2": 841.0731253623962,
268+
"r_y2": 441.2905300140381,
269+
"r_x3": 473.6983551979065,
270+
"r_y3": 194.0965919494629,
271+
"coord_origin": "TOPLEFT"
272+
},
273+
"charspan": [
274+
0,
275+
0
276+
]
277+
}
278+
],
279+
"captions": [],
280+
"references": [],
281+
"footnotes": [],
282+
"image": {
283+
"mimetype": "image/png",
284+
"dpi": 72,
285+
"size": {
286+
"width": 871.0,
287+
"height": 570.0
288+
},
289+
"uri": "GroundTruthPictures/0"
290+
},
291+
"annotations": []
292+
}
293+
],
294+
"tables": [],
295+
"key_value_items": [],
296+
"form_items": [],
297+
"pages": {
298+
"1": {
299+
"size": {
300+
"width": 907.0,
301+
"height": 1209.0
302+
},
303+
"image": {
304+
"mimetype": "image/png",
305+
"dpi": 72,
306+
"size": {
307+
"width": 907.0,
308+
"height": 1209.0
309+
},
310+
"uri": "GroundTruthPageImages/1"
311+
},
312+
"page_no": 1
313+
}
314+
}
315+
}

0 commit comments

Comments
 (0)