Skip to content

Commit

Permalink
PageType.get_AllRegions to list all kinds of regions
Browse files Browse the repository at this point in the history
  • Loading branch information
kba committed May 13, 2020
1 parent 42c1672 commit abef352
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 2 deletions.
18 changes: 16 additions & 2 deletions ocrd_models/ocrd_models/ocrd_page_generateds.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-

#
# Generated Wed May 13 16:09:07 2020 by generateDS.py version 2.35.20.
# Generated Wed May 13 20:41:21 2020 by generateDS.py version 2.35.20.
# Python 3.7.6 (default, Jan 8 2020, 19:59:22) [GCC 7.3.0]
#
# Command line options:
Expand Down Expand Up @@ -2850,7 +2850,21 @@ def buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collec
obj_.original_tagname_ = 'CustomRegion'
def __hash__(self):
return hash(self.id)
# end class PageType

def get_AllRegions(self, regions=None, reading_order=False):
    """
    Get all the ``*Region`` elements, or only the kinds listed in ``regions``.

    ``regions`` is a list of region kinds without the ``Region`` suffix
    (e.g. ``['Table', 'Text']``); each entry ``X`` is resolved to the getter
    ``get_XRegion`` on this object. If ``regions`` is empty or ``None``,
    all thirteen known kinds are queried.

    The result is a single flat list: the getters' results concatenated in
    the order the kinds appear in ``regions``. This is NOT reading order.

    Raises ``NotImplementedError`` if ``reading_order`` is set (NOT
    CURRENTLY IMPLEMENTED) and ``AttributeError`` if an entry of
    ``regions`` has no matching ``get_XRegion`` getter.
    """
    if reading_order:
        raise NotImplementedError("Ordering of regions by Reading Order not currently Implemented :(")
    if not regions:
        # Default: every region kind that has a get_<kind>Region getter.
        regions = ['Advert', 'Chart', 'Chem', 'Custom', 'Graphic', 'Image', 'LineDrawing', 'Map', 'Maths', 'Music', 'Noise', 'Table', 'Text']
    ret = []
    for region in regions:
        # Look the getter up by name, call it, and append its elements.
        ret += getattr(self, 'get_{}Region'.format(region))()
    return ret
# end class PageType


class CoordsType(GeneratedsSuper):
Expand Down
21 changes: 21 additions & 0 deletions ocrd_models/ocrd_page_user_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,26 @@ def show(self):
#
# Replace the following method specifications with your own.

#
# List all *Regions on the PAGE
#
# Method specification for ``get_AllRegions``. ``class_names`` restricts it to
# the class named exactly ``PageType``. The ``source`` text is the method body
# as it should appear inside that class, hence its one-level indentation.
# NOTE(review): presumably generateDS pastes ``source`` verbatim into the
# generated module -- confirm against the generateDS user-methods docs.
get_AllRegions = MethodSpec(name='get_AllRegions',
    source=r'''
    def get_AllRegions(self, regions=None, reading_order=False):
        """
        Get all the *Region element or only those provided by ``regions``.
        Returned in random order unless ``reading_order`` is set (NOT CURRENTLY IMPLEMENTED)
        """
        if reading_order:
            raise NotImplementedError("Ordering of regions by Reading Order not currently Implemented :(")
        if not regions:
            regions = ['Advert', 'Chart', 'Chem', 'Custom', 'Graphic', 'Image', 'LineDrawing', 'Map', 'Maths', 'Music', 'Noise', 'Table', 'Text']
        ret = []
        for region in regions:
            ret += getattr(self, 'get_{}Region'.format(region))()
        return ret
    ''', class_names=r'^(PageType)$')

#
# List all *Indexed children sorted by @index
#
Expand Down Expand Up @@ -167,6 +187,7 @@ def __hash__(self):
exportChildren,
get_AllIndexed,
add_AllIndexed,
get_AllRegions,
clear_AllIndexed,
)

Expand Down
11 changes: 11 additions & 0 deletions tests/model/test_ocrd_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,5 +184,16 @@ def test_empty_groups_to_regionrefindexed(self):
children = og.get_AllIndexed()
self.assertTrue(isinstance(children[1], RegionRefIndexedType))

def test_all_regions(self):
    """
    Check the region counts returned by ``get_AllRegions`` for a sample page.

    Corollary of https://github.com/OCR-D/core/issues/475
    """
    # Read the fixture as raw bytes: the parser is handed bytes anyway, and a
    # text-mode read would decode with the locale's default encoding first,
    # which corrupts non-ASCII content on non-UTF-8 systems.
    with open('tests/model/TEMP1_Gutachten2-2.xml', 'rb') as f:
        pcgts = parseString(f.read(), silence=True)
    pg = pcgts.get_Page()
    # No filter: all region kinds together.
    self.assertEqual(len(pg.get_AllRegions()), 20)
    # Filtered by kind; the two counts add up to the unfiltered total.
    self.assertEqual(len(pg.get_AllRegions(['Table'])), 3)
    self.assertEqual(len(pg.get_AllRegions(['Text'])), 17)

# Run the test suite only when this file is executed directly, not on import.
if __name__ == '__main__':
    main()

0 comments on commit abef352

Please sign in to comment.