-
Notifications
You must be signed in to change notification settings - Fork 3
/
test_text_box.py
47 lines (43 loc) · 2.06 KB
/
test_text_box.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
from skimage import io
import unittest
from resources import Resources
import context
from pyamiimage.tesseract_hocr import TesseractOCR
from pyamiimage.text_box import TextBox
class TestTextBox:
    """Tests for building ``TextBox`` objects from Tesseract hOCR output.

    Each test works on hOCR that is generated from an image resource and then
    parsed with ``TesseractOCR.parse_hocr_string``.
    """

    def setup_method(self, method):
        """Generate and parse hOCR for the three image fixtures.

        For each image this stores the value returned by
        ``TesseractOCR.hocr_from_image_path`` (``*_hocr``) and the result of
        passing it to ``TesseractOCR.parse_hocr_string`` (``*_elem``).

        :param method: the test method about to run (supplied by pytest,
            unused here)
        """
        # NOTE(review): this runs before every test, including the skipped
        # ones, and the biosynth3 fixtures are not used by any test in this
        # class - presumably OCR is slow, so consider trimming; confirm first.
        self.cropped1_hocr = TesseractOCR.hocr_from_image_path(
            Resources.BIOSYNTH1_CROPPED_PNG
        )
        self.cropped1_elem = TesseractOCR.parse_hocr_string(self.cropped1_hocr)
        self.biosynth2_hocr = TesseractOCR.hocr_from_image_path(Resources.BIOSYNTH2_RAW)
        self.biosynth2_elem = TesseractOCR.parse_hocr_string(self.biosynth2_hocr)
        self.biosynth3_hocr = TesseractOCR.hocr_from_image_path(Resources.BIOSYNTH3_RAW)
        self.biosynth3_elem = TesseractOCR.parse_hocr_string(self.biosynth3_hocr)

    @unittest.skip("nothing extracted; BUG but needs reparametrising")
    def test_extract_phrases_boxes0(self):
        """Check the phrases and bounding boxes found in the cropped image."""
        phrases, bboxes = TesseractOCR.find_phrases(self.cropped1_elem)
        assert len(bboxes) == 12
        assert phrases[0] == "Isomerase (?)"

    @unittest.skip("nothing extracted; BUG but needs reparametrising")
    def test_extract_text_bboxes(self):
        """Check text and bbox ranges of selected boxes in the cropped image."""
        # TODO replace bboxes with whitespace
        text_boxes = TextBox.find_text_boxes(self.cropped1_elem)
        assert len(text_boxes) == 12
        assert isinstance(text_boxes[0], TextBox)

        assert text_boxes[0].text == "Isomerase (?)"
        assert text_boxes[0].bbox.xy_ranges == [[684, 843], [38, 65]]

        assert text_boxes[4].text == "Dimethylallyl diphosphate"
        assert text_boxes[4].bbox.xy_ranges == [[895, 1214], [70, 98]]

        assert text_boxes[10].text == "GPP synthase"
        assert text_boxes[10].bbox.xy_ranges == [[568, 732], [281, 308]]

    def test_extract_text_path2(self):
        """Check how many text boxes are found in the second biosynth image."""
        text_boxes = TextBox.find_text_boxes(self.biosynth2_elem)
        # this is system dependent, so accept a small range rather than an
        # exact count
        assert 65 <= len(text_boxes) <= 67