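"""Document image binarization experiments: compares doxapy's reference
algorithms and implements a block-wise adaptive Otsu binarization."""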
import cv2
import numpy as np
from doxapy import Binarization
from PIL import Image, ImageDraw, ImageFont


def read_image(file):
    return np.array(Image.open(file).convert("L"))


def binarize(grayscale_image, algo, args=None, inplace=False):
    """Binarize a grayscale array with the given doxapy algorithm, either in place
    or into a newly allocated output array."""
    args = args or {}
    if inplace:
        Binarization.update_to_binary(algo, grayscale_image, args)
    else:
        binary_image = np.empty(grayscale_image.shape, grayscale_image.dtype)  # output image buffer
        binarization = Binarization(algo)
        binarization.initialize(grayscale_image)
        binarization.to_binary(binary_image, args)
        return binary_image
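

# Example usage of binarize (a sketch, not exercised by this script); the
# parameter names and values follow the doxapy README for SAUVOLA and are
# illustrative only:
#
#   gray = read_image("scan.png")
#   binary = binarize(gray, Binarization.Algorithms.SAUVOLA, {"window": 75, "k": 0.2})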


def adaptive_otsu(path, blur=True, blockx=64, blocky=64, magic=0.74, out=None):
    """Block-wise adaptive Otsu binarization, based on:
    "Fast Document Image Binarization Based on an Improved Adaptive Otsu's Method
    and Destination Word Accumulation"
    """
    from genutility.numpy import block2d, unblock, unblock2d

    img = cv2.imread(path, 0)
    a = img.shape[0] // blockx
    b = img.shape[1] // blocky
    # split the image into a stack of (blockx, blocky) tiles
    blocks = unblock2d(img, blockx, blocky, blocksize=True)
    # sanity check: unblock and unblock2d should produce the same tiling
    assert np.array_equal(blocks, unblock(img, blockx, blocky, blocksize=True))
    if out is None:
        out = np.empty(blocks.shape, dtype=bool)
    for i in range(blocks.shape[0]):
        block_original = blocks[i]
        if blur:
            block_processed = cv2.GaussianBlur(block_original, (5, 5), 0)
        else:
            block_processed = block_original
        thresh, fn_min = adaptive_otsu_block(block_processed)
        if fn_min > magic:
            # apply the per-block Otsu threshold
            block_binary = np.where(block_original < thresh, False, True)
        else:
            # otherwise mark the whole block as white (background)
            block_binary = np.ones(block_original.shape, dtype=bool)
        out[i] = block_binary
    # reassemble the a x b grid of binarized tiles into a full image
    return block2d(out, a, b, blockx, blocky)
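

def global_otsu(path):
    """Sketch for comparison (not part of the original script): OpenCV's built-in
    single, global Otsu threshold over the whole image, instead of per-block
    thresholds as in adaptive_otsu above."""
    img = cv2.imread(path, 0)
    _thresh, binary = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    return binary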


def adaptive_otsu_block(block):
    """Exhaustive-search Otsu threshold for a single block.

    Returns the threshold and the minimal within-class variance found."""
    hist = cv2.calcHist([block], [0], None, [256], [0, 256])
    hist_norm = hist.ravel() / hist.sum()
    Q = hist_norm.cumsum()
    bins = np.arange(256)
    fn_min = np.inf
    thresh = -1
    for i in range(1, 256):
        p1, p2 = np.hsplit(hist_norm, [i])  # probabilities
        q1, q2 = Q[i], Q[255] - Q[i]  # cumulative sums of the two classes
        if q1 < 1.0e-6 or q2 < 1.0e-6:
            continue
        b1, b2 = np.hsplit(bins, [i])  # weights
        # means and variances of the two classes
        m1, m2 = np.sum(p1 * b1) / q1, np.sum(p2 * b2) / q2
        v1, v2 = np.sum(((b1 - m1) ** 2) * p1) / q1, np.sum(((b2 - m2) ** 2) * p2) / q2
        # within-class variance; Otsu's threshold minimizes it
        fn = v1 * q1 + v2 * q2
        if fn < fn_min:
            fn_min = fn
            thresh = i
    return thresh, fn_min


def test_doxa(path):
    """Run every doxapy binarization algorithm on one image and display each result."""
    font = ImageFont.truetype("arial.ttf", 36)
    grayscale_image = read_image(path)
    for algo, args in [
        (Binarization.Algorithms.OTSU, {}),
        (Binarization.Algorithms.BERNSEN, {}),
        (Binarization.Algorithms.NIBLACK, {}),
        (Binarization.Algorithms.SAUVOLA, {}),
        (Binarization.Algorithms.WOLF, {}),
        (Binarization.Algorithms.NICK, {}),
        (Binarization.Algorithms.SU, {}),
        (Binarization.Algorithms.TRSINGH, {}),
        (Binarization.Algorithms.BATAINEH, {}),
        (Binarization.Algorithms.ISAUVOLA, {}),
        (Binarization.Algorithms.WAN, {}),
        (Binarization.Algorithms.GATOS, {}),
    ]:
        binary_image = binarize(grayscale_image, algo, args)
        # display the resulting image, labelled with the algorithm name
        img = Image.fromarray(binary_image)
        draw = ImageDraw.Draw(img)
        draw.text((0, 0), str(algo), fill=0, font=font)
        img.show()


if __name__ == "__main__":
    path = "output-linear-512x512.png"
    # test_doxa(path)
    for size in (16, 32, 64, 128):
        img = adaptive_otsu(path, blur=False, blockx=size, blocky=size)
        Image.fromarray(img).save(f"adaptive-otsu_{size}.png")