-
Notifications
You must be signed in to change notification settings - Fork 129
/
PPOCR_visualize.py
159 lines (146 loc) · 6.29 KB
/
PPOCR_visualize.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
# 将 PaddleOCR-json 结果可视化表现
# 项目主页:
# https://github.com/hiroi-sora/PaddleOCR-json
from PIL import Image, ImageDraw, ImageFont
import math
class visualize:
    """Render PaddleOCR-json results as image layers over the source image.

    The static ``create*`` methods each build one independent RGBA layer.
    An instance builds every layer for one image up front and composes the
    requested ones on demand through :meth:`get`, :meth:`show` and
    :meth:`save`.
    """

    # Default font (Microsoft YaHei on Windows). Raw string: the backslashes
    # are path separators, not escape sequences.
    _DEFAULT_TTF = r"C:\Windows\Fonts\msyh.ttc"

    # ============================ static methods ============================

    @staticmethod
    def createBox(textBlocks, size, fill="#00500040", outline="#11ff22", width=6):
        """Create the bounding-box layer and return it as a PIL Image.

        Colors are hexadecimal 6-digit RGB or 8-digit RGBA strings,
        e.g. ``#112233ff``.

        :param textBlocks: list of text blocks; each needs a ``"box"`` entry
            holding four corner points.
        :param size: image size.
        :param fill: fill color of each box.
        :param outline: outline color of each box.
        :param width: outline thickness in pixels.
        :return: RGBA image of the given size with one polygon per block.
        """
        img = Image.new("RGBA", size, 0)
        draw = ImageDraw.Draw(img)
        for tb in textBlocks:
            corners = [tuple(tb["box"][i]) for i in range(4)]
            draw.polygon(corners, fill=fill, outline=outline, width=width)
        return img

    @staticmethod
    def _fontSize(box, ttfScale):
        """Return the font size, in pixels, that fits a block's line height.

        The line height is the distance between ``box[0]`` (top-left) and
        ``box[3]`` (bottom-left), scaled by ``ttfScale`` and rounded. The
        result is never below 1, because a font cannot be created with
        size 0 (degenerate box or very small scale).
        """
        top, bottom = box[0], box[3]
        height = math.hypot(top[0] - bottom[0], top[1] - bottom[1])
        return max(1, round(height * ttfScale))

    @staticmethod
    def createText(
        textBlocks,
        size,
        ttfPath=_DEFAULT_TTF,
        ttfScale=0.9,
        fill="#ff0000",
    ):
        """Create the recognized-text layer and return it as a PIL Image.

        Each block's text is drawn at the block's top-left corner, with a
        font size derived from the block's line height.

        :param textBlocks: list of text blocks; each needs ``"text"`` and
            ``"box"`` entries.
        :param size: image size.
        :param ttfPath: font file path. Defaults to Microsoft YaHei; loading
            fails if the font file does not exist.
        :param ttfScale: global font-size scale factor, expected near 1.
        :param fill: text color, hexadecimal 6-digit RGB or 8-digit RGBA
            string, e.g. ``#112233ff``.
        :return: RGBA image of the given size containing the text.
        """
        img = Image.new("RGBA", size, 0)
        draw = ImageDraw.Draw(img)
        fonts = {}  # font objects cached by pixel size
        for tb in textBlocks:
            box = tb["box"]
            fontSize = visualize._fontSize(box, ttfScale)
            if fontSize not in fonts:
                fonts[fontSize] = ImageFont.truetype(ttfPath, fontSize)
            draw.text(tuple(box[0]), tb["text"], font=fonts[fontSize], fill=fill)
        return img

    @staticmethod
    def createOrder(
        textBlocks,
        size,
        ttfPath=_DEFAULT_TTF,
        ttfSize=50,
        fill="#2233ff",
        bg="#ffffffe0",
    ):
        """Create the reading-order layer and return it as a PIL Image.

        Each block gets its 1-based index drawn at its top-left corner on a
        filled background rectangle.

        :param textBlocks: list of text blocks; each needs a ``"box"`` entry.
        :param size: image size.
        :param ttfPath: font file path. Defaults to Microsoft YaHei; loading
            fails if the font file does not exist.
        :param ttfSize: font size.
        :param fill: text color, hexadecimal 6-digit RGB or 8-digit RGBA
            string, e.g. ``#112233ff``.
        :param bg: color of the rectangle drawn behind each number.
        :return: RGBA image of the given size containing the numbers.
        """
        img = Image.new("RGBA", size, 0)
        draw = ImageDraw.Draw(img)
        ttf = ImageFont.truetype(ttfPath, ttfSize)
        for index, tb in enumerate(textBlocks):
            label = f"{index+1}"
            xy = tuple(tb["box"][0])  # top-left corner
            # getbbox yields (left, top, right, bottom); right/bottom are
            # measured from the drawing origin, so they bound the glyphs.
            _, _, right, bottom = ttf.getbbox(label)
            right *= 1.1  # 10% padding around the number
            bottom *= 1.1
            draw.rectangle(
                (xy, (xy[0] + right, xy[1] + bottom)), fill=bg, width=0
            )
            draw.text(xy, label, font=ttf, fill=fill)
        return img

    @staticmethod
    def createContrast(img1, img2):
        """Join two images side by side and return the result as a PIL Image.

        ``img1`` is placed on the left and ``img2`` directly to its right.
        The canvas is as tall as the taller image; uncovered area keeps the
        canvas's initial value of 0.
        """
        leftWidth = img1.size[0]
        canvasSize = (leftWidth + img2.size[0], max(img1.size[1], img2.size[1]))
        img = Image.new("RGBA", canvasSize, 0)
        img.paste(img1, (0, 0))
        img.paste(img2, (leftWidth, 0))
        return img

    @staticmethod
    def composite(img1, img2):
        """Alpha-composite ``img2`` over ``img1`` and return the new image.

        Both arguments are PIL Image objects in RGBA mode; ``img1`` is the
        bottom layer.
        """
        return Image.alpha_composite(img1, img2)

    # =========================== convenience API ===========================

    def __init__(self, textBlocks, imagePath):
        """Build all visualization layers for one image.

        :param textBlocks: list of text blocks, i.e. the ``data`` part of
            the OCR result.
        :param imagePath: path of the image the OCR result belongs to.
        """
        # convert() returns an independent copy, so the opened file can be
        # closed as soon as the conversion is done.
        with Image.open(imagePath) as src:
            self.imgSource = src.convert("RGBA")  # source image layer
        self.size = self.imgSource.size
        self.imgBox = self.createBox(textBlocks, self.size)  # box layer
        self.imgText = self.createText(textBlocks, self.size)  # text layer
        self.imgOrder = self.createOrder(textBlocks, self.size)  # order layer

    def get(self, isBox=True, isText=False, isOrder=False, isSource=True):
        """Return the composed visualization as a PIL Image.

        Layers are stacked bottom to top: source, boxes, text, order.

        :param isBox: include the bounding-box layer.
        :param isText: include the text layer.
        :param isOrder: include the order-number layer.
        :param isSource: include the source image. When false, the result
            is the visualization alone on a transparent background.
        """
        img = Image.new("RGBA", self.size, 0)
        layers = (
            (isSource, self.imgSource),
            (isBox, self.imgBox),
            (isText, self.imgText),
            (isOrder, self.imgOrder),
        )
        for enabled, layer in layers:
            if enabled and layer is not None:
                img = visualize.composite(img, layer)
        return img

    def show(self, isBox=True, isText=False, isOrder=False, isSource=True):
        """Display the composed visualization.

        :param isBox: include the bounding-box layer.
        :param isText: include the text layer.
        :param isOrder: include the order-number layer.
        :param isSource: include the source image. When false, the result
            is the visualization alone on a transparent background.
        """
        self.get(isBox, isText, isOrder, isSource).show()

    def save(self, path="", isBox=True, isText=False, isOrder=False, isSource=True):
        """Save the composed visualization to a file.

        :param path: destination path, passed unchanged to ``Image.save``.
        :param isBox: include the bounding-box layer.
        :param isText: include the text layer.
        :param isOrder: include the order-number layer.
        :param isSource: include the source image. When false, the result
            is the visualization alone on a transparent background.
        """
        self.get(isBox, isText, isOrder, isSource).save(path)