# text_detec_and_recog.py
# This code is based on a tutorial from this YouTube channel: https://www.youtube.com/channel/UCwVQ-caNvMMRh2fIlG48U1Q
from imutils.object_detection import non_max_suppression
import numpy as np
import time
import cv2
import pytesseract
#---------------------------------
'''These next two lines load the EAST text-detection network and set the path to the
local Tesseract executable; both paths depend on the local setup.'''
net = cv2.dnn.readNet('frozen_east_text_detection.pb')
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract'
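# Overall pipeline: the EAST network locates the regions of the image that contain text,
# and pytesseract (Tesseract OCR) then reads the text inside each detected region.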
def text_detect_and_recognition(img):
    # These two lines preserve the original image and its height and width
    org = img
    (H, W) = img.shape[:2]
    # Resize the image to fixed dimensions (multiples of 32, as EAST requires) and keep
    # the ratios needed to map detections back to the original image
    (newW, newH) = (640, 320)
    rW = W / float(newW)
    rH = H / float(newH)
    image = cv2.resize(img, (newW, newH))
    (H, W) = image.shape[:2]
    # Output layers of the EAST network used for detecting where the text is in the image
    layerNames = [
        "feature_fusion/Conv_7/Sigmoid",
        "feature_fusion/concat_3"]
    # Convert the resized image into a blob with the format the network expects
    blob = cv2.dnn.blobFromImage(image, 1.0, (W, H), (123.68, 116.78, 103.94),
                                 swapRB=True, crop=False)
    net.setInput(blob)
    # These two variables hold the output of the network
    (scores, geometry) = net.forward(layerNames)
    (numRows, numCol) = scores.shape[2:4]
    rects = []
    confidences = []
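    # scores holds a per-cell probability that text is present; geometry holds, for each
    # cell, the distances from that cell to the top/right/bottom/left edges of the text
    # box plus a rotation angle. Each cell corresponds to a 4x4 patch of the input image.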
    '''This for loop keeps only the cells whose confidence that text is present is high
    enough, and decodes the corresponding bounding-box coordinates.'''
    for y in range(0, numRows):
        scoresData = scores[0, 0, y]
        xData0 = geometry[0, 0, y]
        xData1 = geometry[0, 1, y]
        xData2 = geometry[0, 2, y]
        xData3 = geometry[0, 3, y]
        anglesData = geometry[0, 4, y]
        for x in range(0, numCol):
            # Skip cells with a low text-confidence score
            if scoresData[x] < 0.5:
                continue
            # The feature maps are 4x smaller than the input, so scale back up
            (offsetX, offsetY) = (x * 4.0, y * 4.0)
            angle = anglesData[x]
            cos = np.cos(angle)
            sin = np.sin(angle)
            # Height and width of the box from the predicted edge distances
            h = xData0[x] + xData2[x]
            w = xData1[x] + xData3[x]
            # Compute the bottom-right corner of the box, then the top-left corner
            endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x]))
            endY = int(offsetY - (sin * xData1[x]) + (cos * xData2[x]))
            startX = int(endX - w)
            startY = int(endY - h)
            rects.append((startX, startY, endX, endY))
            confidences.append(scoresData[x])
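    # Non-maximum suppression collapses overlapping candidate boxes into one box per text region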
    boxes = non_max_suppression(np.array(rects), probs=confidences)
    '''This for loop draws the bounding boxes on the original image and passes each
    region of interest (roi) through pytesseract to recognize the text inside it.'''
    for (startX, startY, endX, endY) in boxes:
        print(startX, startY, endX, endY)
        # Scale the box back to the coordinate space of the original image
        newstartX = int(startX * rW)
        newstartY = int(startY * rH)
        newendX = int(endX * rW)
        newendY = int(endY * rH)
        # Pad the region slightly, clamping so the slice does not go out of bounds
        boundary = 5
        roi = org[max(newstartY - boundary, 0): newendY + boundary,
                  max(newstartX - boundary, 0): newendX + boundary]
        gray = cv2.cvtColor(roi.astype(np.uint8), cv2.COLOR_BGR2GRAY)
        # '--oem 2' asks for the Legacy + LSTM engines; drop it if the installed
        # traineddata does not include the legacy engine
        config = r'--oem 2'
        textRecognized = pytesseract.image_to_string(gray, config=config)
        # Remove newlines and the trailing whitespace/form feed Tesseract appends
        textRecognized = textRecognized.replace("\n", " ").strip()
        cv2.rectangle(org, (newstartX, newstartY), (newendX, newendY), (0, 255, 0), 2)
        org = cv2.putText(org, textRecognized, (newendX, newendY + 5), cv2.FONT_ITALIC,
                          fontScale=0.5, color=(0, 0, 0))
    return org
'''Here is the main part that calls the function for detecting and recognizing the text
in each image.'''
img0 = cv2.imread('Capture.PNG')
img1 = cv2.imread('streetSigns.jpg')
img2 = cv2.imread('doNotEnter.jpg')
img5 = cv2.imread('waySigns.jpeg')
array = [img0, img1, img2, img5]
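# Cycle through the test images (the whole set is shown twice), displaying the original
# next to the annotated copy; press any key to advance to the next image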
for i in range(0, 2):
    for img in array:
        imgO = cv2.resize(img, (640, 320), interpolation=cv2.INTER_AREA)
        orig = cv2.resize(img, (640, 320), interpolation=cv2.INTER_AREA)
        textDetected = text_detect_and_recognition(imgO)
        cv2.imshow('Original Image', orig)
        cv2.imshow("Text Detected", textDetected)
        time.sleep(2)
        k = cv2.waitKey(0)
        if k == ord('q'):
            continue
cv2.destroyAllWindows()