-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathObjectDetection.py
172 lines (130 loc) · 5.45 KB
/
ObjectDetection.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
import cv2 as cv
import numpy as np
cap = cv.VideoCapture(0)
whT = 320
confThreshold = 0.5
nmsThreshold = 0.2
classesFile = "coco.names"
classNames = []
with open(classesFile, 'rt') as f:
classNames = f.read().rstrip('\n').split('\n')
#print(len(classNames))
modelConfiguration = 'yolov3.cfg' #yolov3_320 is the best trade off between speed and accuracy
modelWeights = 'yolov3.weights'
net = cv.dnn.readNetFromDarknet(modelConfiguration, modelWeights)
net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU)
def findObjects(outputs,img):
hT, wT, cT = img.shape
bbox = [] # bounding box corner points
classIds = [] # class id with the highest confidence
confs = [] # confidence value of the highest class
for output in outputs:
for det in output:
scores = det[5:]
classId = np.argmax(scores)
confidence = scores[classId]
if confidence > confThreshold:
w,h = int(det[2]*wT) , int(det[3]*hT)
x,y = int((det[0]*wT)-w/2) , int((det[1]*hT)-h/2)
bbox.append([x,y,w,h])
classIds.append(classId)
confs.append(float(confidence))
indices = cv.dnn.NMSBoxes(bbox, confs, confThreshold, nmsThreshold)
for i in indices:
i = i[0]
box = bbox[i]
x, y, w, h = box[0], box[1], box[2], box[3]
# print(x,y,w,h)
cv.rectangle(img, (x, y), (x+w,y+h), (255, 0 , 255), 2)
cv.putText(img,f'{classNames[classIds[i]].upper()} {int(confs[i]*100)}%',
(x, y-10), cv.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 255), 2)
while True:
success, img = cap.read()
blob = cv.dnn.blobFromImage(img, 1 / 255, (whT, whT), [0, 0, 0], 1, crop=False)
net.setInput(blob)
layerNames = net.getLayerNames()
#print(len(layerNames))
#print(net.getUnconnectedOutLayers())
outputNames = [layerNames[i[0]-1] for i in net.getUnconnectedOutLayers()]
#print(outputNames)
outputs = net.forward(outputNames)
# print(outputs[0].shape)
# print(outputs[1].shape)
# print(outputs[2].shape)
# print(outputs[0][0])
findObjects(outputs,img)
cv.imshow("image", img)
if cv.waitKey(1) == 27 :
break
cap.release()
cv.destroyAllWindows()
# import cv2
# import numpy as np
# import time
# # A required callback method that goes into the trackbar function.
# def nothing(x):
# pass
# # Initializing the webcam feed.
# cap = cv2.VideoCapture(0)
# cap.set(3,1280)
# cap.set(4,720)
# # Create a window named trackbars.
# cv2.namedWindow("Trackbars")
# # Now create 6 trackbars that will control the lower and upper range of
# # H,S and V channels. The Arguments are like this: Name of trackbar,
# # window name, range,callback function. For Hue the range is 0-179 and
# # for S,V its 0-255.
# cv2.createTrackbar("L - H", "Trackbars", 0, 179, nothing)
# cv2.createTrackbar("L - S", "Trackbars", 0, 255, nothing)
# cv2.createTrackbar("L - V", "Trackbars", 0, 255, nothing)
# cv2.createTrackbar("U - H", "Trackbars", 179, 179, nothing)
# cv2.createTrackbar("U - S", "Trackbars", 255, 255, nothing)
# cv2.createTrackbar("U - V", "Trackbars", 255, 255, nothing)
# while True:
# # Start reading the webcam feed frame by frame.
# ret, frame = cap.read()
# if not ret:
# break
# # Flip the frame horizontally (Not required)
# frame = cv2.flip( frame, 1 )
# # Convert the BGR image to HSV image.
# hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
# # Get the new values of the trackbar in real time as the user changes
# # them
# l_h = cv2.getTrackbarPos("L - H", "Trackbars")
# l_s = cv2.getTrackbarPos("L - S", "Trackbars")
# l_v = cv2.getTrackbarPos("L - V", "Trackbars")
# u_h = cv2.getTrackbarPos("U - H", "Trackbars")
# u_s = cv2.getTrackbarPos("U - S", "Trackbars")
# u_v = cv2.getTrackbarPos("U - V", "Trackbars")
# # Set the lower and upper HSV range according to the value selected
# # by the trackbar
# lower_range = np.array([l_h, l_s, l_v])
# upper_range = np.array([u_h, u_s, u_v])
# # Filter the image and get the binary mask, where white represents
# # your target color
# mask = cv2.inRange(hsv, lower_range, upper_range)
# # You can also visualize the real part of the target color (Optional)
# res = cv2.bitwise_and(frame, frame, mask=mask)
# # Converting the binary mask to 3 channel image, this is just so
# # we can stack it with the others
# mask_3 = cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR)
# # stack the mask, orginal frame and the filtered result
# stacked = np.hstack((mask_3,frame,res))
# # Show this stacked frame at 40% of the size.
# cv2.imshow('Trackbars',cv2.resize(stacked,None,fx=0.4,fy=0.4))
# # If the user presses ESC then exit the program
# key = cv2.waitKey(1)
# if key == 27:
# break
# # If the user presses `s` then print this array.
# if key == ord('s'):
# thearray = [[l_h,l_s,l_v],[u_h, u_s, u_v]]
# print(thearray)
# # Also save this array as penval.npy
# np.save('hsv_value',thearray)
# break
# # Release the camera & destroy the windows.
# cap.release()
# cv2.destroyAllWindows()