'''Human Head Pose Estimation Demo
1. Detect a human face in the video frame.
2. Run facial landmark detection on the face image.
3. Estimate the pose by solving a PnP problem.
For more details, please refer to:
https://github.com/yinguobing/head-pose-estimation
'''
from argparse import ArgumentParser
import math
import time
import cv2
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from imutils import face_utils
import dlib
from src.pose_estimator import PoseEstimator
from src.landmark_metrics import eye_aspect_ratio, mouth_aspect_ratio
from src.database import Database
from src.eyes import update_eyes
from src.yawn import update_yawn
from src.message import Message
from src.video import Camera
print(__doc__)
print('OpenCV version: {}'.format(cv2.__version__))
# Parse arguments from user input
parser = ArgumentParser()
parser.add_argument('--video', type=str, default=None,
                    help='Video file to be processed.')
parser.add_argument('--cam', type=int, default=None,
                    help='The webcam index.')
args = parser.parse_args()
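# Example usage (the paths below are illustrative, not part of the original script):
#   python main.py --cam 0                       # use the first webcam
#   python main.py --video assets/meeting.mp4    # process a recorded video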
# FPS
prev_frame_time, cur_frame_time = 0, 0
# Attention Metrics
attn = 100
attn_span = pd.DataFrame(columns=['timestamp', 'attention', 'eyes_closed'])
looking_away = None
# Blink Detection
eyes_closed, eyes_already_closed = False, False
start_eyes_closed, time_eyes_closed = 0, 0
# Yawn detection
mouth_open, mouth_already_open = False, False
start_mouth_open, time_mouth_open = 0, 0
# Landmarks Detection
pretrained_landmarks = r'assets/shape_predictor_68_face_landmarks.dat'
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor(pretrained_landmarks)
(left_eye_start, left_eye_end) = face_utils.FACIAL_LANDMARKS_68_IDXS['left_eye']
(right_eye_start, right_eye_end) = face_utils.FACIAL_LANDMARKS_68_IDXS['right_eye']
(mouth_start, mouth_end) = face_utils.FACIAL_LANDMARKS_68_IDXS['mouth']
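# Note: face_utils.FACIAL_LANDMARKS_68_IDXS maps each facial region to a
# (start, end) pair of indices into the 68-point landmark array, so
# shape[start:end] below slices out that region's (x, y) coordinates.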

def draw_text(image, label, coords=(50, 50)):
    cv2.putText(
        img=image,
        text=label,
        org=(coords[0], coords[1]),
        fontFace=cv2.FONT_HERSHEY_SIMPLEX,
        fontScale=0.8,
        color=(0, 0, 255),
    )

if __name__ == '__main__':
    # Set up the video source from a webcam or a video file
    video_src = args.cam if args.cam is not None else args.video
    cap = Camera(video_src)
    # Get the frame size. This will be used by the pose estimator.
    width, height = cap.get_frame_size()
    # Initialize the pose estimator
    pose_estimator = PoseEstimator(img_size=(height, width))
    # Initialize variables sent to the database every 5 seconds
    poses = []
    avg_time_eyes_closed = 0
    avg_time_mouth_open = 0
    avg_attention = 0
    count = 0
    yawn_duration = 0
    last_sent = time.time()
    # Initialize the database connection
    db = Database('Meeting102')
    # Initialize the real-time message sent to the database
    message = Message()
    while True:
        start = time.time()
        # Read a frame
        frame_got, frame = cap.get_frame()
        if frame_got is False:
            break
        # Convert the image to grayscale
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        # Detect faces in the frame
        rects = detector(gray, 0)
        cur_time = pd.Timestamp.now(tz='Asia/Singapore').strftime('%d-%m-%Y %H:%M:%S.%f')
        if not rects:
            # No face detected: decay attention slowly and reset the per-face flags
            attn = max(attn - 0.1, 0)
            looking_away = None
            eyes_closed = False
            mouth_open = False  # was `yawn = False`, a variable never read elsewhere
            pose = None
        else:
            # For each detected face, find the landmarks
            for (i, rect) in enumerate(rects):
                # Make the prediction and transform it to a numpy array
                shape = predictor(gray, rect)
                shape = np.array([(shape.part(i).x, shape.part(i).y) for i in range(68)], dtype='f')
                # Draw all landmark (x, y) coordinate points
                # for (x, y) in shape:
                #     cv2.circle(frame, (int(x), int(y)), 2, (0, 255, 0), -1)
                # Estimate the pose using the 68 facial landmarks
                pose = pose_estimator.solve_pose_by_68_points(shape)
                # View pose annotation cube
                # pose_estimator.draw_annotation_box(
                #     frame, pose[0], pose[1], color=(0, 255, 0))
                # View head axes
                # pose_estimator.draw_axes(frame, pose[0], pose[1])
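                # Assumption: solve_pose_by_68_points returns a
                # (rotation_vector, translation_vector) pair from cv2.solvePnP.
                # The rotation vector is an axis-angle (Rodrigues) vector, so
                # scaling each component by 180/pi gives rough per-axis angles
                # in degrees rather than true Euler angles; this is adequate
                # for the coarse thresholding done below.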
                pose = pose[0]
                # Convert to angles in degrees
                pose = [ith_pose[0] * 180 / math.pi for ith_pose in pose]
                # View marks
                # draw_marks(frame, marks, color=(0, 255, 0))
                # View facebox
                # draw_box(frame, [facebox])
                # --- ATTENTION ALGORITHM --- #
                print(f'[{cur_time}] (x,y,z): ({pose[0]:.2f}, {pose[1]:.2f}, {pose[2]:.2f})')
                # Attention thresholds
                face_left_right_threshold = 20
                face_up_threshold = 30
                face_down_threshold = 20
                attn_change = 0.5
                EAR_threshold = 0.25
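                # Eye Aspect Ratio per Soukupová & Čech (2016): given the six
                # eye landmarks p1..p6, EAR = (|p2-p6| + |p3-p5|) / (2 * |p1-p4|).
                # It stays roughly constant while the eye is open and drops toward
                # zero on a blink; src.landmark_metrics presumably implements this.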
                # Eye Aspect Ratio (EAR)
                left_eye = shape[left_eye_start:left_eye_end]
                right_eye = shape[right_eye_start:right_eye_end]
                left_EAR = eye_aspect_ratio(left_eye)
                right_EAR = eye_aspect_ratio(right_eye)
                EAR = (left_EAR + right_EAR) / 2.0
                eyes_closed = EAR < EAR_threshold
                print(f'LEFT: {left_EAR:.3f}, RIGHT: {right_EAR:.3f}, CLOSED: {eyes_closed}')
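                # The Mouth Aspect Ratio is the analogous ratio of vertical mouth
                # opening to mouth width; values above the 0.3 threshold below
                # are treated as a possible yawn.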
                # Mouth Aspect Ratio (MAR)
                mouth = shape[mouth_start:mouth_end]
                MAR = mouth_aspect_ratio(mouth)
                mouth_open = MAR > 0.3
                print(f'YAWN: {MAR:.3f}')
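                # update_eyes/update_yawn (from src.eyes and src.yawn) presumably
                # keep a start timestamp while the state holds and return how long
                # the eyes have been closed / the mouth has been open, in seconds.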
                time_eyes_closed, start_eyes_closed, eyes_already_closed = update_eyes(
                    eyes_closed, time_eyes_closed, eyes_already_closed, start_eyes_closed)
                time_mouth_open, start_mouth_open, mouth_already_open = update_yawn(
                    mouth_open, time_mouth_open, mouth_already_open, start_mouth_open)
                # Add/deduct attention based on the thresholds
                if (-face_left_right_threshold < pose[0] < face_left_right_threshold) \
                        and (-face_down_threshold < pose[1] < face_up_threshold) \
                        and time_eyes_closed < 2 \
                        and time_mouth_open < 1:
                    attn = min(100, attn + attn_change / 2)
                    looking_away = False
                else:
                    attn = max(attn - attn_change, 0)
                    looking_away = True
        print('-------------------------------------------------------------------------------')
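        # The FPS shown is instantaneous (one frame's delta); on the very first
        # frame prev_frame_time is still 0, so the displayed value is only
        # meaningful from the second frame onward.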
        # Calculate FPS
        cur_frame_time = time.time()
        fps = 1 / (cur_frame_time - prev_frame_time)
        prev_frame_time = cur_frame_time
        # Display metrics on the screen
        draw_text(frame, f'FPS: {int(fps)}', coords=(30, 30))
        attentiveness = ('Please keep your face within the screen'
                         if looking_away is None else f'Looking Away: {looking_away}')
        draw_text(frame, attentiveness, coords=(30, 60))
        draw_text(frame, f'Attention: {attn:.2f}%', coords=(30, 90))
        eyes_closed_text = f'{time_eyes_closed:.2f}s' if eyes_closed else ''
        draw_text(frame, f'Eyes Closed: {eyes_closed} {eyes_closed_text}', coords=(30, 120))
        mouth_opened_text = f'{time_mouth_open:.2f}s' if mouth_open else ''
        draw_text(frame, f'Yawn: {mouth_open} {mouth_opened_text}', coords=(30, 150))
        # Update the running message
        message.update(pose, attn, time_eyes_closed, time_mouth_open)
        # Send accumulated values to the database every 5 seconds
        time_now = time.time()
        if time_now - last_sent > 5:
            last_sent = time_now
            # Finalize the message by taking the average (median for pose)
            message.finalize(cur_time)
            # Send the message to the database
            db.send(message.as_dict())
            # Reset all variables in the message
            message.reset()
        attn_span = pd.concat([
            attn_span,
            pd.DataFrame.from_dict({
                'timestamp': [cur_time],
                'attention': [attn],
                'eyes_closed': [time_eyes_closed],
            })
        ], ignore_index=True)
        # Show a preview of the webcam/video feed
        app_name = 'Attention Tracker'
        cv2.imshow(app_name, frame)
        if cv2.waitKey(1) == 27 or cv2.getWindowProperty(app_name, cv2.WND_PROP_VISIBLE) < 1:
            print(f'\nShutting Down {app_name}...')
            break
    cap.release()
    cv2.destroyAllWindows()
    # Save results into a CSV and plot graphs
    attn_span['timestamp'] = pd.to_datetime(attn_span['timestamp'], format='%d-%m-%Y %H:%M:%S.%f')
    attn_span = attn_span.set_index('timestamp')
    # attn_span.to_csv(r'assets/attn.csv', index=True)  # Temporarily avoid saving to CSV
    attn_span['attention'].plot.line(label='Attention (%)', legend=True)
    attn_span['eyes_closed'].plot.line(label='Eyes Closed (s)', secondary_y=True, legend=True)
    plt.show()