#!/usr/bin/env python
#
# Real-time facial feature detection with 68 landmarks, including the
# corners of the mouth, the eyebrows, the nose, and the eyes.
#
# Face detection uses Histogram of Oriented Gradients (HOG) features, a
# linear classifier, an image pyramid, and sliding windows.
#
# The pose estimator is taken from dlib's implementation of
# "One Millisecond Face Alignment with an Ensemble of Regression Trees",
# Kazemi and Sullivan, CVPR 2014,
# trained on the iBUG 300-W facial landmark dataset.
#
# You can train your own models with dlib's tools (e.g., train_shape_predictor.py).
#
# The trained model can be obtained from:
# http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2
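#
# For example, the model can be fetched and unpacked with standard tools
# (this assumes wget and bunzip2 are available on your system):
#   wget http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2
#   bunzip2 shape_predictor_68_face_landmarks.dat.bz2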
import sys
import os
import time

import numpy as np
import pandas as pd
import cv2
import dlib

from utils import reshape_data, print_bbox, print_parts, plot_bbox, plot_landmarks
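# NOTE: utils is a project-local helper module; its exact behavior is assumed
# from the calls below. In particular, reshape_data(label, features_per_window)
# is presumably what regroups the per-frame rows accumulated in
# ./data/train.csv into one labeled example per window.
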
def sample():
    if len(sys.argv) < 2:
        predictor_path = "models/face_predictor.dat"
    elif len(sys.argv) == 2:
        predictor_path = sys.argv[1]
    else:
        print(
            "\nUsage:\n"
            "python sample.py models/face_predictor.dat\n"
            "\nExample face predictor:\n"
            "http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2\n")
        sys.exit(1)

    # Load the face detector and the trained facial landmark model
    detector = dlib.get_frontal_face_detector()
    predictor = dlib.shape_predictor(predictor_path)
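    # dlib API notes: detector(img, n) returns a dlib.rectangles list of face
    # bounding boxes, where n is the number of upsampling passes, and
    # predictor(img, rect) returns a dlib.full_object_detection whose
    # landmarks are accessed via .part(i) and counted via .num_parts.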
    # Request user input for the label of the desired class in the training data
    try:
        label = int(input("Enter the number corresponding to the label of the action you would like to record.\n"))
    except ValueError:
        print("\nPlease enter an integer.\n")
        sys.exit(1)

    # Request user input for the size of the sliding window
    try:
        time_window = int(input("Enter the duration (in number of frames) of the action. "
                                "This must match the duration of other actions in the data.\n"))
    except ValueError:
        print("\nPlease enter an integer greater than 0.\n")
        sys.exit(1)
    # Initialize video input
    vc = cv2.VideoCapture(0)
    cv2.namedWindow("preview")
    if vc.isOpened():
        rval, img = vc.read()
    else:
        rval = False

    frame_count = 0
    start = time.time()

    # Initialize trajectories of top/left points of the facial position-in-frame
    top_trajectory = np.array([])
    left_trajectory = np.array([])
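    # The variance of the position-in-frame over a window summarizes how much
    # the face moved while the action was performed; it is appended to each
    # window's feature vector below.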
    # Record data for the desired class
    try:
        while rval:
            frame_count += 1
            rval, img = vc.read()
            img = np.asarray(img)

            # Ask the detector to find the bounding boxes of each face.
            # The second argument is the number of times to upsample the image
            # before detecting; 0 means no upsampling. A higher number allows
            # smaller faces to be detected, at the cost of speed. This step
            # accounts for the majority of the computation time.
            dets = detector(img, 0)
            # print("Number of faces detected: {}".format(len(dets)))
            if len(dets) > 0:
                for k, d in enumerate(dets):
                    print_bbox(img, k, d)

                    ########################################
                    # FEATURE EXTRACTION
                    ########################################
                    shape = predictor(img, d)
                    # print_parts(shape)

                    # Extract (x, y) coordinates of facial landmarks
                    parts = [[shape.part(n).x, shape.part(n).y] for n in range(shape.num_parts)]
                    parts = np.asarray(parts).astype(int)
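                    # parts now has shape (num_parts, 2); for the 68-landmark
                    # model above, that is one (x, y) row per landmark.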
                    # Extract facial position-in-frame
                    top = d.top()
                    left = d.left()

                    # Express the landmark coordinates relative to the bounding
                    # box so the features are (roughly, up to detector noise)
                    # translation invariant
                    parts_x = parts.T[0] - left
                    parts_y = parts.T[1] - top

                    # Create feature vector.
                    # Continue stacking features here as needed.
                    features = np.hstack((parts_x, parts_y))
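                    # Per-frame layout: [x_0 .. x_67, y_0 .. y_67], i.e. 136
                    # values for the 68-landmark model. pd.DataFrame(features)
                    # is a single column, so each frame appends 136 one-value
                    # rows (138 at the end of a window) to ./data/train.csv.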
                    top_trajectory = np.hstack((top_trajectory, top))
                    left_trajectory = np.hstack((left_trajectory, left))

                    if frame_count >= time_window:
                        # Append the variance of position-in-frame to the end of each window
                        features = np.hstack((features, np.var(top_trajectory)))
                        features = np.hstack((features, np.var(left_trajectory)))
                        # Reset trajectories and frame counter
                        top_trajectory = np.array([])
                        left_trajectory = np.array([])
                        frame_count = 0

                    # Append feature vector to csv
                    pd.DataFrame(features).to_csv('./data/train.csv', mode='a', header=False, index=False)

                    # Plot left, right, top, bottom coordinates of detected face
                    plot_bbox(img, d.left(), d.right(), d.top(), d.bottom(), color=(0, 255, 255))
                    plot_landmarks(img, parts, black_bg=True, color=(0, 255, 255), resolution=(480, 640))
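            # cv2.waitKey also services the OpenCV GUI event loop, so the
            # plotting windows above only refresh while it is called.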
            cv2.waitKey(1)
            if time.time() - start >= 1:
                print(frame_count)
                start = time.time()
    # Stop recording when the user presses Ctrl-C
    except KeyboardInterrupt:
        pass
    # Release the camera and close any OpenCV windows
    vc.release()
    cv2.destroyAllWindows()

    # 68 landmarks x 2 coordinates = 136 features per frame; add 2 to account
    # for the variances of the position-in-frame appended to each window
    features_per_window = 136 * time_window + 2
    reshape_data(label, features_per_window)
    os.remove("./data/train.csv")
    print("Done.")


if __name__ == "__main__":
    sample()