-
Notifications
You must be signed in to change notification settings - Fork 16
/
Copy patheuler_angles.py
161 lines (128 loc) · 6.67 KB
/
euler_angles.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
"""
Author: Amr Elsersy
email: amrelsersay@gmail.com
-----------------------------------------------------------------------------------
Description: Head Pose Euler Angles(pitch yaw roll) estimation from 2D-3D correspondences landmarks
"""
import cv2
import numpy as np
class EulerAngles:
    """
    Head Pose Estimation from landmark annotations with OpenCV solvePnP.

    Estimates the pitch, yaw, roll rotation (Euler) angles from 2D-3D
    correspondences. Given a generic 3D face model (3D landmarks) and
    annotated 2D landmarks:

        2D point = intrinsic * extrinsic * 3D point_in_world_space

    With 2D-3D correspondences and the intrinsic camera matrix, cv2.solvePnP
    recovers the extrinsic matrix that transforms world space into the camera
    3D space; that extrinsic encodes the 3 Euler angles plus a translation
    vector. This works because human faces share a similar 3D structure, and
    emotions & illumination don't affect the pose.

    Notes:
        The 3D coordinates of any generic 3D face model can be used: changing
        the model's translation does not affect the angles, it only introduces
        a bigger tvec with the same rotation matrix.
    """

    def __init__(self, img_shape=(112, 112)):
        """
        Parameters
        ----------
        img_shape: (width, height) of the images the 2D landmarks come from;
                   used to approximate the camera intrinsic matrix.
        """
        # Lazy approximation of the camera intrinsic matrix (no calibration data)
        self.camera_intrensic_matrix = self.estimate_camera_matrix(img_shape)
        # Generic 3D face model landmarks (model/world space)
        self.landmarks_3D = self.get_face_model_3D_landmarks()

    def estimate_camera_matrix(self, img_shape):
        """
        Approximate the camera intrinsic matrix from the image size.

        Weak-perspective assumption (near object with similar depths):
        - (c_x, c_y) optical center = image center (the image origin is the
          top-left corner, hence the translation to the center)
        - focal length is a function of image size and an assumed 60 degree
          field of view: focal = c_x / tan(FOV / 2)

        Returns
        -------
        numpy float32 array of shape (3, 3): the intrinsic matrix.
        """
        c_x = img_shape[0] / 2
        c_y = img_shape[1] / 2
        FieldOfView = 60
        focal = c_x / np.tan(np.radians(FieldOfView / 2))

        return np.float32([
            [focal, 0.0,   c_x],
            [0.0,   focal, c_y],
            [0.0,   0.0,   1.0]
        ])

    def set_img_shape(self, img_shape):
        """Re-estimate the intrinsic matrix for a new image size."""
        self.camera_intrensic_matrix = self.estimate_camera_matrix(img_shape)

    def get_face_model_3D_landmarks(self):
        """
        Generic 3D face model coordinates (3D landmarks),
        obtained from anthropometric measurements of the human head.

        Returns
        -------
        3D_Landmarks: numpy float32 array of shape (N, 3) with N = 14 points in 3D.
        """
        # Model coords: left/right feature pairs are mirrored in X.
        # (OpenCV camera coords: X to the right, Y down, Z to the front.)
        landmarks_3D = np.float32([
            [6.825897, 6.760612, 4.402142],     # LEFT_EYEBROW_LEFT,
            [1.330353, 7.122144, 6.903745],     # LEFT_EYEBROW_RIGHT,
            [-1.330353, 7.122144, 6.903745],    # RIGHT_EYEBROW_LEFT,
            [-6.825897, 6.760612, 4.402142],    # RIGHT_EYEBROW_RIGHT,
            [5.311432, 5.485328, 3.987654],     # LEFT_EYE_LEFT,
            [1.789930, 5.393625, 4.413414],     # LEFT_EYE_RIGHT,
            [-1.789930, 5.393625, 4.413414],    # RIGHT_EYE_LEFT,
            [-5.311432, 5.485328, 3.987654],    # RIGHT_EYE_RIGHT,
            # BUG FIX: NOSE_LEFT was a duplicate of NOSE_RIGHT; its X must be
            # mirrored (+2.005628) like every other left/right pair above.
            [2.005628, 1.409845, 6.165652],     # NOSE_LEFT,
            [-2.005628, 1.409845, 6.165652],    # NOSE_RIGHT,
            [2.774015, -2.080775, 5.048531],    # MOUTH_LEFT,
            [-2.774015, -2.080775, 5.048531],   # MOUTH_RIGHT,
            [0.000000, -3.116408, 6.097667],    # LOWER_LIP,
            [0.000000, -7.415691, 4.070434]     # CHIN
        ])
        return landmarks_3D

    def eular_angles_from_landmarks(self, landmarks_2D):
        """
        Estimate Euler angles from 2D landmarks.

        Parameters
        ----------
        landmarks_2D: numpy array of shape (N, 2), N = number of landmarks
                      (usually 98, from WFLW).

        Returns
        -------
        rvec: rotation vector transforming model space to camera space (3D in both)
        tvec: translation vector transforming model space to camera space
        euler_angles: (pitch, yaw, roll) in degrees
        """
        # WFLW (98 landmarks): indices of the 14 tracked points matching landmarks_3D
        TRACKED_POINTS_MASK = [33, 38, 50, 46, 60, 64, 68, 72, 55, 59, 76, 82, 85, 16]
        # solvePnP requires floating-point input; select the tracked subset
        landmarks_2D = np.asarray(landmarks_2D, dtype=np.float32)[TRACKED_POINTS_MASK]

        # Solve for the extrinsic (rotation & translation) from 2D-3D correspondences:
        #   rvec: rotation vector (rotation has 3 DOF, represented as a 3D vector)
        #   tvec: translation vector (world origin position in the camera 3D coords)
        #   _   : success flag -not important-
        _, rvec, tvec = cv2.solvePnP(self.landmarks_3D, landmarks_2D, self.camera_intrensic_matrix, distCoeffs=None)

        # NOTE: tvec is almost constant (avg ~[-1, -2, -21]): the world origin
        # w.r.t. the camera; it could be used directly without recomputation.

        # Rotation vector -> rotation matrix (note: cv2.Rodrigues works both ways).
        # This matrix transforms model coords (object 3D space) to camera 3D coords.
        rotation_matrix, _ = cv2.Rodrigues(rvec)

        # Alternative: extrinsic = np.hstack((rotation_matrix, tvec)) and
        # cv2.decomposeProjectionMatrix(extrinsic)[6] also yields the angles.

        # Decompose the rotation matrix into the 3 Euler angles
        # (pitch, yaw, roll) in degrees.
        euler_angles = cv2.RQDecomp3x3(rotation_matrix)[0]
        return rvec, tvec, euler_angles
if __name__ == "__main__":
from dataset import WFLW_Dataset
from visualization import WFLW_Visualizer
dataset = WFLW_Dataset(mode='train')
visualizer = WFLW_Visualizer()
eular_estimator = EulerAngles()
for i in range(len(dataset)):
image, labels = dataset[i]
landmarks = labels['landmarks']
rvec, tvec, euler_angles = eular_estimator.eular_angles_from_landmarks(landmarks)
image = visualizer.draw_euler_angles(image, rvec, tvec, euler_angles, eular_estimator.camera_intrensic_matrix)
print("rvec\n", rvec)
print("tvec\n", tvec)
print("euler ", euler_angles)
print ("*" * 80, '\n\n\t press n for next example .... ESC to exit')
visualizer.show(image)
if visualizer.user_press == 27:
cv2.destroyAllWindows()
break