#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
This file applies the trained emotion classifier to images and videos. It first extracts all faces from the
images/frames using the FaceFinder class. It then applies the necessary transformations (e.g. Grayscale) on the image
and passes it to the FacePredictor class (which wraps the capabilities of the emotion classifier to return the predicted
emotion label along with its softmax probability).
"""
from argparse import ArgumentParser

import numpy as np
from PIL import Image, ImageDraw
from facenet_pytorch import MTCNN

from model import FaceFinder, FacePredictor, load_model
from utils import FontLoader

# define RGB color constants
RED = (200, 0, 0)
GREEN = (0, 128, 0)
BLUE = (0, 0, 255)
PURPLE = (148, 0, 211)
GRAY = (128, 128, 128)
WHITE = (255, 255, 255)

# map each emotion to the color used for the bounding box
EMOTION_COLOR_MAP = {'angry': RED, 'happy': GREEN, 'surprised': BLUE, 'disgusted': PURPLE, 'sad': GRAY}


def process_image(face_images, boxes, predict, draw, font=None, font_loader=None, padding=5, line_width=3):
    """Draws a bounding box around each face in the image with the emotion label and confidence level

    Parameters
    ----------
    face_images: Image or list[Image]
        Either a single PIL Image of a face or a list of Image objects to process.
    boxes: np.ndarray
        A numpy array containing the bounding box extents for each face in face_images within the overall image.
        This argument is aligned with face_images. Bounding boxes are in (x_min, y_min, x_max, y_max) format.
        If face_images is a single PIL Image, then boxes should have only four elements.
        If face_images is a list of Image objects, then boxes should have shape (n, 4).
    predict: nn.Module
        The emotion detector model that will be applied to each face image.
    draw: ImageDraw
        The ImageDraw object for the image containing all of the faces. This object is modified in-place.
    font: ImageFont, optional
        The ImageFont object to use for the text annotation.
        If left as None, then the font_loader argument must be provided.
    font_loader: FontLoader, optional
        A FontLoader used to load the font given the width of each bounding box.
        If left as None, then the font argument must be provided.
    padding: int, optional
        The amount of pixel padding for both the width and height of the text background rectangle (default=5)
    line_width: int, optional
        The pixel width of the bounding box outline (default=3)
    """
    if isinstance(face_images, Image.Image):
        face_images = [face_images]
        boxes = [boxes]
    for face_img, box in zip(face_images, boxes):
        emotion, prob = predict(face_img)
        prob = round(100 * prob, 2)
        message = f'{emotion}: {prob}%'
        color = EMOTION_COLOR_MAP[emotion]
        # draw the main bounding box outline for the face
        draw.rectangle(box.tolist(), outline=color, width=line_width)
        # get the face extents from the bounding box
        x_min, y_min, x_max, y_max = box
        if font_loader is not None:
            font = font_loader(x_max - x_min)
        # get the extents of the text message
        ascent, descent = font.getmetrics()
        # font.getsize was removed in Pillow 10; getlength (available since Pillow 8.0) gives the rendered text width
        text_width = int(font.getlength(message))
        text_height = ascent + descent
        # draw the message over a filled-in background rectangle
        draw.rectangle((x_min, y_min, x_min + text_width + padding, y_min + text_height + padding), fill=color)
        draw.text((x_min + padding, y_min + padding), message, font=font, fill=WHITE)
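

# The module docstring mentions videos, but the code in this file only handles still images. The sketch below is
# one hypothetical way to process a video frame-by-frame, assuming OpenCV (cv2) is installed; process_video, its
# parameters, and the None check on the FaceFinder result are illustrative assumptions, not part of the original code.
def process_video(video_path, out_path, face_finder, predict, font_loader):
    """Annotates each frame of the video at video_path with emotion labels and writes the result to out_path."""
    import cv2  # hypothetical extra dependency, not imported by the original script

    cap = cv2.VideoCapture(video_path)
    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    writer = cv2.VideoWriter(out_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))
    while True:
        ok, frame = cap.read()
        if not ok:
            break
        # OpenCV decodes frames as BGR; PIL expects RGB
        pil_frame = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        faces, boxes = face_finder(pil_frame, return_boxes=True)
        if faces is not None:  # assumes FaceFinder returns None when no faces are detected
            process_image(faces, boxes, predict, ImageDraw.Draw(pil_frame), font_loader=font_loader)
        writer.write(cv2.cvtColor(np.array(pil_frame), cv2.COLOR_RGB2BGR))
    cap.release()
    writer.release()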


# ----------------------------------------------------------------------------------------------------------------------
# Main script
# ----------------------------------------------------------------------------------------------------------------------
if __name__ == '__main__':
    parser = ArgumentParser()
    parser.add_argument('--model', required=True, help='The path to the pickled model state')
    parser.add_argument('--image', required=True, help='The path to the target image')
    args = parser.parse_args()

    # load the final model that has been trained on the Google Images dataset
    model = load_model(args.model)
    # create the face finder using the pre-trained MTCNN model
    face_finder = FaceFinder(MTCNN(keep_all=True))
    # wrap the emotion classifier model in the FacePredictor; the class labels are the sorted emotion names
    predict_emotion = FacePredictor(model, emotions=sorted(EMOTION_COLOR_MAP))

    pil_img = Image.open(args.image).convert('RGB')
    faces, boxes = face_finder(pil_img, return_boxes=True)
    draw = ImageDraw.Draw(pil_img)
    process_image(faces, boxes, predict_emotion, draw, font_loader=FontLoader())
    pil_img.show()
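    # to save the annotated image rather than just display it, PIL's Image.save could be used here,
    # e.g. pil_img.save('annotated.jpg'), where the output path is illustrative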