diff --git a/deepface/basemodels/ArcFace.py b/deepface/basemodels/ArcFace.py
index 13cf10a2..bb639f3d 100644
--- a/deepface/basemodels/ArcFace.py
+++ b/deepface/basemodels/ArcFace.py
@@ -1,5 +1,7 @@
+from typing import List
 import os
 import gdown
+import numpy as np
 from deepface.commons import functions
 from deepface.commons.logger import Logger
 from deepface.models.FacialRecognition import FacialRecognition
@@ -43,7 +45,7 @@
 )
 
 # pylint: disable=too-few-public-methods
-class ArcFace(FacialRecognition):
+class ArcFaceClient(FacialRecognition):
     """
     ArcFace model class
     """
@@ -52,6 +54,18 @@ def __init__(self):
         self.model = load_model()
         self.model_name = "ArcFace"
 
+    def find_embeddings(self, img: np.ndarray) -> List[float]:
+        """
+        find embeddings with ArcFace model
+        Args:
+            img (np.ndarray): pre-loaded image in BGR
+        Returns:
+            embeddings (list): multi-dimensional vector
+        """
+        # model.predict causes memory issue when it is called in a for loop
+        # embedding = model.predict(img, verbose=0)[0].tolist()
+        return self.model(img, training=False).numpy()[0].tolist()
+
 
 def load_model(
     url="https://github.com/serengil/deepface_models/releases/download/v1.0/arcface_weights.h5",
diff --git a/deepface/basemodels/DeepID.py b/deepface/basemodels/DeepID.py
index 7a4da5bf..8b4a7d5b 100644
--- a/deepface/basemodels/DeepID.py
+++ b/deepface/basemodels/DeepID.py
@@ -1,5 +1,7 @@
+from typing import List
 import os
 import gdown
+import numpy as np
 from deepface.commons import functions
 from deepface.commons.logger import Logger
 from deepface.models.FacialRecognition import FacialRecognition
@@ -39,7 +41,7 @@
 # -------------------------------------
 
 # pylint: disable=too-few-public-methods
-class DeepId(FacialRecognition):
+class DeepIdClient(FacialRecognition):
     """
     DeepId model class
     """
@@ -48,6 +50,18 @@ def __init__(self):
         self.model = load_model()
         self.model_name = "DeepId"
 
+    def find_embeddings(self, img: np.ndarray) -> List[float]:
+        """
+        find embeddings with DeepId model
+        Args:
+            img (np.ndarray): pre-loaded image in BGR
+        Returns:
+            embeddings (list): multi-dimensional vector
+        """
+        # model.predict causes memory issue when it is called in a for loop
+        # embedding = model.predict(img, verbose=0)[0].tolist()
+        return self.model(img, training=False).numpy()[0].tolist()
+
 
 def load_model(
     url="https://github.com/serengil/deepface_models/releases/download/v1.0/deepid_keras_weights.h5",
diff --git a/deepface/basemodels/DlibResNet.py b/deepface/basemodels/Dlib.py
similarity index 66%
rename from deepface/basemodels/DlibResNet.py
rename to deepface/basemodels/Dlib.py
index c440f19a..6d06ef41 100644
--- a/deepface/basemodels/DlibResNet.py
+++ b/deepface/basemodels/Dlib.py
@@ -1,3 +1,4 @@
+from typing import List
 import os
 import bz2
 import gdown
@@ -11,7 +12,7 @@
 
 # pylint: disable=too-few-public-methods
-class Dlib(FacialRecognition):
+class DlibClient(FacialRecognition):
     """
     Dlib model class
     """
@@ -20,15 +21,33 @@ def __init__(self):
         self.model = DlibResNet()
         self.model_name = "Dlib"
 
-    def find_embeddings(self, img: np.ndarray) -> list:
+    def find_embeddings(self, img: np.ndarray) -> List[float]:
         """
-        Custom find embeddings function of Dlib different than FacialRecognition's one
+        find embeddings with Dlib model - different than regular models
         Args:
-            img (np.ndarray)
-        Retunrs:
-            embeddings (list)
+            img (np.ndarray): pre-loaded image in BGR
+        Returns:
+            embeddings (list): multi-dimensional vector
         """
-        return self.model.predict(img)[0].tolist()
+        # return self.model.predict(img)[0].tolist()
+
+        # extract_faces returns 4 dimensional images
+        if len(img.shape) == 4:
+            img = img[0]
+
+        # bgr to rgb
+        img = img[:, :, ::-1]  # bgr to rgb
+
+        # img is in scale of [0, 1] but expected [0, 255]
+        if img.max() <= 1:
+            img = img * 255
+
+        img = img.astype(np.uint8)
+
+        img_representation = self.model.model.compute_face_descriptor(img)
+        img_representation = np.array(img_representation)
+        img_representation = np.expand_dims(img_representation, axis=0)
+        return img_representation[0].tolist()
 
 
 class DlibResNet:
@@ -69,38 +88,12 @@ def __init__(self):
 
         # ---------------------
 
-        model = dlib.face_recognition_model_v1(weight_file)
-        self.__model = model
+        self.model = dlib.face_recognition_model_v1(weight_file)
 
         # ---------------------
 
         # return None  # classes must return None
 
-    def predict(self, img_aligned: np.ndarray) -> np.ndarray:
-
-        # functions.detectFace returns 4 dimensional images
-        if len(img_aligned.shape) == 4:
-            img_aligned = img_aligned[0]
-
-        # functions.detectFace returns bgr images
-        img_aligned = img_aligned[:, :, ::-1]  # bgr to rgb
-
-        # deepface.detectFace returns an array in scale of [0, 1]
-        # but dlib expects in scale of [0, 255]
-        if img_aligned.max() <= 1:
-            img_aligned = img_aligned * 255
-
-        img_aligned = img_aligned.astype(np.uint8)
-
-        model = self.__model
-
-        img_representation = model.compute_face_descriptor(img_aligned)
-
-        img_representation = np.array(img_representation)
-        img_representation = np.expand_dims(img_representation, axis=0)
-
-        return img_representation
-
 
 class DlibMetaData:
     def __init__(self):
diff --git a/deepface/basemodels/Facenet.py b/deepface/basemodels/Facenet.py
index 44797903..d95da023 100644
--- a/deepface/basemodels/Facenet.py
+++ b/deepface/basemodels/Facenet.py
@@ -1,5 +1,7 @@
+from typing import List
 import os
 import gdown
+import numpy as np
 from deepface.commons import functions
 from deepface.commons.logger import Logger
 from deepface.models.FacialRecognition import FacialRecognition
@@ -43,7 +45,7 @@
 # --------------------------------
 
 # pylint: disable=too-few-public-methods
-class FaceNet128d(FacialRecognition):
+class FaceNet128dClient(FacialRecognition):
     """
     FaceNet-128d model class
     """
@@ -52,8 +54,20 @@ def __init__(self):
         self.model = load_facenet128d_model()
         self.model_name = "FaceNet-128d"
 
+    def find_embeddings(self, img: np.ndarray) -> List[float]:
+        """
+        find embeddings with FaceNet-128d model
+        Args:
+            img (np.ndarray): pre-loaded image in BGR
+        Returns:
+            embeddings (list): multi-dimensional vector
+        """
+        # model.predict causes memory issue when it is called in a for loop
+        # embedding = model.predict(img, verbose=0)[0].tolist()
+        return self.model(img, training=False).numpy()[0].tolist()
 
-class FaceNet512d(FacialRecognition):
+
+class FaceNet512dClient(FacialRecognition):
     """
     FaceNet-512d model class
     """
@@ -62,6 +76,18 @@ def __init__(self):
         self.model = load_facenet512d_model()
         self.model_name = "FaceNet-512d"
 
+    def find_embeddings(self, img: np.ndarray) -> List[float]:
+        """
+        find embeddings with FaceNet-512d model
+        Args:
+            img (np.ndarray): pre-loaded image in BGR
+        Returns:
+            embeddings (list): multi-dimensional vector
+        """
+        # model.predict causes memory issue when it is called in a for loop
+        # embedding = model.predict(img, verbose=0)[0].tolist()
+        return self.model(img, training=False).numpy()[0].tolist()
+
 
 def scaling(x, scale):
     return x * scale
diff --git a/deepface/basemodels/FbDeepFace.py b/deepface/basemodels/FbDeepFace.py
index 0a436edc..075626b8 100644
--- a/deepface/basemodels/FbDeepFace.py
+++ b/deepface/basemodels/FbDeepFace.py
@@ -1,6 +1,8 @@
+from typing import List
 import os
 import zipfile
 import gdown
+import numpy as np
 from deepface.commons import functions
 from deepface.commons.logger import Logger
 from deepface.models.FacialRecognition import FacialRecognition
@@ -36,7 +38,7 @@
 # -------------------------------------
 
 # pylint: disable=line-too-long, too-few-public-methods
-class DeepFace(FacialRecognition):
+class DeepFaceClient(FacialRecognition):
     """
     Fb's DeepFace model class
     """
@@ -45,6 +47,18 @@ def __init__(self):
         self.model = load_model()
         self.model_name = "DeepFace"
 
+    def find_embeddings(self, img: np.ndarray) -> List[float]:
+        """
+        find embeddings with DeepFace model
+        Args:
+            img (np.ndarray): pre-loaded image in BGR
+        Returns:
+            embeddings (list): multi-dimensional vector
+        """
+        # model.predict causes memory issue when it is called in a for loop
+        # embedding = model.predict(img, verbose=0)[0].tolist()
+        return self.model(img, training=False).numpy()[0].tolist()
+
 
 def load_model(
     url="https://github.com/swghosh/DeepFace/releases/download/weights-vggface2-2d-aligned/VGGFace2_DeepFace_weights_val-0.9034.h5.zip",
diff --git a/deepface/basemodels/OpenFace.py b/deepface/basemodels/OpenFace.py
index 05ef430e..38672912 100644
--- a/deepface/basemodels/OpenFace.py
+++ b/deepface/basemodels/OpenFace.py
@@ -1,6 +1,8 @@
+from typing import List
 import os
 import gdown
 import tensorflow as tf
+import numpy as np
 from deepface.commons import functions
 from deepface.commons.logger import Logger
 from deepface.models.FacialRecognition import FacialRecognition
@@ -26,7 +28,7 @@
 # ---------------------------------------
 
 # pylint: disable=too-few-public-methods
-class OpenFace(FacialRecognition):
+class OpenFaceClient(FacialRecognition):
     """
     OpenFace model class
     """
@@ -35,6 +37,18 @@ def __init__(self):
         self.model = load_model()
         self.model_name = "OpenFace"
 
+    def find_embeddings(self, img: np.ndarray) -> List[float]:
+        """
+        find embeddings with OpenFace model
+        Args:
+            img (np.ndarray): pre-loaded image in BGR
+        Returns:
+            embeddings (list): multi-dimensional vector
+        """
+        # model.predict causes memory issue when it is called in a for loop
+        # embedding = model.predict(img, verbose=0)[0].tolist()
+        return self.model(img, training=False).numpy()[0].tolist()
+
 
 def load_model(
    url="https://github.com/serengil/deepface_models/releases/download/v1.0/openface_weights.h5",
diff --git a/deepface/basemodels/SFace.py b/deepface/basemodels/SFace.py
index a9f36d14..ba8b55e5 100644
--- a/deepface/basemodels/SFace.py
+++ b/deepface/basemodels/SFace.py
@@ -1,5 +1,5 @@
 import os
-from typing import Any
+from typing import Any, List
 
 import numpy as np
 import cv2 as cv
@@ -14,7 +14,7 @@
 
 # pylint: disable=line-too-long, too-few-public-methods
-class SFace(FacialRecognition):
+class SFaceClient(FacialRecognition):
     """
     SFace model class
     """
@@ -23,15 +23,22 @@ def __init__(self):
         self.model = load_model()
         self.model_name = "SFace"
 
-    def find_embeddings(self, img: np.ndarray) -> list:
+    def find_embeddings(self, img: np.ndarray) -> List[float]:
         """
-        Custom find embeddings function of SFace different than FacialRecognition's one
+        find embeddings with SFace model - different than regular models
         Args:
-            img (np.ndarray)
-        Retunrs:
-            embeddings (list)
+            img (np.ndarray): pre-loaded image in BGR
+        Returns:
+            embeddings (list): multi-dimensional vector
         """
-        return self.model.predict(img)[0].tolist()
+        # return self.model.predict(img)[0].tolist()
+
+        # revert the image to original format and preprocess using the model
+        input_blob = (img[0] * 255).astype(np.uint8)
+
+        embeddings = self.model.model.feature(input_blob)
+
+        return embeddings[0].tolist()
 
 
 def load_model(
@@ -74,17 +81,6 @@ def __init__(self, model_path):
 
         self.layers = [_Layer()]
 
-    def predict(self, image: np.ndarray) -> np.ndarray:
-        # Preprocess
-        input_blob = (image[0] * 255).astype(
-            np.uint8
-        )  # revert the image to original format and preprocess using the model
-
-        # Forward
-        embeddings = self.model.feature(input_blob)
-
-        return embeddings
-
 
 class _Layer:
     input_shape = (None, 112, 112, 3)
diff --git a/deepface/basemodels/VGGFace.py b/deepface/basemodels/VGGFace.py
index 921898e0..80d6d876 100644
--- a/deepface/basemodels/VGGFace.py
+++ b/deepface/basemodels/VGGFace.py
@@ -1,5 +1,7 @@
+from typing import List
 import os
 import gdown
+import numpy as np
 from deepface.commons import functions
 from deepface.commons.logger import Logger
 from deepface.models.FacialRecognition import FacialRecognition
@@ -37,7 +39,7 @@
 # ---------------------------------------
 
 # pylint: disable=too-few-public-methods
-class VggFace(FacialRecognition):
+class VggFaceClient(FacialRecognition):
     """
     VGG-Face model class
     """
@@ -46,6 +48,18 @@ def __init__(self):
         self.model = load_model()
         self.model_name = "VGG-Face"
 
+    def find_embeddings(self, img: np.ndarray) -> List[float]:
+        """
+        find embeddings with VGG-Face model
+        Args:
+            img (np.ndarray): pre-loaded image in BGR
+        Returns:
+            embeddings (list): multi-dimensional vector
+        """
+        # model.predict causes memory issue when it is called in a for loop
+        # embedding = model.predict(img, verbose=0)[0].tolist()
+        return self.model(img, training=False).numpy()[0].tolist()
+
 
 def base_model() -> Sequential:
     """
diff --git a/deepface/commons/functions.py b/deepface/commons/functions.py
index 22b76f13..dc2a5b2c 100644
--- a/deepface/commons/functions.py
+++ b/deepface/commons/functions.py
@@ -1,5 +1,5 @@
 import os
-from typing import Union, Tuple
+from typing import Union, Tuple, List
 import base64
 from pathlib import Path
 
@@ -140,9 +140,9 @@ def extract_faces(
     grayscale: bool = False,
     enforce_detection: bool = True,
     align: bool = True,
-) -> list:
-    """Extract faces from an image.
-
+) -> List[Tuple[np.ndarray, dict, float]]:
+    """
+    Extract faces from an image.
     Args:
         img: a path, url, base64 or numpy array.
         target_size (tuple, optional): the target size of the extracted faces.
@@ -157,7 +157,12 @@ def extract_faces(
         ValueError: if face could not be detected and enforce_detection is True.
 
     Returns:
-        list: a list of extracted faces.
+        results (List[Tuple[np.ndarray, dict, float]]): A list of tuples
+        where each tuple contains:
+        - detected_face (np.ndarray): The detected face as a NumPy array.
+        - face_region (dict): The image region represented as
+            {"x": x, "y": y, "w": w, "h": h}
+        - confidence (float): The confidence score associated with the detected face.
""" # this is going to store a list of img itself (numpy), it region and confidence @@ -246,7 +251,7 @@ def extract_faces( "h": int(current_region[3]), } - extracted_face = [img_pixels, region_obj, confidence] + extracted_face = (img_pixels, region_obj, confidence) extracted_faces.append(extracted_face) if len(extracted_faces) == 0 and enforce_detection == True: diff --git a/deepface/detectors/DetectorWrapper.py b/deepface/detectors/DetectorWrapper.py index 06cfca6a..d061d795 100644 --- a/deepface/detectors/DetectorWrapper.py +++ b/deepface/detectors/DetectorWrapper.py @@ -2,15 +2,15 @@ import numpy as np from deepface.models.Detector import Detector from deepface.detectors import ( - OpenCvWrapper, - SsdWrapper, - DlibWrapper, - MtcnnWrapper, - RetinaFaceWrapper, - MediapipeWrapper, - YoloWrapper, - YunetWrapper, - FastMtcnnWrapper, + FastMtCnn, + MediaPipe, + MtCnn, + OpenCv, + Dlib, + RetinaFace, + Ssd, + Yolo, + YuNet, ) @@ -25,15 +25,15 @@ def build_model(detector_backend: str) -> Any: global face_detector_obj # singleton design pattern backends = { - "opencv": OpenCvWrapper.OpenCv, - "mtcnn": MtcnnWrapper.MtCnn, - "ssd": SsdWrapper.Ssd, - "dlib": DlibWrapper.Dlib, - "retinaface": RetinaFaceWrapper.RetinaFace, - "mediapipe": MediapipeWrapper.MediaPipe, - "yolov8": YoloWrapper.Yolo, - "yunet": YunetWrapper.YuNet, - "fastmtcnn": FastMtcnnWrapper.FastMtCnn, + "opencv": OpenCv.OpenCvClient, + "mtcnn": MtCnn.MtCnnClient, + "ssd": Ssd.SsdClient, + "dlib": Dlib.DlibClient, + "retinaface": RetinaFace.RetinaFaceClient, + "mediapipe": MediaPipe.MediaPipeClient, + "yolov8": Yolo.YoloClient, + "yunet": YuNet.YuNetClient, + "fastmtcnn": FastMtCnn.FastMtCnnClient, } if not "face_detector_obj" in globals(): @@ -59,9 +59,20 @@ def detect_faces(detector_backend: str, img: np.ndarray, align: bool = True) -> detector_backend (str): detector name img (np.ndarray): pre-loaded image alig (bool): enable or disable alignment after detection - Returns - result (list): tuple of face (np.ndarray), face region (list) - , confidence score (float) + Returns: + results (List[Tuple[np.ndarray, List[float], float]]): A list of tuples + where each tuple contains: + - detected_face (np.ndarray): The detected face as a NumPy array. + - face_region (List[float]): The image region represented as + a list of floats e.g. [x, y, w, h] + - confidence (float): The confidence score associated with the detected face. 
+
+    Example:
+        results = [
+            (array(..., dtype=uint8), [110, 60, 150, 380], 0.99),
+            (array(..., dtype=uint8), [150, 50, 299, 375], 0.98),
+            (array(..., dtype=uint8), [120, 55, 300, 371], 0.96),
+        ]
     """
     face_detector: Detector = build_model(detector_backend)
     return face_detector.detect_faces(img=img, align=align)
diff --git a/deepface/detectors/DlibWrapper.py b/deepface/detectors/Dlib.py
similarity index 80%
rename from deepface/detectors/DlibWrapper.py
rename to deepface/detectors/Dlib.py
index 40f8eebd..fead85e7 100644
--- a/deepface/detectors/DlibWrapper.py
+++ b/deepface/detectors/Dlib.py
@@ -1,3 +1,4 @@
+from typing import List, Tuple
 import os
 import bz2
 import gdown
@@ -9,7 +10,7 @@
 logger = Logger(module="detectors.DlibWrapper")
 
 
-class Dlib(Detector):
+class DlibClient(Detector):
     def __init__(self):
         self.model = self.build_model()
 
@@ -55,7 +56,9 @@ def build_model(self) -> dict:
         detector["sp"] = sp
         return detector
 
-    def detect_faces(self, img: np.ndarray, align: bool = True) -> list:
+    def detect_faces(
+        self, img: np.ndarray, align: bool = True
+    ) -> List[Tuple[np.ndarray, List[float], float]]:
         """
         Detect and align face with dlib
         Args:
@@ -63,7 +66,19 @@ def detect_faces(self, img: np.ndarray, align: bool = True) -> list:
             img (np.ndarray): pre-loaded image
             align (bool): default is true
         Returns:
-            list of detected and aligned faces
+            results (List[Tuple[np.ndarray, List[float], float]]): A list of tuples
+            where each tuple contains:
+            - detected_face (np.ndarray): The detected face as a NumPy array.
+            - face_region (List[float]): The image region represented as
+                a list of floats e.g. [x, y, w, h]
+            - confidence (float): The confidence score associated with the detected face.
+
+        Example:
+            results = [
+                (array(..., dtype=uint8), [110, 60, 150, 380], 0.99),
+                (array(..., dtype=uint8), [150, 50, 299, 375], 0.98),
+                (array(..., dtype=uint8), [120, 55, 300, 371], 0.96),
+            ]
         """
         # this is not a must dependency. do not import it in the global level.
         try:
diff --git a/deepface/detectors/FastMtcnnWrapper.py b/deepface/detectors/FastMtCnn.py
similarity index 63%
rename from deepface/detectors/FastMtcnnWrapper.py
rename to deepface/detectors/FastMtCnn.py
index 3f38fa07..c43ed556 100644
--- a/deepface/detectors/FastMtcnnWrapper.py
+++ b/deepface/detectors/FastMtCnn.py
@@ -1,24 +1,39 @@
-from typing import Any, Union
+from typing import Any, Union, List, Tuple
 import cv2
 import numpy as np
 from deepface.models.Detector import Detector
+from deepface.modules import detection
 
 # Link -> https://github.com/timesler/facenet-pytorch
 # Examples https://www.kaggle.com/timesler/guide-to-mtcnn-in-facenet-pytorch
 
 
-class FastMtCnn(Detector):
+class FastMtCnnClient(Detector):
     def __init__(self):
         self.model = self.build_model()
 
-    def detect_faces(self, img: np.ndarray, align: bool = True) -> list:
+    def detect_faces(
+        self, img: np.ndarray, align: bool = True
+    ) -> List[Tuple[np.ndarray, List[float], float]]:
         """
         Detect and align face with mtcnn
         Args:
             img (np.ndarray): pre-loaded image
             align (bool): default is true
         Returns:
-            list of detected and aligned faces
+            results (List[Tuple[np.ndarray, List[float], float]]): A list of tuples
+            where each tuple contains:
+            - detected_face (np.ndarray): The detected face as a NumPy array.
+            - face_region (List[float]): The image region represented as
+                a list of floats e.g. [x, y, w, h]
+            - confidence (float): The confidence score associated with the detected face.
+
+        Example:
+            results = [
+                (array(..., dtype=uint8), [110, 60, 150, 380], 0.99),
+                (array(..., dtype=uint8), [150, 50, 299, 375], 0.98),
+                (array(..., dtype=uint8), [120, 55, 300, 371], 0.96),
+            ]
         """
         resp = []
 
@@ -31,16 +46,16 @@ def detect_faces(self, img: np.ndarray, align: bool = True) -> list:
         )  # returns boundingbox, prob, landmark
 
         if len(detections[0]) > 0:
 
-            for detection in zip(*detections):
-                x, y, w, h = xyxy_to_xywh(detection[0])
+            for current_detection in zip(*detections):
+                x, y, w, h = xyxy_to_xywh(current_detection[0])
                 detected_face = img[int(y) : int(y + h), int(x) : int(x + w)]
                 img_region = [x, y, w, h]
-                confidence = detection[1]
+                confidence = current_detection[1]
 
                 if align:
-                    left_eye = detection[2][0]
-                    right_eye = detection[2][1]
-                    detected_face = self.align_face(
+                    left_eye = current_detection[2][0]
+                    right_eye = current_detection[2][1]
+                    detected_face = detection.align_face(
                         img=detected_face, left_eye=left_eye, right_eye=right_eye
                     )
 
diff --git a/deepface/detectors/MediapipeWrapper.py b/deepface/detectors/MediaPipe.py
similarity index 65%
rename from deepface/detectors/MediapipeWrapper.py
rename to deepface/detectors/MediaPipe.py
index 56e439ec..0e067a2a 100644
--- a/deepface/detectors/MediapipeWrapper.py
+++ b/deepface/detectors/MediaPipe.py
@@ -1,11 +1,12 @@
-from typing import Any
+from typing import Any, List, Tuple
 import numpy as np
 from deepface.models.Detector import Detector
+from deepface.modules import detection
 
 # Link - https://google.github.io/mediapipe/solutions/face_detection
 
 
-class MediaPipe(Detector):
+class MediaPipeClient(Detector):
     def __init__(self):
         self.model = self.build_model()
 
@@ -28,14 +29,28 @@ def build_model(self) -> Any:
         face_detection = mp_face_detection.FaceDetection(min_detection_confidence=0.7)
         return face_detection
 
-    def detect_faces(self, img: np.ndarray, align: bool = True) -> list:
+    def detect_faces(
+        self, img: np.ndarray, align: bool = True
+    ) -> List[Tuple[np.ndarray, List[float], float]]:
         """
         Detect and align face with mediapipe
         Args:
             img (np.ndarray): pre-loaded image
             align (bool): default is true
         Returns:
-            list of detected and aligned faces
+            results (List[Tuple[np.ndarray, List[float], float]]): A list of tuples
+            where each tuple contains:
+            - detected_face (np.ndarray): The detected face as a NumPy array.
+            - face_region (List[float]): The image region represented as
+                a list of floats e.g. [x, y, w, h]
+            - confidence (float): The confidence score associated with the detected face.
+
+        Example:
+            results = [
+                (array(..., dtype=uint8), [110, 60, 150, 380], 0.99),
+                (array(..., dtype=uint8), [150, 50, 299, 375], 0.98),
+                (array(..., dtype=uint8), [120, 55, 300, 371], 0.96),
+            ]
         """
         resp = []
 
@@ -49,11 +64,11 @@ def detect_faces(self, img: np.ndarray, align: bool = True) -> list:
             return resp
 
         # Extract the bounding box, the landmarks and the confidence score
-        for detection in results.detections:
-            (confidence,) = detection.score
+        for current_detection in results.detections:
+            (confidence,) = current_detection.score
 
-            bounding_box = detection.location_data.relative_bounding_box
-            landmarks = detection.location_data.relative_keypoints
+            bounding_box = current_detection.location_data.relative_bounding_box
+            landmarks = current_detection.location_data.relative_keypoints
 
             x = int(bounding_box.xmin * img_width)
             w = int(bounding_box.width * img_width)
@@ -73,7 +88,7 @@ def detect_faces(self, img: np.ndarray, align: bool = True) -> list:
             img_region = [x, y, w, h]
 
             if align:
-                detected_face = self.align_face(
+                detected_face = detection.align_face(
                     img=detected_face, left_eye=left_eye, right_eye=right_eye
                 )
 
diff --git a/deepface/detectors/MtCnn.py b/deepface/detectors/MtCnn.py
new file mode 100644
index 00000000..aefb5f60
--- /dev/null
+++ b/deepface/detectors/MtCnn.py
@@ -0,0 +1,67 @@
+from typing import List, Tuple
+import cv2
+import numpy as np
+from mtcnn import MTCNN
+from deepface.models.Detector import Detector
+from deepface.modules import detection
+
+# pylint: disable=too-few-public-methods
+class MtCnnClient(Detector):
+    """
+    Class to cover common face detection functionality for MtCnn backend
+    """
+
+    def __init__(self):
+        self.model = MTCNN()
+
+    def detect_faces(
+        self, img: np.ndarray, align: bool = True
+    ) -> List[Tuple[np.ndarray, List[float], float]]:
+        """
+        Detect and align face with mtcnn
+        Args:
+            img (np.ndarray): pre-loaded image
+            align (bool): default is true
+        Returns:
+            results (List[Tuple[np.ndarray, List[float], float]]): A list of tuples
+            where each tuple contains:
+            - detected_face (np.ndarray): The detected face as a NumPy array.
+            - face_region (List[float]): The image region represented as
+                a list of floats e.g. [x, y, w, h]
+            - confidence (float): The confidence score associated with the detected face.
+
+        Example:
+            results = [
+                (array(..., dtype=uint8), [110, 60, 150, 380], 0.99),
+                (array(..., dtype=uint8), [150, 50, 299, 375], 0.98),
+                (array(..., dtype=uint8), [120, 55, 300, 371], 0.96),
+            ]
+        """
+
+        resp = []
+
+        detected_face = None
+        img_region = [0, 0, img.shape[1], img.shape[0]]
+
+        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # mtcnn expects RGB but OpenCV read BGR
+        detections = self.model.detect_faces(img_rgb)
+
+        if len(detections) > 0:
+
+            for current_detection in detections:
+                x, y, w, h = current_detection["box"]
+                detected_face = img[int(y) : int(y + h), int(x) : int(x + w)]
+                img_region = [x, y, w, h]
+                confidence = current_detection["confidence"]
+
+                if align:
+                    keypoints = current_detection["keypoints"]
+                    left_eye = keypoints["left_eye"]
+                    right_eye = keypoints["right_eye"]
+                    detected_face = detection.align_face(
+                        img=detected_face, left_eye=left_eye, right_eye=right_eye
+                    )
+
+                resp.append((detected_face, img_region, confidence))
+
+        return resp
diff --git a/deepface/detectors/MtcnnWrapper.py b/deepface/detectors/MtcnnWrapper.py
deleted file mode 100644
index 14c59f52..00000000
--- a/deepface/detectors/MtcnnWrapper.py
+++ /dev/null
@@ -1,51 +0,0 @@
-import cv2
-import numpy as np
-from mtcnn import MTCNN
-from deepface.models.Detector import Detector
-
-
-class MtCnn(Detector):
-    """
-    Class to cover common face detection functionalitiy for MtCnn backend
-    """
-
-    def __init__(self):
-        self.model = MTCNN()
-
-    def detect_faces(self, img: np.ndarray, align: bool = True) -> list:
-        """
-        Detect and align face with mtcnn
-        Args:
-            img (np.ndarray): pre-loaded image
-            align (bool): default is true
-        Returns:
-            list of detected and aligned faces
-        """
-
-        resp = []
-
-        detected_face = None
-        img_region = [0, 0, img.shape[1], img.shape[0]]
-
-        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # mtcnn expects RGB but OpenCV read BGR
-        detections = self.model.detect_faces(img_rgb)
-
-        if len(detections) > 0:
-
-            for detection in detections:
-                x, y, w, h = detection["box"]
-                detected_face = img[int(y) : int(y + h), int(x) : int(x + w)]
-                img_region = [x, y, w, h]
-                confidence = detection["confidence"]
-
-                if align:
-                    keypoints = detection["keypoints"]
-                    left_eye = keypoints["left_eye"]
-                    right_eye = keypoints["right_eye"]
-                    detected_face = self.align_face(
-                        img=detected_face, left_eye=left_eye, right_eye=right_eye
-                    )
-
-                resp.append((detected_face, img_region, confidence))
-
-        return resp
diff --git a/deepface/detectors/OpenCvWrapper.py b/deepface/detectors/OpenCv.py
similarity index 83%
rename from deepface/detectors/OpenCvWrapper.py
rename to deepface/detectors/OpenCv.py
index 515fdb09..6f1d21c2 100644
--- a/deepface/detectors/OpenCvWrapper.py
+++ b/deepface/detectors/OpenCv.py
@@ -1,11 +1,12 @@
 import os
-from typing import Any
+from typing import Any, List, Tuple
 import cv2
 import numpy as np
 from deepface.models.Detector import Detector
+from deepface.modules import detection
 
 
-class OpenCv(Detector):
+class OpenCvClient(Detector):
     """
     Class to cover common face detection functionalitiy for OpenCv backend
     """
@@ -24,7 +25,9 @@ def build_model(self):
         detector["eye_detector"] = self.__build_cascade("haarcascade_eye")
         return detector
 
-    def detect_faces(self, img: np.ndarray, align: bool = True) -> list:
+    def detect_faces(
+        self, img: np.ndarray, align: bool = True
+    ) -> List[Tuple[np.ndarray, List[float], float]]:
         """
         Detect and align face with opencv
         Args:
             img (np.ndarray): pre-loaded image
             align (bool): default is true
         Returns:
-            list of detected and aligned faces
+            results (List[Tuple[np.ndarray, List[float], float]]): A list of tuples
+            where each tuple contains:
+            - detected_face (np.ndarray): The detected face as a NumPy array.
+            - face_region (List[float]): The image region represented as
+                a list of floats e.g. [x, y, w, h]
+            - confidence (float): The confidence score associated with the detected face.
+
+        Example:
+            results = [
+                (array(..., dtype=uint8), [110, 60, 150, 380], 0.99),
+                (array(..., dtype=uint8), [150, 50, 299, 375], 0.98),
+                (array(..., dtype=uint8), [120, 55, 300, 371], 0.96),
+            ]
         """
         resp = []
 
@@ -56,7 +71,7 @@ def detect_faces(self, img: np.ndarray, align: bool = True) -> list:
 
             if align:
                 left_eye, right_eye = self.find_eyes(img=detected_face)
-                detected_face = self.align_face(detected_face, left_eye, right_eye)
+                detected_face = detection.align_face(detected_face, left_eye, right_eye)
 
             img_region = [x, y, w, h]
 
diff --git a/deepface/detectors/RetinaFaceWrapper.py b/deepface/detectors/RetinaFace.py
similarity index 64%
rename from deepface/detectors/RetinaFaceWrapper.py
rename to deepface/detectors/RetinaFace.py
index 6986ce5b..0eb8bf1c 100644
--- a/deepface/detectors/RetinaFaceWrapper.py
+++ b/deepface/detectors/RetinaFace.py
@@ -1,21 +1,36 @@
+from typing import List, Tuple
 import numpy as np
 from retinaface import RetinaFace as rf
 from retinaface.commons import postprocess
 from deepface.models.Detector import Detector
 
-
-class RetinaFace(Detector):
+# pylint: disable=too-few-public-methods
+class RetinaFaceClient(Detector):
     def __init__(self):
         self.model = rf.build_model()
 
-    def detect_faces(self, img: np.ndarray, align: bool = True) -> list:
+    def detect_faces(
+        self, img: np.ndarray, align: bool = True
+    ) -> List[Tuple[np.ndarray, List[float], float]]:
         """
         Detect and align face with retinaface
         Args:
             img (np.ndarray): pre-loaded image
             align (bool): default is true
         Returns:
-            list of detected and aligned faces
+            results (List[Tuple[np.ndarray, List[float], float]]): A list of tuples
+            where each tuple contains:
+            - detected_face (np.ndarray): The detected face as a NumPy array.
+            - face_region (List[float]): The image region represented as
+                a list of floats e.g. [x, y, w, h]
+            - confidence (float): The confidence score associated with the detected face.
+
+        Example:
+            results = [
+                (array(..., dtype=uint8), [110, 60, 150, 380], 0.99),
+                (array(..., dtype=uint8), [150, 50, 299, 375], 0.98),
+                (array(..., dtype=uint8), [120, 55, 300, 371], 0.96),
+            ]
         """
         resp = []
 
diff --git a/deepface/detectors/SsdWrapper.py b/deepface/detectors/Ssd.py
similarity index 79%
rename from deepface/detectors/SsdWrapper.py
rename to deepface/detectors/Ssd.py
index f88eea3d..50bc4cd2 100644
--- a/deepface/detectors/SsdWrapper.py
+++ b/deepface/detectors/Ssd.py
@@ -1,11 +1,13 @@
+from typing import List, Tuple
 import os
 import gdown
 import cv2
 import pandas as pd
 import numpy as np
-from deepface.detectors import OpenCvWrapper
+from deepface.detectors import OpenCv
 from deepface.commons import functions
 from deepface.models.Detector import Detector
+from deepface.modules import detection
 from deepface.commons.logger import Logger
 
 logger = Logger(module="detectors.SsdWrapper")
 
 # pylint: disable=line-too-long
 
 
-class Ssd(Detector):
+class SsdClient(Detector):
     def __init__(self):
         self.model = self.build_model()
 
@@ -65,18 +67,32 @@ def build_model(self) -> dict:
         detector = {}
         detector["face_detector"] = face_detector
-        detector["opencv_module"] = OpenCvWrapper.OpenCv()
+        detector["opencv_module"] = OpenCv.OpenCvClient()
 
         return detector
 
-    def detect_faces(self, img: np.ndarray, align: bool = True) -> list:
+    def detect_faces(
+        self, img: np.ndarray, align: bool = True
+    ) -> List[Tuple[np.ndarray, List[float], float]]:
         """
         Detect and align face with ssd
         Args:
             img (np.ndarray): pre-loaded image
             align (bool): default is true
         Returns:
-            list of detected and aligned faces
+            results (List[Tuple[np.ndarray, List[float], float]]): A list of tuples
+            where each tuple contains:
+            - detected_face (np.ndarray): The detected face as a NumPy array.
+            - face_region (List[float]): The image region represented as
+                a list of floats e.g. [x, y, w, h]
+            - confidence (float): The confidence score associated with the detected face.
+
+        Example:
+            results = [
+                (array(..., dtype=uint8), [110, 60, 150, 380], 0.99),
+                (array(..., dtype=uint8), [150, 50, 299, 375], 0.98),
+                (array(..., dtype=uint8), [120, 55, 300, 371], 0.96),
+            ]
         """
         resp = []
 
@@ -134,9 +150,9 @@ def detect_faces(self, img: np.ndarray, align: bool = True) -> list:
                 confidence = instance["confidence"]
 
                 if align:
-                    opencv_module: OpenCvWrapper.OpenCv = self.model["opencv_module"]
+                    opencv_module: OpenCv.OpenCvClient = self.model["opencv_module"]
                     left_eye, right_eye = opencv_module.find_eyes(detected_face)
-                    detected_face = self.align_face(
+                    detected_face = detection.align_face(
                         img=detected_face, left_eye=left_eye, right_eye=right_eye
                     )
 
diff --git a/deepface/detectors/YoloWrapper.py b/deepface/detectors/Yolo.py
similarity index 75%
rename from deepface/detectors/YoloWrapper.py
rename to deepface/detectors/Yolo.py
index a6666c99..44bd4bb2 100644
--- a/deepface/detectors/YoloWrapper.py
+++ b/deepface/detectors/Yolo.py
@@ -1,6 +1,7 @@
-from typing import Any
+from typing import Any, List, Tuple
 import numpy as np
 from deepface.models.Detector import Detector
+from deepface.modules import detection
 from deepface.commons.logger import Logger
 
 logger = Logger()
@@ -16,7 +17,7 @@
 LANDMARKS_CONFIDENCE_THRESHOLD = 0.5
 
 
-class Yolo(Detector):
+class YoloClient(Detector):
     def __init__(self):
         self.model = self.build_model()
 
@@ -50,7 +51,9 @@ def build_model(self) -> Any:
         # Return face_detector
         return YOLO(weight_path)
 
-    def detect_faces(self, img: np.ndarray, align: bool = False) -> list:
+    def detect_faces(
+        self, img: np.ndarray, align: bool = False
+    ) -> List[Tuple[np.ndarray, List[float], float]]:
         """
         Detect and align face with yolo
         Args:
             img (np.ndarray): pre-loaded image
             align (bool): default is true
         Returns:
-            list of detected and aligned faces
+            results (List[Tuple[np.ndarray, List[float], float]]): A list of tuples
+            where each tuple contains:
+            - detected_face (np.ndarray): The detected face as a NumPy array.
+            - face_region (List[float]): The image region represented as
+                a list of floats e.g. [x, y, w, h]
+            - confidence (float): The confidence score associated with the detected face.
+
+        Example:
+            results = [
+                (array(..., dtype=uint8), [110, 60, 150, 380], 0.99),
+                (array(..., dtype=uint8), [150, 50, 299, 375], 0.98),
+                (array(..., dtype=uint8), [120, 55, 300, 371], 0.96),
+            ]
         """
         resp = []
 
@@ -85,7 +100,7 @@ def detect_faces(self, img: np.ndarray, align: bool = False) -> list:
                 left_eye[1] > LANDMARKS_CONFIDENCE_THRESHOLD
                 and right_eye[1] > LANDMARKS_CONFIDENCE_THRESHOLD
             ):
-                detected_face = self.align_face(
+                detected_face = detection.align_face(
                     img=detected_face, left_eye=left_eye[0].cpu(), right_eye=right_eye[0].cpu()
                 )
             resp.append((detected_face, [x, y, w, h], confidence))
diff --git a/deepface/detectors/YunetWrapper.py b/deepface/detectors/YuNet.py
similarity index 80%
rename from deepface/detectors/YunetWrapper.py
rename to deepface/detectors/YuNet.py
index 544bd5b9..2f7e9732 100644
--- a/deepface/detectors/YunetWrapper.py
+++ b/deepface/detectors/YuNet.py
@@ -1,16 +1,17 @@
 import os
-from typing import Any
+from typing import Any, List, Tuple
 import cv2
 import numpy as np
 import gdown
 from deepface.commons import functions
-from deepface.commons.logger import Logger
 from deepface.models.Detector import Detector
+from deepface.modules import detection
+from deepface.commons.logger import Logger
 
 logger = Logger(module="detectors.YunetWrapper")
 
 
-class YuNet(Detector):
+class YuNetClient(Detector):
     def __init__(self):
         self.model = self.build_model()
 
@@ -41,14 +42,28 @@ def build_model(self) -> Any:
             ) from err
         return face_detector
 
-    def detect_faces(self, img: np.ndarray, align: bool = True) -> list:
+    def detect_faces(
+        self, img: np.ndarray, align: bool = True
+    ) -> List[Tuple[np.ndarray, List[float], float]]:
        """
         Detect and align face with yunet
         Args:
             img (np.ndarray): pre-loaded image
             align (bool): default is true
         Returns:
-            list of detected and aligned faces
+            results (List[Tuple[np.ndarray, List[float], float]]): A list of tuples
+            where each tuple contains:
+            - detected_face (np.ndarray): The detected face as a NumPy array.
+            - face_region (List[float]): The image region represented as
+                a list of floats e.g. [x, y, w, h]
+            - confidence (float): The confidence score associated with the detected face.
+
+        Example:
+            results = [
+                (array(..., dtype=uint8), [110, 60, 150, 380], 0.99),
+                (array(..., dtype=uint8), [150, 50, 299, 375], 0.98),
+                (array(..., dtype=uint8), [120, 55, 300, 371], 0.96),
+            ]
         """
         # FaceDetector.detect_faces does not support score_threshold parameter.
         # We can set it via environment variable.
@@ -107,6 +122,6 @@ def detect_faces(self, img: np.ndarray, align: bool = True) -> list:
             detected_face = img[int(y) : int(y + h), int(x) : int(x + w)]
             img_region = [x, y, w, h]
             if align:
-                detected_face = self.align_face(detected_face, (x_re, y_re), (x_le, y_le))
+                detected_face = detection.align_face(detected_face, (x_re, y_re), (x_le, y_le))
             resp.append((detected_face, img_region, confidence))
         return resp
diff --git a/deepface/extendedmodels/Age.py b/deepface/extendedmodels/Age.py
index 6315a8c8..1d5e6cd5 100644
--- a/deepface/extendedmodels/Age.py
+++ b/deepface/extendedmodels/Age.py
@@ -23,7 +23,7 @@
 # ----------------------------------------
 
 # pylint: disable=too-few-public-methods
-class ApparentAge(Demography):
+class ApparentAgeClient(Demography):
     """
     Age model class
     """
diff --git a/deepface/extendedmodels/Emotion.py b/deepface/extendedmodels/Emotion.py
index 7db38c17..8a04c33f 100644
--- a/deepface/extendedmodels/Emotion.py
+++ b/deepface/extendedmodels/Emotion.py
@@ -33,7 +33,7 @@
 labels = ["angry", "disgust", "fear", "happy", "sad", "surprise", "neutral"]
 
 # pylint: disable=too-few-public-methods
-class FacialExpression(Demography):
+class EmotionClient(Demography):
     """
     Emotion model class
     """
diff --git a/deepface/extendedmodels/Gender.py b/deepface/extendedmodels/Gender.py
index 5c6bc95d..191e315d 100644
--- a/deepface/extendedmodels/Gender.py
+++ b/deepface/extendedmodels/Gender.py
@@ -26,7 +26,7 @@
 labels = ["Woman", "Man"]
 
 # pylint: disable=too-few-public-methods
-class Gender(Demography):
+class GenderClient(Demography):
     """
     Gender model class
     """
diff --git a/deepface/extendedmodels/Race.py b/deepface/extendedmodels/Race.py
index 33a0d194..9c907f1c 100644
--- a/deepface/extendedmodels/Race.py
+++ b/deepface/extendedmodels/Race.py
@@ -25,7 +25,7 @@
 labels = ["asian", "indian", "black", "white", "middle eastern", "latino hispanic"]
 
 # pylint: disable=too-few-public-methods
-class Race(Demography):
+class RaceClient(Demography):
     """
     Race model class
     """
diff --git a/deepface/models/Detector.py b/deepface/models/Detector.py
index b5c80af1..b834e9d9 100644
--- a/deepface/models/Detector.py
+++ b/deepface/models/Detector.py
@@ -1,39 +1,34 @@
+from typing import List, Tuple
 from abc import ABC, abstractmethod
-from typing import Union
 import numpy as np
-from PIL import Image
 
 # Notice that all facial detector models must be inherited from this class
+# pylint: disable=unnecessary-pass, too-few-public-methods
 class Detector(ABC):
     @abstractmethod
-    def detect_faces(self, img: np.ndarray, align: bool = True) -> list:
-        pass
-
-    def align_face(
-        self, img: np.ndarray, left_eye: Union[list, tuple], right_eye: Union[list, tuple]
-    ) -> np.ndarray:
+    def detect_faces(
+        self, img: np.ndarray, align: bool = True
+    ) -> List[Tuple[np.ndarray, List[float], float]]:
         """
-        Align a given image horizantally with respect to their left and right eye locations
+        Detect faces from a given image
         Args:
-            img (np.ndarray): pre-loaded image with detected face
-            left_eye (list or tuple): coordinates of left eye with respect to the you
-            right_eye(list or tuple): coordinates of right eye with respect to the you
+            img (np.ndarray): pre-loaded image as a NumPy array
+            align (bool): enable or disable alignment after face detection
         Returns:
-            img (np.ndarray): aligned facial image
-        """
-        # if eye could not be detected for the given image, return image itself
-        if left_eye is None or right_eye is None:
-            return img
+            results (List[Tuple[np.ndarray, List[float], float]]): A list of tuples
+            where each tuple contains:
+            - detected_face (np.ndarray): The detected face as a NumPy array.
+            - face_region (List[float]): The image region represented as
+                a list of floats e.g. [x, y, w, h]
+            - confidence (float): The confidence score associated with the detected face.
 
-        # sometimes unexpectedly detected images come with nil dimensions
-        if img.shape[0] == 0 or img.shape[1] == 0:
-            return img
-
-        angle = float(
-            np.degrees(np.arctan2(right_eye[1] - left_eye[1], right_eye[0] - left_eye[0]))
-        )
-        img = Image.fromarray(img)
-        img = np.array(img.rotate(angle))
-        return img
+        Example:
+            results = [
+                (array(..., dtype=uint8), [110, 60, 150, 380], 0.99),
+                (array(..., dtype=uint8), [150, 50, 299, 375], 0.98),
+                (array(..., dtype=uint8), [120, 55, 300, 371], 0.96),
+            ]
+        """
+        pass
diff --git a/deepface/models/FacialRecognition.py b/deepface/models/FacialRecognition.py
index 1709e406..b49292c0 100644
--- a/deepface/models/FacialRecognition.py
+++ b/deepface/models/FacialRecognition.py
@@ -1,5 +1,5 @@
-from abc import ABC
-from typing import Any, Union
+from abc import ABC, abstractmethod
+from typing import Any, Union, List
 import numpy as np
 from deepface.commons import functions
 
@@ -16,13 +16,6 @@ class FacialRecognition(ABC):
     model: Union[Model, Any]
     model_name: str
 
-    def find_embeddings(self, img: np.ndarray) -> list:
-        if not isinstance(self.model, Model):
-            raise ValueError(
-                "If a facial recognition model is not type of (tf.)keras.models.Model,"
-                "Then its find_embeddings method must be implemented its own module."
-                f"However {self.model_name}'s model type is {type(self.model)}"
-            )
-        # model.predict causes memory issue when it is called in a for loop
-        # embedding = model.predict(img, verbose=0)[0].tolist()
-        return self.model(img, training=False).numpy()[0].tolist()
+    @abstractmethod
+    def find_embeddings(self, img: np.ndarray) -> List[float]:
+        pass
diff --git a/deepface/modules/detection.py b/deepface/modules/detection.py
index ae92aaaa..1973c56d 100644
--- a/deepface/modules/detection.py
+++ b/deepface/modules/detection.py
@@ -3,6 +3,7 @@
 
 # 3rd part dependencies
 import numpy as np
+from PIL import Image
 
 # project dependencies
 from deepface.commons import functions
@@ -40,8 +41,11 @@ def extract_faces(
         grayscale (boolean): extracting faces in rgb or gray scale
 
     Returns:
-        list of dictionaries. Each dictionary will have facial image itself (RGB),
-        extracted area from the original image and confidence score.
+        results (List[Dict[str, Any]]): A list of dictionaries, where each dictionary contains:
+        - "face" (np.ndarray): The detected face as a NumPy array.
+        - "facial_area" (List[float]): The detected face's regions represented as a list of floats.
+        - "confidence" (float): The confidence score associated with the detected face.
+ """ @@ -70,3 +74,31 @@ def extract_faces( resp_objs.append(resp_obj) return resp_objs + + +def align_face( + img: np.ndarray, + left_eye: Union[list, tuple], + right_eye: Union[list, tuple], +) -> np.ndarray: + """ + Align a given image horizantally with respect to their left and right eye locations + Args: + img (np.ndarray): pre-loaded image with detected face + left_eye (list or tuple): coordinates of left eye with respect to the you + right_eye(list or tuple): coordinates of right eye with respect to the you + Returns: + img (np.ndarray): aligned facial image + """ + # if eye could not be detected for the given image, return image itself + if left_eye is None or right_eye is None: + return img + + # sometimes unexpectedly detected images come with nil dimensions + if img.shape[0] == 0 or img.shape[1] == 0: + return img + + angle = float(np.degrees(np.arctan2(right_eye[1] - left_eye[1], right_eye[0] - left_eye[0]))) + img = Image.fromarray(img) + img = np.array(img.rotate(angle)) + return img diff --git a/deepface/modules/modeling.py b/deepface/modules/modeling.py index f1b4bfb1..f9c6464d 100644 --- a/deepface/modules/modeling.py +++ b/deepface/modules/modeling.py @@ -2,16 +2,7 @@ from typing import Any # project dependencies -from deepface.basemodels import ( - VGGFace, - OpenFace, - Facenet, - FbDeepFace, - DeepID, - DlibResNet, - ArcFace, - SFace, -) +from deepface.basemodels import VGGFace, OpenFace, FbDeepFace, DeepID, ArcFace, SFace, Dlib, Facenet from deepface.extendedmodels import Age, Gender, Race, Emotion @@ -31,19 +22,19 @@ def build_model(model_name: str) -> Any: global model_obj models = { - "VGG-Face": VGGFace.VggFace, - "OpenFace": OpenFace.OpenFace, - "Facenet": Facenet.FaceNet128d, - "Facenet512": Facenet.FaceNet512d, - "DeepFace": FbDeepFace.DeepFace, - "DeepID": DeepID.DeepId, - "Dlib": DlibResNet.Dlib, - "ArcFace": ArcFace.ArcFace, - "SFace": SFace.SFace, - "Emotion": Emotion.FacialExpression, - "Age": Age.ApparentAge, - "Gender": Gender.Gender, - "Race": Race.Race, + "VGG-Face": VGGFace.VggFaceClient, + "OpenFace": OpenFace.OpenFaceClient, + "Facenet": Facenet.FaceNet128dClient, + "Facenet512": Facenet.FaceNet512dClient, + "DeepFace": FbDeepFace.DeepFaceClient, + "DeepID": DeepID.DeepIdClient, + "Dlib": Dlib.DlibClient, + "ArcFace": ArcFace.ArcFaceClient, + "SFace": SFace.SFaceClient, + "Emotion": Emotion.EmotionClient, + "Age": Age.ApparentAgeClient, + "Gender": Gender.GenderClient, + "Race": Race.RaceClient, } if not "model_obj" in globals(): diff --git a/tests/visual-test.py b/tests/visual-test.py index dbdf54ff..78e79379 100644 --- a/tests/visual-test.py +++ b/tests/visual-test.py @@ -14,11 +14,12 @@ "Facenet512", "OpenFace", "DeepFace", - "DeepID", + # "DeepID", "Dlib", "ArcFace", "SFace", ] + detector_backends = ["opencv", "ssd", "dlib", "mtcnn", "retinaface"] @@ -44,10 +45,11 @@ for df in dfs: logger.info(df) + # extract faces for detector_backend in detector_backends: face_objs = DeepFace.extract_faces( - img_path="dataset/img1.jpg", detector_backend=detector_backend + img_path="dataset/img11.jpg", detector_backend=detector_backend ) for face_obj in face_objs: face = face_obj["face"]