diff --git a/deepface/DeepFace.py b/deepface/DeepFace.py index 5722db7d..c235ac89 100644 --- a/deepface/DeepFace.py +++ b/deepface/DeepFace.py @@ -2,7 +2,7 @@ import os import warnings import logging -from typing import Any, Dict, List, Tuple, Union +from typing import Any, Dict, List, Tuple, Union, Optional # 3rd party dependencies import numpy as np @@ -65,34 +65,49 @@ def verify( Args: img1_path (str or np.ndarray): Path to the first image. Accepts exact image path as a string, numpy array (BGR), or base64 encoded images. + img2_path (str or np.ndarray): Path to the second image. Accepts exact image path as a string, numpy array (BGR), or base64 encoded images. + model_name (str): Model for face recognition. Options: VGG-Face, Facenet, Facenet512, OpenFace, DeepFace, DeepID, Dlib, ArcFace and SFace (default is VGG-Face). + detector_backend (string): face detector backend. Options: 'opencv', 'retinaface', - 'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8' (default is opencv) + 'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8' (default is opencv). + distance_metric (string): Metric for measuring similarity. Options: 'cosine', 'euclidean', 'euclidean_l2' (default is cosine). + enforce_detection (boolean): If no face is detected in an image, raise an exception. Set to False to avoid the exception for low-resolution images (default is True). + align (bool): Flag to enable face alignment (default is True). + normalization (string): Normalize the input image before feeding it to the model. Options: base, raw, Facenet, Facenet2018, VGGFace, VGGFace2, ArcFace (default is base) + Returns: result (dict): A dictionary containing verification results with following keys. + - 'verified' (bool): Indicates whether the images represent the same person (True) or different persons (False). + - 'distance' (float): The distance measure between the face vectors. A lower distance indicates higher similarity. + - 'max_threshold_to_verify' (float): The maximum threshold used for verification. If the distance is below this threshold, the images are considered a match. + - 'model' (str): The chosen face recognition model. + - 'similarity_metric' (str): The chosen similarity metric for measuring distances. + - 'facial_areas' (dict): Rectangular regions of interest for faces in both images. - 'img1': {'x': int, 'y': int, 'w': int, 'h': int} Region of interest for the first image. - 'img2': {'x': int, 'y': int, 'w': int, 'h': int} Region of interest for the second image. + - 'time' (float): Time taken for the verification process in seconds. """ @@ -122,37 +137,51 @@ def analyze( img_path (str or np.ndarray): The exact path to the image, a numpy array in BGR format, or a base64 encoded image. If the source image contains multiple faces, the result will include information for each detected face. + actions (tuple): Attributes to analyze. The default is ('age', 'gender', 'emotion', 'race'). You can exclude some of these attributes from the analysis if needed. + enforce_detection (boolean): If no face is detected in an image, raise an exception. Set to False to avoid the exception for low-resolution images (default is True). + detector_backend (string): face detector backend. Options: 'opencv', 'retinaface', 'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8' (default is opencv). + distance_metric (string): Metric for measuring similarity. Options: 'cosine', 'euclidean', 'euclidean_l2' (default is cosine). + align (boolean): Perform alignment based on the eye positions (default is True). + silent (boolean): Suppress or allow some log messages for a quieter analysis process (default is False). + Returns: results (List[Dict[str, Any]]): A list of dictionaries, where each dictionary represents the analysis results for a detected face. Each dictionary in the list contains the following keys: + - 'region' (dict): Represents the rectangular region of the detected face in the image. - 'x': x-coordinate of the top-left corner of the face. - 'y': y-coordinate of the top-left corner of the face. - 'w': Width of the detected face region. - 'h': Height of the detected face region. + - 'age' (float): Estimated age of the detected face. + - 'face_confidence' (float): Confidence score for the detected face. Indicates the reliability of the face detection. + - 'dominant_gender' (str): The dominant gender in the detected face. - Either "Man" or "Woman." + Either "Man" or "Woman". + - 'gender' (dict): Confidence scores for each gender category. - 'Man': Confidence score for the male gender. - 'Woman': Confidence score for the female gender. + - 'dominant_emotion' (str): The dominant emotion in the detected face. Possible values include "sad," "angry," "surprise," "fear," "happy," - "disgust," and "neutral." + "disgust," and "neutral" + - 'emotion' (dict): Confidence scores for each emotion category. - 'sad': Confidence score for sadness. - 'angry': Confidence score for anger. @@ -161,9 +190,11 @@ def analyze( - 'happy': Confidence score for happiness. - 'disgust': Confidence score for disgust. - 'neutral': Confidence score for neutrality. + - 'dominant_race' (str): The dominant race in the detected face. Possible values include "indian," "asian," "latino hispanic," "black," "middle eastern," and "white." + - 'race' (dict): Confidence scores for each race category. - 'indian': Confidence score for Indian ethnicity. - 'asian': Confidence score for Asian ethnicity. @@ -190,6 +221,7 @@ def find( enforce_detection: bool = True, detector_backend: str = "opencv", align: bool = True, + threshold: Optional[float] = None, normalization: str = "base", silent: bool = False, ) -> List[pd.DataFrame]: @@ -199,31 +231,51 @@ def find( img_path (str or np.ndarray): The exact path to the image, a numpy array in BGR format, or a base64 encoded image. If the source image contains multiple faces, the result will include information for each detected face. + db_path (string): Path to the folder containing image files. All detected faces in the database will be considered in the decision-making process. + model_name (str): Model for face recognition. Options: VGG-Face, Facenet, Facenet512, OpenFace, DeepFace, DeepID, Dlib, ArcFace and SFace (default is VGG-Face). + distance_metric (string): Metric for measuring similarity. Options: 'cosine', 'euclidean', 'euclidean_l2' (default is cosine). + enforce_detection (boolean): If no face is detected in an image, raise an exception. Set to False to avoid the exception for low-resolution images (default is True). + detector_backend (string): face detector backend. Options: 'opencv', 'retinaface', 'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8' (default is opencv). + align (boolean): Perform alignment based on the eye positions (default is True). + + threshold (float): Specify a threshold to determine whether a pair represents the same + person or different individuals. This threshold is used for comparing distances. + If left unset, default pre-tuned threshold values will be applied based on the specified + model name and distance metric (default is None). + normalization (string): Normalize the input image before feeding it to the model. Options: base, raw, Facenet, Facenet2018, VGGFace, VGGFace2, ArcFace (default is base). + silent (boolean): Suppress or allow some log messages for a quieter analysis process (default is False). + Returns: results (List[pd.DataFrame]): A list of pandas dataframes. Each dataframe corresponds to the identity information for an individual detected in the source image. The DataFrame columns include: + - 'identity': Identity label of the detected individual. + - 'target_x', 'target_y', 'target_w', 'target_h': Bounding box coordinates of the target face in the database. + - 'source_x', 'source_y', 'source_w', 'source_h': Bounding box coordinates of the detected face in the source image. - - '{model_name}_{distance_metric}': Similarity score between the faces based on the + + - 'threshold': threshold to determine a pair whether same person or different persons + + - 'distance': Similarity score between the faces based on the specified model and distance metric """ return recognition.find( @@ -234,6 +286,7 @@ def find( enforce_detection=enforce_detection, detector_backend=detector_backend, align=align, + threshold=threshold, normalization=normalization, silent=silent, ) @@ -254,27 +307,36 @@ def represent( img_path (str or np.ndarray): The exact path to the image, a numpy array in BGR format, or a base64 encoded image. If the source image contains multiple faces, the result will include information for each detected face. + model_name (str): Model for face recognition. Options: VGG-Face, Facenet, Facenet512, OpenFace, DeepFace, DeepID, Dlib, ArcFace and SFace (default is VGG-Face.). + enforce_detection (boolean): If no face is detected in an image, raise an exception. Default is True. Set to False to avoid the exception for low-resolution images (default is True). + detector_backend (string): face detector backend. Options: 'opencv', 'retinaface', 'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8' (default is opencv). + align (boolean): Perform alignment based on the eye positions (default is True). + normalization (string): Normalize the input image before feeding it to the model. Default is base. Options: base, raw, Facenet, Facenet2018, VGGFace, VGGFace2, ArcFace (default is base). + Returns: results (List[Dict[str, Any]]): A list of dictionaries, each containing the following fields: + - embedding (np.array): Multidimensional vector representing facial features. The number of dimensions varies based on the reference model (e.g., FaceNet returns 128 dimensions, VGG-Face returns 4096 dimensions). + - facial_area (dict): Detected facial area by face detection in dictionary format. Contains 'x' and 'y' as the left-corner point, and 'w' and 'h' as the width and height. If `detector_backend` is set to 'skip', it represents the full image area and is nonsensical. + - face_confidence (float): Confidence score of face detection. If `detector_backend` is set to 'skip', the confidence will be 0 and is nonsensical. """ @@ -355,19 +417,28 @@ def extract_faces( Args: img_path (str or np.ndarray): Path to the first image. Accepts exact image path as a string, numpy array (BGR), or base64 encoded images. + target_size (tuple): final shape of facial image. black pixels will be added to resize the image (default is (224, 224)). + detector_backend (string): face detector backend. Options: 'opencv', 'retinaface', - 'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8' (default is opencv) + 'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8' (default is opencv). + enforce_detection (boolean): If no face is detected in an image, raise an exception. Set to False to avoid the exception for low-resolution images (default is True). + align (bool): Flag to enable face alignment (default is True). + grayscale (boolean): Flag to convert the image to grayscale before processing (default is False). + Returns: results (List[Dict[str, Any]]): A list of dictionaries, where each dictionary contains: + - "face" (np.ndarray): The detected face as a NumPy array. + - "facial_area" (List[float]): The detected face's regions represented as a list of floats. + - "confidence" (float): The confidence score associated with the detected face. """ diff --git a/deepface/detectors/Ssd.py b/deepface/detectors/Ssd.py index 50bc4cd2..7793e5f0 100644 --- a/deepface/detectors/Ssd.py +++ b/deepface/detectors/Ssd.py @@ -12,7 +12,7 @@ logger = Logger(module="detectors.SsdWrapper") -# pylint: disable=line-too-long +# pylint: disable=line-too-long, c-extension-no-member class SsdClient(Detector): diff --git a/deepface/detectors/YuNet.py b/deepface/detectors/YuNet.py index 2f7e9732..d4970a7b 100644 --- a/deepface/detectors/YuNet.py +++ b/deepface/detectors/YuNet.py @@ -21,6 +21,13 @@ def build_model(self) -> Any: Returns: model (Any) """ + + opencv_version = cv2.__version__.split(".") + + if len(opencv_version) > 2 and int(opencv_version[0]) == 4 and int(opencv_version[1]) < 8: + # min requirement: https://github.com/opencv/opencv_zoo/issues/172 + raise ValueError(f"YuNet requires opencv-python >= 4.8 but you have {cv2.__version__}") + # pylint: disable=C0301 url = "https://github.com/opencv/opencv_zoo/raw/main/models/face_detection_yunet/face_detection_yunet_2023mar.onnx" file_name = "face_detection_yunet_2023mar.onnx" @@ -67,7 +74,7 @@ def detect_faces( """ # FaceDetector.detect_faces does not support score_threshold parameter. # We can set it via environment variable. - score_threshold = os.environ.get("yunet_score_threshold", "0.9") + score_threshold = float(os.environ.get("yunet_score_threshold", "0.9")) resp = [] detected_face = None img_region = [0, 0, img.shape[1], img.shape[0]] diff --git a/deepface/modules/recognition.py b/deepface/modules/recognition.py index 60a337f8..fe9e8143 100644 --- a/deepface/modules/recognition.py +++ b/deepface/modules/recognition.py @@ -1,7 +1,7 @@ # built-in dependencies import os import pickle -from typing import List, Union +from typing import List, Union, Optional import time # 3rd party dependencies @@ -25,6 +25,7 @@ def find( enforce_detection: bool = True, detector_backend: str = "opencv", align: bool = True, + threshold: Optional[float] = None, normalization: str = "base", silent: bool = False, ) -> List[pd.DataFrame]: @@ -53,6 +54,11 @@ def find( align (boolean): Perform alignment based on the eye positions. + threshold (float): Specify a threshold to determine whether a pair represents the same + person or different individuals. This threshold is used for comparing distances. + If left unset, default pre-tuned threshold values will be applied based on the specified + model name and distance metric (default is None). + normalization (string): Normalize the input image before feeding it to the model. Default is base. Options: base, raw, Facenet, Facenet2018, VGGFace, VGGFace2, ArcFace @@ -64,11 +70,16 @@ def find( The DataFrame columns include: - 'identity': Identity label of the detected individual. + - 'target_x', 'target_y', 'target_w', 'target_h': Bounding box coordinates of the target face in the database. + - 'source_x', 'source_y', 'source_w', 'source_h': Bounding box coordinates of the detected face in the source image. - - '{model_name}_{distance_metric}': Similarity score between the faces based on the + + - 'threshold': threshold to determine a pair whether same person or different persons + + - 'distance': Similarity score between the faces based on the specified model and distance metric """ @@ -248,16 +259,15 @@ def find( distances.append(distance) # --------------------------- + target_threshold = threshold or dst.findThreshold(model_name, distance_metric) - result_df[f"{model_name}_{distance_metric}"] = distances + result_df["threshold"] = target_threshold + result_df["distance"] = distances - threshold = dst.findThreshold(model_name, distance_metric) result_df = result_df.drop(columns=[f"{model_name}_representation"]) # pylint: disable=unsubscriptable-object - result_df = result_df[result_df[f"{model_name}_{distance_metric}"] <= threshold] - result_df = result_df.sort_values( - by=[f"{model_name}_{distance_metric}"], ascending=True - ).reset_index(drop=True) + result_df = result_df[result_df["distance"] <= target_threshold] + result_df = result_df.sort_values(by=["distance"], ascending=True).reset_index(drop=True) resp_obj.append(result_df) diff --git a/setup.py b/setup.py index b34c34b8..81c803bf 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ setuptools.setup( name="deepface", - version="0.0.82", + version="0.0.83", author="Sefik Ilkin Serengil", author_email="serengil@gmail.com", description="A Lightweight Face Recognition and Facial Attribute Analysis Framework (Age, Gender, Emotion, Race) for Python", diff --git a/tests/test_find.py b/tests/test_find.py index 4a1007e5..8d9e34f9 100644 --- a/tests/test_find.py +++ b/tests/test_find.py @@ -21,7 +21,7 @@ def test_find_with_exact_path(): assert identity_df.shape[0] > 0 # validate reproducability - assert identity_df["VGG-Face_cosine"].values[0] < threshold + assert identity_df["distance"].values[0] < threshold df = df[df["identity"] != img_path] logger.debug(df.head()) @@ -42,7 +42,7 @@ def test_find_with_array_input(): assert identity_df.shape[0] > 0 # validate reproducability - assert identity_df["VGG-Face_cosine"].values[0] < threshold + assert identity_df["distance"].values[0] < threshold df = df[df["identity"] != img_path] logger.debug(df.head()) @@ -65,7 +65,7 @@ def test_find_with_extracted_faces(): assert identity_df.shape[0] > 0 # validate reproducability - assert identity_df["VGG-Face_cosine"].values[0] < threshold + assert identity_df["distance"].values[0] < threshold df = df[df["identity"] != img_path] logger.debug(df.head()) diff --git a/tests/visual-test.py b/tests/visual-test.py index 06f17117..9188412e 100644 --- a/tests/visual-test.py +++ b/tests/visual-test.py @@ -20,7 +20,7 @@ "SFace", ] -detector_backends = ["opencv", "ssd", "dlib", "mtcnn", "retinaface"] +detector_backends = ["opencv", "ssd", "dlib", "mtcnn", "retinaface", "yunet"] # verification