Merge pull request #10 from jhj0517/fix/no-face

Handle the case where the video contains frames with no detected faces
jhj0517 · Jan 14, 2025 · 5051132
2 parents 628fb9f + f5e63ec
commit 5051132
Show file tree

Hide file tree

Showing 2 changed files with 30 additions and 32 deletions.
diff --git a/modules/inferencer/moondream_inferencer.py b/modules/inferencer/moondream_inferencer.py
@@ -64,41 +64,39 @@ def process_image(
             faces = self.model.detect(enc_image, "face")["objects"]
             faces.sort(key=lambda x: (x["x_min"], x["y_min"]))
 
-            if not faces:
-                return None, "No faces detected in the image."
-
             face_boxes = []
             gaze_points = []
 
-            for face in faces:
-                # Add face bounding box regardless of gaze detection
-                face_box = (
-                    face["x_min"] * pil_image.width,
-                    face["y_min"] * pil_image.height,
-                    (face["x_max"] - face["x_min"]) * pil_image.width,
-                    (face["y_max"] - face["y_min"]) * pil_image.height,
-                )
-                face_center = (
-                    (face["x_min"] + face["x_max"]) / 2,
-                    (face["y_min"] + face["y_max"]) / 2
-                )
-                face_boxes.append(face_box)
-
-                # Try to detect gaze
-                gaze_settings = {
-                    "prioritize_accuracy": use_ensemble,
-                    "flip_enc_img": flip_enc_image
-                }
-                gaze = self.model.detect_gaze(enc_image, face=face, eye=face_center, unstable_settings=gaze_settings)["gaze"]
-
-                if gaze is not None:
-                    gaze_point = (
-                        gaze["x"] * pil_image.width,
-                        gaze["y"] * pil_image.height,
+            if faces:
+                for face in faces:
+                    # Add face bounding box regardless of gaze detection
+                    face_box = (
+                        face["x_min"] * pil_image.width,
+                        face["y_min"] * pil_image.height,
+                        (face["x_max"] - face["x_min"]) * pil_image.width,
+                        (face["y_max"] - face["y_min"]) * pil_image.height,
+                    )
+                    face_center = (
+                        (face["x_min"] + face["x_max"]) / 2,
+                        (face["y_min"] + face["y_max"]) / 2
                     )
-                    gaze_points.append(gaze_point)
-                else:
-                    gaze_points.append(None)
+                    face_boxes.append(face_box)
+
+                    # Try to detect gaze
+                    gaze_settings = {
+                        "prioritize_accuracy": use_ensemble,
+                        "flip_enc_img": flip_enc_image
+                    }
+                    gaze = self.model.detect_gaze(enc_image, face=face, eye=face_center, unstable_settings=gaze_settings)["gaze"]
+
+                    if gaze is not None:
+                        gaze_point = (
+                            gaze["x"] * pil_image.width,
+                            gaze["y"] * pil_image.height,
+                        )
+                        gaze_points.append(gaze_point)
+                    else:
+                        gaze_points.append(None)
 
             # Create visualization
             image_array = np.array(pil_image)

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,7 +1,7 @@
 [project]
 name = "comfyui-moondream-gaze-detection"
 description = "Moondream's gaze detection feature wrapper node."
-version = "1.0.3"
+version = "1.0.4"
 license = {file = "LICENSE"}
 dependencies = ["matplotlib==3.10.0", "pyvips==2.2.3", "accelerate>=0.32.1", "opencv-python"]