Integrated Buffer with heuristic camera transformation estimation

Improved heuristic transformation estimation algorithms. Enhanced `demo_sequential` procedure with depth and reprojection visualizations.
child-lab-uj · Nov 21, 2024 · 3c2f33b
1 parent 5b8c2ee
commit 3c2f33b
Show file tree

Hide file tree

Showing 10 changed files with 422 additions and 232 deletions.
diff --git a/child_lab_framework/_procedure/demo_sequential.py b/child_lab_framework/_procedure/demo_sequential.py
@@ -4,18 +4,21 @@
 
 import torch
 
+from ..core import transformation
 from ..core.video import Format, Input, Reader, Writer
 from ..logging import Logger
 from ..task import depth, face, gaze, pose
-from ..task.camera import transformation
+from ..task.camera.transformation import heuristic as heuristic_transformation
 from ..task.visualization import Configuration as VisualizationConfiguration
 from ..task.visualization import Visualizer
 
 BATCH_SIZE = 32
 
 
 def main(
-    inputs: tuple[Input, Input, Input], device: torch.device, output_directory: Path
+    inputs: tuple[Input, Input, Input],
+    device: torch.device,
+    output_directory: Path,
 ) -> None:
     # ignore exceeded allocation limit on MPS and CUDA - very important!
     os.environ['PYTORCH_MPS_HIGH_WATERMARK_RATIO'] = '0.0'
@@ -38,6 +41,7 @@ def main(
         width=ceiling_properties.width,
         fps=ceiling_properties.fps,
     )
+    window_left_properties = window_left_reader.properties
 
     window_right_reader = Reader(
         window_right,
@@ -46,83 +50,119 @@ def main(
         width=ceiling_properties.width,
         fps=ceiling_properties.fps,
     )
+    window_right_properties = window_right_reader.properties
 
-    depth_estimator = depth.Estimator(executor, device, input=ceiling_reader.properties)
+    depth_estimator = depth.Estimator(executor, device, input=ceiling_properties)
 
-    transformation_estimator = transformation.heuristic.Estimator(
+    transformation_buffer: transformation.Buffer[str] = transformation.Buffer()
+
+    window_left_to_ceiling_transformation_estimator = heuristic_transformation.Estimator(
+        executor,
+        transformation_buffer,
+        window_left_properties,
+        ceiling_properties,
+        keypoint_threshold=0.35,
+    )
+
+    window_right_to_ceiling_transformation_estimator = heuristic_transformation.Estimator(
         executor,
-        window_left_reader.properties,
-        ceiling_reader.properties,
+        transformation_buffer,
+        window_right_properties,
+        ceiling_properties,
         keypoint_threshold=0.35,
     )
 
     pose_estimator = pose.Estimator(
         executor,
         device,
-        input=ceiling_reader.properties,
+        input=ceiling_properties,
         max_detections=2,
         threshold=0.5,
     )
 
     face_estimator = face.Estimator(
         executor,
-        input=ceiling_reader.properties,
+        input=ceiling_properties,
         threshold=0.1,
     )
 
     window_left_gaze_estimator = gaze.Estimator(
         executor,
-        input=window_left_reader.properties,
+        input=window_left_properties,
     )
 
     window_right_gaze_estimator = gaze.Estimator(
         executor,
-        input=window_right_reader.properties,
+        input=window_right_properties,
     )
 
     ceiling_gaze_estimator = gaze.ceiling_projection.Estimator(
         executor,
-        ceiling_reader.properties,
-        window_left_reader.properties,
-        window_right_reader.properties,
+        ceiling_properties,
+        window_left_properties,
+        window_right_properties,
     )
 
     # social_distance_estimator = social_distance.Estimator(executor)
     # social_distance_logger = social_distance.FileLogger('dev/output/distance.csv')
 
     ceiling_visualizer = Visualizer(
         executor,
-        properties=ceiling_reader.properties,
+        properties=ceiling_properties,
         configuration=VisualizationConfiguration(),
     )
 
     window_left_visualizer = Visualizer(
         executor,
-        properties=window_left_reader.properties,
+        properties=window_left_properties,
         configuration=VisualizationConfiguration(),
     )
 
     window_right_visualizer = Visualizer(
         executor,
-        properties=window_right_reader.properties,
+        properties=window_right_properties,
         configuration=VisualizationConfiguration(),
     )
 
     ceiling_writer = Writer(
         output_directory / (ceiling.name + '.mp4'),
-        ceiling_reader.properties,
+        ceiling_properties,
+        output_format=Format.MP4,
+    )
+
+    ceiling_projection_writer = Writer(
+        output_directory / (ceiling.name + '_projections.mp4'),
+        ceiling_properties,
+        output_format=Format.MP4,
+    )
+
+    ceiling_depth_writer = Writer(
+        output_directory / (ceiling.name + '_depth.mp4'),
+        ceiling_properties,
+        output_format=Format.MP4,
+    )
+
+    window_left_depth_writer = Writer(
+        output_directory / (window_left.name + '_depth.mp4'),
+        window_left_properties,
+        output_format=Format.MP4,
+    )
+
+    window_right_depth_writer = Writer(
+        output_directory / (window_right.name + '_depth.mp4'),
+        window_right_properties,
         output_format=Format.MP4,
     )
 
     window_left_writer = Writer(
         output_directory / (window_left.name + '.mp4'),
-        window_left_reader.properties,
+        window_left_properties,
         output_format=Format.MP4,
     )
 
     window_right_writer = Writer(
         output_directory / (window_right.name + '.mp4'),
-        window_right_reader.properties,
+        window_right_properties,
         output_format=Format.MP4,
     )
 
@@ -159,34 +199,54 @@ def main(
             Logger.error('window_right_poses == None')
 
         Logger.info('Estimating depth...')
-        ceiling_depth = depth_estimator.predict(ceiling_frames[0])
+        ceiling_depth = depth_estimator.predict(
+            ceiling_frames[0],
+            ceiling_properties,
+        )
+        window_left_depth = depth_estimator.predict(
+            window_left_frames[0],
+            window_left_properties,
+        )
+        window_right_depth = depth_estimator.predict(
+            window_right_frames[0],
+            window_right_properties,
+        )
+
         ceiling_depths = [ceiling_depth for _ in range(n_frames)]
+        window_left_depths = [window_left_depth for _ in range(n_frames)]
+        window_right_depths = [window_right_depth for _ in range(n_frames)]
         Logger.info('Done!')
 
         Logger.info('Estimating transformations...')
         window_left_to_ceiling = (
-            transformation_estimator.predict_batch(
+            window_left_to_ceiling_transformation_estimator.predict_batch(
                 ceiling_poses,
                 window_left_poses,
                 ceiling_depths,
-                [None for _ in range(n_frames)],  # type: ignore  # safe to pass
+                window_left_depths,
             )
             if ceiling_poses is not None and window_left_poses is not None
             else None
         )
 
         window_right_to_ceiling = (
-            transformation_estimator.predict_batch(
+            window_right_to_ceiling_transformation_estimator.predict_batch(
                 ceiling_poses,
                 window_right_poses,
                 ceiling_depths,
-                [None for _ in range(n_frames)],  # type: ignore  # safe to pass
+                window_right_depths,
             )
             if ceiling_poses is not None and window_right_poses is not None
             else None
         )
         Logger.info('Done!')
 
+        if window_left_to_ceiling is None:
+            Logger.error('window_left_to_ceiling == None')
+
+        if window_right_to_ceiling is None:
+            Logger.error('window_right_to_ceiling == None')
+
         Logger.info('Detecting faces...')
         window_left_faces = (
             face_estimator.predict_batch(window_left_frames, window_left_poses)
@@ -237,7 +297,29 @@ def main(
         )
         Logger.info('Done!')
 
+        if window_left_gazes is None:
+            Logger.error('window_left_gazes == None')
+
+        if window_right_gazes is None:
+            Logger.error('window_right_gazes == None')
+
         Logger.info('Visualizing results...')
+        ceiling_projection_annotated_frames = ceiling_visualizer.annotate_batch(
+            ceiling_frames,
+            [
+                p.unproject(window_left_properties.calibration, ceiling_depth)
+                .transform(t.inverse)
+                .project(ceiling_properties.calibration)
+                for p, t in zip(window_left_poses or [], window_left_to_ceiling or [])
+            ],
+            [
+                p.unproject(window_right_properties.calibration, ceiling_depth)
+                .transform(t.inverse)
+                .project(ceiling_properties.calibration)
+                for p, t in zip(window_right_poses or [], window_right_to_ceiling or [])
+            ],
+        )
+
         ceiling_annotated_frames = ceiling_visualizer.annotate_batch(
             ceiling_frames,
             ceiling_poses,
@@ -260,6 +342,16 @@ def main(
         Logger.info('Done!')
 
         Logger.info('Saving results...')
+        ceiling_projection_writer.write_batch(ceiling_projection_annotated_frames)
+
+        ceiling_depth_writer.write_batch([depth.to_frame(d) for d in ceiling_depths])
+        window_left_depth_writer.write_batch(
+            [depth.to_frame(d) for d in window_left_depths]
+        )
+        window_right_depth_writer.write_batch(
+            [depth.to_frame(d) for d in window_right_depths]
+        )
+
         ceiling_writer.write_batch(ceiling_annotated_frames)
         window_left_writer.write_batch(window_left_annotated_frames)
         window_right_writer.write_batch(window_right_annotated_frames)

diff --git a/child_lab_framework/core/algebra.py b/child_lab_framework/core/algebra.py
@@ -1,6 +1,8 @@
 from enum import IntEnum
+from typing import Literal
 
 import numpy as np
+from scipy.spatial.transform import Rotation
 
 from ..typing.array import FloatArray1, FloatArray2, FloatArray3, FloatArray6
 from .calibration import Calibration
@@ -31,6 +33,20 @@ def rotation_matrix(angle: float, axis: Axis) -> FloatArray2:
             )
 
 
+def euler_angles_from_rotation_matrix(
+    rotation: FloatArray2,
+) -> np.ndarray[tuple[Literal[3]], np.dtype[np.float32]]:
+    return (
+        Rotation.from_matrix(rotation).as_euler('xyz', degrees=False).astype(np.float32)
+    )
+
+
+def rotation_matrix_from_euler_angles(
+    angles: np.ndarray[tuple[Literal[3]], np.dtype[np.float32]],
+) -> FloatArray2:
+    return Rotation.from_euler('xyz', angles, degrees=False).as_matrix()
+
+
 def normalized(vecs: FloatArray2) -> FloatArray2:
     norm = np.linalg.norm(vecs, ord=2.0, axis=1)
     return vecs / norm

diff --git a/child_lab_framework/core/video.py b/child_lab_framework/core/video.py
@@ -19,6 +19,7 @@ class Format(Enum):
 
 @dataclass(frozen=True, repr=False)
 class Properties:
+    name: str
     length: int
     height: int
     width: int
@@ -93,6 +94,7 @@ def __init__(
         )
 
         self.__input_properties = Properties(
+            input.name,
             input_length,
             input_height,
             input_width,
@@ -102,6 +104,7 @@ def __init__(
 
         # Output properties with maybe mimicked parameters
         self.properties = Properties(
+            input.name,
             input_length * self.__frame_repetitions,
             mimicked_height,
             mimicked_width,