ami-iit · diegoferigo · Jul 15, 2024 · Jul 12, 2024 · Jul 12, 2024 · Jul 12, 2024
@@ -5,7 +5,7 @@ dependencies:
   # ===========================
   # Dependencies from setup.cfg
   # ===========================
-  - python=3.11
+  - python >= 3.12.0
   - coloredlogs
   - jax >= 0.4.13
   - jaxlib >= 0.4.13
@@ -30,6 +30,7 @@ dependencies:
   - lxml
   - mediapy
   - mujoco >= 3.0.0
+  - scipy >= 1.14.0
   # ==========================
   # Documentation dependencies
   # ==========================

@@ -69,6 +69,7 @@ viz = [
     "lxml",
     "mediapy",
     "mujoco >= 3.0.0",
+    "scipy >= 1.14.0",
 ]
 all = [
     "jaxsim[style,testing,viz]",

@@ -1,12 +1,17 @@
+from __future__ import annotations
+
 import dataclasses
 import pathlib
 import tempfile
 import warnings
-from typing import Any
+from typing import Any, Sequence
 
 import mujoco as mj
+import numpy as np
+import numpy.typing as npt
 import rod.urdf.exporter
 from lxml import etree as ET
+from scipy.spatial.transform import Rotation
 
 
 def load_rod_model(
@@ -161,7 +166,12 @@ def convert(
         plane_normal: tuple[float, float, float] = (0, 0, 1),
         heightmap: bool | None = None,
         heightmap_samples_xy: tuple[int, int] = (101, 101),
-        cameras: list[dict[str, str]] | dict[str, str] | None = None,
+        cameras: (
+            MujocoCamera
+            | Sequence[MujocoCamera]
+            | dict[str, str]
+            | Sequence[dict[str, str]]
+        ) = (),
     ) -> tuple[str, dict[str, Any]]:
         """
         Converts a ROD model to a Mujoco MJCF string.
@@ -172,10 +182,10 @@ def convert(
             plane_normal: The normal vector of the plane.
             heightmap: Whether to generate a heightmap.
             heightmap_samples_xy: The number of points in the heightmap grid.
-            cameras: The list of cameras to add to the scene.
+            cameras: The custom cameras to add to the scene.
 
         Returns:
-            tuple: A tuple containing the MJCF string and the dictionary of assets.
+            A tuple containing the MJCF string and the dictionary of assets.
         """
 
         # -------------------------------------
@@ -250,7 +260,6 @@ def convert(
 
         parser = ET.XMLParser(remove_blank_text=True)
         root: ET._Element = ET.fromstring(text=urdf_string.encode(), parser=parser)
-        import numpy as np
 
         # Give a tiny radius to all dummy spheres
         for geometry in root.findall(".//visual/geometry[sphere]"):
@@ -478,14 +487,17 @@ def convert(
             fovy="60",
         )
 
-        # Add user-defined camera
-        cameras = cameras if cameras is not None else []
-        for camera in cameras if isinstance(cameras, list) else [cameras]:
-            mj_camera = MujocoCamera.build(**camera)
-            _ = ET.SubElement(
-                worldbody_element, "camera", dataclasses.asdict(mj_camera)
+        # Add user-defined camera.
+        for camera in cameras if isinstance(cameras, Sequence) else [cameras]:
+
+            mj_camera = (
+                camera
+                if isinstance(camera, MujocoCamera)
+                else MujocoCamera.build(**camera)
             )
 
+            _ = ET.SubElement(worldbody_element, "camera", mj_camera.asdict())
+
         # ------------------------------------------------
         # Add a light following the  CoM of the first link
         # ------------------------------------------------
@@ -598,21 +610,114 @@ def convert(
 
 @dataclasses.dataclass
 class MujocoCamera:
-    name: str
-    mode: str
-    pos: str
-    xyaxes: str
-    fovy: str
+    """
+    Helper class storing parameters of a Mujoco camera.
+
+    Refer to the official documentation for more details:
+    https://mujoco.readthedocs.io/en/stable/XMLreference.html#body-camera
+    """
+
+    mode: str = "fixed"
+
+    target: str | None = None
+    fovy: str = "45"
+    pos: str = "0 0 0"
+
+    quat: str | None = None
+    axisangle: str | None = None
+    xyaxes: str | None = None
+    zaxis: str | None = None
+    euler: str | None = None
+
+    name: str | None = None
 
     @classmethod
-    def build(cls, **kwargs):
+    def build(cls, **kwargs) -> MujocoCamera:
+
         if not all(isinstance(value, str) for value in kwargs.values()):
             raise ValueError("Values must be strings")
 
-        if len(kwargs["pos"].split()) != 3:
-            raise ValueError("pos must have three values separated by space")
+        return cls(**kwargs)
 
-        if len(kwargs["xyaxes"].split()) != 6:
-            raise ValueError("xyaxes must have six values separated by space")
+    @staticmethod
+    def build_from_target_view(
+        camera_name: str,
+        lookat: Sequence[float | int] | npt.NDArray = (0, 0, 0),
+        distance: float | int | npt.NDArray = 3,
+        azimut: float | int | npt.NDArray = 90,
+        elevation: float | int | npt.NDArray = -45,
+        fovy: float | int | npt.NDArray = 45,
+        degrees: bool = True,
+        **kwargs,
+    ) -> MujocoCamera:
+        """
+        Create a custom camera that looks at a target point.
 
-        return cls(**kwargs)
+        Note:
+            The choice of the parameters is easier if we imagine to consider a target
+            frame `T` whose origin is located over the lookat point and having the same
+            orientation of the world frame `W`. We also introduce a camera frame `C`
+            whose origin is located over the lower-left corner of the image, and having
+            the x-axis pointing right and the y-axis pointing up in image coordinates.
+            The camera renders what it sees in the -z direction of frame `C`.
+
+        Args:
+            camera_name: The name of the camera.
+            lookat: The target point to look at (origin of `T`).
+            distance:
+                The distance from the target point (displacement between the origins
+                of `T` and `C`).
+            azimut:
+                The rotation around z of the camera. With an angle of 0, the camera
+                would loot at the target point towards the positive x-axis of `T`.
+            elevation:
+                The rotation around the x-axis of the camera frame `C`. Note that if
+                you want to lift the view angle, the elevation is negative.
+            fovy: The field of view of the camera.
+            degrees: Whether the angles are in degrees or radians.
+            **kwargs: Additional camera parameters.
+
+        Returns:
+            The custom camera.
+        """
+
+        # Start from a frame whose origin is located over the lookat point.
+        # We initialize a -90 degrees rotation around the z-axis because due to
+        # the default camera coordinate system (x pointing right, y pointing up).
+        W_H_C = np.eye(4)
+        W_H_C[0:3, 3] = np.array(lookat)
+        W_H_C[0:3, 0:3] = Rotation.from_euler(
+            seq="ZX", angles=[-90, 90], degrees=True
+        ).as_matrix()
+
+        # Process the azimut.
+        R_az = Rotation.from_euler(seq="Y", angles=azimut, degrees=degrees).as_matrix()
+        W_H_C[0:3, 0:3] = W_H_C[0:3, 0:3] @ R_az
+
+        # Process elevation.
+        R_el = Rotation.from_euler(
+            seq="X", angles=elevation, degrees=degrees
+        ).as_matrix()
+        W_H_C[0:3, 0:3] = W_H_C[0:3, 0:3] @ R_el
+
+        # Process distance.
+        tf_distance = np.eye(4)
+        tf_distance[2, 3] = distance
+        W_H_C = W_H_C @ tf_distance
+
+        # Extract the position and the quaternion.
+        p = W_H_C[0:3, 3]
+        Q = Rotation.from_matrix(W_H_C[0:3, 0:3]).as_quat(scalar_first=True)
+
+        return MujocoCamera.build(
+            name=camera_name,
+            mode="fixed",
+            fovy=f"{fovy if degrees else np.rad2deg(fovy)}",
+            pos=" ".join(p.astype(str).tolist()),
+            quat=" ".join(Q.astype(str).tolist()),
+            **kwargs,
+        )
+
+    def asdict(self) -> dict[str, str]:
+
+        return {k: v for k, v in dataclasses.asdict(self).items() if v is not None}
@@ -1,10 +1,11 @@
 import contextlib
 import pathlib
-from typing import ContextManager
+from typing import ContextManager, Sequence
 
 import mediapy as media
 import mujoco as mj
 import mujoco.viewer
+import numpy as np
 import numpy.typing as npt
 
 
@@ -62,18 +63,16 @@ def reset(
         self.data = data if data is not None else self.data
         self.model = model if model is not None else self.model
 
-    def render_frame(self, camera_name: str | None = None) -> npt.NDArray:
+    def render_frame(self, camera_name: str = "track") -> npt.NDArray:
         """Renders a frame."""
-        camera_name = camera_name or "track"
 
         mujoco.mj_forward(self.model, self.data)
         self.renderer.update_scene(data=self.data, camera=camera_name)
 
         return self.renderer.render()
 
-    def record_frame(self, camera_name: str | None = None) -> None:
+    def record_frame(self, camera_name: str = "track") -> None:
         """Stores a frame in the buffer."""
-        camera_name = camera_name or "track"
 
         frame = self.render_frame(camera_name=camera_name)
         self.frames.append(frame)
@@ -167,13 +166,72 @@ def open(
         self,
         model: mj.MjModel | None = None,
         data: mj.MjData | None = None,
+        *,
         close_on_exit: bool = True,
+        lookat: Sequence[float | int] | npt.NDArray | None = None,
+        distance: float | int | npt.NDArray | None = None,
+        azimut: float | int | npt.NDArray | None = None,
+        elevation: float | int | npt.NDArray | None = None,
     ) -> ContextManager[mujoco.viewer.Handle]:
-        """Context manager to open a viewer."""
+        """
+        Context manager to open the Mujoco passive viewer.
+
+        Note:
+            Refer to the Mujoco documentation for details of the camera options:
+            https://mujoco.readthedocs.io/en/stable/XMLreference.html#visual-global
+        """
 
         handle = self.open_viewer(model=model, data=data)
 
+        handle = MujocoVisualizer.setup_viewer_camera(
+            viewer=handle,
+            lookat=lookat,
+            distance=distance,
+            azimut=azimut,
+            elevation=elevation,
+        )
+
         try:
             yield handle
         finally:
             _ = handle.close() if close_on_exit else None
+
+    @staticmethod
+    def setup_viewer_camera(
+        viewer: mj.viewer.Handle,
+        *,
+        lookat: Sequence[float | int] | npt.NDArray | None,
+        distance: float | int | npt.NDArray | None = None,
+        azimut: float | int | npt.NDArray | None = None,
+        elevation: float | int | npt.NDArray | None = None,
+    ) -> mj.viewer.Handle:
+        """
+        Configure the initial viewpoint of the Mujoco passive viewer.
+
+        Note:
+            Refer to the Mujoco documentation for details of the camera options:
+            https://mujoco.readthedocs.io/en/stable/XMLreference.html#visual-global
+
+        Returns:
+            The viewer with configured camera.
+        """
+
+        if lookat is not None:
+
+            lookat_array = np.array(lookat, dtype=float).squeeze()
+
+            if lookat_array.size != 3:
+                raise ValueError(lookat)
+
+            viewer.cam.lookat = lookat_array
+
+        if distance is not None:
+            viewer.cam.distance = float(distance)
+
+        if azimut is not None:
+            viewer.cam.azimuth = float(azimut) % 360
+
+        if elevation is not None:
+            viewer.cam.elevation = float(elevation)
+
+        return viewer