renderer.py improve accuracy and support depth precision and mapping …

…flags. By more carefully inverting the operations performed when OpenGL transforms metric distance to window coordinates, more accurate depth values can be estimated from the depth buffer.
aftersomemath · Jun 22, 2023 · 66055ca · 66055ca
1 parent c71a42c
commit 66055ca
Showing 1 changed file with 38 additions and 7 deletions.
diff --git a/python/mujoco/renderer.py b/python/mujoco/renderer.py
@@ -32,7 +32,9 @@ def __init__(
       model: _structs.MjModel,
       height: int = 240,
       width: int = 320,
-      max_geom: int = 10000
+      max_geom: int = 10000,
+      depth_mapping: _enums.mjtDepthMapping =  _enums.mjtDepthMapping.mjDB_NEGONETOONE,
+      depth_precision: _enums.mjtDepthPrecision =  _enums.mjtDepthPrecision.mjDB_INT24,
   ) -> None:
     """Initializes a new `Renderer`.
 
@@ -43,6 +45,8 @@ def __init__(
       max_geom: Optional integer specifying the maximum number of geoms that can
         be rendered in the same scene. If None this will be chosen automatically
         based on the estimated maximum number of renderable geoms in the model.
+      depth_mapping: Mapping of the [znear, zfar] range to z-buffer NDC values.
+      depth_precision: Precision of the z buffer.
     Raises:
       ValueError: If `camera_id` is outside the valid range, or if `width` or
         `height` exceed the dimensions of MuJoCo's offscreen framebuffer.
@@ -70,6 +74,7 @@ def __init__(
     self._width = width
     self._height = height
     self._model = model
+    self._depth_mapping = depth_mapping
 
     self._scene = _structs.MjvScene(model=model, maxgeom=max_geom)
     self._scene_option = _structs.MjvOption()
@@ -80,7 +85,7 @@ def __init__(
     self._gl_context = gl_context.GLContext(width, height)
     self._gl_context.make_current()
     self._mjr_context = _render.MjrContext(
-        model, _enums.mjtFontScale.mjFONTSCALE_150
+        model, _enums.mjtFontScale.mjFONTSCALE_150, depth_mapping, depth_precision
     )
     _render.mjr_setBuffer(
         _enums.mjtFramebuffer.mjFB_OFFSCREEN, self._mjr_context
@@ -137,7 +142,8 @@ def render(self, *, out: Optional[np.ndarray] = None) -> np.ndarray:
     """
     original_flags = self._scene.flags.copy()
 
-    if self._segmentation_rendering:
+    # Using segmented rendering for depth makes the calculated depth more accurate at far distances
+    if self._depth_rendering or self._segmentation_rendering:
       self._scene.flags[_enums.mjtRndFlag.mjRND_SEGMENT] = True
       self._scene.flags[_enums.mjtRndFlag.mjRND_IDCOLOR] = True
 
@@ -172,11 +178,36 @@ def render(self, *, out: Optional[np.ndarray] = None) -> np.ndarray:
       near = self._model.vis.map.znear * extent
       far = self._model.vis.map.zfar * extent
 
-      # Convert from [0 1] to depth in units of length, see links below:
-      # http://stackoverflow.com/a/6657284/1461210
-      # https://www.khronos.org/opengl/wiki/Depth_Buffer_Precision
-      out = near / (1 - out * (1 - near / far))
+      # Calculate OpenGL perspective matrix values in float32 precision
+      # so they are close to what glFrustum returns
+      # https://registry.khronos.org/OpenGL-Refpages/gl2.1/xhtml/glFrustum.xml
+      zfar  = np.float32(far)
+      znear = np.float32(near)
+      C = -(zfar + znear)/(zfar - znear)
+      D = -(np.float32(2)*zfar*znear)/(zfar - znear)
 
+      # In reversed Z mode the perspective matrix values are transformed as follows:
+      if self._depth_mapping == _enums.mjtDepthMapping.mjDB_ONETOZERO:
+        C = np.float32(-0.5)*C - np.float32(0.5)
+        D = np.float32(-0.5)*D
+
+      # We need 64 bits to convert Z from ndc to metric depth without noticeable losses in precision
+      out_64 = out.astype(np.float64)
+
+      # Convert depth from window coordinates to normalized device coordinates
+      # In reversed Z mode the mapping is identity
+      # https://registry.khronos.org/OpenGL-Refpages/gl4/html/glDepthRange.xhtml
+      if self._depth_mapping == _enums.mjtDepthMapping.mjDB_NEGONETOONE:
+        out_64 = 2.0*out_64 - 1.0
+
+      # Undo OpenGL projection
+      out_64 = D / (out_64 + C)
+
+      # Cast result back to float32 for backwards compatibility
+      out[:] = out_64.astype(np.float32)
+
+      # Reset scene flags.
+      np.copyto(self._scene.flags, original_flags)
     elif self._segmentation_rendering:
       _render.mjr_readPixels(out, None, self._rect, self._mjr_context)