facebookresearch · jturner65 · Feb 21, 2023 · Feb 8, 2023 · Feb 8, 2023 · Feb 8, 2023
diff --git a/src/esp/bindings/GfxBindings.cpp b/src/esp/bindings/GfxBindings.cpp
@@ -72,8 +72,8 @@ void initGfxBindings(py::module& m) {
            "height"_a, "znear"_a, "zfar"_a, "scale"_a)
       .def(
           "unproject", &RenderCamera::unproject,
-          R"(Unproject a 2D viewport point to a 3D ray with its origin at the camera position.)",
-          "viewport_point"_a)
+          R"(Unproject a 2D viewport point to a 3D ray with its origin at the camera position. Ray direction is optionally normalized. Non-normalized rays originate at the camera location and terminate at a view plane one unit down the Z axis.)",
+          "viewport_point"_a, "normalized"_a = true)
       .def_property_readonly("node", nodeGetter<RenderCamera>,
                              "Node this object is attached to")
       .def_property_readonly("object", nodeGetter<RenderCamera>,

diff --git a/src/esp/gfx/RenderCamera.cpp b/src/esp/gfx/RenderCamera.cpp
@@ -197,7 +197,8 @@ size_t RenderCamera::filterTransforms(DrawableTransforms& drawableTransforms,
   return drawableTransforms.size();
 }
 
-esp::geo::Ray RenderCamera::unproject(const Mn::Vector2i& viewportPosition) {
+esp::geo::Ray RenderCamera::unproject(const Mn::Vector2i& viewportPosition,
+                                      bool normalized) {
   esp::geo::Ray ray;
   ray.origin = object().absoluteTranslation();
 
@@ -209,11 +210,19 @@ esp::geo::Ray RenderCamera::unproject(const Mn::Vector2i& viewportPosition) {
           Magnum::Vector2{1.0f},
       1.0};
 
+  // far plane distance from the projection matrix; used to rescale the ray
+  auto farDistance =
+      projectionMatrix()[3][2] / (1.0f + projectionMatrix()[2][2]);
+
   ray.direction =
-      ((object().absoluteTransformationMatrix() * projectionMatrix().inverted())
+      ((object().absoluteTransformationMatrix() * invertedProjectionMatrix)
            .transformPoint(normalizedPos) -
-       ray.origin)
-          .normalized();
+       ray.origin) /
+      farDistance;
+
+  if (normalized) {
+    ray.direction = ray.direction.normalized();
+  }
   return ray;
 }
 

diff --git a/src/esp/gfx/RenderCamera.h b/src/esp/gfx/RenderCamera.h
@@ -147,6 +147,7 @@ class RenderCamera : public MagnumCamera {
                                     Mn::Matrix4& projMat) {
     MagnumCamera::setProjectionMatrix(projMat).setViewport(
         Magnum::Vector2i(width, height));
+    invertedProjectionMatrix = projectionMatrix().inverted();
     return *this;
   }
 
@@ -223,16 +224,21 @@ class RenderCamera : public MagnumCamera {
    * following rendering pass, otherwise false
    */
   bool useDrawableIds() const { return useDrawableIds_; }
+
   /**
    * @brief Unproject a 2D viewport point to a 3D ray with origin at camera
-   * position.
+   * position. Ray direction is optionally normalized. Non-normalized rays
+   * originate at the camera location and terminate at a view plane one unit
+   * down the Z axis.
    *
    * @param viewportPosition The 2D point on the viewport to unproject
    * ([0,width], [0,height]).
+   * @param normalized If true (default), normalize the ray direction.
    * @return a @ref esp::geo::Ray with unit length direction or zero direction
    * if failed.
    */
-  esp::geo::Ray unproject(const Mn::Vector2i& viewportPosition);
+  esp::geo::Ray unproject(const Mn::Vector2i& viewportPosition,
+                          bool normalized = true);
 
   /**
    * @brief Query the cached number of Drawables visible after frustum culling
@@ -243,6 +249,9 @@ class RenderCamera : public MagnumCamera {
   }
 
  protected:
+  //! cached inverse of the projection matrix, updated in setProjectionMatrix,
+  //! to save compute on repeated calls (e.g. to unproject) with a static camera
+  Mn::Matrix4 invertedProjectionMatrix;
   size_t previousNumVisibleDrawables_ = 0;
   bool useDrawableIds_ = false;
   ESP_SMART_POINTERS(RenderCamera)

diff --git a/src_python/habitat_sim/utils/settings.py b/src_python/habitat_sim/utils/settings.py
@@ -16,6 +16,7 @@
     "default_agent": 0,
     "sensor_height": 1.5,
     "hfov": 90,
+    "zfar": 1000.0,
     "color_sensor": True,
     "semantic_sensor": False,
     "depth_sensor": False,
@@ -30,6 +31,7 @@
     "equirect_semantic_sensor": False,
     "seed": 1,
     "physics_config_file": "data/default.physics_config.json",
+    "enable_physics": True,
 }
 # [/default_sim_settings]
 
@@ -79,6 +81,7 @@ def create_camera_spec(**kw_args):
         color_sensor_spec = create_camera_spec(
             uuid="color_sensor",
             hfov=settings["hfov"],
+            far=settings["zfar"],
             sensor_type=habitat_sim.SensorType.COLOR,
             sensor_subtype=habitat_sim.SensorSubType.PINHOLE,
         )
@@ -88,6 +91,7 @@ def create_camera_spec(**kw_args):
         depth_sensor_spec = create_camera_spec(
             uuid="depth_sensor",
             hfov=settings["hfov"],
+            far=settings["zfar"],
             sensor_type=habitat_sim.SensorType.DEPTH,
             channels=1,
             sensor_subtype=habitat_sim.SensorSubType.PINHOLE,
@@ -98,6 +102,7 @@ def create_camera_spec(**kw_args):
         semantic_sensor_spec = create_camera_spec(
             uuid="semantic_sensor",
             hfov=settings["hfov"],
+            far=settings["zfar"],
             sensor_type=habitat_sim.SensorType.SEMANTIC,
             channels=1,
             sensor_subtype=habitat_sim.SensorSubType.PINHOLE,
@@ -107,6 +112,7 @@ def create_camera_spec(**kw_args):
     if settings["ortho_rgba_sensor"]:
         ortho_rgba_sensor_spec = create_camera_spec(
             uuid="ortho_rgba_sensor",
+            far=settings["zfar"],
             sensor_type=habitat_sim.SensorType.COLOR,
             sensor_subtype=habitat_sim.SensorSubType.ORTHOGRAPHIC,
         )
@@ -115,6 +121,7 @@ def create_camera_spec(**kw_args):
     if settings["ortho_depth_sensor"]:
         ortho_depth_sensor_spec = create_camera_spec(
             uuid="ortho_depth_sensor",
+            far=settings["zfar"],
             sensor_type=habitat_sim.SensorType.DEPTH,
             channels=1,
             sensor_subtype=habitat_sim.SensorSubType.ORTHOGRAPHIC,
@@ -124,6 +131,7 @@ def create_camera_spec(**kw_args):
     if settings["ortho_semantic_sensor"]:
         ortho_semantic_sensor_spec = create_camera_spec(
             uuid="ortho_semantic_sensor",
+            far=settings["zfar"],
             sensor_type=habitat_sim.SensorType.SEMANTIC,
             channels=1,
             sensor_subtype=habitat_sim.SensorSubType.ORTHOGRAPHIC,

diff --git a/tests/test_gfx.py b/tests/test_gfx.py
@@ -4,6 +4,8 @@
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.
 
+import random
+import typing
 from os import path as osp
 
 import magnum as mn
@@ -19,13 +21,21 @@
     not osp.exists("data/scene_datasets/habitat-test-scenes/skokloster-castle.glb"),
     reason="Requires the habitat-test-scenes",
 )
-def test_unproject():
+@pytest.mark.skipif(
+    not habitat_sim.built_with_bullet,
+    reason="Bullet physics used for validation.",
+)
+@pytest.mark.parametrize("zfar", [500, 1000, 1500])
+def test_unproject(zfar):
     cfg_settings = habitat_sim.utils.settings.default_sim_settings.copy()
 
     # configure some settings in case defaults change
     cfg_settings["scene"] = "data/scene_datasets/habitat-test-scenes/apartment_1.glb"
-    cfg_settings["width"] = 101
-    cfg_settings["height"] = 101
+    cfg_settings["enable_physics"] = True
+    cfg_settings["depth_sensor"] = True
+    cfg_settings["zfar"] = zfar
+    cfg_settings["width"] = 501
+    cfg_settings["height"] = 501
     cfg_settings["sensor_height"] = 0
     cfg_settings["color_sensor"] = True
 
@@ -37,22 +47,101 @@ def test_unproject():
         sim.agents[0].scene_node.translation = mn.Vector3(0.5, 0, 0)
 
         # setup camera
+        far_plane = sim._sensors["depth_sensor"]._sensor_object.far_plane_dist
+        assert zfar == far_plane
         render_camera = sim._sensors["color_sensor"]._sensor_object.render_camera
+        depth_camera = sim._sensors["depth_sensor"]._sensor_object.render_camera
 
-        # test unproject
+        # test unproject with known values
         center_ray = render_camera.unproject(
-            mn.Vector2i(50, 50)
+            mn.Vector2i(250, 250), normalized=False
         )  # middle of the viewport
+        center_ray_normalized = render_camera.unproject(mn.Vector2i(250, 250))
+        assert np.allclose(
+            center_ray_normalized.direction,
+            center_ray.direction.normalized(),
+            atol=0.07,
+        )
         assert np.allclose(center_ray.origin, np.array([0.5, 0, 0]), atol=0.07)
-        assert np.allclose(center_ray.direction, np.array([0, 0, -1.0]), atol=0.02)
+        assert np.allclose(
+            center_ray_normalized.direction, np.array([0, 0, -1.0]), atol=0.02
+        )
 
+        # NOTE: viewport y==0 is at the top
         test_ray_2 = render_camera.unproject(
-            mn.Vector2i(100, 100)
+            mn.Vector2i(500, 500), normalized=False
         )  # bottom right of the viewport
+        test_ray_2_normalized = render_camera.unproject(mn.Vector2i(500, 500))
+        assert np.allclose(
+            test_ray_2_normalized.direction,
+            test_ray_2.direction.normalized(),
+            atol=0.07,
+        )
         assert np.allclose(
-            test_ray_2.direction, np.array([0.569653, -0.581161, -0.581161]), atol=0.07
+            test_ray_2_normalized.direction,
+            np.array([0.569653, -0.581161, -0.581161]),
+            atol=0.07,
         )
 
+        # add a primitive sphere object to the world
+        obj_template_mgr = sim.get_object_template_manager()
+        rigid_obj_mgr = sim.get_rigid_object_manager()
+        sphere_prim_handle = obj_template_mgr.get_template_handles("uvSphereSolid")[0]
+        sphere_template = obj_template_mgr.get_template_by_handle(sphere_prim_handle)
+        sphere_template.scale = [0.03, 0.03, 0.03]
+        obj_template_mgr.register_template(sphere_template, "scaled_sphere")
+        sphere_prim_handle = obj_template_mgr.get_template_handles("scaled_sphere")[0]
+        sphere_obj = rigid_obj_mgr.add_object_by_template_handle(sphere_prim_handle)
+
+        # validate that random unprojected points scaled by depth camera distance are actually on the render mesh
+        # do this by creating a small collision object at the unprojected point and testing it against scene geometry
+        num_samples = 10
+        # move the camera, test a random pixel
+        cur_sample = 0
+        while cur_sample < num_samples:
+            # move agent
+            sim.agents[0].scene_node.translation = np.random.random(3)
+            # rotate agent
+            sim.agents[0].scene_node.rotation = mn.Quaternion.rotation(
+                mn.Rad(random.random() * mn.math.tau), mn.Vector3(0, 1, 0)
+            )
+            # tilt the camera
+            render_camera.node.rotation = mn.Quaternion.rotation(
+                mn.Rad(random.random()), mn.Vector3(1, 0, 0)
+            )
+            depth_camera.node.rotation = render_camera.node.rotation
+
+            # do the unprojection from depth image
+            view_point = mn.Vector2i(
+                random.randint(0, render_camera.viewport[0] - 1),
+                random.randint(0, render_camera.viewport[1] - 1),
+            )
+            # NOTE: use un-normalized rays scaled to unit z distance for this application
+            ray = render_camera.unproject(view_point, normalized=False)
+            depth_obs = typing.cast(
+                np.ndarray, sim.get_sensor_observations()["depth_sensor"]
+            )
+            # NOTE: (height, width) for buffer access
+            depth = depth_obs[view_point[1]][view_point[0]]
+
+            if depth == 0.0:
+                # skip depths of 0 which represent empty/background pixels
+                continue
+
+            # update the collision test object
+            depth_point = ray.origin + ray.direction * depth
+            sphere_obj.translation = depth_point
+
+            # optionally render the frames for debugging
+            # import habitat_sim.utils.viz_utils as vut
+            # c_image = vut.observation_to_image(sim.get_sensor_observations()["color_sensor"], "color")
+            # c_image.show()
+
+            assert (
+                sphere_obj.contact_test()
+            ), "Must be intersecting with scene collision mesh."
+            cur_sample += 1
+
 
 @pytest.mark.parametrize(
     "sensor_type",