Skip to content

Commit

Permalink
--Add Panoptic/Instance ID support to Semantic sensor (#2316)
Browse files Browse the repository at this point in the history
* --add enum to govern type of information the semantic sensor will return
* --refactor eigen::vecX references in sensor subsystem to be magnum vectors
Part of ongoing effort to remove redundant Eigen dependency.
* --properly initialize magnum vectors for sensor spec pos and orientation.
--Need floating point values for list/np.array initialization.
* --fix audio sensor magnum vector access
* --magnum compat vectors
* --add comment to describe helper class
* --set up the list of various semantic ID data in the scene node.
* --set various semantic/panoptic IDs in the scene node
* --set stage ID to 0; (Object counts will start at 1)
* --alias the sensor enum to the scene node enum defining semantic data types
* --build the render camera with the type of semantic id it will access
This field is ignored if semantics are not rendered.
* --modify the drawables to consume the appropriate ID
The render camera knows which semantic data it needs from each scene node, and the scene nodes know all their appropriate data.
* --allow for accessing semantic data vector
* --modify stage ID constant to be 0 instead of -1
* --link ID == -1 means no link, not the stage.
* --a few fixes/reviewer suggestions
  • Loading branch information
jturner65 authored Feb 28, 2024
1 parent d513cc4 commit 415f5ae
Show file tree
Hide file tree
Showing 44 changed files with 359 additions and 157 deletions.
6 changes: 2 additions & 4 deletions examples/motion_viewer.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@
import time
from typing import Any, Callable, Dict, Optional, Tuple

import numpy as np

flags = sys.getdlopenflags()
sys.setdlopenflags(flags | ctypes.RTLD_GLOBAL)

Expand Down Expand Up @@ -121,8 +119,8 @@ def reconfigure_sim(self) -> None:
self.sim_settings["height"],
self.sim_settings["width"],
]
camera_sensor_spec.position = np.array([0, 0, 0])
camera_sensor_spec.orientation = np.array([0, 0, 0])
camera_sensor_spec.position = mn.Vector3(0.0, 0.0, 0.0)
camera_sensor_spec.orientation = mn.Vector3(0.0, 0.0, 0.0)
camera_sensor_spec.uuid = "fpov_sensor"

agent_config = habitat_sim.agent.AgentConfiguration(
Expand Down
16 changes: 8 additions & 8 deletions examples/tutorials/nb_python/ECCV_2020_Advanced_Features.py
Original file line number Diff line number Diff line change
Expand Up @@ -474,7 +474,7 @@ def make_cfg(settings):
settings["sensor_height"] + 0.2,
0.2,
]
color_sensor_3rd_person_spec.orientation = [-math.pi / 4, 0, 0]
color_sensor_3rd_person_spec.orientation = [-math.pi / 4, 0.0, 0.0]
color_sensor_3rd_person_spec.sensor_subtype = habitat_sim.SensorSubType.PINHOLE
sensor_specs.append(color_sensor_3rd_person_spec)

Expand Down Expand Up @@ -574,8 +574,8 @@ def init_camera_track_config(sim, sensor_name="color_sensor_1st_person", agent_I
init_state["position"] = np.array(visual_sensor._spec.position)
init_state["orientation"] = np.array(visual_sensor._spec.orientation)
# set the color sensor transform to be the agent transform
visual_sensor._spec.position = np.array([0, 0, 0])
visual_sensor._spec.orientation = np.array([0, 0, 0])
visual_sensor._spec.position = mn.Vector3(0.0, 0.0, 0.0)
visual_sensor._spec.orientation = mn.Vector3(0.0, 0.0, 0.0)
visual_sensor._sensor_object.set_transformation_from_spec()
# save ID of agent being modified
init_state["agent_ID"] = agent_ID
Expand Down Expand Up @@ -848,7 +848,7 @@ def build_widget_ui(obj_attr_mgr, prim_attr_mgr):
sim_settings["scene"] = os.path.join(
data_path, "scene_datasets/mp3d_example/17DRP5sb8fy/17DRP5sb8fy.glb"
)
sim_settings["sensor_pitch"] = 0
sim_settings["sensor_pitch"] = 0.0

make_simulator_from_settings(sim_settings)

Expand All @@ -870,8 +870,8 @@ def build_widget_ui(obj_attr_mgr, prim_attr_mgr):
initial_sensor_position = np.array(visual_sensor._spec.position)
initial_sensor_orientation = np.array(visual_sensor._spec.orientation)
# set the color sensor transform to be the agent transform
visual_sensor._spec.position = np.array([0, 0, 0])
visual_sensor._spec.orientation = np.array([0, 0, 0])
visual_sensor._spec.position = mn.Vector3(0.0, 0.0, 0.0)
visual_sensor._spec.orientation = mn.Vector3(0.0, 0.0, 0.0)
visual_sensor._sensor_object.set_transformation_from_spec()

# boost the agent off the floor
Expand Down Expand Up @@ -1006,7 +1006,7 @@ def get_2d_point(sim, sensor_name, point_3d):
sim_settings["scene"] = os.path.join(
data_path, "scene_datasets/mp3d_example/17DRP5sb8fy/17DRP5sb8fy.glb"
)
sim_settings["sensor_pitch"] = 0
sim_settings["sensor_pitch"] = 0.0
sim_settings["semantic_sensor_1st_person"] = True

make_simulator_from_settings(sim_settings)
Expand Down Expand Up @@ -1097,7 +1097,7 @@ def get_2d_point(sim, sensor_name, point_3d):
sim_settings["scene"] = os.path.join(
data_path, "scene_datasets/habitat-test-scenes/apartment_1.glb"
)
sim_settings["sensor_pitch"] = 0
sim_settings["sensor_pitch"] = 0.0

make_simulator_from_settings(sim_settings)

Expand Down
6 changes: 3 additions & 3 deletions examples/tutorials/nb_python/ECCV_2020_Interactivity.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ def make_cfg(settings):
settings["sensor_height"] + 0.2,
0.2,
]
color_sensor_3rd_person_spec.orientation = [-math.pi / 4, 0, 0]
color_sensor_3rd_person_spec.orientation = [-math.pi / 4, 0.0, 0.0]
color_sensor_3rd_person_spec.sensor_subtype = habitat_sim.SensorSubType.PINHOLE
sensor_specs.append(color_sensor_3rd_person_spec)

Expand Down Expand Up @@ -887,7 +887,7 @@ def build_widget_ui(obj_attr_mgr, prim_attr_mgr):
sim_settings["scene"] = os.path.join(
data_path, "scene_datasets/habitat-test-scenes/apartment_1.glb"
)
sim_settings["sensor_pitch"] = 0
sim_settings["sensor_pitch"] = 0.0

make_simulator_from_settings(sim_settings)

Expand Down Expand Up @@ -1180,7 +1180,7 @@ def release(self):
# fmt: off
sim_settings["scene"] = os.path.join(data_path, "scene_datasets/mp3d_example/17DRP5sb8fy/17DRP5sb8fy.glb") # @param{type:"string"}
# fmt: on
sim_settings["sensor_pitch"] = 0
sim_settings["sensor_pitch"] = 0.0
sim_settings["sensor_height"] = 0.6
sim_settings["color_sensor_3rd_person"] = True
sim_settings["depth_sensor_1st_person"] = True
Expand Down
2 changes: 1 addition & 1 deletion examples/tutorials/nb_python/ReplicaCAD_quickstart.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ def make_default_settings():
"scene": "NONE", # Scene path
"default_agent": 0,
"sensor_height": 1.5, # Height of sensors in meters
"sensor_pitch": 0, # sensor pitch (x rotation in rads)
"sensor_pitch": 0.0, # sensor pitch (x rotation in rads)
"seed": 1,
"enable_physics": True, # enable dynamics simulation
}
Expand Down
13 changes: 7 additions & 6 deletions examples/tutorials/nb_python/asset_viewer.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
import os

import git
import magnum as mn
import numpy as np

import habitat_sim
Expand Down Expand Up @@ -176,7 +177,7 @@ def make_cfg(settings):
settings["sensor_height"] + 0.2,
0.2,
]
color_sensor_3rd_person_spec.orientation = [-math.pi / 4, 0, 0]
color_sensor_3rd_person_spec.orientation = [-math.pi / 4, 0.0, 0.0]
color_sensor_3rd_person_spec.sensor_subtype = habitat_sim.SensorSubType.PINHOLE
sensor_specs.append(color_sensor_3rd_person_spec)

Expand Down Expand Up @@ -720,7 +721,7 @@ def build_widget_ui(obj_attr_mgr, prim_attr_mgr):

sim_settings = make_default_settings()
sim_settings["scene"] = "none"
sim_settings["sensor_pitch"] = 0
sim_settings["sensor_pitch"] = 0.0
sim_settings["override_scene_light_defaults"] = True
sim_settings["scene_light_setup"] = ""

Expand Down Expand Up @@ -785,20 +786,20 @@ def build_widget_ui(obj_attr_mgr, prim_attr_mgr):
# get max dim to use as scale for sensor placement
bb_scale = max(obj_bbox.max)
# determine sensor placement based on size of object
sensor_pos = bb_scale * np.array([0, 1, 2])
sensor_pos = bb_scale * mn.Vector3(0.0, 1.0, 2.0)
# set object to be static
obj.motion_type = habitat_sim.physics.MotionType.STATIC

# initialize an agent and set its initial state
agent = sim.initialize_agent(sim_settings["default_agent"])
agent_state = habitat_sim.AgentState()
agent_state.position = np.array([0.0, 0.0, 0.0]) # in world space
agent_state.position = mn.Vector3(0.0, 0.0, 0.0) # in world space
agent.set_state(agent_state)

# set the sensor to be behind and above the agent's initial loc
# distance is scaled by size of largest object dimension
visual_sensor._spec.position = agent_state.position + sensor_pos
visual_sensor._spec.orientation = np.array([-0.5, 0, 0])
visual_sensor._spec.orientation = mn.Vector3(-0.5, 0.0, 0.0)
visual_sensor._sensor_object.set_transformation_from_spec()

# Create observations array
Expand All @@ -817,7 +818,7 @@ def build_widget_ui(obj_attr_mgr, prim_attr_mgr):
sim.step_physics(time_step)
# rotate the agent to rotate the camera
agent_state.rotation *= ut.quat_from_angle_axis(
rot_amount, np.array([0, 1.0, 0])
rot_amount, np.array([0.0, 1.0, 0.0])
)
agent.set_state(agent_state)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def make_configuration():
rgba_camera_3rdperson_spec.sensor_type = habitat_sim.SensorType.COLOR
rgba_camera_3rdperson_spec.resolution = camera_resolution
rgba_camera_3rdperson_spec.position = [0.0, 1.0, 0.3]
rgba_camera_3rdperson_spec.orientation = [-45, 0.0, 0.0]
rgba_camera_3rdperson_spec.orientation = [-45.0, 0.0, 0.0]
rgba_camera_3rdperson_spec.sensor_subtype = habitat_sim.SensorSubType.PINHOLE
sensor_specs.append(rgba_camera_3rdperson_spec)

Expand Down
16 changes: 8 additions & 8 deletions examples/tutorials/notebooks/ECCV_2020_Advanced_Features.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -473,7 +473,7 @@
" settings[\"sensor_height\"] + 0.2,\n",
" 0.2,\n",
" ]\n",
" color_sensor_3rd_person_spec.orientation = [-math.pi / 4, 0, 0]\n",
" color_sensor_3rd_person_spec.orientation = [-math.pi / 4, 0.0, 0.0]\n",
" color_sensor_3rd_person_spec.sensor_subtype = habitat_sim.SensorSubType.PINHOLE\n",
" sensor_specs.append(color_sensor_3rd_person_spec)\n",
"\n",
Expand Down Expand Up @@ -580,8 +580,8 @@
" init_state[\"position\"] = np.array(visual_sensor._spec.position)\n",
" init_state[\"orientation\"] = np.array(visual_sensor._spec.orientation)\n",
" # set the color sensor transform to be the agent transform\n",
" visual_sensor._spec.position = np.array([0, 0, 0])\n",
" visual_sensor._spec.orientation = np.array([0, 0, 0])\n",
" visual_sensor._spec.position = mn.Vector3(0.0, 0.0, 0.0)\n",
" visual_sensor._spec.orientation = mn.Vector3(0.0, 0.0, 0.0)\n",
" visual_sensor._sensor_object.set_transformation_from_spec()\n",
" # save ID of agent being modified\n",
" init_state[\"agent_ID\"] = agent_ID\n",
Expand Down Expand Up @@ -875,7 +875,7 @@
"sim_settings[\"scene\"] = os.path.join(\n",
" data_path, \"scene_datasets/mp3d_example/17DRP5sb8fy/17DRP5sb8fy.glb\"\n",
")\n",
"sim_settings[\"sensor_pitch\"] = 0\n",
"sim_settings[\"sensor_pitch\"] = 0.0\n",
"\n",
"make_simulator_from_settings(sim_settings)"
]
Expand Down Expand Up @@ -915,8 +915,8 @@
"initial_sensor_position = np.array(visual_sensor._spec.position)\n",
"initial_sensor_orientation = np.array(visual_sensor._spec.orientation)\n",
"# set the color sensor transform to be the agent transform\n",
"visual_sensor._spec.position = np.array([0, 0, 0])\n",
"visual_sensor._spec.orientation = np.array([0, 0, 0])\n",
"visual_sensor._spec.position = mn.Vector3(0.0, 0.0, 0.0)\n",
"visual_sensor._spec.orientation = mn.Vector3(0.0, 0.0, 0.0)\n",
"visual_sensor._sensor_object.set_transformation_from_spec()\n",
"\n",
"# boost the agent off the floor\n",
Expand Down Expand Up @@ -1070,7 +1070,7 @@
"sim_settings[\"scene\"] = os.path.join(\n",
" data_path, \"scene_datasets/mp3d_example/17DRP5sb8fy/17DRP5sb8fy.glb\"\n",
")\n",
"sim_settings[\"sensor_pitch\"] = 0\n",
"sim_settings[\"sensor_pitch\"] = 0.0\n",
"sim_settings[\"semantic_sensor_1st_person\"] = True\n",
"\n",
"make_simulator_from_settings(sim_settings)\n",
Expand Down Expand Up @@ -1175,7 +1175,7 @@
"sim_settings[\"scene\"] = os.path.join(\n",
" data_path, \"scene_datasets/habitat-test-scenes/apartment_1.glb\"\n",
")\n",
"sim_settings[\"sensor_pitch\"] = 0\n",
"sim_settings[\"sensor_pitch\"] = 0.0\n",
"\n",
"make_simulator_from_settings(sim_settings)\n",
"\n",
Expand Down
6 changes: 3 additions & 3 deletions examples/tutorials/notebooks/ECCV_2020_Interactivity.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@
" settings[\"sensor_height\"] + 0.2,\n",
" 0.2,\n",
" ]\n",
" color_sensor_3rd_person_spec.orientation = [-math.pi / 4, 0, 0]\n",
" color_sensor_3rd_person_spec.orientation = [-math.pi / 4, 0.0, 0.0]\n",
" color_sensor_3rd_person_spec.sensor_subtype = habitat_sim.SensorSubType.PINHOLE\n",
" sensor_specs.append(color_sensor_3rd_person_spec)\n",
"\n",
Expand Down Expand Up @@ -973,7 +973,7 @@
"sim_settings[\"scene\"] = os.path.join(\n",
" data_path, \"scene_datasets/habitat-test-scenes/apartment_1.glb\"\n",
")\n",
"sim_settings[\"sensor_pitch\"] = 0\n",
"sim_settings[\"sensor_pitch\"] = 0.0\n",
"\n",
"make_simulator_from_settings(sim_settings)"
]
Expand Down Expand Up @@ -1304,7 +1304,7 @@
"# fmt: off\n",
"sim_settings[\"scene\"] = os.path.join(data_path, \"scene_datasets/mp3d_example/17DRP5sb8fy/17DRP5sb8fy.glb\") # @param{type:\"string\"}\n",
"# fmt: on\n",
"sim_settings[\"sensor_pitch\"] = 0\n",
"sim_settings[\"sensor_pitch\"] = 0.0\n",
"sim_settings[\"sensor_height\"] = 0.6\n",
"sim_settings[\"color_sensor_3rd_person\"] = True\n",
"sim_settings[\"depth_sensor_1st_person\"] = True\n",
Expand Down
2 changes: 1 addition & 1 deletion examples/tutorials/notebooks/ReplicaCAD_quickstart.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@
" \"scene\": \"NONE\", # Scene path\n",
" \"default_agent\": 0,\n",
" \"sensor_height\": 1.5, # Height of sensors in meters\n",
" \"sensor_pitch\": 0, # sensor pitch (x rotation in rads)\n",
" \"sensor_pitch\": 0.0, # sensor pitch (x rotation in rads)\n",
" \"seed\": 1,\n",
" \"enable_physics\": True, # enable dynamics simulation\n",
" }\n",
Expand Down
13 changes: 7 additions & 6 deletions examples/tutorials/notebooks/asset_viewer.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
"import os\n",
"\n",
"import git\n",
"import magnum as mn\n",
"import numpy as np\n",
"\n",
"import habitat_sim\n",
Expand Down Expand Up @@ -167,7 +168,7 @@
" settings[\"sensor_height\"] + 0.2,\n",
" 0.2,\n",
" ]\n",
" color_sensor_3rd_person_spec.orientation = [-math.pi / 4, 0, 0]\n",
" color_sensor_3rd_person_spec.orientation = [-math.pi / 4, 0.0, 0.0]\n",
" color_sensor_3rd_person_spec.sensor_subtype = habitat_sim.SensorSubType.PINHOLE\n",
" sensor_specs.append(color_sensor_3rd_person_spec)\n",
"\n",
Expand Down Expand Up @@ -733,7 +734,7 @@
"\n",
"sim_settings = make_default_settings()\n",
"sim_settings[\"scene\"] = \"none\"\n",
"sim_settings[\"sensor_pitch\"] = 0\n",
"sim_settings[\"sensor_pitch\"] = 0.0\n",
"sim_settings[\"override_scene_light_defaults\"] = True\n",
"sim_settings[\"scene_light_setup\"] = \"\"\n",
"\n",
Expand Down Expand Up @@ -808,20 +809,20 @@
" # get max dim to use as scale for sensor placement\n",
" bb_scale = max(obj_bbox.max)\n",
" # determine sensor placement based on size of object\n",
" sensor_pos = bb_scale * np.array([0, 1, 2])\n",
" sensor_pos = bb_scale * mn.Vector3(0.0, 1.0, 2.0)\n",
" # set object to be static\n",
" obj.motion_type = habitat_sim.physics.MotionType.STATIC\n",
"\n",
" # initialize an agent and set its initial state\n",
" agent = sim.initialize_agent(sim_settings[\"default_agent\"])\n",
" agent_state = habitat_sim.AgentState()\n",
" agent_state.position = np.array([0.0, 0.0, 0.0]) # in world space\n",
" agent_state.position = mn.Vector3(0.0, 0.0, 0.0) # in world space\n",
" agent.set_state(agent_state)\n",
"\n",
" # set the sensor to be behind and above the agent's initial loc\n",
" # distance is scaled by size of largest object dimension\n",
" visual_sensor._spec.position = agent_state.position + sensor_pos\n",
" visual_sensor._spec.orientation = np.array([-0.5, 0, 0])\n",
" visual_sensor._spec.orientation = mn.Vector3(-0.5, 0.0, 0.0)\n",
" visual_sensor._sensor_object.set_transformation_from_spec()\n",
"\n",
" # Create observations array\n",
Expand All @@ -840,7 +841,7 @@
" sim.step_physics(time_step)\n",
" # rotate the agent to rotate the camera\n",
" agent_state.rotation *= ut.quat_from_angle_axis(\n",
" rot_amount, np.array([0, 1.0, 0])\n",
" rot_amount, np.array([0.0, 1.0, 0.0])\n",
" )\n",
" agent.set_state(agent_state)\n",
"\n",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@
" rgba_camera_3rdperson_spec.sensor_type = habitat_sim.SensorType.COLOR\n",
" rgba_camera_3rdperson_spec.resolution = camera_resolution\n",
" rgba_camera_3rdperson_spec.position = [0.0, 1.0, 0.3]\n",
" rgba_camera_3rdperson_spec.orientation = [-45, 0.0, 0.0]\n",
" rgba_camera_3rdperson_spec.orientation = [-45.0, 0.0, 0.0]\n",
" rgba_camera_3rdperson_spec.sensor_subtype = habitat_sim.SensorSubType.PINHOLE\n",
" sensor_specs.append(rgba_camera_3rdperson_spec)\n",
"\n",
Expand Down
2 changes: 0 additions & 2 deletions src/esp/bindings/GfxBindings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,6 @@ void initGfxBindings(py::module& m) {
pybindEnumOperators(flags);

render_camera
.def(py::init_alias<std::reference_wrapper<scene::SceneNode>,
const vec3f&, const vec3f&, const vec3f&>())
.def(
"set_projection_matrix",
[](RenderCamera& self, int w, int h, float n, float f, Mn::Degd fov) {
Expand Down
8 changes: 8 additions & 0 deletions src/esp/bindings/SceneBindings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,14 @@ void initSceneBindings(py::module& m) {
.def_property("type", &SceneNode::getType, &SceneNode::setType)
.def_property("semantic_id", &SceneNode::getSemanticId,
&SceneNode::setSemanticId)
.def_property(
"object_semantic_id", &SceneNode::getBaseObjectId,
&SceneNode::setBaseObjectId,
R"(This node's owning object's ID, for instance-based semantics)")
.def_property(
"drawable_semantic_id", &SceneNode::getDrawableId,
&SceneNode::setDrawableId,
R"(This node's drawable's ID, for instance-based semantics)")
.def(
"create_child", [](SceneNode& self) { return &self.createChild(); },
R"(Creates a child node, and sets its parent to the current node.)")
Expand Down
13 changes: 11 additions & 2 deletions src/esp/bindings/SensorBindings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,12 @@ void initSensorBindings(py::module& m) {
.value("FISHEYE", SensorSubType::Fisheye)
.value("EQUIRECTANGULAR", SensorSubType::Equirectangular)
.value("IMPULSERESPONSE", SensorSubType::ImpulseResponse);
;

// NOTE : esp::sensor::SemanticSensorTarget is an alias for
// esp::scene::SceneNodeSemanticDataIDX.
py::enum_<SemanticSensorTarget>(m, "SemanticSensorTarget")
.value("SEMANTIC_ID", SemanticSensorTarget::SEMANTIC_ID)
.value("OBJECT_ID", SemanticSensorTarget::OBJECT_ID);

py::enum_<FisheyeSensorModelType>(m, "FisheyeSensorModelType")
.value("DOUBLE_SPHERE", FisheyeSensorModelType::DoubleSphere);
Expand Down Expand Up @@ -105,7 +110,11 @@ void initSensorBindings(py::module& m) {
.def_readwrite("resolution", &VisualSensorSpec::resolution)
.def_readwrite("gpu2gpu_transfer", &VisualSensorSpec::gpu2gpuTransfer)
.def_readwrite("channels", &VisualSensorSpec::channels)
.def_readwrite("clear_color", &CameraSensorSpec::clearColor);
.def_readwrite(
"semantic_target", &VisualSensorSpec::semanticTarget,
R"(The type of information rendered by the semantic sensor. If this sensor is not semantic,
this is ignored. Acceptable values : [SEMANTIC_ID(default), OBJECT_ID])")
.def_readwrite("clear_color", &VisualSensorSpec::clearColor);

// ====CameraSensorSpec ====
py::class_<CameraSensorSpec, CameraSensorSpec::ptr, VisualSensorSpec>(
Expand Down
Loading

0 comments on commit 415f5ae

Please sign in to comment.