facebookresearch · akshararai · Feb 10, 2023 · Dec 24, 2022 · Dec 26, 2022 · Dec 27, 2022
diff --git a/habitat-baselines/habitat_baselines/README.md b/habitat-baselines/habitat_baselines/README.md
@@ -57,7 +57,7 @@ Change the `/benchmark/nav/pointnav: pointnav_gibson` in `habitat_baselines/conf
 
 We provide a two-layer hierarchical policy class, consisting of a low-level skill that moves the robot, and a high-level policy that reasons about which low-level skill to use in the current state. This can be especially powerful in long-horizon mobile manipulation tasks, like those introduced in [Habitat2.0](https://arxiv.org/abs/2106.14405). Both the low-level skills and the high-level policy can be either learned or an oracle. For the oracle high-level policy we use [PDDL](https://planning.wiki/guide/whatis/pddl), and for the oracle low-level skills we use instantaneous transitions, with the environment set to the final desired state. Additionally, for navigation, we provide an oracle navigation skill that uses A-star and the map of the environment to move the robot to its goal.
 
-To run the following examples, you need the [ReplicaCAD dataset](https://github.com/facebookresearch/habitat-sim/blob/main/DATASETS.md#replicacad). 
+To run the following examples, you need the [ReplicaCAD dataset](https://github.com/facebookresearch/habitat-sim/blob/main/DATASETS.md#replicacad).
 
 To train a high-level policy, while using pre-learned low-level skills (SRL baseline from [Habitat2.0](https://arxiv.org/abs/2106.14405)), you can run:
 

diff --git a/habitat-baselines/habitat_baselines/common/rollout_storage.py b/habitat-baselines/habitat_baselines/common/rollout_storage.py
@@ -5,7 +5,7 @@
 # LICENSE file in the root directory of this source tree.
 
 import warnings
-from typing import Any, Dict, Iterator, Optional, Tuple
+from typing import Any, Dict, Iterator, Optional
 
 import numpy as np
 import torch
@@ -16,6 +16,10 @@
     build_pack_info_from_dones,
     build_rnn_build_seq_info,
 )
+from habitat_baselines.utils.common import (
+    get_num_actions,
+    is_continuous_action_space,
+)
 
 
 @baseline_registry.register_storage
@@ -30,10 +34,22 @@ def __init__(
         action_space,
         recurrent_hidden_state_size,
         num_recurrent_layers=1,
-        action_shape: Optional[Tuple[int]] = None,
         is_double_buffered: bool = False,
-        discrete_actions: bool = True,
     ):
+
+        if is_continuous_action_space(action_space):
+            # Assume ALL actions are NOT discrete
+            action_shape = (
+                get_num_actions(
+                    action_space,
+                ),
+            )
+            discrete_actions = False
+        else:
+            # For discrete pointnav
+            action_shape = (1,)
+            discrete_actions = True
+
         self.buffers = TensorDict()
         self.buffers["observations"] = TensorDict()
 

diff --git a/...ines/config/habitat_baselines/rl/policy/hierarchical_policy/defined_skills/nn_skills.yaml b/...ines/config/habitat_baselines/rl/policy/hierarchical_policy/defined_skills/nn_skills.yaml
@@ -0,0 +1,43 @@
+open_cab:
+  skill_name: "ArtObjSkillPolicy"
+  load_ckpt_file: "data/models/open_cab.pth"
+
+open_fridge:
+  skill_name: "ArtObjSkillPolicy"
+  load_ckpt_file: "data/models/open_fridge.pth"
+
+close_cab:
+  skill_name: "ArtObjSkillPolicy"
+  load_ckpt_file: "data/models/close_cab.pth"
+
+close_fridge:
+  skill_name: "ArtObjSkillPolicy"
+  load_ckpt_file: "data/models/close_fridge.pth"
+
+pick:
+  skill_name: "PickSkillPolicy"
+  obs_skill_inputs: ["obj_start_sensor"]
+  load_ckpt_file: "data/models/pick.pth"
+
+place:
+  skill_name: "PlaceSkillPolicy"
+  obs_skill_inputs: ["obj_goal_sensor"]
+  load_ckpt_file: "data/models/place.pth"
+
+wait_skill:
+  skill_name: "WaitSkillPolicy"
+  max_skill_steps: -1
+  force_end_on_timeout: False
+
+nav_to_obj:
+  skill_name: "NavSkillPolicy"
+  obs_skill_inputs: ["goal_to_agent_gps_compass"]
+  load_ckpt_file: "data/models/nav.pth"
+  max_skill_steps: 300
+  obs_skill_input_dim: 2
+
+reset_arm_skill:
+  skill_name: "ResetArmSkill"
+  max_skill_steps: 50
+  reset_joint_state: [-4.50e-01, -1.07e00, 9.95e-02, 9.38e-01, -7.88e-04, 1.57e00, 4.62e-03]
+  force_end_on_timeout: False
diff --git a/...es/config/habitat_baselines/rl/policy/hierarchical_policy/defined_skills/noop_skills.yaml b/...es/config/habitat_baselines/rl/policy/hierarchical_policy/defined_skills/noop_skills.yaml
@@ -0,0 +1,53 @@
+open_cab:
+  skill_name: "NoopSkillPolicy"
+  max_skill_steps: 1
+  apply_postconds: True
+
+open_fridge:
+  skill_name: "NoopSkillPolicy"
+  max_skill_steps: 1
+  apply_postconds: True
+
+close_cab:
+  skill_name: "NoopSkillPolicy"
+  obs_skill_inputs: ["obj_start_sensor"]
+  max_skill_steps: 1
+  apply_postconds: True
+
+close_fridge:
+  skill_name: "NoopSkillPolicy"
+  obs_skill_inputs: ["obj_start_sensor"]
+  max_skill_steps: 1
+  apply_postconds: True
+
+pick:
+  skill_name: "NoopSkillPolicy"
+  obs_skill_inputs: ["obj_start_sensor"]
+  max_skill_steps: 1
+  apply_postconds: True
+  force_end_on_timeout: False
+
+place:
+  skill_name: "NoopSkillPolicy"
+  obs_skill_inputs: ["obj_goal_sensor"]
+  max_skill_steps: 1
+  apply_postconds: True
+  force_end_on_timeout: False
+
+wait_skill:
+  skill_name: "WaitSkillPolicy"
+  max_skill_steps: -1
+
+nav_to_obj:
+  skill_name: "NoopSkillPolicy"
+  obs_skill_inputs: ["goal_to_agent_gps_compass"]
+  max_skill_steps: 1
+  apply_postconds: True
+  force_end_on_timeout: False
+  obs_skill_input_dim: 2
+
+reset_arm_skill:
+  skill_name: "ResetArmSkill"
+  max_skill_steps: 50
+  reset_joint_state: [-4.50e-01, -1.07e00, 9.95e-02, 9.38e-01, -7.88e-04, 1.57e00, 4.62e-03]
+  force_end_on_timeout: False
diff --git a/...baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_srl_onav.yaml b/...baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_srl_onav.yaml
diff --git a/...lines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_tp_noop_onav.yaml b/...lines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_tp_noop_onav.yaml
diff --git a/...t-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_tp_srl.yaml b/...t-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_tp_srl.yaml
diff --git a/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hl_fixed.yaml b/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hl_fixed.yaml
@@ -0,0 +1,21 @@
+name: "HierarchicalPolicy"
+obs_transforms:
+  add_virtual_keys:
+    virtual_keys:
+      "goal_to_agent_gps_compass": 2
+hierarchical_policy:
+  high_level_policy:
+    name: "FixedHighLevelPolicy"
+    add_arm_rest: True
+  use_skills:
+    open_cab: "open_cab"
+    open_fridge: "open_fridge"
+    close_cab: "close_cab"
+    close_fridge: "close_fridge"
+    pick: "pick"
+    place: "place"
+    nav: "nav_to_obj"
+    nav_to_receptacle: "nav_to_obj"
+    wait: "wait_skill"
+    reset_arm: "reset_arm_skill"
+  defined_skills: {}