Environment Factory+Habitat 2.0 Code Cleanup #1401

Merged: 75 commits, Jul 19, 2023
Commits
03acdf8
Fixed HRL bug
ASzot Apr 17, 2023
c286381
Allowing actions
ASzot Apr 17, 2023
2c74d33
Allowing None for rnn hidden states
ASzot Apr 29, 2023
3d38661
Fixed bug with constraints
ASzot Apr 29, 2023
ce2be9d
Nav improvements
ASzot Apr 29, 2023
f97ca99
Pddl fixes
ASzot May 7, 2023
ee048e4
Updated printing
ASzot May 10, 2023
f85aa36
Removed comment
ASzot May 10, 2023
41a5932
Profiling
ASzot May 14, 2023
6b0f3bb
More HRL fixes
ASzot May 15, 2023
9489329
Fixed bugs
ASzot May 15, 2023
c7f8ff3
Added arm init
ASzot May 18, 2023
de60c70
Performance profiling system
ASzot May 21, 2023
c258ce5
PR comments
ASzot May 22, 2023
863e567
Update habitat-lab/habitat/tasks/rearrange/multi_task/pddl_logical_ex…
ASzot May 22, 2023
39aa5ef
Update habitat-lab/habitat/tasks/rearrange/multi_task/pddl_predicate.py
ASzot May 22, 2023
add16c3
doc strings
ASzot May 22, 2023
05f138b
CI
ASzot May 23, 2023
bc5a23e
Comments
ASzot May 23, 2023
5c128d4
Update habitat-baselines/habitat_baselines/rl/hrl/hierarchical_policy.py
ASzot May 23, 2023
b684acd
Update habitat-lab/habitat/tasks/rearrange/multi_task/pddl_logical_ex…
ASzot May 23, 2023
6642340
Update habitat-lab/habitat/tasks/rearrange/multi_task/pddl_logical_ex…
ASzot May 23, 2023
13a8d6c
Comments
ASzot May 23, 2023
a92a538
Merge remote-tracking branch 'upstream/main' into hrl_fixes2
ASzot May 23, 2023
c433a15
CI
ASzot May 23, 2023
67afc58
Fixup to fixed policy
ASzot May 23, 2023
02e9675
Fixed tests
ASzot May 28, 2023
682bcb7
Fixed tests
ASzot May 28, 2023
cc96de2
Merge remote-tracking branch 'origin/main' into hrl_fixes2
ASzot May 29, 2023
1a3a599
Dataset upgrade
ASzot May 30, 2023
ce76655
PDDL setup
ASzot May 30, 2023
571c486
Merge remote-tracking branch 'upstream/rearrange_v2' into hrl_fixes3
ASzot May 30, 2023
5597da7
Merged
ASzot May 30, 2023
cc41d54
merged
ASzot May 30, 2023
940bead
Fixed problem with grasping and open cab
ASzot May 31, 2023
997031c
Config options
ASzot May 31, 2023
0d0fb0c
Env factory
ASzot Jun 3, 2023
116a7ab
CI
ASzot Jun 3, 2023
205b4e4
Removed prints
ASzot Jun 4, 2023
55a51a1
obj sampling
ASzot Jun 5, 2023
63b2591
Obj sampler fixes
ASzot Jun 6, 2023
41573d3
Better kinematic mode
ASzot Jun 7, 2023
a2799c4
CI
ASzot Jun 7, 2023
99dd2d7
Fixed stats reporting
ASzot Jun 8, 2023
27cd367
removed dataset v2
ASzot Jun 8, 2023
a204d1c
removing bad print statements
ASzot Jun 8, 2023
d62d0d5
precommit
ASzot Jun 11, 2023
3738b0d
pre commit
ASzot Jun 11, 2023
b92a701
Sub in clone for robo state
ASzot Jun 14, 2023
516967b
Not always updating RNN state during eval
ASzot Jun 18, 2023
5499e6c
Added save callback
ASzot Jun 18, 2023
a4d3830
More docs
ASzot Jun 20, 2023
af9d621
Addressed PR comments
ASzot Jun 23, 2023
0d86ea0
HL and LL hidden states
ASzot Jun 25, 2023
e98ea57
Fixed hidden state issue
ASzot Jun 26, 2023
9d7217a
More flexibility around RNN hidden state size
ASzot Jun 26, 2023
6b1c0fe
Visual encoder flexibility
ASzot Jun 28, 2023
8d0d414
Fixed some logging
ASzot Jul 4, 2023
d865cbf
Merge remote-tracking branch 'upstream/main' into hrl_fixes3
ASzot Jul 4, 2023
a7f5e0d
Added GPU device ID to the dataset generator
ASzot Jul 6, 2023
2997ad0
CI
ASzot Jul 8, 2023
c6cd2fe
Tests
ASzot Jul 8, 2023
151a726
Fixed rnn hidden state issue
ASzot Jul 8, 2023
d769af5
Removed bad assert
ASzot Jul 9, 2023
5d0a742
Storage cleanup
ASzot Jul 9, 2023
7e1093d
Fixed storage args
ASzot Jul 9, 2023
7baa1cb
Rerun CI?
ASzot Jul 10, 2023
f5eee45
Merge remote-tracking branch 'upstream/main' into hrl_fixes3
ASzot Jul 11, 2023
16230d8
Maybe fixed CI
ASzot Jul 11, 2023
c793c8c
Fixed sensor overflow problem
ASzot Jul 11, 2023
f8a297b
Updated logger statement
ASzot Jul 11, 2023
b8a185a
Removed sort
ASzot Jul 11, 2023
4e03f78
Merge remote-tracking branch 'upstream/main' into hrl_fixes3
ASzot Jul 17, 2023
15301b4
Merge remote-tracking branch 'upstream/main' into hrl_fixes3
ASzot Jul 18, 2023
c4b9c3f
Merge remote-tracking branch 'upstream/main' into hrl_fixes3
ASzot Jul 19, 2023
113 changes: 0 additions & 113 deletions habitat-baselines/habitat_baselines/common/construct_vector_env.py

This file was deleted.

34 changes: 34 additions & 0 deletions habitat-baselines/habitat_baselines/common/env_factory.py
@@ -0,0 +1,34 @@
from abc import ABC, abstractmethod
from typing import TYPE_CHECKING

from habitat import VectorEnv

if TYPE_CHECKING:
from omegaconf import DictConfig


class VectorEnvFactory(ABC):
Contributor:
VectorEnvFactory seems to only have one implementation. Is this overengineered?

Contributor (Author):
I agree it only has one implementation in the current setup. However, this makes it much easier to (1) integrate with external environments that have a custom vectorized environment implementation, and (2) create custom vector env logic.

"""
Interface responsible for constructing vectorized environments used in training.
"""

@abstractmethod
def construct_envs(
self,
config: "DictConfig",
workers_ignore_signals: bool = False,
enforce_scenes_greater_eq_environments: bool = False,
is_first_rank: bool = True,
) -> VectorEnv:
"""
Set up a vectorized environment.

:param config: configs that contain num_environments as well as information
necessary to create individual environments.
:param workers_ignore_signals: Passed to :ref:`habitat.VectorEnv`'s constructor.
:param enforce_scenes_greater_eq_environments: Make sure that there are more (or equal)
scenes than environments. This is needed for correct evaluation.
:param is_first_rank: If these environments are being constructed on the rank0 GPU.

:return: VectorEnv object created according to specification.
"""
120 changes: 120 additions & 0 deletions habitat-baselines/habitat_baselines/common/habitat_env_factory.py
@@ -0,0 +1,120 @@
# Copyright (c) Meta Platforms, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import os
import random
from typing import TYPE_CHECKING, Any, List, Type

from habitat import ThreadedVectorEnv, VectorEnv, logger, make_dataset
from habitat.config import read_write
from habitat.gym import make_gym_from_config
from habitat_baselines.common.env_factory import VectorEnvFactory

if TYPE_CHECKING:
from omegaconf import DictConfig


class HabitatVectorEnvFactory(VectorEnvFactory):
def construct_envs(
self,
config: "DictConfig",
workers_ignore_signals: bool = False,
enforce_scenes_greater_eq_environments: bool = False,
is_first_rank: bool = True,
) -> VectorEnv:
r"""Create VectorEnv object with specified config and env class type.
To allow better performance, datasets are split into smaller ones for
each individual env, grouped by scenes.
"""

num_environments = config.habitat_baselines.num_environments
configs = []
dataset = make_dataset(config.habitat.dataset.type)
scenes = config.habitat.dataset.content_scenes
if "*" in config.habitat.dataset.content_scenes:
scenes = dataset.get_scenes_to_load(config.habitat.dataset)

if num_environments < 1:
raise RuntimeError("num_environments must be strictly positive")

if len(scenes) == 0:
raise RuntimeError(
"No scenes to load, multiple process logic relies on being able to split scenes uniquely between processes"
)

random.shuffle(scenes)

scene_splits: List[List[str]] = [[] for _ in range(num_environments)]
if len(scenes) < num_environments:
msg = f"There are less scenes ({len(scenes)}) than environments ({num_environments}). "
if enforce_scenes_greater_eq_environments:
logger.warn(
msg
+ "Reducing the number of environments to be the number of scenes."
)
num_environments = len(scenes)
scene_splits = [[s] for s in scenes]
else:
logger.warn(
msg
+ "Each environment will use all the scenes instead of using a subset."
)
for scene in scenes:
for split in scene_splits:
split.append(scene)
else:
for idx, scene in enumerate(scenes):
scene_splits[idx % len(scene_splits)].append(scene)
assert sum(map(len, scene_splits)) == len(scenes)

for env_index in range(num_environments):
proc_config = config.copy()
with read_write(proc_config):
task_config = proc_config.habitat
task_config.seed = task_config.seed + env_index
remove_measure_names = []
if not is_first_rank:
# Filter out non-rank0 measures from the task config if we are not on rank0.
remove_measure_names.extend(
task_config.task.rank0_measure_names
)
if (env_index != 0) or not is_first_rank:
# Filter out non-rank0_env0 measures from the task config if we
# are not on rank0 env0.
remove_measure_names.extend(
task_config.task.rank0_env0_measure_names
)

task_config.task.measurements = {
k: v
for k, v in task_config.task.measurements.items()
if k not in remove_measure_names
}

if len(scenes) > 0:
task_config.dataset.content_scenes = scene_splits[
env_index
]

configs.append(proc_config)

vector_env_cls: Type[Any]
if int(os.environ.get("HABITAT_ENV_DEBUG", 0)):
logger.warn(
"Using the debug Vector environment interface. Expect slower performance."
)
vector_env_cls = ThreadedVectorEnv
Contributor:
Should this be a separate VectorEnvFactory?

Contributor (Author):
I don't think so, because this is controlling the VectorEnv implementation returned.

else:
vector_env_cls = VectorEnv

envs = vector_env_cls(
make_env_fn=make_gym_from_config,
env_fn_args=tuple((c,) for c in configs),
workers_ignore_signals=workers_ignore_signals,
)

if config.habitat.simulator.renderer.enable_batch_renderer:
envs.initialize_batch_renderer(config)

return envs
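
Two behaviors in this factory are worth noting. Setting the environment variable HABITAT_ENV_DEBUG=1 swaps in ThreadedVectorEnv, which is slower but much easier to step through in a debugger. Scene assignment is a plain round-robin over environments; a small standalone illustration of that split (the scene names here are made up):

scenes = ["scene_a", "scene_b", "scene_c", "scene_d", "scene_e"]
num_environments = 2

# Same round-robin split as in construct_envs above.
scene_splits = [[] for _ in range(num_environments)]
for idx, scene in enumerate(scenes):
    scene_splits[idx % len(scene_splits)].append(scene)

# Each environment gets a disjoint subset of the scenes:
# [["scene_a", "scene_c", "scene_e"], ["scene_b", "scene_d"]]
assert sum(map(len, scene_splits)) == len(scenes)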
@@ -264,6 +264,8 @@ class HrlDefinedSkillConfig(HabitatBaselinesBaseConfig):
@dataclass
class HierarchicalPolicyConfig(HabitatBaselinesBaseConfig):
high_level_policy: Dict[str, Any] = MISSING
# Names of the skills to not load.
ignore_skills: List[str] = field(default_factory=list)
defined_skills: Dict[str, HrlDefinedSkillConfig] = field(
default_factory=dict
)
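
A short sketch of how the new ignore_skills field is meant to be consumed, assuming the dataclass above; the skill name is hypothetical and the import path is the structured-configs module this hunk appears to belong to. The filtering mirrors the change made in hierarchical_policy.py later in this PR.

from habitat_baselines.config.default_structured_configs import (
    HierarchicalPolicyConfig,
)

cfg = HierarchicalPolicyConfig(
    high_level_policy={},  # placeholder; normally populated from YAML
    ignore_skills=["nav_to_obj"],  # hypothetical skill name to skip loading
)

# Only skills not listed in ignore_skills are constructed.
loaded_skills = {
    name: skill
    for name, skill in cfg.defined_skills.items()
    if name not in cfg.ignore_skills
}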
@@ -383,6 +385,28 @@ class ProfilingConfig(HabitatBaselinesBaseConfig):
num_steps_to_capture: int = -1


@dataclass
class VectorEnvFactoryConfig(HabitatBaselinesBaseConfig):
"""
`_target_` points to the `VectorEnvFactory` to set up the vectorized
environment. Defaults to the Habitat vectorized environment setup.
"""

_target_: str = (
"habitat_baselines.common.habitat_env_factory.HabitatEnvFactory"
)


@dataclass
class HydraCallbackConfig(HabitatBaselinesBaseConfig):
"""
Generic callback option for Hydra. Used to create the `_target_` class or
call the `_target_` method.
"""

_target_: Optional[str] = None


Contributor:
Should these two be merged into a single class, like InstantiationConfig? It seems they have the same attribute.

Contributor (Author):
I don't think so, because I want the default to be different.

@dataclass
class HabitatBaselinesConfig(HabitatBaselinesBaseConfig):
# task config can be a list of configs like "A.yaml,B.yaml"
@@ -413,6 +437,8 @@ class HabitatBaselinesConfig(HabitatBaselinesBaseConfig):
log_file: str = "train.log"
force_blind_policy: bool = False
verbose: bool = True
# Creates the vectorized environment.
vector_env_factory: VectorEnvFactoryConfig = VectorEnvFactoryConfig()
Contributor:
Why not have this be a string? Is it because we need to have _target_ to get Hydra boilerplate?

Contributor (Author):
Yes, it is so hydra.utils.instantiate will work.
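
Per the exchange above, `_target_` exists so the trainer can build the factory with hydra.utils.instantiate. A minimal sketch of that usage, assuming the config is reachable at config.habitat_baselines.vector_env_factory; the trainer's actual call site is not shown in this diff.

import hydra


def build_envs(config):
    # Instantiates whichever VectorEnvFactory `_target_` names. Overriding
    # habitat_baselines.vector_env_factory._target_ on the command line swaps
    # in a custom factory without code changes.
    env_factory = hydra.utils.instantiate(
        config.habitat_baselines.vector_env_factory
    )
    return env_factory.construct_envs(config, is_first_rank=True)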

eval_keys_to_include_in_name: List[str] = field(default_factory=list)
# For our use case, the CPU side things are mainly memory copies
# and nothing of substantive compute. PyTorch has been making
@@ -430,6 +456,13 @@ class HabitatBaselinesConfig(HabitatBaselinesBaseConfig):
load_resume_state_config: bool = True
eval: EvalConfig = EvalConfig()
profiling: ProfilingConfig = ProfilingConfig()
# Whether to log the infos that are only logged to a single process to the
# CLI along with the other metrics.
should_log_single_proc_infos: bool = False
Contributor:
docstring

# Called every time a checkpoint is saved.
# Function signature: fn(save_file_path: str) -> None
# If not specified, there is no callback.
on_save_ckpt_callback: Optional[HydraCallbackConfig] = None
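
A hypothetical callback compatible with the documented signature fn(save_file_path: str) -> None. It would be selected by pointing on_save_ckpt_callback._target_ at the function (the module path below is made up); exactly how the trainer invokes the configured target (e.g. through Hydra's call/instantiate utilities) is not shown in this diff.

# my_project/callbacks.py (hypothetical module)
import os
import shutil


def copy_ckpt_to_backup(save_file_path: str) -> None:
    # Illustrative only: mirror every saved checkpoint into a backup directory.
    backup_dir = "/tmp/ckpt_backups"  # hypothetical location
    os.makedirs(backup_dir, exist_ok=True)
    shutil.copy(save_file_path, backup_dir)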


@dataclass
15 changes: 11 additions & 4 deletions habitat-baselines/habitat_baselines/rl/hrl/hierarchical_policy.py
@@ -72,9 +72,19 @@ def __init__(
# Can map multiple skills to the same underlying skill controller.
self._skill_redirects: Dict[int, int] = {}

if "rearrange_stop" not in action_space.spaces:
raise ValueError("Hierarchical policy requires the stop action")
self._stop_action_idx, _ = find_action_range(
action_space, "rearrange_stop"
)

self._pddl = self._create_pddl(full_config, config)
self._create_skills(
dict(config.hierarchical_policy.defined_skills),
{
k: v
for k, v in config.hierarchical_policy.defined_skills.items()
if k not in config.hierarchical_policy.ignore_skills
},
observation_space,
action_space,
full_config,
@@ -98,9 +108,6 @@ def __init__(
observation_space,
action_space,
)
self._stop_action_idx, _ = find_action_range(
action_space, "rearrange_stop"
)
first_idx: Optional[int] = None

# Remap all the Noop skills to the same underlying skill so all the
9 changes: 2 additions & 7 deletions habitat-baselines/habitat_baselines/rl/hrl/skills/nn_skill.py
@@ -175,7 +175,7 @@ def from_config(
)
except FileNotFoundError as e:
raise FileNotFoundError(
"Could not load neural network weights for skill."
f"Could not load neural network weights for skill from ckpt {config.load_ckpt_file}"
) from e

policy_cfg = ckpt_dict["config"]
@@ -224,12 +224,7 @@ def from_config(
)
if len(ckpt_dict) > 0:
try:
actor_critic.load_state_dict(
{ # type: ignore
k[len("actor_critic.") :]: v
for k, v in ckpt_dict["state_dict"].items()
}
)
actor_critic.load_state_dict(ckpt_dict["state_dict"])

except Exception as e:
raise ValueError(
2 changes: 1 addition & 1 deletion habitat-baselines/habitat_baselines/rl/hrl/skills/place.py
@@ -27,7 +27,7 @@ def _mask_pick(self, action, observations):
is_not_holding = 1 - observations[IsHoldingSensor.cls_uuid].view(-1)
for i in torch.nonzero(is_not_holding):
# Do not regrasp the object once it is released.
action[i, self._grip_ac_idx] = -1.0
action.actions[i, self._grip_ac_idx] = -1.0
return action

def _is_skill_done(