diff --git a/habitat-baselines/habitat_baselines/common/__init__.py b/habitat-baselines/habitat_baselines/common/__init__.py index 0f0db8cad5..3a2b019086 100644 --- a/habitat-baselines/habitat_baselines/common/__init__.py +++ b/habitat-baselines/habitat_baselines/common/__init__.py @@ -3,3 +3,7 @@ # Copyright (c) Meta Platforms, Inc. and its affiliates. # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. +from habitat_baselines.common.env_factory import VectorEnvFactory +from habitat_baselines.common.habitat_env_factory import ( + HabitatVectorEnvFactory, +) diff --git a/habitat-baselines/habitat_baselines/common/construct_vector_env.py b/habitat-baselines/habitat_baselines/common/construct_vector_env.py deleted file mode 100644 index b4df6558d3..0000000000 --- a/habitat-baselines/habitat_baselines/common/construct_vector_env.py +++ /dev/null @@ -1,113 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and its affiliates. -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. - -import os -import random -from typing import TYPE_CHECKING, Any, List, Type - -from habitat import ThreadedVectorEnv, VectorEnv, logger, make_dataset -from habitat.config import read_write -from habitat.gym import make_gym_from_config - -if TYPE_CHECKING: - from omegaconf import DictConfig - - -def construct_envs( - config: "DictConfig", - workers_ignore_signals: bool = False, - enforce_scenes_greater_eq_environments: bool = False, - is_first_rank: bool = True, -) -> VectorEnv: - r"""Create VectorEnv object with specified config and env class type. - To allow better performance, dataset are split into small ones for - each individual env, grouped by scenes. - - :param config: configs that contain num_environments as well as information - :param necessary to create individual environments. - :param workers_ignore_signals: Passed to :ref:`habitat.VectorEnv`'s constructor - :param enforce_scenes_greater_eq_environments: Make sure that there are more (or equal) - scenes than environments. This is needed for correct evaluation. - :param is_first_rank: If these environments are being constructed on the rank0 GPU. - - :return: VectorEnv object created according to specification. - """ - - num_environments = config.habitat_baselines.num_environments - configs = [] - dataset = make_dataset(config.habitat.dataset.type) - scenes = config.habitat.dataset.content_scenes - if "*" in config.habitat.dataset.content_scenes: - scenes = dataset.get_scenes_to_load(config.habitat.dataset) - - if num_environments < 1: - raise RuntimeError("num_environments must be strictly positive") - - if len(scenes) == 0: - raise RuntimeError( - "No scenes to load, multiple process logic relies on being able to split scenes uniquely between processes" - ) - - random.shuffle(scenes) - - scene_splits: List[List[str]] = [[] for _ in range(num_environments)] - if len(scenes) < num_environments: - msg = f"There are less scenes ({len(scenes)}) than environments ({num_environments}). " - if enforce_scenes_greater_eq_environments: - logger.warn( - msg - + "Reducing the number of environments to be the number of scenes." - ) - num_environments = len(scenes) - scene_splits = [[s] for s in scenes] - else: - logger.warn( - msg - + "Each environment will use all the scenes instead of using a subset." 
- ) - for scene in scenes: - for split in scene_splits: - split.append(scene) - else: - for idx, scene in enumerate(scenes): - scene_splits[idx % len(scene_splits)].append(scene) - assert sum(map(len, scene_splits)) == len(scenes) - - for env_index in range(num_environments): - proc_config = config.copy() - with read_write(proc_config): - task_config = proc_config.habitat - task_config.seed = task_config.seed + env_index - if (env_index != 0) or not is_first_rank: - # Filter out non-rank0_env0 measures from the task config if we - # are not on rank0 env0. - task_config.task.measurements = { - k: v - for k, v in task_config.task.measurements.items() - if k not in task_config.task.rank0_env0_measure_names - } - if len(scenes) > 0: - task_config.dataset.content_scenes = scene_splits[env_index] - - configs.append(proc_config) - - vector_env_cls: Type[Any] - if int(os.environ.get("HABITAT_ENV_DEBUG", 0)): - logger.warn( - "Using the debug Vector environment interface. Expect slower performance." - ) - vector_env_cls = ThreadedVectorEnv - else: - vector_env_cls = VectorEnv - - envs = vector_env_cls( - make_env_fn=make_gym_from_config, - env_fn_args=tuple((c,) for c in configs), - workers_ignore_signals=workers_ignore_signals, - ) - - if config.habitat.simulator.renderer.enable_batch_renderer: - envs.initialize_batch_renderer(config) - - return envs diff --git a/habitat-baselines/habitat_baselines/common/env_factory.py b/habitat-baselines/habitat_baselines/common/env_factory.py new file mode 100644 index 0000000000..98e8564a48 --- /dev/null +++ b/habitat-baselines/habitat_baselines/common/env_factory.py @@ -0,0 +1,34 @@ +from abc import ABC, abstractmethod +from typing import TYPE_CHECKING + +from habitat import VectorEnv + +if TYPE_CHECKING: + from omegaconf import DictConfig + + +class VectorEnvFactory(ABC): + """ + Interface responsible for constructing vectorized environments used in training. + """ + + @abstractmethod + def construct_envs( + self, + config: "DictConfig", + workers_ignore_signals: bool = False, + enforce_scenes_greater_eq_environments: bool = False, + is_first_rank: bool = True, + ) -> VectorEnv: + """ + Setup a vectorized environment. + + :param config: configs that contain num_environments as well as information + :param workers_ignore_signals: Passed to :ref:`habitat.VectorEnv`'s constructor + :param enforce_scenes_greater_eq_environments: Make sure that there are more (or equal) + :param enforce_scenes_greater_eq_environments: Make sure that there are more (or equal) + scenes than environments. This is needed for correct evaluation. + :param is_first_rank: If these environments are being constructed on the rank0 GPU. + + :return: VectorEnv object created according to specification. + """ diff --git a/habitat-baselines/habitat_baselines/common/habitat_env_factory.py b/habitat-baselines/habitat_baselines/common/habitat_env_factory.py new file mode 100644 index 0000000000..bc3730252c --- /dev/null +++ b/habitat-baselines/habitat_baselines/common/habitat_env_factory.py @@ -0,0 +1,120 @@ +# Copyright (c) Meta Platforms, Inc. and its affiliates. +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
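A minimal sketch of a user-defined factory built on the new `VectorEnvFactory` interface above; the class name and the idea of skipping scene splitting are hypothetical, while the `construct_envs` signature, the `VectorEnv` constructor arguments, and the `make_gym_from_config` helper all come from this patch:

from habitat import VectorEnv
from habitat.gym import make_gym_from_config

from habitat_baselines.common import VectorEnvFactory


class TrivialVectorEnvFactory(VectorEnvFactory):
    """Illustrative factory that hands every worker the full, unsplit config."""

    def construct_envs(
        self,
        config,
        workers_ignore_signals: bool = False,
        enforce_scenes_greater_eq_environments: bool = False,
        is_first_rank: bool = True,
    ) -> VectorEnv:
        num_envs = config.habitat_baselines.num_environments
        # Every environment receives the same config; the default
        # HabitatVectorEnvFactory instead copies the config per worker and
        # assigns per-environment scene splits.
        return VectorEnv(
            make_env_fn=make_gym_from_config,
            env_fn_args=tuple((config,) for _ in range(num_envs)),
            workers_ignore_signals=workers_ignore_signals,
        )

Such a factory would be selected through the new `habitat_baselines.vector_env_factory._target_` option (pointed at the class's import path), since the trainer now builds the factory with `hydra.utils.instantiate` and calls its `construct_envs`.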
+ +import os +import random +from typing import TYPE_CHECKING, Any, List, Type + +from habitat import ThreadedVectorEnv, VectorEnv, logger, make_dataset +from habitat.config import read_write +from habitat.gym import make_gym_from_config +from habitat_baselines.common.env_factory import VectorEnvFactory + +if TYPE_CHECKING: + from omegaconf import DictConfig + + +class HabitatVectorEnvFactory(VectorEnvFactory): + def construct_envs( + self, + config: "DictConfig", + workers_ignore_signals: bool = False, + enforce_scenes_greater_eq_environments: bool = False, + is_first_rank: bool = True, + ) -> VectorEnv: + r"""Create VectorEnv object with specified config and env class type. + To allow better performance, dataset are split into small ones for + each individual env, grouped by scenes. + """ + + num_environments = config.habitat_baselines.num_environments + configs = [] + dataset = make_dataset(config.habitat.dataset.type) + scenes = config.habitat.dataset.content_scenes + if "*" in config.habitat.dataset.content_scenes: + scenes = dataset.get_scenes_to_load(config.habitat.dataset) + + if num_environments < 1: + raise RuntimeError("num_environments must be strictly positive") + + if len(scenes) == 0: + raise RuntimeError( + "No scenes to load, multiple process logic relies on being able to split scenes uniquely between processes" + ) + + random.shuffle(scenes) + + scene_splits: List[List[str]] = [[] for _ in range(num_environments)] + if len(scenes) < num_environments: + msg = f"There are less scenes ({len(scenes)}) than environments ({num_environments}). " + if enforce_scenes_greater_eq_environments: + logger.warn( + msg + + "Reducing the number of environments to be the number of scenes." + ) + num_environments = len(scenes) + scene_splits = [[s] for s in scenes] + else: + logger.warn( + msg + + "Each environment will use all the scenes instead of using a subset." + ) + for scene in scenes: + for split in scene_splits: + split.append(scene) + else: + for idx, scene in enumerate(scenes): + scene_splits[idx % len(scene_splits)].append(scene) + assert sum(map(len, scene_splits)) == len(scenes) + + for env_index in range(num_environments): + proc_config = config.copy() + with read_write(proc_config): + task_config = proc_config.habitat + task_config.seed = task_config.seed + env_index + remove_measure_names = [] + if not is_first_rank: + # Filter out non rank0_measure from the task config if we are not on rank0. + remove_measure_names.extend( + task_config.task.rank0_measure_names + ) + if (env_index != 0) or not is_first_rank: + # Filter out non-rank0_env0 measures from the task config if we + # are not on rank0 env0. + remove_measure_names.extend( + task_config.task.rank0_env0_measure_names + ) + + task_config.task.measurements = { + k: v + for k, v in task_config.task.measurements.items() + if k not in remove_measure_names + } + + if len(scenes) > 0: + task_config.dataset.content_scenes = scene_splits[ + env_index + ] + + configs.append(proc_config) + + vector_env_cls: Type[Any] + if int(os.environ.get("HABITAT_ENV_DEBUG", 0)): + logger.warn( + "Using the debug Vector environment interface. Expect slower performance." 
+ ) + vector_env_cls = ThreadedVectorEnv + else: + vector_env_cls = VectorEnv + + envs = vector_env_cls( + make_env_fn=make_gym_from_config, + env_fn_args=tuple((c,) for c in configs), + workers_ignore_signals=workers_ignore_signals, + ) + + if config.habitat.simulator.renderer.enable_batch_renderer: + envs.initialize_batch_renderer(config) + + return envs diff --git a/habitat-baselines/habitat_baselines/common/rollout_storage.py b/habitat-baselines/habitat_baselines/common/rollout_storage.py index 305677ef80..7acc64a2d5 100644 --- a/habitat-baselines/habitat_baselines/common/rollout_storage.py +++ b/habitat-baselines/habitat_baselines/common/rollout_storage.py @@ -31,8 +31,7 @@ def __init__( num_envs, observation_space, action_space, - recurrent_hidden_state_size, - num_recurrent_layers=1, + actor_critic, is_double_buffered: bool = False, ): action_shape, discrete_actions = get_action_space_info(action_space) @@ -55,8 +54,8 @@ def __init__( self.buffers["recurrent_hidden_states"] = torch.zeros( numsteps + 1, num_envs, - num_recurrent_layers, - recurrent_hidden_state_size, + actor_critic.num_recurrent_layers, + actor_critic.recurrent_hidden_size, ) self.buffers["rewards"] = torch.zeros(numsteps + 1, num_envs, 1) @@ -110,6 +109,7 @@ def to(self, device): self.buffers.map_in_place(lambda v: v.to(device)) self.device = device + @g_timer.avg_time("rollout_storage.insert", level=1) def insert( self, next_observations=None, diff --git a/habitat-baselines/habitat_baselines/config/default_structured_configs.py b/habitat-baselines/habitat_baselines/config/default_structured_configs.py index 93f7e9ddc8..0aadbc239a 100644 --- a/habitat-baselines/habitat_baselines/config/default_structured_configs.py +++ b/habitat-baselines/habitat_baselines/config/default_structured_configs.py @@ -264,6 +264,8 @@ class HrlDefinedSkillConfig(HabitatBaselinesBaseConfig): @dataclass class HierarchicalPolicyConfig(HabitatBaselinesBaseConfig): high_level_policy: Dict[str, Any] = MISSING + # Names of the skills to not load. + ignore_skills: List[str] = field(default_factory=list) defined_skills: Dict[str, HrlDefinedSkillConfig] = field( default_factory=dict ) @@ -383,6 +385,26 @@ class ProfilingConfig(HabitatBaselinesBaseConfig): num_steps_to_capture: int = -1 +@dataclass +class VectorEnvFactoryConfig(HabitatBaselinesBaseConfig): + """ + `_target_` points to the `VectorEnvFactory` to setup the vectorized + environment. Defaults to the Habitat vectorized environment setup. + """ + + _target_: str = "habitat_baselines.common.HabitatVectorEnvFactory" + + +@dataclass +class HydraCallbackConfig(HabitatBaselinesBaseConfig): + """ + Generic callback option for Hydra. Used to create the `_target_` class or + call the `_target_` method. + """ + + _target_: Optional[str] = None + + @dataclass class HabitatBaselinesConfig(HabitatBaselinesBaseConfig): # task config can be a list of configs like "A.yaml,B.yaml" @@ -413,6 +435,8 @@ class HabitatBaselinesConfig(HabitatBaselinesBaseConfig): log_file: str = "train.log" force_blind_policy: bool = False verbose: bool = True + # Creates the vectorized environment. + vector_env_factory: VectorEnvFactoryConfig = VectorEnvFactoryConfig() eval_keys_to_include_in_name: List[str] = field(default_factory=list) # For our use case, the CPU side things are mainly memory copies # and nothing of substantive compute. 
PyTorch has been making @@ -430,6 +454,13 @@ class HabitatBaselinesConfig(HabitatBaselinesBaseConfig): load_resume_state_config: bool = True eval: EvalConfig = EvalConfig() profiling: ProfilingConfig = ProfilingConfig() + # Whether to log the infos that are only logged to a single process to the + # CLI along with the other metrics. + should_log_single_proc_infos: bool = False + # Called every time a checkpoint is saved. + # Function signature: fn(save_file_path: str) -> None + # If not specified, there is no callback. + on_save_ckpt_callback: Optional[HydraCallbackConfig] = None @dataclass diff --git a/habitat-baselines/habitat_baselines/rl/ddppo/policy/resnet_policy.py b/habitat-baselines/habitat_baselines/rl/ddppo/policy/resnet_policy.py index daa77d3961..8546c01203 100644 --- a/habitat-baselines/habitat_baselines/rl/ddppo/policy/resnet_policy.py +++ b/habitat-baselines/habitat_baselines/rl/ddppo/policy/resnet_policy.py @@ -601,6 +601,10 @@ def is_blind(self): def num_recurrent_layers(self): return self.state_encoder.num_recurrent_layers + @property + def recurrent_hidden_size(self): + return self._hidden_size + @property def perception_embedding_size(self): return self._hidden_size diff --git a/habitat-baselines/habitat_baselines/rl/hrl/hierarchical_policy.py b/habitat-baselines/habitat_baselines/rl/hrl/hierarchical_policy.py index 0ff20e4156..3ff48c8463 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/hierarchical_policy.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/hierarchical_policy.py @@ -20,22 +20,11 @@ PddlProblem, ) from habitat_baselines.common.baseline_registry import baseline_registry -from habitat_baselines.rl.hrl.hl import ( # noqa: F401. - FixedHighLevelPolicy, - HighLevelPolicy, - NeuralHighLevelPolicy, -) -from habitat_baselines.rl.hrl.skills import ( # noqa: F401. - ArtObjSkillPolicy, - NavSkillPolicy, - NoopSkillPolicy, - OracleNavPolicy, - PickSkillPolicy, - PlaceSkillPolicy, - ResetArmSkill, - SkillPolicy, - WaitSkillPolicy, -) +from habitat_baselines.rl.hrl.hl import FixedHighLevelPolicy # noqa: F401. +from habitat_baselines.rl.hrl.hl import NeuralHighLevelPolicy # noqa: F401. +from habitat_baselines.rl.hrl.hl import HighLevelPolicy +from habitat_baselines.rl.hrl.skills import * # noqa: F403,F401. +from habitat_baselines.rl.hrl.skills import NoopSkillPolicy, SkillPolicy from habitat_baselines.rl.hrl.utils import find_action_range from habitat_baselines.rl.ppo.policy import Policy, PolicyActionData from habitat_baselines.utils.common import get_num_actions @@ -72,13 +61,29 @@ def __init__( # Can map multiple skills to the same underlying skill controller. 
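A hedged sketch of how the new `on_save_ckpt_callback` hook could be used; the function and module path below are made up, while the `fn(save_file_path: str) -> None` signature and the Hydra `_target_` mechanism are what this patch defines:

import shutil


def backup_checkpoint(save_file_path: str) -> None:
    """Example callback: mirror every saved checkpoint to a second location."""
    shutil.copy(save_file_path, save_file_path + ".backup")

Setting `habitat_baselines.on_save_ckpt_callback._target_` to the import path of such a function (e.g. the hypothetical `my_project.callbacks.backup_checkpoint`) makes the trainer invoke it via `hydra.utils.call(..., save_file_path=save_file_path)` right after each `torch.save`.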
self._skill_redirects: Dict[int, int] = {} + if "rearrange_stop" not in action_space.spaces: + raise ValueError("Hierarchical policy requires the stop action") + self._stop_action_idx, _ = find_action_range( + action_space, "rearrange_stop" + ) + self._pddl = self._create_pddl(full_config, config) self._create_skills( - dict(config.hierarchical_policy.defined_skills), + { + k: v + for k, v in config.hierarchical_policy.defined_skills.items() + if k not in config.hierarchical_policy.ignore_skills + }, observation_space, action_space, full_config, ) + self._max_skill_rnn_layers = max( + skill.num_recurrent_layers for skill in self._skills.values() + ) + self._any_ll_hidden_state = any( + skill.has_hidden_state for skill in self._skills.values() + ) self._cur_skills: np.ndarray = np.full( (self._num_envs,), -1, dtype=np.int32 @@ -98,9 +103,6 @@ def __init__( observation_space, action_space, ) - self._stop_action_idx, _ = find_action_range( - action_space, "rearrange_stop" - ) first_idx: Optional[int] = None # Remap all the Noop skills to the same underlying skill so all the @@ -112,6 +114,13 @@ def __init__( else: self._skill_redirects[skill_i] = first_idx + self._hl_needs_recurrent_state = ( + self._high_level_policy.num_recurrent_layers + ) > 0 + self._recurrent_hidden_size = ( + full_config.habitat_baselines.rl.ppo.hidden_size + ) + def _create_skills( self, skills, observation_space, action_space, full_config ): @@ -156,6 +165,7 @@ def _create_pddl(self, full_config, config) -> PddlDomain: domain_file, task_spec_file, config, + read_config=False, ) def eval(self): @@ -168,10 +178,13 @@ def get_policy_action_space( Fetches the policy action space for learning. If we are learning the HL policy, it will return its custom action space for learning. """ - - return self._high_level_policy.get_policy_action_space( - env_action_space - ) + if self._any_ll_hidden_state or not self._hl_needs_recurrent_state: + # The LL skill will take priority for the prev action. + return env_action_space + else: + return self._high_level_policy.get_policy_action_space( + env_action_space + ) def extract_policy_info( self, action_data, infos, dones @@ -195,13 +208,16 @@ def extract_policy_info( return ret_policy_infos + @property + def recurrent_hidden_size(self) -> int: + return self._recurrent_hidden_size + @property def num_recurrent_layers(self): - if self._high_level_policy.num_recurrent_layers != 0: - return self._high_level_policy.num_recurrent_layers - else: - first_skill = self._skills[list(self._skills.keys())[0]] - return first_skill.num_recurrent_layers + return ( + self._max_skill_rnn_layers + + self._high_level_policy.num_recurrent_layers + ) @property def should_load_agent_state(self): @@ -271,14 +287,17 @@ def act( (self._num_envs, get_num_actions(self._action_space)), device=masks.device, ) + hl_rnn_hidden_states, ll_rnn_hidden_states = self._split_hidden_states( + rnn_hidden_states + ) # Always call high-level if the episode is over. self._cur_call_high_level |= (~masks_cpu).view(-1) - skill_id = self._cur_skills[0] hl_terminate_episode, hl_info = self._update_skills( observations, - rnn_hidden_states, + hl_rnn_hidden_states, + ll_rnn_hidden_states, prev_actions, masks, actions, @@ -287,12 +306,15 @@ def act( deterministic, ) did_choose_new_skill = self._cur_call_high_level.clone() + if hl_info.rnn_hidden_states is not None: + # Update the HL hidden state. 
+ hl_rnn_hidden_states = hl_info.rnn_hidden_states grouped_skills = self._broadcast_skill_ids( self._cur_skills, sel_dat={ "observations": observations, - "rnn_hidden_states": rnn_hidden_states, + "rnn_hidden_states": ll_rnn_hidden_states, "prev_actions": prev_actions, "masks": masks, }, @@ -305,9 +327,9 @@ def act( masks=batch_dat["masks"], cur_batch_idx=batch_ids, ) - actions[batch_ids] += action_data.actions - rnn_hidden_states[batch_ids] = action_data.rnn_hidden_states + # Update the LL hidden state. + ll_rnn_hidden_states[batch_ids] = action_data.rnn_hidden_states actions[:, self._stop_action_idx] = 0.0 @@ -317,7 +339,8 @@ def act( actions, ) = self._get_terminations( observations, - rnn_hidden_states, + hl_rnn_hidden_states, + ll_rnn_hidden_states, prev_actions, masks, actions, @@ -330,30 +353,68 @@ def act( for batch_idx in torch.nonzero(should_terminate_episode): actions[batch_idx, self._stop_action_idx] = 1.0 - action_kwargs = { - "rnn_hidden_states": rnn_hidden_states, - "actions": actions, - } - action_kwargs.update(hl_info) + rnn_hidden_states = self._combine_hidden_states( + hl_rnn_hidden_states, ll_rnn_hidden_states + ) + + # This will update the prev action + if self._any_ll_hidden_state or not self._hl_needs_recurrent_state: + # The LL skill will take priority for the prev action + use_action = actions + else: + use_action = hl_info.actions return PolicyActionData( take_actions=actions, policy_info=log_info, should_inserts=did_choose_new_skill.view(-1, 1), - **action_kwargs, + actions=use_action, + values=hl_info.values, + action_log_probs=hl_info.action_log_probs, + rnn_hidden_states=rnn_hidden_states, ) + def _split_hidden_states( + self, rnn_hidden_states: torch.Tensor + ) -> Tuple[torch.Tensor, torch.Tensor]: + hl_num_layers = self._high_level_policy.num_recurrent_layers + + if hl_num_layers == 0 or self._max_skill_rnn_layers == 0: + # No need to split hidden states if both aren't being used. + return rnn_hidden_states, rnn_hidden_states + else: + # Split the hidden state for HL and LL policies + hl_rnn_hidden_states = rnn_hidden_states[:, :hl_num_layers] + ll_rnn_hidden_states = rnn_hidden_states[:, hl_num_layers:] + return hl_rnn_hidden_states, ll_rnn_hidden_states + + def _combine_hidden_states( + self, + hl_rnn_hidden_states: torch.Tensor, + ll_rnn_hidden_states: torch.Tensor, + ) -> torch.Tensor: + if not self._hl_needs_recurrent_state: + return ll_rnn_hidden_states + elif self._max_skill_rnn_layers == 0: + return hl_rnn_hidden_states + else: + # Stack the LL and HL hidden states. + return torch.cat( + [hl_rnn_hidden_states, ll_rnn_hidden_states], dim=1 + ) + def _update_skills( self, observations, - rnn_hidden_states, + hl_rnn_hidden_states, + ll_rnn_hidden_states, prev_actions, masks, actions, log_info, should_choose_new_skill: torch.BoolTensor, deterministic: bool, - ) -> Tuple[torch.BoolTensor, Dict[str, Any]]: + ) -> Tuple[torch.BoolTensor, PolicyActionData]: """ Will potentially update the set of running skills according to the HL policy. This updates the active skill indices in `self._cur_skills` in @@ -370,7 +431,6 @@ def _update_skills( # If any skills want to terminate invoke the high-level policy to get # the next skill. 
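A toy illustration of the hidden-state layout that the new `_split_hidden_states` / `_combine_hidden_states` pair assumes (the layer counts and hidden size below are invented for the example): the high-level policy's recurrent layers occupy the leading slots of the layer dimension and the low-level skills' layers follow, which is why `num_recurrent_layers` now reports the sum of the two:

import torch

hl_layers, ll_layers, num_envs, hidden = 2, 1, 4, 512
rnn_hidden_states = torch.zeros(num_envs, hl_layers + ll_layers, hidden)

# What _split_hidden_states does when both HL and LL keep a recurrent state.
hl_states = rnn_hidden_states[:, :hl_layers]
ll_states = rnn_hidden_states[:, hl_layers:]

# What _combine_hidden_states does on the way back out.
recombined = torch.cat([hl_states, ll_states], dim=1)
assert recombined.shape == rnn_hidden_states.shape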
hl_terminate_episode = torch.zeros(self._num_envs, dtype=torch.bool) - hl_info: Dict[str, Any] = self._high_level_policy.create_hl_info() if should_choose_new_skill.sum() > 0: ( new_skills, @@ -379,7 +439,7 @@ def _update_skills( hl_info, ) = self._high_level_policy.get_next_skill( observations, - rnn_hidden_states, + hl_rnn_hidden_states, prev_actions, masks, should_choose_new_skill, @@ -395,42 +455,51 @@ def _update_skills( ) for skill_id, (batch_ids, _) in sel_grouped_skills.items(): - self._skills[skill_id].on_enter( + ( + ll_rnn_hidden_states_batched, + prev_actions_batched, + ) = self._skills[skill_id].on_enter( [new_skill_args[i] for i in batch_ids], batch_ids, observations, - rnn_hidden_states, + ll_rnn_hidden_states, prev_actions, ) - if "rnn_hidden_states" in hl_info: - if self._skills[skill_id].has_hidden_state: - raise ValueError( - f"The code does not currently support neural LL and neural HL skills. Skill={self._skills[skill_id]}, HL={self._high_level_policy}" - ) - if prev_actions.shape[0] == len(batch_ids): - prev_actions = hl_info["actions"][batch_ids] - else: - prev_actions[batch_ids] = hl_info["actions"][batch_ids] - - rnn_hidden_states = ( - self._high_level_policy.update_hidden_states( - batch_ids, rnn_hidden_states, hl_info - ) + + _write_tensor_batched( + ll_rnn_hidden_states, + ll_rnn_hidden_states_batched, + batch_ids, + ) + _write_tensor_batched( + prev_actions, prev_actions_batched, batch_ids + ) + + if hl_info.rnn_hidden_states is not None: + # Only update the RNN hidden state for NEW skills. + hl_rnn_hidden_states = _update_tensor_batched( + hl_rnn_hidden_states, + hl_info.rnn_hidden_states, + batch_ids, ) - hl_info["actions"] = prev_actions - hl_info["rnn_hidden_states"] = rnn_hidden_states + # We made at least some decisions, so update the action info + hl_info.rnn_hidden_states = hl_rnn_hidden_states should_choose_new_skill = should_choose_new_skill.numpy() self._cur_skills = ( (~should_choose_new_skill) * self._cur_skills ) + (should_choose_new_skill * new_skills) + else: + # We made no decisions, so return an empty HL action info. 
+ hl_info = PolicyActionData() return hl_terminate_episode, hl_info def _get_terminations( self, observations, - rnn_hidden_states, + hl_rnn_hidden_states, + ll_rnn_hidden_states, prev_actions, masks, actions, @@ -450,7 +519,7 @@ def _get_terminations( hl_wants_skill_term = self._high_level_policy.get_termination( observations, - rnn_hidden_states, + hl_rnn_hidden_states, prev_actions, masks, self._cur_skills, @@ -465,7 +534,7 @@ def _get_terminations( self._cur_skills, sel_dat={ "observations": observations, - "rnn_hidden_states": rnn_hidden_states, + "rnn_hidden_states": ll_rnn_hidden_states, "prev_actions": prev_actions, "masks": masks, "actions": actions, @@ -474,8 +543,8 @@ def _get_terminations( ) for skill_id, (batch_ids, dat) in grouped_skills.items(): ( - self._cur_call_high_level[batch_ids], - bad_should_terminate[batch_ids], + call_hl_batch, + bad_should_terminate_batch, new_actions, ) = self._skills[skill_id].should_terminate( **dat, @@ -485,6 +554,13 @@ def _get_terminations( self._idx_to_name[self._cur_skills[i]] for i in batch_ids ], ) + + _write_tensor_batched( + self._cur_call_high_level, call_hl_batch, batch_ids + ) + _write_tensor_batched( + bad_should_terminate, bad_should_terminate_batch, batch_ids + ) actions[batch_ids] += new_actions return self._cur_call_high_level, bad_should_terminate, actions @@ -524,9 +600,44 @@ def from_config( **kwargs, ): return cls( - config.habitat_baselines.rl.policy, - config, - observation_space, - orig_action_space, - config.habitat_baselines.num_environments, + config=config.habitat_baselines.rl.policy, + full_config=config, + observation_space=observation_space, + action_space=orig_action_space, + num_envs=config.habitat_baselines.num_environments, ) + + +def _write_tensor_batched( + source_tensor: torch.Tensor, + write_tensor: torch.Tensor, + write_idxs: List[int], +) -> torch.Tensor: + """ + This assumes that write_tensor has already been indexed into by + `write_idxs` and only needs to be copied to `source_tensor`. Updates + `source_tensor` in place. + """ + + if source_tensor.shape[0] == len(write_idxs): + source_tensor = write_tensor + else: + source_tensor[write_idxs] = write_tensor + return source_tensor + + +def _update_tensor_batched( + source_tensor: torch.Tensor, + write_tensor: torch.Tensor, + write_idxs: List[int], +) -> torch.Tensor: + """ + Writes the indices of `write_idxs` from `write_tensor` into + `source_tensor`. Updates `source_tensor` in place. 
+ """ + + if source_tensor.shape[0] == len(write_idxs): + source_tensor = write_tensor[write_idxs] + else: + source_tensor[write_idxs] = write_tensor[write_idxs] + return source_tensor diff --git a/habitat-baselines/habitat_baselines/rl/hrl/hl/fixed_policy.py b/habitat-baselines/habitat_baselines/rl/hrl/hl/fixed_policy.py index d14b2f0a8d..a7c2be085e 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/hl/fixed_policy.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/hl/fixed_policy.py @@ -9,6 +9,7 @@ from habitat.tasks.rearrange.multi_task.rearrange_pddl import parse_func from habitat_baselines.common.logging import baselines_logger from habitat_baselines.rl.hrl.hl.high_level_policy import HighLevelPolicy +from habitat_baselines.rl.ppo.policy import PolicyActionData class FixedHighLevelPolicy(HighLevelPolicy): @@ -119,4 +120,4 @@ def get_next_skill( self._next_sol_idxs[batch_idx] += 1 - return next_skill, skill_args_data, immediate_end, {} + return next_skill, skill_args_data, immediate_end, PolicyActionData() diff --git a/habitat-baselines/habitat_baselines/rl/hrl/hl/high_level_policy.py b/habitat-baselines/habitat_baselines/rl/hrl/hl/high_level_policy.py index 0b66555d2b..ab76b19de7 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/hl/high_level_policy.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/hl/high_level_policy.py @@ -5,6 +5,7 @@ import torch.nn as nn from habitat.tasks.rearrange.multi_task.pddl_domain import PddlProblem +from habitat_baselines.rl.ppo.policy import PolicyActionData class HighLevelPolicy(nn.Module): @@ -58,26 +59,6 @@ def evaluate_actions( def num_recurrent_layers(self): return 0 - def update_hidden_states( - self, - batch_ids: List[int], - old_rnn_hidden_states: torch.Tensor, - hl_info: Dict[str, Any], - ) -> torch.Tensor: - """ - Update the hidden states from the previous hidden state and selected HL - action information. Called after every HL action prediction. Returns - the updated hidden states of same shape as `old_rnn_hidden_states`. - """ - - if old_rnn_hidden_states.shape[0] == len(batch_ids): - return hl_info["rnn_hidden_states"][batch_ids] - else: - old_rnn_hidden_states[batch_ids] = hl_info["rnn_hidden_states"][ - batch_ids - ] - return old_rnn_hidden_states - def parameters(self): return iter([nn.Parameter(torch.zeros((1,), device=self._device))]) @@ -95,7 +76,7 @@ def get_next_skill( plan_masks: torch.Tensor, deterministic: bool, log_info: List[Dict[str, Any]], - ) -> Tuple[torch.Tensor, List[Any], torch.BoolTensor, Dict[str, Any]]: + ) -> Tuple[torch.Tensor, List[Any], torch.BoolTensor, PolicyActionData]: """ Get the next skill to be executed. @@ -113,13 +94,10 @@ def get_next_skill( - skill_args_data: Arguments for the next skill. - immediate_end: Binary masks indicating which environment(s) should end immediately. - - Information for PolicyActionData + - PolicyActionData information for learning. """ raise NotImplementedError() - def create_hl_info(self) -> Dict[str, Any]: - return {} - def apply_mask(self, mask: torch.Tensor) -> None: """ Called before every step with the mask information at the current step. 
diff --git a/habitat-baselines/habitat_baselines/rl/hrl/hl/neural_policy.py b/habitat-baselines/habitat_baselines/rl/hrl/hl/neural_policy.py index 2460098a54..b04b12d77a 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/hl/neural_policy.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/hl/neural_policy.py @@ -15,7 +15,7 @@ from habitat_baselines.rl.models.rnn_state_encoder import ( build_rnn_state_encoder, ) -from habitat_baselines.rl.ppo.policy import CriticHead +from habitat_baselines.rl.ppo.policy import CriticHead, PolicyActionData from habitat_baselines.utils.common import CategoricalNet @@ -85,9 +85,6 @@ def __init__(self, *args, **kwargs): def should_load_agent_state(self): return True - def create_hl_info(self): - return {"actions": None} - def _setup_actions(self) -> List[PddlAction]: all_actions = self._pddl_prob.get_possible_actions() all_actions = [ @@ -215,10 +212,10 @@ def get_next_skill( next_skill, skill_args_data, immediate_end, - { - "action_log_probs": action_log_probs, - "values": values, - "actions": skill_sel, - "rnn_hidden_states": rnn_hidden_states, - }, + PolicyActionData( + action_log_probs=action_log_probs, + values=values, + actions=skill_sel, + rnn_hidden_states=rnn_hidden_states, + ), ) diff --git a/habitat-baselines/habitat_baselines/rl/hrl/skills/nn_skill.py b/habitat-baselines/habitat_baselines/rl/hrl/skills/nn_skill.py index 1dcbf07d20..79cc49cd56 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/skills/nn_skill.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/skills/nn_skill.py @@ -104,7 +104,7 @@ def on_enter( rnn_hidden_states, prev_actions, ): - super().on_enter( + ret = super().on_enter( skill_arg, batch_idxs, observations, @@ -112,6 +112,7 @@ def on_enter( prev_actions, ) self._did_want_done *= 0.0 + return ret def _get_filtered_obs(self, observations, cur_batch_idx) -> TensorDict: return TensorDict( @@ -175,7 +176,7 @@ def from_config( ) except FileNotFoundError as e: raise FileNotFoundError( - "Could not load neural network weights for skill." + f"Could not load neural network weights for skill from ckpt {config.load_ckpt_file}" ) from e policy_cfg = ckpt_dict["config"] @@ -224,12 +225,7 @@ def from_config( ) if len(ckpt_dict) > 0: try: - actor_critic.load_state_dict( - { # type: ignore - k[len("actor_critic.") :]: v - for k, v in ckpt_dict["state_dict"].items() - } - ) + actor_critic.load_state_dict(ckpt_dict["state_dict"]) except Exception as e: raise ValueError( diff --git a/habitat-baselines/habitat_baselines/rl/hrl/skills/place.py b/habitat-baselines/habitat_baselines/rl/hrl/skills/place.py index a99ac1a8e1..5ff21b9099 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/skills/place.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/skills/place.py @@ -27,7 +27,7 @@ def _mask_pick(self, action, observations): is_not_holding = 1 - observations[IsHoldingSensor.cls_uuid].view(-1) for i in torch.nonzero(is_not_holding): # Do not regrasp the object once it is released. 
- action[i, self._grip_ac_idx] = -1.0 + action.actions[i, self._grip_ac_idx] = -1.0 return action def _is_skill_done( diff --git a/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py b/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py index 30fdb21f98..f5d9828a87 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py @@ -68,6 +68,7 @@ def __init__( break if not found_grip and not self.should_ignore_grip: raise ValueError(f"Could not find grip action in {action_space}") + self._stop_action_idx, _ = find_action_range( action_space, "rearrange_stop" ) @@ -233,7 +234,7 @@ def on_enter( return ( rnn_hidden_states[batch_idxs] * 0.0, - prev_actions[batch_idxs] * 0.0, + prev_actions[batch_idxs] * 0, ) def set_pddl_problem(self, pddl_prob): diff --git a/habitat-baselines/habitat_baselines/rl/ppo/agent_access_mgr.py b/habitat-baselines/habitat_baselines/rl/ppo/agent_access_mgr.py index 542eee9110..837524e271 100644 --- a/habitat-baselines/habitat_baselines/rl/ppo/agent_access_mgr.py +++ b/habitat-baselines/habitat_baselines/rl/ppo/agent_access_mgr.py @@ -1,5 +1,5 @@ from abc import ABC, abstractmethod -from typing import TYPE_CHECKING, Any, Callable, Dict, Optional, Tuple +from typing import TYPE_CHECKING, Any, Callable, Dict, Optional import gym.spaces as spaces @@ -115,14 +115,6 @@ def load_ckpt_state_dict(self, ckpt: Dict) -> None: def load_state_dict(self, state: Dict) -> None: raise NotImplementedError() - @property - @abstractmethod - def hidden_state_shape(self) -> Tuple[int]: - """ - The shape of the tensor to track the hidden state, such as the RNN hidden state. - """ - raise NotImplementedError() - @abstractmethod def after_update(self) -> None: """ @@ -137,3 +129,15 @@ def pre_rollout(self) -> None: Called before a rollout is collected. """ raise NotImplementedError() + + @abstractmethod + def _create_storage( + self, + num_envs: int, + env_spec: EnvironmentSpec, + actor_critic: Policy, + policy_action_space: spaces.Space, + config: "DictConfig", + device, + ) -> Storage: + pass diff --git a/habitat-baselines/habitat_baselines/rl/ppo/policy.py b/habitat-baselines/habitat_baselines/rl/ppo/policy.py index 61182b9c01..56292fc028 100644 --- a/habitat-baselines/habitat_baselines/rl/ppo/policy.py +++ b/habitat-baselines/habitat_baselines/rl/ppo/policy.py @@ -57,7 +57,7 @@ class PolicyActionData: current policy. """ - rnn_hidden_states: torch.Tensor + rnn_hidden_states: Optional[torch.Tensor] = None actions: Optional[torch.Tensor] = None values: Optional[torch.Tensor] = None action_log_probs: Optional[torch.Tensor] = None @@ -99,9 +99,19 @@ def should_load_agent_state(self): def num_recurrent_layers(self) -> int: return 0 + @property + def recurrent_hidden_size(self) -> int: + return 0 + def forward(self, *x): raise NotImplementedError + @property + def visual_encoder(self) -> Optional[nn.Module]: + """ + Gets the visual encoder for the policy. 
+ """ + def get_policy_action_space( self, env_action_space: spaces.Space ) -> spaces.Space: @@ -207,6 +217,14 @@ def __init__( **cfg, ) + @property + def recurrent_hidden_size(self) -> int: + return self.net.recurrent_hidden_size + + @property + def visual_encoder(self) -> Optional[nn.Module]: + return self.net.visual_encoder + @property def should_load_agent_state(self): return True @@ -372,6 +390,11 @@ def output_size(self): def num_recurrent_layers(self): pass + @property + @abc.abstractmethod + def recurrent_hidden_size(self): + pass + @property @abc.abstractmethod def is_blind(self): @@ -438,6 +461,10 @@ def is_blind(self): def num_recurrent_layers(self): return self.state_encoder.num_recurrent_layers + @property + def recurrent_hidden_size(self): + return self._hidden_size + @property def perception_embedding_size(self): return self._hidden_size diff --git a/habitat-baselines/habitat_baselines/rl/ppo/ppo.py b/habitat-baselines/habitat_baselines/rl/ppo/ppo.py index 8a98b4c9f9..569fd4e49a 100644 --- a/habitat-baselines/habitat_baselines/rl/ppo/ppo.py +++ b/habitat-baselines/habitat_baselines/rl/ppo/ppo.py @@ -35,20 +35,21 @@ class PPO(nn.Module, Updater): @classmethod def from_config(cls, actor_critic: NetPolicy, config): - config = {k.lower(): v for k, v in config.items()} - param_dict = dict(actor_critic=actor_critic) - sig = inspect.signature(cls.__init__) - for p in sig.parameters.values(): - if p.name == "self" or p.name in param_dict: - continue - - assert p.name in config, "{} parameter '{}' not in config".format( - cls.__name__, p.name - ) - - param_dict[p.name] = config[p.name] - - return cls(**param_dict) + return cls( + actor_critic=actor_critic, + clip_param=config.clip_param, + ppo_epoch=config.ppo_epoch, + num_mini_batch=config.num_mini_batch, + value_loss_coef=config.value_loss_coef, + entropy_coef=config.entropy_coef, + lr=config.lr, + eps=config.eps, + max_grad_norm=config.max_grad_norm, + use_clipped_value_loss=config.use_clipped_value_loss, + use_normalized_advantage=config.use_normalized_advantage, + entropy_target_factor=config.entropy_target_factor, + use_adaptive_entropy_pen=config.use_adaptive_entropy_pen, + ) def __init__( self, @@ -99,9 +100,16 @@ def __init__( ).to(device=self.device) self.use_normalized_advantage = use_normalized_advantage + self.optimizer = self._create_optimizer(lr, eps) - params = list(filter(lambda p: p.requires_grad, self.parameters())) + self.non_ac_params = [ + p + for name, p in self.named_parameters() + if not name.startswith("actor_critic.") + ] + def _create_optimizer(self, lr, eps): + params = list(filter(lambda p: p.requires_grad, self.parameters())) if len(params) > 0: optim_cls = optim.Adam optim_kwargs = dict( @@ -120,15 +128,9 @@ def __init__( else: optim_cls = torch.optim._multi_tensor.Adam - self.optimizer = optim_cls(**optim_kwargs) + return optim_cls(**optim_kwargs) else: - self.optimizer = None - - self.non_ac_params = [ - p - for name, p in self.named_parameters() - if not name.startswith("actor_critic.") - ] + return None def forward(self, *x): raise NotImplementedError @@ -370,3 +372,15 @@ def before_step(self) -> torch.Tensor: def after_step(self) -> None: if isinstance(self.entropy_coef, LagrangeInequalityCoefficient): self.entropy_coef.project_into_bounds() + + def after_update(self): + pass + + def get_resume_state(self): + return { + "optim_state": self.optimizer.state_dict(), + } + + def load_state_dict(self, state): + if "optim_state" in state: + self.optimizer.load_state_dict(state["optim_state"]) diff --git 
a/habitat-baselines/habitat_baselines/rl/ppo/ppo_trainer.py b/habitat-baselines/habitat_baselines/rl/ppo/ppo_trainer.py index e4aaf7444b..633a3c1628 100644 --- a/habitat-baselines/habitat_baselines/rl/ppo/ppo_trainer.py +++ b/habitat-baselines/habitat_baselines/rl/ppo/ppo_trainer.py @@ -11,6 +11,7 @@ from collections import defaultdict, deque from typing import Any, Dict, List, Optional, Set +import hydra import numpy as np import torch import tqdm @@ -26,9 +27,9 @@ observations_to_image, overlay_frame, ) +from habitat_baselines.common import VectorEnvFactory from habitat_baselines.common.base_trainer import BaseRLTrainer from habitat_baselines.common.baseline_registry import baseline_registry -from habitat_baselines.common.construct_vector_env import construct_envs from habitat_baselines.common.env_spec import EnvironmentSpec from habitat_baselines.common.obs_transformers import ( apply_obs_transforms_batch, @@ -52,7 +53,6 @@ ) from habitat_baselines.rl.ddppo.policy import PointNavResNetNet from habitat_baselines.rl.ppo.agent_access_mgr import AgentAccessMgr -from habitat_baselines.rl.ppo.policy import NetPolicy from habitat_baselines.rl.ppo.single_agent_access_mgr import ( # noqa: F401. SingleAgentAccessMgr, ) @@ -140,7 +140,10 @@ def _init_envs(self, config=None, is_eval: bool = False): if config is None: config = self.config - self.envs = construct_envs( + env_factory: VectorEnvFactory = hydra.utils.instantiate( + config.habitat_baselines.vector_env_factory + ) + self.envs = env_factory.construct_envs( config, workers_ignore_signals=is_slurm_batch_job(), enforce_scenes_greater_eq_environments=is_eval, @@ -149,21 +152,23 @@ def _init_envs(self, config=None, is_eval: bool = False): or torch.distributed.get_rank() == 0 ), ) + self._env_spec = EnvironmentSpec( observation_space=self.envs.observation_spaces[0], action_space=self.envs.action_spaces[0], orig_action_space=self.envs.orig_action_spaces[0], ) - # The measure keys that should only be logged on rank0,gpu0 and nowhere + # The measure keys that should only be logged on rank0 and nowhere # else. They will be excluded from all other workers and only reported # from the single worker. - self._rank0_env0_keys: Set[str] = set( - self.config.habitat.task.rank0_env0_measure_names + self._rank0_keys: Set[str] = set( + list(self.config.habitat.task.rank0_env0_measure_names) + + list(self.config.habitat.task.rank0_measure_names) ) - # Information on measures that declared in `self._rank0_env0_keys` to - # be only reported on rank0,gpu0. This is seperately logged from + # Information on measures that declared in `self._rank0_keys` or + # to be only reported on rank0. This is seperately logged from # `self.window_episode_stats`. 
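A toy example (with made-up metric names) of the split the trainer now performs with `self._rank0_keys`: measures listed under `rank0_env0_measure_names` or the new `rank0_measure_names` end up in `self._single_proc_infos` (optionally printed when `should_log_single_proc_infos` is on), while everything else continues to feed the shared window statistics:

infos = [{"habitat_perf": 1.2, "success": 1.0}]
rank0_keys = {"habitat_perf"}

single_proc_infos = {k: v for k, v in infos[0].items() if k in rank0_keys}
shared_infos = {k: v for k, v in infos[0].items() if k not in rank0_keys}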
self._single_proc_infos: Dict[str, List[float]] = {} @@ -273,8 +278,10 @@ def _init_train(self, resume_state=None): batch = apply_obs_transforms_batch(batch, self.obs_transforms) # type: ignore if self._is_static_encoder: - assert isinstance(self._agent.actor_critic, NetPolicy) - self._encoder = self._agent.actor_critic.net.visual_encoder + self._encoder = self._agent.actor_critic.visual_encoder + assert ( + self._encoder is not None + ), "Visual encoder is not specified for this actor" with inference_mode(): batch[ PointNavResNetNet.PRETRAINED_VISUAL_FEATURES_KEY @@ -313,18 +320,21 @@ def save_checkpoint( if extra_state is not None: checkpoint["extra_state"] = extra_state # type: ignore - torch.save( - checkpoint, - os.path.join( - self.config.habitat_baselines.checkpoint_folder, file_name - ), + save_file_path = os.path.join( + self.config.habitat_baselines.checkpoint_folder, file_name ) + torch.save(checkpoint, save_file_path) torch.save( checkpoint, os.path.join( self.config.habitat_baselines.checkpoint_folder, "latest.pth" ), ) + if self.config.habitat_baselines.on_save_ckpt_callback is not None: + hydra.utils.call( + self.config.habitat_baselines.on_save_ckpt_callback, + save_file_path=save_file_path, + ) def load_checkpoint(self, checkpoint_path: str, *args, **kwargs) -> Dict: r"""Load checkpoint of specified path as a dict. @@ -432,14 +442,11 @@ def _collect_environment_result(self, buffer_index: int = 0): self._single_proc_infos = extract_scalars_from_infos( infos, ignore_keys=set( - k - for k in infos[0].keys() - if k not in self._rank0_env0_keys + k for k in infos[0].keys() if k not in self._rank0_keys ), ) - extracted_infos = extract_scalars_from_infos( - infos, ignore_keys=self._rank0_env0_keys + infos, ignore_keys=self._rank0_keys ) for k, v_k in extracted_infos.items(): v = torch.tensor( @@ -457,20 +464,20 @@ def _collect_environment_result(self, buffer_index: int = 0): done_masks, 0.0 ) - if self._is_static_encoder: - with inference_mode(): - batch[ - PointNavResNetNet.PRETRAINED_VISUAL_FEATURES_KEY - ] = self._encoder(batch) + if self._is_static_encoder: + with inference_mode(), g_timer.avg_time("trainer.visual_features"): + batch[ + PointNavResNetNet.PRETRAINED_VISUAL_FEATURES_KEY + ] = self._encoder(batch) - self._agent.rollouts.insert( - next_observations=batch, - rewards=rewards, - next_masks=not_done_masks, - buffer_index=buffer_index, - ) + self._agent.rollouts.insert( + next_observations=batch, + rewards=rewards, + next_masks=not_done_masks, + buffer_index=buffer_index, + ) - self._agent.rollouts.advance_rollout(buffer_index) + self._agent.rollouts.advance_rollout(buffer_index) return env_slice.stop - env_slice.start @@ -620,6 +627,9 @@ def _training_log( [f"{k}: {v.mean:.3f}" for k, v in g_timer.items()] ) logger.info(f"\tPerf Stats: {perf_stats_str}") + if self.config.habitat_baselines.should_log_single_proc_infos: + for k, v in self._single_proc_infos.items(): + logger.info(f" - {k}: {np.mean(v):.3f}") def should_end_early(self, rollout_step) -> bool: if not self._is_distributed: @@ -810,7 +820,7 @@ def _eval_checkpoint( checkpoint_path, map_location="cpu" ) step_id = ckpt_dict["extra_state"]["step"] - print(step_id) + logger.info(f"Loaded checkpoint trained for {step_id} steps") else: ckpt_dict = {"config": None} @@ -861,10 +871,14 @@ def _eval_checkpoint( test_recurrent_hidden_states = torch.zeros( ( self.config.habitat_baselines.num_environments, - *self._agent.hidden_state_shape, + self._agent.actor_critic.num_recurrent_layers, + 
self._agent.actor_critic.recurrent_hidden_size, ), device=self.device, ) + should_update_recurrent_hidden_states = ( + np.prod(test_recurrent_hidden_states.shape) != 0 + ) prev_actions = torch.zeros( self.config.habitat_baselines.num_environments, *action_shape, @@ -882,9 +896,21 @@ def _eval_checkpoint( ] = {} # dict of dicts that stores stats per episode ep_eval_count: Dict[Any, int] = defaultdict(lambda: 0) - rgb_frames: List[List[np.ndarray]] = [ - [] for _ in range(self.config.habitat_baselines.num_environments) - ] + if len(self.config.habitat_baselines.eval.video_option) > 0: + # Add the first frame of the episode to the video. + rgb_frames: List[List[np.ndarray]] = [ + [ + observations_to_image( + {k: v[env_idx] for k, v in batch.items()}, {} + ) + ] + for env_idx in range( + self.config.habitat_baselines.num_environments + ) + ] + else: + rgb_frames = None + if len(self.config.habitat_baselines.eval.video_option) > 0: os.makedirs(self.config.habitat_baselines.video_dir, exist_ok=True) @@ -935,11 +961,13 @@ def _eval_checkpoint( for i, should_insert in enumerate( action_data.should_inserts ): - if should_insert.item(): + if not should_insert.item(): + continue + if should_update_recurrent_hidden_states: test_recurrent_hidden_states[ i ] = action_data.rnn_hidden_states[i] - prev_actions[i].copy_(action_data.actions[i]) # type: ignore + prev_actions[i].copy_(action_data.actions[i]) # type: ignore # NB: Move actions to CPU. If CUDA tensors are # sent in to env.step(), that will create CUDA contexts # in the subprocesses. @@ -961,6 +989,9 @@ def _eval_checkpoint( observations, rewards_l, dones, infos = [ list(x) for x in zip(*outputs) ] + # Note that `policy_infos` represents the information about the + # action BEFORE `observations` (the action used to transition to + # `observations`). policy_infos = self._agent.actor_critic.get_extra( action_data, infos, dones ) @@ -999,26 +1030,32 @@ def _eval_checkpoint( ): envs_to_pause.append(i) - # Exclude the keys from `_rank0_env0_keys`. - infos[i] = { + # Exclude the keys from `_rank0_keys` from displaying in the video + disp_info = { k: v for k, v in infos[i].items() - if k not in self._rank0_env0_keys + if k not in self._rank0_keys } if len(self.config.habitat_baselines.eval.video_option) > 0: # TODO move normalization / channel changing out of the policy and undo it here frame = observations_to_image( - {k: v[i] for k, v in batch.items()}, infos[i] + {k: v[i] for k, v in batch.items()}, disp_info ) if not not_done_masks[i].item(): # The last frame corresponds to the first frame of the next episode # but the info is correct. So we use a black frame - frame = observations_to_image( - {k: v[i] * 0.0 for k, v in batch.items()}, infos[i] + final_frame = observations_to_image( + {k: v[i] * 0.0 for k, v in batch.items()}, + disp_info, ) - frame = overlay_frame(frame, infos[i]) - rgb_frames[i].append(frame) + final_frame = overlay_frame(final_frame, disp_info) + rgb_frames[i].append(final_frame) + # The starting frame of the next episode will be the final element.. + rgb_frames[i].append(frame) + else: + frame = overlay_frame(frame, disp_info) + rgb_frames[i].append(frame) # episode ended if not not_done_masks[i].item(): @@ -1043,16 +1080,18 @@ def _eval_checkpoint( generate_video( video_option=self.config.habitat_baselines.eval.video_option, video_dir=self.config.habitat_baselines.video_dir, - images=rgb_frames[i], - episode_id=current_episodes_info[i].episode_id, + # Since the final frame is the start frame of the next episode. 
+ images=rgb_frames[i][:-1], + episode_id=f"{current_episodes_info[i].episode_id}_{ep_eval_count[k]}", checkpoint_idx=checkpoint_index, - metrics=extract_scalars_from_info(infos[i]), + metrics=extract_scalars_from_info(disp_info), fps=self.config.habitat_baselines.video_fps, tb_writer=writer, keys_to_include_in_name=self.config.habitat_baselines.eval_keys_to_include_in_name, ) - rgb_frames[i] = [] + # Since the starting frame of the next episode is the final frame. + rgb_frames[i] = rgb_frames[i][-1:] gfx_str = infos[i].get(GfxReplayMeasure.cls_uuid, "") if gfx_str != "": @@ -1088,9 +1127,12 @@ def _eval_checkpoint( ), f"Expected {number_of_eval_episodes} episodes, got {len(ep_eval_count)}." aggregated_stats = {} - for stat_key in next(iter(stats_episodes.values())).keys(): + all_ks = set() + for ep in stats_episodes.values(): + all_ks.update(ep.keys()) + for stat_key in all_ks: aggregated_stats[stat_key] = np.mean( - [v[stat_key] for v in stats_episodes.values()] + [v[stat_key] for v in stats_episodes.values() if stat_key in v] ) for k, v in aggregated_stats.items(): diff --git a/habitat-baselines/habitat_baselines/rl/ppo/single_agent_access_mgr.py b/habitat-baselines/habitat_baselines/rl/ppo/single_agent_access_mgr.py index b176eccea8..eeee6794e7 100644 --- a/habitat-baselines/habitat_baselines/rl/ppo/single_agent_access_mgr.py +++ b/habitat-baselines/habitat_baselines/rl/ppo/single_agent_access_mgr.py @@ -90,10 +90,40 @@ def _init_policy_and_updater(self, lr_schedule_fn, resume_state): def nbuffers(self): return self._nbuffers + def _create_storage( + self, + num_envs: int, + env_spec: EnvironmentSpec, + actor_critic: Policy, + policy_action_space: spaces.Space, + config: "DictConfig", + device, + ) -> Storage: + """ + Default behavior for setting up and initializing the rollout storage. + """ + + obs_space = get_rollout_obs_space( + env_spec.observation_space, actor_critic, config + ) + ppo_cfg = config.habitat_baselines.rl.ppo + rollouts = baseline_registry.get_storage( + config.habitat_baselines.rollout_storage_name + )( + numsteps=ppo_cfg.num_steps, + num_envs=num_envs, + observation_space=obs_space, + action_space=policy_action_space, + actor_critic=actor_critic, + is_double_buffered=ppo_cfg.use_double_buffered_sampler, + ) + rollouts.to(device) + return rollouts + def post_init(self, create_rollouts_fn: Optional[Callable] = None) -> None: # Create the rollouts storage. 
if create_rollouts_fn is None: - create_rollouts_fn = default_create_rollouts + create_rollouts_fn = self._create_storage policy_action_space = self._actor_critic.get_policy_action_space( self._env_spec.action_space @@ -169,7 +199,7 @@ def _create_policy(self) -> NetPolicy: } ) if self._is_static_encoder: - for param in actor_critic.net.visual_encoder.parameters(): + for param in actor_critic.visual_encoder.parameters(): param.requires_grad_(False) if self._config.habitat_baselines.rl.ddppo.reset_critic: @@ -194,7 +224,7 @@ def updater(self) -> Updater: def get_resume_state(self) -> Dict[str, Any]: ret = { "state_dict": self._actor_critic.state_dict(), - "optim_state": self._updater.optimizer.state_dict(), + **self._updater.get_resume_state(), } if self._lr_scheduler is not None: ret["lr_sched_state"] = (self._lr_scheduler.state_dict(),) @@ -216,17 +246,9 @@ def load_ckpt_state_dict(self, ckpt: Dict) -> None: def load_state_dict(self, state: Dict) -> None: self._actor_critic.load_state_dict(state["state_dict"]) if self._updater is not None: - if "optim_state" in state: - self._actor_critic.load_state_dict(state["optim_state"]) + self._updater.load_state_dict(state) if "lr_sched_state" in state: - self._actor_critic.load_state_dict(state["lr_sched_state"]) - - @property - def hidden_state_shape(self): - return ( - self.actor_critic.num_recurrent_layers, - self._ppo_cfg.hidden_size, - ) + self._lr_scheduler.load_state_dict(state["lr_sched_state"]) def after_update(self): if ( @@ -234,6 +256,7 @@ def after_update(self): and self._lr_scheduler is not None ): self._lr_scheduler.step() # type: ignore + self._updater.after_update() def pre_rollout(self): if self._ppo_cfg.use_linear_clip_decay: @@ -249,7 +272,7 @@ def get_rollout_obs_space(obs_space, actor_critic, config): """ if not config.habitat_baselines.rl.ddppo.train_encoder: - encoder = actor_critic.net.visual_encoder + encoder = actor_critic.visual_encoder obs_space = spaces.Dict( { PointNavResNetNet.PRETRAINED_VISUAL_FEATURES_KEY: spaces.Box( @@ -264,36 +287,5 @@ def get_rollout_obs_space(obs_space, actor_critic, config): return obs_space -def default_create_rollouts( - num_envs: int, - env_spec: EnvironmentSpec, - actor_critic: NetPolicy, - policy_action_space: spaces.Space, - config: "DictConfig", - device, -) -> Storage: - """ - Default behavior for setting up and initializing the rollout storage. - """ - - obs_space = get_rollout_obs_space( - env_spec.observation_space, actor_critic, config - ) - ppo_cfg = config.habitat_baselines.rl.ppo - rollouts = baseline_registry.get_storage( - config.habitat_baselines.rollout_storage_name - )( - ppo_cfg.num_steps, - num_envs, - obs_space, - policy_action_space, - ppo_cfg.hidden_size, - num_recurrent_layers=actor_critic.num_recurrent_layers, - is_double_buffered=ppo_cfg.use_double_buffered_sampler, - ) - rollouts.to(device) - return rollouts - - def linear_lr_schedule(percent_done: float) -> float: return 1 - percent_done diff --git a/habitat-baselines/habitat_baselines/rl/ppo/updater.py b/habitat-baselines/habitat_baselines/rl/ppo/updater.py index ae3fa558f1..5d277f4667 100644 --- a/habitat-baselines/habitat_baselines/rl/ppo/updater.py +++ b/habitat-baselines/habitat_baselines/rl/ppo/updater.py @@ -1,5 +1,5 @@ import abc -from typing import Dict +from typing import Any, Dict from habitat_baselines.common.storage import Storage @@ -14,3 +14,22 @@ def update(self, rollouts: Storage) -> Dict[str, float]: """ Perform an update from data in the storage objet. 
""" + + @property + def lr_scheduler(self): + return None + + def after_update(self) -> None: + """ + Called after the policy update. + """ + + def get_resume_state(self) -> Dict[str, Any]: + """ + Gets the optimizer resume state. + """ + + def load_state_dict(self, state: Dict[str, Any]) -> None: + """ + Loads an optimizer state. + """ diff --git a/habitat-baselines/habitat_baselines/rl/ver/ver_rollout_storage.py b/habitat-baselines/habitat_baselines/rl/ver/ver_rollout_storage.py index 73608242f8..a5db473281 100644 --- a/habitat-baselines/habitat_baselines/rl/ver/ver_rollout_storage.py +++ b/habitat-baselines/habitat_baselines/rl/ver/ver_rollout_storage.py @@ -137,13 +137,12 @@ class VERRolloutStorage(RolloutStorage): def __init__( self, - variable_experience: bool, numsteps, num_envs, observation_space, action_space, - recurrent_hidden_state_size, - num_recurrent_layers=1, + actor_critic, + variable_experience: bool, is_double_buffered: bool = False, ): super().__init__( @@ -151,8 +150,7 @@ def __init__( num_envs, observation_space, action_space, - recurrent_hidden_state_size, - num_recurrent_layers, + actor_critic, is_double_buffered, ) self.use_is_coeffs = variable_experience diff --git a/habitat-baselines/habitat_baselines/rl/ver/ver_trainer.py b/habitat-baselines/habitat_baselines/rl/ver/ver_trainer.py index bb702d8a7f..08ae66cf0a 100644 --- a/habitat-baselines/habitat_baselines/rl/ver/ver_trainer.py +++ b/habitat-baselines/habitat_baselines/rl/ver/ver_trainer.py @@ -242,8 +242,7 @@ def _init_train(self, resume_state): "numsteps": ppo_cfg.num_steps, "num_envs": len(self.environment_workers), "action_space": self._env_spec.action_space, - "recurrent_hidden_state_size": ppo_cfg.hidden_size, - "num_recurrent_layers": self._agent.actor_critic.net.num_recurrent_layers, + "actor_critic": self._agent.actor_critic, "observation_space": rollouts_obs_space, } diff --git a/habitat-lab/habitat/config/benchmark/rearrange/pick_spa.yaml b/habitat-lab/habitat/config/benchmark/rearrange/pick_spa.yaml deleted file mode 100644 index 33cd42fc67..0000000000 --- a/habitat-lab/habitat/config/benchmark/rearrange/pick_spa.yaml +++ /dev/null @@ -1,50 +0,0 @@ -# @package _global_ - -defaults: - - /habitat: habitat_config_base - - /habitat/simulator/agents@habitat.simulator.agents.main_agent: rgbd_head_rgbd_arm_agent - # add third_rgb_sensor to rgbd_head_rgbd_arm_agent: - - /habitat/simulator/sim_sensors@habitat.simulator.agents.main_agent.sim_sensors.third_rgb_sensor: third_rgb_sensor - - /habitat/task/rearrange: pick_spa - - /habitat/dataset/rearrangement: replica_cad - - _self_ - -# The configuration to run the SensePlanAct baseline architecture from https://arxiv.org/abs/2106.14405 -habitat: - environment: - max_episode_steps: 30 - simulator: - type: RearrangeSim-v0 - seed: 100 - additional_object_paths: - - "data/objects/ycb/configs/" - concur_render: True - auto_sleep: True - agents: - main_agent: - radius: 0.3 - sim_sensors: - head_rgb_sensor: - height: 128 - width: 128 - head_depth_sensor: - width: 128 - height: 128 - arm_depth_sensor: - height: 128 - width: 128 - arm_rgb_sensor: - height: 128 - width: 128 - third_rgb_sensor: - height: 512 - width: 512 - # Agent setup - ac_freq_ratio: 8 - - habitat_sim_v0: - allow_sliding: False - enable_physics: True - - dataset: - data_path: data/datasets/replica_cad/rearrange/v1/{split}/all_receptacles_10k_1k.json.gz diff --git a/habitat-lab/habitat/config/default_structured_configs.py b/habitat-lab/habitat/config/default_structured_configs.py index 
a2f1385332..ff12f4cf13 100644 --- a/habitat-lab/habitat/config/default_structured_configs.py +++ b/habitat-lab/habitat/config/default_structured_configs.py @@ -694,6 +694,7 @@ class RuntimePerfStatsMeasurementConfig(MeasurementConfig): """ type: str = "RuntimePerfStats" + disable_logging: bool = False @dataclass @@ -1134,6 +1135,8 @@ class TaskConfig(HabitatBaseConfig): rank0_env0_measure_names: List[str] = field( default_factory=lambda: ["habitat_perf"] ) + # Measures to only record in the first rank for vectorized environments. + rank0_measure_names: List[str] = field(default_factory=list) goal_sensor_uuid: str = "pointgoal" # REARRANGE task count_obj_collisions: bool = True diff --git a/habitat-lab/habitat/config/habitat/task/rearrange/agents/fetch_arm.yaml b/habitat-lab/habitat/config/habitat/task/rearrange/agents/fetch_arm.yaml new file mode 100644 index 0000000000..faf644ab27 --- /dev/null +++ b/habitat-lab/habitat/config/habitat/task/rearrange/agents/fetch_arm.yaml @@ -0,0 +1,7 @@ +# @package habitat.task.actions +defaults: + - /habitat/task/actions: + - arm_action + - _self_ +arm_action: + grip_controller: SuctionGraspAction diff --git a/habitat-lab/habitat/config/habitat/task/rearrange/agents/fetch_base_arm.yaml b/habitat-lab/habitat/config/habitat/task/rearrange/agents/fetch_base_arm.yaml new file mode 100644 index 0000000000..d2ce4c42a9 --- /dev/null +++ b/habitat-lab/habitat/config/habitat/task/rearrange/agents/fetch_base_arm.yaml @@ -0,0 +1,8 @@ +# @package habitat.task.actions +defaults: + - /habitat/task/actions: + - arm_action + - base_velocity + - _self_ +arm_action: + grip_controller: SuctionGraspAction diff --git a/habitat-lab/habitat/config/habitat/task/rearrange/open_cab.yaml b/habitat-lab/habitat/config/habitat/task/rearrange/open_cab.yaml index 1239aa794f..c1aae6217a 100644 --- a/habitat-lab/habitat/config/habitat/task/rearrange/open_cab.yaml +++ b/habitat-lab/habitat/config/habitat/task/rearrange/open_cab.yaml @@ -46,5 +46,5 @@ measurements: marker_dist_reward: 1.0 constraint_violate_pen: 1.0 force_terminate: - max_accum_force: 20_000.0 + max_accum_force: -1.0 max_instant_force: 10_000.0 diff --git a/habitat-lab/habitat/config/habitat/task/rearrange/pick.yaml b/habitat-lab/habitat/config/habitat/task/rearrange/pick.yaml index d52944f0c6..80ee4a7e61 100644 --- a/habitat-lab/habitat/config/habitat/task/rearrange/pick.yaml +++ b/habitat-lab/habitat/config/habitat/task/rearrange/pick.yaml @@ -2,9 +2,7 @@ defaults: - /habitat/task: task_config_base - - /habitat/task/actions: - - arm_action - - base_velocity + - /habitat/task/rearrange/agents: fetch_base_arm - /habitat/task/measurements: - articulated_agent_force - force_terminate @@ -32,9 +30,6 @@ success_measure: "pick_success" success_reward: 10.0 slack_reward: -0.005 end_on_success: True -actions: - arm_action: - grip_controller: SuctionGraspAction measurements: force_terminate: max_accum_force: 10_000.0 diff --git a/habitat-lab/habitat/config/habitat/task/rearrange/pick_spa.yaml b/habitat-lab/habitat/config/habitat/task/rearrange/pick_spa.yaml deleted file mode 100644 index d8671427aa..0000000000 --- a/habitat-lab/habitat/config/habitat/task/rearrange/pick_spa.yaml +++ /dev/null @@ -1,49 +0,0 @@ -# @package habitat.task - -defaults: - - /habitat/task: task_config_base - - /habitat/task/actions: - - arm_action - - empty - - /habitat/task/measurements: - - articulated_agent_force - - articulated_agent_colls - - force_terminate - - end_effector_to_rest_distance - - end_effector_to_object_distance - - pick_success - - 
pick_reward - - num_steps - - /habitat/task/lab_sensors: - - target_start_sensor - - joint_sensor - - is_holding_sensor - - end_effector_sensor - - relative_resting_pos_sensor - - goal_sensor - - object_sensor - - _self_ - -# The configuration to run the SensePlanAct baseline architecture from https://arxiv.org/abs/2106.14405 -type: RearrangePickTask-v0 -count_obj_collisions: True -desired_resting_position: [0.5, 0.0, 1.0] -constraint_violation_ends_episode: True -should_enforce_target_within_reach: False - -# In radians -base_angle_noise: 0.15 -base_noise: 0.05 -force_regenerate: False - -actions: - arm_action: - arm_controller: "ArmAbsPosKinematicAction" - grip_controller: "MagicGraspAction" -measurements: - pick_reward: - dist_reward: 10.0 - pick_reward: 5.0 - force_pen: 0.0 - max_force_pen: 0.0 - force_end_pen: 0.0 diff --git a/habitat-lab/habitat/config/habitat/task/rearrange/place.yaml b/habitat-lab/habitat/config/habitat/task/rearrange/place.yaml index 891a55760e..a4ab90474e 100644 --- a/habitat-lab/habitat/config/habitat/task/rearrange/place.yaml +++ b/habitat-lab/habitat/config/habitat/task/rearrange/place.yaml @@ -2,9 +2,7 @@ defaults: - /habitat/task: task_config_base - - /habitat/task/actions: - - arm_action - - base_velocity + - /habitat/task/rearrange/agents: fetch_base_arm - /habitat/task/measurements: - articulated_agent_force - force_terminate @@ -36,9 +34,6 @@ success_measure: place_success success_reward: 10.0 slack_reward: -0.005 end_on_success: True -actions: - arm_action: - grip_controller: SuctionGraspAction measurements: force_terminate: max_accum_force: 10_000.0 diff --git a/habitat-lab/habitat/datasets/pointnav/pointnav_dataset.py b/habitat-lab/habitat/datasets/pointnav/pointnav_dataset.py index bc144e4eb4..4a1ddedc8a 100644 --- a/habitat-lab/habitat/datasets/pointnav/pointnav_dataset.py +++ b/habitat-lab/habitat/datasets/pointnav/pointnav_dataset.py @@ -90,6 +90,14 @@ def _get_scenes_from_folder( scenes.sort() return scenes + def _load_from_file(self, fname: str, scenes_dir: str) -> None: + """ + Load the data from a file into `self.episodes`. 
+ """ + + with gzip.open(fname, "rt") as f: + self.from_json(f.read(), scenes_dir=scenes_dir) + def __init__(self, config: Optional["DictConfig"] = None) -> None: self.episodes = [] @@ -97,8 +105,8 @@ def __init__(self, config: Optional["DictConfig"] = None) -> None: return datasetfile_path = config.data_path.format(split=config.split) - with gzip.open(datasetfile_path, "rt") as f: - self.from_json(f.read(), scenes_dir=config.scenes_dir) + + self._load_from_file(datasetfile_path, config.scenes_dir) # Read separate file for each scene dataset_dir = os.path.dirname(datasetfile_path) @@ -119,8 +127,8 @@ def __init__(self, config: Optional["DictConfig"] = None) -> None: scene_filename = self.content_scenes_path.format( data_path=dataset_dir, scene=scene ) - with gzip.open(scene_filename, "rt") as f: - self.from_json(f.read(), scenes_dir=config.scenes_dir) + + self._load_from_file(scene_filename, config.scenes_dir) else: self.episodes = list( diff --git a/habitat-lab/habitat/datasets/rearrange/rearrange_generator.py b/habitat-lab/habitat/datasets/rearrange/rearrange_generator.py index 5f8db590a5..a829addb62 100644 --- a/habitat-lab/habitat/datasets/rearrange/rearrange_generator.py +++ b/habitat-lab/habitat/datasets/rearrange/rearrange_generator.py @@ -774,6 +774,7 @@ def initialize_sim(self, scene_name: str, dataset_path: str) -> None: backend_cfg.scene_dataset_config_file = dataset_path backend_cfg.scene_id = scene_name backend_cfg.enable_physics = True + backend_cfg.gpu_device_id = self.cfg.gpu_device_id if not self._render_debug_obs: # don't bother loading textures if not intending to visualize the generation process backend_cfg.create_renderer = False diff --git a/habitat-lab/habitat/datasets/rearrange/run_episode_generator.py b/habitat-lab/habitat/datasets/rearrange/run_episode_generator.py index f44f387b10..fc5a84236a 100644 --- a/habitat-lab/habitat/datasets/rearrange/run_episode_generator.py +++ b/habitat-lab/habitat/datasets/rearrange/run_episode_generator.py @@ -43,6 +43,7 @@ class SceneSamplerConfig: class RearrangeEpisodeGeneratorConfig: # The minimum distance from the target object starting position to its goal min_dist_from_start_to_goal: float = 0.5 + gpu_device_id: int = 0 # ----- import/initialization parameters ------ # the scene dataset from which scenes and objects are sampled dataset_path: str = "data/replica_cad/replicaCAD.scene_dataset_config.json" diff --git a/habitat-lab/habitat/datasets/rearrange/samplers/object_sampler.py b/habitat-lab/habitat/datasets/rearrange/samplers/object_sampler.py index 2cc3ee86df..5f5c860a18 100644 --- a/habitat-lab/habitat/datasets/rearrange/samplers/object_sampler.py +++ b/habitat-lab/habitat/datasets/rearrange/samplers/object_sampler.py @@ -343,7 +343,7 @@ def sample_placement( sim.get_rigid_object_manager().remove_object_by_handle( new_object.handle ) - logger.warning( + logger.info( f"Failed to sample {object_handle} placement on {receptacle.unique_name} in {self.max_placement_attempts} tries." 
) @@ -402,6 +402,7 @@ def single_sample( object_handle = self.sample_object() else: object_handle = fixed_obj_handle + if fixed_target_receptacle is not None: target_receptacle = fixed_target_receptacle else: @@ -433,6 +434,8 @@ def sample( target_receptacles: List[Receptacle], snap_down: bool = False, vdb: Optional[DebugVisualizer] = None, + target_object_handles: Optional[List[str]] = None, + object_idx_to_recep: Optional[Dict[int, Receptacle]] = None, ) -> List[Tuple[habitat_sim.physics.ManagedRigidObject, Receptacle]]: """ Defaults to uniform sample: object -> receptacle -> volume w/ rejection -> repeat. @@ -449,6 +452,8 @@ def sample( new_objects: List[ Tuple[habitat_sim.physics.ManagedRigidObject, Receptacle] ] = [] + if object_idx_to_recep is None: + object_idx_to_recep = {} logger.info( f" Trying to sample {self.target_objects_number} from range {self.num_objects}" @@ -460,6 +465,12 @@ def sample( len(new_objects) < self.target_objects_number and num_pairing_tries < self.max_sample_attempts ): + cur_obj_idx = len(new_objects) + if target_object_handles is None: + fixed_obj_handle = None + else: + fixed_obj_handle = target_object_handles[cur_obj_idx] + num_pairing_tries += 1 if len(new_objects) < len(target_receptacles): # sample objects explicitly from pre-designated target receptacles first @@ -468,13 +479,21 @@ def sample( recep_tracker, snap_down, vdb, - target_receptacles[len(new_objects)], + target_receptacles[cur_obj_idx], + fixed_obj_handle=fixed_obj_handle, ) # This receptacle has already been counted in the receptacle # tracking so don't double count. else: new_object, receptacle = self.single_sample( - sim, recep_tracker, snap_down, vdb + sim, + recep_tracker, + snap_down, + vdb, + fixed_target_receptacle=object_idx_to_recep.get( + cur_obj_idx, None + ), + fixed_obj_handle=fixed_obj_handle, ) if ( new_object is not None diff --git a/habitat-lab/habitat/gym/gym_definitions.py b/habitat-lab/habitat/gym/gym_definitions.py index 0c8f426c81..b9ae9a9fbf 100644 --- a/habitat-lab/habitat/gym/gym_definitions.py +++ b/habitat-lab/habitat/gym/gym_definitions.py @@ -47,7 +47,7 @@ def _get_env_name(cfg: "DictConfig") -> Optional[str]: return cfg["env_task"] -def make_gym_from_config(config: "DictConfig") -> gym.Env: +def make_gym_from_config(config: "DictConfig", dataset=None) -> gym.Env: """ From a habitat-lab or habitat-baseline config, create the associated gym environment. 
""" @@ -58,7 +58,7 @@ def make_gym_from_config(config: "DictConfig") -> gym.Env: assert ( env_class is not None ), f"No environment class with name `{env_class_name}` was found, you need to specify a valid one with env_task" - return make_env_fn(env_class=env_class, config=config) + return make_env_fn(env_class=env_class, config=config, dataset=dataset) def _add_sim_sensor_to_config( diff --git a/habitat-lab/habitat/tasks/rearrange/articulated_agent_manager.py b/habitat-lab/habitat/tasks/rearrange/articulated_agent_manager.py index 3f4f6fb1da..1ac2e6eb72 100644 --- a/habitat-lab/habitat/tasks/rearrange/articulated_agent_manager.py +++ b/habitat-lab/habitat/tasks/rearrange/articulated_agent_manager.py @@ -136,9 +136,9 @@ def pre_obj_clear(self) -> None: """ for agent_data in self._all_agent_data: - agent_data.grasp_mgr.reconfigure() for grasp_manager in agent_data.grasp_mgrs: grasp_manager.reset() + agent_data.grasp_mgr.reconfigure() @add_perf_timing_func() def post_obj_load_reconfigure(self): diff --git a/habitat-lab/habitat/tasks/rearrange/multi_task/composite_sensors.py b/habitat-lab/habitat/tasks/rearrange/multi_task/composite_sensors.py index eeeaa83ac3..392317c939 100644 --- a/habitat-lab/habitat/tasks/rearrange/multi_task/composite_sensors.py +++ b/habitat-lab/habitat/tasks/rearrange/multi_task/composite_sensors.py @@ -111,11 +111,10 @@ def update_target_object(self): # Get the target object's absolute index self.abs_targ_obj_idx = self._sim.scene_obj_ids[targ_obj_idx] - self.targ_obj_idx = str(targ_obj_idx) def get_distance(self, task, distance): return task.measurements.measures[distance.cls_uuid].get_metric()[ - self.targ_obj_idx + str(self._cur_rearrange_stage) ] def update_metric(self, *args, episode, task, observations, **kwargs): @@ -164,6 +163,9 @@ def update_metric(self, *args, episode, task, observations, **kwargs): if place_success and not is_holding_obj: self._metric += self._config.single_rearrange_reward self._cur_rearrange_stage += 1 + self._cur_rearrange_stage = ( + self._cur_rearrange_stage % self.num_targets + ) if self._cur_rearrange_stage < self.num_targets: self.update_target_object() diff --git a/habitat-lab/habitat/tasks/rearrange/multi_task/pddl_domain.py b/habitat-lab/habitat/tasks/rearrange/multi_task/pddl_domain.py index ca4ba0f074..5ad1cd306d 100644 --- a/habitat-lab/habitat/tasks/rearrange/multi_task/pddl_domain.py +++ b/habitat-lab/habitat/tasks/rearrange/multi_task/pddl_domain.py @@ -62,6 +62,7 @@ def __init__( self, domain_file_path: str, cur_task_config: Optional["DictConfig"] = None, + read_config: bool = True, ): """ :param domain_file_path: Either an absolute path or a path relative to `habitat/task/rearrange/multi_task/domain_configs/`. @@ -73,6 +74,19 @@ def __init__( self._config = cur_task_config self._orig_actions: Dict[str, PddlAction] = {} + if read_config: + # Setup config properties + self._obj_succ_thresh = self._config.obj_succ_thresh + self._art_succ_thresh = self._config.art_succ_thresh + self._robot_at_thresh = self._config.robot_at_thresh + self._num_spawn_attempts = self._config.num_spawn_attempts + self._physics_stability_steps = ( + self._config.physics_stability_steps + ) + self._recep_place_shrink_factor = ( + self._config.recep_place_shrink_factor + ) + if not osp.isabs(domain_file_path): parent_dir = osp.dirname(__file__) domain_file_path = osp.join( @@ -186,6 +200,7 @@ def fetch_entity(s): for k, v in robot_states.items(): use_k = all_entities[k] + # Sub in any referred entities. 
v = {sub_k: fetch_entity(sub_v) for sub_k, sub_v in v.items()} use_robot_states[use_k] = PddlRobotState(**v) @@ -359,9 +374,9 @@ def bind_to_instance( dataset=dataset, env=env, episode=episode, - obj_thresh=self._config.obj_succ_thresh, - art_thresh=self._config.art_succ_thresh, - robot_at_thresh=self._config.robot_at_thresh, + obj_thresh=self._obj_succ_thresh, + art_thresh=self._art_succ_thresh, + robot_at_thresh=self._robot_at_thresh, expr_types=self.expr_types, obj_ids=sim.handle_to_object_id, target_ids={ @@ -376,10 +391,10 @@ def bind_to_instance( }, all_entities=self.all_entities, predicates=self.predicates, - num_spawn_attempts=self._config.num_spawn_attempts, - physics_stability_steps=self._config.physics_stability_steps, + num_spawn_attempts=self._num_spawn_attempts, + physics_stability_steps=self._physics_stability_steps, receptacles=sim.receptacles, - recep_place_shrink_factor=self._config.recep_place_shrink_factor, + recep_place_shrink_factor=self._recep_place_shrink_factor, ) # Ensure that all objects are accounted for. for entity in self.all_entities.values(): @@ -392,9 +407,7 @@ def bind_actions(self) -> None: """ for k, ac in self._orig_actions.items(): precond_quant = ac.precond.quantifier - new_preconds, assigns = self.expand_quantifiers( - ac.precond.clone(), ac.name - ) + new_preconds, assigns = self.expand_quantifiers(ac.precond.clone()) new_ac = ac.set_precond(new_preconds) if precond_quant == LogicalQuantifierType.EXISTS: @@ -572,7 +585,7 @@ def all_entities(self) -> Dict[str, PddlEntity]: return {**self._constants, **self._added_entities} def expand_quantifiers( - self, expr: LogicalExpr, tmp=None + self, expr: LogicalExpr ) -> Tuple[LogicalExpr, List[Dict[PddlEntity, PddlEntity]]]: """ Expand out a logical expression that could involve a quantifier into @@ -598,7 +611,7 @@ def expand_quantifiers( elif expr.quantifier is None: return expr, [] else: - raise ValueError(f"Unrecongized {expr.quantifier}") + raise ValueError(f"Unrecognized {expr.quantifier}") t_start = time.time() assigns: List[List[PddlEntity]] = [[]] @@ -640,10 +653,11 @@ def __init__( domain_file_path: str, problem_file_path: str, cur_task_config: Optional["DictConfig"] = None, + read_config: bool = True, ): self._objects = {} - super().__init__(domain_file_path, cur_task_config) + super().__init__(domain_file_path, cur_task_config, read_config) with open(get_full_habitat_config_path(problem_file_path), "r") as f: problem_def = yaml.safe_load(f) self._objects = { diff --git a/habitat-lab/habitat/tasks/rearrange/multi_task/pddl_logical_expr.py b/habitat-lab/habitat/tasks/rearrange/multi_task/pddl_logical_expr.py index 47a52b7dce..7c806495b0 100644 --- a/habitat-lab/habitat/tasks/rearrange/multi_task/pddl_logical_expr.py +++ b/habitat-lab/habitat/tasks/rearrange/multi_task/pddl_logical_expr.py @@ -126,7 +126,7 @@ def sub_in(self, sub_dict: Dict[PddlEntity, PddlEntity]) -> "LogicalExpr": def sub_in_clone(self, sub_dict: Dict[PddlEntity, PddlEntity]): return LogicalExpr( self._expr_type, - [e.sub_in(sub_dict) for e in self._sub_exprs], + [e.sub_in_clone(sub_dict) for e in self._sub_exprs], self._inputs, self._quantifier, ) diff --git a/habitat-lab/habitat/tasks/rearrange/multi_task/pddl_sim_state.py b/habitat-lab/habitat/tasks/rearrange/multi_task/pddl_sim_state.py index 829d8e7a0e..8305feb543 100644 --- a/habitat-lab/habitat/tasks/rearrange/multi_task/pddl_sim_state.py +++ b/habitat-lab/habitat/tasks/rearrange/multi_task/pddl_sim_state.py @@ -18,11 +18,7 @@ PddlSimInfo, SimulatorObjectType, ) -from 
habitat.tasks.rearrange.utils import ( - get_angle_to_pos, - get_robot_spawns, - rearrange_logger, -) +from habitat.tasks.rearrange.utils import get_robot_spawns, rearrange_logger CAB_TYPE = "cab_type" FRIDGE_TYPE = "fridge_type" @@ -60,20 +56,40 @@ class PddlRobotState: :property place_at_pos_dist: If -1.0, this will place the robot as close as possible to the entity. Otherwise, it will place the robot within X - meters of the entity. + meters of the entity. If unset, sets to task default. :property base_angle_noise: How much noise to add to the robot base angle - when setting the robot base position. + when setting the robot base position. If not set, sets to task default. :property place_at_angle_thresh: The required maximum angle to the target entity in the robot's local frame. Specified in radains. If not specified, no angle is considered. + :property physics_stability_steps: Number of physics checks for placing the + robot. If not set, sets to task default. """ holding: Optional[PddlEntity] = None should_drop: bool = False pos: Optional[Any] = None - place_at_pos_dist: float = -1.0 + place_at_pos_dist: Optional[float] = None place_at_angle_thresh: Optional[float] = None - base_angle_noise: float = 0.0 + base_angle_noise: Optional[float] = None + physics_stability_steps: Optional[int] = None + + def get_place_at_pos_dist(self, sim_info) -> float: + if self.place_at_pos_dist is None: + return sim_info.robot_at_thresh + else: + return self.place_at_pos_dist + + def get_base_angle_noise(self, sim_info) -> float: + if self.base_angle_noise is None: + return 0.0 + return self.base_angle_noise + + def get_physics_stability_steps(self, sim_info) -> Optional[int]: + if self.physics_stability_steps is None: + return sim_info.physics_stability_steps + else: + return self.physics_stability_steps def sub_in( self, sub_dict: Dict[PddlEntity, PddlEntity] @@ -82,6 +98,14 @@ def sub_in( self.pos = sub_dict.get(self.pos, self.pos) return self + def sub_in_clone( + self, sub_dict: Dict[PddlEntity, PddlEntity] + ) -> "PddlRobotState": + other = replace(self) + other.holding = sub_dict.get(self.holding, self.holding) + other.pos = sub_dict.get(self.pos, self.pos) + return other + def clone(self) -> "PddlRobotState": """ Returns a shallow copy @@ -128,13 +152,8 @@ def is_true(self, sim_info: PddlSimInfo, robot_entity: PddlEntity) -> bool: # Get the angle angle = np.arccos(np.dot(pos, pos_robot)) - if self.place_at_pos_dist == -1.0: - use_thresh = sim_info.robot_at_thresh - else: - use_thresh = self.place_at_pos_dist - # Check the distance threshold. - if dist > use_thresh: + if dist > self.get_place_at_pos_dist(sim_info): return False # Check for the angle threshold @@ -157,53 +176,43 @@ def set_state( sim_info.search_for_entity(robot_entity), ) sim = sim_info.sim - grasp_mgr = sim.get_agent_data(robot_id).grasp_mgr + agent_data = sim.get_agent_data(robot_id) # Set the snapped object information - if self.should_drop and grasp_mgr.is_grasped: - grasp_mgr.desnap(True) + if self.should_drop and agent_data.grasp_mgr.is_grasped: + agent_data.grasp_mgr.desnap(True) elif self.holding is not None: # Swap objects to the desired object. 
obj_idx = cast(int, sim_info.search_for_entity(self.holding)) - grasp_mgr.desnap(True) + agent_data.grasp_mgr.desnap(True) sim.internal_step(-1) - grasp_mgr.snap_to_obj(sim.scene_obj_ids[obj_idx]) + agent_data.grasp_mgr.snap_to_obj(sim.scene_obj_ids[obj_idx]) sim.internal_step(-1) # Set the robot starting position if isinstance(self.pos, PddlEntity): targ_pos = sim_info.get_entity_pos(self.pos) - agent = sim.get_agent_data(robot_id).articulated_agent - - if self.place_at_pos_dist == -1.0: - # Place as close to the object as possible. - if not sim_info.sim.is_point_within_bounds(targ_pos): - rearrange_logger.error( - f"Object {self.pos} is out of bounds but trying to set robot position" - ) - - agent_pos = sim_info.sim.safe_snap_point(targ_pos) - agent.base_pos = agent_pos - agent.base_rot = get_angle_to_pos( - np.array(targ_pos - agent_pos) - ) - else: - # Place some distance away from the object. - start_pos, start_rot, was_fail = get_robot_spawns( - target_position=targ_pos, - rotation_perturbation_noise=self.base_angle_noise, - distance_threshold=self.place_at_pos_dist, - sim=sim, - num_spawn_attempts=sim_info.num_spawn_attempts, - physics_stability_steps=sim_info.physics_stability_steps, - ) - sim.articulated_agent.base_pos = start_pos - sim.articulated_agent.base_rot = start_rot - if was_fail: - rearrange_logger.error("Failed to place the robot.") + + # Place some distance away from the object. + start_pos, start_rot, was_fail = get_robot_spawns( + target_position=targ_pos, + rotation_perturbation_noise=self.get_base_angle_noise( + sim_info + ), + distance_threshold=self.get_place_at_pos_dist(sim_info), + sim=sim, + num_spawn_attempts=sim_info.num_spawn_attempts, + physics_stability_steps=self.get_physics_stability_steps( + sim_info + ), + agent=agent_data.articulated_agent, + ) + agent_data.articulated_agent.base_pos = start_pos + agent_data.articulated_agent.base_rot = start_rot + if was_fail: + rearrange_logger.error("Failed to place the robot.") # We teleported the agent. We also need to teleport the object the agent was holding. 
- grasp_mgr = sim.get_agent_data(robot_id).grasp_mgr - grasp_mgr.update_object_to_grasp() + agent_data.grasp_mgr.update_object_to_grasp() elif self.pos is not None: raise ValueError(f"Unrecongized set position {self.pos}") @@ -254,7 +263,7 @@ def sub_in_clone( for k, v in self._obj_states.items() }, { - sub_dict.get(k, k): robot_state.sub_in(sub_dict) + sub_dict.get(k, k): robot_state.sub_in_clone(sub_dict) for k, robot_state in self._robot_states.items() }, ) diff --git a/habitat-lab/habitat/tasks/rearrange/multi_task/rearrange_pddl.py b/habitat-lab/habitat/tasks/rearrange/multi_task/rearrange_pddl.py index 417b846504..84fb441258 100644 --- a/habitat-lab/habitat/tasks/rearrange/multi_task/rearrange_pddl.py +++ b/habitat-lab/habitat/tasks/rearrange/multi_task/rearrange_pddl.py @@ -163,7 +163,7 @@ def get_entity_pos(self, entity: PddlEntity) -> np.ndarray: entity, SimulatorObjectType.ROBOT_ENTITY.value ): robot_id = self.robot_ids[ename] - return self.sim.get_agent_data(robot_id).robot.base_pos + return self.sim.get_agent_data(robot_id).articulated_agent.base_pos if self.check_type_matches( entity, SimulatorObjectType.ARTICULATED_RECEPTACLE_ENTITY.value ): diff --git a/habitat-lab/habitat/tasks/rearrange/rearrange_grasp_manager.py b/habitat-lab/habitat/tasks/rearrange/rearrange_grasp_manager.py index 4ac58df83d..c4ce59fa68 100644 --- a/habitat-lab/habitat/tasks/rearrange/rearrange_grasp_manager.py +++ b/habitat-lab/habitat/tasks/rearrange/rearrange_grasp_manager.py @@ -50,9 +50,7 @@ def __init__( self._kinematic_mode = self._sim.habitat_config.kinematic_mode def reconfigure(self) -> None: - """Removes any existing constraints managed by this structure. - Called from _sim.reconfigure(). - """ + """Removes any existing constraints managed by this structure.""" self._snap_constraints.clear() def reset(self) -> None: diff --git a/habitat-lab/habitat/tasks/rearrange/rearrange_sensors.py b/habitat-lab/habitat/tasks/rearrange/rearrange_sensors.py index ef9d3729b9..a5243fb987 100644 --- a/habitat-lab/habitat/tasks/rearrange/rearrange_sensors.py +++ b/habitat-lab/habitat/tasks/rearrange/rearrange_sensors.py @@ -5,6 +5,8 @@ # LICENSE file in the root directory of this source tree. 
+from collections import defaultdict, deque + import numpy as np from gym import spaces @@ -474,7 +476,7 @@ def update_metric(self, *args, episode, **kwargs): scene_pos = self._sim.get_scene_pos() target_pos = scene_pos[idxs] distances = np.linalg.norm(target_pos - goal_pos, ord=2, axis=-1) - self._metric = {str(idx): dist for idx, dist in zip(idxs, distances)} + self._metric = {str(idx): dist for idx, dist in enumerate(distances)} @registry.register_measure @@ -578,11 +580,11 @@ def update_metric(self, *args, observations, **kwargs): .translation ) - idxs, goals = self._sim.get_targets() + goals = self._sim.get_targets()[1] distances = np.linalg.norm(goals - ee_pos, ord=2, axis=-1) - self._metric = {str(idx): dist for idx, dist in zip(idxs, distances)} + self._metric = {str(idx): dist for idx, dist in enumerate(distances)} @registry.register_measure @@ -618,7 +620,7 @@ def update_metric(self, *args, episode, **kwargs): distances = np.linalg.norm(target_pos - ee_pos, ord=2, axis=-1) - self._metric = {str(idx): dist for idx, dist in zip(idxs, distances)} + self._metric = {str(idx): dist for idx, dist in enumerate(distances)} @registry.register_measure @@ -1036,10 +1038,19 @@ def _get_uuid(*args, **kwargs): def __init__(self, sim, config, *args, **kwargs): self._sim = sim self._sim.enable_perf_logging() + self._disable_logging = config.disable_logging super().__init__() def reset_metric(self, *args, **kwargs): + self._metric_queue = defaultdict(deque) self._metric = {} def update_metric(self, *args, task, **kwargs): - self._metric = self._sim.get_runtime_perf_stats() + for k, v in self._sim.get_runtime_perf_stats().items(): + self._metric_queue[k].append(v) + if self._disable_logging: + self._metric = {} + else: + self._metric = { + k: np.mean(v) for k, v in self._metric_queue.items() + } diff --git a/habitat-lab/habitat/tasks/rearrange/rearrange_sim.py b/habitat-lab/habitat/tasks/rearrange/rearrange_sim.py index efca12822a..f3becf8ff2 100644 --- a/habitat-lab/habitat/tasks/rearrange/rearrange_sim.py +++ b/habitat-lab/habitat/tasks/rearrange/rearrange_sim.py @@ -37,6 +37,7 @@ ) from habitat.sims.habitat_simulator.habitat_simulator import HabitatSim from habitat.tasks.rearrange.articulated_agent_manager import ( + ArticulatedAgentData, ArticulatedAgentManager, ) from habitat.tasks.rearrange.marker_info import MarkerInfo @@ -113,6 +114,7 @@ def __init__(self, config: "DictConfig"): self.agents_mgr = ArticulatedAgentManager(self.habitat_config, self) + # Setup config options. 
self._debug_render_articulated_agent = ( self.habitat_config.debug_render_articulated_agent ) @@ -128,10 +130,13 @@ def __init__(self, config: "DictConfig"): self.habitat_config.update_articulated_agent ) self._step_physics = self.habitat_config.step_physics + self._auto_sleep = self.habitat_config.auto_sleep + self._load_objs = self.habitat_config.load_objs self._additional_object_paths = ( self.habitat_config.additional_object_paths ) self._kinematic_mode = self.habitat_config.kinematic_mode + self._extra_runtime_perf_stats: Dict[str, float] = defaultdict(float) self._perf_logging_enabled = False self.cur_runtime_perf_scope: List[str] = [] @@ -271,26 +276,27 @@ def reset(self): def reconfigure(self, config: "DictConfig", ep_info: RearrangeEpisode): self._handle_to_goal_name = ep_info.info["object_labels"] - with read_write(config): - config["scene"] = ep_info.scene_id - self.ep_info = ep_info new_scene = self.prev_scene_id != ep_info.scene_id if new_scene: self._prev_obj_names = None - # Only remove and re-add objects if we have a new set of objects. obj_names = [x[0] for x in ep_info.rigid_objs] + # Only remove and re-add objects if we have a new set of objects. should_add_objects = self._prev_obj_names != obj_names self._prev_obj_names = obj_names self.agents_mgr.pre_obj_clear() - self._clear_objects(should_add_objects) + self._clear_objects(should_add_objects, new_scene) - t_start = time.time() - super().reconfigure(config, should_close_on_new_scene=False) - self.add_perf_timing("super_reconfigure", t_start) - self._try_acquire_context() + is_hard_reset = new_scene or should_add_objects + + if is_hard_reset: + with read_write(config): + config["scene"] = ep_info.scene_id + t_start = time.time() + super().reconfigure(config, should_close_on_new_scene=False) + self.add_perf_timing("super_reconfigure", t_start) if new_scene: self.agents_mgr.on_new_scene() @@ -310,14 +316,14 @@ def reconfigure(self, config: "DictConfig", ep_info: RearrangeEpisode): self.agents_mgr.post_obj_load_reconfigure() # add episode clutter objects additional to base scene objects - if self.habitat_config.load_objs: - self._add_objs(ep_info, should_add_objects) + if self._load_objs: + self._add_objs(ep_info, should_add_objects, new_scene) self._setup_targets(ep_info) self._add_markers(ep_info) # auto-sleep rigid objects as optimization - if self.habitat_config.auto_sleep: + if self._auto_sleep: self._sleep_all_objects() rom = self.get_rigid_object_manager() @@ -369,7 +375,7 @@ def _setup_semantic_ids(self): obj.object_id + self.habitat_config.object_ids_start ) - def get_agent_data(self, agent_idx: Optional[int]): + def get_agent_data(self, agent_idx: Optional[int]) -> ArticulatedAgentData: if agent_idx is None: return self.agents_mgr[0] else: @@ -449,7 +455,9 @@ def largest_island_idx(self) -> int: return self._largest_island_idx @add_perf_timing_func() - def _clear_objects(self, should_add_objects: bool) -> None: + def _clear_objects( + self, should_add_objects: bool, new_scene: bool + ) -> None: rom = self.get_rigid_object_manager() # Clear all the rigid objects. @@ -472,9 +480,10 @@ def _clear_objects(self, should_add_objects: bool) -> None: rom.remove_object_by_id(viz_obj.object_id) self._viz_objs = {} - # Do not remove the articulated objects from the scene, these are - # managed by the underlying sim. - self.art_objs = [] + if new_scene: + # Do not remove the articulated objects from the scene, these are + # managed by the underlying sim. 
+ self.art_objs = [] @add_perf_timing_func() def _set_ao_states_from_ep(self, ep_info: RearrangeEpisode) -> None: @@ -529,7 +538,10 @@ def safe_snap_point(self, pos: np.ndarray) -> np.ndarray: @add_perf_timing_func() def _add_objs( - self, ep_info: RearrangeEpisode, should_add_objects: bool + self, + ep_info: RearrangeEpisode, + should_add_objects: bool, + new_scene: bool, ) -> None: # Load clutter objects: rom = self.get_rigid_object_manager() @@ -581,23 +593,18 @@ def _add_objs( obj_counts[obj_handle] += 1 - self._receptacles = self._create_recep_info( - ep_info.scene_id, list(self._handle_to_object_id.keys()) - ) + if new_scene: + self._receptacles = self._create_recep_info( + ep_info.scene_id, list(self._handle_to_object_id.keys()) + ) - ao_mgr = self.get_articulated_object_manager() - articulated_agent_art_handles = [ - articulated_agent.sim_obj.handle - for articulated_agent in self.agents_mgr.articulated_agents_iter - ] - for aoi_handle in ao_mgr.get_object_handles(): - ao = ao_mgr.get_object_by_handle(aoi_handle) - if ( - self._kinematic_mode - and ao.handle not in articulated_agent_art_handles - ): - ao.motion_type = habitat_sim.physics.MotionType.KINEMATIC - self.art_objs.append(ao) + ao_mgr = self.get_articulated_object_manager() + # Make all articulated objects (including the robots) kinematic + for aoi_handle in ao_mgr.get_object_handles(): + ao = ao_mgr.get_object_by_handle(aoi_handle) + if self._kinematic_mode: + ao.motion_type = habitat_sim.physics.MotionType.KINEMATIC + self.art_objs.append(ao) def _create_recep_info( self, scene_id: str, ignore_handles: List[str] diff --git a/habitat-lab/habitat/tasks/rearrange/rearrange_task.py b/habitat-lab/habitat/tasks/rearrange/rearrange_task.py index bbaabf0d65..7233f581f9 100644 --- a/habitat-lab/habitat/tasks/rearrange/rearrange_task.py +++ b/habitat-lab/habitat/tasks/rearrange/rearrange_task.py @@ -83,6 +83,19 @@ def __init__( self._cur_episode_step = 0 self._should_place_articulated_agent = should_place_articulated_agent + # Get config options + self._force_regenerate = self._config.force_regenerate + self._should_save_to_cache = self._config.should_save_to_cache + self._obj_succ_thresh = self._config.obj_succ_thresh + self._enable_safe_drop = self._config.enable_safe_drop + self._constraint_violation_ends_episode = ( + self._config.constraint_violation_ends_episode + ) + self._constraint_violation_drops_object = ( + self._config.constraint_violation_drops_object + ) + self._count_obj_collisions = self._config.count_obj_collisions + data_path = dataset.config.data_path.format(split=dataset.config.split) fname = data_path.split("/")[-1].split(".")[0] cache_path = osp.join( @@ -134,7 +147,7 @@ def _get_cached_articulated_agent_start(self, agent_idx: int = 0): if ( self._articulated_agent_pos_start is None or start_ident not in self._articulated_agent_pos_start - or self._config.force_regenerate + or self._force_regenerate ): return None else: @@ -146,7 +159,7 @@ def _get_ep_init_ident(self, agent_idx): def _cache_articulated_agent_start(self, cache_data, agent_idx: int = 0): if ( self._articulated_agent_pos_start is not None - and self._config.should_save_to_cache + and self._should_save_to_cache ): start_ident = self._get_ep_init_ident(agent_idx) self._articulated_agent_pos_start[start_ident] = cache_data @@ -239,12 +252,12 @@ def _is_violating_safe_drop(self, action_args): self._sim.grasp_mgr.is_grasped and action_args.get("grip_action", None) is not None and action_args["grip_action"] < 0 - and min_dist < 
self._config.obj_succ_thresh + and min_dist < self._obj_succ_thresh ) def step(self, action: Dict[str, Any], episode: Episode): action_args = action["action_args"] - if self._config.enable_safe_drop and self._is_violating_safe_drop( + if self._enable_safe_drop and self._is_violating_safe_drop( action_args ): action_args["grip_action"] = None @@ -255,7 +268,7 @@ def step(self, action: Dict[str, Any], episode: Episode): for grasp_mgr in self._sim.agents_mgr.grasp_iter: if ( grasp_mgr.is_violating_hold_constraint() - and self._config.constraint_violation_drops_object + and self._constraint_violation_drops_object ): grasp_mgr.desnap(True) @@ -276,7 +289,7 @@ def _check_episode_is_active( for grasp_mgr in self._sim.agents_mgr.grasp_iter: if ( grasp_mgr.is_violating_hold_constraint() - and self._config.constraint_violation_ends_episode + and self._constraint_violation_ends_episode ): done = True break @@ -329,7 +342,7 @@ def get_max_force(contact_points, check_id): def get_cur_collision_info(self, agent_idx) -> CollisionDetails: _, coll_details = rearrange_collision( - self._sim, self._config.count_obj_collisions, agent_idx=agent_idx + self._sim, self._count_obj_collisions, agent_idx=agent_idx ) return coll_details diff --git a/habitat-lab/habitat/tasks/rearrange/sub_tasks/nav_to_obj_task.py b/habitat-lab/habitat/tasks/rearrange/sub_tasks/nav_to_obj_task.py index 64b881b7f9..2136b00cb7 100644 --- a/habitat-lab/habitat/tasks/rearrange/sub_tasks/nav_to_obj_task.py +++ b/habitat-lab/habitat/tasks/rearrange/sub_tasks/nav_to_obj_task.py @@ -48,6 +48,12 @@ def __init__(self, *args, config, dataset=None, **kwargs): self.force_obj_to_idx = None self.force_recep_to_name = None + # Set config options + self._object_in_hand_sample_prob = ( + self._config.object_in_hand_sample_prob + ) + self._min_start_distance = self._config.min_start_distance + self._nav_to_info = None @property @@ -75,7 +81,7 @@ def _generate_nav_start_goal(self, episode, force_idx=None) -> NavToInfo: # Only change the scene if this skill is not running as a sub-task if ( force_idx is None - and random.random() < self._config.object_in_hand_sample_prob + and random.random() < self._object_in_hand_sample_prob ): start_hold_obj_idx = self._generate_snap_to_obj() @@ -94,7 +100,7 @@ def _generate_nav_start_goal(self, episode, force_idx=None) -> NavToInfo: def filter_func(start_pos, _): return ( np.linalg.norm(start_pos - nav_to_pos) - > self._config.min_start_distance + > self._min_start_distance ) ( diff --git a/habitat-lab/habitat/tasks/rearrange/sub_tasks/pick_sensors.py b/habitat-lab/habitat/tasks/rearrange/sub_tasks/pick_sensors.py index 175f1f127c..b04943e893 100644 --- a/habitat-lab/habitat/tasks/rearrange/sub_tasks/pick_sensors.py +++ b/habitat-lab/habitat/tasks/rearrange/sub_tasks/pick_sensors.py @@ -94,7 +94,7 @@ def update_metric(self, *args, episode, task, observations, **kwargs): if cur_picked: dist_to_goal = ee_to_rest_distance else: - dist_to_goal = ee_to_object_distance[str(task.abs_targ_idx)] + dist_to_goal = ee_to_object_distance[str(task.targ_idx)] abs_targ_obj_idx = self._sim.scene_obj_ids[task.abs_targ_idx] diff --git a/habitat-lab/habitat/tasks/rearrange/sub_tasks/pick_task.py b/habitat-lab/habitat/tasks/rearrange/sub_tasks/pick_task.py index 4fceaff8b3..83a8ae9ba0 100644 --- a/habitat-lab/habitat/tasks/rearrange/sub_tasks/pick_task.py +++ b/habitat-lab/habitat/tasks/rearrange/sub_tasks/pick_task.py @@ -32,6 +32,10 @@ def __init__(self, *args, config, dataset=None, **kwargs): self.prev_colls = None self.force_set_idx = 
None + self._base_angle_noise = self._config.base_angle_noise + self._spawn_max_dist_to_obj = self._config.spawn_max_dist_to_obj + self._num_spawn_attempts = self._config.num_spawn_attempts + self._physics_stability_steps = self._config.physics_stability_steps def set_args(self, obj, **kwargs): self.force_set_idx = obj @@ -56,11 +60,11 @@ def _gen_start_pos(self, sim, episode, sel_idx): start_pos, angle_to_obj, was_fail = get_robot_spawns( targ_pos, - self._config.base_angle_noise, - self._config.spawn_max_dist_to_obj, + self._base_angle_noise, + self._spawn_max_dist_to_obj, sim, - self._config.num_spawn_attempts, - self._config.physics_stability_steps, + self._num_spawn_attempts, + self._physics_stability_steps, ) if was_fail: diff --git a/habitat-lab/habitat/tasks/rearrange/sub_tasks/place_sensors.py b/habitat-lab/habitat/tasks/rearrange/sub_tasks/place_sensors.py index d19712c9b0..1335bb4693 100644 --- a/habitat-lab/habitat/tasks/rearrange/sub_tasks/place_sensors.py +++ b/habitat-lab/habitat/tasks/rearrange/sub_tasks/place_sensors.py @@ -76,16 +76,16 @@ def update_metric(self, *args, episode, task, observations, **kwargs): ].get_metric() obj_at_goal = task.measurements.measures[ ObjAtGoal.cls_uuid - ].get_metric()[str(task.abs_targ_idx)] + ].get_metric()[str(task.targ_idx)] snapped_id = self._sim.grasp_mgr.snap_idx cur_picked = snapped_id is not None if (not obj_at_goal) or cur_picked: if self._config.use_ee_dist: - dist_to_goal = ee_to_goal_dist[str(task.abs_targ_idx)] + dist_to_goal = ee_to_goal_dist[str(task.targ_idx)] else: - dist_to_goal = obj_to_goal_dist[str(task.abs_targ_idx)] + dist_to_goal = obj_to_goal_dist[str(task.targ_idx)] min_dist = self._config.min_dist_to_goal else: dist_to_goal = ee_to_rest_distance @@ -158,7 +158,7 @@ def reset_metric(self, *args, episode, task, observations, **kwargs): def update_metric(self, *args, episode, task, observations, **kwargs): is_obj_at_goal = task.measurements.measures[ ObjAtGoal.cls_uuid - ].get_metric()[str(task.abs_targ_idx)] + ].get_metric()[str(task.targ_idx)] is_holding = self._sim.grasp_mgr.is_grasped ee_to_rest_distance = task.measurements.measures[ diff --git a/habitat-lab/habitat/tasks/rearrange/utils.py b/habitat-lab/habitat/tasks/rearrange/utils.py index b23522f114..dc71c7fb14 100644 --- a/habitat-lab/habitat/tasks/rearrange/utils.py +++ b/habitat-lab/habitat/tasks/rearrange/utils.py @@ -409,31 +409,47 @@ def get_robot_spawns( :param distance_threshold: The maximum distance from the target. :param sim: The simulator instance. :param num_spawn_attempts: The number of sample attempts for the distance threshold. - :param physics_stability_steps: The number of steps to perform for physics stability check. + :param physics_stability_steps: The number of steps to perform for physics stability check. If specified as 0, then it will return the result without doing any checks. :param agent: The agent to set the position for. If not specified, defaults to the simulator default agent. :return: The robot's start position, rotation, and whether the placement was a failure (True for failure, False for success). """ - - state = sim.capture_state() if agent is None: agent = sim.articulated_agent start_rotation = agent.base_rot start_position = agent.base_pos + state = sim.capture_state() + # Try to place the robot. 
for _ in range(num_spawn_attempts): sim.set_state(state) - start_position = sim.pathfinder.get_random_navigable_point_near( - target_position, distance_threshold - ) - island_idx = sim.pathfinder.get_island(start_position) - if island_idx != sim.largest_island_idx: - continue - relative_target = target_position - start_position + if distance_threshold == -1.0: + # Place as close to the object as possible. + if not sim.is_point_within_bounds(target_position): + rearrange_logger.error( + f"Object {target_position} is out of bounds but trying to set robot position" + ) + start_position = sim.safe_snap_point(target_position) + else: + # Place within `distance_threshold` of the object. + start_position = sim.pathfinder.get_random_navigable_point_near( + target_position, distance_threshold + ) + # Face the robot towards the object. + relative_target = target_position - start_position angle_to_object = get_angle_to_pos(relative_target) + rotation_noise = np.random.normal(0.0, rotation_perturbation_noise) + start_rotation = angle_to_object + rotation_noise + + if physics_stability_steps == 0: + return start_position, start_rotation, False + + island_idx = sim.pathfinder.get_island(start_position) + if island_idx != sim.largest_island_idx: + continue target_distance = np.linalg.norm( (start_position - target_position)[[0, 2]] @@ -441,10 +457,6 @@ def get_robot_spawns( is_navigable = sim.pathfinder.is_navigable(start_position) - # Face the robot towards the object. - rotation_noise = np.random.normal(0.0, rotation_perturbation_noise) - start_rotation = angle_to_object + rotation_noise - if target_distance > distance_threshold or not is_navigable: continue diff --git a/habitat-lab/habitat/utils/env_utils.py b/habitat-lab/habitat/utils/env_utils.py index 9dcaefaf01..9f27259fac 100644 --- a/habitat-lab/habitat/utils/env_utils.py +++ b/habitat-lab/habitat/utils/env_utils.py @@ -14,7 +14,9 @@ def make_env_fn( - config: "DictConfig", env_class: Union[Type[Env], Type[RLEnv]] + config: "DictConfig", + env_class: Union[Type[Env], Type[RLEnv]], + dataset=None, ) -> Union[Env, RLEnv]: r"""Creates an env of type env_class with specified config and rank. This is to be passed in as an argument when creating VectorEnv. @@ -23,13 +25,15 @@ def make_env_fn( config: root exp config that has core env config node as well as env-specific config node. env_class: class type of the env to be created. + dataset: If specified, load the environment using this dataset. Returns: env object created according to specification. 
""" if "habitat" in config: config = config.habitat - dataset = make_dataset(config.dataset.type, config=config.dataset) + if dataset is None: + dataset = make_dataset(config.dataset.type, config=config.dataset) env = env_class(config=config, dataset=dataset) env.seed(config.seed) return env diff --git a/test/test_baseline_trainers.py b/test/test_baseline_trainers.py index fa5ff212d1..329a172516 100644 --- a/test/test_baseline_trainers.py +++ b/test/test_baseline_trainers.py @@ -10,7 +10,6 @@ import os import random from copy import deepcopy -from glob import glob import pytest @@ -60,22 +59,9 @@ def download_data(): @pytest.mark.parametrize( "test_cfg_path,gpu2gpu,observation_transforms_overrides,mode", list( - itertools.product( - glob("habitat-baselines/habitat_baselines/config/test/*"), - [False], - [ - [], - [ - "+habitat_baselines/rl/policy/obs_transforms=[center_cropper_base, resize_shortest_edge_base]", - ], - ], - ["train", "eval"], - ) - ) - + list( itertools.product( ["test/config/habitat_baselines/ppo_pointnav_test.yaml"], - [True], + [True, False], [ [], [ diff --git a/test/test_baseline_training.py b/test/test_baseline_training.py index 2db78870f3..52b665c579 100644 --- a/test/test_baseline_training.py +++ b/test/test_baseline_training.py @@ -208,6 +208,7 @@ def test_hrl(config_path, policy_type, skill_type, mode): # We don't have skill checkpoints to load right now. return if policy_type == "hl_fixed" and mode == "train": + # Cannot train with a fixed policy return if skill_type == "oracle_skills" and "oracle" not in config_path: return diff --git a/test/test_ddppo_reduce.py b/test/test_ddppo_reduce.py index d15971591b..17d69db050 100644 --- a/test/test_ddppo_reduce.py +++ b/test/test_ddppo_reduce.py @@ -80,8 +80,7 @@ def _worker_fn( 2, obs_space, action_space, - ppo_cfg.hidden_size, - num_recurrent_layers=actor_critic.net.num_recurrent_layers, + actor_critic, is_double_buffered=False, ) rollouts.to(device)