diff --git a/habitat-baselines/habitat_baselines/common/__init__.py b/habitat-baselines/habitat_baselines/common/__init__.py index 0f0db8cad5..3a2b019086 100644 --- a/habitat-baselines/habitat_baselines/common/__init__.py +++ b/habitat-baselines/habitat_baselines/common/__init__.py @@ -3,3 +3,7 @@ # Copyright (c) Meta Platforms, Inc. and its affiliates. # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. +from habitat_baselines.common.env_factory import VectorEnvFactory +from habitat_baselines.common.habitat_env_factory import ( + HabitatVectorEnvFactory, +) diff --git a/habitat-baselines/habitat_baselines/common/construct_vector_env.py b/habitat-baselines/habitat_baselines/common/construct_vector_env.py deleted file mode 100644 index b4df6558d3..0000000000 --- a/habitat-baselines/habitat_baselines/common/construct_vector_env.py +++ /dev/null @@ -1,113 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and its affiliates. -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. - -import os -import random -from typing import TYPE_CHECKING, Any, List, Type - -from habitat import ThreadedVectorEnv, VectorEnv, logger, make_dataset -from habitat.config import read_write -from habitat.gym import make_gym_from_config - -if TYPE_CHECKING: - from omegaconf import DictConfig - - -def construct_envs( - config: "DictConfig", - workers_ignore_signals: bool = False, - enforce_scenes_greater_eq_environments: bool = False, - is_first_rank: bool = True, -) -> VectorEnv: - r"""Create VectorEnv object with specified config and env class type. - To allow better performance, dataset are split into small ones for - each individual env, grouped by scenes. - - :param config: configs that contain num_environments as well as information - :param necessary to create individual environments. - :param workers_ignore_signals: Passed to :ref:`habitat.VectorEnv`'s constructor - :param enforce_scenes_greater_eq_environments: Make sure that there are more (or equal) - scenes than environments. This is needed for correct evaluation. - :param is_first_rank: If these environments are being constructed on the rank0 GPU. - - :return: VectorEnv object created according to specification. - """ - - num_environments = config.habitat_baselines.num_environments - configs = [] - dataset = make_dataset(config.habitat.dataset.type) - scenes = config.habitat.dataset.content_scenes - if "*" in config.habitat.dataset.content_scenes: - scenes = dataset.get_scenes_to_load(config.habitat.dataset) - - if num_environments < 1: - raise RuntimeError("num_environments must be strictly positive") - - if len(scenes) == 0: - raise RuntimeError( - "No scenes to load, multiple process logic relies on being able to split scenes uniquely between processes" - ) - - random.shuffle(scenes) - - scene_splits: List[List[str]] = [[] for _ in range(num_environments)] - if len(scenes) < num_environments: - msg = f"There are less scenes ({len(scenes)}) than environments ({num_environments}). " - if enforce_scenes_greater_eq_environments: - logger.warn( - msg - + "Reducing the number of environments to be the number of scenes." - ) - num_environments = len(scenes) - scene_splits = [[s] for s in scenes] - else: - logger.warn( - msg - + "Each environment will use all the scenes instead of using a subset." 
- ) - for scene in scenes: - for split in scene_splits: - split.append(scene) - else: - for idx, scene in enumerate(scenes): - scene_splits[idx % len(scene_splits)].append(scene) - assert sum(map(len, scene_splits)) == len(scenes) - - for env_index in range(num_environments): - proc_config = config.copy() - with read_write(proc_config): - task_config = proc_config.habitat - task_config.seed = task_config.seed + env_index - if (env_index != 0) or not is_first_rank: - # Filter out non-rank0_env0 measures from the task config if we - # are not on rank0 env0. - task_config.task.measurements = { - k: v - for k, v in task_config.task.measurements.items() - if k not in task_config.task.rank0_env0_measure_names - } - if len(scenes) > 0: - task_config.dataset.content_scenes = scene_splits[env_index] - - configs.append(proc_config) - - vector_env_cls: Type[Any] - if int(os.environ.get("HABITAT_ENV_DEBUG", 0)): - logger.warn( - "Using the debug Vector environment interface. Expect slower performance." - ) - vector_env_cls = ThreadedVectorEnv - else: - vector_env_cls = VectorEnv - - envs = vector_env_cls( - make_env_fn=make_gym_from_config, - env_fn_args=tuple((c,) for c in configs), - workers_ignore_signals=workers_ignore_signals, - ) - - if config.habitat.simulator.renderer.enable_batch_renderer: - envs.initialize_batch_renderer(config) - - return envs diff --git a/habitat-baselines/habitat_baselines/common/env_factory.py b/habitat-baselines/habitat_baselines/common/env_factory.py new file mode 100644 index 0000000000..98e8564a48 --- /dev/null +++ b/habitat-baselines/habitat_baselines/common/env_factory.py @@ -0,0 +1,34 @@ +from abc import ABC, abstractmethod +from typing import TYPE_CHECKING + +from habitat import VectorEnv + +if TYPE_CHECKING: + from omegaconf import DictConfig + + +class VectorEnvFactory(ABC): + """ + Interface responsible for constructing vectorized environments used in training. + """ + + @abstractmethod + def construct_envs( + self, + config: "DictConfig", + workers_ignore_signals: bool = False, + enforce_scenes_greater_eq_environments: bool = False, + is_first_rank: bool = True, + ) -> VectorEnv: + """ + Setup a vectorized environment. + + :param config: configs that contain num_environments as well as information + :param workers_ignore_signals: Passed to :ref:`habitat.VectorEnv`'s constructor + :param enforce_scenes_greater_eq_environments: Make sure that there are more (or equal) + :param enforce_scenes_greater_eq_environments: Make sure that there are more (or equal) + scenes than environments. This is needed for correct evaluation. + :param is_first_rank: If these environments are being constructed on the rank0 GPU. + + :return: VectorEnv object created according to specification. + """ diff --git a/habitat-baselines/habitat_baselines/common/habitat_env_factory.py b/habitat-baselines/habitat_baselines/common/habitat_env_factory.py new file mode 100644 index 0000000000..bc3730252c --- /dev/null +++ b/habitat-baselines/habitat_baselines/common/habitat_env_factory.py @@ -0,0 +1,120 @@ +# Copyright (c) Meta Platforms, Inc. and its affiliates. +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
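A minimal sketch of a user-defined factory built on the new `VectorEnvFactory` interface above; the class name and the idea of skipping scene splitting are hypothetical, while the `construct_envs` signature, the `VectorEnv` constructor arguments, and the `make_gym_from_config` helper all come from this patch:

from habitat import VectorEnv
from habitat.gym import make_gym_from_config

from habitat_baselines.common import VectorEnvFactory


class TrivialVectorEnvFactory(VectorEnvFactory):
    """Illustrative factory that hands every worker the full, unsplit config."""

    def construct_envs(
        self,
        config,
        workers_ignore_signals: bool = False,
        enforce_scenes_greater_eq_environments: bool = False,
        is_first_rank: bool = True,
    ) -> VectorEnv:
        num_envs = config.habitat_baselines.num_environments
        # Every environment receives the same config; the default
        # HabitatVectorEnvFactory instead copies the config per worker and
        # assigns per-environment scene splits.
        return VectorEnv(
            make_env_fn=make_gym_from_config,
            env_fn_args=tuple((config,) for _ in range(num_envs)),
            workers_ignore_signals=workers_ignore_signals,
        )

Such a factory would be selected through the new `habitat_baselines.vector_env_factory._target_` option (pointed at the class's import path), since the trainer now builds the factory with `hydra.utils.instantiate` and calls its `construct_envs`.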
+ +import os +import random +from typing import TYPE_CHECKING, Any, List, Type + +from habitat import ThreadedVectorEnv, VectorEnv, logger, make_dataset +from habitat.config import read_write +from habitat.gym import make_gym_from_config +from habitat_baselines.common.env_factory import VectorEnvFactory + +if TYPE_CHECKING: + from omegaconf import DictConfig + + +class HabitatVectorEnvFactory(VectorEnvFactory): + def construct_envs( + self, + config: "DictConfig", + workers_ignore_signals: bool = False, + enforce_scenes_greater_eq_environments: bool = False, + is_first_rank: bool = True, + ) -> VectorEnv: + r"""Create VectorEnv object with specified config and env class type. + To allow better performance, dataset are split into small ones for + each individual env, grouped by scenes. + """ + + num_environments = config.habitat_baselines.num_environments + configs = [] + dataset = make_dataset(config.habitat.dataset.type) + scenes = config.habitat.dataset.content_scenes + if "*" in config.habitat.dataset.content_scenes: + scenes = dataset.get_scenes_to_load(config.habitat.dataset) + + if num_environments < 1: + raise RuntimeError("num_environments must be strictly positive") + + if len(scenes) == 0: + raise RuntimeError( + "No scenes to load, multiple process logic relies on being able to split scenes uniquely between processes" + ) + + random.shuffle(scenes) + + scene_splits: List[List[str]] = [[] for _ in range(num_environments)] + if len(scenes) < num_environments: + msg = f"There are less scenes ({len(scenes)}) than environments ({num_environments}). " + if enforce_scenes_greater_eq_environments: + logger.warn( + msg + + "Reducing the number of environments to be the number of scenes." + ) + num_environments = len(scenes) + scene_splits = [[s] for s in scenes] + else: + logger.warn( + msg + + "Each environment will use all the scenes instead of using a subset." + ) + for scene in scenes: + for split in scene_splits: + split.append(scene) + else: + for idx, scene in enumerate(scenes): + scene_splits[idx % len(scene_splits)].append(scene) + assert sum(map(len, scene_splits)) == len(scenes) + + for env_index in range(num_environments): + proc_config = config.copy() + with read_write(proc_config): + task_config = proc_config.habitat + task_config.seed = task_config.seed + env_index + remove_measure_names = [] + if not is_first_rank: + # Filter out non rank0_measure from the task config if we are not on rank0. + remove_measure_names.extend( + task_config.task.rank0_measure_names + ) + if (env_index != 0) or not is_first_rank: + # Filter out non-rank0_env0 measures from the task config if we + # are not on rank0 env0. + remove_measure_names.extend( + task_config.task.rank0_env0_measure_names + ) + + task_config.task.measurements = { + k: v + for k, v in task_config.task.measurements.items() + if k not in remove_measure_names + } + + if len(scenes) > 0: + task_config.dataset.content_scenes = scene_splits[ + env_index + ] + + configs.append(proc_config) + + vector_env_cls: Type[Any] + if int(os.environ.get("HABITAT_ENV_DEBUG", 0)): + logger.warn( + "Using the debug Vector environment interface. Expect slower performance." 
+ ) + vector_env_cls = ThreadedVectorEnv + else: + vector_env_cls = VectorEnv + + envs = vector_env_cls( + make_env_fn=make_gym_from_config, + env_fn_args=tuple((c,) for c in configs), + workers_ignore_signals=workers_ignore_signals, + ) + + if config.habitat.simulator.renderer.enable_batch_renderer: + envs.initialize_batch_renderer(config) + + return envs diff --git a/habitat-baselines/habitat_baselines/common/rollout_storage.py b/habitat-baselines/habitat_baselines/common/rollout_storage.py index 305677ef80..7acc64a2d5 100644 --- a/habitat-baselines/habitat_baselines/common/rollout_storage.py +++ b/habitat-baselines/habitat_baselines/common/rollout_storage.py @@ -31,8 +31,7 @@ def __init__( num_envs, observation_space, action_space, - recurrent_hidden_state_size, - num_recurrent_layers=1, + actor_critic, is_double_buffered: bool = False, ): action_shape, discrete_actions = get_action_space_info(action_space) @@ -55,8 +54,8 @@ def __init__( self.buffers["recurrent_hidden_states"] = torch.zeros( numsteps + 1, num_envs, - num_recurrent_layers, - recurrent_hidden_state_size, + actor_critic.num_recurrent_layers, + actor_critic.recurrent_hidden_size, ) self.buffers["rewards"] = torch.zeros(numsteps + 1, num_envs, 1) @@ -110,6 +109,7 @@ def to(self, device): self.buffers.map_in_place(lambda v: v.to(device)) self.device = device + @g_timer.avg_time("rollout_storage.insert", level=1) def insert( self, next_observations=None, diff --git a/habitat-baselines/habitat_baselines/config/default_structured_configs.py b/habitat-baselines/habitat_baselines/config/default_structured_configs.py index 93f7e9ddc8..0aadbc239a 100644 --- a/habitat-baselines/habitat_baselines/config/default_structured_configs.py +++ b/habitat-baselines/habitat_baselines/config/default_structured_configs.py @@ -264,6 +264,8 @@ class HrlDefinedSkillConfig(HabitatBaselinesBaseConfig): @dataclass class HierarchicalPolicyConfig(HabitatBaselinesBaseConfig): high_level_policy: Dict[str, Any] = MISSING + # Names of the skills to not load. + ignore_skills: List[str] = field(default_factory=list) defined_skills: Dict[str, HrlDefinedSkillConfig] = field( default_factory=dict ) @@ -383,6 +385,26 @@ class ProfilingConfig(HabitatBaselinesBaseConfig): num_steps_to_capture: int = -1 +@dataclass +class VectorEnvFactoryConfig(HabitatBaselinesBaseConfig): + """ + `_target_` points to the `VectorEnvFactory` to setup the vectorized + environment. Defaults to the Habitat vectorized environment setup. + """ + + _target_: str = "habitat_baselines.common.HabitatVectorEnvFactory" + + +@dataclass +class HydraCallbackConfig(HabitatBaselinesBaseConfig): + """ + Generic callback option for Hydra. Used to create the `_target_` class or + call the `_target_` method. + """ + + _target_: Optional[str] = None + + @dataclass class HabitatBaselinesConfig(HabitatBaselinesBaseConfig): # task config can be a list of configs like "A.yaml,B.yaml" @@ -413,6 +435,8 @@ class HabitatBaselinesConfig(HabitatBaselinesBaseConfig): log_file: str = "train.log" force_blind_policy: bool = False verbose: bool = True + # Creates the vectorized environment. + vector_env_factory: VectorEnvFactoryConfig = VectorEnvFactoryConfig() eval_keys_to_include_in_name: List[str] = field(default_factory=list) # For our use case, the CPU side things are mainly memory copies # and nothing of substantive compute. 
PyTorch has been making @@ -430,6 +454,13 @@ class HabitatBaselinesConfig(HabitatBaselinesBaseConfig): load_resume_state_config: bool = True eval: EvalConfig = EvalConfig() profiling: ProfilingConfig = ProfilingConfig() + # Whether to log the infos that are only logged to a single process to the + # CLI along with the other metrics. + should_log_single_proc_infos: bool = False + # Called every time a checkpoint is saved. + # Function signature: fn(save_file_path: str) -> None + # If not specified, there is no callback. + on_save_ckpt_callback: Optional[HydraCallbackConfig] = None @dataclass diff --git a/habitat-baselines/habitat_baselines/rl/ddppo/policy/resnet_policy.py b/habitat-baselines/habitat_baselines/rl/ddppo/policy/resnet_policy.py index daa77d3961..8546c01203 100644 --- a/habitat-baselines/habitat_baselines/rl/ddppo/policy/resnet_policy.py +++ b/habitat-baselines/habitat_baselines/rl/ddppo/policy/resnet_policy.py @@ -601,6 +601,10 @@ def is_blind(self): def num_recurrent_layers(self): return self.state_encoder.num_recurrent_layers + @property + def recurrent_hidden_size(self): + return self._hidden_size + @property def perception_embedding_size(self): return self._hidden_size diff --git a/habitat-baselines/habitat_baselines/rl/hrl/hierarchical_policy.py b/habitat-baselines/habitat_baselines/rl/hrl/hierarchical_policy.py index 0ff20e4156..3ff48c8463 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/hierarchical_policy.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/hierarchical_policy.py @@ -20,22 +20,11 @@ PddlProblem, ) from habitat_baselines.common.baseline_registry import baseline_registry -from habitat_baselines.rl.hrl.hl import ( # noqa: F401. - FixedHighLevelPolicy, - HighLevelPolicy, - NeuralHighLevelPolicy, -) -from habitat_baselines.rl.hrl.skills import ( # noqa: F401. - ArtObjSkillPolicy, - NavSkillPolicy, - NoopSkillPolicy, - OracleNavPolicy, - PickSkillPolicy, - PlaceSkillPolicy, - ResetArmSkill, - SkillPolicy, - WaitSkillPolicy, -) +from habitat_baselines.rl.hrl.hl import FixedHighLevelPolicy # noqa: F401. +from habitat_baselines.rl.hrl.hl import NeuralHighLevelPolicy # noqa: F401. +from habitat_baselines.rl.hrl.hl import HighLevelPolicy +from habitat_baselines.rl.hrl.skills import * # noqa: F403,F401. +from habitat_baselines.rl.hrl.skills import NoopSkillPolicy, SkillPolicy from habitat_baselines.rl.hrl.utils import find_action_range from habitat_baselines.rl.ppo.policy import Policy, PolicyActionData from habitat_baselines.utils.common import get_num_actions @@ -72,13 +61,29 @@ def __init__( # Can map multiple skills to the same underlying skill controller. 
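A hedged sketch of how the new `on_save_ckpt_callback` hook could be used; the function and module path below are made up, while the `fn(save_file_path: str) -> None` signature and the Hydra `_target_` mechanism are what this patch defines:

import shutil


def backup_checkpoint(save_file_path: str) -> None:
    """Example callback: mirror every saved checkpoint to a second location."""
    shutil.copy(save_file_path, save_file_path + ".backup")

Setting `habitat_baselines.on_save_ckpt_callback._target_` to the import path of such a function (e.g. the hypothetical `my_project.callbacks.backup_checkpoint`) makes the trainer invoke it via `hydra.utils.call(..., save_file_path=save_file_path)` right after each `torch.save`.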
self._skill_redirects: Dict[int, int] = {} + if "rearrange_stop" not in action_space.spaces: + raise ValueError("Hierarchical policy requires the stop action") + self._stop_action_idx, _ = find_action_range( + action_space, "rearrange_stop" + ) + self._pddl = self._create_pddl(full_config, config) self._create_skills( - dict(config.hierarchical_policy.defined_skills), + { + k: v + for k, v in config.hierarchical_policy.defined_skills.items() + if k not in config.hierarchical_policy.ignore_skills + }, observation_space, action_space, full_config, ) + self._max_skill_rnn_layers = max( + skill.num_recurrent_layers for skill in self._skills.values() + ) + self._any_ll_hidden_state = any( + skill.has_hidden_state for skill in self._skills.values() + ) self._cur_skills: np.ndarray = np.full( (self._num_envs,), -1, dtype=np.int32 @@ -98,9 +103,6 @@ def __init__( observation_space, action_space, ) - self._stop_action_idx, _ = find_action_range( - action_space, "rearrange_stop" - ) first_idx: Optional[int] = None # Remap all the Noop skills to the same underlying skill so all the @@ -112,6 +114,13 @@ def __init__( else: self._skill_redirects[skill_i] = first_idx + self._hl_needs_recurrent_state = ( + self._high_level_policy.num_recurrent_layers + ) > 0 + self._recurrent_hidden_size = ( + full_config.habitat_baselines.rl.ppo.hidden_size + ) + def _create_skills( self, skills, observation_space, action_space, full_config ): @@ -156,6 +165,7 @@ def _create_pddl(self, full_config, config) -> PddlDomain: domain_file, task_spec_file, config, + read_config=False, ) def eval(self): @@ -168,10 +178,13 @@ def get_policy_action_space( Fetches the policy action space for learning. If we are learning the HL policy, it will return its custom action space for learning. """ - - return self._high_level_policy.get_policy_action_space( - env_action_space - ) + if self._any_ll_hidden_state or not self._hl_needs_recurrent_state: + # The LL skill will take priority for the prev action. + return env_action_space + else: + return self._high_level_policy.get_policy_action_space( + env_action_space + ) def extract_policy_info( self, action_data, infos, dones @@ -195,13 +208,16 @@ def extract_policy_info( return ret_policy_infos + @property + def recurrent_hidden_size(self) -> int: + return self._recurrent_hidden_size + @property def num_recurrent_layers(self): - if self._high_level_policy.num_recurrent_layers != 0: - return self._high_level_policy.num_recurrent_layers - else: - first_skill = self._skills[list(self._skills.keys())[0]] - return first_skill.num_recurrent_layers + return ( + self._max_skill_rnn_layers + + self._high_level_policy.num_recurrent_layers + ) @property def should_load_agent_state(self): @@ -271,14 +287,17 @@ def act( (self._num_envs, get_num_actions(self._action_space)), device=masks.device, ) + hl_rnn_hidden_states, ll_rnn_hidden_states = self._split_hidden_states( + rnn_hidden_states + ) # Always call high-level if the episode is over. self._cur_call_high_level |= (~masks_cpu).view(-1) - skill_id = self._cur_skills[0] hl_terminate_episode, hl_info = self._update_skills( observations, - rnn_hidden_states, + hl_rnn_hidden_states, + ll_rnn_hidden_states, prev_actions, masks, actions, @@ -287,12 +306,15 @@ def act( deterministic, ) did_choose_new_skill = self._cur_call_high_level.clone() + if hl_info.rnn_hidden_states is not None: + # Update the HL hidden state. 
+ hl_rnn_hidden_states = hl_info.rnn_hidden_states grouped_skills = self._broadcast_skill_ids( self._cur_skills, sel_dat={ "observations": observations, - "rnn_hidden_states": rnn_hidden_states, + "rnn_hidden_states": ll_rnn_hidden_states, "prev_actions": prev_actions, "masks": masks, }, @@ -305,9 +327,9 @@ def act( masks=batch_dat["masks"], cur_batch_idx=batch_ids, ) - actions[batch_ids] += action_data.actions - rnn_hidden_states[batch_ids] = action_data.rnn_hidden_states + # Update the LL hidden state. + ll_rnn_hidden_states[batch_ids] = action_data.rnn_hidden_states actions[:, self._stop_action_idx] = 0.0 @@ -317,7 +339,8 @@ def act( actions, ) = self._get_terminations( observations, - rnn_hidden_states, + hl_rnn_hidden_states, + ll_rnn_hidden_states, prev_actions, masks, actions, @@ -330,30 +353,68 @@ def act( for batch_idx in torch.nonzero(should_terminate_episode): actions[batch_idx, self._stop_action_idx] = 1.0 - action_kwargs = { - "rnn_hidden_states": rnn_hidden_states, - "actions": actions, - } - action_kwargs.update(hl_info) + rnn_hidden_states = self._combine_hidden_states( + hl_rnn_hidden_states, ll_rnn_hidden_states + ) + + # This will update the prev action + if self._any_ll_hidden_state or not self._hl_needs_recurrent_state: + # The LL skill will take priority for the prev action + use_action = actions + else: + use_action = hl_info.actions return PolicyActionData( take_actions=actions, policy_info=log_info, should_inserts=did_choose_new_skill.view(-1, 1), - **action_kwargs, + actions=use_action, + values=hl_info.values, + action_log_probs=hl_info.action_log_probs, + rnn_hidden_states=rnn_hidden_states, ) + def _split_hidden_states( + self, rnn_hidden_states: torch.Tensor + ) -> Tuple[torch.Tensor, torch.Tensor]: + hl_num_layers = self._high_level_policy.num_recurrent_layers + + if hl_num_layers == 0 or self._max_skill_rnn_layers == 0: + # No need to split hidden states if both aren't being used. + return rnn_hidden_states, rnn_hidden_states + else: + # Split the hidden state for HL and LL policies + hl_rnn_hidden_states = rnn_hidden_states[:, :hl_num_layers] + ll_rnn_hidden_states = rnn_hidden_states[:, hl_num_layers:] + return hl_rnn_hidden_states, ll_rnn_hidden_states + + def _combine_hidden_states( + self, + hl_rnn_hidden_states: torch.Tensor, + ll_rnn_hidden_states: torch.Tensor, + ) -> torch.Tensor: + if not self._hl_needs_recurrent_state: + return ll_rnn_hidden_states + elif self._max_skill_rnn_layers == 0: + return hl_rnn_hidden_states + else: + # Stack the LL and HL hidden states. + return torch.cat( + [hl_rnn_hidden_states, ll_rnn_hidden_states], dim=1 + ) + def _update_skills( self, observations, - rnn_hidden_states, + hl_rnn_hidden_states, + ll_rnn_hidden_states, prev_actions, masks, actions, log_info, should_choose_new_skill: torch.BoolTensor, deterministic: bool, - ) -> Tuple[torch.BoolTensor, Dict[str, Any]]: + ) -> Tuple[torch.BoolTensor, PolicyActionData]: """ Will potentially update the set of running skills according to the HL policy. This updates the active skill indices in `self._cur_skills` in @@ -370,7 +431,6 @@ def _update_skills( # If any skills want to terminate invoke the high-level policy to get # the next skill. 
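A toy illustration of the hidden-state layout that the new `_split_hidden_states` / `_combine_hidden_states` pair assumes (the layer counts and hidden size below are invented for the example): the high-level policy's recurrent layers occupy the leading slots of the layer dimension and the low-level skills' layers follow, which is why `num_recurrent_layers` now reports the sum of the two:

import torch

hl_layers, ll_layers, num_envs, hidden = 2, 1, 4, 512
rnn_hidden_states = torch.zeros(num_envs, hl_layers + ll_layers, hidden)

# What _split_hidden_states does when both HL and LL keep a recurrent state.
hl_states = rnn_hidden_states[:, :hl_layers]
ll_states = rnn_hidden_states[:, hl_layers:]

# What _combine_hidden_states does on the way back out.
recombined = torch.cat([hl_states, ll_states], dim=1)
assert recombined.shape == rnn_hidden_states.shape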
hl_terminate_episode = torch.zeros(self._num_envs, dtype=torch.bool) - hl_info: Dict[str, Any] = self._high_level_policy.create_hl_info() if should_choose_new_skill.sum() > 0: ( new_skills, @@ -379,7 +439,7 @@ def _update_skills( hl_info, ) = self._high_level_policy.get_next_skill( observations, - rnn_hidden_states, + hl_rnn_hidden_states, prev_actions, masks, should_choose_new_skill, @@ -395,42 +455,51 @@ def _update_skills( ) for skill_id, (batch_ids, _) in sel_grouped_skills.items(): - self._skills[skill_id].on_enter( + ( + ll_rnn_hidden_states_batched, + prev_actions_batched, + ) = self._skills[skill_id].on_enter( [new_skill_args[i] for i in batch_ids], batch_ids, observations, - rnn_hidden_states, + ll_rnn_hidden_states, prev_actions, ) - if "rnn_hidden_states" in hl_info: - if self._skills[skill_id].has_hidden_state: - raise ValueError( - f"The code does not currently support neural LL and neural HL skills. Skill={self._skills[skill_id]}, HL={self._high_level_policy}" - ) - if prev_actions.shape[0] == len(batch_ids): - prev_actions = hl_info["actions"][batch_ids] - else: - prev_actions[batch_ids] = hl_info["actions"][batch_ids] - - rnn_hidden_states = ( - self._high_level_policy.update_hidden_states( - batch_ids, rnn_hidden_states, hl_info - ) + + _write_tensor_batched( + ll_rnn_hidden_states, + ll_rnn_hidden_states_batched, + batch_ids, + ) + _write_tensor_batched( + prev_actions, prev_actions_batched, batch_ids + ) + + if hl_info.rnn_hidden_states is not None: + # Only update the RNN hidden state for NEW skills. + hl_rnn_hidden_states = _update_tensor_batched( + hl_rnn_hidden_states, + hl_info.rnn_hidden_states, + batch_ids, ) - hl_info["actions"] = prev_actions - hl_info["rnn_hidden_states"] = rnn_hidden_states + # We made at least some decisions, so update the action info + hl_info.rnn_hidden_states = hl_rnn_hidden_states should_choose_new_skill = should_choose_new_skill.numpy() self._cur_skills = ( (~should_choose_new_skill) * self._cur_skills ) + (should_choose_new_skill * new_skills) + else: + # We made no decisions, so return an empty HL action info. 
+ hl_info = PolicyActionData() return hl_terminate_episode, hl_info def _get_terminations( self, observations, - rnn_hidden_states, + hl_rnn_hidden_states, + ll_rnn_hidden_states, prev_actions, masks, actions, @@ -450,7 +519,7 @@ def _get_terminations( hl_wants_skill_term = self._high_level_policy.get_termination( observations, - rnn_hidden_states, + hl_rnn_hidden_states, prev_actions, masks, self._cur_skills, @@ -465,7 +534,7 @@ def _get_terminations( self._cur_skills, sel_dat={ "observations": observations, - "rnn_hidden_states": rnn_hidden_states, + "rnn_hidden_states": ll_rnn_hidden_states, "prev_actions": prev_actions, "masks": masks, "actions": actions, @@ -474,8 +543,8 @@ def _get_terminations( ) for skill_id, (batch_ids, dat) in grouped_skills.items(): ( - self._cur_call_high_level[batch_ids], - bad_should_terminate[batch_ids], + call_hl_batch, + bad_should_terminate_batch, new_actions, ) = self._skills[skill_id].should_terminate( **dat, @@ -485,6 +554,13 @@ def _get_terminations( self._idx_to_name[self._cur_skills[i]] for i in batch_ids ], ) + + _write_tensor_batched( + self._cur_call_high_level, call_hl_batch, batch_ids + ) + _write_tensor_batched( + bad_should_terminate, bad_should_terminate_batch, batch_ids + ) actions[batch_ids] += new_actions return self._cur_call_high_level, bad_should_terminate, actions @@ -524,9 +600,44 @@ def from_config( **kwargs, ): return cls( - config.habitat_baselines.rl.policy, - config, - observation_space, - orig_action_space, - config.habitat_baselines.num_environments, + config=config.habitat_baselines.rl.policy, + full_config=config, + observation_space=observation_space, + action_space=orig_action_space, + num_envs=config.habitat_baselines.num_environments, ) + + +def _write_tensor_batched( + source_tensor: torch.Tensor, + write_tensor: torch.Tensor, + write_idxs: List[int], +) -> torch.Tensor: + """ + This assumes that write_tensor has already been indexed into by + `write_idxs` and only needs to be copied to `source_tensor`. Updates + `source_tensor` in place. + """ + + if source_tensor.shape[0] == len(write_idxs): + source_tensor = write_tensor + else: + source_tensor[write_idxs] = write_tensor + return source_tensor + + +def _update_tensor_batched( + source_tensor: torch.Tensor, + write_tensor: torch.Tensor, + write_idxs: List[int], +) -> torch.Tensor: + """ + Writes the indices of `write_idxs` from `write_tensor` into + `source_tensor`. Updates `source_tensor` in place. 
+ """ + + if source_tensor.shape[0] == len(write_idxs): + source_tensor = write_tensor[write_idxs] + else: + source_tensor[write_idxs] = write_tensor[write_idxs] + return source_tensor diff --git a/habitat-baselines/habitat_baselines/rl/hrl/hl/fixed_policy.py b/habitat-baselines/habitat_baselines/rl/hrl/hl/fixed_policy.py index d14b2f0a8d..a7c2be085e 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/hl/fixed_policy.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/hl/fixed_policy.py @@ -9,6 +9,7 @@ from habitat.tasks.rearrange.multi_task.rearrange_pddl import parse_func from habitat_baselines.common.logging import baselines_logger from habitat_baselines.rl.hrl.hl.high_level_policy import HighLevelPolicy +from habitat_baselines.rl.ppo.policy import PolicyActionData class FixedHighLevelPolicy(HighLevelPolicy): @@ -119,4 +120,4 @@ def get_next_skill( self._next_sol_idxs[batch_idx] += 1 - return next_skill, skill_args_data, immediate_end, {} + return next_skill, skill_args_data, immediate_end, PolicyActionData() diff --git a/habitat-baselines/habitat_baselines/rl/hrl/hl/high_level_policy.py b/habitat-baselines/habitat_baselines/rl/hrl/hl/high_level_policy.py index 0b66555d2b..ab76b19de7 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/hl/high_level_policy.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/hl/high_level_policy.py @@ -5,6 +5,7 @@ import torch.nn as nn from habitat.tasks.rearrange.multi_task.pddl_domain import PddlProblem +from habitat_baselines.rl.ppo.policy import PolicyActionData class HighLevelPolicy(nn.Module): @@ -58,26 +59,6 @@ def evaluate_actions( def num_recurrent_layers(self): return 0 - def update_hidden_states( - self, - batch_ids: List[int], - old_rnn_hidden_states: torch.Tensor, - hl_info: Dict[str, Any], - ) -> torch.Tensor: - """ - Update the hidden states from the previous hidden state and selected HL - action information. Called after every HL action prediction. Returns - the updated hidden states of same shape as `old_rnn_hidden_states`. - """ - - if old_rnn_hidden_states.shape[0] == len(batch_ids): - return hl_info["rnn_hidden_states"][batch_ids] - else: - old_rnn_hidden_states[batch_ids] = hl_info["rnn_hidden_states"][ - batch_ids - ] - return old_rnn_hidden_states - def parameters(self): return iter([nn.Parameter(torch.zeros((1,), device=self._device))]) @@ -95,7 +76,7 @@ def get_next_skill( plan_masks: torch.Tensor, deterministic: bool, log_info: List[Dict[str, Any]], - ) -> Tuple[torch.Tensor, List[Any], torch.BoolTensor, Dict[str, Any]]: + ) -> Tuple[torch.Tensor, List[Any], torch.BoolTensor, PolicyActionData]: """ Get the next skill to be executed. @@ -113,13 +94,10 @@ def get_next_skill( - skill_args_data: Arguments for the next skill. - immediate_end: Binary masks indicating which environment(s) should end immediately. - - Information for PolicyActionData + - PolicyActionData information for learning. """ raise NotImplementedError() - def create_hl_info(self) -> Dict[str, Any]: - return {} - def apply_mask(self, mask: torch.Tensor) -> None: """ Called before every step with the mask information at the current step. 
diff --git a/habitat-baselines/habitat_baselines/rl/hrl/hl/neural_policy.py b/habitat-baselines/habitat_baselines/rl/hrl/hl/neural_policy.py index 2460098a54..b04b12d77a 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/hl/neural_policy.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/hl/neural_policy.py @@ -15,7 +15,7 @@ from habitat_baselines.rl.models.rnn_state_encoder import ( build_rnn_state_encoder, ) -from habitat_baselines.rl.ppo.policy import CriticHead +from habitat_baselines.rl.ppo.policy import CriticHead, PolicyActionData from habitat_baselines.utils.common import CategoricalNet @@ -85,9 +85,6 @@ def __init__(self, *args, **kwargs): def should_load_agent_state(self): return True - def create_hl_info(self): - return {"actions": None} - def _setup_actions(self) -> List[PddlAction]: all_actions = self._pddl_prob.get_possible_actions() all_actions = [ @@ -215,10 +212,10 @@ def get_next_skill( next_skill, skill_args_data, immediate_end, - { - "action_log_probs": action_log_probs, - "values": values, - "actions": skill_sel, - "rnn_hidden_states": rnn_hidden_states, - }, + PolicyActionData( + action_log_probs=action_log_probs, + values=values, + actions=skill_sel, + rnn_hidden_states=rnn_hidden_states, + ), ) diff --git a/habitat-baselines/habitat_baselines/rl/hrl/skills/nn_skill.py b/habitat-baselines/habitat_baselines/rl/hrl/skills/nn_skill.py index 1dcbf07d20..79cc49cd56 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/skills/nn_skill.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/skills/nn_skill.py @@ -104,7 +104,7 @@ def on_enter( rnn_hidden_states, prev_actions, ): - super().on_enter( + ret = super().on_enter( skill_arg, batch_idxs, observations, @@ -112,6 +112,7 @@ def on_enter( prev_actions, ) self._did_want_done *= 0.0 + return ret def _get_filtered_obs(self, observations, cur_batch_idx) -> TensorDict: return TensorDict( @@ -175,7 +176,7 @@ def from_config( ) except FileNotFoundError as e: raise FileNotFoundError( - "Could not load neural network weights for skill." + f"Could not load neural network weights for skill from ckpt {config.load_ckpt_file}" ) from e policy_cfg = ckpt_dict["config"] @@ -224,12 +225,7 @@ def from_config( ) if len(ckpt_dict) > 0: try: - actor_critic.load_state_dict( - { # type: ignore - k[len("actor_critic.") :]: v - for k, v in ckpt_dict["state_dict"].items() - } - ) + actor_critic.load_state_dict(ckpt_dict["state_dict"]) except Exception as e: raise ValueError( diff --git a/habitat-baselines/habitat_baselines/rl/hrl/skills/place.py b/habitat-baselines/habitat_baselines/rl/hrl/skills/place.py index a99ac1a8e1..5ff21b9099 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/skills/place.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/skills/place.py @@ -27,7 +27,7 @@ def _mask_pick(self, action, observations): is_not_holding = 1 - observations[IsHoldingSensor.cls_uuid].view(-1) for i in torch.nonzero(is_not_holding): # Do not regrasp the object once it is released. 
- action[i, self._grip_ac_idx] = -1.0 + action.actions[i, self._grip_ac_idx] = -1.0 return action def _is_skill_done( diff --git a/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py b/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py index 30fdb21f98..f5d9828a87 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py @@ -68,6 +68,7 @@ def __init__( break if not found_grip and not self.should_ignore_grip: raise ValueError(f"Could not find grip action in {action_space}") + self._stop_action_idx, _ = find_action_range( action_space, "rearrange_stop" ) @@ -233,7 +234,7 @@ def on_enter( return ( rnn_hidden_states[batch_idxs] * 0.0, - prev_actions[batch_idxs] * 0.0, + prev_actions[batch_idxs] * 0, ) def set_pddl_problem(self, pddl_prob): diff --git a/habitat-baselines/habitat_baselines/rl/ppo/agent_access_mgr.py b/habitat-baselines/habitat_baselines/rl/ppo/agent_access_mgr.py index 542eee9110..837524e271 100644 --- a/habitat-baselines/habitat_baselines/rl/ppo/agent_access_mgr.py +++ b/habitat-baselines/habitat_baselines/rl/ppo/agent_access_mgr.py @@ -1,5 +1,5 @@ from abc import ABC, abstractmethod -from typing import TYPE_CHECKING, Any, Callable, Dict, Optional, Tuple +from typing import TYPE_CHECKING, Any, Callable, Dict, Optional import gym.spaces as spaces @@ -115,14 +115,6 @@ def load_ckpt_state_dict(self, ckpt: Dict) -> None: def load_state_dict(self, state: Dict) -> None: raise NotImplementedError() - @property - @abstractmethod - def hidden_state_shape(self) -> Tuple[int]: - """ - The shape of the tensor to track the hidden state, such as the RNN hidden state. - """ - raise NotImplementedError() - @abstractmethod def after_update(self) -> None: """ @@ -137,3 +129,15 @@ def pre_rollout(self) -> None: Called before a rollout is collected. """ raise NotImplementedError() + + @abstractmethod + def _create_storage( + self, + num_envs: int, + env_spec: EnvironmentSpec, + actor_critic: Policy, + policy_action_space: spaces.Space, + config: "DictConfig", + device, + ) -> Storage: + pass diff --git a/habitat-baselines/habitat_baselines/rl/ppo/policy.py b/habitat-baselines/habitat_baselines/rl/ppo/policy.py index 61182b9c01..56292fc028 100644 --- a/habitat-baselines/habitat_baselines/rl/ppo/policy.py +++ b/habitat-baselines/habitat_baselines/rl/ppo/policy.py @@ -57,7 +57,7 @@ class PolicyActionData: current policy. """ - rnn_hidden_states: torch.Tensor + rnn_hidden_states: Optional[torch.Tensor] = None actions: Optional[torch.Tensor] = None values: Optional[torch.Tensor] = None action_log_probs: Optional[torch.Tensor] = None @@ -99,9 +99,19 @@ def should_load_agent_state(self): def num_recurrent_layers(self) -> int: return 0 + @property + def recurrent_hidden_size(self) -> int: + return 0 + def forward(self, *x): raise NotImplementedError + @property + def visual_encoder(self) -> Optional[nn.Module]: + """ + Gets the visual encoder for the policy. 
+ """ + def get_policy_action_space( self, env_action_space: spaces.Space ) -> spaces.Space: @@ -207,6 +217,14 @@ def __init__( **cfg, ) + @property + def recurrent_hidden_size(self) -> int: + return self.net.recurrent_hidden_size + + @property + def visual_encoder(self) -> Optional[nn.Module]: + return self.net.visual_encoder + @property def should_load_agent_state(self): return True @@ -372,6 +390,11 @@ def output_size(self): def num_recurrent_layers(self): pass + @property + @abc.abstractmethod + def recurrent_hidden_size(self): + pass + @property @abc.abstractmethod def is_blind(self): @@ -438,6 +461,10 @@ def is_blind(self): def num_recurrent_layers(self): return self.state_encoder.num_recurrent_layers + @property + def recurrent_hidden_size(self): + return self._hidden_size + @property def perception_embedding_size(self): return self._hidden_size diff --git a/habitat-baselines/habitat_baselines/rl/ppo/ppo.py b/habitat-baselines/habitat_baselines/rl/ppo/ppo.py index 8a98b4c9f9..569fd4e49a 100644 --- a/habitat-baselines/habitat_baselines/rl/ppo/ppo.py +++ b/habitat-baselines/habitat_baselines/rl/ppo/ppo.py @@ -35,20 +35,21 @@ class PPO(nn.Module, Updater): @classmethod def from_config(cls, actor_critic: NetPolicy, config): - config = {k.lower(): v for k, v in config.items()} - param_dict = dict(actor_critic=actor_critic) - sig = inspect.signature(cls.__init__) - for p in sig.parameters.values(): - if p.name == "self" or p.name in param_dict: - continue - - assert p.name in config, "{} parameter '{}' not in config".format( - cls.__name__, p.name - ) - - param_dict[p.name] = config[p.name] - - return cls(**param_dict) + return cls( + actor_critic=actor_critic, + clip_param=config.clip_param, + ppo_epoch=config.ppo_epoch, + num_mini_batch=config.num_mini_batch, + value_loss_coef=config.value_loss_coef, + entropy_coef=config.entropy_coef, + lr=config.lr, + eps=config.eps, + max_grad_norm=config.max_grad_norm, + use_clipped_value_loss=config.use_clipped_value_loss, + use_normalized_advantage=config.use_normalized_advantage, + entropy_target_factor=config.entropy_target_factor, + use_adaptive_entropy_pen=config.use_adaptive_entropy_pen, + ) def __init__( self, @@ -99,9 +100,16 @@ def __init__( ).to(device=self.device) self.use_normalized_advantage = use_normalized_advantage + self.optimizer = self._create_optimizer(lr, eps) - params = list(filter(lambda p: p.requires_grad, self.parameters())) + self.non_ac_params = [ + p + for name, p in self.named_parameters() + if not name.startswith("actor_critic.") + ] + def _create_optimizer(self, lr, eps): + params = list(filter(lambda p: p.requires_grad, self.parameters())) if len(params) > 0: optim_cls = optim.Adam optim_kwargs = dict( @@ -120,15 +128,9 @@ def __init__( else: optim_cls = torch.optim._multi_tensor.Adam - self.optimizer = optim_cls(**optim_kwargs) + return optim_cls(**optim_kwargs) else: - self.optimizer = None - - self.non_ac_params = [ - p - for name, p in self.named_parameters() - if not name.startswith("actor_critic.") - ] + return None def forward(self, *x): raise NotImplementedError @@ -370,3 +372,15 @@ def before_step(self) -> torch.Tensor: def after_step(self) -> None: if isinstance(self.entropy_coef, LagrangeInequalityCoefficient): self.entropy_coef.project_into_bounds() + + def after_update(self): + pass + + def get_resume_state(self): + return { + "optim_state": self.optimizer.state_dict(), + } + + def load_state_dict(self, state): + if "optim_state" in state: + self.optimizer.load_state_dict(state["optim_state"]) diff --git 
a/habitat-baselines/habitat_baselines/rl/ppo/ppo_trainer.py b/habitat-baselines/habitat_baselines/rl/ppo/ppo_trainer.py index e4aaf7444b..633a3c1628 100644 --- a/habitat-baselines/habitat_baselines/rl/ppo/ppo_trainer.py +++ b/habitat-baselines/habitat_baselines/rl/ppo/ppo_trainer.py @@ -11,6 +11,7 @@ from collections import defaultdict, deque from typing import Any, Dict, List, Optional, Set +import hydra import numpy as np import torch import tqdm @@ -26,9 +27,9 @@ observations_to_image, overlay_frame, ) +from habitat_baselines.common import VectorEnvFactory from habitat_baselines.common.base_trainer import BaseRLTrainer from habitat_baselines.common.baseline_registry import baseline_registry -from habitat_baselines.common.construct_vector_env import construct_envs from habitat_baselines.common.env_spec import EnvironmentSpec from habitat_baselines.common.obs_transformers import ( apply_obs_transforms_batch, @@ -52,7 +53,6 @@ ) from habitat_baselines.rl.ddppo.policy import PointNavResNetNet from habitat_baselines.rl.ppo.agent_access_mgr import AgentAccessMgr -from habitat_baselines.rl.ppo.policy import NetPolicy from habitat_baselines.rl.ppo.single_agent_access_mgr import ( # noqa: F401. SingleAgentAccessMgr, ) @@ -140,7 +140,10 @@ def _init_envs(self, config=None, is_eval: bool = False): if config is None: config = self.config - self.envs = construct_envs( + env_factory: VectorEnvFactory = hydra.utils.instantiate( + config.habitat_baselines.vector_env_factory + ) + self.envs = env_factory.construct_envs( config, workers_ignore_signals=is_slurm_batch_job(), enforce_scenes_greater_eq_environments=is_eval, @@ -149,21 +152,23 @@ def _init_envs(self, config=None, is_eval: bool = False): or torch.distributed.get_rank() == 0 ), ) + self._env_spec = EnvironmentSpec( observation_space=self.envs.observation_spaces[0], action_space=self.envs.action_spaces[0], orig_action_space=self.envs.orig_action_spaces[0], ) - # The measure keys that should only be logged on rank0,gpu0 and nowhere + # The measure keys that should only be logged on rank0 and nowhere # else. They will be excluded from all other workers and only reported # from the single worker. - self._rank0_env0_keys: Set[str] = set( - self.config.habitat.task.rank0_env0_measure_names + self._rank0_keys: Set[str] = set( + list(self.config.habitat.task.rank0_env0_measure_names) + + list(self.config.habitat.task.rank0_measure_names) ) - # Information on measures that declared in `self._rank0_env0_keys` to - # be only reported on rank0,gpu0. This is seperately logged from + # Information on measures that declared in `self._rank0_keys` or + # to be only reported on rank0. This is seperately logged from # `self.window_episode_stats`. 
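A toy example (with made-up metric names) of the split the trainer now performs with `self._rank0_keys`: measures listed under `rank0_env0_measure_names` or the new `rank0_measure_names` end up in `self._single_proc_infos` (optionally printed when `should_log_single_proc_infos` is on), while everything else continues to feed the shared window statistics:

infos = [{"habitat_perf": 1.2, "success": 1.0}]
rank0_keys = {"habitat_perf"}

single_proc_infos = {k: v for k, v in infos[0].items() if k in rank0_keys}
shared_infos = {k: v for k, v in infos[0].items() if k not in rank0_keys}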
self._single_proc_infos: Dict[str, List[float]] = {} @@ -273,8 +278,10 @@ def _init_train(self, resume_state=None): batch = apply_obs_transforms_batch(batch, self.obs_transforms) # type: ignore if self._is_static_encoder: - assert isinstance(self._agent.actor_critic, NetPolicy) - self._encoder = self._agent.actor_critic.net.visual_encoder + self._encoder = self._agent.actor_critic.visual_encoder + assert ( + self._encoder is not None + ), "Visual encoder is not specified for this actor" with inference_mode(): batch[ PointNavResNetNet.PRETRAINED_VISUAL_FEATURES_KEY @@ -313,18 +320,21 @@ def save_checkpoint( if extra_state is not None: checkpoint["extra_state"] = extra_state # type: ignore - torch.save( - checkpoint, - os.path.join( - self.config.habitat_baselines.checkpoint_folder, file_name - ), + save_file_path = os.path.join( + self.config.habitat_baselines.checkpoint_folder, file_name ) + torch.save(checkpoint, save_file_path) torch.save( checkpoint, os.path.join( self.config.habitat_baselines.checkpoint_folder, "latest.pth" ), ) + if self.config.habitat_baselines.on_save_ckpt_callback is not None: + hydra.utils.call( + self.config.habitat_baselines.on_save_ckpt_callback, + save_file_path=save_file_path, + ) def load_checkpoint(self, checkpoint_path: str, *args, **kwargs) -> Dict: r"""Load checkpoint of specified path as a dict. @@ -432,14 +442,11 @@ def _collect_environment_result(self, buffer_index: int = 0): self._single_proc_infos = extract_scalars_from_infos( infos, ignore_keys=set( - k - for k in infos[0].keys() - if k not in self._rank0_env0_keys + k for k in infos[0].keys() if k not in self._rank0_keys ), ) - extracted_infos = extract_scalars_from_infos( - infos, ignore_keys=self._rank0_env0_keys + infos, ignore_keys=self._rank0_keys ) for k, v_k in extracted_infos.items(): v = torch.tensor( @@ -457,20 +464,20 @@ def _collect_environment_result(self, buffer_index: int = 0): done_masks, 0.0 ) - if self._is_static_encoder: - with inference_mode(): - batch[ - PointNavResNetNet.PRETRAINED_VISUAL_FEATURES_KEY - ] = self._encoder(batch) + if self._is_static_encoder: + with inference_mode(), g_timer.avg_time("trainer.visual_features"): + batch[ + PointNavResNetNet.PRETRAINED_VISUAL_FEATURES_KEY + ] = self._encoder(batch) - self._agent.rollouts.insert( - next_observations=batch, - rewards=rewards, - next_masks=not_done_masks, - buffer_index=buffer_index, - ) + self._agent.rollouts.insert( + next_observations=batch, + rewards=rewards, + next_masks=not_done_masks, + buffer_index=buffer_index, + ) - self._agent.rollouts.advance_rollout(buffer_index) + self._agent.rollouts.advance_rollout(buffer_index) return env_slice.stop - env_slice.start @@ -620,6 +627,9 @@ def _training_log( [f"{k}: {v.mean:.3f}" for k, v in g_timer.items()] ) logger.info(f"\tPerf Stats: {perf_stats_str}") + if self.config.habitat_baselines.should_log_single_proc_infos: + for k, v in self._single_proc_infos.items(): + logger.info(f" - {k}: {np.mean(v):.3f}") def should_end_early(self, rollout_step) -> bool: if not self._is_distributed: @@ -810,7 +820,7 @@ def _eval_checkpoint( checkpoint_path, map_location="cpu" ) step_id = ckpt_dict["extra_state"]["step"] - print(step_id) + logger.info(f"Loaded checkpoint trained for {step_id} steps") else: ckpt_dict = {"config": None} @@ -861,10 +871,14 @@ def _eval_checkpoint( test_recurrent_hidden_states = torch.zeros( ( self.config.habitat_baselines.num_environments, - *self._agent.hidden_state_shape, + self._agent.actor_critic.num_recurrent_layers, + 
self._agent.actor_critic.recurrent_hidden_size, ), device=self.device, ) + should_update_recurrent_hidden_states = ( + np.prod(test_recurrent_hidden_states.shape) != 0 + ) prev_actions = torch.zeros( self.config.habitat_baselines.num_environments, *action_shape, @@ -882,9 +896,21 @@ def _eval_checkpoint( ] = {} # dict of dicts that stores stats per episode ep_eval_count: Dict[Any, int] = defaultdict(lambda: 0) - rgb_frames: List[List[np.ndarray]] = [ - [] for _ in range(self.config.habitat_baselines.num_environments) - ] + if len(self.config.habitat_baselines.eval.video_option) > 0: + # Add the first frame of the episode to the video. + rgb_frames: List[List[np.ndarray]] = [ + [ + observations_to_image( + {k: v[env_idx] for k, v in batch.items()}, {} + ) + ] + for env_idx in range( + self.config.habitat_baselines.num_environments + ) + ] + else: + rgb_frames = None + if len(self.config.habitat_baselines.eval.video_option) > 0: os.makedirs(self.config.habitat_baselines.video_dir, exist_ok=True) @@ -935,11 +961,13 @@ def _eval_checkpoint( for i, should_insert in enumerate( action_data.should_inserts ): - if should_insert.item(): + if not should_insert.item(): + continue + if should_update_recurrent_hidden_states: test_recurrent_hidden_states[ i ] = action_data.rnn_hidden_states[i] - prev_actions[i].copy_(action_data.actions[i]) # type: ignore + prev_actions[i].copy_(action_data.actions[i]) # type: ignore # NB: Move actions to CPU. If CUDA tensors are # sent in to env.step(), that will create CUDA contexts # in the subprocesses. @@ -961,6 +989,9 @@ def _eval_checkpoint( observations, rewards_l, dones, infos = [ list(x) for x in zip(*outputs) ] + # Note that `policy_infos` represents the information about the + # action BEFORE `observations` (the action used to transition to + # `observations`). policy_infos = self._agent.actor_critic.get_extra( action_data, infos, dones ) @@ -999,26 +1030,32 @@ def _eval_checkpoint( ): envs_to_pause.append(i) - # Exclude the keys from `_rank0_env0_keys`. - infos[i] = { + # Exclude the keys from `_rank0_keys` from displaying in the video + disp_info = { k: v for k, v in infos[i].items() - if k not in self._rank0_env0_keys + if k not in self._rank0_keys } if len(self.config.habitat_baselines.eval.video_option) > 0: # TODO move normalization / channel changing out of the policy and undo it here frame = observations_to_image( - {k: v[i] for k, v in batch.items()}, infos[i] + {k: v[i] for k, v in batch.items()}, disp_info ) if not not_done_masks[i].item(): # The last frame corresponds to the first frame of the next episode # but the info is correct. So we use a black frame - frame = observations_to_image( - {k: v[i] * 0.0 for k, v in batch.items()}, infos[i] + final_frame = observations_to_image( + {k: v[i] * 0.0 for k, v in batch.items()}, + disp_info, ) - frame = overlay_frame(frame, infos[i]) - rgb_frames[i].append(frame) + final_frame = overlay_frame(final_frame, disp_info) + rgb_frames[i].append(final_frame) + # The starting frame of the next episode will be the final element.. + rgb_frames[i].append(frame) + else: + frame = overlay_frame(frame, disp_info) + rgb_frames[i].append(frame) # episode ended if not not_done_masks[i].item(): @@ -1043,16 +1080,18 @@ def _eval_checkpoint( generate_video( video_option=self.config.habitat_baselines.eval.video_option, video_dir=self.config.habitat_baselines.video_dir, - images=rgb_frames[i], - episode_id=current_episodes_info[i].episode_id, + # Since the final frame is the start frame of the next episode. 
+ images=rgb_frames[i][:-1], + episode_id=f"{current_episodes_info[i].episode_id}_{ep_eval_count[k]}", checkpoint_idx=checkpoint_index, - metrics=extract_scalars_from_info(infos[i]), + metrics=extract_scalars_from_info(disp_info), fps=self.config.habitat_baselines.video_fps, tb_writer=writer, keys_to_include_in_name=self.config.habitat_baselines.eval_keys_to_include_in_name, ) - rgb_frames[i] = [] + # Since the starting frame of the next episode is the final frame. + rgb_frames[i] = rgb_frames[i][-1:] gfx_str = infos[i].get(GfxReplayMeasure.cls_uuid, "") if gfx_str != "": @@ -1088,9 +1127,12 @@ def _eval_checkpoint( ), f"Expected {number_of_eval_episodes} episodes, got {len(ep_eval_count)}." aggregated_stats = {} - for stat_key in next(iter(stats_episodes.values())).keys(): + all_ks = set() + for ep in stats_episodes.values(): + all_ks.update(ep.keys()) + for stat_key in all_ks: aggregated_stats[stat_key] = np.mean( - [v[stat_key] for v in stats_episodes.values()] + [v[stat_key] for v in stats_episodes.values() if stat_key in v] ) for k, v in aggregated_stats.items(): diff --git a/habitat-baselines/habitat_baselines/rl/ppo/single_agent_access_mgr.py b/habitat-baselines/habitat_baselines/rl/ppo/single_agent_access_mgr.py index b176eccea8..eeee6794e7 100644 --- a/habitat-baselines/habitat_baselines/rl/ppo/single_agent_access_mgr.py +++ b/habitat-baselines/habitat_baselines/rl/ppo/single_agent_access_mgr.py @@ -90,10 +90,40 @@ def _init_policy_and_updater(self, lr_schedule_fn, resume_state): def nbuffers(self): return self._nbuffers + def _create_storage( + self, + num_envs: int, + env_spec: EnvironmentSpec, + actor_critic: Policy, + policy_action_space: spaces.Space, + config: "DictConfig", + device, + ) -> Storage: + """ + Default behavior for setting up and initializing the rollout storage. + """ + + obs_space = get_rollout_obs_space( + env_spec.observation_space, actor_critic, config + ) + ppo_cfg = config.habitat_baselines.rl.ppo + rollouts = baseline_registry.get_storage( + config.habitat_baselines.rollout_storage_name + )( + numsteps=ppo_cfg.num_steps, + num_envs=num_envs, + observation_space=obs_space, + action_space=policy_action_space, + actor_critic=actor_critic, + is_double_buffered=ppo_cfg.use_double_buffered_sampler, + ) + rollouts.to(device) + return rollouts + def post_init(self, create_rollouts_fn: Optional[Callable] = None) -> None: # Create the rollouts storage. 
if create_rollouts_fn is None: - create_rollouts_fn = default_create_rollouts + create_rollouts_fn = self._create_storage policy_action_space = self._actor_critic.get_policy_action_space( self._env_spec.action_space @@ -169,7 +199,7 @@ def _create_policy(self) -> NetPolicy: } ) if self._is_static_encoder: - for param in actor_critic.net.visual_encoder.parameters(): + for param in actor_critic.visual_encoder.parameters(): param.requires_grad_(False) if self._config.habitat_baselines.rl.ddppo.reset_critic: @@ -194,7 +224,7 @@ def updater(self) -> Updater: def get_resume_state(self) -> Dict[str, Any]: ret = { "state_dict": self._actor_critic.state_dict(), - "optim_state": self._updater.optimizer.state_dict(), + **self._updater.get_resume_state(), } if self._lr_scheduler is not None: ret["lr_sched_state"] = (self._lr_scheduler.state_dict(),) @@ -216,17 +246,9 @@ def load_ckpt_state_dict(self, ckpt: Dict) -> None: def load_state_dict(self, state: Dict) -> None: self._actor_critic.load_state_dict(state["state_dict"]) if self._updater is not None: - if "optim_state" in state: - self._actor_critic.load_state_dict(state["optim_state"]) + self._updater.load_state_dict(state) if "lr_sched_state" in state: - self._actor_critic.load_state_dict(state["lr_sched_state"]) - - @property - def hidden_state_shape(self): - return ( - self.actor_critic.num_recurrent_layers, - self._ppo_cfg.hidden_size, - ) + self._lr_scheduler.load_state_dict(state["lr_sched_state"]) def after_update(self): if ( @@ -234,6 +256,7 @@ def after_update(self): and self._lr_scheduler is not None ): self._lr_scheduler.step() # type: ignore + self._updater.after_update() def pre_rollout(self): if self._ppo_cfg.use_linear_clip_decay: @@ -249,7 +272,7 @@ def get_rollout_obs_space(obs_space, actor_critic, config): """ if not config.habitat_baselines.rl.ddppo.train_encoder: - encoder = actor_critic.net.visual_encoder + encoder = actor_critic.visual_encoder obs_space = spaces.Dict( { PointNavResNetNet.PRETRAINED_VISUAL_FEATURES_KEY: spaces.Box( @@ -264,36 +287,5 @@ def get_rollout_obs_space(obs_space, actor_critic, config): return obs_space -def default_create_rollouts( - num_envs: int, - env_spec: EnvironmentSpec, - actor_critic: NetPolicy, - policy_action_space: spaces.Space, - config: "DictConfig", - device, -) -> Storage: - """ - Default behavior for setting up and initializing the rollout storage. - """ - - obs_space = get_rollout_obs_space( - env_spec.observation_space, actor_critic, config - ) - ppo_cfg = config.habitat_baselines.rl.ppo - rollouts = baseline_registry.get_storage( - config.habitat_baselines.rollout_storage_name - )( - ppo_cfg.num_steps, - num_envs, - obs_space, - policy_action_space, - ppo_cfg.hidden_size, - num_recurrent_layers=actor_critic.num_recurrent_layers, - is_double_buffered=ppo_cfg.use_double_buffered_sampler, - ) - rollouts.to(device) - return rollouts - - def linear_lr_schedule(percent_done: float) -> float: return 1 - percent_done diff --git a/habitat-baselines/habitat_baselines/rl/ppo/updater.py b/habitat-baselines/habitat_baselines/rl/ppo/updater.py index ae3fa558f1..5d277f4667 100644 --- a/habitat-baselines/habitat_baselines/rl/ppo/updater.py +++ b/habitat-baselines/habitat_baselines/rl/ppo/updater.py @@ -1,5 +1,5 @@ import abc -from typing import Dict +from typing import Any, Dict from habitat_baselines.common.storage import Storage @@ -14,3 +14,22 @@ def update(self, rollouts: Storage) -> Dict[str, float]: """ Perform an update from data in the storage objet. 
""" + + @property + def lr_scheduler(self): + return None + + def after_update(self) -> None: + """ + Called after the policy update. + """ + + def get_resume_state(self) -> Dict[str, Any]: + """ + Gets the optimizer resume state. + """ + + def load_state_dict(self, state: Dict[str, Any]) -> None: + """ + Loads an optimizer state. + """ diff --git a/habitat-baselines/habitat_baselines/rl/ver/ver_rollout_storage.py b/habitat-baselines/habitat_baselines/rl/ver/ver_rollout_storage.py index 73608242f8..a5db473281 100644 --- a/habitat-baselines/habitat_baselines/rl/ver/ver_rollout_storage.py +++ b/habitat-baselines/habitat_baselines/rl/ver/ver_rollout_storage.py @@ -137,13 +137,12 @@ class VERRolloutStorage(RolloutStorage): def __init__( self, - variable_experience: bool, numsteps, num_envs, observation_space, action_space, - recurrent_hidden_state_size, - num_recurrent_layers=1, + actor_critic, + variable_experience: bool, is_double_buffered: bool = False, ): super().__init__( @@ -151,8 +150,7 @@ def __init__( num_envs, observation_space, action_space, - recurrent_hidden_state_size, - num_recurrent_layers, + actor_critic, is_double_buffered, ) self.use_is_coeffs = variable_experience diff --git a/habitat-baselines/habitat_baselines/rl/ver/ver_trainer.py b/habitat-baselines/habitat_baselines/rl/ver/ver_trainer.py index bb702d8a7f..08ae66cf0a 100644 --- a/habitat-baselines/habitat_baselines/rl/ver/ver_trainer.py +++ b/habitat-baselines/habitat_baselines/rl/ver/ver_trainer.py @@ -242,8 +242,7 @@ def _init_train(self, resume_state): "numsteps": ppo_cfg.num_steps, "num_envs": len(self.environment_workers), "action_space": self._env_spec.action_space, - "recurrent_hidden_state_size": ppo_cfg.hidden_size, - "num_recurrent_layers": self._agent.actor_critic.net.num_recurrent_layers, + "actor_critic": self._agent.actor_critic, "observation_space": rollouts_obs_space, } diff --git a/habitat-lab/habitat/config/benchmark/rearrange/pick_spa.yaml b/habitat-lab/habitat/config/benchmark/rearrange/pick_spa.yaml deleted file mode 100644 index 33cd42fc67..0000000000 --- a/habitat-lab/habitat/config/benchmark/rearrange/pick_spa.yaml +++ /dev/null @@ -1,50 +0,0 @@ -# @package _global_ - -defaults: - - /habitat: habitat_config_base - - /habitat/simulator/agents@habitat.simulator.agents.main_agent: rgbd_head_rgbd_arm_agent - # add third_rgb_sensor to rgbd_head_rgbd_arm_agent: - - /habitat/simulator/sim_sensors@habitat.simulator.agents.main_agent.sim_sensors.third_rgb_sensor: third_rgb_sensor - - /habitat/task/rearrange: pick_spa - - /habitat/dataset/rearrangement: replica_cad - - _self_ - -# The configuration to run the SensePlanAct baseline architecture from https://arxiv.org/abs/2106.14405 -habitat: - environment: - max_episode_steps: 30 - simulator: - type: RearrangeSim-v0 - seed: 100 - additional_object_paths: - - "data/objects/ycb/configs/" - concur_render: True - auto_sleep: True - agents: - main_agent: - radius: 0.3 - sim_sensors: - head_rgb_sensor: - height: 128 - width: 128 - head_depth_sensor: - width: 128 - height: 128 - arm_depth_sensor: - height: 128 - width: 128 - arm_rgb_sensor: - height: 128 - width: 128 - third_rgb_sensor: - height: 512 - width: 512 - # Agent setup - ac_freq_ratio: 8 - - habitat_sim_v0: - allow_sliding: False - enable_physics: True - - dataset: - data_path: data/datasets/replica_cad/rearrange/v1/{split}/all_receptacles_10k_1k.json.gz diff --git a/habitat-lab/habitat/config/default_structured_configs.py b/habitat-lab/habitat/config/default_structured_configs.py index 
a2f1385332..ff12f4cf13 100644 --- a/habitat-lab/habitat/config/default_structured_configs.py +++ b/habitat-lab/habitat/config/default_structured_configs.py @@ -694,6 +694,7 @@ class RuntimePerfStatsMeasurementConfig(MeasurementConfig): """ type: str = "RuntimePerfStats" + disable_logging: bool = False @dataclass @@ -1134,6 +1135,8 @@ class TaskConfig(HabitatBaseConfig): rank0_env0_measure_names: List[str] = field( default_factory=lambda: ["habitat_perf"] ) + # Measures to only record in the first rank for vectorized environments. + rank0_measure_names: List[str] = field(default_factory=list) goal_sensor_uuid: str = "pointgoal" # REARRANGE task count_obj_collisions: bool = True diff --git a/habitat-lab/habitat/config/habitat/task/rearrange/agents/fetch_arm.yaml b/habitat-lab/habitat/config/habitat/task/rearrange/agents/fetch_arm.yaml new file mode 100644 index 0000000000..faf644ab27 --- /dev/null +++ b/habitat-lab/habitat/config/habitat/task/rearrange/agents/fetch_arm.yaml @@ -0,0 +1,7 @@ +# @package habitat.task.actions +defaults: + - /habitat/task/actions: + - arm_action + - _self_ +arm_action: + grip_controller: SuctionGraspAction diff --git a/habitat-lab/habitat/config/habitat/task/rearrange/agents/fetch_base_arm.yaml b/habitat-lab/habitat/config/habitat/task/rearrange/agents/fetch_base_arm.yaml new file mode 100644 index 0000000000..d2ce4c42a9 --- /dev/null +++ b/habitat-lab/habitat/config/habitat/task/rearrange/agents/fetch_base_arm.yaml @@ -0,0 +1,8 @@ +# @package habitat.task.actions +defaults: + - /habitat/task/actions: + - arm_action + - base_velocity + - _self_ +arm_action: + grip_controller: SuctionGraspAction diff --git a/habitat-lab/habitat/config/habitat/task/rearrange/open_cab.yaml b/habitat-lab/habitat/config/habitat/task/rearrange/open_cab.yaml index 1239aa794f..c1aae6217a 100644 --- a/habitat-lab/habitat/config/habitat/task/rearrange/open_cab.yaml +++ b/habitat-lab/habitat/config/habitat/task/rearrange/open_cab.yaml @@ -46,5 +46,5 @@ measurements: marker_dist_reward: 1.0 constraint_violate_pen: 1.0 force_terminate: - max_accum_force: 20_000.0 + max_accum_force: -1.0 max_instant_force: 10_000.0 diff --git a/habitat-lab/habitat/config/habitat/task/rearrange/pick.yaml b/habitat-lab/habitat/config/habitat/task/rearrange/pick.yaml index d52944f0c6..80ee4a7e61 100644 --- a/habitat-lab/habitat/config/habitat/task/rearrange/pick.yaml +++ b/habitat-lab/habitat/config/habitat/task/rearrange/pick.yaml @@ -2,9 +2,7 @@ defaults: - /habitat/task: task_config_base - - /habitat/task/actions: - - arm_action - - base_velocity + - /habitat/task/rearrange/agents: fetch_base_arm - /habitat/task/measurements: - articulated_agent_force - force_terminate @@ -32,9 +30,6 @@ success_measure: "pick_success" success_reward: 10.0 slack_reward: -0.005 end_on_success: True -actions: - arm_action: - grip_controller: SuctionGraspAction measurements: force_terminate: max_accum_force: 10_000.0 diff --git a/habitat-lab/habitat/config/habitat/task/rearrange/pick_spa.yaml b/habitat-lab/habitat/config/habitat/task/rearrange/pick_spa.yaml deleted file mode 100644 index d8671427aa..0000000000 --- a/habitat-lab/habitat/config/habitat/task/rearrange/pick_spa.yaml +++ /dev/null @@ -1,49 +0,0 @@ -# @package habitat.task - -defaults: - - /habitat/task: task_config_base - - /habitat/task/actions: - - arm_action - - empty - - /habitat/task/measurements: - - articulated_agent_force - - articulated_agent_colls - - force_terminate - - end_effector_to_rest_distance - - end_effector_to_object_distance - - pick_success - - 
pick_reward - - num_steps - - /habitat/task/lab_sensors: - - target_start_sensor - - joint_sensor - - is_holding_sensor - - end_effector_sensor - - relative_resting_pos_sensor - - goal_sensor - - object_sensor - - _self_ - -# The configuration to run the SensePlanAct baseline architecture from https://arxiv.org/abs/2106.14405 -type: RearrangePickTask-v0 -count_obj_collisions: True -desired_resting_position: [0.5, 0.0, 1.0] -constraint_violation_ends_episode: True -should_enforce_target_within_reach: False - -# In radians -base_angle_noise: 0.15 -base_noise: 0.05 -force_regenerate: False - -actions: - arm_action: - arm_controller: "ArmAbsPosKinematicAction" - grip_controller: "MagicGraspAction" -measurements: - pick_reward: - dist_reward: 10.0 - pick_reward: 5.0 - force_pen: 0.0 - max_force_pen: 0.0 - force_end_pen: 0.0 diff --git a/habitat-lab/habitat/config/habitat/task/rearrange/place.yaml b/habitat-lab/habitat/config/habitat/task/rearrange/place.yaml index 891a55760e..a4ab90474e 100644 --- a/habitat-lab/habitat/config/habitat/task/rearrange/place.yaml +++ b/habitat-lab/habitat/config/habitat/task/rearrange/place.yaml @@ -2,9 +2,7 @@ defaults: - /habitat/task: task_config_base - - /habitat/task/actions: - - arm_action - - base_velocity + - /habitat/task/rearrange/agents: fetch_base_arm - /habitat/task/measurements: - articulated_agent_force - force_terminate @@ -36,9 +34,6 @@ success_measure: place_success success_reward: 10.0 slack_reward: -0.005 end_on_success: True -actions: - arm_action: - grip_controller: SuctionGraspAction measurements: force_terminate: max_accum_force: 10_000.0 diff --git a/habitat-lab/habitat/datasets/pointnav/pointnav_dataset.py b/habitat-lab/habitat/datasets/pointnav/pointnav_dataset.py index bc144e4eb4..4a1ddedc8a 100644 --- a/habitat-lab/habitat/datasets/pointnav/pointnav_dataset.py +++ b/habitat-lab/habitat/datasets/pointnav/pointnav_dataset.py @@ -90,6 +90,14 @@ def _get_scenes_from_folder( scenes.sort() return scenes + def _load_from_file(self, fname: str, scenes_dir: str) -> None: + """ + Load the data from a file into `self.episodes`. 
+ """ + + with gzip.open(fname, "rt") as f: + self.from_json(f.read(), scenes_dir=scenes_dir) + def __init__(self, config: Optional["DictConfig"] = None) -> None: self.episodes = [] @@ -97,8 +105,8 @@ def __init__(self, config: Optional["DictConfig"] = None) -> None: return datasetfile_path = config.data_path.format(split=config.split) - with gzip.open(datasetfile_path, "rt") as f: - self.from_json(f.read(), scenes_dir=config.scenes_dir) + + self._load_from_file(datasetfile_path, config.scenes_dir) # Read separate file for each scene dataset_dir = os.path.dirname(datasetfile_path) @@ -119,8 +127,8 @@ def __init__(self, config: Optional["DictConfig"] = None) -> None: scene_filename = self.content_scenes_path.format( data_path=dataset_dir, scene=scene ) - with gzip.open(scene_filename, "rt") as f: - self.from_json(f.read(), scenes_dir=config.scenes_dir) + + self._load_from_file(scene_filename, config.scenes_dir) else: self.episodes = list( diff --git a/habitat-lab/habitat/datasets/rearrange/rearrange_generator.py b/habitat-lab/habitat/datasets/rearrange/rearrange_generator.py index 5f8db590a5..a829addb62 100644 --- a/habitat-lab/habitat/datasets/rearrange/rearrange_generator.py +++ b/habitat-lab/habitat/datasets/rearrange/rearrange_generator.py @@ -774,6 +774,7 @@ def initialize_sim(self, scene_name: str, dataset_path: str) -> None: backend_cfg.scene_dataset_config_file = dataset_path backend_cfg.scene_id = scene_name backend_cfg.enable_physics = True + backend_cfg.gpu_device_id = self.cfg.gpu_device_id if not self._render_debug_obs: # don't bother loading textures if not intending to visualize the generation process backend_cfg.create_renderer = False diff --git a/habitat-lab/habitat/datasets/rearrange/run_episode_generator.py b/habitat-lab/habitat/datasets/rearrange/run_episode_generator.py index f44f387b10..fc5a84236a 100644 --- a/habitat-lab/habitat/datasets/rearrange/run_episode_generator.py +++ b/habitat-lab/habitat/datasets/rearrange/run_episode_generator.py @@ -43,6 +43,7 @@ class SceneSamplerConfig: class RearrangeEpisodeGeneratorConfig: # The minimum distance from the target object starting position to its goal min_dist_from_start_to_goal: float = 0.5 + gpu_device_id: int = 0 # ----- import/initialization parameters ------ # the scene dataset from which scenes and objects are sampled dataset_path: str = "data/replica_cad/replicaCAD.scene_dataset_config.json" diff --git a/habitat-lab/habitat/datasets/rearrange/samplers/object_sampler.py b/habitat-lab/habitat/datasets/rearrange/samplers/object_sampler.py index 2cc3ee86df..5f5c860a18 100644 --- a/habitat-lab/habitat/datasets/rearrange/samplers/object_sampler.py +++ b/habitat-lab/habitat/datasets/rearrange/samplers/object_sampler.py @@ -343,7 +343,7 @@ def sample_placement( sim.get_rigid_object_manager().remove_object_by_handle( new_object.handle ) - logger.warning( + logger.info( f"Failed to sample {object_handle} placement on {receptacle.unique_name} in {self.max_placement_attempts} tries." 
) @@ -402,6 +402,7 @@ def single_sample( object_handle = self.sample_object() else: object_handle = fixed_obj_handle + if fixed_target_receptacle is not None: target_receptacle = fixed_target_receptacle else: @@ -433,6 +434,8 @@ def sample( target_receptacles: List[Receptacle], snap_down: bool = False, vdb: Optional[DebugVisualizer] = None, + target_object_handles: Optional[List[str]] = None, + object_idx_to_recep: Optional[Dict[int, Receptacle]] = None, ) -> List[Tuple[habitat_sim.physics.ManagedRigidObject, Receptacle]]: """ Defaults to uniform sample: object -> receptacle -> volume w/ rejection -> repeat. @@ -449,6 +452,8 @@ def sample( new_objects: List[ Tuple[habitat_sim.physics.ManagedRigidObject, Receptacle] ] = [] + if object_idx_to_recep is None: + object_idx_to_recep = {} logger.info( f" Trying to sample {self.target_objects_number} from range {self.num_objects}" @@ -460,6 +465,12 @@ def sample( len(new_objects) < self.target_objects_number and num_pairing_tries < self.max_sample_attempts ): + cur_obj_idx = len(new_objects) + if target_object_handles is None: + fixed_obj_handle = None + else: + fixed_obj_handle = target_object_handles[cur_obj_idx] + num_pairing_tries += 1 if len(new_objects) < len(target_receptacles): # sample objects explicitly from pre-designated target receptacles first @@ -468,13 +479,21 @@ def sample( recep_tracker, snap_down, vdb, - target_receptacles[len(new_objects)], + target_receptacles[cur_obj_idx], + fixed_obj_handle=fixed_obj_handle, ) # This receptacle has already been counted in the receptacle # tracking so don't double count. else: new_object, receptacle = self.single_sample( - sim, recep_tracker, snap_down, vdb + sim, + recep_tracker, + snap_down, + vdb, + fixed_target_receptacle=object_idx_to_recep.get( + cur_obj_idx, None + ), + fixed_obj_handle=fixed_obj_handle, ) if ( new_object is not None diff --git a/habitat-lab/habitat/gym/gym_definitions.py b/habitat-lab/habitat/gym/gym_definitions.py index 0c8f426c81..b9ae9a9fbf 100644 --- a/habitat-lab/habitat/gym/gym_definitions.py +++ b/habitat-lab/habitat/gym/gym_definitions.py @@ -47,7 +47,7 @@ def _get_env_name(cfg: "DictConfig") -> Optional[str]: return cfg["env_task"] -def make_gym_from_config(config: "DictConfig") -> gym.Env: +def make_gym_from_config(config: "DictConfig", dataset=None) -> gym.Env: """ From a habitat-lab or habitat-baseline config, create the associated gym environment. 
""" @@ -58,7 +58,7 @@ def make_gym_from_config(config: "DictConfig") -> gym.Env: assert ( env_class is not None ), f"No environment class with name `{env_class_name}` was found, you need to specify a valid one with env_task" - return make_env_fn(env_class=env_class, config=config) + return make_env_fn(env_class=env_class, config=config, dataset=dataset) def _add_sim_sensor_to_config( diff --git a/habitat-lab/habitat/tasks/rearrange/articulated_agent_manager.py b/habitat-lab/habitat/tasks/rearrange/articulated_agent_manager.py index 3f4f6fb1da..1ac2e6eb72 100644 --- a/habitat-lab/habitat/tasks/rearrange/articulated_agent_manager.py +++ b/habitat-lab/habitat/tasks/rearrange/articulated_agent_manager.py @@ -136,9 +136,9 @@ def pre_obj_clear(self) -> None: """ for agent_data in self._all_agent_data: - agent_data.grasp_mgr.reconfigure() for grasp_manager in agent_data.grasp_mgrs: grasp_manager.reset() + agent_data.grasp_mgr.reconfigure() @add_perf_timing_func() def post_obj_load_reconfigure(self): diff --git a/habitat-lab/habitat/tasks/rearrange/multi_task/composite_sensors.py b/habitat-lab/habitat/tasks/rearrange/multi_task/composite_sensors.py index eeeaa83ac3..392317c939 100644 --- a/habitat-lab/habitat/tasks/rearrange/multi_task/composite_sensors.py +++ b/habitat-lab/habitat/tasks/rearrange/multi_task/composite_sensors.py @@ -111,11 +111,10 @@ def update_target_object(self): # Get the target object's absolute index self.abs_targ_obj_idx = self._sim.scene_obj_ids[targ_obj_idx] - self.targ_obj_idx = str(targ_obj_idx) def get_distance(self, task, distance): return task.measurements.measures[distance.cls_uuid].get_metric()[ - self.targ_obj_idx + str(self._cur_rearrange_stage) ] def update_metric(self, *args, episode, task, observations, **kwargs): @@ -164,6 +163,9 @@ def update_metric(self, *args, episode, task, observations, **kwargs): if place_success and not is_holding_obj: self._metric += self._config.single_rearrange_reward self._cur_rearrange_stage += 1 + self._cur_rearrange_stage = ( + self._cur_rearrange_stage % self.num_targets + ) if self._cur_rearrange_stage < self.num_targets: self.update_target_object() diff --git a/habitat-lab/habitat/tasks/rearrange/multi_task/pddl_domain.py b/habitat-lab/habitat/tasks/rearrange/multi_task/pddl_domain.py index ca4ba0f074..5ad1cd306d 100644 --- a/habitat-lab/habitat/tasks/rearrange/multi_task/pddl_domain.py +++ b/habitat-lab/habitat/tasks/rearrange/multi_task/pddl_domain.py @@ -62,6 +62,7 @@ def __init__( self, domain_file_path: str, cur_task_config: Optional["DictConfig"] = None, + read_config: bool = True, ): """ :param domain_file_path: Either an absolute path or a path relative to `habitat/task/rearrange/multi_task/domain_configs/`. @@ -73,6 +74,19 @@ def __init__( self._config = cur_task_config self._orig_actions: Dict[str, PddlAction] = {} + if read_config: + # Setup config properties + self._obj_succ_thresh = self._config.obj_succ_thresh + self._art_succ_thresh = self._config.art_succ_thresh + self._robot_at_thresh = self._config.robot_at_thresh + self._num_spawn_attempts = self._config.num_spawn_attempts + self._physics_stability_steps = ( + self._config.physics_stability_steps + ) + self._recep_place_shrink_factor = ( + self._config.recep_place_shrink_factor + ) + if not osp.isabs(domain_file_path): parent_dir = osp.dirname(__file__) domain_file_path = osp.join( @@ -186,6 +200,7 @@ def fetch_entity(s): for k, v in robot_states.items(): use_k = all_entities[k] + # Sub in any referred entities. 
v = {sub_k: fetch_entity(sub_v) for sub_k, sub_v in v.items()} use_robot_states[use_k] = PddlRobotState(**v) @@ -359,9 +374,9 @@ def bind_to_instance( dataset=dataset, env=env, episode=episode, - obj_thresh=self._config.obj_succ_thresh, - art_thresh=self._config.art_succ_thresh, - robot_at_thresh=self._config.robot_at_thresh, + obj_thresh=self._obj_succ_thresh, + art_thresh=self._art_succ_thresh, + robot_at_thresh=self._robot_at_thresh, expr_types=self.expr_types, obj_ids=sim.handle_to_object_id, target_ids={ @@ -376,10 +391,10 @@ def bind_to_instance( }, all_entities=self.all_entities, predicates=self.predicates, - num_spawn_attempts=self._config.num_spawn_attempts, - physics_stability_steps=self._config.physics_stability_steps, + num_spawn_attempts=self._num_spawn_attempts, + physics_stability_steps=self._physics_stability_steps, receptacles=sim.receptacles, - recep_place_shrink_factor=self._config.recep_place_shrink_factor, + recep_place_shrink_factor=self._recep_place_shrink_factor, ) # Ensure that all objects are accounted for. for entity in self.all_entities.values(): @@ -392,9 +407,7 @@ def bind_actions(self) -> None: """ for k, ac in self._orig_actions.items(): precond_quant = ac.precond.quantifier - new_preconds, assigns = self.expand_quantifiers( - ac.precond.clone(), ac.name - ) + new_preconds, assigns = self.expand_quantifiers(ac.precond.clone()) new_ac = ac.set_precond(new_preconds) if precond_quant == LogicalQuantifierType.EXISTS: @@ -572,7 +585,7 @@ def all_entities(self) -> Dict[str, PddlEntity]: return {**self._constants, **self._added_entities} def expand_quantifiers( - self, expr: LogicalExpr, tmp=None + self, expr: LogicalExpr ) -> Tuple[LogicalExpr, List[Dict[PddlEntity, PddlEntity]]]: """ Expand out a logical expression that could involve a quantifier into @@ -598,7 +611,7 @@ def expand_quantifiers( elif expr.quantifier is None: return expr, [] else: - raise ValueError(f"Unrecongized {expr.quantifier}") + raise ValueError(f"Unrecognized {expr.quantifier}") t_start = time.time() assigns: List[List[PddlEntity]] = [[]] @@ -640,10 +653,11 @@ def __init__( domain_file_path: str, problem_file_path: str, cur_task_config: Optional["DictConfig"] = None, + read_config: bool = True, ): self._objects = {} - super().__init__(domain_file_path, cur_task_config) + super().__init__(domain_file_path, cur_task_config, read_config) with open(get_full_habitat_config_path(problem_file_path), "r") as f: problem_def = yaml.safe_load(f) self._objects = { diff --git a/habitat-lab/habitat/tasks/rearrange/multi_task/pddl_logical_expr.py b/habitat-lab/habitat/tasks/rearrange/multi_task/pddl_logical_expr.py index 47a52b7dce..7c806495b0 100644 --- a/habitat-lab/habitat/tasks/rearrange/multi_task/pddl_logical_expr.py +++ b/habitat-lab/habitat/tasks/rearrange/multi_task/pddl_logical_expr.py @@ -126,7 +126,7 @@ def sub_in(self, sub_dict: Dict[PddlEntity, PddlEntity]) -> "LogicalExpr": def sub_in_clone(self, sub_dict: Dict[PddlEntity, PddlEntity]): return LogicalExpr( self._expr_type, - [e.sub_in(sub_dict) for e in self._sub_exprs], + [e.sub_in_clone(sub_dict) for e in self._sub_exprs], self._inputs, self._quantifier, ) diff --git a/habitat-lab/habitat/tasks/rearrange/multi_task/pddl_sim_state.py b/habitat-lab/habitat/tasks/rearrange/multi_task/pddl_sim_state.py index 829d8e7a0e..8305feb543 100644 --- a/habitat-lab/habitat/tasks/rearrange/multi_task/pddl_sim_state.py +++ b/habitat-lab/habitat/tasks/rearrange/multi_task/pddl_sim_state.py @@ -18,11 +18,7 @@ PddlSimInfo, SimulatorObjectType, ) -from 
habitat.tasks.rearrange.utils import ( - get_angle_to_pos, - get_robot_spawns, - rearrange_logger, -) +from habitat.tasks.rearrange.utils import get_robot_spawns, rearrange_logger CAB_TYPE = "cab_type" FRIDGE_TYPE = "fridge_type" @@ -60,20 +56,40 @@ class PddlRobotState: :property place_at_pos_dist: If -1.0, this will place the robot as close as possible to the entity. Otherwise, it will place the robot within X - meters of the entity. + meters of the entity. If unset, sets to task default. :property base_angle_noise: How much noise to add to the robot base angle - when setting the robot base position. + when setting the robot base position. If not set, sets to task default. :property place_at_angle_thresh: The required maximum angle to the target entity in the robot's local frame. Specified in radains. If not specified, no angle is considered. + :property physics_stability_steps: Number of physics checks for placing the + robot. If not set, sets to task default. """ holding: Optional[PddlEntity] = None should_drop: bool = False pos: Optional[Any] = None - place_at_pos_dist: float = -1.0 + place_at_pos_dist: Optional[float] = None place_at_angle_thresh: Optional[float] = None - base_angle_noise: float = 0.0 + base_angle_noise: Optional[float] = None + physics_stability_steps: Optional[int] = None + + def get_place_at_pos_dist(self, sim_info) -> float: + if self.place_at_pos_dist is None: + return sim_info.robot_at_thresh + else: + return self.place_at_pos_dist + + def get_base_angle_noise(self, sim_info) -> float: + if self.base_angle_noise is None: + return 0.0 + return self.base_angle_noise + + def get_physics_stability_steps(self, sim_info) -> Optional[int]: + if self.physics_stability_steps is None: + return sim_info.physics_stability_steps + else: + return self.physics_stability_steps def sub_in( self, sub_dict: Dict[PddlEntity, PddlEntity] @@ -82,6 +98,14 @@ def sub_in( self.pos = sub_dict.get(self.pos, self.pos) return self + def sub_in_clone( + self, sub_dict: Dict[PddlEntity, PddlEntity] + ) -> "PddlRobotState": + other = replace(self) + other.holding = sub_dict.get(self.holding, self.holding) + other.pos = sub_dict.get(self.pos, self.pos) + return other + def clone(self) -> "PddlRobotState": """ Returns a shallow copy @@ -128,13 +152,8 @@ def is_true(self, sim_info: PddlSimInfo, robot_entity: PddlEntity) -> bool: # Get the angle angle = np.arccos(np.dot(pos, pos_robot)) - if self.place_at_pos_dist == -1.0: - use_thresh = sim_info.robot_at_thresh - else: - use_thresh = self.place_at_pos_dist - # Check the distance threshold. - if dist > use_thresh: + if dist > self.get_place_at_pos_dist(sim_info): return False # Check for the angle threshold @@ -157,53 +176,43 @@ def set_state( sim_info.search_for_entity(robot_entity), ) sim = sim_info.sim - grasp_mgr = sim.get_agent_data(robot_id).grasp_mgr + agent_data = sim.get_agent_data(robot_id) # Set the snapped object information - if self.should_drop and grasp_mgr.is_grasped: - grasp_mgr.desnap(True) + if self.should_drop and agent_data.grasp_mgr.is_grasped: + agent_data.grasp_mgr.desnap(True) elif self.holding is not None: # Swap objects to the desired object. 
obj_idx = cast(int, sim_info.search_for_entity(self.holding)) - grasp_mgr.desnap(True) + agent_data.grasp_mgr.desnap(True) sim.internal_step(-1) - grasp_mgr.snap_to_obj(sim.scene_obj_ids[obj_idx]) + agent_data.grasp_mgr.snap_to_obj(sim.scene_obj_ids[obj_idx]) sim.internal_step(-1) # Set the robot starting position if isinstance(self.pos, PddlEntity): targ_pos = sim_info.get_entity_pos(self.pos) - agent = sim.get_agent_data(robot_id).articulated_agent - - if self.place_at_pos_dist == -1.0: - # Place as close to the object as possible. - if not sim_info.sim.is_point_within_bounds(targ_pos): - rearrange_logger.error( - f"Object {self.pos} is out of bounds but trying to set robot position" - ) - - agent_pos = sim_info.sim.safe_snap_point(targ_pos) - agent.base_pos = agent_pos - agent.base_rot = get_angle_to_pos( - np.array(targ_pos - agent_pos) - ) - else: - # Place some distance away from the object. - start_pos, start_rot, was_fail = get_robot_spawns( - target_position=targ_pos, - rotation_perturbation_noise=self.base_angle_noise, - distance_threshold=self.place_at_pos_dist, - sim=sim, - num_spawn_attempts=sim_info.num_spawn_attempts, - physics_stability_steps=sim_info.physics_stability_steps, - ) - sim.articulated_agent.base_pos = start_pos - sim.articulated_agent.base_rot = start_rot - if was_fail: - rearrange_logger.error("Failed to place the robot.") + + # Place some distance away from the object. + start_pos, start_rot, was_fail = get_robot_spawns( + target_position=targ_pos, + rotation_perturbation_noise=self.get_base_angle_noise( + sim_info + ), + distance_threshold=self.get_place_at_pos_dist(sim_info), + sim=sim, + num_spawn_attempts=sim_info.num_spawn_attempts, + physics_stability_steps=self.get_physics_stability_steps( + sim_info + ), + agent=agent_data.articulated_agent, + ) + agent_data.articulated_agent.base_pos = start_pos + agent_data.articulated_agent.base_rot = start_rot + if was_fail: + rearrange_logger.error("Failed to place the robot.") # We teleported the agent. We also need to teleport the object the agent was holding. 
- grasp_mgr = sim.get_agent_data(robot_id).grasp_mgr - grasp_mgr.update_object_to_grasp() + agent_data.grasp_mgr.update_object_to_grasp() elif self.pos is not None: raise ValueError(f"Unrecongized set position {self.pos}") @@ -254,7 +263,7 @@ def sub_in_clone( for k, v in self._obj_states.items() }, { - sub_dict.get(k, k): robot_state.sub_in(sub_dict) + sub_dict.get(k, k): robot_state.sub_in_clone(sub_dict) for k, robot_state in self._robot_states.items() }, ) diff --git a/habitat-lab/habitat/tasks/rearrange/multi_task/rearrange_pddl.py b/habitat-lab/habitat/tasks/rearrange/multi_task/rearrange_pddl.py index 417b846504..84fb441258 100644 --- a/habitat-lab/habitat/tasks/rearrange/multi_task/rearrange_pddl.py +++ b/habitat-lab/habitat/tasks/rearrange/multi_task/rearrange_pddl.py @@ -163,7 +163,7 @@ def get_entity_pos(self, entity: PddlEntity) -> np.ndarray: entity, SimulatorObjectType.ROBOT_ENTITY.value ): robot_id = self.robot_ids[ename] - return self.sim.get_agent_data(robot_id).robot.base_pos + return self.sim.get_agent_data(robot_id).articulated_agent.base_pos if self.check_type_matches( entity, SimulatorObjectType.ARTICULATED_RECEPTACLE_ENTITY.value ): diff --git a/habitat-lab/habitat/tasks/rearrange/rearrange_grasp_manager.py b/habitat-lab/habitat/tasks/rearrange/rearrange_grasp_manager.py index 4ac58df83d..c4ce59fa68 100644 --- a/habitat-lab/habitat/tasks/rearrange/rearrange_grasp_manager.py +++ b/habitat-lab/habitat/tasks/rearrange/rearrange_grasp_manager.py @@ -50,9 +50,7 @@ def __init__( self._kinematic_mode = self._sim.habitat_config.kinematic_mode def reconfigure(self) -> None: - """Removes any existing constraints managed by this structure. - Called from _sim.reconfigure(). - """ + """Removes any existing constraints managed by this structure.""" self._snap_constraints.clear() def reset(self) -> None: diff --git a/habitat-lab/habitat/tasks/rearrange/rearrange_sensors.py b/habitat-lab/habitat/tasks/rearrange/rearrange_sensors.py index ef9d3729b9..a5243fb987 100644 --- a/habitat-lab/habitat/tasks/rearrange/rearrange_sensors.py +++ b/habitat-lab/habitat/tasks/rearrange/rearrange_sensors.py @@ -5,6 +5,8 @@ # LICENSE file in the root directory of this source tree. 
+from collections import defaultdict, deque + import numpy as np from gym import spaces @@ -474,7 +476,7 @@ def update_metric(self, *args, episode, **kwargs): scene_pos = self._sim.get_scene_pos() target_pos = scene_pos[idxs] distances = np.linalg.norm(target_pos - goal_pos, ord=2, axis=-1) - self._metric = {str(idx): dist for idx, dist in zip(idxs, distances)} + self._metric = {str(idx): dist for idx, dist in enumerate(distances)} @registry.register_measure @@ -578,11 +580,11 @@ def update_metric(self, *args, observations, **kwargs): .translation ) - idxs, goals = self._sim.get_targets() + goals = self._sim.get_targets()[1] distances = np.linalg.norm(goals - ee_pos, ord=2, axis=-1) - self._metric = {str(idx): dist for idx, dist in zip(idxs, distances)} + self._metric = {str(idx): dist for idx, dist in enumerate(distances)} @registry.register_measure @@ -618,7 +620,7 @@ def update_metric(self, *args, episode, **kwargs): distances = np.linalg.norm(target_pos - ee_pos, ord=2, axis=-1) - self._metric = {str(idx): dist for idx, dist in zip(idxs, distances)} + self._metric = {str(idx): dist for idx, dist in enumerate(distances)} @registry.register_measure @@ -1036,10 +1038,19 @@ def _get_uuid(*args, **kwargs): def __init__(self, sim, config, *args, **kwargs): self._sim = sim self._sim.enable_perf_logging() + self._disable_logging = config.disable_logging super().__init__() def reset_metric(self, *args, **kwargs): + self._metric_queue = defaultdict(deque) self._metric = {} def update_metric(self, *args, task, **kwargs): - self._metric = self._sim.get_runtime_perf_stats() + for k, v in self._sim.get_runtime_perf_stats().items(): + self._metric_queue[k].append(v) + if self._disable_logging: + self._metric = {} + else: + self._metric = { + k: np.mean(v) for k, v in self._metric_queue.items() + } diff --git a/habitat-lab/habitat/tasks/rearrange/rearrange_sim.py b/habitat-lab/habitat/tasks/rearrange/rearrange_sim.py index efca12822a..f3becf8ff2 100644 --- a/habitat-lab/habitat/tasks/rearrange/rearrange_sim.py +++ b/habitat-lab/habitat/tasks/rearrange/rearrange_sim.py @@ -37,6 +37,7 @@ ) from habitat.sims.habitat_simulator.habitat_simulator import HabitatSim from habitat.tasks.rearrange.articulated_agent_manager import ( + ArticulatedAgentData, ArticulatedAgentManager, ) from habitat.tasks.rearrange.marker_info import MarkerInfo @@ -113,6 +114,7 @@ def __init__(self, config: "DictConfig"): self.agents_mgr = ArticulatedAgentManager(self.habitat_config, self) + # Setup config options. 
self._debug_render_articulated_agent = ( self.habitat_config.debug_render_articulated_agent ) @@ -128,10 +130,13 @@ def __init__(self, config: "DictConfig"): self.habitat_config.update_articulated_agent ) self._step_physics = self.habitat_config.step_physics + self._auto_sleep = self.habitat_config.auto_sleep + self._load_objs = self.habitat_config.load_objs self._additional_object_paths = ( self.habitat_config.additional_object_paths ) self._kinematic_mode = self.habitat_config.kinematic_mode + self._extra_runtime_perf_stats: Dict[str, float] = defaultdict(float) self._perf_logging_enabled = False self.cur_runtime_perf_scope: List[str] = [] @@ -271,26 +276,27 @@ def reset(self): def reconfigure(self, config: "DictConfig", ep_info: RearrangeEpisode): self._handle_to_goal_name = ep_info.info["object_labels"] - with read_write(config): - config["scene"] = ep_info.scene_id - self.ep_info = ep_info new_scene = self.prev_scene_id != ep_info.scene_id if new_scene: self._prev_obj_names = None - # Only remove and re-add objects if we have a new set of objects. obj_names = [x[0] for x in ep_info.rigid_objs] + # Only remove and re-add objects if we have a new set of objects. should_add_objects = self._prev_obj_names != obj_names self._prev_obj_names = obj_names self.agents_mgr.pre_obj_clear() - self._clear_objects(should_add_objects) + self._clear_objects(should_add_objects, new_scene) - t_start = time.time() - super().reconfigure(config, should_close_on_new_scene=False) - self.add_perf_timing("super_reconfigure", t_start) - self._try_acquire_context() + is_hard_reset = new_scene or should_add_objects + + if is_hard_reset: + with read_write(config): + config["scene"] = ep_info.scene_id + t_start = time.time() + super().reconfigure(config, should_close_on_new_scene=False) + self.add_perf_timing("super_reconfigure", t_start) if new_scene: self.agents_mgr.on_new_scene() @@ -310,14 +316,14 @@ def reconfigure(self, config: "DictConfig", ep_info: RearrangeEpisode): self.agents_mgr.post_obj_load_reconfigure() # add episode clutter objects additional to base scene objects - if self.habitat_config.load_objs: - self._add_objs(ep_info, should_add_objects) + if self._load_objs: + self._add_objs(ep_info, should_add_objects, new_scene) self._setup_targets(ep_info) self._add_markers(ep_info) # auto-sleep rigid objects as optimization - if self.habitat_config.auto_sleep: + if self._auto_sleep: self._sleep_all_objects() rom = self.get_rigid_object_manager() @@ -369,7 +375,7 @@ def _setup_semantic_ids(self): obj.object_id + self.habitat_config.object_ids_start ) - def get_agent_data(self, agent_idx: Optional[int]): + def get_agent_data(self, agent_idx: Optional[int]) -> ArticulatedAgentData: if agent_idx is None: return self.agents_mgr[0] else: @@ -449,7 +455,9 @@ def largest_island_idx(self) -> int: return self._largest_island_idx @add_perf_timing_func() - def _clear_objects(self, should_add_objects: bool) -> None: + def _clear_objects( + self, should_add_objects: bool, new_scene: bool + ) -> None: rom = self.get_rigid_object_manager() # Clear all the rigid objects. @@ -472,9 +480,10 @@ def _clear_objects(self, should_add_objects: bool) -> None: rom.remove_object_by_id(viz_obj.object_id) self._viz_objs = {} - # Do not remove the articulated objects from the scene, these are - # managed by the underlying sim. - self.art_objs = [] + if new_scene: + # Do not remove the articulated objects from the scene, these are + # managed by the underlying sim. 
+ self.art_objs = [] @add_perf_timing_func() def _set_ao_states_from_ep(self, ep_info: RearrangeEpisode) -> None: @@ -529,7 +538,10 @@ def safe_snap_point(self, pos: np.ndarray) -> np.ndarray: @add_perf_timing_func() def _add_objs( - self, ep_info: RearrangeEpisode, should_add_objects: bool + self, + ep_info: RearrangeEpisode, + should_add_objects: bool, + new_scene: bool, ) -> None: # Load clutter objects: rom = self.get_rigid_object_manager() @@ -581,23 +593,18 @@ def _add_objs( obj_counts[obj_handle] += 1 - self._receptacles = self._create_recep_info( - ep_info.scene_id, list(self._handle_to_object_id.keys()) - ) + if new_scene: + self._receptacles = self._create_recep_info( + ep_info.scene_id, list(self._handle_to_object_id.keys()) + ) - ao_mgr = self.get_articulated_object_manager() - articulated_agent_art_handles = [ - articulated_agent.sim_obj.handle - for articulated_agent in self.agents_mgr.articulated_agents_iter - ] - for aoi_handle in ao_mgr.get_object_handles(): - ao = ao_mgr.get_object_by_handle(aoi_handle) - if ( - self._kinematic_mode - and ao.handle not in articulated_agent_art_handles - ): - ao.motion_type = habitat_sim.physics.MotionType.KINEMATIC - self.art_objs.append(ao) + ao_mgr = self.get_articulated_object_manager() + # Make all articulated objects (including the robots) kinematic + for aoi_handle in ao_mgr.get_object_handles(): + ao = ao_mgr.get_object_by_handle(aoi_handle) + if self._kinematic_mode: + ao.motion_type = habitat_sim.physics.MotionType.KINEMATIC + self.art_objs.append(ao) def _create_recep_info( self, scene_id: str, ignore_handles: List[str] diff --git a/habitat-lab/habitat/tasks/rearrange/rearrange_task.py b/habitat-lab/habitat/tasks/rearrange/rearrange_task.py index bbaabf0d65..7233f581f9 100644 --- a/habitat-lab/habitat/tasks/rearrange/rearrange_task.py +++ b/habitat-lab/habitat/tasks/rearrange/rearrange_task.py @@ -83,6 +83,19 @@ def __init__( self._cur_episode_step = 0 self._should_place_articulated_agent = should_place_articulated_agent + # Get config options + self._force_regenerate = self._config.force_regenerate + self._should_save_to_cache = self._config.should_save_to_cache + self._obj_succ_thresh = self._config.obj_succ_thresh + self._enable_safe_drop = self._config.enable_safe_drop + self._constraint_violation_ends_episode = ( + self._config.constraint_violation_ends_episode + ) + self._constraint_violation_drops_object = ( + self._config.constraint_violation_drops_object + ) + self._count_obj_collisions = self._config.count_obj_collisions + data_path = dataset.config.data_path.format(split=dataset.config.split) fname = data_path.split("/")[-1].split(".")[0] cache_path = osp.join( @@ -134,7 +147,7 @@ def _get_cached_articulated_agent_start(self, agent_idx: int = 0): if ( self._articulated_agent_pos_start is None or start_ident not in self._articulated_agent_pos_start - or self._config.force_regenerate + or self._force_regenerate ): return None else: @@ -146,7 +159,7 @@ def _get_ep_init_ident(self, agent_idx): def _cache_articulated_agent_start(self, cache_data, agent_idx: int = 0): if ( self._articulated_agent_pos_start is not None - and self._config.should_save_to_cache + and self._should_save_to_cache ): start_ident = self._get_ep_init_ident(agent_idx) self._articulated_agent_pos_start[start_ident] = cache_data @@ -239,12 +252,12 @@ def _is_violating_safe_drop(self, action_args): self._sim.grasp_mgr.is_grasped and action_args.get("grip_action", None) is not None and action_args["grip_action"] < 0 - and min_dist < 
self._config.obj_succ_thresh + and min_dist < self._obj_succ_thresh ) def step(self, action: Dict[str, Any], episode: Episode): action_args = action["action_args"] - if self._config.enable_safe_drop and self._is_violating_safe_drop( + if self._enable_safe_drop and self._is_violating_safe_drop( action_args ): action_args["grip_action"] = None @@ -255,7 +268,7 @@ def step(self, action: Dict[str, Any], episode: Episode): for grasp_mgr in self._sim.agents_mgr.grasp_iter: if ( grasp_mgr.is_violating_hold_constraint() - and self._config.constraint_violation_drops_object + and self._constraint_violation_drops_object ): grasp_mgr.desnap(True) @@ -276,7 +289,7 @@ def _check_episode_is_active( for grasp_mgr in self._sim.agents_mgr.grasp_iter: if ( grasp_mgr.is_violating_hold_constraint() - and self._config.constraint_violation_ends_episode + and self._constraint_violation_ends_episode ): done = True break @@ -329,7 +342,7 @@ def get_max_force(contact_points, check_id): def get_cur_collision_info(self, agent_idx) -> CollisionDetails: _, coll_details = rearrange_collision( - self._sim, self._config.count_obj_collisions, agent_idx=agent_idx + self._sim, self._count_obj_collisions, agent_idx=agent_idx ) return coll_details diff --git a/habitat-lab/habitat/tasks/rearrange/sub_tasks/nav_to_obj_task.py b/habitat-lab/habitat/tasks/rearrange/sub_tasks/nav_to_obj_task.py index 64b881b7f9..2136b00cb7 100644 --- a/habitat-lab/habitat/tasks/rearrange/sub_tasks/nav_to_obj_task.py +++ b/habitat-lab/habitat/tasks/rearrange/sub_tasks/nav_to_obj_task.py @@ -48,6 +48,12 @@ def __init__(self, *args, config, dataset=None, **kwargs): self.force_obj_to_idx = None self.force_recep_to_name = None + # Set config options + self._object_in_hand_sample_prob = ( + self._config.object_in_hand_sample_prob + ) + self._min_start_distance = self._config.min_start_distance + self._nav_to_info = None @property @@ -75,7 +81,7 @@ def _generate_nav_start_goal(self, episode, force_idx=None) -> NavToInfo: # Only change the scene if this skill is not running as a sub-task if ( force_idx is None - and random.random() < self._config.object_in_hand_sample_prob + and random.random() < self._object_in_hand_sample_prob ): start_hold_obj_idx = self._generate_snap_to_obj() @@ -94,7 +100,7 @@ def _generate_nav_start_goal(self, episode, force_idx=None) -> NavToInfo: def filter_func(start_pos, _): return ( np.linalg.norm(start_pos - nav_to_pos) - > self._config.min_start_distance + > self._min_start_distance ) ( diff --git a/habitat-lab/habitat/tasks/rearrange/sub_tasks/pick_sensors.py b/habitat-lab/habitat/tasks/rearrange/sub_tasks/pick_sensors.py index 175f1f127c..b04943e893 100644 --- a/habitat-lab/habitat/tasks/rearrange/sub_tasks/pick_sensors.py +++ b/habitat-lab/habitat/tasks/rearrange/sub_tasks/pick_sensors.py @@ -94,7 +94,7 @@ def update_metric(self, *args, episode, task, observations, **kwargs): if cur_picked: dist_to_goal = ee_to_rest_distance else: - dist_to_goal = ee_to_object_distance[str(task.abs_targ_idx)] + dist_to_goal = ee_to_object_distance[str(task.targ_idx)] abs_targ_obj_idx = self._sim.scene_obj_ids[task.abs_targ_idx] diff --git a/habitat-lab/habitat/tasks/rearrange/sub_tasks/pick_task.py b/habitat-lab/habitat/tasks/rearrange/sub_tasks/pick_task.py index 4fceaff8b3..83a8ae9ba0 100644 --- a/habitat-lab/habitat/tasks/rearrange/sub_tasks/pick_task.py +++ b/habitat-lab/habitat/tasks/rearrange/sub_tasks/pick_task.py @@ -32,6 +32,10 @@ def __init__(self, *args, config, dataset=None, **kwargs): self.prev_colls = None self.force_set_idx = 
None + self._base_angle_noise = self._config.base_angle_noise + self._spawn_max_dist_to_obj = self._config.spawn_max_dist_to_obj + self._num_spawn_attempts = self._config.num_spawn_attempts + self._physics_stability_steps = self._config.physics_stability_steps def set_args(self, obj, **kwargs): self.force_set_idx = obj @@ -56,11 +60,11 @@ def _gen_start_pos(self, sim, episode, sel_idx): start_pos, angle_to_obj, was_fail = get_robot_spawns( targ_pos, - self._config.base_angle_noise, - self._config.spawn_max_dist_to_obj, + self._base_angle_noise, + self._spawn_max_dist_to_obj, sim, - self._config.num_spawn_attempts, - self._config.physics_stability_steps, + self._num_spawn_attempts, + self._physics_stability_steps, ) if was_fail: diff --git a/habitat-lab/habitat/tasks/rearrange/sub_tasks/place_sensors.py b/habitat-lab/habitat/tasks/rearrange/sub_tasks/place_sensors.py index d19712c9b0..1335bb4693 100644 --- a/habitat-lab/habitat/tasks/rearrange/sub_tasks/place_sensors.py +++ b/habitat-lab/habitat/tasks/rearrange/sub_tasks/place_sensors.py @@ -76,16 +76,16 @@ def update_metric(self, *args, episode, task, observations, **kwargs): ].get_metric() obj_at_goal = task.measurements.measures[ ObjAtGoal.cls_uuid - ].get_metric()[str(task.abs_targ_idx)] + ].get_metric()[str(task.targ_idx)] snapped_id = self._sim.grasp_mgr.snap_idx cur_picked = snapped_id is not None if (not obj_at_goal) or cur_picked: if self._config.use_ee_dist: - dist_to_goal = ee_to_goal_dist[str(task.abs_targ_idx)] + dist_to_goal = ee_to_goal_dist[str(task.targ_idx)] else: - dist_to_goal = obj_to_goal_dist[str(task.abs_targ_idx)] + dist_to_goal = obj_to_goal_dist[str(task.targ_idx)] min_dist = self._config.min_dist_to_goal else: dist_to_goal = ee_to_rest_distance @@ -158,7 +158,7 @@ def reset_metric(self, *args, episode, task, observations, **kwargs): def update_metric(self, *args, episode, task, observations, **kwargs): is_obj_at_goal = task.measurements.measures[ ObjAtGoal.cls_uuid - ].get_metric()[str(task.abs_targ_idx)] + ].get_metric()[str(task.targ_idx)] is_holding = self._sim.grasp_mgr.is_grasped ee_to_rest_distance = task.measurements.measures[ diff --git a/habitat-lab/habitat/tasks/rearrange/utils.py b/habitat-lab/habitat/tasks/rearrange/utils.py index b23522f114..dc71c7fb14 100644 --- a/habitat-lab/habitat/tasks/rearrange/utils.py +++ b/habitat-lab/habitat/tasks/rearrange/utils.py @@ -409,31 +409,47 @@ def get_robot_spawns( :param distance_threshold: The maximum distance from the target. :param sim: The simulator instance. :param num_spawn_attempts: The number of sample attempts for the distance threshold. - :param physics_stability_steps: The number of steps to perform for physics stability check. + :param physics_stability_steps: The number of steps to perform for physics stability check. If specified as 0, then it will return the result without doing any checks. :param agent: The agent to set the position for. If not specified, defaults to the simulator default agent. :return: The robot's start position, rotation, and whether the placement was a failure (True for failure, False for success). """ - - state = sim.capture_state() if agent is None: agent = sim.articulated_agent start_rotation = agent.base_rot start_position = agent.base_pos + state = sim.capture_state() + # Try to place the robot. 
for _ in range(num_spawn_attempts): sim.set_state(state) - start_position = sim.pathfinder.get_random_navigable_point_near( - target_position, distance_threshold - ) - island_idx = sim.pathfinder.get_island(start_position) - if island_idx != sim.largest_island_idx: - continue - relative_target = target_position - start_position + if distance_threshold == -1.0: + # Place as close to the object as possible. + if not sim.is_point_within_bounds(target_position): + rearrange_logger.error( + f"Object {target_position} is out of bounds but trying to set robot position" + ) + start_position = sim.safe_snap_point(target_position) + else: + # Place within `distance_threshold` of the object. + start_position = sim.pathfinder.get_random_navigable_point_near( + target_position, distance_threshold + ) + # Face the robot towards the object. + relative_target = target_position - start_position angle_to_object = get_angle_to_pos(relative_target) + rotation_noise = np.random.normal(0.0, rotation_perturbation_noise) + start_rotation = angle_to_object + rotation_noise + + if physics_stability_steps == 0: + return start_position, start_rotation, False + + island_idx = sim.pathfinder.get_island(start_position) + if island_idx != sim.largest_island_idx: + continue target_distance = np.linalg.norm( (start_position - target_position)[[0, 2]] @@ -441,10 +457,6 @@ def get_robot_spawns( is_navigable = sim.pathfinder.is_navigable(start_position) - # Face the robot towards the object. - rotation_noise = np.random.normal(0.0, rotation_perturbation_noise) - start_rotation = angle_to_object + rotation_noise - if target_distance > distance_threshold or not is_navigable: continue diff --git a/habitat-lab/habitat/utils/env_utils.py b/habitat-lab/habitat/utils/env_utils.py index 9dcaefaf01..9f27259fac 100644 --- a/habitat-lab/habitat/utils/env_utils.py +++ b/habitat-lab/habitat/utils/env_utils.py @@ -14,7 +14,9 @@ def make_env_fn( - config: "DictConfig", env_class: Union[Type[Env], Type[RLEnv]] + config: "DictConfig", + env_class: Union[Type[Env], Type[RLEnv]], + dataset=None, ) -> Union[Env, RLEnv]: r"""Creates an env of type env_class with specified config and rank. This is to be passed in as an argument when creating VectorEnv. @@ -23,13 +25,15 @@ def make_env_fn( config: root exp config that has core env config node as well as env-specific config node. env_class: class type of the env to be created. + dataset: If specified, load the environment using this dataset. Returns: env object created according to specification. 
""" if "habitat" in config: config = config.habitat - dataset = make_dataset(config.dataset.type, config=config.dataset) + if dataset is None: + dataset = make_dataset(config.dataset.type, config=config.dataset) env = env_class(config=config, dataset=dataset) env.seed(config.seed) return env diff --git a/test/test_baseline_trainers.py b/test/test_baseline_trainers.py index fa5ff212d1..329a172516 100644 --- a/test/test_baseline_trainers.py +++ b/test/test_baseline_trainers.py @@ -10,7 +10,6 @@ import os import random from copy import deepcopy -from glob import glob import pytest @@ -60,22 +59,9 @@ def download_data(): @pytest.mark.parametrize( "test_cfg_path,gpu2gpu,observation_transforms_overrides,mode", list( - itertools.product( - glob("habitat-baselines/habitat_baselines/config/test/*"), - [False], - [ - [], - [ - "+habitat_baselines/rl/policy/obs_transforms=[center_cropper_base, resize_shortest_edge_base]", - ], - ], - ["train", "eval"], - ) - ) - + list( itertools.product( ["test/config/habitat_baselines/ppo_pointnav_test.yaml"], - [True], + [True, False], [ [], [ diff --git a/test/test_baseline_training.py b/test/test_baseline_training.py index 2db78870f3..52b665c579 100644 --- a/test/test_baseline_training.py +++ b/test/test_baseline_training.py @@ -208,6 +208,7 @@ def test_hrl(config_path, policy_type, skill_type, mode): # We don't have skill checkpoints to load right now. return if policy_type == "hl_fixed" and mode == "train": + # Cannot train with a fixed policy return if skill_type == "oracle_skills" and "oracle" not in config_path: return diff --git a/test/test_ddppo_reduce.py b/test/test_ddppo_reduce.py index d15971591b..17d69db050 100644 --- a/test/test_ddppo_reduce.py +++ b/test/test_ddppo_reduce.py @@ -80,8 +80,7 @@ def _worker_fn( 2, obs_space, action_space, - ppo_cfg.hidden_size, - num_recurrent_layers=actor_critic.net.num_recurrent_layers, + actor_critic, is_double_buffered=False, ) rollouts.to(device)