From a787e3416956e2bda1f731045275979f21b0444e Mon Sep 17 00:00:00 2001 From: ASzot Date: Fri, 23 Dec 2022 16:09:50 -0800 Subject: [PATCH 01/48] Trainable HL policy --- .../config/default_structured_configs.py | 23 ++- .../config/rearrange/rl_hl_srl_onav.yaml | 176 ++++++++++++++++++ .../config/rearrange/tp_srl.yaml | 100 ++-------- .../config/rearrange/tp_srl_oracle_nav.yaml | 129 ++++--------- .../config/tp_srl_test/tp_srl_test.yaml | 99 ---------- .../rl/hrl/hierarchical_policy.py | 89 +++++++-- .../rl/hrl/high_level_policy.py | 81 -------- .../habitat_baselines/rl/hrl/hl/__init__.py | 3 +- .../rl/hrl/hl/fixed_policy.py | 67 ++----- .../rl/hrl/hl/high_level_policy.py | 84 ++++++++- .../rl/hrl/hl/neural_policy.py | 175 +++++++++++++++++ .../rl/hrl/skills/__init__.py | 2 + .../rl/hrl/skills/nn_skill.py | 13 +- .../habitat_baselines/rl/hrl/skills/noop.py | 39 ++++ .../rl/hrl/skills/oracle_nav.py | 7 +- .../habitat_baselines/rl/hrl/skills/pick.py | 9 +- .../habitat_baselines/rl/hrl/skills/reset.py | 5 +- .../habitat_baselines/rl/hrl/skills/skill.py | 99 ++++++++-- .../habitat_baselines/rl/hrl/skills/wait.py | 5 +- .../habitat_baselines/rl/hrl/utils.py | 2 +- .../habitat_baselines/rl/ppo/policy.py | 37 +++- .../habitat_baselines/rl/ppo/ppo_trainer.py | 40 ++-- .../rl/ver/inference_worker.py | 1 + .../tasks/rearrange/multi_task/pddl_action.py | 2 +- test/test_baseline_training.py | 58 ++++++ test/test_rearrange_task.py | 62 ------ 26 files changed, 861 insertions(+), 546 deletions(-) create mode 100644 habitat-baselines/habitat_baselines/config/rearrange/rl_hl_srl_onav.yaml delete mode 100644 habitat-baselines/habitat_baselines/config/tp_srl_test/tp_srl_test.yaml delete mode 100644 habitat-baselines/habitat_baselines/rl/hrl/high_level_policy.py create mode 100644 habitat-baselines/habitat_baselines/rl/hrl/hl/neural_policy.py create mode 100644 habitat-baselines/habitat_baselines/rl/hrl/skills/noop.py diff --git a/habitat-baselines/habitat_baselines/config/default_structured_configs.py b/habitat-baselines/habitat_baselines/config/default_structured_configs.py index 46275ea0d0..56a51d6c82 100644 --- a/habitat-baselines/habitat_baselines/config/default_structured_configs.py +++ b/habitat-baselines/habitat_baselines/config/default_structured_configs.py @@ -219,10 +219,31 @@ class Eq2CubeConfig(ObsTransformConfig): ) +@dataclass +class HrlDefinedSkill(HabitatBaselinesBaseConfig): + skill_name: str = MISSING + name: str = "PointNavResNetPolicy" + action_distribution_type: str = "gaussian" + load_ckpt_file: str = "" + max_skill_steps: int = 200 + force_end_on_timeout: bool = True + force_config_file: str = "" + at_resting_threshold: float = 0.15 + apply_postconds: bool = False + obs_skill_inputs: List[str] = field(default_factory=list) + obs_skill_input_dim: int = 3 + start_zone_radius: float = 0.3 + # For the oracle navigation skill + nav_action_name: str = "base_velocity" + stop_thresh: float = 0.001 + # For the reset_arm_skill + reset_joint_state: List[float] = MISSING + + @dataclass class HierarchicalPolicy(HabitatBaselinesBaseConfig): high_level_policy: Dict[str, Any] = MISSING - defined_skills: Dict[str, Any] = field(default_factory=dict) + defined_skills: Dict[str, HrlDefinedSkill] = field(default_factory=dict) use_skills: Dict[str, str] = field(default_factory=dict) diff --git a/habitat-baselines/habitat_baselines/config/rearrange/rl_hl_srl_onav.yaml b/habitat-baselines/habitat_baselines/config/rearrange/rl_hl_srl_onav.yaml new file mode 100644 index 0000000000..3f542bbf09 --- /dev/null +++ 
b/habitat-baselines/habitat_baselines/config/rearrange/rl_hl_srl_onav.yaml @@ -0,0 +1,176 @@ +# @package _global_ +# Pick and place are kinematically simulated. +# Navigation is fully simulated. + +defaults: + - /benchmark/rearrange: rearrange_easy + - /habitat_baselines: habitat_baselines_rl_config_base + - /habitat_baselines/rl/policy/obs_transforms: + - add_virtual_keys_base + - /habitat/task/actions: + - oracle_nav_action + - arm_action + - base_velocity + - rearrange_stop + - _self_ + +habitat_baselines: + verbose: False + trainer_name: "ddppo" + torch_gpu_id: 0 + tensorboard_dir: "tb" + video_dir: "video_dir" + video_fps: 30 + video_render_views: + - "third_rgb_sensor" + test_episode_count: -1 + eval_ckpt_path_dir: "" + num_environments: 1 + writer_type: 'tb' + checkpoint_folder: "data/new_checkpoints" + num_updates: -1 + total_num_steps: 1.0e8 + log_interval: 10 + num_checkpoints: 20 + force_torch_single_threaded: True + eval_keys_to_include_in_name: ['reward', 'force', 'composite_success'] + load_resume_state_config: False + + eval: + use_ckpt_config: False + should_load_ckpt: False + video_option: ["disk"] + + rl: + policy: + name: "HierarchicalPolicy" + obs_transforms: + add_virtual_keys: + virtual_keys: + "goal_to_agent_gps_compass": 2 + hierarchical_policy: + high_level_policy: + name: "NeuralHighLevelPolicy" + allow_other_place: False + hidden_dim: 512 + use_rnn: True + rnn_type: 'LSTM' + backbone: resnet18 + normalize_visual_inputs: False + num_rnn_layers: 2 + policy_input_keys: + - "robot_head_depth" + defined_skills: + open_cab: + skill_name: "ArtObjSkillPolicy" + name: "PointNavResNetPolicy" + action_distribution_type: "gaussian" + load_ckpt_file: "data/models/open_cab.pth" + max_skill_steps: 200 + apply_postconds: True + + open_fridge: + skill_name: "ArtObjSkillPolicy" + load_ckpt_file: "data/models/open_fridge.pth" + max_skill_steps: 200 + apply_postconds: True + + close_cab: + skill_name: "NoopSkillPolicy" + obs_skill_inputs: ["obj_start_sensor"] + load_ckpt_file: "data/models/close_cab.pth" + max_skill_steps: 200 + + close_fridge: + skill_name: "NoopSkillPolicy" + obs_skill_inputs: ["obj_start_sensor"] + max_skill_steps: 1 + apply_postconds: True + + pick: + skill_name: "NoopSkillPolicy" + obs_skill_inputs: ["obj_start_sensor"] + max_skill_steps: 1 + apply_postconds: True + + place: + skill_name: "NoopSkillPolicy" + obs_skill_inputs: ["obj_goal_sensor"] + max_skill_steps: 1 + apply_postconds: True + + nav_to_obj: + skill_name: "OracleNavPolicy" + obs_skill_inputs: ["obj_start_sensor", "abs_obj_start_sensor", "obj_goal_sensor", "abs_obj_goal_sensor"] + max_skill_steps: 300 + + wait_skill: + skill_name: "WaitSkillPolicy" + max_skill_steps: -1 + force_end_on_timeout: False + + reset_arm_skill: + skill_name: "ResetArmSkill" + max_skill_steps: 50 + reset_joint_state: [-4.5003259e-01, -1.0799699e00, 9.9526465e-02, 9.3869519e-01, -7.8854430e-04, 1.5702540e00, 4.6168058e-03] + force_end_on_timeout: False + + use_skills: + # Uncomment if you are also using these skills + # open_cab: "open_cab" + # open_fridge: "open_fridge" + # close_cab: "open_cab" + # close_fridge: "open_fridge" + pick: "pick" + place: "place" + nav: "nav_to_obj" + nav_to_receptacle: "nav_to_obj" + wait: "wait_skill" + reset_arm: "reset_arm_skill" + + ppo: + # ppo params + clip_param: 0.2 + ppo_epoch: 2 + num_mini_batch: 2 + value_loss_coef: 0.5 + entropy_coef: 0.0001 + lr: 2.5e-4 + eps: 1e-5 + max_grad_norm: 0.2 + num_steps: 128 + use_gae: True + gamma: 0.99 + tau: 0.95 + use_linear_clip_decay: False + 
use_linear_lr_decay: False + reward_window_size: 50 + + use_normalized_advantage: False + + hidden_size: 512 + + # Use double buffered sampling, typically helps + # when environment time is similar or larger than + # policy inference time during rollout generation + use_double_buffered_sampler: False + + ddppo: + sync_frac: 0.6 + # The PyTorch distributed backend to use + distrib_backend: NCCL + # Visual encoder backbone + pretrained_weights: data/ddppo-models/gibson-2plus-resnet50.pth + # Initialize with pretrained weights + pretrained: False + # Initialize just the visual encoder backbone with pretrained weights + pretrained_encoder: False + # Whether the visual encoder backbone will be trained. + train_encoder: True + # Whether to reset the critic linear layer + reset_critic: False + + # Model parameters + backbone: resnet18 + rnn_type: LSTM + num_recurrent_layers: 2 diff --git a/habitat-baselines/habitat_baselines/config/rearrange/tp_srl.yaml b/habitat-baselines/habitat_baselines/config/rearrange/tp_srl.yaml index 3524335f21..3f2fe45fb5 100644 --- a/habitat-baselines/habitat_baselines/config/rearrange/tp_srl.yaml +++ b/habitat-baselines/habitat_baselines/config/rearrange/tp_srl.yaml @@ -46,51 +46,30 @@ habitat_baselines: name: "FixedHighLevelPolicy" add_arm_rest: True defined_skills: - nn_open_cab: + open_cab: skill_name: "ArtObjSkillPolicy" name: "PointNavResNetPolicy" - action_distribution_type: "gaussian" - at_resting_threshold: 0.15 - obs_skill_inputs: [] load_ckpt_file: "data/models/open_cab.pth" max_skill_steps: 200 - start_zone_radius: 0.3 - force_end_on_timeout: True - nn_open_fridge: + open_fridge: skill_name: "ArtObjSkillPolicy" - name: "PointNavResNetPolicy" - action_distribution_type: "gaussian" - at_resting_threshold: 0.15 - obs_skill_inputs: [] load_ckpt_file: "data/models/open_fridge.pth" max_skill_steps: 200 - start_zone_radius: 0.3 - force_end_on_timeout: True - nn_close_cab: + close_cab: skill_name: "ArtObjSkillPolicy" - name: "PointNavResNetPolicy" - action_distribution_type: "gaussian" - at_resting_threshold: 0.2 obs_skill_inputs: ["obj_start_sensor"] load_ckpt_file: "data/models/close_cab.pth" max_skill_steps: 200 - start_zone_radius: 0.3 - force_end_on_timeout: True - nn_close_fridge: + close_fridge: skill_name: "ArtObjSkillPolicy" - name: "PointNavResNetPolicy" - action_distribution_type: "gaussian" - at_resting_threshold: 0.2 obs_skill_inputs: ["obj_start_sensor"] load_ckpt_file: "data/models/close_fridge.pth" max_skill_steps: 200 - start_zone_radius: 0.3 - force_end_on_timeout: True - nn_pick: + pick: skill_name: "PickSkillPolicy" name: "PointNavResNetPolicy" action_distribution_type: "gaussian" @@ -98,9 +77,8 @@ habitat_baselines: obs_skill_inputs: ["obj_start_sensor"] load_ckpt_file: "data/models/pick.pth" max_skill_steps: 200 - force_end_on_timeout: True - nn_place: + place: skill_name: "PlaceSkillPolicy" name: "PointNavResNetPolicy" action_distribution_type: "gaussian" @@ -108,23 +86,18 @@ habitat_baselines: obs_skill_inputs: ["obj_goal_sensor"] load_ckpt_file: "data/models/place.pth" max_skill_steps: 200 - force_end_on_timeout: True - nn_nav: + nav_to_obj: skill_name: "NavSkillPolicy" - name: "PointNavResNetPolicy" - action_distribution_type: "gaussian" obs_skill_inputs: ["goal_to_agent_gps_compass"] obs_skill_input_dim: 2 - lin_speed_stop: 0.067 - ang_speed_stop: 0.067 load_ckpt_file: "data/models/nav.pth" max_skill_steps: 300 force_end_on_timeout: False wait_skill: skill_name: "WaitSkillPolicy" - max_skill_steps: -1.0 + max_skill_steps: -1 
force_end_on_timeout: False reset_arm_skill: @@ -135,60 +108,19 @@ habitat_baselines: use_skills: # Uncomment if you are also using these skills - # open_cab: "NN_OPEN_CAB" - # open_fridge: "NN_OPEN_FRIDGE" - # close_cab: "NN_OPEN_CAB" - # close_fridge: "NN_OPEN_FRIDGE" - pick: "nn_pick" - place: "nn_place" - nav: "nn_nav" - nav_to_receptacle: "nn_nav" + # open_cab: "open_cab" + # open_fridge: "open_fridge" + # close_cab: "open_cab" + # close_fridgE: "open_fridge" + pick: "pick" + place: "place" + nav: "nav_to_obj" + nav_to_receptacle: "nav_to_obj" wait: "wait_skill" reset_arm: "reset_arm_skill" - ppo: - # ppo params - clip_param: 0.2 - ppo_epoch: 2 - num_mini_batch: 2 - value_loss_coef: 0.5 - entropy_coef: 0.0001 - lr: 2.5e-4 - eps: 1e-5 - max_grad_norm: 0.2 - num_steps: 128 - use_gae: True - gamma: 0.99 - tau: 0.95 - use_linear_clip_decay: False - use_linear_lr_decay: False - reward_window_size: 50 - - use_normalized_advantage: False - - hidden_size: 512 - - # Use double buffered sampling, typically helps - # when environment time is similar or larger than - # policy inference time during rollout generation - use_double_buffered_sampler: False - ddppo: - sync_frac: 0.6 - # The PyTorch distributed backend to use - distrib_backend: NCCL - # Visual encoder backbone - pretrained_weights: data/ddppo-models/gibson-2plus-resnet50.pth - # Initialize with pretrained weights pretrained: False - # Initialize just the visual encoder backbone with pretrained weights pretrained_encoder: False - # Whether the visual encoder backbone will be trained. train_encoder: True - # Whether to reset the critic linear layer reset_critic: False - - # Model parameters - backbone: resnet18 - rnn_type: LSTM - num_recurrent_layers: 2 diff --git a/habitat-baselines/habitat_baselines/config/rearrange/tp_srl_oracle_nav.yaml b/habitat-baselines/habitat_baselines/config/rearrange/tp_srl_oracle_nav.yaml index 42f5524234..5e8b59a944 100644 --- a/habitat-baselines/habitat_baselines/config/rearrange/tp_srl_oracle_nav.yaml +++ b/habitat-baselines/habitat_baselines/config/rearrange/tp_srl_oracle_nav.yaml @@ -6,12 +6,27 @@ defaults: - /habitat_baselines/rl/policy/obs_transforms: - add_virtual_keys_base - /habitat/task/actions: + - oracle_nav_action - arm_action - base_velocity - rearrange_stop - - oracle_nav_action - _self_ +AGENT_0_ORACLE_NAV_ACTION: + TYPE: "OracleNavAction" + TURN_VELOCITY: 1.0 + FORWARD_VELOCITY: 1.0 + TURN_THRESH: 0.1 + DIST_THRESH: 0.2 + AGENT: 0 + LIN_SPEED: 40.0 + ANG_SPEED: 20.0 + MIN_ABS_LIN_SPEED: 1.0 + MIN_ABS_ANG_SPEED: 1.0 + ALLOW_DYN_SLIDE: False + END_ON_STOP: False + ALLOW_BACK: True + habitat_baselines: verbose: False trainer_name: "ddppo" @@ -32,10 +47,11 @@ habitat_baselines: num_checkpoints: 20 force_torch_single_threaded: True eval_keys_to_include_in_name: ['reward', 'force', 'composite_success'] + load_resume_state_config: False eval: - video_option: ["disk"] use_ckpt_config: False should_load_ckpt: False + video_option: ["disk"] rl: policy: @@ -49,51 +65,30 @@ habitat_baselines: name: "FixedHighLevelPolicy" add_arm_rest: True defined_skills: - NN_OPEN_CAB: + open_cab: skill_name: "ArtObjSkillPolicy" name: "PointNavResNetPolicy" - action_distribution_type: "gaussian" - at_resting_threshold: 0.15 - obs_skill_inputs: [] load_ckpt_file: "data/models/open_cab.pth" max_skill_steps: 200 - start_zone_radius: 0.3 - force_end_on_timeout: True - NN_OPEN_FRIDGE: + open_fridge: skill_name: "ArtObjSkillPolicy" - name: "PointNavResNetPolicy" - action_distribution_type: "gaussian" - at_resting_threshold: 
0.15 - obs_skill_inputs: [] load_ckpt_file: "data/models/open_fridge.pth" max_skill_steps: 200 - start_zone_radius: 0.3 - force_end_on_timeout: True - NN_CLOSE_CAB: + close_cab: skill_name: "ArtObjSkillPolicy" - name: "PointNavResNetPolicy" - action_distribution_type: "gaussian" - at_resting_threshold: 0.2 obs_skill_inputs: ["obj_start_sensor"] load_ckpt_file: "data/models/close_cab.pth" max_skill_steps: 200 - start_zone_radius: 0.3 - force_end_on_timeout: True - NN_CLOSE_FRIDGE: + close_fridge: skill_name: "ArtObjSkillPolicy" - name: "PointNavResNetPolicy" - action_distribution_type: "gaussian" - at_resting_threshold: 0.2 obs_skill_inputs: ["obj_start_sensor"] load_ckpt_file: "data/models/close_fridge.pth" max_skill_steps: 200 - start_zone_radius: 0.3 - force_end_on_timeout: True - nn_pick: + pick: skill_name: "PickSkillPolicy" name: "PointNavResNetPolicy" action_distribution_type: "gaussian" @@ -101,19 +96,8 @@ habitat_baselines: obs_skill_inputs: ["obj_start_sensor"] load_ckpt_file: "data/models/pick.pth" max_skill_steps: 200 - force_end_on_timeout: True - GT_NAV: - skill_name: "OracleNavPolicy" - obs_skill_inputs: ["obj_start_sensor", "abs_obj_start_sensor", "obj_goal_sensor", "abs_obj_goal_sensor"] - goal_sensors: ["obj_goal_sensor", "abs_obj_goal_sensor"] - NAV_ACTION_NAME: "base_velocity" - max_skill_steps: 300 - force_end_on_timeout: True - stop_angle_thresh: 0.2 - stop_dist_thresh: 1.0 - - nn_place: + place: skill_name: "PlaceSkillPolicy" name: "PointNavResNetPolicy" action_distribution_type: "gaussian" @@ -121,13 +105,17 @@ habitat_baselines: obs_skill_inputs: ["obj_goal_sensor"] load_ckpt_file: "data/models/place.pth" max_skill_steps: 200 - force_end_on_timeout: True wait_skill: skill_name: "WaitSkillPolicy" - max_skill_steps: -1.0 + max_skill_steps: -1 force_end_on_timeout: False + gt_nav: + skill_name: "OracleNavPolicy" + obs_skill_inputs: ["obj_start_sensor", "abs_obj_start_sensor", "obj_goal_sensor", "abs_obj_goal_sensor"] + max_skill_steps: 300 + reset_arm_skill: skill_name: "ResetArmSkill" max_skill_steps: 50 @@ -136,60 +124,19 @@ habitat_baselines: use_skills: # Uncomment if you are also using these skills - # open_cab: "NN_OPEN_CAB" - # open_fridge: "NN_OPEN_FRIDGE" - # close_cab: "NN_OPEN_CAB" - # close_fridge: "NN_OPEN_FRIDGE" - pick: "nn_pick" - place: "nn_place" - nav: "GT_NAV" - nav_to_receptacle: "GT_NAV" + # open_cab: "open_cab" + # open_fridge: "open_fridge" + # close_cab: "open_cab" + # close_fridge: "open_fridge" + pick: "pick" + place: "place" + nav: "gt_nav" + nav_to_receptacle: "gt_nav" wait: "wait_skill" reset_arm: "reset_arm_skill" - ppo: - # ppo params - clip_param: 0.2 - ppo_epoch: 2 - num_mini_batch: 2 - value_loss_coef: 0.5 - entropy_coef: 0.0001 - lr: 2.5e-4 - eps: 1e-5 - max_grad_norm: 0.2 - num_steps: 128 - use_gae: True - gamma: 0.99 - tau: 0.95 - use_linear_clip_decay: False - use_linear_lr_decay: False - reward_window_size: 50 - - use_normalized_advantage: False - - hidden_size: 512 - - # Use double buffered sampling, typically helps - # when environment time is similar or larger than - # policy inference time during rollout generation - use_double_buffered_sampler: False - ddppo: - sync_frac: 0.6 - # The PyTorch distributed backend to use - distrib_backend: NCCL - # Visual encoder backbone - pretrained_weights: data/ddppo-models/gibson-2plus-resnet50.pth - # Initialize with pretrained weights pretrained: False - # Initialize just the visual encoder backbone with pretrained weights pretrained_encoder: False - # Whether the visual encoder backbone 
will be trained. train_encoder: True - # Whether to reset the critic linear layer reset_critic: False - - # Model parameters - backbone: resnet18 - rnn_type: LSTM - num_recurrent_layers: 2 diff --git a/habitat-baselines/habitat_baselines/config/tp_srl_test/tp_srl_test.yaml b/habitat-baselines/habitat_baselines/config/tp_srl_test/tp_srl_test.yaml deleted file mode 100644 index fd413e2749..0000000000 --- a/habitat-baselines/habitat_baselines/config/tp_srl_test/tp_srl_test.yaml +++ /dev/null @@ -1,99 +0,0 @@ -# @package _global_ - -defaults: - - /benchmark/rearrange: rearrange_easy - - /habitat_baselines: habitat_baselines_rl_config_base - - /habitat_baselines/rl/policy/obs_transforms: - - add_virtual_keys_base - -habitat_baselines: - verbose: False - trainer_name: "ppo" - torch_gpu_id: 0 - tensorboard_dir: "" - video_dir: "data/test_checkpoints/ppo/pointnav/video" - test_episode_count: 2 - eval_ckpt_path_dir: "" - num_environments: 1 - checkpoint_folder: "data/test_checkpoints/ppo/pointnav/" - num_updates: 2 - log_interval: 100 - num_checkpoints: 2 - force_torch_single_threaded: True - load_resume_state_config: False - eval: - use_ckpt_config: False - should_load_ckpt: False - - rl: - policy: - name: "HierarchicalPolicy" - - obs_transforms: - add_virtual_keys: - virtual_keys: - "goal_to_agent_gps_compass": 2 - hierarchical_policy: - high_level_policy: - name: "FixedHighLevelPolicy" - add_arm_rest: True - defined_skills: - nn_pick: - skill_name: "PickSkillPolicy" - name: "PointNavResNetPolicy" - action_distribution_type: "gaussian" - at_resting_threshold: 0.15 - obs_skill_inputs: ["obj_start_sensor"] - load_ckpt_file: "" - max_skill_steps: 200 - force_end_on_timeout: True - force_config_file: "benchmark/rearrange=pick" - - nn_place: - skill_name: "PlaceSkillPolicy" - name: "PointNavResNetPolicy" - action_distribution_type: "gaussian" - at_resting_threshold: 0.15 - obs_skill_inputs: ["obj_goal_sensor"] - load_ckpt_file: "" - max_skill_steps: 200 - force_end_on_timeout: True - force_config_file: "benchmark/rearrange=place" - - nn_nav: - skill_name: "NavSkillPolicy" - name: "PointNavResNetPolicy" - action_distribution_type: "gaussian" - obs_skill_inputs: ["goal_to_agent_gps_compass"] - obs_skill_input_dim: 2 - lin_speed_stop: 0.067 - ang_speed_stop: 0.067 - load_ckpt_file: "" - max_skill_steps: 300 - force_end_on_timeout: False - force_config_file: "benchmark/rearrange=nav_to_obj" - - wait_skill: - skill_name: "WaitSkillPolicy" - max_skill_steps: -1.0 - force_end_on_timeout: False - - reset_arm_skill: - skill_name: "ResetArmSkill" - max_skill_steps: 50 - reset_joint_state: [-4.5003259e-01, -1.0799699e00, 9.9526465e-02, 9.3869519e-01, -7.8854430e-04, 1.5702540e00, 4.6168058e-03] - force_end_on_timeout: False - - use_skills: - # Uncomment if you are also using these skills - pick: "nn_pick" - place: "nn_place" - nav: "nn_nav" - nav_to_receptacle: "nn_nav" - wait: "wait_skill" - reset_arm: "reset_arm_skill" - ddppo: - pretrained: False - pretrained_encoder: False - train_encoder: True - reset_critic: False diff --git a/habitat-baselines/habitat_baselines/rl/hrl/hierarchical_policy.py b/habitat-baselines/habitat_baselines/rl/hrl/hierarchical_policy.py index e0aa4e7e77..f9217051e4 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/hierarchical_policy.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/hierarchical_policy.py @@ -13,15 +13,18 @@ from habitat.tasks.rearrange.multi_task.composite_sensors import ( CompositeSuccess, ) +from habitat.tasks.rearrange.multi_task.pddl_domain import 
PddlProblem from habitat_baselines.common.baseline_registry import baseline_registry from habitat_baselines.common.logging import baselines_logger from habitat_baselines.rl.hrl.hl import ( # noqa: F401. FixedHighLevelPolicy, HighLevelPolicy, + NeuralHighLevelPolicy, ) from habitat_baselines.rl.hrl.skills import ( # noqa: F401. ArtObjSkillPolicy, NavSkillPolicy, + NoopSkillPolicy, OracleNavPolicy, PickSkillPolicy, PlaceSkillPolicy, @@ -30,7 +33,7 @@ WaitSkillPolicy, ) from habitat_baselines.rl.hrl.utils import find_action_range -from habitat_baselines.rl.ppo.policy import Policy +from habitat_baselines.rl.ppo.policy import Policy, PolicyAction from habitat_baselines.utils.common import get_num_actions @@ -54,6 +57,18 @@ def __init__( self._name_to_idx: Dict[str, int] = {} self._idx_to_name: Dict[int, str] = {} + task_spec_file = osp.join( + full_config.habitat.task.task_spec_base_path, + full_config.habitat.task.task_spec + ".yaml", + ) + domain_file = full_config.habitat.task.pddl_domain_def + + self._pddl_problem = PddlProblem( + domain_file, + task_spec_file, + config, + ) + for i, (skill_id, use_skill_name) in enumerate( config.hierarchical_policy.use_skills.items() ): @@ -72,6 +87,7 @@ def __init__( self._num_envs, full_config, ) + skill_policy.set_pddl_problem(self._pddl_problem) self._skills[i] = skill_policy self._name_to_idx[skill_id] = i self._idx_to_name[i] = skill_id @@ -88,12 +104,11 @@ def __init__( ) self._high_level_policy: HighLevelPolicy = high_level_cls( config.hierarchical_policy.high_level_policy, - osp.join( - full_config.habitat.task.task_spec_base_path, - full_config.habitat.task.task_spec + ".yaml", - ), + self._pddl_problem, num_envs, self._name_to_idx, + observation_space, + action_space, ) self._stop_action_idx, _ = find_action_range( action_space, "rearrange_stop" @@ -102,35 +117,41 @@ def __init__( def eval(self): pass - def get_policy_info(self, infos, dones): - policy_infos = [] - for i, info in enumerate(infos): + def extract_policy_info( + self, action_data, infos, dones + ) -> List[Dict[str, float]]: + ret_policy_infos = [] + for i, (info, policy_info) in enumerate( + zip(infos, action_data.policy_info) + ): cur_skill_idx = self._cur_skills[i].item() - policy_info: Dict[str, Any] = { - "cur_skill": self._idx_to_name[cur_skill_idx] + ret_policy_info: Dict[str, Any] = { + "cur_skill": self._idx_to_name[cur_skill_idx], + **policy_info, } did_skill_fail = dones[i] and not info[CompositeSuccess.cls_uuid] for skill_name, idx in self._name_to_idx.items(): - policy_info[f"failed_skill_{skill_name}"] = ( + ret_policy_info[f"failed_skill_{skill_name}"] = ( did_skill_fail if idx == cur_skill_idx else 0.0 ) - policy_infos.append(policy_info) + ret_policy_infos.append(ret_policy_info) - return policy_infos + return ret_policy_infos @property def num_recurrent_layers(self): - return self._skills[0].num_recurrent_layers + return self._high_level_policy.num_recurrent_layers @property def should_load_agent_state(self): return False def parameters(self): - return self._skills[0].parameters() # type: ignore[attr-defined] + return self._high_level_policy.parameters() def to(self, device): + self._high_level_policy.to(device) for skill in self._skills.values(): skill.to(device) @@ -173,6 +194,7 @@ def act( deterministic=False, ): + log_info: List[Dict[str, Any]] = [{} for _ in range(self._num_envs)] self._high_level_policy.apply_mask(masks) # type: ignore[attr-defined] should_terminate: torch.BoolTensor = torch.zeros( @@ -182,6 +204,15 @@ def act( (self._num_envs,), 
dtype=torch.bool ) + hl_says_term = self._high_level_policy.get_termination( + observations, + rnn_hidden_states, + prev_actions, + masks, + self._cur_skills, + log_info, + ) + grouped_skills = self._broadcast_skill_ids( self._cur_skills, sel_dat={ @@ -189,6 +220,7 @@ def act( "rnn_hidden_states": rnn_hidden_states, "prev_actions": prev_actions, "masks": masks, + "hl_says_term": hl_says_term, }, # Only decide on skill termination if the episode is active. should_adds=masks, @@ -206,6 +238,11 @@ def act( ) = self._skills[skill_id].should_terminate( **dat, batch_idx=batch_ids, + log_info=log_info, + skill_name=[ + self._idx_to_name[self._cur_skills[i].item()] + for i in batch_ids + ], ) self._call_high_level = should_terminate @@ -215,17 +252,21 @@ def act( # If any skills want to terminate invoke the high-level policy to get # the next skill. hl_terminate = torch.zeros(self._num_envs, dtype=torch.bool) + hl_info: Dict[str, Any] = {} if self._call_high_level.sum() > 0: ( new_skills, new_skill_args, hl_terminate, + hl_info, ) = self._high_level_policy.get_next_skill( observations, rnn_hidden_states, prev_actions, masks, self._call_high_level, + deterministic, + log_info, ) sel_grouped_skills = self._broadcast_skill_ids( @@ -264,7 +305,7 @@ def act( }, ) for skill_id, (batch_ids, batch_dat) in grouped_skills.items(): - tmp_actions, tmp_rnn = self._skills[skill_id].act( + action_data = self._skills[skill_id].act( observations=batch_dat["observations"], rnn_hidden_states=batch_dat["rnn_hidden_states"], prev_actions=batch_dat["prev_actions"], @@ -273,8 +314,8 @@ def act( ) # LL skills are not allowed to terminate the overall episode. - actions[batch_ids] = tmp_actions - rnn_hidden_states[batch_ids] = tmp_rnn + actions[batch_ids] = action_data.actions + rnn_hidden_states[batch_ids] = action_data.rnn_hidden_states actions[:, self._stop_action_idx] = 0.0 should_terminate = bad_should_terminate | hl_terminate @@ -286,7 +327,17 @@ def act( ) actions[batch_idx, self._stop_action_idx] = 1.0 - return (None, actions, None, rnn_hidden_states) + ac_info = { + "values": None, + "action_log_probs": None, + "rnn_hidden_states": rnn_hidden_states, + "actions": actions, + } + ac_info.update(hl_info) + + return PolicyAction( + take_actions=actions, policy_info=log_info, **ac_info + ) @classmethod def from_config( diff --git a/habitat-baselines/habitat_baselines/rl/hrl/high_level_policy.py b/habitat-baselines/habitat_baselines/rl/hrl/high_level_policy.py deleted file mode 100644 index 082dc15434..0000000000 --- a/habitat-baselines/habitat_baselines/rl/hrl/high_level_policy.py +++ /dev/null @@ -1,81 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and its affiliates. -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. - -from typing import List, Tuple - -import torch -import yaml - -from habitat.config.default import get_full_habitat_config_path -from habitat.tasks.rearrange.multi_task.rearrange_pddl import parse_func -from habitat_baselines.common.logging import baselines_logger - - -class FixedHighLevelPolicy: - """ - :property _solution_actions: List of tuples were first tuple element is the - action name and the second is the action arguments. 
- """ - - _solution_actions: List[Tuple[str, List[str]]] - - def __init__(self, config, task_spec_file, num_envs, skill_name_to_idx): - with open(get_full_habitat_config_path(task_spec_file), "r") as f: - task_spec = yaml.safe_load(f) - - self._solution_actions = [] - if "solution" not in task_spec: - raise ValueError( - f"The ground truth task planner only works when the task solution is hard-coded in the PDDL problem file at {task_spec_file}" - ) - for i, sol_step in enumerate(task_spec["solution"]): - sol_action = parse_func(sol_step) - self._solution_actions.append(sol_action) - if config.add_arm_rest and i < (len(task_spec["solution"]) - 1): - self._solution_actions.append(parse_func("reset_arm(0)")) - - # Add a wait action at the end. - self._solution_actions.append(parse_func("wait(30)")) - - self._next_sol_idxs = torch.zeros(num_envs, dtype=torch.int32) - self._num_envs = num_envs - self._skill_name_to_idx = skill_name_to_idx - - def apply_mask(self, mask): - self._next_sol_idxs *= mask.cpu().view(-1) - - def get_next_skill( - self, observations, rnn_hidden_states, prev_actions, masks, plan_masks - ): - next_skill = torch.zeros(self._num_envs) - skill_args_data = [None for _ in range(self._num_envs)] - immediate_end = torch.zeros(self._num_envs, dtype=torch.bool) - for batch_idx, should_plan in enumerate(plan_masks): - if should_plan == 1.0: - if self._next_sol_idxs[batch_idx] >= len( - self._solution_actions - ): - baselines_logger.info( - f"Calling for immediate end with {self._next_sol_idxs[batch_idx]}" - ) - immediate_end[batch_idx] = True - use_idx = len(self._solution_actions) - 1 - else: - use_idx = self._next_sol_idxs[batch_idx].item() - - skill_name, skill_args = self._solution_actions[use_idx] - baselines_logger.info( - f"Got next element of the plan with {skill_name}, {skill_args}" - ) - if skill_name not in self._skill_name_to_idx: - raise ValueError( - f"Could not find skill named {skill_name} in {self._skill_name_to_idx}" - ) - next_skill[batch_idx] = self._skill_name_to_idx[skill_name] - - skill_args_data[batch_idx] = skill_args # type: ignore[call-overload] - - self._next_sol_idxs[batch_idx] += 1 - - return next_skill, skill_args_data, immediate_end diff --git a/habitat-baselines/habitat_baselines/rl/hrl/hl/__init__.py b/habitat-baselines/habitat_baselines/rl/hrl/hl/__init__.py index 09023c316f..31bc4beeb1 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/hl/__init__.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/hl/__init__.py @@ -1,4 +1,5 @@ from habitat_baselines.rl.hrl.hl.fixed_policy import FixedHighLevelPolicy from habitat_baselines.rl.hrl.hl.high_level_policy import HighLevelPolicy +from habitat_baselines.rl.hrl.hl.neural_policy import NeuralHighLevelPolicy -__all__ = ["HighLevelPolicy", "FixedHighLevelPolicy"] +__all__ = ["HighLevelPolicy", "FixedHighLevelPolicy", "NeuralHighLevelPolicy"] diff --git a/habitat-baselines/habitat_baselines/rl/hrl/hl/fixed_policy.py b/habitat-baselines/habitat_baselines/rl/hrl/hl/fixed_policy.py index acbed7d418..8b6e6a6552 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/hl/fixed_policy.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/hl/fixed_policy.py @@ -5,9 +5,7 @@ from typing import List, Tuple import torch -import yaml -from habitat.config.default import get_full_habitat_config_path from habitat.tasks.rearrange.multi_task.rearrange_pddl import parse_func from habitat_baselines.common.logging import baselines_logger from habitat_baselines.rl.hrl.hl.high_level_policy import HighLevelPolicy @@ -23,39 
+21,25 @@ class FixedHighLevelPolicy(HighLevelPolicy): _solution_actions: List[Tuple[str, List[str]]] - def __init__(self, config, task_spec_file, num_envs, skill_name_to_idx): - """ - Initialize the `FixedHighLevelPolicy` object. - - Args: - config: Config object containing the configurations for the agent. - task_spec_file: Path to the task specification file. - num_envs: Number of parallel environments. - skill_name_to_idx: Dictionary mapping skill names to skill indices. - """ - with open(get_full_habitat_config_path(task_spec_file), "r") as f: - task_spec = yaml.safe_load(f) + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) - self._num_envs = num_envs - self._skill_name_to_idx = skill_name_to_idx self._solution_actions = self._parse_solution_actions( - config, task_spec, task_spec_file + self._pddl_prob.solution ) - self._next_sol_idxs = torch.zeros(num_envs, dtype=torch.int32) - - def _parse_solution_actions(self, config, task_spec, task_spec_file): - if "solution" not in task_spec: - raise ValueError( - f"The ground truth task planner only works when the task solution is hard-coded in the PDDL problem file at {task_spec_file}." - ) + self._next_sol_idxs = torch.zeros(self._num_envs, dtype=torch.int32) + def _parse_solution_actions(self, solution): solution_actions = [] - for i, sol_step in enumerate(task_spec["solution"]): - sol_action = parse_func(sol_step) + for i, sol_step in enumerate(solution): + sol_action = ( + sol_step.name, + [x.name for x in sol_step.param_values], + ) solution_actions.append(sol_action) - if config.add_arm_rest and i < (len(task_spec["solution"]) - 1): + if self._config.add_arm_rest and i < (len(solution) - 1): solution_actions.append(parse_func("reset_arm(0)")) # Add a wait action at the end. @@ -93,26 +77,15 @@ def _get_next_sol_idx(self, batch_idx, immediate_end): return self._next_sol_idxs[batch_idx].item() def get_next_skill( - self, observations, rnn_hidden_states, prev_actions, masks, plan_masks + self, + observations, + rnn_hidden_states, + prev_actions, + masks, + plan_masks, + deterministic, + log_info, ): - """ - Get the next skill to be executed. - - Args: - observations: Current observations. - rnn_hidden_states: Current hidden states of the RNN. - prev_actions: Previous actions taken. - masks: Binary masks indicating which environment(s) are active. - plan_masks: Binary masks indicating which environment(s) should - plan the next skill. - - Returns: - A tuple containing: - - next_skill: Next skill to be executed. - - skill_args_data: Arguments for the next skill. - - immediate_end: Binary masks indicating which environment(s) should - end immediately. 
- """ next_skill = torch.zeros(self._num_envs) skill_args_data = [None for _ in range(self._num_envs)] immediate_end = torch.zeros(self._num_envs, dtype=torch.bool) @@ -134,4 +107,4 @@ def get_next_skill( self._next_sol_idxs[batch_idx] += 1 - return next_skill, skill_args_data, immediate_end + return next_skill, skill_args_data, immediate_end, {} diff --git a/habitat-baselines/habitat_baselines/rl/hrl/hl/high_level_policy.py b/habitat-baselines/habitat_baselines/rl/hrl/hl/high_level_policy.py index 848ebd92e1..27bb1ac0f7 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/hl/high_level_policy.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/hl/high_level_policy.py @@ -1,15 +1,87 @@ -from typing import Any, List, Tuple +from typing import Any, Dict, List, Tuple +import gym.spaces as spaces import torch +import torch.nn as nn + +from habitat.tasks.rearrange.multi_task.pddl_domain import PddlProblem class HighLevelPolicy: + def __init__( + self, + config, + pddl_problem: PddlProblem, + num_envs: int, + skill_name_to_idx: Dict[int, str], + observation_space: spaces.Space, + action_space: spaces.Space, + ): + self._config = config + self._pddl_prob = pddl_problem + self._num_envs = num_envs + self._skill_name_to_idx = skill_name_to_idx + self._obs_space = observation_space + self._device = None + + def to(self, device): + self._device = device + + @property + def num_recurrent_layers(self): + return 0 + + def parameters(self): + return nn.Parameter(torch.zeros((1,), device=self._device)) + def get_next_skill( - self, observations, rnn_hidden_states, prev_actions, masks, plan_masks - ) -> Tuple[torch.Tensor, List[Any], torch.BoolTensor]: + self, + observations, + rnn_hidden_states: torch.Tensor, + prev_actions: torch.Tensor, + masks: torch.Tensor, + plan_masks: torch.Tensor, + deterministic: bool, + log_info: List[Dict[str, Any]], + ) -> Tuple[torch.Tensor, List[Any], torch.BoolTensor, Dict[str, Any]]: """ - :returns: A tuple containing the next skill index, a list of arguments - for the skill, and if the high-level policy requests immediate - termination. + Get the next skill to be executed. + + Args: + observations: Current observations. + rnn_hidden_states: Current hidden states of the RNN. + prev_actions: Previous actions taken. + masks: Binary masks indicating which environment(s) are active. + plan_masks: Binary masks indicating which environment(s) should + plan the next skill. + + Returns: + A tuple containing: + - next_skill: Next skill to be executed. + - skill_args_data: Arguments for the next skill. + - immediate_end: Binary masks indicating which environment(s) should + end immediately. + - Information for PolicyAction """ raise NotImplementedError() + + def apply_mask(self, mask: torch.Tensor) -> None: + pass + + def get_termination( + self, + observations, + rnn_hidden_states, + prev_actions, + masks, + cur_skills, + log_info, + ) -> torch.BoolTensor: + """ + Can force the currently executing skill to terminate. + + Returns: A binary tensor where 1 indicates the current skill should + terminate and 0 indicates the skill can continue. 
+ """ + + return torch.zeros(self._num_envs, dtype=torch.bool) diff --git a/habitat-baselines/habitat_baselines/rl/hrl/hl/neural_policy.py b/habitat-baselines/habitat_baselines/rl/hrl/hl/neural_policy.py new file mode 100644 index 0000000000..c855e4fcc5 --- /dev/null +++ b/habitat-baselines/habitat_baselines/rl/hrl/hl/neural_policy.py @@ -0,0 +1,175 @@ +from itertools import chain +from typing import Any, List + +import gym.spaces as spaces +import numpy as np +import torch +import torch.nn as nn + +from habitat.tasks.rearrange.multi_task.pddl_action import PddlAction +from habitat_baselines.rl.ddppo.policy import resnet +from habitat_baselines.rl.ddppo.policy.resnet_policy import ResNetEncoder +from habitat_baselines.rl.hrl.hl.high_level_policy import HighLevelPolicy +from habitat_baselines.rl.models.rnn_state_encoder import ( + build_rnn_state_encoder, +) +from habitat_baselines.rl.ppo.policy import CriticHead +from habitat_baselines.utils.common import CategoricalNet + + +class NeuralHighLevelPolicy(HighLevelPolicy): + """ + A trained high-level policy that selects low-level skills and their skill + inputs. Is limited to discrete skills and discrete skill inputs. The policy + detects the available skills and their possible arguments via the PDDL + problem. + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self._all_actions = self._setup_actions() + self._n_actions = len(self._all_actions) + + use_obs_space = spaces.Dict( + { + k: self._obs_space.spaces[k] + for k in self._config.policy_input_keys + } + ) + self._im_obs_space = spaces.Dict( + {k: v for k, v in use_obs_space.items() if len(v.shape) == 3} + ) + + state_obs_space = { + k: v for k, v in use_obs_space.items() if len(v.shape) == 1 + } + self._state_obs_space = spaces.Dict(state_obs_space) + + rnn_input_size = sum( + v.shape[0] for v in self._state_obs_space.values() + ) + self._hidden_size = self._config.hidden_dim + if len(self._im_obs_space) > 0 and self._config.backbone != "NONE": + resnet_baseplanes = 32 + self._visual_encoder = ResNetEncoder( + self._im_obs_space, + baseplanes=resnet_baseplanes, + ngroups=resnet_baseplanes // 2, + make_backbone=getattr(resnet, self._config.backbone), + ) + self._visual_fc = nn.Sequential( + nn.Flatten(), + nn.Linear( + np.prod(self._visual_encoder.output_shape), + self._hidden_size, + ), + nn.ReLU(True), + ) + rnn_input_size += self._hidden_size + else: + self._visual_encoder = nn.Sequential() + self._visual_fc = nn.Sequential() + + self._state_encoder = build_rnn_state_encoder( + rnn_input_size, + self._hidden_size, + rnn_type=self._config.rnn_type, + num_layers=self._config.num_rnn_layers, + ) + self._policy = CategoricalNet(self._hidden_size, self._n_actions) + self._critic = CriticHead(self._hidden_size) + + def _setup_actions(self) -> List[PddlAction]: + all_actions = self._pddl_prob.get_possible_actions() + if not self._config.allow_other_place: + all_actions = [ + ac + for ac in all_actions + if ( + ac.name != "place" + or ac.param_values[0].name in ac.param_values[1].name + ) + ] + return all_actions + + @property + def num_recurrent_layers(self): + return self._state_encoder.num_recurrent_layers + + def parameters(self): + return chain( + self._visual_encoder.parameters(), + self._visual_fc.parameters(), + self._policy.parameters(), + self._state_encoder.parameters(), + self._critic.parameters(), + ) + + def forward(self, obs, rnn_hidden_states, masks): + hidden = [] + if len(self._im_obs_space) > 0: + im_obs = {k: obs[k] for k in 
self._im_obs_space.keys()} + visual_features = self._visual_encoder(im_obs) + visual_features = self._visual_fc(visual_features) + hidden.append(visual_features) + + if len(self._state_obs_space) > 0: + hidden.extend([obs[k] for k in self._state_obs_space.keys()]) + hidden = torch.cat(hidden, -1) + + return self._state_encoder(hidden, rnn_hidden_states, masks) + + def to(self, device): + self._device = device + return super().to(device) + + def get_value(self, observations, rnn_hidden_states, prev_actions, masks): + state, _ = self.forward(observations, rnn_hidden_states, masks) + return self._critic(state) + + def get_next_skill( + self, + observations, + rnn_hidden_states, + prev_actions, + masks, + plan_masks, + deterministic, + log_info, + ): + next_skill = torch.zeros(self._num_envs, dtype=torch.long) + skill_args_data: List[Any] = [None for _ in range(self._num_envs)] + immediate_end = torch.zeros(self._num_envs, dtype=torch.bool) + + state, rnn_hidden_states = self.forward( + observations, rnn_hidden_states, masks + ) + distrib = self._policy(state) + values = self._critic(state) + if deterministic: + skill_sel = distrib.mode() + else: + skill_sel = distrib.sample() + action_log_probs = distrib.log_probs(skill_sel) + + for batch_idx, should_plan in enumerate(plan_masks): + if should_plan != 1.0: + continue + use_ac = self._all_actions[skill_sel[batch_idx]] + next_skill[batch_idx] = self._skill_name_to_idx[use_ac.name] + skill_args_data[batch_idx] = [ + entity.name for entity in use_ac.param_values + ] + log_info[batch_idx]["nn_action"] = use_ac.compact_str + + return ( + next_skill, + skill_args_data, + immediate_end, + { + "action_log_probs": action_log_probs, + "values": values, + "actions": skill_sel, + "rnn_hidden_states": rnn_hidden_states, + }, + ) diff --git a/habitat-baselines/habitat_baselines/rl/hrl/skills/__init__.py b/habitat-baselines/habitat_baselines/rl/hrl/skills/__init__.py index 1b1536f999..1187ac3cd9 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/skills/__init__.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/skills/__init__.py @@ -5,6 +5,7 @@ from habitat_baselines.rl.hrl.skills.art_obj import ArtObjSkillPolicy from habitat_baselines.rl.hrl.skills.nav import NavSkillPolicy from habitat_baselines.rl.hrl.skills.nn_skill import NnSkillPolicy +from habitat_baselines.rl.hrl.skills.noop import NoopSkillPolicy from habitat_baselines.rl.hrl.skills.oracle_nav import OracleNavPolicy from habitat_baselines.rl.hrl.skills.pick import PickSkillPolicy from habitat_baselines.rl.hrl.skills.place import PlaceSkillPolicy @@ -22,4 +23,5 @@ "ResetArmSkill", "SkillPolicy", "WaitSkillPolicy", + "NoopSkillPolicy", ] diff --git a/habitat-baselines/habitat_baselines/rl/hrl/skills/nn_skill.py b/habitat-baselines/habitat_baselines/rl/hrl/skills/nn_skill.py index be9ebb2258..ed153443f6 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/skills/nn_skill.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/skills/nn_skill.py @@ -14,6 +14,7 @@ from habitat_baselines.common.tensor_dict import TensorDict from habitat_baselines.config.default import get_config from habitat_baselines.rl.hrl.skills.skill import SkillPolicy +from habitat_baselines.rl.ppo.policy import PolicyAction from habitat_baselines.utils.common import get_num_actions @@ -118,7 +119,7 @@ def _internal_act( masks, cur_batch_idx, deterministic=False, - ): + ) -> PolicyAction: filtered_obs = self._get_filtered_obs(observations, cur_batch_idx) filtered_prev_actions = prev_actions[ @@ -126,7 +127,7 @@ def 
_internal_act( ] filtered_obs = self._select_obs(filtered_obs, cur_batch_idx) - _, action, _, rnn_hidden_states = self._wrap_policy.act( + action_data = self._wrap_policy.act( filtered_obs, rnn_hidden_states, filtered_prev_actions, @@ -134,11 +135,15 @@ def _internal_act( deterministic, ) full_action = torch.zeros(prev_actions.shape, device=masks.device) - full_action[:, self._ac_start : self._ac_start + self._ac_len] = action + full_action[ + :, self._ac_start : self._ac_start + self._ac_len + ] = action_data.actions + action_data.actions = full_action + self._did_want_done[cur_batch_idx] = full_action[ cur_batch_idx, self._stop_action_idx ] - return full_action, rnn_hidden_states + return action_data @classmethod def from_config( diff --git a/habitat-baselines/habitat_baselines/rl/hrl/skills/noop.py b/habitat-baselines/habitat_baselines/rl/hrl/skills/noop.py new file mode 100644 index 0000000000..6456344cb9 --- /dev/null +++ b/habitat-baselines/habitat_baselines/rl/hrl/skills/noop.py @@ -0,0 +1,39 @@ +from typing import Any + +import gym.spaces as spaces +import torch + +from habitat_baselines.rl.hrl.skills.skill import SkillPolicy +from habitat_baselines.rl.ppo.policy import PolicyAction + + +class NoopSkillPolicy(SkillPolicy): + def __init__( + self, + config, + action_space: spaces.Space, + batch_size, + ): + super().__init__(config, action_space, batch_size, False) + + def _parse_skill_arg(self, *args, **kwargs) -> Any: + pass + + def _is_skill_done( + self, observations, rnn_hidden_states, prev_actions, masks, batch_idx + ) -> torch.BoolTensor: + return torch.zeros(masks.size(0), dtype=torch.bool) + + def _internal_act( + self, + observations, + rnn_hidden_states, + prev_actions, + masks, + cur_batch_idx, + full_action, + deterministic=False, + ): + return PolicyAction( + actions=full_action, rnn_hidden_states=rnn_hidden_states + ) diff --git a/habitat-baselines/habitat_baselines/rl/hrl/skills/oracle_nav.py b/habitat-baselines/habitat_baselines/rl/hrl/skills/oracle_nav.py index 7ba03a0bd7..e58a68daad 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/skills/oracle_nav.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/skills/oracle_nav.py @@ -17,6 +17,7 @@ from habitat_baselines.common.logging import baselines_logger from habitat_baselines.rl.hrl.skills.nn_skill import NnSkillPolicy from habitat_baselines.rl.hrl.utils import find_action_range +from habitat_baselines.rl.ppo.policy import PolicyAction class OracleNavPolicy(NnSkillPolicy): @@ -87,7 +88,7 @@ def from_config( cls, config, observation_space, action_space, batch_size, full_config ): filtered_action_space = ActionSpace( - {config.NAV_ACTION_NAME: action_space[config.NAV_ACTION_NAME]} + {config.nav_action_name: action_space[config.nav_action_name]} ) baselines_logger.debug( f"Loaded action space {filtered_action_space} for skill {config.skill_name}" @@ -184,4 +185,6 @@ def _internal_act( full_action[:, self._oracle_nav_ac_idx] = action_idxs - return full_action, rnn_hidden_states + return PolicyAction( + actions=full_action, rnn_hidden_states=rnn_hidden_states + ) diff --git a/habitat-baselines/habitat_baselines/rl/hrl/skills/pick.py b/habitat-baselines/habitat_baselines/rl/hrl/skills/pick.py index 3f88f1b1c4..4efb63d415 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/skills/pick.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/skills/pick.py @@ -9,6 +9,7 @@ RelativeRestingPositionSensor, ) from habitat_baselines.rl.hrl.skills.nn_skill import NnSkillPolicy +from habitat_baselines.rl.ppo.policy import 
PolicyAction class PickSkillPolicy(NnSkillPolicy): @@ -33,12 +34,12 @@ def _parse_skill_arg(self, skill_arg): self._internal_log(f"Parsing skill argument {skill_arg}") return int(skill_arg[0].split("|")[1]) - def _mask_pick(self, action, observations): + def _mask_pick(self, action: PolicyAction, observations) -> PolicyAction: # Mask out the release if the object is already held. is_holding = observations[IsHoldingSensor.cls_uuid].view(-1) for i in torch.nonzero(is_holding): # Do not release the object once it is held - action[i, self._grip_ac_idx] = 1.0 + action.actions[i, self._grip_ac_idx] = 1.0 return action def _internal_act( @@ -50,7 +51,7 @@ def _internal_act( cur_batch_idx, deterministic=False, ): - action, hxs = super()._internal_act( + action = super()._internal_act( observations, rnn_hidden_states, prev_actions, @@ -59,4 +60,4 @@ def _internal_act( deterministic, ) action = self._mask_pick(action, observations) - return action, hxs + return action diff --git a/habitat-baselines/habitat_baselines/rl/hrl/skills/reset.py b/habitat-baselines/habitat_baselines/rl/hrl/skills/reset.py index 9e33f713c0..7d17aead32 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/skills/reset.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/skills/reset.py @@ -9,6 +9,7 @@ import torch from habitat_baselines.rl.hrl.skills.skill import SkillPolicy +from habitat_baselines.rl.ppo.policy import PolicyAction from habitat_baselines.utils.common import get_num_actions @@ -93,4 +94,6 @@ def _internal_act( delta ).to(device=action.device, dtype=action.dtype) - return action, rnn_hidden_states + return PolicyAction( + actions=action, rnn_hidden_states=rnn_hidden_states + ) diff --git a/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py b/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py index 7b857288c5..b0fd2ddb42 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py @@ -10,7 +10,7 @@ from habitat.tasks.rearrange.rearrange_sensors import IsHoldingSensor from habitat_baselines.common.logging import baselines_logger from habitat_baselines.rl.hrl.utils import find_action_range -from habitat_baselines.rl.ppo.policy import Policy +from habitat_baselines.rl.ppo.policy import Policy, PolicyAction from habitat_baselines.utils.common import get_num_actions @@ -38,6 +38,14 @@ def __init__( None for _ in range(self._batch_size) ] + if "pddl_apply_action" in action_space: + self._pddl_ac_start, _ = find_action_range( + action_space, "pddl_apply_action" + ) + else: + self._pddl_ac_start = None + self._delay_term = [None for _ in range(self._batch_size)] + self._grip_ac_idx = 0 found_grip = False for k, space in action_space.items(): @@ -68,8 +76,8 @@ def _get_multi_sensor_index(self, batch_idx: List[int]) -> List[int]: return [self._cur_skill_args[i] for i in batch_idx] def _keep_holding_state( - self, full_action: torch.Tensor, observations - ) -> torch.Tensor: + self, action_data: PolicyAction, observations + ) -> PolicyAction: """ Makes the action so it does not result in dropping or picking up an object. Used in navigation and other skills which are not supposed to @@ -79,8 +87,51 @@ def _keep_holding_state( is_holding = observations[IsHoldingSensor.cls_uuid].view(-1) # If it is not holding (0) want to keep releasing -> output -1. # If it is holding (1) want to keep grasping -> output +1. 
- full_action[:, self._grip_ac_idx] = is_holding + (is_holding - 1.0) - return full_action + action_data.write_action( + self._grip_ac_idx, is_holding + (is_holding - 1.0) + ) + return action_data + + def _apply_postcond( + self, + actions, + log_info, + skill_name, + env_i, + idx, + ): + skill_args = self._raw_skill_args[env_i] + action = self._pddl_problem.actions[skill_name] + + entities = [self._pddl_problem.get_entity(x) for x in skill_args] + + ac_idx = self._pddl_ac_start + found = False + for other_action in self._action_ordering: + if other_action.name != action.name: + ac_idx += other_action.n_args + else: + found = True + break + if not found: + raise ValueError(f"Could not find action {action}") + + entity_idxs = [ + self._entities_list.index(entity) + 1 for entity in entities + ] + if len(entity_idxs) != action.n_args: + raise ValueError( + f"Inconsistent # of args {action.n_args} versus {entity_idxs} for {action} with {skill_args} and {entities}" + ) + + actions[idx, ac_idx : ac_idx + action.n_args] = torch.tensor( + entity_idxs, dtype=actions.dtype, device=actions.device + ) + apply_action = action.clone() + apply_action.set_param_values(entities) + + log_info[env_i]["pddl_action"] = apply_action.compact_str + return actions def should_terminate( self, @@ -88,7 +139,10 @@ def should_terminate( rnn_hidden_states, prev_actions, masks, - batch_idx, + hl_says_term, + batch_idx: List[int], + skill_name: List[str], + log_info, ) -> Tuple[torch.BoolTensor, torch.BoolTensor]: """ :returns: A (batch_size,) size tensor where 1 indicates the skill wants to end and 0 if not. @@ -114,6 +168,22 @@ def should_terminate( else: is_skill_done = is_skill_done | over_max_len + # for i, env_i in enumerate(batch_idx): + # if self._delay_term[env_i]: + # self._delay_term[env_i] = False + # is_skill_done[i] = 1.0 + # elif ( + # self._config.apply_postconds + # and is_skill_done[i] == 1.0 + # and hl_says_term[i] == 0.0 + # ): + # actions = self._apply_postcond( + # actions, log_info, skill_name[i], env_i, i + # ) + # self._delay_term[env_i] = True + # is_skill_done[i] = 0.0 + is_skill_done |= hl_says_term + if bad_terminate.sum() > 0: self._internal_log( f"Bad terminating due to timeout {self._cur_skill_step}, {bad_terminate}", @@ -147,6 +217,11 @@ def on_enter( prev_actions[batch_idxs] * 0.0, ) + def set_pddl_problem(self, pddl_prob): + self._pddl_problem = pddl_prob + self._entities_list = self._pddl_problem.get_ordered_entities_list() + self._action_ordering = self._pddl_problem.get_ordered_actions() + @classmethod def from_config( cls, config, observation_space, action_space, batch_size, full_config @@ -166,7 +241,7 @@ def act( :returns: Predicted action and next rnn hidden state. 
""" self._cur_skill_step[cur_batch_idx] += 1 - action, hxs = self._internal_act( + action_data = self._internal_act( observations, rnn_hidden_states, prev_actions, @@ -176,8 +251,8 @@ def act( ) if self._should_keep_hold_state: - action = self._keep_holding_state(action, observations) - return action, hxs + action_data = self._keep_holding_state(action_data, observations) + return action_data def to(self, device): self._cur_skill_step = self._cur_skill_step.to(device) @@ -196,9 +271,7 @@ def _select_obs(self, obs, cur_batch_idx): ) entity_positions = obs[k].view( - len(cur_batch_idx), - -1, - self._config.get("obs_skill_input_dim", 3), + len(cur_batch_idx), -1, self._config.obs_skill_input_dim ) obs[k] = entity_positions[ torch.arange(len(cur_batch_idx)), cur_multi_sensor_index @@ -229,5 +302,5 @@ def _internal_act( masks, cur_batch_idx, deterministic=False, - ) -> Tuple[torch.Tensor, torch.Tensor]: + ) -> PolicyAction: raise NotImplementedError() diff --git a/habitat-baselines/habitat_baselines/rl/hrl/skills/wait.py b/habitat-baselines/habitat_baselines/rl/hrl/skills/wait.py index 6fb9a1b552..e8a7ca03e1 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/skills/wait.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/skills/wait.py @@ -8,6 +8,7 @@ import torch from habitat_baselines.rl.hrl.skills.skill import SkillPolicy +from habitat_baselines.rl.ppo.policy import PolicyAction class WaitSkillPolicy(SkillPolicy): @@ -40,4 +41,6 @@ def _internal_act( deterministic=False, ): action = torch.zeros(prev_actions.shape, device=prev_actions.device) - return action, rnn_hidden_states + return PolicyAction( + actions=action, rnn_hidden_states=rnn_hidden_states + ) diff --git a/habitat-baselines/habitat_baselines/rl/hrl/utils.py b/habitat-baselines/habitat_baselines/rl/hrl/utils.py index 7cebe7db67..0c22f3b183 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/utils.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/utils.py @@ -24,5 +24,5 @@ def find_action_range( break start_idx += get_num_actions(action_space[k]) if not found: - raise ValueError(f"Could not find stop action in {action_space}") + return None return start_idx, end_idx diff --git a/habitat-baselines/habitat_baselines/rl/ppo/policy.py b/habitat-baselines/habitat_baselines/rl/ppo/policy.py index 71a2dc170b..bfb8ac6f4a 100644 --- a/habitat-baselines/habitat_baselines/rl/ppo/policy.py +++ b/habitat-baselines/habitat_baselines/rl/ppo/policy.py @@ -4,7 +4,8 @@ # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. 
import abc -from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Union +from dataclasses import dataclass +from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Union import torch from gym import spaces @@ -30,6 +31,26 @@ from omegaconf import DictConfig +@dataclass +class PolicyAction: + rnn_hidden_states: torch.Tensor + actions: torch.Tensor + values: Optional[torch.Tensor] = None + action_log_probs: Optional[torch.Tensor] = None + take_actions: Optional[torch.Tensor] = None + policy_info: Optional[List[Dict[str, Any]]] = None + + def write_action(self, write_idx, write_action): + self.actions[:, write_idx] = write_action + + @property + def env_actions(self) -> torch.Tensor: + if self.take_actions is None: + return self.actions + else: + return self.take_actions + + class Policy(abc.ABC): action_distribution: nn.Module @@ -47,7 +68,9 @@ def num_recurrent_layers(self) -> int: def forward(self, *x): raise NotImplementedError - def get_policy_info(self, infos, dones) -> List[Dict[str, float]]: + def extract_policy_info( + self, action_data: PolicyAction, infos, dones + ) -> List[Dict[str, float]]: """ Gets the log information from the policy at the current time step. Currently only called during evaluation. The return list should be @@ -64,7 +87,7 @@ def act( prev_actions, masks, deterministic=False, - ): + ) -> PolicyAction: raise NotImplementedError @classmethod @@ -155,8 +178,12 @@ def act( action = distribution.sample() action_log_probs = distribution.log_probs(action) - - return value, action, action_log_probs, rnn_hidden_states + return PolicyAction( + values=value, + actions=action, + action_log_probs=action_log_probs, + rnn_hidden_states=rnn_hidden_states, + ) def get_value(self, observations, rnn_hidden_states, prev_actions, masks): features, _, _ = self.net( diff --git a/habitat-baselines/habitat_baselines/rl/ppo/ppo_trainer.py b/habitat-baselines/habitat_baselines/rl/ppo/ppo_trainer.py index 8f999efea6..42bb765815 100644 --- a/habitat-baselines/habitat_baselines/rl/ppo/ppo_trainer.py +++ b/habitat-baselines/habitat_baselines/rl/ppo/ppo_trainer.py @@ -469,12 +469,7 @@ def _compute_actions_and_step_envs(self, buffer_index: int = 0): ] profiling_wrapper.range_push("compute actions") - ( - values, - actions, - actions_log_probs, - recurrent_hidden_states, - ) = self.actor_critic.act( + action_data = self.actor_critic.act( step_batch["observations"], step_batch["recurrent_hidden_states"], step_batch["prev_actions"], @@ -488,7 +483,8 @@ def _compute_actions_and_step_envs(self, buffer_index: int = 0): t_step_env = time.time() for index_env, act in zip( - range(env_slice.start, env_slice.stop), actions.cpu().unbind(0) + range(env_slice.start, env_slice.stop), + action_data.env_actions.cpu().unbind(0), ): if is_continuous_action_space(self.policy_action_space): # Clipping actions to the specified limits @@ -504,10 +500,10 @@ def _compute_actions_and_step_envs(self, buffer_index: int = 0): self.env_time += time.time() - t_step_env self.rollouts.insert( - next_recurrent_hidden_states=recurrent_hidden_states, - actions=actions, - action_log_probs=actions_log_probs, - value_preds=values, + next_recurrent_hidden_states=action_data.rnn_hidden_states, + actions=action_data.actions, + action_log_probs=action_data.action_log_probs, + value_preds=action_data.values, buffer_index=buffer_index, ) @@ -1052,20 +1048,16 @@ def _eval_checkpoint( current_episodes_info = self.envs.current_episodes() with inference_mode(): - ( - _, - actions, - _, -
test_recurrent_hidden_states, - ) = self.actor_critic.act( + action_data = self.actor_critic.act( batch, test_recurrent_hidden_states, prev_actions, not_done_masks, deterministic=False, ) + test_recurrent_hidden_states = action_data.rnn_hidden_states - prev_actions.copy_(actions) # type: ignore + prev_actions.copy_(action_data.actions) # type: ignore # NB: Move actions to CPU. If CUDA tensors are # sent in to env.step(), that will create CUDA contexts # in the subprocesses. @@ -1077,19 +1069,21 @@ def _eval_checkpoint( self.policy_action_space.low, self.policy_action_space.high, ) - for a in actions.cpu() + for a in action_data.env_actions.cpu() ] else: - step_data = [a.item() for a in actions.cpu()] + step_data = [a.item() for a in action_data.env_actions.cpu()] outputs = self.envs.step(step_data) observations, rewards_l, dones, infos = [ list(x) for x in zip(*outputs) ] - policy_info = self.actor_critic.get_policy_info(infos, dones) - for i in range(len(policy_info)): - infos[i].update(policy_info[i]) + policy_infos = self.actor_critic.extract_policy_info( + action_data, infos, dones + ) + for i in range(len(policy_infos)): + infos[i].update(policy_infos[i]) batch = batch_obs( # type: ignore observations, device=self.device, diff --git a/habitat-baselines/habitat_baselines/rl/ver/inference_worker.py b/habitat-baselines/habitat_baselines/rl/ver/inference_worker.py index bd674a00a9..56ef3f7025 100644 --- a/habitat-baselines/habitat_baselines/rl/ver/inference_worker.py +++ b/habitat-baselines/habitat_baselines/rl/ver/inference_worker.py @@ -323,6 +323,7 @@ def step(self) -> Tuple[bool, List[Tuple[int, int]]]: actions, actions_log_probs, next_recurrent_hidden_states, + policy_info, ) = self.actor_critic.act( obs, recurrent_hidden_states, diff --git a/habitat-lab/habitat/tasks/rearrange/multi_task/pddl_action.py b/habitat-lab/habitat/tasks/rearrange/multi_task/pddl_action.py index c1ba9d413d..00598f6d58 100644 --- a/habitat-lab/habitat/tasks/rearrange/multi_task/pddl_action.py +++ b/habitat-lab/habitat/tasks/rearrange/multi_task/pddl_action.py @@ -162,7 +162,7 @@ def apply(self, sim_info: PddlSimInfo) -> None: p.set_state(sim_info) @property - def param_values(self): + def param_values(self) -> Optional[List[PddlEntity]]: if self._param_values is None: raise ValueError( "Accessing action param values before they are set." diff --git a/test/test_baseline_training.py b/test/test_baseline_training.py index b129bbff75..eacd10759d 100644 --- a/test/test_baseline_training.py +++ b/test/test_baseline_training.py @@ -13,6 +13,7 @@ from habitat.config import read_write from habitat.config.default import get_agent_config +from habitat_baselines.run import execute_exp try: import torch @@ -148,6 +149,63 @@ def test_trainers(config_path, num_updates, overrides, trainer_name): # Training should complete without raising an error. 
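The refactor above replaces the loose `(values, actions, log_probs, hidden_states)` tuples with a single `PolicyAction` container that the trainer and eval loop read fields from. A minimal sketch (not part of the patch) of the `env_actions` fallback the rollout loop relies on:

```python
# Sketch only: exercising the PolicyAction container added in policy.py.
import torch

from habitat_baselines.rl.ppo.policy import PolicyAction

action_data = PolicyAction(
    rnn_hidden_states=torch.zeros(2, 1, 4),  # arbitrary toy shapes
    actions=torch.zeros(2, 8),
)
# With no separate take_actions tensor, env_actions falls back to actions,
# which is what the trainer sends to env.step().
assert action_data.env_actions is action_data.actions
```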
+@pytest.mark.parametrize( + "config_path,mode,trainer_name", + [ + ( + "habitat-baselines/habitat_baselines/config/rearrange/rl_hl_srl_onav.yaml", + "eval", + "ppo", + ), + ( + "habitat-baselines/habitat_baselines/config/rearrange/tp_srl_oracle_nav.yaml", + "eval", + "ppo", + ), + ( + "habitat-baselines/habitat_baselines/config/rearrange/tp_srl.yaml", + "eval", + "ppo", + ), + ], +) +def test_hrl(config_path, mode, trainer_name): + # Remove the checkpoints from previous tests + for f in glob.glob("data/test_checkpoints/test_training/*"): + os.remove(f) + + # Setup the training + config = get_config( + config_path, + [ + "habitat_baselines.num_updates=1", + "habitat_baselines.eval.split=minival", + "habitat_baselines.total_num_steps=-1.0", + "habitat_baselines.test_episode_count=1", + "habitat_baselines.checkpoint_folder=data/test_checkpoints/test_training", + f"habitat_baselines.trainer_name={trainer_name}", + ], + ) + with read_write(config): + config.habitat_baselines.eval.update({"video_option": []}) + for ( + skill_name, + skill, + ) in ( + config.habitat_baselines.rl.policy.hierarchical_policy.defined_skills.items() + ): + if skill.load_ckpt_file == "": + continue + skill.update( + { + "force_config_file": f"benchmark/rearrange={skill_name}", + "max_skill_steps": 1, + "load_ckpt_file": "", + } + ) + execute_exp(config, mode) + + @pytest.mark.skipif( int(os.environ.get("TEST_BASELINE_SMALL", 0)) == 0, reason="Full training tests did not run. Need `export TEST_BASELINE_SMALL=1", diff --git a/test/test_rearrange_task.py b/test/test_rearrange_task.py index ed2fed6306..99d012871c 100644 --- a/test/test_rearrange_task.py +++ b/test/test_rearrange_task.py @@ -4,16 +4,12 @@ # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. -import gc -import itertools import json -import os import os.path as osp import time from glob import glob import pytest -import torch import yaml from omegaconf import DictConfig, OmegaConf @@ -29,8 +25,6 @@ from habitat.datasets.rearrange.rearrange_dataset import RearrangeDatasetV0 from habitat.tasks.rearrange.multi_task.composite_task import CompositeTask from habitat_baselines.config.default import get_config as baselines_get_config -from habitat_baselines.rl.ddppo.ddp_utils import find_free_port -from habitat_baselines.run import run_exp CFG_TEST = "benchmark/rearrange/pick.yaml" GEN_TEST_CFG = ( @@ -109,33 +103,6 @@ def test_rearrange_baseline_envs(test_cfg_path): ) -@pytest.mark.parametrize( - "test_cfg_path", - list( - glob("habitat-lab/habitat/config/benchmark/rearrange/*"), - ), -) -def test_rearrange_tasks(test_cfg_path): - """ - Test the underlying Habitat Tasks - """ - if not osp.isfile(test_cfg_path): - return - - config = get_config(test_cfg_path) - if ( - config.habitat.dataset.data_path - == "data/ep_datasets/bench_scene.json.gz" - ): - pytest.skip( - "This config is only useful for examples and does not have the generated dataset" - ) - - with habitat.Env(config=config) as env: - for _ in range(5): - env.reset() - - @pytest.mark.parametrize( "test_cfg_path", list( @@ -209,32 +176,3 @@ def test_rearrange_episode_generator( logger.info( f"successful_ep = {len(dataset.episodes)} generated in {time.time()-start_time} seconds." 
) - - -@pytest.mark.parametrize( - "test_cfg_path,mode", - list( - itertools.product( - glob("habitat-baselines/habitat_baselines/config/tp_srl_test/*"), - ["eval"], - ) - ), -) -def test_tp_srl(test_cfg_path, mode): - # For testing with world_size=1 - os.environ["MAIN_PORT"] = str(find_free_port()) - - run_exp( - test_cfg_path.replace( - "habitat-baselines/habitat_baselines/config/", "" - ), - mode, - ["habitat_baselines.eval.split=train"], - ) - - # Needed to destroy the trainer - gc.collect() - - # Deinit processes group - if torch.distributed.is_initialized(): - torch.distributed.destroy_process_group() From f6d1f110ad192cc500defbfd91a15f7d40cf6f77 Mon Sep 17 00:00:00 2001 From: ASzot Date: Mon, 26 Dec 2022 11:58:06 +0100 Subject: [PATCH 02/48] Working on HRL trainer --- .../common/baseline_registry.py | 16 ++ .../common/rollout_storage.py | 3 + .../config/default_structured_configs.py | 3 + .../config/rearrange/rl_hl_srl_onav.yaml | 45 ++-- .../config/rearrange/tp_srl_oracle_nav.yaml | 27 +- .../habitat_baselines/rl/ddppo/algo/ddppo.py | 2 + .../habitat_baselines/rl/hrl/__init__.py | 8 + .../rl/hrl/hierarchical_policy.py | 15 +- .../rl/hrl/hl/high_level_policy.py | 5 +- .../habitat_baselines/rl/hrl/hrl_ppo.py | 128 +++++++++ .../rl/hrl/hrl_rollout_storage.py | 159 +++++++++++ .../rl/hrl/skills/oracle_nav.py | 2 +- .../habitat_baselines/rl/hrl/skills/skill.py | 51 ++-- .../habitat_baselines/rl/ppo/policy.py | 1 + .../habitat_baselines/rl/ppo/ppo.py | 255 +++++++++--------- .../habitat_baselines/rl/ppo/ppo_trainer.py | 27 +- .../config/default_structured_configs.py | 22 +- .../habitat/task/rearrange/rearrange.yaml | 1 + .../task/rearrange/rearrange_easy.yaml | 1 + .../tasks/rearrange/actions/pddl_actions.py | 32 ++- .../tasks/rearrange/sub_tasks/pick_task.py | 2 +- test/test_baseline_training.py | 28 +- 22 files changed, 613 insertions(+), 220 deletions(-) create mode 100644 habitat-baselines/habitat_baselines/rl/hrl/hrl_ppo.py create mode 100644 habitat-baselines/habitat_baselines/rl/hrl/hrl_rollout_storage.py diff --git a/habitat-baselines/habitat_baselines/common/baseline_registry.py b/habitat-baselines/habitat_baselines/common/baseline_registry.py index 5038cf91a5..7ff89a28b6 100644 --- a/habitat-baselines/habitat_baselines/common/baseline_registry.py +++ b/habitat-baselines/habitat_baselines/common/baseline_registry.py @@ -136,5 +136,21 @@ def register_auxiliary_loss( def get_auxiliary_loss(cls, name: str): return cls._get_impl("aux_loss", name) + @classmethod + def register_storage(cls, to_register=None, *, name: Optional[str] = None): + return cls._register_impl("storage", to_register, name) + + @classmethod + def get_storage(cls, name: str): + return cls._get_impl("storage", name) + + @classmethod + def register_updater(cls, to_register=None, *, name: Optional[str] = None): + return cls._register_impl("updater", to_register, name) + + @classmethod + def get_updater(cls, name: str): + return cls._get_impl("updater", name) + baseline_registry = BaselineRegistry() diff --git a/habitat-baselines/habitat_baselines/common/rollout_storage.py b/habitat-baselines/habitat_baselines/common/rollout_storage.py index b9b2984820..8bc0b3b9f2 100644 --- a/habitat-baselines/habitat_baselines/common/rollout_storage.py +++ b/habitat-baselines/habitat_baselines/common/rollout_storage.py @@ -10,6 +10,7 @@ import numpy as np import torch +from habitat_baselines.common.baseline_registry import baseline_registry from habitat_baselines.common.tensor_dict import DictTree, TensorDict from 
habitat_baselines.rl.models.rnn_state_encoder import ( build_pack_info_from_dones, @@ -17,6 +18,7 @@ ) +@baseline_registry.register_storage class RolloutStorage: r"""Class for storing rollout information for RL trainers.""" @@ -116,6 +118,7 @@ def insert( rewards=None, next_masks=None, buffer_index: int = 0, + **kwargs, ): if not self.is_double_buffered: assert buffer_index == 0 diff --git a/habitat-baselines/habitat_baselines/config/default_structured_configs.py b/habitat-baselines/habitat_baselines/config/default_structured_configs.py index 56a51d6c82..699df3053e 100644 --- a/habitat-baselines/habitat_baselines/config/default_structured_configs.py +++ b/habitat-baselines/habitat_baselines/config/default_structured_configs.py @@ -404,6 +404,8 @@ class HabitatBaselinesConfig(HabitatBaselinesBaseConfig): # ) # cmd_trailing_opts: List[str] = field(default_factory=list) trainer_name: str = "ppo" + updater_name: str = "PPO" + distrib_updater_name: str = "DDPPO" torch_gpu_id: int = 0 video_render_views: List[str] = field(default_factory=list) tensorboard_dir: str = "tb" @@ -415,6 +417,7 @@ class HabitatBaselinesConfig(HabitatBaselinesBaseConfig): eval_ckpt_path_dir: str = "data/checkpoints" num_environments: int = 16 num_processes: int = -1 # deprecated + rollout_storage: str = "RolloutStorage" checkpoint_folder: str = "data/checkpoints" num_updates: int = 10000 num_checkpoints: int = 10 diff --git a/habitat-baselines/habitat_baselines/config/rearrange/rl_hl_srl_onav.yaml b/habitat-baselines/habitat_baselines/config/rearrange/rl_hl_srl_onav.yaml index 3f542bbf09..1675eadb6c 100644 --- a/habitat-baselines/habitat_baselines/config/rearrange/rl_hl_srl_onav.yaml +++ b/habitat-baselines/habitat_baselines/config/rearrange/rl_hl_srl_onav.yaml @@ -8,24 +8,43 @@ defaults: - /habitat_baselines/rl/policy/obs_transforms: - add_virtual_keys_base - /habitat/task/actions: + - pddl_apply_action - oracle_nav_action - arm_action - base_velocity - rearrange_stop - _self_ +habitat: + gym: + auto_name: RearrangeEasy + obs_keys: + - robot_head_depth + - relative_resting_position + - obj_start_sensor + - obj_goal_sensor + - obj_start_gps_compass + - obj_goal_gps_compass + - joint + - is_holding + - localization_sensor + + habitat_baselines: verbose: False trainer_name: "ddppo" torch_gpu_id: 0 tensorboard_dir: "tb" + rollout_storage: "HrlRolloutStorage" + updater_name: "HrlPPO" + distrib_updater_name: "HrlDDPPO" video_dir: "video_dir" video_fps: 30 video_render_views: - "third_rgb_sensor" test_episode_count: -1 eval_ckpt_path_dir: "" - num_environments: 1 + num_environments: 3 writer_type: 'tb' checkpoint_folder: "data/new_checkpoints" num_updates: -1 @@ -62,24 +81,19 @@ habitat_baselines: - "robot_head_depth" defined_skills: open_cab: - skill_name: "ArtObjSkillPolicy" - name: "PointNavResNetPolicy" - action_distribution_type: "gaussian" - load_ckpt_file: "data/models/open_cab.pth" - max_skill_steps: 200 + skill_name: "NoopSkillPolicy" + max_skill_steps: 1 apply_postconds: True open_fridge: - skill_name: "ArtObjSkillPolicy" - load_ckpt_file: "data/models/open_fridge.pth" - max_skill_steps: 200 + skill_name: "NoopSkillPolicy" + max_skill_steps: 1 apply_postconds: True close_cab: skill_name: "NoopSkillPolicy" obs_skill_inputs: ["obj_start_sensor"] - load_ckpt_file: "data/models/close_cab.pth" - max_skill_steps: 200 + max_skill_steps: 1 close_fridge: skill_name: "NoopSkillPolicy" @@ -116,11 +130,10 @@ habitat_baselines: force_end_on_timeout: False use_skills: - # Uncomment if you are also using these skills - # open_cab: 
"open_cab" - # open_fridge: "open_fridge" - # close_cab: "open_cab" - # close_fridge: "open_fridge" + open_cab: "open_cab" + open_fridge: "open_fridge" + close_cab: "close_cab" + close_fridge: "close_fridge" pick: "pick" place: "place" nav: "nav_to_obj" diff --git a/habitat-baselines/habitat_baselines/config/rearrange/tp_srl_oracle_nav.yaml b/habitat-baselines/habitat_baselines/config/rearrange/tp_srl_oracle_nav.yaml index 5e8b59a944..f456242704 100644 --- a/habitat-baselines/habitat_baselines/config/rearrange/tp_srl_oracle_nav.yaml +++ b/habitat-baselines/habitat_baselines/config/rearrange/tp_srl_oracle_nav.yaml @@ -12,20 +12,19 @@ defaults: - rearrange_stop - _self_ -AGENT_0_ORACLE_NAV_ACTION: - TYPE: "OracleNavAction" - TURN_VELOCITY: 1.0 - FORWARD_VELOCITY: 1.0 - TURN_THRESH: 0.1 - DIST_THRESH: 0.2 - AGENT: 0 - LIN_SPEED: 40.0 - ANG_SPEED: 20.0 - MIN_ABS_LIN_SPEED: 1.0 - MIN_ABS_ANG_SPEED: 1.0 - ALLOW_DYN_SLIDE: False - END_ON_STOP: False - ALLOW_BACK: True +habitat: + gym: + auto_name: RearrangeEasy + obs_keys: + - robot_head_depth + - relative_resting_position + - obj_start_sensor + - obj_goal_sensor + - obj_start_gps_compass + - obj_goal_gps_compass + - joint + - is_holding + - localization_sensor habitat_baselines: verbose: False diff --git a/habitat-baselines/habitat_baselines/rl/ddppo/algo/ddppo.py b/habitat-baselines/habitat_baselines/rl/ddppo/algo/ddppo.py index c7bacb6cd1..1e95c02b4b 100644 --- a/habitat-baselines/habitat_baselines/rl/ddppo/algo/ddppo.py +++ b/habitat-baselines/habitat_baselines/rl/ddppo/algo/ddppo.py @@ -10,6 +10,7 @@ import torch from torch import distributed as distrib +from habitat_baselines.common.baseline_registry import baseline_registry from habitat_baselines.rl.ppo import PPO @@ -130,5 +131,6 @@ def _evaluate_actions(self, *args, **kwargs): ) +@baseline_registry.register_updater class DDPPO(DecentralizedDistributedMixin, PPO): pass diff --git a/habitat-baselines/habitat_baselines/rl/hrl/__init__.py b/habitat-baselines/habitat_baselines/rl/hrl/__init__.py index 0f0db8cad5..75f7fadf5c 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/__init__.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/__init__.py @@ -3,3 +3,11 @@ # Copyright (c) Meta Platforms, Inc. and its affiliates. # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. 
+ +from habitat_baselines.rl.hrl.hrl_ppo import HrlPPO +from habitat_baselines.rl.hrl.hrl_rollout_storage import HrlRolloutStorage + +__all__ = [ + "HrlPPO", + "HrlRolloutStorage", +] diff --git a/habitat-baselines/habitat_baselines/rl/hrl/hierarchical_policy.py b/habitat-baselines/habitat_baselines/rl/hrl/hierarchical_policy.py index f9217051e4..b8f63ad9eb 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/hierarchical_policy.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/hierarchical_policy.py @@ -8,6 +8,7 @@ import gym.spaces as spaces import torch +import torch.nn as nn from habitat.core.spaces import ActionSpace from habitat.tasks.rearrange.multi_task.composite_sensors import ( @@ -38,7 +39,7 @@ @baseline_registry.register_policy -class HierarchicalPolicy(Policy): +class HierarchicalPolicy(nn.Module, Policy): def __init__( self, config, @@ -212,6 +213,11 @@ def act( self._cur_skills, log_info, ) + # Compute the actions from the current skills + actions = torch.zeros( + (self._num_envs, get_num_actions(self._action_space)), + device=masks.device, + ) grouped_skills = self._broadcast_skill_ids( self._cur_skills, @@ -220,6 +226,7 @@ def act( "rnn_hidden_states": rnn_hidden_states, "prev_actions": prev_actions, "masks": masks, + "actions": actions, "hl_says_term": hl_says_term, }, # Only decide on skill termination if the episode is active. @@ -289,12 +296,6 @@ def act( (~self._call_high_level) * self._cur_skills ) + (self._call_high_level * new_skills) - # Compute the actions from the current skills - actions = torch.zeros( - (self._num_envs, get_num_actions(self._action_space)), - device=masks.device, - ) - grouped_skills = self._broadcast_skill_ids( self._cur_skills, sel_dat={ diff --git a/habitat-baselines/habitat_baselines/rl/hrl/hl/high_level_policy.py b/habitat-baselines/habitat_baselines/rl/hrl/hl/high_level_policy.py index 27bb1ac0f7..5bbc2726ec 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/hl/high_level_policy.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/hl/high_level_policy.py @@ -7,7 +7,7 @@ from habitat.tasks.rearrange.multi_task.pddl_domain import PddlProblem -class HighLevelPolicy: +class HighLevelPolicy(nn.Module): def __init__( self, config, @@ -17,6 +17,7 @@ def __init__( observation_space: spaces.Space, action_space: spaces.Space, ): + super().__init__() self._config = config self._pddl_prob = pddl_problem self._num_envs = num_envs @@ -32,7 +33,7 @@ def num_recurrent_layers(self): return 0 def parameters(self): - return nn.Parameter(torch.zeros((1,), device=self._device)) + return iter([nn.Parameter(torch.zeros((1,), device=self._device))]) def get_next_skill( self, diff --git a/habitat-baselines/habitat_baselines/rl/hrl/hrl_ppo.py b/habitat-baselines/habitat_baselines/rl/hrl/hrl_ppo.py new file mode 100644 index 0000000000..706f2b9a57 --- /dev/null +++ b/habitat-baselines/habitat_baselines/rl/hrl/hrl_ppo.py @@ -0,0 +1,128 @@ +import torch +import torch.nn.functional as F + +from habitat_baselines.common.baseline_registry import baseline_registry +from habitat_baselines.rl.ddppo.algo.ddppo import DecentralizedDistributedMixin +from habitat_baselines.rl.ppo import PPO +from habitat_baselines.utils.common import ( + LagrangeInequalityCoefficient, + inference_mode, +) + + +@baseline_registry.register_updater +class HrlPPO(PPO): + def _update_from_batch(self, batch, epoch, rollouts, learner_metrics): + n_samples = max(batch["loss_mask"].sum(), 1) + + def record_min_mean_max(t: torch.Tensor, prefix: str): + for name, op in ( + ("min", torch.min), + 
("mean", torch.mean), + ("max", torch.max), + ): + learner_metrics[f"{prefix}_{name}"].append(op(t)) + + def reduce_loss(loss): + return (loss * batch["loss_mask"]).sum() / n_samples + + self._set_grads_to_none() + + ( + values, + action_log_probs, + dist_entropy, + _, + aux_loss_res, + ) = self._evaluate_actions( + batch["observations"], + batch["recurrent_hidden_states"], + batch["prev_actions"], + batch["masks"], + batch["actions"], + batch["rnn_build_seq_info"], + ) + + ratio = torch.exp(action_log_probs - batch["action_log_probs"]) + + surr1 = batch["advantages"] * ratio + surr2 = batch["advantages"] * ( + torch.clamp( + ratio, + 1.0 - self.clip_param, + 1.0 + self.clip_param, + ) + ) + action_loss = -torch.min(surr1, surr2) + action_loss = reduce_loss(action_loss) + + values = values.float() + orig_values = values + + if self.use_clipped_value_loss: + delta = values.detach() - batch["value_preds"] + value_pred_clipped = batch["value_preds"] + delta.clamp( + -self.clip_param, self.clip_param + ) + + values = torch.where( + delta.abs() < self.clip_param, + values, + value_pred_clipped, + ) + + value_loss = 0.5 * F.mse_loss( + values, batch["returns"], reduction="none" + ) + value_loss = reduce_loss(value_loss) + + all_losses = [ + self.value_loss_coef * value_loss, + action_loss, + ] + + dist_entropy = reduce_loss(dist_entropy) + if isinstance(self.entropy_coef, float): + all_losses.append(-self.entropy_coef * dist_entropy) + else: + all_losses.append(self.entropy_coef.lagrangian_loss(dist_entropy)) + + all_losses.extend(v["loss"] for v in aux_loss_res.values()) + + total_loss = torch.stack(all_losses).sum() + + total_loss = self.before_backward(total_loss) + total_loss.backward() + self.after_backward(total_loss) + + grad_norm = self.before_step() + self.optimizer.step() + self.after_step() + + with inference_mode(): + record_min_mean_max(orig_values, "value_pred") + record_min_mean_max(ratio, "prob_ratio") + + learner_metrics["value_loss"].append(value_loss) + learner_metrics["action_loss"].append(action_loss) + learner_metrics["dist_entropy"].append(dist_entropy) + if epoch == (self.ppo_epoch - 1): + learner_metrics["ppo_fraction_clipped"].append( + (ratio > (1.0 + self.clip_param)).float().mean() + + (ratio < (1.0 - self.clip_param)).float().mean() + ) + + learner_metrics["grad_norm"].append(grad_norm) + if isinstance(self.entropy_coef, LagrangeInequalityCoefficient): + learner_metrics["entropy_coef"].append( + self.entropy_coef().detach() + ) + + for name, res in aux_loss_res.items(): + for k, v in res.items(): + learner_metrics[f"aux_{name}_{k}"].append(v.detach()) + + +@baseline_registry.register_updater +class HrlDDPPO(DecentralizedDistributedMixin, HrlPPO): + pass diff --git a/habitat-baselines/habitat_baselines/rl/hrl/hrl_rollout_storage.py b/habitat-baselines/habitat_baselines/rl/hrl/hrl_rollout_storage.py new file mode 100644 index 0000000000..b7750c5493 --- /dev/null +++ b/habitat-baselines/habitat_baselines/rl/hrl/hrl_rollout_storage.py @@ -0,0 +1,159 @@ +#!/usr/bin/env python3 + +# Copyright (c) Facebook, Inc. and its affiliates. +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +from typing import Iterator + +import torch + +from habitat_baselines.common.baseline_registry import baseline_registry +from habitat_baselines.common.rollout_storage import RolloutStorage +from habitat_baselines.common.tensor_dict import TensorDict + +EPS_PPO = 1e-5 + + +@baseline_registry.register_policy +class HrlRolloutStorage(RolloutStorage): + def __init__(self, numsteps, num_envs, *args, **kwargs): + super().__init__(numsteps, num_envs, *args, **kwargs) + self._num_envs = num_envs + self._numsteps = numsteps + self._cur_step_idxs = torch.zeros(self._num_envs, dtype=torch.long) + self._last_should_inserts = None + if self.is_double_buffered: + raise ValueError( + "HRL storage does not support double buffered sampling" + ) + + def insert( + self, + next_observations=None, + next_recurrent_hidden_states=None, + actions=None, + action_log_probs=None, + value_preds=None, + rewards=None, + next_masks=None, + buffer_index: int = 0, + should_inserts=None, + ): + next_step = dict( + observations=next_observations, + recurrent_hidden_states=next_recurrent_hidden_states, + prev_actions=actions, + masks=next_masks, + ) + + current_step = dict( + actions=actions, + action_log_probs=action_log_probs, + value_preds=value_preds, + ) + + next_step = {k: v for k, v in next_step.items() if v is not None} + current_step = {k: v for k, v in current_step.items() if v is not None} + + if should_inserts is None: + should_inserts = self._last_should_inserts + assert should_inserts is not None + + if should_inserts.sum() == 0: + return + + env_idxs = torch.arange(self._num_envs) + if rewards is not None: + # Accumulate rewards between updates. + reward_write_idxs = torch.maximum( + self._cur_step_idxs - 1, torch.zeros_like(self._cur_step_idxs) + ) + self.buffers["rewards"][reward_write_idxs, env_idxs] += rewards.to( + self.device + ) + + if len(next_step) > 0: + self.buffers.set( + ( + self._cur_step_idxs[should_inserts] + 1, + env_idxs[should_inserts], + ), + next_step, + strict=False, + ) + + if len(current_step) > 0: + self.buffers.set( + ( + self._cur_step_idxs[should_inserts], + env_idxs[should_inserts], + ), + current_step, + strict=False, + ) + self._last_should_inserts = should_inserts + + def advance_rollout(self, buffer_index: int = 0): + self._cur_step_idxs += self._last_should_inserts.long() + + is_past_buffer = self._cur_step_idxs >= self._numsteps + if is_past_buffer.sum() > 0: + self._cur_step_idxs[is_past_buffer] = self._numsteps - 1 + env_idxs = torch.arange(self._num_envs) + self.buffers["rewards"][ + self._cur_step_idxs[is_past_buffer], env_idxs[is_past_buffer] + ] = 0.0 + + def after_update(self): + env_idxs = torch.arange(self._num_envs) + self.buffers[0] = self.buffers[self._cur_step_idxs, env_idxs] + self.buffers["masks"][1:] = False + self.buffers["rewards"][1:] = 0.0 + + self.current_rollout_step_idxs = [ + 0 for _ in self.current_rollout_step_idxs + ] + self._cur_step_idxs[:] = 0 + + def compute_returns(self, next_value, use_gae, gamma, tau): + if not use_gae: + raise ValueError("Only GAE is supported with HRL trainer") + + assert isinstance(self.buffers["value_preds"], torch.Tensor) + gae = 0.0 + for step in reversed(range(self._cur_step_idxs.max() - 1)): + delta = ( + self.buffers["rewards"][step] + + gamma + * self.buffers["value_preds"][step + 1] + * self.buffers["masks"][step + 1] + - self.buffers["value_preds"][step] + ) + gae = delta + gamma * tau * gae * self.buffers["masks"][step + 1] + self.buffers["returns"][step] = ( # type: ignore + gae + 
self.buffers["value_preds"][step] # type: ignore + ) + + def recurrent_generator( + self, advantages, num_batches + ) -> Iterator[TensorDict]: + num_environments = advantages.size(1) + for inds in torch.randperm(num_environments).chunk(num_batches): + batch = self.buffers[0 : self._numsteps, inds] + batch["advantages"] = advantages[: self._numsteps, inds] + batch["recurrent_hidden_states"] = batch[ + "recurrent_hidden_states" + ][0:1] + batch["loss_mask"] = ( + torch.arange(self._numsteps, device=advantages.device) + .view(-1, 1, 1) + .repeat(1, len(inds), 1) + ) + for i, env_i in enumerate(inds): + # The -1 is to throw out the last transition. + batch["loss_mask"][:, i] = ( + batch["loss_mask"][:, i] < self._cur_step_idxs[env_i] - 1 + ) + + yield batch.map(lambda v: v.flatten(0, 1)) diff --git a/habitat-baselines/habitat_baselines/rl/hrl/skills/oracle_nav.py b/habitat-baselines/habitat_baselines/rl/hrl/skills/oracle_nav.py index e58a68daad..374e17494c 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/skills/oracle_nav.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/skills/oracle_nav.py @@ -124,7 +124,7 @@ def _is_skill_done( prev_pos = self._prev_pos[batch_i] if prev_pos is not None: movement = torch.linalg.norm(prev_pos - cur_pos[i]) - ret[i] = movement < self._config.STOP_THRESH + ret[i] = movement < self._config.stop_thresh self._prev_pos[batch_i] = cur_pos[i] return ret diff --git a/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py b/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py index b0fd2ddb42..4e95a29221 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py @@ -44,7 +44,9 @@ def __init__( ) else: self._pddl_ac_start = None - self._delay_term = [None for _ in range(self._batch_size)] + self._delay_term: List[Optional[bool]] = [ + None for _ in range(self._batch_size) + ] self._grip_ac_idx = 0 found_grip = False @@ -104,6 +106,10 @@ def _apply_postcond( action = self._pddl_problem.actions[skill_name] entities = [self._pddl_problem.get_entity(x) for x in skill_args] + if self._pddl_ac_start is None: + raise ValueError( + "Apply post cond not supported when pddl action not in action space" + ) ac_idx = self._pddl_ac_start found = False @@ -139,6 +145,7 @@ def should_terminate( rnn_hidden_states, prev_actions, masks, + actions, hl_says_term, batch_idx: List[int], skill_name: List[str], @@ -156,37 +163,43 @@ def should_terminate( observations, ) + cur_skill_step = self._cur_skill_step[batch_idx] bad_terminate = torch.zeros( - self._cur_skill_step.shape, - device=self._cur_skill_step.device, + cur_skill_step.shape, + dtype=torch.bool, + ) + + bad_terminate = torch.zeros( + cur_skill_step.shape, + device=cur_skill_step.device, dtype=torch.bool, ) if self._config.max_skill_steps > 0: - over_max_len = self._cur_skill_step > self._config.max_skill_steps + over_max_len = cur_skill_step > self._config.max_skill_steps if self._config.force_end_on_timeout: bad_terminate = over_max_len else: is_skill_done = is_skill_done | over_max_len - # for i, env_i in enumerate(batch_idx): - # if self._delay_term[env_i]: - # self._delay_term[env_i] = False - # is_skill_done[i] = 1.0 - # elif ( - # self._config.apply_postconds - # and is_skill_done[i] == 1.0 - # and hl_says_term[i] == 0.0 - # ): - # actions = self._apply_postcond( - # actions, log_info, skill_name[i], env_i, i - # ) - # self._delay_term[env_i] = True - # is_skill_done[i] = 0.0 + for i, env_i in enumerate(batch_idx): + if 
self._delay_term[env_i]: + self._delay_term[env_i] = False + is_skill_done[i] = 1.0 + elif ( + self._config.apply_postconds + and is_skill_done[i] == 1.0 + and hl_says_term[i] == 0.0 + ): + actions = self._apply_postcond( + actions, log_info, skill_name[i], env_i, i + ) + self._delay_term[env_i] = True + is_skill_done[i] = 0.0 is_skill_done |= hl_says_term if bad_terminate.sum() > 0: self._internal_log( - f"Bad terminating due to timeout {self._cur_skill_step}, {bad_terminate}", + f"Bad terminating due to timeout {cur_skill_step}, {bad_terminate}", observations, ) diff --git a/habitat-baselines/habitat_baselines/rl/ppo/policy.py b/habitat-baselines/habitat_baselines/rl/ppo/policy.py index bfb8ac6f4a..22b7d95205 100644 --- a/habitat-baselines/habitat_baselines/rl/ppo/policy.py +++ b/habitat-baselines/habitat_baselines/rl/ppo/policy.py @@ -39,6 +39,7 @@ class PolicyAction: action_log_probs: Optional[torch.Tensor] = None take_actions: Optional[torch.Tensor] = None policy_info: Optional[List[Dict[str, Any]]] = None + should_inserts: Optional[torch.Tensor] = None def write_action(self, write_idx, write_action): self.actions[:, write_idx] = write_action diff --git a/habitat-baselines/habitat_baselines/rl/ppo/ppo.py b/habitat-baselines/habitat_baselines/rl/ppo/ppo.py index 20ccfe83aa..7c8e816898 100644 --- a/habitat-baselines/habitat_baselines/rl/ppo/ppo.py +++ b/habitat-baselines/habitat_baselines/rl/ppo/ppo.py @@ -6,7 +6,7 @@ import collections import inspect -from typing import Dict, Optional, Union +from typing import Any, Dict, List, Optional, Union import torch import torch.nn as nn @@ -15,6 +15,7 @@ from torch import Tensor from habitat.utils import profiling_wrapper +from habitat_baselines.common.baseline_registry import baseline_registry from habitat_baselines.common.rollout_storage import RolloutStorage from habitat_baselines.rl.ppo.policy import NetPolicy from habitat_baselines.rl.ver.ver_rollout_storage import VERRolloutStorage @@ -26,6 +27,7 @@ EPS_PPO = 1e-5 +@baseline_registry.register_updater class PPO(nn.Module): entropy_coef: Union[float, LagrangeInequalityCoefficient] @@ -155,14 +157,10 @@ def _set_grads_to_none(self): for p in pg["params"]: p.grad = None - def update( - self, - rollouts: RolloutStorage, - ) -> Dict[str, float]: - - advantages = self.get_advantages(rollouts) - - learner_metrics = collections.defaultdict(list) + def _update_from_batch(self, batch, epoch, rollouts, learner_metrics): + """ + Performs a gradient update from the minibatch. 
+ """ def record_min_mean_max(t: torch.Tensor, prefix: str): for name, op in ( @@ -172,145 +170,148 @@ def record_min_mean_max(t: torch.Tensor, prefix: str): ): learner_metrics[f"{prefix}_{name}"].append(op(t)) - for epoch in range(self.ppo_epoch): - profiling_wrapper.range_push("PPO.update epoch") - data_generator = rollouts.recurrent_generator( - advantages, self.num_mini_batch + self._set_grads_to_none() + + ( + values, + action_log_probs, + dist_entropy, + _, + aux_loss_res, + ) = self._evaluate_actions( + batch["observations"], + batch["recurrent_hidden_states"], + batch["prev_actions"], + batch["masks"], + batch["actions"], + batch["rnn_build_seq_info"], + ) + + ratio = torch.exp(action_log_probs - batch["action_log_probs"]) + + surr1 = batch["advantages"] * ratio + surr2 = batch["advantages"] * ( + torch.clamp( + ratio, + 1.0 - self.clip_param, + 1.0 + self.clip_param, ) + ) + action_loss = -torch.min(surr1, surr2) - for _bid, batch in enumerate(data_generator): - self._set_grads_to_none() - - ( - values, - action_log_probs, - dist_entropy, - _, - aux_loss_res, - ) = self._evaluate_actions( - batch["observations"], - batch["recurrent_hidden_states"], - batch["prev_actions"], - batch["masks"], - batch["actions"], - batch["rnn_build_seq_info"], - ) + values = values.float() + orig_values = values - ratio = torch.exp(action_log_probs - batch["action_log_probs"]) + if self.use_clipped_value_loss: + delta = values.detach() - batch["value_preds"] + value_pred_clipped = batch["value_preds"] + delta.clamp( + -self.clip_param, self.clip_param + ) - surr1 = batch["advantages"] * ratio - surr2 = batch["advantages"] * ( - torch.clamp( - ratio, - 1.0 - self.clip_param, - 1.0 + self.clip_param, - ) - ) - action_loss = -torch.min(surr1, surr2) + values = torch.where( + delta.abs() < self.clip_param, + values, + value_pred_clipped, + ) - values = values.float() - orig_values = values + value_loss = 0.5 * F.mse_loss( + values, batch["returns"], reduction="none" + ) - if self.use_clipped_value_loss: - delta = values.detach() - batch["value_preds"] - value_pred_clipped = batch["value_preds"] + delta.clamp( - -self.clip_param, self.clip_param - ) + if "is_coeffs" in batch: + assert isinstance(batch["is_coeffs"], torch.Tensor) + ver_is_coeffs = batch["is_coeffs"].clamp(max=1.0) + mean_fn = lambda t: torch.mean(ver_is_coeffs * t) + else: + mean_fn = torch.mean - values = torch.where( - delta.abs() < self.clip_param, - values, - value_pred_clipped, - ) + action_loss, value_loss, dist_entropy = map( + mean_fn, + (action_loss, value_loss, dist_entropy), + ) - value_loss = 0.5 * F.mse_loss( - values, batch["returns"], reduction="none" - ) + all_losses = [ + self.value_loss_coef * value_loss, + action_loss, + ] - if "is_coeffs" in batch: - assert isinstance(batch["is_coeffs"], torch.Tensor) - ver_is_coeffs = batch["is_coeffs"].clamp(max=1.0) - mean_fn = lambda t: torch.mean(ver_is_coeffs * t) - else: - mean_fn = torch.mean + if isinstance(self.entropy_coef, float): + all_losses.append(-self.entropy_coef * dist_entropy) + else: + all_losses.append(self.entropy_coef.lagrangian_loss(dist_entropy)) - action_loss, value_loss, dist_entropy = map( - mean_fn, - (action_loss, value_loss, dist_entropy), - ) + all_losses.extend(v["loss"] for v in aux_loss_res.values()) - all_losses = [ - self.value_loss_coef * value_loss, - action_loss, - ] + total_loss = torch.stack(all_losses).sum() - if isinstance(self.entropy_coef, float): - all_losses.append(-self.entropy_coef * dist_entropy) - else: - all_losses.append( - 
self.entropy_coef.lagrangian_loss(dist_entropy) - ) + total_loss = self.before_backward(total_loss) + total_loss.backward() + self.after_backward(total_loss) - all_losses.extend(v["loss"] for v in aux_loss_res.values()) + grad_norm = self.before_step() + self.optimizer.step() + self.after_step() - total_loss = torch.stack(all_losses).sum() + with inference_mode(): + if "is_coeffs" in batch: + record_min_mean_max(batch["is_coeffs"], "ver_is_coeffs") + record_min_mean_max(orig_values, "value_pred") + record_min_mean_max(ratio, "prob_ratio") + + learner_metrics["value_loss"].append(value_loss) + learner_metrics["action_loss"].append(action_loss) + learner_metrics["dist_entropy"].append(dist_entropy) + if epoch == (self.ppo_epoch - 1): + learner_metrics["ppo_fraction_clipped"].append( + (ratio > (1.0 + self.clip_param)).float().mean() + + (ratio < (1.0 - self.clip_param)).float().mean() + ) - total_loss = self.before_backward(total_loss) - total_loss.backward() - self.after_backward(total_loss) + learner_metrics["grad_norm"].append(grad_norm) + if isinstance(self.entropy_coef, LagrangeInequalityCoefficient): + learner_metrics["entropy_coef"].append( + self.entropy_coef().detach() + ) - grad_norm = self.before_step() - self.optimizer.step() - self.after_step() + for name, res in aux_loss_res.items(): + for k, v in res.items(): + learner_metrics[f"aux_{name}_{k}"].append(v.detach()) - with inference_mode(): - if "is_coeffs" in batch: - record_min_mean_max( - batch["is_coeffs"], "ver_is_coeffs" - ) - record_min_mean_max(orig_values, "value_pred") - record_min_mean_max(ratio, "prob_ratio") - - learner_metrics["value_loss"].append(value_loss) - learner_metrics["action_loss"].append(action_loss) - learner_metrics["dist_entropy"].append(dist_entropy) - if epoch == (self.ppo_epoch - 1): - learner_metrics["ppo_fraction_clipped"].append( - (ratio > (1.0 + self.clip_param)).float().mean() - + (ratio < (1.0 - self.clip_param)).float().mean() - ) + if "is_stale" in batch: + assert isinstance(batch["is_stale"], torch.Tensor) + learner_metrics["fraction_stale"].append( + batch["is_stale"].float().mean() + ) - learner_metrics["grad_norm"].append(grad_norm) - if isinstance( - self.entropy_coef, LagrangeInequalityCoefficient - ): - learner_metrics["entropy_coef"].append( - self.entropy_coef().detach() - ) + if isinstance(rollouts, VERRolloutStorage): + assert isinstance(batch["policy_version"], torch.Tensor) + record_min_mean_max( + ( + rollouts.current_policy_version + - batch["policy_version"] + ).float(), + "policy_version_difference", + ) - for name, res in aux_loss_res.items(): - for k, v in res.items(): - learner_metrics[f"aux_{name}_{k}"].append( - v.detach() - ) + def update( + self, + rollouts: RolloutStorage, + ) -> Dict[str, float]: - if "is_stale" in batch: - assert isinstance(batch["is_stale"], torch.Tensor) - learner_metrics["fraction_stale"].append( - batch["is_stale"].float().mean() - ) + advantages = self.get_advantages(rollouts) - if isinstance(rollouts, VERRolloutStorage): - assert isinstance( - batch["policy_version"], torch.Tensor - ) - record_min_mean_max( - ( - rollouts.current_policy_version - - batch["policy_version"] - ).float(), - "policy_version_difference", - ) + learner_metrics: Dict[str, List[Any]] = collections.defaultdict(list) + + for epoch in range(self.ppo_epoch): + profiling_wrapper.range_push("PPO.update epoch") + data_generator = rollouts.recurrent_generator( + advantages, self.num_mini_batch + ) + + for _bid, batch in enumerate(data_generator): + self._update_from_batch( 
+ batch, epoch, rollouts, learner_metrics + ) profiling_wrapper.range_pop() # PPO.update epoch diff --git a/habitat-baselines/habitat_baselines/rl/ppo/ppo_trainer.py b/habitat-baselines/habitat_baselines/rl/ppo/ppo_trainer.py index 42bb765815..a5a87c1b6b 100644 --- a/habitat-baselines/habitat_baselines/rl/ppo/ppo_trainer.py +++ b/habitat-baselines/habitat_baselines/rl/ppo/ppo_trainer.py @@ -36,12 +36,14 @@ apply_obs_transforms_obs_space, get_active_obs_transforms, ) -from habitat_baselines.common.rollout_storage import RolloutStorage +from habitat_baselines.common.rollout_storage import ( # noqa: F401. + RolloutStorage, +) from habitat_baselines.common.tensorboard_utils import ( TensorboardWriter, get_writer, ) -from habitat_baselines.rl.ddppo.algo import DDPPO +from habitat_baselines.rl.ddppo.algo import DDPPO # noqa: F401. from habitat_baselines.rl.ddppo.ddp_utils import ( EXIT, get_distrib_size, @@ -59,7 +61,7 @@ from habitat_baselines.rl.hrl.hierarchical_policy import ( # noqa: F401. HierarchicalPolicy, ) -from habitat_baselines.rl.ppo import PPO +from habitat_baselines.rl.ppo import PPO # noqa: F401. from habitat_baselines.rl.ppo.policy import NetPolicy from habitat_baselines.utils.common import ( batch_obs, @@ -189,9 +191,14 @@ def _setup_actor_critic_agent(self, ppo_cfg: "DictConfig") -> None: nn.init.orthogonal_(self.actor_critic.critic.fc.weight) nn.init.constant_(self.actor_critic.critic.fc.bias, 0) - self.agent = (DDPPO if self._is_distributed else PPO).from_config( - self.actor_critic, ppo_cfg - ) + if self._is_distributed: + agent_cls = baseline_registry.get_updater( + self.config.distrib_updater_name + ) + else: + agent_cls = baseline_registry.get_updater(self.config.updater_name) + + self.agent = agent_cls.from_config(self.actor_critic, ppo_cfg) def _init_envs(self, config=None, is_eval: bool = False): if config is None: @@ -329,13 +336,16 @@ def _init_train(self, resume_state=None): self._nbuffers = 2 if ppo_cfg.use_double_buffered_sampler else 1 - self.rollouts = RolloutStorage( + rollouts_cls = baseline_registry.get_storage( + self.config.rollout_storage + ) + self.rollouts = rollouts_cls( ppo_cfg.num_steps, self.envs.num_envs, obs_space, self.policy_action_space, ppo_cfg.hidden_size, - num_recurrent_layers=self.actor_critic.net.num_recurrent_layers, + num_recurrent_layers=self.actor_critic.num_recurrent_layers, is_double_buffered=ppo_cfg.use_double_buffered_sampler, action_shape=action_shape, discrete_actions=discrete_actions, @@ -505,6 +515,7 @@ def _compute_actions_and_step_envs(self, buffer_index: int = 0): action_log_probs=action_data.action_log_probs, value_preds=action_data.values, buffer_index=buffer_index, + should_inserts=action_data.should_inserts, ) def _collect_environment_result(self, buffer_index: int = 0): diff --git a/habitat-lab/habitat/config/default_structured_configs.py b/habitat-lab/habitat/config/default_structured_configs.py index ccbb1aa984..f54e12a7e1 100644 --- a/habitat-lab/habitat/config/default_structured_configs.py +++ b/habitat-lab/habitat/config/default_structured_configs.py @@ -129,6 +129,11 @@ class RearrangeStopActionConfig(ActionConfig): type: str = "RearrangeStopAction" +@dataclass +class PddlApplyActionConfig(ActionConfig): + type: str = "PddlApplyAction" + + @dataclass class OracleNavActionConfig(ActionConfig): """ @@ -143,6 +148,9 @@ class OracleNavActionConfig(ActionConfig): forward_velocity: float = 1.0 turn_thresh: float = 0.1 dist_thresh: float = 0.2 + stop_thresh: float = 0.001 + spawn_max_dist_to_obj: float = 2.0 + 
num_spawn_attempts: int = 200 lin_speed: float = 10.0 ang_speed: float = 10.0 allow_dyn_slide: bool = True @@ -723,7 +731,7 @@ class TaskConfig(HabitatBaseConfig): # Spawn parameters physics_stability_steps: int = 1 num_spawn_attempts: int = 200 - spawn_max_dists_to_obj: float = 2.0 + spawn_max_dist_to_obj: float = 2.0 base_angle_noise: float = 0.523599 # EE sample parameters ee_sample_factor: float = 0.2 @@ -1186,6 +1194,12 @@ class HabitatConfig(HabitatBaseConfig): name="oracle_nav_action", node=OracleNavActionConfig, ) +cs.store( + package="habitat.task.actions.pddl_apply_action", + group="habitat/task/actions", + name="pddl_apply_action", + node=PddlApplyActionConfig, +) # Dataset Config Schema cs.store( @@ -1313,6 +1327,12 @@ class HabitatConfig(HabitatBaseConfig): name="instance_imagegoal_hfov_sensor", node=InstanceImageGoalHFOVSensorConfig, ) +cs.store( + package="habitat.task.lab_sensors.localization_sensor", + group="habitat/task/lab_sensors", + name="localization_sensor", + node=LocalizationSensorConfig, +) cs.store( package="habitat.task.lab_sensors.target_start_sensor", group="habitat/task/lab_sensors", diff --git a/habitat-lab/habitat/config/habitat/task/rearrange/rearrange.yaml b/habitat-lab/habitat/config/habitat/task/rearrange/rearrange.yaml index 13dcf40da7..9f3b3f3608 100644 --- a/habitat-lab/habitat/config/habitat/task/rearrange/rearrange.yaml +++ b/habitat-lab/habitat/config/habitat/task/rearrange/rearrange.yaml @@ -27,6 +27,7 @@ defaults: - relative_resting_pos_sensor - target_start_gps_compass_sensor - target_goal_gps_compass_sensor + - localization_sensor - _self_ type: RearrangeCompositeTask-v0 diff --git a/habitat-lab/habitat/config/habitat/task/rearrange/rearrange_easy.yaml b/habitat-lab/habitat/config/habitat/task/rearrange/rearrange_easy.yaml index 793fbc1847..c138ca2238 100644 --- a/habitat-lab/habitat/config/habitat/task/rearrange/rearrange_easy.yaml +++ b/habitat-lab/habitat/config/habitat/task/rearrange/rearrange_easy.yaml @@ -28,6 +28,7 @@ defaults: - end_effector_sensor - target_start_gps_compass_sensor - target_goal_gps_compass_sensor + - localization_sensor - _self_ type: RearrangeCompositeTask-v0 diff --git a/habitat-lab/habitat/tasks/rearrange/actions/pddl_actions.py b/habitat-lab/habitat/tasks/rearrange/actions/pddl_actions.py index 279bc079f7..df54c59593 100644 --- a/habitat-lab/habitat/tasks/rearrange/actions/pddl_actions.py +++ b/habitat-lab/habitat/tasks/rearrange/actions/pddl_actions.py @@ -8,7 +8,6 @@ from habitat.core.registry import registry from habitat.sims.habitat_simulator.actions import HabitatSimActions from habitat.tasks.rearrange.actions.grip_actions import RobotAction -from habitat.tasks.rearrange.utils import rearrange_logger @registry.register_task_action @@ -38,7 +37,10 @@ def action_space(self): { self._action_arg_prefix + "pddl_action": spaces.Box( - shape=(action_n_args,), low=-1, high=1, dtype=np.float32 + shape=(action_n_args,), + low=np.finfo(np.float32).min, + high=np.finfo(np.float32).max, + dtype=np.float32, ) } ) @@ -49,6 +51,7 @@ def was_prev_action_invalid(self): def reset(self, *args, **kwargs): self._was_prev_action_invalid = False + self._prev_action = None def get_pddl_action_start(self, action_id: int) -> int: start_idx = 0 @@ -56,10 +59,8 @@ def get_pddl_action_start(self, action_id: int) -> int: start_idx += action.n_args return start_idx - def step(self, *args, is_last_action, **kwargs): - apply_pddl_action = kwargs[self._action_arg_prefix + "pddl_action"] + def _apply_action(self, apply_pddl_action): cur_i = 0 
- self._was_prev_action_invalid = False for action in self._action_ordering: action_part = apply_pddl_action[cur_i : cur_i + action.n_args][:] if sum(action_part) > 0: @@ -71,26 +72,31 @@ def step(self, *args, is_last_action, **kwargs): raise ValueError( f"Got invalid action value < 0 in {action_part} with action {action}" ) - rearrange_logger.debug(f"Got action part {real_action_idxs}") param_values = [ self._entities_list[i] for i in real_action_idxs ] - apply_action = action.copy() + apply_action = action.clone() apply_action.set_param_values(param_values) + self._prev_action = apply_action if self._task.pddl_problem.is_expr_true(apply_action.precond): - rearrange_logger.debug( - f"Applying action {action} with obj args {param_values}" - ) self._task.pddl_problem.apply_action(apply_action) else: - rearrange_logger.debug( - f"Preconds not satisfied for: action {action} with obj args {param_values}" - ) self._was_prev_action_invalid = True cur_i += action.n_args + + def step(self, *args, is_last_action, **kwargs): + self._prev_action = None + apply_pddl_action = kwargs[self._action_arg_prefix + "pddl_action"] + self._was_prev_action_invalid = False + inputs_outside = any( + a < 0 or a > len(self._entities_list) for a in apply_pddl_action + ) + if not inputs_outside: + self._apply_action(apply_pddl_action) + if is_last_action: return self._sim.step(HabitatSimActions.arm_action) else: diff --git a/habitat-lab/habitat/tasks/rearrange/sub_tasks/pick_task.py b/habitat-lab/habitat/tasks/rearrange/sub_tasks/pick_task.py index 23eebb33a3..f1f396d169 100644 --- a/habitat-lab/habitat/tasks/rearrange/sub_tasks/pick_task.py +++ b/habitat-lab/habitat/tasks/rearrange/sub_tasks/pick_task.py @@ -58,7 +58,7 @@ def _gen_start_pos(self, sim, episode, sel_idx): start_pos, angle_to_obj, was_succ = get_robot_spawns( snap_pos, self._config.base_angle_noise, - self._config.spawn_max_dists_to_obj, + self._config.spawn_max_dist_to_obj, sim, self._config.num_spawn_attempts, self._config.physics_stability_steps, diff --git a/test/test_baseline_training.py b/test/test_baseline_training.py index eacd10759d..43a72477f7 100644 --- a/test/test_baseline_training.py +++ b/test/test_baseline_training.py @@ -154,19 +154,24 @@ def test_trainers(config_path, num_updates, overrides, trainer_name): [ ( "habitat-baselines/habitat_baselines/config/rearrange/rl_hl_srl_onav.yaml", - "eval", - "ppo", - ), - ( - "habitat-baselines/habitat_baselines/config/rearrange/tp_srl_oracle_nav.yaml", - "eval", - "ppo", - ), - ( - "habitat-baselines/habitat_baselines/config/rearrange/tp_srl.yaml", - "eval", + "train", "ppo", ), + # ( + # "habitat-baselines/habitat_baselines/config/rearrange/rl_hl_srl_onav.yaml", + # "eval", + # "ppo", + # ), + # ( + # "habitat-baselines/habitat_baselines/config/rearrange/tp_srl_oracle_nav.yaml", + # "eval", + # "ppo", + # ), + # ( + # "habitat-baselines/habitat_baselines/config/rearrange/tp_srl.yaml", + # "eval", + # "ppo", + # ), ], ) def test_hrl(config_path, mode, trainer_name): @@ -180,6 +185,7 @@ def test_hrl(config_path, mode, trainer_name): [ "habitat_baselines.num_updates=1", "habitat_baselines.eval.split=minival", + "habitat.dataset.split=minival", "habitat_baselines.total_num_steps=-1.0", "habitat_baselines.test_episode_count=1", "habitat_baselines.checkpoint_folder=data/test_checkpoints/test_training", From daa84dbec3250dfc2be3622d4adfa3a40f1e99d4 Mon Sep 17 00:00:00 2001 From: ASzot Date: Tue, 27 Dec 2022 06:36:32 +0100 Subject: [PATCH 03/48] Fixed config setup --- 
.../habitat_baselines/rl/hrl/hrl_rollout_storage.py | 2 +- habitat-baselines/habitat_baselines/rl/ppo/ppo_trainer.py | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/habitat-baselines/habitat_baselines/rl/hrl/hrl_rollout_storage.py b/habitat-baselines/habitat_baselines/rl/hrl/hrl_rollout_storage.py index b7750c5493..7d728afdfa 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/hrl_rollout_storage.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/hrl_rollout_storage.py @@ -15,7 +15,7 @@ EPS_PPO = 1e-5 -@baseline_registry.register_policy +@baseline_registry.register_storage class HrlRolloutStorage(RolloutStorage): def __init__(self, numsteps, num_envs, *args, **kwargs): super().__init__(numsteps, num_envs, *args, **kwargs) diff --git a/habitat-baselines/habitat_baselines/rl/ppo/ppo_trainer.py b/habitat-baselines/habitat_baselines/rl/ppo/ppo_trainer.py index a5a87c1b6b..fdcd9d33ca 100644 --- a/habitat-baselines/habitat_baselines/rl/ppo/ppo_trainer.py +++ b/habitat-baselines/habitat_baselines/rl/ppo/ppo_trainer.py @@ -193,10 +193,12 @@ def _setup_actor_critic_agent(self, ppo_cfg: "DictConfig") -> None: if self._is_distributed: agent_cls = baseline_registry.get_updater( - self.config.distrib_updater_name + self.config.habitat_baselines.distrib_updater_name ) else: - agent_cls = baseline_registry.get_updater(self.config.updater_name) + agent_cls = baseline_registry.get_updater( + self.config.habitat_baselines.updater_name + ) self.agent = agent_cls.from_config(self.actor_critic, ppo_cfg) @@ -337,7 +339,7 @@ def _init_train(self, resume_state=None): self._nbuffers = 2 if ppo_cfg.use_double_buffered_sampler else 1 rollouts_cls = baseline_registry.get_storage( - self.config.rollout_storage + self.config.habitat_baselines.rollout_storage ) self.rollouts = rollouts_cls( ppo_cfg.num_steps, From 7ae2c6b1de4e622f2a0dc8259df5124d70af021f Mon Sep 17 00:00:00 2001 From: akshararai Date: Fri, 6 Jan 2023 18:40:31 -0500 Subject: [PATCH 04/48] Train hl modif (#1057) * separate config policy * udpate config * renamed hierarchical_srl_learned_nav.yaml to hierarchical_srl.yaml since that already implies a learned nav * renamed yaml files to follow convention of _tp_ for task planner, _srl_ for learned skills, _onav_ for oracle nav * order of parameters in config to avoid overwite * added documentation on how to run the HRL policy, removed auto_name, not needed in this config * remove empty file * fixed the yaml linked in the readme * updated the description of the config fike * Delete tp_noop_onav.yaml don't need this file anymore, added readme instructions to do this by changing hydra parameters. * setting the agent back to depth_head_agent as _vis slows down the training. 
Apologies * removed one config file, since it seemed like mostly a repeat of the tp config * cleaned up config files by removing redundant parameters * renamed monolithic_pointnav to be monolithic * fixed the comment * added localization_sensor to all task lab_sensors Co-authored-by: Xavier Puig --- habitat-baselines/habitat_baselines/README.md | 25 +++++ .../rl/policy/hierarchical_srl_onav.yaml | 77 +++++++++++++ .../rl/policy/hierarchical_tp_noop_onav.yaml | 72 ++++++++++++ .../rl/policy/hierarchical_tp_srl.yaml | 71 ++++++++++++ .../rl/policy/monolithic.yaml | 4 + .../config/rearrange/rl_hl_srl_onav.yaml | 103 +----------------- .../config/rearrange/rl_rearrange.yaml | 6 +- .../config/rearrange/rl_rearrange_easy.yaml | 6 +- .../config/rearrange/rl_skill.yaml | 4 +- .../config/rearrange/tp_srl.yaml | 85 +-------------- .../config/rearrange/tp_srl_oracle_nav.yaml | 85 +-------------- .../task/rearrange/prepare_groceries.yaml | 1 + .../rearrange/rearrange_easy_multi_agent.yaml | 1 + .../habitat/task/rearrange/set_table.yaml | 1 + .../habitat/task/rearrange/tidy_house.yaml | 1 + 15 files changed, 263 insertions(+), 279 deletions(-) create mode 100644 habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_srl_onav.yaml create mode 100644 habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_tp_noop_onav.yaml create mode 100644 habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_tp_srl.yaml create mode 100644 habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/monolithic.yaml diff --git a/habitat-baselines/habitat_baselines/README.md b/habitat-baselines/habitat_baselines/README.md index 85e89065d6..e2e06f258d 100644 --- a/habitat-baselines/habitat_baselines/README.md +++ b/habitat-baselines/habitat_baselines/README.md @@ -53,6 +53,31 @@ To use them download pre-trained pytorch models from [link](https://dl.fbaipubli The `habitat_baselines/config/pointnav/ppo_pointnav.yaml` config has better hyperparameters for large scale training and loads the [Gibson PointGoal Navigation Dataset](/README.md#datasets) instead of the test scenes. Change the `/benchmark/nav/pointnav: pointnav_gibson` in `habitat_baselines/config/pointnav/ppo_pointnav.yaml` to `/benchmark/nav/pointnav: pointnav_mp3d` in the defaults list for training on [MatterPort3D PointGoal Navigation Dataset](/README.md#datasets). +### Hierarchical Reinforcement Learning (HRL) + +We provide a two-layer hierarchical policy class, consisting of a low-level skill that moves the robot, and a high-level policy that reasons about which low-level skill to use in the current state. This can be especially powerful in long-horizon mobile manipulation tasks, like those introduced in [Habitat 2.0](https://arxiv.org/abs/2106.14405). Both the low- and high-level can be either learned or an oracle. For the oracle high-level we use [PDDL](https://planning.wiki/guide/whatis/pddl), and for the oracle low-level we use instantaneous transitions, with the environment set to the final desired state. Additionally, for navigation, we provide an oracle navigation skill that uses A-star and the map of the environment to move the robot to its goal. + +To run the following examples, you need the [ReplicaCAD dataset](https://github.com/facebookresearch/habitat-sim/blob/main/DATASETS.md#replicacad).
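The same configs can also be driven programmatically; a minimal sketch (mirroring how the tests in this PR call `execute_exp`, and assuming the repository-relative config path used in the commands below) is:

```python
# Sketch only: launching the HRL config from Python instead of run.py's CLI.
from habitat_baselines.config.default import get_config
from habitat_baselines.run import execute_exp

config = get_config(
    "habitat-baselines/habitat_baselines/config/rearrange/rl_hl_srl_onav.yaml",
    ["habitat_baselines.num_updates=1"],  # keep the run short for a smoke test
)
execute_exp(config, "train")
```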
+ +To train a high-level policy, while using pre-learned low-level skills (SRL baseline from [Habitat2.0](https://arxiv.org/abs/2106.14405)), you can run: + +```bash +python -u habitat_baselines/run.py \ + --exp-config habitat_baselines/config/rearrange/rl_hl_srl_onav.yaml \ + --run-type train +``` +To run a rearrangement episode with oracle in both low- and high-level, you can run: + +```bash +python -u habitat_baselines/run.py \ + --exp-config habitat_baselines/config/rearrange/rl_hl_srl_onav.yaml \ + --run-type eval \ + habitat_baselines/rl/policy=hierarchical_tp_noop_onav +``` + +To change the task (like set table) that you train your skills on, you can change the line `/habitat/task/rearrange: rearrange_easy` to `/habitat/task/rearrange: set_table` in the defaults of your config. + + ### Classic **SLAM based** diff --git a/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_srl_onav.yaml b/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_srl_onav.yaml new file mode 100644 index 0000000000..d92283dbc2 --- /dev/null +++ b/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_srl_onav.yaml @@ -0,0 +1,77 @@ +name: "HierarchicalPolicy" +obs_transforms: + add_virtual_keys: + virtual_keys: + "goal_to_agent_gps_compass": 2 +hierarchical_policy: + high_level_policy: + name: "NeuralHighLevelPolicy" + allow_other_place: False + hidden_dim: 512 + use_rnn: True + rnn_type: 'LSTM' + backbone: resnet18 + normalize_visual_inputs: False + num_rnn_layers: 2 + policy_input_keys: + - "robot_head_depth" + defined_skills: + open_cab: + skill_name: "ArtObjSkillPolicy" + name: "PointNavResNetPolicy" + load_ckpt_file: "data/models/open_cab.pth" + + open_fridge: + skill_name: "ArtObjSkillPolicy" + load_ckpt_file: "data/models/open_fridge.pth" + + close_cab: + skill_name: "ArtObjSkillPolicy" + obs_skill_inputs: ["obj_start_sensor"] + load_ckpt_file: "data/models/close_cab.pth" + + close_fridge: + skill_name: "ArtObjSkillPolicy" + obs_skill_inputs: ["obj_start_sensor"] + load_ckpt_file: "data/models/close_fridge.pth" + + pick: + skill_name: "PickSkillPolicy" + name: "PointNavResNetPolicy" + obs_skill_inputs: ["obj_start_sensor"] + load_ckpt_file: "data/models/pick.pth" + + place: + skill_name: "PlaceSkillPolicy" + name: "PointNavResNetPolicy" + obs_skill_inputs: ["obj_goal_sensor"] + load_ckpt_file: "data/models/place.pth" + + wait_skill: + skill_name: "WaitSkillPolicy" + max_skill_steps: -1 + force_end_on_timeout: False + + oracle_nav: + skill_name: "OracleNavPolicy" + obs_skill_inputs: ["obj_start_sensor", "abs_obj_start_sensor", "obj_goal_sensor", "abs_obj_goal_sensor"] + max_skill_steps: 300 + + reset_arm_skill: + skill_name: "ResetArmSkill" + max_skill_steps: 50 + reset_joint_state: [-4.50e-01, -1.08e00, 9.95e-02, 9.38e-01, -7.88e-04, 1.57e00, 4.62e-03] + force_end_on_timeout: False + + use_skills: + # Uncomment if you are also using these skills + # open_cab: "open_cab" + # open_fridge: "open_fridge" + # close_cab: "open_cab" + # close_fridge: "open_fridge" + pick: "pick" + place: "place" + nav: "oracle_nav" + nav_to_receptacle: "oracle_nav" + wait: "wait_skill" + reset_arm: "reset_arm_skill" \ No newline at end of file diff --git a/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_tp_noop_onav.yaml b/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_tp_noop_onav.yaml new file mode 100644 index 0000000000..d090a528c7 --- /dev/null +++ 
b/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_tp_noop_onav.yaml @@ -0,0 +1,72 @@ + +name: "HierarchicalPolicy" +obs_transforms: + add_virtual_keys: + virtual_keys: + "goal_to_agent_gps_compass": 2 +hierarchical_policy: + high_level_policy: + name: "FixedHighLevelPolicy" + add_arm_rest: True + + defined_skills: + open_cab: + skill_name: "NoopSkillPolicy" + max_skill_steps: 1 + apply_postconds: True + + open_fridge: + skill_name: "NoopSkillPolicy" + max_skill_steps: 1 + apply_postconds: True + + close_cab: + skill_name: "NoopSkillPolicy" + obs_skill_inputs: ["obj_start_sensor"] + max_skill_steps: 1 + + close_fridge: + skill_name: "NoopSkillPolicy" + obs_skill_inputs: ["obj_start_sensor"] + max_skill_steps: 1 + apply_postconds: True + + pick: + skill_name: "NoopSkillPolicy" + obs_skill_inputs: ["obj_start_sensor"] + max_skill_steps: 1 + apply_postconds: True + + place: + skill_name: "NoopSkillPolicy" + obs_skill_inputs: ["obj_goal_sensor"] + max_skill_steps: 1 + apply_postconds: True + + nav_to_obj: + skill_name: "OracleNavPolicy" + obs_skill_inputs: ["obj_start_sensor", "abs_obj_start_sensor", "obj_goal_sensor", "abs_obj_goal_sensor"] + max_skill_steps: 300 + + wait_skill: + skill_name: "WaitSkillPolicy" + max_skill_steps: -1 + force_end_on_timeout: False + + reset_arm_skill: + skill_name: "ResetArmSkill" + max_skill_steps: 50 + reset_joint_state: [-4.50e-01, -1.07e00, 9.95e-02, 9.38e-01, -7.88e-04, 1.57e00, 4.62e-03] + force_end_on_timeout: False + + use_skills: + open_cab: "open_cab" + open_fridge: "open_fridge" + close_cab: "close_cab" + close_fridge: "close_fridge" + pick: "pick" + place: "place" + nav: "nav_to_obj" + nav_to_receptacle: "nav_to_obj" + wait: "wait_skill" + reset_arm: "reset_arm_skill" diff --git a/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_tp_srl.yaml b/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_tp_srl.yaml new file mode 100644 index 0000000000..3ac608e23b --- /dev/null +++ b/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_tp_srl.yaml @@ -0,0 +1,71 @@ +name: "HierarchicalPolicy" +obs_transforms: + add_virtual_keys: + virtual_keys: + "goal_to_agent_gps_compass": 2 +hierarchical_policy: + high_level_policy: + name: "FixedHighLevelPolicy" + add_arm_rest: True + defined_skills: + open_cab: + skill_name: "ArtObjSkillPolicy" + name: "PointNavResNetPolicy" + load_ckpt_file: "data/models/open_cab.pth" + + open_fridge: + skill_name: "ArtObjSkillPolicy" + skill_name: "ArtObjSkillPolicy" + load_ckpt_file: "data/models/open_fridge.pth" + + close_cab: + skill_name: "ArtObjSkillPolicy" + obs_skill_inputs: ["obj_start_sensor"] + load_ckpt_file: "data/models/close_cab.pth" + + close_fridge: + skill_name: "ArtObjSkillPolicy" + obs_skill_inputs: ["obj_start_sensor"] + load_ckpt_file: "data/models/close_fridge.pth" + + pick: + skill_name: "PickSkillPolicy" + obs_skill_inputs: ["obj_start_sensor"] + load_ckpt_file: "data/models/pick.pth" + + place: + skill_name: "PlaceSkillPolicy" + obs_skill_inputs: ["obj_goal_sensor"] + load_ckpt_file: "data/models/place.pth" + + wait_skill: + skill_name: "WaitSkillPolicy" + max_skill_steps: -1 + force_end_on_timeout: False + + nav_to_obj: + skill_name: "NavSkillPolicy" + obs_skill_inputs: ["goal_to_agent_gps_compass"] + obs_skill_input_dim: 2 + load_ckpt_file: "data/models/nav.pth" + max_skill_steps: 300 + force_end_on_timeout: False + + reset_arm_skill: + skill_name: "ResetArmSkill" + 
max_skill_steps: 50 + reset_joint_state: [-4.50e-01, -1.08e00, 9.95e-02, 9.38e-01, -7.88e-04, 1.57e00, 4.62e-03] + force_end_on_timeout: False + + use_skills: + # Uncomment if you are also using these skills + # open_cab: "open_cab" + # open_fridge: "open_fridge" + # close_cab: "open_cab" + # close_fridge: "open_fridge" + pick: "pick" + place: "place" + nav: "nav_to_obj" + nav_to_receptacle: "nav_to_obj" + wait: "wait_skill" + reset_arm: "reset_arm_skill" \ No newline at end of file diff --git a/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/monolithic.yaml b/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/monolithic.yaml new file mode 100644 index 0000000000..9a1850d158 --- /dev/null +++ b/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/monolithic.yaml @@ -0,0 +1,4 @@ +name: "PointNavResNetPolicy" +action_distribution_type: "gaussian" +action_dist: + use_log_std: True \ No newline at end of file diff --git a/habitat-baselines/habitat_baselines/config/rearrange/rl_hl_srl_onav.yaml b/habitat-baselines/habitat_baselines/config/rearrange/rl_hl_srl_onav.yaml index 1675eadb6c..5ad654137d 100644 --- a/habitat-baselines/habitat_baselines/config/rearrange/rl_hl_srl_onav.yaml +++ b/habitat-baselines/habitat_baselines/config/rearrange/rl_hl_srl_onav.yaml @@ -1,12 +1,14 @@ # @package _global_ -# Pick and place are kinematically simulated. -# Navigation is fully simulated. +# Pick and place are instantaneous skills, +# where environment is set to final state. +# Navigation is kinematically simulated. defaults: - /benchmark/rearrange: rearrange_easy - /habitat_baselines: habitat_baselines_rl_config_base - /habitat_baselines/rl/policy/obs_transforms: - add_virtual_keys_base + - /habitat_baselines/rl/policy: hierarchical_srl_onav - /habitat/task/actions: - pddl_apply_action - oracle_nav_action @@ -17,7 +19,6 @@ defaults: habitat: gym: - auto_name: RearrangeEasy obs_keys: - robot_head_depth - relative_resting_position @@ -34,15 +35,12 @@ habitat_baselines: verbose: False trainer_name: "ddppo" torch_gpu_id: 0 - tensorboard_dir: "tb" rollout_storage: "HrlRolloutStorage" updater_name: "HrlPPO" distrib_updater_name: "HrlDDPPO" - video_dir: "video_dir" video_fps: 30 video_render_views: - "third_rgb_sensor" - test_episode_count: -1 eval_ckpt_path_dir: "" num_environments: 3 writer_type: 'tb' @@ -61,86 +59,6 @@ habitat_baselines: video_option: ["disk"] rl: - policy: - name: "HierarchicalPolicy" - obs_transforms: - add_virtual_keys: - virtual_keys: - "goal_to_agent_gps_compass": 2 - hierarchical_policy: - high_level_policy: - name: "NeuralHighLevelPolicy" - allow_other_place: False - hidden_dim: 512 - use_rnn: True - rnn_type: 'LSTM' - backbone: resnet18 - normalize_visual_inputs: False - num_rnn_layers: 2 - policy_input_keys: - - "robot_head_depth" - defined_skills: - open_cab: - skill_name: "NoopSkillPolicy" - max_skill_steps: 1 - apply_postconds: True - - open_fridge: - skill_name: "NoopSkillPolicy" - max_skill_steps: 1 - apply_postconds: True - - close_cab: - skill_name: "NoopSkillPolicy" - obs_skill_inputs: ["obj_start_sensor"] - max_skill_steps: 1 - - close_fridge: - skill_name: "NoopSkillPolicy" - obs_skill_inputs: ["obj_start_sensor"] - max_skill_steps: 1 - apply_postconds: True - - pick: - skill_name: "NoopSkillPolicy" - obs_skill_inputs: ["obj_start_sensor"] - max_skill_steps: 1 - apply_postconds: True - - place: - skill_name: "NoopSkillPolicy" - obs_skill_inputs: ["obj_goal_sensor"] - max_skill_steps: 1 - apply_postconds: True - - 
nav_to_obj: - skill_name: "OracleNavPolicy" - obs_skill_inputs: ["obj_start_sensor", "abs_obj_start_sensor", "obj_goal_sensor", "abs_obj_goal_sensor"] - max_skill_steps: 300 - - wait_skill: - skill_name: "WaitSkillPolicy" - max_skill_steps: -1 - force_end_on_timeout: False - - reset_arm_skill: - skill_name: "ResetArmSkill" - max_skill_steps: 50 - reset_joint_state: [-4.5003259e-01, -1.0799699e00, 9.9526465e-02, 9.3869519e-01, -7.8854430e-04, 1.5702540e00, 4.6168058e-03] - force_end_on_timeout: False - - use_skills: - open_cab: "open_cab" - open_fridge: "open_fridge" - close_cab: "close_cab" - close_fridge: "close_fridge" - pick: "pick" - place: "place" - nav: "nav_to_obj" - nav_to_receptacle: "nav_to_obj" - wait: "wait_skill" - reset_arm: "reset_arm_skill" - ppo: # ppo params clip_param: 0.2 @@ -155,18 +73,6 @@ habitat_baselines: use_gae: True gamma: 0.99 tau: 0.95 - use_linear_clip_decay: False - use_linear_lr_decay: False - reward_window_size: 50 - - use_normalized_advantage: False - - hidden_size: 512 - - # Use double buffered sampling, typically helps - # when environment time is similar or larger than - # policy inference time during rollout generation - use_double_buffered_sampler: False ddppo: sync_frac: 0.6 @@ -182,7 +88,6 @@ habitat_baselines: train_encoder: True # Whether to reset the critic linear layer reset_critic: False - # Model parameters backbone: resnet18 rnn_type: LSTM diff --git a/habitat-baselines/habitat_baselines/config/rearrange/rl_rearrange.yaml b/habitat-baselines/habitat_baselines/config/rearrange/rl_rearrange.yaml index f96bfaf471..38a7ce3537 100644 --- a/habitat-baselines/habitat_baselines/config/rearrange/rl_rearrange.yaml +++ b/habitat-baselines/habitat_baselines/config/rearrange/rl_rearrange.yaml @@ -2,6 +2,7 @@ defaults: - /benchmark/rearrange: rearrange + - /habitat_baselines/rl/policy: monolithic - /habitat_baselines: habitat_baselines_rl_config_base - _self_ @@ -32,11 +33,6 @@ habitat_baselines: video_option: ["disk"] rl: - policy: - name: "PointNavResNetPolicy" - action_distribution_type: "gaussian" - action_dist: - use_log_std: True ppo: # ppo params clip_param: 0.2 diff --git a/habitat-baselines/habitat_baselines/config/rearrange/rl_rearrange_easy.yaml b/habitat-baselines/habitat_baselines/config/rearrange/rl_rearrange_easy.yaml index ed08e7a5c2..253db1cc6a 100644 --- a/habitat-baselines/habitat_baselines/config/rearrange/rl_rearrange_easy.yaml +++ b/habitat-baselines/habitat_baselines/config/rearrange/rl_rearrange_easy.yaml @@ -3,6 +3,7 @@ defaults: - /benchmark/rearrange: rearrange_easy - /habitat_baselines: habitat_baselines_rl_config_base + - /habitat_baselines/rl/policy: monolithic - _self_ habitat_baselines: @@ -32,11 +33,6 @@ habitat_baselines: video_option: ["disk"] rl: - policy: - name: "PointNavResNetPolicy" - action_distribution_type: "gaussian" - action_dist: - use_log_std: True ppo: # ppo params clip_param: 0.2 diff --git a/habitat-baselines/habitat_baselines/config/rearrange/rl_skill.yaml b/habitat-baselines/habitat_baselines/config/rearrange/rl_skill.yaml index 56018e9435..9f0d2e8d94 100644 --- a/habitat-baselines/habitat_baselines/config/rearrange/rl_skill.yaml +++ b/habitat-baselines/habitat_baselines/config/rearrange/rl_skill.yaml @@ -3,6 +3,7 @@ defaults: - /benchmark/rearrange: pick - /habitat_baselines: habitat_baselines_rl_config_base + - /habitat_baselines/rl/policy: monolithic - _self_ habitat_baselines: @@ -33,10 +34,7 @@ habitat_baselines: rl: policy: - name: "PointNavResNetPolicy" - action_distribution_type: "gaussian" 
action_dist: - use_log_std: True clamp_std: True std_init: -1.0 use_std_param: True diff --git a/habitat-baselines/habitat_baselines/config/rearrange/tp_srl.yaml b/habitat-baselines/habitat_baselines/config/rearrange/tp_srl.yaml index 3f2fe45fb5..ef9e31daf2 100644 --- a/habitat-baselines/habitat_baselines/config/rearrange/tp_srl.yaml +++ b/habitat-baselines/habitat_baselines/config/rearrange/tp_srl.yaml @@ -5,6 +5,7 @@ defaults: - /habitat_baselines: habitat_baselines_rl_config_base - /habitat_baselines/rl/policy/obs_transforms: - add_virtual_keys_base + - /habitat_baselines/rl/policy: hierarchical_srl_learned_nav - _self_ @@ -35,90 +36,6 @@ habitat_baselines: video_option: ["disk"] rl: - policy: - name: "HierarchicalPolicy" - obs_transforms: - add_virtual_keys: - virtual_keys: - "goal_to_agent_gps_compass": 2 - hierarchical_policy: - high_level_policy: - name: "FixedHighLevelPolicy" - add_arm_rest: True - defined_skills: - open_cab: - skill_name: "ArtObjSkillPolicy" - name: "PointNavResNetPolicy" - load_ckpt_file: "data/models/open_cab.pth" - max_skill_steps: 200 - - open_fridge: - skill_name: "ArtObjSkillPolicy" - load_ckpt_file: "data/models/open_fridge.pth" - max_skill_steps: 200 - - close_cab: - skill_name: "ArtObjSkillPolicy" - obs_skill_inputs: ["obj_start_sensor"] - load_ckpt_file: "data/models/close_cab.pth" - max_skill_steps: 200 - - close_fridge: - skill_name: "ArtObjSkillPolicy" - obs_skill_inputs: ["obj_start_sensor"] - load_ckpt_file: "data/models/close_fridge.pth" - max_skill_steps: 200 - - pick: - skill_name: "PickSkillPolicy" - name: "PointNavResNetPolicy" - action_distribution_type: "gaussian" - at_resting_threshold: 0.15 - obs_skill_inputs: ["obj_start_sensor"] - load_ckpt_file: "data/models/pick.pth" - max_skill_steps: 200 - - place: - skill_name: "PlaceSkillPolicy" - name: "PointNavResNetPolicy" - action_distribution_type: "gaussian" - at_resting_threshold: 0.15 - obs_skill_inputs: ["obj_goal_sensor"] - load_ckpt_file: "data/models/place.pth" - max_skill_steps: 200 - - nav_to_obj: - skill_name: "NavSkillPolicy" - obs_skill_inputs: ["goal_to_agent_gps_compass"] - obs_skill_input_dim: 2 - load_ckpt_file: "data/models/nav.pth" - max_skill_steps: 300 - force_end_on_timeout: False - - wait_skill: - skill_name: "WaitSkillPolicy" - max_skill_steps: -1 - force_end_on_timeout: False - - reset_arm_skill: - skill_name: "ResetArmSkill" - max_skill_steps: 50 - reset_joint_state: [-4.5003259e-01, -1.0799699e00, 9.9526465e-02, 9.3869519e-01, -7.8854430e-04, 1.5702540e00, 4.6168058e-03] - force_end_on_timeout: False - - use_skills: - # Uncomment if you are also using these skills - # open_cab: "open_cab" - # open_fridge: "open_fridge" - # close_cab: "open_cab" - # close_fridgE: "open_fridge" - pick: "pick" - place: "place" - nav: "nav_to_obj" - nav_to_receptacle: "nav_to_obj" - wait: "wait_skill" - reset_arm: "reset_arm_skill" - ddppo: pretrained: False pretrained_encoder: False diff --git a/habitat-baselines/habitat_baselines/config/rearrange/tp_srl_oracle_nav.yaml b/habitat-baselines/habitat_baselines/config/rearrange/tp_srl_oracle_nav.yaml index f456242704..6a4a769b04 100644 --- a/habitat-baselines/habitat_baselines/config/rearrange/tp_srl_oracle_nav.yaml +++ b/habitat-baselines/habitat_baselines/config/rearrange/tp_srl_oracle_nav.yaml @@ -5,6 +5,7 @@ defaults: - /habitat_baselines: habitat_baselines_rl_config_base - /habitat_baselines/rl/policy/obs_transforms: - add_virtual_keys_base + - /habitat_baselines/rl/policy: hierarchical_srl_oracle_nav - /habitat/task/actions: - 
oracle_nav_action - arm_action @@ -26,6 +27,7 @@ habitat: - is_holding - localization_sensor + habitat_baselines: verbose: False trainer_name: "ddppo" @@ -33,8 +35,6 @@ habitat_baselines: tensorboard_dir: "tb" video_dir: "video_dir" video_fps: 30 - video_render_views: - - "third_rgb_sensor" test_episode_count: -1 eval_ckpt_path_dir: "" num_environments: 1 @@ -53,87 +53,6 @@ habitat_baselines: video_option: ["disk"] rl: - policy: - name: "HierarchicalPolicy" - obs_transforms: - add_virtual_keys: - virtual_keys: - "goal_to_agent_gps_compass": 2 - hierarchical_policy: - high_level_policy: - name: "FixedHighLevelPolicy" - add_arm_rest: True - defined_skills: - open_cab: - skill_name: "ArtObjSkillPolicy" - name: "PointNavResNetPolicy" - load_ckpt_file: "data/models/open_cab.pth" - max_skill_steps: 200 - - open_fridge: - skill_name: "ArtObjSkillPolicy" - load_ckpt_file: "data/models/open_fridge.pth" - max_skill_steps: 200 - - close_cab: - skill_name: "ArtObjSkillPolicy" - obs_skill_inputs: ["obj_start_sensor"] - load_ckpt_file: "data/models/close_cab.pth" - max_skill_steps: 200 - - close_fridge: - skill_name: "ArtObjSkillPolicy" - obs_skill_inputs: ["obj_start_sensor"] - load_ckpt_file: "data/models/close_fridge.pth" - max_skill_steps: 200 - - pick: - skill_name: "PickSkillPolicy" - name: "PointNavResNetPolicy" - action_distribution_type: "gaussian" - at_resting_threshold: 0.15 - obs_skill_inputs: ["obj_start_sensor"] - load_ckpt_file: "data/models/pick.pth" - max_skill_steps: 200 - - place: - skill_name: "PlaceSkillPolicy" - name: "PointNavResNetPolicy" - action_distribution_type: "gaussian" - at_resting_threshold: 0.15 - obs_skill_inputs: ["obj_goal_sensor"] - load_ckpt_file: "data/models/place.pth" - max_skill_steps: 200 - - wait_skill: - skill_name: "WaitSkillPolicy" - max_skill_steps: -1 - force_end_on_timeout: False - - gt_nav: - skill_name: "OracleNavPolicy" - obs_skill_inputs: ["obj_start_sensor", "abs_obj_start_sensor", "obj_goal_sensor", "abs_obj_goal_sensor"] - max_skill_steps: 300 - - reset_arm_skill: - skill_name: "ResetArmSkill" - max_skill_steps: 50 - reset_joint_state: [-4.5003259e-01, -1.0799699e00, 9.9526465e-02, 9.3869519e-01, -7.8854430e-04, 1.5702540e00, 4.6168058e-03] - force_end_on_timeout: False - - use_skills: - # Uncomment if you are also using these skills - # open_cab: "open_cab" - # open_fridge: "open_fridge" - # close_cab: "open_cab" - # close_fridge: "open_fridge" - pick: "pick" - place: "place" - nav: "gt_nav" - nav_to_receptacle: "gt_nav" - wait: "wait_skill" - reset_arm: "reset_arm_skill" - ddppo: pretrained: False pretrained_encoder: False diff --git a/habitat-lab/habitat/config/habitat/task/rearrange/prepare_groceries.yaml b/habitat-lab/habitat/config/habitat/task/rearrange/prepare_groceries.yaml index d404648720..bb54918c66 100644 --- a/habitat-lab/habitat/config/habitat/task/rearrange/prepare_groceries.yaml +++ b/habitat-lab/habitat/config/habitat/task/rearrange/prepare_groceries.yaml @@ -28,6 +28,7 @@ defaults: - end_effector_sensor - target_start_gps_compass_sensor - target_goal_gps_compass_sensor + - localization_sensor - _self_ type: RearrangeCompositeTask-v0 diff --git a/habitat-lab/habitat/config/habitat/task/rearrange/rearrange_easy_multi_agent.yaml b/habitat-lab/habitat/config/habitat/task/rearrange/rearrange_easy_multi_agent.yaml index b4bcb800f2..1f84e938a7 100644 --- a/habitat-lab/habitat/config/habitat/task/rearrange/rearrange_easy_multi_agent.yaml +++ b/habitat-lab/habitat/config/habitat/task/rearrange/rearrange_easy_multi_agent.yaml @@ -24,6 
+24,7 @@ defaults: - end_effector_sensor - target_start_gps_compass_sensor - target_goal_gps_compass_sensor + - localization_sensor - _self_ type: RearrangeCompositeTask-v0 diff --git a/habitat-lab/habitat/config/habitat/task/rearrange/set_table.yaml b/habitat-lab/habitat/config/habitat/task/rearrange/set_table.yaml index 662f8da3bb..f11cc889d9 100644 --- a/habitat-lab/habitat/config/habitat/task/rearrange/set_table.yaml +++ b/habitat-lab/habitat/config/habitat/task/rearrange/set_table.yaml @@ -28,6 +28,7 @@ defaults: - end_effector_sensor - target_start_gps_compass_sensor - target_goal_gps_compass_sensor + - localization_sensor - _self_ type: RearrangeCompositeTask-v0 diff --git a/habitat-lab/habitat/config/habitat/task/rearrange/tidy_house.yaml b/habitat-lab/habitat/config/habitat/task/rearrange/tidy_house.yaml index f1452911a4..6d176c934a 100644 --- a/habitat-lab/habitat/config/habitat/task/rearrange/tidy_house.yaml +++ b/habitat-lab/habitat/config/habitat/task/rearrange/tidy_house.yaml @@ -28,6 +28,7 @@ defaults: - end_effector_sensor - target_start_gps_compass_sensor - target_goal_gps_compass_sensor + - localization_sensor - _self_ type: RearrangeCompositeTask-v0 From bb2e4da257cecd936b3041e81342f520809bce70 Mon Sep 17 00:00:00 2001 From: Xavier Puig Date: Mon, 9 Jan 2023 09:58:36 -0800 Subject: [PATCH 05/48] Update README.md Fixed link for PDDL --- habitat-baselines/habitat_baselines/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/habitat-baselines/habitat_baselines/README.md b/habitat-baselines/habitat_baselines/README.md index e2e06f258d..a37992654c 100644 --- a/habitat-baselines/habitat_baselines/README.md +++ b/habitat-baselines/habitat_baselines/README.md @@ -55,7 +55,7 @@ Change the `/benchmark/nav/pointnav: pointnav_gibson` in `habitat_baselines/conf ### Hierarchical Reinforcement Learning (HRL) -We provide a two-layer hierarchical policy class, consisting of a low-level skill that moves the robot, and a high-level policy that reasons about which low-level skill to use in the current state. This can be especially powerful in long-horizon mobile manipulation tasks, like those introduced in [Habitat2.0](https://arxiv.org/abs/2106.14405). Both the low- and high- level can be either learned or an oracle. For oracle high-level we use [PDDL] (https://planning.wiki/guide/whatis/pddl), and for oracle low-level we use instantaneous transitions, with the environment set to the final desired state. Additionally, for navigation, we provide an oracle navigation skill that uses A-star and the map of the environment to move the robot to its goal. +We provide a two-layer hierarchical policy class, consisting of a low-level skill that moves the robot, and a high-level policy that reasons about which low-level skill to use in the current state. This can be especially powerful in long-horizon mobile manipulation tasks, like those introduced in [Habitat2.0](https://arxiv.org/abs/2106.14405). Both the low- and high- level can be either learned or an oracle. For oracle high-level we use [PDDL](https://planning.wiki/guide/whatis/pddl), and for oracle low-level we use instantaneous transitions, with the environment set to the final desired state. Additionally, for navigation, we provide an oracle navigation skill that uses A-star and the map of the environment to move the robot to its goal. To run the following examples, you need the [ReplicaCAD dataset](https://github.com/facebookresearch/habitat-sim/blob/main/DATASETS.md#replicacad). 
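[Editor's note] The hierarchical setup the README above describes reduces to a simple control loop: the high-level policy picks a low-level skill, the skill emits environment actions until it reports termination, and only then is the high-level policy queried again. The sketch below illustrates that loop in isolation. It is a minimal, self-contained example; the names `Skill`, `FixedHighLevel`, and `run_episode` are hypothetical and are not part of the habitat-baselines API.

```python
from typing import Dict, List


class Skill:
    """Toy low-level skill: runs for a fixed number of steps, then asks to stop."""

    def __init__(self, name: str, max_steps: int):
        self.name = name
        self.max_steps = max_steps
        self._step = 0

    def reset(self) -> None:
        self._step = 0

    def should_terminate(self) -> bool:
        return self._step >= self.max_steps

    def act(self, observation: Dict[str, int]) -> str:
        self._step += 1
        return f"{self.name}:step{self._step}"


class FixedHighLevel:
    """Toy fixed high-level policy: hands out skills from a predefined plan."""

    def __init__(self, plan: List[Skill]):
        self._plan = plan
        self._idx = -1

    def next_skill(self) -> Skill:
        self._idx = min(self._idx + 1, len(self._plan) - 1)
        skill = self._plan[self._idx]
        skill.reset()
        return skill


def run_episode(num_steps: int = 10) -> None:
    high_level = FixedHighLevel([Skill("nav", 3), Skill("pick", 2), Skill("place", 2)])
    skill = high_level.next_skill()
    for t in range(num_steps):
        observation = {"step": t}
        # The high-level policy is only consulted when the current skill terminates,
        # which is the same gating the hierarchical policy applies per environment.
        if skill.should_terminate():
            skill = high_level.next_skill()
        action = skill.act(observation)
        print(t, skill.name, action)


if __name__ == "__main__":
    run_episode()
```

A learned high-level policy would replace `FixedHighLevel.next_skill` with a network mapping the current observation to a skill index, which is what the `NeuralHighLevelPolicy` configs in this patch series set up.
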
From 628063d3ace15548aba8b21e94eb0a9ac143f7cb Mon Sep 17 00:00:00 2001 From: Xavier Puig Date: Mon, 9 Jan 2023 11:41:00 -0800 Subject: [PATCH 06/48] Match tensor device when checking if the skills is done --- habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py b/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py index 4e95a29221..3344fb7be9 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py @@ -195,6 +195,8 @@ def should_terminate( ) self._delay_term[env_i] = True is_skill_done[i] = 0.0 + + is_skill_done = is_skill_done.to(hl_says_term.device) is_skill_done |= hl_says_term if bad_terminate.sum() > 0: From d179ecf9b408f7519b9149bacdfe35193cc21597 Mon Sep 17 00:00:00 2001 From: Xavier Puig Date: Fri, 13 Jan 2023 09:03:30 -0800 Subject: [PATCH 07/48] Train hl modif2 (#1076) * make should_terminate consistently on cpu * Corrected config files for skills using noop + bug in pddl_apply_action * bug fixes in apply postcond * Update hierarchical_policy.py --- .../rl/policy/hierarchical_tp_noop_onav.yaml | 2 ++ .../rl/hrl/hierarchical_policy.py | 8 ++++-- .../rl/hrl/skills/oracle_nav.py | 2 +- .../habitat_baselines/rl/hrl/skills/skill.py | 25 +++++++++++-------- 4 files changed, 23 insertions(+), 14 deletions(-) diff --git a/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_tp_noop_onav.yaml b/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_tp_noop_onav.yaml index d090a528c7..4540b9aa88 100644 --- a/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_tp_noop_onav.yaml +++ b/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_tp_noop_onav.yaml @@ -36,12 +36,14 @@ hierarchical_policy: obs_skill_inputs: ["obj_start_sensor"] max_skill_steps: 1 apply_postconds: True + force_end_on_timeout: False place: skill_name: "NoopSkillPolicy" obs_skill_inputs: ["obj_goal_sensor"] max_skill_steps: 1 apply_postconds: True + force_end_on_timeout: False nav_to_obj: skill_name: "OracleNavPolicy" diff --git a/habitat-baselines/habitat_baselines/rl/hrl/hierarchical_policy.py b/habitat-baselines/habitat_baselines/rl/hrl/hierarchical_policy.py index b8f63ad9eb..ad0226262e 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/hierarchical_policy.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/hierarchical_policy.py @@ -212,7 +212,7 @@ def act( masks, self._cur_skills, log_info, - ) + ).cpu() # Compute the actions from the current skills actions = torch.zeros( (self._num_envs, get_num_actions(self._action_space)), @@ -239,9 +239,12 @@ def act( # Policy has not prediced a skill yet. should_terminate[batch_ids] = 1.0 continue + # TODO: either change name of the function or assign actions somewhere + # else. Updating actions in should_terminate is counterintuitive ( should_terminate[batch_ids], bad_should_terminate[batch_ids], + actions[batch_ids] ) = self._skills[skill_id].should_terminate( **dat, batch_idx=batch_ids, @@ -315,7 +318,8 @@ def act( ) # LL skills are not allowed to terminate the overall episode. 
- actions[batch_ids] = action_data.actions + actions[batch_ids] += action_data.actions + # Add actions from apply_postcond rnn_hidden_states[batch_ids] = action_data.rnn_hidden_states actions[:, self._stop_action_idx] = 0.0 diff --git a/habitat-baselines/habitat_baselines/rl/hrl/skills/oracle_nav.py b/habitat-baselines/habitat_baselines/rl/hrl/skills/oracle_nav.py index 374e17494c..5a27d3f527 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/skills/oracle_nav.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/skills/oracle_nav.py @@ -116,7 +116,7 @@ def _is_skill_done( masks, batch_idx, ) -> torch.BoolTensor: - ret = torch.zeros(masks.shape[0], dtype=torch.bool).to(masks.device) + ret = torch.zeros(masks.shape[0], dtype=torch.bool) cur_pos = observations[LocalizationSensor.cls_uuid].cpu() diff --git a/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py b/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py index 3344fb7be9..a0ca5854b5 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py @@ -38,7 +38,9 @@ def __init__( None for _ in range(self._batch_size) ] - if "pddl_apply_action" in action_space: + # TODO: for some reason this doesnt work with "pddl_apply_action" in action_space + # and needs to go through the keys argument + if "pddl_apply_action" in list(action_space.keys()): self._pddl_ac_start, _ = find_action_range( action_space, "pddl_apply_action" ) @@ -102,6 +104,9 @@ def _apply_postcond( env_i, idx, ): + """ + Modifies the actions according to the postconditions set in self._pddl_problem.actions[skill_name] + """ skill_args = self._raw_skill_args[env_i] action = self._pddl_problem.actions[skill_name] @@ -137,7 +142,7 @@ def _apply_postcond( apply_action.set_param_values(entities) log_info[env_i]["pddl_action"] = apply_action.compact_str - return actions + return actions[idx] def should_terminate( self, @@ -156,7 +161,7 @@ def should_terminate( """ is_skill_done = self._is_skill_done( observations, rnn_hidden_states, prev_actions, masks, batch_idx - ) + ).cpu() if is_skill_done.sum() > 0: self._internal_log( f"Requested skill termination {is_skill_done}", @@ -177,10 +182,10 @@ def should_terminate( if self._config.max_skill_steps > 0: over_max_len = cur_skill_step > self._config.max_skill_steps if self._config.force_end_on_timeout: - bad_terminate = over_max_len + bad_terminate = over_max_len.cpu() else: - is_skill_done = is_skill_done | over_max_len - + is_skill_done = is_skill_done | over_max_len.cpu() + new_actions = torch.zeros_like(actions) for i, env_i in enumerate(batch_idx): if self._delay_term[env_i]: self._delay_term[env_i] = False @@ -190,22 +195,20 @@ def should_terminate( and is_skill_done[i] == 1.0 and hl_says_term[i] == 0.0 ): - actions = self._apply_postcond( + new_actions[i] = self._apply_postcond( actions, log_info, skill_name[i], env_i, i ) self._delay_term[env_i] = True is_skill_done[i] = 0.0 - is_skill_done = is_skill_done.to(hl_says_term.device) - is_skill_done |= hl_says_term + is_skill_done |= hl_says_term.cpu() if bad_terminate.sum() > 0: self._internal_log( f"Bad terminating due to timeout {cur_skill_step}, {bad_terminate}", observations, ) - - return is_skill_done, bad_terminate + return is_skill_done.cpu(), bad_terminate.cpu(), new_actions def on_enter( self, From 8eddcd153858fc2cb9892e2355e978f9bd4e05b5 Mon Sep 17 00:00:00 2001 From: ASzot Date: Thu, 19 Jan 2023 21:59:24 -0800 Subject: [PATCH 08/48] Fixed RNN problem --- 
.../common/rollout_storage.py | 22 ++++- .../defined_skills/nn_skills.yaml | 43 ++++++++ .../defined_skills/noop_skills.yaml | 52 ++++++++++ .../rl/policy/hierarchical_srl_onav.yaml | 77 --------------- .../rl/policy/hierarchical_tp_noop_onav.yaml | 74 -------------- .../rl/policy/hierarchical_tp_srl.yaml | 71 ------------- .../habitat_baselines/rl/policy/hl_fixed.yaml | 21 ++++ .../rl/policy/hl_neural.yaml | 29 ++++++ .../config/rearrange/rl_hierarchical.yaml | 66 +++++++++++++ ...v.yaml => rl_hierarchical_oracle_nav.yaml} | 19 ++-- .../config/rearrange/rl_skill.yaml | 1 - .../config/rearrange/spap_pick.yaml | 39 -------- .../config/rearrange/spap_reach_state.yaml | 42 -------- .../config/rearrange/tp_srl.yaml | 42 -------- .../config/rearrange/tp_srl_oracle_nav.yaml | 61 ------------ .../rl/hrl/hierarchical_policy.py | 93 ++++++++++++------ .../rl/hrl/hl/high_level_policy.py | 27 +++++ .../rl/hrl/hl/neural_policy.py | 40 +++++++- .../habitat_baselines/rl/hrl/hrl_ppo.py | 14 +-- .../rl/hrl/hrl_rollout_storage.py | 41 ++++++-- .../rl/hrl/skills/art_obj.py | 2 +- .../rl/hrl/skills/nn_skill.py | 10 +- .../rl/hrl/skills/oracle_nav.py | 7 +- .../habitat_baselines/rl/hrl/skills/reset.py | 19 ++-- .../habitat_baselines/rl/hrl/skills/skill.py | 15 +-- .../habitat_baselines/rl/hrl/skills/wait.py | 4 +- .../habitat_baselines/rl/ppo/policy.py | 49 ++++++--- .../habitat_baselines/rl/ppo/ppo_trainer.py | 62 ++++++------ ...55.s-164-67-210-79.resnet.ucla.edu.58058.0 | Bin 0 -> 40 bytes .../benchmark/rearrange/rearrange_easy.yaml | 1 + test/test_baseline_training.py | 56 ++++++----- 31 files changed, 536 insertions(+), 563 deletions(-) create mode 100644 habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_policy/defined_skills/nn_skills.yaml create mode 100644 habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_policy/defined_skills/noop_skills.yaml delete mode 100644 habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_srl_onav.yaml delete mode 100644 habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_tp_noop_onav.yaml delete mode 100644 habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_tp_srl.yaml create mode 100644 habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hl_fixed.yaml create mode 100644 habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hl_neural.yaml create mode 100644 habitat-baselines/habitat_baselines/config/rearrange/rl_hierarchical.yaml rename habitat-baselines/habitat_baselines/config/rearrange/{rl_hl_srl_onav.yaml => rl_hierarchical_oracle_nav.yaml} (81%) delete mode 100644 habitat-baselines/habitat_baselines/config/rearrange/spap_pick.yaml delete mode 100644 habitat-baselines/habitat_baselines/config/rearrange/spap_reach_state.yaml delete mode 100644 habitat-baselines/habitat_baselines/config/rearrange/tp_srl.yaml delete mode 100644 habitat-baselines/habitat_baselines/config/rearrange/tp_srl_oracle_nav.yaml create mode 100644 habitat-baselines/tb/events.out.tfevents.1674193555.s-164-67-210-79.resnet.ucla.edu.58058.0 diff --git a/habitat-baselines/habitat_baselines/common/rollout_storage.py b/habitat-baselines/habitat_baselines/common/rollout_storage.py index 8bc0b3b9f2..50d11942c2 100644 --- a/habitat-baselines/habitat_baselines/common/rollout_storage.py +++ b/habitat-baselines/habitat_baselines/common/rollout_storage.py @@ -5,7 +5,7 @@ # LICENSE file in the root directory of this 
source tree. import warnings -from typing import Any, Dict, Iterator, Optional, Tuple +from typing import Any, Dict, Iterator, Optional import numpy as np import torch @@ -16,6 +16,10 @@ build_pack_info_from_dones, build_rnn_build_seq_info, ) +from habitat_baselines.utils.common import ( + get_num_actions, + is_continuous_action_space, +) @baseline_registry.register_storage @@ -30,10 +34,22 @@ def __init__( action_space, recurrent_hidden_state_size, num_recurrent_layers=1, - action_shape: Optional[Tuple[int]] = None, is_double_buffered: bool = False, - discrete_actions: bool = True, ): + + if is_continuous_action_space(action_space): + # Assume ALL actions are NOT discrete + action_shape = ( + get_num_actions( + action_space, + ), + ) + discrete_actions = False + else: + # For discrete pointnav + action_shape = (1,) + discrete_actions = True + self.buffers = TensorDict() self.buffers["observations"] = TensorDict() diff --git a/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_policy/defined_skills/nn_skills.yaml b/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_policy/defined_skills/nn_skills.yaml new file mode 100644 index 0000000000..24d7cbca38 --- /dev/null +++ b/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_policy/defined_skills/nn_skills.yaml @@ -0,0 +1,43 @@ +open_cab: + skill_name: "ArtObjSkillPolicy" + load_ckpt_file: "data/models/open_cab.pth" + +open_fridge: + skill_name: "ArtObjSkillPolicy" + load_ckpt_file: "data/models/open_fridge.pth" + +close_cab: + skill_name: "ArtObjSkillPolicy" + load_ckpt_file: "data/models/close_cab.pth" + +close_fridge: + skill_name: "ArtObjSkillPolicy" + load_ckpt_file: "data/models/close_fridge.pth" + +pick: + skill_name: "PickSkillPolicy" + obs_skill_inputs: ["obj_start_sensor"] + load_ckpt_file: "data/models/pick.pth" + +place: + skill_name: "PlaceSkillPolicy" + obs_skill_inputs: ["obj_goal_sensor"] + load_ckpt_file: "data/models/place.pth" + +wait_skill: + skill_name: "WaitSkillPolicy" + max_skill_steps: -1 + force_end_on_timeout: False + +nav_to_obj: + skill_name: "NavSkillPolicy" + obs_skill_inputs: ["goal_to_agent_gps_compass"] + load_ckpt_file: "data/models/nav.pth" + max_skill_steps: 300 + obs_skill_input_dim: 2 + +reset_arm_skill: + skill_name: "ResetArmSkill" + max_skill_steps: 50 + reset_joint_state: [-4.50e-01, -1.08e00, 9.95e-02, 9.38e-01, -7.88e-04, 1.57e00, 4.62e-03] + force_end_on_timeout: False diff --git a/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_policy/defined_skills/noop_skills.yaml b/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_policy/defined_skills/noop_skills.yaml new file mode 100644 index 0000000000..c8b1a34e29 --- /dev/null +++ b/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_policy/defined_skills/noop_skills.yaml @@ -0,0 +1,52 @@ +open_cab: + skill_name: "NoopSkillPolicy" + max_skill_steps: 1 + apply_postconds: True + +open_fridge: + skill_name: "NoopSkillPolicy" + max_skill_steps: 1 + apply_postconds: True + +close_cab: + skill_name: "NoopSkillPolicy" + obs_skill_inputs: ["obj_start_sensor"] + max_skill_steps: 1 + +close_fridge: + skill_name: "NoopSkillPolicy" + obs_skill_inputs: ["obj_start_sensor"] + max_skill_steps: 1 + apply_postconds: True + +pick: + skill_name: "NoopSkillPolicy" + obs_skill_inputs: ["obj_start_sensor"] + max_skill_steps: 1 + apply_postconds: True + force_end_on_timeout: False + 
+place: + skill_name: "NoopSkillPolicy" + obs_skill_inputs: ["obj_goal_sensor"] + max_skill_steps: 1 + apply_postconds: True + force_end_on_timeout: False + +wait_skill: + skill_name: "WaitSkillPolicy" + max_skill_steps: -1 + +nav_to_obj: + skill_name: "NoopSkillPolicy" + obs_skill_inputs: ["goal_to_agent_gps_compass"] + max_skill_steps: 1 + apply_postconds: True + force_end_on_timeout: False + obs_skill_input_dim: 2 + +reset_arm_skill: + skill_name: "ResetArmSkill" + max_skill_steps: 50 + reset_joint_state: [-4.50e-01, -1.07e00, 9.95e-02, 9.38e-01, -7.88e-04, 1.57e00, 4.62e-03] + force_end_on_timeout: False diff --git a/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_srl_onav.yaml b/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_srl_onav.yaml deleted file mode 100644 index d92283dbc2..0000000000 --- a/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_srl_onav.yaml +++ /dev/null @@ -1,77 +0,0 @@ -name: "HierarchicalPolicy" -obs_transforms: - add_virtual_keys: - virtual_keys: - "goal_to_agent_gps_compass": 2 -hierarchical_policy: - high_level_policy: - name: "NeuralHighLevelPolicy" - allow_other_place: False - hidden_dim: 512 - use_rnn: True - rnn_type: 'LSTM' - backbone: resnet18 - normalize_visual_inputs: False - num_rnn_layers: 2 - policy_input_keys: - - "robot_head_depth" - defined_skills: - open_cab: - skill_name: "ArtObjSkillPolicy" - name: "PointNavResNetPolicy" - load_ckpt_file: "data/models/open_cab.pth" - - open_fridge: - skill_name: "ArtObjSkillPolicy" - load_ckpt_file: "data/models/open_fridge.pth" - - close_cab: - skill_name: "ArtObjSkillPolicy" - obs_skill_inputs: ["obj_start_sensor"] - load_ckpt_file: "data/models/close_cab.pth" - - close_fridge: - skill_name: "ArtObjSkillPolicy" - obs_skill_inputs: ["obj_start_sensor"] - load_ckpt_file: "data/models/close_fridge.pth" - - pick: - skill_name: "PickSkillPolicy" - name: "PointNavResNetPolicy" - obs_skill_inputs: ["obj_start_sensor"] - load_ckpt_file: "data/models/pick.pth" - - place: - skill_name: "PlaceSkillPolicy" - name: "PointNavResNetPolicy" - obs_skill_inputs: ["obj_goal_sensor"] - load_ckpt_file: "data/models/place.pth" - - wait_skill: - skill_name: "WaitSkillPolicy" - max_skill_steps: -1 - force_end_on_timeout: False - - oracle_nav: - skill_name: "OracleNavPolicy" - obs_skill_inputs: ["obj_start_sensor", "abs_obj_start_sensor", "obj_goal_sensor", "abs_obj_goal_sensor"] - max_skill_steps: 300 - - reset_arm_skill: - skill_name: "ResetArmSkill" - max_skill_steps: 50 - reset_joint_state: [-4.50e-01, -1.08e00, 9.95e-02, 9.38e-01, -7.88e-04, 1.57e00, 4.62e-03] - force_end_on_timeout: False - - use_skills: - # Uncomment if you are also using these skills - # open_cab: "open_cab" - # open_fridge: "open_fridge" - # close_cab: "open_cab" - # close_fridge: "open_fridge" - pick: "pick" - place: "place" - nav: "oracle_nav" - nav_to_receptacle: "oracle_nav" - wait: "wait_skill" - reset_arm: "reset_arm_skill" \ No newline at end of file diff --git a/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_tp_noop_onav.yaml b/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_tp_noop_onav.yaml deleted file mode 100644 index 4540b9aa88..0000000000 --- a/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_tp_noop_onav.yaml +++ /dev/null @@ -1,74 +0,0 @@ - -name: "HierarchicalPolicy" -obs_transforms: - add_virtual_keys: - virtual_keys: - 
"goal_to_agent_gps_compass": 2 -hierarchical_policy: - high_level_policy: - name: "FixedHighLevelPolicy" - add_arm_rest: True - - defined_skills: - open_cab: - skill_name: "NoopSkillPolicy" - max_skill_steps: 1 - apply_postconds: True - - open_fridge: - skill_name: "NoopSkillPolicy" - max_skill_steps: 1 - apply_postconds: True - - close_cab: - skill_name: "NoopSkillPolicy" - obs_skill_inputs: ["obj_start_sensor"] - max_skill_steps: 1 - - close_fridge: - skill_name: "NoopSkillPolicy" - obs_skill_inputs: ["obj_start_sensor"] - max_skill_steps: 1 - apply_postconds: True - - pick: - skill_name: "NoopSkillPolicy" - obs_skill_inputs: ["obj_start_sensor"] - max_skill_steps: 1 - apply_postconds: True - force_end_on_timeout: False - - place: - skill_name: "NoopSkillPolicy" - obs_skill_inputs: ["obj_goal_sensor"] - max_skill_steps: 1 - apply_postconds: True - force_end_on_timeout: False - - nav_to_obj: - skill_name: "OracleNavPolicy" - obs_skill_inputs: ["obj_start_sensor", "abs_obj_start_sensor", "obj_goal_sensor", "abs_obj_goal_sensor"] - max_skill_steps: 300 - - wait_skill: - skill_name: "WaitSkillPolicy" - max_skill_steps: -1 - force_end_on_timeout: False - - reset_arm_skill: - skill_name: "ResetArmSkill" - max_skill_steps: 50 - reset_joint_state: [-4.50e-01, -1.07e00, 9.95e-02, 9.38e-01, -7.88e-04, 1.57e00, 4.62e-03] - force_end_on_timeout: False - - use_skills: - open_cab: "open_cab" - open_fridge: "open_fridge" - close_cab: "close_cab" - close_fridge: "close_fridge" - pick: "pick" - place: "place" - nav: "nav_to_obj" - nav_to_receptacle: "nav_to_obj" - wait: "wait_skill" - reset_arm: "reset_arm_skill" diff --git a/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_tp_srl.yaml b/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_tp_srl.yaml deleted file mode 100644 index 3ac608e23b..0000000000 --- a/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_tp_srl.yaml +++ /dev/null @@ -1,71 +0,0 @@ -name: "HierarchicalPolicy" -obs_transforms: - add_virtual_keys: - virtual_keys: - "goal_to_agent_gps_compass": 2 -hierarchical_policy: - high_level_policy: - name: "FixedHighLevelPolicy" - add_arm_rest: True - defined_skills: - open_cab: - skill_name: "ArtObjSkillPolicy" - name: "PointNavResNetPolicy" - load_ckpt_file: "data/models/open_cab.pth" - - open_fridge: - skill_name: "ArtObjSkillPolicy" - skill_name: "ArtObjSkillPolicy" - load_ckpt_file: "data/models/open_fridge.pth" - - close_cab: - skill_name: "ArtObjSkillPolicy" - obs_skill_inputs: ["obj_start_sensor"] - load_ckpt_file: "data/models/close_cab.pth" - - close_fridge: - skill_name: "ArtObjSkillPolicy" - obs_skill_inputs: ["obj_start_sensor"] - load_ckpt_file: "data/models/close_fridge.pth" - - pick: - skill_name: "PickSkillPolicy" - obs_skill_inputs: ["obj_start_sensor"] - load_ckpt_file: "data/models/pick.pth" - - place: - skill_name: "PlaceSkillPolicy" - obs_skill_inputs: ["obj_goal_sensor"] - load_ckpt_file: "data/models/place.pth" - - wait_skill: - skill_name: "WaitSkillPolicy" - max_skill_steps: -1 - force_end_on_timeout: False - - nav_to_obj: - skill_name: "NavSkillPolicy" - obs_skill_inputs: ["goal_to_agent_gps_compass"] - obs_skill_input_dim: 2 - load_ckpt_file: "data/models/nav.pth" - max_skill_steps: 300 - force_end_on_timeout: False - - reset_arm_skill: - skill_name: "ResetArmSkill" - max_skill_steps: 50 - reset_joint_state: [-4.50e-01, -1.08e00, 9.95e-02, 9.38e-01, -7.88e-04, 1.57e00, 4.62e-03] - force_end_on_timeout: False - - 
use_skills: - # Uncomment if you are also using these skills - # open_cab: "open_cab" - # open_fridge: "open_fridge" - # close_cab: "open_cab" - # close_fridge: "open_fridge" - pick: "pick" - place: "place" - nav: "nav_to_obj" - nav_to_receptacle: "nav_to_obj" - wait: "wait_skill" - reset_arm: "reset_arm_skill" \ No newline at end of file diff --git a/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hl_fixed.yaml b/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hl_fixed.yaml new file mode 100644 index 0000000000..0aef449dbe --- /dev/null +++ b/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hl_fixed.yaml @@ -0,0 +1,21 @@ +name: "HierarchicalPolicy" +obs_transforms: + add_virtual_keys: + virtual_keys: + "goal_to_agent_gps_compass": 2 +hierarchical_policy: + high_level_policy: + name: "FixedHighLevelPolicy" + add_arm_rest: True + use_skills: + open_cab: "open_cab" + open_fridge: "open_fridge" + close_cab: "close_cab" + close_fridge: "close_fridge" + pick: "pick" + place: "place" + nav: "nav_to_obj" + nav_to_receptacle: "nav_to_obj" + wait: "wait_skill" + reset_arm: "reset_arm_skill" + defined_skills: {} diff --git a/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hl_neural.yaml b/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hl_neural.yaml new file mode 100644 index 0000000000..f15a1eb389 --- /dev/null +++ b/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hl_neural.yaml @@ -0,0 +1,29 @@ +name: "HierarchicalPolicy" +obs_transforms: + add_virtual_keys: + virtual_keys: + "goal_to_agent_gps_compass": 2 +hierarchical_policy: + high_level_policy: + name: "NeuralHighLevelPolicy" + allow_other_place: False + hidden_dim: 512 + use_rnn: True + rnn_type: 'LSTM' + backbone: resnet18 + normalize_visual_inputs: False + num_rnn_layers: 2 + policy_input_keys: + - "robot_head_depth" + use_skills: + open_cab: "open_cab" + open_fridge: "open_fridge" + close_cab: "open_cab" + close_fridge: "open_fridge" + pick: "pick" + place: "place" + nav: "nav_to_obj" + nav_to_receptacle: "nav_to_obj" + wait: "wait_skill" + reset_arm: "reset_arm_skill" + defined_skills: {} diff --git a/habitat-baselines/habitat_baselines/config/rearrange/rl_hierarchical.yaml b/habitat-baselines/habitat_baselines/config/rearrange/rl_hierarchical.yaml new file mode 100644 index 0000000000..df1f0ee015 --- /dev/null +++ b/habitat-baselines/habitat_baselines/config/rearrange/rl_hierarchical.yaml @@ -0,0 +1,66 @@ +# @package _global_ + +defaults: + - /benchmark/rearrange: rearrange_easy + - /habitat_baselines: habitat_baselines_rl_config_base + - /habitat_baselines/rl/policy/obs_transforms: + - add_virtual_keys_base + - /habitat_baselines/rl/policy: hl_fixed + - /habitat_baselines/rl/policy/hierarchical_policy/defined_skills: nn_skills + - _self_ + +habitat_baselines: + verbose: False + trainer_name: "ddppo" + torch_gpu_id: 0 + video_fps: 30 + eval_ckpt_path_dir: "" + num_environments: 3 + writer_type: 'tb' + num_updates: -1 + total_num_steps: 1.0e8 + log_interval: 10 + num_checkpoints: 20 + force_torch_single_threaded: True + eval_keys_to_include_in_name: ['reward', 'force', 'composite_success'] + load_resume_state_config: False + + eval: + use_ckpt_config: False + should_load_ckpt: False + video_option: ["disk"] + + rl: + ppo: + # ppo params + clip_param: 0.2 + ppo_epoch: 2 + num_mini_batch: 2 + value_loss_coef: 0.5 + entropy_coef: 0.0001 + lr: 2.5e-4 + eps: 1e-5 + max_grad_norm: 0.2 + num_steps: 128 + 
use_gae: True + gamma: 0.99 + tau: 0.95 + + ddppo: + sync_frac: 0.6 + # The PyTorch distributed backend to use + distrib_backend: NCCL + # Visual encoder backbone + pretrained_weights: data/ddppo-models/gibson-2plus-resnet50.pth + # Initialize with pretrained weights + pretrained: False + # Initialize just the visual encoder backbone with pretrained weights + pretrained_encoder: False + # Whether the visual encoder backbone will be trained. + train_encoder: True + # Whether to reset the critic linear layer + reset_critic: False + # Model parameters + backbone: resnet18 + rnn_type: LSTM + num_recurrent_layers: 2 diff --git a/habitat-baselines/habitat_baselines/config/rearrange/rl_hl_srl_onav.yaml b/habitat-baselines/habitat_baselines/config/rearrange/rl_hierarchical_oracle_nav.yaml similarity index 81% rename from habitat-baselines/habitat_baselines/config/rearrange/rl_hl_srl_onav.yaml rename to habitat-baselines/habitat_baselines/config/rearrange/rl_hierarchical_oracle_nav.yaml index 5ad654137d..3152f00641 100644 --- a/habitat-baselines/habitat_baselines/config/rearrange/rl_hl_srl_onav.yaml +++ b/habitat-baselines/habitat_baselines/config/rearrange/rl_hierarchical_oracle_nav.yaml @@ -1,14 +1,12 @@ # @package _global_ -# Pick and place are instantaneous skills, -# where environment is set to final state. -# Navigation is kinematically simulated. defaults: - /benchmark/rearrange: rearrange_easy - /habitat_baselines: habitat_baselines_rl_config_base - /habitat_baselines/rl/policy/obs_transforms: - add_virtual_keys_base - - /habitat_baselines/rl/policy: hierarchical_srl_onav + - /habitat_baselines/rl/policy: hl_neural + - /habitat_baselines/rl/policy/hierarchical_policy/defined_skills: noop_skills - /habitat/task/actions: - pddl_apply_action - oracle_nav_action @@ -28,6 +26,7 @@ habitat: - obj_goal_gps_compass - joint - is_holding + - ee_pos - localization_sensor @@ -39,8 +38,6 @@ habitat_baselines: updater_name: "HrlPPO" distrib_updater_name: "HrlDDPPO" video_fps: 30 - video_render_views: - - "third_rgb_sensor" eval_ckpt_path_dir: "" num_environments: 3 writer_type: 'tb' @@ -59,6 +56,16 @@ habitat_baselines: video_option: ["disk"] rl: + policy: + hierarchical_policy: + use_skills: + nav: "oracle_nav" + nav_to_receptacle: "oracle_nav" + defined_skills: + oracle_nav: + skill_name: "OracleNavPolicy" + obs_skill_inputs: ["obj_start_sensor", "abs_obj_start_sensor", "obj_goal_sensor", "abs_obj_goal_sensor"] + max_skill_steps: 300 ppo: # ppo params clip_param: 0.2 diff --git a/habitat-baselines/habitat_baselines/config/rearrange/rl_skill.yaml b/habitat-baselines/habitat_baselines/config/rearrange/rl_skill.yaml index e8908a1062..845561c403 100644 --- a/habitat-baselines/habitat_baselines/config/rearrange/rl_skill.yaml +++ b/habitat-baselines/habitat_baselines/config/rearrange/rl_skill.yaml @@ -18,7 +18,6 @@ habitat_baselines: eval_ckpt_path_dir: "data/new_checkpoints" # 26 environments will just barely be below 16gb. 
num_environments: 26 - checkpoint_folder: "data/new_checkpoints" num_updates: -1 total_num_steps: 1.0e8 log_interval: 10 diff --git a/habitat-baselines/habitat_baselines/config/rearrange/spap_pick.yaml b/habitat-baselines/habitat_baselines/config/rearrange/spap_pick.yaml deleted file mode 100644 index 5ba89283e5..0000000000 --- a/habitat-baselines/habitat_baselines/config/rearrange/spap_pick.yaml +++ /dev/null @@ -1,39 +0,0 @@ -# @package _global_ - -defaults: - - /benchmark/rearrange: pick - - /habitat_baselines: habitat_baselines_spa_config_base - - _self_ - -habitat_baselines: - video_dir: "data/vids/" - - sense_plan_act: - verbose: True - run_freq: 2 - n_grasps: 100 - mp_obj: True - mp_margin: null - mp_render: True - timeout: 3 - exec_ee_thresh: 0.1 - # "Priv" or "Reg" - mp_sim_type: "Priv" - video_dir: 'data/vids' - debug_dir: "data/mp_test" - count_obj_collisions: True - grasp_gen_is_verbose: True - ik_dist_thresh: 0.1 - - eval: - video_option: ["disk"] - -habitat: - gym: - obs_keys: ['joint', 'ee_pos'] - desired_goal_keys: ['obj_goal_pos_sensor'] - achieved_goal_keys: [] - action_keys: ['arm_action'] - task: - success_reward: 2000.0 - end_on_success: False diff --git a/habitat-baselines/habitat_baselines/config/rearrange/spap_reach_state.yaml b/habitat-baselines/habitat_baselines/config/rearrange/spap_reach_state.yaml deleted file mode 100644 index d62f630093..0000000000 --- a/habitat-baselines/habitat_baselines/config/rearrange/spap_reach_state.yaml +++ /dev/null @@ -1,42 +0,0 @@ -# @package _global_ - -defaults: - - /benchmark/rearrange: reach_state - - /habitat_baselines: habitat_baselines_spa_config_base - - _self_ - -habitat_baselines: - video_dir: "data/vids/" - - sense_plan_act: - verbose: True - run_freq: 4 - n_grasps: 100 - mp_obj: True - mp_margin: null - mp_render: True - timeout: 3 - exec_ee_thresh: 0.1 - # "Priv" or "Reg" - mp_sim_type: "Priv" - video_dir: 'data/vids' - debug_dir: "data/mp_test" - count_obj_collisions: True - grasp_gen_is_verbose: True - ik_dist_thresh: 0.1 - ik_speed_factor: 1.0 - - eval: - video_option: ["disk"] - -habitat: - gym: - obs_keys: [ 'joint', 'ee_pos' ] - desired_goal_keys: [ 'resting_position' ] - action_keys: [ 'arm_action' ] - task: - reward_measure: "rearrange_reach_reward" - success_measure: "rearrange_reach_success" - success_reward: 10.0 - slack_reward: -0.01 - end_on_success: False diff --git a/habitat-baselines/habitat_baselines/config/rearrange/tp_srl.yaml b/habitat-baselines/habitat_baselines/config/rearrange/tp_srl.yaml deleted file mode 100644 index 86394ad477..0000000000 --- a/habitat-baselines/habitat_baselines/config/rearrange/tp_srl.yaml +++ /dev/null @@ -1,42 +0,0 @@ -# @package _global_ - -defaults: - - /benchmark/rearrange: rearrange_easy - - /habitat_baselines: habitat_baselines_rl_config_base - - /habitat_baselines/rl/policy/obs_transforms: - - add_virtual_keys_base - - /habitat_baselines/rl/policy: hierarchical_srl_learned_nav - - /habitat/simulator/sim_sensors@habitat_baselines.eval.extra_sim_sensors.third_rgb_sensor: third_rgb_sensor - - _self_ - - -habitat_baselines: - verbose: False - trainer_name: "ddppo" - torch_gpu_id: 0 - tensorboard_dir: "tb" - video_dir: "video_dir" - video_fps: 30 - test_episode_count: -1 - eval_ckpt_path_dir: "" - num_environments: 1 - writer_type: 'tb' - checkpoint_folder: "data/new_checkpoints" - num_updates: -1 - total_num_steps: 1.0e8 - log_interval: 10 - num_checkpoints: 20 - force_torch_single_threaded: True - eval_keys_to_include_in_name: ['reward', 'force', 'composite_success'] - 
load_resume_state_config: False - eval: - use_ckpt_config: False - should_load_ckpt: False - video_option: ["disk"] - - rl: - ddppo: - pretrained: False - pretrained_encoder: False - train_encoder: True - reset_critic: False diff --git a/habitat-baselines/habitat_baselines/config/rearrange/tp_srl_oracle_nav.yaml b/habitat-baselines/habitat_baselines/config/rearrange/tp_srl_oracle_nav.yaml deleted file mode 100644 index 55e684eec3..0000000000 --- a/habitat-baselines/habitat_baselines/config/rearrange/tp_srl_oracle_nav.yaml +++ /dev/null @@ -1,61 +0,0 @@ -# @package _global_ - -defaults: - - /benchmark/rearrange: rearrange_easy - - /habitat_baselines: habitat_baselines_rl_config_base - - /habitat_baselines/rl/policy/obs_transforms: - - add_virtual_keys_base - - /habitat_baselines/rl/policy: hierarchical_srl_oracle_nav - - /habitat/task/actions: - - oracle_nav_action - - arm_action - - base_velocity - - rearrange_stop - - /habitat/simulator/sim_sensors@habitat_baselines.eval.extra_sim_sensors.third_rgb_sensor: third_rgb_sensor - - _self_ - -habitat: - gym: - auto_name: RearrangeEasy - obs_keys: - - robot_head_depth - - relative_resting_position - - obj_start_sensor - - obj_goal_sensor - - obj_start_gps_compass - - obj_goal_gps_compass - - joint - - is_holding - - localization_sensor - - -habitat_baselines: - verbose: False - trainer_name: "ddppo" - torch_gpu_id: 0 - tensorboard_dir: "tb" - video_dir: "video_dir" - video_fps: 30 - test_episode_count: -1 - eval_ckpt_path_dir: "" - num_environments: 1 - writer_type: 'tb' - checkpoint_folder: "data/new_checkpoints" - num_updates: -1 - total_num_steps: 1.0e8 - log_interval: 10 - num_checkpoints: 20 - force_torch_single_threaded: True - eval_keys_to_include_in_name: ['reward', 'force', 'composite_success'] - load_resume_state_config: False - eval: - use_ckpt_config: False - should_load_ckpt: False - video_option: ["disk"] - - rl: - ddppo: - pretrained: False - pretrained_encoder: False - train_encoder: True - reset_critic: False diff --git a/habitat-baselines/habitat_baselines/rl/hrl/hierarchical_policy.py b/habitat-baselines/habitat_baselines/rl/hrl/hierarchical_policy.py index ad0226262e..ab8f9bd420 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/hierarchical_policy.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/hierarchical_policy.py @@ -93,9 +93,6 @@ def __init__( self._name_to_idx[skill_id] = i self._idx_to_name[i] = skill_id - self._call_high_level: torch.Tensor = torch.ones( - self._num_envs, dtype=torch.bool - ) self._cur_skills: torch.Tensor = torch.full( (self._num_envs,), -1, dtype=torch.long ) @@ -118,6 +115,13 @@ def __init__( def eval(self): pass + def get_policy_action_space( + self, env_action_space: spaces.Space + ) -> spaces.Space: + return self._high_level_policy.get_policy_action_space( + env_action_space + ) + def extract_policy_info( self, action_data, infos, dones ) -> List[Dict[str, float]]: @@ -142,7 +146,10 @@ def extract_policy_info( @property def num_recurrent_layers(self): - return self._high_level_policy.num_recurrent_layers + if self._high_level_policy.num_recurrent_layers != 0: + return self._high_level_policy.num_recurrent_layers + else: + return self._skills[0].num_recurrent_layers @property def should_load_agent_state(self): @@ -194,11 +201,11 @@ def act( masks, deterministic=False, ): - + masks_cpu = masks.cpu() log_info: List[Dict[str, Any]] = [{} for _ in range(self._num_envs)] - self._high_level_policy.apply_mask(masks) # type: ignore[attr-defined] + self._high_level_policy.apply_mask(masks_cpu) # 
type: ignore[attr-defined] - should_terminate: torch.BoolTensor = torch.zeros( + call_high_level: torch.BoolTensor = torch.zeros( (self._num_envs,), dtype=torch.bool ) bad_should_terminate: torch.BoolTensor = torch.zeros( @@ -212,7 +219,7 @@ def act( masks, self._cur_skills, log_info, - ).cpu() + ) # Compute the actions from the current skills actions = torch.zeros( (self._num_envs, get_num_actions(self._action_space)), @@ -237,14 +244,14 @@ def act( for skill_id, (batch_ids, dat) in grouped_skills.items(): if skill_id == -1: # Policy has not prediced a skill yet. - should_terminate[batch_ids] = 1.0 + call_high_level[batch_ids] = 1.0 continue # TODO: either change name of the function or assign actions somewhere - # else. Updating actions in should_terminate is counterintuitive + # else. Updating actions in should_terminate is counterintuitive ( - should_terminate[batch_ids], + call_high_level[batch_ids], bad_should_terminate[batch_ids], - actions[batch_ids] + actions[batch_ids], ) = self._skills[skill_id].should_terminate( **dat, batch_idx=batch_ids, @@ -254,16 +261,14 @@ def act( for i in batch_ids ], ) - self._call_high_level = should_terminate - # Always call high-level if the episode is over. - self._call_high_level = self._call_high_level | (~masks).view(-1).cpu() + call_high_level = call_high_level | (~masks_cpu).view(-1) # If any skills want to terminate invoke the high-level policy to get # the next skill. hl_terminate = torch.zeros(self._num_envs, dtype=torch.bool) hl_info: Dict[str, Any] = {} - if self._call_high_level.sum() > 0: + if call_high_level.sum() > 0: ( new_skills, new_skill_args, @@ -274,7 +279,7 @@ def act( rnn_hidden_states, prev_actions, masks, - self._call_high_level, + call_high_level, deterministic, log_info, ) @@ -282,7 +287,7 @@ def act( sel_grouped_skills = self._broadcast_skill_ids( new_skills, sel_dat={}, - should_adds=self._call_high_level, + should_adds=call_high_level, ) for skill_id, (batch_ids, _) in sel_grouped_skills.items(): @@ -293,11 +298,16 @@ def act( rnn_hidden_states, prev_actions, ) - rnn_hidden_states[batch_ids] *= 0.0 - prev_actions[batch_ids] *= 0 - self._cur_skills = ( - (~self._call_high_level) * self._cur_skills - ) + (self._call_high_level * new_skills) + if "rnn_hidden_states" not in hl_info: + rnn_hidden_states[batch_ids] *= 0.0 + prev_actions[batch_ids] *= 0 + elif self._skills[skill_id].num_recurrent_layers != 0: + raise ValueError( + f"The code does not currently support neural LL and neural HL skills. 
Skill={self._skills[skill_id]}, HL={self._high_level_policy}" + ) + self._cur_skills = ((~call_high_level) * self._cur_skills) + ( + call_high_level * new_skills + ) grouped_skills = self._broadcast_skill_ids( self._cur_skills, @@ -332,16 +342,43 @@ def act( ) actions[batch_idx, self._stop_action_idx] = 1.0 - ac_info = { - "values": None, - "action_log_probs": None, + action_kwargs = { "rnn_hidden_states": rnn_hidden_states, "actions": actions, } - ac_info.update(hl_info) + action_kwargs.update(hl_info) return PolicyAction( - take_actions=actions, policy_info=log_info, **ac_info + take_actions=actions, + policy_info=log_info, + should_inserts=call_high_level, + **action_kwargs, + ) + + def get_value(self, observations, rnn_hidden_states, prev_actions, masks): + return self._high_level_policy.get_value( + observations, rnn_hidden_states, prev_actions, masks + ) + + def _get_policy_components(self) -> Tuple[nn.Module]: + return self._high_level_policy.get_policy_components() + + def evaluate_actions( + self, + observations, + rnn_hidden_states, + prev_actions, + masks, + action, + rnn_build_seq_info: Dict[str, torch.Tensor], + ): + return self._high_level_policy.evaluate_actions( + observations, + rnn_hidden_states, + prev_actions, + masks, + action, + rnn_build_seq_info, ) @classmethod diff --git a/habitat-baselines/habitat_baselines/rl/hrl/hl/high_level_policy.py b/habitat-baselines/habitat_baselines/rl/hrl/hl/high_level_policy.py index 5bbc2726ec..6d1eb7bf50 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/hl/high_level_policy.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/hl/high_level_policy.py @@ -8,6 +8,10 @@ class HighLevelPolicy(nn.Module): + """ + High level policy that selects from low-level skills. + """ + def __init__( self, config, @@ -27,6 +31,21 @@ def __init__( def to(self, device): self._device = device + return super().to(device) + + def get_value(self, observations, rnn_hidden_states, prev_actions, masks): + raise NotImplementedError() + + def evaluate_actions( + self, + observations, + rnn_hidden_states, + prev_actions, + masks, + action, + rnn_build_seq_info, + ): + raise NotImplementedError() @property def num_recurrent_layers(self): @@ -35,6 +54,11 @@ def num_recurrent_layers(self): def parameters(self): return iter([nn.Parameter(torch.zeros((1,), device=self._device))]) + def get_policy_action_space( + self, env_action_space: spaces.Space + ) -> spaces.Space: + return env_action_space + def get_next_skill( self, observations, @@ -69,6 +93,9 @@ def get_next_skill( def apply_mask(self, mask: torch.Tensor) -> None: pass + def get_policy_components(self) -> Tuple[nn.Module]: + return tuple() + def get_termination( self, observations, diff --git a/habitat-baselines/habitat_baselines/rl/hrl/hl/neural_policy.py b/habitat-baselines/habitat_baselines/rl/hrl/hl/neural_policy.py index c855e4fcc5..f8796f08a3 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/hl/neural_policy.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/hl/neural_policy.py @@ -1,5 +1,5 @@ from itertools import chain -from typing import Any, List +from typing import Any, List, Tuple import gym.spaces as spaces import numpy as np @@ -92,6 +92,11 @@ def _setup_actions(self) -> List[PddlAction]: ] return all_actions + def get_policy_action_space( + self, env_action_space: spaces.Space + ) -> spaces.Space: + return spaces.Discrete(self._n_actions) + @property def num_recurrent_layers(self): return self._state_encoder.num_recurrent_layers @@ -105,7 +110,10 @@ def parameters(self): 
self._critic.parameters(), ) - def forward(self, obs, rnn_hidden_states, masks): + def get_policy_components(self) -> Tuple[nn.Module]: + return (self,) + + def forward(self, obs, rnn_hidden_states, masks, rnn_build_seq_info=None): hidden = [] if len(self._im_obs_space) > 0: im_obs = {k: obs[k] for k in self._im_obs_space.keys()} @@ -117,7 +125,9 @@ def forward(self, obs, rnn_hidden_states, masks): hidden.extend([obs[k] for k in self._state_obs_space.keys()]) hidden = torch.cat(hidden, -1) - return self._state_encoder(hidden, rnn_hidden_states, masks) + return self._state_encoder( + hidden, rnn_hidden_states, masks, rnn_build_seq_info + ) def to(self, device): self._device = device @@ -127,6 +137,30 @@ def get_value(self, observations, rnn_hidden_states, prev_actions, masks): state, _ = self.forward(observations, rnn_hidden_states, masks) return self._critic(state) + def evaluate_actions( + self, + observations, + rnn_hidden_states, + prev_actions, + masks, + action, + rnn_build_seq_info, + ): + features, _ = self.forward( + observations, rnn_hidden_states, masks, rnn_build_seq_info + ) + distribution = self._policy(features) + value = self._critic(features) + action_log_probs = distribution.log_probs(action) + distribution_entropy = distribution.entropy() + + return ( + value, + action_log_probs, + distribution_entropy, + rnn_hidden_states, + ) + def get_next_skill( self, observations, diff --git a/habitat-baselines/habitat_baselines/rl/hrl/hrl_ppo.py b/habitat-baselines/habitat_baselines/rl/hrl/hrl_ppo.py index 706f2b9a57..a0cb2222a7 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/hrl_ppo.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/hrl_ppo.py @@ -28,13 +28,7 @@ def reduce_loss(loss): self._set_grads_to_none() - ( - values, - action_log_probs, - dist_entropy, - _, - aux_loss_res, - ) = self._evaluate_actions( + (values, action_log_probs, dist_entropy, _,) = self._evaluate_actions( batch["observations"], batch["recurrent_hidden_states"], batch["prev_actions"], @@ -87,8 +81,6 @@ def reduce_loss(loss): else: all_losses.append(self.entropy_coef.lagrangian_loss(dist_entropy)) - all_losses.extend(v["loss"] for v in aux_loss_res.values()) - total_loss = torch.stack(all_losses).sum() total_loss = self.before_backward(total_loss) @@ -118,10 +110,6 @@ def reduce_loss(loss): self.entropy_coef().detach() ) - for name, res in aux_loss_res.items(): - for k, v in res.items(): - learner_metrics[f"aux_{name}_{k}"].append(v.detach()) - @baseline_registry.register_updater class HrlDDPPO(DecentralizedDistributedMixin, HrlPPO): diff --git a/habitat-baselines/habitat_baselines/rl/hrl/hrl_rollout_storage.py b/habitat-baselines/habitat_baselines/rl/hrl/hrl_rollout_storage.py index 7d728afdfa..6d8491e592 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/hrl_rollout_storage.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/hrl_rollout_storage.py @@ -11,6 +11,10 @@ from habitat_baselines.common.baseline_registry import baseline_registry from habitat_baselines.common.rollout_storage import RolloutStorage from habitat_baselines.common.tensor_dict import TensorDict +from habitat_baselines.rl.models.rnn_state_encoder import ( + build_pack_info_from_dones, + build_rnn_build_seq_info, +) EPS_PPO = 1e-5 @@ -20,7 +24,6 @@ class HrlRolloutStorage(RolloutStorage): def __init__(self, numsteps, num_envs, *args, **kwargs): super().__init__(numsteps, num_envs, *args, **kwargs) self._num_envs = num_envs - self._numsteps = numsteps self._cur_step_idxs = torch.zeros(self._num_envs, dtype=torch.long) 
self._last_should_inserts = None if self.is_double_buffered: @@ -40,6 +43,10 @@ def insert( buffer_index: int = 0, should_inserts=None, ): + if next_masks is not None: + next_masks = next_masks.to(self.device) + if rewards is not None: + rewards = rewards.to(self.device) next_step = dict( observations=next_observations, recurrent_hidden_states=next_recurrent_hidden_states, @@ -69,9 +76,7 @@ def insert( reward_write_idxs = torch.maximum( self._cur_step_idxs - 1, torch.zeros_like(self._cur_step_idxs) ) - self.buffers["rewards"][reward_write_idxs, env_idxs] += rewards.to( - self.device - ) + self.buffers["rewards"][reward_write_idxs, env_idxs] += rewards if len(next_step) > 0: self.buffers.set( @@ -97,9 +102,9 @@ def insert( def advance_rollout(self, buffer_index: int = 0): self._cur_step_idxs += self._last_should_inserts.long() - is_past_buffer = self._cur_step_idxs >= self._numsteps + is_past_buffer = self._cur_step_idxs >= self.num_steps if is_past_buffer.sum() > 0: - self._cur_step_idxs[is_past_buffer] = self._numsteps - 1 + self._cur_step_idxs[is_past_buffer] = self.num_steps - 1 env_idxs = torch.arange(self._num_envs) self.buffers["rewards"][ self._cur_step_idxs[is_past_buffer], env_idxs[is_past_buffer] @@ -139,14 +144,20 @@ def recurrent_generator( self, advantages, num_batches ) -> Iterator[TensorDict]: num_environments = advantages.size(1) + dones_cpu = ( + torch.logical_not(self.buffers["masks"]) + .cpu() + .view(-1, self._num_envs) + .numpy() + ) for inds in torch.randperm(num_environments).chunk(num_batches): - batch = self.buffers[0 : self._numsteps, inds] - batch["advantages"] = advantages[: self._numsteps, inds] + batch = self.buffers[0 : self.num_steps, inds] + batch["advantages"] = advantages[: self.num_steps, inds] batch["recurrent_hidden_states"] = batch[ "recurrent_hidden_states" ][0:1] batch["loss_mask"] = ( - torch.arange(self._numsteps, device=advantages.device) + torch.arange(self.num_steps, device=advantages.device) .view(-1, 1, 1) .repeat(1, len(inds), 1) ) @@ -156,4 +167,14 @@ def recurrent_generator( batch["loss_mask"][:, i] < self._cur_step_idxs[env_i] - 1 ) - yield batch.map(lambda v: v.flatten(0, 1)) + batch.map_in_place(lambda v: v.flatten(0, 1)) + batch["rnn_build_seq_info"] = build_rnn_build_seq_info( + device=self.device, + build_fn_result=build_pack_info_from_dones( + dones_cpu[0 : self.num_steps, inds.numpy()].reshape( + -1, len(inds) + ), + ), + ) + + yield batch.to_tree() diff --git a/habitat-baselines/habitat_baselines/rl/hrl/skills/art_obj.py b/habitat-baselines/habitat_baselines/rl/hrl/skills/art_obj.py index af083ef84a..b3211760c8 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/skills/art_obj.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/skills/art_obj.py @@ -66,4 +66,4 @@ def _is_skill_done( def _parse_skill_arg(self, skill_arg): self._internal_log(f"Parsing skill argument {skill_arg}") - return int(skill_arg[-1].split("|")[1]) + return int(skill_arg[1].split("|")[1]) diff --git a/habitat-baselines/habitat_baselines/rl/hrl/skills/nn_skill.py b/habitat-baselines/habitat_baselines/rl/hrl/skills/nn_skill.py index ed153443f6..3478613fb1 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/skills/nn_skill.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/skills/nn_skill.py @@ -134,14 +134,16 @@ def _internal_act( masks, deterministic, ) - full_action = torch.zeros(prev_actions.shape, device=masks.device) + full_action = torch.zeros( + (masks.shape[0], self._full_ac_size), device=masks.device + ) full_action[ :, self._ac_start : 
self._ac_start + self._ac_len ] = action_data.actions action_data.actions = full_action self._did_want_done[cur_batch_idx] = full_action[ - cur_batch_idx, self._stop_action_idx + :, self._stop_action_idx ] return action_data @@ -176,6 +178,10 @@ def from_config( ) for k in config.obs_skill_inputs: + if k not in filtered_obs_space.spaces: + raise ValueError( + f"Could not find {k} for skill {policy_cfg.habitat.gym.auto_name}" + ) space = filtered_obs_space.spaces[k] # There is always a 3D position filtered_obs_space.spaces[k] = truncate_obs_space(space, 3) diff --git a/habitat-baselines/habitat_baselines/rl/hrl/skills/oracle_nav.py b/habitat-baselines/habitat_baselines/rl/hrl/skills/oracle_nav.py index 5a27d3f527..2208084edc 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/skills/oracle_nav.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/skills/oracle_nav.py @@ -169,6 +169,9 @@ def _parse_skill_arg(self, skill_arg): def _get_multi_sensor_index(self, batch_idx): return [self._cur_skill_args[i].target_idx for i in batch_idx] + def requires_rnn_state(self): + return False + def _internal_act( self, observations, @@ -178,7 +181,9 @@ def _internal_act( cur_batch_idx, deterministic=False, ): - full_action = torch.zeros(prev_actions.shape, device=masks.device) + full_action = torch.zeros( + (masks.shape[0], self._full_ac_size), device=masks.device + ) action_idxs = torch.FloatTensor( [self._cur_skill_args[i].action_idx + 1 for i in cur_batch_idx] ) diff --git a/habitat-baselines/habitat_baselines/rl/hrl/skills/reset.py b/habitat-baselines/habitat_baselines/rl/hrl/skills/reset.py index 7d17aead32..5c1ab5e23a 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/skills/reset.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/skills/reset.py @@ -9,8 +9,8 @@ import torch from habitat_baselines.rl.hrl.skills.skill import SkillPolicy +from habitat_baselines.rl.hrl.utils import find_action_range from habitat_baselines.rl.ppo.policy import PolicyAction -from habitat_baselines.utils.common import get_num_actions class ResetArmSkill(SkillPolicy): @@ -23,12 +23,7 @@ def __init__( super().__init__(config, action_space, batch_size, True) self._target = np.array([float(x) for x in config.reset_joint_state]) - self._ac_start = 0 - for k, space in action_space.items(): - if k != "arm_action": - self._ac_start += get_num_actions(space) - else: - break + self._arm_ac_range = find_action_range(action_space, "arm_action") def on_enter( self, @@ -85,14 +80,16 @@ def _internal_act( # always in [-1,1] and has the benefit of reducing the delta # amount as we converge to the target. 
delta = delta / np.maximum( - self._initial_delta.max(-1, keepdims=True), 1e-5 + self._initial_delta[cur_batch_idx].max(-1, keepdims=True), 1e-5 ) action = torch.zeros_like(prev_actions) - action[..., self._ac_start : self._ac_start + 7] = torch.from_numpy( - delta - ).to(device=action.device, dtype=action.dtype) + action[ + ..., self._arm_ac_range[0] : self._arm_ac_range[1] + ] = torch.from_numpy(delta).to( + device=action.device, dtype=action.dtype + ) return PolicyAction( actions=action, rnn_hidden_states=rnn_hidden_states diff --git a/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py b/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py index a0ca5854b5..fff04bf3d9 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py @@ -37,6 +37,7 @@ def __init__( self._raw_skill_args: List[Optional[str]] = [ None for _ in range(self._batch_size) ] + self._full_ac_size = get_num_actions(action_space) # TODO: for some reason this doesnt work with "pddl_apply_action" in action_space # and needs to go through the keys argument @@ -155,13 +156,13 @@ def should_terminate( batch_idx: List[int], skill_name: List[str], log_info, - ) -> Tuple[torch.BoolTensor, torch.BoolTensor]: + ) -> Tuple[torch.BoolTensor, torch.BoolTensor, torch.Tensor]: """ :returns: A (batch_size,) size tensor where 1 indicates the skill wants to end and 0 if not. """ is_skill_done = self._is_skill_done( observations, rnn_hidden_states, prev_actions, masks, batch_idx - ).cpu() + ) if is_skill_done.sum() > 0: self._internal_log( f"Requested skill termination {is_skill_done}", @@ -182,9 +183,9 @@ def should_terminate( if self._config.max_skill_steps > 0: over_max_len = cur_skill_step > self._config.max_skill_steps if self._config.force_end_on_timeout: - bad_terminate = over_max_len.cpu() + bad_terminate = over_max_len else: - is_skill_done = is_skill_done | over_max_len.cpu() + is_skill_done = is_skill_done | over_max_len new_actions = torch.zeros_like(actions) for i, env_i in enumerate(batch_idx): if self._delay_term[env_i]: @@ -200,15 +201,15 @@ def should_terminate( ) self._delay_term[env_i] = True is_skill_done[i] = 0.0 - - is_skill_done |= hl_says_term.cpu() + + is_skill_done |= hl_says_term if bad_terminate.sum() > 0: self._internal_log( f"Bad terminating due to timeout {cur_skill_step}, {bad_terminate}", observations, ) - return is_skill_done.cpu(), bad_terminate.cpu(), new_actions + return is_skill_done, bad_terminate, new_actions def on_enter( self, diff --git a/habitat-baselines/habitat_baselines/rl/hrl/skills/wait.py b/habitat-baselines/habitat_baselines/rl/hrl/skills/wait.py index e8a7ca03e1..caab4194ee 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/skills/wait.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/skills/wait.py @@ -40,7 +40,9 @@ def _internal_act( cur_batch_idx, deterministic=False, ): - action = torch.zeros(prev_actions.shape, device=prev_actions.device) + action = torch.zeros( + (masks.shape[0], self._full_ac_size), device=prev_actions.device + ) return PolicyAction( actions=action, rnn_hidden_states=rnn_hidden_states ) diff --git a/habitat-baselines/habitat_baselines/rl/ppo/policy.py b/habitat-baselines/habitat_baselines/rl/ppo/policy.py index 22b7d95205..07da61345f 100644 --- a/habitat-baselines/habitat_baselines/rl/ppo/policy.py +++ b/habitat-baselines/habitat_baselines/rl/ppo/policy.py @@ -5,7 +5,16 @@ # LICENSE file in the root directory of this source tree. 
import abc from dataclasses import dataclass -from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Union +from typing import ( + TYPE_CHECKING, + Any, + Dict, + Iterable, + List, + Optional, + Tuple, + Union, +) import torch from gym import spaces @@ -33,8 +42,12 @@ @dataclass class PolicyAction: + """ + Information returned from the `Policy` class. + """ + rnn_hidden_states: torch.Tensor - actions: torch.Tensor + actions: Optional[torch.Tensor] = None values: Optional[torch.Tensor] = None action_log_probs: Optional[torch.Tensor] = None take_actions: Optional[torch.Tensor] = None @@ -69,6 +82,26 @@ def num_recurrent_layers(self) -> int: def forward(self, *x): raise NotImplementedError + def get_policy_action_space( + self, env_action_space: spaces.Space + ) -> spaces.Space: + return env_action_space + + def _get_policy_components(self) -> Tuple[nn.Module]: + return tuple() + + def aux_loss_parameters(self) -> Dict[str, Iterable[torch.Tensor]]: + return {} + + def policy_parameters(self) -> Iterable[torch.Tensor]: + for c in self._get_policy_components(): + yield from c.parameters() + + def all_policy_tensors(self) -> Iterable[torch.Tensor]: + yield from self.policy_parameters() + for c in self._get_policy_components(): + yield from c.buffers() + def extract_policy_info( self, action_data: PolicyAction, infos, dones ) -> List[Dict[str, float]]: @@ -235,19 +268,9 @@ def evaluate_actions( aux_loss_res, ) - @property - def policy_components(self): + def _get_policy_components(self) -> Tuple[nn.Module]: return (self.net, self.critic, self.action_distribution) - def policy_parameters(self) -> Iterable[torch.Tensor]: - for c in self.policy_components: - yield from c.parameters() - - def all_policy_tensors(self) -> Iterable[torch.Tensor]: - yield from self.policy_parameters() - for c in self.policy_components: - yield from c.buffers() - def aux_loss_parameters(self) -> Dict[str, Iterable[torch.Tensor]]: return {k: v.parameters() for k, v in self.aux_loss_modules.items()} diff --git a/habitat-baselines/habitat_baselines/rl/ppo/ppo_trainer.py b/habitat-baselines/habitat_baselines/rl/ppo/ppo_trainer.py index 2559085cba..1adbc11c38 100644 --- a/habitat-baselines/habitat_baselines/rl/ppo/ppo_trainer.py +++ b/habitat-baselines/habitat_baselines/rl/ppo/ppo_trainer.py @@ -154,8 +154,8 @@ def _setup_actor_critic_agent(self, ppo_cfg: "DictConfig") -> None: self.actor_critic = policy.from_config( self.config, observation_space, - self.policy_action_space, - orig_action_space=self.orig_policy_action_space, + self.env_action_space, + orig_action_space=self.orig_env_action_space, ) self.obs_space = observation_space self.actor_critic.to(self.device) @@ -205,6 +205,9 @@ def _setup_actor_critic_agent(self, ppo_cfg: "DictConfig") -> None: ) self.agent = agent_cls.from_config(self.actor_critic, ppo_cfg) + self.policy_action_space = self.actor_critic.get_policy_action_space( + self.envs.action_spaces[0] + ) def _init_envs(self, config=None, is_eval: bool = False): if config is None: @@ -215,6 +218,8 @@ def _init_envs(self, config=None, is_eval: bool = False): workers_ignore_signals=is_slurm_batch_job(), enforce_scenes_greater_eq_environments=is_eval, ) + self.env_action_space = self.envs.action_spaces[0] + self.orig_env_action_space = self.envs.orig_action_spaces[0] def _init_train(self, resume_state=None): if resume_state is None: @@ -286,18 +291,6 @@ def _init_train(self, resume_state=None): self._init_envs() - action_space = self.envs.action_spaces[0] - self.policy_action_space = action_space - 
self.orig_policy_action_space = self.envs.orig_action_spaces[0] - if is_continuous_action_space(action_space): - # Assume ALL actions are NOT discrete - action_shape = (get_num_actions(action_space),) - discrete_actions = False - else: - # For discrete pointnav - action_shape = (1,) - discrete_actions = True - ppo_cfg = self.config.habitat_baselines.rl.ppo if torch.cuda.is_available(): self.device = torch.device( @@ -353,8 +346,6 @@ def _init_train(self, resume_state=None): ppo_cfg.hidden_size, num_recurrent_layers=self.actor_critic.num_recurrent_layers, is_double_buffered=ppo_cfg.use_double_buffered_sampler, - action_shape=action_shape, - discrete_actions=discrete_actions, ) self.rollouts.to(self.device) @@ -463,12 +454,12 @@ def _compute_actions_and_step_envs(self, buffer_index: int = 0): range(env_slice.start, env_slice.stop), action_data.env_actions.cpu().unbind(0), ): - if is_continuous_action_space(self.policy_action_space): + if is_continuous_action_space(self.env_action_space): # Clipping actions to the specified limits act = np.clip( act.numpy(), - self.policy_action_space.low, - self.policy_action_space.high, + self.env_action_space.low, + self.env_action_space.high, ) else: act = act.item() @@ -934,20 +925,16 @@ def _eval_checkpoint( self._init_envs(config, is_eval=True) - action_space = self.envs.action_spaces[0] - self.policy_action_space = action_space - self.orig_policy_action_space = self.envs.orig_action_spaces[0] - if is_continuous_action_space(action_space): + self._setup_actor_critic_agent(ppo_cfg) + if is_continuous_action_space(self.policy_action_space): # Assume NONE of the actions are discrete - action_shape = (get_num_actions(action_space),) + action_shape = (get_num_actions(self.policy_action_space),) discrete_actions = False else: # For discrete pointnav action_shape = (1,) discrete_actions = True - self._setup_actor_critic_agent(ppo_cfg) - if self.agent.actor_critic.should_load_agent_state: self.agent.load_state_dict(ckpt_dict["state_dict"]) self.actor_critic = self.agent.actor_critic @@ -1027,19 +1014,30 @@ def _eval_checkpoint( not_done_masks, deterministic=False, ) - test_recurrent_hidden_states = action_data.rnn_hidden_states - - prev_actions.copy_(action_data.actions) # type: ignore + if action_data.should_inserts is None: + test_recurrent_hidden_states = ( + action_data.rnn_hidden_states + ) + prev_actions.copy_(action_data.actions) # type: ignore + else: + for i, should_insert in enumerate( + action_data.should_inserts + ): + if should_insert.item(): + test_recurrent_hidden_states[ + i + ] = action_data.rnn_hidden_states[i] + prev_actions[i].copy_(action_data.actions[i]) # type: ignore # NB: Move actions to CPU. If CUDA tensors are # sent in to env.step(), that will create CUDA contexts # in the subprocesses. 
- if is_continuous_action_space(self.policy_action_space): + if is_continuous_action_space(self.env_action_space): # Clipping actions to the specified limits step_data = [ np.clip( a.numpy(), - self.policy_action_space.low, - self.policy_action_space.high, + self.env_action_space.low, + self.env_action_space.high, ) for a in action_data.env_actions.cpu() ] diff --git a/habitat-baselines/tb/events.out.tfevents.1674193555.s-164-67-210-79.resnet.ucla.edu.58058.0 b/habitat-baselines/tb/events.out.tfevents.1674193555.s-164-67-210-79.resnet.ucla.edu.58058.0 new file mode 100644 index 0000000000000000000000000000000000000000..11853e6cc35c6fa02416a8cca40abb3d004e18ad GIT binary patch literal 40 rcmb1OfPlsI-b$P&Paay*_34JA6mL>dVrHJ6YguYuiIvgv2lx8`_;n8f literal 0 HcmV?d00001 diff --git a/habitat-lab/habitat/config/benchmark/rearrange/rearrange_easy.yaml b/habitat-lab/habitat/config/benchmark/rearrange/rearrange_easy.yaml index a9f5b8b68d..7b5995e431 100644 --- a/habitat-lab/habitat/config/benchmark/rearrange/rearrange_easy.yaml +++ b/habitat-lab/habitat/config/benchmark/rearrange/rearrange_easy.yaml @@ -19,6 +19,7 @@ habitat: - obj_goal_gps_compass - joint - is_holding + - ee_pos environment: max_episode_steps: 1500 simulator: diff --git a/test/test_baseline_training.py b/test/test_baseline_training.py index 43a72477f7..52f0242b01 100644 --- a/test/test_baseline_training.py +++ b/test/test_baseline_training.py @@ -5,6 +5,7 @@ # LICENSE file in the root directory of this source tree. import glob +import itertools import os import random @@ -150,31 +151,35 @@ def test_trainers(config_path, num_updates, overrides, trainer_name): @pytest.mark.parametrize( - "config_path,mode,trainer_name", - [ - ( - "habitat-baselines/habitat_baselines/config/rearrange/rl_hl_srl_onav.yaml", - "train", - "ppo", - ), - # ( - # "habitat-baselines/habitat_baselines/config/rearrange/rl_hl_srl_onav.yaml", - # "eval", - # "ppo", - # ), - # ( - # "habitat-baselines/habitat_baselines/config/rearrange/tp_srl_oracle_nav.yaml", - # "eval", - # "ppo", - # ), - # ( - # "habitat-baselines/habitat_baselines/config/rearrange/tp_srl.yaml", - # "eval", - # "ppo", - # ), - ], + "config_path,policy_type,skill_type,mode", + list( + itertools.product( + [ + "habitat-baselines/habitat_baselines/config/rearrange/rl_hierarchical_oracle_nav.yaml", + "habitat-baselines/habitat_baselines/config/rearrange/rl_hierarchical.yaml", + ], + [ + "hl_neural", + "hl_fixed", + ], + [ + "nn_skills", + "noop_skills", + ], + [ + "eval", + "train", + ], + ) + ), ) -def test_hrl(config_path, mode, trainer_name): +def test_hrl(config_path, policy_type, skill_type, mode): + if policy_type == "hl_neural" and skill_type == "nn_skills": + return + if policy_type == "hl_fixed" and mode == "train": + return + if skill_type == "noop_skills" and "oracle" not in config_path: + return # Remove the checkpoints from previous tests for f in glob.glob("data/test_checkpoints/test_training/*"): os.remove(f) @@ -189,7 +194,8 @@ def test_hrl(config_path, mode, trainer_name): "habitat_baselines.total_num_steps=-1.0", "habitat_baselines.test_episode_count=1", "habitat_baselines.checkpoint_folder=data/test_checkpoints/test_training", - f"habitat_baselines.trainer_name={trainer_name}", + f"habitat_baselines/rl/policy={policy_type}", + f"habitat_baselines/rl/policy/hierarchical_policy/defined_skills={skill_type}", ], ) with read_write(config): From fa94bfc56d23430651e7af68aa1bb352262defdd Mon Sep 17 00:00:00 2001 From: ASzot Date: Fri, 20 Jan 2023 10:20:29 -0800 Subject: [PATCH 09/48] 
Fixed tests --- .../habitat_baselines/rl/hrl/hierarchical_policy.py | 1 + habitat-baselines/habitat_baselines/rl/hrl/skills/nav.py | 4 ++-- habitat-baselines/habitat_baselines/rl/hrl/skills/reset.py | 3 ++- habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py | 4 +++- habitat-baselines/habitat_baselines/rl/hrl/skills/wait.py | 2 +- 5 files changed, 9 insertions(+), 5 deletions(-) diff --git a/habitat-baselines/habitat_baselines/rl/hrl/hierarchical_policy.py b/habitat-baselines/habitat_baselines/rl/hrl/hierarchical_policy.py index ab8f9bd420..5e6da4ea32 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/hierarchical_policy.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/hierarchical_policy.py @@ -261,6 +261,7 @@ def act( for i in batch_ids ], ) + # Always call high-level if the episode is over. call_high_level = call_high_level | (~masks_cpu).view(-1) diff --git a/habitat-baselines/habitat_baselines/rl/hrl/skills/nav.py b/habitat-baselines/habitat_baselines/rl/hrl/skills/nav.py index 834ea862f6..642aaf9a69 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/skills/nav.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/skills/nav.py @@ -47,8 +47,8 @@ def _get_filtered_obs(self, observations, cur_batch_idx) -> TensorDict: ret_obs = super()._get_filtered_obs(observations, cur_batch_idx) if NavGoalPointGoalSensor.cls_uuid in ret_obs: - for i in cur_batch_idx: - if self._cur_skill_args[cur_batch_idx[i]].is_target: + for i, batch_i in enumerate(cur_batch_idx): + if self._cur_skill_args[batch_i].is_target: replace_sensor = TargetGoalGpsCompassSensor.cls_uuid else: replace_sensor = TargetStartGpsCompassSensor.cls_uuid diff --git a/habitat-baselines/habitat_baselines/rl/hrl/skills/reset.py b/habitat-baselines/habitat_baselines/rl/hrl/skills/reset.py index 5c1ab5e23a..8e34798f2a 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/skills/reset.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/skills/reset.py @@ -24,6 +24,7 @@ def __init__( self._target = np.array([float(x) for x in config.reset_joint_state]) self._arm_ac_range = find_action_range(action_space, "arm_action") + self._arm_ac_range = (self._arm_ac_range[0], self._target.shape[0]) def on_enter( self, @@ -84,7 +85,7 @@ def _internal_act( ) action = torch.zeros_like(prev_actions) - + # There is an extra grab action that we don't want to set. action[ ..., self._arm_ac_range[0] : self._arm_ac_range[1] ] = torch.from_numpy(delta).to( diff --git a/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py b/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py index fff04bf3d9..8252e87671 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py @@ -301,7 +301,9 @@ def _is_skill_done( self, observations, rnn_hidden_states, prev_actions, masks, batch_idx ) -> torch.BoolTensor: """ - :returns: A (batch_size,) size tensor where 1 indicates the skill wants to end and 0 if not. + :returns: A (batch_size,) size tensor where 1 indicates the skill wants + to end and 0 if not where batch_size is potentially a subset of the + overall num_environments as specified by `batch_idx`. 
""" return torch.zeros(observations.shape[0], dtype=torch.bool).to( masks.device diff --git a/habitat-baselines/habitat_baselines/rl/hrl/skills/wait.py b/habitat-baselines/habitat_baselines/rl/hrl/skills/wait.py index caab4194ee..d5500d3185 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/skills/wait.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/skills/wait.py @@ -29,7 +29,7 @@ def _is_skill_done( self, observations, rnn_hidden_states, prev_actions, masks, batch_idx ) -> torch.BoolTensor: assert self._wait_time > 0 - return self._cur_skill_step >= self._wait_time + return (self._cur_skill_step >= self._wait_time)[batch_idx] def _internal_act( self, From 11659b4be8c0d53ebd5799e0a0afe3799dc2c0ee Mon Sep 17 00:00:00 2001 From: ASzot Date: Fri, 20 Jan 2023 10:31:36 -0800 Subject: [PATCH 10/48] Fixed formatting --- habitat-baselines/habitat_baselines/README.md | 2 +- .../rl/policy/monolithic.yaml | 2 +- .../rl/hrl/hierarchical_policy.py | 2 +- .../rl/hrl/hl/high_level_policy.py | 4 ++-- .../rl/hrl/hl/neural_policy.py | 6 +++--- .../rl/hrl/hrl_rollout_storage.py | 4 ++-- .../habitat_baselines/rl/ppo/policy.py | 19 +++++-------------- .../rl/ver/ver_rollout_storage.py | 4 ---- .../habitat_baselines/rl/ver/ver_trainer.py | 17 +---------------- 9 files changed, 16 insertions(+), 44 deletions(-) diff --git a/habitat-baselines/habitat_baselines/README.md b/habitat-baselines/habitat_baselines/README.md index a37992654c..bb0319da10 100644 --- a/habitat-baselines/habitat_baselines/README.md +++ b/habitat-baselines/habitat_baselines/README.md @@ -57,7 +57,7 @@ Change the `/benchmark/nav/pointnav: pointnav_gibson` in `habitat_baselines/conf We provide a two-layer hierarchical policy class, consisting of a low-level skill that moves the robot, and a high-level policy that reasons about which low-level skill to use in the current state. This can be especially powerful in long-horizon mobile manipulation tasks, like those introduced in [Habitat2.0](https://arxiv.org/abs/2106.14405). Both the low- and high- level can be either learned or an oracle. For oracle high-level we use [PDDL](https://planning.wiki/guide/whatis/pddl), and for oracle low-level we use instantaneous transitions, with the environment set to the final desired state. Additionally, for navigation, we provide an oracle navigation skill that uses A-star and the map of the environment to move the robot to its goal. -To run the following examples, you need the [ReplicaCAD dataset](https://github.com/facebookresearch/habitat-sim/blob/main/DATASETS.md#replicacad). +To run the following examples, you need the [ReplicaCAD dataset](https://github.com/facebookresearch/habitat-sim/blob/main/DATASETS.md#replicacad). 
To train a high-level policy, while using pre-learned low-level skills (SRL baseline from [Habitat2.0](https://arxiv.org/abs/2106.14405)), you can run: diff --git a/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/monolithic.yaml b/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/monolithic.yaml index 9a1850d158..83f65a6e5f 100644 --- a/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/monolithic.yaml +++ b/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/monolithic.yaml @@ -1,4 +1,4 @@ name: "PointNavResNetPolicy" action_distribution_type: "gaussian" action_dist: - use_log_std: True \ No newline at end of file + use_log_std: True diff --git a/habitat-baselines/habitat_baselines/rl/hrl/hierarchical_policy.py b/habitat-baselines/habitat_baselines/rl/hrl/hierarchical_policy.py index 5e6da4ea32..7ecf3ab7ff 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/hierarchical_policy.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/hierarchical_policy.py @@ -361,7 +361,7 @@ def get_value(self, observations, rnn_hidden_states, prev_actions, masks): observations, rnn_hidden_states, prev_actions, masks ) - def _get_policy_components(self) -> Tuple[nn.Module]: + def _get_policy_components(self) -> List[nn.Module]: return self._high_level_policy.get_policy_components() def evaluate_actions( diff --git a/habitat-baselines/habitat_baselines/rl/hrl/hl/high_level_policy.py b/habitat-baselines/habitat_baselines/rl/hrl/hl/high_level_policy.py index 6d1eb7bf50..c919abbf51 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/hl/high_level_policy.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/hl/high_level_policy.py @@ -93,8 +93,8 @@ def get_next_skill( def apply_mask(self, mask: torch.Tensor) -> None: pass - def get_policy_components(self) -> Tuple[nn.Module]: - return tuple() + def get_policy_components(self) -> List[nn.Module]: + return [] def get_termination( self, diff --git a/habitat-baselines/habitat_baselines/rl/hrl/hl/neural_policy.py b/habitat-baselines/habitat_baselines/rl/hrl/hl/neural_policy.py index f8796f08a3..3c6def42d2 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/hl/neural_policy.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/hl/neural_policy.py @@ -1,5 +1,5 @@ from itertools import chain -from typing import Any, List, Tuple +from typing import Any, List import gym.spaces as spaces import numpy as np @@ -110,8 +110,8 @@ def parameters(self): self._critic.parameters(), ) - def get_policy_components(self) -> Tuple[nn.Module]: - return (self,) + def get_policy_components(self) -> List[nn.Module]: + return [self] def forward(self, obs, rnn_hidden_states, masks, rnn_build_seq_info=None): hidden = [] diff --git a/habitat-baselines/habitat_baselines/rl/hrl/hrl_rollout_storage.py b/habitat-baselines/habitat_baselines/rl/hrl/hrl_rollout_storage.py index 6d8491e592..2ee96eb16a 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/hrl_rollout_storage.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/hrl_rollout_storage.py @@ -10,7 +10,7 @@ from habitat_baselines.common.baseline_registry import baseline_registry from habitat_baselines.common.rollout_storage import RolloutStorage -from habitat_baselines.common.tensor_dict import TensorDict +from habitat_baselines.common.tensor_dict import DictTree from habitat_baselines.rl.models.rnn_state_encoder import ( build_pack_info_from_dones, build_rnn_build_seq_info, @@ -142,7 +142,7 @@ def compute_returns(self, next_value, use_gae, gamma, tau): 
def recurrent_generator( self, advantages, num_batches - ) -> Iterator[TensorDict]: + ) -> Iterator[DictTree]: num_environments = advantages.size(1) dones_cpu = ( torch.logical_not(self.buffers["masks"]) diff --git a/habitat-baselines/habitat_baselines/rl/ppo/policy.py b/habitat-baselines/habitat_baselines/rl/ppo/policy.py index 07da61345f..2ec25b68c8 100644 --- a/habitat-baselines/habitat_baselines/rl/ppo/policy.py +++ b/habitat-baselines/habitat_baselines/rl/ppo/policy.py @@ -5,16 +5,7 @@ # LICENSE file in the root directory of this source tree. import abc from dataclasses import dataclass -from typing import ( - TYPE_CHECKING, - Any, - Dict, - Iterable, - List, - Optional, - Tuple, - Union, -) +from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Union import torch from gym import spaces @@ -87,8 +78,8 @@ def get_policy_action_space( ) -> spaces.Space: return env_action_space - def _get_policy_components(self) -> Tuple[nn.Module]: - return tuple() + def _get_policy_components(self) -> List[nn.Module]: + return [] def aux_loss_parameters(self) -> Dict[str, Iterable[torch.Tensor]]: return {} @@ -268,8 +259,8 @@ def evaluate_actions( aux_loss_res, ) - def _get_policy_components(self) -> Tuple[nn.Module]: - return (self.net, self.critic, self.action_distribution) + def _get_policy_components(self) -> List[nn.Module]: + return [self.net, self.critic, self.action_distribution] def aux_loss_parameters(self) -> Dict[str, Iterable[torch.Tensor]]: return {k: v.parameters() for k, v in self.aux_loss_modules.items()} diff --git a/habitat-baselines/habitat_baselines/rl/ver/ver_rollout_storage.py b/habitat-baselines/habitat_baselines/rl/ver/ver_rollout_storage.py index 361ee96ad4..d9f6c70af0 100644 --- a/habitat-baselines/habitat_baselines/rl/ver/ver_rollout_storage.py +++ b/habitat-baselines/habitat_baselines/rl/ver/ver_rollout_storage.py @@ -144,9 +144,7 @@ def __init__( action_space, recurrent_hidden_state_size, num_recurrent_layers=1, - action_shape: Optional[Tuple[int]] = None, is_double_buffered: bool = False, - discrete_actions: bool = True, ): super().__init__( numsteps, @@ -155,9 +153,7 @@ def __init__( action_space, recurrent_hidden_state_size, num_recurrent_layers, - action_shape, is_double_buffered, - discrete_actions, ) self.use_is_coeffs = variable_experience diff --git a/habitat-baselines/habitat_baselines/rl/ver/ver_trainer.py b/habitat-baselines/habitat_baselines/rl/ver/ver_trainer.py index d4241e77cc..504bdf71d5 100644 --- a/habitat-baselines/habitat_baselines/rl/ver/ver_trainer.py +++ b/habitat-baselines/habitat_baselines/rl/ver/ver_trainer.py @@ -52,12 +52,7 @@ WorkerBase, WorkerQueues, ) -from habitat_baselines.utils.common import ( - cosine_decay, - get_num_actions, - inference_mode, - is_continuous_action_space, -) +from habitat_baselines.utils.common import cosine_decay, inference_mode try: torch.backends.cudnn.allow_tf32 = True @@ -196,14 +191,6 @@ def _init_train(self, resume_state): ) for ew in self.environment_workers ] - if is_continuous_action_space(action_space): - # Assume ALL actions are NOT discrete - action_shape = (get_num_actions(action_space),) - discrete_actions = False - else: - # For discrete pointnav - action_shape = (1,) - discrete_actions = True ppo_cfg = self.config.habitat_baselines.rl.ppo if torch.cuda.is_available(): @@ -253,8 +240,6 @@ def _init_train(self, resume_state): action_space=self.policy_action_space, recurrent_hidden_state_size=ppo_cfg.hidden_size, num_recurrent_layers=self.actor_critic.net.num_recurrent_layers, - 
action_shape=action_shape, - discrete_actions=discrete_actions, observation_space=rollouts_obs_space, ) self.rollouts = VERRolloutStorage(**storage_kwargs) From 633ebf3b128c493ad9e4429e51e3cb1257863b1c Mon Sep 17 00:00:00 2001 From: ASzot Date: Fri, 27 Jan 2023 08:47:51 -0800 Subject: [PATCH 11/48] Fixed device issues. Cleaned up configs. --- .../config/default_structured_configs.py | 4 +- .../defined_skills/nn_skills.yaml | 5 +- .../{noop_skills.yaml => oracle_skills.yaml} | 5 +- .../habitat_baselines/rl/policy/hl_fixed.yaml | 11 --- .../rl/policy/hl_neural.yaml | 11 --- .../rl/policy/oracle_nav.yaml | 39 +++++++++ .../config/rearrange/rl_hierarchical.yaml | 1 + .../rearrange/rl_hierarchical_oracle_nav.yaml | 81 +++---------------- .../rl/hrl/hierarchical_policy.py | 21 +++-- .../habitat_baselines/rl/hrl/skills/reset.py | 1 - .../habitat_baselines/rl/hrl/skills/skill.py | 10 ++- test/test_baseline_training.py | 4 +- 12 files changed, 78 insertions(+), 115 deletions(-) rename habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_policy/defined_skills/{noop_skills.yaml => oracle_skills.yaml} (94%) create mode 100644 habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/oracle_nav.yaml diff --git a/habitat-baselines/habitat_baselines/config/default_structured_configs.py b/habitat-baselines/habitat_baselines/config/default_structured_configs.py index c61405bf1e..8190f2d116 100644 --- a/habitat-baselines/habitat_baselines/config/default_structured_configs.py +++ b/habitat-baselines/habitat_baselines/config/default_structured_configs.py @@ -5,7 +5,7 @@ # LICENSE file in the root directory of this source tree. from dataclasses import dataclass, field -from typing import Any, Dict, List, Tuple +from typing import Any, Dict, List, Optional, Tuple from hydra.core.config_store import ConfigStore from omegaconf import MISSING @@ -242,13 +242,13 @@ class HrlDefinedSkill(HabitatBaselinesBaseConfig): stop_thresh: float = 0.001 # For the reset_arm_skill reset_joint_state: List[float] = MISSING + pddl_action_names: Optional[List[str]] = None @dataclass class HierarchicalPolicy(HabitatBaselinesBaseConfig): high_level_policy: Dict[str, Any] = MISSING defined_skills: Dict[str, HrlDefinedSkill] = field(default_factory=dict) - use_skills: Dict[str, str] = field(default_factory=dict) @dataclass diff --git a/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_policy/defined_skills/nn_skills.yaml b/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_policy/defined_skills/nn_skills.yaml index 24d7cbca38..a0370b63cf 100644 --- a/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_policy/defined_skills/nn_skills.yaml +++ b/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_policy/defined_skills/nn_skills.yaml @@ -24,7 +24,7 @@ place: obs_skill_inputs: ["obj_goal_sensor"] load_ckpt_file: "data/models/place.pth" -wait_skill: +wait: skill_name: "WaitSkillPolicy" max_skill_steps: -1 force_end_on_timeout: False @@ -35,8 +35,9 @@ nav_to_obj: load_ckpt_file: "data/models/nav.pth" max_skill_steps: 300 obs_skill_input_dim: 2 + pddl_action_names: ["nav", "nav_to_receptacle"] -reset_arm_skill: +reset_arm: skill_name: "ResetArmSkill" max_skill_steps: 50 reset_joint_state: [-4.50e-01, -1.08e00, 9.95e-02, 9.38e-01, -7.88e-04, 1.57e00, 4.62e-03] diff --git 
a/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_policy/defined_skills/noop_skills.yaml b/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_policy/defined_skills/oracle_skills.yaml similarity index 94% rename from habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_policy/defined_skills/noop_skills.yaml rename to habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_policy/defined_skills/oracle_skills.yaml index c8b1a34e29..d3f9c34723 100644 --- a/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_policy/defined_skills/noop_skills.yaml +++ b/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_policy/defined_skills/oracle_skills.yaml @@ -33,7 +33,7 @@ place: apply_postconds: True force_end_on_timeout: False -wait_skill: +wait: skill_name: "WaitSkillPolicy" max_skill_steps: -1 @@ -44,8 +44,9 @@ nav_to_obj: apply_postconds: True force_end_on_timeout: False obs_skill_input_dim: 2 + pddl_action_names: ["nav", "nav_to_receptacle"] -reset_arm_skill: +reset_arm: skill_name: "ResetArmSkill" max_skill_steps: 50 reset_joint_state: [-4.50e-01, -1.07e00, 9.95e-02, 9.38e-01, -7.88e-04, 1.57e00, 4.62e-03] diff --git a/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hl_fixed.yaml b/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hl_fixed.yaml index 0aef449dbe..018fd32519 100644 --- a/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hl_fixed.yaml +++ b/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hl_fixed.yaml @@ -7,15 +7,4 @@ hierarchical_policy: high_level_policy: name: "FixedHighLevelPolicy" add_arm_rest: True - use_skills: - open_cab: "open_cab" - open_fridge: "open_fridge" - close_cab: "close_cab" - close_fridge: "close_fridge" - pick: "pick" - place: "place" - nav: "nav_to_obj" - nav_to_receptacle: "nav_to_obj" - wait: "wait_skill" - reset_arm: "reset_arm_skill" defined_skills: {} diff --git a/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hl_neural.yaml b/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hl_neural.yaml index f15a1eb389..2e7d9a4c87 100644 --- a/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hl_neural.yaml +++ b/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hl_neural.yaml @@ -15,15 +15,4 @@ hierarchical_policy: num_rnn_layers: 2 policy_input_keys: - "robot_head_depth" - use_skills: - open_cab: "open_cab" - open_fridge: "open_fridge" - close_cab: "open_cab" - close_fridge: "open_fridge" - pick: "pick" - place: "place" - nav: "nav_to_obj" - nav_to_receptacle: "nav_to_obj" - wait: "wait_skill" - reset_arm: "reset_arm_skill" defined_skills: {} diff --git a/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/oracle_nav.yaml b/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/oracle_nav.yaml new file mode 100644 index 0000000000..8aa067b89c --- /dev/null +++ b/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/oracle_nav.yaml @@ -0,0 +1,39 @@ +# @package _global_ +defaults: + - /habitat/task/actions: + - pddl_apply_action + - oracle_nav_action + - arm_action + - base_velocity + - rearrange_stop +habitat: + gym: + obs_keys: + - robot_head_depth + - relative_resting_position + - obj_start_sensor + - obj_goal_sensor + - obj_start_gps_compass + - 
obj_goal_gps_compass + - joint + - is_holding + - ee_pos + - localization_sensor + simulator: + habitat_sim_v0: + allow_sliding: True + + +habitat_baselines: + rl: + policy: + hierarchical_policy: + # Override to use the oracle navigation skill. + use_skills: + nav: "oracle_nav" + nav_to_receptacle: "oracle_nav" + defined_skills: + oracle_nav: + skill_name: "OracleNavPolicy" + obs_skill_inputs: ["obj_start_sensor", "abs_obj_start_sensor", "obj_goal_sensor", "abs_obj_goal_sensor"] + max_skill_steps: 300 diff --git a/habitat-baselines/habitat_baselines/config/rearrange/rl_hierarchical.yaml b/habitat-baselines/habitat_baselines/config/rearrange/rl_hierarchical.yaml index df1f0ee015..387b8398f6 100644 --- a/habitat-baselines/habitat_baselines/config/rearrange/rl_hierarchical.yaml +++ b/habitat-baselines/habitat_baselines/config/rearrange/rl_hierarchical.yaml @@ -3,6 +3,7 @@ defaults: - /benchmark/rearrange: rearrange_easy - /habitat_baselines: habitat_baselines_rl_config_base + - /habitat/simulator/sim_sensors@habitat_baselines.eval.extra_sim_sensors.third_rgb_sensor: third_rgb_sensor - /habitat_baselines/rl/policy/obs_transforms: - add_virtual_keys_base - /habitat_baselines/rl/policy: hl_fixed diff --git a/habitat-baselines/habitat_baselines/config/rearrange/rl_hierarchical_oracle_nav.yaml b/habitat-baselines/habitat_baselines/config/rearrange/rl_hierarchical_oracle_nav.yaml index 3152f00641..bb3e76924e 100644 --- a/habitat-baselines/habitat_baselines/config/rearrange/rl_hierarchical_oracle_nav.yaml +++ b/habitat-baselines/habitat_baselines/config/rearrange/rl_hierarchical_oracle_nav.yaml @@ -1,18 +1,15 @@ # @package _global_ +# Extends the `rl_hierarchical` config to use an oracle navigation action. +# Several things change to support the oracle navigation: +# - Adding the oracle navigation action +# - + defaults: - - /benchmark/rearrange: rearrange_easy - - /habitat_baselines: habitat_baselines_rl_config_base - - /habitat_baselines/rl/policy/obs_transforms: - - add_virtual_keys_base - - /habitat_baselines/rl/policy: hl_neural - - /habitat_baselines/rl/policy/hierarchical_policy/defined_skills: noop_skills + - rl_hierarchical - /habitat/task/actions: - pddl_apply_action - oracle_nav_action - - arm_action - - base_velocity - - rearrange_stop - _self_ habitat: @@ -28,74 +25,18 @@ habitat: - is_holding - ee_pos - localization_sensor - + simulator: + habitat_sim_v0: + allow_sliding: True habitat_baselines: - verbose: False - trainer_name: "ddppo" - torch_gpu_id: 0 - rollout_storage: "HrlRolloutStorage" - updater_name: "HrlPPO" - distrib_updater_name: "HrlDDPPO" - video_fps: 30 - eval_ckpt_path_dir: "" - num_environments: 3 - writer_type: 'tb' - checkpoint_folder: "data/new_checkpoints" - num_updates: -1 - total_num_steps: 1.0e8 - log_interval: 10 - num_checkpoints: 20 - force_torch_single_threaded: True - eval_keys_to_include_in_name: ['reward', 'force', 'composite_success'] - load_resume_state_config: False - - eval: - use_ckpt_config: False - should_load_ckpt: False - video_option: ["disk"] rl: policy: hierarchical_policy: - use_skills: - nav: "oracle_nav" - nav_to_receptacle: "oracle_nav" + # Override to use the oracle navigation skill. 
defined_skills: - oracle_nav: + nav_to_obj: skill_name: "OracleNavPolicy" obs_skill_inputs: ["obj_start_sensor", "abs_obj_start_sensor", "obj_goal_sensor", "abs_obj_goal_sensor"] max_skill_steps: 300 - ppo: - # ppo params - clip_param: 0.2 - ppo_epoch: 2 - num_mini_batch: 2 - value_loss_coef: 0.5 - entropy_coef: 0.0001 - lr: 2.5e-4 - eps: 1e-5 - max_grad_norm: 0.2 - num_steps: 128 - use_gae: True - gamma: 0.99 - tau: 0.95 - - ddppo: - sync_frac: 0.6 - # The PyTorch distributed backend to use - distrib_backend: NCCL - # Visual encoder backbone - pretrained_weights: data/ddppo-models/gibson-2plus-resnet50.pth - # Initialize with pretrained weights - pretrained: False - # Initialize just the visual encoder backbone with pretrained weights - pretrained_encoder: False - # Whether the visual encoder backbone will be trained. - train_encoder: True - # Whether to reset the critic linear layer - reset_critic: False - # Model parameters - backbone: resnet18 - rnn_type: LSTM - num_recurrent_layers: 2 diff --git a/habitat-baselines/habitat_baselines/rl/hrl/hierarchical_policy.py b/habitat-baselines/habitat_baselines/rl/hrl/hierarchical_policy.py index 7ecf3ab7ff..9078e4b894 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/hierarchical_policy.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/hierarchical_policy.py @@ -70,16 +70,9 @@ def __init__( config, ) - for i, (skill_id, use_skill_name) in enumerate( - config.hierarchical_policy.use_skills.items() + for i, (skill_name, skill_config) in enumerate( + config.hierarchical_policy.defined_skills.items() ): - if use_skill_name == "": - # Skip loading this skill if no name is provided - continue - skill_config = config.hierarchical_policy.defined_skills[ - use_skill_name - ] - cls = eval(skill_config.skill_name) skill_policy = cls.from_config( skill_config, @@ -90,8 +83,13 @@ def __init__( ) skill_policy.set_pddl_problem(self._pddl_problem) self._skills[i] = skill_policy - self._name_to_idx[skill_id] = i - self._idx_to_name[i] = skill_id + if skill_config.pddl_action_names is None: + action_names = [skill_name] + else: + action_names = skill_config.pddl_action_names + for skill_id in action_names: + self._name_to_idx[skill_id] = i + self._idx_to_name[i] = skill_id self._cur_skills: torch.Tensor = torch.full( (self._num_envs,), -1, dtype=torch.long @@ -248,6 +246,7 @@ def act( continue # TODO: either change name of the function or assign actions somewhere # else. 
Updating actions in should_terminate is counterintuitive + ( call_high_level[batch_ids], bad_should_terminate[batch_ids], diff --git a/habitat-baselines/habitat_baselines/rl/hrl/skills/reset.py b/habitat-baselines/habitat_baselines/rl/hrl/skills/reset.py index 8e34798f2a..efe5531075 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/skills/reset.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/skills/reset.py @@ -59,7 +59,6 @@ def _is_skill_done( return ( torch.as_tensor( np.abs(current_joint_pos - self._target).max(-1), - device=rnn_hidden_states.device, dtype=torch.float32, ) < 5e-2 diff --git a/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py b/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py index 8252e87671..dfe0a75738 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py @@ -158,11 +158,15 @@ def should_terminate( log_info, ) -> Tuple[torch.BoolTensor, torch.BoolTensor, torch.Tensor]: """ - :returns: A (batch_size,) size tensor where 1 indicates the skill wants to end and 0 if not. + :returns: Both of the BoolTensor's will be on the CPU. + - `is_skill_done`: Shape (batch_size,) size tensor where 1 + indicates the skill to return control to HL policy. + - `bad_terminate`: Shape (batch_size,) size tensor where 1 + indicates the skill should immediately end the episode. """ is_skill_done = self._is_skill_done( observations, rnn_hidden_states, prev_actions, masks, batch_idx - ) + ).cpu() if is_skill_done.sum() > 0: self._internal_log( f"Requested skill termination {is_skill_done}", @@ -274,7 +278,7 @@ def act( return action_data def to(self, device): - self._cur_skill_step = self._cur_skill_step.to(device) + pass def _select_obs(self, obs, cur_batch_idx): """ diff --git a/test/test_baseline_training.py b/test/test_baseline_training.py index 52f0242b01..d377990317 100644 --- a/test/test_baseline_training.py +++ b/test/test_baseline_training.py @@ -164,7 +164,7 @@ def test_trainers(config_path, num_updates, overrides, trainer_name): ], [ "nn_skills", - "noop_skills", + "oracle_skills", ], [ "eval", @@ -178,7 +178,7 @@ def test_hrl(config_path, policy_type, skill_type, mode): return if policy_type == "hl_fixed" and mode == "train": return - if skill_type == "noop_skills" and "oracle" not in config_path: + if skill_type == "oracle_skills" and "oracle" not in config_path: return # Remove the checkpoints from previous tests for f in glob.glob("data/test_checkpoints/test_training/*"): From 1c82390e4f02973cd0d4356dc9542c087f7e5d6d Mon Sep 17 00:00:00 2001 From: ASzot Date: Fri, 27 Jan 2023 08:59:32 -0800 Subject: [PATCH 12/48] More config cleanup --- .../rl/policy/oracle_nav.yaml | 39 ------------------- .../rearrange/rl_hierarchical_oracle_nav.yaml | 4 +- .../rl/hrl/hierarchical_policy.py | 15 ++++--- 3 files changed, 11 insertions(+), 47 deletions(-) delete mode 100644 habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/oracle_nav.yaml diff --git a/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/oracle_nav.yaml b/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/oracle_nav.yaml deleted file mode 100644 index 8aa067b89c..0000000000 --- a/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/oracle_nav.yaml +++ /dev/null @@ -1,39 +0,0 @@ -# @package _global_ -defaults: - - /habitat/task/actions: - - pddl_apply_action - - oracle_nav_action - - arm_action - - base_velocity - - rearrange_stop -habitat: - gym: - 
obs_keys: - - robot_head_depth - - relative_resting_position - - obj_start_sensor - - obj_goal_sensor - - obj_start_gps_compass - - obj_goal_gps_compass - - joint - - is_holding - - ee_pos - - localization_sensor - simulator: - habitat_sim_v0: - allow_sliding: True - - -habitat_baselines: - rl: - policy: - hierarchical_policy: - # Override to use the oracle navigation skill. - use_skills: - nav: "oracle_nav" - nav_to_receptacle: "oracle_nav" - defined_skills: - oracle_nav: - skill_name: "OracleNavPolicy" - obs_skill_inputs: ["obj_start_sensor", "abs_obj_start_sensor", "obj_goal_sensor", "abs_obj_goal_sensor"] - max_skill_steps: 300 diff --git a/habitat-baselines/habitat_baselines/config/rearrange/rl_hierarchical_oracle_nav.yaml b/habitat-baselines/habitat_baselines/config/rearrange/rl_hierarchical_oracle_nav.yaml index bb3e76924e..92b80bea64 100644 --- a/habitat-baselines/habitat_baselines/config/rearrange/rl_hierarchical_oracle_nav.yaml +++ b/habitat-baselines/habitat_baselines/config/rearrange/rl_hierarchical_oracle_nav.yaml @@ -3,7 +3,8 @@ # Extends the `rl_hierarchical` config to use an oracle navigation action. # Several things change to support the oracle navigation: # - Adding the oracle navigation action -# - +# - Add sliding. +# - Use the oracle navigation skill. defaults: - rl_hierarchical @@ -30,7 +31,6 @@ habitat: allow_sliding: True habitat_baselines: - rl: policy: hierarchical_policy: diff --git a/habitat-baselines/habitat_baselines/rl/hrl/hierarchical_policy.py b/habitat-baselines/habitat_baselines/rl/hrl/hierarchical_policy.py index 9078e4b894..7376fc822a 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/hierarchical_policy.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/hierarchical_policy.py @@ -70,9 +70,11 @@ def __init__( config, ) - for i, (skill_name, skill_config) in enumerate( - config.hierarchical_policy.defined_skills.items() - ): + skill_i = 0 + for ( + skill_name, + skill_config, + ) in config.hierarchical_policy.defined_skills.items(): cls = eval(skill_config.skill_name) skill_policy = cls.from_config( skill_config, @@ -82,14 +84,15 @@ def __init__( full_config, ) skill_policy.set_pddl_problem(self._pddl_problem) - self._skills[i] = skill_policy if skill_config.pddl_action_names is None: action_names = [skill_name] else: action_names = skill_config.pddl_action_names for skill_id in action_names: - self._name_to_idx[skill_id] = i - self._idx_to_name[i] = skill_id + self._name_to_idx[skill_id] = skill_i + self._idx_to_name[skill_i] = skill_id + self._skills[skill_i] = skill_policy + skill_i += 1 self._cur_skills: torch.Tensor = torch.full( (self._num_envs,), -1, dtype=torch.long From 2e973f5c525cee5cb339d3596e5254109ae8bb1c Mon Sep 17 00:00:00 2001 From: ASzot Date: Fri, 27 Jan 2023 10:13:43 -0800 Subject: [PATCH 13/48] Addressing PR comments --- .../common/baseline_registry.py | 5 ++- .../config/default_structured_configs.py | 10 +++--- .../config/rearrange/rl_hierarchical.yaml | 3 ++ .../habitat_baselines/rl/hrl/__init__.py | 2 +- .../rl/hrl/hierarchical_policy.py | 15 ++++++-- .../rl/hrl/hl/fixed_policy.py | 6 ++-- .../rl/hrl/hl/high_level_policy.py | 3 +- .../habitat_baselines/rl/hrl/hrl_ppo.py | 4 +-- .../rl/hrl/hrl_rollout_storage.py | 35 +++++++++++++++---- .../habitat_baselines/rl/hrl/skills/skill.py | 6 ++-- .../config/default_structured_configs.py | 3 -- 11 files changed, 65 insertions(+), 27 deletions(-) diff --git a/habitat-baselines/habitat_baselines/common/baseline_registry.py 
b/habitat-baselines/habitat_baselines/common/baseline_registry.py index 7ff89a28b6..864833b0a2 100644 --- a/habitat-baselines/habitat_baselines/common/baseline_registry.py +++ b/habitat-baselines/habitat_baselines/common/baseline_registry.py @@ -23,6 +23,7 @@ from typing import Optional from habitat.core.registry import Registry +from habitat_baselines.common.rollout_storage import RolloutStorage class BaselineRegistry(Registry): @@ -138,7 +139,9 @@ def get_auxiliary_loss(cls, name: str): @classmethod def register_storage(cls, to_register=None, *, name: Optional[str] = None): - return cls._register_impl("storage", to_register, name) + return cls._register_impl( + "storage", to_register, name, assert_type=RolloutStorage + ) @classmethod def get_storage(cls, name: str): diff --git a/habitat-baselines/habitat_baselines/config/default_structured_configs.py b/habitat-baselines/habitat_baselines/config/default_structured_configs.py index 8190f2d116..062340b64c 100644 --- a/habitat-baselines/habitat_baselines/config/default_structured_configs.py +++ b/habitat-baselines/habitat_baselines/config/default_structured_configs.py @@ -224,7 +224,7 @@ class Eq2CubeConfig(ObsTransformConfig): @dataclass -class HrlDefinedSkill(HabitatBaselinesBaseConfig): +class HrlDefinedSkillConfig(HabitatBaselinesBaseConfig): skill_name: str = MISSING name: str = "PointNavResNetPolicy" action_distribution_type: str = "gaussian" @@ -246,9 +246,11 @@ class HrlDefinedSkill(HabitatBaselinesBaseConfig): @dataclass -class HierarchicalPolicy(HabitatBaselinesBaseConfig): +class HierarchicalPolicyConfig(HabitatBaselinesBaseConfig): high_level_policy: Dict[str, Any] = MISSING - defined_skills: Dict[str, HrlDefinedSkill] = field(default_factory=dict) + defined_skills: Dict[str, HrlDefinedSkillConfig] = field( + default_factory=dict + ) @dataclass @@ -259,7 +261,7 @@ class PolicyConfig(HabitatBaselinesBaseConfig): # For gaussian action distribution: action_dist: ActionDistributionConfig = ActionDistributionConfig() obs_transforms: Dict[str, ObsTransformConfig] = field(default_factory=dict) - hierarchical_policy: HierarchicalPolicy = MISSING + hierarchical_policy: HierarchicalPolicyConfig = MISSING @dataclass diff --git a/habitat-baselines/habitat_baselines/config/rearrange/rl_hierarchical.yaml b/habitat-baselines/habitat_baselines/config/rearrange/rl_hierarchical.yaml index 387b8398f6..70d35f7702 100644 --- a/habitat-baselines/habitat_baselines/config/rearrange/rl_hierarchical.yaml +++ b/habitat-baselines/habitat_baselines/config/rearrange/rl_hierarchical.yaml @@ -1,5 +1,8 @@ # @package _global_ +# Config for running hierarchical policies where a high-level (HL) policy selects from a set of low-level (LL) policies. +# Supports different HL policy configurations and using a variety of LL policies. + defaults: - /benchmark/rearrange: rearrange_easy - /habitat_baselines: habitat_baselines_rl_config_base diff --git a/habitat-baselines/habitat_baselines/rl/hrl/__init__.py b/habitat-baselines/habitat_baselines/rl/hrl/__init__.py index 75f7fadf5c..394540d026 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/__init__.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/__init__.py @@ -4,7 +4,7 @@ # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. 
-from habitat_baselines.rl.hrl.hrl_ppo import HrlPPO +from habitat_baselines.rl.hrl.hrl_ppo import HRLPPO from habitat_baselines.rl.hrl.hrl_rollout_storage import HrlRolloutStorage __all__ = [ diff --git a/habitat-baselines/habitat_baselines/rl/hrl/hierarchical_policy.py b/habitat-baselines/habitat_baselines/rl/hrl/hierarchical_policy.py index 7376fc822a..5c768aba17 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/hierarchical_policy.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/hierarchical_policy.py @@ -40,6 +40,15 @@ @baseline_registry.register_policy class HierarchicalPolicy(nn.Module, Policy): + """ + :property _pddl_problem: Stores the PDDL domain information. This allows + accessing all the possible entities, actions, and predicates. Note that + this is not the grounded PDDL problem with truth values assigned to the + predicates based on the current simulator state. + """ + + _pddl_problem: PddlProblem + def __init__( self, config, @@ -213,7 +222,7 @@ def act( (self._num_envs,), dtype=torch.bool ) - hl_says_term = self._high_level_policy.get_termination( + hl_policy_wants_termination = self._high_level_policy.get_termination( observations, rnn_hidden_states, prev_actions, @@ -221,7 +230,7 @@ def act( self._cur_skills, log_info, ) - # Compute the actions from the current skills + # Initialize empty action set based on the overall action space. actions = torch.zeros( (self._num_envs, get_num_actions(self._action_space)), device=masks.device, @@ -235,7 +244,7 @@ def act( "prev_actions": prev_actions, "masks": masks, "actions": actions, - "hl_says_term": hl_says_term, + "hl_policy_wants_termination": hl_policy_wants_termination, }, # Only decide on skill termination if the episode is active. should_adds=masks, diff --git a/habitat-baselines/habitat_baselines/rl/hrl/hl/fixed_policy.py b/habitat-baselines/habitat_baselines/rl/hrl/hl/fixed_policy.py index 8b6e6a6552..fa6b09f542 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/hl/fixed_policy.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/hl/fixed_policy.py @@ -32,10 +32,10 @@ def __init__(self, *args, **kwargs): def _parse_solution_actions(self, solution): solution_actions = [] - for i, sol_step in enumerate(solution): + for i, hl_action in enumerate(solution): sol_action = ( - sol_step.name, - [x.name for x in sol_step.param_values], + hl_action.name, + [x.name for x in hl_action.param_values], ) solution_actions.append(sol_action) diff --git a/habitat-baselines/habitat_baselines/rl/hrl/hl/high_level_policy.py b/habitat-baselines/habitat_baselines/rl/hrl/hl/high_level_policy.py index c919abbf51..d4dd946415 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/hl/high_level_policy.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/hl/high_level_policy.py @@ -107,9 +107,10 @@ def get_termination( ) -> torch.BoolTensor: """ Can force the currently executing skill to terminate. + In the base HighLevelPolicy, the skill always continues. Returns: A binary tensor where 1 indicates the current skill should - terminate and 0 indicates the skill can continue. + terminate and 0 indicates the skill can continue.
""" return torch.zeros(self._num_envs, dtype=torch.bool) diff --git a/habitat-baselines/habitat_baselines/rl/hrl/hrl_ppo.py b/habitat-baselines/habitat_baselines/rl/hrl/hrl_ppo.py index a0cb2222a7..31e742de5c 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/hrl_ppo.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/hrl_ppo.py @@ -11,7 +11,7 @@ @baseline_registry.register_updater -class HrlPPO(PPO): +class HRLPPO(PPO): def _update_from_batch(self, batch, epoch, rollouts, learner_metrics): n_samples = max(batch["loss_mask"].sum(), 1) @@ -112,5 +112,5 @@ def reduce_loss(loss): @baseline_registry.register_updater -class HrlDDPPO(DecentralizedDistributedMixin, HrlPPO): +class HrlDDPPO(DecentralizedDistributedMixin, HRLPPO): pass diff --git a/habitat-baselines/habitat_baselines/rl/hrl/hrl_rollout_storage.py b/habitat-baselines/habitat_baselines/rl/hrl/hrl_rollout_storage.py index 2ee96eb16a..8cc1730e51 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/hrl_rollout_storage.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/hrl_rollout_storage.py @@ -4,7 +4,7 @@ # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. -from typing import Iterator +from typing import Iterator, Optional import torch @@ -21,15 +21,22 @@ @baseline_registry.register_storage class HrlRolloutStorage(RolloutStorage): + """ + Supports variable writes to the rollout buffer where data is not inserted + into the buffer on every step. When getting batches from the storage, these + batches will only contain samples that were written. This means that the + batches could be variable size and less than the maximum size of the + rollout buffer. + """ + def __init__(self, numsteps, num_envs, *args, **kwargs): super().__init__(numsteps, num_envs, *args, **kwargs) self._num_envs = num_envs self._cur_step_idxs = torch.zeros(self._num_envs, dtype=torch.long) self._last_should_inserts = None - if self.is_double_buffered: - raise ValueError( - "HRL storage does not support double buffered sampling" - ) + assert ( + not self.is_double_buffered + ), "HRL storage does not support double buffered sampling" def insert( self, @@ -41,8 +48,19 @@ def insert( rewards=None, next_masks=None, buffer_index: int = 0, - should_inserts=None, + should_inserts: Optional[torch.BoolTensor] = None, ): + """ + The only key different from the base `RolloutStorage` is + `should_inserts`. This is a bool tensor of shape [# environments,]. If + `should_insert[i] == True`, then this will the sample at enviroment + index `i` into the rollout buffer at environment index `i`, if not, it + will ignore the sample. If None, this defaults to the last insert + state. + + Rewards are summed when `should_insert[i] == False`. + """ + if next_masks is not None: next_masks = next_masks.to(self.device) if rewards is not None: @@ -143,6 +161,11 @@ def compute_returns(self, next_value, use_gae, gamma, tau): def recurrent_generator( self, advantages, num_batches ) -> Iterator[DictTree]: + """ + Generates data batches based on the data that has been written to the + rollout buffer. 
+ """ + num_environments = advantages.size(1) dones_cpu = ( torch.logical_not(self.buffers["masks"]) diff --git a/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py b/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py index dfe0a75738..0e1ac275b7 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py @@ -152,7 +152,7 @@ def should_terminate( prev_actions, masks, actions, - hl_says_term, + hl_policy_wants_termination, batch_idx: List[int], skill_name: List[str], log_info, @@ -198,7 +198,7 @@ def should_terminate( elif ( self._config.apply_postconds and is_skill_done[i] == 1.0 - and hl_says_term[i] == 0.0 + and hl_policy_wants_termination[i] == 0.0 ): new_actions[i] = self._apply_postcond( actions, log_info, skill_name[i], env_i, i @@ -206,7 +206,7 @@ def should_terminate( self._delay_term[env_i] = True is_skill_done[i] = 0.0 - is_skill_done |= hl_says_term + is_skill_done |= hl_policy_wants_termination if bad_terminate.sum() > 0: self._internal_log( diff --git a/habitat-lab/habitat/config/default_structured_configs.py b/habitat-lab/habitat/config/default_structured_configs.py index 7ae7ab593f..69e8dbcd6b 100644 --- a/habitat-lab/habitat/config/default_structured_configs.py +++ b/habitat-lab/habitat/config/default_structured_configs.py @@ -149,9 +149,6 @@ class OracleNavActionConfig(ActionConfig): forward_velocity: float = 1.0 turn_thresh: float = 0.1 dist_thresh: float = 0.2 - stop_thresh: float = 0.001 - spawn_max_dist_to_obj: float = 2.0 - num_spawn_attempts: int = 200 lin_speed: float = 10.0 ang_speed: float = 10.0 allow_dyn_slide: bool = True From 3a805eaa2576f9dea1f84e5209c114e95887985a Mon Sep 17 00:00:00 2001 From: ASzot Date: Fri, 27 Jan 2023 10:23:55 -0800 Subject: [PATCH 14/48] Updated circular reference --- .../habitat_baselines/common/baseline_registry.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/habitat-baselines/habitat_baselines/common/baseline_registry.py b/habitat-baselines/habitat_baselines/common/baseline_registry.py index 864833b0a2..17b5697084 100644 --- a/habitat-baselines/habitat_baselines/common/baseline_registry.py +++ b/habitat-baselines/habitat_baselines/common/baseline_registry.py @@ -23,7 +23,6 @@ from typing import Optional from habitat.core.registry import Registry -from habitat_baselines.common.rollout_storage import RolloutStorage class BaselineRegistry(Registry): @@ -139,6 +138,13 @@ def get_auxiliary_loss(cls, name: str): @classmethod def register_storage(cls, to_register=None, *, name: Optional[str] = None): + """ + Registers data storage for storing data in the policy rollout in the + trainer and then for fetching data batches for the updater. + """ + + from habitat_baselines.common.rollout_storage import RolloutStorage + return cls._register_impl( "storage", to_register, name, assert_type=RolloutStorage ) @@ -149,6 +155,10 @@ def get_storage(cls, name: str): @classmethod def register_updater(cls, to_register=None, *, name: Optional[str] = None): + """ + Registers a policy updater. 
+ """ + return cls._register_impl("updater", to_register, name) @classmethod From 7d92072817588d97328e7ba2fda59b491be464c2 Mon Sep 17 00:00:00 2001 From: ASzot Date: Fri, 27 Jan 2023 10:44:13 -0800 Subject: [PATCH 15/48] Addressing PR comments --- .../config/default_structured_configs.py | 11 ++++++++++- .../config/habitat_baselines/rl/policy/hl_neural.yaml | 2 +- .../habitat_baselines/rl/hrl/hierarchical_policy.py | 9 +++++++-- .../habitat_baselines/rl/hrl/hl/high_level_policy.py | 2 +- .../habitat_baselines/rl/hrl/skills/nn_skill.py | 4 ++-- .../habitat_baselines/rl/hrl/skills/noop.py | 4 ++-- .../habitat_baselines/rl/hrl/skills/oracle_nav.py | 6 +++--- .../habitat_baselines/rl/hrl/skills/pick.py | 6 ++++-- .../habitat_baselines/rl/hrl/skills/reset.py | 4 ++-- .../habitat_baselines/rl/hrl/skills/skill.py | 8 ++++---- .../habitat_baselines/rl/hrl/skills/wait.py | 4 ++-- habitat-baselines/habitat_baselines/rl/hrl/utils.py | 5 +++-- habitat-baselines/habitat_baselines/rl/ppo/policy.py | 8 ++++---- 13 files changed, 45 insertions(+), 28 deletions(-) diff --git a/habitat-baselines/habitat_baselines/config/default_structured_configs.py b/habitat-baselines/habitat_baselines/config/default_structured_configs.py index 062340b64c..99e4e8d781 100644 --- a/habitat-baselines/habitat_baselines/config/default_structured_configs.py +++ b/habitat-baselines/habitat_baselines/config/default_structured_configs.py @@ -225,20 +225,29 @@ class Eq2CubeConfig(ObsTransformConfig): @dataclass class HrlDefinedSkillConfig(HabitatBaselinesBaseConfig): + """ + Defines a low-level skill to be used in the hierarchical policy. + """ + skill_name: str = MISSING name: str = "PointNavResNetPolicy" action_distribution_type: str = "gaussian" load_ckpt_file: str = "" max_skill_steps: int = 200 + # If true, the stop action will be called if the skill times out. force_end_on_timeout: bool = True + # Overrides the config file of a neural network skill rather than loading + # the config file from the checkpoint file. force_config_file: str = "" at_resting_threshold: float = 0.15 + # If true, this willapply the post-conditions of the skill after it + # terminates. 
apply_postconds: bool = False obs_skill_inputs: List[str] = field(default_factory=list) obs_skill_input_dim: int = 3 start_zone_radius: float = 0.3 # For the oracle navigation skill - nav_action_name: str = "base_velocity" + action_name: str = "base_velocity" stop_thresh: float = 0.001 # For the reset_arm_skill reset_joint_state: List[float] = MISSING diff --git a/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hl_neural.yaml b/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hl_neural.yaml index 2e7d9a4c87..4246ca0e03 100644 --- a/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hl_neural.yaml +++ b/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hl_neural.yaml @@ -15,4 +15,4 @@ hierarchical_policy: num_rnn_layers: 2 policy_input_keys: - "robot_head_depth" - defined_skills: {} + defined_skills: MISSING diff --git a/habitat-baselines/habitat_baselines/rl/hrl/hierarchical_policy.py b/habitat-baselines/habitat_baselines/rl/hrl/hierarchical_policy.py index 5c768aba17..b1e0973a80 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/hierarchical_policy.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/hierarchical_policy.py @@ -34,7 +34,7 @@ WaitSkillPolicy, ) from habitat_baselines.rl.hrl.utils import find_action_range -from habitat_baselines.rl.ppo.policy import Policy, PolicyAction +from habitat_baselines.rl.ppo.policy import Policy, PolicyActionData from habitat_baselines.utils.common import get_num_actions @@ -128,6 +128,11 @@ def eval(self): def get_policy_action_space( self, env_action_space: spaces.Space ) -> spaces.Space: + """ + Fetches the policy action space for learning. If we are learning the HL + policy, it will return its custom action space for learning. + """ + return self._high_level_policy.get_policy_action_space( env_action_space ) @@ -360,7 +365,7 @@ def act( } action_kwargs.update(hl_info) - return PolicyAction( + return PolicyActionData( take_actions=actions, policy_info=log_info, should_inserts=call_high_level, diff --git a/habitat-baselines/habitat_baselines/rl/hrl/hl/high_level_policy.py b/habitat-baselines/habitat_baselines/rl/hrl/hl/high_level_policy.py index d4dd946415..c148c62f28 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/hl/high_level_policy.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/hl/high_level_policy.py @@ -86,7 +86,7 @@ def get_next_skill( - skill_args_data: Arguments for the next skill. - immediate_end: Binary masks indicating which environment(s) should end immediately. 
- - Information for PolicyAction + - Information for PolicyActionData """ raise NotImplementedError() diff --git a/habitat-baselines/habitat_baselines/rl/hrl/skills/nn_skill.py b/habitat-baselines/habitat_baselines/rl/hrl/skills/nn_skill.py index 3478613fb1..921e96dd2f 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/skills/nn_skill.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/skills/nn_skill.py @@ -14,7 +14,7 @@ from habitat_baselines.common.tensor_dict import TensorDict from habitat_baselines.config.default import get_config from habitat_baselines.rl.hrl.skills.skill import SkillPolicy -from habitat_baselines.rl.ppo.policy import PolicyAction +from habitat_baselines.rl.ppo.policy import PolicyActionData from habitat_baselines.utils.common import get_num_actions @@ -119,7 +119,7 @@ def _internal_act( masks, cur_batch_idx, deterministic=False, - ) -> PolicyAction: + ) -> PolicyActionData: filtered_obs = self._get_filtered_obs(observations, cur_batch_idx) filtered_prev_actions = prev_actions[ diff --git a/habitat-baselines/habitat_baselines/rl/hrl/skills/noop.py b/habitat-baselines/habitat_baselines/rl/hrl/skills/noop.py index 6456344cb9..271afeaa0f 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/skills/noop.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/skills/noop.py @@ -4,7 +4,7 @@ import torch from habitat_baselines.rl.hrl.skills.skill import SkillPolicy -from habitat_baselines.rl.ppo.policy import PolicyAction +from habitat_baselines.rl.ppo.policy import PolicyActionData class NoopSkillPolicy(SkillPolicy): @@ -34,6 +34,6 @@ def _internal_act( full_action, deterministic=False, ): - return PolicyAction( + return PolicyActionData( actions=full_action, rnn_hidden_states=rnn_hidden_states ) diff --git a/habitat-baselines/habitat_baselines/rl/hrl/skills/oracle_nav.py b/habitat-baselines/habitat_baselines/rl/hrl/skills/oracle_nav.py index 2208084edc..ffcd805da9 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/skills/oracle_nav.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/skills/oracle_nav.py @@ -17,7 +17,7 @@ from habitat_baselines.common.logging import baselines_logger from habitat_baselines.rl.hrl.skills.nn_skill import NnSkillPolicy from habitat_baselines.rl.hrl.utils import find_action_range -from habitat_baselines.rl.ppo.policy import PolicyAction +from habitat_baselines.rl.ppo.policy import PolicyActionData class OracleNavPolicy(NnSkillPolicy): @@ -88,7 +88,7 @@ def from_config( cls, config, observation_space, action_space, batch_size, full_config ): filtered_action_space = ActionSpace( - {config.nav_action_name: action_space[config.nav_action_name]} + {config.action_name: action_space[config.nav_action_name]} ) baselines_logger.debug( f"Loaded action space {filtered_action_space} for skill {config.skill_name}" @@ -190,6 +190,6 @@ def _internal_act( full_action[:, self._oracle_nav_ac_idx] = action_idxs - return PolicyAction( + return PolicyActionData( actions=full_action, rnn_hidden_states=rnn_hidden_states ) diff --git a/habitat-baselines/habitat_baselines/rl/hrl/skills/pick.py b/habitat-baselines/habitat_baselines/rl/hrl/skills/pick.py index 4efb63d415..4cc58e520f 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/skills/pick.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/skills/pick.py @@ -9,7 +9,7 @@ RelativeRestingPositionSensor, ) from habitat_baselines.rl.hrl.skills.nn_skill import NnSkillPolicy -from habitat_baselines.rl.ppo.policy import PolicyAction +from habitat_baselines.rl.ppo.policy import PolicyActionData class 
PickSkillPolicy(NnSkillPolicy): @@ -34,7 +34,9 @@ def _parse_skill_arg(self, skill_arg): self._internal_log(f"Parsing skill argument {skill_arg}") return int(skill_arg[0].split("|")[1]) - def _mask_pick(self, action: PolicyAction, observations) -> PolicyAction: + def _mask_pick( + self, action: PolicyActionData, observations + ) -> PolicyActionData: # Mask out the release if the object is already held. is_holding = observations[IsHoldingSensor.cls_uuid].view(-1) for i in torch.nonzero(is_holding): diff --git a/habitat-baselines/habitat_baselines/rl/hrl/skills/reset.py b/habitat-baselines/habitat_baselines/rl/hrl/skills/reset.py index efe5531075..39bf4ea058 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/skills/reset.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/skills/reset.py @@ -10,7 +10,7 @@ from habitat_baselines.rl.hrl.skills.skill import SkillPolicy from habitat_baselines.rl.hrl.utils import find_action_range -from habitat_baselines.rl.ppo.policy import PolicyAction +from habitat_baselines.rl.ppo.policy import PolicyActionData class ResetArmSkill(SkillPolicy): @@ -91,6 +91,6 @@ def _internal_act( device=action.device, dtype=action.dtype ) - return PolicyAction( + return PolicyActionData( actions=action, rnn_hidden_states=rnn_hidden_states ) diff --git a/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py b/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py index 0e1ac275b7..c6d6f3fb61 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py @@ -10,7 +10,7 @@ from habitat.tasks.rearrange.rearrange_sensors import IsHoldingSensor from habitat_baselines.common.logging import baselines_logger from habitat_baselines.rl.hrl.utils import find_action_range -from habitat_baselines.rl.ppo.policy import Policy, PolicyAction +from habitat_baselines.rl.ppo.policy import Policy, PolicyActionData from habitat_baselines.utils.common import get_num_actions @@ -81,8 +81,8 @@ def _get_multi_sensor_index(self, batch_idx: List[int]) -> List[int]: return [self._cur_skill_args[i] for i in batch_idx] def _keep_holding_state( - self, action_data: PolicyAction, observations - ) -> PolicyAction: + self, action_data: PolicyActionData, observations + ) -> PolicyActionData: """ Makes the action so it does not result in dropping or picking up an object. 
Used in navigation and other skills which are not supposed to @@ -327,5 +327,5 @@ def _internal_act( masks, cur_batch_idx, deterministic=False, - ) -> PolicyAction: + ) -> PolicyActionData: raise NotImplementedError() diff --git a/habitat-baselines/habitat_baselines/rl/hrl/skills/wait.py b/habitat-baselines/habitat_baselines/rl/hrl/skills/wait.py index d5500d3185..6a0887ba96 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/skills/wait.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/skills/wait.py @@ -8,7 +8,7 @@ import torch from habitat_baselines.rl.hrl.skills.skill import SkillPolicy -from habitat_baselines.rl.ppo.policy import PolicyAction +from habitat_baselines.rl.ppo.policy import PolicyActionData class WaitSkillPolicy(SkillPolicy): @@ -43,6 +43,6 @@ def _internal_act( action = torch.zeros( (masks.shape[0], self._full_ac_size), device=prev_actions.device ) - return PolicyAction( + return PolicyActionData( actions=action, rnn_hidden_states=rnn_hidden_states ) diff --git a/habitat-baselines/habitat_baselines/rl/hrl/utils.py b/habitat-baselines/habitat_baselines/rl/hrl/utils.py index 0c22f3b183..d2b56d2f1e 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/utils.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/utils.py @@ -12,7 +12,8 @@ def find_action_range( action_space: ActionSpace, search_key: str ) -> Tuple[int, int]: """ - Returns the start and end indices of an action key in the action tensor. + Returns the start and end indices of an action key in the action tensor. If + the key is not found. It returns None. """ start_idx = 0 @@ -24,5 +25,5 @@ def find_action_range( break start_idx += get_num_actions(action_space[k]) if not found: - return None + raise ValueError(f"Could not find stop action in {action_space}") return start_idx, end_idx diff --git a/habitat-baselines/habitat_baselines/rl/ppo/policy.py b/habitat-baselines/habitat_baselines/rl/ppo/policy.py index 2ec25b68c8..ecfd261063 100644 --- a/habitat-baselines/habitat_baselines/rl/ppo/policy.py +++ b/habitat-baselines/habitat_baselines/rl/ppo/policy.py @@ -32,7 +32,7 @@ @dataclass -class PolicyAction: +class PolicyActionData: """ Information returned from the `Policy` class. """ @@ -94,7 +94,7 @@ def all_policy_tensors(self) -> Iterable[torch.Tensor]: yield from c.buffers() def extract_policy_info( - self, action_data: PolicyAction, infos, dones + self, action_data: PolicyActionData, infos, dones ) -> List[Dict[str, float]]: """ Gets the log information from the policy at the current time step. 
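A small, self-contained illustration of the index arithmetic that `find_action_range` is documented to perform over a dict-style action space; the sub-space names and sizes below are invented, and plain `gym.spaces.Dict` stands in for Habitat's `ActionSpace`.

import gym.spaces as spaces

# Keys are listed alphabetically since gym's Dict space may reorder them.
toy_action_space = spaces.Dict(
    {
        "arm_action": spaces.Box(-1.0, 1.0, (7,)),     # flat indices [0, 7)
        "base_velocity": spaces.Box(-1.0, 1.0, (2,)),  # flat indices [7, 9)
        "rearrange_stop": spaces.Box(-1.0, 1.0, (1,)), # flat indices [9, 10)
    }
)

def sketch_find_action_range(action_space, search_key):
    # Walk the sub-spaces in order, summing flattened sizes until the key is
    # found; mirrors the documented behavior of raising when the key is missing.
    start_idx = 0
    for key, space in action_space.spaces.items():
        size = spaces.flatdim(space)
        if key == search_key:
            return start_idx, start_idx + size
        start_idx += size
    raise ValueError(f"Could not find {search_key} in {action_space}")

print(sketch_find_action_range(toy_action_space, "base_velocity"))  # (7, 9)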
@@ -112,7 +112,7 @@ def act( prev_actions, masks, deterministic=False, - ) -> PolicyAction: + ) -> PolicyActionData: raise NotImplementedError @classmethod @@ -203,7 +203,7 @@ def act( action = distribution.sample() action_log_probs = distribution.log_probs(action) - return PolicyAction( + return PolicyActionData( values=value, actions=value, action_log_probs=action_log_probs, From 4696551ea3d059f1f3f35d68e074ba308219415d Mon Sep 17 00:00:00 2001 From: ASzot Date: Fri, 27 Jan 2023 10:47:00 -0800 Subject: [PATCH 16/48] Addressing PR comments --- .../habitat_baselines/rl/hrl/hrl_rollout_storage.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/habitat-baselines/habitat_baselines/rl/hrl/hrl_rollout_storage.py b/habitat-baselines/habitat_baselines/rl/hrl/hrl_rollout_storage.py index 8cc1730e51..1de4f6b56d 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/hrl_rollout_storage.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/hrl_rollout_storage.py @@ -118,6 +118,12 @@ def insert( self._last_should_inserts = should_inserts def advance_rollout(self, buffer_index: int = 0): + """ + This will advance to writing at the next step in the data buffer ONLY + if an element was written to that environment index in the previous + step. + """ + self._cur_step_idxs += self._last_should_inserts.long() is_past_buffer = self._cur_step_idxs >= self.num_steps From c11601feb9d5a9d96661fbdbd016005f59e5f999 Mon Sep 17 00:00:00 2001 From: Andrew Szot Date: Fri, 27 Jan 2023 10:48:53 -0800 Subject: [PATCH 17/48] Update habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py Co-authored-by: Vincent-Pierre BERGES <28320361+vincentpierre@users.noreply.github.com> --- habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py b/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py index c6d6f3fb61..da8c915863 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py @@ -112,10 +112,7 @@ def _apply_postcond( action = self._pddl_problem.actions[skill_name] entities = [self._pddl_problem.get_entity(x) for x in skill_args] - if self._pddl_ac_start is None: - raise ValueError( - "Apply post cond not supported when pddl action not in action space" - ) + assert self._pddl_ac_start is not None, "Apply post cond not supported when pddl action not in action space" ac_idx = self._pddl_ac_start found = False From 90155e13c085017eda20a537bd3c8a8669e6312e Mon Sep 17 00:00:00 2001 From: ASzot Date: Fri, 27 Jan 2023 10:56:24 -0800 Subject: [PATCH 18/48] Addressing PR comments --- .../rl/hrl/hierarchical_policy.py | 2 +- .../rl/hrl/skills/nn_skill.py | 4 +++ .../habitat_baselines/rl/hrl/skills/skill.py | 35 +++++++++++-------- 3 files changed, 26 insertions(+), 15 deletions(-) diff --git a/habitat-baselines/habitat_baselines/rl/hrl/hierarchical_policy.py b/habitat-baselines/habitat_baselines/rl/hrl/hierarchical_policy.py index b1e0973a80..e57a2dc5ef 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/hierarchical_policy.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/hierarchical_policy.py @@ -318,7 +318,7 @@ def act( if "rnn_hidden_states" not in hl_info: rnn_hidden_states[batch_ids] *= 0.0 prev_actions[batch_ids] *= 0 - elif self._skills[skill_id].num_recurrent_layers != 0: + elif self._skills[skill_id].has_hidden_state: raise ValueError( f"The code does not currently support neural LL and neural HL 
skills. Skill={self._skills[skill_id]}, HL={self._high_level_policy}" ) diff --git a/habitat-baselines/habitat_baselines/rl/hrl/skills/nn_skill.py b/habitat-baselines/habitat_baselines/rl/hrl/skills/nn_skill.py index 921e96dd2f..12e172e114 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/skills/nn_skill.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/skills/nn_skill.py @@ -73,6 +73,10 @@ def parameters(self): else: return [] + @property + def has_hidden_state(self): + return self.num_recurrent_layers != 0 + @property def num_recurrent_layers(self): if self._wrap_policy is not None: diff --git a/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py b/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py index da8c915863..b912640359 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py @@ -2,11 +2,12 @@ # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. -from typing import Any, List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple import gym.spaces as spaces import torch +from habitat.core.simulator import Observations from habitat.tasks.rearrange.rearrange_sensors import IsHoldingSensor from habitat_baselines.common.logging import baselines_logger from habitat_baselines.rl.hrl.utils import find_action_range @@ -80,6 +81,14 @@ def _get_multi_sensor_index(self, batch_idx: List[int]) -> List[int]: """ return [self._cur_skill_args[i] for i in batch_idx] + @property + def has_hidden_state(self): + """ + Returns if the skill requires a hidden state. + """ + + return False + def _keep_holding_state( self, action_data: PolicyActionData, observations ) -> PolicyActionData: @@ -112,7 +121,9 @@ def _apply_postcond( action = self._pddl_problem.actions[skill_name] entities = [self._pddl_problem.get_entity(x) for x in skill_args] - assert self._pddl_ac_start is not None, "Apply post cond not supported when pddl action not in action space" + assert ( + self._pddl_ac_start is not None + ), "Apply post cond not supported when pddl action not in action space" ac_idx = self._pddl_ac_start found = False @@ -130,7 +141,7 @@ def _apply_postcond( ] if len(entity_idxs) != action.n_args: raise ValueError( - f"Inconsistent # of args {action.n_args} versus {entity_idxs} for {action} with {skill_args} and {entities}" + f"The skill was called with the wrong # of args {action.n_args} versus {entity_idxs} for {action} with {skill_args} and {entities}. Make sure the skill and PDDL definition match." ) actions[idx, ac_idx : ac_idx + action.n_args] = torch.tensor( @@ -144,15 +155,15 @@ def _apply_postcond( def should_terminate( self, - observations, - rnn_hidden_states, - prev_actions, - masks, - actions, - hl_policy_wants_termination, + observations: Observations, + rnn_hidden_states: torch.Tensor, + prev_actions: torch.Tensor, + masks: torch.Tensor, + actions: torch.Tensor, + hl_policy_wants_termination: torch.BoolTensor, batch_idx: List[int], skill_name: List[str], - log_info, + log_info: List[Dict[str, Any]], ) -> Tuple[torch.BoolTensor, torch.BoolTensor, torch.Tensor]: """ :returns: Both of the BoolTensor's will be on the CPU. 
@@ -171,10 +182,6 @@ def should_terminate( ) cur_skill_step = self._cur_skill_step[batch_idx] - bad_terminate = torch.zeros( - cur_skill_step.shape, - dtype=torch.bool, - ) bad_terminate = torch.zeros( cur_skill_step.shape, From 755acb4aa0af9ef62055cf95305d458700b6ea83 Mon Sep 17 00:00:00 2001 From: ASzot Date: Fri, 27 Jan 2023 22:16:46 -0800 Subject: [PATCH 19/48] Resolved storage problem --- .../habitat_baselines/common/baseline_registry.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/habitat-baselines/habitat_baselines/common/baseline_registry.py b/habitat-baselines/habitat_baselines/common/baseline_registry.py index 17b5697084..26554f6054 100644 --- a/habitat-baselines/habitat_baselines/common/baseline_registry.py +++ b/habitat-baselines/habitat_baselines/common/baseline_registry.py @@ -143,11 +143,7 @@ def register_storage(cls, to_register=None, *, name: Optional[str] = None): trainer and then for fetching data batches for the updater. """ - from habitat_baselines.common.rollout_storage import RolloutStorage - - return cls._register_impl( - "storage", to_register, name, assert_type=RolloutStorage - ) + return cls._register_impl("storage", to_register, name) @classmethod def get_storage(cls, name: str): From 4754fd77c6d640a77a7ccfa6da9f1211bd034619 Mon Sep 17 00:00:00 2001 From: Xavier Puig Date: Sun, 29 Jan 2023 19:46:41 -0800 Subject: [PATCH 20/48] Update oracle_nav.py --- habitat-baselines/habitat_baselines/rl/hrl/skills/oracle_nav.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/habitat-baselines/habitat_baselines/rl/hrl/skills/oracle_nav.py b/habitat-baselines/habitat_baselines/rl/hrl/skills/oracle_nav.py index ffcd805da9..be26af308a 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/skills/oracle_nav.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/skills/oracle_nav.py @@ -88,7 +88,7 @@ def from_config( cls, config, observation_space, action_space, batch_size, full_config ): filtered_action_space = ActionSpace( - {config.action_name: action_space[config.nav_action_name]} + {config.action_name: action_space[config.action_name]} ) baselines_logger.debug( f"Loaded action space {filtered_action_space} for skill {config.skill_name}" From 587672eb0f5f72e08c1e751007184711e4ed9b4f Mon Sep 17 00:00:00 2001 From: ASzot Date: Sun, 29 Jan 2023 20:03:39 -0800 Subject: [PATCH 21/48] Fix for agent rotation --- .../habitat_baselines/rl/ppo/policy.py | 9 ++++++-- .../habitat_baselines/rl/ppo/ppo_trainer.py | 2 +- .../rl/ver/inference_worker.py | 22 ++++++++----------- .../habitat_baselines/rl/ver/ver_trainer.py | 3 ++- .../habitat/tasks/rearrange/rearrange_task.py | 3 +++ 5 files changed, 22 insertions(+), 17 deletions(-) diff --git a/habitat-baselines/habitat_baselines/rl/ppo/policy.py b/habitat-baselines/habitat_baselines/rl/ppo/policy.py index ecfd261063..9ab6b7c144 100644 --- a/habitat-baselines/habitat_baselines/rl/ppo/policy.py +++ b/habitat-baselines/habitat_baselines/rl/ppo/policy.py @@ -34,7 +34,12 @@ @dataclass class PolicyActionData: """ - Information returned from the `Policy` class. + Information returned from the `Policy.act` method representing the + information from an agent's action. + + :property should_inserts: Of shape [# envs, 1]. If False at environment + index `i`, then don't write this transition to the rollout buffer. If + `None`, then write all data. 
""" rnn_hidden_states: torch.Tensor @@ -43,7 +48,7 @@ class PolicyActionData: action_log_probs: Optional[torch.Tensor] = None take_actions: Optional[torch.Tensor] = None policy_info: Optional[List[Dict[str, Any]]] = None - should_inserts: Optional[torch.Tensor] = None + should_inserts: Optional[torch.BoolTensor] = None def write_action(self, write_idx, write_action): self.actions[:, write_idx] = write_action diff --git a/habitat-baselines/habitat_baselines/rl/ppo/ppo_trainer.py b/habitat-baselines/habitat_baselines/rl/ppo/ppo_trainer.py index 1adbc11c38..114845a9b6 100644 --- a/habitat-baselines/habitat_baselines/rl/ppo/ppo_trainer.py +++ b/habitat-baselines/habitat_baselines/rl/ppo/ppo_trainer.py @@ -206,7 +206,7 @@ def _setup_actor_critic_agent(self, ppo_cfg: "DictConfig") -> None: self.agent = agent_cls.from_config(self.actor_critic, ppo_cfg) self.policy_action_space = self.actor_critic.get_policy_action_space( - self.envs.action_spaces[0] + self.env_action_space ) def _init_envs(self, config=None, is_eval: bool = False): diff --git a/habitat-baselines/habitat_baselines/rl/ver/inference_worker.py b/habitat-baselines/habitat_baselines/rl/ver/inference_worker.py index 56ef3f7025..e4880156d0 100644 --- a/habitat-baselines/habitat_baselines/rl/ver/inference_worker.py +++ b/habitat-baselines/habitat_baselines/rl/ver/inference_worker.py @@ -318,13 +318,7 @@ def step(self) -> Tuple[bool, List[Tuple[int, int]]]: PointNavResNetNet.PRETRAINED_VISUAL_FEATURES_KEY ] = self.visual_encoder(obs) - ( - values, - actions, - actions_log_probs, - next_recurrent_hidden_states, - policy_info, - ) = self.actor_critic.act( + action_data = self.actor_critic.act( obs, recurrent_hidden_states, prev_actions, @@ -333,10 +327,12 @@ def step(self) -> Tuple[bool, List[Tuple[int, int]]]: if not final_batch: self.rollouts.next_hidden_states.index_copy_( - 0, environment_ids, next_recurrent_hidden_states + 0, + environment_ids, + action_data.rnn_hidden_states, ) self.rollouts.next_prev_actions.index_copy_( - 0, environment_ids, actions + 0, environment_ids, action_data.actions ) if self._variable_experience: @@ -348,7 +344,7 @@ def step(self) -> Tuple[bool, List[Tuple[int, int]]]: dtype=np.int64, ) - cpu_actions = actions.to(device="cpu") + cpu_actions = action_data.env_actions.to(device="cpu") self.transfer_buffers["actions"][ self.new_reqs ] = cpu_actions.numpy() @@ -388,8 +384,8 @@ def step(self) -> Tuple[bool, List[Tuple[int, int]]]: dict( masks=to_batch["masks"], observations=obs, - actions=actions, - action_log_probs=actions_log_probs, + actions=action_data.actions, + action_log_probs=action_data.actions_log_probs, recurrent_hidden_states=recurrent_hidden_states, prev_actions=prev_actions, policy_version=self.rollouts.current_policy_version.expand( @@ -398,7 +394,7 @@ def step(self) -> Tuple[bool, List[Tuple[int, int]]]: episode_ids=to_batch["episode_ids"], environment_ids=to_batch["environment_ids"], step_ids=to_batch["step_ids"], - value_preds=values, + value_preds=action_data.values, returns=torch.full( (), float("nan"), diff --git a/habitat-baselines/habitat_baselines/rl/ver/ver_trainer.py b/habitat-baselines/habitat_baselines/rl/ver/ver_trainer.py index 504bdf71d5..73c50ef4ca 100644 --- a/habitat-baselines/habitat_baselines/rl/ver/ver_trainer.py +++ b/habitat-baselines/habitat_baselines/rl/ver/ver_trainer.py @@ -181,7 +181,8 @@ def _init_train(self, resume_state): action_space = init_reports[0]["act_space"] self.policy_action_space = action_space - self.orig_policy_action_space = None + self.env_action_space 
= action_space + self.orig_env_action_space = None [ ew.set_action_plugin( diff --git a/habitat-lab/habitat/tasks/rearrange/rearrange_task.py b/habitat-lab/habitat/tasks/rearrange/rearrange_task.py index 1c9d38f2a4..4563bc490f 100644 --- a/habitat-lab/habitat/tasks/rearrange/rearrange_task.py +++ b/habitat-lab/habitat/tasks/rearrange/rearrange_task.py @@ -96,6 +96,9 @@ def __init__( # Duplicate sensors that handle robots. One for each robot. self._duplicate_sensor_suite(self.sensor_suite) + def overwrite_sim_config(self, config: Any, episode: Episode) -> Any: + return config + @property def targ_idx(self): return self._targ_idx From f16bc1473bad2a97c7684aa74de1ec5140b578bd Mon Sep 17 00:00:00 2001 From: ASzot Date: Mon, 30 Jan 2023 13:08:13 -0800 Subject: [PATCH 22/48] Missing key --- habitat-lab/habitat/config/default_structured_configs.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/habitat-lab/habitat/config/default_structured_configs.py b/habitat-lab/habitat/config/default_structured_configs.py index 69e8dbcd6b..f56dc7eb70 100644 --- a/habitat-lab/habitat/config/default_structured_configs.py +++ b/habitat-lab/habitat/config/default_structured_configs.py @@ -153,6 +153,8 @@ class OracleNavActionConfig(ActionConfig): ang_speed: float = 10.0 allow_dyn_slide: bool = True allow_back: bool = True + spawn_max_dist_to_obj: float = 2.0 + num_spawn_attempts: int = 200 # ----------------------------------------------------------------------------- From 22cb83f1190a4b43534295a5adfc2813ff0bcbac Mon Sep 17 00:00:00 2001 From: ASzot Date: Mon, 30 Jan 2023 22:18:13 -0800 Subject: [PATCH 23/48] More docs --- .../defined_skills/oracle_skills.yaml | 3 +++ .../habitat_baselines/rl/hrl/__init__.py | 2 +- .../habitat_baselines/rl/ppo/policy.py | 22 ++++++++++++++++++- 3 files changed, 25 insertions(+), 2 deletions(-) diff --git a/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_policy/defined_skills/oracle_skills.yaml b/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_policy/defined_skills/oracle_skills.yaml index d3f9c34723..98985a5eec 100644 --- a/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_policy/defined_skills/oracle_skills.yaml +++ b/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_policy/defined_skills/oracle_skills.yaml @@ -1,3 +1,6 @@ +# Oracle skills that will teleport to the skill post-condition. When automatically setting predicates you may want to run the simulation in kinematic mode: +# To run in kinematic mode, add: `habitat.simulator.kinematic_mode=True habitat.simulator.ac_freq_ratio=1` + open_cab: skill_name: "NoopSkillPolicy" max_skill_steps: 1 diff --git a/habitat-baselines/habitat_baselines/rl/hrl/__init__.py b/habitat-baselines/habitat_baselines/rl/hrl/__init__.py index 394540d026..39a3f0c90d 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/__init__.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/__init__.py @@ -8,6 +8,6 @@ from habitat_baselines.rl.hrl.hrl_rollout_storage import HrlRolloutStorage __all__ = [ - "HrlPPO", + "HRLPPO", "HrlRolloutStorage", ] diff --git a/habitat-baselines/habitat_baselines/rl/ppo/policy.py b/habitat-baselines/habitat_baselines/rl/ppo/policy.py index 9ab6b7c144..21e621488b 100644 --- a/habitat-baselines/habitat_baselines/rl/ppo/policy.py +++ b/habitat-baselines/habitat_baselines/rl/ppo/policy.py @@ -40,6 +40,19 @@ class PolicyActionData: :property should_inserts: Of shape [# envs, 1]. 
If False at environment index `i`, then don't write this transition to the rollout buffer. If `None`, then write all data. + :property policy_info: Optional logging information about the policy per + environment. For example, you could log the policy entropy. + :property take_actions: If specified, these actions will be executed in + the environment, but not stored in the storage buffer. This allows + executing and learning from different actions. If not specified, the + agent will execute `self.actions`. + :property values: The actor value predictions. None if the actor does not predict value. + :property actions: The actions to store in the storage buffer. If + `take_actions` is None, then this is also the action executed in the + environment. + :property rnn_hidden_states: Actor hidden states. + :property action_log_probs: The log probabilities of the actions under the + current policy. """ rnn_hidden_states: torch.Tensor @@ -50,11 +63,18 @@ class PolicyActionData: policy_info: Optional[List[Dict[str, Any]]] = None should_inserts: Optional[torch.BoolTensor] = None - def write_action(self, write_idx, write_action): + def write_action(self, write_idx: int, write_action: torch.Tensor) -> None: + """ + Used to override an action across all environments. + """ self.actions[:, write_idx] = write_action @property def env_actions(self) -> torch.Tensor: + """ + The actions to execute in the environment. + """ + if self.take_actions is None: return self.actions else: From df8b7a6c75ec1d663027c877935cf9c1e8a80da7 Mon Sep 17 00:00:00 2001 From: Andrew Szot Date: Mon, 30 Jan 2023 22:20:10 -0800 Subject: [PATCH 24/48] Update habitat-baselines/habitat_baselines/rl/hrl/hrl_rollout_storage.py Co-authored-by: Vincent-Pierre BERGES <28320361+vincentpierre@users.noreply.github.com> --- .../habitat_baselines/rl/hrl/hrl_rollout_storage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/habitat-baselines/habitat_baselines/rl/hrl/hrl_rollout_storage.py b/habitat-baselines/habitat_baselines/rl/hrl/hrl_rollout_storage.py index 1de4f6b56d..959c041d98 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/hrl_rollout_storage.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/hrl_rollout_storage.py @@ -58,7 +58,7 @@ def insert( will ignore the sample. If None, this defaults to the last insert state. - Rewards are summed when `should_insert[i] == False`. + Rewards acquired at steps where `should_insert[i] == False` will be summed up and added to the next step where `should_insert[i] == True`. From 422c036f7b971f439b9a2b5d7c77aa008fe50a79 Mon Sep 17 00:00:00 2001 From: Andrew Szot Date: Mon, 30 Jan 2023 22:20:21 -0800 Subject: [PATCH 25/48] Update habitat-baselines/habitat_baselines/rl/hrl/utils.py Co-authored-by: Vincent-Pierre BERGES <28320361+vincentpierre@users.noreply.github.com> --- habitat-baselines/habitat_baselines/rl/hrl/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/habitat-baselines/habitat_baselines/rl/hrl/utils.py b/habitat-baselines/habitat_baselines/rl/hrl/utils.py index d2b56d2f1e..2c1b64bfe2 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/utils.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/utils.py @@ -13,7 +13,7 @@ ) -> Tuple[int, int]: """ Returns the start and end indices of an action key in the action tensor. If - the key is not found. It returns None. + the key is not found, a `ValueError` will be raised.
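A hedged sketch of how the `actions` / `take_actions` split documented above can be used; the minimal dataclass below is a stand-in that mirrors only the fields discussed here, and the tensor shapes are made up for the example.

from dataclasses import dataclass
from typing import Optional

import torch

@dataclass
class PolicyActionDataSketch:
    # Stand-in for the real PolicyActionData with just the relevant fields.
    rnn_hidden_states: torch.Tensor
    actions: Optional[torch.Tensor] = None
    take_actions: Optional[torch.Tensor] = None
    should_inserts: Optional[torch.BoolTensor] = None

    @property
    def env_actions(self) -> torch.Tensor:
        # Execute take_actions when provided, otherwise the stored actions.
        return self.actions if self.take_actions is None else self.take_actions

# E.g. a hierarchical policy could store the chosen skill index for learning
# while sending the skill's low-level command to the environment.
skill_idx = torch.tensor([[2]])       # written to the rollout buffer
low_level_cmd = torch.zeros(1, 11)    # executed in the environment
step = PolicyActionDataSketch(
    rnn_hidden_states=torch.zeros(1, 4, 512),
    actions=skill_idx,
    take_actions=low_level_cmd,
    should_inserts=torch.ones(1, 1, dtype=torch.bool),
)
assert step.env_actions is low_level_cmd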
""" start_idx = 0 From 5643b0eb6203ed9f7b91b0f3ec6b75d3b12722c2 Mon Sep 17 00:00:00 2001 From: ASzot Date: Mon, 30 Jan 2023 22:19:52 -0800 Subject: [PATCH 26/48] Updated name --- habitat-baselines/habitat_baselines/rl/hrl/hrl_ppo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/habitat-baselines/habitat_baselines/rl/hrl/hrl_ppo.py b/habitat-baselines/habitat_baselines/rl/hrl/hrl_ppo.py index 31e742de5c..4acad90588 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/hrl_ppo.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/hrl_ppo.py @@ -112,5 +112,5 @@ def reduce_loss(loss): @baseline_registry.register_updater -class HrlDDPPO(DecentralizedDistributedMixin, HRLPPO): +class HRLDDPPO(DecentralizedDistributedMixin, HRLPPO): pass From ebe877eb093364812a84661b9e8745a0c48657a7 Mon Sep 17 00:00:00 2001 From: ASzot Date: Tue, 31 Jan 2023 22:26:35 -0800 Subject: [PATCH 27/48] fixes for training --- .../habitat_baselines/rl/policy/hl_neural.yaml | 2 +- .../config/rearrange/rl_hierarchical.yaml | 1 + .../habitat_baselines/rl/hrl/hl/neural_policy.py | 1 + .../rl/hrl/hrl_rollout_storage.py | 14 +++++++++----- .../habitat_baselines/rl/ppo/policy.py | 2 +- .../habitat_baselines/rl/ver/inference_worker.py | 2 +- .../habitat/simulator/agents/rgbd_head_agent.yaml | 6 ++++++ 7 files changed, 20 insertions(+), 8 deletions(-) create mode 100644 habitat-lab/habitat/config/habitat/simulator/agents/rgbd_head_agent.yaml diff --git a/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hl_neural.yaml b/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hl_neural.yaml index 4246ca0e03..2e7d9a4c87 100644 --- a/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hl_neural.yaml +++ b/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hl_neural.yaml @@ -15,4 +15,4 @@ hierarchical_policy: num_rnn_layers: 2 policy_input_keys: - "robot_head_depth" - defined_skills: MISSING + defined_skills: {} diff --git a/habitat-baselines/habitat_baselines/config/rearrange/rl_hierarchical.yaml b/habitat-baselines/habitat_baselines/config/rearrange/rl_hierarchical.yaml index 70d35f7702..56d78df85a 100644 --- a/habitat-baselines/habitat_baselines/config/rearrange/rl_hierarchical.yaml +++ b/habitat-baselines/habitat_baselines/config/rearrange/rl_hierarchical.yaml @@ -28,6 +28,7 @@ habitat_baselines: force_torch_single_threaded: True eval_keys_to_include_in_name: ['reward', 'force', 'composite_success'] load_resume_state_config: False + rollout_storage: "HrlRolloutStorage" eval: use_ckpt_config: False diff --git a/habitat-baselines/habitat_baselines/rl/hrl/hl/neural_policy.py b/habitat-baselines/habitat_baselines/rl/hrl/hl/neural_policy.py index 3c6def42d2..15933f7354 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/hl/neural_policy.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/hl/neural_policy.py @@ -159,6 +159,7 @@ def evaluate_actions( action_log_probs, distribution_entropy, rnn_hidden_states, + {}, ) def get_next_skill( diff --git a/habitat-baselines/habitat_baselines/rl/hrl/hrl_rollout_storage.py b/habitat-baselines/habitat_baselines/rl/hrl/hrl_rollout_storage.py index 959c041d98..26dbbe9afc 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/hrl_rollout_storage.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/hrl_rollout_storage.py @@ -10,7 +10,7 @@ from habitat_baselines.common.baseline_registry import baseline_registry from habitat_baselines.common.rollout_storage import RolloutStorage -from 
habitat_baselines.common.tensor_dict import DictTree +from habitat_baselines.common.tensor_dict import DictTree, TensorDict from habitat_baselines.rl.models.rnn_state_encoder import ( build_pack_info_from_dones, build_rnn_build_seq_info, @@ -78,8 +78,12 @@ def insert( value_preds=value_preds, ) - next_step = {k: v for k, v in next_step.items() if v is not None} - current_step = {k: v for k, v in current_step.items() if v is not None} + next_step = TensorDict( + {k: v for k, v in next_step.items() if v is not None} + ) + current_step = TensorDict( + {k: v for k, v in current_step.items() if v is not None} + ) if should_inserts is None: should_inserts = self._last_should_inserts @@ -102,7 +106,7 @@ def insert( self._cur_step_idxs[should_inserts] + 1, env_idxs[should_inserts], ), - next_step, + next_step[should_inserts], strict=False, ) @@ -112,7 +116,7 @@ def insert( self._cur_step_idxs[should_inserts], env_idxs[should_inserts], ), - current_step, + current_step[should_inserts], strict=False, ) self._last_should_inserts = should_inserts diff --git a/habitat-baselines/habitat_baselines/rl/ppo/policy.py b/habitat-baselines/habitat_baselines/rl/ppo/policy.py index 21e621488b..e31120c932 100644 --- a/habitat-baselines/habitat_baselines/rl/ppo/policy.py +++ b/habitat-baselines/habitat_baselines/rl/ppo/policy.py @@ -230,7 +230,7 @@ def act( action_log_probs = distribution.log_probs(action) return PolicyActionData( values=value, - actions=value, + actions=action, action_log_probs=action_log_probs, rnn_hidden_states=rnn_hidden_states, ) diff --git a/habitat-baselines/habitat_baselines/rl/ver/inference_worker.py b/habitat-baselines/habitat_baselines/rl/ver/inference_worker.py index e4880156d0..d8646af772 100644 --- a/habitat-baselines/habitat_baselines/rl/ver/inference_worker.py +++ b/habitat-baselines/habitat_baselines/rl/ver/inference_worker.py @@ -385,7 +385,7 @@ def step(self) -> Tuple[bool, List[Tuple[int, int]]]: masks=to_batch["masks"], observations=obs, actions=action_data.actions, - action_log_probs=action_data.actions_log_probs, + action_log_probs=action_data.action_log_probs, recurrent_hidden_states=recurrent_hidden_states, prev_actions=prev_actions, policy_version=self.rollouts.current_policy_version.expand( diff --git a/habitat-lab/habitat/config/habitat/simulator/agents/rgbd_head_agent.yaml b/habitat-lab/habitat/config/habitat/simulator/agents/rgbd_head_agent.yaml new file mode 100644 index 0000000000..338bbd2dd3 --- /dev/null +++ b/habitat-lab/habitat/config/habitat/simulator/agents/rgbd_head_agent.yaml @@ -0,0 +1,6 @@ +# @package habitat.simulator.agents.rgb_head_agent + +defaults: + - agent_base + - /habitat/simulator/sim_sensors@sim_sensors.head_rgb_sensor: head_rgb_sensor + - /habitat/simulator/sim_sensors@sim_sensors.head_depth_sensor: head_depth_sensor From e1a8727bb72a234b7ebe3680cdee99900d72149c Mon Sep 17 00:00:00 2001 From: ASzot Date: Wed, 1 Feb 2023 16:24:19 -0800 Subject: [PATCH 28/48] Fixed env issue --- .../habitat_baselines/config/rearrange/rl_hierarchical.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/habitat-baselines/habitat_baselines/config/rearrange/rl_hierarchical.yaml b/habitat-baselines/habitat_baselines/config/rearrange/rl_hierarchical.yaml index 56d78df85a..bb980e2713 100644 --- a/habitat-baselines/habitat_baselines/config/rearrange/rl_hierarchical.yaml +++ b/habitat-baselines/habitat_baselines/config/rearrange/rl_hierarchical.yaml @@ -19,7 +19,7 @@ habitat_baselines: torch_gpu_id: 0 video_fps: 30 eval_ckpt_path_dir: "" - 
num_environments: 3 + num_environments: 4 writer_type: 'tb' num_updates: -1 total_num_steps: 1.0e8 From 2673f4fc0feee886d9fd73841a2502f0f4dac6a7 Mon Sep 17 00:00:00 2001 From: ASzot Date: Thu, 2 Feb 2023 12:49:19 -0800 Subject: [PATCH 29/48] Fixed deprecated configs --- .../habitat_baselines/config/rearrange/rl_skill.yaml | 4 ++-- habitat-lab/habitat/config/README.md | 1 - habitat-lab/habitat/config/default_structured_configs.py | 4 ---- .../habitat/config/habitat/task/rearrange/pick_spa.yaml | 4 ---- habitat-lab/habitat/config/habitat/task/rearrange/play.yaml | 3 --- 5 files changed, 2 insertions(+), 14 deletions(-) diff --git a/habitat-baselines/habitat_baselines/config/rearrange/rl_skill.yaml b/habitat-baselines/habitat_baselines/config/rearrange/rl_skill.yaml index 3bfc43f59e..08d7a89c73 100644 --- a/habitat-baselines/habitat_baselines/config/rearrange/rl_skill.yaml +++ b/habitat-baselines/habitat_baselines/config/rearrange/rl_skill.yaml @@ -18,8 +18,8 @@ habitat_baselines: eval_ckpt_path_dir: "data/new_checkpoints" # 26 environments will just barely be below 16gb. # num_environments: 26 - # 20 environments will just barely be below 11gb. - num_environments: 20 + # 18 environments will just barely be below 11gb. + num_environments: 18 num_updates: -1 total_num_steps: 1.0e8 log_interval: 10 diff --git a/habitat-lab/habitat/config/README.md b/habitat-lab/habitat/config/README.md index 16a1783ab6..3f6dfb4565 100644 --- a/habitat-lab/habitat/config/README.md +++ b/habitat-lab/habitat/config/README.md @@ -241,7 +241,6 @@ habitat: constraint_violation_drops_object: false force_regenerate: false should_save_to_cache: true - must_look_at_targ: true object_in_hand_sample_prob: 0.167 render_target: true ee_sample_factor: 0.2 diff --git a/habitat-lab/habitat/config/default_structured_configs.py b/habitat-lab/habitat/config/default_structured_configs.py index f56dc7eb70..e7ef02d9ae 100644 --- a/habitat-lab/habitat/config/default_structured_configs.py +++ b/habitat-lab/habitat/config/default_structured_configs.py @@ -723,7 +723,6 @@ class TaskConfig(HabitatBaseConfig): force_regenerate: bool = False # Saves the generated starts to a cache if they are not already generated should_save_to_cache: bool = False - must_look_at_targ: bool = True object_in_hand_sample_prob: float = 0.167 min_start_distance: float = 3.0 gfx_replay_dir = "data/replays" @@ -746,9 +745,6 @@ class TaskConfig(HabitatBaseConfig): cache_robot_init: bool = False success_state: float = 0.0 # Measurements for composite tasks. 
- # If true, does not care about navigability or collisions - # with objects when spawning robot - easy_init: bool = False should_enforce_target_within_reach: bool = False # COMPOSITE task CONFIG task_spec_base_path: str = "habitat/task/rearrange/pddl/" diff --git a/habitat-lab/habitat/config/habitat/task/rearrange/pick_spa.yaml b/habitat-lab/habitat/config/habitat/task/rearrange/pick_spa.yaml index f720718123..a9879d7c44 100644 --- a/habitat-lab/habitat/config/habitat/task/rearrange/pick_spa.yaml +++ b/habitat-lab/habitat/config/habitat/task/rearrange/pick_spa.yaml @@ -36,10 +36,6 @@ base_angle_noise: 0.15 base_noise: 0.05 force_regenerate: False -# If true, does not care about navigability or collisions with objects when spawning -# robot -easy_init: False - actions: arm_action: arm_controller: "ArmAbsPosKinematicAction" diff --git a/habitat-lab/habitat/config/habitat/task/rearrange/play.yaml b/habitat-lab/habitat/config/habitat/task/rearrange/play.yaml index 639e87ecd1..74d4304263 100644 --- a/habitat-lab/habitat/config/habitat/task/rearrange/play.yaml +++ b/habitat-lab/habitat/config/habitat/task/rearrange/play.yaml @@ -26,9 +26,6 @@ base_angle_noise: 0.0 base_noise: 0.0 constraint_violation_ends_episode: False -# If true, does not care about navigability or collisions -# with objects when spawning robot -easy_init: False force_regenerate: True actions: From dd01d50525d847a6a7bb77acaee60afe066f8b8c Mon Sep 17 00:00:00 2001 From: ASzot Date: Thu, 2 Feb 2023 16:05:47 -0800 Subject: [PATCH 30/48] Speed fix --- .../config/default_structured_configs.py | 1 + .../habitat/tasks/rearrange/rearrange_sim.py | 26 ++++++++++++++++++- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/habitat-lab/habitat/config/default_structured_configs.py b/habitat-lab/habitat/config/default_structured_configs.py index c8d4369172..421ecb313e 100644 --- a/habitat-lab/habitat/config/default_structured_configs.py +++ b/habitat-lab/habitat/config/default_structured_configs.py @@ -1074,6 +1074,7 @@ class SimulatorConfig(HabitatBaseConfig): create_renderer: bool = False requires_textures: bool = True auto_sleep: bool = False + sleep_dist: float = 3.0 step_physics: bool = True concur_render: bool = False # If markers should be updated at every step: diff --git a/habitat-lab/habitat/tasks/rearrange/rearrange_sim.py b/habitat-lab/habitat/tasks/rearrange/rearrange_sim.py index 3c6bf26661..e6e279fb94 100644 --- a/habitat-lab/habitat/tasks/rearrange/rearrange_sim.py +++ b/habitat-lab/habitat/tasks/rearrange/rearrange_sim.py @@ -143,6 +143,22 @@ def _try_acquire_context(self): if self.habitat_config.concur_render: self.renderer.acquire_gl_context() + def _auto_sleep(self): + all_robo_pos = [ + robot.base_pos for robot in self.robots_mgr.robots_iter + ] + rom = self.get_rigid_object_manager() + for handle, ro in rom.get_objects_by_handle_substring().items(): + is_far = any( + (robo_pos - ro.translation).length() + > self.habitat_config.sleep_dist + for robo_pos in all_robo_pos + ) + if is_far: + ro.motion_type = habitat_sim.physics.MotionType.STATIC + else: + ro.motion_type = self._obj_orig_motion_types[handle] + def sleep_all_objects(self): """ De-activate (sleep) all rigid objects in the scene, assuming they are already in a dynamically stable state. 
@@ -150,6 +166,7 @@ def sleep_all_objects(self): rom = self.get_rigid_object_manager() for _, ro in rom.get_objects_by_handle_substring().items(): ro.awake = False + aom = self.get_articulated_object_manager() for _, ao in aom.get_objects_by_handle_substring().items(): ao.awake = False @@ -242,6 +259,12 @@ def reconfigure(self, config: "DictConfig", ep_info: RearrangeEpisode): if self.habitat_config.auto_sleep: self.sleep_all_objects() + rom = self.get_rigid_object_manager() + self._obj_orig_motion_types = { + handle: ro.motion_type + for handle, ro in rom.get_objects_by_handle_substring().items() + } + if new_scene: self._load_navmesh(ep_info) @@ -662,6 +685,8 @@ def step(self, action: Union[str, int]) -> Observations: self.viz_ids = defaultdict(lambda: None) self.maybe_update_robot() + if self.habitat_config.sleep_dist > 0.0: + self._auto_sleep() if self.habitat_config.concur_render: self._prev_sim_obs = self.start_async_render() @@ -751,7 +776,6 @@ def internal_step( Never call sim.step_world directly or miss updating the robot. """ - # optionally step physics and update the robot for benchmarking purposes if self.habitat_config.step_physics: self.step_world(dt) From b20fcb177e0934ffb5095e3a24e100fd288d3199 Mon Sep 17 00:00:00 2001 From: ASzot Date: Thu, 2 Feb 2023 18:44:30 -0800 Subject: [PATCH 31/48] Updated configs --- .../defined_skills/oracle_skills.yaml | 2 +- habitat-lab/habitat/config/default_structured_configs.py | 9 +-------- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_policy/defined_skills/oracle_skills.yaml b/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_policy/defined_skills/oracle_skills.yaml index 98985a5eec..b7b6357e48 100644 --- a/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_policy/defined_skills/oracle_skills.yaml +++ b/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_policy/defined_skills/oracle_skills.yaml @@ -1,5 +1,5 @@ # Oracle skills that will teleport to the skill post-condition. 
When automatically setting predicates you may want to run the simulation in kinematic mode: -# To run in kinematic mode, add: `habitat.simulator.kinematic_mode=True habitat.simulator.ac_freq_ratio=1` +# To run in kinematic mode, add: `habitat.simulator.kinematic_mode=True habitat.simulator.ac_freq_ratio=1 habitat.task.measurements.force_terminate.max_accum_force=-1.0 habitat.task.measurements.force_terminate.max_instant_force=-1.0` open_cab: skill_name: "NoopSkillPolicy" diff --git a/habitat-lab/habitat/config/default_structured_configs.py b/habitat-lab/habitat/config/default_structured_configs.py index 421ecb313e..9e2862b6fe 100644 --- a/habitat-lab/habitat/config/default_structured_configs.py +++ b/habitat-lab/habitat/config/default_structured_configs.py @@ -780,13 +780,6 @@ class CompositeSuccessMeasurementConfig(MeasurementConfig): must_call_stop: bool = True -@dataclass -class CompositeRewardMeasurementConfig(MeasurementConfig): - type: str = "CompositeReward" - must_call_stop: bool = True - success_reward: float = 10.0 - - @dataclass class DoesWantTerminateMeasurementConfig(MeasurementConfig): type: str = "DoesWantTerminate" @@ -1074,7 +1067,7 @@ class SimulatorConfig(HabitatBaseConfig): create_renderer: bool = False requires_textures: bool = True auto_sleep: bool = False - sleep_dist: float = 3.0 + sleep_dist: float = -1.0 step_physics: bool = True concur_render: bool = False # If markers should be updated at every step: From 7982f4059ce7f695f6fc2664580bb5256142750d Mon Sep 17 00:00:00 2001 From: ASzot Date: Fri, 3 Feb 2023 12:54:13 -0800 Subject: [PATCH 32/48] Pddl action fixes --- .../hierarchical_policy/defined_skills/oracle_skills.yaml | 4 ++++ .../config/rearrange/rl_hierarchical_oracle_nav.yaml | 1 - habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py | 3 +++ .../habitat/config/habitat/task/rearrange/rearrange_easy.yaml | 1 + 4 files changed, 8 insertions(+), 1 deletion(-) diff --git a/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_policy/defined_skills/oracle_skills.yaml b/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_policy/defined_skills/oracle_skills.yaml index b7b6357e48..27cb7b8060 100644 --- a/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_policy/defined_skills/oracle_skills.yaml +++ b/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_policy/defined_skills/oracle_skills.yaml @@ -1,6 +1,10 @@ # Oracle skills that will teleport to the skill post-condition. 
When automatically setting predicates you may want to run the simulation in kinematic mode: # To run in kinematic mode, add: `habitat.simulator.kinematic_mode=True habitat.simulator.ac_freq_ratio=1 habitat.task.measurements.force_terminate.max_accum_force=-1.0 habitat.task.measurements.force_terminate.max_instant_force=-1.0` +defaults: + - /habitat/task/actions: + - pddl_apply_action + open_cab: skill_name: "NoopSkillPolicy" max_skill_steps: 1 diff --git a/habitat-baselines/habitat_baselines/config/rearrange/rl_hierarchical_oracle_nav.yaml b/habitat-baselines/habitat_baselines/config/rearrange/rl_hierarchical_oracle_nav.yaml index 92b80bea64..0f6ddbdbfb 100644 --- a/habitat-baselines/habitat_baselines/config/rearrange/rl_hierarchical_oracle_nav.yaml +++ b/habitat-baselines/habitat_baselines/config/rearrange/rl_hierarchical_oracle_nav.yaml @@ -9,7 +9,6 @@ defaults: - rl_hierarchical - /habitat/task/actions: - - pddl_apply_action - oracle_nav_action - _self_ diff --git a/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py b/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py index b912640359..0ceadb67ee 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py @@ -48,6 +48,9 @@ def __init__( ) else: self._pddl_ac_start = None + if self._config.apply_postconds and self._pddl_ac_start is None: + raise ValueError(f"Could not find PDDL action in skill {self}") + self._delay_term: List[Optional[bool]] = [ None for _ in range(self._batch_size) ] diff --git a/habitat-lab/habitat/config/habitat/task/rearrange/rearrange_easy.yaml b/habitat-lab/habitat/config/habitat/task/rearrange/rearrange_easy.yaml index c138ca2238..e7dff55b48 100644 --- a/habitat-lab/habitat/config/habitat/task/rearrange/rearrange_easy.yaml +++ b/habitat-lab/habitat/config/habitat/task/rearrange/rearrange_easy.yaml @@ -19,6 +19,7 @@ defaults: - did_violate_hold_constraint - move_objects_reward - gfx_replay_measure + - composite_stage_goals - /habitat/task/lab_sensors: - relative_resting_pos_sensor - target_start_sensor From 902bfa145ce3cfb5d00bc973e0cfe97aade00ae0 Mon Sep 17 00:00:00 2001 From: ASzot Date: Fri, 3 Feb 2023 20:14:49 -0800 Subject: [PATCH 33/48] Removed speed opts. Fixed some bugs --- .../config/default_structured_configs.py | 3 + .../defined_skills/oracle_skills.yaml | 10 +- .../rl/policy/hl_neural.yaml | 9 ++ .../rl/hrl/hl/neural_policy.py | 7 ++ .../habitat_baselines/rl/hrl/skills/place.py | 1 - .../habitat_baselines/rl/hrl/skills/skill.py | 32 +++--- .../config/default_structured_configs.py | 1 - .../domain_configs/replica_cad.yaml | 104 ++++++++++++++++++ .../habitat/tasks/rearrange/rearrange_sim.py | 18 --- 9 files changed, 151 insertions(+), 34 deletions(-) diff --git a/habitat-baselines/habitat_baselines/config/default_structured_configs.py b/habitat-baselines/habitat_baselines/config/default_structured_configs.py index 99e4e8d781..6563f3639b 100644 --- a/habitat-baselines/habitat_baselines/config/default_structured_configs.py +++ b/habitat-baselines/habitat_baselines/config/default_structured_configs.py @@ -251,6 +251,9 @@ class HrlDefinedSkillConfig(HabitatBaselinesBaseConfig): stop_thresh: float = 0.001 # For the reset_arm_skill reset_joint_state: List[float] = MISSING + # The set of PDDL action names (as defined in the PDDL domain file) that + # map to this skill. If not specified,the name of the skill must match the + # PDDL action name. 
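    # For example (mirroring the oracle_skills config updated elsewhere in
    # this patch), one skill can serve several PDDL actions:
    #
    #   nav_to_obj:
    #     pddl_action_names: ["nav", "nav_to_receptacle_by_name"]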
pddl_action_names: Optional[List[str]] = None diff --git a/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_policy/defined_skills/oracle_skills.yaml b/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_policy/defined_skills/oracle_skills.yaml index 27cb7b8060..ab4bae46ef 100644 --- a/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_policy/defined_skills/oracle_skills.yaml +++ b/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hierarchical_policy/defined_skills/oracle_skills.yaml @@ -9,22 +9,30 @@ open_cab: skill_name: "NoopSkillPolicy" max_skill_steps: 1 apply_postconds: True + force_end_on_timeout: False + pddl_action_names: ["open_cab_by_name"] open_fridge: skill_name: "NoopSkillPolicy" max_skill_steps: 1 apply_postconds: True + force_end_on_timeout: False + pddl_action_names: ["open_fridge_by_name"] close_cab: skill_name: "NoopSkillPolicy" obs_skill_inputs: ["obj_start_sensor"] max_skill_steps: 1 + force_end_on_timeout: False + pddl_action_names: ["close_cab_by_name"] close_fridge: skill_name: "NoopSkillPolicy" obs_skill_inputs: ["obj_start_sensor"] max_skill_steps: 1 apply_postconds: True + force_end_on_timeout: False + pddl_action_names: ["close_fridge_by_name"] pick: skill_name: "NoopSkillPolicy" @@ -51,7 +59,7 @@ nav_to_obj: apply_postconds: True force_end_on_timeout: False obs_skill_input_dim: 2 - pddl_action_names: ["nav", "nav_to_receptacle"] + pddl_action_names: ["nav", "nav_to_receptacle_by_name"] reset_arm: skill_name: "ResetArmSkill" diff --git a/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hl_neural.yaml b/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hl_neural.yaml index 2e7d9a4c87..1ede79538b 100644 --- a/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hl_neural.yaml +++ b/habitat-baselines/habitat_baselines/config/habitat_baselines/rl/policy/hl_neural.yaml @@ -6,6 +6,15 @@ obs_transforms: hierarchical_policy: high_level_policy: name: "NeuralHighLevelPolicy" + allowed_actions: + - nav + - pick + - place + - nav_to_receptacle_by_name + - open_fridge_by_name + - close_fridge_by_name + - open_cab_by_name + - close_cab_by_name allow_other_place: False hidden_dim: 512 use_rnn: True diff --git a/habitat-baselines/habitat_baselines/rl/hrl/hl/neural_policy.py b/habitat-baselines/habitat_baselines/rl/hrl/hl/neural_policy.py index 15933f7354..d06f2d7d90 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/hl/neural_policy.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/hl/neural_policy.py @@ -1,3 +1,4 @@ +import logging from itertools import chain from typing import Any, List @@ -7,6 +8,7 @@ import torch.nn as nn from habitat.tasks.rearrange.multi_task.pddl_action import PddlAction +from habitat_baselines.common.logging import baselines_logger from habitat_baselines.rl.ddppo.policy import resnet from habitat_baselines.rl.ddppo.policy.resnet_policy import ResNetEncoder from habitat_baselines.rl.hrl.hl.high_level_policy import HighLevelPolicy @@ -81,6 +83,9 @@ def __init__(self, *args, **kwargs): def _setup_actions(self) -> List[PddlAction]: all_actions = self._pddl_prob.get_possible_actions() + all_actions = [ + ac for ac in all_actions if ac.name in self._config.allowed_actions + ] if not self._config.allow_other_place: all_actions = [ ac @@ -191,6 +196,8 @@ def get_next_skill( if should_plan != 1.0: continue use_ac = self._all_actions[skill_sel[batch_idx]] + if 
baselines_logger.level >= logging.DEBUG: + baselines_logger.debug(f"HL Policy selected skill {use_ac}") next_skill[batch_idx] = self._skill_name_to_idx[use_ac.name] skill_args_data[batch_idx] = [ entity.name for entity in use_ac.param_values diff --git a/habitat-baselines/habitat_baselines/rl/hrl/skills/place.py b/habitat-baselines/habitat_baselines/rl/hrl/skills/place.py index a8d069910c..a99ac1a8e1 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/skills/place.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/skills/place.py @@ -46,7 +46,6 @@ def _is_skill_done( if is_done.sum() > 0: self._internal_log( f"Terminating with {rel_resting_pos} and {is_holding}", - observations, ) return is_done diff --git a/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py b/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py index 0ceadb67ee..6bc7777806 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py @@ -2,6 +2,7 @@ # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. +import logging from typing import Any, Dict, List, Optional, Tuple import gym.spaces as spaces @@ -71,7 +72,7 @@ def __init__( action_space, "rearrange_stop" ) - def _internal_log(self, s, observations=None): + def _internal_log(self, s): baselines_logger.debug( f"Skill {self._config.skill_name} @ step {self._cur_skill_step}: {s}" ) @@ -181,7 +182,6 @@ def should_terminate( if is_skill_done.sum() > 0: self._internal_log( f"Requested skill termination {is_skill_done}", - observations, ) cur_skill_step = self._cur_skill_step[batch_idx] @@ -192,33 +192,35 @@ def should_terminate( dtype=torch.bool, ) if self._config.max_skill_steps > 0: - over_max_len = cur_skill_step > self._config.max_skill_steps + over_max_len = cur_skill_step >= self._config.max_skill_steps if self._config.force_end_on_timeout: bad_terminate = over_max_len else: is_skill_done = is_skill_done | over_max_len + + is_skill_done |= hl_policy_wants_termination + new_actions = torch.zeros_like(actions) for i, env_i in enumerate(batch_idx): if self._delay_term[env_i]: + self._internal_log( + "Terminating skill due to delayed termination." + ) self._delay_term[env_i] = False - is_skill_done[i] = 1.0 - elif ( - self._config.apply_postconds - and is_skill_done[i] == 1.0 - and hl_policy_wants_termination[i] == 0.0 - ): + is_skill_done[i] = True + elif self._config.apply_postconds and is_skill_done[i]: new_actions[i] = self._apply_postcond( actions, log_info, skill_name[i], env_i, i ) self._delay_term[env_i] = True - is_skill_done[i] = 0.0 - - is_skill_done |= hl_policy_wants_termination + is_skill_done[i] = False + self._internal_log( + "Applying PDDL action and terminating on the next step." 
+ ) if bad_terminate.sum() > 0: self._internal_log( f"Bad terminating due to timeout {cur_skill_step}, {bad_terminate}", - observations, ) return is_skill_done, bad_terminate, new_actions @@ -238,6 +240,10 @@ def on_enter( self._cur_skill_step[batch_idxs] = 0 for i, batch_idx in enumerate(batch_idxs): self._raw_skill_args[batch_idx] = skill_arg[i] + if baselines_logger.level >= logging.DEBUG: + baselines_logger.debug( + f"Entering skill {self} with arguments {skill_arg[i]}" + ) self._cur_skill_args[batch_idx] = self._parse_skill_arg( skill_arg[i] ) diff --git a/habitat-lab/habitat/config/default_structured_configs.py b/habitat-lab/habitat/config/default_structured_configs.py index 9e2862b6fe..8940845ba4 100644 --- a/habitat-lab/habitat/config/default_structured_configs.py +++ b/habitat-lab/habitat/config/default_structured_configs.py @@ -1067,7 +1067,6 @@ class SimulatorConfig(HabitatBaseConfig): create_renderer: bool = False requires_textures: bool = True auto_sleep: bool = False - sleep_dist: float = -1.0 step_physics: bool = True concur_render: bool = False # If markers should be updated at every step: diff --git a/habitat-lab/habitat/tasks/rearrange/multi_task/domain_configs/replica_cad.yaml b/habitat-lab/habitat/tasks/rearrange/multi_task/domain_configs/replica_cad.yaml index 2de18d0c9b..20a15150fa 100644 --- a/habitat-lab/habitat/tasks/rearrange/multi_task/domain_configs/replica_cad.yaml +++ b/habitat-lab/habitat/tasks/rearrange/multi_task/domain_configs/replica_cad.yaml @@ -310,3 +310,107 @@ actions: config_args: task.base_angle_noise: 0.0 task.spawn_region_scale: 0.0 + + ######################################################### + # Receptacle name only based variants of the receptacle skills. This does not + # require any information about knowing which objects the receptacle + # contains. 
+ - name: nav_to_receptacle_by_name + parameters: + - name: marker + expr_type: art_obj_type + - name: robot + expr_type: robot_type + precondition: null + postcondition: + - robot_at(marker, robot) + task_info: + task: NavToObjTask-v0 + task_def: "nav_to_obj" + config_args: + task.force_regenerate: True + task.should_save_to_cache: False + + - name: open_fridge_by_name + parameters: + - name: fridge_id + expr_type: fridge_type + - name: robot + expr_type: robot_type + precondition: + expr_type: AND + sub_exprs: + - robot_at(fridge_id, robot) + - closed_fridge(fridge_id) + postcondition: + - opened_fridge(fridge_id) + task_info: + task_def: "open_fridge" + task: RearrangeOpenFridgeTask-v0 + add_task_args: + marker: fridge_id + config_args: + task.base_angle_noise: 0.0 + task.spawn_region_scale: 0.0 + + - name: close_fridge_by_name + parameters: + - name: fridge_id + expr_type: fridge_type + - name: robot + expr_type: robot_type + precondition: + expr_type: AND + sub_exprs: + - robot_at(fridge_id, robot) + - opened_fridge(fridge_id) + postcondition: + - closed_fridge(fridge_id) + task_info: + task_def: "close_fridge" + task: RearrangeCloseFridgeTask-v0 + add_task_args: + marker: fridge_id + config_args: + task.base_angle_noise: 0.0 + task.spawn_region_scale: 0.0 + + - name: open_cab_by_name + parameters: + - name: marker + expr_type: cab_type + - name: robot + expr_type: robot_type + precondition: + expr_type: AND + sub_exprs: + - robot_at(marker, robot) + - closed_cab(marker) + postcondition: + - opened_cab(marker) + task_info: + task_def: "open_cab" + task: RearrangeOpenDrawerTask-v0 + config_args: + task.base_angle_noise: 0.0 + task.spawn_region_scale: 0.0 + + - name: close_cab_by_name + parameters: + - name: marker + expr_type: cab_type + - name: robot + expr_type: robot_type + precondition: + expr_type: AND + sub_exprs: + - robot_at(marker, robot) + - opened_cab(marker) + postcondition: + - closed_cab(marker) + task_info: + task_def: "close_cab" + task: RearrangeCloseDrawerTask-v0 + config_args: + task.base_angle_noise: 0.0 + task.spawn_region_scale: 0.0 diff --git a/habitat-lab/habitat/tasks/rearrange/rearrange_sim.py b/habitat-lab/habitat/tasks/rearrange/rearrange_sim.py index e6e279fb94..b9bd60daee 100644 --- a/habitat-lab/habitat/tasks/rearrange/rearrange_sim.py +++ b/habitat-lab/habitat/tasks/rearrange/rearrange_sim.py @@ -143,22 +143,6 @@ def _try_acquire_context(self): if self.habitat_config.concur_render: self.renderer.acquire_gl_context() - def _auto_sleep(self): - all_robo_pos = [ - robot.base_pos for robot in self.robots_mgr.robots_iter - ] - rom = self.get_rigid_object_manager() - for handle, ro in rom.get_objects_by_handle_substring().items(): - is_far = any( - (robo_pos - ro.translation).length() - > self.habitat_config.sleep_dist - for robo_pos in all_robo_pos - ) - if is_far: - ro.motion_type = habitat_sim.physics.MotionType.STATIC - else: - ro.motion_type = self._obj_orig_motion_types[handle] - def sleep_all_objects(self): """ De-activate (sleep) all rigid objects in the scene, assuming they are already in a dynamically stable state. 
@@ -685,8 +669,6 @@ def step(self, action: Union[str, int]) -> Observations: self.viz_ids = defaultdict(lambda: None) self.maybe_update_robot() - if self.habitat_config.sleep_dist > 0.0: - self._auto_sleep() if self.habitat_config.concur_render: self._prev_sim_obs = self.start_async_render() From 74278bde54f476188039215dffd5bcc8d48504d3 Mon Sep 17 00:00:00 2001 From: ASzot Date: Sat, 4 Feb 2023 11:13:09 -0800 Subject: [PATCH 34/48] Fixed rendering text to the frame --- .../habitat_baselines/config/rearrange/rl_hierarchical.yaml | 4 ++-- habitat-baselines/habitat_baselines/rl/ppo/ppo_trainer.py | 4 +--- habitat-lab/habitat/utils/render_wrapper.py | 4 ++++ 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/habitat-baselines/habitat_baselines/config/rearrange/rl_hierarchical.yaml b/habitat-baselines/habitat_baselines/config/rearrange/rl_hierarchical.yaml index bb980e2713..611a64c2ec 100644 --- a/habitat-baselines/habitat_baselines/config/rearrange/rl_hierarchical.yaml +++ b/habitat-baselines/habitat_baselines/config/rearrange/rl_hierarchical.yaml @@ -22,9 +22,9 @@ habitat_baselines: num_environments: 4 writer_type: 'tb' num_updates: -1 - total_num_steps: 1.0e8 + total_num_steps: 5.0e7 log_interval: 10 - num_checkpoints: 20 + num_checkpoints: 10 force_torch_single_threaded: True eval_keys_to_include_in_name: ['reward', 'force', 'composite_success'] load_resume_state_config: False diff --git a/habitat-baselines/habitat_baselines/rl/ppo/ppo_trainer.py b/habitat-baselines/habitat_baselines/rl/ppo/ppo_trainer.py index cf4dbcfe92..eb5577a917 100644 --- a/habitat-baselines/habitat_baselines/rl/ppo/ppo_trainer.py +++ b/habitat-baselines/habitat_baselines/rl/ppo/ppo_trainer.py @@ -1103,9 +1103,7 @@ def _eval_checkpoint( frame = observations_to_image( {k: v[i] * 0.0 for k, v in batch.items()}, infos[i] ) - frame = overlay_frame( - frame, extract_scalars_from_info(infos[i]) - ) + frame = overlay_frame(frame, infos[i]) rgb_frames[i].append(frame) # episode ended diff --git a/habitat-lab/habitat/utils/render_wrapper.py b/habitat-lab/habitat/utils/render_wrapper.py index 8000093d99..63ad257b6c 100644 --- a/habitat-lab/habitat/utils/render_wrapper.py +++ b/habitat-lab/habitat/utils/render_wrapper.py @@ -66,6 +66,10 @@ def append_text_to_image( def overlay_frame(frame, info, additional=None): + """ + Renders text from the `info` dictionary to the `frame` image. + """ + lines = [] flattened_info = flatten_dict(info) for k, v in flattened_info.items(): From 49a71a42064d39a36a40a5c981a635714fc390ff Mon Sep 17 00:00:00 2001 From: ASzot Date: Sat, 4 Feb 2023 11:27:17 -0800 Subject: [PATCH 35/48] Addressing Vince's PR comments --- habitat-baselines/habitat_baselines/README.md | 13 +++++++------ .../config/rearrange/rl_rearrange.yaml | 2 +- ...4193555.s-164-67-210-79.resnet.ucla.edu.58058.0 | Bin 40 -> 0 bytes 3 files changed, 8 insertions(+), 7 deletions(-) delete mode 100644 habitat-baselines/tb/events.out.tfevents.1674193555.s-164-67-210-79.resnet.ucla.edu.58058.0 diff --git a/habitat-baselines/habitat_baselines/README.md b/habitat-baselines/habitat_baselines/README.md index bb0319da10..86cfd34ab7 100644 --- a/habitat-baselines/habitat_baselines/README.md +++ b/habitat-baselines/habitat_baselines/README.md @@ -62,17 +62,18 @@ To run the following examples, you need the [ReplicaCAD dataset](https://github. 
To train a high-level policy, while using pre-learned low-level skills (SRL baseline from [Habitat2.0](https://arxiv.org/abs/2106.14405)), you can run: ```bash -python -u habitat_baselines/run.py \ - --exp-config habitat_baselines/config/rearrange/rl_hl_srl_onav.yaml \ +python -u habitat-baselines/habitat_baselines/run.py \ + --exp-config habitat-baselines/habitat_baselines/config/rearrange/rl_hierarchical.yaml \ --run-type train ``` -To run a rearrangement episode with oracle in both low- and high-level, you can run: +To run a rearrangement episode with oracle low-level skills and a fixed task planner, run: ```bash -python -u habitat_baselines/run.py \ - --exp-config habitat_baselines/config/rearrange/rl_hl_srl_onav.yaml \ +python -u habitat-baselines/habitat_baselines/run.py \ + --exp-config habitat-baselines/habitat_baselines/config/rearrange/rl_hierarchical.yaml \ --run-type eval \ - habitat_baselines/rl/policy=hierarchical_tp_noop_onav + habitat_baselines/rl/policy=hl_fixed \ + habitat_baselines/rl/policy/hierarchical_policy/defined_skills=oracle_skills ``` To change the task (like set table) that you train your skills on, you can change the line `/habitat/task/rearrange: rearrange_easy` to `/habitat/task/rearrange: set_table` in the defaults of your config. diff --git a/habitat-baselines/habitat_baselines/config/rearrange/rl_rearrange.yaml b/habitat-baselines/habitat_baselines/config/rearrange/rl_rearrange.yaml index d68be219bb..30dfdcd50e 100644 --- a/habitat-baselines/habitat_baselines/config/rearrange/rl_rearrange.yaml +++ b/habitat-baselines/habitat_baselines/config/rearrange/rl_rearrange.yaml @@ -2,8 +2,8 @@ defaults: - /benchmark/rearrange: rearrange - - /habitat_baselines/rl/policy: monolithic - /habitat_baselines: habitat_baselines_rl_config_base + - /habitat_baselines/rl/policy: monolithic - /habitat/simulator/sim_sensors@habitat_baselines.eval.extra_sim_sensors.third_rgb_sensor: third_rgb_sensor - _self_ diff --git a/habitat-baselines/tb/events.out.tfevents.1674193555.s-164-67-210-79.resnet.ucla.edu.58058.0 b/habitat-baselines/tb/events.out.tfevents.1674193555.s-164-67-210-79.resnet.ucla.edu.58058.0 deleted file mode 100644 index 11853e6cc35c6fa02416a8cca40abb3d004e18ad..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 40 rcmb1OfPlsI-b$P&Paay*_34JA6mL>dVrHJ6YguYuiIvgv2lx8`_;n8f From b41133ad1db34fd4b88de15a93cce5daab88387a Mon Sep 17 00:00:00 2001 From: ASzot Date: Sat, 4 Feb 2023 15:25:51 -0800 Subject: [PATCH 36/48] Refactored navigation to be much clearer --- .../rl/hrl/skills/oracle_nav.py | 54 +++++-------------- .../rearrange/actions/oracle_nav_action.py | 45 ++++++++-------- test/test_baseline_training.py | 1 + 3 files changed, 38 insertions(+), 62 deletions(-) diff --git a/habitat-baselines/habitat_baselines/rl/hrl/skills/oracle_nav.py b/habitat-baselines/habitat_baselines/rl/hrl/skills/oracle_nav.py index be26af308a..fe1364ae51 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/skills/oracle_nav.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/skills/oracle_nav.py @@ -8,11 +8,7 @@ import torch from habitat.core.spaces import ActionSpace -from habitat.tasks.rearrange.actions.oracle_nav_action import ( - get_possible_nav_to_actions, -) from habitat.tasks.rearrange.multi_task.pddl_domain import PddlProblem -from habitat.tasks.rearrange.multi_task.rearrange_pddl import RIGID_OBJ_TYPE from habitat.tasks.rearrange.rearrange_sensors import LocalizationSensor from habitat_baselines.common.logging import baselines_logger from 
habitat_baselines.rl.hrl.skills.nn_skill import NnSkillPolicy @@ -23,9 +19,11 @@ class OracleNavPolicy(NnSkillPolicy): @dataclass class OracleNavActionArgs: + """ + :property action_idx: The index of the oracle action we want to execute + """ + action_idx: int - is_target_obj: bool - target_idx: int def __init__( self, @@ -53,7 +51,7 @@ def __init__( pddl_task_path, task_config, ) - self._poss_actions = get_possible_nav_to_actions(self._pddl_problem) + self._all_entities = self._pddl_problem.get_ordered_entities_list() self._oracle_nav_ac_idx, _ = find_action_range( action_space, "oracle_nav_action" ) @@ -130,47 +128,23 @@ def _is_skill_done( return ret def _parse_skill_arg(self, skill_arg): - marker = None if len(skill_arg) == 2: - targ_obj, _ = skill_arg + search_target, _ = skill_arg elif len(skill_arg) == 3: - marker, targ_obj, _ = skill_arg + _, search_target, _ = skill_arg else: raise ValueError( f"Unexpected number of skill arguments in {skill_arg}" ) - targ_obj_idx = int(targ_obj.split("|")[-1]) - - targ_obj = self._pddl_problem.get_entity(targ_obj) - if marker is not None: - marker = self._pddl_problem.get_entity(marker) - - match_i = None - for i, action in enumerate(self._poss_actions): - match_obj = action.get_arg_value("obj") - if marker is not None: - match_marker = action.get_arg_value("marker") - if match_marker != marker: - continue - if match_obj != targ_obj: - continue - match_i = i - break - if match_i is None: - raise ValueError(f"Cannot find matching action for {skill_arg}") - is_target_obj = targ_obj.expr_type.is_subtype_of( - self._pddl_problem.expr_types[RIGID_OBJ_TYPE] - ) - return OracleNavPolicy.OracleNavActionArgs( - match_i, is_target_obj, targ_obj_idx - ) - - def _get_multi_sensor_index(self, batch_idx): - return [self._cur_skill_args[i].target_idx for i in batch_idx] + target = self._pddl_problem.get_entity(search_target) + if target is None: + raise ValueError( + f"Cannot find matching entity for {search_target}" + ) + match_i = self._all_entities.index(target) - def requires_rnn_state(self): - return False + return OracleNavPolicy.OracleNavActionArgs(match_i) def _internal_act( self, diff --git a/habitat-lab/habitat/tasks/rearrange/actions/oracle_nav_action.py b/habitat-lab/habitat/tasks/rearrange/actions/oracle_nav_action.py index 19523e04c1..521aa5a5b7 100644 --- a/habitat-lab/habitat/tasks/rearrange/actions/oracle_nav_action.py +++ b/habitat-lab/habitat/tasks/rearrange/actions/oracle_nav_action.py @@ -14,32 +14,34 @@ from habitat.tasks.utils import get_angle -def compute_turn(rel, turn_vel, robot_forward): - is_left = np.cross(robot_forward, rel) > 0 - if is_left: - vel = [0, -turn_vel] - else: - vel = [0, turn_vel] - return vel - - -def get_possible_nav_to_actions(pddl_problem): - return pddl_problem.get_possible_actions( - allowed_action_names=["nav", "nav_to_receptacle"], - true_preds=None, - ) - - @registry.register_task_action class OracleNavAction(BaseVelAction): + """ + An action that will convert the index of an entity (in the sense of + `PddlEntity`) to navigate to and convert this to base control to move the + robot to the closest navigable position to that entity. The entity index is + the index into the list of all available entities in the current scene. 
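The steering logic this action applies further down reduces to the sign of a planar cross product between the robot's forward vector and the vector toward the target; a small self-contained check of that rule (the vectors here are made up for illustration):

```python
def turn_command(rel, turn_vel, robot_forward):
    # Positive planar cross product => target is to the left of the robot.
    is_left = robot_forward[0] * rel[1] - robot_forward[1] * rel[0] > 0
    return [0.0, -turn_vel] if is_left else [0.0, turn_vel]

forward = [1.0, 0.0]
print(turn_command([0.0, 1.0], 1.0, forward))   # target to the left  -> [0.0, -1.0]
print(turn_command([0.0, -1.0], 1.0, forward))  # target to the right -> [0.0, 1.0]
```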
+ """ + def __init__(self, *args, task, **kwargs): super().__init__(*args, **kwargs) self._task = task - self._poss_actions = get_possible_nav_to_actions(task.pddl_problem) + self._poss_entities = ( + self._task.pddl_problem.get_ordered_entities_list() + ) self._prev_ep_id = None self._targets = {} + @staticmethod + def _compute_turn(rel, turn_vel, robot_forward): + is_left = np.cross(robot_forward, rel) > 0 + if is_left: + vel = [0, -turn_vel] + else: + vel = [0, turn_vel] + return vel + @property def action_space(self): return spaces.Dict( @@ -62,8 +64,7 @@ def reset(self, *args, **kwargs): def _get_target_for_idx(self, nav_to_target_idx: int): if nav_to_target_idx not in self._targets: - action = self._poss_actions[nav_to_target_idx] - nav_to_obj = action.get_arg_value("obj") + nav_to_obj = self._poss_entities[nav_to_target_idx] obj_pos = self._task.pddl_problem.sim_info.get_entity_pos( nav_to_obj ) @@ -95,7 +96,7 @@ def step(self, *args, is_last_action, **kwargs): self._action_arg_prefix + "oracle_nav_action" ] if nav_to_target_idx <= 0 or nav_to_target_idx > len( - self._poss_actions + self._poss_entities ): if is_last_action: return self._sim.step(HabitatSimActions.base_velocity) @@ -134,7 +135,7 @@ def step(self, *args, is_last_action, **kwargs): if not at_goal: if dist_to_final_nav_targ < self._config.dist_thresh: # Look at the object - vel = compute_turn( + vel = OracleNavAction._compute_turn( rel_pos, self._config.turn_velocity, robot_forward ) elif angle_to_target < self._config.turn_thresh: @@ -142,7 +143,7 @@ def step(self, *args, is_last_action, **kwargs): vel = [self._config.forward_velocity, 0] else: # Look at the target waypoint. - vel = compute_turn( + vel = OracleNavAction._compute_turn( rel_targ, self._config.turn_velocity, robot_forward ) else: diff --git a/test/test_baseline_training.py b/test/test_baseline_training.py index d377990317..0b9b7b08b4 100644 --- a/test/test_baseline_training.py +++ b/test/test_baseline_training.py @@ -174,6 +174,7 @@ def test_trainers(config_path, num_updates, overrides, trainer_name): ), ) def test_hrl(config_path, policy_type, skill_type, mode): + if policy_type == "hl_neural" and skill_type == "nn_skills": return if policy_type == "hl_fixed" and mode == "train": From e7a877b3c31d2b9268c07a2e2117e0e09651852a Mon Sep 17 00:00:00 2001 From: ASzot Date: Sun, 5 Feb 2023 13:12:50 -0800 Subject: [PATCH 37/48] Fixed some of the tests --- .../habitat_baselines/agents/ppo_agents.py | 12 ++++-------- .../sims/habitat_simulator/habitat_simulator.py | 4 ++-- habitat-lab/habitat/tasks/nav/nav.py | 4 +--- test/test_baseline_config.py | 5 +++++ 4 files changed, 12 insertions(+), 13 deletions(-) diff --git a/habitat-baselines/habitat_baselines/agents/ppo_agents.py b/habitat-baselines/habitat_baselines/agents/ppo_agents.py index 7b25704cf2..f87a85ef3e 100644 --- a/habitat-baselines/habitat_baselines/agents/ppo_agents.py +++ b/habitat-baselines/habitat_baselines/agents/ppo_agents.py @@ -126,23 +126,19 @@ def reset(self) -> None: def act(self, observations: Observations) -> Dict[str, int]: batch = batch_obs([observations], device=self.device) with torch.no_grad(): - ( - _, - actions, - _, - self.test_recurrent_hidden_states, - ) = self.actor_critic.act( + action_data = self.actor_critic.act( batch, self.test_recurrent_hidden_states, self.prev_actions, self.not_done_masks, deterministic=False, ) + self.test_recurrent_hidden_states = action_data.rnn_hidden_states # Make masks not done till reset (end of episode) will be called 
self.not_done_masks.fill_(True) - self.prev_actions.copy_(actions) # type: ignore + self.prev_actions.copy_(action_data.actions) # type: ignore - return {"action": actions[0][0].item()} + return {"action": action_data.env_actions[0][0].item()} def main(): diff --git a/habitat-lab/habitat/sims/habitat_simulator/habitat_simulator.py b/habitat-lab/habitat/sims/habitat_simulator/habitat_simulator.py index d09ef21c42..027e4e82f4 100644 --- a/habitat-lab/habitat/sims/habitat_simulator/habitat_simulator.py +++ b/habitat-lab/habitat/sims/habitat_simulator/habitat_simulator.py @@ -390,8 +390,8 @@ def _update_agents_state(self) -> bool: agent_cfg = self.habitat_config.agents[agent_name] if agent_cfg.is_set_start_state: self.set_agent_state( - agent_cfg.start_position, - agent_cfg.start_rotation, + [float(k) for k in agent_cfg.start_position], + [float(k) for k in agent_cfg.start_rotation], agent_id, ) is_updated = True diff --git a/habitat-lab/habitat/tasks/nav/nav.py b/habitat-lab/habitat/tasks/nav/nav.py index f4bcc4c572..c844c5a7f4 100644 --- a/habitat-lab/habitat/tasks/nav/nav.py +++ b/habitat-lab/habitat/tasks/nav/nav.py @@ -1317,9 +1317,7 @@ def overwrite_sim_config(self, config: Any, episode: Episode) -> Any: ): agent_config = get_agent_config(config.simulator) agent_config.start_position = episode.start_position - agent_config.start_rotation = [ - float(k) for k in episode.start_rotation - ] + agent_config.start_rotation = episode.start_rotation agent_config.is_set_start_state = True return config diff --git a/test/test_baseline_config.py b/test/test_baseline_config.py index 533944bd78..15b3fb524b 100644 --- a/test/test_baseline_config.py +++ b/test/test_baseline_config.py @@ -25,4 +25,9 @@ def test_baselines_configs(test_cfg_path): cleaned_path = test_cfg_path.replace( "habitat-baselines/habitat_baselines/config/", "" ) + if "habitat_baselines" in cleaned_path: + # Do not test non-standalone config options that are + # supposed to be used with "main" configs. + return + get_config(cleaned_path) From 5c213e3062b8c06a42df384e33de50f4484e9659 Mon Sep 17 00:00:00 2001 From: ASzot Date: Sun, 5 Feb 2023 21:41:20 -0800 Subject: [PATCH 38/48] Adddressed PR comments --- .../habitat_baselines/rl/hrl/hierarchical_policy.py | 4 ++-- .../habitat_baselines/rl/hrl/skills/reset.py | 12 +++++++----- .../habitat_baselines/rl/hrl/skills/skill.py | 4 ++-- .../habitat_baselines/rl/ppo/ppo_trainer.py | 5 +++++ test/test_baseline_training.py | 4 ++++ 5 files changed, 20 insertions(+), 9 deletions(-) diff --git a/habitat-baselines/habitat_baselines/rl/hrl/hierarchical_policy.py b/habitat-baselines/habitat_baselines/rl/hrl/hierarchical_policy.py index e57a2dc5ef..bcad61b862 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/hierarchical_policy.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/hierarchical_policy.py @@ -227,7 +227,7 @@ def act( (self._num_envs,), dtype=torch.bool ) - hl_policy_wants_termination = self._high_level_policy.get_termination( + hl_wants_skill_term = self._high_level_policy.get_termination( observations, rnn_hidden_states, prev_actions, @@ -249,7 +249,7 @@ def act( "prev_actions": prev_actions, "masks": masks, "actions": actions, - "hl_policy_wants_termination": hl_policy_wants_termination, + "hl_wants_skill_term": hl_wants_skill_term, }, # Only decide on skill termination if the episode is active. 
should_adds=masks, diff --git a/habitat-baselines/habitat_baselines/rl/hrl/skills/reset.py b/habitat-baselines/habitat_baselines/rl/hrl/skills/reset.py index 39bf4ea058..937f0097c1 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/skills/reset.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/skills/reset.py @@ -21,10 +21,12 @@ def __init__( batch_size, ): super().__init__(config, action_space, batch_size, True) - self._target = np.array([float(x) for x in config.reset_joint_state]) + self._rest_state = np.array( + [float(x) for x in config.reset_joint_state] + ) self._arm_ac_range = find_action_range(action_space, "arm_action") - self._arm_ac_range = (self._arm_ac_range[0], self._target.shape[0]) + self._arm_ac_range = (self._arm_ac_range[0], self._rest_state.shape[0]) def on_enter( self, @@ -43,7 +45,7 @@ def on_enter( ) self._initial_delta = ( - self._target - observations["joint"].cpu().numpy() + self._rest_state - observations["joint"].cpu().numpy() ) return ret @@ -58,7 +60,7 @@ def _is_skill_done( return ( torch.as_tensor( - np.abs(current_joint_pos - self._target).max(-1), + np.abs(current_joint_pos - self._rest_state).max(-1), dtype=torch.float32, ) < 5e-2 @@ -74,7 +76,7 @@ def _internal_act( deterministic=False, ): current_joint_pos = observations["joint"].cpu().numpy() - delta = self._target - current_joint_pos + delta = self._rest_state - current_joint_pos # Dividing by max initial delta means that the action will # always in [-1,1] and has the benefit of reducing the delta diff --git a/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py b/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py index 6bc7777806..b1a8f87cd4 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py @@ -164,7 +164,7 @@ def should_terminate( prev_actions: torch.Tensor, masks: torch.Tensor, actions: torch.Tensor, - hl_policy_wants_termination: torch.BoolTensor, + hl_wants_skill_term: torch.BoolTensor, batch_idx: List[int], skill_name: List[str], log_info: List[Dict[str, Any]], @@ -198,7 +198,7 @@ def should_terminate( else: is_skill_done = is_skill_done | over_max_len - is_skill_done |= hl_policy_wants_termination + is_skill_done |= hl_wants_skill_term new_actions = torch.zeros_like(actions) for i, env_i in enumerate(batch_idx): diff --git a/habitat-baselines/habitat_baselines/rl/ppo/ppo_trainer.py b/habitat-baselines/habitat_baselines/rl/ppo/ppo_trainer.py index eb5577a917..708e20f802 100644 --- a/habitat-baselines/habitat_baselines/rl/ppo/ppo_trainer.py +++ b/habitat-baselines/habitat_baselines/rl/ppo/ppo_trainer.py @@ -84,6 +84,11 @@ class PPOTrainer(BaseRLTrainer): r"""Trainer class for PPO algorithm Paper: https://arxiv.org/abs/1707.06347. + + :property env_action_space: The action space required by the environment. + :property policy_action_space: The action space the policy acts in. This + can be different from the environment action space for hierarchical + policies. 
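As a toy illustration of that distinction (the spaces below are invented for the example, not taken from the configs): the environment may consume a dictionary of continuous commands while a hierarchical policy's own action space can be as small as a skill index.

```python
from gym import spaces

# What the environment consumes at every step.
env_action_space = spaces.Dict(
    {
        "arm_action": spaces.Box(-1.0, 1.0, (7,)),
        "base_velocity": spaces.Box(-1.0, 1.0, (2,)),
    }
)
# What a high-level policy might act in: one of N skills.
policy_action_space = spaces.Discrete(5)
```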
""" supported_tasks = ["Nav-v0"] diff --git a/test/test_baseline_training.py b/test/test_baseline_training.py index 0b9b7b08b4..aa1fdeccd9 100644 --- a/test/test_baseline_training.py +++ b/test/test_baseline_training.py @@ -174,6 +174,7 @@ def test_trainers(config_path, num_updates, overrides, trainer_name): ), ) def test_hrl(config_path, policy_type, skill_type, mode): + TRAIN_LOG_FILE = "data/test_train.log" if policy_type == "hl_neural" and skill_type == "nn_skills": return @@ -184,6 +185,8 @@ def test_hrl(config_path, policy_type, skill_type, mode): # Remove the checkpoints from previous tests for f in glob.glob("data/test_checkpoints/test_training/*"): os.remove(f) + if os.path.exists(TRAIN_LOG_FILE): + os.remove(TRAIN_LOG_FILE) # Setup the training config = get_config( @@ -195,6 +198,7 @@ def test_hrl(config_path, policy_type, skill_type, mode): "habitat_baselines.total_num_steps=-1.0", "habitat_baselines.test_episode_count=1", "habitat_baselines.checkpoint_folder=data/test_checkpoints/test_training", + f"habitat_baselines.log_file={TRAIN_LOG_FILE}", f"habitat_baselines/rl/policy={policy_type}", f"habitat_baselines/rl/policy/hierarchical_policy/defined_skills={skill_type}", ], From d9721f19ee6b464db16ca8ef4190b873a6c70e03 Mon Sep 17 00:00:00 2001 From: ASzot Date: Sun, 5 Feb 2023 21:48:01 -0800 Subject: [PATCH 39/48] Fixed rotation issue --- habitat-lab/habitat/tasks/nav/nav.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/habitat-lab/habitat/tasks/nav/nav.py b/habitat-lab/habitat/tasks/nav/nav.py index c844c5a7f4..f4bcc4c572 100644 --- a/habitat-lab/habitat/tasks/nav/nav.py +++ b/habitat-lab/habitat/tasks/nav/nav.py @@ -1317,7 +1317,9 @@ def overwrite_sim_config(self, config: Any, episode: Episode) -> Any: ): agent_config = get_agent_config(config.simulator) agent_config.start_position = episode.start_position - agent_config.start_rotation = episode.start_rotation + agent_config.start_rotation = [ + float(k) for k in episode.start_rotation + ] agent_config.is_set_start_state = True return config From f8387defbd5ea590a7557acbb79ace241a8b2aa4 Mon Sep 17 00:00:00 2001 From: ASzot Date: Sun, 5 Feb 2023 21:52:01 -0800 Subject: [PATCH 40/48] Fixed black --- .../habitat_baselines/common/rollout_storage.py | 1 - habitat-baselines/habitat_baselines/rl/hrl/hrl_ppo.py | 7 ++++++- habitat-baselines/habitat_baselines/rl/ppo/ppo.py | 1 - 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/habitat-baselines/habitat_baselines/common/rollout_storage.py b/habitat-baselines/habitat_baselines/common/rollout_storage.py index a146d9eb4a..8120b56494 100644 --- a/habitat-baselines/habitat_baselines/common/rollout_storage.py +++ b/habitat-baselines/habitat_baselines/common/rollout_storage.py @@ -36,7 +36,6 @@ def __init__( num_recurrent_layers=1, is_double_buffered: bool = False, ): - if is_continuous_action_space(action_space): # Assume ALL actions are NOT discrete action_shape = ( diff --git a/habitat-baselines/habitat_baselines/rl/hrl/hrl_ppo.py b/habitat-baselines/habitat_baselines/rl/hrl/hrl_ppo.py index 4acad90588..998933c459 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/hrl_ppo.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/hrl_ppo.py @@ -28,7 +28,12 @@ def reduce_loss(loss): self._set_grads_to_none() - (values, action_log_probs, dist_entropy, _,) = self._evaluate_actions( + ( + values, + action_log_probs, + dist_entropy, + _, + ) = self._evaluate_actions( batch["observations"], batch["recurrent_hidden_states"], batch["prev_actions"], diff --git 
a/habitat-baselines/habitat_baselines/rl/ppo/ppo.py b/habitat-baselines/habitat_baselines/rl/ppo/ppo.py index b2d764ca6b..7affdc63ba 100644 --- a/habitat-baselines/habitat_baselines/rl/ppo/ppo.py +++ b/habitat-baselines/habitat_baselines/rl/ppo/ppo.py @@ -296,7 +296,6 @@ def update( self, rollouts: RolloutStorage, ) -> Dict[str, float]: - advantages = self.get_advantages(rollouts) learner_metrics: Dict[str, List[Any]] = collections.defaultdict(list) From 1c8f54cd0fb047b46255bdbfc498f7941f87ee7c Mon Sep 17 00:00:00 2001 From: ASzot Date: Tue, 7 Feb 2023 17:45:57 -0800 Subject: [PATCH 41/48] Addressed PR comments --- .../habitat_baselines/common/rollout_storage.py | 15 ++------------- .../config/default_structured_configs.py | 2 +- .../config/rearrange/rl_hierarchical.yaml | 4 +++- .../rl/hrl/hl/high_level_policy.py | 8 ++++++++ .../habitat_baselines/rl/hrl/skills/skill.py | 13 ++++++++----- .../habitat_baselines/rl/ppo/policy.py | 2 ++ .../habitat_baselines/rl/ppo/ppo_trainer.py | 14 +++++--------- .../habitat_baselines/utils/common.py | 17 +++++++++++++++++ 8 files changed, 46 insertions(+), 29 deletions(-) diff --git a/habitat-baselines/habitat_baselines/common/rollout_storage.py b/habitat-baselines/habitat_baselines/common/rollout_storage.py index 8120b56494..f48bd71042 100644 --- a/habitat-baselines/habitat_baselines/common/rollout_storage.py +++ b/habitat-baselines/habitat_baselines/common/rollout_storage.py @@ -17,8 +17,8 @@ build_rnn_build_seq_info, ) from habitat_baselines.utils.common import ( + get_action_space_info, get_num_actions, - is_continuous_action_space, ) @@ -36,18 +36,7 @@ def __init__( num_recurrent_layers=1, is_double_buffered: bool = False, ): - if is_continuous_action_space(action_space): - # Assume ALL actions are NOT discrete - action_shape = ( - get_num_actions( - action_space, - ), - ) - discrete_actions = False - else: - # For discrete pointnav - action_shape = (1,) - discrete_actions = True + discrete_actions, action_shape = get_action_space_info(action_space) self.buffers = TensorDict() self.buffers["observations"] = TensorDict() diff --git a/habitat-baselines/habitat_baselines/config/default_structured_configs.py b/habitat-baselines/habitat_baselines/config/default_structured_configs.py index 6563f3639b..6d774afa9b 100644 --- a/habitat-baselines/habitat_baselines/config/default_structured_configs.py +++ b/habitat-baselines/habitat_baselines/config/default_structured_configs.py @@ -392,7 +392,7 @@ class HabitatBaselinesConfig(HabitatBaselinesBaseConfig): eval_ckpt_path_dir: str = "data/checkpoints" num_environments: int = 16 num_processes: int = -1 # deprecated - rollout_storage: str = "RolloutStorage" + rollout_storage_name: str = "RolloutStorage" checkpoint_folder: str = "data/checkpoints" num_updates: int = 10000 num_checkpoints: int = 10 diff --git a/habitat-baselines/habitat_baselines/config/rearrange/rl_hierarchical.yaml b/habitat-baselines/habitat_baselines/config/rearrange/rl_hierarchical.yaml index 611a64c2ec..f7f142a8a0 100644 --- a/habitat-baselines/habitat_baselines/config/rearrange/rl_hierarchical.yaml +++ b/habitat-baselines/habitat_baselines/config/rearrange/rl_hierarchical.yaml @@ -16,6 +16,8 @@ defaults: habitat_baselines: verbose: False trainer_name: "ddppo" + updater_name: "HRLPPO" + updater_name: "HRLDDPPO" torch_gpu_id: 0 video_fps: 30 eval_ckpt_path_dir: "" @@ -28,7 +30,7 @@ habitat_baselines: force_torch_single_threaded: True eval_keys_to_include_in_name: ['reward', 'force', 'composite_success'] load_resume_state_config: False - 
rollout_storage: "HrlRolloutStorage" + rollout_storage_name: "HrlRolloutStorage" eval: use_ckpt_config: False diff --git a/habitat-baselines/habitat_baselines/rl/hrl/hl/high_level_policy.py b/habitat-baselines/habitat_baselines/rl/hrl/hl/high_level_policy.py index c148c62f28..ffc2874240 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/hl/high_level_policy.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/hl/high_level_policy.py @@ -91,9 +91,17 @@ def get_next_skill( raise NotImplementedError() def apply_mask(self, mask: torch.Tensor) -> None: + """ + Called before every step with the mask information at the current step. + """ + pass def get_policy_components(self) -> List[nn.Module]: + """ + Gets the torch modules that are in the HL policy architecture. + """ + return [] def get_termination( diff --git a/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py b/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py index b1a8f87cd4..9a1eb5647e 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py @@ -29,6 +29,9 @@ def __init__( """ self._config = config self._batch_size = batch_size + self._apply_postcond = self._config.apply_postconds + self._force_end_on_timeout = self._config.force_end_on_timeout + self._max_skill_steps = self._config.max_skill_steps self._cur_skill_step = torch.zeros(self._batch_size) self._should_keep_hold_state = should_keep_hold_state @@ -49,7 +52,7 @@ def __init__( ) else: self._pddl_ac_start = None - if self._config.apply_postconds and self._pddl_ac_start is None: + if self._apply_postconds and self._pddl_ac_start is None: raise ValueError(f"Could not find PDDL action in skill {self}") self._delay_term: List[Optional[bool]] = [ @@ -191,9 +194,9 @@ def should_terminate( device=cur_skill_step.device, dtype=torch.bool, ) - if self._config.max_skill_steps > 0: - over_max_len = cur_skill_step >= self._config.max_skill_steps - if self._config.force_end_on_timeout: + if self._max_skill_steps > 0: + over_max_len = cur_skill_step >= self._max_skill_steps + if self._force_end_on_timeout: bad_terminate = over_max_len else: is_skill_done = is_skill_done | over_max_len @@ -208,7 +211,7 @@ def should_terminate( ) self._delay_term[env_i] = False is_skill_done[i] = True - elif self._config.apply_postconds and is_skill_done[i]: + elif self._apply_postconds and is_skill_done[i]: new_actions[i] = self._apply_postcond( actions, log_info, skill_name[i], env_i, i ) diff --git a/habitat-baselines/habitat_baselines/rl/ppo/policy.py b/habitat-baselines/habitat_baselines/rl/ppo/policy.py index e31120c932..fb83a36478 100644 --- a/habitat-baselines/habitat_baselines/rl/ppo/policy.py +++ b/habitat-baselines/habitat_baselines/rl/ppo/policy.py @@ -66,6 +66,8 @@ class PolicyActionData: def write_action(self, write_idx: int, write_action: torch.Tensor) -> None: """ Used to override an action across all environments. + :param write_idx: The index in the action dimension to write the new action. + :param write_action: The action to write at `write_idx`. 
""" self.actions[:, write_idx] = write_action diff --git a/habitat-baselines/habitat_baselines/rl/ppo/ppo_trainer.py b/habitat-baselines/habitat_baselines/rl/ppo/ppo_trainer.py index 708e20f802..e169ee7296 100644 --- a/habitat-baselines/habitat_baselines/rl/ppo/ppo_trainer.py +++ b/habitat-baselines/habitat_baselines/rl/ppo/ppo_trainer.py @@ -65,6 +65,7 @@ from habitat_baselines.utils.common import ( batch_obs, generate_video, + get_action_space_info, get_num_actions, inference_mode, is_continuous_action_space, @@ -346,7 +347,7 @@ def _init_train(self, resume_state=None): self._nbuffers = 2 if ppo_cfg.use_double_buffered_sampler else 1 rollouts_cls = baseline_registry.get_storage( - self.config.habitat_baselines.rollout_storage + self.config.habitat_baselines.rollout_storage_name ) self.rollouts = rollouts_cls( ppo_cfg.num_steps, @@ -938,14 +939,9 @@ def _eval_checkpoint( self._init_envs(config, is_eval=True) self._setup_actor_critic_agent(ppo_cfg) - if is_continuous_action_space(self.policy_action_space): - # Assume NONE of the actions are discrete - action_shape = (get_num_actions(self.policy_action_space),) - discrete_actions = False - else: - # For discrete pointnav - action_shape = (1,) - discrete_actions = True + discrete_actions, action_shape = get_action_space_info( + self.policy_action_space + ) if self.agent.actor_critic.should_load_agent_state: self.agent.load_state_dict(ckpt_dict["state_dict"]) diff --git a/habitat-baselines/habitat_baselines/utils/common.py b/habitat-baselines/habitat_baselines/utils/common.py index 76e52423d6..4451ec5ae2 100644 --- a/habitat-baselines/habitat_baselines/utils/common.py +++ b/habitat-baselines/habitat_baselines/utils/common.py @@ -694,6 +694,23 @@ def is_continuous_action_space(action_space) -> bool: ) +def get_action_space_info(ac_space: spaces.Space) -> Tuple[Tuple[int], bool]: + """ + :returns: The shape of the action space and if the action space is discrete. If the action space is discrete, the shape will be `(1,)`. 
+ """ + if is_continuous_action_space(ac_space): + # Assume NONE of the actions are discrete + return ( + get_num_actions( + ac_space, + ), + False, + ) + else: + # For discrete pointnav + return (1,), True + + def get_num_actions(action_space) -> int: num_actions = 0 for v in iterate_action_space_recursively(action_space): From f2c673148e5c2e17655ac8a0b79485078ff8da15 Mon Sep 17 00:00:00 2001 From: ASzot Date: Tue, 7 Feb 2023 17:53:02 -0800 Subject: [PATCH 42/48] Addressed PR comments --- examples/tutorials/colabs/Habitat2_Quickstart.ipynb | 8 ++++++-- examples/tutorials/colabs/Habitat_Lab.ipynb | 8 ++++++-- .../colabs/Habitat_Lab_TopdownMap_Visualization.ipynb | 4 +++- examples/tutorials/nb_python/Habitat2_Quickstart.py | 1 + examples/tutorials/nb_python/Habitat_Lab.py | 1 + .../nb_python/Habitat_Lab_TopdownMap_Visualization.py | 1 + .../habitat_baselines/common/rollout_storage.py | 5 +---- .../config/rearrange/rl_hierarchical.yaml | 2 +- .../habitat_baselines/rl/hrl/hl/high_level_policy.py | 2 -- habitat-baselines/habitat_baselines/rl/hrl/hrl_ppo.py | 7 +------ .../habitat_baselines/rl/hrl/skills/oracle_nav.py | 2 +- .../habitat_baselines/rl/hrl/skills/skill.py | 2 +- habitat-baselines/habitat_baselines/rl/ppo/ppo_trainer.py | 1 - habitat-baselines/habitat_baselines/utils/common.py | 6 ++++-- 14 files changed, 27 insertions(+), 23 deletions(-) diff --git a/examples/tutorials/colabs/Habitat2_Quickstart.ipynb b/examples/tutorials/colabs/Habitat2_Quickstart.ipynb index 3291ab5be8..15bde460af 100644 --- a/examples/tutorials/colabs/Habitat2_Quickstart.ipynb +++ b/examples/tutorials/colabs/Habitat2_Quickstart.ipynb @@ -188,7 +188,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "lines_to_next_cell": 2 + }, "outputs": [], "source": [ "env = gym.make(\"HabitatRenderPick-v0\")\n", @@ -209,7 +211,9 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "lines_to_next_cell": 2 + }, "source": [ "# Defining New Tasks\n", "\n", diff --git a/examples/tutorials/colabs/Habitat_Lab.ipynb b/examples/tutorials/colabs/Habitat_Lab.ipynb index 8d94345475..ea1fd2c0b4 100644 --- a/examples/tutorials/colabs/Habitat_Lab.ipynb +++ b/examples/tutorials/colabs/Habitat_Lab.ipynb @@ -354,7 +354,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "lines_to_next_cell": 2 + }, "outputs": [], "source": [ " action = None\n", @@ -387,7 +389,9 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "lines_to_next_cell": 2 + }, "source": [ "## Create a new Sensor" ] diff --git a/examples/tutorials/colabs/Habitat_Lab_TopdownMap_Visualization.ipynb b/examples/tutorials/colabs/Habitat_Lab_TopdownMap_Visualization.ipynb index ca30a63c24..2978c91f1c 100644 --- a/examples/tutorials/colabs/Habitat_Lab_TopdownMap_Visualization.ipynb +++ b/examples/tutorials/colabs/Habitat_Lab_TopdownMap_Visualization.ipynb @@ -47,7 +47,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "lines_to_next_cell": 2 + }, "outputs": [], "source": [ "# [setup]\n", diff --git a/examples/tutorials/nb_python/Habitat2_Quickstart.py b/examples/tutorials/nb_python/Habitat2_Quickstart.py index 5eafa8de88..7a45d1c819 100644 --- a/examples/tutorials/nb_python/Habitat2_Quickstart.py +++ b/examples/tutorials/nb_python/Habitat2_Quickstart.py @@ -187,6 +187,7 @@ def insert_render_options(config): # For other examples of task, sensor, and measurement definitions, [see here # for existing 
tasks](https://github.com/facebookresearch/habitat-lab/tree/main/habitat-lab/habitat/tasks/rearrange/sub_tasks). Tasks, sensors, and measurements are connected through a config file that defines the task. + # %% @registry.register_task(name="RearrangeDemoNavPickTask-v0") class NavPickTaskV1(RearrangeTask): diff --git a/examples/tutorials/nb_python/Habitat_Lab.py b/examples/tutorials/nb_python/Habitat_Lab.py index b28c87776e..1df88c4bd1 100644 --- a/examples/tutorials/nb_python/Habitat_Lab.py +++ b/examples/tutorials/nb_python/Habitat_Lab.py @@ -306,6 +306,7 @@ def _check_episode_is_active(self, *args, **kwargs): # %% [markdown] # ## Create a new Sensor + # %% @registry.register_sensor(name="agent_position_sensor") class AgentPositionSensor(habitat.Sensor): diff --git a/examples/tutorials/nb_python/Habitat_Lab_TopdownMap_Visualization.py b/examples/tutorials/nb_python/Habitat_Lab_TopdownMap_Visualization.py index 3a39dc12f5..40ef559a25 100644 --- a/examples/tutorials/nb_python/Habitat_Lab_TopdownMap_Visualization.py +++ b/examples/tutorials/nb_python/Habitat_Lab_TopdownMap_Visualization.py @@ -86,6 +86,7 @@ os.makedirs(output_path) # [/setup] + # %% # [example_1] def example_pointnav_draw_target_birdseye_view(): diff --git a/habitat-baselines/habitat_baselines/common/rollout_storage.py b/habitat-baselines/habitat_baselines/common/rollout_storage.py index f48bd71042..afda49835f 100644 --- a/habitat-baselines/habitat_baselines/common/rollout_storage.py +++ b/habitat-baselines/habitat_baselines/common/rollout_storage.py @@ -16,10 +16,7 @@ build_pack_info_from_dones, build_rnn_build_seq_info, ) -from habitat_baselines.utils.common import ( - get_action_space_info, - get_num_actions, -) +from habitat_baselines.utils.common import get_action_space_info @baseline_registry.register_storage diff --git a/habitat-baselines/habitat_baselines/config/rearrange/rl_hierarchical.yaml b/habitat-baselines/habitat_baselines/config/rearrange/rl_hierarchical.yaml index f7f142a8a0..f5971142d4 100644 --- a/habitat-baselines/habitat_baselines/config/rearrange/rl_hierarchical.yaml +++ b/habitat-baselines/habitat_baselines/config/rearrange/rl_hierarchical.yaml @@ -17,7 +17,7 @@ habitat_baselines: verbose: False trainer_name: "ddppo" updater_name: "HRLPPO" - updater_name: "HRLDDPPO" + distrib_updater_name: "HRLDDPPO" torch_gpu_id: 0 video_fps: 30 eval_ckpt_path_dir: "" diff --git a/habitat-baselines/habitat_baselines/rl/hrl/hl/high_level_policy.py b/habitat-baselines/habitat_baselines/rl/hrl/hl/high_level_policy.py index ffc2874240..ec12f0ebc1 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/hl/high_level_policy.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/hl/high_level_policy.py @@ -95,8 +95,6 @@ def apply_mask(self, mask: torch.Tensor) -> None: Called before every step with the mask information at the current step. """ - pass - def get_policy_components(self) -> List[nn.Module]: """ Gets the torch modules that are in the HL policy architecture. 
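The practical reason for exposing these modules is that an updater can only optimize parameters it can reach; a minimal sketch of chaining such components into an optimizer (the modules and learning rate below are stand-ins, not the actual architecture):

```python
import itertools
import torch
import torch.nn as nn

components = [nn.Linear(8, 4), nn.GRU(4, 4)]  # stand-ins for the HL policy's modules
params = itertools.chain(*(c.parameters() for c in components))
optimizer = torch.optim.Adam(params, lr=2.5e-4)
```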
diff --git a/habitat-baselines/habitat_baselines/rl/hrl/hrl_ppo.py b/habitat-baselines/habitat_baselines/rl/hrl/hrl_ppo.py index 998933c459..4acad90588 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/hrl_ppo.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/hrl_ppo.py @@ -28,12 +28,7 @@ def reduce_loss(loss): self._set_grads_to_none() - ( - values, - action_log_probs, - dist_entropy, - _, - ) = self._evaluate_actions( + (values, action_log_probs, dist_entropy, _,) = self._evaluate_actions( batch["observations"], batch["recurrent_hidden_states"], batch["prev_actions"], diff --git a/habitat-baselines/habitat_baselines/rl/hrl/skills/oracle_nav.py b/habitat-baselines/habitat_baselines/rl/hrl/skills/oracle_nav.py index fe1364ae51..369bdf4e05 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/skills/oracle_nav.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/skills/oracle_nav.py @@ -121,7 +121,7 @@ def _is_skill_done( for i, batch_i in enumerate(batch_idx): prev_pos = self._prev_pos[batch_i] if prev_pos is not None: - movement = torch.linalg.norm(prev_pos - cur_pos[i]) + movement = (prev_pos - cur_pos[i]).pow(2).sum().sqrt() ret[i] = movement < self._config.stop_thresh self._prev_pos[batch_i] = cur_pos[i] diff --git a/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py b/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py index 9a1eb5647e..e373f83378 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/skills/skill.py @@ -29,7 +29,7 @@ def __init__( """ self._config = config self._batch_size = batch_size - self._apply_postcond = self._config.apply_postconds + self._apply_postconds = self._config.apply_postconds self._force_end_on_timeout = self._config.force_end_on_timeout self._max_skill_steps = self._config.max_skill_steps diff --git a/habitat-baselines/habitat_baselines/rl/ppo/ppo_trainer.py b/habitat-baselines/habitat_baselines/rl/ppo/ppo_trainer.py index e169ee7296..910ef21374 100644 --- a/habitat-baselines/habitat_baselines/rl/ppo/ppo_trainer.py +++ b/habitat-baselines/habitat_baselines/rl/ppo/ppo_trainer.py @@ -66,7 +66,6 @@ batch_obs, generate_video, get_action_space_info, - get_num_actions, inference_mode, is_continuous_action_space, ) diff --git a/habitat-baselines/habitat_baselines/utils/common.py b/habitat-baselines/habitat_baselines/utils/common.py index 4451ec5ae2..2e8570de95 100644 --- a/habitat-baselines/habitat_baselines/utils/common.py +++ b/habitat-baselines/habitat_baselines/utils/common.py @@ -701,8 +701,10 @@ def get_action_space_info(ac_space: spaces.Space) -> Tuple[Tuple[int], bool]: if is_continuous_action_space(ac_space): # Assume NONE of the actions are discrete return ( - get_num_actions( - ac_space, + ( + get_num_actions( + ac_space, + ), ), False, ) From 11e77c326f40232c9ff7ff308e459d2fb85cd92e Mon Sep 17 00:00:00 2001 From: ASzot Date: Tue, 7 Feb 2023 18:08:43 -0800 Subject: [PATCH 43/48] Fixed config --- .../habitat_baselines/config/default_structured_configs.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/habitat-baselines/habitat_baselines/config/default_structured_configs.py b/habitat-baselines/habitat_baselines/config/default_structured_configs.py index 29a6ad3a3f..f90cd5f32c 100644 --- a/habitat-baselines/habitat_baselines/config/default_structured_configs.py +++ b/habitat-baselines/habitat_baselines/config/default_structured_configs.py @@ -214,6 +214,7 @@ class Eq2CubeConfig(ObsTransformConfig): node=Eq2CubeConfig, ) + 
@attr.s(auto_attribs=True, slots=True) class HrlDefinedSkillConfig(HabitatBaselinesBaseConfig): """ @@ -249,7 +250,7 @@ class HrlDefinedSkillConfig(HabitatBaselinesBaseConfig): @attr.s(auto_attribs=True, slots=True) -class HierarchicalPolicy(HabitatBaselinesBaseConfig): +class HierarchicalPolicyConfig(HabitatBaselinesBaseConfig): high_level_policy: Dict[str, Any] = MISSING defined_skills: Dict[str, HrlDefinedSkillConfig] = dict() use_skills: Dict[str, str] = dict() From 6f5ea767c5bb5bc8412bc850a7a5539d76dba516 Mon Sep 17 00:00:00 2001 From: ASzot Date: Tue, 7 Feb 2023 18:09:36 -0800 Subject: [PATCH 44/48] Fixed typo --- habitat-baselines/habitat_baselines/common/rollout_storage.py | 2 +- habitat-baselines/habitat_baselines/rl/ppo/ppo_trainer.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/habitat-baselines/habitat_baselines/common/rollout_storage.py b/habitat-baselines/habitat_baselines/common/rollout_storage.py index afda49835f..6fe9a03fc3 100644 --- a/habitat-baselines/habitat_baselines/common/rollout_storage.py +++ b/habitat-baselines/habitat_baselines/common/rollout_storage.py @@ -33,7 +33,7 @@ def __init__( num_recurrent_layers=1, is_double_buffered: bool = False, ): - discrete_actions, action_shape = get_action_space_info(action_space) + action_shape, discrete_actions = get_action_space_info(action_space) self.buffers = TensorDict() self.buffers["observations"] = TensorDict() diff --git a/habitat-baselines/habitat_baselines/rl/ppo/ppo_trainer.py b/habitat-baselines/habitat_baselines/rl/ppo/ppo_trainer.py index 910ef21374..dcc4289cb6 100644 --- a/habitat-baselines/habitat_baselines/rl/ppo/ppo_trainer.py +++ b/habitat-baselines/habitat_baselines/rl/ppo/ppo_trainer.py @@ -938,7 +938,7 @@ def _eval_checkpoint( self._init_envs(config, is_eval=True) self._setup_actor_critic_agent(ppo_cfg) - discrete_actions, action_shape = get_action_space_info( + action_shape, discrete_actions = get_action_space_info( self.policy_action_space ) From cb6ce62e6c72e58160fe967a5db0b87764d09c3a Mon Sep 17 00:00:00 2001 From: ASzot Date: Tue, 7 Feb 2023 18:12:01 -0800 Subject: [PATCH 45/48] Fixed another typo --- habitat-baselines/habitat_baselines/rl/hrl/hrl_ppo.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/habitat-baselines/habitat_baselines/rl/hrl/hrl_ppo.py b/habitat-baselines/habitat_baselines/rl/hrl/hrl_ppo.py index 4acad90588..b7bbd38bc2 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/hrl_ppo.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/hrl_ppo.py @@ -28,7 +28,13 @@ def reduce_loss(loss): self._set_grads_to_none() - (values, action_log_probs, dist_entropy, _,) = self._evaluate_actions( + ( + values, + action_log_probs, + dist_entropy, + _, + _, + ) = self._evaluate_actions( batch["observations"], batch["recurrent_hidden_states"], batch["prev_actions"], From 6d4b968bd17e6a0edfd9b95d2741d6c8bf83a1e4 Mon Sep 17 00:00:00 2001 From: ASzot Date: Tue, 7 Feb 2023 18:13:30 -0800 Subject: [PATCH 46/48] CI --- .../habitat_baselines/config/default_structured_configs.py | 1 - habitat-lab/habitat/config/default_structured_configs.py | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/habitat-baselines/habitat_baselines/config/default_structured_configs.py b/habitat-baselines/habitat_baselines/config/default_structured_configs.py index f90cd5f32c..6e540e627c 100644 --- a/habitat-baselines/habitat_baselines/config/default_structured_configs.py +++ b/habitat-baselines/habitat_baselines/config/default_structured_configs.py @@ -4,7 +4,6 @@ # 
This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. -from dataclasses import dataclass, field from typing import Any, Dict, List, Optional, Tuple import attr diff --git a/habitat-lab/habitat/config/default_structured_configs.py b/habitat-lab/habitat/config/default_structured_configs.py index 1a6fc472d5..c0638a849c 100644 --- a/habitat-lab/habitat/config/default_structured_configs.py +++ b/habitat-lab/habitat/config/default_structured_configs.py @@ -200,6 +200,7 @@ class RearrangeStopActionConfig(ActionConfig): class PddlApplyActionConfig(ActionConfig): type: str = "PddlApplyAction" + @attr.s(auto_attribs=True, slots=True) class OracleNavActionConfig(ActionConfig): """ From 9d2c2f5a99a64d72a9a78a2a0f2157bde0e37cfb Mon Sep 17 00:00:00 2001 From: ASzot Date: Wed, 8 Feb 2023 22:32:15 -0800 Subject: [PATCH 47/48] Updated to work with older pytorch version --- .../habitat_baselines/rl/hrl/hrl_rollout_storage.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/habitat-baselines/habitat_baselines/rl/hrl/hrl_rollout_storage.py b/habitat-baselines/habitat_baselines/rl/hrl/hrl_rollout_storage.py index 26dbbe9afc..9acf0854ea 100644 --- a/habitat-baselines/habitat_baselines/rl/hrl/hrl_rollout_storage.py +++ b/habitat-baselines/habitat_baselines/rl/hrl/hrl_rollout_storage.py @@ -95,9 +95,7 @@ def insert( env_idxs = torch.arange(self._num_envs) if rewards is not None: # Accumulate rewards between updates. - reward_write_idxs = torch.maximum( - self._cur_step_idxs - 1, torch.zeros_like(self._cur_step_idxs) - ) + reward_write_idxs = torch.clamp(self._cur_step_idxs - 1, min=0) self.buffers["rewards"][reward_write_idxs, env_idxs] += rewards if len(next_step) > 0: From 48142fb9c152392e8cfe388a60e2d14b869e0fbd Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Thu, 9 Feb 2023 13:18:06 -0800 Subject: [PATCH 48/48] renaming --exp-config to --config-name again --- habitat-baselines/habitat_baselines/README.md | 7 +++---- scripts/generate_profile_shell_scripts.py | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/habitat-baselines/habitat_baselines/README.md b/habitat-baselines/habitat_baselines/README.md index 8451a4b4a5..8c04a8ef2a 100644 --- a/habitat-baselines/habitat_baselines/README.md +++ b/habitat-baselines/habitat_baselines/README.md @@ -61,15 +61,14 @@ To train a high-level policy, while using pre-learned low-level skills (SRL base ```bash python -u habitat-baselines/habitat_baselines/run.py \ - --exp-config habitat-baselines/habitat_baselines/config/rearrange/rl_hierarchical.yaml \ - --run-type train + --config-name=rearrange/rl_hierarchical.yaml ``` To run a rearrangement episode with oracle low-level skills and a fixed task planner, run: ```bash python -u habitat-baselines/habitat_baselines/run.py \ - --exp-config habitat-baselines/habitat_baselines/config/rearrange/rl_hierarchical.yaml \ - --run-type eval \ + --config-name=rearrange/rl_hierarchical.yaml \ + habitat_baselines.evaluate=True \ habitat_baselines/rl/policy=hl_fixed \ habitat_baselines/rl/policy/hierarchical_policy/defined_skills=oracle_skills ``` diff --git a/scripts/generate_profile_shell_scripts.py b/scripts/generate_profile_shell_scripts.py index 030bcf29ec..b61728e29a 100644 --- a/scripts/generate_profile_shell_scripts.py +++ b/scripts/generate_profile_shell_scripts.py @@ -22,7 +22,7 @@ if __name__ == "__main__": # The Habitat-lab program to be profiled (the command you usually use to # invoke it). 
- program_str = "python -u -m habitat_baselines.run --exp-config habitat-baselines/habitat_baselines/config/pointnav/ddppo_pointnav.yaml --run-type train" + program_str = "python -u -m habitat_baselines.run --config-name=pointnav/ddppo_pointnav.yaml" # Path to Nsight Systems nsys command-line tool. This hard-coded path is # for the FAIR cluster.
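
For context on the `get_action_space_info` changes in patches 42 and 44 above (the tuple-wrapped return value in `habitat_baselines/utils/common.py` and the corrected unpacking order at the call sites in `rollout_storage.py` and `ppo_trainer.py`), the following is a minimal illustrative sketch of the contract those call sites now rely on: the helper returns `(action_shape, is_discrete)`, in that order, with the shape itself wrapped in a tuple. This is a simplified stand-in using plain `gym` spaces, not the library implementation; the name `get_action_space_info_sketch` and the Box/Discrete-only handling are assumptions made here for illustration.

```python
# Hedged sketch only -- not the habitat_baselines source. It mirrors the return
# contract the patched call sites expect: (action_shape, is_discrete).
from typing import Tuple

import numpy as np
from gym import spaces


def get_action_space_info_sketch(ac_space: spaces.Space) -> Tuple[Tuple[int], bool]:
    """Return (action_shape, is_discrete) for a simple Box or Discrete space."""
    if isinstance(ac_space, spaces.Box):
        # Continuous case: the shape is the flattened number of action dims,
        # wrapped in a tuple, and the discrete flag is False.
        return (int(np.prod(ac_space.shape)),), False
    # Discrete case: a single integer action index is stored per step.
    return (1,), True


if __name__ == "__main__":
    # Unpacking order matches the fixed call sites: shape first, flag second.
    action_shape, discrete_actions = get_action_space_info_sketch(
        spaces.Box(low=-1.0, high=1.0, shape=(7,))
    )
    assert action_shape == (7,) and discrete_actions is False
```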