update: add antmaze configs #13

Open · wants to merge 7 commits into master
98 changes: 98 additions & 0 deletions scripts/configs/bt_iql/antmaze/default.yaml
@@ -0,0 +1,98 @@
algorithm:
  class: BTIQL
  beta: 0.1
  expectile: 0.9
  max_exp_clip: 100.0
  reward_reg: 0.0
  rm_label: true

checkpoint: null
seed: 0
name: default
debug: false
device: null
wandb:
  activate: false
  entity: null
  project: null

env: antmaze-medium-diverse-v2
env_kwargs:
env_wrapper:
env_wrapper_kwargs:

optim:
  default:
    class: Adam
    lr: 0.0003

network:
  reward:
    class: EnsembleMLP
    ensemble_size: 1
    hidden_dims: [256, 256]
    reward_act: identity
  actor:
    class: SquashedGaussianActor
    hidden_dims: [256, 256]
    reparameterize: false
    conditioned_logstd: false
    logstd_min: -5
    logstd_max: 2
  critic:
    class: Critic
    ensemble_size: 2
    hidden_dims: [256, 256]
  value:
    class: Critic
    ensemble_size: 1
    hidden_dims: [256, 256]

rm_dataset:
  - class: IPLComparisonOfflineDataset
    env: antmaze-medium-diverse-v2
    batch_size: 64
    segment_length: null
    mode: human
rm_dataloader:
  num_workers: 2
  batch_size: null

rl_dataset:
  - class: D4RLOfflineDataset
    env: antmaze-medium-diverse-v2
    batch_size: 256
    mode: transition
    reward_normalize: true
rl_dataloader:
  num_workers: 2
  batch_size: null

trainer:
  env_freq: null
  rm_label: true
  rm_steps: 50000
  rl_steps: 1000000
  log_freq: 500
  profile_freq: 500
  eval_freq: 10000

rm_eval:
  function: eval_reward_model
  eval_dataset_kwargs:
    class: IPLComparisonOfflineDataset
    env: antmaze-medium-diverse-v2
    batch_size: 32
    mode: human
    eval: true
rl_eval:
  function: eval_offline
  num_ep: 100
  deterministic: true

schedulers:
  actor:
    class: CosineAnnealingLR
    T_max: 1000000

processor: null
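For reference, the expectile, beta, and max_exp_clip keys map onto the usual IQL objectives: an asymmetric-L2 (expectile) regression for the value network and an exponentiated-advantage weight, clipped at max_exp_clip, for the actor. Below is a minimal sketch of those two pieces, assuming PyTorch; the function names are hypothetical, and whether beta divides or multiplies the advantage is a repo-specific convention, so treat advantage / beta as an assumption (with beta: 0.1 it acts as the temperature 10 commonly used for AntMaze in IQL).

    import torch

    def expectile_loss(diff: torch.Tensor, expectile: float = 0.9) -> torch.Tensor:
        # Asymmetric L2 from IQL, with diff = Q(s, a) - V(s):
        # positive errors are weighted by `expectile`, negative by (1 - expectile).
        weight = torch.where(diff > 0, expectile, 1.0 - expectile)
        return (weight * diff.pow(2)).mean()

    def awr_weight(advantage: torch.Tensor,
                   beta: float = 0.1,
                   max_exp_clip: float = 100.0) -> torch.Tensor:
        # Advantage-weighted actor coefficient, clipped so rare large
        # advantages cannot blow up the regression weights.
        return torch.exp(advantage / beta).clamp(max=max_exp_clip)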
103 changes: 103 additions & 0 deletions scripts/configs/bt_iql/antmaze/earlystop.yaml
@@ -0,0 +1,103 @@
algorithm:
  class: BTIQL
  beta: 0.1
  expectile: 0.9
  max_exp_clip: 100.0
  reward_reg: 0.0
  rm_label: true

checkpoint: null
seed: 0
name: default
debug: false
device: null
wandb:
  activate: false
  entity: null
  project: null

env: antmaze-medium-diverse-v2
env_kwargs:
env_wrapper:
env_wrapper_kwargs:

optim:
  default:
    class: Adam
    lr: 0.0003

network:
  reward:
    class: EnsembleMLP
    ensemble_size: 1
    hidden_dims: [256, 256]
    reward_act: identity
  actor:
    class: SquashedGaussianActor
    hidden_dims: [256, 256]
    reparameterize: false
    conditioned_logstd: false
    logstd_min: -5
    logstd_max: 2
  critic:
    class: Critic
    ensemble_size: 2
    hidden_dims: [256, 256]
  value:
    class: Critic
    ensemble_size: 1
    hidden_dims: [256, 256]

rm_dataset:
  - class: IPLComparisonOfflineDataset
    env: antmaze-medium-diverse-v2
    batch_size: 64
    segment_length: null
    mode: human
rm_dataloader:
  num_workers: 2
  batch_size: null

rl_dataset:
  - class: D4RLOfflineDataset
    env: antmaze-medium-diverse-v2
    batch_size: 256
    mode: transition
    reward_normalize: true
rl_dataloader:
  num_workers: 2
  batch_size: null

trainer:
  env_freq: null
  rm_label: true
  rm_steps: null # use early stop
  rl_steps: 1000000
  log_freq: 500
  profile_freq: 500
  eval_freq: 10000
  # early stop
  earlystop_tolerance: 5
  earlystop_metric: val_acc
  earlystop_mode: max
  earlystop_start_step: 0

rm_eval:
  function: eval_reward_model
  eval_dataset_kwargs:
    class: IPLComparisonOfflineDataset
    env: antmaze-medium-diverse-v2
    batch_size: 32
    mode: human
    eval: true
rl_eval:
  function: eval_offline
  num_ep: 100
  deterministic: true

schedulers:
  actor:
    class: CosineAnnealingLR
    T_max: 1000000

processor: null
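The earlystop_* keys above replace the fixed rm_steps budget with metric-based early stopping for the reward model. As a hedged sketch of what such a tracker typically does (the trainer's actual implementation may differ; the class and method names here are hypothetical):

    class EarlyStopper:
        def __init__(self, tolerance: int = 5, mode: str = "max", start_step: int = 0):
            self.tolerance = tolerance      # evaluations without improvement before stopping
            self.mode = mode                # "max" for accuracy-like metrics, "min" for losses
            self.start_step = start_step    # ignore evaluations before this step
            self.best = None
            self.bad_evals = 0

        def should_stop(self, step: int, metric: float) -> bool:
            if step < self.start_step:
                return False
            improved = (
                self.best is None
                or (self.mode == "max" and metric > self.best)
                or (self.mode == "min" and metric < self.best)
            )
            if improved:
                self.best, self.bad_evals = metric, 0
            else:
                self.bad_evals += 1
            return self.bad_evals >= self.tolerance

With earlystop_metric: val_acc and earlystop_mode: max, reward-model training would halt once five consecutive evaluations fail to improve the best validation accuracy; earlystop_start_step: 0 enables the check from the first evaluation.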
134 changes: 134 additions & 0 deletions scripts/configs/hpl/discrete/antmaze.yaml
@@ -0,0 +1,134 @@
algorithm:
  class: HindsightPreferenceLearning
  expectile: 0.9
  beta: 0.1
  max_exp_clip: 100.0
  discount: 0.99
  tau: 0.005
  seq_len: 100
  future_len: 5 # [5, 10, 20]
  z_dim: 128 # [128]
  prior_sample: 20
  vae_steps: 250000 # [250k, 200k]
  reward_steps: 100000 # [20k]
  kl_loss_coef: 0.1 # [0.5, 5.0]
  kl_balance_coef: 0.5
  reg_coef: 0.0
  discrete: true
  discrete_group: 8
  stoc_encoding: true # for hopper-medium-expert, try true
  rm_label: true

checkpoint: null
seed: 0
name: default
debug: false
device: null
wandb:
  activate: false
  entity: null
  project: null

env: antmaze-medium-play-v2
env_kwargs:
env_wrapper:
env_wrapper_kwargs:

optim:
  default:
    class: Adam
    lr: 0.0003

network:
  encoder:
    embed_dim: 128
    num_layers: 3
    num_heads: 4
    dropout: 0.1
  decoder:
    embed_dim: 128
    hidden_dims: [256, 256, 256] # shallower?
    ortho_init: true
  prior:
    hidden_dims: [256, 256]
    ortho_init: true
  reward:
    class: Critic
    hidden_dims: [256, 256, 256] # tune
    ortho_init: true
    reward_act: sigmoid
  actor:
    class: SquashedGaussianActor
    hidden_dims: [256, 256, 256]
    reparameterize: false
    conditioned_logstd: true
    logstd_min: -7
    logstd_max: 2
    ortho_init: true
  critic:
    class: Critic
    ensemble_size: 2
    hidden_dims: [256, 256, 256]
    ortho_init: true
  value:
    class: Critic
    ensemble_size: 1
    hidden_dims: [256, 256, 256]
    ortho_init: true

rm_dataset:
  - class: D4RLOfflineDataset
    env: antmaze-medium-play-v2
    batch_size: 64 # [64, 128]
    mode: trajectory
    segment_length: 100
    padding_mode: none
  - class: IPLComparisonOfflineDataset
    env: antmaze-medium-play-v2
    batch_size: 8
    mode: human
  - class: D4RLOfflineDataset
    env: antmaze-medium-play-v2
    batch_size: 512
    mode: transition
rm_dataloader:
  num_workers: 2
  batch_size: null

rl_dataset:
  - class: D4RLOfflineDataset
    env: antmaze-medium-play-v2
    batch_size: 512
    mode: transition
rl_dataloader:
  num_workers: 2
  batch_size: null

trainer:
  env_freq: null
  rm_label: true
  rm_steps: 350000
  rl_steps: 1000000
  log_freq: 500
  profile_freq: 500
  eval_freq: 10000 # don't do eval

rm_eval:
  function: eval_reward_model
  eval_dataset_kwargs:
    class: IPLComparisonOfflineDataset
    env: antmaze-medium-diverse-v2
    batch_size: 32
    mode: human
    eval: true
rl_eval:
  function: eval_offline
  num_ep: 100
  deterministic: true

schedulers:

processor: null

# finalized
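With discrete: true, z_dim: 128, and discrete_group: 8, one common reading (DreamerV2-style discrete latents) is that the 128-dimensional latent is split into 8 categorical groups of 16 classes each, sampled with straight-through gradients. A minimal sketch under that assumption (PyTorch; the function name is hypothetical and this is not necessarily this repo's implementation):

    import torch
    import torch.nn.functional as F

    def sample_grouped_discrete(logits: torch.Tensor,
                                z_dim: int = 128,
                                groups: int = 8) -> torch.Tensor:
        # Interpret the latent as `groups` categorical variables with
        # z_dim // groups classes each (128 / 8 = 16).
        classes = z_dim // groups
        logits = logits.reshape(*logits.shape[:-1], groups, classes)
        probs = F.softmax(logits, dim=-1)
        onehot = torch.distributions.OneHotCategorical(logits=logits).sample()
        # Straight-through estimator: the forward pass uses the one-hot sample,
        # the backward pass flows through the softmax probabilities.
        z = onehot + probs - probs.detach()
        return z.reshape(*z.shape[:-2], z_dim)

Likewise, kl_balance_coef: 0.5 most plausibly refers to KL balancing in the same style: mixing KL(stop_grad(posterior) || prior) and KL(posterior || stop_grad(prior)) with equal weight so prior and posterior are trained at comparable rates, though that interpretation is an assumption.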