feature(xcy): add Metadrive Env and its SEZ configs (#192)
* feature(xcy): add meta drive env

* polish(xcy):change config

* feature(xcy):change metadrive version

* polish(xcy):polish the config and comments

---------

Co-authored-by: 蒲源 <48008469+puyuan1996@users.noreply.github.com>
HarryXuancy and puyuan1996 authored Aug 2, 2024
1 parent f062baa commit 4c68ac8
Showing 7 changed files with 723 additions and 3 deletions.
2 changes: 1 addition & 1 deletion requirements-build.txt
@@ -1,4 +1,4 @@
cython>=0.29
build>=0.7.0
auditwheel>=4
-numpy>=1.22.4
+numpy>=1.21.6
2 changes: 1 addition & 1 deletion requirements-doc.txt
@@ -8,7 +8,7 @@ plantumlcli>=0.0.4
packaging
sphinx-multiversion~=0.2.4
where~=1.0.2
-numpy>=1.22.4,<2
+numpy>=1.19,<2
easydict>=1.7,<2
scikit-learn>=0.24.2
nbsphinx>=0.8.8
2 changes: 1 addition & 1 deletion requirements-test.txt
@@ -11,6 +11,6 @@ pytest-benchmark~=3.4.0
testtools>=2
hbutils>=0.6.13
setuptools<=59.5.0
-numpy>=1.22.4
+numpy>=1.21.6
easydict>=1.7,<2
swig >= 4.1.1
1 change: 1 addition & 0 deletions requirements.txt
@@ -1,5 +1,6 @@
DI-engine>=0.4.7
gymnasium[atari]
moviepy
numpy>=1.22.4
pympler
minigrid
103 changes: 103 additions & 0 deletions zoo/metadrive/config/metadrive_sampled_efficientzero_config.py
@@ -0,0 +1,103 @@
from easydict import EasyDict

# ==============================================================
# begin of the most frequently changed config specified by the user
# ==============================================================
continuous_action_space = True
K = 20  # num_of_sampled_actions: the number of actions sampled at each MCTS node.
collector_env_num = 8
n_episode = 8
evaluator_env_num = 3
num_simulations = 50  # The number of MCTS simulations per move.
update_per_collect = 200  # The number of gradient updates after each collection phase.
batch_size = 64
max_env_step = int(1e6)
reanalyze_ratio = 0.  # The fraction of training targets recomputed with the latest model.
# ==============================================================
# end of the most frequently changed config specified by the user
# ==============================================================

metadrive_sampled_efficientzero_config = dict(
    exp_name=f'data_sez_ctree/sez_metadrive_old{K}_ns{num_simulations}_upc{update_per_collect}_rr{reanalyze_ratio}_seed0',
    env=dict(
        env_name='MetaDrive',
        continuous=True,
        obs_shape=[5, 84, 84],
        manually_discretization=False,
        collector_env_num=collector_env_num,
        evaluator_env_num=evaluator_env_num,
        n_evaluator_episode=evaluator_env_num,
        manager=dict(shared_memory=False, ),
        metadrive=dict(
            use_render=False,
            traffic_density=0.20,  # Density of traffic vehicles on the roads, in the range [0, 1].
            map='XSOS',  # Int or string: an easy way to fill map_config.
            horizon=4000,  # Maximum number of steps per episode.
            driving_reward=1.0,  # Reward to encourage the agent to move forward.
            speed_reward=0.1,  # Reward to encourage the agent to drive at high speed.
            use_lateral_reward=False,  # Whether to add a lateral (lane-keeping) term to the driving reward.
            out_of_road_penalty=40.0,  # Penalty to discourage driving out of the road.
            crash_vehicle_penalty=40.0,  # Penalty to discourage collisions.
            decision_repeat=10,  # Simulation steps repeated per agent decision (the reciprocal of the decision frequency).
            out_of_route_done=True,  # Terminate the episode if the agent drives off the route.
        ),
    ),
    policy=dict(
        model=dict(
            observation_shape=[5, 84, 84],
            action_space_size=2,
            continuous_action_space=continuous_action_space,
            num_of_sampled_actions=K,
            sigma_type='conditioned',
            model_type='conv',  # options={'mlp', 'conv'}
            lstm_hidden_size=128,
            latent_state_dim=128,
            downsample=True,
            image_channel=5,
        ),
        cuda=True,
        env_type='not_board_games',
        game_segment_length=50,
        update_per_collect=update_per_collect,
        batch_size=batch_size,
        optim_type='Adam',
        lr_piecewise_constant_decay=False,
        learning_rate=0.003,
        # NOTE: for the continuous Gaussian policy, we use the policy_entropy_loss as in the original Sampled MuZero paper.
        policy_entropy_loss_weight=5e-3,
        num_simulations=num_simulations,
        reanalyze_ratio=reanalyze_ratio,
        n_episode=n_episode,
        eval_freq=int(2000),
        replay_buffer_size=int(1e6),  # The capacity of the replay buffer, measured in transitions.
        collector_env_num=collector_env_num,
        evaluator_env_num=evaluator_env_num,
    ),
)
metadrive_sampled_efficientzero_config = EasyDict(metadrive_sampled_efficientzero_config)
main_config = metadrive_sampled_efficientzero_config

metadrive_sampled_efficientzero_create_config = dict(
    env=dict(
        type='metadrive_lightzero',
        import_names=['zoo.metadrive.env.metadrive_env'],
    ),
    env_manager=dict(type='subprocess'),
    policy=dict(
        type='sampled_efficientzero',
        import_names=['lzero.policy.sampled_efficientzero'],
    ),
    collector=dict(
        type='episode_muzero',
        get_train_sample=True,
        import_names=['lzero.worker.muzero_collector'],
    ),
)
metadrive_sampled_efficientzero_create_config = EasyDict(metadrive_sampled_efficientzero_create_config)
create_config = metadrive_sampled_efficientzero_create_config

if __name__ == "__main__":
    from lzero.entry import train_muzero
    train_muzero([main_config, create_config], seed=0, max_env_step=max_env_step)
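Usage note: running this config file directly launches training, since the __main__ block above calls train_muzero with main_config and create_config. The metadrive sub-dict reuses the raw simulator's option names, so, as a minimal illustrative sketch (not the repository's own wrapper, which lives in zoo.metadrive.env.metadrive_env and is not shown in this commit), the same settings could be passed to a bare MetaDrive environment like this:

# Illustrative sketch only: build a raw MetaDrive env with the options from the
# `metadrive` sub-dict above. The LightZero wrapper additionally produces the
# [5, 84, 84] image observation expected by the model config and is not shown here.
# To launch training instead, run from the repository root:
#   python zoo/metadrive/config/metadrive_sampled_efficientzero_config.py
from metadrive.envs import MetaDriveEnv

raw_env = MetaDriveEnv(dict(
    use_render=False,
    traffic_density=0.20,
    map='XSOS',
    horizon=4000,
    driving_reward=1.0,
    speed_reward=0.1,
    use_lateral_reward=False,
    out_of_road_penalty=40.0,
    crash_vehicle_penalty=40.0,
    decision_repeat=10,
    out_of_route_done=True,
))
obs = raw_env.reset()  # The exact reset/step return signature depends on the installed metadrive version.
raw_env.close()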