Format all code using black #534

Closed
wants to merge 1 commit into from
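The PR does not show the exact invocation, but black is normally run from the command line (for example `black --line-length 120 .`) or through its Python API. A minimal sketch, assuming the `black` package is installed and the 120-character line length that the reformatted lines below suggest, reproduces the quote normalization applied in docs/source/conf.py:

```python
import black

# Black normalizes string quotes to double quotes; already-conformant code
# is returned unchanged.
source = "pygments_style = 'sphinx'\n"
formatted = black.format_str(source, mode=black.Mode(line_length=120))
print(formatted, end="")  # pygments_style = "sphinx"
```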
2 changes: 1 addition & 1 deletion docs/source/conf.py
@@ -65,7 +65,7 @@


# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'
pygments_style = "sphinx"

# If true, `todo` and `todoList` produce output, else they produce nothing.
todo_include_todos = False
4 changes: 2 additions & 2 deletions examples/cim/rl/algorithms/ac.py
@@ -58,10 +58,10 @@ def get_ac(state_dim: int, name: str) -> ActorCriticTrainer:
name=name,
params=ActorCriticParams(
get_v_critic_net_func=lambda: MyCriticNet(state_dim),
reward_discount=.0,
reward_discount=0.0,
grad_iters=10,
critic_loss_cls=torch.nn.SmoothL1Loss,
min_logp=None,
lam=.0,
lam=0.0,
),
)
22 changes: 13 additions & 9 deletions examples/cim/rl/algorithms/dqn.py
@@ -38,14 +38,18 @@ def get_dqn_policy(state_dim: int, action_num: int, name: str) -> ValueBasedPolicy:
name=name,
q_net=MyQNet(state_dim, action_num),
exploration_strategy=(epsilon_greedy, {"epsilon": 0.4}),
exploration_scheduling_options=[(
"epsilon", MultiLinearExplorationScheduler, {
"splits": [(2, 0.32)],
"initial_value": 0.4,
"last_ep": 5,
"final_value": 0.0,
}
)],
exploration_scheduling_options=[
(
"epsilon",
MultiLinearExplorationScheduler,
{
"splits": [(2, 0.32)],
"initial_value": 0.4,
"last_ep": 5,
"final_value": 0.0,
},
)
],
warmup=100,
)

@@ -54,7 +58,7 @@ def get_dqn(name: str) -> DQNTrainer:
return DQNTrainer(
name=name,
params=DQNParams(
reward_discount=.0,
reward_discount=0.0,
update_target_every=5,
num_epochs=10,
soft_update_coef=0.1,
10 changes: 5 additions & 5 deletions examples/cim/rl/algorithms/maddpg.py
@@ -17,15 +17,15 @@
"activation": torch.nn.Tanh,
"softmax": True,
"batch_norm": False,
"head": True
"head": True,
}
critic_net_conf = {
"hidden_dims": [256, 128, 64],
"output_dim": 1,
"activation": torch.nn.LeakyReLU,
"softmax": False,
"batch_norm": True,
"head": True
"head": True,
}
actor_learning_rate = 0.001
critic_learning_rate = 0.001
@@ -64,9 +64,9 @@ def get_maddpg(state_dim: int, action_dims: List[int], name: str) -> DiscreteMADDPGTrainer:
return DiscreteMADDPGTrainer(
name=name,
params=DiscreteMADDPGParams(
reward_discount=.0,
reward_discount=0.0,
num_epoch=10,
get_q_critic_net_func=partial(get_multi_critic_net, state_dim, action_dims),
shared_critic=False
)
shared_critic=False,
),
)
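The trailing commas added in maddpg.py are also black's doing: when a collection does not fit on one line, black explodes it one element per line and appends a trailing comma to the last element. A hypothetical standalone sketch under the same assumptions as above:

```python
import black

# The joined form of this dict exceeds 120 characters, so black splits it
# one item per line and adds a trailing comma after the last item.
source = (
    "critic_net_conf = {'hidden_dims': [256, 128, 64], 'output_dim': 1, "
    "'activation': torch.nn.LeakyReLU, 'softmax': False, 'batch_norm': True, 'head': True}\n"
)
print(black.format_str(source, mode=black.Mode(line_length=120)), end="")
# critic_net_conf = {
#     "hidden_dims": [256, 128, 64],
#     "output_dim": 1,
#     "activation": torch.nn.LeakyReLU,
#     "softmax": False,
#     "batch_norm": True,
#     "head": True,
# }
```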
4 changes: 2 additions & 2 deletions examples/cim/rl/algorithms/ppo.py
@@ -15,11 +15,11 @@ def get_ppo(state_dim: int, name: str) -> PPOTrainer:
name=name,
params=PPOParams(
get_v_critic_net_func=lambda: MyCriticNet(state_dim),
reward_discount=.0,
reward_discount=0.0,
grad_iters=10,
critic_loss_cls=torch.nn.SmoothL1Loss,
min_logp=None,
lam=.0,
lam=0.0,
clip_ratio=0.1,
),
)
27 changes: 7 additions & 20 deletions examples/cim/rl/config.py
@@ -1,11 +1,7 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

env_conf = {
"scenario": "cim",
"topology": "toy.4p_ssdd_l0.0",
"durations": 560
}
env_conf = {"scenario": "cim", "topology": "toy.4p_ssdd_l0.0", "durations": 560}
Collaborator: mark

Contributor Author: Ignore this file in the command.


if env_conf["topology"].startswith("toy"):
num_agents = int(env_conf["topology"].split(".")[1][0])
@@ -15,29 +11,20 @@
port_attributes = ["empty", "full", "on_shipper", "on_consignee", "booking", "shortage", "fulfillment"]
vessel_attributes = ["empty", "full", "remaining_space"]

state_shaping_conf = {
"look_back": 7,
"max_ports_downstream": 2
}
state_shaping_conf = {"look_back": 7, "max_ports_downstream": 2}

action_shaping_conf = {
"action_space": [(i - 10) / 10 for i in range(21)],
"finite_vessel_space": True,
"has_early_discharge": True
"has_early_discharge": True,
}

reward_shaping_conf = {
"time_window": 99,
"fulfillment_factor": 1.0,
"shortage_factor": 1.0,
"time_decay": 0.97
}
reward_shaping_conf = {"time_window": 99, "fulfillment_factor": 1.0, "shortage_factor": 1.0, "time_decay": 0.97}

# obtain state dimension from a temporary env_wrapper instance
state_dim = (
(state_shaping_conf["look_back"] + 1) * (state_shaping_conf["max_ports_downstream"] + 1) * len(port_attributes)
+ len(vessel_attributes)
)
state_dim = (state_shaping_conf["look_back"] + 1) * (state_shaping_conf["max_ports_downstream"] + 1) * len(
Collaborator: mark

Contributor Author: Add related descriptions into wiki.

port_attributes
) + len(vessel_attributes)

action_num = len(action_shaping_conf["action_space"])

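The collapse that drew the reviewer's comments above is the reverse behaviour: a multi-line literal with no trailing comma after its last element is joined onto one line once it fits within the line length. Below is a hypothetical standalone sketch of the env_conf change; keeping the original layout would typically mean either excluding the file from the run (black's `--exclude`/`--extend-exclude` options or the `[tool.black]` section of pyproject.toml) or adding a magic trailing comma after the last item, which tells black to keep the literal exploded.

```python
import black

# No trailing comma after the last item and the joined form fits within
# 120 characters, so black collapses the dict onto a single line.
source = (
    "env_conf = {\n"
    '    "scenario": "cim",\n'
    '    "topology": "toy.4p_ssdd_l0.0",\n'
    '    "durations": 560\n'
    "}\n"
)
print(black.format_str(source, mode=black.Mode(line_length=120)), end="")
# env_conf = {"scenario": "cim", "topology": "toy.4p_ssdd_l0.0", "durations": 560}
```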
31 changes: 21 additions & 10 deletions examples/cim/rl/env_sampler.py
@@ -9,28 +9,37 @@
from maro.simulator.scenarios.cim.common import Action, ActionType, DecisionEvent

from .config import (
action_shaping_conf, port_attributes, reward_shaping_conf, state_shaping_conf,
action_shaping_conf,
port_attributes,
reward_shaping_conf,
state_shaping_conf,
vessel_attributes,
)


class CIMEnvSampler(AbsEnvSampler):
def _get_global_and_agent_state_impl(
self, event: DecisionEvent, tick: int = None,
self,
event: DecisionEvent,
tick: int = None,
) -> Tuple[Union[None, np.ndarray, List[object]], Dict[Any, Union[np.ndarray, List[object]]]]:
tick = self._env.tick
vessel_snapshots, port_snapshots = self._env.snapshot_list["vessels"], self._env.snapshot_list["ports"]
port_idx, vessel_idx = event.port_idx, event.vessel_idx
ticks = [max(0, tick - rt) for rt in range(state_shaping_conf["look_back"] - 1)]
future_port_list = vessel_snapshots[tick: vessel_idx: 'future_stop_list'].astype('int')
state = np.concatenate([
port_snapshots[ticks: [port_idx] + list(future_port_list): port_attributes],
vessel_snapshots[tick: vessel_idx: vessel_attributes]
])
future_port_list = vessel_snapshots[tick:vessel_idx:"future_stop_list"].astype("int")
state = np.concatenate(
[
port_snapshots[ticks : [port_idx] + list(future_port_list) : port_attributes],
vessel_snapshots[tick:vessel_idx:vessel_attributes],
]
)
return state, {port_idx: state}

def _translate_to_env_action(
self, action_dict: Dict[Any, Union[np.ndarray, List[object]]], event: DecisionEvent,
self,
action_dict: Dict[Any, Union[np.ndarray, List[object]]],
event: DecisionEvent,
) -> Dict[Any, object]:
action_space = action_shaping_conf["action_space"]
finite_vsl_space = action_shaping_conf["finite_vessel_space"]
@@ -40,7 +49,7 @@ def _translate_to_env_action(

vsl_idx, action_scope = event.vessel_idx, event.action_scope
vsl_snapshots = self._env.snapshot_list["vessels"]
vsl_space = vsl_snapshots[self._env.tick:vsl_idx:vessel_attributes][2] if finite_vsl_space else float("inf")
vsl_space = vsl_snapshots[self._env.tick : vsl_idx : vessel_attributes][2] if finite_vsl_space else float("inf")

percent = abs(action_space[model_action[0]])
zero_action_idx = len(action_space) / 2 # index corresponding to value zero.
@@ -49,7 +58,7 @@
actual_action = min(round(percent * action_scope.load), vsl_space)
elif model_action > zero_action_idx:
action_type = ActionType.DISCHARGE
early_discharge = vsl_snapshots[self._env.tick:vsl_idx:"early_discharge"][0] if has_early_discharge else 0
early_discharge = (
vsl_snapshots[self._env.tick : vsl_idx : "early_discharge"][0] if has_early_discharge else 0
)
plan_action = percent * (action_scope.discharge + early_discharge) - early_discharge
actual_action = round(plan_action) if plan_action > 0 else round(percent * action_scope.discharge)
else:
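The spacing changes inside the snapshot lookups above follow black's slice rule: if any slice operand is more than a plain name or literal, spaces are placed around the colons, while lookups whose operands are all simple names (e.g. `vessel_snapshots[tick:vessel_idx:vessel_attributes]`) are left as-is. A small hypothetical sketch:

```python
import black

# `self._env.tick` is a complex slice operand, so black puts spaces around
# the colons, matching the change in _translate_to_env_action above.
source = "vsl_space = vsl_snapshots[self._env.tick:vsl_idx:vessel_attributes][2]\n"
print(black.format_str(source, mode=black.Mode(line_length=120)), end="")
# vsl_space = vsl_snapshots[self._env.tick : vsl_idx : vessel_attributes][2]
```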
16 changes: 5 additions & 11 deletions examples/cim/rl/rl_component_bundle.py
@@ -29,7 +29,7 @@ def get_env_sampler(self) -> AbsEnvSampler:
return CIMEnvSampler(self.env, self.test_env, reward_eval_delay=reward_shaping_conf["time_window"])

def get_agent2policy(self) -> Dict[Any, str]:
return {agent: f"{algorithm}_{agent}.policy"for agent in self.env.agent_idx_list}
return {agent: f"{algorithm}_{agent}.policy" for agent in self.env.agent_idx_list}

def get_policy_creator(self) -> Dict[str, Callable[[], AbsPolicy]]:
if algorithm == "ac":
@@ -60,23 +60,17 @@ def get_trainer_creator(self) -> Dict[str, Callable[[], AbsTrainer]]:
def get_trainer_creator(self) -> Dict[str, Callable[[], AbsTrainer]]:
if algorithm == "ac":
trainer_creator = {
f"{algorithm}_{i}": partial(get_ac, state_dim, f"{algorithm}_{i}")
for i in range(num_agents)
f"{algorithm}_{i}": partial(get_ac, state_dim, f"{algorithm}_{i}") for i in range(num_agents)
}
elif algorithm == "ppo":
trainer_creator = {
f"{algorithm}_{i}": partial(get_ppo, state_dim, f"{algorithm}_{i}")
for i in range(num_agents)
f"{algorithm}_{i}": partial(get_ppo, state_dim, f"{algorithm}_{i}") for i in range(num_agents)
}
elif algorithm == "dqn":
trainer_creator = {
f"{algorithm}_{i}": partial(get_dqn, f"{algorithm}_{i}")
for i in range(num_agents)
}
trainer_creator = {f"{algorithm}_{i}": partial(get_dqn, f"{algorithm}_{i}") for i in range(num_agents)}
elif algorithm == "discrete_maddpg":
trainer_creator = {
f"{algorithm}_{i}": partial(get_maddpg, state_dim, [1], f"{algorithm}_{i}")
for i in range(num_agents)
f"{algorithm}_{i}": partial(get_maddpg, state_dim, [1], f"{algorithm}_{i}") for i in range(num_agents)
}
else:
raise ValueError(f"Unsupported algorithm: {algorithm}")
9 changes: 7 additions & 2 deletions examples/citi_bike/greedy/launcher.py
@@ -63,8 +63,13 @@ def choose_action(self, decision_event: DecisionEvent):


if __name__ == "__main__":
env = Env(scenario=config.env.scenario, topology=config.env.topology, start_tick=config.env.start_tick,
durations=config.env.durations, snapshot_resolution=config.env.resolution)
env = Env(
scenario=config.env.scenario,
topology=config.env.topology,
start_tick=config.env.start_tick,
durations=config.env.durations,
snapshot_resolution=config.env.resolution,
)

if config.env.seed is not None:
env.set_seed(config.env.seed)