Skip to content

Commit

Permalink
fixed typo in rainbow DQN paper reference (#569)
Browse files (browse the repository at this point in the history)
* fixed typo in rainbow DQN paper ref

* fix gym==0.23 ci failure

Co-authored-by: Jiayi Weng <trinkle23897@gmail.com>
  • Loading branch information
abcamiletto and Trinkle23897 authored Mar 16, 2022
1 parent 39f8391 commit 2336a7d
Show file tree
Hide file tree
Showing 8 changed files with 15 additions and 14 deletions.
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
MIT License

Copyright (c) 2020 Tianshou contributors
Copyright (c) 2022 Tianshou contributors

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
Expand Down
2 changes: 1 addition & 1 deletion docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ Welcome to Tianshou!
* :class:`~tianshou.policy.DQNPolicy` `Double DQN <https://arxiv.org/pdf/1509.06461.pdf>`_
* :class:`~tianshou.policy.DQNPolicy` `Dueling DQN <https://arxiv.org/pdf/1511.06581.pdf>`_
* :class:`~tianshou.policy.C51Policy` `Categorical DQN <https://arxiv.org/pdf/1707.06887.pdf>`_
* :class:`~tianshou.policy.RainbowPolicy` `Rainbow DQN <https://arxiv.org/pdf/1707.02298.pdf>`_
* :class:`~tianshou.policy.RainbowPolicy` `Rainbow DQN <https://arxiv.org/pdf/1710.02298.pdf>`_
* :class:`~tianshou.policy.QRDQNPolicy` `Quantile Regression DQN <https://arxiv.org/pdf/1710.10044.pdf>`_
* :class:`~tianshou.policy.IQNPolicy` `Implicit Quantile Network <https://arxiv.org/pdf/1806.06923.pdf>`_
* :class:`~tianshou.policy.FQFPolicy` `Fully-parameterized Quantile Function <https://arxiv.org/pdf/1911.02140.pdf>`_
Expand Down
9 changes: 5 additions & 4 deletions tianshou/data/collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,14 +64,15 @@ def __init__(
super().__init__()
if isinstance(env, gym.Env) and not hasattr(env, "__len__"):
warnings.warn("Single environment detected, wrap to DummyVectorEnv.")
env = DummyVectorEnv([lambda: env])
self.env = env
self.env_num = len(env)
self.env = DummyVectorEnv([lambda: env]) # type: ignore
else:
self.env = env # type: ignore
self.env_num = len(self.env)
self.exploration_noise = exploration_noise
self._assign_buffer(buffer)
self.policy = policy
self.preprocess_fn = preprocess_fn
self._action_space = env.action_space
self._action_space = self.env.action_space
# avoid creating attribute outside __init__
self.reset(False)

Expand Down
2 changes: 1 addition & 1 deletion tianshou/env/pettingzoo_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from pettingzoo.utils.wrappers import BaseWrapper


class PettingZooEnv(AECEnv, gym.Env, ABC):
class PettingZooEnv(AECEnv, ABC):
"""The interface for petting zoo environments.
Multi-agent environments must be wrapped as
Expand Down
3 changes: 2 additions & 1 deletion tianshou/env/venvs.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from tianshou.utils import RunningMeanStd


class BaseVectorEnv(gym.Env):
class BaseVectorEnv(object):
"""Base class for vectorized environments wrapper.
Usage:
Expand Down Expand Up @@ -196,6 +196,7 @@ def _assert_id(self, id: Union[List[int], np.ndarray]) -> None:
assert i in self.ready_id, \
f"Can only interact with ready environments {self.ready_id}."

# TODO: compatibility issue with reset -> (obs, info)
def reset(
self, id: Optional[Union[int, List[int], np.ndarray]] = None
) -> np.ndarray:
Expand Down
4 changes: 2 additions & 2 deletions tianshou/env/worker/dummy.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@ def wait( # type: ignore

def send(self, action: Optional[np.ndarray]) -> None:
if action is None:
self.result = self.env.reset()
self.result = self.env.reset() # type: ignore
else:
self.result = self.env.step(action)
self.result = self.env.step(action) # type: ignore

def seed(self, seed: Optional[int] = None) -> List[int]:
super().seed(seed)
Expand Down
2 changes: 1 addition & 1 deletion tianshou/env/worker/subproc.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def _setup_buf(space: gym.Space) -> Union[dict, tuple, ShArray]:
assert isinstance(space.spaces, tuple)
return tuple([_setup_buf(t) for t in space.spaces])
else:
return ShArray(space.dtype, space.shape)
return ShArray(space.dtype, space.shape) # type: ignore


def _worker(
Expand Down
5 changes: 2 additions & 3 deletions tianshou/policy/modelfree/sac.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,9 +122,8 @@ def forward( # type: ignore
# You can check out the original SAC paper (arXiv 1801.01290): Eq 21.
# in appendix C to get some understanding of this equation.
if self.action_scaling and self.action_space is not None:
action_scale = to_torch_as(
(self.action_space.high - self.action_space.low) / 2.0, act
)
low, high = self.action_space.low, self.action_space.high # type: ignore
action_scale = to_torch_as((high - low) / 2.0, act)
else:
action_scale = 1.0 # type: ignore
squashed_action = torch.tanh(act)
Expand Down

0 comments on commit 2336a7d

Please sign in to comment.