From 87f752287b617d1e48d2068c52d5dcd5621aa0d1 Mon Sep 17 00:00:00 2001 From: Trinkle23897 Date: Sat, 24 Apr 2021 17:42:58 +0800 Subject: [PATCH 1/8] fix np.stack ValueError --- tianshou/env/venvs.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/tianshou/env/venvs.py b/tianshou/env/venvs.py index b2fc73b33..8741c5a4b 100644 --- a/tianshou/env/venvs.py +++ b/tianshou/env/venvs.py @@ -163,7 +163,11 @@ def reset( id = self._wrap_id(id) if self.is_async: self._assert_id(id) - obs = np.stack([self.workers[i].reset() for i in id]) + obs = [self.workers[i].reset() for i in id] + try: + obs = np.stack(obs) + except ValueError: # different len(obs) + obs = np.array(obs, dtype=object) if self.obs_rms and self.update_obs_rms: self.obs_rms.update(obs) return self.normalize_obs(obs) @@ -236,7 +240,13 @@ def step( info["env_id"] = env_id result.append((obs, rew, done, info)) self.ready_id.append(env_id) - obs_stack, rew_stack, done_stack, info_stack = map(np.stack, zip(*result)) + obs_stack, rew_stack, done_stack, info_stack = zip(*result) + try: + obs_stack = np.stack(obs_stack) + except ValueError: # different len(obs) + obs_stack = np.array(obs_stack, dtype=object) + rew_stack, done_stack, info_stack = map( + np.stack, [rew_stack, done_stack, info_stack]) if self.obs_rms and self.update_obs_rms: self.obs_rms.update(obs_stack) return [self.normalize_obs(obs_stack), rew_stack, done_stack, info_stack] From f5ad3b29fac5cf897bcdb7be4a5c47151d4920e4 Mon Sep 17 00:00:00 2001 From: Trinkle23897 Date: Sat, 24 Apr 2021 19:42:27 +0800 Subject: [PATCH 2/8] add test --- setup.py | 1 + test/base/env.py | 29 +++++++++++++++++++++++++++++ test/base/test_env.py | 15 +++++++++++++-- tianshou/env/venvs.py | 20 ++++++++++---------- 4 files changed, 53 insertions(+), 12 deletions(-) diff --git a/setup.py b/setup.py index 24220c2bd..04149ea1e 100644 --- a/setup.py +++ b/setup.py @@ -62,6 +62,7 @@ def get_version() -> str: "pytest", "pytest-cov", "ray>=1.0.0", + "networkx", "mypy", "pydocstyle", "doc8", diff --git a/test/base/env.py b/test/base/env.py index 68acdd714..1151f5b76 100644 --- a/test/base/env.py +++ b/test/base/env.py @@ -2,6 +2,8 @@ import time import random import numpy as np +import networkx as nx +from copy import deepcopy from gym.spaces import Discrete, MultiDiscrete, Box, Dict, Tuple @@ -107,3 +109,30 @@ def step(self, action): self.done = self.index == self.size return self._get_state(), self._get_reward(), \ self.done, {'key': 1, 'env': self} + + +class NXEnv(gym.Env): + def __init__(self, size, obs_type, feat_dim=32): + self.size = size + self.feat_dim = feat_dim + self.graph = nx.Graph() + self.graph.add_nodes_from(list(range(size))) + assert obs_type in ["array", "object"] + self.obs_type = obs_type + + def _encode_obs(self): + if self.obs_type == "array": + return np.stack([v["data"] for v in self.graph._node.values()]) + return deepcopy(self.graph) + + def reset(self): + graph_state = np.random.rand(self.size, self.feat_dim) + for i in range(self.size): + self.graph.nodes[i]["data"] = graph_state[i] + return self._encode_obs() + + def step(self, action): + next_graph_state = np.random.rand(self.size, self.feat_dim) + for i in range(self.size): + self.graph.nodes[i]["data"] = next_graph_state[i] + return self._encode_obs(), 1.0, 0, {} diff --git a/test/base/test_env.py b/test/base/test_env.py index 12ef60cba..a7ddd353e 100644 --- a/test/base/test_env.py +++ b/test/base/test_env.py @@ -6,9 +6,9 @@ ShmemVectorEnv, RayVectorEnv if __name__ == '__main__': - from env import MyTestEnv + from env import MyTestEnv, NXEnv else: # pytest - from test.base.env import MyTestEnv + from test.base.env import MyTestEnv, NXEnv def has_ray(): @@ -167,7 +167,18 @@ def test_vecenv(size=10, num=8, sleep=0.001): v.close() +def test_env_obs(): + for obs_type in ["array", "object"]: + envs = SubprocVectorEnv([ + lambda i=x: NXEnv(i, obs_type) for x in [5, 10, 15, 20]]) + obs = envs.reset() + assert obs.dtype == object + obs = envs.step([1, 1, 1, 1])[0] + assert obs.dtype == object + + if __name__ == '__main__': + test_env_obs() test_vecenv() test_async_env() test_async_check_id() diff --git a/tianshou/env/venvs.py b/tianshou/env/venvs.py index 8741c5a4b..591c4251e 100644 --- a/tianshou/env/venvs.py +++ b/tianshou/env/venvs.py @@ -1,6 +1,6 @@ import gym import numpy as np -from typing import Any, List, Union, Optional, Callable +from typing import Any, List, Tuple, Union, Optional, Callable from tianshou.utils import RunningMeanStd from tianshou.env.worker import EnvWorker, DummyEnvWorker, SubprocEnvWorker, \ @@ -163,11 +163,11 @@ def reset( id = self._wrap_id(id) if self.is_async: self._assert_id(id) - obs = [self.workers[i].reset() for i in id] + obs_list = [self.workers[i].reset() for i in id] try: - obs = np.stack(obs) + obs = np.stack(obs_list) except ValueError: # different len(obs) - obs = np.array(obs, dtype=object) + obs = np.array(obs_list, dtype=object) if self.obs_rms and self.update_obs_rms: self.obs_rms.update(obs) return self.normalize_obs(obs) @@ -176,7 +176,7 @@ def step( self, action: np.ndarray, id: Optional[Union[int, List[int], np.ndarray]] = None - ) -> List[np.ndarray]: + ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: """Run one timestep of some environments' dynamics. If id is None, run one timestep of all the environments’ dynamics; @@ -240,16 +240,16 @@ def step( info["env_id"] = env_id result.append((obs, rew, done, info)) self.ready_id.append(env_id) - obs_stack, rew_stack, done_stack, info_stack = zip(*result) + obs_list, rew_list, done_list, info_list = zip(*result) try: - obs_stack = np.stack(obs_stack) + obs_stack = np.stack(obs_list) except ValueError: # different len(obs) - obs_stack = np.array(obs_stack, dtype=object) + obs_stack = np.array(obs_list, dtype=object) rew_stack, done_stack, info_stack = map( - np.stack, [rew_stack, done_stack, info_stack]) + np.stack, [rew_list, done_list, info_list]) if self.obs_rms and self.update_obs_rms: self.obs_rms.update(obs_stack) - return [self.normalize_obs(obs_stack), rew_stack, done_stack, info_stack] + return self.normalize_obs(obs_stack), rew_stack, done_stack, info_stack def seed( self, seed: Optional[Union[int, List[int]]] = None From 0df13eeadc53b4b5afd5fafd1583b4df852d5422 Mon Sep 17 00:00:00 2001 From: Trinkle23897 Date: Sat, 24 Apr 2021 20:13:02 +0800 Subject: [PATCH 3/8] fix batch --- test/base/test_batch.py | 12 ++++++++++-- test/base/test_collector.py | 13 +++++++++++-- tianshou/data/batch.py | 4 +++- 3 files changed, 24 insertions(+), 5 deletions(-) diff --git a/test/base/test_batch.py b/test/base/test_batch.py index bc91025c1..15357f16c 100644 --- a/test/base/test_batch.py +++ b/test/base/test_batch.py @@ -4,6 +4,7 @@ import pickle import pytest import numpy as np +import networkx as nx from itertools import starmap from tianshou.data import Batch, to_torch, to_numpy @@ -36,8 +37,7 @@ def test_batch(): assert 'a' not in b with pytest.raises(AssertionError): Batch({1: 2}) - with pytest.raises(TypeError): - Batch(a=[np.zeros((2, 3)), np.zeros((3, 3))]) + assert Batch(a=[np.zeros((2, 3)), np.zeros((3, 3))]).a.dtype == object with pytest.raises(TypeError): Batch(a=[np.zeros((3, 2)), np.zeros((3, 3))]) with pytest.raises(TypeError): @@ -170,6 +170,14 @@ def test_batch(): assert a.a[0] is None and a.a[1] is None assert a.b[0] is None and a.b[1] is None + # nx.Graph corner case + assert Batch(a=np.array([nx.Graph(), nx.Graph()], dtype=object)).a.dtype == object + g1 = nx.Graph() + g1.add_nodes_from(list(range(10))) + g2 = nx.Graph() + g2.add_nodes_from(list(range(20))) + assert Batch(a=np.array([g1, g2])).a.dtype == object + def test_batch_over_batch(): batch = Batch(a=[3, 4, 5], b=[4, 5, 6]) diff --git a/test/base/test_collector.py b/test/base/test_collector.py index b9d789193..b275526f7 100644 --- a/test/base/test_collector.py +++ b/test/base/test_collector.py @@ -14,9 +14,9 @@ ) if __name__ == '__main__': - from env import MyTestEnv + from env import MyTestEnv, NXEnv else: # pytest - from test.base.env import MyTestEnv + from test.base.env import MyTestEnv, NXEnv class MyPolicy(BasePolicy): @@ -137,6 +137,15 @@ def test_collector(): with pytest.raises(TypeError): c2.collect() + # test NXEnv + for obs_type in ["array", "object"]: + envs = SubprocVectorEnv([ + lambda i=x: NXEnv(i, obs_type) for x in [5, 10, 15, 20]]) + c3 = Collector(policy, envs, + VectorReplayBuffer(total_size=100, buffer_num=4)) + c3.collect(n_step=6) + assert c3.buffer.obs.dtype == object + def test_collector_with_async(): env_lens = [2, 3, 4, 5] diff --git a/tianshou/data/batch.py b/tianshou/data/batch.py index ae907f00f..55a00dacd 100644 --- a/tianshou/data/batch.py +++ b/tianshou/data/batch.py @@ -65,7 +65,9 @@ def _to_array_with_correct_type(v: Any) -> np.ndarray: # array([{}, array({}, dtype=object)], dtype=object) if not v.shape: v = v.item(0) - elif any(isinstance(e, (np.ndarray, torch.Tensor)) for e in v.reshape(-1)): + elif all(isinstance(e, np.ndarray) for e in v.reshape(-1)): + return v # various length, np.array([[1], [2, 3], [4, 5, 6]]) + elif any(isinstance(e, torch.Tensor) for e in v.reshape(-1)): raise ValueError("Numpy arrays of tensors are not supported yet.") return v From 009daadfe7fe18b68426202511de76f90aa76e47 Mon Sep 17 00:00:00 2001 From: Trinkle23897 Date: Sat, 24 Apr 2021 20:36:21 +0800 Subject: [PATCH 4/8] remove BaseVectorEnv.__del__ and see what happens in extra sys test --- .github/workflows/extra_sys.yml | 2 +- test/base/test_env.py | 7 +++++-- tianshou/env/venvs.py | 5 ----- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/.github/workflows/extra_sys.yml b/.github/workflows/extra_sys.yml index 4716cca23..9fcb226e5 100644 --- a/.github/workflows/extra_sys.yml +++ b/.github/workflows/extra_sys.yml @@ -24,4 +24,4 @@ jobs: python -m pip install ".[dev]" --upgrade - name: Test with pytest run: | - pytest test/base test/continuous --ignore-glob "*env.py" --cov=tianshou --durations=0 -v + pytest test/base test/continuous --cov=tianshou --durations=0 -v diff --git a/test/base/test_env.py b/test/base/test_env.py index a7ddd353e..ef9474edb 100644 --- a/test/base/test_env.py +++ b/test/base/test_env.py @@ -1,3 +1,4 @@ +import sys import time import numpy as np from gym.spaces.discrete import Discrete @@ -79,7 +80,8 @@ def test_async_env(size=10000, num=8, sleep=0.1): Batch.cat(o) v.close() # assure 1/7 improvement - assert spent_time < 6.0 * sleep * num / (num + 1) + if sys.platform != "darwin": # macOS cannot pass this check + assert spent_time < 6.0 * sleep * num / (num + 1) def test_async_check_id(size=100, num=4, sleep=.2, timeout=.7): @@ -116,7 +118,8 @@ def test_async_check_id(size=100, num=4, sleep=.2, timeout=.7): pass_check = 0 break total_pass += pass_check - assert total_pass >= 2 + if sys.platform != "darwin": # macOS cannot pass this check + assert total_pass >= 2 def test_vecenv(size=10, num=8, sleep=0.001): diff --git a/tianshou/env/venvs.py b/tianshou/env/venvs.py index 591c4251e..7ac76017f 100644 --- a/tianshou/env/venvs.py +++ b/tianshou/env/venvs.py @@ -302,11 +302,6 @@ def normalize_obs(self, obs: np.ndarray) -> np.ndarray: obs = np.clip(obs, -clip_max, clip_max) # type: ignore return obs - def __del__(self) -> None: - """Redirect to self.close().""" - if not self.is_closed: - self.close() - class DummyVectorEnv(BaseVectorEnv): """Dummy vectorized environment wrapper, implemented in for-loop. From cb3772c2223b1b085f5c5e2eddc91186e3eaa0de Mon Sep 17 00:00:00 2001 From: Trinkle23897 Date: Sat, 24 Apr 2021 20:52:55 +0800 Subject: [PATCH 5/8] drop 3.6 --- .github/workflows/extra_sys.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/extra_sys.yml b/.github/workflows/extra_sys.yml index 9fcb226e5..124ec40d4 100644 --- a/.github/workflows/extra_sys.yml +++ b/.github/workflows/extra_sys.yml @@ -9,7 +9,7 @@ jobs: strategy: matrix: os: [macos-latest, windows-latest] - python-version: [3.6, 3.7, 3.8] + python-version: [3.7, 3.8] steps: - uses: actions/checkout@v2 - name: Set up Python ${{ matrix.python-version }} From 23d74bc433251e272a6242deb63898a5b92d5f53 Mon Sep 17 00:00:00 2001 From: Trinkle23897 Date: Sat, 24 Apr 2021 21:02:26 +0800 Subject: [PATCH 6/8] fix chart error --- docs/_static/js/benchmark.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/_static/js/benchmark.js b/docs/_static/js/benchmark.js index d29810164..8b7b58043 100644 --- a/docs/_static/js/benchmark.js +++ b/docs/_static/js/benchmark.js @@ -14,7 +14,7 @@ function showEnv(elem) { var dataSource = { $schema: "https://vega.github.io/schema/vega-lite/v5.json", data: { - url: "/_static/js/mujoco/benchmark/" + selectEnv + "/result.json" + url: "/en/master/_static/js/mujoco/benchmark/" + selectEnv + "/result.json" }, mark: "line", height: 400, From 68ae2167b63663f0888770349a1a50bd6a3047d6 Mon Sep 17 00:00:00 2001 From: Trinkle23897 Date: Sat, 24 Apr 2021 21:50:00 +0800 Subject: [PATCH 7/8] fix test --- tianshou/policy/modelfree/trpo.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tianshou/policy/modelfree/trpo.py b/tianshou/policy/modelfree/trpo.py index 9d456878c..bf1e723e6 100644 --- a/tianshou/policy/modelfree/trpo.py +++ b/tianshou/policy/modelfree/trpo.py @@ -59,7 +59,6 @@ def __init__( **kwargs: Any, ) -> None: super().__init__(actor, critic, optim, dist_fn, **kwargs) - del self._step_size self._max_backtracks = max_backtracks self._delta = max_kl self._backtrack_coeff = backtrack_coeff From 2c1cc1d9717725042b3a4efa910a8b9944ba4dc1 Mon Sep 17 00:00:00 2001 From: Trinkle23897 Date: Sat, 24 Apr 2021 22:00:34 +0800 Subject: [PATCH 8/8] fix test --- tianshou/policy/modelfree/trpo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tianshou/policy/modelfree/trpo.py b/tianshou/policy/modelfree/trpo.py index bf1e723e6..78313e637 100644 --- a/tianshou/policy/modelfree/trpo.py +++ b/tianshou/policy/modelfree/trpo.py @@ -122,7 +122,7 @@ def learn( # type: ignore " are poor and need to be changed.") # optimize citirc - for _ in range(self._optim_critic_iters): + for _ in range(self._optim_critic_iters): # type: ignore value = self.critic(b.obs).flatten() vf_loss = F.mse_loss(b.returns, value) self.optim.zero_grad()