Merge pull request thu-ml#302 from thu-ml/dev

v0.4.0
BFAnas · Mar 2, 2021 · 3c9f257 · 3c9f257
2 parents 68ddc0c + 09775bd
commit 3c9f257
Show file tree

Hide file tree

Showing 100 changed files with 4,044 additions and 4,166 deletions.
diff --git a/README.md b/README.md
@@ -158,13 +158,14 @@ Currently, the overall code of Tianshou platform is less than 2500 lines. Most o
 ```python
 result = collector.collect(n_step=n)
 ```
-
-If you have 3 environments in total and want to collect 1 episode in the first environment, 3 for the third environment:
+If you have 3 environments in total and want to collect 4 episodes:
 
 ```python
-result = collector.collect(n_episode=[1, 0, 3])
+result = collector.collect(n_episode=4)
 ```
 
+The collector will collect exactly 4 episodes, without any bias toward episode length, even though we only have 3 parallel environments.
+
 If you want to train the given policy with a sampled batch:
 
 ```python
@@ -190,12 +191,13 @@ Define some hyper-parameters:
 ```python
 task = 'CartPole-v0'
 lr, epoch, batch_size = 1e-3, 10, 64
-train_num, test_num = 8, 100
+train_num, test_num = 10, 100
 gamma, n_step, target_freq = 0.9, 3, 320
 buffer_size = 20000
 eps_train, eps_test = 0.1, 0.05
-step_per_epoch, collect_per_step = 1000, 10
+step_per_epoch, step_per_collect = 10000, 10
 writer = SummaryWriter('log/dqn')  # tensorboard is also supported!
+logger = ts.utils.BasicLogger(writer)
 ```
 
 Make environments:
@@ -223,20 +225,20 @@ Setup policy and collectors:
 
 ```python
 policy = ts.policy.DQNPolicy(net, optim, gamma, n_step, target_update_freq=target_freq)
-train_collector = ts.data.Collector(policy, train_envs, ts.data.ReplayBuffer(buffer_size))
-test_collector = ts.data.Collector(policy, test_envs)
+train_collector = ts.data.Collector(policy, train_envs, ts.data.VectorReplayBuffer(buffer_size, train_num), exploration_noise=True)
+test_collector = ts.data.Collector(policy, test_envs, exploration_noise=True)  # because DQN uses epsilon-greedy method
 ```
 
 Let's train it:
 
 ```python
 result = ts.trainer.offpolicy_trainer(
-    policy, train_collector, test_collector, epoch, step_per_epoch, collect_per_step,
-    test_num, batch_size,
+    policy, train_collector, test_collector, epoch, step_per_epoch, step_per_collect,
+    test_num, batch_size, update_per_step=1 / step_per_collect,
     train_fn=lambda epoch, env_step: policy.set_eps(eps_train),
     test_fn=lambda epoch, env_step: policy.set_eps(eps_test),
     stop_fn=lambda mean_rewards: mean_rewards >= env.spec.reward_threshold,
-    writer=writer)
+    logger=logger)
 print(f'Finished training! Use {result["duration"]}')
 ```
 
@@ -252,7 +254,7 @@ Watch the performance with 35 FPS:
 ```python
 policy.eval()
 policy.set_eps(eps_test)
-collector = ts.data.Collector(policy, env)
+collector = ts.data.Collector(policy, env, exploration_noise=True)
 collector.collect(n_episode=1, render=1 / 35)
 ```
 

diff --git a/docs/api/tianshou.data.rst b/docs/api/tianshou.data.rst
@@ -1,7 +1,90 @@
 tianshou.data
 =============
 
-.. automodule:: tianshou.data
+
+Batch
+-----
+
+.. autoclass:: tianshou.data.Batch
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+
+Buffer
+------
+
+ReplayBuffer
+~~~~~~~~~~~~
+
+.. autoclass:: tianshou.data.ReplayBuffer
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+PrioritizedReplayBuffer
+~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: tianshou.data.PrioritizedReplayBuffer
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+ReplayBufferManager
+~~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: tianshou.data.ReplayBufferManager
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+PrioritizedReplayBufferManager
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: tianshou.data.PrioritizedReplayBufferManager
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+VectorReplayBuffer
+~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: tianshou.data.VectorReplayBuffer
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+PrioritizedVectorReplayBuffer
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: tianshou.data.PrioritizedVectorReplayBuffer
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+CachedReplayBuffer
+~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: tianshou.data.CachedReplayBuffer
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+Collector
+---------
+
+Collector
+~~~~~~~~~
+
+.. autoclass:: tianshou.data.Collector
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+AsyncCollector
+~~~~~~~~~~~~~~
+
+.. autoclass:: tianshou.data.AsyncCollector
    :members:
    :undoc-members:
    :show-inheritance:
diff --git a/docs/api/tianshou.env.rst b/docs/api/tianshou.env.rst
@@ -1,12 +1,82 @@
 tianshou.env
 ============
 
-.. automodule:: tianshou.env
+
+VectorEnv
+---------
+
+BaseVectorEnv
+~~~~~~~~~~~~~
+
+.. autoclass:: tianshou.env.BaseVectorEnv
    :members:
    :undoc-members:
    :show-inheritance:
 
-.. automodule:: tianshou.env.worker
+DummyVectorEnv
+~~~~~~~~~~~~~~
+
+.. autoclass:: tianshou.env.DummyVectorEnv
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+SubprocVectorEnv
+~~~~~~~~~~~~~~~~
+
+.. autoclass:: tianshou.env.SubprocVectorEnv
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+ShmemVectorEnv
+~~~~~~~~~~~~~~
+
+.. autoclass:: tianshou.env.ShmemVectorEnv
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+RayVectorEnv
+~~~~~~~~~~~~
+
+.. autoclass:: tianshou.env.RayVectorEnv
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+
+Worker
+------
+
+EnvWorker
+~~~~~~~~~
+
+.. autoclass:: tianshou.env.worker.EnvWorker
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+DummyEnvWorker
+~~~~~~~~~~~~~~
+
+.. autoclass:: tianshou.env.worker.DummyEnvWorker
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+SubprocEnvWorker
+~~~~~~~~~~~~~~~~
+
+.. autoclass:: tianshou.env.worker.SubprocEnvWorker
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+RayEnvWorker
+~~~~~~~~~~~~
+
+.. autoclass:: tianshou.env.worker.RayEnvWorker
    :members:
    :undoc-members:
    :show-inheritance:
diff --git a/docs/api/tianshou.policy.rst b/docs/api/tianshou.policy.rst
@@ -1,7 +1,106 @@
 tianshou.policy
 ===============
 
-.. automodule:: tianshou.policy
+Base
+----
+
+.. autoclass:: tianshou.policy.BasePolicy
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+.. autoclass:: tianshou.policy.RandomPolicy
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+Model-free
+----------
+
+DQN Family
+~~~~~~~~~~
+
+.. autoclass:: tianshou.policy.DQNPolicy
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+.. autoclass:: tianshou.policy.C51Policy
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+.. autoclass:: tianshou.policy.QRDQNPolicy
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+On-policy
+~~~~~~~~~
+
+.. autoclass:: tianshou.policy.PGPolicy
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+.. autoclass:: tianshou.policy.A2CPolicy
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+.. autoclass:: tianshou.policy.PPOPolicy
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+Off-policy
+~~~~~~~~~~
+
+.. autoclass:: tianshou.policy.DDPGPolicy
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+.. autoclass:: tianshou.policy.TD3Policy
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+.. autoclass:: tianshou.policy.SACPolicy
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+.. autoclass:: tianshou.policy.DiscreteSACPolicy
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+Imitation
+---------
+
+.. autoclass:: tianshou.policy.ImitationPolicy
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+.. autoclass:: tianshou.policy.DiscreteBCQPolicy
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+Model-based
+-----------
+
+.. autoclass:: tianshou.policy.PSRLPolicy
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+Multi-agent
+-----------
+
+.. autoclass:: tianshou.policy.MultiAgentPolicyManager
    :members:
    :undoc-members:
    :show-inheritance:
diff --git a/docs/api/tianshou.utils.rst b/docs/api/tianshou.utils.rst
@@ -6,16 +6,29 @@ tianshou.utils
    :undoc-members:
    :show-inheritance:
 
+
+Pre-defined Networks
+--------------------
+
+Common
+~~~~~~
+
 .. automodule:: tianshou.utils.net.common
    :members:
    :undoc-members:
    :show-inheritance:
 
+Discrete
+~~~~~~~~
+
 .. automodule:: tianshou.utils.net.discrete
    :members:
    :undoc-members:
    :show-inheritance:
 
+Continuous
+~~~~~~~~~~
+
 .. automodule:: tianshou.utils.net.continuous
    :members:
    :undoc-members:

diff --git a/docs/conf.py b/docs/conf.py
@@ -70,6 +70,7 @@
         ]
     )
 }
+autodoc_member_order = "bysource"
 bibtex_bibfiles = ['refs.bib']
 
 # -- Options for HTML output -------------------------------------------------

diff --git a/docs/contributor.rst b/docs/contributor.rst
@@ -7,3 +7,4 @@ We always welcome contributions to help make Tianshou better. Below are an incom
 * Minghao Zhang (`Mehooz <https://github.com/Mehooz>`_)
 * Alexis Duburcq (`duburcqa <https://github.com/duburcqa>`_)
 * Kaichao You (`youkaichao <https://github.com/youkaichao>`_)
+* Huayu Chen (`ChenDRAG <https://github.com/ChenDRAG>`_)
-Original file line number
+Diff line change
@@ Expand Up / @@ -70,6 +70,7 @@ @@
             ]
         )
     }
+    autodoc_member_order = "bysource"
     bibtex_bibfiles = ['refs.bib']
     # -- Options for HTML output -------------------------------------------------
@@ Expand Down @@