Skip to content

Commit

Permalink
fix typo for clear buffer
Browse files Browse the repository at this point in the history
  • Loading branch information
ruoyuGao committed Mar 30, 2023
1 parent d7d16ac commit 2d741e8
Show file tree
Hide file tree
Showing 6 changed files with 10 additions and 14 deletions.
4 changes: 2 additions & 2 deletions ding/reward_model/gail_irl_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ class GailRewardModel(BaseRewardModel):
8 | ``hidden_size`` int 128 | Linear model hidden size |
9 | ``collect_count`` int 100000 | Expert dataset size | One entry is a (s,a)
| | | tuple
10 | ``clear_buffer_`` int 1 | clear buffer per fix iters | make sure replay
10 | ``clear_buffer_`` int 1 | clear buffer per fixed iters | make sure replay
| ``per_iters`` | buffer's data count
| | isn't too few.
| | (code work in entry)
Expand All @@ -155,7 +155,7 @@ class GailRewardModel(BaseRewardModel):
hidden_size=128,
# (int) Expert dataset size.
collect_count=100000,
# (int) Clear buffer per fix iters.
# (int) Clear the buffer every fixed number of iterations.
clear_buffer_per_iters=1,
)

Expand Down
4 changes: 2 additions & 2 deletions ding/reward_model/icm_reward_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ class ICMRewardModel(BaseRewardModel):
``reward_norm`` | extrinsic reward
12 | ``extrinsic_`` int 1 | the upper bound of the reward
``reward_norm_max`` | normalization
13 | ``clear_buffer`` int 1 | clear buffer per fix iters | make sure replay
13 | ``clear_buffer`` int 100 | clear buffer per fixed iters | make sure replay
``_per_iters`` | buffer's data count
| isn't too few.
| (code work in entry)
Expand Down Expand Up @@ -192,7 +192,7 @@ class ICMRewardModel(BaseRewardModel):
extrinsic_reward_norm=True,
# (int) The upper bound of the reward normalization.
extrinsic_reward_norm_max=1,
# (int) Clear buffer per fix iters.
# (int) Clear the buffer every fixed number of iterations.
clear_buffer_per_iters=100,
)

Expand Down
4 changes: 2 additions & 2 deletions ding/reward_model/pdeil_irl_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ class PdeilRewardModel(BaseRewardModel):
| ``action`` | |
4 | ``alpha`` float 0.5 | coefficient for Probability |
| | Density Estimator |
5 | ``clear_buffer`` int 1 | clear buffer per fix iters | make sure replay
5 | ``clear_buffer`` int 1 | clear buffer per fixed iters | make sure replay
``_per_iters`` | buffer's data count
| isn't too few.
| (code work in entry)
Expand All @@ -50,7 +50,7 @@ class PdeilRewardModel(BaseRewardModel):
# when alpha is close to 0, the estimator has high variance and low bias;
# when alpha is close to 1, the estimator has high bias and low variance.
alpha=0.5,
# (int) Clear buffer per fix iters.
# (int) Clear the buffer every fixed number of iterations.
clear_buffer_per_iters=1,
)

Expand Down
4 changes: 2 additions & 2 deletions ding/reward_model/pwil_irl_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ class PwilRewardModel(BaseRewardModel):
5 | ``beta`` int 5 | factor beta |
6 | ``s_size`` int 4 | state size |
7 | ``a_size`` int 2 | action size |
8 | ``clear_buffer`` int 1 | clear buffer per fix iters | make sure replay
8 | ``clear_buffer`` int 1 | clear buffer per fixed iters | make sure replay
``_per_iters`` | buffer's data count
| isn't too few.
| (code work in entry)
Expand All @@ -78,7 +78,7 @@ class PwilRewardModel(BaseRewardModel):
# s_size=4,
# (int) Action size.
# a_size=2,
# (int) Clear buffer per fix iters.
# (int) Clear the buffer every fixed number of iterations.
clear_buffer_per_iters=1,
)

Expand Down
4 changes: 2 additions & 2 deletions ding/reward_model/red_irl_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ class RedRewardModel(BaseRewardModel):
6 | ``hidden_size`` int 128 | Linear model hidden size |
7 | ``update_per_`` int 100 | Number of updates per collect |
| ``collect`` | |
8 | ``clear_buffer`` int 1 | clear buffer per fix iters | make sure replay
8 | ``clear_buffer`` int 1 | clear buffer per fixed iters | make sure replay
``_per_iters`` | buffer's data count
| isn't too few.
| (code work in entry)
Expand Down Expand Up @@ -80,7 +80,7 @@ class RedRewardModel(BaseRewardModel):
# (float) Hyperparameter at estimated score of r(s,a).
# r(s,a) = exp(-sigma* L(s,a))
sigma=0.5,
# (int) Clear buffer per fix iters.
# (int) Clear the buffer every fixed number of iterations.
clear_buffer_per_iters=1,
)

Expand Down
4 changes: 0 additions & 4 deletions ding/reward_model/rnd_reward_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,10 +81,6 @@ class RndRewardModel(BaseRewardModel):
``reward_norm``
12 | ``extrinsic_`` int 1 | the upper bound of the reward
``reward_norm_max`` | normalization
13 | ``clear_buffer`` int 1 | clear buffer per fix iters | make sure replay
``_per_iters`` | buffer's data count
| isn't too few.
| (code work in entry)
== ==================== ===== ============= ======================================= =======================
"""
config = dict(
Expand Down

0 comments on commit 2d741e8

Please sign in to comment.