diff --git a/gymnasium/envs/mujoco/humanoidstandup_v5.py b/gymnasium/envs/mujoco/humanoidstandup_v5.py index 2d0b16451..1108f92c0 100644 --- a/gymnasium/envs/mujoco/humanoidstandup_v5.py +++ b/gymnasium/envs/mujoco/humanoidstandup_v5.py @@ -195,7 +195,7 @@ class HumanoidStandupEnv(MujocoEnv, utils.EzPickle): A reward for moving up (trying to stand up). This is not a relative reward, measuring how far up the robot has moved since the last timestep, but an absolute reward measuring how far up the Humanoid has moved up in total. - It is measured as $w_{uph} \times (z_{after-action} - 0)/dt$, + It is measured as $w_{uph} \times \frac{z_{after-action} - 0}{dt}$, where $z_{after-action}$ is the z coordinate of the torso after taking an action, and $dt$ is the time between actions, which depends on the `frame_skip` parameter (default is $5$), and `frametime`, which is $0.01$ - so the default is $dt = 5 \times 0.01 = 0.05$,