Farama-Foundation · pseudo-rnd-thoughts · Mar 14, 2023 · Mar 14, 2023 · Mar 14, 2023 · Mar 14, 2023
diff --git a/minigrid/envs/babyai/goto.py b/minigrid/envs/babyai/goto.py
@@ -44,7 +44,7 @@ class GoToRedBallGrey(RoomGridLevel):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -110,7 +110,7 @@ class GoToRedBall(RoomGridLevel):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -173,7 +173,7 @@ class GoToRedBallNoDists(GoToRedBall):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -230,7 +230,7 @@ class GoToObj(RoomGridLevel):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -297,7 +297,7 @@ class GoToLocal(RoomGridLevel):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -373,7 +373,7 @@ class GoTo(RoomGridLevel):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -463,7 +463,7 @@ class GoToImpUnlock(RoomGridLevel):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -568,7 +568,7 @@ class GoToSeq(LevelGen):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -636,7 +636,7 @@ class GoToRedBlueBall(RoomGridLevel):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -712,7 +712,7 @@ class GoToDoor(RoomGridLevel):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -780,7 +780,7 @@ class GoToObjDoor(RoomGridLevel):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 

diff --git a/minigrid/envs/babyai/open.py b/minigrid/envs/babyai/open.py
@@ -51,7 +51,7 @@ class Open(RoomGridLevel):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -121,7 +121,7 @@ class OpenRedDoor(RoomGridLevel):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -183,7 +183,7 @@ class OpenDoor(RoomGridLevel):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -267,7 +267,7 @@ class OpenTwoDoors(RoomGridLevel):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -367,7 +367,7 @@ class OpenDoorsOrder(RoomGridLevel):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 

diff --git a/minigrid/envs/babyai/other.py b/minigrid/envs/babyai/other.py
@@ -64,7 +64,7 @@ class ActionObjDoor(RoomGridLevel):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -142,7 +142,7 @@ class FindObjS5(RoomGridLevel):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -211,7 +211,7 @@ class KeyCorridor(RoomGridLevel):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -305,7 +305,7 @@ class OneRoomS8(RoomGridLevel):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -372,7 +372,7 @@ class MoveTwoAcross(RoomGridLevel):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 

diff --git a/minigrid/envs/babyai/pickup.py b/minigrid/envs/babyai/pickup.py
@@ -47,7 +47,7 @@ class Pickup(RoomGridLevel):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -110,7 +110,7 @@ class UnblockPickup(RoomGridLevel):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -180,7 +180,7 @@ class PickupLoc(LevelGen):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -252,7 +252,7 @@ class PickupDist(RoomGridLevel):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -328,7 +328,7 @@ class PickupAbove(RoomGridLevel):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 

diff --git a/minigrid/envs/babyai/putnext.py b/minigrid/envs/babyai/putnext.py
@@ -47,7 +47,7 @@ class PutNextLocal(RoomGridLevel):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -119,7 +119,7 @@ class PutNext(RoomGridLevel):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 

diff --git a/minigrid/envs/babyai/synth.py b/minigrid/envs/babyai/synth.py
@@ -64,7 +64,7 @@ class Synth(LevelGen):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -152,7 +152,7 @@ class SynthLoc(LevelGen):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -257,7 +257,7 @@ class SynthSeq(LevelGen):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -359,7 +359,7 @@ class MiniBossLevel(LevelGen):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -462,7 +462,7 @@ class BossLevel(LevelGen):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -558,7 +558,7 @@ class BossLevelNoUnlock(LevelGen):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 

diff --git a/minigrid/envs/babyai/unlock.py b/minigrid/envs/babyai/unlock.py
@@ -48,7 +48,7 @@ class Unlock(RoomGridLevel):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -144,7 +144,7 @@ class UnlockLocal(RoomGridLevel):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -207,7 +207,7 @@ class KeyInBox(RoomGridLevel):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -274,7 +274,7 @@ class UnlockPickup(RoomGridLevel):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -349,7 +349,7 @@ class BlockedUnlockPickup(RoomGridLevel):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 
@@ -422,7 +422,7 @@ class UnlockToUnlock(RoomGridLevel):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 

diff --git a/minigrid/envs/blockedunlockpickup.py b/minigrid/envs/blockedunlockpickup.py
@@ -49,7 +49,7 @@ class BlockedUnlockPickupEnv(RoomGrid):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 

diff --git a/minigrid/envs/crossing.py b/minigrid/envs/crossing.py
@@ -54,7 +54,7 @@ class CrossingEnv(MiniGridEnv):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 

diff --git a/minigrid/envs/distshift.py b/minigrid/envs/distshift.py
@@ -45,7 +45,7 @@ class DistShiftEnv(MiniGridEnv):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination
 

diff --git a/minigrid/envs/doorkey.py b/minigrid/envs/doorkey.py
@@ -42,7 +42,7 @@ class DoorKeyEnv(MiniGridEnv):
 
     ## Rewards
 
-    A reward of '1' is given for success, and '0' for failure.
+    A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure.
 
     ## Termination