diff --git a/minigrid/envs/babyai/goto.py b/minigrid/envs/babyai/goto.py index a99179cf9..59f987c22 100644 --- a/minigrid/envs/babyai/goto.py +++ b/minigrid/envs/babyai/goto.py @@ -44,7 +44,7 @@ class GoToRedBallGrey(RoomGridLevel): ## Rewards - A reward of '1' is given for success, and '0' for failure. + A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure. ## Termination @@ -110,7 +110,7 @@ class GoToRedBall(RoomGridLevel): ## Rewards - A reward of '1' is given for success, and '0' for failure. + A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure. ## Termination @@ -173,7 +173,7 @@ class GoToRedBallNoDists(GoToRedBall): ## Rewards - A reward of '1' is given for success, and '0' for failure. + A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure. ## Termination @@ -230,7 +230,7 @@ class GoToObj(RoomGridLevel): ## Rewards - A reward of '1' is given for success, and '0' for failure. + A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure. ## Termination @@ -297,7 +297,7 @@ class GoToLocal(RoomGridLevel): ## Rewards - A reward of '1' is given for success, and '0' for failure. + A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure. ## Termination @@ -373,7 +373,7 @@ class GoTo(RoomGridLevel): ## Rewards - A reward of '1' is given for success, and '0' for failure. + A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure. ## Termination @@ -463,7 +463,7 @@ class GoToImpUnlock(RoomGridLevel): ## Rewards - A reward of '1' is given for success, and '0' for failure. + A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure. ## Termination @@ -568,7 +568,7 @@ class GoToSeq(LevelGen): ## Rewards - A reward of '1' is given for success, and '0' for failure. 
+ A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure. ## Termination @@ -636,7 +636,7 @@ class GoToRedBlueBall(RoomGridLevel): ## Rewards - A reward of '1' is given for success, and '0' for failure. + A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure. ## Termination @@ -712,7 +712,7 @@ class GoToDoor(RoomGridLevel): ## Rewards - A reward of '1' is given for success, and '0' for failure. + A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure. ## Termination @@ -780,7 +780,7 @@ class GoToObjDoor(RoomGridLevel): ## Rewards - A reward of '1' is given for success, and '0' for failure. + A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure. ## Termination diff --git a/minigrid/envs/babyai/open.py b/minigrid/envs/babyai/open.py index eaafba60a..0e27fd294 100644 --- a/minigrid/envs/babyai/open.py +++ b/minigrid/envs/babyai/open.py @@ -51,7 +51,7 @@ class Open(RoomGridLevel): ## Rewards - A reward of '1' is given for success, and '0' for failure. + A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure. ## Termination @@ -121,7 +121,7 @@ class OpenRedDoor(RoomGridLevel): ## Rewards - A reward of '1' is given for success, and '0' for failure. + A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure. ## Termination @@ -183,7 +183,7 @@ class OpenDoor(RoomGridLevel): ## Rewards - A reward of '1' is given for success, and '0' for failure. + A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure. ## Termination @@ -267,7 +267,7 @@ class OpenTwoDoors(RoomGridLevel): ## Rewards - A reward of '1' is given for success, and '0' for failure. + A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure. 
## Termination @@ -367,7 +367,7 @@ class OpenDoorsOrder(RoomGridLevel): ## Rewards - A reward of '1' is given for success, and '0' for failure. + A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure. ## Termination diff --git a/minigrid/envs/babyai/other.py b/minigrid/envs/babyai/other.py index 389977100..0e29315b3 100644 --- a/minigrid/envs/babyai/other.py +++ b/minigrid/envs/babyai/other.py @@ -64,7 +64,7 @@ class ActionObjDoor(RoomGridLevel): ## Rewards - A reward of '1' is given for success, and '0' for failure. + A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure. ## Termination @@ -142,7 +142,7 @@ class FindObjS5(RoomGridLevel): ## Rewards - A reward of '1' is given for success, and '0' for failure. + A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure. ## Termination @@ -211,7 +211,7 @@ class KeyCorridor(RoomGridLevel): ## Rewards - A reward of '1' is given for success, and '0' for failure. + A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure. ## Termination @@ -305,7 +305,7 @@ class OneRoomS8(RoomGridLevel): ## Rewards - A reward of '1' is given for success, and '0' for failure. + A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure. ## Termination @@ -372,7 +372,7 @@ class MoveTwoAcross(RoomGridLevel): ## Rewards - A reward of '1' is given for success, and '0' for failure. + A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure. ## Termination diff --git a/minigrid/envs/babyai/pickup.py b/minigrid/envs/babyai/pickup.py index 9ff019a40..81ca88d86 100644 --- a/minigrid/envs/babyai/pickup.py +++ b/minigrid/envs/babyai/pickup.py @@ -47,7 +47,7 @@ class Pickup(RoomGridLevel): ## Rewards - A reward of '1' is given for success, and '0' for failure. 
+ A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure. ## Termination @@ -110,7 +110,7 @@ class UnblockPickup(RoomGridLevel): ## Rewards - A reward of '1' is given for success, and '0' for failure. + A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure. ## Termination @@ -180,7 +180,7 @@ class PickupLoc(LevelGen): ## Rewards - A reward of '1' is given for success, and '0' for failure. + A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure. ## Termination @@ -252,7 +252,7 @@ class PickupDist(RoomGridLevel): ## Rewards - A reward of '1' is given for success, and '0' for failure. + A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure. ## Termination @@ -328,7 +328,7 @@ class PickupAbove(RoomGridLevel): ## Rewards - A reward of '1' is given for success, and '0' for failure. + A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure. ## Termination diff --git a/minigrid/envs/babyai/putnext.py b/minigrid/envs/babyai/putnext.py index 1a81d300d..3e3ed0a01 100644 --- a/minigrid/envs/babyai/putnext.py +++ b/minigrid/envs/babyai/putnext.py @@ -47,7 +47,7 @@ class PutNextLocal(RoomGridLevel): ## Rewards - A reward of '1' is given for success, and '0' for failure. + A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure. ## Termination @@ -119,7 +119,7 @@ class PutNext(RoomGridLevel): ## Rewards - A reward of '1' is given for success, and '0' for failure. + A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure. ## Termination diff --git a/minigrid/envs/babyai/synth.py b/minigrid/envs/babyai/synth.py index 439d4bcb5..7f60c0300 100644 --- a/minigrid/envs/babyai/synth.py +++ b/minigrid/envs/babyai/synth.py @@ -64,7 +64,7 @@ class Synth(LevelGen): ## Rewards - A reward of '1' is given for success, and '0' for failure. 
+ A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure. ## Termination @@ -152,7 +152,7 @@ class SynthLoc(LevelGen): ## Rewards - A reward of '1' is given for success, and '0' for failure. + A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure. ## Termination @@ -257,7 +257,7 @@ class SynthSeq(LevelGen): ## Rewards - A reward of '1' is given for success, and '0' for failure. + A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure. ## Termination @@ -359,7 +359,7 @@ class MiniBossLevel(LevelGen): ## Rewards - A reward of '1' is given for success, and '0' for failure. + A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure. ## Termination @@ -462,7 +462,7 @@ class BossLevel(LevelGen): ## Rewards - A reward of '1' is given for success, and '0' for failure. + A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure. ## Termination @@ -558,7 +558,7 @@ class BossLevelNoUnlock(LevelGen): ## Rewards - A reward of '1' is given for success, and '0' for failure. + A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure. ## Termination diff --git a/minigrid/envs/babyai/unlock.py b/minigrid/envs/babyai/unlock.py index 4a58a343e..0bc9d6d53 100644 --- a/minigrid/envs/babyai/unlock.py +++ b/minigrid/envs/babyai/unlock.py @@ -48,7 +48,7 @@ class Unlock(RoomGridLevel): ## Rewards - A reward of '1' is given for success, and '0' for failure. + A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure. ## Termination @@ -144,7 +144,7 @@ class UnlockLocal(RoomGridLevel): ## Rewards - A reward of '1' is given for success, and '0' for failure. + A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure. 
## Termination @@ -207,7 +207,7 @@ class KeyInBox(RoomGridLevel): ## Rewards - A reward of '1' is given for success, and '0' for failure. + A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure. ## Termination @@ -274,7 +274,7 @@ class UnlockPickup(RoomGridLevel): ## Rewards - A reward of '1' is given for success, and '0' for failure. + A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure. ## Termination @@ -349,7 +349,7 @@ class BlockedUnlockPickup(RoomGridLevel): ## Rewards - A reward of '1' is given for success, and '0' for failure. + A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure. ## Termination @@ -422,7 +422,7 @@ class UnlockToUnlock(RoomGridLevel): ## Rewards - A reward of '1' is given for success, and '0' for failure. + A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure. ## Termination diff --git a/minigrid/envs/blockedunlockpickup.py b/minigrid/envs/blockedunlockpickup.py index 942a8964a..30490d14a 100644 --- a/minigrid/envs/blockedunlockpickup.py +++ b/minigrid/envs/blockedunlockpickup.py @@ -49,7 +49,7 @@ class BlockedUnlockPickupEnv(RoomGrid): ## Rewards - A reward of '1' is given for success, and '0' for failure. + A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure. ## Termination diff --git a/minigrid/envs/crossing.py b/minigrid/envs/crossing.py index c0acf6088..45314031b 100644 --- a/minigrid/envs/crossing.py +++ b/minigrid/envs/crossing.py @@ -54,7 +54,7 @@ class CrossingEnv(MiniGridEnv): ## Rewards - A reward of '1' is given for success, and '0' for failure. + A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure. 
## Termination diff --git a/minigrid/envs/distshift.py b/minigrid/envs/distshift.py index 30579c1ae..cc9289ed4 100644 --- a/minigrid/envs/distshift.py +++ b/minigrid/envs/distshift.py @@ -45,7 +45,7 @@ class DistShiftEnv(MiniGridEnv): ## Rewards - A reward of '1' is given for success, and '0' for failure. + A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure. ## Termination diff --git a/minigrid/envs/doorkey.py b/minigrid/envs/doorkey.py index 221fe1753..0f83fe848 100644 --- a/minigrid/envs/doorkey.py +++ b/minigrid/envs/doorkey.py @@ -42,7 +42,7 @@ class DoorKeyEnv(MiniGridEnv): ## Rewards - A reward of '1' is given for success, and '0' for failure. + A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure. ## Termination diff --git a/minigrid/envs/dynamicobstacles.py b/minigrid/envs/dynamicobstacles.py index 817468b7c..900005978 100644 --- a/minigrid/envs/dynamicobstacles.py +++ b/minigrid/envs/dynamicobstacles.py @@ -47,7 +47,7 @@ class DynamicObstaclesEnv(MiniGridEnv): ## Rewards - A reward of '1' is given for success, and '0' for failure. A '-1' penalty is + A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure. A '-1' penalty is subtracted if the agent collides with an obstacle. ## Termination diff --git a/minigrid/envs/empty.py b/minigrid/envs/empty.py index 6a0389e1d..f4cb7c8b0 100644 --- a/minigrid/envs/empty.py +++ b/minigrid/envs/empty.py @@ -45,7 +45,7 @@ class EmptyEnv(MiniGridEnv): ## Rewards - A reward of '1' is given for success, and '0' for failure. + A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure. ## Termination diff --git a/minigrid/envs/fetch.py b/minigrid/envs/fetch.py index 3bc3408c6..887562f19 100644 --- a/minigrid/envs/fetch.py +++ b/minigrid/envs/fetch.py @@ -51,7 +51,7 @@ class FetchEnv(MiniGridEnv): ## Rewards - A reward of '1' is given for success, and '0' for failure. 
+ A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure. ## Termination diff --git a/minigrid/envs/fourrooms.py b/minigrid/envs/fourrooms.py index dcb87fddd..046dc271a 100644 --- a/minigrid/envs/fourrooms.py +++ b/minigrid/envs/fourrooms.py @@ -42,7 +42,7 @@ class FourRoomsEnv(MiniGridEnv): ## Rewards - A reward of '1' is given for success, and '0' for failure. + A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure. ## Termination diff --git a/minigrid/envs/gotodoor.py b/minigrid/envs/gotodoor.py index deb288f4c..5018d3afb 100644 --- a/minigrid/envs/gotodoor.py +++ b/minigrid/envs/gotodoor.py @@ -46,7 +46,7 @@ class GoToDoorEnv(MiniGridEnv): ## Rewards - A reward of '1' is given for success, and '0' for failure. + A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure. ## Termination diff --git a/minigrid/envs/gotoobject.py b/minigrid/envs/gotoobject.py index 8bd8b841f..a9a844567 100644 --- a/minigrid/envs/gotoobject.py +++ b/minigrid/envs/gotoobject.py @@ -9,8 +9,58 @@ class GoToObjectEnv(MiniGridEnv): """ - Environment in which the agent is instructed to go to a given object - named using an English text string + ## Description + + This environment is a room with colored objects. The agent + receives a textual (mission) string as input, telling it which colored object to go + to, (eg: "go to the red key"). It receives a positive reward for performing + the `done` action next to the correct object, as indicated in the mission + string. + + ## Mission Space + + "go to the {color} {obj_type}" + + {color} is the color of the object. Can be "red", "green", "blue", "purple", + "yellow" or "grey". + {obj_type} is the type of the object. Can be "key", "ball", "box". 
+ + ## Action Space + + | Num | Name | Action | + |-----|--------------|----------------------| + | 0 | left | Turn left | + | 1 | right | Turn right | + | 2 | forward | Move forward | + | 3 | pickup | Unused | + | 4 | drop | Unused | + | 5 | toggle | Unused | + | 6 | done | Done completing task | + + ## Observation Encoding + + - Each tile is encoded as a 3 dimensional tuple: + `(OBJECT_IDX, COLOR_IDX, STATE)` + - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in + [minigrid/minigrid.py](minigrid/minigrid.py) + - `STATE` refers to the door state with 0=open, 1=closed and 2=locked + + ## Rewards + + A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure. + + ## Termination + + The episode ends if any one of the following conditions is met: + + 1. The agent stands next to the correct object and performs the `done` action. + 2. Timeout (see `max_steps`). + + ## Registered Configurations + + - `MiniGrid-GoToObject-6x6-N2-v0` + - `MiniGrid-GoToObject-8x8-N2-v0` + """ def __init__(self, size=6, numObjs=2, max_steps: int | None = None, **kwargs): @@ -104,7 +154,7 @@ def step(self, action): # Reward performing the done action next to the target object if action == self.actions.done: - if abs(ax - tx) <= 1 and abs(ay - ty) <= 1: + if (ax == tx and abs(ay - ty) == 1) or (ay == ty and abs(ax - tx) == 1): reward = self._reward() terminated = True diff --git a/minigrid/envs/keycorridor.py b/minigrid/envs/keycorridor.py index 3cf3fe4e8..69a1b2fff 100644 --- a/minigrid/envs/keycorridor.py +++ b/minigrid/envs/keycorridor.py @@ -49,7 +49,7 @@ class KeyCorridorEnv(RoomGrid): ## Rewards - A reward of '1' is given for success, and '0' for failure. + A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure. 
## Termination diff --git a/minigrid/envs/lavagap.py b/minigrid/envs/lavagap.py index d709a649b..76b280d27 100644 --- a/minigrid/envs/lavagap.py +++ b/minigrid/envs/lavagap.py @@ -46,7 +46,7 @@ class LavaGapEnv(MiniGridEnv): ## Rewards - A reward of '1' is given for success, and '0' for failure. + A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure. ## Termination diff --git a/minigrid/envs/lockedroom.py b/minigrid/envs/lockedroom.py index 8c742da32..d604a1a4e 100644 --- a/minigrid/envs/lockedroom.py +++ b/minigrid/envs/lockedroom.py @@ -61,7 +61,7 @@ class LockedRoomEnv(MiniGridEnv): ## Rewards - A reward of '1' is given for success, and '0' for failure. + A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure. ## Termination diff --git a/minigrid/envs/memory.py b/minigrid/envs/memory.py index 0ef729d81..2d48f8034 100644 --- a/minigrid/envs/memory.py +++ b/minigrid/envs/memory.py @@ -46,7 +46,7 @@ class MemoryEnv(MiniGridEnv): ## Rewards - A reward of '1' is given for success, and '0' for failure. + A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure. ## Termination diff --git a/minigrid/envs/multiroom.py b/minigrid/envs/multiroom.py index bc4c9145d..82ceff4f2 100644 --- a/minigrid/envs/multiroom.py +++ b/minigrid/envs/multiroom.py @@ -52,7 +52,7 @@ class MultiRoomEnv(MiniGridEnv): ## Rewards - A reward of '1' is given for success, and '0' for failure. + A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure. ## Termination diff --git a/minigrid/envs/obstructedmaze.py b/minigrid/envs/obstructedmaze.py index fcbcfe4f0..2067d6c88 100644 --- a/minigrid/envs/obstructedmaze.py +++ b/minigrid/envs/obstructedmaze.py @@ -41,7 +41,7 @@ class ObstructedMazeEnv(RoomGrid): ## Rewards - A reward of '1' is given for success, and '0' for failure. 
+ A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure. ## Termination diff --git a/minigrid/envs/putnear.py b/minigrid/envs/putnear.py index f1a2fbc95..d7104fd46 100644 --- a/minigrid/envs/putnear.py +++ b/minigrid/envs/putnear.py @@ -48,7 +48,7 @@ class PutNearEnv(MiniGridEnv): ## Rewards - A reward of '1' is given for success, and '0' for failure. + A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure. ## Termination diff --git a/minigrid/envs/redbluedoors.py b/minigrid/envs/redbluedoors.py index ca4bc8bd8..e59f58cee 100644 --- a/minigrid/envs/redbluedoors.py +++ b/minigrid/envs/redbluedoors.py @@ -42,7 +42,7 @@ class RedBlueDoorEnv(MiniGridEnv): ## Rewards - A reward of '1' is given for success, and '0' for failure. + A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure. ## Termination diff --git a/minigrid/envs/unlock.py b/minigrid/envs/unlock.py index 0fc53c1cf..0b2e06bc1 100644 --- a/minigrid/envs/unlock.py +++ b/minigrid/envs/unlock.py @@ -38,7 +38,7 @@ class UnlockEnv(RoomGrid): ## Rewards - A reward of '1' is given for success, and '0' for failure. + A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure. ## Termination diff --git a/minigrid/envs/unlockpickup.py b/minigrid/envs/unlockpickup.py index 33aa2fbfe..820ce4c50 100644 --- a/minigrid/envs/unlockpickup.py +++ b/minigrid/envs/unlockpickup.py @@ -42,7 +42,7 @@ class UnlockPickupEnv(RoomGrid): ## Rewards - A reward of '1' is given for success, and '0' for failure. + A reward of '1 - 0.9 * (step_count / max_steps)' is given for success, and '0' for failure. ## Termination