eugenevinitsky · Rohan138 · Feb 18, 2022 · Feb 18, 2022 · Feb 18, 2022 · Mar 8, 2022
diff --git a/requirements.txt b/requirements.txt
@@ -1,8 +1,9 @@
 gym>=0.21.0
 ray[rllib]==0.8.5
-pettingzoo>=1.13.1
+pettingzoo>=1.18.1
 opencv-python>=3.4
 numpy>=1.21.0
+protobuf<3.20
 scipy
 pandas
 matplotlib

diff --git a/run_scripts/sb3_independent.py b/run_scripts/sb3_independent.py
@@ -4,6 +4,7 @@
 import supersuit as ss
 import torch
 import torch.nn.functional as F
+
 # pip install git+https://github.com/Rohan138/marl-baselines3
 from marl_baselines3 import IndependentPPO
 from stable_baselines3.common.torch_layers import BaseFeaturesExtractor

diff --git a/setup.py b/setup.py
@@ -15,7 +15,7 @@
     requirements = fh.readlines()
 
 extras = {
-    "sb3": ["stable-baselines3"],
+    "sb3": ["stable-baselines3", "supersuit>=3.0.0"],
     "rllib": ["tensorflow>=2.6.0"],
     "dev": ["pytest", "black", "isort"],
 }

diff --git a/social_dilemmas/envs/cleanup.py b/social_dilemmas/envs/cleanup.py
@@ -12,9 +12,9 @@
 # Custom colour dictionary
 CLEANUP_COLORS = {
     b"C": np.array([100, 255, 255], dtype=np.uint8),  # Cyan cleaning beam
-    b"S": np.array([113, 75, 24], dtype=np.uint8),  # Light grey-blue stream cell
-    b"H": np.array([99, 156, 194], dtype=np.uint8),  # Brown waste cells
-    b"R": np.array([113, 75, 24], dtype=np.uint8),  # Light grey-blue river cell
+    b"S": np.array([99, 156, 194], dtype=np.uint8),  # Light grey-blue stream cell
+    b"H": np.array([113, 75, 24], dtype=np.uint8),  # Brown waste cells
+    b"R": np.array([99, 156, 194], dtype=np.uint8),  # Light grey-blue river cell
 }
 
 SPAWN_PROB = [0, 0.005, 0.02, 0.05]

diff --git a/social_dilemmas/envs/gym/discrete_with_dtype.py b/social_dilemmas/envs/gym/discrete_with_dtype.py
@@ -5,5 +5,6 @@ class DiscreteWithDType(Discrete):
     def __init__(self, n, dtype):
         assert n >= 0
         self.n = n
+        self.start = 0  # NOTE(review): presumably the offset attribute newer gym Discrete spaces carry; confirm
         # Skip Discrete __init__ on purpose, to avoid setting the wrong dtype
         super(Discrete, self).__init__((), dtype)
diff --git a/social_dilemmas/envs/map_env.py b/social_dilemmas/envs/map_env.py
@@ -306,7 +306,7 @@ def step(self, actions):
         dones["__all__"] = np.any(list(dones.values()))
         return observations, rewards, dones, infos
 
-    def reset(self):
+    def reset(self, seed=None):
         """Reset the environment.
 
         This method is performed in between rollouts. It resets the state of
@@ -318,6 +318,9 @@ def reset(self):
             the initial observation of the space. The initial reward is assumed
             to be zero.
         """
+        if seed is not None:
+            np.random.seed(seed)
+
         self.beam_pos = []
         self.agents = {}
         self.setup_agents()
@@ -346,9 +349,6 @@ def reset(self):
                 observations[agent.agent_id] = {"curr_obs": rgb_arr}
         return observations
 
-    def seed(self, seed=None):
-        np.random.seed(seed)
-
     def close(self):
         plt.close()
 

diff --git a/social_dilemmas/envs/pettingzoo_env.py b/social_dilemmas/envs/pettingzoo_env.py
@@ -2,7 +2,7 @@
 
 from gym.utils import EzPickle
 from pettingzoo.utils import wrappers
-from pettingzoo.utils.conversions import from_parallel_wrapper
+from pettingzoo.utils.conversions import parallel_to_aec_wrapper
 from pettingzoo.utils.env import ParallelEnv
 
 from social_dilemmas.envs.env_creator import get_env_creator
@@ -15,7 +15,7 @@ def parallel_env(max_cycles=MAX_CYCLES, **ssd_args):
 
 
 def raw_env(max_cycles=MAX_CYCLES, **ssd_args):
-    return from_parallel_wrapper(parallel_env(max_cycles, **ssd_args))
+    return parallel_to_aec_wrapper(parallel_env(max_cycles, **ssd_args))
 
 
 def env(max_cycles=MAX_CYCLES, **ssd_args):
@@ -36,14 +36,11 @@ def __init__(self, env, max_cycles):
         self.action_space = lru_cache(maxsize=None)(lambda agent_id: env.action_space)
         self.action_spaces = {agent: env.action_space for agent in self.possible_agents}
 
-    def reset(self):
+    def reset(self, seed=None):
         self.agents = self.possible_agents[:]
         self.num_cycles = 0
         self.dones = {agent: False for agent in self.agents}
-        return self.ssd_env.reset()
-
-    def seed(self, seed=None):
-        return self.ssd_env.seed(seed)
+        return self.ssd_env.reset(seed)  # pass the seed on to the wrapped env's reset
 
     def render(self, mode="human"):
         return self.ssd_env.render(mode=mode)

diff --git a/social_dilemmas/maps.py b/social_dilemmas/maps.py
@@ -25,29 +25,29 @@
 
 CLEANUP_MAP = [
     "@@@@@@@@@@@@@@@@@@",
-    "@RRRRRR     BBBBB@",
-    "@HHHHHH      BBBB@",
-    "@RRRRRR     BBBBB@",
+    "@HHHHHH     BBBBB@",
+    "@RRRRRR      BBBB@",
+    "@HHHHHH     BBBBB@",
     "@RRRRR  P    BBBB@",
-    "@RRRRR    P BBBBB@",
-    "@HHHHH       BBBB@",
-    "@RRRRR      BBBBB@",
-    "@HHHHHHSSSSSSBBBB@",
-    "@HHHHHHSSSSSSBBBB@",
-    "@RRRRR   P P BBBB@",
-    "@HHHHH   P  BBBBB@",
-    "@RRRRRR    P BBBB@",
-    "@HHHHHH P   BBBBB@",
-    "@RRRRR       BBBB@",
-    "@HHHH    P  BBBBB@",
-    "@RRRRR       BBBB@",
-    "@HHHHH  P P BBBBB@",
-    "@RRRRR       BBBB@",
-    "@HHHH       BBBBB@",
+    "@HHHHH    P BBBBB@",
     "@RRRRR       BBBB@",
     "@HHHHH      BBBBB@",
-    "@RRRRR       BBBB@",
-    "@HHHH       BBBBB@",
+    "@SSSSSSHHHHHHBBBB@",
+    "@SSSSSSHHHHHHBBBB@",
+    "@HHHHH   P P BBBB@",
+    "@RRRRR   P  BBBBB@",
+    "@HHHHHH    P BBBB@",
+    "@RRRRRR P   BBBBB@",
+    "@HHHHH       BBBB@",
+    "@RRRR    P  BBBBB@",
+    "@HHHHH       BBBB@",
+    "@RRRRR  P P BBBBB@",
+    "@HHHHH       BBBB@",
+    "@RRRR       BBBBB@",
+    "@HHHHH       BBBB@",
+    "@RRRRR      BBBBB@",
+    "@HHHHH       BBBB@",
+    "@RRRR       BBBBB@",
     "@@@@@@@@@@@@@@@@@@",
 ]
 

diff --git a/tests/test_pettingzoo.py b/tests/test_pettingzoo.py
@@ -11,7 +11,6 @@
 class PettingZooTest(unittest.TestCase):
     def test_parallel(self):
         env = parallel_env(max_cycles=MAX_CYCLES, env="harvest", num_agents=2)
-        env.seed()
         env.reset()
         n_act = env.action_space("agent-0").n
         for _ in range(MAX_CYCLES * env.num_agents):
@@ -23,7 +22,6 @@ def test_parallel(self):
 
     def test_aec(self):
         env = aec_env(max_cycles=MAX_CYCLES, env="harvest", num_agents=2)
-        env.seed(0)
         env.reset()
         n_act = env.action_space("agent-0").n
         for agent in env.agent_iter(max_iter=MAX_CYCLES * env.num_agents):