Commit: Formatting Code

harshmahesheka committed Sep 6, 2024
1 parent f55e8f9 commit 55024df
Showing 8 changed files with 51 additions and 52 deletions.
10 changes: 5 additions & 5 deletions rl/boltzmann_money/model.py
@@ -26,7 +26,7 @@
NUM_AGENTS = 10

# Define the agent class
-class MoneyAgent_RL(MoneyAgent):
+class MoneyAgentRL(MoneyAgent):
def __init__(self, unique_id, model):
super().__init__(unique_id, model)
self.wealth = np.random.randint(1, NUM_AGENTS)
@@ -78,7 +78,7 @@ def step(self):


# Define the model class
-class BoltzmannWealthModel_RL(BoltzmannWealthModel, gymnasium.Env):
+class BoltzmannWealthModelRL(BoltzmannWealthModel, gymnasium.Env):
def __init__(self, N, width, height):
super().__init__(N, width, height)
# Define the observation and action space for the RL model
@@ -140,8 +140,8 @@ def reset(self, *, seed=None, options=None):
self.grid = mesa.space.MultiGrid(self.grid.width, self.grid.height, True)
self.schedule = mesa.time.RandomActivation(self)
for i in range(self.num_agents):
-# Create MoneyAgent_RL instances and add them to the schedule
-a = MoneyAgent_RL(i, self)
+# Create MoneyAgentRL instances and add them to the schedule
+a = MoneyAgentRL(i, self)
self.schedule.add(a)
x = self.random.randrange(self.grid.width)
y = self.random.randrange(self.grid.height)
@@ -153,6 +153,6 @@ def _get_obs(self):
# The observation is the wealth of each agent and their position
obs = []
for a in self.schedule.agents:
-obs.append([a.wealth] + list(a.pos))
+obs.append([a.wealth, *list(a.pos)])
return np.array(obs)
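Aside: the last change above swaps list concatenation for in-place iterable unpacking; both build the same row. A minimal sketch with illustrative values (not from the model):

wealth, pos = 5, (2, 3)
assert [wealth] + list(pos) == [wealth, *pos] == [5, 2, 3]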

4 changes: 2 additions & 2 deletions rl/boltzmann_money/server.py
@@ -3,12 +3,12 @@
import mesa
from mesa.visualization.ModularVisualization import ModularServer
from mesa.visualization.modules import ChartModule
-from model import BoltzmannWealthModel_RL
+from model import BoltzmannWealthModelRL
from stable_baselines3 import PPO


# Modify the MoneyModel class to take actions from the RL model
-class MoneyModelRL(BoltzmannWealthModel_RL):
+class MoneyModelRL(BoltzmannWealthModelRL):
def __init__(self, N, width, height):
super().__init__(N, width, height)
model_path = os.path.join(os.path.dirname(__file__), '..', 'model', 'boltzmann_money.zip')
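The truncated hunk presumably restores the saved policy for inference. A hedged sketch of the usual Stable-Baselines3 pattern (the helper names are ours, not the file's):

from stable_baselines3 import PPO

def load_policy(model_path):
    # PPO.load accepts the .zip checkpoint written by model.save()
    return PPO.load(model_path)

def act(policy, observation):
    # deterministic=True keeps the served agents reproducible
    action, _state = policy.predict(observation, deterministic=True)
    return action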
6 changes: 3 additions & 3 deletions rl/boltzmann_money/train.py
@@ -1,14 +1,14 @@
import argparse

-from model import NUM_AGENTS, BoltzmannWealthModel_RL
+from model import NUM_AGENTS, BoltzmannWealthModelRL
from stable_baselines3 import PPO
from stable_baselines3.common.callbacks import EvalCallback


def rl_model(args):
# Create the environment
-env = BoltzmannWealthModel_RL(N=NUM_AGENTS, width=NUM_AGENTS, height=NUM_AGENTS)
-eval_env = BoltzmannWealthModel_RL(N=NUM_AGENTS, width=NUM_AGENTS, height=NUM_AGENTS)
+env = BoltzmannWealthModelRL(N=NUM_AGENTS, width=NUM_AGENTS, height=NUM_AGENTS)
+eval_env = BoltzmannWealthModelRL(N=NUM_AGENTS, width=NUM_AGENTS, height=NUM_AGENTS)
eval_callback = EvalCallback(eval_env, best_model_save_path='./logs/',
log_path='./logs/', eval_freq=5000)
# Define the PPO model
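For context, a sketch of how the truncated "Define the PPO model" step typically continues in Stable-Baselines3; the hyperparameters and save path are illustrative, not taken from this commit:

# env and eval_callback are defined in the snippet above
model = PPO("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=100_000, callback=eval_callback)
model.save("model/boltzmann_money")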
8 changes: 4 additions & 4 deletions rl/wolf_sheep/agents.py
@@ -3,7 +3,7 @@
from .utility import move


-class Sheep_RL(Sheep):
+class SheepRL(Sheep):

def step(self):
"""
@@ -36,13 +36,13 @@ def step(self):
if self.model.grass:
self.energy /= 2
unique_id_str = f"sheep_{self.model.next_id()}"
-lamb = Sheep_RL(
+lamb = SheepRL(
unique_id_str, self.pos, self.model, self.moore, self.energy
)
self.model.grid.place_agent(lamb, self.pos)
self.model.schedule.add(lamb)

-class Wolf_RL(Wolf):
+class WolfRL(Wolf):

def step(self):
"""
@@ -74,7 +74,7 @@ def step(self):
# Create a new wolf cub
self.energy /= 2
unique_id_str = f"wolf_{self.model.next_id()}"
-cub = Wolf_RL(
+cub = WolfRL(
unique_id_str, self.pos, self.model, self.moore, self.energy
)
self.model.grid.place_agent(cub, cub.pos)
36 changes: 18 additions & 18 deletions rl/wolf_sheep/model.py
@@ -6,13 +6,13 @@
from mesa_models.wolf_sheep.scheduler import RandomActivationByTypeFiltered
from ray.rllib.env import MultiAgentEnv

-from .agents import Sheep_RL, Wolf_RL
+from .agents import SheepRL, WolfRL
from .utility import create_intial_agents, grid_to_observation


-class WolfSheep_RL(WolfSheep, MultiAgentEnv):
+class WolfSheepRL(WolfSheep, MultiAgentEnv):
"""
-Wolf_RL-Sheep Predation Model
+WolfRL-Sheep Predation Model
"""

def __init__(
@@ -30,7 +30,7 @@ def __init__(
vision=4
):
"""
-Create a new Wolf_RL-Sheep model with the given parameters.
+Create a new WolfRL-Sheep model with the given parameters.
"""
super().__init__(width, height, initial_sheep, initial_wolves, sheep_reproduce, wolf_reproduce, wolf_gain_from_food, grass, grass_regrowth_time, sheep_gain_from_food)
# Define RL-specific attributes
@@ -45,8 +45,8 @@ def __init__(
self.max_steps = 500
self.datacollector = mesa.DataCollector(
{
"Wolves": lambda m: m.schedule.get_type_count(Wolf_RL),
"Sheep": lambda m: m.schedule.get_type_count(Sheep_RL),
"Wolves": lambda m: m.schedule.get_type_count(WolfRL),
"Sheep": lambda m: m.schedule.get_type_count(SheepRL),
"Grass": lambda m: m.schedule.get_type_count(
GrassPatch, lambda x: x.fully_grown
),
@@ -64,10 +64,10 @@ def step(self, action_dict):

# Get observations
# We convert the grid to a matrix, then extract each agent's neighborhood from it
-grid_to_observation(self, Sheep_RL, Wolf_RL, GrassPatch)
+grid_to_observation(self, SheepRL, WolfRL, GrassPatch)
obs = { }
for agent in self.schedule.agents:
-if isinstance(agent, (Sheep_RL, Wolf_RL)):
+if isinstance(agent, (SheepRL, WolfRL)):
neighbors = agent.model.grid.get_neighborhood(
agent.pos, moore=True, radius=self.vision)

@@ -76,22 +76,22 @@
'energy': np.array([agent.energy])}

# The episode ends when time runs out or when either wolves or sheep go extinct
-done = {a.unique_id: False for a in self.schedule.agents if isinstance(a, (Sheep_RL, Wolf_RL))}
+done = {a.unique_id: False for a in self.schedule.agents if isinstance(a, (SheepRL, WolfRL))}

# Check if either wolves or sheep are extinct
-if self.schedule.get_type_count(Wolf_RL) == 0 or self.schedule.get_type_count(Sheep_RL) == 0 or self.schedule.time > self.max_steps:
+if self.schedule.get_type_count(WolfRL) == 0 or self.schedule.get_type_count(SheepRL) == 0 or self.schedule.time > self.max_steps:
done['__all__'] = True
else:
done['__all__'] = False

# Prepare info dictionary
-truncated = {a.unique_id: False for a in self.schedule.agents if isinstance(a, (Sheep_RL, Wolf_RL))}
+truncated = {a.unique_id: False for a in self.schedule.agents if isinstance(a, (SheepRL, WolfRL))}
truncated['__all__'] = np.all(list(truncated.values()))

# Agents that died during this step are marked as done and receive a penalty reward
sample = next(iter(obs.values()))
-for agent_id in action_dict.keys():
-if agent_id not in rewards.keys():
+for agent_id in action_dict:
+if agent_id not in rewards:
done[agent_id] = True
rewards[agent_id] = -20
truncated[agent_id] = False
@@ -109,8 +109,8 @@ def cal_reward(self):
# Calculate rewards
# Agents are rewarded for being alive and having energy
for agent in self.schedule.agents:
-if isinstance(agent, (Sheep_RL, Wolf_RL)):
-if isinstance(agent, Sheep_RL):
+if isinstance(agent, (SheepRL, WolfRL)):
+if isinstance(agent, SheepRL):
rewards[agent.unique_id] = min(4, agent.energy - 4)
else:
rewards[agent.unique_id] = min(4, agent.energy/5 - 4)
@@ -122,11 +122,11 @@ def reset(self, *, seed=None, options=None):
self.schedule = RandomActivationByTypeFiltered(self)
self.grid = mesa.space.MultiGrid(self.width, self.height, torus=True)
self.current_id = 0
-create_intial_agents(self, Sheep_RL, Wolf_RL, GrassPatch)
-grid_to_observation(self, Sheep_RL, Wolf_RL, GrassPatch)
+create_intial_agents(self, SheepRL, WolfRL, GrassPatch)
+grid_to_observation(self, SheepRL, WolfRL, GrassPatch)
obs = {}
for agent in self.schedule.agents:
if isinstance(agent, (Sheep_RL, Wolf_RL)):
if isinstance(agent, (SheepRL, WolfRL)):
neighbors = agent.model.grid.get_neighborhood(
agent.pos, moore=True, radius=self.vision)

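A note on the step() return shape: as an RLlib MultiAgentEnv, every mapping is keyed by agent id, and the done/truncated dicts carry an extra "__all__" flag that ends the episode for all agents at once. A minimal sketch with hypothetical ids:

rewards = {"sheep_1": 2.0, "wolf_3": -1.0}
done = {"sheep_1": False, "wolf_3": True, "__all__": False}
truncated = {"sheep_1": False, "wolf_3": False, "__all__": False}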
22 changes: 11 additions & 11 deletions rl/wolf_sheep/server.py
@@ -6,16 +6,16 @@
from ray import tune
from ray.rllib.algorithms.algorithm import Algorithm

-from .agents import Sheep_RL, Wolf_RL
-from .model import WolfSheep_RL
+from .agents import SheepRL, WolfRL
+from .model import WolfSheepRL
from .utility import grid_to_observation


-class WolfSheepServer(WolfSheep_RL):
+class WolfSheepServer(WolfSheepRL):
def __init__(self, width=20, height=20, initial_sheep=100, initial_wolves=25, sheep_reproduce=0.04, wolf_reproduce=0.05, wolf_gain_from_food=20, grass=True, grass_regrowth_time=30, sheep_gain_from_food=4, model_path=None):
super().__init__(width, height, initial_sheep, initial_wolves, sheep_reproduce, wolf_reproduce, wolf_gain_from_food, grass, grass_regrowth_time, sheep_gain_from_food)
def env_creator(_):
-return WolfSheep_RL(width, height, initial_sheep, initial_wolves, sheep_reproduce, wolf_reproduce, wolf_gain_from_food, grass, grass_regrowth_time, sheep_gain_from_food)
+return WolfSheepRL(width, height, initial_sheep, initial_wolves, sheep_reproduce, wolf_reproduce, wolf_gain_from_food, grass, grass_regrowth_time, sheep_gain_from_food)
tune.register_env("WorldSheepModel-v0", env_creator)
self.iteration = 0
# Load the model from checkpoint
@@ -29,24 +29,24 @@ def step(self):
self.reset()
self.datacollector.collect(self)
# Get the observation for each agent
-grid_to_observation(self, Sheep_RL, Wolf_RL, GrassPatch)
+grid_to_observation(self, SheepRL, WolfRL, GrassPatch)
obs = {}
for agent in self.schedule.agents:
-if isinstance(agent, (Sheep_RL, Wolf_RL)):
+if isinstance(agent, (SheepRL, WolfRL)):
neighbors = agent.model.grid.get_neighborhood(agent.pos, moore=True, radius=self.vision)
obs[agent.unique_id] = {'grid': np.array([self.obs_grid[neighbor[0]][neighbor[1]] for neighbor in neighbors]), 'energy': np.array([agent.energy])}
action_dict = {}
# Get the action for each agent
for agent in self.schedule.agents:
-if isinstance(agent, Sheep_RL):
+if isinstance(agent, SheepRL):
action_dict[agent.unique_id] = self.sheep_policy.compute_single_action(obs[agent.unique_id], explore=False)[0]
-elif isinstance(agent, Wolf_RL):
+elif isinstance(agent, WolfRL):
action_dict[agent.unique_id] = self.wolf_policy.compute_single_action(obs[agent.unique_id], explore=False)[0]
self.action_dict = action_dict
# Take a step in the environment
self.schedule.step()
self.iteration += 1
-if self.schedule.get_type_count(Wolf_RL) == 0 or self.schedule.get_type_count(Sheep_RL) == 0 or self.schedule.time > self.max_steps:
+if self.schedule.get_type_count(WolfRL) == 0 or self.schedule.get_type_count(SheepRL) == 0 or self.schedule.time > self.max_steps:
self.running = False

def wolf_sheep_portrayal(agent):
@@ -57,12 +57,12 @@ def wolf_sheep_portrayal(agent):
file_path = os.path.dirname(os.path.abspath(__file__))
resources_path = os.path.join(file_path, "resources")

-if type(agent) is Sheep_RL:
+if type(agent) is SheepRL:
portrayal["Shape"] = os.path.join(resources_path, "sheep.png")
portrayal["scale"] = 0.9
portrayal["Layer"] = 1

-elif type(agent) is Wolf_RL:
+elif type(agent) is WolfRL:
portrayal["Shape"] = os.path.join(resources_path, "wolf.png")
portrayal["scale"] = 0.9
portrayal["Layer"] = 2
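The truncated hunk above loads the per-species policies from a checkpoint. A hedged sketch of the usual RLlib pattern (the policy ids are assumptions, not the file's values):

from ray.rllib.algorithms.algorithm import Algorithm

def load_policies(checkpoint_path):
    # Restore the trained Algorithm, then pull out one policy per species
    algo = Algorithm.from_checkpoint(checkpoint_path)
    return algo.get_policy("policy_sheep"), algo.get_policy("policy_wolf")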
5 changes: 2 additions & 3 deletions rl/wolf_sheep/train_config.py
@@ -3,13 +3,12 @@
from ray.rllib.algorithms.ppo import PPOConfig
from ray.rllib.policy.policy import PolicySpec

-from .agents import Sheep_RL
-from .model import WolfSheep_RL
+from .model import WolfSheepRL

# Configuration to train the model
# Feel free to adjust the configuration as necessary
def env_creator(_):
-return WolfSheep_RL(width=20, height=20, initial_sheep=100, initial_wolves=25, sheep_reproduce=0.04, wolf_reproduce=0.05, wolf_gain_from_food=20, grass=True, grass_regrowth_time=30, sheep_gain_from_food=4)
+return WolfSheepRL(width=20, height=20, initial_sheep=100, initial_wolves=25, sheep_reproduce=0.04, wolf_reproduce=0.05, wolf_gain_from_food=20, grass=True, grass_regrowth_time=30, sheep_gain_from_food=4)

config = {
"env_name": "WorldSheepModel-v0",
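The config dict itself is truncated. A hedged sketch of a two-policy PPOConfig consistent with the imports above; the policy ids and mapping rule are assumptions, not the file's values:

from ray.rllib.algorithms.ppo import PPOConfig
from ray.rllib.policy.policy import PolicySpec

config = (
    PPOConfig()
    .environment("WorldSheepModel-v0")
    .multi_agent(
        policies={"policy_sheep": PolicySpec(), "policy_wolf": PolicySpec()},
        # unique_ids look like "sheep_12" / "wolf_7", so prefix-match them
        policy_mapping_fn=lambda aid, *args, **kw: "policy_sheep" if aid.startswith("sheep") else "policy_wolf",
    )
)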
12 changes: 6 additions & 6 deletions rl/wolf_sheep/utility.py
@@ -1,11 +1,11 @@
-def create_intial_agents(self, Sheep_RL, Wolf_RL, GrassPatch):
+def create_intial_agents(self, SheepRL, WolfRL, GrassPatch):
# Create sheep:
for i in range(self.initial_sheep):
x = self.random.randrange(self.width)
y = self.random.randrange(self.height)
energy = self.random.randrange(2 * self.sheep_gain_from_food)
unique_id_str = f"sheep_{self.next_id()}"
-sheep = Sheep_RL(unique_id_str, None, self, True, energy)
+sheep = SheepRL(unique_id_str, None, self, True, energy)
self.grid.place_agent(sheep, (x, y))
self.schedule.add(sheep)

@@ -15,7 +15,7 @@ def create_intial_agents(self, Sheep_RL, Wolf_RL, GrassPatch):
y = self.random.randrange(self.height)
energy = self.random.randrange(2 * self.wolf_gain_from_food)
unique_id_str = f"wolf_{self.next_id()}"
-wolf = Wolf_RL(unique_id_str, None, self, True, energy)
+wolf = WolfRL(unique_id_str, None, self, True, energy)
self.grid.place_agent(wolf, (x, y))
self.schedule.add(wolf)

@@ -63,17 +63,17 @@ def move(self, action):
if new_position in empty_neighbors:
self.model.grid.move_agent(self, new_position)

-def grid_to_observation(self, Sheep_RL, Wolf_RL, GrassPatch):
+def grid_to_observation(self, SheepRL, WolfRL, GrassPatch):
# Convert the grid to a matrix of per-cell indicators
self.obs_grid = []
for i in self.grid._grid:
row = []
for j in i:
value = [0, 0, 0]
for agent in j:
-if isinstance(agent, Sheep_RL):
+if isinstance(agent, SheepRL):
value[0] = 1
-elif isinstance(agent, Wolf_RL):
+elif isinstance(agent, WolfRL):
value[1] = 1
elif isinstance(agent, GrassPatch) and agent.fully_grown:
value[2] = 1
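So grid_to_observation encodes each cell as a three-channel indicator. Illustrative values:

# [sheep_present, wolf_present, grass_fully_grown]
sheep_on_grown_grass = [1, 0, 1]
lone_wolf = [0, 1, 0]
empty_cell = [0, 0, 0]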
