From 55024dfdabe182d2dbf8c5b14d395ee8bdd48224 Mon Sep 17 00:00:00 2001
From: mahesheka
Date: Fri, 6 Sep 2024 18:36:03 +0530
Subject: [PATCH] Formatting Code

---
 rl/boltzmann_money/model.py   | 10 +++++-----
 rl/boltzmann_money/server.py  |  4 ++--
 rl/boltzmann_money/train.py   |  6 +++---
 rl/wolf_sheep/agents.py       |  8 ++++----
 rl/wolf_sheep/model.py        | 36 +++++++++++++++++------------------
 rl/wolf_sheep/server.py       | 22 ++++++++++-----------
 rl/wolf_sheep/train_config.py |  5 ++---
 rl/wolf_sheep/utility.py      | 12 ++++++------
 8 files changed, 51 insertions(+), 52 deletions(-)

diff --git a/rl/boltzmann_money/model.py b/rl/boltzmann_money/model.py
index 8d57eea9..0b4242f8 100644
--- a/rl/boltzmann_money/model.py
+++ b/rl/boltzmann_money/model.py
@@ -26,7 +26,7 @@ NUM_AGENTS = 10
 
 
 # Define the agent class
-class MoneyAgent_RL(MoneyAgent):
+class MoneyAgentRL(MoneyAgent):
     def __init__(self, unique_id, model):
         super().__init__(unique_id, model)
         self.wealth = np.random.randint(1, NUM_AGENTS)
@@ -78,7 +78,7 @@ def step(self):
 
 
 # Define the model class
-class BoltzmannWealthModel_RL(BoltzmannWealthModel, gymnasium.Env):
+class BoltzmannWealthModelRL(BoltzmannWealthModel, gymnasium.Env):
     def __init__(self, N, width, height):
         super().__init__(N, width, height)
         # Define the observation and action space for the RL model
@@ -140,8 +140,8 @@ def reset(self, *, seed=None, options=None):
         self.grid = mesa.space.MultiGrid(self.grid.width, self.grid.height, True)
         self.schedule = mesa.time.RandomActivation(self)
         for i in range(self.num_agents):
-            # Create MoneyAgent_RL instances and add them to the schedule
-            a = MoneyAgent_RL(i, self)
+            # Create MoneyAgentRL instances and add them to the schedule
+            a = MoneyAgentRL(i, self)
             self.schedule.add(a)
             x = self.random.randrange(self.grid.width)
             y = self.random.randrange(self.grid.height)
@@ -153,6 +153,6 @@ def _get_obs(self):
         # The observation is the wealth of each agent and their position
         obs = []
         for a in self.schedule.agents:
-            obs.append([a.wealth] + list(a.pos))
+            obs.append([a.wealth, *list(a.pos)])
         return np.array(obs)
 
diff --git a/rl/boltzmann_money/server.py b/rl/boltzmann_money/server.py
index 47b60d5a..954dc835 100644
--- a/rl/boltzmann_money/server.py
+++ b/rl/boltzmann_money/server.py
@@ -3,12 +3,12 @@
 import mesa
 from mesa.visualization.ModularVisualization import ModularServer
 from mesa.visualization.modules import ChartModule
-from model import BoltzmannWealthModel_RL
+from model import BoltzmannWealthModelRL
 from stable_baselines3 import PPO
 
 
 # Modify the MoneyModel class to take actions from the RL model
-class MoneyModelRL(BoltzmannWealthModel_RL):
+class MoneyModelRL(BoltzmannWealthModelRL):
     def __init__(self, N, width, height):
         super().__init__(N, width, height)
         model_path = os.path.join(os.path.dirname(__file__), '..', 'model', 'boltzmann_money.zip')
diff --git a/rl/boltzmann_money/train.py b/rl/boltzmann_money/train.py
index c5814bf6..9fcf8fca 100644
--- a/rl/boltzmann_money/train.py
+++ b/rl/boltzmann_money/train.py
@@ -1,14 +1,14 @@
 import argparse
 
-from model import NUM_AGENTS, BoltzmannWealthModel_RL
+from model import NUM_AGENTS, BoltzmannWealthModelRL
 from stable_baselines3 import PPO
 from stable_baselines3.common.callbacks import EvalCallback
 
 
 def rl_model(args):
     # Create the environment
-    env = BoltzmannWealthModel_RL(N=NUM_AGENTS, width=NUM_AGENTS, height=NUM_AGENTS)
-    eval_env = BoltzmannWealthModel_RL(N=NUM_AGENTS, width=NUM_AGENTS, height=NUM_AGENTS)
+    env = BoltzmannWealthModelRL(N=NUM_AGENTS, width=NUM_AGENTS, height=NUM_AGENTS)
+    eval_env = BoltzmannWealthModelRL(N=NUM_AGENTS, width=NUM_AGENTS, height=NUM_AGENTS)
     eval_callback = EvalCallback(eval_env, best_model_save_path='./logs/', log_path='./logs/', eval_freq=5000)
 
     # Define the PPO model
diff --git a/rl/wolf_sheep/agents.py b/rl/wolf_sheep/agents.py
index 16784725..6edc2f20 100644
--- a/rl/wolf_sheep/agents.py
+++ b/rl/wolf_sheep/agents.py
@@ -3,7 +3,7 @@
 from .utility import move
 
 
-class Sheep_RL(Sheep):
+class SheepRL(Sheep):
     def step(self):
         """
@@ -36,13 +36,13 @@ def step(self):
             if self.model.grass:
                 self.energy /= 2
             unique_id_str = f"sheep_{self.model.next_id()}"
-            lamb = Sheep_RL(
+            lamb = SheepRL(
                 unique_id_str, self.pos, self.model, self.moore, self.energy
             )
             self.model.grid.place_agent(lamb, self.pos)
             self.model.schedule.add(lamb)
 
 
-class Wolf_RL(Wolf):
+class WolfRL(Wolf):
     def step(self):
         """
@@ -74,7 +74,7 @@ def step(self):
                 # Create a new wolf cub
                 self.energy /= 2
                 unique_id_str = f"wolf_{self.model.next_id()}"
-                cub = Wolf_RL(
+                cub = WolfRL(
                     unique_id_str, self.pos, self.model, self.moore, self.energy
                 )
                 self.model.grid.place_agent(cub, cub.pos)
diff --git a/rl/wolf_sheep/model.py b/rl/wolf_sheep/model.py
index f9e00582..8c7b9616 100644
--- a/rl/wolf_sheep/model.py
+++ b/rl/wolf_sheep/model.py
@@ -6,13 +6,13 @@
 from mesa_models.wolf_sheep.scheduler import RandomActivationByTypeFiltered
 from ray.rllib.env import MultiAgentEnv
 
-from .agents import Sheep_RL, Wolf_RL
+from .agents import SheepRL, WolfRL
 from .utility import create_intial_agents, grid_to_observation
 
 
-class WolfSheep_RL(WolfSheep, MultiAgentEnv):
+class WolfSheepRL(WolfSheep, MultiAgentEnv):
     """
-    Wolf_RL-Sheep Predation Model
+    WolfRL-Sheep Predation Model
     """
 
     def __init__(
@@ -30,7 +30,7 @@ def __init__(
         vision=4
     ):
         """
-        Create a new Wolf_RL-Sheep model with the given parameters.
+        Create a new WolfRL-Sheep model with the given parameters.
""" super().__init__(width, height, initial_sheep, initial_wolves, sheep_reproduce, wolf_reproduce, wolf_gain_from_food, grass, grass_regrowth_time, sheep_gain_from_food) # Defining RL specific attributes @@ -45,8 +45,8 @@ def __init__( self.max_steps = 500 self.datacollector = mesa.DataCollector( { - "Wolves": lambda m: m.schedule.get_type_count(Wolf_RL), - "Sheep": lambda m: m.schedule.get_type_count(Sheep_RL), + "Wolves": lambda m: m.schedule.get_type_count(WolfRL), + "Sheep": lambda m: m.schedule.get_type_count(SheepRL), "Grass": lambda m: m.schedule.get_type_count( GrassPatch, lambda x: x.fully_grown ), @@ -64,10 +64,10 @@ def step(self, action_dict): # Get observations # We convert grid to a matrix and then neighbors of each agent is extracted - grid_to_observation(self, Sheep_RL, Wolf_RL, GrassPatch) + grid_to_observation(self, SheepRL, WolfRL, GrassPatch) obs = { } for agent in self.schedule.agents: - if isinstance(agent, (Sheep_RL, Wolf_RL)): + if isinstance(agent, (SheepRL, WolfRL)): neighbors = agent.model.grid.get_neighborhood( agent.pos, moore=True, radius=self.vision) @@ -76,22 +76,22 @@ def step(self, action_dict): 'energy': np.array([agent.energy])} # Either time finishes or either wolves or sheep are extinct - done = {a.unique_id: False for a in self.schedule.agents if isinstance(a, (Sheep_RL, Wolf_RL))} + done = {a.unique_id: False for a in self.schedule.agents if isinstance(a, (SheepRL, WolfRL))} # Check if either wolves or sheep are extinct - if self.schedule.get_type_count(Wolf_RL) == 0 or self.schedule.get_type_count(Sheep_RL) == 0 or self.schedule.time > self.max_steps: + if self.schedule.get_type_count(WolfRL) == 0 or self.schedule.get_type_count(SheepRL) == 0 or self.schedule.time > self.max_steps: done['__all__'] = True else: done['__all__'] = False # Prepare info dictionary - truncated = {a.unique_id: False for a in self.schedule.agents if isinstance(a, (Sheep_RL, Wolf_RL))} + truncated = {a.unique_id: False for a in self.schedule.agents if isinstance(a, (SheepRL, WolfRL))} truncated['__all__'] = np.all(list(truncated.values())) # All the agents that dies during this step are marked as done and rewarded penalty sample = next(iter(obs.values())) - for agent_id in action_dict.keys(): - if agent_id not in rewards.keys(): + for agent_id in action_dict: + if agent_id not in rewards: done[agent_id] = True rewards[agent.unique_id] = -20 truncated[agent.unique_id] = False @@ -109,8 +109,8 @@ def cal_reward(self): # Calculate rewards # Agents are rewarded for being alive and having energy for agent in self.schedule.agents: - if isinstance(agent, (Sheep_RL, Wolf_RL)): - if isinstance(agent, Sheep_RL): + if isinstance(agent, (SheepRL, WolfRL)): + if isinstance(agent, SheepRL): rewards[agent.unique_id] = min(4, agent.energy - 4) else: rewards[agent.unique_id] = min(4, agent.energy/5 - 4) @@ -122,11 +122,11 @@ def reset(self, *, seed=None, options=None): self.schedule = RandomActivationByTypeFiltered(self) self.grid = mesa.space.MultiGrid(self.width, self.height, torus=True) self.current_id = 0 - create_intial_agents(self, Sheep_RL, Wolf_RL, GrassPatch) - grid_to_observation(self, Sheep_RL, Wolf_RL, GrassPatch) + create_intial_agents(self, SheepRL, WolfRL, GrassPatch) + grid_to_observation(self, SheepRL, WolfRL, GrassPatch) obs = {} for agent in self.schedule.agents: - if isinstance(agent, (Sheep_RL, Wolf_RL)): + if isinstance(agent, (SheepRL, WolfRL)): neighbors = agent.model.grid.get_neighborhood( agent.pos, moore=True, radius=self.vision) diff --git 
index 894f625b..445b0c4a 100644
--- a/rl/wolf_sheep/server.py
+++ b/rl/wolf_sheep/server.py
@@ -6,16 +6,16 @@
 from ray import tune
 from ray.rllib.algorithms.algorithm import Algorithm
 
-from .agents import Sheep_RL, Wolf_RL
-from .model import WolfSheep_RL
+from .agents import SheepRL, WolfRL
+from .model import WolfSheepRL
 from .utility import grid_to_observation
 
 
-class WolfSheepServer(WolfSheep_RL):
+class WolfSheepServer(WolfSheepRL):
     def __init__(self, width=20, height=20, initial_sheep=100, initial_wolves=25, sheep_reproduce=0.04, wolf_reproduce=0.05, wolf_gain_from_food=20, grass=True, grass_regrowth_time=30, sheep_gain_from_food=4, model_path=None):
         super().__init__(width, height, initial_sheep, initial_wolves, sheep_reproduce, wolf_reproduce, wolf_gain_from_food, grass, grass_regrowth_time, sheep_gain_from_food)
         def env_creator(_):
-            return WolfSheep_RL(width, height, initial_sheep, initial_wolves, sheep_reproduce, wolf_reproduce, wolf_gain_from_food, grass, grass_regrowth_time, sheep_gain_from_food)
+            return WolfSheepRL(width, height, initial_sheep, initial_wolves, sheep_reproduce, wolf_reproduce, wolf_gain_from_food, grass, grass_regrowth_time, sheep_gain_from_food)
         tune.register_env("WorldSheepModel-v0", env_creator)
         self.iteration = 0
         # Load the model from checkpoint
@@ -29,24 +29,24 @@ def step(self):
             self.reset()
         self.datacollector.collect(self)
         # Get the observation for each agent
-        grid_to_observation(self, Sheep_RL, Wolf_RL, GrassPatch)
+        grid_to_observation(self, SheepRL, WolfRL, GrassPatch)
         obs = {}
         for agent in self.schedule.agents:
-            if isinstance(agent, (Sheep_RL, Wolf_RL)):
+            if isinstance(agent, (SheepRL, WolfRL)):
                 neighbors = agent.model.grid.get_neighborhood(agent.pos, moore=True, radius=self.vision)
                 obs[agent.unique_id] = {'grid': np.array([self.obs_grid[neighbor[0]][neighbor[1]] for neighbor in neighbors]), 'energy': np.array([agent.energy])}
         action_dict = {}
         # Get the action for each agent
         for agent in self.schedule.agents:
-            if isinstance(agent, Sheep_RL):
+            if isinstance(agent, SheepRL):
                 action_dict[agent.unique_id] = self.sheep_policy.compute_single_action(obs[agent.unique_id], explore=False)[0]
-            elif isinstance(agent, Wolf_RL):
+            elif isinstance(agent, WolfRL):
                 action_dict[agent.unique_id] = self.wolf_policy.compute_single_action(obs[agent.unique_id], explore=False)[0]
         self.action_dict = action_dict
         # Take a step in the environment
         self.schedule.step()
         self.iteration += 1
-        if self.schedule.get_type_count(Wolf_RL) == 0 or self.schedule.get_type_count(Sheep_RL) == 0 or self.schedule.time > self.max_steps:
+        if self.schedule.get_type_count(WolfRL) == 0 or self.schedule.get_type_count(SheepRL) == 0 or self.schedule.time > self.max_steps:
             self.running = False
 
 def wolf_sheep_portrayal(agent):
@@ -57,12 +57,12 @@ def wolf_sheep_portrayal(agent):
     file_path = os.path.dirname(os.path.abspath(__file__))
     resources_path = os.path.join(file_path, "resources")
 
-    if type(agent) is Sheep_RL:
+    if type(agent) is SheepRL:
         portrayal["Shape"] = os.path.join(resources_path, "sheep.png")
         portrayal["scale"] = 0.9
         portrayal["Layer"] = 1
 
-    elif type(agent) is Wolf_RL:
+    elif type(agent) is WolfRL:
         portrayal["Shape"] = os.path.join(resources_path, "wolf.png")
         portrayal["scale"] = 0.9
         portrayal["Layer"] = 2
diff --git a/rl/wolf_sheep/train_config.py b/rl/wolf_sheep/train_config.py
index 6fb0c889..d4f55a6f 100644
--- a/rl/wolf_sheep/train_config.py
+++ b/rl/wolf_sheep/train_config.py
@@ -3,13 +3,12 @@
 from ray.rllib.algorithms.ppo import PPOConfig
 from ray.rllib.policy.policy import PolicySpec
 
-from .agents import Sheep_RL
-from .model import WolfSheep_RL
+from .model import WolfSheepRL
 
 
 # Configuration to train the model
 # Feel free to adjust the configuration as necessary
 def env_creator(_):
-    return WolfSheep_RL(width=20, height=20, initial_sheep=100, initial_wolves=25, sheep_reproduce=0.04, wolf_reproduce=0.05, wolf_gain_from_food=20, grass=True, grass_regrowth_time=30, sheep_gain_from_food=4)
+    return WolfSheepRL(width=20, height=20, initial_sheep=100, initial_wolves=25, sheep_reproduce=0.04, wolf_reproduce=0.05, wolf_gain_from_food=20, grass=True, grass_regrowth_time=30, sheep_gain_from_food=4)
 
 config = {
     "env_name": "WorldSheepModel-v0",
diff --git a/rl/wolf_sheep/utility.py b/rl/wolf_sheep/utility.py
index 2a1309ae..aa9f7bce 100644
--- a/rl/wolf_sheep/utility.py
+++ b/rl/wolf_sheep/utility.py
@@ -1,11 +1,11 @@
-def create_intial_agents(self, Sheep_RL, Wolf_RL, GrassPatch):
+def create_intial_agents(self, SheepRL, WolfRL, GrassPatch):
     # Create sheep:
     for i in range(self.initial_sheep):
         x = self.random.randrange(self.width)
         y = self.random.randrange(self.height)
         energy = self.random.randrange(2 * self.sheep_gain_from_food)
         unique_id_str = f"sheep_{self.next_id()}"
-        sheep = Sheep_RL(unique_id_str, None, self, True, energy)
+        sheep = SheepRL(unique_id_str, None, self, True, energy)
         self.grid.place_agent(sheep, (x, y))
         self.schedule.add(sheep)
 
@@ -15,7 +15,7 @@ def create_intial_agents(self, Sheep_RL, Wolf_RL, GrassPatch):
         y = self.random.randrange(self.height)
         energy = self.random.randrange(2 * self.wolf_gain_from_food)
         unique_id_str = f"wolf_{self.next_id()}"
-        wolf = Wolf_RL(unique_id_str, None, self, True, energy)
+        wolf = WolfRL(unique_id_str, None, self, True, energy)
         self.grid.place_agent(wolf, (x, y))
         self.schedule.add(wolf)
 
@@ -63,7 +63,7 @@ def move(self, action):
     if new_position in empty_neighbors:
         self.model.grid.move_agent(self, new_position)
 
-def grid_to_observation(self, Sheep_RL, Wolf_RL, GrassPatch):
+def grid_to_observation(self, SheepRL, WolfRL, GrassPatch):
    # Convert grid to matrix for better representation
    self.obs_grid = []
    for i in self.grid._grid:
@@ -71,9 +71,9 @@ def grid_to_observation(self, Sheep_RL, Wolf_RL, GrassPatch):
         for j in i:
             value = [0, 0, 0]
             for agent in j:
-                if isinstance(agent, Sheep_RL):
+                if isinstance(agent, SheepRL):
                     value[0] = 1
-                elif isinstance(agent, Wolf_RL):
+                elif isinstance(agent, WolfRL):
                     value[1] = 1
                 elif isinstance(agent, GrassPatch) and agent.fully_grown:
                     value[2] = 1
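
For reference, a minimal usage sketch of the renamed BoltzmannWealthModelRL, mirroring rl/boltzmann_money/train.py above; the timestep count and save path here are illustrative assumptions rather than values taken from the patch:

    # Sketch: train the renamed Gymnasium env with stable-baselines3 PPO.
    from model import NUM_AGENTS, BoltzmannWealthModelRL
    from stable_baselines3 import PPO

    env = BoltzmannWealthModelRL(N=NUM_AGENTS, width=NUM_AGENTS, height=NUM_AGENTS)
    ppo = PPO("MlpPolicy", env, verbose=1)   # default hyperparameters
    ppo.learn(total_timesteps=10_000)        # short run, assumed length
    ppo.save("../model/boltzmann_money")     # assumed path; server.py above loads model/boltzmann_money.zip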