Commit: Formatting Code

harshmahesheka committed Sep 6, 2024
1 parent f55e8f9 commit 55024df
Showing 8 changed files with 51 additions and 52 deletions.
10 changes: 5 additions & 5 deletions rl/boltzmann_money/model.py
@@ -26,7 +26,7 @@
NUM_AGENTS = 10

# Define the agent class
-class MoneyAgent_RL(MoneyAgent):
+class MoneyAgentRL(MoneyAgent):
def __init__(self, unique_id, model):
super().__init__(unique_id, model)
self.wealth = np.random.randint(1, NUM_AGENTS)
@@ -78,7 +78,7 @@ def step(self):


# Define the model class
-class BoltzmannWealthModel_RL(BoltzmannWealthModel, gymnasium.Env):
+class BoltzmannWealthModelRL(BoltzmannWealthModel, gymnasium.Env):
def __init__(self, N, width, height):
super().__init__(N, width, height)
# Define the observation and action space for the RL model
@@ -140,8 +140,8 @@ def reset(self, *, seed=None, options=None):
self.grid = mesa.space.MultiGrid(self.grid.width, self.grid.height, True)
self.schedule = mesa.time.RandomActivation(self)
for i in range(self.num_agents):
-# Create MoneyAgent_RL instances and add them to the schedule
-a = MoneyAgent_RL(i, self)
+# Create MoneyAgentRL instances and add them to the schedule
+a = MoneyAgentRL(i, self)
self.schedule.add(a)
x = self.random.randrange(self.grid.width)
y = self.random.randrange(self.grid.height)
@@ -153,6 +153,6 @@ def _get_obs(self):
# The observation is the wealth of each agent and their position
obs = []
for a in self.schedule.agents:
-obs.append([a.wealth] + list(a.pos))
+obs.append([a.wealth, *list(a.pos)])
return np.array(obs)
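Aside: the last change above swaps list concatenation for in-place iterable unpacking; both build the same row. A minimal sketch with illustrative values (not from the model):

wealth, pos = 5, (2, 3)
assert [wealth] + list(pos) == [wealth, *pos] == [5, 2, 3]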

4 changes: 2 additions & 2 deletions rl/boltzmann_money/server.py
@@ -3,12 +3,12 @@
import mesa
from mesa.visualization.ModularVisualization import ModularServer
from mesa.visualization.modules import ChartModule
-from model import BoltzmannWealthModel_RL
+from model import BoltzmannWealthModelRL
from stable_baselines3 import PPO


# Modify the MoneyModel class to take actions from the RL model
-class MoneyModelRL(BoltzmannWealthModel_RL):
+class MoneyModelRL(BoltzmannWealthModelRL):
def __init__(self, N, width, height):
super().__init__(N, width, height)
model_path = os.path.join(os.path.dirname(__file__), '..', 'model', 'boltzmann_money.zip')
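The truncated hunk presumably restores the saved policy for inference. A hedged sketch of the usual Stable-Baselines3 pattern (the helper names are ours, not the file's):

from stable_baselines3 import PPO

def load_policy(model_path):
    # PPO.load accepts the .zip checkpoint written by model.save()
    return PPO.load(model_path)

def act(policy, observation):
    # deterministic=True keeps the served agents reproducible
    action, _state = policy.predict(observation, deterministic=True)
    return action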
6 changes: 3 additions & 3 deletions rl/boltzmann_money/train.py
@@ -1,14 +1,14 @@
import argparse

-from model import NUM_AGENTS, BoltzmannWealthModel_RL
+from model import NUM_AGENTS, BoltzmannWealthModelRL
from stable_baselines3 import PPO
from stable_baselines3.common.callbacks import EvalCallback


def rl_model(args):
# Create the environment
-env = BoltzmannWealthModel_RL(N=NUM_AGENTS, width=NUM_AGENTS, height=NUM_AGENTS)
-eval_env = BoltzmannWealthModel_RL(N=NUM_AGENTS, width=NUM_AGENTS, height=NUM_AGENTS)
+env = BoltzmannWealthModelRL(N=NUM_AGENTS, width=NUM_AGENTS, height=NUM_AGENTS)
+eval_env = BoltzmannWealthModelRL(N=NUM_AGENTS, width=NUM_AGENTS, height=NUM_AGENTS)
eval_callback = EvalCallback(eval_env, best_model_save_path='./logs/',
log_path='./logs/', eval_freq=5000)
# Define the PPO model
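For context, a sketch of how the truncated "Define the PPO model" step typically continues in Stable-Baselines3; the hyperparameters and save path are illustrative, not taken from this commit:

# env and eval_callback are defined in the snippet above
model = PPO("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=100_000, callback=eval_callback)
model.save("model/boltzmann_money")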
8 changes: 4 additions & 4 deletions rl/wolf_sheep/agents.py
@@ -3,7 +3,7 @@
from .utility import move


-class Sheep_RL(Sheep):
+class SheepRL(Sheep):

def step(self):
"""
@@ -36,13 +36,13 @@ def step(self):
if self.model.grass:
self.energy /= 2
unique_id_str = f"sheep_{self.model.next_id()}"
-lamb = Sheep_RL(
+lamb = SheepRL(
unique_id_str, self.pos, self.model, self.moore, self.energy
)
self.model.grid.place_agent(lamb, self.pos)
self.model.schedule.add(lamb)

-class Wolf_RL(Wolf):
+class WolfRL(Wolf):

def step(self):
"""
@@ -74,7 +74,7 @@ def step(self):
# Create a new wolf cub
self.energy /= 2
unique_id_str = f"wolf_{self.model.next_id()}"
-cub = Wolf_RL(
+cub = WolfRL(
unique_id_str, self.pos, self.model, self.moore, self.energy
)
self.model.grid.place_agent(cub, cub.pos)
36 changes: 18 additions & 18 deletions rl/wolf_sheep/model.py
@@ -6,13 +6,13 @@
from mesa_models.wolf_sheep.scheduler import RandomActivationByTypeFiltered
from ray.rllib.env import MultiAgentEnv

-from .agents import Sheep_RL, Wolf_RL
+from .agents import SheepRL, WolfRL
from .utility import create_intial_agents, grid_to_observation


-class WolfSheep_RL(WolfSheep, MultiAgentEnv):
+class WolfSheepRL(WolfSheep, MultiAgentEnv):
"""
-Wolf_RL-Sheep Predation Model
+WolfRL-Sheep Predation Model
"""

def __init__(
@@ -30,7 +30,7 @@ def __init__(
vision=4
):
"""
-Create a new Wolf_RL-Sheep model with the given parameters.
+Create a new WolfRL-Sheep model with the given parameters.
"""
super().__init__(width, height, initial_sheep, initial_wolves, sheep_reproduce, wolf_reproduce, wolf_gain_from_food, grass, grass_regrowth_time, sheep_gain_from_food)
# Define RL-specific attributes
@@ -45,8 +45,8 @@ def __init__(
self.max_steps = 500
self.datacollector = mesa.DataCollector(
{
"Wolves": lambda m: m.schedule.get_type_count(Wolf_RL),
"Sheep": lambda m: m.schedule.get_type_count(Sheep_RL),
"Wolves": lambda m: m.schedule.get_type_count(WolfRL),
"Sheep": lambda m: m.schedule.get_type_count(SheepRL),
"Grass": lambda m: m.schedule.get_type_count(
GrassPatch, lambda x: x.fully_grown
),
@@ -64,10 +64,10 @@ def step(self, action_dict):

# Get observations
# We convert the grid to a matrix, then extract each agent's neighborhood from it
-grid_to_observation(self, Sheep_RL, Wolf_RL, GrassPatch)
+grid_to_observation(self, SheepRL, WolfRL, GrassPatch)
obs = { }
for agent in self.schedule.agents:
-if isinstance(agent, (Sheep_RL, Wolf_RL)):
+if isinstance(agent, (SheepRL, WolfRL)):
neighbors = agent.model.grid.get_neighborhood(
agent.pos, moore=True, radius=self.vision)

@@ -76,22 +76,22 @@
'energy': np.array([agent.energy])}

# The episode ends when time runs out or when either wolves or sheep go extinct
-done = {a.unique_id: False for a in self.schedule.agents if isinstance(a, (Sheep_RL, Wolf_RL))}
+done = {a.unique_id: False for a in self.schedule.agents if isinstance(a, (SheepRL, WolfRL))}

# Check if either wolves or sheep are extinct
-if self.schedule.get_type_count(Wolf_RL) == 0 or self.schedule.get_type_count(Sheep_RL) == 0 or self.schedule.time > self.max_steps:
+if self.schedule.get_type_count(WolfRL) == 0 or self.schedule.get_type_count(SheepRL) == 0 or self.schedule.time > self.max_steps:
done['__all__'] = True
else:
done['__all__'] = False

# Prepare info dictionary
-truncated = {a.unique_id: False for a in self.schedule.agents if isinstance(a, (Sheep_RL, Wolf_RL))}
+truncated = {a.unique_id: False for a in self.schedule.agents if isinstance(a, (SheepRL, WolfRL))}
truncated['__all__'] = np.all(list(truncated.values()))

# Agents that died during this step are marked as done and receive a penalty reward
sample = next(iter(obs.values()))
-for agent_id in action_dict.keys():
-if agent_id not in rewards.keys():
+for agent_id in action_dict:
+if agent_id not in rewards:
done[agent_id] = True
rewards[agent_id] = -20
truncated[agent_id] = False
@@ -109,8 +109,8 @@ def cal_reward(self):
# Calculate rewards
# Agents are rewarded for being alive and having energy
for agent in self.schedule.agents:
-if isinstance(agent, (Sheep_RL, Wolf_RL)):
-if isinstance(agent, Sheep_RL):
+if isinstance(agent, (SheepRL, WolfRL)):
+if isinstance(agent, SheepRL):
rewards[agent.unique_id] = min(4, agent.energy - 4)
else:
rewards[agent.unique_id] = min(4, agent.energy/5 - 4)
@@ -122,11 +122,11 @@ def reset(self, *, seed=None, options=None):
self.schedule = RandomActivationByTypeFiltered(self)
self.grid = mesa.space.MultiGrid(self.width, self.height, torus=True)
self.current_id = 0
-create_intial_agents(self, Sheep_RL, Wolf_RL, GrassPatch)
-grid_to_observation(self, Sheep_RL, Wolf_RL, GrassPatch)
+create_intial_agents(self, SheepRL, WolfRL, GrassPatch)
+grid_to_observation(self, SheepRL, WolfRL, GrassPatch)
obs = {}
for agent in self.schedule.agents:
if isinstance(agent, (Sheep_RL, Wolf_RL)):
if isinstance(agent, (SheepRL, WolfRL)):
neighbors = agent.model.grid.get_neighborhood(
agent.pos, moore=True, radius=self.vision)

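A note on the step() return shape: as an RLlib MultiAgentEnv, every mapping is keyed by agent id, and the done/truncated dicts carry an extra "__all__" flag that ends the episode for all agents at once. A minimal sketch with hypothetical ids:

rewards = {"sheep_1": 2.0, "wolf_3": -1.0}
done = {"sheep_1": False, "wolf_3": True, "__all__": False}
truncated = {"sheep_1": False, "wolf_3": False, "__all__": False}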
22 changes: 11 additions & 11 deletions rl/wolf_sheep/server.py
@@ -6,16 +6,16 @@
from ray import tune
from ray.rllib.algorithms.algorithm import Algorithm

-from .agents import Sheep_RL, Wolf_RL
-from .model import WolfSheep_RL
+from .agents import SheepRL, WolfRL
+from .model import WolfSheepRL
from .utility import grid_to_observation


-class WolfSheepServer(WolfSheep_RL):
+class WolfSheepServer(WolfSheepRL):
def __init__(self, width=20, height=20, initial_sheep=100, initial_wolves=25, sheep_reproduce=0.04, wolf_reproduce=0.05, wolf_gain_from_food=20, grass=True, grass_regrowth_time=30, sheep_gain_from_food=4, model_path=None):
super().__init__(width, height, initial_sheep, initial_wolves, sheep_reproduce, wolf_reproduce, wolf_gain_from_food, grass, grass_regrowth_time, sheep_gain_from_food)
def env_creator(_):
-return WolfSheep_RL(width, height, initial_sheep, initial_wolves, sheep_reproduce, wolf_reproduce, wolf_gain_from_food, grass, grass_regrowth_time, sheep_gain_from_food)
+return WolfSheepRL(width, height, initial_sheep, initial_wolves, sheep_reproduce, wolf_reproduce, wolf_gain_from_food, grass, grass_regrowth_time, sheep_gain_from_food)
tune.register_env("WorldSheepModel-v0", env_creator)
self.iteration = 0
# Load the model from checkpoint
@@ -29,24 +29,24 @@ def step(self):
self.reset()
self.datacollector.collect(self)
# Get the observation for each agent
-grid_to_observation(self, Sheep_RL, Wolf_RL, GrassPatch)
+grid_to_observation(self, SheepRL, WolfRL, GrassPatch)
obs = {}
for agent in self.schedule.agents:
-if isinstance(agent, (Sheep_RL, Wolf_RL)):
+if isinstance(agent, (SheepRL, WolfRL)):
neighbors = agent.model.grid.get_neighborhood(agent.pos, moore=True, radius=self.vision)
obs[agent.unique_id] = {'grid': np.array([self.obs_grid[neighbor[0]][neighbor[1]] for neighbor in neighbors]), 'energy': np.array([agent.energy])}
action_dict = {}
# Get the action for each agent
for agent in self.schedule.agents:
-if isinstance(agent, Sheep_RL):
+if isinstance(agent, SheepRL):
action_dict[agent.unique_id] = self.sheep_policy.compute_single_action(obs[agent.unique_id], explore=False)[0]
-elif isinstance(agent, Wolf_RL):
+elif isinstance(agent, WolfRL):
action_dict[agent.unique_id] = self.wolf_policy.compute_single_action(obs[agent.unique_id], explore=False)[0]
self.action_dict = action_dict
# Take a step in the environment
self.schedule.step()
self.iteration += 1
-if self.schedule.get_type_count(Wolf_RL) == 0 or self.schedule.get_type_count(Sheep_RL) == 0 or self.schedule.time > self.max_steps:
+if self.schedule.get_type_count(WolfRL) == 0 or self.schedule.get_type_count(SheepRL) == 0 or self.schedule.time > self.max_steps:
self.running = False

def wolf_sheep_portrayal(agent):
@@ -57,12 +57,12 @@ def wolf_sheep_portrayal(agent):
file_path = os.path.dirname(os.path.abspath(__file__))
resources_path = os.path.join(file_path, "resources")

-if type(agent) is Sheep_RL:
+if type(agent) is SheepRL:
portrayal["Shape"] = os.path.join(resources_path, "sheep.png")
portrayal["scale"] = 0.9
portrayal["Layer"] = 1

-elif type(agent) is Wolf_RL:
+elif type(agent) is WolfRL:
portrayal["Shape"] = os.path.join(resources_path, "wolf.png")
portrayal["scale"] = 0.9
portrayal["Layer"] = 2
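The truncated hunk above loads the per-species policies from a checkpoint. A hedged sketch of the usual RLlib pattern (the policy ids are assumptions, not the file's values):

from ray.rllib.algorithms.algorithm import Algorithm

def load_policies(checkpoint_path):
    # Restore the trained Algorithm, then pull out one policy per species
    algo = Algorithm.from_checkpoint(checkpoint_path)
    return algo.get_policy("policy_sheep"), algo.get_policy("policy_wolf")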
5 changes: 2 additions & 3 deletions rl/wolf_sheep/train_config.py
@@ -3,13 +3,12 @@
from ray.rllib.algorithms.ppo import PPOConfig
from ray.rllib.policy.policy import PolicySpec

-from .agents import Sheep_RL
-from .model import WolfSheep_RL
+from .model import WolfSheepRL

# Configuration to train the model
# Feel free to adjust the configuration as necessary
def env_creator(_):
-return WolfSheep_RL(width=20, height=20, initial_sheep=100, initial_wolves=25, sheep_reproduce=0.04, wolf_reproduce=0.05, wolf_gain_from_food=20, grass=True, grass_regrowth_time=30, sheep_gain_from_food=4)
+return WolfSheepRL(width=20, height=20, initial_sheep=100, initial_wolves=25, sheep_reproduce=0.04, wolf_reproduce=0.05, wolf_gain_from_food=20, grass=True, grass_regrowth_time=30, sheep_gain_from_food=4)

config = {
"env_name": "WorldSheepModel-v0",
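The config dict itself is truncated. A hedged sketch of a two-policy PPOConfig consistent with the imports above; the policy ids and mapping rule are assumptions, not the file's values:

from ray.rllib.algorithms.ppo import PPOConfig
from ray.rllib.policy.policy import PolicySpec

config = (
    PPOConfig()
    .environment("WorldSheepModel-v0")
    .multi_agent(
        policies={"policy_sheep": PolicySpec(), "policy_wolf": PolicySpec()},
        # unique_ids look like "sheep_12" / "wolf_7", so prefix-match them
        policy_mapping_fn=lambda aid, *args, **kw: "policy_sheep" if aid.startswith("sheep") else "policy_wolf",
    )
)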
12 changes: 6 additions & 6 deletions rl/wolf_sheep/utility.py
@@ -1,11 +1,11 @@
-def create_intial_agents(self, Sheep_RL, Wolf_RL, GrassPatch):
+def create_intial_agents(self, SheepRL, WolfRL, GrassPatch):
# Create sheep:
for i in range(self.initial_sheep):
x = self.random.randrange(self.width)
y = self.random.randrange(self.height)
energy = self.random.randrange(2 * self.sheep_gain_from_food)
unique_id_str = f"sheep_{self.next_id()}"
-sheep = Sheep_RL(unique_id_str, None, self, True, energy)
+sheep = SheepRL(unique_id_str, None, self, True, energy)
self.grid.place_agent(sheep, (x, y))
self.schedule.add(sheep)

@@ -15,7 +15,7 @@ def create_intial_agents(self, Sheep_RL, Wolf_RL, GrassPatch):
y = self.random.randrange(self.height)
energy = self.random.randrange(2 * self.wolf_gain_from_food)
unique_id_str = f"wolf_{self.next_id()}"
-wolf = Wolf_RL(unique_id_str, None, self, True, energy)
+wolf = WolfRL(unique_id_str, None, self, True, energy)
self.grid.place_agent(wolf, (x, y))
self.schedule.add(wolf)

@@ -63,17 +63,17 @@ def move(self, action):
if new_position in empty_neighbors:
self.model.grid.move_agent(self, new_position)

-def grid_to_observation(self, Sheep_RL, Wolf_RL, GrassPatch):
+def grid_to_observation(self, SheepRL, WolfRL, GrassPatch):
# Convert the grid to a matrix of per-cell indicators
self.obs_grid = []
for i in self.grid._grid:
row = []
for j in i:
value = [0, 0, 0]
for agent in j:
-if isinstance(agent, Sheep_RL):
+if isinstance(agent, SheepRL):
value[0] = 1
-elif isinstance(agent, Wolf_RL):
+elif isinstance(agent, WolfRL):
value[1] = 1
elif isinstance(agent, GrassPatch) and agent.fully_grown:
value[2] = 1
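So grid_to_observation encodes each cell as a three-channel indicator. Illustrative values:

# [sheep_present, wolf_present, grass_fully_grown]
sheep_on_grown_grass = [1, 0, 1]
lone_wolf = [0, 1, 0]
empty_cell = [0, 0, 0]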
