
Feat: MultiCVRP wrapper #1043

Closed

35 commits
6e25369
feat: multiCVRP base implementation
Louay-Ben-nessir Feb 20, 2024
8ecceee
Merge branch 'develop' into feat--multiCVRP-wrapper
Louay-Ben-nessir Feb 26, 2024
4b804c2
feat: multi
Louay-Ben-nessir Feb 26, 2024
bf85601
Merge branch 'develop' of https://github.com/Louay-Ben-nessir/Mava in…
Louay-Ben-nessir Feb 27, 2024
09d0cd9
fix: a lot of small changes
Louay-Ben-nessir Feb 27, 2024
bf96c8b
fix: minor changes
Louay-Ben-nessir Feb 27, 2024
4d46916
fix: comments + super + observation instead of state
Louay-Ben-nessir Feb 28, 2024
e3e002c
fix: minor comment changes
Louay-Ben-nessir Feb 28, 2024
ee31d6f
Merge branch 'develop' into feat--multiCVRP-wrapper
WiemKhlifi Feb 29, 2024
d3da4ed
fix: use the MultiAgentWrapper
Louay-Ben-nessir Mar 4, 2024
1486b53
fix: annotations
Louay-Ben-nessir Mar 4, 2024
74906b6
Merge remote-tracking branch 'origin/develop' into feat--multiCVRP-wr…
Louay-Ben-nessir Mar 11, 2024
9743c59
fix: pre-commit
Louay-Ben-nessir Mar 11, 2024
7edbc41
Merge branch 'develop' into feat--multiCVRP-wrapper
sash-a Mar 12, 2024
1f20e77
Merge branch 'develop' into feat--multiCVRP-wrapper
Louay-Ben-nessir Mar 12, 2024
3abe88f
Merge branch 'develop' into feat--multiCVRP-wrapper
RuanJohn Mar 15, 2024
1ed331b
chore: node --> Node in docs
Louay-Ben-nessir Mar 18, 2024
52a8299
Merge branch 'develop' into feat--multiCVRP-wrapper
RuanJohn Mar 18, 2024
c0e7d02
chore: hardcoded numbers docs
Louay-Ben-nessir Mar 18, 2024
b45bb53
Merge branch 'feat--multiCVRP-wrapper' of https://github.com/Louay-Be…
Louay-Ben-nessir Mar 18, 2024
9ddc91a
fix: pre-commit
Louay-Ben-nessir Mar 18, 2024
39540d3
Merge branch 'develop' into feat--multiCVRP-wrapper
sash-a Mar 18, 2024
b5b5a4f
chore: removed the reward/action specs
Louay-Ben-nessir Mar 21, 2024
bc4271d
Merge branch 'feat--multiCVRP-wrapper' of https://github.com/Louay-Be…
Louay-Ben-nessir Mar 21, 2024
86ab329
Merge branch 'develop' into feat--multiCVRP-wrapper
Louay-Ben-nessir Mar 21, 2024
02f69e1
fix: added state to cleaner's modify_timestep function
Louay-Ben-nessir Mar 21, 2024
7be27f8
Merge branch 'develop' into feat--multiCVRP-wrapper
WiemKhlifi Mar 21, 2024
2e30ffd
fix: removed the unneeded action_spec
Louay-Ben-nessir Mar 22, 2024
667456c
Merge branch 'feat--multiCVRP-wrapper' of https://github.com/Louay-Be…
Louay-Ben-nessir Mar 22, 2024
b26e48e
fix: pre-commits
Louay-Ben-nessir Mar 22, 2024
9943929
Merge branch 'develop' into feat--multiCVRP-wrapper
WiemKhlifi May 9, 2024
e8d515d
Merge branch 'develop' into feat--multiCVRP-wrapper
OmaymaMahjoub Jul 5, 2024
023c915
fix: updated to the latest configs
Louay-Ben-nessir Jul 6, 2024
bda1fff
fix: corrected the 20c scenario config
Louay-Ben-nessir Jul 6, 2024
4f1a68a
chore: pre-commits
Louay-Ben-nessir Jul 6, 2024
10 changes: 10 additions & 0 deletions mava/configs/env/multicvrp.yaml
@@ -0,0 +1,10 @@
# ---Environment Configs---
defaults:
  - _self_
  - scenario: multicvrp-2v-20c  # [multicvrp-2v-20c, multicvrp-2v-6c]

env_name: MultiCVRP

eval_metric: episode_return

kwargs: {}
6 changes: 6 additions & 0 deletions mava/configs/env/scenario/multicvrp-2v-20c.yaml
@@ -0,0 +1,6 @@
name: MultiCVRP-v0
task_name: multicvrp-2v-20c

task_config:
  num_customers: 20
  num_vehicles: 2
6 changes: 6 additions & 0 deletions mava/configs/env/scenario/multicvrp-2v-6c.yaml
@@ -0,0 +1,6 @@
name: MultiCVRP-v0
task_name: multicvrp-2v-6c

task_config:
  num_customers: 6
  num_vehicles: 2
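These task_config entries are passed straight into Jumanji's generator by make_env.py below; as a minimal sketch, the 2v-6c scenario is equivalent to constructing the generator by hand (assuming the UniformRandomGenerator keyword arguments match the config keys, as the registry usage suggests):

    # Sketch: what the multicvrp-2v-6c scenario amounts to when built directly.
    from jumanji.environments.routing.multi_cvrp.generator import UniformRandomGenerator

    generator = UniformRandomGenerator(num_customers=6, num_vehicles=2)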
12 changes: 10 additions & 2 deletions mava/utils/make_env.py
@@ -26,6 +26,9 @@
from jumanji.environments.routing.lbf.generator import (
    RandomGenerator as LbfRandomGenerator,
)
from jumanji.environments.routing.multi_cvrp.generator import (
    UniformRandomGenerator as MultiCVRPRandomGenerator,
)
from jumanji.environments.routing.robot_warehouse.generator import (
    RandomGenerator as RwareRandomGenerator,
)
@@ -40,6 +43,7 @@
    LbfWrapper,
    MabraxWrapper,
    MatraxWrapper,
    MultiCVRPWrapper,
    RecordEpisodeMetrics,
    RwareWrapper,
    SmaxWrapper,
@@ -50,6 +54,7 @@
"RobotWarehouse-v0": {"generator": RwareRandomGenerator, "wrapper": RwareWrapper},
"LevelBasedForaging-v0": {"generator": LbfRandomGenerator, "wrapper": LbfWrapper},
"MaConnector-v2": {"generator": ConnectorRandomGenerator, "wrapper": ConnectorWrapper},
"MultiCVRP-v0": {"generator": MultiCVRPRandomGenerator, "wrapper": MultiCVRPWrapper},
}

# Define a different registry for Matrax since it has no generator.
@@ -65,7 +70,10 @@ def add_optional_wrappers(
) -> Environment:
    # Add the global state to observation.
    if add_global_state:
-        env = GlobalStateWrapper(env)
+        if hasattr(env, "has_global_state"):
+            env.has_global_state = True
+        else:
+            env = GlobalStateWrapper(env)

    # Add agent id to observation.
    if config.system.add_agent_id:
@@ -95,8 +103,8 @@ def make_jumanji_env(
    # Create envs.
    env = jumanji.make(env_name, generator=generator, **config.env.kwargs)
    eval_env = jumanji.make(env_name, generator=generator, **config.env.kwargs)
-    env, eval_env = wrapper(env), wrapper(eval_env)

+    env, eval_env = wrapper(env), wrapper(eval_env)
    env = add_optional_wrappers(env, config, add_global_state)
    eval_env = add_optional_wrappers(eval_env, config, add_global_state)

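As context for the has_global_state branch above: wrappers that build their own global state (the ConnectorWrapper and the new MultiCVRPWrapper) expose a has_global_state flag, so add_optional_wrappers only flips the flag instead of stacking an extra GlobalStateWrapper. A minimal sketch of the resulting MultiCVRP path (hand-built here for illustration; the real flow goes through Mava's Hydra config):

    # Hypothetical sketch of what make_jumanji_env assembles for MultiCVRP.
    import jumanji
    from jumanji.environments.routing.multi_cvrp.generator import (
        UniformRandomGenerator as MultiCVRPRandomGenerator,
    )
    from mava.wrappers import MultiCVRPWrapper

    generator = MultiCVRPRandomGenerator(num_customers=6, num_vehicles=2)
    env = MultiCVRPWrapper(jumanji.make("MultiCVRP-v0", generator=generator))
    env.has_global_state = True  # what add_optional_wrappers now does for this wrapper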
7 changes: 6 additions & 1 deletion mava/wrappers/__init__.py
@@ -16,6 +16,11 @@
from mava.wrappers.episode_metrics import RecordEpisodeMetrics
from mava.wrappers.gigastep import GigastepWrapper
from mava.wrappers.jaxmarl import MabraxWrapper, SmaxWrapper
-from mava.wrappers.jumanji import ConnectorWrapper, LbfWrapper, RwareWrapper
+from mava.wrappers.jumanji import (
+    ConnectorWrapper,
+    LbfWrapper,
+    MultiCVRPWrapper,
+    RwareWrapper,
+)
from mava.wrappers.matrax import MatraxWrapper
from mava.wrappers.observation import AgentIDWrapper, GlobalStateWrapper
149 changes: 140 additions & 9 deletions mava/wrappers/jumanji.py
@@ -16,6 +16,7 @@

import chex
import jax.numpy as jnp
from jax import tree_util
from jumanji import specs
from jumanji.env import Environment
from jumanji.environments.routing.connector import MaConnector
@@ -26,6 +27,10 @@
    TARGET,
)
from jumanji.environments.routing.lbf import LevelBasedForaging
from jumanji.environments.routing.multi_cvrp import MultiCVRP
from jumanji.environments.routing.multi_cvrp.types import (
    Observation as MultiCvrpObservation,
)
from jumanji.environments.routing.robot_warehouse import RobotWarehouse
from jumanji.types import TimeStep
from jumanji.wrappers import Wrapper
@@ -39,27 +44,27 @@ def __init__(self, env: Environment):
        self._num_agents = self._env.num_agents
        self.time_limit = self._env.time_limit

-    def modify_timestep(self, timestep: TimeStep) -> TimeStep[Observation]:
+    def modify_timestep(self, timestep: TimeStep, state: State) -> TimeStep[Observation]:
        """Modify the timestep for `step` and `reset`."""
        pass

    def reset(self, key: chex.PRNGKey) -> Tuple[State, TimeStep]:
        """Reset the environment."""
        state, timestep = self._env.reset(key)
-        return state, self.modify_timestep(timestep)
+        return state, self.modify_timestep(timestep, state)

    def step(self, state: State, action: chex.Array) -> Tuple[State, TimeStep]:
        """Step the environment."""
        state, timestep = self._env.step(state, action)
-        return state, self.modify_timestep(timestep)
+        return state, self.modify_timestep(timestep, state)

    def observation_spec(self) -> specs.Spec[Observation]:
        """Specification of the observation of the environment."""
        step_count = specs.BoundedArray(
            (self._num_agents,),
-            int,
-            jnp.zeros(self._num_agents, dtype=int),
-            jnp.repeat(self.time_limit, self._num_agents),
+            jnp.int32,
+            [0] * self._num_agents,
+            [self.time_limit] * self._num_agents,
            "step_count",
        )
        return self._env.observation_spec().replace(step_count=step_count)
@@ -71,7 +76,7 @@ class RwareWrapper(MultiAgentWrapper):
    def __init__(self, env: RobotWarehouse):
        super().__init__(env)

-    def modify_timestep(self, timestep: TimeStep) -> TimeStep[Observation]:
+    def modify_timestep(self, timestep: TimeStep, state: State) -> TimeStep[Observation]:
        """Modify the timestep for the Robotic Warehouse environment."""
        observation = Observation(
            agents_view=timestep.observation.agents_view,
@@ -108,7 +113,7 @@ def aggregate_rewards(
        reward = jnp.repeat(team_reward, self._num_agents)
        return timestep.replace(observation=observation, reward=reward)

-    def modify_timestep(self, timestep: TimeStep) -> TimeStep[Observation]:
+    def modify_timestep(self, timestep: TimeStep, state: State) -> TimeStep[Observation]:
        """Modify the timestep for Level-Based Foraging environment and update
        the reward based on the specified reward handling strategy."""

@@ -137,7 +142,7 @@ def __init__(self, env: MaConnector, has_global_state: bool = False):
        self.has_global_state = has_global_state

    def modify_timestep(
-        self, timestep: TimeStep
+        self, timestep: TimeStep, state: State
    ) -> TimeStep[Union[Observation, ObservationGlobalState]]:
        """Modify the timestep for the Connector environment."""

@@ -217,3 +222,129 @@ def observation_spec(self) -> specs.Spec[Union[Observation, ObservationGlobalState]]:
            )

        return spec
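The state argument threaded through modify_timestep above lets a wrapper read fields that only live in the environment state; a minimal hypothetical subclass (not part of this PR) showing the pattern the MultiCVRPWrapper below uses for step_count:

    # Hypothetical example wrapper: pulls step_count from the env state.
    class StepCountWrapper(MultiAgentWrapper):
        def modify_timestep(self, timestep: TimeStep, state: State) -> TimeStep[Observation]:
            observation = Observation(
                agents_view=timestep.observation.agents_view,
                action_mask=timestep.observation.action_mask,
                step_count=jnp.repeat(state.step_count, self._num_agents),
            )
            return timestep.replace(observation=observation)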


class MultiCVRPWrapper(MultiAgentWrapper):
    """Wrapper for MultiCVRP environment."""

    def __init__(self, env: MultiCVRP, has_global_state: bool = False):
        env.num_agents = env._num_vehicles
        env.time_limit = env._num_customers + 1  # added for consistency
Contributor:
Since I'm not very familiar with MultiCVRP 😅, I wanted to ask if the time_limit is only controlled in this way or if it can be set manually like in other environments?

Contributor (Author):
Jumanji's MultiCVRP doesn't offer a way to manually set the time_limit. I changed it to None to better indicate this.

        super().__init__(env)
        self._env = env
        self.has_global_state = has_global_state

    def modify_timestep(self, timestep: TimeStep, state: State) -> TimeStep[Observation]:
        observation, global_observation = self._flatten_observation(timestep.observation)
        obs_data = {
            "agents_view": observation,
            "action_mask": timestep.observation.action_mask,
            "step_count": jnp.repeat(state.step_count, self.num_agents),
        }
        if self.has_global_state:
            obs_data["global_state"] = global_observation
            observation = ObservationGlobalState(**obs_data)
        else:
            observation = Observation(**obs_data)

        reward = jnp.repeat(timestep.reward, self.num_agents)
        discount = jnp.repeat(timestep.discount, self.num_agents)
        timestep = timestep.replace(observation=observation, reward=reward, discount=discount)
        return timestep

    def _flatten_observation(
        self, observation: MultiCvrpObservation
    ) -> Tuple[chex.Array, Union[None, chex.Array]]:
        """
        Concatenates all observation fields into a single array.

        Args:
            observation (MultiCvrpObservation): The raw observation NamedTuple provided by jumanji.

        Returns:
            observations (chex.Array): Concatenated individual observations for each agent,
                shaped (num_agents, vehicle_info + customer_info).
            global_observation (Union[None, chex.Array]): Concatenated global observation
                shaped (num_agents, global_info) if has_global_state = True, None otherwise.
        """
        global_observation = None
        # N: number of nodes, same as _num_customers + 1
        # V: number of vehicles, same as num_agents
        # Nodes are composed of (x, y, demands)
        # Windows are composed of (start_time, end_time)
        # Coeffs are composed of (early, late)
        # Vehicles have ((x, y), local_time, capacity)

        # Tuple[(N, 3), (N, 2), (N, 2)]
        customers_info, _ = tree_util.tree_flatten(
            (observation.nodes, observation.windows, observation.coeffs)
        )
        # Tuple[(V, 2), (V, 1), (V, 1)]
        vehicles_info, _ = tree_util.tree_flatten(observation.vehicles)

        # (N * 7, )
        customers_info = jnp.column_stack(customers_info).ravel()
        # (V, 4)
        vehicles_info = jnp.column_stack(vehicles_info)

        if self.has_global_state:
            # (V * 4 + N * 7, )
            global_observation = jnp.concatenate((vehicles_info.ravel(), customers_info))
            # (V, V * 4 + N * 7)
            global_observation = jnp.tile(global_observation, (self.num_agents, 1))

        # (V, N * 7)
        customers_info = jnp.tile(customers_info, (self.num_agents, 1))
        # (V, 4 + N * 7)
        observations = jnp.column_stack((vehicles_info, customers_info))
        return observations, global_observation

    def observation_spec(self) -> specs.Spec[Observation]:
        step_count = specs.BoundedArray(
            (self.num_agents,), jnp.int32, 0, self._env._num_customers + 1, "step_count"
        )
        action_mask = specs.BoundedArray(
            (self.num_agents, self._env._num_customers + 1), bool, False, True, "action_mask"
        )

        agents_view = specs.BoundedArray(
            (self.num_agents, (self._env._num_customers + 1) * 7 + 4),
            jnp.float32,
            -jnp.inf,
            jnp.inf,
            "agents_view",
        )
        if self.has_global_state:
            global_state = specs.Array(
                (self.num_agents, (self._env._num_customers + 1) * 7 + 4 * self.num_agents),
                jnp.float32,
                "global_state",
            )
            return specs.Spec(
                ObservationGlobalState,
                "ObservationSpec",
                agents_view=agents_view,
                action_mask=action_mask,
                global_state=global_state,
                step_count=step_count,
            )
        return specs.Spec(
            Observation,
            "ObservationSpec",
            agents_view=agents_view,
            action_mask=action_mask,
            step_count=step_count,
        )

    def reward_spec(self) -> specs.Array:
        return specs.Array(shape=(self.num_agents,), dtype=float, name="reward")

    def discount_spec(self) -> specs.BoundedArray:
        return specs.BoundedArray(
            shape=(self.num_agents,), dtype=float, minimum=0.0, maximum=1.0, name="discount"
        )

    def action_spec(self) -> specs.Spec:
        return specs.MultiDiscreteArray(
            num_values=jnp.full(self.num_agents, self._env._num_customers + 1)
        )
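To make the spec arithmetic concrete, a small worked example for the multicvrp-2v-6c scenario (illustrative only; the numbers follow the shape comments in _flatten_observation above):

    # Worked shapes for num_vehicles V = 2, num_customers = 6 (so N = 7 nodes incl. depot).
    V, N = 2, 6 + 1
    per_node = 3 + 2 + 2      # (x, y, demand) + (start, end) window + (early, late) coeffs
    per_vehicle = 2 + 1 + 1   # (x, y) + local_time + capacity
    agents_view = (V, per_vehicle + N * per_node)       # (2, 53), matches (N * 7 + 4)
    global_state = (V, N * per_node + V * per_vehicle)  # (2, 57), matches the spec above
    action_mask = (V, N)                                # (2, 7), one action per node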