From 9c2b281eef345874aca87f4e1ab71fc34e379a74 Mon Sep 17 00:00:00 2001 From: Adam Amer <136176500+adamamer20@users.noreply.github.com> Date: Tue, 16 Jul 2024 00:08:04 +0200 Subject: [PATCH 01/10] change from types to types_ to avoid import issues --- mesa_frames/abstract/agents.py | 2 +- mesa_frames/concrete/agents.py | 6 +- mesa_frames/concrete/pandas/agentset.py | 439 ++++++++++++++++++++++ mesa_frames/concrete/polars/agentset.py | 472 ++++++++++++++++++++++++ mesa_frames/types.py | 32 -- mesa_frames/types_.py | 73 ++++ tests/test_agents.py | 2 +- 7 files changed, 988 insertions(+), 38 deletions(-) create mode 100644 mesa_frames/concrete/pandas/agentset.py create mode 100644 mesa_frames/concrete/polars/agentset.py delete mode 100644 mesa_frames/types.py create mode 100644 mesa_frames/types_.py diff --git a/mesa_frames/abstract/agents.py b/mesa_frames/abstract/agents.py index 4217648e..93baaaa0 100644 --- a/mesa_frames/abstract/agents.py +++ b/mesa_frames/abstract/agents.py @@ -9,7 +9,7 @@ from typing_extensions import Any, Self, overload from mesa_frames.abstract.mixin import CopyMixin -from mesa_frames.types import BoolSeries, DataFrame, IdsLike, Index, MaskLike, Series +from mesa_frames.types_ import BoolSeries, DataFrame, IdsLike, Index, MaskLike, Series if TYPE_CHECKING: from mesa_frames.concrete.agents import AgentSetDF diff --git a/mesa_frames/concrete/agents.py b/mesa_frames/concrete/agents.py index 14891b83..3f8530e5 100644 --- a/mesa_frames/concrete/agents.py +++ b/mesa_frames/concrete/agents.py @@ -1,14 +1,12 @@ from collections import defaultdict from collections.abc import Callable, Collection, Iterable, Iterator, Sequence -from typing import Literal, cast +from typing import TYPE_CHECKING, Literal, cast import polars as pl from typing_extensions import Any, Self, overload -from typing import TYPE_CHECKING - from mesa_frames.abstract.agents import AgentContainer, AgentSetDF -from mesa_frames.types import ( +from mesa_frames.types_ import ( AgnosticMask, BoolSeries, DataFrame, diff --git a/mesa_frames/concrete/pandas/agentset.py b/mesa_frames/concrete/pandas/agentset.py new file mode 100644 index 00000000..14b45a04 --- /dev/null +++ b/mesa_frames/concrete/pandas/agentset.py @@ -0,0 +1,439 @@ +from collections.abc import Callable, Collection, Iterable, Iterator, Sequence +from typing import TYPE_CHECKING + +import geopandas as gpd +import pandas as pd +import polars as pl +from typing_extensions import Any, Self, overload + +from mesa_frames.abstract.agents import AgentSetDF +from mesa_frames.concrete.pandas.mixin import PandasMixin +from mesa_frames.concrete.polars.agentset import AgentSetPolars +from mesa_frames.types_ import PandasIdsLike, PandasMaskLike + +if TYPE_CHECKING: + pass + + +class AgentSetPandas(AgentSetDF, PandasMixin): + _agents: pd.DataFrame + _mask: pd.Series + _copy_with_method: dict[str, tuple[str, list[str]]] = { + "_agents": ("copy", ["deep"]), + "_mask": ("copy", ["deep"]), + } + """A pandas-based implementation of the AgentSet. + + Attributes + ---------- + _agents : pd.DataFrame + The agents in the AgentSet. + _copy_only_reference : list[str] = ['_model'] + A list of attributes to copy with a reference only. + _copy_with_method: dict[str, tuple[str, list[str]]] = { + "_agents": ("copy", ["deep"]), + "_mask": ("copy", ["deep"]), + } + A dictionary of attributes to copy with a specified method and arguments. + _mask : pd.Series + A boolean mask indicating which agents are active. + _model : ModelDF + The model that the AgentSetDF belongs to. + + Properties + ---------- + active_agents(self) -> pd.DataFrame + Get the active agents in the AgentSetPandas. + agents(self) -> pd.DataFrame + Get or set the agents in the AgentSetPandas. + inactive_agents(self) -> pd.DataFrame + Get the inactive agents in the AgentSetPandas. + model(self) -> ModelDF + Get the model associated with the AgentSetPandas. + random(self) -> Generator + Get the random number generator associated with the model. + + Methods + ------- + __init__(self, model: ModelDF) -> None + Initialize a new AgentSetPandas. + add(self, other: pd.DataFrame | Sequence[Any] | dict[str, Any], inplace: bool = True) -> Self + Add agents to the AgentSetPandas. + contains(self, ids: PandasIdsLike) -> bool | pd.Series + Check if agents with the specified IDs are in the AgentSetPandas. + copy(self, deep: bool = False, memo: dict | None = None) -> Self + Create a copy of the AgentSetPandas. + discard(self, ids: PandasIdsLike, inplace: bool = True) -> Self + Remove an agent from the AgentSetPandas. Does not raise an error if the agent is not found. + do(self, method_name: str, *args, return_results: bool = False, inplace: bool = True, **kwargs) -> Self | Any + Invoke a method on the AgentSetPandas. + get(self, attr_names: str | Collection[str] | None, mask: PandasMaskLike = None) -> pd.Series | pd.DataFrame + Retrieve the value of a specified attribute for each agent in the AgentSetPandas. + remove(self, ids: PandasIdsLike, inplace: bool = True) -> Self + Remove agents from the AgentSetPandas. + select(self, mask: PandasMaskLike = None, filter_func: Callable[[Self], PandasMaskLike] | None = None, n: int | None = None, negate: bool = False, inplace: bool = True) -> Self + Select agents in the AgentSetPandas based on the given criteria. + set(self, attr_names: str | Collection[str] | dict[str, Any] | None = None, values: Any | None = None, mask: PandasMaskLike | None = None, inplace: bool = True) -> Self + Set the value of a specified attribute or attributes for each agent in the mask in the AgentSetPandas. + shuffle(self, inplace: bool = True) -> Self + Shuffle the order of agents in the AgentSetPandas. + sort(self, by: str | Sequence[str], ascending: bool | Sequence[bool] = True, inplace: bool = True, **kwargs) -> Self + Sort the agents in the AgentSetPandas based on the given criteria. + to_polars(self) -> "AgentSetPolars" + Convert the AgentSetPandas to an AgentSetPolars. + _get_bool_mask(self, mask: PandasMaskLike = None) -> pd.Series + Get a boolean mask for selecting agents. + _get_masked_df(self, mask: PandasMaskLike = None) -> pd.DataFrame + Get a DataFrame of agents that match the mask. + __getattr__(self, key: str) -> pd.Series + Retrieve an attribute of the underlying DataFrame. + __iter__(self) -> Iterator + Get an iterator for the agents in the AgentSetPandas. + __len__(self) -> int + Get the number of agents in the AgentSetPandas. + __repr__(self) -> str + Get the string representation of the AgentSetPandas. + __reversed__(self) -> Iterator + Get a reversed iterator for the agents in the AgentSetPandas. + __str__(self) -> str + Get the string representation of the AgentSetPandas. + """ + + def add( + self, + agents: pd.DataFrame | gpd.GeoDataFrame | Sequence[Any] | dict[str, Any], + inplace: bool = True, + ) -> Self: + obj = self._get_obj(inplace) + if isinstance(agents, gpd.GeoDataFrame): + try: + self.model.space + except ValueError: + raise ValueError( + "You are adding agents with a GeoDataFrame but haven't set model.space. Set it before adding agents with a GeoDataFrame or add agents with a standard DataFrame" + ) + if isinstance(agents, (pd.DataFrame, gpd.GeoDataFrame)): + new_agents = agents + if "unique_id" != agents.index.name: + try: + new_agents.set_index("unique_id", inplace=True, drop=True) + except KeyError: + raise KeyError("DataFrame must have a unique_id column/index.") + elif isinstance(agents, dict): + if "unique_id" not in agents: + raise KeyError("Dictionary must have a unique_id key.") + index = agents.pop("unique_id") + if not isinstance(index, list): + index = [index] + new_agents = pd.DataFrame(agents, index=pd.Index(index, name="unique_id")) + else: + if len(agents) != len(obj._agents.columns) + 1: + raise ValueError( + "Length of data must match the number of columns in the AgentSet if being added as a Collection." + ) + columns = pd.Index(["unique_id"]).append(obj._agents.columns.copy()) + new_agents = pd.DataFrame([agents], columns=columns).set_index( + "unique_id", drop=True + ) + + if new_agents.index.dtype != "int64": + raise TypeError("unique_id must be of type int64.") + + if not obj._agents.index.intersection(new_agents.index).empty: + raise KeyError("Some IDs already exist in the agent set.") + + original_active_indices = obj._mask.index[obj._mask].copy() + + obj._agents = pd.concat([obj._agents, new_agents]) + + obj._update_mask(original_active_indices, new_agents.index) + + return obj + + @overload + def contains(self, agents: int) -> bool: ... + + @overload + def contains(self, agents: PandasIdsLike) -> pd.Series: ... + + def contains(self, agents: PandasIdsLike) -> bool | pd.Series: + if isinstance(agents, pd.Series): + return agents.isin(self._agents.index) + elif isinstance(agents, pd.Index): + return pd.Series( + agents.isin(self._agents.index), index=agents, dtype=pd.BooleanDtype() + ) + elif isinstance(agents, Collection): + return pd.Series(list(agents), index=list(agents)).isin(self._agents.index) + else: + return agents in self._agents.index + + def get( + self, + attr_names: str | Collection[str] | None = None, + mask: PandasMaskLike = None, + ) -> pd.Index | pd.Series | pd.DataFrame: + mask = self._get_bool_mask(mask) + if attr_names is None: + return self._agents.loc[mask] + else: + if attr_names == "unique_id": + return self._agents.loc[mask].index + if isinstance(attr_names, str): + return self._agents.loc[mask, attr_names] + if isinstance(attr_names, Collection): + return self._agents.loc[mask, list(attr_names)] + + def remove( + self, + ids: PandasIdsLike, + inplace: bool = True, + ) -> Self: + obj = self._get_obj(inplace) + initial_len = len(obj._agents) + mask = obj._get_bool_mask(ids) + remove_ids = obj._agents[mask].index + original_active_indices = obj._mask.index[obj._mask].copy() + obj._agents.drop(remove_ids, inplace=True) + if len(obj._agents) == initial_len: + raise KeyError("Some IDs were not found in agent set.") + + self._update_mask(original_active_indices) + return obj + + def set( + self, + attr_names: str | dict[str, Any] | Collection[str] | None = None, + values: Any | None = None, + mask: PandasMaskLike = None, + inplace: bool = True, + ) -> Self: + obj = self._get_obj(inplace) + b_mask = obj._get_bool_mask(mask) + masked_df = obj._get_masked_df(mask) + + if not attr_names: + attr_names = masked_df.columns + + if isinstance(attr_names, dict): + for key, val in attr_names.items(): + masked_df.loc[:, key] = val + elif ( + isinstance(attr_names, str) + or ( + isinstance(attr_names, Collection) + and all(isinstance(n, str) for n in attr_names) + ) + ) and values is not None: + if not isinstance(attr_names, str): # isinstance(attr_names, Collection) + attr_names = list(attr_names) + masked_df.loc[:, attr_names] = values + else: + raise ValueError( + "Either attr_names must be a dictionary with columns as keys and values or values must be provided." + ) + + non_masked_df = obj._agents[~b_mask] + original_index = obj._agents.index + obj._agents = pd.concat([non_masked_df, masked_df]) + obj._agents = obj._agents.reindex(original_index) + return obj + + def select( + self, + mask: PandasMaskLike = None, + filter_func: Callable[[Self], PandasMaskLike] | None = None, + n: int | None = None, + negate: bool = False, + inplace: bool = True, + ) -> Self: + obj = self._get_obj(inplace) + bool_mask = obj._get_bool_mask(mask) + if filter_func: + bool_mask = bool_mask & obj._get_bool_mask(filter_func(obj)) + if negate: + bool_mask = ~bool_mask + if n is not None: + bool_mask = pd.Series( + obj._agents.index.isin(obj._agents[bool_mask].sample(n).index), + index=obj._agents.index, + ) + obj._mask = bool_mask + return obj + + def shuffle(self, inplace: bool = True) -> Self: + obj = self._get_obj(inplace) + obj._agents = obj._agents.sample(frac=1) + return obj + + def sort( + self, + by: str | Sequence[str], + ascending: bool | Sequence[bool] = True, + inplace: bool = True, + **kwargs, + ) -> Self: + obj = self._get_obj(inplace) + obj._agents.sort_values(by=by, ascending=ascending, **kwargs, inplace=True) + return obj + + def to_polars(self) -> AgentSetPolars: + new_obj = AgentSetPolars(self._model) + new_obj._agents = pl.DataFrame(self._agents) + new_obj._mask = pl.Series(self._mask) + return new_obj + + def _concatenate_agentsets( + self, + agentsets: Iterable[Self], + duplicates_allowed: bool = True, + keep_first_only: bool = True, + original_masked_index: pd.Index | None = None, + ) -> Self: + if not duplicates_allowed: + indices = [self._agents.index.to_series()] + [ + agentset._agents.index.to_series() for agentset in agentsets + ] + pd.concat(indices, verify_integrity=True) + if duplicates_allowed & keep_first_only: + final_df = self._agents.copy() + final_mask = self._mask.copy() + for obj in iter(agentsets): + final_df = final_df.combine_first(obj._agents) + final_mask = final_mask.combine_first(obj._mask) + else: + final_df = pd.concat([obj._agents for obj in agentsets]) + final_mask = pd.concat([obj._mask for obj in agentsets]) + self._agents = final_df + self._mask = final_mask + if not isinstance(original_masked_index, type(None)): + ids_to_remove = original_masked_index.difference(self._agents.index) + if not ids_to_remove.empty: + self.remove(ids_to_remove, inplace=True) + return self + + def _get_bool_mask( + self, + mask: PandasMaskLike = None, + ) -> pd.Series: + if isinstance(mask, pd.Series) and mask.dtype == bool: + return mask + elif isinstance(mask, pd.DataFrame): + return pd.Series( + self._agents.index.isin(mask.index), index=self._agents.index + ) + elif isinstance(mask, list): + return pd.Series(self._agents.index.isin(mask), index=self._agents.index) + elif mask is None or mask == "all": + return pd.Series(True, index=self._agents.index) + elif mask == "active": + return self._mask + else: + return pd.Series(self._agents.index.isin([mask]), index=self._agents.index) + + def _get_masked_df( + self, + mask: PandasMaskLike = None, + ) -> pd.DataFrame: + if isinstance(mask, pd.Series) and mask.dtype == bool: + return self._agents.loc[mask] + elif isinstance(mask, pd.DataFrame): + if mask.index.name != "unique_id": + if "unique_id" in mask.columns: + mask.set_index("unique_id", inplace=True, drop=True) + else: + raise KeyError("DataFrame must have a unique_id column/index.") + return pd.DataFrame(index=mask.index).join( + self._agents, on="unique_id", how="left" + ) + elif isinstance(mask, pd.Series): + mask_df = mask.to_frame("unique_id").set_index("unique_id") + return mask_df.join(self._agents, on="unique_id", how="left") + elif mask is None or mask == "all": + return self._agents + elif mask == "active": + return self._agents.loc[self._mask] + else: + mask_series = pd.Series(mask) + mask_df = mask_series.to_frame("unique_id").set_index("unique_id") + return mask_df.join(self._agents, on="unique_id", how="left") + + @overload + def _get_obj_copy(self, obj: pd.Series) -> pd.Series: ... + + @overload + def _get_obj_copy(self, obj: pd.DataFrame) -> pd.DataFrame: ... + + @overload + def _get_obj_copy(self, obj: pd.Index) -> pd.Index: ... + + def _get_obj_copy( + self, obj: pd.Series | pd.DataFrame | pd.Index + ) -> pd.Series | pd.DataFrame | pd.Index: + return obj.copy() + + def _update_mask( + self, + original_active_indices: pd.Index, + new_active_indices: pd.Index | None = None, + ) -> None: + # Update the mask with the old active agents and the new agents + if new_active_indices is None: + self._mask = pd.Series( + self._agents.index.isin(original_active_indices), + index=self._agents.index, + dtype=pd.BooleanDtype(), + ) + else: + self._mask = pd.Series( + self._agents.index.isin(original_active_indices) + | self._agents.index.isin(new_active_indices), + index=self._agents.index, + dtype=pd.BooleanDtype(), + ) + + def __getattr__(self, name: str) -> Any: + super().__getattr__(name) + return getattr(self._agents, name) + + def __iter__(self) -> Iterator[dict[str, Any]]: + for index, row in self._agents.iterrows(): + row_dict = row.to_dict() + row_dict["unique_id"] = index + yield row_dict + + def __len__(self) -> int: + return len(self._agents) + + def __reversed__(self) -> Iterator: + return iter(self._agents[::-1].iterrows()) + + @property + def agents(self) -> pd.DataFrame: + return self._agents + + @agents.setter + def agents(self, new_agents: pd.DataFrame) -> None: + if new_agents.index.name == "unique_id": + pass + elif "unique_id" in new_agents.columns: + new_agents.set_index("unique_id", inplace=True, drop=True) + else: + raise KeyError("The DataFrame should have a 'unique_id' index/column") + self._agents = new_agents + + @property + def active_agents(self) -> pd.DataFrame: + return self._agents.loc[self._mask] + + @active_agents.setter + def active_agents(self, mask: PandasMaskLike) -> None: + self.select(mask=mask, inplace=True) + + @property + def inactive_agents(self) -> pd.DataFrame: + return self._agents.loc[~self._mask] + + @property + def index(self) -> pd.Index: + return self._agents.index diff --git a/mesa_frames/concrete/polars/agentset.py b/mesa_frames/concrete/polars/agentset.py new file mode 100644 index 00000000..c8d0ecc6 --- /dev/null +++ b/mesa_frames/concrete/polars/agentset.py @@ -0,0 +1,472 @@ +from collections.abc import Callable, Collection, Iterable, Iterator, Sequence +from typing import TYPE_CHECKING + +import geopolars as gpl +import polars as pl +from polars._typing import IntoExpr +from typing_extensions import Any, Self, overload + +from mesa_frames.concrete.agents import AgentSetDF +from mesa_frames.concrete.polars.mixin import PolarsMixin +from mesa_frames.types_ import PolarsIdsLike, PolarsMaskLike + +if TYPE_CHECKING: + from mesa_frames.concrete.pandas.agentset import AgentSetPandas + + +class AgentSetPolars(AgentSetDF, PolarsMixin): + _agents: pl.DataFrame + _copy_with_method: dict[str, tuple[str, list[str]]] = { + "_agents": ("clone", []), + } + _copy_only_reference: list[str] = ["_model", "_mask"] + _mask: pl.Expr | pl.Series + + """A polars-based implementation of the AgentSet. + + Attributes + ---------- + _agents : pl.DataFrame + The agents in the AgentSet. + _copy_only_reference : list[str] = ["_model", "_mask"] + A list of attributes to copy with a reference only. + _copy_with_method: dict[str, tuple[str, list[str]]] = { + "_agents": ("copy", ["deep"]), + "_mask": ("copy", ["deep"]), + } + A dictionary of attributes to copy with a specified method and arguments. + model : ModelDF + The model to which the AgentSet belongs. + _mask : pl.Series + A boolean mask indicating which agents are active. + + Properties + ---------- + active_agents(self) -> pl.DataFrame + Get the active agents in the AgentSetPolars. + agents(self) -> pl.DataFrame + Get or set the agents in the AgentSetPolars. + inactive_agents(self) -> pl.DataFrame + Get the inactive agents in the AgentSetPolars. + model(self) -> ModelDF + Get the model associated with the AgentSetPolars. + random(self) -> Generator + Get the random number generator associated with the model. + + + Methods + ------- + __init__(self, model: ModelDF) -> None + Initialize a new AgentSetPolars. + add(self, other: pl.DataFrame | Sequence[Any] | dict[str, Any], inplace: bool = True) -> Self + Add agents to the AgentSetPolars. + contains(self, ids: PolarsIdsLike) -> bool | pl.Series + Check if agents with the specified IDs are in the AgentSetPolars. + copy(self, deep: bool = False, memo: dict | None = None) -> Self + Create a copy of the AgentSetPolars. + discard(self, ids: PolarsIdsLike, inplace: bool = True) -> Self + Remove an agent from the AgentSetPolars. Does not raise an error if the agent is not found. + do(self, method_name: str, *args, return_results: bool = False, inplace: bool = True, **kwargs) -> Self | Any + Invoke a method on the AgentSetPolars. + get(self, attr_names: IntoExpr | Iterable[IntoExpr] | None, mask: PolarsMaskLike = None) -> pl.Series | pl.DataFrame + Retrieve the value of a specified attribute for each agent in the AgentSetPolars. + remove(self, ids: PolarsIdsLike, inplace: bool = True) -> Self + Remove agents from the AgentSetPolars. + select(self, mask: PolarsMaskLike = None, filter_func: Callable[[Self], PolarsMaskLike] | None = None, n: int | None = None, negate: bool = False, inplace: bool = True) -> Self + Select agents in the AgentSetPolars based on the given criteria. + set(self, attr_names: str | Collection[str] | dict[str, Any] | None = None, values: Any | None = None, mask: PolarsMaskLike | None = None, inplace: bool = True) -> Self + Set the value of a specified attribute or attributes for each agent in the mask in the AgentSetPolars. + shuffle(self, inplace: bool = True) -> Self + Shuffle the order of agents in the AgentSetPolars. + sort(self, by: str | Sequence[str], ascending: bool | Sequence[bool] = True, inplace: bool = True, **kwargs) -> Self + Sort the agents in the AgentSetPolars based on the given criteria. + to_pandas(self) -> "AgentSetPandas" + Convert the AgentSetPolars to an AgentSetPandas. + _get_bool_mask(self, mask: PolarsMaskLike = None) -> pl.Series | pl.Expr + Get a boolean mask for selecting agents. + _get_masked_df(self, mask: PolarsMaskLike = None) -> pl.DataFrame + Get a DataFrame of agents that match the mask. + __getattr__(self, key: str) -> pl.Series + Retrieve an attribute of the underlying DataFrame. + __iter__(self) -> Iterator + Get an iterator for the agents in the AgentSetPolars. + __len__(self) -> int + Get the number of agents in the AgentSetPolars. + __repr__(self) -> str + Get the string representation of the AgentSetPolars. + __reversed__(self) -> Iterator + Get a reversed iterator for the agents in the AgentSetPolars. + __str__(self) -> str + Get the string representation of the AgentSetPolars. + + """ + + def add( + self, + agents: pl.DataFrame | Sequence[Any] | dict[str, Any], + inplace: bool = True, + ) -> Self: + """Add agents to the AgentSetPolars. + + Parameters + ---------- + other : pl.DataFrame | Sequence[Any] | dict[str, Any] + The agents to add. + inplace : bool, optional + Whether to add the agents in place, by default True. + + Returns + ------- + Self + The updated AgentSetPolars. + """ + obj = self._get_obj(inplace) + if isinstance(agents, gpl.GeoDataFrame): + try: + self.model.space + except ValueError: + raise ValueError( + "You are adding agents with a GeoDataFrame but haven't set model.space. Set it before adding agents with a GeoDataFrame or add agents with a standard DataFrame" + ) + if isinstance(agents, gpl.GeoDataFrame, pl.DataFrame): + if "unique_id" not in agents.columns: + raise KeyError("DataFrame must have a unique_id column.") + new_agents = agents + elif isinstance(agents, dict): + if "unique_id" not in agents: + raise KeyError("Dictionary must have a unique_id key.") + new_agents = pl.DataFrame(agents) + else: + if len(agents) != len(obj._agents.columns): + raise ValueError( + "Length of data must match the number of columns in the AgentSet if being added as a Collection." + ) + new_agents = pl.DataFrame([agents], schema=obj._agents.schema) + + if new_agents["unique_id"].dtype != pl.Int64: + raise TypeError("unique_id column must be of type int64.") + + # If self._mask is pl.Expr, then new mask is the same. + # If self._mask is pl.Series[bool], then new mask has to be updated. + + if isinstance(obj._mask, pl.Series): + original_active_indices = obj._agents.filter(obj._mask)["unique_id"] + + obj._agents = pl.concat([obj._agents, new_agents], how="diagonal_relaxed") + + if isinstance(obj._mask, pl.Series): + obj._update_mask(original_active_indices, new_agents["unique_id"]) + + return obj + + @overload + def contains(self, agents: int) -> bool: ... + + @overload + def contains(self, agents: PolarsIdsLike) -> pl.Series: ... + + def contains( + self, + agents: PolarsIdsLike, + ) -> bool | pl.Series: + if isinstance(agents, pl.Series): + return agents.is_in(self._agents["unique_id"]) + elif isinstance(agents, Collection): + return pl.Series(agents).is_in(self._agents["unique_id"]) + else: + return agents in self._agents["unique_id"] + + def get( + self, + attr_names: IntoExpr | Iterable[IntoExpr] | None, + mask: PolarsMaskLike = None, + ) -> pl.Series | pl.DataFrame: + masked_df = self._get_masked_df(mask) + attr_names = self.agents.select(attr_names).columns.copy() + if not attr_names: + return masked_df + masked_df = masked_df.select(attr_names) + if masked_df.shape[1] == 1: + return masked_df[masked_df.columns[0]] + return masked_df + + def remove(self, ids: PolarsIdsLike, inplace: bool = True) -> Self: + obj = self._get_obj(inplace=inplace) + initial_len = len(obj._agents) + mask = obj._get_bool_mask(ids) + + if isinstance(obj._mask, pl.Series): + original_active_indices = obj._agents.filter(obj._mask)["unique_id"] + + obj._agents = obj._agents.filter(mask.not_()) + if len(obj._agents) == initial_len: + raise KeyError(f"IDs {ids} not found in agent set.") + + if isinstance(obj._mask, pl.Series): + obj._update_mask(original_active_indices) + return obj + + def set( + self, + attr_names: str | Collection[str] | dict[str, Any] | None = None, + values: Any | None = None, + mask: PolarsMaskLike = None, + inplace: bool = True, + ) -> Self: + obj = self._get_obj(inplace) + b_mask = obj._get_bool_mask(mask) + masked_df = obj._get_masked_df(mask) + + if not attr_names: + attr_names = masked_df.columns + attr_names.remove("unique_id") + + def process_single_attr( + masked_df: pl.DataFrame, attr_name: str, values: Any + ) -> pl.DataFrame: + if isinstance(values, pl.DataFrame): + return masked_df.with_columns(values.to_series().alias(attr_name)) + elif isinstance(values, pl.Expr): + return masked_df.with_columns(values.alias(attr_name)) + if isinstance(values, pl.Series): + return masked_df.with_columns(values.alias(attr_name)) + else: + if isinstance(values, Collection): + values = pl.Series(values) + else: + values = pl.repeat(values, len(masked_df)) + return masked_df.with_columns(values.alias(attr_name)) + + if isinstance(attr_names, str) and values is not None: + masked_df = process_single_attr(masked_df, attr_names, values) + elif isinstance(attr_names, Collection) and values is not None: + if isinstance(values, Collection) and len(attr_names) == len(values): + for attribute, val in zip(attr_names, values): + masked_df = process_single_attr(masked_df, attribute, val) + else: + for attribute in attr_names: + masked_df = process_single_attr(masked_df, attribute, values) + elif isinstance(attr_names, dict): + for key, val in attr_names.items(): + masked_df = process_single_attr(masked_df, key, val) + else: + raise ValueError( + "attr_names must be a string, a collection of string or a dictionary with columns as keys and values." + ) + non_masked_df = obj._agents.filter(b_mask.not_()) + original_index = obj._agents.select("unique_id") + obj._agents = pl.concat([non_masked_df, masked_df], how="diagonal_relaxed") + obj._agents = original_index.join(obj._agents, on="unique_id", how="left") + return obj + + def select( + self, + mask: PolarsMaskLike = None, + filter_func: Callable[[Self], pl.Series] | None = None, + n: int | None = None, + negate: bool = False, + inplace: bool = True, + ) -> Self: + obj = self._get_obj(inplace) + mask = obj._get_bool_mask(mask) + if filter_func: + mask = mask & filter_func(obj) + if n is not None: + mask = (obj._agents["unique_id"]).is_in( + obj._agents.filter(mask).sample(n)["unique_id"] + ) + if negate: + mask = mask.not_() + obj._mask = mask + return obj + + def shuffle(self, inplace: bool = True) -> Self: + obj = self._get_obj(inplace) + obj._agents = obj._agents.sample(fraction=1, shuffle=True) + return obj + + def sort( + self, + by: str | Sequence[str], + ascending: bool | Sequence[bool] = True, + inplace: bool = True, + **kwargs, + ) -> Self: + obj = self._get_obj(inplace) + if isinstance(ascending, bool): + descending = not ascending + else: + descending = [not a for a in ascending] + obj._agents = obj._agents.sort(by=by, descending=descending, **kwargs) + return obj + + def to_pandas(self) -> "AgentSetPandas": + from mesa_frames.concrete.pandas.agentset_pandas import AgentSetPandas + + new_obj = AgentSetPandas(self._model) + new_obj._agents = self._agents.to_pandas() + if isinstance(self._mask, pl.Series): + new_obj._mask = self._mask.to_pandas() + else: # self._mask is Expr + new_obj._mask = ( + self._agents["unique_id"] + .is_in(self._agents.filter(self._mask)["unique_id"]) + .to_pandas() + ) + return new_obj + + def _concatenate_agentsets( + self, + agentsets: Iterable[Self], + duplicates_allowed: bool = True, + keep_first_only: bool = True, + original_masked_index: pl.Series | None = None, + ) -> Self: + if not duplicates_allowed: + indices_list = [self._agents["unique_id"]] + [ + agentset._agents["unique_id"] for agentset in agentsets + ] + all_indices = pl.concat(indices_list) + if all_indices.is_duplicated().any(): + raise ValueError( + "Some ids are duplicated in the AgentSetDFs that are trying to be concatenated" + ) + if duplicates_allowed & keep_first_only: + # Find the original_index list (ie longest index list), to sort correctly the rows after concatenation + max_length = max(len(agentset) for agentset in agentsets) + for agentset in agentsets: + if len(agentset) == max_length: + original_index = agentset._agents["unique_id"] + final_dfs = [self._agents] + final_active_indices = [self._agents["unique_id"]] + final_indices = self._agents["unique_id"].clone() + for obj in iter(agentsets): + # Remove agents that are already in the final DataFrame + final_dfs.append( + obj._agents.filter(pl.col("unique_id").is_in(final_indices).not_()) + ) + # Add the indices of the active agents of current AgentSet + final_active_indices.append(obj._agents.filter(obj._mask)["unique_id"]) + # Update the indices of the agents in the final DataFrame + final_indices = pl.concat( + [final_indices, final_dfs[-1]["unique_id"]], how="vertical" + ) + # Left-join original index with concatenated dfs to keep original ids order + final_df = original_index.to_frame().join( + pl.concat(final_dfs, how="diagonal_relaxed"), on="unique_id", how="left" + ) + # + final_active_index = pl.concat(final_active_indices, how="vertical") + + else: + final_df = pl.concat( + [obj._agents for obj in agentsets], how="diagonal_relaxed" + ) + final_active_index = pl.concat( + [obj._agents.filter(obj._mask)["unique_id"] for obj in agentsets] + ) + final_mask = final_df["unique_id"].is_in(final_active_index) + self._agents = final_df + self._mask = final_mask + # If some ids were removed in the do-method, we need to remove them also from final_df + if not isinstance(original_masked_index, type(None)): + ids_to_remove = original_masked_index.filter( + original_masked_index.is_in(self._agents["unique_id"]).not_() + ) + if not ids_to_remove.is_empty(): + self.remove(ids_to_remove, inplace=True) + return self + + @overload + def _get_obj_copy(self, obj: pl.Series) -> pl.Series: ... + + @overload + def _get_obj_copy(self, obj: pl.DataFrame) -> pl.DataFrame: ... + + def _get_obj_copy(self, obj: pl.Series | pl.DataFrame) -> pl.Series | pl.DataFrame: + return obj.clone() + + def _update_mask( + self, original_active_indices: pl.Series, new_indices: pl.Series | None = None + ) -> None: + if new_indices is not None: + self._mask = self._agents["unique_id"].is_in( + original_active_indices + ) | self._agents["unique_id"].is_in(new_indices) + else: + self._mask = self._agents["unique_id"].is_in(original_active_indices) + + def __getattr__(self, key: str) -> pl.Series: + super().__getattr__(key) + return self._agents[key] + + @overload + def __getitem__( + self, + key: str | tuple[PolarsMaskLike, str], + ) -> pl.Series: ... + + @overload + def __getitem__( + self, + key: ( + PolarsMaskLike + | Collection[str] + | tuple[ + PolarsMaskLike, + Collection[str], + ] + ), + ) -> pl.DataFrame: ... + + def __getitem__( + self, + key: ( + str + | Collection[str] + | PolarsMaskLike + | tuple[PolarsMaskLike, str] + | tuple[ + PolarsMaskLike, + Collection[str], + ] + ), + ) -> pl.Series | pl.DataFrame: + attr = super().__getitem__(key) + assert isinstance(attr, (pl.Series, pl.DataFrame)) + return attr + + def __iter__(self) -> Iterator[dict[str, Any]]: + return iter(self._agents.iter_rows(named=True)) + + def __len__(self) -> int: + return len(self._agents) + + def __reversed__(self) -> Iterator: + return reversed(iter(self._agents.iter_rows(named=True))) + + @property + def agents(self) -> pl.DataFrame: + return self._agents + + @agents.setter + def agents(self, agents: pl.DataFrame) -> None: + if "unique_id" not in agents.columns: + raise KeyError("DataFrame must have a unique_id column.") + self._agents = agents + + @property + def active_agents(self) -> pl.DataFrame: + return self.agents.filter(self._mask) + + @active_agents.setter + def active_agents(self, mask: PolarsMaskLike) -> None: + self.select(mask=mask, inplace=True) + + @property + def inactive_agents(self) -> pl.DataFrame: + return self.agents.filter(~self._mask) + + @property + def index(self) -> pl.Series: + return self._agents["unique_id"] diff --git a/mesa_frames/types.py b/mesa_frames/types.py deleted file mode 100644 index 232aa0f4..00000000 --- a/mesa_frames/types.py +++ /dev/null @@ -1,32 +0,0 @@ -from collections.abc import Collection -from typing import Literal - -import pandas as pd -import polars as pl -from numpy import ndarray - -####----- Agnostic Types -----#### -AgnosticMask = Literal["all", "active"] | None -AgnosticIds = int | Collection[int] - -###----- Pandas Types -----### - -ArrayLike = pd.api.extensions.ExtensionArray | ndarray -AnyArrayLike = ArrayLike | pd.Index | pd.Series -PandasMaskLike = AgnosticMask | pd.Series | pd.DataFrame | AnyArrayLike -PandasIdsLike = AgnosticIds | pd.Series | pd.Index - -###----- Polars Types -----### - -PolarsMaskLike = AgnosticMask | pl.Expr | pl.Series | pl.DataFrame | Collection[int] -PolarsIdsLike = AgnosticIds | pl.Series - -###----- Generic -----### - -DataFrame = pd.DataFrame | pl.DataFrame -Series = pd.Series | pl.Series -Index = pd.Index | pl.Series -BoolSeries = pd.Series | pl.Series -MaskLike = AgnosticMask | PandasMaskLike | PolarsMaskLike -IdsLike = AgnosticIds | PandasIdsLike | PolarsIdsLike -TimeT = float | int diff --git a/mesa_frames/types_.py b/mesa_frames/types_.py new file mode 100644 index 00000000..2e9c6014 --- /dev/null +++ b/mesa_frames/types_.py @@ -0,0 +1,73 @@ +from collections.abc import Collection + +import geopandas as gpd +import geopolars as gpl +import numpy as np +import pandas as pd +import polars as pl +from numpy import ndarray +from typing_extensions import Literal, Sequence + +####----- Agnostic Types -----#### +AgnosticMask = Literal["all", "active"] | None +AgnosticIds = int | Collection[int] + +###----- Pandas Types -----### + +ArrayLike = pd.api.extensions.ExtensionArray | ndarray +AnyArrayLike = ArrayLike | pd.Index | pd.Series +PandasMaskLike = AgnosticMask | pd.Series | pd.DataFrame | AnyArrayLike +PandasIdsLike = AgnosticIds | pd.Series | pd.Index +PandasGridCapacity = np.ndarray + +###----- Polars Types -----### + +PolarsMaskLike = AgnosticMask | pl.Expr | pl.Series | pl.DataFrame | Collection[int] +PolarsIdsLike = AgnosticIds | pl.Series +PolarsGridCapacity = list[pl.Expr] + +###----- Generic -----### + +GeoDataFrame = gpd.GeoDataFrame | gpl.GeoDataFrame +GeoSeries = gpd.GeoSeries | gpl.GeoSeries +DataFrame = pd.DataFrame | pl.DataFrame +Series = pd.Series | pl.Series | GeoSeries +Index = pd.Index | pl.Series +BoolSeries = pd.Series | pl.Series +MaskLike = AgnosticMask | PandasMaskLike | PolarsMaskLike +IdsLike = AgnosticIds | PandasIdsLike | PolarsIdsLike + +###----- Time ------### +TimeT = float | int + + +###----- Space -----### + +NetworkCoordinate = int | DataFrame + +GridCoordinate = int | Sequence[int] | DataFrame + +DiscreteCoordinate = NetworkCoordinate | GridCoordinate +ContinousCoordinate = float | Sequence[float] | DataFrame + +SpaceCoordinate = DiscreteCoordinate | ContinousCoordinate + + +NetworkCoordinates = NetworkCoordinate | Collection[NetworkCoordinate] +GridCoordinates = ( + GridCoordinate | Sequence[int | slice | Sequence[int]] | Collection[GridCoordinate] +) + +DiscreteCoordinates = NetworkCoordinates | GridCoordinates +ContinousCoordinates = ( + ContinousCoordinate + | Sequence[float | Sequence[float]] + | Collection[ContinousCoordinate] +) + +SpaceCoordinates = DiscreteCoordinates | ContinousCoordinates + +GridCapacity = PandasGridCapacity | PolarsGridCapacity +NetworkCapacity = DataFrame + +DiscreteSpaceCapacity = GridCapacity | NetworkCapacity diff --git a/tests/test_agents.py b/tests/test_agents.py index 59d7be13..f1886b15 100644 --- a/tests/test_agents.py +++ b/tests/test_agents.py @@ -6,7 +6,7 @@ from mesa_frames import AgentsDF, ModelDF from mesa_frames.abstract.agents import AgentSetDF -from mesa_frames.types import MaskLike +from mesa_frames.types_ import MaskLike from tests.test_agentset_pandas import ( ExampleAgentSetPandas, fix1_AgentSetPandas, From 9546b1064daaaa123307079eae71b887a630c489 Mon Sep 17 00:00:00 2001 From: Adam Amer <136176500+adamamer20@users.noreply.github.com> Date: Tue, 16 Jul 2024 00:08:29 +0200 Subject: [PATCH 02/10] creation of DataFrameMixin --- mesa_frames/abstract/mixin.py | 80 ++++++++++++++- mesa_frames/concrete/pandas/mixin.py | 119 ++++++++++++++++++++++ mesa_frames/concrete/polars/mixin.py | 143 +++++++++++++++++++++++++++ 3 files changed, 341 insertions(+), 1 deletion(-) create mode 100644 mesa_frames/concrete/pandas/mixin.py create mode 100644 mesa_frames/concrete/polars/mixin.py diff --git a/mesa_frames/abstract/mixin.py b/mesa_frames/abstract/mixin.py index a62752d9..258a331a 100644 --- a/mesa_frames/abstract/mixin.py +++ b/mesa_frames/abstract/mixin.py @@ -1,7 +1,9 @@ from abc import ABC, abstractmethod from copy import copy, deepcopy -from typing_extensions import Self +from typing_extensions import Any, Collection, Iterator, Literal, Self, Sequence + +from mesa_frames.types_ import BoolSeries, DataFrame, MaskLike, Series class CopyMixin(ABC): @@ -142,3 +144,79 @@ def __deepcopy__(self, memo: dict) -> Self: A deep copy of the AgentContainer. """ return self.copy(deep=True, memo=memo) + + +class DataFrameMixin(ABC): + @abstractmethod + def _df_add_columns( + self, original_df: DataFrame, new_columns: list[str], data: Any + ) -> DataFrame: ... + + @abstractmethod + def _df_combine_first( + self, original_df: DataFrame, new_df: DataFrame, index_cols: list[str] + ) -> DataFrame: ... + + @abstractmethod + def _df_concat( + self, + dfs: Collection[DataFrame], + how: Literal["horizontal"] | Literal["vertical"] = "vertical", + ignore_index: bool = False, + ) -> DataFrame: ... + + @abstractmethod + def _df_constructor( + self, + data: Sequence[Sequence] | dict[str | Any] | None = None, + columns: list[str] | None = None, + index_col: str | list[str] | None = None, + dtypes: dict[str, Any] | None = None, + ) -> DataFrame: ... + + @abstractmethod + def _df_get_bool_mask( + self, + df: DataFrame, + index_col: str, + mask: MaskLike | None = None, + negate: bool = False, + ) -> BoolSeries: ... + + @abstractmethod + def _df_get_masked_df( + self, + df: DataFrame, + index_col: str, + mask: MaskLike | None = None, + columns: list[str] | None = None, + negate: bool = False, + ) -> DataFrame: ... + + @abstractmethod + def _df_iterator(self, df: DataFrame) -> Iterator[dict[str, Any]]: ... + + @abstractmethod + def _df_remove( + self, df: DataFrame, ids: Sequence[Any], index_col: str | None = None + ) -> DataFrame: ... + + @abstractmethod + def _df_sample( + self, + df: DataFrame, + n: int | None = None, + frac: float | None = None, + with_replacement: bool = False, + shuffle: bool = False, + seed: int | None = None, + ) -> DataFrame: ... + + @abstractmethod + def _srs_constructor( + self, + data: Sequence[Any] | None = None, + name: str | None = None, + dtype: Any | None = None, + index: Sequence[Any] | None = None, + ) -> Series: ... diff --git a/mesa_frames/concrete/pandas/mixin.py b/mesa_frames/concrete/pandas/mixin.py new file mode 100644 index 00000000..b207f5fd --- /dev/null +++ b/mesa_frames/concrete/pandas/mixin.py @@ -0,0 +1,119 @@ +import pandas as pd +from typing_extensions import Any, Collection, Iterator, Literal, Sequence + +from mesa_frames.abstract.mixin import DataFrameMixin +from mesa_frames.types_ import PandasMaskLike + + +class PandasMixin(DataFrameMixin): + def _df_add_columns( + self, original_df: pd.DataFrame, new_columns: list[str], data: Any + ) -> pd.DataFrame: + original_df[new_columns] = data + return original_df + + def _df_combine_first( + self, original_df: pd.DataFrame, new_df: pd.DataFrame, index_cols: list[str] + ) -> pd.DataFrame: + return original_df.combine_first(new_df) + + def _df_concat( + self, + dfs: Collection[pd.DataFrame], + how: Literal["horizontal"] | Literal["vertical"] = "vertical", + ignore_index: bool = False, + ) -> pd.DataFrame: + return pd.concat( + dfs, axis=0 if how == "vertical" else 1, ignore_index=ignore_index + ) + + def _df_constructor( + self, + data: Sequence[Sequence] | dict[str | Any] | None = None, + columns: list[str] | None = None, + index_col: str | list[str] | None = None, + dtypes: dict[str, Any] | None = None, + ) -> pd.DataFrame: + df = pd.DataFrame(data=data, columns=columns).astype(dtypes) + if index_col: + df.set_index(index_col) + return df + + def _df_get_bool_mask( + self, + df: pd.DataFrame, + index_col: str, + mask: PandasMaskLike = None, + negate: bool = False, + ) -> pd.Series: + if isinstance(mask, pd.Series) and mask.dtype == bool and len(mask) == len(df): + result = mask + elif isinstance(mask, pd.DataFrame): + if mask.index.name == index_col: + result = pd.Series(df.index.isin(mask.index), index=df.index) + elif index_col in mask.columns: + result = pd.Series(df.index.isin(mask[index_col]), index=df.index) + else: + raise ValueError( + f"A DataFrame mask must have a column/index with name {index_col}" + ) + elif mask is None or mask == "all": + result = pd.Series(True, index=df.index) + elif isinstance(mask, Sequence): + result = pd.Series(df.index.isin(mask), index=df.index) + else: + result = pd.Series(df.index.isin([mask]), index=df.index) + + if negate: + result = ~result + + return result + + def _df_get_masked_df( + self, + df: pd.DataFrame, + index_col: str, + mask: PandasMaskLike | None = None, + columns: list[str] | None = None, + negate: bool = False, + ) -> pd.DataFrame: + b_mask = self._df_get_bool_mask(df, index_col, mask, negate) + if columns: + return df.loc[b_mask, columns] + return df.loc[b_mask] + + def _df_iterator(self, df: pd.DataFrame) -> Iterator[dict[str, Any]]: + for index, row in df.iterrows(): + row_dict = row.to_dict() + row_dict["unique_id"] = index + yield row_dict + + def _df_remove( + self, + df: pd.DataFrame, + ids: Sequence[Any], + index_col: str | None = None, + ) -> pd.DataFrame: + return df[~df.index.isin(ids)] + + def _df_sample( + self, + df: pd.DataFrame, + n: int | None = None, + frac: float | None = None, + with_replacement: bool = False, + shuffle: bool = False, + seed: int | None = None, + ) -> pd.DataFrame: + return df.sample( + n=n, frac=frac, replace=with_replacement, shuffle=shuffle, random_state=seed + ) + + def _srs_constructor( + self, + data: Sequence[Sequence] | None = None, + name: str | None = None, + dtype: Any | None = None, + index: Sequence[Any] | None = None, + ) -> pd.Series: + return pd.Series(data, name=name, dtype=dtype, index=index) diff --git a/mesa_frames/concrete/polars/mixin.py b/mesa_frames/concrete/polars/mixin.py new file mode 100644 index 00000000..7300c197 --- /dev/null +++ b/mesa_frames/concrete/polars/mixin.py @@ -0,0 +1,143 @@ +import polars as pl +from typing_extensions import Any, Collection, Iterator, Literal, Sequence + +from mesa_frames.abstract.mixin import DataFrameMixin +from mesa_frames.types_ import PolarsMaskLike + + +class PolarsMixin(DataFrameMixin): + # TODO: complete with other dtypes + _dtypes_mapping: dict[str, Any] = {"int64": pl.Int64, "bool": pl.Boolean} + + def _df_add_columns( + self, original_df: pl.DataFrame, new_columns: list[str], data: Any + ) -> pl.DataFrame: + return original_df.with_columns( + **{col: value for col, value in zip(new_columns, data)} + ) + + def _df_combine_first( + self, original_df: pl.DataFrame, new_df: pl.DataFrame, index_cols: list[str] + ) -> pl.DataFrame: + new_df = original_df.join(new_df, on=index_cols, how="full", suffix="_right") + # Find columns with the _right suffix and update the corresponding original columns + updated_columns = [] + for col in new_df.columns: + if col.endswith("_right"): + original_col = col.replace("_right", "") + updated_columns.append( + pl.when(pl.col(col).is_not_null()) + .then(pl.col(col)) + .otherwise(pl.col(original_col)) + .alias(original_col) + ) + + # Apply the updates and remove the _right columns + new_df = new_df.with_columns(updated_columns).select( + pl.col(r"^(?!.*_right$).*") + ) + return new_df + + def _df_concat( + self, + dfs: Collection[pl.DataFrame], + how: Literal["horizontal"] | Literal["vertical"] = "vertical", + ignore_index: bool = False, + ) -> pl.DataFrame: + return pl.concat( + dfs, how="vertical_relaxed" if how == "vertical" else "horizontal_relaxed" + ) + + def _df_constructor( + self, + data: Sequence[Sequence] | dict[str | Any] | None = None, + columns: list[str] | None = None, + index_col: str | list[str] | None = None, + dtypes: dict[str, str] | None = None, + ) -> pl.DataFrame: + dtypes = {k: self._dtypes_mapping.get(v, v) for k, v in dtypes.items()} + return pl.DataFrame(data=data, schema=dtypes if dtypes else columns) + + def _df_get_bool_mask( + self, + df: pl.DataFrame, + index_col: str, + mask: PolarsMaskLike = None, + negate: bool = False, + ) -> pl.Series | pl.Expr: + def bool_mask_from_series(mask: pl.Series) -> pl.Series: + if ( + isinstance(mask, pl.Series) + and mask.dtype == pl.Boolean + and len(mask) == len(df) + ): + return mask + return df[index_col].is_in(mask) + + if isinstance(mask, pl.Expr): + result = mask + elif isinstance(mask, pl.Series): + result = bool_mask_from_series(mask) + elif isinstance(mask, pl.DataFrame): + if index_col in mask.columns: + result = bool_mask_from_series(mask[index_col]) + elif len(mask.columns) == 1 and mask.dtypes[0] == pl.Boolean: + result = bool_mask_from_series(mask[mask.columns[0]]) + else: + raise KeyError( + f"DataFrame must have an {index_col} column or a single boolean column." + ) + elif mask is None or mask == "all": + result = pl.Series([True] * len(df)) + elif isinstance(mask, Collection): + result = bool_mask_from_series(pl.Series(mask)) + else: + result = bool_mask_from_series(pl.Series([mask])) + + if negate: + result = ~result + + return result + + def _df_get_masked_df( + self, + df: pl.DataFrame, + index_col: str, + mask: PolarsMaskLike | None = None, + columns: list[str] | None = None, + negate: bool = False, + ) -> pl.DataFrame: + b_mask = self._df_get_bool_mask(df, index_col, mask, negate=negate) + if columns: + return df.filter(b_mask)[columns] + return df.filter(b_mask) + + def _df_iterator(self, df: pl.DataFrame) -> Iterator[dict[str, Any]]: + return iter(df.iter_rows(named=True)) + + def _df_remove( + self, df: pl.DataFrame, ids: Sequence[Any], index_col: str | None = None + ) -> pl.DataFrame: + return df.filter(pl.col(index_col).is_in(ids).not_()) + + def _df_sample( + self, + df: pl.DataFrame, + n: int | None = None, + frac: float | None = None, + with_replacement: bool = False, + shuffle: bool = False, + seed: int | None = None, + ) -> pl.DataFrame: + return df.sample( + n=n, frac=frac, replace=with_replacement, shuffle=shuffle, seed=seed + ) + + def _srs_constructor( + self, + data: Sequence[Any] | None = None, + name: str | None = None, + dtype: Any | None = None, + index: Sequence[Any] | None = None, + ) -> pl.Series: + return pl.Series(name=name, values=data, dtype=self._dtypes_mapping[dtype]) From a716118aff16acab2b6af89202f79db6fabd6c52 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 15 Jul 2024 22:12:57 +0000 Subject: [PATCH 03/10] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mesa_frames/abstract/mixin.py | 3 ++- mesa_frames/concrete/pandas/mixin.py | 3 ++- mesa_frames/concrete/polars/mixin.py | 3 ++- mesa_frames/types_.py | 2 +- 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/mesa_frames/abstract/mixin.py b/mesa_frames/abstract/mixin.py index 258a331a..088901ec 100644 --- a/mesa_frames/abstract/mixin.py +++ b/mesa_frames/abstract/mixin.py @@ -1,7 +1,8 @@ from abc import ABC, abstractmethod from copy import copy, deepcopy -from typing_extensions import Any, Collection, Iterator, Literal, Self, Sequence +from typing_extensions import Any, Self +from typing import Collection, Iterator, Literal, Sequence from mesa_frames.types_ import BoolSeries, DataFrame, MaskLike, Series diff --git a/mesa_frames/concrete/pandas/mixin.py b/mesa_frames/concrete/pandas/mixin.py index b207f5fd..fca88e95 100644 --- a/mesa_frames/concrete/pandas/mixin.py +++ b/mesa_frames/concrete/pandas/mixin.py @@ -1,5 +1,6 @@ import pandas as pd -from typing_extensions import Any, Collection, Iterator, Literal, Sequence +from typing_extensions import Any +from typing import Collection, Iterator, Literal, Sequence from mesa_frames.abstract.mixin import DataFrameMixin from mesa_frames.types_ import PandasMaskLike diff --git a/mesa_frames/concrete/polars/mixin.py b/mesa_frames/concrete/polars/mixin.py index 7300c197..70ccfdd8 100644 --- a/mesa_frames/concrete/polars/mixin.py +++ b/mesa_frames/concrete/polars/mixin.py @@ -1,5 +1,6 @@ import polars as pl -from typing_extensions import Any, Collection, Iterator, Literal, Sequence +from typing_extensions import Any +from typing import Collection, Iterator, Literal, Sequence from mesa_frames.abstract.mixin import DataFrameMixin from mesa_frames.types_ import PolarsMaskLike diff --git a/mesa_frames/types_.py b/mesa_frames/types_.py index 2e9c6014..bc4e1ec3 100644 --- a/mesa_frames/types_.py +++ b/mesa_frames/types_.py @@ -6,7 +6,7 @@ import pandas as pd import polars as pl from numpy import ndarray -from typing_extensions import Literal, Sequence +from typing import Literal, Sequence ####----- Agnostic Types -----#### AgnosticMask = Literal["all", "active"] | None From a9c9925e56567d4388896b17df881b5f5563f585 Mon Sep 17 00:00:00 2001 From: Adam Amer <136176500+adamamer20@users.noreply.github.com> Date: Tue, 16 Jul 2024 00:23:43 +0200 Subject: [PATCH 04/10] removing space types (has it's own PR) --- mesa_frames/types_.py | 43 ++----------------------------------------- 1 file changed, 2 insertions(+), 41 deletions(-) diff --git a/mesa_frames/types_.py b/mesa_frames/types_.py index bc4e1ec3..c34e7929 100644 --- a/mesa_frames/types_.py +++ b/mesa_frames/types_.py @@ -1,12 +1,9 @@ from collections.abc import Collection +from typing import Literal -import geopandas as gpd -import geopolars as gpl -import numpy as np import pandas as pd import polars as pl from numpy import ndarray -from typing import Literal, Sequence ####----- Agnostic Types -----#### AgnosticMask = Literal["all", "active"] | None @@ -18,20 +15,16 @@ AnyArrayLike = ArrayLike | pd.Index | pd.Series PandasMaskLike = AgnosticMask | pd.Series | pd.DataFrame | AnyArrayLike PandasIdsLike = AgnosticIds | pd.Series | pd.Index -PandasGridCapacity = np.ndarray ###----- Polars Types -----### PolarsMaskLike = AgnosticMask | pl.Expr | pl.Series | pl.DataFrame | Collection[int] PolarsIdsLike = AgnosticIds | pl.Series -PolarsGridCapacity = list[pl.Expr] ###----- Generic -----### -GeoDataFrame = gpd.GeoDataFrame | gpl.GeoDataFrame -GeoSeries = gpd.GeoSeries | gpl.GeoSeries DataFrame = pd.DataFrame | pl.DataFrame -Series = pd.Series | pl.Series | GeoSeries +Series = pd.Series | pl.Series Index = pd.Index | pl.Series BoolSeries = pd.Series | pl.Series MaskLike = AgnosticMask | PandasMaskLike | PolarsMaskLike @@ -39,35 +32,3 @@ ###----- Time ------### TimeT = float | int - - -###----- Space -----### - -NetworkCoordinate = int | DataFrame - -GridCoordinate = int | Sequence[int] | DataFrame - -DiscreteCoordinate = NetworkCoordinate | GridCoordinate -ContinousCoordinate = float | Sequence[float] | DataFrame - -SpaceCoordinate = DiscreteCoordinate | ContinousCoordinate - - -NetworkCoordinates = NetworkCoordinate | Collection[NetworkCoordinate] -GridCoordinates = ( - GridCoordinate | Sequence[int | slice | Sequence[int]] | Collection[GridCoordinate] -) - -DiscreteCoordinates = NetworkCoordinates | GridCoordinates -ContinousCoordinates = ( - ContinousCoordinate - | Sequence[float | Sequence[float]] - | Collection[ContinousCoordinate] -) - -SpaceCoordinates = DiscreteCoordinates | ContinousCoordinates - -GridCapacity = PandasGridCapacity | PolarsGridCapacity -NetworkCapacity = DataFrame - -DiscreteSpaceCapacity = GridCapacity | NetworkCapacity From cecf5af256e228268187b9942f400ae609442207 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 15 Jul 2024 22:23:56 +0000 Subject: [PATCH 05/10] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mesa_frames/abstract/mixin.py | 3 ++- mesa_frames/concrete/pandas/mixin.py | 3 ++- mesa_frames/concrete/polars/mixin.py | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/mesa_frames/abstract/mixin.py b/mesa_frames/abstract/mixin.py index 088901ec..6f59e2ce 100644 --- a/mesa_frames/abstract/mixin.py +++ b/mesa_frames/abstract/mixin.py @@ -2,7 +2,8 @@ from copy import copy, deepcopy from typing_extensions import Any, Self -from typing import Collection, Iterator, Literal, Sequence +from typing import Literal +from collections.abc import Collection, Iterator, Sequence from mesa_frames.types_ import BoolSeries, DataFrame, MaskLike, Series diff --git a/mesa_frames/concrete/pandas/mixin.py b/mesa_frames/concrete/pandas/mixin.py index fca88e95..bb1d546f 100644 --- a/mesa_frames/concrete/pandas/mixin.py +++ b/mesa_frames/concrete/pandas/mixin.py @@ -1,6 +1,7 @@ import pandas as pd from typing_extensions import Any -from typing import Collection, Iterator, Literal, Sequence +from typing import Literal +from collections.abc import Collection, Iterator, Sequence from mesa_frames.abstract.mixin import DataFrameMixin from mesa_frames.types_ import PandasMaskLike diff --git a/mesa_frames/concrete/polars/mixin.py b/mesa_frames/concrete/polars/mixin.py index 70ccfdd8..e2922817 100644 --- a/mesa_frames/concrete/polars/mixin.py +++ b/mesa_frames/concrete/polars/mixin.py @@ -1,6 +1,7 @@ import polars as pl from typing_extensions import Any -from typing import Collection, Iterator, Literal, Sequence +from typing import Literal +from collections.abc import Collection, Iterator, Sequence from mesa_frames.abstract.mixin import DataFrameMixin from mesa_frames.types_ import PolarsMaskLike From 4722edbbd4a4fba04943706b575a4b4a8db42288 Mon Sep 17 00:00:00 2001 From: Adam Amer <136176500+adamamer20@users.noreply.github.com> Date: Tue, 16 Jul 2024 00:31:37 +0200 Subject: [PATCH 06/10] update types with types_ --- mesa_frames/concrete/agentset_pandas.py | 2 +- mesa_frames/concrete/agentset_polars.py | 2 +- pyproject.toml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/mesa_frames/concrete/agentset_pandas.py b/mesa_frames/concrete/agentset_pandas.py index 8f4ce4d2..152b2b15 100644 --- a/mesa_frames/concrete/agentset_pandas.py +++ b/mesa_frames/concrete/agentset_pandas.py @@ -7,7 +7,7 @@ from mesa_frames.abstract.agents import AgentSetDF from mesa_frames.concrete.agentset_polars import AgentSetPolars -from mesa_frames.types import PandasIdsLike, PandasMaskLike +from mesa_frames.types_ import PandasIdsLike, PandasMaskLike if TYPE_CHECKING: from mesa_frames.concrete.model import ModelDF diff --git a/mesa_frames/concrete/agentset_polars.py b/mesa_frames/concrete/agentset_polars.py index 358a310a..72785e38 100644 --- a/mesa_frames/concrete/agentset_polars.py +++ b/mesa_frames/concrete/agentset_polars.py @@ -6,7 +6,7 @@ from typing_extensions import Any, Self, overload from mesa_frames.concrete.agents import AgentSetDF -from mesa_frames.types import PolarsIdsLike, PolarsMaskLike +from mesa_frames.types_ import PolarsIdsLike, PolarsMaskLike if TYPE_CHECKING: from mesa_frames.concrete.agentset_pandas import AgentSetPandas diff --git a/pyproject.toml b/pyproject.toml index cd17c042..4a8e2ee9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,7 @@ pandas = [ "pyarrow", ] polars = [ - "polars>=1.0.0", #polars._typing (see mesa_frames.types) added in 1.0.0 + "polars>=1.0.0", #polars._typing (see mesa_frames.types_) added in 1.0.0 ] dev = [ "mesa_frames[pandas,polars]", From 21c5ef8487e9b912d9e1b24dc195bb4dd3d1475f Mon Sep 17 00:00:00 2001 From: Adam Amer <136176500+adamamer20@users.noreply.github.com> Date: Tue, 16 Jul 2024 09:47:25 +0200 Subject: [PATCH 07/10] Moved agentset to library folder --- mesa_frames/concrete/agentset_pandas.py | 439 ------------------- mesa_frames/concrete/agentset_polars.py | 554 ------------------------ mesa_frames/concrete/pandas/agentset.py | 20 +- mesa_frames/concrete/polars/agentset.py | 103 ++++- 4 files changed, 104 insertions(+), 1012 deletions(-) delete mode 100644 mesa_frames/concrete/agentset_pandas.py delete mode 100644 mesa_frames/concrete/agentset_polars.py diff --git a/mesa_frames/concrete/agentset_pandas.py b/mesa_frames/concrete/agentset_pandas.py deleted file mode 100644 index 152b2b15..00000000 --- a/mesa_frames/concrete/agentset_pandas.py +++ /dev/null @@ -1,439 +0,0 @@ -from collections.abc import Callable, Collection, Iterable, Iterator, Sequence -from typing import TYPE_CHECKING - -import pandas as pd -import polars as pl -from typing_extensions import Any, Self, overload - -from mesa_frames.abstract.agents import AgentSetDF -from mesa_frames.concrete.agentset_polars import AgentSetPolars -from mesa_frames.types_ import PandasIdsLike, PandasMaskLike - -if TYPE_CHECKING: - from mesa_frames.concrete.model import ModelDF - - -class AgentSetPandas(AgentSetDF): - _agents: pd.DataFrame - _mask: pd.Series - _copy_with_method: dict[str, tuple[str, list[str]]] = { - "_agents": ("copy", ["deep"]), - "_mask": ("copy", ["deep"]), - } - """A pandas-based implementation of the AgentSet. - - Attributes - ---------- - _agents : pd.DataFrame - The agents in the AgentSet. - _copy_only_reference : list[str] = ['_model'] - A list of attributes to copy with a reference only. - _copy_with_method: dict[str, tuple[str, list[str]]] = { - "_agents": ("copy", ["deep"]), - "_mask": ("copy", ["deep"]), - } - A dictionary of attributes to copy with a specified method and arguments. - _mask : pd.Series - A boolean mask indicating which agents are active. - _model : ModelDF - The model that the AgentSetDF belongs to. - - Properties - ---------- - active_agents(self) -> pd.DataFrame - Get the active agents in the AgentSetPandas. - agents(self) -> pd.DataFrame - Get or set the agents in the AgentSetPandas. - inactive_agents(self) -> pd.DataFrame - Get the inactive agents in the AgentSetPandas. - model(self) -> ModelDF - Get the model associated with the AgentSetPandas. - random(self) -> Generator - Get the random number generator associated with the model. - - Methods - ------- - __init__(self, model: ModelDF) -> None - Initialize a new AgentSetPandas. - add(self, other: pd.DataFrame | Sequence[Any] | dict[str, Any], inplace: bool = True) -> Self - Add agents to the AgentSetPandas. - contains(self, ids: PandasIdsLike) -> bool | pd.Series - Check if agents with the specified IDs are in the AgentSetPandas. - copy(self, deep: bool = False, memo: dict | None = None) -> Self - Create a copy of the AgentSetPandas. - discard(self, ids: PandasIdsLike, inplace: bool = True) -> Self - Remove an agent from the AgentSetPandas. Does not raise an error if the agent is not found. - do(self, method_name: str, *args, return_results: bool = False, inplace: bool = True, **kwargs) -> Self | Any - Invoke a method on the AgentSetPandas. - get(self, attr_names: str | Collection[str] | None, mask: PandasMaskLike = None) -> pd.Series | pd.DataFrame - Retrieve the value of a specified attribute for each agent in the AgentSetPandas. - remove(self, ids: PandasIdsLike, inplace: bool = True) -> Self - Remove agents from the AgentSetPandas. - select(self, mask: PandasMaskLike = None, filter_func: Callable[[Self], PandasMaskLike] | None = None, n: int | None = None, negate: bool = False, inplace: bool = True) -> Self - Select agents in the AgentSetPandas based on the given criteria. - set(self, attr_names: str | Collection[str] | dict[str, Any] | None = None, values: Any | None = None, mask: PandasMaskLike | None = None, inplace: bool = True) -> Self - Set the value of a specified attribute or attributes for each agent in the mask in the AgentSetPandas. - shuffle(self, inplace: bool = True) -> Self - Shuffle the order of agents in the AgentSetPandas. - sort(self, by: str | Sequence[str], ascending: bool | Sequence[bool] = True, inplace: bool = True, **kwargs) -> Self - Sort the agents in the AgentSetPandas based on the given criteria. - to_polars(self) -> "AgentSetPolars" - Convert the AgentSetPandas to an AgentSetPolars. - _get_bool_mask(self, mask: PandasMaskLike = None) -> pd.Series - Get a boolean mask for selecting agents. - _get_masked_df(self, mask: PandasMaskLike = None) -> pd.DataFrame - Get a DataFrame of agents that match the mask. - __getattr__(self, key: str) -> pd.Series - Retrieve an attribute of the underlying DataFrame. - __iter__(self) -> Iterator - Get an iterator for the agents in the AgentSetPandas. - __len__(self) -> int - Get the number of agents in the AgentSetPandas. - __repr__(self) -> str - Get the string representation of the AgentSetPandas. - __reversed__(self) -> Iterator - Get a reversed iterator for the agents in the AgentSetPandas. - __str__(self) -> str - Get the string representation of the AgentSetPandas. - """ - - def __init__(self, model: "ModelDF") -> None: - self._model = model - self._agents = ( - pd.DataFrame(columns=["unique_id"]) - .astype({"unique_id": "int64"}) - .set_index("unique_id") - ) - self._mask = pd.Series(True, index=self._agents.index, dtype=pd.BooleanDtype()) - - def add( - self, - agents: pd.DataFrame | Sequence[Any] | dict[str, Any], - inplace: bool = True, - ) -> Self: - obj = self._get_obj(inplace) - if isinstance(agents, pd.DataFrame): - new_agents = agents - if "unique_id" != agents.index.name: - try: - new_agents.set_index("unique_id", inplace=True, drop=True) - except KeyError: - raise KeyError("DataFrame must have a unique_id column/index.") - elif isinstance(agents, dict): - if "unique_id" not in agents: - raise KeyError("Dictionary must have a unique_id key.") - index = agents.pop("unique_id") - if not isinstance(index, list): - index = [index] - new_agents = pd.DataFrame(agents, index=pd.Index(index, name="unique_id")) - else: - if len(agents) != len(obj._agents.columns) + 1: - raise ValueError( - "Length of data must match the number of columns in the AgentSet if being added as a Collection." - ) - columns = pd.Index(["unique_id"]).append(obj._agents.columns.copy()) - new_agents = pd.DataFrame([agents], columns=columns).set_index( - "unique_id", drop=True - ) - - if new_agents.index.dtype != "int64": - raise TypeError("unique_id must be of type int64.") - - if not obj._agents.index.intersection(new_agents.index).empty: - raise KeyError("Some IDs already exist in the agent set.") - - original_active_indices = obj._mask.index[obj._mask].copy() - - obj._agents = pd.concat([obj._agents, new_agents]) - - obj._update_mask(original_active_indices, new_agents.index) - - return obj - - @overload - def contains(self, agents: int) -> bool: ... - - @overload - def contains(self, agents: PandasIdsLike) -> pd.Series: ... - - def contains(self, agents: PandasIdsLike) -> bool | pd.Series: - if isinstance(agents, pd.Series): - return agents.isin(self._agents.index) - elif isinstance(agents, pd.Index): - return pd.Series( - agents.isin(self._agents.index), index=agents, dtype=pd.BooleanDtype() - ) - elif isinstance(agents, Collection): - return pd.Series(list(agents), index=list(agents)).isin(self._agents.index) - else: - return agents in self._agents.index - - def get( - self, - attr_names: str | Collection[str] | None = None, - mask: PandasMaskLike = None, - ) -> pd.Index | pd.Series | pd.DataFrame: - mask = self._get_bool_mask(mask) - if attr_names is None: - return self._agents.loc[mask] - else: - if attr_names == "unique_id": - return self._agents.loc[mask].index - if isinstance(attr_names, str): - return self._agents.loc[mask, attr_names] - if isinstance(attr_names, Collection): - return self._agents.loc[mask, list(attr_names)] - - def remove( - self, - ids: PandasIdsLike, - inplace: bool = True, - ) -> Self: - obj = self._get_obj(inplace) - initial_len = len(obj._agents) - mask = obj._get_bool_mask(ids) - remove_ids = obj._agents[mask].index - original_active_indices = obj._mask.index[obj._mask].copy() - obj._agents.drop(remove_ids, inplace=True) - if len(obj._agents) == initial_len: - raise KeyError("Some IDs were not found in agent set.") - - self._update_mask(original_active_indices) - return obj - - def set( - self, - attr_names: str | dict[str, Any] | Collection[str] | None = None, - values: Any | None = None, - mask: PandasMaskLike = None, - inplace: bool = True, - ) -> Self: - obj = self._get_obj(inplace) - b_mask = obj._get_bool_mask(mask) - masked_df = obj._get_masked_df(mask) - - if not attr_names: - attr_names = masked_df.columns - - if isinstance(attr_names, dict): - for key, val in attr_names.items(): - masked_df.loc[:, key] = val - elif ( - isinstance(attr_names, str) - or ( - isinstance(attr_names, Collection) - and all(isinstance(n, str) for n in attr_names) - ) - ) and values is not None: - if not isinstance(attr_names, str): # isinstance(attr_names, Collection) - attr_names = list(attr_names) - masked_df.loc[:, attr_names] = values - else: - raise ValueError( - "Either attr_names must be a dictionary with columns as keys and values or values must be provided." - ) - - non_masked_df = obj._agents[~b_mask] - original_index = obj._agents.index - obj._agents = pd.concat([non_masked_df, masked_df]) - obj._agents = obj._agents.reindex(original_index) - return obj - - def select( - self, - mask: PandasMaskLike = None, - filter_func: Callable[[Self], PandasMaskLike] | None = None, - n: int | None = None, - negate: bool = False, - inplace: bool = True, - ) -> Self: - obj = self._get_obj(inplace) - bool_mask = obj._get_bool_mask(mask) - if filter_func: - bool_mask = bool_mask & obj._get_bool_mask(filter_func(obj)) - if negate: - bool_mask = ~bool_mask - if n is not None: - bool_mask = pd.Series( - obj._agents.index.isin(obj._agents[bool_mask].sample(n).index), - index=obj._agents.index, - ) - obj._mask = bool_mask - return obj - - def shuffle(self, inplace: bool = True) -> Self: - obj = self._get_obj(inplace) - obj._agents = obj._agents.sample(frac=1) - return obj - - def sort( - self, - by: str | Sequence[str], - ascending: bool | Sequence[bool] = True, - inplace: bool = True, - **kwargs, - ) -> Self: - obj = self._get_obj(inplace) - obj._agents.sort_values(by=by, ascending=ascending, **kwargs, inplace=True) - return obj - - def to_polars(self) -> AgentSetPolars: - new_obj = AgentSetPolars(self._model) - new_obj._agents = pl.DataFrame(self._agents) - new_obj._mask = pl.Series(self._mask) - return new_obj - - def _concatenate_agentsets( - self, - agentsets: Iterable[Self], - duplicates_allowed: bool = True, - keep_first_only: bool = True, - original_masked_index: pd.Index | None = None, - ) -> Self: - if not duplicates_allowed: - indices = [self._agents.index.to_series()] + [ - agentset._agents.index.to_series() for agentset in agentsets - ] - pd.concat(indices, verify_integrity=True) - if duplicates_allowed & keep_first_only: - final_df = self._agents.copy() - final_mask = self._mask.copy() - for obj in iter(agentsets): - final_df = final_df.combine_first(obj._agents) - final_mask = final_mask.combine_first(obj._mask) - else: - final_df = pd.concat([obj._agents for obj in agentsets]) - final_mask = pd.concat([obj._mask for obj in agentsets]) - self._agents = final_df - self._mask = final_mask - if not isinstance(original_masked_index, type(None)): - ids_to_remove = original_masked_index.difference(self._agents.index) - if not ids_to_remove.empty: - self.remove(ids_to_remove, inplace=True) - return self - - def _get_bool_mask( - self, - mask: PandasMaskLike = None, - ) -> pd.Series: - if isinstance(mask, pd.Series) and mask.dtype == bool: - return mask - elif isinstance(mask, pd.DataFrame): - return pd.Series( - self._agents.index.isin(mask.index), index=self._agents.index - ) - elif isinstance(mask, list): - return pd.Series(self._agents.index.isin(mask), index=self._agents.index) - elif mask is None or mask == "all": - return pd.Series(True, index=self._agents.index) - elif mask == "active": - return self._mask - else: - return pd.Series(self._agents.index.isin([mask]), index=self._agents.index) - - def _get_masked_df( - self, - mask: PandasMaskLike = None, - ) -> pd.DataFrame: - if isinstance(mask, pd.Series) and mask.dtype == bool: - return self._agents.loc[mask] - elif isinstance(mask, pd.DataFrame): - if mask.index.name != "unique_id": - if "unique_id" in mask.columns: - mask.set_index("unique_id", inplace=True, drop=True) - else: - raise KeyError("DataFrame must have a unique_id column/index.") - return pd.DataFrame(index=mask.index).join( - self._agents, on="unique_id", how="left" - ) - elif isinstance(mask, pd.Series): - mask_df = mask.to_frame("unique_id").set_index("unique_id") - return mask_df.join(self._agents, on="unique_id", how="left") - elif mask is None or mask == "all": - return self._agents - elif mask == "active": - return self._agents.loc[self._mask] - else: - mask_series = pd.Series(mask) - mask_df = mask_series.to_frame("unique_id").set_index("unique_id") - return mask_df.join(self._agents, on="unique_id", how="left") - - @overload - def _get_obj_copy(self, obj: pd.Series) -> pd.Series: ... - - @overload - def _get_obj_copy(self, obj: pd.DataFrame) -> pd.DataFrame: ... - - @overload - def _get_obj_copy(self, obj: pd.Index) -> pd.Index: ... - - def _get_obj_copy( - self, obj: pd.Series | pd.DataFrame | pd.Index - ) -> pd.Series | pd.DataFrame | pd.Index: - return obj.copy() - - def _update_mask( - self, - original_active_indices: pd.Index, - new_active_indices: pd.Index | None = None, - ) -> None: - # Update the mask with the old active agents and the new agents - if new_active_indices is None: - self._mask = pd.Series( - self._agents.index.isin(original_active_indices), - index=self._agents.index, - dtype=pd.BooleanDtype(), - ) - else: - self._mask = pd.Series( - self._agents.index.isin(original_active_indices) - | self._agents.index.isin(new_active_indices), - index=self._agents.index, - dtype=pd.BooleanDtype(), - ) - - def __getattr__(self, name: str) -> Any: - super().__getattr__(name) - return getattr(self._agents, name) - - def __iter__(self) -> Iterator[dict[str, Any]]: - for index, row in self._agents.iterrows(): - row_dict = row.to_dict() - row_dict["unique_id"] = index - yield row_dict - - def __len__(self) -> int: - return len(self._agents) - - def __reversed__(self) -> Iterator: - return iter(self._agents[::-1].iterrows()) - - @property - def agents(self) -> pd.DataFrame: - return self._agents - - @agents.setter - def agents(self, new_agents: pd.DataFrame) -> None: - if new_agents.index.name == "unique_id": - pass - elif "unique_id" in new_agents.columns: - new_agents.set_index("unique_id", inplace=True, drop=True) - else: - raise KeyError("The DataFrame should have a 'unique_id' index/column") - self._agents = new_agents - - @property - def active_agents(self) -> pd.DataFrame: - return self._agents.loc[self._mask] - - @active_agents.setter - def active_agents(self, mask: PandasMaskLike) -> None: - self.select(mask=mask, inplace=True) - - @property - def inactive_agents(self) -> pd.DataFrame: - return self._agents.loc[~self._mask] - - @property - def index(self) -> pd.Index: - return self._agents.index diff --git a/mesa_frames/concrete/agentset_polars.py b/mesa_frames/concrete/agentset_polars.py deleted file mode 100644 index 72785e38..00000000 --- a/mesa_frames/concrete/agentset_polars.py +++ /dev/null @@ -1,554 +0,0 @@ -from collections.abc import Callable, Collection, Iterable, Iterator, Sequence -from typing import TYPE_CHECKING - -import polars as pl -from polars._typing import IntoExpr -from typing_extensions import Any, Self, overload - -from mesa_frames.concrete.agents import AgentSetDF -from mesa_frames.types_ import PolarsIdsLike, PolarsMaskLike - -if TYPE_CHECKING: - from mesa_frames.concrete.agentset_pandas import AgentSetPandas - from mesa_frames.concrete.model import ModelDF - - -class AgentSetPolars(AgentSetDF): - _agents: pl.DataFrame - _copy_with_method: dict[str, tuple[str, list[str]]] = { - "_agents": ("clone", []), - } - _copy_only_reference: list[str] = ["_model", "_mask"] - _mask: pl.Expr | pl.Series - - """A polars-based implementation of the AgentSet. - - Attributes - ---------- - _agents : pl.DataFrame - The agents in the AgentSet. - _copy_only_reference : list[str] = ["_model", "_mask"] - A list of attributes to copy with a reference only. - _copy_with_method: dict[str, tuple[str, list[str]]] = { - "_agents": ("copy", ["deep"]), - "_mask": ("copy", ["deep"]), - } - A dictionary of attributes to copy with a specified method and arguments. - model : ModelDF - The model to which the AgentSet belongs. - _mask : pl.Series - A boolean mask indicating which agents are active. - - Properties - ---------- - active_agents(self) -> pl.DataFrame - Get the active agents in the AgentSetPolars. - agents(self) -> pl.DataFrame - Get or set the agents in the AgentSetPolars. - inactive_agents(self) -> pl.DataFrame - Get the inactive agents in the AgentSetPolars. - model(self) -> ModelDF - Get the model associated with the AgentSetPolars. - random(self) -> Generator - Get the random number generator associated with the model. - - - Methods - ------- - __init__(self, model: ModelDF) -> None - Initialize a new AgentSetPolars. - add(self, other: pl.DataFrame | Sequence[Any] | dict[str, Any], inplace: bool = True) -> Self - Add agents to the AgentSetPolars. - contains(self, ids: PolarsIdsLike) -> bool | pl.Series - Check if agents with the specified IDs are in the AgentSetPolars. - copy(self, deep: bool = False, memo: dict | None = None) -> Self - Create a copy of the AgentSetPolars. - discard(self, ids: PolarsIdsLike, inplace: bool = True) -> Self - Remove an agent from the AgentSetPolars. Does not raise an error if the agent is not found. - do(self, method_name: str, *args, return_results: bool = False, inplace: bool = True, **kwargs) -> Self | Any - Invoke a method on the AgentSetPolars. - get(self, attr_names: IntoExpr | Iterable[IntoExpr] | None, mask: PolarsMaskLike = None) -> pl.Series | pl.DataFrame - Retrieve the value of a specified attribute for each agent in the AgentSetPolars. - remove(self, ids: PolarsIdsLike, inplace: bool = True) -> Self - Remove agents from the AgentSetPolars. - select(self, mask: PolarsMaskLike = None, filter_func: Callable[[Self], PolarsMaskLike] | None = None, n: int | None = None, negate: bool = False, inplace: bool = True) -> Self - Select agents in the AgentSetPolars based on the given criteria. - set(self, attr_names: str | Collection[str] | dict[str, Any] | None = None, values: Any | None = None, mask: PolarsMaskLike | None = None, inplace: bool = True) -> Self - Set the value of a specified attribute or attributes for each agent in the mask in the AgentSetPolars. - shuffle(self, inplace: bool = True) -> Self - Shuffle the order of agents in the AgentSetPolars. - sort(self, by: str | Sequence[str], ascending: bool | Sequence[bool] = True, inplace: bool = True, **kwargs) -> Self - Sort the agents in the AgentSetPolars based on the given criteria. - to_pandas(self) -> "AgentSetPandas" - Convert the AgentSetPolars to an AgentSetPandas. - _get_bool_mask(self, mask: PolarsMaskLike = None) -> pl.Series | pl.Expr - Get a boolean mask for selecting agents. - _get_masked_df(self, mask: PolarsMaskLike = None) -> pl.DataFrame - Get a DataFrame of agents that match the mask. - __getattr__(self, key: str) -> pl.Series - Retrieve an attribute of the underlying DataFrame. - __iter__(self) -> Iterator - Get an iterator for the agents in the AgentSetPolars. - __len__(self) -> int - Get the number of agents in the AgentSetPolars. - __repr__(self) -> str - Get the string representation of the AgentSetPolars. - __reversed__(self) -> Iterator - Get a reversed iterator for the agents in the AgentSetPolars. - __str__(self) -> str - Get the string representation of the AgentSetPolars. - - """ - - def __init__(self, model: "ModelDF") -> None: - """Initialize a new AgentSetPolars. - - Parameters - ---------- - model : ModelDF - The model that the agent set belongs to. - - Returns - ------- - None - """ - self._model = model - self._agents = pl.DataFrame(schema={"unique_id": pl.Int64}) - self._mask = pl.repeat(True, len(self._agents), dtype=pl.Boolean, eager=True) - - def add( - self, - agents: pl.DataFrame | Sequence[Any] | dict[str, Any], - inplace: bool = True, - ) -> Self: - """Add agents to the AgentSetPolars. - - Parameters - ---------- - other : pl.DataFrame | Sequence[Any] | dict[str, Any] - The agents to add. - inplace : bool, optional - Whether to add the agents in place, by default True. - - Returns - ------- - Self - The updated AgentSetPolars. - """ - obj = self._get_obj(inplace) - if isinstance(agents, pl.DataFrame): - if "unique_id" not in agents.columns: - raise KeyError("DataFrame must have a unique_id column.") - new_agents = agents - elif isinstance(agents, dict): - if "unique_id" not in agents: - raise KeyError("Dictionary must have a unique_id key.") - new_agents = pl.DataFrame(agents) - else: - if len(agents) != len(obj._agents.columns): - raise ValueError( - "Length of data must match the number of columns in the AgentSet if being added as a Collection." - ) - new_agents = pl.DataFrame([agents], schema=obj._agents.schema) - - if new_agents["unique_id"].dtype != pl.Int64: - raise TypeError("unique_id column must be of type int64.") - - # If self._mask is pl.Expr, then new mask is the same. - # If self._mask is pl.Series[bool], then new mask has to be updated. - - if isinstance(obj._mask, pl.Series): - original_active_indices = obj._agents.filter(obj._mask)["unique_id"] - - obj._agents = pl.concat([obj._agents, new_agents], how="diagonal_relaxed") - - if isinstance(obj._mask, pl.Series): - obj._update_mask(original_active_indices, new_agents["unique_id"]) - - return obj - - @overload - def contains(self, agents: int) -> bool: ... - - @overload - def contains(self, agents: PolarsIdsLike) -> pl.Series: ... - - def contains( - self, - agents: PolarsIdsLike, - ) -> bool | pl.Series: - if isinstance(agents, pl.Series): - return agents.is_in(self._agents["unique_id"]) - elif isinstance(agents, Collection): - return pl.Series(agents).is_in(self._agents["unique_id"]) - else: - return agents in self._agents["unique_id"] - - def get( - self, - attr_names: IntoExpr | Iterable[IntoExpr] | None, - mask: PolarsMaskLike = None, - ) -> pl.Series | pl.DataFrame: - masked_df = self._get_masked_df(mask) - attr_names = self.agents.select(attr_names).columns.copy() - if not attr_names: - return masked_df - masked_df = masked_df.select(attr_names) - if masked_df.shape[1] == 1: - return masked_df[masked_df.columns[0]] - return masked_df - - def remove(self, ids: PolarsIdsLike, inplace: bool = True) -> Self: - obj = self._get_obj(inplace=inplace) - initial_len = len(obj._agents) - mask = obj._get_bool_mask(ids) - - if isinstance(obj._mask, pl.Series): - original_active_indices = obj._agents.filter(obj._mask)["unique_id"] - - obj._agents = obj._agents.filter(mask.not_()) - if len(obj._agents) == initial_len: - raise KeyError(f"IDs {ids} not found in agent set.") - - if isinstance(obj._mask, pl.Series): - obj._update_mask(original_active_indices) - return obj - - def set( - self, - attr_names: str | Collection[str] | dict[str, Any] | None = None, - values: Any | None = None, - mask: PolarsMaskLike = None, - inplace: bool = True, - ) -> Self: - obj = self._get_obj(inplace) - b_mask = obj._get_bool_mask(mask) - masked_df = obj._get_masked_df(mask) - - if not attr_names: - attr_names = masked_df.columns - attr_names.remove("unique_id") - - def process_single_attr( - masked_df: pl.DataFrame, attr_name: str, values: Any - ) -> pl.DataFrame: - if isinstance(values, pl.DataFrame): - return masked_df.with_columns(values.to_series().alias(attr_name)) - elif isinstance(values, pl.Expr): - return masked_df.with_columns(values.alias(attr_name)) - if isinstance(values, pl.Series): - return masked_df.with_columns(values.alias(attr_name)) - else: - if isinstance(values, Collection): - values = pl.Series(values) - else: - values = pl.repeat(values, len(masked_df)) - return masked_df.with_columns(values.alias(attr_name)) - - if isinstance(attr_names, str) and values is not None: - masked_df = process_single_attr(masked_df, attr_names, values) - elif isinstance(attr_names, Collection) and values is not None: - if isinstance(values, Collection) and len(attr_names) == len(values): - for attribute, val in zip(attr_names, values): - masked_df = process_single_attr(masked_df, attribute, val) - else: - for attribute in attr_names: - masked_df = process_single_attr(masked_df, attribute, values) - elif isinstance(attr_names, dict): - for key, val in attr_names.items(): - masked_df = process_single_attr(masked_df, key, val) - else: - raise ValueError( - "attr_names must be a string, a collection of string or a dictionary with columns as keys and values." - ) - non_masked_df = obj._agents.filter(b_mask.not_()) - original_index = obj._agents.select("unique_id") - obj._agents = pl.concat([non_masked_df, masked_df], how="diagonal_relaxed") - obj._agents = original_index.join(obj._agents, on="unique_id", how="left") - return obj - - def select( - self, - mask: PolarsMaskLike = None, - filter_func: Callable[[Self], pl.Series] | None = None, - n: int | None = None, - negate: bool = False, - inplace: bool = True, - ) -> Self: - obj = self._get_obj(inplace) - mask = obj._get_bool_mask(mask) - if filter_func: - mask = mask & filter_func(obj) - if n is not None: - mask = (obj._agents["unique_id"]).is_in( - obj._agents.filter(mask).sample(n)["unique_id"] - ) - if negate: - mask = mask.not_() - obj._mask = mask - return obj - - def shuffle(self, inplace: bool = True) -> Self: - obj = self._get_obj(inplace) - obj._agents = obj._agents.sample(fraction=1, shuffle=True) - return obj - - def sort( - self, - by: str | Sequence[str], - ascending: bool | Sequence[bool] = True, - inplace: bool = True, - **kwargs, - ) -> Self: - obj = self._get_obj(inplace) - if isinstance(ascending, bool): - descending = not ascending - else: - descending = [not a for a in ascending] - obj._agents = obj._agents.sort(by=by, descending=descending, **kwargs) - return obj - - def to_pandas(self) -> "AgentSetPandas": - from mesa_frames.concrete.agentset_pandas import AgentSetPandas - - new_obj = AgentSetPandas(self._model) - new_obj._agents = self._agents.to_pandas() - if isinstance(self._mask, pl.Series): - new_obj._mask = self._mask.to_pandas() - else: # self._mask is Expr - new_obj._mask = ( - self._agents["unique_id"] - .is_in(self._agents.filter(self._mask)["unique_id"]) - .to_pandas() - ) - return new_obj - - def _concatenate_agentsets( - self, - agentsets: Iterable[Self], - duplicates_allowed: bool = True, - keep_first_only: bool = True, - original_masked_index: pl.Series | None = None, - ) -> Self: - if not duplicates_allowed: - indices_list = [self._agents["unique_id"]] + [ - agentset._agents["unique_id"] for agentset in agentsets - ] - all_indices = pl.concat(indices_list) - if all_indices.is_duplicated().any(): - raise ValueError( - "Some ids are duplicated in the AgentSetDFs that are trying to be concatenated" - ) - if duplicates_allowed & keep_first_only: - # Find the original_index list (ie longest index list), to sort correctly the rows after concatenation - max_length = max(len(agentset) for agentset in agentsets) - for agentset in agentsets: - if len(agentset) == max_length: - original_index = agentset._agents["unique_id"] - final_dfs = [self._agents] - final_active_indices = [self._agents["unique_id"]] - final_indices = self._agents["unique_id"].clone() - for obj in iter(agentsets): - # Remove agents that are already in the final DataFrame - final_dfs.append( - obj._agents.filter(pl.col("unique_id").is_in(final_indices).not_()) - ) - # Add the indices of the active agents of current AgentSet - final_active_indices.append(obj._agents.filter(obj._mask)["unique_id"]) - # Update the indices of the agents in the final DataFrame - final_indices = pl.concat( - [final_indices, final_dfs[-1]["unique_id"]], how="vertical" - ) - # Left-join original index with concatenated dfs to keep original ids order - final_df = original_index.to_frame().join( - pl.concat(final_dfs, how="diagonal_relaxed"), on="unique_id", how="left" - ) - # - final_active_index = pl.concat(final_active_indices, how="vertical") - - else: - final_df = pl.concat( - [obj._agents for obj in agentsets], how="diagonal_relaxed" - ) - final_active_index = pl.concat( - [obj._agents.filter(obj._mask)["unique_id"] for obj in agentsets] - ) - final_mask = final_df["unique_id"].is_in(final_active_index) - self._agents = final_df - self._mask = final_mask - # If some ids were removed in the do-method, we need to remove them also from final_df - if not isinstance(original_masked_index, type(None)): - ids_to_remove = original_masked_index.filter( - original_masked_index.is_in(self._agents["unique_id"]).not_() - ) - if not ids_to_remove.is_empty(): - self.remove(ids_to_remove, inplace=True) - return self - - def _get_bool_mask( - self, - mask: PolarsMaskLike = None, - ) -> pl.Series | pl.Expr: - def bool_mask_from_series(mask: pl.Series) -> pl.Series: - if ( - isinstance(mask, pl.Series) - and mask.dtype == pl.Boolean - and len(mask) == len(self._agents) - ): - return mask - return self._agents["unique_id"].is_in(mask) - - if isinstance(mask, pl.Expr): - return mask - elif isinstance(mask, pl.Series): - return bool_mask_from_series(mask) - elif isinstance(mask, pl.DataFrame): - if "unique_id" in mask.columns: - return bool_mask_from_series(mask["unique_id"]) - elif len(mask.columns) == 1 and mask.dtypes[0] == pl.Boolean: - return bool_mask_from_series(mask[mask.columns[0]]) - else: - raise KeyError( - "DataFrame must have a 'unique_id' column or a single boolean column." - ) - elif mask is None or mask == "all": - return pl.repeat(True, len(self._agents)) - elif mask == "active": - return self._mask - elif isinstance(mask, Collection): - return bool_mask_from_series(pl.Series(mask)) - else: - return bool_mask_from_series(pl.Series([mask])) - - def _get_masked_df( - self, - mask: PolarsMaskLike = None, - ) -> pl.DataFrame: - if (isinstance(mask, pl.Series) and mask.dtype == pl.Boolean) or isinstance( - mask, pl.Expr - ): - return self._agents.filter(mask) - elif isinstance(mask, pl.DataFrame): - if not mask["unique_id"].is_in(self._agents["unique_id"]).all(): - raise KeyError( - "Some 'unique_id' of mask are not present in DataFrame 'unique_id'." - ) - return mask.select("unique_id").join( - self._agents, on="unique_id", how="left" - ) - elif isinstance(mask, pl.Series): - if not mask.is_in(self._agents["unique_id"]).all(): - raise KeyError( - "Some 'unique_id' of mask are not present in DataFrame 'unique_id'." - ) - mask_df = mask.to_frame("unique_id") - return mask_df.join(self._agents, on="unique_id", how="left") - elif mask is None or mask == "all": - return self._agents - elif mask == "active": - return self._agents.filter(self._mask) - else: - if isinstance(mask, Collection): - mask_series = pl.Series(mask) - else: - mask_series = pl.Series([mask]) - if not mask_series.is_in(self._agents["unique_id"]).all(): - raise KeyError( - "Some 'unique_id' of mask are not present in DataFrame 'unique_id'." - ) - mask_df = mask_series.to_frame("unique_id") - return mask_df.join(self._agents, on="unique_id", how="left") - - @overload - def _get_obj_copy(self, obj: pl.Series) -> pl.Series: ... - - @overload - def _get_obj_copy(self, obj: pl.DataFrame) -> pl.DataFrame: ... - - def _get_obj_copy(self, obj: pl.Series | pl.DataFrame) -> pl.Series | pl.DataFrame: - return obj.clone() - - def _update_mask( - self, original_active_indices: pl.Series, new_indices: pl.Series | None = None - ) -> None: - if new_indices is not None: - self._mask = self._agents["unique_id"].is_in( - original_active_indices - ) | self._agents["unique_id"].is_in(new_indices) - else: - self._mask = self._agents["unique_id"].is_in(original_active_indices) - - def __getattr__(self, key: str) -> pl.Series: - super().__getattr__(key) - return self._agents[key] - - @overload - def __getitem__( - self, - key: str | tuple[PolarsMaskLike, str], - ) -> pl.Series: ... - - @overload - def __getitem__( - self, - key: ( - PolarsMaskLike - | Collection[str] - | tuple[ - PolarsMaskLike, - Collection[str], - ] - ), - ) -> pl.DataFrame: ... - - def __getitem__( - self, - key: ( - str - | Collection[str] - | PolarsMaskLike - | tuple[PolarsMaskLike, str] - | tuple[ - PolarsMaskLike, - Collection[str], - ] - ), - ) -> pl.Series | pl.DataFrame: - attr = super().__getitem__(key) - assert isinstance(attr, (pl.Series, pl.DataFrame)) - return attr - - def __iter__(self) -> Iterator[dict[str, Any]]: - return iter(self._agents.iter_rows(named=True)) - - def __len__(self) -> int: - return len(self._agents) - - def __reversed__(self) -> Iterator: - return reversed(iter(self._agents.iter_rows(named=True))) - - @property - def agents(self) -> pl.DataFrame: - return self._agents - - @agents.setter - def agents(self, agents: pl.DataFrame) -> None: - if "unique_id" not in agents.columns: - raise KeyError("DataFrame must have a unique_id column.") - self._agents = agents - - @property - def active_agents(self) -> pl.DataFrame: - return self.agents.filter(self._mask) - - @active_agents.setter - def active_agents(self, mask: PolarsMaskLike) -> None: - self.select(mask=mask, inplace=True) - - @property - def inactive_agents(self) -> pl.DataFrame: - return self.agents.filter(~self._mask) - - @property - def index(self) -> pl.Series: - return self._agents["unique_id"] diff --git a/mesa_frames/concrete/pandas/agentset.py b/mesa_frames/concrete/pandas/agentset.py index 14b45a04..0fcb1d05 100644 --- a/mesa_frames/concrete/pandas/agentset.py +++ b/mesa_frames/concrete/pandas/agentset.py @@ -12,7 +12,7 @@ from mesa_frames.types_ import PandasIdsLike, PandasMaskLike if TYPE_CHECKING: - pass + from mesa_frames.concrete.model import ModelDF class AgentSetPandas(AgentSetDF, PandasMixin): @@ -99,20 +99,22 @@ class AgentSetPandas(AgentSetDF, PandasMixin): Get the string representation of the AgentSetPandas. """ + def __init__(self, model: "ModelDF") -> None: + self._model = model + self._agents = ( + pd.DataFrame(columns=["unique_id"]) + .astype({"unique_id": "int64"}) + .set_index("unique_id") + ) + self._mask = pd.Series(True, index=self._agents.index, dtype=pd.BooleanDtype()) + def add( self, agents: pd.DataFrame | gpd.GeoDataFrame | Sequence[Any] | dict[str, Any], inplace: bool = True, ) -> Self: obj = self._get_obj(inplace) - if isinstance(agents, gpd.GeoDataFrame): - try: - self.model.space - except ValueError: - raise ValueError( - "You are adding agents with a GeoDataFrame but haven't set model.space. Set it before adding agents with a GeoDataFrame or add agents with a standard DataFrame" - ) - if isinstance(agents, (pd.DataFrame, gpd.GeoDataFrame)): + if isinstance(agents, pd.DataFrame): new_agents = agents if "unique_id" != agents.index.name: try: diff --git a/mesa_frames/concrete/polars/agentset.py b/mesa_frames/concrete/polars/agentset.py index c8d0ecc6..a9ad914c 100644 --- a/mesa_frames/concrete/polars/agentset.py +++ b/mesa_frames/concrete/polars/agentset.py @@ -1,7 +1,6 @@ from collections.abc import Callable, Collection, Iterable, Iterator, Sequence from typing import TYPE_CHECKING -import geopolars as gpl import polars as pl from polars._typing import IntoExpr from typing_extensions import Any, Self, overload @@ -11,6 +10,7 @@ from mesa_frames.types_ import PolarsIdsLike, PolarsMaskLike if TYPE_CHECKING: + from mesa_frames.concrete.model import ModelDF from mesa_frames.concrete.pandas.agentset import AgentSetPandas @@ -101,6 +101,22 @@ class AgentSetPolars(AgentSetDF, PolarsMixin): """ + def __init__(self, model: "ModelDF") -> None: + """Initialize a new AgentSetPolars. + + Parameters + ---------- + model : ModelDF + The model that the agent set belongs to. + + Returns + ------- + None + """ + self._model = model + self._agents = pl.DataFrame(schema={"unique_id": pl.Int64}) + self._mask = pl.repeat(True, len(self._agents), dtype=pl.Boolean, eager=True) + def add( self, agents: pl.DataFrame | Sequence[Any] | dict[str, Any], @@ -121,14 +137,7 @@ def add( The updated AgentSetPolars. """ obj = self._get_obj(inplace) - if isinstance(agents, gpl.GeoDataFrame): - try: - self.model.space - except ValueError: - raise ValueError( - "You are adding agents with a GeoDataFrame but haven't set model.space. Set it before adding agents with a GeoDataFrame or add agents with a standard DataFrame" - ) - if isinstance(agents, gpl.GeoDataFrame, pl.DataFrame): + if isinstance(agents, pl.DataFrame): if "unique_id" not in agents.columns: raise KeyError("DataFrame must have a unique_id column.") new_agents = agents @@ -301,7 +310,7 @@ def sort( return obj def to_pandas(self) -> "AgentSetPandas": - from mesa_frames.concrete.pandas.agentset_pandas import AgentSetPandas + from mesa_frames.concrete.pandas.agentset import AgentSetPandas new_obj = AgentSetPandas(self._model) new_obj._agents = self._agents.to_pandas() @@ -377,6 +386,80 @@ def _concatenate_agentsets( self.remove(ids_to_remove, inplace=True) return self + def _get_bool_mask( + self, + mask: PolarsMaskLike = None, + ) -> pl.Series | pl.Expr: + def bool_mask_from_series(mask: pl.Series) -> pl.Series: + if ( + isinstance(mask, pl.Series) + and mask.dtype == pl.Boolean + and len(mask) == len(self._agents) + ): + return mask + return self._agents["unique_id"].is_in(mask) + + if isinstance(mask, pl.Expr): + return mask + elif isinstance(mask, pl.Series): + return bool_mask_from_series(mask) + elif isinstance(mask, pl.DataFrame): + if "unique_id" in mask.columns: + return bool_mask_from_series(mask["unique_id"]) + elif len(mask.columns) == 1 and mask.dtypes[0] == pl.Boolean: + return bool_mask_from_series(mask[mask.columns[0]]) + else: + raise KeyError( + "DataFrame must have a 'unique_id' column or a single boolean column." + ) + elif mask is None or mask == "all": + return pl.repeat(True, len(self._agents)) + elif mask == "active": + return self._mask + elif isinstance(mask, Collection): + return bool_mask_from_series(pl.Series(mask)) + else: + return bool_mask_from_series(pl.Series([mask])) + + def _get_masked_df( + self, + mask: PolarsMaskLike = None, + ) -> pl.DataFrame: + if (isinstance(mask, pl.Series) and mask.dtype == pl.Boolean) or isinstance( + mask, pl.Expr + ): + return self._agents.filter(mask) + elif isinstance(mask, pl.DataFrame): + if not mask["unique_id"].is_in(self._agents["unique_id"]).all(): + raise KeyError( + "Some 'unique_id' of mask are not present in DataFrame 'unique_id'." + ) + return mask.select("unique_id").join( + self._agents, on="unique_id", how="left" + ) + elif isinstance(mask, pl.Series): + if not mask.is_in(self._agents["unique_id"]).all(): + raise KeyError( + "Some 'unique_id' of mask are not present in DataFrame 'unique_id'." + ) + mask_df = mask.to_frame("unique_id") + return mask_df.join(self._agents, on="unique_id", how="left") + elif mask is None or mask == "all": + return self._agents + elif mask == "active": + return self._agents.filter(self._mask) + else: + if isinstance(mask, Collection): + mask_series = pl.Series(mask) + else: + mask_series = pl.Series([mask]) + if not mask_series.is_in(self._agents["unique_id"]).all(): + raise KeyError( + "Some 'unique_id' of mask are not present in DataFrame 'unique_id'." + ) + mask_df = mask_series.to_frame("unique_id") + return mask_df.join(self._agents, on="unique_id", how="left") + @overload def _get_obj_copy(self, obj: pl.Series) -> pl.Series: ... From 831324a239e4146e568076ec029344644e1b89b0 Mon Sep 17 00:00:00 2001 From: Adam Amer <136176500+adamamer20@users.noreply.github.com> Date: Tue, 16 Jul 2024 09:50:15 +0200 Subject: [PATCH 08/10] update __init__ --- mesa_frames/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mesa_frames/__init__.py b/mesa_frames/__init__.py index 61f25b58..4288c363 100644 --- a/mesa_frames/__init__.py +++ b/mesa_frames/__init__.py @@ -1,6 +1,6 @@ from mesa_frames.concrete.agents import AgentsDF -from mesa_frames.concrete.agentset_pandas import AgentSetPandas -from mesa_frames.concrete.agentset_polars import AgentSetPolars +from mesa_frames.concrete.pandas.agentset import AgentSetPandas +from mesa_frames.concrete.polars.agentset import AgentSetPolars from mesa_frames.concrete.model import ModelDF __all__ = [ From 853a19ea79ad98dffe3454a66c3cf63bbd482cf6 Mon Sep 17 00:00:00 2001 From: Adam Amer <136176500+adamamer20@users.noreply.github.com> Date: Tue, 16 Jul 2024 09:51:46 +0200 Subject: [PATCH 09/10] remove geopandas --- mesa_frames/concrete/pandas/agentset.py | 1 - 1 file changed, 1 deletion(-) diff --git a/mesa_frames/concrete/pandas/agentset.py b/mesa_frames/concrete/pandas/agentset.py index 0fcb1d05..99ed58af 100644 --- a/mesa_frames/concrete/pandas/agentset.py +++ b/mesa_frames/concrete/pandas/agentset.py @@ -1,7 +1,6 @@ from collections.abc import Callable, Collection, Iterable, Iterator, Sequence from typing import TYPE_CHECKING -import geopandas as gpd import pandas as pd import polars as pl from typing_extensions import Any, Self, overload From 7d128a8303d965f5aea7c578ce7f6b221aae6f66 Mon Sep 17 00:00:00 2001 From: Adam Amer <136176500+adamamer20@users.noreply.github.com> Date: Tue, 16 Jul 2024 09:52:44 +0200 Subject: [PATCH 10/10] removed gpd --- mesa_frames/concrete/pandas/agentset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mesa_frames/concrete/pandas/agentset.py b/mesa_frames/concrete/pandas/agentset.py index 99ed58af..0378ae5b 100644 --- a/mesa_frames/concrete/pandas/agentset.py +++ b/mesa_frames/concrete/pandas/agentset.py @@ -109,7 +109,7 @@ def __init__(self, model: "ModelDF") -> None: def add( self, - agents: pd.DataFrame | gpd.GeoDataFrame | Sequence[Any] | dict[str, Any], + agents: pd.DataFrame | Sequence[Any] | dict[str, Any], inplace: bool = True, ) -> Self: obj = self._get_obj(inplace)