From c7407022948fea5683d7c9218357b8ef773cd560 Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Wed, 7 May 2025 12:13:09 +0100 Subject: [PATCH 1/7] Add database entities Fixes #118 --- policyengine/entities.py | 302 +++++++++++++++++++++++++++++++++++++++ pyproject.toml | 1 + 2 files changed, 303 insertions(+) create mode 100644 policyengine/entities.py diff --git a/policyengine/entities.py b/policyengine/entities.py new file mode 100644 index 0000000..639f306 --- /dev/null +++ b/policyengine/entities.py @@ -0,0 +1,302 @@ +from typing import Optional, List, Dict, Any, Union +from datetime import datetime, date +from sqlmodel import Field, Session, SQLModel, create_engine, Relationship, select +from enum import Enum +from pydantic import validator +from pathlib import Path + + +# Enums and Constants +class CountryCode(str, Enum): + UK = "uk" + US = "us" + + +class EntityType(str, Enum): + ENTITY = "entity" # Generic entity type + + +class BaseModel(SQLModel): + """Base model with ID as primary key""" + id: Optional[int] = Field(default=None, primary_key=True) + + +# Core policy models +class Country(BaseModel, table=True): + """Country model representing supported jurisdictions""" + code: str = Field(index=True, unique=True) # 'uk', 'us' + name: str # 'United Kingdom', 'United States' + + # Relationships + parameters: List["Parameter"] = Relationship(back_populates="country") + reforms: List["Reform"] = Relationship(back_populates="country") + entities: List["Entity"] = Relationship(back_populates="country") + variables: List["Variable"] = Relationship(back_populates="country") + simulation_runs: List["SimulationRun"] = Relationship(back_populates="country") + + +class Reform(BaseModel, table=True): + """A reform is a change to policy.""" + reform_id: str = Field(index=True) # '35' + name: str # 'Set of parameter changes involving main tax rate' + description: Optional[str] = None + country_id: Optional[int] = Field(default=None, foreign_key="country.id") + is_structural: bool = Field(default=False) # True if the reform contains non-parametric changes + + # Relationships + country: Optional[Country] = Relationship(back_populates="reforms") + parameter_changes: List["ParameterChange"] = Relationship(back_populates="reform") + simulation_runs: List["SimulationRun"] = Relationship(back_populates="reform") + + def __init__(self, **kwargs): + super().__init__(**kwargs) + + if "parameters_dict" in kwargs: + # Create ParameterChange objects from the provided dictionary + for parameter_name, changes in kwargs["parameters_dict"].items(): + for time_period, value in changes.items(): + parameter_change = ParameterChange( + parameter_name=parameter_name, + time_period=time_period, + value=value, + ) + self.parameter_changes.append(parameter_change) + + +class Parameter(BaseModel, table=True): + """Tax or benefit parameter definition""" + country_id: int = Field(foreign_key="country.id") + parameter_name: str = Field(index=True) # 'gov.tax.rate' + + # Relationships + country: Country = Relationship(back_populates="parameters") + parameter_changes: List["ParameterChange"] = Relationship(back_populates="parameter") + +class ParameterChange(BaseModel, table=True): + """Change to a parameter in a reform""" + parameter_id: int = Field(foreign_key="parameter.id") + reform_id: int = Field(foreign_key="reform.id") + value: str + time_period: str # '2025' + + # Relationships + parameter: Parameter = Relationship(back_populates="parameter_changes") + reform: Reform = Relationship(back_populates="parameter_changes") + + +# Entity and dataset models +class Entity(BaseModel, table=True): + """Entity model representing individuals, households, or other units""" + id: Optional[int] = Field(default=None, primary_key=True) + country_id: int = Field(foreign_key="country.id") + entity_type: str = Field(index=True) # Type of entity (person, household, etc.) + dataset_id: Optional[int] = Field(default=None, foreign_key="dataset.id") + + # Relationships + country: Country = Relationship(back_populates="entities") + dataset: Optional["Dataset"] = Relationship(back_populates="entities") + variable_states: List["VariableState"] = Relationship(back_populates="entity") + + +class VersionedDataset(BaseModel, table=True): + """Dataset containing entity records""" + name: str + description: Optional[str] = None + dataset_series_id: int = Field(foreign_key="datasetseries.id") + + datasets: List["Dataset"] = Relationship(back_populates="versioned_dataset") + dataset_series: "DatasetSeries" = Relationship(back_populates="versioned_datasets") + + +class DatasetSeries(BaseModel, table=True): + """Series of related datasets (e.g., annual survey data)""" + name: str + description: Optional[str] = None + + # Relationships + versioned_datasets: List["VersionedDataset"] = Relationship(back_populates="dataset_series") + + +class Dataset(BaseModel, table=True): + """Tags linking datasets to series with versioning""" + versioned_dataset_id: int = Field(foreign_key="versioneddataset.id", primary_key=True) + dataset_series_id: int = Field(foreign_key="datasetseries.id", primary_key=True) + version: str + + # Relationships + entities: List[Entity] = Relationship(back_populates="dataset") + versioned_dataset: VersionedDataset = Relationship(back_populates="datasets") + simulation_runs: List["SimulationRun"] = Relationship(back_populates="dataset") + + +# Variable models +class Variable(BaseModel, table=True): + """Definition of a specific variable (income, expenditure, etc.)""" + country_id: int = Field(foreign_key="country.id") + name: str = Field(index=True) + description: Optional[str] = None + + # Relationships + country: Country = Relationship(back_populates="variables") + variable_states: List["VariableState"] = Relationship(back_populates="variable") + + +class VariableState(BaseModel, table=True): + """Specific value of a variable for an entity at a point in time""" + variable_id: int = Field(foreign_key="variable.id") + entity_id: int = Field(foreign_key="entity.id") + time_period: str # '2025' + value: str # '30000' + simulation_run_id: Optional[int] = Field(default=None, foreign_key="simulationrun.id") + + # Relationships + variable: Variable = Relationship(back_populates="variable_states") + entity: Entity = Relationship(back_populates="variable_states") + simulation_run: Optional["SimulationRun"] = Relationship(back_populates="variable_states") + + +class SimulationRun(BaseModel, table=True): + """Record of a specific policy simulation""" + country_id: int = Field(foreign_key="country.id") + + reform_id: Optional[int] = Field(default=None, foreign_key="reform.id") + package_version: str + dataset_id: int = Field(foreign_key="dataset.id") + run_date: datetime = Field(default_factory=datetime.utcnow) + + # Relationships + reform: Optional[Reform] = Relationship(back_populates="simulation_runs") + country: Country = Relationship(back_populates="simulation_runs") + dataset: Dataset = Relationship(back_populates="simulation_runs") + variable_states: List["VariableState"] = Relationship(back_populates="simulation_run") + + +# Database management functions +def create_db_and_tables(connection_string="sqlite:///tax_policy.db"): + """Create database and tables""" + engine = create_engine(connection_string) + SQLModel.metadata.create_all(engine) + return engine + + +# Example data creation for UK tax parameter change +def add_uk_sim( +): + """Create example data for the UK tax rate change scenario""" + Path("tax_policy.db").unlink(missing_ok=True) + engine = create_db_and_tables() + + from policyengine import Simulation + + sim = Simulation( + country="uk", + scope="macro", + subsample=1000, + ) + + person_df = sim.baseline_simulation.calculate_dataframe(["person_id", "age"]) + household_df = sim.baseline_simulation.calculate_dataframe(["household_id", "household_net_income"]) + + with Session(engine) as session: + # Create countries + uk = Country(code="uk", name="United Kingdom") + us = Country(code="us", name="United States") + session.add(uk) + session.add(us) + session.commit() + + # Create dataset and series + dataset_series = DatasetSeries( + name="Enhanced FRS", + description="Enhanced Family Resources Survey" + ) + session.add(dataset_series) + + dataset = Dataset( + name="EFRS 2022", + description="Enhanced Family Resources Survey 2022", + ) + session.add(dataset) + session.commit() + + + # Tag dataset + dataset_tag = DatasetTag( + id=1, # Doesn't seem to work without this + dataset=dataset, + dataset_series=dataset_series, + version="2025.1" + ) + session.add(dataset_tag) + session.commit() + + # Add simulation run + + sim_run = SimulationRun( + country=uk, + reform=None, + package_version="1.0.0", + dataset=dataset, + run_date=datetime.utcnow(), + ) + session.add(sim_run) + session.commit() + + # Create variables + variable_names = list(person_df.columns) + list(household_df.columns) + for variable_name in variable_names: + variable = Variable( + country=uk, + name=variable_name, + description=f"Variable {variable_name} for UK tax simulation" + ) + session.add(variable) + session.commit() + + # Create all person entities + for i in range(len(person_df)): + person = Entity( + country=uk, + entity_type="person", + dataset_tag_id=dataset_tag.id, + ) + session.add(person) + for variable_name in list(person_df.columns): + # Get the variable object by name + variable = session.exec(select(Variable).where(Variable.name == variable_name).where(Variable.country_id == uk.id)).one() + variable_state = VariableState( + variable=variable, + entity=person, + time_period="2025", + value=str(person_df[variable_name].iloc[i]), # Convert to string as value is expected to be str + simulation_run=sim_run, + ) + session.add(variable_state) + session.commit() + + # Create all household entities + for i in range(len(household_df)): + household = Entity( + country=uk, + entity_type="household", + dataset_tag_id=dataset_tag.id, + ) + session.add(household) + for variable_name in list(household_df.columns): + # Get the variable object by name + variable = session.exec(select(Variable).where(Variable.name == variable_name).where(Variable.country_id == uk.id)).one() + variable_state = VariableState( + variable=variable, + entity=household, + time_period="2025", + value=household_df[variable_name].iloc[i], + ) + session.add(variable_state) + session.commit() + + + print("Successfully created example data for UK tax parameter change.") + + +if __name__ == "__main__": + add_uk_sim() \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index ef244d8..3eb977e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,6 +20,7 @@ dependencies = [ "getpass4", "pydantic", "google-cloud-storage", + "sqlmodel", ] [project.optional-dependencies] From 7a865ab304b3d8fdb5de2de1e53859121c454cd5 Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Wed, 7 May 2025 12:15:27 +0100 Subject: [PATCH 2/7] Rename simulationrun to simulation --- policyengine/entities.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/policyengine/entities.py b/policyengine/entities.py index 639f306..f5dd094 100644 --- a/policyengine/entities.py +++ b/policyengine/entities.py @@ -32,7 +32,7 @@ class Country(BaseModel, table=True): reforms: List["Reform"] = Relationship(back_populates="country") entities: List["Entity"] = Relationship(back_populates="country") variables: List["Variable"] = Relationship(back_populates="country") - simulation_runs: List["SimulationRun"] = Relationship(back_populates="country") + simulation: List["Simulation"] = Relationship(back_populates="country") class Reform(BaseModel, table=True): @@ -46,7 +46,7 @@ class Reform(BaseModel, table=True): # Relationships country: Optional[Country] = Relationship(back_populates="reforms") parameter_changes: List["ParameterChange"] = Relationship(back_populates="reform") - simulation_runs: List["SimulationRun"] = Relationship(back_populates="reform") + simulations: List["Simulation"] = Relationship(back_populates="reform") def __init__(self, **kwargs): super().__init__(**kwargs) @@ -126,7 +126,7 @@ class Dataset(BaseModel, table=True): # Relationships entities: List[Entity] = Relationship(back_populates="dataset") versioned_dataset: VersionedDataset = Relationship(back_populates="datasets") - simulation_runs: List["SimulationRun"] = Relationship(back_populates="dataset") + simulations: List["Simulation"] = Relationship(back_populates="dataset") # Variable models @@ -152,10 +152,10 @@ class VariableState(BaseModel, table=True): # Relationships variable: Variable = Relationship(back_populates="variable_states") entity: Entity = Relationship(back_populates="variable_states") - simulation_run: Optional["SimulationRun"] = Relationship(back_populates="variable_states") + simulation_run: Optional["Simulation"] = Relationship(back_populates="variable_states") -class SimulationRun(BaseModel, table=True): +class Simulation(BaseModel, table=True): """Record of a specific policy simulation""" country_id: int = Field(foreign_key="country.id") @@ -165,10 +165,10 @@ class SimulationRun(BaseModel, table=True): run_date: datetime = Field(default_factory=datetime.utcnow) # Relationships - reform: Optional[Reform] = Relationship(back_populates="simulation_runs") - country: Country = Relationship(back_populates="simulation_runs") - dataset: Dataset = Relationship(back_populates="simulation_runs") - variable_states: List["VariableState"] = Relationship(back_populates="simulation_run") + reform: Optional[Reform] = Relationship(back_populates="simulations") + country: Country = Relationship(back_populates="simulations") + dataset: Dataset = Relationship(back_populates="simulations") + variable_states: List["VariableState"] = Relationship(back_populates="simulation") # Database management functions From 7026f181ee90d0da1d1abadaadfdacf15d13a5b7 Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Wed, 7 May 2025 15:04:48 +0100 Subject: [PATCH 3/7] Format --- policyengine/entities.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/policyengine/entities.py b/policyengine/entities.py index f5dd094..cc38d3d 100644 --- a/policyengine/entities.py +++ b/policyengine/entities.py @@ -221,7 +221,7 @@ def add_uk_sim( # Tag dataset - dataset_tag = DatasetTag( + dataset_tag = Dataset( id=1, # Doesn't seem to work without this dataset=dataset, dataset_series=dataset_series, @@ -232,7 +232,7 @@ def add_uk_sim( # Add simulation run - sim_run = SimulationRun( + sim_run = Simulation( country=uk, reform=None, package_version="1.0.0", From 3b488cd7fd9166246e6362ddf50017093b736441 Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Wed, 7 May 2025 15:04:53 +0100 Subject: [PATCH 4/7] Format --- policyengine/entities.py | 156 +++++++++++++++++++++++++++------------ 1 file changed, 108 insertions(+), 48 deletions(-) diff --git a/policyengine/entities.py b/policyengine/entities.py index cc38d3d..485574c 100644 --- a/policyengine/entities.py +++ b/policyengine/entities.py @@ -1,6 +1,13 @@ from typing import Optional, List, Dict, Any, Union from datetime import datetime, date -from sqlmodel import Field, Session, SQLModel, create_engine, Relationship, select +from sqlmodel import ( + Field, + Session, + SQLModel, + create_engine, + Relationship, + select, +) from enum import Enum from pydantic import validator from pathlib import Path @@ -18,15 +25,17 @@ class EntityType(str, Enum): class BaseModel(SQLModel): """Base model with ID as primary key""" + id: Optional[int] = Field(default=None, primary_key=True) # Core policy models class Country(BaseModel, table=True): """Country model representing supported jurisdictions""" + code: str = Field(index=True, unique=True) # 'uk', 'us' name: str # 'United Kingdom', 'United States' - + # Relationships parameters: List["Parameter"] = Relationship(back_populates="country") reforms: List["Reform"] = Relationship(back_populates="country") @@ -37,15 +46,20 @@ class Country(BaseModel, table=True): class Reform(BaseModel, table=True): """A reform is a change to policy.""" + reform_id: str = Field(index=True) # '35' name: str # 'Set of parameter changes involving main tax rate' description: Optional[str] = None country_id: Optional[int] = Field(default=None, foreign_key="country.id") - is_structural: bool = Field(default=False) # True if the reform contains non-parametric changes - + is_structural: bool = Field( + default=False + ) # True if the reform contains non-parametric changes + # Relationships country: Optional[Country] = Relationship(back_populates="reforms") - parameter_changes: List["ParameterChange"] = Relationship(back_populates="reform") + parameter_changes: List["ParameterChange"] = Relationship( + back_populates="reform" + ) simulations: List["Simulation"] = Relationship(back_populates="reform") def __init__(self, **kwargs): @@ -65,20 +79,25 @@ def __init__(self, **kwargs): class Parameter(BaseModel, table=True): """Tax or benefit parameter definition""" + country_id: int = Field(foreign_key="country.id") parameter_name: str = Field(index=True) # 'gov.tax.rate' - + # Relationships country: Country = Relationship(back_populates="parameters") - parameter_changes: List["ParameterChange"] = Relationship(back_populates="parameter") + parameter_changes: List["ParameterChange"] = Relationship( + back_populates="parameter" + ) + class ParameterChange(BaseModel, table=True): """Change to a parameter in a reform""" + parameter_id: int = Field(foreign_key="parameter.id") reform_id: int = Field(foreign_key="reform.id") value: str time_period: str # '2025' - + # Relationships parameter: Parameter = Relationship(back_populates="parameter_changes") reform: Reform = Relationship(back_populates="parameter_changes") @@ -87,88 +106,119 @@ class ParameterChange(BaseModel, table=True): # Entity and dataset models class Entity(BaseModel, table=True): """Entity model representing individuals, households, or other units""" + id: Optional[int] = Field(default=None, primary_key=True) country_id: int = Field(foreign_key="country.id") - entity_type: str = Field(index=True) # Type of entity (person, household, etc.) + entity_type: str = Field( + index=True + ) # Type of entity (person, household, etc.) dataset_id: Optional[int] = Field(default=None, foreign_key="dataset.id") - + # Relationships country: Country = Relationship(back_populates="entities") dataset: Optional["Dataset"] = Relationship(back_populates="entities") - variable_states: List["VariableState"] = Relationship(back_populates="entity") + variable_states: List["VariableState"] = Relationship( + back_populates="entity" + ) class VersionedDataset(BaseModel, table=True): """Dataset containing entity records""" + name: str description: Optional[str] = None dataset_series_id: int = Field(foreign_key="datasetseries.id") - - datasets: List["Dataset"] = Relationship(back_populates="versioned_dataset") - dataset_series: "DatasetSeries" = Relationship(back_populates="versioned_datasets") + + datasets: List["Dataset"] = Relationship( + back_populates="versioned_dataset" + ) + dataset_series: "DatasetSeries" = Relationship( + back_populates="versioned_datasets" + ) class DatasetSeries(BaseModel, table=True): """Series of related datasets (e.g., annual survey data)""" + name: str description: Optional[str] = None - + # Relationships - versioned_datasets: List["VersionedDataset"] = Relationship(back_populates="dataset_series") + versioned_datasets: List["VersionedDataset"] = Relationship( + back_populates="dataset_series" + ) class Dataset(BaseModel, table=True): """Tags linking datasets to series with versioning""" - versioned_dataset_id: int = Field(foreign_key="versioneddataset.id", primary_key=True) - dataset_series_id: int = Field(foreign_key="datasetseries.id", primary_key=True) + + versioned_dataset_id: int = Field( + foreign_key="versioneddataset.id", primary_key=True + ) + dataset_series_id: int = Field( + foreign_key="datasetseries.id", primary_key=True + ) version: str - + # Relationships entities: List[Entity] = Relationship(back_populates="dataset") - versioned_dataset: VersionedDataset = Relationship(back_populates="datasets") + versioned_dataset: VersionedDataset = Relationship( + back_populates="datasets" + ) simulations: List["Simulation"] = Relationship(back_populates="dataset") # Variable models class Variable(BaseModel, table=True): """Definition of a specific variable (income, expenditure, etc.)""" + country_id: int = Field(foreign_key="country.id") name: str = Field(index=True) description: Optional[str] = None - + # Relationships country: Country = Relationship(back_populates="variables") - variable_states: List["VariableState"] = Relationship(back_populates="variable") + variable_states: List["VariableState"] = Relationship( + back_populates="variable" + ) class VariableState(BaseModel, table=True): """Specific value of a variable for an entity at a point in time""" + variable_id: int = Field(foreign_key="variable.id") entity_id: int = Field(foreign_key="entity.id") time_period: str # '2025' value: str # '30000' - simulation_run_id: Optional[int] = Field(default=None, foreign_key="simulationrun.id") - + simulation_run_id: Optional[int] = Field( + default=None, foreign_key="simulationrun.id" + ) + # Relationships variable: Variable = Relationship(back_populates="variable_states") entity: Entity = Relationship(back_populates="variable_states") - simulation_run: Optional["Simulation"] = Relationship(back_populates="variable_states") + simulation_run: Optional["Simulation"] = Relationship( + back_populates="variable_states" + ) class Simulation(BaseModel, table=True): """Record of a specific policy simulation""" + country_id: int = Field(foreign_key="country.id") - + reform_id: Optional[int] = Field(default=None, foreign_key="reform.id") package_version: str dataset_id: int = Field(foreign_key="dataset.id") run_date: datetime = Field(default_factory=datetime.utcnow) - + # Relationships reform: Optional[Reform] = Relationship(back_populates="simulations") country: Country = Relationship(back_populates="simulations") dataset: Dataset = Relationship(back_populates="simulations") - variable_states: List["VariableState"] = Relationship(back_populates="simulation") + variable_states: List["VariableState"] = Relationship( + back_populates="simulation" + ) # Database management functions @@ -180,8 +230,7 @@ def create_db_and_tables(connection_string="sqlite:///tax_policy.db"): # Example data creation for UK tax parameter change -def add_uk_sim( -): +def add_uk_sim(): """Create example data for the UK tax rate change scenario""" Path("tax_policy.db").unlink(missing_ok=True) engine = create_db_and_tables() @@ -194,9 +243,13 @@ def add_uk_sim( subsample=1000, ) - person_df = sim.baseline_simulation.calculate_dataframe(["person_id", "age"]) - household_df = sim.baseline_simulation.calculate_dataframe(["household_id", "household_net_income"]) - + person_df = sim.baseline_simulation.calculate_dataframe( + ["person_id", "age"] + ) + household_df = sim.baseline_simulation.calculate_dataframe( + ["household_id", "household_net_income"] + ) + with Session(engine) as session: # Create countries uk = Country(code="uk", name="United Kingdom") @@ -204,14 +257,13 @@ def add_uk_sim( session.add(uk) session.add(us) session.commit() - + # Create dataset and series dataset_series = DatasetSeries( - name="Enhanced FRS", - description="Enhanced Family Resources Survey" + name="Enhanced FRS", description="Enhanced Family Resources Survey" ) session.add(dataset_series) - + dataset = Dataset( name="EFRS 2022", description="Enhanced Family Resources Survey 2022", @@ -219,13 +271,12 @@ def add_uk_sim( session.add(dataset) session.commit() - # Tag dataset dataset_tag = Dataset( - id=1, # Doesn't seem to work without this + id=1, # Doesn't seem to work without this dataset=dataset, dataset_series=dataset_series, - version="2025.1" + version="2025.1", ) session.add(dataset_tag) session.commit() @@ -248,11 +299,11 @@ def add_uk_sim( variable = Variable( country=uk, name=variable_name, - description=f"Variable {variable_name} for UK tax simulation" + description=f"Variable {variable_name} for UK tax simulation", ) session.add(variable) session.commit() - + # Create all person entities for i in range(len(person_df)): person = Entity( @@ -263,12 +314,18 @@ def add_uk_sim( session.add(person) for variable_name in list(person_df.columns): # Get the variable object by name - variable = session.exec(select(Variable).where(Variable.name == variable_name).where(Variable.country_id == uk.id)).one() + variable = session.exec( + select(Variable) + .where(Variable.name == variable_name) + .where(Variable.country_id == uk.id) + ).one() variable_state = VariableState( variable=variable, entity=person, time_period="2025", - value=str(person_df[variable_name].iloc[i]), # Convert to string as value is expected to be str + value=str( + person_df[variable_name].iloc[i] + ), # Convert to string as value is expected to be str simulation_run=sim_run, ) session.add(variable_state) @@ -284,7 +341,11 @@ def add_uk_sim( session.add(household) for variable_name in list(household_df.columns): # Get the variable object by name - variable = session.exec(select(Variable).where(Variable.name == variable_name).where(Variable.country_id == uk.id)).one() + variable = session.exec( + select(Variable) + .where(Variable.name == variable_name) + .where(Variable.country_id == uk.id) + ).one() variable_state = VariableState( variable=variable, entity=household, @@ -293,10 +354,9 @@ def add_uk_sim( ) session.add(variable_state) session.commit() - - + print("Successfully created example data for UK tax parameter change.") if __name__ == "__main__": - add_uk_sim() \ No newline at end of file + add_uk_sim() From e0ca8fb961013a87e170a6634fe84879ea7d2e3c Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Wed, 7 May 2025 16:11:59 +0100 Subject: [PATCH 5/7] Remove database test code --- policyengine/entities.py | 153 +-------------------------------------- 1 file changed, 3 insertions(+), 150 deletions(-) diff --git a/policyengine/entities.py b/policyengine/entities.py index 485574c..4276bd2 100644 --- a/policyengine/entities.py +++ b/policyengine/entities.py @@ -62,20 +62,6 @@ class Reform(BaseModel, table=True): ) simulations: List["Simulation"] = Relationship(back_populates="reform") - def __init__(self, **kwargs): - super().__init__(**kwargs) - - if "parameters_dict" in kwargs: - # Create ParameterChange objects from the provided dictionary - for parameter_name, changes in kwargs["parameters_dict"].items(): - for time_period, value in changes.items(): - parameter_change = ParameterChange( - parameter_name=parameter_name, - time_period=time_period, - value=value, - ) - self.parameter_changes.append(parameter_change) - class Parameter(BaseModel, table=True): """Tax or benefit parameter definition""" @@ -190,14 +176,14 @@ class VariableState(BaseModel, table=True): entity_id: int = Field(foreign_key="entity.id") time_period: str # '2025' value: str # '30000' - simulation_run_id: Optional[int] = Field( - default=None, foreign_key="simulationrun.id" + simulation_id: Optional[int] = Field( + default=None, foreign_key="simulation.id" ) # Relationships variable: Variable = Relationship(back_populates="variable_states") entity: Entity = Relationship(back_populates="variable_states") - simulation_run: Optional["Simulation"] = Relationship( + simulation: Optional["Simulation"] = Relationship( back_populates="variable_states" ) @@ -227,136 +213,3 @@ def create_db_and_tables(connection_string="sqlite:///tax_policy.db"): engine = create_engine(connection_string) SQLModel.metadata.create_all(engine) return engine - - -# Example data creation for UK tax parameter change -def add_uk_sim(): - """Create example data for the UK tax rate change scenario""" - Path("tax_policy.db").unlink(missing_ok=True) - engine = create_db_and_tables() - - from policyengine import Simulation - - sim = Simulation( - country="uk", - scope="macro", - subsample=1000, - ) - - person_df = sim.baseline_simulation.calculate_dataframe( - ["person_id", "age"] - ) - household_df = sim.baseline_simulation.calculate_dataframe( - ["household_id", "household_net_income"] - ) - - with Session(engine) as session: - # Create countries - uk = Country(code="uk", name="United Kingdom") - us = Country(code="us", name="United States") - session.add(uk) - session.add(us) - session.commit() - - # Create dataset and series - dataset_series = DatasetSeries( - name="Enhanced FRS", description="Enhanced Family Resources Survey" - ) - session.add(dataset_series) - - dataset = Dataset( - name="EFRS 2022", - description="Enhanced Family Resources Survey 2022", - ) - session.add(dataset) - session.commit() - - # Tag dataset - dataset_tag = Dataset( - id=1, # Doesn't seem to work without this - dataset=dataset, - dataset_series=dataset_series, - version="2025.1", - ) - session.add(dataset_tag) - session.commit() - - # Add simulation run - - sim_run = Simulation( - country=uk, - reform=None, - package_version="1.0.0", - dataset=dataset, - run_date=datetime.utcnow(), - ) - session.add(sim_run) - session.commit() - - # Create variables - variable_names = list(person_df.columns) + list(household_df.columns) - for variable_name in variable_names: - variable = Variable( - country=uk, - name=variable_name, - description=f"Variable {variable_name} for UK tax simulation", - ) - session.add(variable) - session.commit() - - # Create all person entities - for i in range(len(person_df)): - person = Entity( - country=uk, - entity_type="person", - dataset_tag_id=dataset_tag.id, - ) - session.add(person) - for variable_name in list(person_df.columns): - # Get the variable object by name - variable = session.exec( - select(Variable) - .where(Variable.name == variable_name) - .where(Variable.country_id == uk.id) - ).one() - variable_state = VariableState( - variable=variable, - entity=person, - time_period="2025", - value=str( - person_df[variable_name].iloc[i] - ), # Convert to string as value is expected to be str - simulation_run=sim_run, - ) - session.add(variable_state) - session.commit() - - # Create all household entities - for i in range(len(household_df)): - household = Entity( - country=uk, - entity_type="household", - dataset_tag_id=dataset_tag.id, - ) - session.add(household) - for variable_name in list(household_df.columns): - # Get the variable object by name - variable = session.exec( - select(Variable) - .where(Variable.name == variable_name) - .where(Variable.country_id == uk.id) - ).one() - variable_state = VariableState( - variable=variable, - entity=household, - time_period="2025", - value=household_df[variable_name].iloc[i], - ) - session.add(variable_state) - session.commit() - - print("Successfully created example data for UK tax parameter change.") - - -if __name__ == "__main__": - add_uk_sim() From c86311d64d1ca4fcbbf8b095b917b82973ec33e0 Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Wed, 7 May 2025 16:12:22 +0100 Subject: [PATCH 6/7] Versioning --- changelog_entry.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/changelog_entry.yaml b/changelog_entry.yaml index e69de29..a498fed 100644 --- a/changelog_entry.yaml +++ b/changelog_entry.yaml @@ -0,0 +1,4 @@ +- bump: minor + changes: + added: + - Database entities for APIv2. From 01adc9b9a09b2ed4f4509581fa213c69e00d643c Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Wed, 7 May 2025 16:56:06 +0100 Subject: [PATCH 7/7] Add created_at --- policyengine/entities.py | 1 + 1 file changed, 1 insertion(+) diff --git a/policyengine/entities.py b/policyengine/entities.py index 4276bd2..dffc907 100644 --- a/policyengine/entities.py +++ b/policyengine/entities.py @@ -27,6 +27,7 @@ class BaseModel(SQLModel): """Base model with ID as primary key""" id: Optional[int] = Field(default=None, primary_key=True) + created_at: datetime = Field(default_factory=datetime.utcnow) # Core policy models