Skip to content

Commit

Permalink
Format
Browse files Browse the repository at this point in the history
  • Loading branch information
nikhilwoodruff committed Aug 19, 2024
1 parent 10fd55c commit ac3d65e
Show file tree
Hide file tree
Showing 9 changed files with 51 additions and 21 deletions.
2 changes: 1 addition & 1 deletion policyengine_us_data/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
from .datasets import *
from .datasets import *
15 changes: 11 additions & 4 deletions policyengine_us_data/datasets/cps/policyengine_cps.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@
import os
import yaml
from typing import Type
from policyengine_us_data.utils.uprating import create_policyengine_uprating_factors_table
from policyengine_us_data.utils.uprating import (
create_policyengine_uprating_factors_table,
)


class CPS(Dataset):
Expand All @@ -32,12 +34,16 @@ def generate(self):
arrays = cps_2022.load_dataset()
for variable in uprating:
if variable in arrays:
current_index = uprating[uprating.Variable == variable][self.time_period].values[0]
start_index = uprating[uprating.Variable == variable][2021].values[0]
current_index = uprating[uprating.Variable == variable][
self.time_period
].values[0]
start_index = uprating[uprating.Variable == variable][
2021
].values[0]
growth = current_index / start_index
print(f"Uprating {variable} by {growth-1:.1%}")
arrays[variable] = arrays[variable] * growth

self.save_dataset(arrays)
return

Expand Down Expand Up @@ -556,6 +562,7 @@ class CPS_2022(CPS):
file_path = STORAGE_FOLDER / "cps_2022.h5"
time_period = 2022


class CPS_2024(CPS):
name = "cps_2024"
label = "CPS 2024"
Expand Down
2 changes: 1 addition & 1 deletion policyengine_us_data/datasets/puf/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
from .policyengine_puf import *
from .policyengine_puf import *
18 changes: 13 additions & 5 deletions policyengine_us_data/datasets/puf/policyengine_puf.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,17 @@
from .uprate_puf import uprate_puf
from survey_enhance import Imputation
from .irs_puf import IRS_PUF_2015
from policyengine_us_data.utils.uprating import create_policyengine_uprating_factors_table
from policyengine_us_data.utils.uprating import (
create_policyengine_uprating_factors_table,
)

rng = np.random.default_rng(seed=64)


def impute_pension_contributions_to_puf(puf_df):
from policyengine_us import Microsimulation
from policyengine_us_data.datasets.cps import CPS_2021

cps = Microsimulation(dataset=CPS_2021)
cps_df = cps.calculate_dataframe(
["employment_income", "household_weight", "pre_tax_contributions"]
Expand All @@ -32,7 +35,6 @@ def impute_pension_contributions_to_puf(puf_df):
)



def impute_missing_demographics(
puf: pd.DataFrame, demographics: pd.DataFrame
) -> pd.DataFrame:
Expand Down Expand Up @@ -282,6 +284,7 @@ class PUF(Dataset):

def generate(self):
from policyengine_us.system import system

print("Importing PolicyEngine US variable metadata...")

irs_puf = IRS_PUF_2015()
Expand All @@ -298,15 +301,19 @@ def generate(self):
arrays = puf_2021.load_dataset()
for variable in uprating:
if variable in arrays:
current_index = uprating[uprating.Variable == variable][self.time_period].values[0]
start_index = uprating[uprating.Variable == variable][2021].values[0]
current_index = uprating[uprating.Variable == variable][
self.time_period
].values[0]
start_index = uprating[uprating.Variable == variable][
2021
].values[0]
growth = current_index / start_index
print(f"Uprating {variable} by {growth-1:.1%}")
arrays[variable] = arrays[variable] * growth
self.save_dataset(arrays)
return

puf = puf[puf.MARS != 0] # Remove aggregate records
puf = puf[puf.MARS != 0] # Remove aggregate records

print("Pre-processing PUF...")
original_recid = puf.RECID.values.copy()
Expand Down Expand Up @@ -492,6 +499,7 @@ class PUF_2021(PUF):
time_period = 2021
file_path = STORAGE_FOLDER / "pe_puf_2021.h5"


class PUF_2024(PUF):
label = "PUF 2024"
name = "puf_2024"
Expand Down
4 changes: 3 additions & 1 deletion policyengine_us_data/evaluation/loss.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import numpy as np
import pandas as pd


def create_statistical_target_matrix() -> np.ndarray:
    """Build the statistical target matrix (placeholder, not implemented).

    The body is still an empty stub, so calling this currently returns
    ``None`` rather than an array.

    Returns:
        Annotated as ``np.ndarray`` (the array type; ``np.array`` is the
        constructor function and is not a valid type annotation).
    """
    pass


def create_statistical_targets() -> pd.DataFrame:
    """Build the table of statistical targets (placeholder, not implemented).

    The function has no logic yet: it takes no arguments and returns
    ``None``, despite the ``pd.DataFrame`` return annotation.
    """
    return None
14 changes: 10 additions & 4 deletions policyengine_us_data/evaluation/report.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from policyengine_us_data.data_storage import STORAGE_FOLDER
import argparse


def create_report():
from policyengine_us import Microsimulation
from policyengine_us_data import CPS_2024
Expand All @@ -14,19 +15,24 @@ def create_report():
hnet_totals = []
years = []
for year in range(START_YEAR, START_YEAR + BUDGET_WINDOW):
hnet_totals.append(round(sim.calculate("household_net_income", year).sum()/1e9, 1))
hnet_totals.append(
round(sim.calculate("household_net_income", year).sum() / 1e9, 1)
)
years.append(year)

df = pd.DataFrame({"Year": years, "Household net income": hnet_totals}).set_index("Year", drop=True)

df = pd.DataFrame(
{"Year": years, "Household net income": hnet_totals}
).set_index("Year", drop=True)

report = f"""# Economy summary
## Household net income
{df.T.to_markdown(index=False)}
"""

return report


if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--output", type=str, default="report.md")
Expand Down
1 change: 1 addition & 0 deletions policyengine_us_data/tests/test_datasets/test_irs_puf.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import pytest


@pytest.mark.skip(reason="This test requires private data.")
@pytest.mark.parametrize("year", [2015])
def test_irs_puf_generates(year: int):
Expand Down
3 changes: 2 additions & 1 deletion policyengine_us_data/utils/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ def upload(

return response.json()


def set_pr_auto_review_comment(text: str):
# On a pull request, set a review comment with the given text.

Expand All @@ -99,4 +100,4 @@ def set_pr_auto_review_comment(text: str):
if response.status_code != 200:
raise ValueError(
f"Invalid response code {response.status_code} for url {url}. Received: {response.text}"
)
)
13 changes: 9 additions & 4 deletions policyengine_us_data/utils/uprating.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
START_YEAR = 2020
END_YEAR = 2034


def create_policyengine_uprating_factors_table():
from policyengine_us.system import system

Expand All @@ -13,14 +14,18 @@ def create_policyengine_uprating_factors_table():
years = []
index_values = []

population_size = system.parameters.get_child("calibration.gov.census.populations.total")
population_size = system.parameters.get_child(
"calibration.gov.census.populations.total"
)

for variable in system.variables.values():
if variable.uprating is not None:
parameter = system.parameters.get_child(variable.uprating)
start_value = parameter(START_YEAR)
for year in range(START_YEAR, END_YEAR + 1):
population_growth = population_size(year) / population_size(START_YEAR)
population_growth = population_size(year) / population_size(
START_YEAR
)
variable_names.append(variable.name)
years.append(year)
growth = parameter(year) / start_value
Expand All @@ -29,7 +34,7 @@ def create_policyengine_uprating_factors_table():
else:
per_capita_growth = growth
index_values.append(round(per_capita_growth, 3))

df["Variable"] = variable_names
df["Year"] = years
df["Value"] = index_values
Expand All @@ -47,4 +52,4 @@ def create_policyengine_uprating_factors_table():
df_growth[START_YEAR] = 0

df_growth.to_csv(STORAGE_FOLDER / "uprating_growth_factors.csv")
return df
return df

0 comments on commit ac3d65e

Please sign in to comment.