Format

PolicyEngine · Aug 19, 2024 · ac3d65e
1 parent 10fd55c
commit ac3d65e
Show file tree

Hide file tree

Showing 9 changed files with 51 additions and 21 deletions.
diff --git a/policyengine_us_data/__init__.py b/policyengine_us_data/__init__.py
@@ -1 +1 @@
-from .datasets import *
\ No newline at end of file
+from .datasets import *
diff --git a/policyengine_us_data/datasets/cps/policyengine_cps.py b/policyengine_us_data/datasets/cps/policyengine_cps.py
@@ -8,7 +8,9 @@
 import os
 import yaml
 from typing import Type
-from policyengine_us_data.utils.uprating import create_policyengine_uprating_factors_table
+from policyengine_us_data.utils.uprating import (
+    create_policyengine_uprating_factors_table,
+)
 
 
 class CPS(Dataset):
@@ -32,12 +34,16 @@ def generate(self):
             arrays = cps_2022.load_dataset()
             for variable in uprating:
                 if variable in arrays:
-                    current_index = uprating[uprating.Variable == variable][self.time_period].values[0]
-                    start_index = uprating[uprating.Variable == variable][2021].values[0]
+                    current_index = uprating[uprating.Variable == variable][
+                        self.time_period
+                    ].values[0]
+                    start_index = uprating[uprating.Variable == variable][
+                        2021
+                    ].values[0]
                     growth = current_index / start_index
                     print(f"Uprating {variable} by {growth-1:.1%}")
                     arrays[variable] = arrays[variable] * growth
-            
+
             self.save_dataset(arrays)
             return
 
@@ -556,6 +562,7 @@ class CPS_2022(CPS):
     file_path = STORAGE_FOLDER / "cps_2022.h5"
     time_period = 2022
 
+
 class CPS_2024(CPS):
     name = "cps_2024"
     label = "CPS 2024"

diff --git a/policyengine_us_data/datasets/puf/__init__.py b/policyengine_us_data/datasets/puf/__init__.py
@@ -1 +1 @@
-from .policyengine_puf import *
\ No newline at end of file
+from .policyengine_puf import *
diff --git a/policyengine_us_data/datasets/puf/policyengine_puf.py b/policyengine_us_data/datasets/puf/policyengine_puf.py
@@ -7,14 +7,17 @@
 from .uprate_puf import uprate_puf
 from survey_enhance import Imputation
 from .irs_puf import IRS_PUF_2015
-from policyengine_us_data.utils.uprating import create_policyengine_uprating_factors_table
+from policyengine_us_data.utils.uprating import (
+    create_policyengine_uprating_factors_table,
+)
 
 rng = np.random.default_rng(seed=64)
 
 
 def impute_pension_contributions_to_puf(puf_df):
     from policyengine_us import Microsimulation
     from policyengine_us_data.datasets.cps import CPS_2021
+
     cps = Microsimulation(dataset=CPS_2021)
     cps_df = cps.calculate_dataframe(
         ["employment_income", "household_weight", "pre_tax_contributions"]
@@ -32,7 +35,6 @@ def impute_pension_contributions_to_puf(puf_df):
     )
 
 
-
 def impute_missing_demographics(
     puf: pd.DataFrame, demographics: pd.DataFrame
 ) -> pd.DataFrame:
@@ -282,6 +284,7 @@ class PUF(Dataset):
 
     def generate(self):
         from policyengine_us.system import system
+
         print("Importing PolicyEngine US variable metadata...")
 
         irs_puf = IRS_PUF_2015()
@@ -298,15 +301,19 @@ def generate(self):
             arrays = puf_2021.load_dataset()
             for variable in uprating:
                 if variable in arrays:
-                    current_index = uprating[uprating.Variable == variable][self.time_period].values[0]
-                    start_index = uprating[uprating.Variable == variable][2021].values[0]
+                    current_index = uprating[uprating.Variable == variable][
+                        self.time_period
+                    ].values[0]
+                    start_index = uprating[uprating.Variable == variable][
+                        2021
+                    ].values[0]
                     growth = current_index / start_index
                     print(f"Uprating {variable} by {growth-1:.1%}")
                     arrays[variable] = arrays[variable] * growth
             self.save_dataset(arrays)
             return
 
-        puf = puf[puf.MARS != 0] # Remove aggregate records
+        puf = puf[puf.MARS != 0]  # Remove aggregate records
 
         print("Pre-processing PUF...")
         original_recid = puf.RECID.values.copy()
@@ -492,6 +499,7 @@ class PUF_2021(PUF):
     time_period = 2021
     file_path = STORAGE_FOLDER / "pe_puf_2021.h5"
 
+
 class PUF_2024(PUF):
     label = "PUF 2024"
     name = "puf_2024"

diff --git a/policyengine_us_data/evaluation/loss.py b/policyengine_us_data/evaluation/loss.py
@@ -1,8 +1,10 @@
 import numpy as np
 import pandas as pd
 
+
 def create_statistical_target_matrix() -> np.array:
     pass
 
+
 def create_statistical_targets() -> pd.DataFrame:
-    pass
\ No newline at end of file
+    pass
diff --git a/policyengine_us_data/evaluation/report.py b/policyengine_us_data/evaluation/report.py
@@ -1,6 +1,7 @@
 from policyengine_us_data.data_storage import STORAGE_FOLDER
 import argparse
 
+
 def create_report():
     from policyengine_us import Microsimulation
     from policyengine_us_data import CPS_2024
@@ -14,19 +15,24 @@ def create_report():
     hnet_totals = []
     years = []
     for year in range(START_YEAR, START_YEAR + BUDGET_WINDOW):
-        hnet_totals.append(round(sim.calculate("household_net_income", year).sum()/1e9, 1))
+        hnet_totals.append(
+            round(sim.calculate("household_net_income", year).sum() / 1e9, 1)
+        )
         years.append(year)
-
-    df = pd.DataFrame({"Year": years, "Household net income": hnet_totals}).set_index("Year", drop=True)
+
+    df = pd.DataFrame(
+        {"Year": years, "Household net income": hnet_totals}
+    ).set_index("Year", drop=True)
 
     report = f"""# Economy summary
 
 ## Household net income
 {df.T.to_markdown(index=False)}
 """
-    
+
     return report
 
+
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("--output", type=str, default="report.md")

diff --git a/policyengine_us_data/tests/test_datasets/test_irs_puf.py b/policyengine_us_data/tests/test_datasets/test_irs_puf.py
@@ -1,5 +1,6 @@
 import pytest
 
+
 @pytest.mark.skip(reason="This test requires private data.")
 @pytest.mark.parametrize("year", [2015])
 def test_irs_puf_generates(year: int):

diff --git a/policyengine_us_data/utils/github.py b/policyengine_us_data/utils/github.py
@@ -80,6 +80,7 @@ def upload(
 
     return response.json()
 
+
 def set_pr_auto_review_comment(text: str):
     # On a pull request, set a review comment with the given text.
 
@@ -99,4 +100,4 @@ def set_pr_auto_review_comment(text: str):
     if response.status_code != 200:
         raise ValueError(
             f"Invalid response code {response.status_code} for url {url}. Received: {response.text}"
-        )
\ No newline at end of file
+        )
diff --git a/policyengine_us_data/utils/uprating.py b/policyengine_us_data/utils/uprating.py
@@ -4,6 +4,7 @@
 START_YEAR = 2020
 END_YEAR = 2034
 
+
 def create_policyengine_uprating_factors_table():
     from policyengine_us.system import system
 
@@ -13,14 +14,18 @@ def create_policyengine_uprating_factors_table():
     years = []
     index_values = []
 
-    population_size = system.parameters.get_child("calibration.gov.census.populations.total")
+    population_size = system.parameters.get_child(
+        "calibration.gov.census.populations.total"
+    )
 
     for variable in system.variables.values():
         if variable.uprating is not None:
             parameter = system.parameters.get_child(variable.uprating)
             start_value = parameter(START_YEAR)
             for year in range(START_YEAR, END_YEAR + 1):
-                population_growth = population_size(year) / population_size(START_YEAR)
+                population_growth = population_size(year) / population_size(
+                    START_YEAR
+                )
                 variable_names.append(variable.name)
                 years.append(year)
                 growth = parameter(year) / start_value
@@ -29,7 +34,7 @@ def create_policyengine_uprating_factors_table():
                 else:
                     per_capita_growth = growth
                 index_values.append(round(per_capita_growth, 3))
-            
+
     df["Variable"] = variable_names
     df["Year"] = years
     df["Value"] = index_values
@@ -47,4 +52,4 @@ def create_policyengine_uprating_factors_table():
     df_growth[START_YEAR] = 0
 
     df_growth.to_csv(STORAGE_FOLDER / "uprating_growth_factors.csv")
-    return df
\ No newline at end of file
+    return df
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		-from .policyengine_puf import *
		+from .policyengine_puf import *