-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #2 from PolicyEngine/ecps
Add the Enhanced CPS
- Loading branch information
Showing
42 changed files
with
2,281 additions
and
187 deletions.
There are no files selected for viewing
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,3 +4,5 @@ | |
**/*.h5 | ||
*.ipynb | ||
**/*.csv | ||
!uprating_factors.csv | ||
!uprating_growth_factors.csv |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,5 +2,4 @@ FROM python:latest | |
COPY . . | ||
# Install | ||
RUN make install | ||
# Run tests | ||
CMD ["make", "test"] | ||
RUN ["make", "data"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
FROM python:latest | ||
COPY . . | ||
# Install | ||
RUN make download | ||
RUN make install | ||
RUN python docs/download.py | ||
EXPOSE 8080 | ||
ENTRYPOINT ["streamlit", "run", "docs/Home.py", "--server.port=8080", "--server.address=0.0.0.0"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
import streamlit as st | ||
|
||
# Opening paragraph shown directly beneath the page title.
INTRO_TEXT = """PolicyEngine-US-Data is a package to create representative microdata for the US, designed for input in the PolicyEngine tax-benefit microsimulation model."""

# (subheader, paragraph) pairs, rendered top to bottom in this order.
SECTIONS = (
    (
        "What does this repo do?",
        """Principally, this package creates a (partly synthetic) dataset of households (with incomes, demographics and more) that describes the U.S. household sector. This dataset synthesises multiple sources of data (the Current Population Survey, the IRS Public Use File, and administrative statistics) to improve upon the accuracy of **any** of them.""",
    ),
    (
        "What does this dataset look like?",
        "The below table shows an extract of the person records in one household in the dataset.",
    ),
)

st.title("PolicyEngine-US-Data")
st.write(INTRO_TEXT)

for heading, paragraph in SECTIONS:
    st.subheader(heading)
    st.write(paragraph)
|
||
|
||
@st.cache_data
def sample_household():
    """Return the input-dataframe rows belonging to one example household.

    Builds a ``Microsimulation`` on ``EnhancedCPS_2024``, converts it to its
    input dataframe, reads the household id stored at row position 10 of the
    ``person_household_id__2024`` column and returns every row that carries
    the same id.

    The function is wrapped in ``st.cache_data``, so Streamlit caches the
    returned dataframe instead of rebuilding the simulation on every rerun.

    Returns:
        The subset of the input dataframe whose ``person_household_id__2024``
        equals the selected household id.
    """
    # Function-scope imports: these packages are loaded when the function is
    # first called rather than when the page module is imported.
    from policyengine_us_data.datasets import EnhancedCPS_2024
    from policyengine_us import Microsimulation

    df = Microsimulation(dataset=EnhancedCPS_2024).to_input_dataframe()

    # Look the id column up once; it is used both to pick the example
    # household and to filter the rows.
    household_ids = df.person_household_id__2024

    # NOTE(review): position 10 looks like an arbitrary pick for the demo;
    # this raises IndexError if the dataframe has fewer than 11 rows.
    household_id = household_ids.values[10]
    return df[household_ids == household_id]
|
||
|
||
# ``.T`` swaps rows and columns of the sampled household before display.
st.dataframe(sample_household().T, use_container_width=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
from policyengine_us_data.utils.github import download | ||
from policyengine_us_data.data_storage import STORAGE_FOLDER | ||
|
||
# Each row holds the four leading positional arguments passed to ``download``.
# They look like (owner, repository, release tag, file name) -- confirm
# against policyengine_us_data.utils.github.download.
RELEASE_FILES = (
    ("PolicyEngine", "policyengine-us-data", "release", "enhanced_cps_2024.h5"),
    ("PolicyEngine", "policyengine-us-data", "release", "cps_2024.h5"),
    ("PolicyEngine", "irs-soi-puf", "release", "puf_2024.h5"),
)

# The fifth argument is always STORAGE_FOLDER joined with the same file name.
for owner, repository, tag, file_name in RELEASE_FILES:
    download(owner, repository, tag, file_name, STORAGE_FOLDER / file_name)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
import streamlit as st | ||
|
||
# Page heading, then a one-sentence description of the table rendered below.
TABLE_DESCRIPTION = """The table below shows the totals for calendar year 2024 for the Enhanced CPS dataset variables."""

st.title("Aggregates")
st.write(TABLE_DESCRIPTION)
|
||
|
||
@st.cache_data
def sample_household():
    """Build the table of per-variable totals for the Enhanced CPS.

    Despite its name, this function does not sample a household: for every
    name in ``IMPUTED_VARIABLES`` it calculates the variable mapped to the
    household level, sums it, divides by 1e9 and rounds to one decimal place.

    Returns:
        A dataframe indexed by ``"Variable"`` with a single ``"Total ($bn)"``
        column, sorted from the largest total to the smallest.
    """
    from policyengine_us import Microsimulation
    from policyengine_us_data import EnhancedCPS_2024
    from policyengine_us_data.datasets.cps.extended_cps import (
        IMPUTED_VARIABLES as FINANCE_VARIABLES,
    )
    import pandas as pd

    simulation = Microsimulation(dataset=EnhancedCPS_2024)

    # One rounded total per variable, in the same order as FINANCE_VARIABLES.
    totals_bn = []
    for name in FINANCE_VARIABLES:
        household_sum = simulation.calculate(name, map_to="household").sum()
        totals_bn.append(round(household_sum / 1e9, 1))

    table = pd.DataFrame(
        {
            "Variable": FINANCE_VARIABLES,
            "Total ($bn)": totals_bn,
        }
    )
    ranked = table.sort_values("Total ($bn)", ascending=False)
    return ranked.set_index("Variable")
|
||
|
||
# Render the cached totals table across the full container width.
st.dataframe(sample_household(), use_container_width=True)
Oops, something went wrong.