Skip to content

Commit

Permalink
Merge pull request #112 from rickecon/ntbkrun
Browse files Browse the repository at this point in the history
Update file calls in calibration modules
  • Loading branch information
jdebacker authored Jun 4, 2024
2 parents 60f204f + 379ebdf commit 29acbee
Show file tree
Hide file tree
Showing 14 changed files with 200 additions and 109,309 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ htmlcov/*
*.asv
*.nav
*.snm
*.gz
*.bib.bak
*.fls
*.m~
Expand All @@ -47,6 +46,7 @@ examples/OG-USA-Example/*
cs-config/cs_config/OUTPUT_BASELINE/*
data/csv_output_files/*
data/images/*
data/PSID/psid_lifetime_income.csv
ogusa/csv_output_files/*
ogusa/images/*
.vscode/
Expand Down
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,14 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).


## [0.1.8] - 2024-05-20 12:00:00

### Added

- Updates the `ogusa` package to include the zipped `psid_lifetime_income.csv.gz` file, which is now called in some calibration modules (`bequest_transmission.py`, `deterministic_profiles.py`, and `transfer_distribution.py`), but with an option for the user to provide their own custom datafile. These changes allow Jupyter notebook users to execute the `Calibration` class object and ensure that those who install the `ogusa` package from PyPI have the required datafile for the major calibration modules.


## [0.1.7] - 2024-05-14 16:30:00

### Added
Expand Down Expand Up @@ -97,6 +105,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0



[0.1.8]: https://github.com/PSLmodels/OG-USA/compare/v0.1.7...v0.1.8
[0.1.7]: https://github.com/PSLmodels/OG-USA/compare/v0.1.6...v0.1.7
[0.1.6]: https://github.com/PSLmodels/OG-USA/compare/v0.1.5...v0.1.6
[0.1.5]: https://github.com/PSLmodels/OG-USA/compare/v0.1.4...v0.1.5
Expand Down
109,156 changes: 0 additions & 109,156 deletions data/PSID/psid_lifetime_income.csv

This file was deleted.

90 changes: 47 additions & 43 deletions ogusa/bequest_transmission.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,42 +3,40 @@
import matplotlib.pyplot as plt
import os
from ogusa.utils import MVKDE

CURDIR = os.path.split(os.path.abspath(__file__))[0]
from ogusa.constants import CODE_PATH


def get_bequest_matrix(
J=7,
lambdas=np.array([0.25, 0.25, 0.2, 0.1, 0.1, 0.09, 0.01]),
graphs=False,
data_path=None,
output_path=None,
):
"""
Returns S x J matrix representing the fraction of aggregate
bequests that go to each household by age and lifetime income group.
Args:
J (int): number of lifetime income groups
lambdas (Numpy array): length J array of lifetime income group
proportions
data_path (str): path to PSID data
output_path (str): path to save output plots and data
Returns:
kde_matrix (Numpy array): SxJ shaped array that represents the
smoothed distribution of proportions going to each (s,j)
"""
# Create directory if output directory does not already exist
CURDIR = os.path.split(os.path.abspath(__file__))[0]
output_fldr = "csv_output_files"
output_dir = os.path.join(CURDIR, "..", "data", output_fldr)
if not os.access(output_dir, os.F_OK):
os.makedirs(output_dir)
image_fldr = "images"
image_dir = os.path.join(CURDIR, "..", "data", image_fldr)
if not os.access(image_dir, os.F_OK):
os.makedirs(image_dir)

# Define a lambda function to compute the weighted mean:
# wm = lambda x: np.average(
# x, weights=df.loc[x.index, "fam_smpl_wgt_core"])

# Read in dataframe of PSID data
# df = ogcore.utils.safe_read_pickle(
# os.path.join(CURDIR, "data", "PSID", "psid_lifetime_income.pkl")
# )
df = pd.read_csv(
os.path.join(CURDIR, "..", "data", "PSID", "psid_lifetime_income.csv")
)
# Read in PSID data
if data_path is None:
# Read data file shipped with OG-USA package
df = pd.read_csv(
os.path.join(CODE_PATH, "psid_lifetime_income.csv.gz")
)
else:
# Read data from the user-supplied path (e.g., when running this from a branch of the OG-USA repo)
df = pd.read_csv(data_path)

# Do some tabs with data file...
# 'net_wealth', 'inheritance', 'value_inheritance_1st',
Expand All @@ -54,12 +52,15 @@ def get_bequest_matrix(
)
# print(df[['sum_inherit', 'inheritance']].describe())

if graphs:
if output_path is not None:
# Create plot path directory if it doesn't already exist
if not os.path.exists(output_path):
os.makedirs(output_path)
# Total inheritances by year
df.groupby("year_data").mean(numeric_only=True).plot(y="inheritance")
plt.savefig(os.path.join(image_dir, "inheritance_year.png"))
plt.savefig(os.path.join(output_path, "inheritance_year.png"))
df.groupby("year_data").mean(numeric_only=True).plot(y="sum_inherit")
plt.savefig(os.path.join(image_dir, "sum_inherit_year.png"))
plt.savefig(os.path.join(output_path, "sum_inherit_year.png"))
# note that summing up inheritances gives a much larger value than
# taking the inheritance variable

Expand All @@ -68,22 +69,22 @@ def get_bequest_matrix(
df[df["year_data"] >= 1988].groupby("age").mean(
numeric_only=True
).plot(y="net_wealth")
plt.savefig(os.path.join(image_dir, "net_wealth_age.png"))
plt.savefig(os.path.join(output_path, "net_wealth_age.png"))
df[df["year_data"] >= 1988].groupby("age").mean(
numeric_only=True
).plot(y="inheritance")
plt.savefig(os.path.join(image_dir, "inheritance_age.png"))
plt.savefig(os.path.join(output_path, "inheritance_age.png"))

# Inheritances by lifetime income group
# bar plot
df[df["year_data"] >= 1988].groupby("li_group").mean(
numeric_only=True
).plot.bar(y="net_wealth")
plt.savefig(os.path.join(image_dir, "net_wealth_li.png"))
plt.savefig(os.path.join(output_path, "net_wealth_li.png"))
df[df["year_data"] >= 1988].groupby("li_group").mean(
numeric_only=True
).plot.bar(y="inheritance")
plt.savefig(os.path.join(image_dir, "inheritance_li.png"))
plt.savefig(os.path.join(output_path, "inheritance_li.png"))

# lifecycle plots with line for each ability type
pd.pivot_table(
Expand All @@ -93,15 +94,15 @@ def get_bequest_matrix(
columns="li_group",
aggfunc="mean",
).plot(legend=True)
plt.savefig(os.path.join(image_dir, "net_wealth_age_li.png"))
plt.savefig(os.path.join(output_path, "net_wealth_age_li.png"))
pd.pivot_table(
df[df["year_data"] >= 1988],
values="inheritance",
index="age",
columns="li_group",
aggfunc="mean",
).plot(legend=True)
plt.savefig(os.path.join(image_dir, "inheritance_age_li.png"))
plt.savefig(os.path.join(output_path, "inheritance_age_li.png"))

# Matrix Fraction of inheritances in a year by age and lifetime_inc
inheritance_matrix = pd.pivot_table(
Expand All @@ -114,16 +115,18 @@ def get_bequest_matrix(
# replace NaN with zero
inheritance_matrix.fillna(value=0, inplace=True)
inheritance_matrix = inheritance_matrix / inheritance_matrix.sum().sum()
# inheritance_matrix.to_csv(os.path.join(
# output_dir, 'bequest_matrix.csv'))

# estimate kernel density of bequests
if output_path is not None:
filename = os.path.join(output_path, "inheritance_kde.png")
else:
filename = None
kde_matrix = MVKDE(
80,
7,
inheritance_matrix.to_numpy(),
filename=os.path.join(image_dir, "inheritance_kde.png"),
plot=graphs,
filename=filename,
plot=(output_path is not None),
bandwidth=0.5,
)

Expand All @@ -139,10 +142,11 @@ def get_bequest_matrix(
)
kde_matrix = kde_matrix_new

np.savetxt(
os.path.join(output_dir, "bequest_matrix_kde.csv"),
kde_matrix,
delimiter=",",
)
if output_path is not None:
np.savetxt(
os.path.join(output_path, "bequest_matrix_kde.csv"),
kde_matrix,
delimiter=",",
)

return kde_matrix
22 changes: 17 additions & 5 deletions ogusa/calibrate.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from ogusa import estimate_beta_j, bequest_transmission
from ogusa import macro_params, transfer_distribution, income
from ogusa import get_micro_data, psid_data_setup
from ogusa import get_micro_data
import os
import numpy as np
from ogcore import txfunc, demographics
Expand All @@ -25,6 +25,8 @@ def __init__(
data="cps",
client=None,
num_workers=1,
demographic_data_path=None,
output_path=None,
):
"""
Constructor for the Calibration class. This class is used to find
Expand All @@ -43,10 +45,15 @@ def __init__(
data (str): data source for microsimulation model
client (Dask client object): client
num_workers (int): number of workers for Dask client
output_path (str): path to save output to
Returns:
Calibration class object instance
"""
# Create output_path if it doesn't exist
if output_path is not None:
if not os.path.exists(output_path):
os.makedirs(output_path)
self.estimate_tax_functions = estimate_tax_functions
self.estimate_beta = estimate_beta
self.estimate_chi_n = estimate_chi_n
Expand Down Expand Up @@ -76,10 +83,14 @@ def __init__(
self.macro_params = macro_params.get_macro_params()

# eta estimation
self.eta = transfer_distribution.get_transfer_matrix(p.J, p.lambdas)
self.eta = transfer_distribution.get_transfer_matrix(
p.J, p.lambdas, output_path=output_path
)

# zeta estimation
self.zeta = bequest_transmission.get_bequest_matrix(p.J, p.lambdas)
self.zeta = bequest_transmission.get_bequest_matrix(
p.J, p.lambdas, output_path=output_path
)

# demographics
if estimate_pop:
Expand All @@ -92,6 +103,7 @@ def __init__(
initial_data_year=p.start_year - 1,
final_data_year=p.start_year,
GraphDiag=False,
download_path=demographic_data_path,
)

# demographics for 80 period lives (needed for getting e below)
Expand All @@ -112,15 +124,15 @@ def __init__(
self.demographic_params["omega_SS"],
demog80["omega_SS"],
p.lambdas,
plot=False,
plot_path=output_path,
)
else:
self.e = income.get_e_interp(
p.S,
p.omega_SS,
p.omega_SS,
p.lambdas,
plot=False,
plot_path=output_path,
)

# Tax Functions
Expand Down
3 changes: 3 additions & 0 deletions ogusa/constants.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
import taxcalc
import os

SHOW_RUNTIME = False # Flag to display RuntimeWarnings when run model

REFORM_DIR = "OUTPUT_REFORM"
BASELINE_DIR = "OUTPUT_BASELINE"

CODE_PATH = os.path.abspath(os.path.dirname(__file__))

# Default year for model runs
DEFAULT_START_YEAR = 2021
# Tax-Calculator start year
Expand Down
Loading

0 comments on commit 29acbee

Please sign in to comment.