-
Notifications
You must be signed in to change notification settings - Fork 7
/
ff_simu_stats.m
383 lines (328 loc) · 14.8 KB
/
ff_simu_stats.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
%% FF_SIMU_STATS Gateway Probability Mass Statistics
% Given probability mass function f(s), and information y(s), x(s), z(s)
% at each element of the state-space, compute statistics for each
% variable, y, x, z, which are all discrete random variables. And
% compute their correlation and covariance. The function can take any
% number of outcome matrixes, y(s), ..., z(s), stored as values of
% associated with keys that name these variables in a container map.
%
% * MT_F_OF_S matrix or array probability mass at each s point
% * MP_MT_XYZ_OF_S container map of matrix with MT_X_OF_S
% * MP_SUPPORT_EXT container map with various control strings with
% defaults
%
% MP_MT_XYZ_OF_S keys and Example Values:
%
% mp_mt_xyz_of_s = containers.Map('KeyType', 'char', 'ValueType', 'any');
% mp_mt_xyz_of_s('ar_st_y_name') = {'cl_mt_pol_a', 'cl_mt_pol_b', 'cl_mt_pol_c'};
% mp_mt_xyz_of_s('cl_mt_pol_a') = {mt_pol_a, zeros(1)};
% mp_mt_xyz_of_s('cl_mt_pol_b') = {mt_pol_b, zeros(1)};
% mp_mt_xyz_of_s('cl_mt_pol_c') = {mt_pol_b, zeros(1)};
%
% MP_MT_XYZ_OF_S = FF_SIMU_STATS() default simulation stats outputs.
%
% MP_MT_XYZ_OF_S = FF_DISC_RAND_VAR_MASS2COVCOR(MT_F_OF_S,
% MP_CL_MT_XYZ_OF_S) calculates various statistics given the provided
% distribution matrix and various outcome/states matrixes.
%
% MP_MT_XYZ_OF_S = FF_DISC_RAND_VAR_MASS2COVCOR(MT_F_OF_S,
% MP_CL_MT_XYZ_OF_S, MP_SUPPORT_EXT) provides some additional control
% strings if to print verbose in each stat function.
%
% See also FX_SIMU_STATS
%
%%
function mp_cl_mt_xyz_of_s = ff_simu_stats(varargin)
%% FF_SIMU_STATS post simulation statistics generation
% Having derived f(a,z) the probability mass function of the joint discrete
% random variables, we now obtain distributional statistics. Note that we
% know f(a,z), and we also know relevant policy functions a'(a,z), c(a,z),
% or other policy functions. We can simulate any choices that are a
% function of the random variables (a,z), using f(a,z)
%
% parameter structure provides a list of
%
% # from result_map('ar_st_y_name'), get list of outcome matrix on state
% space
% # simulate each outcome using f(a,z) for probability draws
% # compute key statistics: (1) mean (expectation=sum) (2) sd (3) min and
% max (4) iqr (5) fraction = 0 (6) percentiles including: 99.9, 99, 95,
% every 5 in between 5, 1, 0.01.
%
% Uses fake binomial data when file is invoke with defaults.
%
% * the first element of each of these cell array is y(a,z), the
% outcome/choice at the state space points
% * the second element of the cell is another container, which contains
% statistics computed for f(y) based on y(a,z) and f(a,z), f(y) is the
% probability mass function for outcome y given the stationary distribution
% f(a,z). The second element container also includes f(y) itself as well as
% f(y,z).
% * additionally, result_map also stores some of the statistics for
% different variables jointly together. (a) *tb_outcomes_meansdperc*: where
% each row is a different outcome of the model, and each table column
% stores a different statistics of interest. (b) *tb_outcomes_fracheld*:
% which measures the fraction of asset held by different people.
%
%% Default
% use binomial as test case, z maps to binomial win prob, remember binom
% approximates normal.
if (~isempty(varargin))
if (length(varargin)==2)
[mt_f_of_s, mp_cl_mt_xyz_of_s] = varargin{:};
elseif (length(varargin)==3)
[mt_f_of_s, mp_cl_mt_xyz_of_s, mp_support_ext] = varargin{:};
end
% if invoked from outside overrid fully
else
it_states = 6;
it_shocks = 5;
fl_binom_n = it_states-1;
ar_binom_p = (1:(it_shocks))./(it_shocks+2);
ar_binom_x = (0:1:(it_states-1)) -3;
% f(z)
ar_binom_p_prob = binopdf(0:(it_shocks-1), it_shocks-1, 0.5);
% f(a,z), mass for a, z
mt_f_of_s = zeros([it_states, it_shocks]);
for it_z=1:it_shocks
% f(a|z)
f_a_condi_z = binopdf(ar_binom_x - min(ar_binom_x), fl_binom_n, ar_binom_p(it_z));
% f(z)
f_z = ar_binom_p_prob(it_z);
% f(a,z)=f(a|z)*f(z)
mt_f_of_s(:, it_z) = f_a_condi_z*f_z;
end
% y(a,z), some non-smooth structure
rng(123);
mt_pol_a = ar_binom_x' - 0.01*ar_binom_x'.^2 + ar_binom_p - 0.5*ar_binom_p.^2 + rand([it_states, it_shocks]);
mt_pol_a = round(mt_pol_a*3);
rng(456);
mt_pol_c = 10 -(mt_pol_a) + 15*(rand([it_states, it_shocks])-0.5);
% Generate result_map
% two column, the second zero(1) component left for disc rand var of a,
% c
mp_cl_mt_xyz_of_s = containers.Map('KeyType','char', 'ValueType','any');
mp_cl_mt_xyz_of_s('cl_mt_pol_a') = {mt_pol_a, zeros(1)};
mp_cl_mt_xyz_of_s('cl_mt_pol_c') = {mt_pol_c, zeros(1)};
mp_cl_mt_xyz_of_s('ar_st_y_name') = ["cl_mt_pol_a", "cl_mt_pol_c"];
end
%% Set and Update Support Map
mp_support = containers.Map('KeyType','char', 'ValueType','any');
mp_support('bl_display_detail') = false;
mp_support('bl_display_final') = true;
% for ff_disc_rand_var_mass2outcomes.m
mp_support('bl_display_drvm2outcomes') = false;
% for ff_disc_rand_var_stats.m
mp_support('ar_fl_percentiles') = [1 10 25 50 75 90 99];
mp_support('bl_display_drvstats') = false;
% for ff_disc_rand_var_stats.m
mp_support('bl_display_drvm2covcor') = false;
% override default support_map values
if (length(varargin)==3)
mp_support = [mp_support; mp_support_ext];
end
% Parse Support_map
params_group = values(mp_support, {'bl_display_detail', 'bl_display_final'});
[bl_display_detail, bl_display_final] = params_group{:};
params_group = values(mp_support, {'bl_display_drvm2outcomes'});
[bl_display_drvm2outcomes] = params_group{:};
params_group = values(mp_support, {'ar_fl_percentiles', 'bl_display_drvstats'});
[ar_fl_percentiles, bl_display_drvstats] = params_group{:};
params_group = values(mp_support, {'bl_display_drvm2covcor'});
[bl_display_drvm2covcor] = params_group{:};
%% Parse mp_mt_xyz_of_s
% result_map
params_group = values(mp_cl_mt_xyz_of_s, {'ar_st_y_name'});
[ar_st_y_name] = params_group{:};
%% *f(y), f(c), f(a)*: Generate Key Distributional Statistics for Each outcome
for it_y_ctr=1:length(ar_st_y_name)
%% *f(y), f(c), f(a)*: Find p(outcome(states)), proability mass function for each outcome
% Using from tools:
% <https://fanwangecon.github.io/CodeDynaAsset/tools/html/ff_disc_rand_var_mass2outcomes.html
% ff_disc_rand_var_mass2outcomes>, compute unique sorted outcomes for
% y(a,z) and find:
%
% $$ p(y,z) = \sum_{a} \left(1\left\{Y(a,z)=y\right\} \cdot p(a,z) \right)$$
%
% $$ p(y,a) = \sum_{z} \left(1\left\{Y(a,z)=y\right\} \cdot p(a,z) \right)$$
%
% $$ p(Y=y) = \sum_{a,z} \left( 1\left\{Y(a,z)=y\right\} \cdot p(a,z) \right)$$
%
% note: sum(mt_dist_az, 2) = result_map('cl_mt_pol_a'){2}, but not at
% small simulation grids. These two might be different because pol_a is
% based on a choices, mt_dist_az is based on a states
%
st_y_key = ar_st_y_name(it_y_ctr);
cl_mt_xyz_of_s = mp_cl_mt_xyz_of_s(st_y_key);
mt_y_of_s = cl_mt_xyz_of_s{1};
% run function ff_disc_rand_var_mass2outcomes.m
if (size(mt_y_of_s, 2) == 1)
% matrix inputs are single column
[ar_f_of_y, ar_y_unique_sorted] = ...
ff_disc_rand_var_mass2outcomes(st_y_key, mt_y_of_s, mt_f_of_s, bl_display_drvm2outcomes);
else
% matrix inputs are multi-column
[ar_f_of_y, ar_y_unique_sorted, mt_f_of_y_srow, mt_f_of_y_scol] = ...
ff_disc_rand_var_mass2outcomes(st_y_key, mt_y_of_s, mt_f_of_s, bl_display_drvm2outcomes);
end
%% *f(y), f(c), f(a)*: Compute Statistics for outcomes
%
% * $\mu_Y = E(Y) = \sum_{y} p(Y=y) \cdot y $
% * $\sigma_Y = \sqrt{ \sum_{y} p(Y=y) \cdot \left( y - \mu_y \right)^2}$
% * $p(y=0)$
% * $p(y=\max(y))$
% * percentiles: $min_{y} \left\{ P(Y \le y) - percentile \mid P(Y \le y) \ge percentile \right\}$
% * fraction of outcome held by up to percentiles: $E(Y<y)/E(Y)$
%
% run function ff_disc_rand_var_stats.m from tools:
[ds_stats_map] = ff_disc_rand_var_stats(st_y_key, ar_y_unique_sorted', ar_f_of_y', ...
ar_fl_percentiles, bl_display_drvstats);
% prcess results
% retrieve scalar statistics:
fl_choice_mean = ds_stats_map('fl_choice_mean');
fl_choice_sum_unweighted = ds_stats_map('fl_choice_sum_unweighted');
fl_choice_sd = ds_stats_map('fl_choice_sd');
fl_choice_coefofvar = ds_stats_map('fl_choice_coefofvar');
fl_choice_min = ds_stats_map('fl_choice_min');
fl_choice_max = ds_stats_map('fl_choice_max');
fl_choice_prob_zero = ds_stats_map('fl_choice_prob_zero');
fl_choice_prob_below_zero = ds_stats_map('fl_choice_prob_below_zero');
fl_choice_prob_above_zero = ds_stats_map('fl_choice_prob_above_zero');
fl_choice_prob_min = ds_stats_map('fl_choice_prob_min');
fl_choice_prob_max = ds_stats_map('fl_choice_prob_max');
fl_gini_index = ds_stats_map('fl_gini_index');
% retrieve distributional array stats
ar_choice_percentiles = ds_stats_map('ar_choice_percentiles');
ar_choice_perc_fracheld = ds_stats_map('ar_choice_perc_fracheld');
%% *f(y), f(c), f(a)*: Store Statistics Specific to Each Outcome
% see intro section
% Append prob mass functions to ds_stats_map
if (size(mt_y_of_s, 2) > 1)
ds_stats_map('mt_f_of_y_srow') = mt_f_of_y_srow;
ds_stats_map('mt_choice_prob_byYA') = mt_f_of_y_scol;
end
ds_stats_map('ar_y_unique_sorted') = ar_y_unique_sorted;
% fl_neg_sum = sum(ar_f_of_y<0);
% if (fl_neg_sum >= -1e20)
% warning('fl_neg_sum is too large')
% end
ar_f_of_y(ar_f_of_y<0) = 0;
ar_f_of_y = ar_f_of_y/sum(ar_f_of_y);
% sum(ar_f_of_y)
ds_stats_map('ar_f_of_y') = ar_f_of_y;
% ds_stats_map is second element of cell for the key for the variable
% in result_map
cl_mt_xyz_of_s{2} = ds_stats_map;
mp_cl_mt_xyz_of_s(st_y_key) = cl_mt_xyz_of_s;
% key stats
ar_keystats = [fl_choice_mean fl_choice_sum_unweighted fl_choice_sd fl_choice_coefofvar fl_gini_index fl_choice_min fl_choice_max ...
fl_choice_prob_zero fl_choice_prob_below_zero fl_choice_prob_above_zero ...
fl_choice_prob_min fl_choice_prob_max ar_choice_percentiles];
cl_xyz_names(it_y_ctr) = st_y_key;
if (it_y_ctr == 1)
mt_outcomes_meansdperc = ar_keystats;
mt_outcomes_fracheld = ar_choice_perc_fracheld;
else
mt_outcomes_meansdperc = [mt_outcomes_meansdperc; ar_keystats];
mt_outcomes_fracheld = [mt_outcomes_fracheld; ar_choice_perc_fracheld];
end
end
% Store Stats
tb_outcomes_meansdperc = array2table(mt_outcomes_meansdperc);
ar_fl_percentiles = ds_stats_map('ar_fl_percentiles');
cl_col_names = ['mean', 'unweighted_sum' 'sd', 'coefofvar', 'gini', 'min', 'max', ...
'pYis0', 'pYls0', 'pYgr0', 'pYisMINY', 'pYisMAXY', ...
strcat('p', string(ar_fl_percentiles))];
tb_outcomes_meansdperc.Properties.VariableNames = matlab.lang.makeValidName(cl_col_names);
tb_outcomes_meansdperc.Properties.RowNames = matlab.lang.makeValidName(cl_xyz_names);
if (bl_display_detail)
disp('xxx tb_outcomes_meansdperc: mean, sd, percentiles xxx')
disp(rows2vars(tb_outcomes_meansdperc));
end
% Process Aset Held by up to percentiles
tb_outcomes_fracheld = array2table(mt_outcomes_fracheld);
cl_col_names = [strcat('fracByP', string(ar_fl_percentiles))];
tb_outcomes_fracheld.Properties.VariableNames = matlab.lang.makeValidName(cl_col_names);
tb_outcomes_fracheld.Properties.RowNames = matlab.lang.makeValidName(cl_xyz_names);
if (bl_display_detail)
disp('xxx tb_outcomes_fracheld: fraction of asset/income/etc held by hh up to this percentile xxx')
disp(rows2vars(tb_outcomes_fracheld));
end
%% Covariance and Correlation
% Having computed elsewhere E(X), E(Y), and SD(X), SD(Y), and given X(a,z)
% and Y(a,z), which are the optimal choices along the endogenous state
% space grid a, and the exogenous state space grid z, and given also
% f(a,z), the probability mass function over (a,z), we compute covariance
% and correlation between outcomes X and Y.
%
% * Covariance
%
% $$\mathrm{Cov}\left(x,y\right) = \sum_{a} \sum_{z} f(a,z) \cdot \left( x(a,z) - \mu_x \right) \cdot \left( y(a,z) - \mu_y \right)$$
%
% * Correlation
%
% $$\rho_{x,y} = \frac{\mathrm{Cov}\left(x,y\right)}{\sigma_x \cdot \sigma_y}$$
for it_x_ctr=1:length(ar_st_y_name)
% Get X Info
st_x_key = ar_st_y_name(it_x_ctr);
cl_mt_xyz_of_s = mp_cl_mt_xyz_of_s(st_x_key);
ds_stats_map = cl_mt_xyz_of_s{2};
cl_mt_xyz_of_s = mp_cl_mt_xyz_of_s(st_x_key);
mt_x_of_s = cl_mt_xyz_of_s{1};
fl_x_mean = ds_stats_map('fl_choice_mean');
fl_x_sd = ds_stats_map('fl_choice_sd');
% Initiate storage
ar_fl_cov_var_xy = zeros([1,length(ar_st_y_name)*2]);
ar_st_covvar = strings([1,length(ar_st_y_name)*2]);
for it_y_ctr=1:length(ar_st_y_name)
% Get y Info
st_y_key = ar_st_y_name(it_y_ctr);
cl_mt_xyz_of_s = mp_cl_mt_xyz_of_s(st_y_key);
ds_stats_map = cl_mt_xyz_of_s{2};
cl_mt_xyz_of_s = mp_cl_mt_xyz_of_s(st_y_key);
mt_y_of_s = cl_mt_xyz_of_s{1};
fl_y_mean = ds_stats_map('fl_choice_mean');
fl_y_sd = ds_stats_map('fl_choice_sd');
% call ff_disc_rand_var_mass2covcor.m
[fl_cov_xy, fl_cor_xy] = ff_disc_rand_var_mass2covcor(...
mt_x_of_s, mt_y_of_s, mt_f_of_s, ...
fl_x_mean, fl_x_sd, ...
fl_y_mean, fl_y_sd, bl_display_drvm2covcor);
% only include the y name, x name is from the row
st_x_y_cov = strjoin(["fl_cov_" st_y_key], '');
st_x_y_cor = strjoin(["fl_cor_" st_y_key], '');
ds_stats_map(st_x_y_cov) = fl_cov_xy;
ds_stats_map(st_x_y_cor) = fl_cor_xy;
ar_fl_cov_var_xy(it_y_ctr*2-1) = fl_cov_xy;
ar_fl_cov_var_xy(it_y_ctr*2) = fl_cor_xy;
ar_st_covvar(it_y_ctr*2-1) = string(st_x_y_cov);
ar_st_covvar(it_y_ctr*2) = string(st_x_y_cor);
cl_mt_xyz_of_s{2} = ds_stats_map;
mp_cl_mt_xyz_of_s(st_y_key) = cl_mt_xyz_of_s;
end
if (it_x_ctr == 1)
mt_fl_cov_var_xy = ar_fl_cov_var_xy;
else
mt_fl_cov_var_xy = [mt_fl_cov_var_xy; ar_fl_cov_var_xy];
end
end
% Store Stats
tb_fl_cov_var_xy = array2table(mt_fl_cov_var_xy);
tb_fl_cov_var_xy.Properties.VariableNames = matlab.lang.makeValidName(ar_st_covvar);
tb_fl_cov_var_xy.Properties.RowNames = matlab.lang.makeValidName(cl_xyz_names);
if (bl_display_detail)
disp('xxx tb_outcomes_covvar: variance correlation xxx')
disp(rows2vars(tb_fl_cov_var_xy));
end
%% *f(y), f(c), f(a)*: Store Statistics Shared Table All Outcomes
% Add to result_map
mt_outcomes = [mt_outcomes_meansdperc, mt_fl_cov_var_xy, mt_outcomes_fracheld];
mp_cl_mt_xyz_of_s('mt_outcomes') = mt_outcomes;
% Store Stats
tb_outcomes = [tb_outcomes_meansdperc, tb_fl_cov_var_xy, tb_outcomes_fracheld];
mp_cl_mt_xyz_of_s('tb_outcomes') = tb_outcomes;
if (bl_display_final)
disp('xxx tb_outcomes: all stats xxx')
disp(rows2vars(tb_outcomes));
end
end