Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions doubleml/did/datasets/dgp_did_cs_CS2021.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,8 +97,8 @@ def make_did_cs_CS2021(n_obs=1000, dgp_type=1, include_never_treated=True, lambd

P(G_i = g) = \\frac{1}{G} \\text{ for all } g

7. Steps 1-6 generate panel data. To obtain repeated cross-sectional data, the number of generated indivials is increased
to `n_obs/lambda_t`, where `lambda_t` denotes the pobability to observe a unit at each time period (time constant).
7. Steps 1-6 generate panel data. To obtain repeated cross-sectional data, the number of generated individuals is increased
to `n_obs/lambda_t`, where `lambda_t` denotes the probability to observe a unit at each time period (time constant).
for each


Expand Down Expand Up @@ -133,7 +133,8 @@ def make_did_cs_CS2021(n_obs=1000, dgp_type=1, include_never_treated=True, lambd
Whether to include units that are never treated.

lambda_t : float, default=0.5
Probability of observing a unit at each time period.
Probability of observing a unit at each time period. Note that internally `n_obs/lambda_t` individuals are
generated, of which only a fraction `lambda_t` is observed at each time period (see Step 7 in the DGP description).

time_type : str, default="datetime"
Type of time variable. Either "datetime" or "float".
Expand Down
4 changes: 2 additions & 2 deletions doubleml/did/did.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ class DoubleMLDID(LinearScoreMixin, DoubleML):
Default is ``5``.

n_rep : int
Number of repetitons for the sample splitting.
Number of repetitions for the sample splitting.
Default is ``1``.

score : str
Expand All @@ -47,7 +47,7 @@ class DoubleMLDID(LinearScoreMixin, DoubleML):
Default is ``'observational'``.

in_sample_normalization : bool
Indicates whether to use a sligthly different normalization from Sant'Anna and Zhao (2020).
Indicates whether to use a slightly different normalization from Sant'Anna and Zhao (2020).
Default is ``True``.

trimming_rule : str
Expand Down
4 changes: 2 additions & 2 deletions doubleml/did/did_binary.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ class DoubleMLDIDBinary(LinearScoreMixin, DoubleML):
Default is ``5``.

n_rep : int
Number of repetitons for the sample splitting.
Number of repetitions for the sample splitting.
Default is ``1``.

score : str
Expand All @@ -80,7 +80,7 @@ class DoubleMLDIDBinary(LinearScoreMixin, DoubleML):
Default is ``'observational'``.

in_sample_normalization : bool
Indicates whether to use a sligthly different normalization from Sant'Anna and Zhao (2020).
Indicates whether to use a slightly different normalization from Sant'Anna and Zhao (2020).
Default is ``True``.

trimming_rule : str
Expand Down
4 changes: 2 additions & 2 deletions doubleml/did/did_cs.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ class DoubleMLDIDCS(LinearScoreMixin, DoubleML):
Default is ``5``.

n_rep : int
Number of repetitons for the sample splitting.
Number of repetitions for the sample splitting.
Default is ``1``.

score : str
Expand All @@ -47,7 +47,7 @@ class DoubleMLDIDCS(LinearScoreMixin, DoubleML):
Default is ``'observational'``.

in_sample_normalization : bool
Indicates whether to use a sligthly different normalization from Sant'Anna and Zhao (2020).
Indicates whether to use a slightly different normalization from Sant'Anna and Zhao (2020).
Default is ``True``.

trimming_rule : str
Expand Down
72 changes: 72 additions & 0 deletions doubleml/did/did_cs_binary.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,78 @@


class DoubleMLDIDCSBinary(LinearScoreMixin, DoubleML):
"""Double machine learning for difference-in-differences models with repeated cross sections (binary setting in terms of group and time
combinations).

Parameters
----------
obj_dml_data : :class:`DoubleMLPanelData` object
The :class:`DoubleMLPanelData` object providing the data and specifying the variables for the causal model.

g_value : int
The value indicating the treatment group (first period with treatment).
Default is ``None``. This implements the case for the smallest, non-zero value of G.

t_value_pre : int
The value indicating the baseline pre-treatment period.

t_value_eval : int
The value indicating the period for evaluation.

ml_g : estimator implementing ``fit()`` and ``predict()``
A machine learner implementing ``fit()`` and ``predict()`` methods (e.g.
:py:class:`sklearn.ensemble.RandomForestRegressor`) for the nuisance function :math:`g_0(d,X) = E[Y_1-Y_0|D=d, X]`.
For a binary outcome variable :math:`Y` (with values 0 and 1), a classifier implementing ``fit()`` and
``predict_proba()`` can also be specified. If :py:func:`sklearn.base.is_classifier` returns ``True``,
``predict_proba()`` is used; otherwise ``predict()`` is used.

ml_m : classifier implementing ``fit()`` and ``predict_proba()``
A machine learner implementing ``fit()`` and ``predict_proba()`` methods (e.g.
:py:class:`sklearn.ensemble.RandomForestClassifier`) for the nuisance function :math:`m_0(X) = P(D=1|X)`.
Only relevant for ``score='observational'``.

control_group : str
Specifies the control group. Either ``'never_treated'`` or ``'not_yet_treated'``.
Default is ``'never_treated'``.

anticipation_periods : int
Number of anticipation periods. Default is ``0``.

n_folds : int
Number of folds.
Default is ``5``.

n_rep : int
Number of repetitions for the sample splitting.
Default is ``1``.

score : str
A str (``'observational'`` or ``'experimental'``) specifying the score function.
The ``'experimental'`` score refers to an A/B setting, where the treatment is independent
of the pretreatment covariates.
Default is ``'observational'``.

in_sample_normalization : bool
Indicates whether to use a slightly different normalization from Sant'Anna and Zhao (2020).
Default is ``True``.

trimming_rule : str
A str (``'truncate'`` is the only choice) specifying the trimming approach.
Default is ``'truncate'``.

trimming_threshold : float
The threshold used for trimming.
Default is ``1e-2``.

draw_sample_splitting : bool
Indicates whether the sample splitting should be drawn during initialization of the object.
Default is ``True``.

print_periods : bool
Indicates whether to print information about the evaluated periods.
Default is ``False``.

"""

def __init__(
self,
Expand Down
2 changes: 1 addition & 1 deletion doubleml/irm/apo.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ class DoubleMLAPO(LinearScoreMixin, DoubleML):
Default is ``5``.

n_rep : int
Number of repetitons for the sample splitting.
Number of repetitions for the sample splitting.
Default is ``1``.

score : str or callable
Expand Down
2 changes: 1 addition & 1 deletion doubleml/irm/cvar.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ class DoubleMLCVAR(LinearScoreMixin, DoubleML):
Default is ``5``.

n_rep : int
Number of repetitons for the sample splitting.
Number of repetitions for the sample splitting.
Default is ``1``.

score : str
Expand Down
2 changes: 1 addition & 1 deletion doubleml/irm/iivm.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ class DoubleMLIIVM(LinearScoreMixin, DoubleML):
Default is ``5``.

n_rep : int
Number of repetitons for the sample splitting.
Number of repetitions for the sample splitting.
Default is ``1``.

score : str or callable
Expand Down
2 changes: 1 addition & 1 deletion doubleml/irm/irm.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ class DoubleMLIRM(LinearScoreMixin, DoubleML):
Default is ``5``.

n_rep : int
Number of repetitons for the sample splitting.
Number of repetitions for the sample splitting.
Default is ``1``.

score : str or callable
Expand Down
2 changes: 1 addition & 1 deletion doubleml/irm/lpq.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ class DoubleMLLPQ(NonLinearScoreMixin, DoubleML):
Default is ``5``.

n_rep : int
Number of repetitons for the sample splitting.
Number of repetitions for the sample splitting.
Default is ``1``.

score : str
Expand Down
2 changes: 1 addition & 1 deletion doubleml/irm/pq.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ class DoubleMLPQ(NonLinearScoreMixin, DoubleML):
Default is ``5``.

n_rep : int
Number of repetitons for the sample splitting.
Number of repetitions for the sample splitting.
Default is ``1``.

score : str
Expand Down
2 changes: 1 addition & 1 deletion doubleml/irm/qte.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ class DoubleMLQTE:
Default is ``5``.

n_rep : int
Number of repetitons for the sample splitting.
Number of repetitions for the sample splitting.
Default is ``1``.

score : str
Expand Down
2 changes: 1 addition & 1 deletion doubleml/irm/ssm.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ class DoubleMLSSM(LinearScoreMixin, DoubleML):
Default is ``5``.

n_rep : int
Number of repetitons for the sample splitting.
Number of repetitions for the sample splitting.
Default is ``1``.

score : str or callable
Expand Down
2 changes: 1 addition & 1 deletion doubleml/plm/pliv.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ class DoubleMLPLIV(LinearScoreMixin, DoubleML):
Default is ``5``.

n_rep : int
Number of repetitons for the sample splitting.
Number of repetitions for the sample splitting.
Default is ``1``.

score : str or callable
Expand Down
2 changes: 1 addition & 1 deletion doubleml/plm/plr.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ class DoubleMLPLR(LinearScoreMixin, DoubleML):
Default is ``5``.

n_rep : int
Number of repetitons for the sample splitting.
Number of repetitions for the sample splitting.
Default is ``1``.

score : str or callable
Expand Down
2 changes: 1 addition & 1 deletion doubleml/rdd/rdd.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ class RDFlex:
Default is ``5``.

n_rep : int
Number of repetitons for the sample splitting.
Number of repetitions for the sample splitting.
Default is ``1``.

cutoff : float or int
Expand Down