diff --git a/doc/spec/model_selection.rst b/doc/spec/model_selection.rst new file mode 100644 index 000000000..17f4d75e1 --- /dev/null +++ b/doc/spec/model_selection.rst @@ -0,0 +1,43 @@ +.. _model_selection: + +================= +Model Selection +================= + +Estimators that derive from :class:`._OrthoLearner` fit first stage nuisance models on different folds of the data and then fit a final model. +In many cases it will make sense to perform model selection over a number of first-stage models, and the library facilitates this by allowing +a flexible specification of the first-stage models, as any of the following: + + * An sklearn-compatible estimator + + * If the estimator is a known class that performs its own hyperparameter selection via cross-validation (such as :class:`~sklearn.linear_model.LassoCV`), + then this will be done once and then the selected hyperparameters will be used when cross-fitting on each fold + + * If a custom class is used, then it should support a `fit` method and either a `predict` method if the target is continuous or `predict_proba` if the target is discrete. + + * One of the following strings; the exact set of models supported by each of these keywords may vary depending on the version of our package: + + ``"linear"`` + Selects over linear models regularized by L1 or L2 norm + + ``"poly"`` + Selects over regularized linear models with polynomial features of different degrees + + ``"forest"`` + Selects over random forest models + + ``"gbf"`` + Selects over gradient boosting models + + ``"nnet"`` + Selects over neural network models + + ``"automl"`` + Selects over all of the above (note that this will be potentially time consuming) + + * A list of any of the above + + * An implementation of :class:`.ModelSelector`, which is a class that supports a two-stage model selection and fitting process + (this is used internally by our library and is not generally intended to be used directly by end users). + +Most subclasses also use the string `"auto"`` as a special default value to automatically select a model from an appropriate smaller subset of models than would be generated by "automl". diff --git a/doc/spec/spec.rst b/doc/spec/spec.rst index a85af4e08..38917c742 100644 --- a/doc/spec/spec.rst +++ b/doc/spec/spec.rst @@ -12,6 +12,7 @@ EconML User Guide estimation_iv estimation_dynamic inference + model_selection interpretability federated_learning references diff --git a/econml/_ortho_learner.py b/econml/_ortho_learner.py index 9423617c2..4db1e2921 100644 --- a/econml/_ortho_learner.py +++ b/econml/_ortho_learner.py @@ -28,6 +28,7 @@ class in this module implements the general logic in a very versatile way from collections import namedtuple from warnings import warn from abc import abstractmethod +from typing import List, Union import inspect from collections import defaultdict import re @@ -85,23 +86,17 @@ def _fit_fold(model, train_idxs, test_idxs, calculate_scores, args, kwargs): -Tuple containing: nuisance_temp (tuple): Predictions or values of interest from the model. fitted_model: The fitted model after training. - test_idxs (array-like): Indices of the test data. score_temp (tuple or None): Scores calculated after fitting if `calculate_scores` is True, otherwise None. """ model = clone(model, safe=False) - if len(np.intersect1d(train_idxs, test_idxs)) > 0: - raise AttributeError( - "Invalid crossfitting fold structure. Train and test indices of each fold must be disjoint. 
{},{}".format( - train_idxs, test_idxs)) - args_train = tuple(var[train_idxs] if var is not None else None for var in args) args_test = tuple(var[test_idxs] if var is not None else None for var in args) kwargs_train = {key: var[train_idxs] for key, var in kwargs.items()} kwargs_test = {key: var[test_idxs] for key, var in kwargs.items()} - model.train(False, *args_train, **kwargs_train) + model.train(False, None, *args_train, **kwargs_train) nuisance_temp = model.predict(*args_test, **kwargs_test) if not isinstance(nuisance_temp, tuple): @@ -113,21 +108,22 @@ def _fit_fold(model, train_idxs, test_idxs, calculate_scores, args, kwargs): if not isinstance(score_temp, tuple): score_temp = (score_temp,) - return nuisance_temp, model, test_idxs, (score_temp if calculate_scores else None) + return nuisance_temp, model, (score_temp if calculate_scores else None) -def _crossfit(model: ModelSelector, folds, use_ray, ray_remote_fun_option, *args, **kwargs): +def _crossfit(models: Union[ModelSelector, List[ModelSelector]], folds, use_ray, ray_remote_fun_option, + *args, **kwargs): """ General crossfit based calculation of nuisance parameters. Parameters ---------- - model : ModelSelector - An object that has train and predict methods. - The train method must take an 'is_selecting' argument first, and then - accept positional arguments `args` and keyword arguments `kwargs`; the predict method - just takes those `args` and `kwargs`. The train - method selects or estimates a model of the nuisance function, based on the input + models : ModelSelector or List[ModelSelector] + One or more objects that have train and predict methods. + The train method must take an 'is_selecting' argument first, a set of folds second + (which will be None when not selecting) and then accept positional arguments `args` + and keyword arguments `kwargs`; the predict method just takes those `args` and `kwargs`. + The train method selects or estimates a model of the nuisance function, based on the input data to fit. Predict evaluates the fitted nuisance function on the input data to predict. 
folds : list of tuple or None @@ -179,11 +175,14 @@ def _crossfit(model: ModelSelector, folds, use_ray, ray_remote_fun_option, *args class Wrapper: def __init__(self, model): self._model = model - def train(self, is_selecting, X, y, W=None): + def train(self, is_selecting, folds, X, y, W=None): self._model.fit(X, y) return self def predict(self, X, y, W=None): return self._model.predict(X) + @property + def needs_fit(self): + return False np.random.seed(123) X = np.random.normal(size=(5000, 3)) y = X[:, 0] + np.random.normal(size=(5000,)) @@ -206,65 +205,109 @@ def predict(self, X, y, W=None): model_list = [] kwargs = filter_none_kwargs(**kwargs) - model.train(True, *args, **kwargs) - - calculate_scores = hasattr(model, 'score') - # remove None arguments - - if folds is None: # skip crossfitting - model_list.append(clone(model, safe=False)) - model_list[0].train(True, *args, **kwargs) - model_list[0].train(False, *args, **kwargs) # fit the selected model - nuisances = model_list[0].predict(*args, **kwargs) - scores = model_list[0].score(*args, **kwargs) if calculate_scores else None - - if not isinstance(nuisances, tuple): - nuisances = (nuisances,) - if not isinstance(scores, tuple): - scores = (scores,) - - # scores entries should be lists of scores, so make each entry a singleton list - scores = tuple([s] for s in scores) - - first_arr = args[0] if args else kwargs.items()[0][1] - return nuisances, model_list, np.arange(first_arr.shape[0]), scores - - folds = list(folds) - fold_refs = [] - if use_ray: - # Adding the kwargs to ray object store to be used by remote functions for each fold to avoid IO overhead - ray_args = ray.put(kwargs) + + n = (args[0] if args else kwargs.items()[0][1]).shape[0] + + # fully materialize folds so that they can be reused across models + # and precompute fitted indices so that we fail fast if there's an issue with them + if folds is not None: + folds = list(folds) + fitted_inds = [] for idx, (train_idxs, test_idxs) in enumerate(folds): - fold_refs.append( - ray.remote(_fit_fold).options(**ray_remote_fun_option).remote(model, train_idxs, test_idxs, - calculate_scores, args, ray_args)) - fitted_inds = [] - for idx, (train_idxs, test_idxs) in enumerate(folds): - if use_ray: - nuisance_temp, model, test_idxs, score_temp = ray.get(fold_refs[idx]) + if len(np.intersect1d(train_idxs, test_idxs)) > 0: + raise AttributeError(f"Invalid crossfitting fold structure. Train and test indices of fold {idx+1} " + f"are not disjoint: {train_idxs}, {test_idxs}") + common_idxs = np.intersect1d(fitted_inds, test_idxs) + if len(common_idxs) > 0: + raise AttributeError(f"Invalid crossfitting fold structure. The indexes {common_idxs} in fold {idx+1} " + f"have appeared in previous folds") + fitted_inds = np.concatenate((fitted_inds, test_idxs)) + fitted_inds = np.sort(fitted_inds.astype(int)) + else: + fold_vals = [(np.arange(n), np.arange(n))] + fitted_inds = np.arange(n) + + accumulated_nuisances = () + # NOTE: if any model is missing scores we will just return None even if another model + # has scores. 
this is because we don't know how many scores are missing + # for the models that are missing them, so we don't know how to pad the array + calculate_scores = True + accumulated_scores = () + + # for convenience we allos a single model to be passed in lieu of a singleton list + # in that case, we will also unwrap the model output + unwrap_model_output = False + if not isinstance(models, list): + unwrap_model_output = True + models = [models] + + for model in models: + # when there is more than one model, nuisances from previous models + # come first as positional arguments + accumulated_args = accumulated_nuisances + args + if model.needs_fit: + model.train(True, fold_vals if folds is None else folds, *accumulated_args, **kwargs) + + calculate_scores &= hasattr(model, 'score') + + model_list.append([]) # add a new empty list of clones for this model + + if folds is None: # skip crossfitting + model_list[-1].append(clone(model, safe=False)) + model_list[-1][0].train(False, None, *accumulated_args, **kwargs) # fit the selected model + nuisances = model_list[-1][0].predict(*accumulated_args, **kwargs) + if not isinstance(nuisances, tuple): + nuisances = (nuisances,) + + if calculate_scores: + scores = model_list[-1][0].score(*accumulated_args, **kwargs) + if not isinstance(scores, tuple): + scores = (scores,) + # scores entries should be lists of scores, so make each entry a singleton list + scores = tuple([s] for s in scores) + else: - nuisance_temp, model, test_idxs, score_temp = _fit_fold(model, train_idxs, test_idxs, - calculate_scores, args, kwargs) + fold_refs = [] + if use_ray: + # Adding the kwargs to ray object store to be used by remote functions + # for each fold to avoid IO overhead + ray_args = ray.put(kwargs) + for idx, (train_idxs, test_idxs) in enumerate(folds): + fold_refs.append( + ray.remote(_fit_fold).options(**ray_remote_fun_option).remote(model, train_idxs, test_idxs, + calculate_scores, + accumulated_args, ray_args)) + for idx, (train_idxs, test_idxs) in enumerate(folds): + if use_ray: + nuisance_temp, model_out, score_temp = ray.get(fold_refs[idx]) + else: + nuisance_temp, model_out, score_temp = _fit_fold(model, train_idxs, test_idxs, + calculate_scores, accumulated_args, kwargs) - if len(np.intersect1d(fitted_inds, test_idxs)) > 0: - raise AttributeError( - "Invalid crossfitting fold structure. 
The same index appears in two test folds.") - fitted_inds = np.concatenate((fitted_inds, test_idxs)) - if idx == 0: - nuisances = tuple([np.full((args[0].shape[0],) + nuis.shape[1:], np.nan) for nuis in nuisance_temp]) + if idx == 0: + nuisances = tuple([np.full((n,) + nuis.shape[1:], np.nan) + for nuis in nuisance_temp]) - for it, nuis in enumerate(nuisance_temp): - nuisances[it][test_idxs] = nuis + for it, nuis in enumerate(nuisance_temp): + nuisances[it][test_idxs] = nuis - if calculate_scores: - if idx == 0: - scores = tuple([] for _ in score_temp) - for it, score in enumerate(score_temp): - scores[it].append(score) + if calculate_scores: + if idx == 0: + scores = tuple([] for _ in score_temp) + for it, score in enumerate(score_temp): + scores[it].append(score) - model_list.append(model) + model_list[-1].append(model_out) - return nuisances, model_list, np.sort(fitted_inds.astype(int)), (scores if calculate_scores else None) + accumulated_nuisances += nuisances + if calculate_scores: + accumulated_scores += scores + else: + accumulated_scores = None + + if unwrap_model_output: + model_list = model_list[0] + return accumulated_nuisances, model_list, fitted_inds, accumulated_scores CachedValues = namedtuple('CachedValues', ['nuisances', @@ -403,12 +446,15 @@ class ModelNuisance: def __init__(self, model_t, model_y): self._model_t = model_t self._model_y = model_y - def train(self, is_selecting, Y, T, W=None): + def train(self, is_selecting, folds, Y, T, W=None): self._model_t.fit(W, T) self._model_y.fit(W, Y) return self def predict(self, Y, T, W=None): return Y - self._model_y.predict(W), T - self._model_t.predict(W) + @property + def needs_fit(self): + return False class ModelFinal: def __init__(self): return @@ -457,12 +503,15 @@ class ModelNuisance: def __init__(self, model_t, model_y): self._model_t = model_t self._model_y = model_y - def train(self, is_selecting, Y, T, W=None): + def train(self, is_selecting, folds, Y, T, W=None): self._model_t.fit(W, np.matmul(T, np.arange(1, T.shape[1]+1))) self._model_y.fit(W, Y) return self def predict(self, Y, T, W=None): return Y - self._model_y.predict(W), T - self._model_t.predict_proba(W)[:, 1:] + @property + def needs_fit(self): + return False class ModelFinal: def __init__(self): return @@ -554,11 +603,11 @@ def _gen_ortho_learner_model_nuisance(self): Returns ------- - model_nuisance: selector - The selector for fitting the nuisance function. The returned estimator must implement + model_nuisance: list of selector + The selector(s) for fitting the nuisance function. The returned estimators must implement `train` and `predict` methods that both have signatures:: - model_nuisance.train(is_selecting, Y, T, X=X, W=W, Z=Z, + model_nuisance.train(is_selecting, folds, Y, T, X=X, W=W, Z=Z, sample_weight=sample_weight) model_nuisance.predict(Y, T, X=X, W=W, Z=Z, sample_weight=sample_weight) @@ -805,7 +854,7 @@ def _fit_nuisances(Y, T, X, W, Z, sample_weight, groups): for nuisance_mc_variants in zip(*all_nuisances)) else: raise ValueError( - "Parameter `mc_agg` must be one of {'mean', 'median'}. Got {}".format(self.mc_agg)) + f"Parameter `mc_agg` must be one of {{'mean', 'median'}}. 
Got {self.mc_agg}") Y, T, X, W, Z, sample_weight, freq_weight, sample_var = (self._subinds_check_none(arr, fitted_inds) for arr in (Y, T, X, W, Z, sample_weight, @@ -1035,26 +1084,45 @@ def score(self, Y, T, X=None, W=None, Z=None, sample_weight=None, groups=None): if self.discrete_outcome: Y = self.outcome_transformer.transform(Y).reshape(-1, 1) n_iters = len(self._models_nuisance) - n_splits = len(self._models_nuisance[0]) + # self._models_nuisance will be a list of lists or a list of list of lists + # so we use self._ortho_learner_model_nuisance to determine the nesting level + expand_list = not isinstance(self._ortho_learner_model_nuisance, list) + if expand_list: + n_selectors = 1 + n_splits = len(self._models_nuisance[0]) + else: + n_selectors = len(self._models_nuisance[0]) + n_splits = len(self._models_nuisance[0][0]) - # for each mc iteration - for i, models_nuisances in enumerate(self._models_nuisance): - # for each model under cross fit setting - for j, mdl in enumerate(models_nuisances): - nuisance_temp = mdl.predict(Y, T, **filter_none_kwargs(X=X, W=W, Z=Z, groups=groups)) - if not isinstance(nuisance_temp, tuple): - nuisance_temp = (nuisance_temp,) + kwargs = filter_none_kwargs(X=X, W=W, Z=Z, groups=groups) - if i == 0 and j == 0: - nuisances = [np.zeros((n_iters * n_splits,) + nuis.shape) for nuis in nuisance_temp] + accumulated_nuisances = [] - for it, nuis in enumerate(nuisance_temp): - nuisances[it][j * n_iters + i] = nuis + for sel in range(n_selectors): + accumulated_args = tuple(accumulated_nuisances) + (Y, T) + # for each mc iteration + for i, models_nuisances in enumerate(self._models_nuisance): + if expand_list: + models_nuisances = [models_nuisances] + + # for each model under cross fit setting + for j, mdl in enumerate(models_nuisances[sel]): + nuisance_temp = mdl.predict(*accumulated_args, **kwargs) + if not isinstance(nuisance_temp, tuple): + nuisance_temp = (nuisance_temp,) + + if i == 0 and j == 0: + nuisances = [np.zeros((n_iters * n_splits,) + nuis.shape) for nuis in nuisance_temp] + + for it, nuis in enumerate(nuisance_temp): + nuisances[it][j * n_iters + i] = nuis + + for it in range(len(nuisances)): + nuisances[it] = np.mean(nuisances[it], axis=0) - for it in range(len(nuisances)): - nuisances[it] = np.mean(nuisances[it], axis=0) + accumulated_nuisances += nuisances - return self._ortho_learner_model_final.score(Y, T, nuisances=nuisances, + return self._ortho_learner_model_final.score(Y, T, nuisances=accumulated_nuisances, **filter_none_kwargs(X=X, W=W, Z=Z, sample_weight=sample_weight, groups=groups)) diff --git a/econml/dml/_rlearner.py b/econml/dml/_rlearner.py index b4c346b26..1d6318481 100644 --- a/econml/dml/_rlearner.py +++ b/econml/dml/_rlearner.py @@ -1,484 +1,493 @@ -# Copyright (c) PyWhy contributors. All rights reserved. -# Licensed under the MIT License. - -""" - -The R Learner is an approach for estimating flexible non-parametric models -of conditional average treatment effects in the setting with no unobserved confounders. -The method is based on the idea of Neyman orthogonality and estimates a CATE -whose mean squared error is robust to the estimation errors of auxiliary submodels -that also need to be estimated from data: - - 1) the outcome or regression model - 2) the treatment or propensity or policy or logging policy model - -References ----------- - -Xinkun Nie, Stefan Wager (2017). Quasi-Oracle Estimation of Heterogeneous Treatment Effects. - https://arxiv.org/abs/1712.04912 - -Dylan Foster, Vasilis Syrgkanis (2019). 
Orthogonal Statistical Learning. - ACM Conference on Learning Theory. https://arxiv.org/abs/1901.09036 - -Chernozhukov et al. (2017). Double/debiased machine learning for treatment and structural parameters. - The Econometrics Journal. https://arxiv.org/abs/1608.00060 -""" - -from abc import abstractmethod -import numpy as np -import copy -from warnings import warn - -from ..sklearn_extensions.model_selection import ModelSelector -from ..utilities import (shape, reshape, ndim, hstack, filter_none_kwargs, _deprecate_positional) -from sklearn.linear_model import LinearRegression -from sklearn.base import clone -from .._ortho_learner import _OrthoLearner - - -class _ModelNuisance(ModelSelector): - """ - Nuisance model fits the model_y and model_t at fit time and at predict time - calculates the residual Y and residual T based on the fitted models and returns - the residuals as two nuisance parameters. - """ - - def __init__(self, model_y: ModelSelector, model_t: ModelSelector): - self._model_y = model_y - self._model_t = model_t - - def train(self, is_selecting, Y, T, X=None, W=None, Z=None, sample_weight=None, groups=None): - assert Z is None, "Cannot accept instrument!" - self._model_t.train(is_selecting, X, W, T, **filter_none_kwargs(sample_weight=sample_weight, groups=groups)) - self._model_y.train(is_selecting, X, W, Y, **filter_none_kwargs(sample_weight=sample_weight, groups=groups)) - return self - - def score(self, Y, T, X=None, W=None, Z=None, sample_weight=None, groups=None): - # note that groups are not passed to score because they are only used for fitting - T_score = self._model_t.score(X, W, T, **filter_none_kwargs(sample_weight=sample_weight)) - Y_score = self._model_y.score(X, W, Y, **filter_none_kwargs(sample_weight=sample_weight)) - return Y_score, T_score - - def predict(self, Y, T, X=None, W=None, Z=None, sample_weight=None, groups=None): - Y_pred = self._model_y.predict(X, W) - T_pred = self._model_t.predict(X, W) - if (X is None) and (W is None): # In this case predict above returns a single row - Y_pred = np.tile(Y_pred.reshape(1, -1), (Y.shape[0], 1)) - T_pred = np.tile(T_pred.reshape(1, -1), (T.shape[0], 1)) - Y_res = Y - Y_pred.reshape(Y.shape) - T_res = T - T_pred.reshape(T.shape) - return Y_res, T_res - - -class _ModelFinal: - """ - Final model at fit time, fits a residual on residual regression with a heterogeneous coefficient - that depends on X, i.e. - - .. math :: - Y - E[Y | X, W] = \\theta(X) \\cdot (T - E[T | X, W]) + \\epsilon - - and at predict time returns :math:`\\theta(X)`. The score method returns the MSE of this final - residual on residual regression. 
- """ - - def __init__(self, model_final): - self._model_final = model_final - - def fit(self, Y, T, X=None, W=None, Z=None, nuisances=None, - sample_weight=None, freq_weight=None, sample_var=None, groups=None): - Y_res, T_res = nuisances - self._model_final.fit(X, T, T_res, Y_res, sample_weight=sample_weight, - freq_weight=freq_weight, sample_var=sample_var) - return self - - def predict(self, X=None): - return self._model_final.predict(X) - - def score(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None, groups=None): - Y_res, T_res = nuisances - if Y_res.ndim == 1: - Y_res = Y_res.reshape((-1, 1)) - if T_res.ndim == 1: - T_res = T_res.reshape((-1, 1)) - effects = self._model_final.predict(X).reshape((-1, Y_res.shape[1], T_res.shape[1])) - Y_res_pred = np.einsum('ijk,ik->ij', effects, T_res).reshape(Y_res.shape) - if sample_weight is not None: - return np.mean(np.average((Y_res - Y_res_pred) ** 2, weights=sample_weight, axis=0)) - else: - return np.mean((Y_res - Y_res_pred) ** 2) - - -class _RLearner(_OrthoLearner): - """ - Base class for CATE learners that residualize treatment and outcome and run residual on residual regression. - The estimator is a special of an :class:`._OrthoLearner` estimator, - so it follows the two - stage process, where a set of nuisance functions are estimated in the first stage in a crossfitting - manner and a final stage estimates the CATE model. See the documentation of - :class:`._OrthoLearner` for a description of this two stage process. - - In this estimator, the CATE is estimated by using the following estimating equations: - - .. math :: - Y - \\E[Y | X, W] = \\Theta(X) \\cdot (T - \\E[T | X, W]) + \\epsilon - - Thus if we estimate the nuisance functions :math:`q(X, W) = \\E[Y | X, W]` and - :math:`f(X, W)=\\E[T | X, W]` in the first stage, we can estimate the final stage cate for each - treatment t, by running a regression, minimizing the residual on residual square loss: - - .. math :: - \\hat{\\theta} = \\arg\\min_{\\Theta}\ - \\E_n\\left[ (\\tilde{Y} - \\Theta(X) \\cdot \\tilde{T})^2 \\right] - - Where :math:`\\tilde{Y}=Y - \\E[Y | X, W]` and :math:`\\tilde{T}=T-\\E[T | X, W]` denotes the - residual outcome and residual treatment. - - Parameters - ---------- - discrete_outcome: bool - Whether the outcome should be treated as binary - - discrete_treatment: bool - Whether the treatment values should be treated as categorical, rather than continuous, quantities - - treatment_featurizer : :term:`transformer` or None - Must support fit_transform and transform. Used to create composite treatment in the final CATE regression. - The final CATE will be trained on the outcome of featurizer.fit_transform(T). - If featurizer=None, then CATE is trained on T. - - categories: 'auto' or list - The categories to use when encoding discrete treatments (or 'auto' to use the unique sorted values). - The first category will be treated as the control treatment. - - cv: int, cross-validation generator or an iterable - Determines the cross-validation splitting strategy. - Possible inputs for cv are: - - - None, to use the default 3-fold cross-validation, - - integer, to specify the number of folds. - - :term:`CV splitter` - - An iterable yielding (train, test) splits as arrays of indices. - - For integer/None inputs, if the treatment is discrete - :class:`~sklearn.model_selection.StratifiedKFold` is used, else, - :class:`~sklearn.model_selection.KFold` is used - (with a random shuffle in either case). 
- - Unless an iterable is used, we call `split(concat[W, X], T)` to generate the splits. If all - W, X are None, then we call `split(ones((T.shape[0], 1)), T)`. - - random_state: int, :class:`~numpy.random.mtrand.RandomState` instance or None - If int, random_state is the seed used by the random number generator; - If :class:`~numpy.random.mtrand.RandomState` instance, random_state is the random number generator; - If None, the random number generator is the :class:`~numpy.random.mtrand.RandomState` instance used - by :mod:`np.random`. - - mc_iters: int, optional - The number of times to rerun the first stage models to reduce the variance of the nuisances. - - mc_agg: {'mean', 'median'}, default 'mean' - How to aggregate the nuisance value for each sample across the `mc_iters` monte carlo iterations of - cross-fitting. - - allow_missing: bool - Whether to allow missing values in X, W. If True, will need to supply nuisance models that can handle - missing values. - - use_ray: bool, default False - Whether to use Ray to speed up the cross-fitting step. - - ray_remote_func_options : dict, optional - Options to pass to ray.remote function decorator. - see more at https://docs.ray.io/en/latest/ray-core/api/doc/ray.remote.html - - Examples - -------- - - The example code below implements a very simple version of the double machine learning - method on top of the :class:`._RLearner` class, for expository purposes. - For a more elaborate implementation of a Double Machine Learning child class of the class - checkout :class:`.DML` and its child classes: - - .. testcode:: - - import numpy as np - from sklearn.linear_model import LinearRegression - from econml.dml._rlearner import _RLearner - from econml.sklearn_extensions.model_selection import SingleModelSelector - from sklearn.base import clone - class ModelFirst: - def __init__(self, model): - self._model = clone(model, safe=False) - def fit(self, X, W, Y, sample_weight=None): - self._model.fit(np.hstack([X, W]), Y) - return self - def predict(self, X, W): - return self._model.predict(np.hstack([X, W])) - class ModelSelector(SingleModelSelector): - def __init__(self, model): - self._model = ModelFirst(model) - def train(self, is_selecting, X, W, Y, sample_weight=None): - self._model.fit(X, W, Y, sample_weight=sample_weight) - return self - @property - def best_model(self): - return self._model - @property - def best_score(self): - return 0 - class ModelFinal: - def fit(self, X, T, T_res, Y_res, sample_weight=None, freq_weight=None, sample_var=None): - self.model = LinearRegression(fit_intercept=False).fit(X * T_res.reshape(-1, 1), - Y_res) - return self - def predict(self, X): - return self.model.predict(X) - class RLearner(_RLearner): - def _gen_model_y(self): - return ModelSelector(LinearRegression()) - def _gen_model_t(self): - return ModelSelector(LinearRegression()) - def _gen_rlearner_model_final(self): - return ModelFinal() - np.random.seed(123) - X = np.random.normal(size=(1000, 3)) - y = X[:, 0] + X[:, 1] + np.random.normal(0, 0.01, size=(1000,)) - est = RLearner(cv=2, discrete_outcome=False, discrete_treatment=False, - treatment_featurizer=None, categories='auto', random_state=None) - est.fit(y, X[:, 0], X=np.ones((X.shape[0], 1)), W=X[:, 1:]) - - >>> est.const_marginal_effect(np.ones((1,1))) - array([0.999631...]) - >>> est.effect(np.ones((1,1)), T0=0, T1=10) - array([9.996314...]) - >>> est.score(y, X[:, 0], X=np.ones((X.shape[0], 1)), W=X[:, 1:]) - 9.73638006...e-05 - >>> est.rlearner_model_final_.model - 
LinearRegression(fit_intercept=False) - >>> est.rlearner_model_final_.model.coef_ - array([0.999631...]) - >>> est.score_ - 9.82623204...e-05 - >>> [mdl._model for mdls in est.models_y for mdl in mdls] - [LinearRegression(), LinearRegression()] - >>> [mdl._model for mdls in est.models_t for mdl in mdls] - [LinearRegression(), LinearRegression()] - - Attributes - ---------- - models_y: nested list of objects of type(model_y) - A nested list of instances of the model_y object. Number of sublist equals to number of monte carlo - iterations, each element in the sublist corresponds to a crossfitting - fold and is the model instance that was fitted for that training fold. - models_t: nested list of objects of type(model_t) - A nested list of instances of the model_t object. Number of sublist equals to number of monte carlo - iterations, each element in the sublist corresponds to a crossfitting - fold and is the model instance that was fitted for that training fold. - rlearner_model_final_ : object of type(model_final) - An instance of the model_final object that was fitted after calling fit. - score_ : float - The MSE in the final residual on residual regression - nuisance_scores_y : nested list of float - The out-of-sample scores for each outcome model - nuisance_scores_t : nested list of float - The out-of-sample scores for each treatment model - - .. math:: - \\frac{1}{n} \\sum_{i=1}^n (Y_i - \\hat{E}[Y|X_i, W_i]\ - - \\hat{\\theta}(X_i)\\cdot (T_i - \\hat{E}[T|X_i, W_i]))^2 - - If `sample_weight` is not None at fit time, then a weighted average is returned. If the outcome Y - is multidimensional, then the average of the MSEs for each dimension of Y is returned. - """ - - def __init__(self, - *, - discrete_outcome, - discrete_treatment, - treatment_featurizer, - categories, - cv, - random_state, - mc_iters=None, - mc_agg='mean', - allow_missing=False, - use_ray=False, - ray_remote_func_options=None): - super().__init__(discrete_outcome=discrete_outcome, - discrete_treatment=discrete_treatment, - treatment_featurizer=treatment_featurizer, - discrete_instrument=False, # no instrument, so doesn't matter - categories=categories, - cv=cv, - random_state=random_state, - mc_iters=mc_iters, - mc_agg=mc_agg, - allow_missing=allow_missing, - use_ray=use_ray, - ray_remote_func_options=ray_remote_func_options) - - @abstractmethod - def _gen_model_y(self): - """ - Returns - ------- - model_y: selector for the estimator of E[Y | X, W] - The estimator for fitting the response to the features and controls. Must implement - `fit` and `predict` methods. Unlike sklearn estimators both methods must - take an extra second argument (the controls), i.e. :: - - model_y.fit(X, W, Y, sample_weight=sample_weight) - model_y.predict(X, W) - """ - raise NotImplementedError("Abstract method") - - @abstractmethod - def _gen_model_t(self): - """ - Returns - ------- - model_t: selector for the estimator of E[T | X, W] - The estimator for fitting the treatment to the features and controls. Must implement - `fit` and `predict` methods. Unlike sklearn estimators both methods must - take an extra second argument (the controls), i.e. :: - - model_t.fit(X, W, T, sample_weight=sample_weight) - model_t.predict(X, W) - """ - raise NotImplementedError("Abstract method") - - @abstractmethod - def _gen_rlearner_model_final(self): - """ - Returns - ------- - model_final: estimator for fitting the response residuals to the features and treatment residuals - Must implement `fit` and `predict` methods. 
Unlike sklearn estimators the fit methods must - take an extra second argument (the treatment residuals). Predict, on the other hand, - should just take the features and return the constant marginal effect. More, concretely:: - - model_final.fit(X, T_res, Y_res, - sample_weight=sample_weight, freq_weight=freq_weight, sample_var=sample_var) - model_final.predict(X) - """ - raise NotImplementedError("Abstract method") - - def _gen_ortho_learner_model_nuisance(self): - return _ModelNuisance(self._gen_model_y(), self._gen_model_t()) - - def _gen_ortho_learner_model_final(self): - return _ModelFinal(self._gen_rlearner_model_final()) - - def fit(self, Y, T, *, X=None, W=None, sample_weight=None, freq_weight=None, sample_var=None, groups=None, - cache_values=False, inference=None): - """ - Estimate the counterfactual model from data, i.e. estimates function :math:`\\theta(\\cdot)`. - - Parameters - ---------- - Y: (n, d_y) matrix or vector of length n - Outcomes for each sample - T: (n, d_t) matrix or vector of length n - Treatments for each sample - X:(n, d_x) matrix, optional - Features for each sample - W:(n, d_w) matrix, optional - Controls for each sample - sample_weight : (n,) array_like, optional - Individual weights for each sample. If None, it assumes equal weight. - freq_weight: (n, ) array_like of int, optional - Weight for the observation. Observation i is treated as the mean - outcome of freq_weight[i] independent observations. - When ``sample_var`` is not None, this should be provided. - sample_var : {(n,), (n, d_y)} nd array_like, optional - Variance of the outcome(s) of the original freq_weight[i] observations that were used to - compute the mean outcome represented by observation i. - groups: (n,) vector, optional - All rows corresponding to the same group will be kept together during splitting. - If groups is not None, the `cv` argument passed to this class's initializer - must support a 'groups' argument to its split method. - cache_values: bool, default False - Whether to cache inputs and first stage results, which will allow refitting a different final model - inference: str, :class:`.Inference` instance, or None - Method for performing inference. This estimator supports 'bootstrap' - (or an instance of:class:`.BootstrapInference`). - - Returns - ------- - self: _RLearner instance - """ - # Replacing fit from _OrthoLearner, to enforce Z=None and improve the docstring - return super().fit(Y, T, X=X, W=W, - sample_weight=sample_weight, freq_weight=freq_weight, sample_var=sample_var, groups=groups, - cache_values=cache_values, - inference=inference) - - def score(self, Y, T, X=None, W=None, sample_weight=None): - """ - Score the fitted CATE model on a new data set. Generates nuisance parameters - for the new data set based on the fitted residual nuisance models created at fit time. - It uses the mean prediction of the models fitted by the different crossfit folds. - Then calculates the MSE of the final residual Y on residual T regression. - - If model_final does not have a score method, then it raises an :exc:`.AttributeError` - - Parameters - ---------- - Y: (n, d_y) matrix or vector of length n - Outcomes for each sample - T: (n, d_t) matrix or vector of length n - Treatments for each sample - X:(n, d_x) matrix, optional - Features for each sample - W:(n, d_w) matrix, optional - Controls for each sample - sample_weight:(n,) vector, optional - Weights for each samples - - - Returns - ------- - score: float - The MSE of the final CATE model on the new data. 
- """ - # Replacing score from _OrthoLearner, to enforce Z=None and improve the docstring - return super().score(Y, T, X=X, W=W, sample_weight=sample_weight) - - @property - def rlearner_model_final_(self): - # NOTE: important to get parent's wrapped copy so that - # after training wrapped featurizer is also trained, etc. - return self.ortho_learner_model_final_._model_final - - @property - def models_y(self): - return [[mdl._model_y.best_model for mdl in mdls] for mdls in super().models_nuisance_] - - @property - def models_t(self): - return [[mdl._model_t.best_model for mdl in mdls] for mdls in super().models_nuisance_] - - @property - def nuisance_scores_y(self): - return self.nuisance_scores_[0] - - @property - def nuisance_scores_t(self): - return self.nuisance_scores_[1] - - @property - def residuals_(self): - """ - A tuple (y_res, T_res, X, W), of the residuals from the first stage estimation - along with the associated X and W. Samples are not guaranteed to be in the same - order as the input order. - """ - if not hasattr(self, '_cached_values'): - raise AttributeError("Estimator is not fitted yet!") - if self._cached_values is None: - raise AttributeError("`fit` was called with `cache_values=False`. " - "Set to `True` to enable residual storage.") - Y_res, T_res = self._cached_values.nuisances - return Y_res, T_res, self._cached_values.X, self._cached_values.W +# Copyright (c) PyWhy contributors. All rights reserved. +# Licensed under the MIT License. + +""" + +The R Learner is an approach for estimating flexible non-parametric models +of conditional average treatment effects in the setting with no unobserved confounders. +The method is based on the idea of Neyman orthogonality and estimates a CATE +whose mean squared error is robust to the estimation errors of auxiliary submodels +that also need to be estimated from data: + + 1) the outcome or regression model + 2) the treatment or propensity or policy or logging policy model + +References +---------- + +Xinkun Nie, Stefan Wager (2017). Quasi-Oracle Estimation of Heterogeneous Treatment Effects. + https://arxiv.org/abs/1712.04912 + +Dylan Foster, Vasilis Syrgkanis (2019). Orthogonal Statistical Learning. + ACM Conference on Learning Theory. https://arxiv.org/abs/1901.09036 + +Chernozhukov et al. (2017). Double/debiased machine learning for treatment and structural parameters. + The Econometrics Journal. https://arxiv.org/abs/1608.00060 +""" + +from abc import abstractmethod +import numpy as np +import copy +from warnings import warn + +from ..sklearn_extensions.model_selection import ModelSelector +from ..utilities import (shape, reshape, ndim, hstack, filter_none_kwargs, _deprecate_positional) +from sklearn.linear_model import LinearRegression +from sklearn.base import clone +from .._ortho_learner import _OrthoLearner + + +class _ModelNuisance(ModelSelector): + """ + Nuisance model fits the model_y and model_t at fit time and at predict time + calculates the residual Y and residual T based on the fitted models and returns + the residuals as two nuisance parameters. + """ + + def __init__(self, model_y: ModelSelector, model_t: ModelSelector): + self._model_y = model_y + self._model_t = model_t + + def train(self, is_selecting, folds, Y, T, X=None, W=None, Z=None, sample_weight=None, groups=None): + assert Z is None, "Cannot accept instrument!" 
+ self._model_t.train(is_selecting, folds, X, W, T, ** + filter_none_kwargs(sample_weight=sample_weight, groups=groups)) + self._model_y.train(is_selecting, folds, X, W, Y, ** + filter_none_kwargs(sample_weight=sample_weight, groups=groups)) + return self + + def score(self, Y, T, X=None, W=None, Z=None, sample_weight=None, groups=None): + # note that groups are not passed to score because they are only used for fitting + T_score = self._model_t.score(X, W, T, **filter_none_kwargs(sample_weight=sample_weight)) + Y_score = self._model_y.score(X, W, Y, **filter_none_kwargs(sample_weight=sample_weight)) + return Y_score, T_score + + def predict(self, Y, T, X=None, W=None, Z=None, sample_weight=None, groups=None): + Y_pred = self._model_y.predict(X, W) + T_pred = self._model_t.predict(X, W) + if (X is None) and (W is None): # In this case predict above returns a single row + Y_pred = np.tile(Y_pred.reshape(1, -1), (Y.shape[0], 1)) + T_pred = np.tile(T_pred.reshape(1, -1), (T.shape[0], 1)) + Y_res = Y - Y_pred.reshape(Y.shape) + T_res = T - T_pred.reshape(T.shape) + return Y_res, T_res + + @property + def needs_fit(self): + return self._model_y.needs_fit or self._model_t.needs_fit + + +class _ModelFinal: + """ + Final model at fit time, fits a residual on residual regression with a heterogeneous coefficient + that depends on X, i.e. + + .. math :: + Y - E[Y | X, W] = \\theta(X) \\cdot (T - E[T | X, W]) + \\epsilon + + and at predict time returns :math:`\\theta(X)`. The score method returns the MSE of this final + residual on residual regression. + """ + + def __init__(self, model_final): + self._model_final = model_final + + def fit(self, Y, T, X=None, W=None, Z=None, nuisances=None, + sample_weight=None, freq_weight=None, sample_var=None, groups=None): + Y_res, T_res = nuisances + self._model_final.fit(X, T, T_res, Y_res, sample_weight=sample_weight, + freq_weight=freq_weight, sample_var=sample_var) + return self + + def predict(self, X=None): + return self._model_final.predict(X) + + def score(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None, groups=None): + Y_res, T_res = nuisances + if Y_res.ndim == 1: + Y_res = Y_res.reshape((-1, 1)) + if T_res.ndim == 1: + T_res = T_res.reshape((-1, 1)) + effects = self._model_final.predict(X).reshape((-1, Y_res.shape[1], T_res.shape[1])) + Y_res_pred = np.einsum('ijk,ik->ij', effects, T_res).reshape(Y_res.shape) + if sample_weight is not None: + return np.mean(np.average((Y_res - Y_res_pred) ** 2, weights=sample_weight, axis=0)) + else: + return np.mean((Y_res - Y_res_pred) ** 2) + + +class _RLearner(_OrthoLearner): + """ + Base class for CATE learners that residualize treatment and outcome and run residual on residual regression. + The estimator is a special of an :class:`._OrthoLearner` estimator, + so it follows the two + stage process, where a set of nuisance functions are estimated in the first stage in a crossfitting + manner and a final stage estimates the CATE model. See the documentation of + :class:`._OrthoLearner` for a description of this two stage process. + + In this estimator, the CATE is estimated by using the following estimating equations: + + .. math :: + Y - \\E[Y | X, W] = \\Theta(X) \\cdot (T - \\E[T | X, W]) + \\epsilon + + Thus if we estimate the nuisance functions :math:`q(X, W) = \\E[Y | X, W]` and + :math:`f(X, W)=\\E[T | X, W]` in the first stage, we can estimate the final stage cate for each + treatment t, by running a regression, minimizing the residual on residual square loss: + + .. 
math :: + \\hat{\\theta} = \\arg\\min_{\\Theta}\ + \\E_n\\left[ (\\tilde{Y} - \\Theta(X) \\cdot \\tilde{T})^2 \\right] + + Where :math:`\\tilde{Y}=Y - \\E[Y | X, W]` and :math:`\\tilde{T}=T-\\E[T | X, W]` denotes the + residual outcome and residual treatment. + + Parameters + ---------- + discrete_outcome: bool + Whether the outcome should be treated as binary + + discrete_treatment: bool + Whether the treatment values should be treated as categorical, rather than continuous, quantities + + treatment_featurizer : :term:`transformer` or None + Must support fit_transform and transform. Used to create composite treatment in the final CATE regression. + The final CATE will be trained on the outcome of featurizer.fit_transform(T). + If featurizer=None, then CATE is trained on T. + + categories: 'auto' or list + The categories to use when encoding discrete treatments (or 'auto' to use the unique sorted values). + The first category will be treated as the control treatment. + + cv: int, cross-validation generator or an iterable + Determines the cross-validation splitting strategy. + Possible inputs for cv are: + + - None, to use the default 3-fold cross-validation, + - integer, to specify the number of folds. + - :term:`CV splitter` + - An iterable yielding (train, test) splits as arrays of indices. + + For integer/None inputs, if the treatment is discrete + :class:`~sklearn.model_selection.StratifiedKFold` is used, else, + :class:`~sklearn.model_selection.KFold` is used + (with a random shuffle in either case). + + Unless an iterable is used, we call `split(concat[W, X], T)` to generate the splits. If all + W, X are None, then we call `split(ones((T.shape[0], 1)), T)`. + + random_state: int, :class:`~numpy.random.mtrand.RandomState` instance or None + If int, random_state is the seed used by the random number generator; + If :class:`~numpy.random.mtrand.RandomState` instance, random_state is the random number generator; + If None, the random number generator is the :class:`~numpy.random.mtrand.RandomState` instance used + by :mod:`np.random`. + + mc_iters: int, optional + The number of times to rerun the first stage models to reduce the variance of the nuisances. + + mc_agg: {'mean', 'median'}, default 'mean' + How to aggregate the nuisance value for each sample across the `mc_iters` monte carlo iterations of + cross-fitting. + + allow_missing: bool + Whether to allow missing values in X, W. If True, will need to supply nuisance models that can handle + missing values. + + use_ray: bool, default False + Whether to use Ray to speed up the cross-fitting step. + + ray_remote_func_options : dict, optional + Options to pass to ray.remote function decorator. + see more at https://docs.ray.io/en/latest/ray-core/api/doc/ray.remote.html + + Examples + -------- + + The example code below implements a very simple version of the double machine learning + method on top of the :class:`._RLearner` class, for expository purposes. + For a more elaborate implementation of a Double Machine Learning child class of the class + checkout :class:`.DML` and its child classes: + + .. 
testcode:: + + import numpy as np + from sklearn.linear_model import LinearRegression + from econml.dml._rlearner import _RLearner + from econml.sklearn_extensions.model_selection import SingleModelSelector + from sklearn.base import clone + class ModelFirst: + def __init__(self, model): + self._model = clone(model, safe=False) + def fit(self, X, W, Y, sample_weight=None): + self._model.fit(np.hstack([X, W]), Y) + return self + def predict(self, X, W): + return self._model.predict(np.hstack([X, W])) + class ModelSelector(SingleModelSelector): + def __init__(self, model): + self._model = ModelFirst(model) + def train(self, is_selecting, folds, X, W, Y, sample_weight=None): + self._model.fit(X, W, Y, sample_weight=sample_weight) + return self + @property + def best_model(self): + return self._model + @property + def best_score(self): + return 0 + @property + def needs_fit(self): + return False + class ModelFinal: + def fit(self, X, T, T_res, Y_res, sample_weight=None, freq_weight=None, sample_var=None): + self.model = LinearRegression(fit_intercept=False).fit(X * T_res.reshape(-1, 1), + Y_res) + return self + def predict(self, X): + return self.model.predict(X) + class RLearner(_RLearner): + def _gen_model_y(self): + return ModelSelector(LinearRegression()) + def _gen_model_t(self): + return ModelSelector(LinearRegression()) + def _gen_rlearner_model_final(self): + return ModelFinal() + np.random.seed(123) + X = np.random.normal(size=(1000, 3)) + y = X[:, 0] + X[:, 1] + np.random.normal(0, 0.01, size=(1000,)) + est = RLearner(cv=2, discrete_outcome=False, discrete_treatment=False, + treatment_featurizer=None, categories='auto', random_state=None) + est.fit(y, X[:, 0], X=np.ones((X.shape[0], 1)), W=X[:, 1:]) + + >>> est.const_marginal_effect(np.ones((1,1))) + array([0.999631...]) + >>> est.effect(np.ones((1,1)), T0=0, T1=10) + array([9.996314...]) + >>> est.score(y, X[:, 0], X=np.ones((X.shape[0], 1)), W=X[:, 1:]) + 9.73638006...e-05 + >>> est.rlearner_model_final_.model + LinearRegression(fit_intercept=False) + >>> est.rlearner_model_final_.model.coef_ + array([0.999631...]) + >>> est.score_ + 9.82623204...e-05 + >>> [mdl._model for mdls in est.models_y for mdl in mdls] + [LinearRegression(), LinearRegression()] + >>> [mdl._model for mdls in est.models_t for mdl in mdls] + [LinearRegression(), LinearRegression()] + + Attributes + ---------- + models_y: nested list of objects of type(model_y) + A nested list of instances of the model_y object. Number of sublist equals to number of monte carlo + iterations, each element in the sublist corresponds to a crossfitting + fold and is the model instance that was fitted for that training fold. + models_t: nested list of objects of type(model_t) + A nested list of instances of the model_t object. Number of sublist equals to number of monte carlo + iterations, each element in the sublist corresponds to a crossfitting + fold and is the model instance that was fitted for that training fold. + rlearner_model_final_ : object of type(model_final) + An instance of the model_final object that was fitted after calling fit. + score_ : float + The MSE in the final residual on residual regression + nuisance_scores_y : nested list of float + The out-of-sample scores for each outcome model + nuisance_scores_t : nested list of float + The out-of-sample scores for each treatment model + + .. 
math:: + \\frac{1}{n} \\sum_{i=1}^n (Y_i - \\hat{E}[Y|X_i, W_i]\ + - \\hat{\\theta}(X_i)\\cdot (T_i - \\hat{E}[T|X_i, W_i]))^2 + + If `sample_weight` is not None at fit time, then a weighted average is returned. If the outcome Y + is multidimensional, then the average of the MSEs for each dimension of Y is returned. + """ + + def __init__(self, + *, + discrete_outcome, + discrete_treatment, + treatment_featurizer, + categories, + cv, + random_state, + mc_iters=None, + mc_agg='mean', + allow_missing=False, + use_ray=False, + ray_remote_func_options=None): + super().__init__(discrete_outcome=discrete_outcome, + discrete_treatment=discrete_treatment, + treatment_featurizer=treatment_featurizer, + discrete_instrument=False, # no instrument, so doesn't matter + categories=categories, + cv=cv, + random_state=random_state, + mc_iters=mc_iters, + mc_agg=mc_agg, + allow_missing=allow_missing, + use_ray=use_ray, + ray_remote_func_options=ray_remote_func_options) + + @abstractmethod + def _gen_model_y(self): + """ + Returns + ------- + model_y: selector for the estimator of E[Y | X, W] + The estimator for fitting the response to the features and controls. Must implement + `fit` and `predict` methods. Unlike sklearn estimators both methods must + take an extra second argument (the controls), i.e. :: + + model_y.fit(X, W, Y, sample_weight=sample_weight) + model_y.predict(X, W) + """ + raise NotImplementedError("Abstract method") + + @abstractmethod + def _gen_model_t(self): + """ + Returns + ------- + model_t: selector for the estimator of E[T | X, W] + The estimator for fitting the treatment to the features and controls. Must implement + `fit` and `predict` methods. Unlike sklearn estimators both methods must + take an extra second argument (the controls), i.e. :: + + model_t.fit(X, W, T, sample_weight=sample_weight) + model_t.predict(X, W) + """ + raise NotImplementedError("Abstract method") + + @abstractmethod + def _gen_rlearner_model_final(self): + """ + Returns + ------- + model_final: estimator for fitting the response residuals to the features and treatment residuals + Must implement `fit` and `predict` methods. Unlike sklearn estimators the fit methods must + take an extra second argument (the treatment residuals). Predict, on the other hand, + should just take the features and return the constant marginal effect. More, concretely:: + + model_final.fit(X, T_res, Y_res, + sample_weight=sample_weight, freq_weight=freq_weight, sample_var=sample_var) + model_final.predict(X) + """ + raise NotImplementedError("Abstract method") + + def _gen_ortho_learner_model_nuisance(self): + return _ModelNuisance(self._gen_model_y(), self._gen_model_t()) + + def _gen_ortho_learner_model_final(self): + return _ModelFinal(self._gen_rlearner_model_final()) + + def fit(self, Y, T, *, X=None, W=None, sample_weight=None, freq_weight=None, sample_var=None, groups=None, + cache_values=False, inference=None): + """ + Estimate the counterfactual model from data, i.e. estimates function :math:`\\theta(\\cdot)`. + + Parameters + ---------- + Y: (n, d_y) matrix or vector of length n + Outcomes for each sample + T: (n, d_t) matrix or vector of length n + Treatments for each sample + X:(n, d_x) matrix, optional + Features for each sample + W:(n, d_w) matrix, optional + Controls for each sample + sample_weight : (n,) array_like, optional + Individual weights for each sample. If None, it assumes equal weight. + freq_weight: (n, ) array_like of int, optional + Weight for the observation. 
Observation i is treated as the mean + outcome of freq_weight[i] independent observations. + When ``sample_var`` is not None, this should be provided. + sample_var : {(n,), (n, d_y)} nd array_like, optional + Variance of the outcome(s) of the original freq_weight[i] observations that were used to + compute the mean outcome represented by observation i. + groups: (n,) vector, optional + All rows corresponding to the same group will be kept together during splitting. + If groups is not None, the `cv` argument passed to this class's initializer + must support a 'groups' argument to its split method. + cache_values: bool, default False + Whether to cache inputs and first stage results, which will allow refitting a different final model + inference: str, :class:`.Inference` instance, or None + Method for performing inference. This estimator supports 'bootstrap' + (or an instance of:class:`.BootstrapInference`). + + Returns + ------- + self: _RLearner instance + """ + # Replacing fit from _OrthoLearner, to enforce Z=None and improve the docstring + return super().fit(Y, T, X=X, W=W, + sample_weight=sample_weight, freq_weight=freq_weight, sample_var=sample_var, groups=groups, + cache_values=cache_values, + inference=inference) + + def score(self, Y, T, X=None, W=None, sample_weight=None): + """ + Score the fitted CATE model on a new data set. Generates nuisance parameters + for the new data set based on the fitted residual nuisance models created at fit time. + It uses the mean prediction of the models fitted by the different crossfit folds. + Then calculates the MSE of the final residual Y on residual T regression. + + If model_final does not have a score method, then it raises an :exc:`.AttributeError` + + Parameters + ---------- + Y: (n, d_y) matrix or vector of length n + Outcomes for each sample + T: (n, d_t) matrix or vector of length n + Treatments for each sample + X:(n, d_x) matrix, optional + Features for each sample + W:(n, d_w) matrix, optional + Controls for each sample + sample_weight:(n,) vector, optional + Weights for each samples + + + Returns + ------- + score: float + The MSE of the final CATE model on the new data. + """ + # Replacing score from _OrthoLearner, to enforce Z=None and improve the docstring + return super().score(Y, T, X=X, W=W, sample_weight=sample_weight) + + @property + def rlearner_model_final_(self): + # NOTE: important to get parent's wrapped copy so that + # after training wrapped featurizer is also trained, etc. + return self.ortho_learner_model_final_._model_final + + @property + def models_y(self): + return [[mdl._model_y.best_model for mdl in mdls] for mdls in super().models_nuisance_] + + @property + def models_t(self): + return [[mdl._model_t.best_model for mdl in mdls] for mdls in super().models_nuisance_] + + @property + def nuisance_scores_y(self): + return self.nuisance_scores_[0] + + @property + def nuisance_scores_t(self): + return self.nuisance_scores_[1] + + @property + def residuals_(self): + """ + A tuple (y_res, T_res, X, W), of the residuals from the first stage estimation + along with the associated X and W. Samples are not guaranteed to be in the same + order as the input order. + """ + if not hasattr(self, '_cached_values'): + raise AttributeError("Estimator is not fitted yet!") + if self._cached_values is None: + raise AttributeError("`fit` was called with `cache_values=False`. 
" + "Set to `True` to enable residual storage.") + Y_res, T_res = self._cached_values.nuisances + return Y_res, T_res, self._cached_values.X, self._cached_values.W diff --git a/econml/dml/causal_forest.py b/econml/dml/causal_forest.py index a3affed39..bbe44008f 100644 --- a/econml/dml/causal_forest.py +++ b/econml/dml/causal_forest.py @@ -268,35 +268,23 @@ class CausalForestDML(_BaseDML): Parameters ---------- - model_y: estimator, {'linear', 'forest'}, list of str/estimator, or 'auto' - Determines how to fit the treatment to the features. - - - If an estimator, will use the model as is for fitting. - - If str, will use model associated with the keyword. + model_y: estimator, default ``'auto'`` + Determines how to fit the outcome to the features. - - 'linear' - LogisticRegressionCV if discrete_outcome=True else WeightedLassoCVWrapper - - 'forest' - RandomForestClassifier if discrete_outcome=True else RandomForestRegressor - - If list, will perform model selection on the supplied list, which can be a mix of str and estimators, \ - and then use the best estimator for fitting. - - If 'auto', model will select over linear and forest models + - If ``'auto'``, the model will be the best-fitting of a set of linear and forest models - User-supplied estimators should support 'fit' and 'predict' methods, - and additionally 'predict_proba' if discrete_outcome=True. + - Otherwise, see :ref:`model_selection` for the range of supported options; + if a single model is specified it should be a classifier if `discrete_outcome` is True + and a regressor otherwise - model_t: estimator, {'linear', 'forest'}, list of str/estimator, or 'auto', default 'auto' - Determines how to fit the treatment to the features. str in a sentence - - - If an estimator, will use the model as is for fitting. - - If str, will use model associated with the keyword. + model_t: estimator, default ``'auto'`` + Determines how to fit the treatment to the features. - - 'linear' - LogisticRegressionCV if discrete_treatment=True else WeightedLassoCVWrapper - - 'forest' - RandomForestClassifier if discrete_treatment=True else RandomForestRegressor - - If list, will perform model selection on the supplied list, which can be a mix of str and estimators, \ - and then use the best estimator for fitting. - - If 'auto', model will select over linear and forest models + - If ``'auto'``, the model will be the best-fitting of a set of linear and forest models - User-supplied estimators should support 'fit' and 'predict' methods, - and additionally 'predict_proba' if discrete_treatment=True. + - Otherwise, see :ref:`model_selection` for the range of supported options; + if a single model is specified it should be a classifier if `discrete_treatment` is True + and a regressor otherwise featurizer : :term:`transformer`, optional Must support fit_transform and transform. Used to create composite features in the final CATE regression. @@ -569,10 +557,10 @@ class CausalForestDML(_BaseDML): est.fit(y, T, X=X, W=None) >>> est.effect(X[:3]) - array([0.88518..., 1.25061..., 0.81112...]) + array([0.62947..., 1.64576..., 0.68496... ]) >>> est.effect_interval(X[:3]) - (array([0.40163..., 0.75023..., 0.46629...]), - array([1.36873..., 1.75099..., 1.15596...])) + (array([0.19136... 
, 1.17143..., 0.10789...]), + array([1.06758..., 2.12009..., 1.26203...])) Attributes ---------- diff --git a/econml/dml/dml.py b/econml/dml/dml.py index 7c1a2bd2d..884d820e2 100644 --- a/econml/dml/dml.py +++ b/econml/dml/dml.py @@ -82,7 +82,7 @@ def __init__(self, model: SingleModelSelector, discrete_target): self._model = clone(model, safe=False) self._discrete_target = discrete_target - def train(self, is_selecting, X, W, Target, sample_weight=None, groups=None): + def train(self, is_selecting, folds, X, W, Target, sample_weight=None, groups=None): if self._discrete_target: # In this case, the Target is the one-hot-encoding of the treatment variable # We need to go back to the label representation of the one-hot so as to call @@ -92,7 +92,7 @@ def train(self, is_selecting, X, W, Target, sample_weight=None, groups=None): "don't contain all treatments") Target = inverse_onehot(Target) - self._model.train(is_selecting, _combine(X, W, Target.shape[0]), Target, + self._model.train(is_selecting, folds, _combine(X, W, Target.shape[0]), Target, **filter_none_kwargs(groups=groups, sample_weight=sample_weight)) return self @@ -104,6 +104,10 @@ def best_model(self): def best_score(self): return self._model.best_score + @property + def needs_fit(self): + return self._model.needs_fit + def _make_first_stage_selector(model, is_discrete, random_state): if model == 'auto': @@ -354,35 +358,23 @@ class takes as input the parameter `model_t`, which is an arbitrary scikit-learn Parameters ---------- - model_y: estimator, {'linear', 'forest'}, list of str/estimator, or 'auto' - Determines how to fit the treatment to the features. + model_y: estimator, default ``'auto'`` + Determines how to fit the outcome to the features. - - If an estimator, will use the model as is for fitting. - - If str, will use model associated with the keyword. + - If ``'auto'``, the model will be the best-fitting of a set of linear and forest models - - 'linear' - LogisticRegressionCV if discrete_outcome=True else WeightedLassoCVWrapper - - 'forest' - RandomForestClassifier if discrete_outcome=True else RandomForestRegressor - - If list, will perform model selection on the supplied list, which can be a mix of str and estimators, \ - and then use the best estimator for fitting. - - If 'auto', model will select over linear and forest models + - Otherwise, see :ref:`model_selection` for the range of supported options; + if a single model is specified it should be a classifier if `discrete_outcome` is True + and a regressor otherwise - User-supplied estimators should support 'fit' and 'predict' methods, - and additionally 'predict_proba' if discrete_outcome=True. - - model_t: estimator, {'linear', 'forest'}, list of str/estimator, or 'auto + model_t: estimator, default ``'auto'`` Determines how to fit the treatment to the features. - - If an estimator, will use the model as is for fitting. - - If str, will use model associated with the keyword. - - - 'linear' - LogisticRegressionCV if discrete_treatment=True else WeightedLassoCVWrapper - - 'forest' - RandomForestClassifier if discrete_treatment=True else RandomForestRegressor - - If list, will perform model selection on the supplied list, which can be a mix of str and estimators, \ - and then use the best estimator for fitting. 
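As a sketch of the classifier/regressor requirement described in these docstrings (the concrete estimators below are illustrative, not defaults)::

    from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor
    from econml.dml import LinearDML

    # With a discrete treatment, a user-supplied model_t should be a classifier,
    # while model_y stays a regressor for a continuous outcome.
    est = LinearDML(model_y=GradientBoostingRegressor(),
                    model_t=GradientBoostingClassifier(),
                    discrete_treatment=True)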
- - If 'auto', model will select over linear and forest models + - If ``'auto'``, the model will be the best-fitting of a set of linear and forest models - User-supplied estimators should support 'fit' and 'predict' methods, - and additionally 'predict_proba' if discrete_treatment=True. + - Otherwise, see :ref:`model_selection` for the range of supported options; + if a single model is specified it should be a classifier if `discrete_treatment` is True + and a regressor otherwise model_final: estimator The estimator for fitting the response residuals to the treatment residuals. Must implement @@ -486,19 +478,19 @@ class takes as input the parameter `model_t`, which is an arbitrary scikit-learn est.fit(y, T, X=X, W=None) >>> est.effect(X[:3]) - array([0.65142..., 1.82917..., 0.79287...]) + array([0.63382..., 1.78225..., 0.71859...]) >>> est.effect_interval(X[:3]) - (array([0.28936..., 1.31239..., 0.47626...]), - array([1.01348..., 2.34594..., 1.10949...])) + (array([0.27937..., 1.27619..., 0.42091...]), + array([0.98827... , 2.28831..., 1.01628...])) >>> est.coef_ - array([ 0.32570..., -0.05311..., -0.03973..., 0.01598..., -0.11045...]) + array([ 0.42857..., 0.04488..., -0.03317..., 0.02258..., -0.14875...]) >>> est.coef__interval() - (array([ 0.13791..., -0.20081..., -0.17941..., -0.12073..., -0.25769...]), - array([0.51348..., 0.09458..., 0.09993..., 0.15269..., 0.03679...])) + (array([ 0.25179..., -0.10558..., -0.16723... , -0.11916..., -0.28759...]), + array([ 0.60535..., 0.19536..., 0.10088..., 0.16434..., -0.00990...])) >>> est.intercept_ - 1.02940... + 1.01166... >>> est.intercept__interval() - (0.88754..., 1.17125...) + (0.87125..., 1.15207...) """ def __init__(self, *, @@ -622,35 +614,23 @@ class LinearDML(StatsModelsCateEstimatorMixin, DML): Parameters ---------- - model_y: estimator, {'linear', 'forest'}, list of str/estimator, or 'auto' - Determines how to fit the treatment to the features. - - - If an estimator, will use the model as is for fitting. - - If str, will use model associated with the keyword. + model_y: estimator, default ``'auto'`` + Determines how to fit the outcome to the features. - - 'linear' - LogisticRegressionCV if discrete_outcome=True else WeightedLassoCVWrapper - - 'forest' - RandomForestClassifier if discrete_outcome=True else RandomForestRegressor - - If list, will perform model selection on the supplied list, which can be a mix of str and estimators, \ - and then use the best estimator for fitting. - - If 'auto', model will select over linear and forest models + - If ``'auto'``, the model will be the best-fitting of a set of linear and forest models - User-supplied estimators should support 'fit' and 'predict' methods, - and additionally 'predict_proba' if discrete_outcome=True. + - Otherwise, see :ref:`model_selection` for the range of supported options; + if a single model is specified it should be a classifier if `discrete_outcome` is True + and a regressor otherwise - model_t: estimator, {'linear', 'forest'}, list of str/estimator, or 'auto', default 'auto' + model_t: estimator, default ``'auto'`` Determines how to fit the treatment to the features. - - If an estimator, will use the model as is for fitting. - - If str, will use model associated with the keyword. 
+ - If ``'auto'``, the model will be the best-fitting of a set of linear and forest models - - 'linear' - LogisticRegressionCV if discrete_treatment=True else WeightedLassoCVWrapper - - 'forest' - RandomForestClassifier if discrete_treatment=True else RandomForestRegressor - - If list, will perform model selection on the supplied list, which can be a mix of str and estimators, \ - and then use the best estimator for fitting. - - If 'auto', model will select over linear and forest models - - User-supplied estimators should support 'fit' and 'predict' methods, - and additionally 'predict_proba' if discrete_treatment=True. + - Otherwise, see :ref:`model_selection` for the range of supported options; + if a single model is specified it should be a classifier if `discrete_treatment` is True + and a regressor otherwise featurizer : :term:`transformer`, optional Must support fit_transform and transform. Used to create composite features in the final CATE regression. @@ -743,19 +723,19 @@ class LinearDML(StatsModelsCateEstimatorMixin, DML): est.fit(y, T, X=X, W=None) >>> est.effect(X[:3]) - array([0.60257..., 1.74564..., 0.72062...]) + array([0.49977..., 1.91668..., 0.70799...]) >>> est.effect_interval(X[:3]) - (array([0.25760..., 1.24005..., 0.41770...]), - array([0.94754..., 2.25123..., 1.02354...])) + (array([0.15122..., 1.40176..., 0.40954...]), + array([0.84831..., 2.43159..., 1.00644...])) >>> est.coef_ - array([ 0.41635..., 0.00287..., -0.01831..., -0.01197..., -0.11620...]) + array([ 0.48825..., 0.00105..., 0.00244..., 0.02217..., -0.08471...]) >>> est.coef__interval() - (array([ 0.24496..., -0.13418..., -0.14852..., -0.13947..., -0.25089...]), - array([0.58775..., 0.13993..., 0.11189..., 0.11551..., 0.01848...])) + (array([ 0.30469..., -0.13904..., -0.12790..., -0.11514..., -0.22505... ]), + array([0.67180..., 0.14116..., 0.13278..., 0.15949..., 0.05562...])) >>> est.intercept_ - 0.97162... + 1.01247... >>> est.intercept__interval() - (0.83640..., 1.10684...) + (0.87480..., 1.15015...) """ def __init__(self, *, @@ -869,35 +849,23 @@ class SparseLinearDML(DebiasedLassoCateEstimatorMixin, DML): Parameters ---------- - model_y: estimator, {'linear', 'forest'}, list of str/estimator, or 'auto' - Determines how to fit the treatment to the features. + model_y: estimator, default ``'auto'`` + Determines how to fit the outcome to the features. - - If an estimator, will use the model as is for fitting. - - If str, will use model associated with the keyword. + - If ``'auto'``, the model will be the best-fitting of a set of linear and forest models - - 'linear' - LogisticRegressionCV if discrete_outcome=True else WeightedLassoCVWrapper - - 'forest' - RandomForestClassifier if discrete_outcome=True else RandomForestRegressor - - If list, will perform model selection on the supplied list, which can be a mix of str and estimators, \ - and then use the best estimator for fitting. - - If 'auto', model will select over linear and forest models + - Otherwise, see :ref:`model_selection` for the range of supported options; + if a single model is specified it should be a classifier if `discrete_outcome` is True + and a regressor otherwise - User-supplied estimators should support 'fit' and 'predict' methods, - and additionally 'predict_proba' if discrete_outcome=True. - - model_t: estimator, {'linear', 'forest'}, list of str/estimator, or 'auto', default 'auto' + model_t: estimator, default ``'auto'`` Determines how to fit the treatment to the features. 
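Per the options described in :ref:`model_selection`, these arguments can also name several candidates, in which case the best-fitting one is selected; a sketch (the candidate list is illustrative)::

    from sklearn.linear_model import LassoCV
    from econml.dml import SparseLinearDML

    # Select among keyword-named model families and a concrete estimator for each stage.
    est = SparseLinearDML(model_y=['linear', 'forest'],
                          model_t=[LassoCV(), 'forest'])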
- - If an estimator, will use the model as is for fitting. - - If str, will use model associated with the keyword. - - - 'linear' - LogisticRegressionCV if discrete_treatment=True else WeightedLassoCVWrapper - - 'forest' - RandomForestClassifier if discrete_treatment=True else RandomForestRegressor - - If list, will perform model selection on the supplied list, which can be a mix of str and estimators, \ - and then use the best estimator for fitting. - - If 'auto', model will select over linear and forest models + - If ``'auto'``, the model will be the best-fitting of a set of linear and forest models - User-supplied estimators should support 'fit' and 'predict' methods, - and additionally 'predict_proba' if discrete_treatment=True. + - Otherwise, see :ref:`model_selection` for the range of supported options; + if a single model is specified it should be a classifier if `discrete_treatment` is True + and a regressor otherwise alpha: str or float, default 'auto' CATE L1 regularization applied through the debiased lasso in the final model. @@ -1016,19 +984,19 @@ class SparseLinearDML(DebiasedLassoCateEstimatorMixin, DML): est.fit(y, T, X=X, W=None) >>> est.effect(X[:3]) - array([0.59812..., 1.75138..., 0.71770...]) + array([0.50083..., 1.91663..., 0.70386...]) >>> est.effect_interval(X[:3]) - (array([0.25046..., 1.24249..., 0.42606...]), - array([0.94577..., 2.26028..., 1.00935... ])) + (array([0.14616..., 1.40364..., 0.40674...]), + array([0.85550... , 2.42962... , 1.00099...])) >>> est.coef_ - array([ 0.41820..., 0.00506..., -0.01831..., -0.00778..., -0.11965...]) + array([ 0.49123..., 0.00495..., 0.00007..., 0.02302..., -0.08483...]) >>> est.coef__interval() - (array([ 0.25058..., -0.13713..., -0.15469..., -0.13932..., -0.26252...]), - array([0.58583..., 0.14726..., 0.11806..., 0.12376..., 0.02320...])) + (array([ 0.31323..., -0.13848..., -0.13721..., -0.11141..., -0.22961...]), + array([0.66923..., 0.14839... , 0.13735..., 0.15745..., 0.05993...])) >>> est.intercept_ - 0.97131... + 1.01476... >>> est.intercept__interval() - (0.83363..., 1.10899...) + (0.87620..., 1.15332...) """ def __init__(self, *, @@ -1168,32 +1136,23 @@ class KernelDML(DML): Parameters ---------- - model_y: estimator, {'linear', 'forest'}, list of str/estimator, or 'auto' - Determines how to fit the treatment to the features. - - - If an estimator, will use the model as is for fitting. - - If str, will use model associated with the keyword. + model_y: estimator, default ``'auto'`` + Determines how to fit the outcome to the features. - - 'linear' - LogisticRegressionCV if discrete_outcome=True else WeightedLassoCVWrapper - - 'forest' - RandomForestClassifier if discrete_outcome=True else RandomForestRegressor - - If list, will perform model selection on the supplied list, which can be a mix of str and estimators, \ - and then use the best estimator for fitting. - - If 'auto', model will select over linear and forest models + - If ``'auto'``, the model will be the best-fitting of a set of linear and forest models - User-supplied estimators should support 'fit' and 'predict' methods, - and additionally 'predict_proba' if discrete_outcome=True. + - Otherwise, see :ref:`model_selection` for the range of supported options; + if a single model is specified it should be a classifier if `discrete_outcome` is True + and a regressor otherwise - model_t: estimator, {'linear', 'forest'}, list of str/estimator, or 'auto', default 'auto' + model_t: estimator, default ``'auto'`` Determines how to fit the treatment to the features. 
- - If an estimator, will use the model as is for fitting. + - If ``'auto'``, the model will be the best-fitting of a set of linear and forest models - - If list, will perform model selection on the supplied list, which can be a mix of str and estimators, \ - and then use the best estimator for fitting. - - If 'auto', model will select over linear and forest models - - User-supplied estimators should support 'fit' and 'predict' methods, - and additionally 'predict_proba' if discrete_treatment=True. + - Otherwise, see :ref:`model_selection` for the range of supported options; + if a single model is specified it should be a classifier if `discrete_treatment` is True + and a regressor otherwise fit_cate_intercept : bool, default True Whether the linear CATE model should have a constant term. @@ -1283,7 +1242,7 @@ class KernelDML(DML): est.fit(y, T, X=X, W=None) >>> est.effect(X[:3]) - array([0.64124..., 1.46561..., 0.68568...]) + array([0.63041..., 1.86098..., 0.74218...]) """ def __init__(self, model_y='auto', model_t='auto', @@ -1393,32 +1352,23 @@ class NonParamDML(_BaseDML): Parameters ---------- - model_y: estimator, {'linear', 'forest'}, list of str/estimator, or 'auto' - Determines how to fit the treatment to the features. + model_y: estimator, default ``'auto'`` + Determines how to fit the outcome to the features. - - If an estimator, will use the model as is for fitting. - - If str, will use model associated with the keyword. + - If ``'auto'``, the model will be the best-fitting of a set of linear and forest models - - 'linear' - LogisticRegressionCV if discrete_outcome=True else WeightedLassoCVWrapper - - 'forest' - RandomForestClassifier if discrete_outcome=True else RandomForestRegressor - - If list, will perform model selection on the supplied list, which can be a mix of str and estimators, \ - and then use the best estimator for fitting. - - If 'auto', model will select over linear and forest models + - Otherwise, see :ref:`model_selection` for the range of supported options; + if a single model is specified it should be a classifier if `discrete_outcome` is True + and a regressor otherwise - User-supplied estimators should support 'fit' and 'predict' methods, - and additionally 'predict_proba' if discrete_outcome=True. - - model_t: estimator, {'linear', 'forest'}, list of str/estimator, or 'auto' + model_t: estimator, default ``'auto'`` Determines how to fit the treatment to the features. - - If an estimator, will use the model as is for fitting. - - - If list, will perform model selection on the supplied list, which can be a mix of str and estimators, \ - and then use the best estimator for fitting. - - If 'auto', model will select over linear and forest models + - If ``'auto'``, the model will be the best-fitting of a set of linear and forest models - User-supplied estimators should support 'fit' and 'predict' methods, - and additionally 'predict_proba' if discrete_treatment=True. + - Otherwise, see :ref:`model_selection` for the range of supported options; + if a single model is specified it should be a classifier if `discrete_treatment` is True + and a regressor otherwise model_final: estimator The estimator for fitting the response residuals to the treatment residuals. 
Must implement @@ -1515,7 +1465,7 @@ class NonParamDML(_BaseDML): est.fit(y, T, X=X, W=None) >>> est.effect(X[:3]) - array([0.32389..., 0.85703..., 0.97468...]) + array([0.35318..., 1.28760..., 0.83506...]) """ def __init__(self, *, diff --git a/econml/dr/_drlearner.py b/econml/dr/_drlearner.py index 02560fd81..f51bc99eb 100644 --- a/econml/dr/_drlearner.py +++ b/econml/dr/_drlearner.py @@ -72,7 +72,7 @@ def __init__(self, def _combine(self, X, W): return np.hstack([arr for arr in [X, W] if arr is not None]) - def train(self, is_selecting, Y, T, X=None, W=None, *, sample_weight=None, groups=None): + def train(self, is_selecting, folds, Y, T, X=None, W=None, *, sample_weight=None, groups=None): if Y.ndim != 1 and (Y.ndim != 2 or Y.shape[1] != 1): raise ValueError("The outcome matrix must be of shape ({0}, ) or ({0}, 1), " "instead got {1}.".format(len(X), Y.shape)) @@ -84,8 +84,8 @@ def train(self, is_selecting, Y, T, X=None, W=None, *, sample_weight=None, group XW = self._combine(X, W) filtered_kwargs = filter_none_kwargs(sample_weight=sample_weight) - self._model_propensity.train(is_selecting, XW, inverse_onehot(T), groups=groups, **filtered_kwargs) - self._model_regression.train(is_selecting, np.hstack([XW, T]), Y, groups=groups, **filtered_kwargs) + self._model_propensity.train(is_selecting, folds, XW, inverse_onehot(T), groups=groups, **filtered_kwargs) + self._model_regression.train(is_selecting, folds, np.hstack([XW, T]), Y, groups=groups, **filtered_kwargs) return self def score(self, Y, T, X=None, W=None, *, sample_weight=None, groups=None): @@ -126,6 +126,10 @@ def predict(self, Y, T, X=None, W=None, *, sample_weight=None, groups=None): propensities_weight = np.sum(propensities * T_complete, axis=1) return Y_pred.reshape(Y.shape + (T.shape[1] + 1,)), propensities_weight.reshape((n,)) + @property + def needs_fit(self): + return self._model_propensity.needs_fit or self._model_regression.needs_fit + def _make_first_stage_selector(model, is_discrete, random_state): if model == "auto": @@ -247,35 +251,22 @@ class takes as input the parameter ``model_regressor``, which is an arbitrary sc Parameters ---------- - model_propensity : estimator, {'linear', 'forest'}, list of str/estimator, or 'auto', default 'auto' - Estimator for Pr[T=t | X, W]. Trained by regressing treatments on (features, controls) concatenated. - - - If an estimator, will use the model as is for fitting. - - If str, will use model associated with the keyword. + model_propensity: estimator, default ``'auto'`` + Classifier for Pr[T=t | X, W]. Trained by regressing treatments on (features, controls) concatenated. - - 'linear' - LogisticRegressionCV - - 'forest' - RandomForestClassifier - - If list, will perform model selection on the supplied list, which can be a mix of str and estimators, \ - and then use the best estimator for fitting. - - If 'auto', model will select over linear and forest models + - If ``'auto'``, the model will be the best-fitting of a set of linear and forest models - User-supplied estimators should support 'fit' and 'predict', and 'predict_proba'. + - Otherwise, see :ref:`model_selection` for the range of supported options - model_regression : estimator, {'linear', 'forest'}, list of str/estimator, or 'auto' + model_regression: estimator, default ``'auto'`` Estimator for E[Y | X, W, T]. Trained by regressing Y on (features, controls, one-hot-encoded treatments) concatenated. The one-hot-encoding excludes the baseline treatment. - - If an estimator, will use the model as is for fitting. 
- - If str, will use model associated with the keyword. + - If ``'auto'``, the model will be the best-fitting of a set of linear and forest models - - 'linear' - LogisticRegressionCV if discrete_outcome=True else WeightedLassoCVWrapper - - 'forest' - RandomForestClassifier if discrete_outcome=True else RandomForestRegressor - - If list, will perform model selection on the supplied list, which can be a mix of str and estimators, \ - and then use the best estimator for fitting. - - If 'auto', model will select over linear and forest models - - User-supplied estimators should support 'fit' and 'predict' methods, - and additionally 'predict_proba' if discrete_outcome=True. + - Otherwise, see :ref:`model_selection` for the range of supported options; + if a single model is specified it should be a classifier if `discrete_outcome` is True + and a regressor otherwise model_final : estimator for the final cate model. Trained on regressing the doubly robust potential outcomes @@ -374,29 +365,20 @@ class takes as input the parameter ``model_regressor``, which is an arbitrary sc est.fit(y, T, X=X, W=None) >>> est.const_marginal_effect(X[:2]) - array([[0.520977..., 1.244073...], - [0.365645..., 0.749762...]]) + array([[0.516931..., 0.995704...], + [0.356427..., 0.671870...]]) >>> est.effect(X[:2], T0=0, T1=1) - array([0.520977..., 0.365645...]) + array([0.516931..., 0.356427...]) >>> est.score_ - 3.15958089... + 2.84365756... >>> est.score(y, T, X=X) - 2.60965712... + 1.06259465... >>> est.model_cate(T=1).coef_ - array([0.369069..., 0.016610..., 0.019072...]) + array([ 0.447095..., -0.001013... , 0.018982...]) >>> est.model_cate(T=2).coef_ - array([ 0.768336..., 0.082106..., -0.030475...]) + array([ 0.925055..., -0.012357... , 0.033489...]) >>> est.cate_feature_names() ['X0', 'X1', 'X2'] - >>> [mdl.coef_ for mdls in est.models_regression for mdl in mdls] - [array([ 1.463..., 0.006..., -0.006..., 0.726..., 2.029...]), - array([ 1.466..., -0.002..., 0..., 0.646..., 2.014...])] - >>> [mdl.coef_ for mdls in est.models_propensity for mdl in mdls] - [array([[-0.67903093, 0.04261741, -0.05969718], - [ 0.034..., -0.013..., -0.013...], - [ 0.644..., -0.028..., 0.073...]]), array([[-0.831..., 0.100..., 0.090...], - [ 0.084..., 0.013..., -0.154...], - [ 0.747..., -0.113..., 0.063...]])] Beyond default models: @@ -418,19 +400,19 @@ class takes as input the parameter ``model_regressor``, which is an arbitrary sc est.fit(y, T, X=X, W=None) >>> est.score_ - 3.7... + 1.7... >>> est.const_marginal_effect(X[:3]) - array([[0.64..., 1.23...], - [0.49..., 0.92...], - [0.20..., 0.26...]]) + array([[0.68..., 1.10...], + [0.56..., 0.79... ], + [0.34..., 0.10... ]]) >>> est.model_cate(T=2).coef_ - array([0.72..., 0. , 0. ]) + array([0.74..., 0. , 0. ]) >>> est.model_cate(T=2).intercept_ - 2.0... + 1.9... >>> est.model_cate(T=1).coef_ - array([0.31..., 0.01..., 0.00...]) + array([0.24..., 0.00..., 0. ]) >>> est.model_cate(T=1).intercept_ - 0.97... + 0.94... Attributes ---------- @@ -809,35 +791,22 @@ class LinearDRLearner(StatsModelsCateEstimatorDiscreteMixin, DRLearner): Parameters ---------- - model_propensity : estimator, {'linear', 'forest'}, list of str/estimator, or 'auto', default 'auto' - Estimator for Pr[T=t | X, W]. Trained by regressing treatments on (features, controls) concatenated. - - - If an estimator, will use the model as is for fitting. - - If str, will use model associated with the keyword. + model_propensity: estimator, default ``'auto'`` + Classifier for Pr[T=t | X, W]. 
Trained by regressing treatments on (features, controls) concatenated. - - 'linear' - LogisticRegressionCV - - 'forest' - RandomForestClassifier - - If list, will perform model selection on the supplied list, which can be a mix of str and estimators, \ - and then use the best estimator for fitting. - - If 'auto', model will select over linear and forest models + - If ``'auto'``, the model will be the best-fitting of a set of linear and forest models - User-supplied estimators should support 'fit' and 'predict', and 'predict_proba'. + - Otherwise, see :ref:`model_selection` for the range of supported options - model_regression : estimator, {'linear', 'forest'}, list of str/estimator, or 'auto' + model_regression: estimator, default ``'auto'`` Estimator for E[Y | X, W, T]. Trained by regressing Y on (features, controls, one-hot-encoded treatments) concatenated. The one-hot-encoding excludes the baseline treatment. - - If an estimator, will use the model as is for fitting. - - If str, will use model associated with the keyword. + - If ``'auto'``, the model will be the best-fitting of a set of linear and forest models - - 'linear' - LogisticRegressionCV if discrete_outcome=True else WeightedLassoCVWrapper - - 'forest' - RandomForestClassifier if discrete_outcome=True else RandomForestRegressor - - If list, will perform model selection on the supplied list, which can be a mix of str and estimators, \ - and then use the best estimator for fitting. - - If 'auto', model will select over linear and forest models - - User-supplied estimators should support 'fit' and 'predict' methods, - and additionally 'predict_proba' if discrete_outcome=True. + - Otherwise, see :ref:`model_selection` for the range of supported options; + if a single model is specified it should be a classifier if `discrete_outcome` is True + and a regressor otherwise featurizer : :term:`transformer`, optional Must support fit_transform and transform. Used to create composite features in the final CATE regression. @@ -924,17 +893,17 @@ class LinearDRLearner(StatsModelsCateEstimatorDiscreteMixin, DRLearner): est.fit(y, T, X=X, W=None) >>> est.effect(X[:3]) - array([0.457602..., 0.335707..., 0.011288...]) + array([ 0.432476..., 0.359739..., -0.085326...]) >>> est.effect_interval(X[:3]) - (array([ 0.164623..., -0.098980..., -0.493464...]), array([0.750582..., 0.77039... , 0.516041...])) + (array([ 0.084145... , -0.178020..., -0.734688...]), array([0.780807..., 0.897500..., 0.564035...])) >>> est.coef_(T=1) - array([0.338061..., 0.025654..., 0.044389...]) + array([ 0.450620..., -0.008792..., 0.075242...]) >>> est.coef__interval(T=1) - (array([ 0.135677..., -0.155845..., -0.143376...]), array([0.540446..., 0.207155..., 0.232155...])) + (array([ 0.156233... , -0.252177..., -0.159805...]), array([0.745007..., 0.234592..., 0.310290...])) >>> est.intercept_(T=1) - 0.78646497... + 0.90916103... >>> est.intercept__interval(T=1) - (0.60344468..., 0.96948526...) + (0.66855287..., 1.14976919...) Attributes ---------- @@ -1101,35 +1070,22 @@ class SparseLinearDRLearner(DebiasedLassoCateEstimatorDiscreteMixin, DRLearner): Parameters ---------- - model_propensity : estimator, {'linear', 'forest'}, list of str/estimator, or 'auto', default 'auto' - Estimator for Pr[T=t | X, W]. Trained by regressing treatments on (features, controls) concatenated. - - - If an estimator, will use the model as is for fitting. - - If str, will use model associated with the keyword. + model_propensity: estimator, default ``'auto'`` + Classifier for Pr[T=t | X, W]. 
Trained by regressing treatments on (features, controls) concatenated. - - 'linear' - LogisticRegressionCV - - 'forest' - RandomForestClassifier - - If list, will perform model selection on the supplied list, which can be a mix of str and estimators, \ - and then use the best estimator for fitting. - - If 'auto', model will select over linear and forest models + - If ``'auto'``, the model will be the best-fitting of a set of linear and forest models - User-supplied estimators should support 'fit' and 'predict', and 'predict_proba'. + - Otherwise, see :ref:`model_selection` for the range of supported options - model_regression : estimator, {'linear', 'forest'}, list of str/estimator, or 'auto' + model_regression: estimator, default ``'auto'`` Estimator for E[Y | X, W, T]. Trained by regressing Y on (features, controls, one-hot-encoded treatments) concatenated. The one-hot-encoding excludes the baseline treatment. - - If an estimator, will use the model as is for fitting. - - If str, will use model associated with the keyword. - - - 'linear' - LogisticRegressionCV if discrete_outcome=True else WeightedLassoCVWrapper - - 'forest' - RandomForestClassifier if discrete_outcome=True else RandomForestRegressor - - If list, will perform model selection on the supplied list, which can be a mix of str and estimators, \ - and then use the best estimator for fitting. - - If 'auto', model will select over linear and forest models + - If ``'auto'``, the model will be the best-fitting of a set of linear and forest models - User-supplied estimators should support 'fit' and 'predict' methods, - and additionally 'predict_proba' if discrete_outcome=True. + - Otherwise, see :ref:`model_selection` for the range of supported options; + if a single model is specified it should be a classifier if `discrete_outcome` is True + and a regressor otherwise featurizer : :term:`transformer`, optional Must support fit_transform and transform. Used to create composite features in the final CATE regression. @@ -1242,17 +1198,17 @@ class SparseLinearDRLearner(DebiasedLassoCateEstimatorDiscreteMixin, DRLearner): est.fit(y, T, X=X, W=None) >>> est.effect(X[:3]) - array([0.45..., 0.33..., 0.01...]) + array([ 0.43..., 0.35..., -0.08... ]) >>> est.effect_interval(X[:3]) - (array([ 0.11..., -0.13..., -0.54...]), array([0.79..., 0.80..., 0.57...])) + (array([-0.01..., -0.26..., -0.81...]), array([0.87..., 0.98..., 0.65...])) >>> est.coef_(T=1) - array([0.33..., 0.02..., 0.04...]) + array([ 0.44..., -0.00..., 0.07...]) >>> est.coef__interval(T=1) - (array([ 0.14..., -0.15..., -0.14...]), array([0.53..., 0.20..., 0.23...])) + (array([ 0.19... , -0.24..., -0.17...]), array([0.70..., 0.22..., 0.32...])) >>> est.intercept_(T=1) - 0.78... + 0.90... >>> est.intercept__interval(T=1) - (0.60..., 0.96...) + (0.66..., 1.14...) Attributes ---------- @@ -1403,35 +1359,22 @@ class ForestDRLearner(ForestModelFinalCateEstimatorDiscreteMixin, DRLearner): Parameters ---------- - model_propensity : estimator, {'linear', 'forest'}, list of str/estimator, or 'auto', default 'auto' - Estimator for Pr[T=t | X, W]. Trained by regressing treatments on (features, controls) concatenated. + model_propensity: estimator, default ``'auto'`` + Classifier for Pr[T=t | X, W]. Trained by regressing treatments on (features, controls) concatenated. - - If an estimator, will use the model as is for fitting. - - If str, will use model associated with the keyword. 
+ - If ``'auto'``, the model will be the best-fitting of a set of linear and forest models - - 'linear' - LogisticRegressionCV - - 'forest' - RandomForestClassifier - - If list, will perform model selection on the supplied list, which can be a mix of str and estimators, \ - and then use the best estimator for fitting. - - If 'auto', model will select over linear and forest models + - Otherwise, see :ref:`model_selection` for the range of supported options - User-supplied estimators should support 'fit' and 'predict', and 'predict_proba'. - - model_regression : estimator, {'linear', 'forest'}, list of str/estimator, or 'auto' + model_regression: estimator, default ``'auto'`` Estimator for E[Y | X, W, T]. Trained by regressing Y on (features, controls, one-hot-encoded treatments) concatenated. The one-hot-encoding excludes the baseline treatment. - - If an estimator, will use the model as is for fitting. - - If str, will use model associated with the keyword. - - - 'linear' - LogisticRegressionCV if discrete_outcome=True else WeightedLassoCVWrapper - - 'forest' - RandomForestClassifier if discrete_outcome=True else RandomForestRegressor - - If list, will perform model selection on the supplied list, which can be a mix of str and estimators, \ - and then use the best estimator for fitting. - - If 'auto', model will select over linear and forest models + - If ``'auto'``, the model will be the best-fitting of a set of linear and forest models - User-supplied estimators should support 'fit' and 'predict' methods, - and additionally 'predict_proba' if discrete_outcome=True. + - Otherwise, see :ref:`model_selection` for the range of supported options; + if a single model is specified it should be a classifier if `discrete_outcome` is True + and a regressor otherwise discrete_outcome: bool, default False Whether the outcome should be treated as binary diff --git a/econml/iv/dml/_dml.py b/econml/iv/dml/_dml.py index 27e85fe7d..5ce9d4dcb 100644 --- a/econml/iv/dml/_dml.py +++ b/econml/iv/dml/_dml.py @@ -56,15 +56,16 @@ def __init__(self, else: self._model_z_xw = model_z - def train(self, is_selecting, Y, T, X=None, W=None, Z=None, sample_weight=None, groups=None): - self._model_y_xw.train(is_selecting, X=X, W=W, Target=Y, sample_weight=sample_weight, groups=groups) - self._model_t_xw.train(is_selecting, X=X, W=W, Target=T, sample_weight=sample_weight, groups=groups) + def train(self, is_selecting, folds, Y, T, X=None, W=None, Z=None, sample_weight=None, groups=None): + self._model_y_xw.train(is_selecting, folds, X=X, W=W, Target=Y, sample_weight=sample_weight, groups=groups) + self._model_t_xw.train(is_selecting, folds, X=X, W=W, Target=T, sample_weight=sample_weight, groups=groups) if self._projection: # concat W and Z WZ = _combine(W, Z, Y.shape[0]) - self._model_t_xwz.train(is_selecting, X=X, W=WZ, Target=T, sample_weight=sample_weight, groups=groups) + self._model_t_xwz.train(is_selecting, folds, X=X, W=WZ, Target=T, + sample_weight=sample_weight, groups=groups) else: - self._model_z_xw.train(is_selecting, X=X, W=W, Target=Z, sample_weight=sample_weight, groups=groups) + self._model_z_xw.train(is_selecting, folds, X=X, W=W, Target=Z, sample_weight=sample_weight, groups=groups) return self def score(self, Y, T, X=None, W=None, Z=None, sample_weight=None, groups=None): @@ -118,6 +119,12 @@ def predict(self, Y, T, X=None, W=None, Z=None, sample_weight=None, groups=None) Z_res = Z - Z_pred.reshape(Z.shape) return Y_res, T_res, Z_res + @property + def needs_fit(self): + return 
(self._model_y_xw.needs_fit or self._model_t_xw.needs_fit or + (self._projection and self._model_t_xwz.needs_fit) or + (not self._projection and self._model_z_xw.needs_fit)) + class _OrthoIVModelFinal: def __init__(self, model_final, featurizer, fit_cate_intercept): @@ -204,65 +211,41 @@ class OrthoIV(LinearModelFinalCateEstimatorMixin, _OrthoLearner): Parameters ---------- - model_y_xw : estimator, {'linear', 'forest'}, list of str/estimator, or 'auto' - model to estimate :math:`\\E[Y | X, W]`. - - - If an estimator, will use the model as is for fitting. - - If str, will use model associated with the keyword. - - - 'linear' - LogisticRegressionCV if discrete_outcome=True else WeightedLassoCVWrapper - - 'forest' - RandomForestClassifier if discrete_outcome=True else RandomForestRegressor - - If list, will perform model selection on the supplied list, which can be a mix of str and estimators, \ - and then use the best estimator for fitting. - - If 'auto', model will select over linear and forest models + model_y_xw: estimator, default ``'auto'`` + Determines how to fit the outcome to the features and controls (:math:`\\E[Y | X, W]`). - User-supplied estimators should support 'fit' and 'predict' methods, - and additionally 'predict_proba' if discrete_outcome=True. + - If ``'auto'``, the model will be the best-fitting of a set of linear and forest models - model_t_xw : estimator, {'linear', 'forest'}, list of str/estimator, or 'auto', default 'auto' - model to estimate :math:`\\E[T | X, W]`. + - Otherwise, see :ref:`model_selection` for the range of supported options; + if a single model is specified it should be a classifier if `discrete_outcome` is True + and a regressor otherwise - - If an estimator, will use the model as is for fitting. - - If str, will use model associated with the keyword. + model_t_xw: estimator, default ``'auto'`` + Determines how to fit the treatment to the features and controls (:math:`\\E[T | X, W]`). - - 'linear' - LogisticRegressionCV if discrete_treatment=True else WeightedLassoCVWrapper - - 'forest' - RandomForestClassifier if discrete_treatment=True else RandomForestRegressor - - If list, will perform model selection on the supplied list, which can be a mix of str and estimators, \ - and then use the best estimator for fitting. - - If 'auto', model will select over linear and forest models + - If ``'auto'``, the model will be the best-fitting of a set of linear and forest models - User-supplied estimators should support 'fit' and 'predict' methods, - and additionally 'predict_proba' if discrete_treatment=True. + - Otherwise, see :ref:`model_selection` for the range of supported options; + if a single model is specified it should be a classifier if `discrete_treatment` is True + and a regressor otherwise - model_t_xwz : estimator, {'linear', 'forest'}, list of str/estimator, or 'auto', default 'auto' - model to estimate :math:`\\E[T | X, W, Z]`. + model_t_xwz: estimator, default ``'auto'`` + Determines how to fit the treatment to the features, controls, and instrument (:math:`\\E[T | X, W, Z]`). - - If an estimator, will use the model as is for fitting. - - If str, will use model associated with the keyword. 
+ - If ``'auto'``, the model will be the best-fitting of a set of linear and forest models - - 'linear' - LogisticRegressionCV if discrete_treatment=True else WeightedLassoCVWrapper - - 'forest' - RandomForestClassifier if discrete_treatment=True else RandomForestRegressor - - If list, will perform model selection on the supplied list, which can be a mix of str and estimators, \ - and then use the best estimator for fitting. - - If 'auto', model will select over linear and forest models + - Otherwise, see :ref:`model_selection` for the range of supported options; + if a single model is specified it should be a classifier if `discrete_treatment` is True + and a regressor otherwise - User-supplied estimators should support 'fit' and 'predict' methods, - and additionally 'predict_proba' if discrete_treatment=True. + model_z_xw: estimator, default ``'auto'`` + Determines how to fit the instrument to the features and controls (:math:`\\E[Z | X, W]`). - model_z_xw : estimator, {'linear', 'forest'}, list of str/estimator, or 'auto' - model to estimate :math:`\\E[Z | X, W]`. + - If ``'auto'``, the model will be the best-fitting of a set of linear and forest models - - If an estimator, will use the model as is for fitting. - - If str, will use model associated with the keyword. - - - 'linear' - LogisticRegressionCV if discrete_instrument=True else WeightedLassoCVWrapper - - 'forest' - RandomForestClassifier if discrete_instrument=True else RandomForestRegressor - - If list, will perform model selection on the supplied list, which can be a mix of str and estimators, \ - and then use the best estimator for fitting. - - If 'auto', model will select over linear and forest models - - User-supplied estimators should support 'fit' and 'predict' methods, - and additionally 'predict_proba' if discrete_instrument=True. + - Otherwise, see :ref:`model_selection` for the range of supported options; + if a single model is specified it should be a classifier if `discrete_instrument` is True + and a regressor otherwise projection: bool, default False If True, we fit a slight variant of OrthoIV where we use E[T|X, W, Z] as the instrument as opposed to Z, @@ -369,19 +352,19 @@ def true_heterogeneity_function(X): est.fit(Y=y, T=T, Z=Z, X=X) >>> est.effect(X[:3]) - array([-4.49594..., 5.79852..., -2.88049...]) + array([-4.28045... , 6.02945..., -2.86851...]) >>> est.effect_interval(X[:3]) - (array([-7.40954..., 1.47475..., -5.32889...]), - array([-1.58235..., 10.12229..., -0.43209...])) + (array([-7.20729..., 1.75412..., -5.20897...]), + array([-1.35361..., 10.30478..., -0.52805...])) >>> est.coef_ - array([ 5.27614..., 0.92092..., 0.57579..., -0.22810..., -0.16952...]) + array([ 4.51659..., 0.78512..., 0.23706..., 0.24126... , -0.47167...]) >>> est.coef__interval() - (array([ 3.93362..., -0.22159..., -0.59863..., -1.39139..., -1.34549...]), - array([6.61866..., 2.06345..., 1.75022..., 0.93518..., 1.00644...])) + (array([ 3.15602..., -0.35785..., -0.89798..., -0.90530..., -1.62445...]), + array([5.87715... , 1.92810... , 1.37211..., 1.38783..., 0.68110...])) >>> est.intercept_ - -0.29110... + -0.13672... >>> est.intercept__interval() - (-1.45607..., 0.87386...) + (-1.27036..., 0.99690...) 
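A construction sketch contrasting the two instrument choices controlled by ``projection`` (the arguments shown are illustrative)::

    from econml.iv.dml import OrthoIV

    # Default: Z itself is the instrument, so E[Z | X, W] (model_z_xw) is fitted.
    est = OrthoIV(discrete_treatment=True, discrete_instrument=True)

    # Projection variant: E[T | X, W, Z] is used as the instrument,
    # so model_t_xwz is fitted and model_z_xw is disabled.
    est_proj = OrthoIV(projection=True, discrete_treatment=True, discrete_instrument=True)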
""" def __init__(self, *, @@ -747,12 +730,14 @@ def __init__(self, model_y_xw: ModelSelector, model_t_xw: ModelSelector, model_t self._model_t_xw = model_t_xw self._model_t_xwz = model_t_xwz - def train(self, is_selecting, Y, T, X=None, W=None, Z=None, sample_weight=None, groups=None): - self._model_y_xw.train(is_selecting, X, W, Y, **filter_none_kwargs(sample_weight=sample_weight, groups=groups)) - self._model_t_xw.train(is_selecting, X, W, T, **filter_none_kwargs(sample_weight=sample_weight, groups=groups)) + def train(self, is_selecting, folds, Y, T, X=None, W=None, Z=None, sample_weight=None, groups=None): + self._model_y_xw.train(is_selecting, folds, X, W, Y, ** + filter_none_kwargs(sample_weight=sample_weight, groups=groups)) + self._model_t_xw.train(is_selecting, folds, X, W, T, ** + filter_none_kwargs(sample_weight=sample_weight, groups=groups)) # concat W and Z WZ = _combine(W, Z, Y.shape[0]) - self._model_t_xwz.train(is_selecting, X, WZ, T, + self._model_t_xwz.train(is_selecting, folds, X, WZ, T, **filter_none_kwargs(sample_weight=sample_weight, groups=groups)) return self @@ -788,6 +773,10 @@ def predict(self, Y, T, X=None, W=None, Z=None, sample_weight=None, groups=None) T_res = TXZ_pred.reshape(T.shape) - TX_pred.reshape(T.shape) return Y_res, T_res + @property + def needs_fit(self): + return self._model_y_xw.needs_fit or self._model_t_xw.needs_fit or self._model_t_xwz.needs_fit + class _BaseDMLIVModelFinal(_ModelFinal): """ @@ -1034,50 +1023,32 @@ class DMLIV(_BaseDMLIV): Parameters ---------- - model_y_xw : estimator, {'linear', 'forest'}, list of str/estimator, or 'auto' - model to estimate :math:`\\E[Y | X, W]`. - - - If an estimator, will use the model as is for fitting. - - If str, will use model associated with the keyword. - - - 'linear' - LogisticRegressionCV if discrete_outcome=True else WeightedLassoCVWrapper - - 'forest' - RandomForestClassifier if discrete_outcome=True else RandomForestRegressor - - If list, will perform model selection on the supplied list, which can be a mix of str and estimators, \ - and then use the best estimator for fitting. - - If 'auto', model will select over linear and forest models - - User-supplied estimators should support 'fit' and 'predict' methods, - and additionally 'predict_proba' if discrete_outcome=True. + model_y_xw: estimator, default ``'auto'`` + Determines how to fit the outcome to the features and controls (:math:`\\E[Y | X, W]`). - model_t_xw : estimator, {'linear', 'forest'}, list of str/estimator, or 'auto', default 'auto' - Model to estimate :math:`\\E[T | X, W]`. + - If ``'auto'``, the model will be the best-fitting of a set of linear and forest models - - If an estimator, will use the model as is for fitting. - - If str, will use model associated with the keyword. + - Otherwise, see :ref:`model_selection` for the range of supported options; + if a single model is specified it should be a classifier if `discrete_outcome` is True + and a regressor otherwise - - 'linear' - LogisticRegressionCV if discrete_treatment=True else WeightedLassoCVWrapper - - 'forest' - RandomForestClassifier if discrete_treatment=True else RandomForestRegressor - - If list, will perform model selection on the supplied list, which can be a mix of str and estimators, \ - and then use the best estimator for fitting. - - If 'auto', model will select over linear and forest models + model_t_xw: estimator, default ``'auto'`` + Determines how to fit the treatment to the features and controls (:math:`\\E[T | X, W]`). 
- User-supplied estimators should support 'fit' and 'predict' methods, - and additionally 'predict_proba' if discrete_treatment=True. + - If ``'auto'``, the model will be the best-fitting of a set of linear and forest models - model_t_xwz : estimator, {'linear', 'forest'}, list of str/estimator, or 'auto', default 'auto' - Model to estimate :math:`\\E[T | X, W, Z]`. + - Otherwise, see :ref:`model_selection` for the range of supported options; + if a single model is specified it should be a classifier if `discrete_treatment` is True + and a regressor otherwise - - If an estimator, will use the model as is for fitting. - - If str, will use model associated with the keyword. + model_t_xwz: estimator, default ``'auto'`` + Determines how to fit the treatment to the features, controls, and instrument (:math:`\\E[T | X, W, Z]`). - - 'linear' - LogisticRegressionCV if discrete_treatment=True else WeightedLassoCVWrapper - - 'forest' - RandomForestClassifier if discrete_treatment=True else RandomForestRegressor - - If list, will perform model selection on the supplied list, which can be a mix of str and estimators, \ - and then use the best estimator for fitting. - - If 'auto', model will select over linear and forest models + - If ``'auto'``, the model will be the best-fitting of a set of linear and forest models - User-supplied estimators should support 'fit' and 'predict' methods, - and additionally 'predict_proba' if discrete_treatment=True. + - Otherwise, see :ref:`model_selection` for the range of supported options; + if a single model is specified it should be a classifier if `discrete_treatment` is True + and a regressor otherwise model_final : estimator (default is :class:`.StatsModelsLinearRegression`) final model that at fit time takes as input :math:`(Y-\\E[Y|X])`, :math:`(\\E[T|X,Z]-\\E[T|X])` and X @@ -1183,11 +1154,11 @@ def true_heterogeneity_function(X): est.fit(Y=y, T=T, Z=Z, X=X) >>> est.effect(X[:3]) - array([-6.83575..., 9.40666..., -4.27123...]) + array([-3.83383..., 5.31902..., -2.78082...]) >>> est.coef_ - array([ 8.07179..., 1.51080..., 0.87328..., -0.06944..., -0.47404...]) + array([ 4.03889..., 0.89335..., 0.12043..., 0.37958..., -0.66097...]) >>> est.intercept_ - -0.20555... + -0.18482... """ @@ -1442,50 +1413,32 @@ class NonParamDMLIV(_BaseDMLIV): Parameters ---------- - model_y_xw : estimator, {'linear', 'forest'}, list of str/estimator, or 'auto' - model to estimate :math:`\\E[Y | X, W]`. - - - If an estimator, will use the model as is for fitting. - - If str, will use model associated with the keyword. - - - 'linear' - LogisticRegressionCV if discrete_outcome=True else WeightedLassoCVWrapper - - 'forest' - RandomForestClassifier if discrete_outcome=True else RandomForestRegressor - - If list, will perform model selection on the supplied list, which can be a mix of str and estimators, \ - and then use the best estimator for fitting. - - If 'auto', model will select over linear and forest models - - User-supplied estimators should support 'fit' and 'predict' methods, - and additionally 'predict_proba' if discrete_outcome=True. + model_y_xw: estimator, default ``'auto'`` + Determines how to fit the outcome to the features and controls (:math:`\\E[Y | X, W]`). - model_t_xw : estimator, {'linear', 'forest'}, list of str/estimator, or 'auto', default 'auto' - Model to estimate :math:`\\E[T | X, W]`. + - If ``'auto'``, the model will be the best-fitting of a set of linear and forest models - - If an estimator, will use the model as is for fitting. 
- - If str, will use model associated with the keyword. + - Otherwise, see :ref:`model_selection` for the range of supported options; + if a single model is specified it should be a classifier if `discrete_outcome` is True + and a regressor otherwise - - 'linear' - LogisticRegressionCV if discrete_treatment=True else WeightedLassoCVWrapper - - 'forest' - RandomForestClassifier if discrete_treatment=True else RandomForestRegressor - - If list, will perform model selection on the supplied list, which can be a mix of str and estimators, \ - and then use the best estimator for fitting. - - If 'auto', model will select over linear and forest models + model_t_xw: estimator, default ``'auto'`` + Determines how to fit the treatment to the features and controls (:math:`\\E[T | X, W]`). - User-supplied estimators should support 'fit' and 'predict' methods, - and additionally 'predict_proba' if discrete_treatment=True. + - If ``'auto'``, the model will be the best-fitting of a set of linear and forest models - model_t_xwz : estimator, {'linear', 'forest'}, list of str/estimator, or 'auto', default 'auto' - Model to estimate :math:`\\E[T | X, W, Z]`. + - Otherwise, see :ref:`model_selection` for the range of supported options; + if a single model is specified it should be a classifier if `discrete_treatment` is True + and a regressor otherwise - - If an estimator, will use the model as is for fitting. - - If str, will use model associated with the keyword. + model_t_xwz: estimator, default ``'auto'`` + Determines how to fit the treatment to the features, controls, and instrument (:math:`\\E[T | X, W, Z]`). - - 'linear' - LogisticRegressionCV if discrete_treatment=True else WeightedLassoCVWrapper - - 'forest' - RandomForestClassifier if discrete_treatment=True else RandomForestRegressor - - If list, will perform model selection on the supplied list, which can be a mix of str and estimators, \ - and then use the best estimator for fitting. - - If 'auto', model will select over linear and forest models + - If ``'auto'``, the model will be the best-fitting of a set of linear and forest models - User-supplied estimators should support 'fit' and 'predict' methods, - and additionally 'predict_proba' if discrete_treatment=True. 
+ - Otherwise, see :ref:`model_selection` for the range of supported options; + if a single model is specified it should be a classifier if `discrete_treatment` is True + and a regressor otherwise model_final : estimator final model for predicting :math:`\\tilde{Y}` from X with sample weights V(X) @@ -1592,7 +1545,7 @@ def true_heterogeneity_function(X): est.fit(Y=y, T=T, Z=Z, X=X) >>> est.effect(X[:3]) - array([-6.18157..., 8.70189..., -4.06004...]) + array([-5.98517..., 9.03610..., -3.56684...]) """ diff --git a/econml/iv/dr/_dr.py b/econml/iv/dr/_dr.py index 8b205a747..0de96c444 100644 --- a/econml/iv/dr/_dr.py +++ b/econml/iv/dr/_dr.py @@ -44,15 +44,13 @@ def _combine(W, Z, n_samples): class _BaseDRIVNuisanceSelector(ModelSelector): - def __init__(self, *, prel_model_effect, model_y_xw, model_t_xw, model_tz_xw, model_z, - projection, fit_cov_directly, + def __init__(self, *, prel_model_effect, model_y_xw, model_t_xw, model_z, + projection, discrete_treatment, discrete_instrument): self._prel_model_effect = prel_model_effect self._model_y_xw = model_y_xw self._model_t_xw = model_t_xw - self._model_tz_xw = model_tz_xw self._projection = projection - self._fit_cov_directly = fit_cov_directly self._discrete_treatment = discrete_treatment self._discrete_instrument = discrete_instrument if self._projection: @@ -60,92 +58,20 @@ def __init__(self, *, prel_model_effect, model_y_xw, model_t_xw, model_tz_xw, mo else: self._model_z_xw = model_z - def train(self, is_selecting, Y, T, X=None, W=None, Z=None, sample_weight=None, groups=None): + def train(self, is_selecting, folds, Y, T, X=None, W=None, Z=None, sample_weight=None, groups=None): # T and Z only allow single continuous or binary, keep the shape of (n,) for continuous and (n,1) for binary T = T.ravel() if not self._discrete_treatment else T Z = Z.ravel() if not self._discrete_instrument else Z - self._model_y_xw.train(is_selecting, X=X, W=W, Target=Y, sample_weight=sample_weight, groups=groups) - self._model_t_xw.train(is_selecting, X=X, W=W, Target=T, sample_weight=sample_weight, groups=groups) - if is_selecting and self._fit_cov_directly: - # need to fit, too, since we call predict later inside this train method - self._model_t_xw.train(False, X=X, W=W, Target=T, sample_weight=sample_weight, groups=groups) + self._model_y_xw.train(is_selecting, folds, X=X, W=W, Target=Y, sample_weight=sample_weight, groups=groups) + self._model_t_xw.train(is_selecting, folds, X=X, W=W, Target=T, sample_weight=sample_weight, groups=groups) if self._projection: WZ = _combine(W, Z, Y.shape[0]) - self._model_t_xwz.train(is_selecting, X=X, W=WZ, Target=T, sample_weight=sample_weight, groups=groups) - if is_selecting: - # need to fit, too, since we call predict later inside this train method - self._model_t_xwz.train(False, X=X, W=WZ, Target=T, sample_weight=sample_weight, groups=groups) - else: - self._model_z_xw.train(is_selecting, X=X, W=W, Target=Z, sample_weight=sample_weight, groups=groups) - if is_selecting: - # need to fit, too, since we call predict later inside this train method - self._model_z_xw.train(False, X=X, W=W, Target=Z, sample_weight=sample_weight, groups=groups) - - if self._projection: - T_proj = self._model_t_xwz.predict(X, WZ).reshape(T.shape) - if self._fit_cov_directly: - # We're projecting, so we're treating E[T|X,Z] as the instrument (ignoring W for simplicity) - # Then beta(X) = E[T̃ (E[T|X,Z]-E[E[T|X,Z]|X)|X] and we can apply the tower rule several times to get - # = E[(E[T|X,Z]-E[T|X])^2|X] - # and also = E[(E[T|X,Z]-T)^2|X] - # 
so we can compute it either from (T_proj-T_pred)^2 or from (T_proj-T)^2 - T_pred = self._model_t_xw.predict(X, W) - if X is None and W is None: - T_pred = np.broadcast_to(T_pred, T.shape) - else: - T_pred = T_pred.reshape(T.shape) - target = (T_proj - T_pred)**2 - self._model_tz_xw.train(is_selecting, X=X, W=W, Target=target, - sample_weight=sample_weight, groups=groups) - else: - # return shape (n,) - target = (T * T_proj).reshape(T.shape[0],) - self._model_tz_xw.train(is_selecting, X=X, W=W, Target=target, - sample_weight=sample_weight, groups=groups) + self._model_t_xwz.train(is_selecting, folds, X=X, W=WZ, Target=T, + sample_weight=sample_weight, groups=groups) else: - if self._fit_cov_directly: - Z_pred = self._model_z_xw.predict(X, W) - T_pred = self._model_t_xw.predict(X, W) - - if X is None and W is None: - Z_pred = np.broadcast_to(Z_pred, Z.shape) - T_pred = np.broadcast_to(T_pred, T.shape) - else: - Z_pred = Z_pred.reshape(Z.shape) - T_pred = T_pred.reshape(T.shape) - - Z_res = Z - Z_pred - T_res = T - T_pred - - # need to avoid erroneous broadcasting when one of Z_res or T_res is (n,1) and the other is (n,) - assert Z_res.shape[0] == T_res.shape[0] and (Z_res.ndim == 1 or Z_res.shape[1:] == ( - 1,)) and (T_res.ndim == 1 or T_res.shape[1:] == (1,)) - target_shape = Z_res.shape if Z_res.ndim > 1 else T_res.shape - target = T_res.reshape(target_shape) * Z_res.reshape(target_shape) - # TODO: if the T and Z models overfit, then this will be biased towards 0; - # consider using nested cross-fitting - # a similar comment applies to the projection case - self._model_tz_xw.train(is_selecting, X=X, W=W, Target=target, - sample_weight=sample_weight, groups=groups) - else: - if self._discrete_treatment: - if self._discrete_instrument: - # target will be discrete and will be inversed from FirstStageWrapper, shape (n,1) - target = T * Z - else: - # shape (n,) - target = inverse_onehot(T) * Z - else: - if self._discrete_instrument: - # shape (n,) - target = T * inverse_onehot(Z) - else: - # shape(n,) - target = T * Z - self._model_tz_xw.train(is_selecting, X=X, W=W, Target=target, - sample_weight=sample_weight, groups=groups) + self._model_z_xw.train(is_selecting, folds, X=X, W=W, Target=Z, sample_weight=sample_weight, groups=groups) # TODO: prel_model_effect could allow sample_var and freq_weight? 
if self._discrete_instrument: @@ -187,14 +113,7 @@ def score(self, Y, T, X=None, W=None, Z=None, sample_weight=None, groups=None): else: t_xwz_score = None - if hasattr(self._model_tz_xw, 'score'): - T_proj = self._model_t_xwz.predict(X, WZ).reshape(T.shape) - # if discrete, return shape (n,1); if continuous return shape (n,) - target = (T * T_proj).reshape(T.shape[0],) - tz_xw_score = self._model_tz_xw.score(X=X, W=W, Target=target, sample_weight=sample_weight) - else: - tz_xw_score = None - return y_xw_score, t_xw_score, t_xwz_score, tz_xw_score, effect_score + return y_xw_score, t_xw_score, t_xwz_score, effect_score else: if hasattr(self._model_z_xw, 'score'): @@ -202,28 +121,11 @@ def score(self, Y, T, X=None, W=None, Z=None, sample_weight=None, groups=None): else: z_xw_score = None - if hasattr(self._model_tz_xw, 'score'): - if self._discrete_treatment: - if self._discrete_instrument: - # target will be discrete and will be inversed from FirstStageWrapper - target = T * Z - else: - target = inverse_onehot(T) * Z - else: - if self._discrete_instrument: - target = T * inverse_onehot(Z) - else: - target = T * Z - tz_xw_score = self._model_tz_xw.score(X=X, W=W, Target=target, sample_weight=sample_weight) - else: - tz_xw_score = None - - return y_xw_score, t_xw_score, z_xw_score, tz_xw_score, effect_score + return y_xw_score, t_xw_score, z_xw_score, effect_score def predict(self, Y, T, X=None, W=None, Z=None, sample_weight=None, groups=None): Y_pred = self._model_y_xw.predict(X, W) T_pred = self._model_t_xw.predict(X, W) - TZ_pred = self._model_tz_xw.predict(X, W) prel_theta = self._prel_model_effect.effect(X) if X is None: @@ -231,7 +133,6 @@ def predict(self, Y, T, X=None, W=None, Z=None, sample_weight=None, groups=None) if W is None: Y_pred = np.tile(Y_pred.reshape(1, -1), (Y.shape[0], 1)) T_pred = np.tile(T_pred.reshape(1, -1), (Y.shape[0], 1)) - TZ_pred = np.tile(TZ_pred.reshape(1, -1), (Y.shape[0], 1)) # for convenience, reshape Z,T to a vector since they are either binary or single dimensional continuous T = T.reshape(T.shape[0],) @@ -239,7 +140,6 @@ def predict(self, Y, T, X=None, W=None, Z=None, sample_weight=None, groups=None) # reshape the predictions Y_pred = Y_pred.reshape(Y.shape) T_pred = T_pred.reshape(T.shape) - TZ_pred = TZ_pred.reshape(T.shape) Y_res = Y - Y_pred T_res = T - T_pred @@ -249,30 +149,135 @@ def predict(self, Y, T, X=None, W=None, Z=None, sample_weight=None, groups=None) WZ = _combine(W, Z, Y.shape[0]) T_proj = self._model_t_xwz.predict(X, WZ).reshape(T.shape) Z_res = T_proj - T_pred + else: + Z_pred = self._model_z_xw.predict(X, W) + if X is None and W is None: + Z_pred = np.tile(Z_pred.reshape(1, -1), (Z.shape[0], 1)) + Z_pred = Z_pred.reshape(Z.shape) + Z_res = Z - Z_pred + + # check nuisances outcome shape + # Y_res could be a vector or 1-dimensional 2d-array + assert T_res.ndim == 1, "Nuisance outcome should be vector!" + assert Z_res.ndim == 1, "Nuisance outcome should be vector!" 
+ + return prel_theta, Y_res, T_res, Z_res + + @property + def needs_fit(self): + return (self._model_y_xw.needs_fit or self._model_t_xw.needs_fit or + (self._projection and self._model_t_xwz.needs_fit) or + (not self._projection and self._model_z_xw.needs_fit)) + + +class _BaseDRIVNuisanceCovarianceSelector(ModelSelector): + def __init__(self, *, model_tz_xw, + projection, fit_cov_directly, + discrete_treatment, discrete_instrument): + self._model_tz_xw = model_tz_xw + self._projection = projection + self._fit_cov_directly = fit_cov_directly + self._discrete_treatment = discrete_treatment + self._discrete_instrument = discrete_instrument + + def _get_target(self, T_res, Z_res, T, Z): + T = T.ravel() if not self._discrete_treatment else T + Z = Z.ravel() if not self._discrete_instrument else Z + if self._projection: + if self._fit_cov_directly: + # We're projecting, so we're treating E[T|X,Z] as the instrument (ignoring W for simplicity) + # Then beta(X) = E[T̃ (E[T|X,Z]-E[E[T|X,Z]|X)|X] and we can apply the tower rule several times to get + # = E[(E[T|X,Z]-E[T|X])^2|X] + # and also = E[(E[T|X,Z]-T)^2|X] + # so we can compute it either from (T_proj-T_pred)^2 or from (T_proj-T)^2 + # The first of these is just Z_res^2 + target = Z_res**2 + else: + # fit on T*T_proj, covariance will be computed by E[T_res * T_proj] = E[T*T_proj] - E[T]^2 + # return shape (n,) + T_pred = T - T_res.reshape(T.shape) + T_proj = T_pred + Z_res.reshape(T.shape) + target = (T * T_proj).reshape(T.shape[0],) + else: + if self._fit_cov_directly: + # we will fit on the covariance (T_res*Z_res) directly + target_shape = Z_res.shape if Z_res.ndim > 1 else T_res.shape + target = T_res.reshape(target_shape) * Z_res.reshape(target_shape) + else: + # fit on TZ, covariance will be computed by E[T_res * Z_res] = TZ_pred - T_pred * Z_pred + if self._discrete_treatment: + if self._discrete_instrument: + # target will be discrete and will be inversed from FirstStageWrapper, shape (n,1) + target = T * Z + else: + # shape (n,) + target = inverse_onehot(T) * Z + else: + if self._discrete_instrument: + # shape (n,) + target = T * inverse_onehot(Z) + else: + # shape(n,) + target = T * Z + return target + + def train(self, is_selecting, folds, + prel_theta, Y_res, T_res, Z_res, + Y, T, X=None, W=None, Z=None, sample_weight=None, groups=None): + # T and Z only allow single continuous or binary, keep the shape of (n,) for continuous and (n,1) for binary + target = self._get_target(T_res, Z_res, T, Z) + self._model_tz_xw.train(is_selecting, folds, X=X, W=W, Target=target, + sample_weight=sample_weight, groups=groups) + + return self + + def score(self, prel_theta, Y_res, T_res, Z_res, Y, T, X=None, W=None, Z=None, sample_weight=None, groups=None): + # T and Z only allow single continuous or binary, keep the shape of (n,) for continuous and (n,1) for binary + if hasattr(self._model_tz_xw, 'score'): + target = self._get_target(T_res, Z_res, T, Z) + tz_xw_score = self._model_tz_xw.score(X=X, W=W, Target=target, sample_weight=sample_weight) + else: + tz_xw_score = None + + return (tz_xw_score,) + + def predict(self, prel_theta, Y_res, T_res, Z_res, Y, T, X=None, W=None, Z=None, sample_weight=None, groups=None): + TZ_pred = self._model_tz_xw.predict(X, W) + + if X is None and W is None: + TZ_pred = np.tile(TZ_pred.reshape(1, -1), (Y.shape[0], 1)) + + # for convenience, reshape Z,T to a vector since they are either binary or single dimensional continuous + T = T.reshape(T.shape[0],) + Z = Z.reshape(Z.shape[0],) + + # reshape the predictions + 
TZ_pred = TZ_pred.reshape(T.shape) + + if self._projection: if self._fit_cov_directly: cov = TZ_pred else: + T_pred = T - T_res cov = TZ_pred - T_pred**2 # in the projection case, this is a variance and should always be non-negative cov = np.maximum(cov, 0) else: - Z_pred = self._model_z_xw.predict(X, W) - if X is None and W is None: - Z_pred = np.tile(Z_pred.reshape(1, -1), (Z.shape[0], 1)) - Z_pred = Z_pred.reshape(Z.shape) - Z_res = Z - Z_pred if self._fit_cov_directly: cov = TZ_pred else: + T_pred = T - T_res + Z_pred = Z - Z_res cov = TZ_pred - T_pred * Z_pred # check nuisances outcome shape - # Y_res could be a vector or 1-dimensional 2d-array - assert T_res.ndim == 1, "Nuisance outcome should be vector!" - assert Z_res.ndim == 1, "Nuisance outcome should be vector!" assert cov.ndim == 1, "Nuisance outcome should be vector!" - return prel_theta, Y_res, T_res, Z_res, cov + return (cov,) + + @property + def needs_fit(self): + return self._model_tz_xw.needs_fit class _BaseDRIVModelFinal: @@ -693,15 +698,18 @@ def _gen_ortho_learner_model_nuisance(self): is_discrete=self.discrete_instrument, random_state=self.random_state) - return _BaseDRIVNuisanceSelector(prel_model_effect=self._gen_prel_model_effect(), - model_y_xw=model_y_xw, - model_t_xw=model_t_xw, - model_tz_xw=model_tz_xw, - model_z=model_z, - projection=self.projection, - fit_cov_directly=self.fit_cov_directly, - discrete_treatment=self.discrete_treatment, - discrete_instrument=self.discrete_instrument) + return [_BaseDRIVNuisanceSelector(prel_model_effect=self._gen_prel_model_effect(), + model_y_xw=model_y_xw, + model_t_xw=model_t_xw, + model_z=model_z, + projection=self.projection, + discrete_treatment=self.discrete_treatment, + discrete_instrument=self.discrete_instrument), + _BaseDRIVNuisanceCovarianceSelector(model_tz_xw=model_tz_xw, + projection=self.projection, + fit_cov_directly=self.fit_cov_directly, + discrete_treatment=self.discrete_treatment, + discrete_instrument=self.discrete_instrument)] class DRIV(_DRIV): @@ -711,83 +719,51 @@ class DRIV(_DRIV): Parameters ---------- - model_y_xw : estimator, {'linear', 'forest'}, list of str/estimator, or 'auto' - model to estimate :math:`\\E[Y | X, W]`. - - - If an estimator, will use the model as is for fitting. - - If str, will use model associated with the keyword. - - - 'linear' - LogisticRegressionCV if discrete_outcome=True else WeightedLassoCVWrapper - - 'forest' - RandomForestClassifier if discrete_outcome=True else RandomForestRegressor - - If list, will perform model selection on the supplied list, which can be a mix of str and estimators, \ - and then use the best estimator for fitting. - - If 'auto', model will select over linear and forest models - - User-supplied estimators should support 'fit' and 'predict' methods, - and additionally 'predict_proba' if discrete_outcome=True. + model_y_xw: estimator, default ``'auto'`` + Determines how to fit the outcome to the features and controls (:math:`\\E[Y | X, W]`). - model_t_xw : estimator, {'linear', 'forest'}, list of str/estimator, or 'auto', default 'auto' - Model to estimate :math:`\\E[T | X, W]`. + - If ``'auto'``, the model will be the best-fitting of a set of linear and forest models - - If an estimator, will use the model as is for fitting. - - If str, will use model associated with the keyword. 
+ - Otherwise, see :ref:`model_selection` for the range of supported options; + if a single model is specified it should be a classifier if `discrete_outcome` is True + and a regressor otherwise - - 'linear' - LogisticRegressionCV if discrete_treatment=True else WeightedLassoCVWrapper - - 'forest' - RandomForestClassifier if discrete_treatment=True else RandomForestRegressor - - If list, will perform model selection on the supplied list, which can be a mix of str and estimators, \ - and then use the best estimator for fitting. - - If 'auto', model will select over linear and forest models + model_t_xw: estimator, default ``'auto'`` + Determines how to fit the treatment to the features and controls (:math:`\\E[T | X, W]`). - User-supplied estimators should support 'fit' and 'predict' methods, - and additionally 'predict_proba' if discrete_treatment=True. + - If ``'auto'``, the model will be the best-fitting of a set of linear and forest models - model_z_xw : estimator, {'linear', 'forest'}, list of str/estimator, or 'auto' - model to estimate :math:`\\E[Z | X, W]`. + - Otherwise, see :ref:`model_selection` for the range of supported options; + if a single model is specified it should be a classifier if `discrete_treatment` is True + and a regressor otherwise - - If an estimator, will use the model as is for fitting. - - If str, will use model associated with the keyword. + model_z_xw: estimator, default ``'auto'`` + Determines how to fit the instrument to the features and controls (:math:`\\E[Z | X, W]`). - - 'linear' - LogisticRegressionCV if discrete_instrument=True else WeightedLassoCVWrapper - - 'forest' - RandomForestClassifier if discrete_instrument=True else RandomForestRegressor - - If list, will perform model selection on the supplied list, which can be a mix of str and estimators, \ - and then use the best estimator for fitting. - - If 'auto', model will select over linear and forest models + - If ``'auto'``, the model will be the best-fitting of a set of linear and forest models - User-supplied estimators should support 'fit' and 'predict' methods, - and additionally 'predict_proba' if discrete_instrument=True. + - Otherwise, see :ref:`model_selection` for the range of supported options; + if a single model is specified it should be a classifier if `discrete_instrument` is True + and a regressor otherwise - model_t_xwz : estimator, {'linear', 'forest'}, list of str/estimator, or 'auto', default 'auto' - Model to estimate :math:`\\E[T | X, W, Z]`. + model_t_xwz: estimator, default ``'auto'`` + Determines how to fit the treatment to the features, controls, and instrument (:math:`\\E[T | X, W, Z]`). - - If an estimator, will use the model as is for fitting. - - If str, will use model associated with the keyword. + - If ``'auto'``, the model will be the best-fitting of a set of linear and forest models - - 'linear' - LogisticRegressionCV if discrete_treatment=True else WeightedLassoCVWrapper - - 'forest' - RandomForestClassifier if discrete_treatment=True else RandomForestRegressor - - If list, will perform model selection on the supplied list, which can be a mix of str and estimators, \ - and then use the best estimator for fitting. 
- - If 'auto', model will select over linear and forest models + - Otherwise, see :ref:`model_selection` for the range of supported options; + if a single model is specified it should be a classifier if `discrete_treatment` is True + and a regressor otherwise - User-supplied estimators should support 'fit' and 'predict' methods, - and additionally 'predict_proba' if discrete_treatment=True. + model_tz_xw: estimator, default ``'auto'`` + Determines how to fit the covariance to the features and controls (:math:`\\E[T*Z | X, W]` or + :math:`\\E[\\tilde{T}*\\tilde{Z} | X, W]` depending on `fit_cov_directly`). - model_tz_xw : estimator, {'linear', 'forest'}, list of str/estimator, or 'auto', default 'auto' - model to estimate :math:`\\E[T*Z | X, W]` or :math:`\\E[\\tilde{T}*\\tilde{Z} | X, W]` - depending on `fit_cov_directly`. - Target will be discrete if discrete instrument and discrete treatment with `fit_cov_directly=False`, - else target will be continuous. + - If ``'auto'``, the model will be the best-fitting of a set of linear and forest models - - If an estimator, will use the model as is for fitting. - - If str, will use model associated with the keyword. - - - 'linear' - LogisticRegressionCV if discrete target else WeightedLassoCVWrapper - - 'forest' - RandomForestClassifier if discrete target else RandomForestRegressor - - If list, will perform model selection on the supplied list, which can be a mix of str and estimators, \ - and then use the best estimator for fitting. - - If 'auto', model will select over linear and forest models - - User-supplied estimators should support 'fit' and 'predict' methods, - and additionally 'predict_proba' if discrete target. + - Otherwise, see :ref:`model_selection` for the range of supported options; + if a single model is specified it should be a classifier if `discrete_treatment` is True + and a regressor otherwise fit_cov_directly : bool, default True Whether to fit :math:`\\E[\\tilde{T}*\\tilde{Z} | X, W]` instead of :math:`\\E[T*Z | X, W]`. @@ -934,7 +910,7 @@ def true_heterogeneity_function(X): est.fit(Y=y, T=T, Z=Z, X=X) >>> est.effect(X[:3]) - array([-4.07330..., 6.01693..., -2.71813...]) + array([-4.15079..., 5.99291..., -2.86514...]) """ def __init__(self, *, @@ -1022,6 +998,7 @@ def _gen_prel_model_effect(self): opt_reweighted=self.prel_opt_reweighted, discrete_instrument=self.discrete_instrument, discrete_treatment=self.discrete_treatment, + discrete_outcome=self.discrete_outcome, categories=self.categories, cv=self.prel_cv, mc_iters=self.mc_iters, @@ -1035,6 +1012,7 @@ def _gen_prel_model_effect(self): model_final=clone(self.flexible_model_effect, safe=False), discrete_instrument=self.discrete_instrument, discrete_treatment=self.discrete_treatment, + discrete_outcome=self.discrete_outcome, featurizer=self._gen_featurizer(), categories=self.categories, cv=self.prel_cv, @@ -1111,7 +1089,7 @@ def models_y_xw(self): iterations, each element in the sublist corresponds to a crossfitting fold and is the model instance that was fitted for that training fold. """ - return [[mdl._model_y_xw.best_model._model for mdl in mdls] for mdls in super().models_nuisance_] + return [[mdl._model_y_xw.best_model._model for mdl in mdls[0]] for mdls in super().models_nuisance_] @property def models_t_xw(self): @@ -1125,7 +1103,7 @@ def models_t_xw(self): iterations, each element in the sublist corresponds to a crossfitting fold and is the model instance that was fitted for that training fold. 
""" - return [[mdl._model_t_xw.best_model._model for mdl in mdls] for mdls in super().models_nuisance_] + return [[mdl._model_t_xw.best_model._model for mdl in mdls[0]] for mdls in super().models_nuisance_] @property def models_z_xw(self): @@ -1141,7 +1119,7 @@ def models_z_xw(self): """ if self.projection: raise AttributeError("Projection model is fitted for instrument! Use models_t_xwz.") - return [[mdl._model_z_xw.best_model._model for mdl in mdls] for mdls in super().models_nuisance_] + return [[mdl._model_z_xw.best_model._model for mdl in mdls[0]] for mdls in super().models_nuisance_] @property def models_t_xwz(self): @@ -1157,7 +1135,7 @@ def models_t_xwz(self): """ if not self.projection: raise AttributeError("Direct model is fitted for instrument! Use models_z_xw.") - return [[mdl._model_t_xwz.best_model._model for mdl in mdls] for mdls in super().models_nuisance_] + return [[mdl._model_t_xwz.best_model._model for mdl in mdls[0]] for mdls in super().models_nuisance_] @property def models_tz_xw(self): @@ -1171,7 +1149,7 @@ def models_tz_xw(self): iterations, each element in the sublist corresponds to a crossfitting fold and is the model instance that was fitted for that training fold. """ - return [[mdl._model_tz_xw.best_model._model for mdl in mdls] for mdls in super().models_nuisance_] + return [[mdl._model_tz_xw.best_model._model for mdl in mdls[1]] for mdls in super().models_nuisance_] @property def models_prel_model_effect(self): @@ -1185,7 +1163,7 @@ def models_prel_model_effect(self): of monte carlo iterations, each element in the sublist corresponds to a crossfitting fold and is the model instance that was fitted for that training fold. """ - return [[mdl._prel_model_effect for mdl in mdls] for mdls in super().models_nuisance_] + return [[mdl._prel_model_effect for mdl in mdls[0]] for mdls in super().models_nuisance_] @property def nuisance_scores_y_xw(self): @@ -1220,16 +1198,16 @@ def nuisance_scores_t_xwz(self): return self.nuisance_scores_[2] @property - def nuisance_scores_tz_xw(self): + def nuisance_scores_prel_model_effect(self): """ - Get the scores for tz_xw model on the out-of-sample training data + Get the scores for prel_model_effect model on the out-of-sample training data """ return self.nuisance_scores_[3] @property - def nuisance_scores_prel_model_effect(self): + def nuisance_scores_tz_xw(self): """ - Get the scores for prel_model_effect model on the out-of-sample training data + Get the scores for tz_xw model on the out-of-sample training data """ return self.nuisance_scores_[4] @@ -1244,83 +1222,51 @@ class LinearDRIV(StatsModelsCateEstimatorMixin, DRIV): Parameters ---------- - model_y_xw : estimator, {'linear', 'forest'}, list of str/estimator, or 'auto' - model to estimate :math:`\\E[Y | X, W]`. + model_y_xw: estimator, default ``'auto'`` + Determines how to fit the outcome to the features and controls (:math:`\\E[Y | X, W]`). - - If an estimator, will use the model as is for fitting. - - If str, will use model associated with the keyword. + - If ``'auto'``, the model will be the best-fitting of a set of linear and forest models - - 'linear' - LogisticRegressionCV if discrete_outcome=True else WeightedLassoCVWrapper - - 'forest' - RandomForestClassifier if discrete_outcome=True else RandomForestRegressor - - If list, will perform model selection on the supplied list, which can be a mix of str and estimators, \ - and then use the best estimator for fitting. 
- - If 'auto', model will select over linear and forest models + - Otherwise, see :ref:`model_selection` for the range of supported options; + if a single model is specified it should be a classifier if `discrete_outcome` is True + and a regressor otherwise - User-supplied estimators should support 'fit' and 'predict' methods, - and additionally 'predict_proba' if discrete_outcome=True. + model_t_xw: estimator, default ``'auto'`` + Determines how to fit the treatment to the features and controls (:math:`\\E[T | X, W]`). - model_t_xw : estimator, {'linear', 'forest'}, list of str/estimator, or 'auto', default 'auto' - model to estimate :math:`\\E[T | X, W]`. + - If ``'auto'``, the model will be the best-fitting of a set of linear and forest models - - If an estimator, will use the model as is for fitting. - - If str, will use model associated with the keyword. + - Otherwise, see :ref:`model_selection` for the range of supported options; + if a single model is specified it should be a classifier if `discrete_treatment` is True + and a regressor otherwise - - 'linear' - LogisticRegressionCV if discrete_treatment=True else WeightedLassoCVWrapper - - 'forest' - RandomForestClassifier if discrete_treatment=True else RandomForestRegressor - - If list, will perform model selection on the supplied list, which can be a mix of str and estimators, \ - and then use the best estimator for fitting. - - If 'auto', model will select over linear and forest models + model_z_xw: estimator, default ``'auto'`` + Determines how to fit the instrument to the features and controls (:math:`\\E[Z | X, W]`). - User-supplied estimators should support 'fit' and 'predict' methods, - and additionally 'predict_proba' if discrete_treatment=True. + - If ``'auto'``, the model will be the best-fitting of a set of linear and forest models - model_z_xw : estimator, {'linear', 'forest'}, list of str/estimator, or 'auto' - model to estimate :math:`\\E[Z | X, W]`. + - Otherwise, see :ref:`model_selection` for the range of supported options; + if a single model is specified it should be a classifier if `discrete_instrument` is True + and a regressor otherwise - - If an estimator, will use the model as is for fitting. - - If str, will use model associated with the keyword. + model_t_xwz: estimator, default ``'auto'`` + Determines how to fit the treatment to the features, controls, and instrument (:math:`\\E[T | X, W, Z]`). - - 'linear' - LogisticRegressionCV if discrete_instrument=True else WeightedLassoCVWrapper - - 'forest' - RandomForestClassifier if discrete_instrument=True else RandomForestRegressor - - If list, will perform model selection on the supplied list, which can be a mix of str and estimators, \ - and then use the best estimator for fitting. - - If 'auto', model will select over linear and forest models + - If ``'auto'``, the model will be the best-fitting of a set of linear and forest models - User-supplied estimators should support 'fit' and 'predict' methods, - and additionally 'predict_proba' if discrete_instrument=True. + - Otherwise, see :ref:`model_selection` for the range of supported options; + if a single model is specified it should be a classifier if `discrete_treatment` is True + and a regressor otherwise - model_t_xwz : estimator, {'linear', 'forest'}, list of str/estimator, or 'auto', default 'auto' - model to estimate :math:`\\E[T | X, W, Z]`. 
+ model_tz_xw: estimator, default ``'auto'`` + Determines how to fit the covariance to the features and controls (:math:`\\E[T*Z | X, W]` or + :math:`\\E[\\tilde{T}*\\tilde{Z} | X, W]` depending on `fit_cov_directly`). - - If an estimator, will use the model as is for fitting. - - If str, will use model associated with the keyword. + - If ``'auto'``, the model will be the best-fitting of a set of linear and forest models - - 'linear' - LogisticRegressionCV if discrete_treatment=True else WeightedLassoCVWrapper - - 'forest' - RandomForestClassifier if discrete_treatment=True else RandomForestRegressor - - If list, will perform model selection on the supplied list, which can be a mix of str and estimators, \ - and then use the best estimator for fitting. - - If 'auto', model will select over linear and forest models - - User-supplied estimators should support 'fit' and 'predict' methods, - and additionally 'predict_proba' if discrete_treatment=True. - - model_tz_xw : estimator, {'linear', 'forest'}, list of str/estimator, or 'auto', default 'auto' - model to estimate :math:`\\E[T*Z | X, W]` or :math:`\\E[\\tilde{T}*\\tilde{Z} | X, W]` - depending on `fit_cov_directly`. - Target will be discrete if discrete instrument and discrete treatment with `fit_cov_directly=False`, - else target will be continuous. - - - If an estimator, will use the model as is for fitting. - - If str, will use model associated with the keyword. - - - 'linear' - LogisticRegressionCV if discrete target else WeightedLassoCVWrapper - - 'forest' - RandomForestClassifier if discrete target else RandomForestRegressor - - If list, will perform model selection on the supplied list, which can be a mix of str and estimators, \ - and then use the best estimator for fitting. - - If 'auto', model will select over linear and forest models - - User-supplied estimators should support 'fit' and 'predict' methods, - and additionally 'predict_proba' if discrete target. + - Otherwise, see :ref:`model_selection` for the range of supported options; + if a single model is specified it should be a classifier if `discrete_treatment` is True + and a regressor otherwise fit_cov_directly : bool, default True Whether to fit :math:`\\E[\\tilde{T}*\\tilde{Z} | X, W]` instead of :math:`\\E[T*Z | X, W]`. @@ -1464,19 +1410,19 @@ def true_heterogeneity_function(X): est.fit(Y=y, T=T, Z=Z, X=X) >>> est.effect(X[:3]) - array([-4.29809..., 5.94280..., -3.00977...]) + array([-4.27803..., 5.84999..., -2.98296...]) >>> est.effect_interval(X[:3]) - (array([-7.09165..., 1.79692..., -5.46033...]), - array([-1.50452..., 10.08868..., -0.55922...])) + (array([-7.16141..., 1.71887..., -5.41441...]), + array([-1.39465..., 9.98110..., -0.55151...])) >>> est.coef_ - array([ 4.84900..., 0.82084..., 0.24269..., -0.04771..., -0.29325...]) + array([ 4.65225..., 0.93347..., 0.23315..., 0.22843..., -0.42850...]) >>> est.coef__interval() - (array([ 3.67882..., -0.35547..., -0.97063..., -1.15410..., -1.50482...]), - array([6.01917..., 1.99716..., 1.45603..., 1.05867..., 0.91831...])) + (array([ 3.40045..., -0.19165..., -0.95122..., -0.88662..., -1.56024...]), + array([5.90404..., 2.05861..., 1.41753..., 1.34349..., 0.70324...])) >>> est.intercept_ - -0.16276... + -0.12823... >>> est.intercept__interval() - (-1.32713..., 1.00160...) + (-1.27155..., 1.01508...) 
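As an illustrative aside (not taken from this changeset; the import path and estimator choices are assumptions): under the rewritten parameter docs a single estimator is still used as-is, while a list of candidates triggers selection over them, for example:

    from sklearn.ensemble import RandomForestRegressor
    from sklearn.linear_model import LassoCV
    from econml.iv.dr import LinearDRIV

    est = LinearDRIV(
        model_y_xw=LassoCV(),                             # a single estimator is used as-is
        model_t_xw=[LassoCV(), RandomForestRegressor()],  # a list selects the better candidate
        model_z_xw='auto',                                # default: choose among linear and forest models
    )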
""" def __init__(self, *, @@ -1614,83 +1560,51 @@ class SparseLinearDRIV(DebiasedLassoCateEstimatorMixin, DRIV): Parameters ---------- - model_y_xw : estimator, {'linear', 'forest'}, list of str/estimator, or 'auto' - model to estimate :math:`\\E[Y | X, W]`. - - - If an estimator, will use the model as is for fitting. - - If str, will use model associated with the keyword. + model_y_xw: estimator, default ``'auto'`` + Determines how to fit the outcome to the features and controls (:math:`\\E[Y | X, W]`). - - 'linear' - LogisticRegressionCV if discrete_outcome=True else WeightedLassoCVWrapper - - 'forest' - RandomForestClassifier if discrete_outcome=True else RandomForestRegressor - - If list, will perform model selection on the supplied list, which can be a mix of str and estimators, \ - and then use the best estimator for fitting. - - If 'auto', model will select over linear and forest models + - If ``'auto'``, the model will be the best-fitting of a set of linear and forest models - User-supplied estimators should support 'fit' and 'predict' methods, - and additionally 'predict_proba' if discrete_outcome=True. + - Otherwise, see :ref:`model_selection` for the range of supported options; + if a single model is specified it should be a classifier if `discrete_outcome` is True + and a regressor otherwise - model_t_xw : estimator, {'linear', 'forest'}, list of str/estimator, or 'auto', default 'auto' - model to estimate :math:`\\E[T | X, W]`. + model_t_xw: estimator, default ``'auto'`` + Determines how to fit the treatment to the features and controls (:math:`\\E[T | X, W]`). - - If an estimator, will use the model as is for fitting. - - If str, will use model associated with the keyword. + - If ``'auto'``, the model will be the best-fitting of a set of linear and forest models - - 'linear' - LogisticRegressionCV if discrete_treatment=True else WeightedLassoCVWrapper - - 'forest' - RandomForestClassifier if discrete_treatment=True else RandomForestRegressor - - If list, will perform model selection on the supplied list, which can be a mix of str and estimators, \ - and then use the best estimator for fitting. - - If 'auto', model will select over linear and forest models + - Otherwise, see :ref:`model_selection` for the range of supported options; + if a single model is specified it should be a classifier if `discrete_treatment` is True + and a regressor otherwise - User-supplied estimators should support 'fit' and 'predict' methods, - and additionally 'predict_proba' if discrete_treatment=True. + model_z_xw: estimator, default ``'auto'`` + Determines how to fit the instrument to the features and controls (:math:`\\E[Z | X, W]`). - model_z_xw : estimator, {'linear', 'forest'}, list of str/estimator, or 'auto' - model to estimate :math:`\\E[Z | X, W]`. + - If ``'auto'``, the model will be the best-fitting of a set of linear and forest models - - If an estimator, will use the model as is for fitting. - - If str, will use model associated with the keyword. + - Otherwise, see :ref:`model_selection` for the range of supported options; + if a single model is specified it should be a classifier if `discrete_instrument` is True + and a regressor otherwise - - 'linear' - LogisticRegressionCV if discrete_instrument=True else WeightedLassoCVWrapper - - 'forest' - RandomForestClassifier if discrete_instrument=True else RandomForestRegressor - - If list, will perform model selection on the supplied list, which can be a mix of str and estimators, \ - and then use the best estimator for fitting. 
- - If 'auto', model will select over linear and forest models + model_t_xwz: estimator, default ``'auto'`` + Determines how to fit the treatment to the features, controls, and instrument (:math:`\\E[T | X, W, Z]`). - User-supplied estimators should support 'fit' and 'predict' methods, - and additionally 'predict_proba' if discrete_instrument=True. + - If ``'auto'``, the model will be the best-fitting of a set of linear and forest models - model_t_xwz : estimator, {'linear', 'forest'}, list of str/estimator, or 'auto', default 'auto' - Model to estimate :math:`\\E[T | X, W, Z]`. + - Otherwise, see :ref:`model_selection` for the range of supported options; + if a single model is specified it should be a classifier if `discrete_treatment` is True + and a regressor otherwise - - If an estimator, will use the model as is for fitting. - - If str, will use model associated with the keyword. + model_tz_xw: estimator, default ``'auto'`` + Determines how to fit the covariance to the features and controls (:math:`\\E[T*Z | X, W]` or + :math:`\\E[\\tilde{T}*\\tilde{Z} | X, W]` depending on `fit_cov_directly`). - - 'linear' - LogisticRegressionCV if discrete_treatment=True else WeightedLassoCVWrapper - - 'forest' - RandomForestClassifier if discrete_treatment=True else RandomForestRegressor - - If list, will perform model selection on the supplied list, which can be a mix of str and estimators, \ - and then use the best estimator for fitting. - - If 'auto', model will select over linear and forest models + - If ``'auto'``, the model will be the best-fitting of a set of linear and forest models - User-supplied estimators should support 'fit' and 'predict' methods, - and additionally 'predict_proba' if discrete_treatment=True. - - model_tz_xw : estimator, {'linear', 'forest'}, list of str/estimator, or 'auto', default 'auto' - model to estimate :math:`\\E[T*Z | X, W]` or :math:`\\E[\\tilde{T}*\\tilde{Z} | X, W]` - depending on `fit_cov_directly`. - Target will be discrete if discrete instrument and discrete treatment with `fit_cov_directly=False`, - else target will be continuous. - - - If an estimator, will use the model as is for fitting. - - If str, will use model associated with the keyword. - - - 'linear' - LogisticRegressionCV if discrete target else WeightedLassoCVWrapper - - 'forest' - RandomForestClassifier if discrete target else RandomForestRegressor - - If list, will perform model selection on the supplied list, which can be a mix of str and estimators, \ - and then use the best estimator for fitting. - - If 'auto', model will select over linear and forest models - - User-supplied estimators should support 'fit' and 'predict' methods, - and additionally 'predict_proba' if discrete target. + - Otherwise, see :ref:`model_selection` for the range of supported options; + if a single model is specified it should be a classifier if `discrete_treatment` is True + and a regressor otherwise fit_cov_directly : bool, default True Whether to fit :math:`\\E[\\tilde{T}*\\tilde{Z} | X, W]` instead of :math:`\\E[T*Z | X, W]`. @@ -1864,19 +1778,19 @@ def true_heterogeneity_function(X): est.fit(Y=y, T=T, Z=Z, X=X) >>> est.effect(X[:3]) - array([-4.26791..., 5.98882..., -3.02154...]) + array([-4.23929..., 5.89223..., -3.01208...]) >>> est.effect_interval(X[:3]) - (array([-7.06828..., 2.00060..., -5.46554...]), - array([-1.46754..., 9.97704..., -0.57754...])) + (array([-6.99789..., 1.96351..., -5.41963...]), + array([-1.48069..., 9.82096..., -0.60454...])) >>> est.coef_ - array([ 4.84189..., 0.81844... 
, 0.20681..., -0.04660..., -0.28790...]) + array([ 4.65819..., 0.94689..., 0.18314..., 0.23012..., -0.40375...]) >>> est.coef__interval() - (array([ 3.68288..., -0.35434..., -0.98986..., -1.18770..., -1.48722...]), - array([6.00090..., 1.99122..., 1.40349..., 1.09449..., 0.91141...])) + (array([ 3.51647..., -0.20839..., -0.99568..., -0.89394..., -1.58518...]), + array([5.79991..., 2.10218... , 1.36197..., 1.35420... , 0.77767...])) >>> est.intercept_ - -0.12298... + -0.06539... >>> est.intercept__interval() - (-1.28204..., 1.03607...) + (-1.20716..., 1.07637...) """ def __init__(self, *, @@ -2030,83 +1944,51 @@ class ForestDRIV(ForestModelFinalCateEstimatorMixin, DRIV): Parameters ---------- - model_y_xw : estimator, {'linear', 'forest'}, list of str/estimator, or 'auto' - model to estimate :math:`\\E[Y | X, W]`. - - - If an estimator, will use the model as is for fitting. - - If str, will use model associated with the keyword. + model_y_xw: estimator, default ``'auto'`` + Determines how to fit the outcome to the features and controls (:math:`\\E[Y | X, W]`). - - 'linear' - LogisticRegressionCV if discrete_outcome=True else WeightedLassoCVWrapper - - 'forest' - RandomForestClassifier if discrete_outcome=True else RandomForestRegressor - - If list, will perform model selection on the supplied list, which can be a mix of str and estimators, \ - and then use the best estimator for fitting. - - If 'auto', model will select over linear and forest models + - If ``'auto'``, the model will be the best-fitting of a set of linear and forest models - User-supplied estimators should support 'fit' and 'predict' methods, - and additionally 'predict_proba' if discrete_outcome=True. + - Otherwise, see :ref:`model_selection` for the range of supported options; + if a single model is specified it should be a classifier if `discrete_outcome` is True + and a regressor otherwise - model_t_xw : estimator, {'linear', 'forest'}, list of str/estimator, or 'auto', default 'auto' - model to estimate :math:`\\E[T | X, W]`. + model_t_xw: estimator, default ``'auto'`` + Determines how to fit the treatment to the features and controls (:math:`\\E[T | X, W]`). - - If an estimator, will use the model as is for fitting. - - If str, will use model associated with the keyword. + - If ``'auto'``, the model will be the best-fitting of a set of linear and forest models - - 'linear' - LogisticRegressionCV if discrete_treatment=True else WeightedLassoCVWrapper - - 'forest' - RandomForestClassifier if discrete_treatment=True else RandomForestRegressor - - If list, will perform model selection on the supplied list, which can be a mix of str and estimators, \ - and then use the best estimator for fitting. - - If 'auto', model will select over linear and forest models + - Otherwise, see :ref:`model_selection` for the range of supported options; + if a single model is specified it should be a classifier if `discrete_treatment` is True + and a regressor otherwise - User-supplied estimators should support 'fit' and 'predict' methods, - and additionally 'predict_proba' if discrete_treatment=True. + model_z_xw: estimator, default ``'auto'`` + Determines how to fit the instrument to the features and controls (:math:`\\E[Z | X, W]`). - model_z_xw : estimator, {'linear', 'forest'}, list of str/estimator, or 'auto' - model to estimate :math:`\\E[Z | X, W]`. + - If ``'auto'``, the model will be the best-fitting of a set of linear and forest models - - If an estimator, will use the model as is for fitting. 
- - If str, will use model associated with the keyword. + - Otherwise, see :ref:`model_selection` for the range of supported options; + if a single model is specified it should be a classifier if `discrete_instrument` is True + and a regressor otherwise - - 'linear' - LogisticRegressionCV if discrete_instrument=True else WeightedLassoCVWrapper - - 'forest' - RandomForestClassifier if discrete_instrument=True else RandomForestRegressor - - If list, will perform model selection on the supplied list, which can be a mix of str and estimators, \ - and then use the best estimator for fitting. - - If 'auto', model will select over linear and forest models + model_t_xwz: estimator, default ``'auto'`` + Determines how to fit the treatment to the features, controls, and instrument (:math:`\\E[T | X, W, Z]`). - User-supplied estimators should support 'fit' and 'predict' methods, - and additionally 'predict_proba' if discrete_instrument=True. + - If ``'auto'``, the model will be the best-fitting of a set of linear and forest models - model_t_xwz : estimator, {'linear', 'forest'}, list of str/estimator, or 'auto', default 'auto' - model to estimate :math:`\\E[T | X, W, Z]`. + - Otherwise, see :ref:`model_selection` for the range of supported options; + if a single model is specified it should be a classifier if `discrete_treatment` is True + and a regressor otherwise - - If an estimator, will use the model as is for fitting. - - If str, will use model associated with the keyword. + model_tz_xw: estimator, default ``'auto'`` + Determines how to fit the covariance to the features and controls (:math:`\\E[T*Z | X, W]` + or :math:`\\E[\\tilde{T}*\\tilde{Z} | X, W]` depending on `fit_cov_directly`). - - 'linear' - LogisticRegressionCV if discrete_treatment=True else WeightedLassoCVWrapper - - 'forest' - RandomForestClassifier if discrete_treatment=True else RandomForestRegressor - - If list, will perform model selection on the supplied list, which can be a mix of str and estimators, \ - and then use the best estimator for fitting. - - If 'auto', model will select over linear and forest models + - If ``'auto'``, the model will be the best-fitting of a set of linear and forest models - User-supplied estimators should support 'fit' and 'predict' methods, - and additionally 'predict_proba' if discrete_treatment=True. - - model_tz_xw : estimator, {'linear', 'forest'}, list of str/estimator, or 'auto', default 'auto' - model to estimate :math:`\\E[T*Z | X, W]` or :math:`\\E[\\tilde{T}*\\tilde{Z} | X, W]` - depending on `fit_cov_directly`. - Target will be discrete if discrete instrument and discrete treatment with `fit_cov_directly=False`, - else target will be continuous. - - - If an estimator, will use the model as is for fitting. - - If str, will use model associated with the keyword. - - - 'linear' - LogisticRegressionCV if discrete target else WeightedLassoCVWrapper - - 'forest' - RandomForestClassifier if discrete target else RandomForestRegressor - - If list, will perform model selection on the supplied list, which can be a mix of str and estimators, \ - and then use the best estimator for fitting. - - If 'auto', model will select over linear and forest models - - User-supplied estimators should support 'fit' and 'predict' methods, - and additionally 'predict_proba' if discrete target. 
+ - Otherwise, see :ref:`model_selection` for the range of supported options; + if a single model is specified it should be a classifier if `discrete_treatment` is True + and a regressor otherwise fit_cov_directly : bool, default True Whether to fit :math:`\\E[\\tilde{T}*\\tilde{Z} | X, W]` instead of :math:`\\E[T*Z | X, W]`. @@ -2352,10 +2234,10 @@ def true_heterogeneity_function(X): est.fit(Y=y, T=T, Z=Z, X=X) >>> est.effect(X[:3]) - array([-1.60489..., 5.40611..., -3.46904...]) + array([-2.11667..., 6.31903..., -3.65700...]) >>> est.effect_interval(X[:3]) - (array([-5.37171..., 0.73055..., -7.15266...]), - array([ 2.16192..., 10.08168..., 0.21457...])) + (array([-5.53359..., 2.40420..., -7.14977...]), + array([ 1.30025..., 10.23385..., -0.16424...])) """ def __init__(self, *, @@ -2521,12 +2403,12 @@ def __init__(self, self._dummy_z = dummy_z self._prel_model_effect = prel_model_effect - def train(self, is_selecting, Y, T, X=None, W=None, Z=None, sample_weight=None, groups=None): - self._model_y_xw.train(is_selecting, X=X, W=W, Target=Y, sample_weight=sample_weight, groups=groups) + def train(self, is_selecting, folds, Y, T, X=None, W=None, Z=None, sample_weight=None, groups=None): + self._model_y_xw.train(is_selecting, folds, X=X, W=W, Target=Y, sample_weight=sample_weight, groups=groups) # concat W and Z WZ = _combine(W, Z, Y.shape[0]) - self._model_t_xwz.train(is_selecting, X=X, W=WZ, Target=T, sample_weight=sample_weight, groups=groups) - self._dummy_z.train(is_selecting, X=X, W=W, Target=Z, sample_weight=sample_weight, groups=groups) + self._model_t_xwz.train(is_selecting, folds, X=X, W=WZ, Target=T, sample_weight=sample_weight, groups=groups) + self._dummy_z.train(is_selecting, folds, X=X, W=W, Target=Z, sample_weight=sample_weight, groups=groups) # we need to undo the one-hot encoding for calling effect, # since it expects raw values self._prel_model_effect.fit(Y, inverse_onehot(T), Z=inverse_onehot(Z), X=X, W=W, @@ -2582,6 +2464,10 @@ def predict(self, Y, T, X=None, W=None, Z=None, sample_weight=None, groups=None) return prel_theta, Y_res, T_res, Z_res, beta + @property + def needs_fit(self): + return self._model_y_xw.needs_fit or self._model_t_xwz.needs_fit or self._dummy_z.needs_fit + class _DummyClassifier: """ @@ -2694,35 +2580,23 @@ class IntentToTreatDRIV(_IntentToTreatDRIV): Parameters ---------- - model_y_xw : estimator, {'linear', 'forest'}, list of str/estimator, or 'auto' - model to estimate :math:`\\E[Y | X, W]`. + model_y_xw: estimator, default ``'auto'`` + Determines how to fit the outcome to the features and controls (:math:`\\E[Y | X, W]`). - - If an estimator, will use the model as is for fitting. - - If str, will use model associated with the keyword. + - If ``'auto'``, the model will be the best-fitting of a set of linear and forest models - - 'linear' - LogisticRegressionCV if discrete_outcome=True else WeightedLassoCVWrapper - - 'forest' - RandomForestClassifier if discrete_outcome=True else RandomForestRegressor - - If list, will perform model selection on the supplied list, which can be a mix of str and estimators, \ - and then use the best estimator for fitting. - - If 'auto', model will select over linear and forest models + - Otherwise, see :ref:`model_selection` for the range of supported options; + if a single model is specified it should be a classifier if `discrete_outcome` is True + and a regressor otherwise - User-supplied estimators should support 'fit' and 'predict' methods, - and additionally 'predict_proba' if discrete_outcome=True. 
+ model_t_xwz: estimator, default ``'auto'`` + Determines how to fit the treatment to the features, controls, and instrument (:math:`\\E[T | X, W, Z]`). - model_t_xwz : estimator, {'linear', 'forest'}, list of str/estimator, or 'auto', default 'auto' - model to estimate :math:`\\E[T | X, W, Z]`. + - If ``'auto'``, the model will be the best-fitting of a set of linear and forest models - - If an estimator, will use the model as is for fitting. - - If str, will use model associated with the keyword. - - - 'linear' - LogisticRegressionCV if discrete_treatment=True else WeightedLassoCVWrapper - - 'forest' - RandomForestClassifier if discrete_treatment=True else RandomForestRegressor - - If list, will perform model selection on the supplied list, which can be a mix of str and estimators, \ - and then use the best estimator for fitting. - - If 'auto', model will select over linear and forest models - - User-supplied estimators should support 'fit' and 'predict' methods, - and additionally 'predict_proba' if discrete_treatment=True. + - Otherwise, see :ref:`model_selection` for the range of supported options; + if a single model is specified it should be a classifier if `discrete_treatment` is True + and a regressor otherwise flexible_model_effect : estimator or 'auto' (default is 'auto') a flexible model for a preliminary version of the CATE, must accept sample_weight at fit time. @@ -2852,7 +2726,7 @@ def true_heterogeneity_function(X): est.fit(Y=y, T=T, Z=Z, X=X) >>> est.effect(X[:3]) - array([-3.71724..., 6.39915..., -2.14545...]) + array([-4.52641..., 6.38726..., -2.67055...]) """ def __init__(self, *, @@ -3013,36 +2887,23 @@ class LinearIntentToTreatDRIV(StatsModelsCateEstimatorMixin, IntentToTreatDRIV): Parameters ---------- - model_y_xw : estimator, {'linear', 'forest'}, list of str/estimator, or 'auto' - model to estimate :math:`\\E[Y | X, W]`. - - - If an estimator, will use the model as is for fitting. - - If str, will use model associated with the keyword. - - - 'linear' - LogisticRegressionCV if discrete_outcome=True else WeightedLassoCVWrapper - - 'forest' - RandomForestClassifier if discrete_outcome=True else RandomForestRegressor - - If list, will perform model selection on the supplied list, which can be a mix of str and estimators, \ - and then use the best estimator for fitting. - - If 'auto', model will select over linear and forest models - - User-supplied estimators should support 'fit' and 'predict' methods, - and additionally 'predict_proba' if discrete_outcome=True. + model_y_xw: estimator, default ``'auto'`` + Determines how to fit the outcome to the features and controls (:math:`\\E[Y | X, W]`). + - If ``'auto'``, the model will be the best-fitting of a set of linear and forest models - model_t_xwz : estimator, {'linear', 'forest'}, list of str/estimator, or 'auto', default 'auto' - model to estimate :math:`\\E[T | X, W, Z]`. + - Otherwise, see :ref:`model_selection` for the range of supported options; + if a single model is specified it should be a classifier if `discrete_outcome` is True + and a regressor otherwise - - If an estimator, will use the model as is for fitting. - - If str, will use model associated with the keyword. + model_t_xwz: estimator, default ``'auto'`` + Determines how to fit the treatment to the features, controls, and instrument (:math:`\\E[T | X, W, Z]`). 
- - 'linear' - LogisticRegressionCV if discrete_treatment=True else WeightedLassoCVWrapper - - 'forest' - RandomForestClassifier if discrete_treatment=True else RandomForestRegressor - - If list, will perform model selection on the supplied list, which can be a mix of str and estimators, \ - and then use the best estimator for fitting. - - If 'auto', model will select over linear and forest models + - If ``'auto'``, the model will be the best-fitting of a set of linear and forest models - User-supplied estimators should support 'fit' and 'predict' methods, - and additionally 'predict_proba' if discrete_treatment=True. + - Otherwise, see :ref:`model_selection` for the range of supported options; + if a single model is specified it should be a classifier if `discrete_treatment` is True + and a regressor otherwise flexible_model_effect : estimator or 'auto' (default is 'auto') a flexible model for a preliminary version of the CATE, must accept sample_weight at fit time. @@ -3173,19 +3034,19 @@ def true_heterogeneity_function(X): est.fit(Y=y, T=T, Z=Z, X=X) >>> est.effect(X[:3]) - array([-4.05294..., 6.44603..., -2.49535...]) + array([-4.80489..., 6.10521... , -2.94904...]) >>> est.effect_interval(X[:3]) - (array([-8.42902..., 0.05595..., -6.34202...]), - array([ 0.32313..., 12.83612..., 1.35131...])) + (array([-9.20176..., -0.47031... , -6.67354...]), + array([-0.40802..., 12.68073..., 0.77546...])) >>> est.coef_ - array([ 4.99132..., 0.35043..., 0.41963..., -0.63553..., -0.33972...]) + array([ 5.52418..., 0.96276..., 0.68158..., -0.16803..., -0.13056...]) >>> est.coef__interval() - (array([ 3.11828..., -1.44768..., -1.46377..., -2.36080..., -2.18746...]), - array([6.86435..., 2.14856..., 2.30303..., 1.08973..., 1.50802...])) + (array([ 3.61373..., -0.81856..., -1.12589..., -1.90193... , -1.92331...]), + array([7.43462..., 2.74409... , 2.48906..., 1.56587..., 1.66218...])) >>> est.intercept_ - -0.25633... + -0.28940... >>> est.intercept__interval() - (-2.07961..., 1.56695...) + (-2.07653..., 1.49771...) """ def __init__(self, *, diff --git a/econml/panel/dml/_dml.py b/econml/panel/dml/_dml.py index b44bf0178..900a07ffe 100644 --- a/econml/panel/dml/_dml.py +++ b/econml/panel/dml/_dml.py @@ -45,27 +45,57 @@ def __init__(self, model_y, model_t, n_periods): self._model_t = model_t self.n_periods = n_periods - def train(self, is_selecting, Y, T, X=None, W=None, sample_weight=None, groups=None): + def train(self, is_selecting, folds, Y, T, X=None, W=None, sample_weight=None, groups=None): """Fit a series of nuisance models for each period or period pairs.""" assert Y.shape[0] % self.n_periods == 0, \ "Length of training data should be an integer multiple of time periods." 
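As an illustrative aside (the names and values below are hypothetical, not the module's API): the fold translation introduced just below assumes the panel rows for each unit are contiguous and ordered by period, so a fold expressed in original row indices can be remapped into each period's filtered sub-array:

    import numpy as np

    # three units observed over two periods; rows for each unit are contiguous
    groups = np.array([0, 0, 1, 1, 2, 2])
    # per-period row indices that a period filter would yield for this layout
    period_filters = [np.array([0, 2, 4]), np.array([1, 3, 5])]

    fold_train = np.array([2, 3, 4, 5])                          # fold in original row indices
    inds = fold_train[np.isin(fold_train, period_filters[0])]    # keep only period-0 rows: [2, 4]
    print(np.searchsorted(period_filters[0], inds))              # [1 2]: positions within period-0 data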
period_filters = _get_groups_period_filter(groups, self.n_periods) - if is_selecting: # create the per-period y and t models + if not hasattr(self, '_model_y_trained'): # create the per-period y and t models self._model_y_trained = {t: clone(self._model_y, safe=False) for t in np.arange(self.n_periods)} self._model_t_trained = {j: {t: clone(self._model_t, safe=False) for t in np.arange(j + 1)} for j in np.arange(self.n_periods)} + + # we have to filter the folds because they contain the indices in the original data, not + # the indices in the period-filtered data + + def _translate_inds(t, inds): + # translate the indices in a fold to the indices in the period-filtered data + # if groups was [3,3,4,4,5,5,6,6,1,1,2,2,0,0] (the group ids can be in any order, but the + # time periods for each group should be contiguous), and we had [10,11,0,1] as the indices in a fold + # (so the fold is taking the entries corresponding to groups 2 and 3) + # then group_period_filter(0) is [0,2,4,6,8,10,12] and gpf(1) is [1,3,5,7,9,11,13] + # so for period t=0, the fold should be [10,0] => [5,0] (the positions of entries 10 and 0 in the t=0 data) + # and for period t=1, the fold should be [11,1] => [5,0] again (the positions of entries 11 and 1 in the t=1 data) + + # filter to the indices for the time period + inds = inds[np.isin(inds, period_filters[t])] + + # now find their index in the period-filtered data, which is always sorted + return np.searchsorted(period_filters[t], inds) + + if folds is not None: + translated_folds = [] + for (train, test) in folds: + translated_folds.append((_translate_inds(0, train), _translate_inds(0, test))) + # sanity check that the folds are the same no matter the time period + for t in range(1, self.n_periods): + assert np.array_equal(_translate_inds(t, train), _translate_inds(0, train)) + assert np.array_equal(_translate_inds(t, test), _translate_inds(0, test)) + else: + translated_folds = None + for t in np.arange(self.n_periods): self._model_y_trained[t].train( - is_selecting, + is_selecting, translated_folds, self._index_or_None(X, period_filters[t]), self._index_or_None( W, period_filters[t]), Y[period_filters[self.n_periods - 1]]) for j in np.arange(t, self.n_periods): self._model_t_trained[j][t].train( - is_selecting, + is_selecting, translated_folds, self._index_or_None(X, period_filters[t]), self._index_or_None(W, period_filters[t]), T[period_filters[j]]) @@ -141,6 +171,10 @@ def _get_shape_formatter(self, X, W): def _index_or_None(self, X, filter_idx): return None if X is None else X[filter_idx] + @property + def needs_fit(self): + return self._model_t.needs_fit or self._model_y.needs_fit + class _DynamicModelFinal: """ @@ -344,35 +378,23 @@ class DynamicDML(LinearModelFinalCateEstimatorMixin, _OrthoLearner): Parameters ---------- - model_y: estimator, {'linear', 'forest'}, list of str/estimator, or 'auto' - model to estimate :math:`\\E[Y | X, W]`. - - - If an estimator, will use the model as is for fitting. - - If str, will use model associated with the keyword. + model_y: estimator, default ``'auto'`` + Determines how to fit the outcome to the features. - - 'linear' - LogisticRegressionCV if discrete_outcome=True else WeightedLassoCVWrapper - - 'forest' - RandomForestClassifier if discrete_outcome=True else RandomForestRegressor - - If list, will perform model selection on the supplied list, which can be a mix of str and estimators, \ - and then use the best estimator for fitting.
- - If 'auto', model will select over linear and forest models + - If ``'auto'``, the model will be the best-fitting of a set of linear and forest models - User-supplied estimators should support 'fit' and 'predict' methods, - and additionally 'predict_proba' if discrete_outcome=True. + - Otherwise, see :ref:`model_selection` for the range of supported options; + if a single model is specified it should be a classifier if `discrete_outcome` is True + and a regressor otherwise - model_t: estimator, {'linear', 'forest'}, list of str/estimator, or 'auto' + model_t: estimator, default ``'auto'`` Determines how to fit the treatment to the features. - - If an estimator, will use the model as is for fitting. - - If str, will use model associated with the keyword. - - - 'linear' - LogisticRegressionCV if discrete_treatment=True else WeightedLassoCVWrapper - - 'forest' - RandomForestClassifier if discrete_treatment=True else RandomForestRegressor - - If list, will perform model selection on the supplied list, which can be a mix of str and estimators, \ - and then use the best estimator for fitting. - - If 'auto', model will select over linear and forest models + - If ``'auto'``, the model will be the best-fitting of a set of linear and forest models - User-supplied estimators should support 'fit' and 'predict' methods, - and additionally 'predict_proba' if discrete_treatment=True. + - Otherwise, see :ref:`model_selection` for the range of supported options; + if a single model is specified it should be a classifier if `discrete_treatment` is True + and a regressor otherwise featurizer : :term:`transformer`, optional Must support fit_transform and transform. Used to create composite features in the final CATE regression. @@ -451,31 +473,31 @@ class DynamicDML(LinearModelFinalCateEstimatorMixin, _OrthoLearner): est.fit(y, T, X=X, W=None, groups=groups, inference="auto") >>> est.const_marginal_effect(X[:2]) - array([[-0.345..., -0.056..., -0.044..., 0.046..., -0.202..., + array([[-0.363..., -0.049..., -0.044..., 0.042..., -0.202..., 0.023...], - [-0.120..., 0.434..., 0.052..., -0.201..., -0.115..., - -0.134...]]) + [-0.128..., 0.424..., 0.050... , -0.203..., -0.115..., + -0.135...]]) >>> est.effect(X[:2], T0=0, T1=1) - array([-0.579..., -0.085...]) + array([-0.594..., -0.107...]) >>> est.effect(X[:2], T0=np.zeros((2, n_periods*T.shape[1])), T1=np.ones((2, n_periods*T.shape[1]))) - array([-0.579..., -0.085...]) + array([-0.594..., -0.107...]) >>> est.coef_ - array([[ 0.108...], - [ 0.235...], - [ 0.046...], - [-0.119...], - [ 0.042...], - [-0.075...]]) + array([[ 0.112... ], + [ 0.227...], + [ 0.045...], + [-0.118...], + [ 0.041...], + [-0.076...]]) >>> est.coef__interval() - (array([[-0.042...], - [-0.001...], + (array([[-0.060...], + [-0.008...], [-0.120...], - [-0.393...], + [-0.392...], [-0.120...], - [-0.256...]]), array([[0.258...], - [0.473...], - [0.212...], - [0.154...], + [-0.257...]]), array([[0.286...], + [0.463...], + [0.212... ], + [0.156...], [0.204...], [0.104...]])) """ diff --git a/econml/policy/_drlearner.py b/econml/policy/_drlearner.py index 2ee38c158..a3018d197 100644 --- a/econml/policy/_drlearner.py +++ b/econml/policy/_drlearner.py @@ -239,34 +239,22 @@ class takes as input the parameter ``model_regressor``, which is an arbitrary sc Parameters ---------- - model_propensity : estimator, {'linear', 'forest'}, list of str/estimator, or 'auto', default 'auto' - Estimator for Pr[T=t | X, W]. Trained by regressing treatments on (features, controls) concatenated. 
+ model_propensity: estimator, default ``'auto'`` + Classifier for Pr[T=t | X, W]. Trained by regressing treatments on (features, controls) concatenated. - - If an estimator, will use the model as is for fitting. - - If str, will use model associated with the keyword. + - If ``'auto'``, the model will be the best-fitting of a set of linear and forest models - - 'linear' - LogisticRegressionCV - - 'forest' - RandomForestClassifier - - If list, will perform model selection on the supplied list, which can be a mix of str and estimators, \ - and then use the best estimator for fitting. - - If 'auto', model will select over linear and forest models + - Otherwise, see :ref:`model_selection` for the range of supported options - User-supplied estimators should support 'fit' and 'predict', and 'predict_proba'. - - model_regression : estimator, {'linear', 'forest'}, list of str/estimator, or 'auto' + model_regression: estimator, default ``'auto'`` Estimator for E[Y | X, W, T]. Trained by regressing Y on (features, controls, one-hot-encoded treatments) concatenated. The one-hot-encoding excludes the baseline treatment. - - If an estimator, will use the model as is for fitting. - - If str, will use model associated with the keyword. - - - 'linear' - LogisticRegressionCV - - 'forest' - RandomForestClassifier - - If list, will perform model selection on the supplied list, which can be a mix of str and estimators, \ - and then use the best estimator for fitting. - - If 'auto', model will select over linear and forest models + - If ``'auto'``, the model will be the best-fitting of a set of linear and forest models - User-supplied estimators should support 'fit' and 'predict' methods. + - Otherwise, see :ref:`model_selection` for the range of supported options; + if a single model is specified it should be a classifier if `discrete_outcome` is True + and a regressor otherwise featurizer : :term:`transformer`, optional Must support fit_transform and transform. Used to create composite features in the final CATE regression. @@ -651,34 +639,22 @@ class takes as input the parameter ``model_regressor``, which is an arbitrary sc Parameters ---------- - model_propensity : estimator, {'linear', 'forest'}, list of str/estimator, or 'auto', default 'auto' - Estimator for Pr[T=t | X, W]. Trained by regressing treatments on (features, controls) concatenated. + model_propensity: estimator, default ``'auto'`` + Classifier for Pr[T=t | X, W]. Trained by regressing treatments on (features, controls) concatenated. - - If an estimator, will use the model as is for fitting. - - If str, will use model associated with the keyword. + - If ``'auto'``, the model will be the best-fitting of a set of linear and forest models - - 'linear' - LogisticRegressionCV - - 'forest' - RandomForestClassifier - - If list, will perform model selection on the supplied list, which can be a mix of str and estimators, \ - and then use the best estimator for fitting. - - If 'auto', model will select over linear and forest models + - Otherwise, see :ref:`model_selection` for the range of supported options - User-supplied estimators should support 'fit' and 'predict', and 'predict_proba'. - - model_regression : estimator, {'linear', 'forest'}, list of str/estimator, or 'auto' + model_regression: estimator, default ``'auto'`` Estimator for E[Y | X, W, T]. Trained by regressing Y on (features, controls, one-hot-encoded treatments) concatenated. The one-hot-encoding excludes the baseline treatment. 
- - If an estimator, will use the model as is for fitting. - - If str, will use model associated with the keyword. - - - 'linear' - LogisticRegressionCV - - 'forest' - RandomForestClassifier - - If list, will perform model selection on the supplied list, which can be a mix of str and estimators, \ - and then use the best estimator for fitting. - - If 'auto', model will select over linear and forest models + - If ``'auto'``, the model will be the best-fitting of a set of linear and forest models - User-supplied estimators should support 'fit' and 'predict' methods. + - Otherwise, see :ref:`model_selection` for the range of supported options; + if a single model is specified it should be a classifier if `discrete_outcome` is True + and a regressor otherwise featurizer : :term:`transformer`, optional Must support fit_transform and transform. Used to create composite features in the final CATE regression. diff --git a/econml/sklearn_extensions/model_selection.py b/econml/sklearn_extensions/model_selection.py index dda389eaf..5cab620c6 100644 --- a/econml/sklearn_extensions/model_selection.py +++ b/econml/sklearn_extensions/model_selection.py @@ -2,7 +2,10 @@ # Licensed under the MIT License. """Collection of scikit-learn extensions for model selection techniques.""" +from inspect import signature +import inspect import numbers +from typing import List, Optional import warnings import abc @@ -12,7 +15,8 @@ import sklearn from joblib import Parallel, delayed from sklearn.base import BaseEstimator, clone, is_classifier -from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor +from sklearn.ensemble import (GradientBoostingClassifier, GradientBoostingRegressor, + RandomForestClassifier, RandomForestRegressor) from sklearn.exceptions import FitFailedWarning from sklearn.linear_model import (ElasticNet, ElasticNetCV, Lasso, LassoCV, MultiTaskElasticNet, MultiTaskElasticNetCV, MultiTaskLasso, MultiTaskLassoCV, Ridge, RidgeCV, RidgeClassifier, RidgeClassifierCV, @@ -23,7 +27,9 @@ # TODO: conisder working around relying on sklearn implementation details from sklearn.model_selection._validation import (_check_is_permutation, _fit_and_predict) -from sklearn.preprocessing import LabelEncoder, StandardScaler +from sklearn.neural_network import MLPClassifier, MLPRegressor +from sklearn.pipeline import Pipeline, make_pipeline +from sklearn.preprocessing import LabelEncoder, PolynomialFeatures, StandardScaler from sklearn.utils import check_random_state, indexable from sklearn.utils.multiclass import type_of_target from sklearn.utils.validation import _num_samples @@ -276,9 +282,10 @@ class ModelSelector(metaclass=abc.ABCMeta): """ @abc.abstractmethod - def train(self, is_selecting: bool, *args, **kwargs): + def train(self, is_selecting: bool, folds: Optional[List], *args, **kwargs): """ Either selects a model or fits a model, depending on the value of `is_selecting`. + If `is_selecting` is `False`, then `folds` should not be provided because they are only during selection. 
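# --- Editorial sketch (not part of this patch): a duck-typed illustration of
# the `train`/`predict`/`needs_fit` contract described above, in the spirit of
# the test wrappers later in this diff; `FixedRegressorSelector` is a
# hypothetical name, not a class in the library. ---
from sklearn.base import clone
from sklearn.linear_model import LinearRegression

class FixedRegressorSelector:
    """Wraps a single regressor, so there is nothing to select between."""

    def __init__(self, model=None):
        self.model = model if model is not None else LinearRegression()

    def train(self, is_selecting, folds, X, y, groups=None):
        # `folds` is only meaningful during selection; with a single fixed
        # model, both passes simply fit the wrapped estimator on the data
        self.model = clone(self.model, safe=False).fit(X, y)
        return self

    def predict(self, X, y, groups=None):
        return self.model.predict(X)

    @property
    def needs_fit(self):
        # a single fixed model does not require a separate selection pass
        return False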
""" raise NotImplementedError("Abstract method") @@ -298,6 +305,15 @@ def score(self, *args, **kwargs): """ raise NotImplementedError("Abstract method") + @property + @abc.abstractmethod + def needs_fit(self): + """ + Whether the model selector needs to be fit before it can be used for prediction or scoring; + in many cases this is equivalent to whether the selector is choosing between multiple models + """ + raise NotImplementedError("Abstract method") + class SingleModelSelector(ModelSelector): """ @@ -333,7 +349,7 @@ def score(self, *args, **kwargs): return None -def _fit_with_groups(model, X, y, *, groups, **kwargs): +def _fit_with_groups(model, X, y, *, sub_model=None, groups, **kwargs): """ Fits a model while correctly handling grouping if necessary. @@ -349,8 +365,10 @@ def _fit_with_groups(model, X, y, *, groups, **kwargs): rows that GroupKFold would have generated rather than using GroupKFold as the cv instance. """ if groups is not None: - if hasattr(model, 'cv'): - old_cv = model.cv + if sub_model is None: + sub_model = model + if hasattr(sub_model, 'cv'): + old_cv = sub_model.cv # logic copied from check_cv cv = 5 if old_cv is None else old_cv if isinstance(cv, numbers.Integral): @@ -360,10 +378,10 @@ def _fit_with_groups(model, X, y, *, groups, **kwargs): splits = list(cv.split(X, y, groups=groups)) try: - model.cv = splits + sub_model.cv = splits return model.fit(X, y, **kwargs) # drop groups from arg list finally: - model.cv = old_cv + sub_model.cv = old_cv # drop groups from arg list, which were already used at the outer level and may not be supported by the model return model.fit(X, y, **kwargs) @@ -371,19 +389,31 @@ def _fit_with_groups(model, X, y, *, groups, **kwargs): class FixedModelSelector(SingleModelSelector): """ - Model selection class that always selects the given model + Model selection class that always selects the given sklearn-compatible model """ def __init__(self, model): self.model = clone(model, safe=False) - def train(self, is_selecting, *args, groups=None, **kwargs): - # whether selecting or not, need to train the model on the data - _fit_with_groups(self.model, *args, groups=groups, **kwargs) - if is_selecting and hasattr(self.model, 'score'): - # TODO: we need to alter this to use out-of-sample score here, which - # will require cross-validation, but should respect grouping, stratifying, etc. 
- self._score = self.model.score(*args, **kwargs) + def train(self, is_selecting, folds: Optional[List], X, y, groups=None, **kwargs): + if is_selecting: + # since needs_fit is False, is_selecting will only be true if + # the score needs to be compared to another model's + # so we don't need to fit the model itself, just get the out-of-sample score + assert hasattr(self.model, 'score'), (f"Can't select between a fixed {type(self.model)} model and others " + "because it doesn't have a score method") + scores = [] + for train, test in folds: + # use _fit_with_groups instead of just fit to handle nested grouping + _fit_with_groups(self.model, X[train], y[train], + groups=None if groups is None else groups[train], + **{key: val[train] for key, val in kwargs.items()}) + scores.append(self.model.score(X[test], y[test])) + self._score = np.mean(scores) + else: + # we need to train the model on the data + _fit_with_groups(self.model, X, y, groups=groups, **kwargs) + return self @property @@ -394,22 +424,22 @@ def best_model(self): def best_score(self): return self._score + @property + def needs_fit(self): + return False # We have only a single model so we can skip the selection process + def _copy_to(m1, m2, attrs, insert_underscore=False): for attr in attrs: setattr(m2, attr, getattr(m1, attr + "_" if insert_underscore else attr)) -def _convert_linear_model(model, new_cls, extra_attrs=[]): +def _convert_linear_model(model, new_cls): new_model = new_cls() # copy common parameters - _copy_to(model, new_model, ["fit_intercept", "max_iter", - "tol", - "random_state"]) + _copy_to(model, new_model, ["fit_intercept"]) # copy common fitted variables - _copy_to(model, new_model, ["coef_", "intercept_", "n_features_in_", "n_iter_"]) - # copy attributes unique to this class - _copy_to(model, new_model, extra_attrs) + _copy_to(model, new_model, ["coef_", "intercept_", "n_features_in_"]) return new_model @@ -418,7 +448,8 @@ def _to_logisticRegression(model: LogisticRegressionCV): _copy_to(model, lr, ["penalty", "dual", "intercept_scaling", "class_weight", "solver", "multi_class", - "verbose", "n_jobs"]) + "verbose", "n_jobs", + "tol", "max_iter", "random_state", "n_iter_"]) _copy_to(model, lr, ["classes_"]) _copy_to(model, lr, ["C", "l1_ratio"], True) # these are arrays in LogisticRegressionCV, need to convert them next @@ -435,21 +466,26 @@ def _to_logisticRegression(model: LogisticRegressionCV): def _convert_linear_regression(model, new_cls, extra_attrs=["positive"]): - new_model = _convert_linear_model(model, new_cls, ["copy_X", - "n_iter_"]) + new_model = _convert_linear_model(model, new_cls) _copy_to(model, new_model, ["alpha"], True) return new_model -def _to_elasticNet(model: ElasticNetCV, is_lasso=False, cls=None, extra_attrs=[]): +def _to_elasticNet(model: ElasticNetCV, args, kwargs, is_lasso=False, cls=None, extra_attrs=[]): + # We need an R^2 score to compare to other models; ElasticNetCV doesn't provide it, + # but we can calculate it ourselves from the MSE plus the variance of the target y + y = signature(model.fit).bind(*args, **kwargs).arguments["y"] cls = cls or (Lasso if is_lasso else ElasticNet) - new_model = _convert_linear_regression(model, cls, extra_attrs + ['selection', 'warm_start', - 'dual_gap_']) + new_model = _convert_linear_regression(model, cls, extra_attrs + ['selection', 'warm_start', 'dual_gap_', + 'tol', 'max_iter', 'random_state', 'n_iter_', + 'copy_X']) if not is_lasso: # l1 ratio doesn't apply to Lasso, only ElasticNet _copy_to(model, new_model, ["l1_ratio"], True) - 
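# --- Editorial check (not part of this patch): the replacement score computed
# just below relies on the identity R^2 = 1 - MSE / Var(y), which holds when
# the variance uses the population (ddof=0) convention that np.var defaults to;
# a quick stand-alone verification against sklearn's r2_score follows. In the
# CV setting the MSE is fold-averaged, so the resulting number is an
# out-of-sample analogue of this quantity. ---
import numpy as np
from sklearn.metrics import r2_score

rng = np.random.default_rng(0)
y_true = rng.normal(size=100)
y_pred = y_true + rng.normal(scale=0.3, size=100)
mse = np.mean((y_true - y_pred) ** 2)
assert np.isclose(1 - mse / np.var(y_true), r2_score(y_true, y_pred))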
max_score = np.max(np.mean(model.mse_path_, axis=-1)) # last dimension in mse_path is folds, so average over that - return new_model, max_score + # max R^2 corresponds to min MSE + min_mse = np.min(np.mean(model.mse_path_, axis=-1)) # last dimension in mse_path is folds, so average over that + r2 = 1 - min_mse / np.var(y) # R^2 = 1 - MSE / Var(y) + return new_model, r2 def _to_ridge(model, cls=Ridge, extra_attrs=["positive"]): @@ -472,38 +508,66 @@ def convertible_types(): @staticmethod def can_wrap(model): + if isinstance(model, Pipeline): + return SklearnCVSelector.can_wrap(model.steps[-1][1]) return any(isinstance(model, model_type) for model_type in SklearnCVSelector.convertible_types()) @staticmethod def _model_mapping(): - return {LogisticRegressionCV: _to_logisticRegression, - ElasticNetCV: _to_elasticNet, - LassoCV: lambda model: _to_elasticNet(model, True, None, ["positive"]), - RidgeCV: _to_ridge, - RidgeClassifierCV: lambda model: _to_ridge(model, RidgeClassifier, ["positive", "class_weight", - "_label_binarizer"]), - MultiTaskElasticNetCV: lambda model: _to_elasticNet(model, False, MultiTaskElasticNet, extra_attrs=[]), - MultiTaskLassoCV: lambda model: _to_elasticNet(model, True, MultiTaskLasso, extra_attrs=[]), - WeightedLassoCVWrapper: lambda model: _to_elasticNet(model, True, WeightedLassoWrapper, - extra_attrs=[]), + return {LogisticRegressionCV: lambda model, _args, _kwargs: _to_logisticRegression(model), + ElasticNetCV: lambda model, args, kwargs: _to_elasticNet(model, args, kwargs), + LassoCV: lambda model, args, kwargs: _to_elasticNet(model, args, kwargs, True, None, ["positive"]), + RidgeCV: lambda model, _args, _kwargs: _to_ridge(model), + RidgeClassifierCV: lambda model, _args, _kwargs: _to_ridge(model, RidgeClassifier, + ["positive", "class_weight", + "_label_binarizer"]), + MultiTaskElasticNetCV: lambda model, args, kwargs: _to_elasticNet(model, args, kwargs, + False, MultiTaskElasticNet, + extra_attrs=[]), + MultiTaskLassoCV: lambda model, args, kwargs: _to_elasticNet(model, args, kwargs, + True, MultiTaskLasso, extra_attrs=[]), + WeightedLassoCVWrapper: lambda model, args, kwargs: _to_elasticNet(model, args, kwargs, + True, WeightedLassoWrapper, + extra_attrs=[]), } - def train(self, is_selecting: bool, *args, groups=None, **kwargs): + @staticmethod + def _convert_model(model, args, kwargs): + if isinstance(model, Pipeline): + name, inner_model = model.steps[-1] + best_model, score = SklearnCVSelector._convert_model(inner_model, args, kwargs) + return Pipeline(steps=[*model.steps[:-1], (name, best_model)]), score + + if isinstance(model, GridSearchCV) or isinstance(model, RandomizedSearchCV): + return model.best_estimator_, model.best_score_ + + for known_type in SklearnCVSelector._model_mapping().keys(): + if isinstance(model, known_type): + converter = SklearnCVSelector._model_mapping()[known_type] + return converter(model, args, kwargs) + + def train(self, is_selecting: bool, folds: Optional[List], *args, groups=None, **kwargs): if is_selecting: - _fit_with_groups(self.searcher, *args, groups=groups, **kwargs) + sub_model = self.searcher + if isinstance(self.searcher, Pipeline): + sub_model = self.searcher.steps[-1][1] + + init_params = inspect.signature(sub_model.__init__).parameters + if 'cv' in init_params: + default_cv = init_params['cv'].default + else: + # constructor takes cv as a positional or kwarg, just pull it out of a new instance + default_cv = type(sub_model)().cv - if isinstance(self.searcher, GridSearchCV) or isinstance(self.searcher, 
RandomizedSearchCV): - self._best_model = self.searcher.best_estimator_ - self._best_score = self.searcher.best_score_ + if sub_model.cv != default_cv: + warnings.warn(f"Model {sub_model} has a non-default cv attribute, which will be ignored") + sub_model.cv = folds - for known_type in self._model_mapping().keys(): - if isinstance(self.searcher, known_type): - converter = self._model_mapping()[known_type] - self._best_model, self._best_score = converter(self.searcher) - return self + self.searcher.fit(*args, **kwargs) + + self._best_model, self._best_score = self._convert_model(self.searcher, args, kwargs) else: - # don't need to use _fit_with_groups here since none of these models support it self.best_model.fit(*args, **kwargs) return self @@ -515,6 +579,11 @@ def best_model(self): def best_score(self): return self._best_score + @property + def needs_fit(self): + return True # strictly speaking, could be false if the hyperparameters are fixed + # but it would be complicated to check that + class ListSelector(SingleModelSelector): """ @@ -532,18 +601,19 @@ def __init__(self, models, unwrap=True): self.models = [clone(model, safe=False) for model in models] self.unwrap = unwrap - def train(self, is_selecting, *args, **kwargs): + def train(self, is_selecting, folds: Optional[List], *args, **kwargs): + assert len(self.models) > 0, "ListSelector must have at least one model" if is_selecting: scores = [] for model in self.models: - model.train(is_selecting, *args, **kwargs) + model.train(is_selecting, folds, *args, **kwargs) scores.append(model.best_score) self._all_scores = scores self._best_score = np.max(scores) self._best_model = self.models[np.argmax(scores)] else: - self._best_model.train(is_selecting, *args, **kwargs) + self._best_model.train(is_selecting, folds, *args, **kwargs) @property def best_model(self): @@ -557,14 +627,31 @@ def best_model(self): def best_score(self): return self._best_score + @property + def needs_fit(self): + # technically, if there is just one model and it doesn't need to be fit we don't need to fit it, + # but that complicates the training logic so we don't bother with that case + return True + def get_selector(input, is_discrete, *, random_state=None, cv=None, wrapper=GridSearchCV): named_models = { 'linear': (LogisticRegressionCV(random_state=random_state, cv=cv) if is_discrete else WeightedLassoCVWrapper(random_state=random_state, cv=cv)), + 'poly': ([make_pipeline(PolynomialFeatures(d), + (LogisticRegressionCV(random_state=random_state, cv=cv) if is_discrete + else WeightedLassoCVWrapper(random_state=random_state, cv=cv))) + for d in range(1, 4)]), 'forest': (GridSearchCV(RandomForestClassifier(random_state=random_state) if is_discrete else RandomForestRegressor(random_state=random_state), param_grid={}, cv=cv)), + 'gbf': (GridSearchCV(GradientBoostingClassifier(random_state=random_state) if is_discrete + else GradientBoostingRegressor(random_state=random_state), + param_grid={}, cv=cv)), + 'nnet': (GridSearchCV(MLPClassifier(random_state=random_state) if is_discrete + else MLPRegressor(random_state=random_state), + param_grid={}, cv=cv)), + 'automl': ["poly", "forest", "gbf", "nnet"], } if isinstance(input, ModelSelector): # we've already got a model selector, don't need to do anything return input diff --git a/econml/tests/test_cate_interpreter.py b/econml/tests/test_cate_interpreter.py index 13af13a5b..eff4393cc 100644 --- a/econml/tests/test_cate_interpreter.py +++ b/econml/tests/test_cate_interpreter.py @@ -4,6 +4,7 @@ import numpy as np import 
unittest import pytest +import matplotlib from econml.cate_interpreter import SingleTreeCateInterpreter, SingleTreePolicyInterpreter from econml.dml import LinearDML from sklearn.linear_model import LinearRegression, LogisticRegression @@ -16,7 +17,6 @@ except Exception: graphviz_works = False -import matplotlib matplotlib.use('Agg') diff --git a/econml/tests/test_missing_values.py b/econml/tests/test_missing_values.py index e59c8811c..8784e222e 100644 --- a/econml/tests/test_missing_values.py +++ b/econml/tests/test_missing_values.py @@ -27,7 +27,7 @@ def __init__(self, model_t, model_y): self._model_t = model_t self._model_y = model_y - def train(self, is_selecting, Y, T, W=None): + def train(self, is_selecting, folds, Y, T, W=None): self._model_t.fit(W, T) self._model_y.fit(W, Y) return self @@ -35,6 +35,10 @@ def train(self, is_selecting, Y, T, W=None): def predict(self, Y, T, W=None): return Y - self._model_y.predict(W), T - self._model_t.predict(W) + @property + def needs_fit(self): + return False + class ModelFinal: diff --git a/econml/tests/test_model_selection.py b/econml/tests/test_model_selection.py new file mode 100644 index 000000000..efb24f64f --- /dev/null +++ b/econml/tests/test_model_selection.py @@ -0,0 +1,135 @@ +# Copyright (c) PyWhy contributors. All rights reserved. +# Licensed under the MIT License. + +import unittest +import numpy as np +from scipy.special import expit +from sklearn.ensemble import RandomForestRegressor +from sklearn.linear_model import (ElasticNetCV, Lasso, LassoCV, LinearRegression, LogisticRegression, + LogisticRegressionCV, MultiTaskElasticNetCV, MultiTaskLassoCV, + RidgeCV, RidgeClassifierCV) +from sklearn.model_selection import GridSearchCV, RandomizedSearchCV +from sklearn.preprocessing import PolynomialFeatures +from econml.dml import LinearDML +from econml.sklearn_extensions.linear_model import WeightedLassoCVWrapper + + +class TestModelSelection(unittest.TestCase): + + def _simple_dgp(self, n, d_x, d_w, discrete_treatment): + n = 500 # keep the data set small since we're testing a lot of models and don't care about the results + X = np.random.normal(size=(n, d_x)) + W = np.random.normal(size=(n, d_w)) + alpha = np.random.normal(size=(X.shape[1])) + n_f = d_w + d_x + beta = np.random.normal(size=(n_f,)) + gamma = np.random.normal(size=(n_f,)) + XW = np.hstack([X, W]) + if discrete_treatment: + T = np.random.binomial(1, expit(XW @ beta)) + else: + T = XW @ beta + np.random.normal(size=(n,)) + Y = (X @ alpha) * T + XW @ gamma + np.random.normal(size=(n,)) + return Y, T, X, W + + def test_poly(self): + # tests that we can recover the right degree of polynomial features + # implicitly also tests ability to handle pipelines + # since 'poly' uses pipelines containing PolynomialFeatures + n = 5000 + X = np.random.normal(size=(n, 2)) + W = np.random.normal(size=(n, 3)) + + for true_d in range(1, 4): + with self.subTest(true_d=true_d): + pf = PolynomialFeatures(degree=true_d) + + fts = pf.fit_transform(np.hstack([X, W])) + k = fts.shape[1] + m = X.shape[1] + W.shape[1] + + alpha_x = np.random.normal(size=(X.shape[1],)) + alpha_1 = np.random.normal(size=()) + beta = np.random.normal(size=(k,)) + gamma = np.random.normal(size=(k,)) + + # generate larger coefficients in a set of high degree features, + # weighted towards higher degree features + ft_inds_beta = np.random.choice(k, size=m, replace=False, p=np.arange(k) / np.sum(np.arange(k))) + ft_inds_gamma = np.random.choice(k, size=m, replace=False, p=np.arange(k) / np.sum(np.arange(k))) + + 
beta[ft_inds_beta] = 10 * np.random.normal(1, size=(m,)) + gamma[ft_inds_gamma] = 10 * np.random.normal(1, size=(m,)) + + t = np.random.normal(size=(n,)) + fts @ beta + np.random.normal(scale=0.5, size=(n,)) + y = np.random.normal(size=(n,)) + t * (alpha_1 + X @ alpha_x) + fts @ gamma + + # just test a polynomial T model, since for Y the correct degree also depends on + # the interation of T and X + mdl = LinearDML(model_t='poly', + model_y=LinearRegression()).fit(y, t, X=X, W=W) + for t in mdl.models_t[0]: + self.assertEqual(t[0].degree, true_d) + + def test_all_strings(self): + for discrete_treatment in [True, False]: + Y, T, X, W = self._simple_dgp(500, 2, 3, discrete_treatment) + for model_t in ['auto', 'linear', 'poly', 'forest', 'gbf', 'nnet', 'automl']: + with self.subTest(model_t=model_t, discrete_treatment=discrete_treatment): + mdl = LinearDML(model_t=model_t, + discrete_treatment=discrete_treatment, + model_y=LinearRegression()) + mdl.fit(Y, T, X=X, W=W) + + model_t = 'some_random_string' + with self.subTest(model_t=model_t, discrete_treatment=True): + mdl = LinearDML(model_t=model_t, + discrete_treatment=discrete_treatment, + model_y=LinearRegression()) + with self.assertRaises(ValueError): + mdl.fit(Y, T, X=X, W=W) + + def test_list_selection(self): + Y, T, X, W = self._simple_dgp(500, 2, 3, False) + + # test corner case with just one model in a list + mdl = LinearDML(model_t=[LinearRegression()], + model_y=LinearRegression()) + mdl.fit(Y, T, X=X, W=W) + + # test corner case with empty list + with self.assertRaises(Exception): + mdl = LinearDML(model_t=[], + model_y=LinearRegression()) + mdl.fit(Y, T, X=X, W=W) + + # test selecting between two fixed models + mdl = LinearDML(model_t=[LinearRegression(), RandomForestRegressor()], + model_y=LinearRegression()) + mdl.fit(Y, T, X=X, W=W) + # DGP is a linear model, so linear regression should fit better + assert isinstance(mdl.models_t[0][0], LinearRegression) + + T2 = T + 10 * (X[:, 1] > 0) # add a non-linear effect + mdl.fit(Y, T2, X=X, W=W) + # DGP is now non-linear, so random forest should fit better + assert isinstance(mdl.models_t[0][0], RandomForestRegressor) + + def test_sklearn_model_selection(self): + for is_discrete, mdls in [(True, [LogisticRegressionCV(), RidgeClassifierCV(), + GridSearchCV(LogisticRegression(), {'C': [1, 10]}), + RandomizedSearchCV(LogisticRegression(), {'C': [1, 10]})]), + (False, [ElasticNetCV(), LassoCV(), RidgeCV(), + MultiTaskElasticNetCV(), MultiTaskLassoCV(), WeightedLassoCVWrapper(), + GridSearchCV(Lasso(), {'alpha': [0.1, 1]}), + RandomizedSearchCV(Lasso(), {'alpha': [0.1, 1]})])]: + Y, T, X, W = self._simple_dgp(500, 2, 3, is_discrete) + T2 = np.tile(T.reshape(-1, 1), (1, 2)) # multi-column T + for mdl in mdls: + # these models only work on multi-output data + use_array = isinstance(mdl, (MultiTaskElasticNetCV, MultiTaskLassoCV)) + with self.subTest(model=mdl): + est = LinearDML(model_t=mdl, + discrete_treatment=is_discrete, + model_y=LinearRegression()) + est.fit(Y, T2 if use_array else T, X=X, W=W) diff --git a/econml/tests/test_ortho_learner.py b/econml/tests/test_ortho_learner.py index 0c65358ea..fa8c3a7f4 100644 --- a/econml/tests/test_ortho_learner.py +++ b/econml/tests/test_ortho_learner.py @@ -3,7 +3,6 @@ from sklearn.datasets import make_regression from econml._ortho_learner import _OrthoLearner, _crossfit -from sklearn.linear_model import LinearRegression from sklearn.preprocessing import PolynomialFeatures from sklearn.linear_model import LinearRegression, LassoCV, Lasso from 
sklearn.model_selection import KFold @@ -28,7 +27,7 @@ class Wrapper: def __init__(self, model): self._model = model - def train(self, is_selecting, X, y, Q, W=None): + def train(self, is_selecting, folds, X, y, Q, W=None): self._model.fit(X, y) return self @@ -38,6 +37,10 @@ def predict(self, X, y, Q, W=None): def score(self, X, y, Q, W=None): return self._model.score(X, y) + @property + def needs_fit(self): + return False + np.random.seed(123) X = np.random.normal(size=(5000, 3)) y = X[:, 0] + np.random.normal(size=(5000,)) @@ -108,13 +111,17 @@ class Wrapper: def __init__(self, model): self._model = model - def train(self, is_selecting, X, y, W=None): + def train(self, is_selecting, folds, X, y, W=None): self._model.fit(X, y) return self def predict(self, X, y, W=None): return self._model.predict(X), y - self._model.predict(X), X + @property + def needs_fit(self): + return False + np.random.seed(123) X = np.random.normal(size=(5000, 3)) y = X[:, 0] + np.random.normal(size=(5000,)) @@ -178,7 +185,7 @@ class Wrapper: def __init__(self, model): self._model = model - def train(self, is_selecting, X, y, Q, W=None): + def train(self, is_selecting, folds, X, y, Q, W=None): self._model.fit(X, y) return self @@ -188,6 +195,10 @@ def predict(self, X, y, Q, W=None): def score(self, X, y, Q, W=None): return self._model.score(X, y) + @property + def needs_fit(self): + return False + # Generate synthetic data X, y = make_regression(n_samples=10, n_features=5, noise=0.1, random_state=42) folds = list(KFold(2).split(X, y)) @@ -218,7 +229,7 @@ def __init__(self, model_t, model_y): self._model_t = model_t self._model_y = model_y - def train(self, is_selecting, Y, T, W=None): + def train(self, is_selecting, folds, Y, T, W=None): self._model_t.fit(W, T) self._model_y.fit(W, Y) return self @@ -226,6 +237,10 @@ def train(self, is_selecting, Y, T, W=None): def predict(self, Y, T, W=None): return Y - self._model_y.predict(W), T - self._model_t.predict(W) + @property + def needs_fit(self): + return False + class ModelFinal: def __init__(self): @@ -330,7 +345,7 @@ def __init__(self, model_t, model_y): self._model_t = model_t self._model_y = model_y - def train(self, is_selecting, Y, T, W=None): + def train(self, is_selecting, folds, Y, T, W=None): self._model_t.fit(W, T) self._model_y.fit(W, Y) return self @@ -338,6 +353,10 @@ def train(self, is_selecting, Y, T, W=None): def predict(self, Y, T, W=None): return Y - self._model_y.predict(W), T - self._model_t.predict(W) + @property + def needs_fit(self): + return False + class ModelFinal: def __init__(self): @@ -378,7 +397,7 @@ def __init__(self, model_t, model_y): self._model_t = model_t self._model_y = model_y - def train(self, is_selecting, Y, T, W=None): + def train(self, is_selecting, folds, Y, T, W=None): self._model_t.fit(W, T) self._model_y.fit(W, Y) return self @@ -389,6 +408,10 @@ def predict(self, Y, T, W=None): def score(self, Y, T, W=None): return (self._model_t.score(W, Y), self._model_y.score(W, T)) + @property + def needs_fit(self): + return False + class ModelFinal: def __init__(self): @@ -435,7 +458,7 @@ def __init__(self, model_t, model_y): self._model_t = model_t self._model_y = model_y - def train(self, is_selecting, Y, T, W=None): + def train(self, is_selecting, folds, Y, T, W=None): self._model_t.fit(W, np.matmul(T, np.arange(1, T.shape[1] + 1))) self._model_y.fit(W, Y) return self @@ -443,6 +466,10 @@ def train(self, is_selecting, Y, T, W=None): def predict(self, Y, T, W=None): return Y - self._model_y.predict(W), T - 
self._model_t.predict_proba(W)[:, 1:] + @property + def needs_fit(self): + return False + class ModelFinal: def __init__(self): diff --git a/econml/tests/test_shap.py b/econml/tests/test_shap.py index 6c524a536..fecbb202c 100644 --- a/econml/tests/test_shap.py +++ b/econml/tests/test_shap.py @@ -4,10 +4,10 @@ import numpy as np import unittest import shap -from econml.dml import * -from econml.orf import * -from econml.dr import * -from econml.metalearners import * +from econml.dml import LinearDML, CausalForestDML, NonParamDML +from econml.orf import DMLOrthoForest, DROrthoForest +from econml.dr import DRLearner, ForestDRLearner +from econml.metalearners import TLearner, SLearner, XLearner, DomainAdaptationLearner from sklearn.linear_model import LinearRegression, LogisticRegression, Lasso from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier from sklearn.preprocessing import PolynomialFeatures diff --git a/econml/tests/test_statsmodels.py b/econml/tests/test_statsmodels.py index 958d538c4..db667e18e 100644 --- a/econml/tests/test_statsmodels.py +++ b/econml/tests/test_statsmodels.py @@ -416,7 +416,7 @@ def test_inference(self): "{}, {}".format(est.coef__interval()[1][t], np.array([scipy.stats.norm.ppf(.975, loc=1, scale=1)] + [scipy.stats.norm.ppf(.975, loc=0, scale=1)] * (d - 1))) - assert np.all(np.abs(est.intercept_[t]) <= 1e-12), "{}, {}".format(est.intercept_[t]) + assert np.all(np.abs(est.intercept_[t]) <= 1e-12), "{}".format(est.intercept_[t]) assert np.all(np.abs(est.intercept_stderr_[t]) <= 1e-12), "{}".format(est.intercept_stderr_[t]) assert np.all(np.abs(est.intercept__interval()[0][t]) <= 1e-12), "{}".format(est.intercept__interval()[0][t]) @@ -446,7 +446,7 @@ def test_inference(self): "{}, {}".format(est.coef__interval()[1][t], np.array([scipy.stats.norm.ppf(.975, loc=1, scale=np.sqrt(2))] + [scipy.stats.norm.ppf(.975, loc=0, scale=np.sqrt(2))] * (d - 1))) - assert np.all(np.abs(est.intercept_[t]) <= 1e-12), "{}, {}".format(est.intercept_[t]) + assert np.all(np.abs(est.intercept_[t]) <= 1e-12), "{}".format(est.intercept_[t]) assert np.all(np.abs(est.intercept_stderr_[t] - 1) <= 1e-12), "{}".format(est.intercept_stderr_[t]) assert np.all(np.abs(est.intercept__interval()[0][t] - scipy.stats.norm.ppf(.025, loc=0, scale=1)) <= 1e-12), \ diff --git a/monte_carlo_tests/monte_carlo_statsmodels.py b/monte_carlo_tests/monte_carlo_statsmodels.py index 1d6301cc5..9c061e3f8 100644 --- a/monte_carlo_tests/monte_carlo_statsmodels.py +++ b/monte_carlo_tests/monte_carlo_statsmodels.py @@ -14,8 +14,6 @@ import warnings import joblib from sklearn.model_selection import GridSearchCV -from statsmodels.tools.tools import add_constant -from econml.utilities import cross_product from sklearn.multioutput import MultiOutputRegressor @@ -251,7 +249,7 @@ def true_effect(x, t): (hetero_coef * X[:, [0]] + 1) * np.random.normal(0, 1, size=(n, p)) XT = np.hstack([X, T]) - X1, X2, y1, y2, X_final_first, X_final_sec, y_sum_first, y_sum_sec,\ + X1, X2, y1, y2, X_final_first, X_final_sec, y_sum_first, y_sum_sec, \ n_sum_first, n_sum_sec, var_first, var_sec = _summarize(XT, y) X = np.vstack([X1, X2]) y = np.concatenate((y1, y2)) @@ -420,7 +418,8 @@ def first_stage(): min_samples_leaf=10, random_state=123), MultiOutputRegressor(GradientBoostingRegressor(n_estimators=20, max_depth=3, - min_samples_leaf=10, random_state=123))], + min_samples_leaf=10, + random_state=123))], param_grid_list=[{}, {}, {},