From a00da0298cb653cb0485f9ddbf4e694bf46686d6 Mon Sep 17 00:00:00 2001 From: Vasilis Date: Fri, 1 Nov 2019 21:37:38 -0400 Subject: [PATCH 01/64] created ortho learner meta class. Put _RLearner in separate file. Restructured _RLearner to be subclass of _OrthoLearner. Small modifications to first stage and final wrappers in DML and in preprocessing done at the beginning of statsmodels inference. --- econml/_ortho_learner.py | 250 ++++++++++++++++++++++++++++++++++ econml/_rlearner.py | 127 +++++++++++++++++ econml/dml.py | 284 +++++---------------------------------- econml/inference.py | 12 +- 4 files changed, 417 insertions(+), 256 deletions(-) create mode 100644 econml/_ortho_learner.py create mode 100644 econml/_rlearner.py diff --git a/econml/_ortho_learner.py b/econml/_ortho_learner.py new file mode 100644 index 000000000..4bbee1e4a --- /dev/null +++ b/econml/_ortho_learner.py @@ -0,0 +1,250 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +"""Double ML. + +"Double Machine Learning" is an algorithm that applies arbitrary machine learning methods +to fit the treatment and response, then uses a linear model to predict the response residuals +from the treatment residuals. 
+ +""" + +import numpy as np +import copy +from warnings import warn +from .utilities import (shape, reshape, ndim, hstack, cross_product, transpose, + broadcast_unit_treatments, reshape_treatmentwise_effects, + StatsModelsLinearRegression, LassoCVWrapper) +from sklearn.model_selection import KFold, StratifiedKFold, check_cv +from sklearn.linear_model import LinearRegression, LassoCV +from sklearn.preprocessing import (PolynomialFeatures, LabelEncoder, OneHotEncoder, + FunctionTransformer) +from sklearn.base import clone, TransformerMixin +from sklearn.pipeline import Pipeline +from sklearn.utils import check_random_state +from .cate_estimator import (BaseCateEstimator, LinearCateEstimator, + TreatmentExpansionMixin, StatsModelsCateEstimatorMixin) +from .inference import StatsModelsInference + + +def _crossfit(model, folds, *args, **kwargs): + model_list = [] + for idx, (train_idxs, test_idxs) in enumerate(folds): + model_list.append(clone(model, safe=False)) + + args_train = () + args_test = () + for var in args: + args_train += (var[train_idxs],) + args_test += (var[test_idxs],) + + kwargs_train = {} + kwargs_test = {} + for key, var in kwargs.items(): + if var is not None: + kwargs_train[key] = var[train_idxs] + kwargs_test[key] = var[test_idxs] + + model_list[idx].fit(*args_train, **kwargs_train) + + nuisance_temp = model_list[idx].predict(*args_test, **kwargs_test) + + if not isinstance(nuisance_temp, tuple): + nuisance_temp = (nuisance_temp,) + + if idx == 0: + nuisances = tuple([np.zeros((args[0].shape[0],) + nuis.shape[1:]) for nuis in nuisance_temp]) + + for it, nuis in enumerate(nuisance_temp): + nuisances[it][test_idxs] = nuis + + return nuisances, model_list + +class _OrthoLearner(TreatmentExpansionMixin, LinearCateEstimator): + """ + Base class for all orthogonal learners. + + Parameters + ---------- + model_nuisance: estimator + The estimator for fitting the nuisance function. 
Must implement + `fit` and `predict` methods that both take as input Y, T, X, W, Z. + + model_final: estimator for fitting the response residuals to the features and treatment residuals + Must implement `fit` and `predict` methods. The fit method takes as input, Y, T, X, W, Z, nuisances. + Predict, on the other hand, should just take the features X and return the constant marginal effect. + + discrete_treatment: bool + Whether the treatment values should be treated as categorical, rather than continuous, quantities + + n_splits: int, cross-validation generator or an iterable, optional + Determines the cross-validation splitting strategy. + Possible inputs for cv are: + + - None, to use the default 3-fold cross-validation, + - integer, to specify the number of folds. + - :term:`CV splitter` + - An iterable yielding (train, test) splits as arrays of indices. + + For integer/None inputs, if the treatment is discrete + :class:`~sklearn.model_selection.StratifiedKFold` is used, else, + :class:`~sklearn.model_selection.KFold` is used + (with a random shuffle in either case). + + Unless an iterable is used, we call `split(X,T)` to generate the splits. + + random_state: int, :class:`~numpy.random.mtrand.RandomState` instance or None + If int, random_state is the seed used by the random number generator; + If :class:`~numpy.random.mtrand.RandomState` instance, random_state is the random number generator; + If None, the random number generator is the :class:`~numpy.random.mtrand.RandomState` instance used + by `np.random`. 
+ """ + + def __init__(self, model_nuisance, model_final, + discrete_treatment, n_splits, random_state): + self._model_nuisance = clone(model_nuisance, safe=False) + self._models_nuisance = [] + self._model_final = clone(model_final, safe=False) + self._n_splits = n_splits + self._discrete_treatment = discrete_treatment + self._random_state = check_random_state(random_state) + if discrete_treatment: + self._label_encoder = LabelEncoder() + self._one_hot_encoder = OneHotEncoder(categories='auto', sparse=False) + super().__init__() + + def _check_input_dims(self, Y, T, X=None, W=None, Z=None, sample_weight=None, sample_var=None): + assert shape(Y)[0] == shape(T)[0], "Dimension mis-match!" + assert (X is None) or (X.shape[0] == Y.shape[0]), "Dimension mis-match!" + assert (W is None) or (W.shape[0] == Y.shape[0]), "Dimension mis-match!" + assert (Z is None) or (Z.shape[0] == Y.shape[0]), "Dimension mis-match!" + assert (sample_weight is None) or (sample_weight.shape[0] == Y.shape[0]), "Dimension mis-match!" + assert (sample_var is None) or (sample_var.shape[0] == Y.shape[0]), "Dimension mis-match!" + self._d_x = X.shape[1:] if X is not None else None + + def _check_fitted_dims(self, X): + if X is None: + assert self._d_x is None, "X was not None when fitting, so can't be none for effect" + else: + assert self._d_x == X.shape[1:], "Dimension mis-match of X with fitted X" + + @BaseCateEstimator._wrap_fit + def fit(self, Y, T, X=None, W=None, Z=None, sample_weight=None, sample_var=None, inference=None): + """ + Estimate the counterfactual model from data, i.e. estimates functions τ(·,·,·), ∂τ(·,·). 
+ + Parameters + ---------- + Y: (n × d_y) matrix or vector of length n + Outcomes for each sample + T: (n × dₜ) matrix or vector of length n + Treatments for each sample + X: optional (n × dₓ) matrix + Features for each sample + W: optional (n × d_w) matrix + Controls for each sample + Z: optional (n × d_z) matrix + Instruments for each sample + sample_weight: optional (n,) vector + Weights for each row + sample_var: optional (n,) vector + Sample variance + inference: string, `Inference` instance, or None + Method for performing inference. This estimator supports 'bootstrap' + (or an instance of `BootstrapInference`). + + Returns + ------- + self + """ + self._check_input_dims(Y, T, X, W, Z, sample_weight, sample_var) + nuisances = self.fit_nuisances(Y, T, X, W, Z, sample_weight=sample_weight) + self.fit_final(Y, T, X, W, Z, nuisances, sample_weight=sample_weight, sample_var=sample_var) + return self + + def fit_nuisances(self, Y, T, X=None, W=None, Z=None, sample_weight=None): + # use a binary array to get stratified split in case of discrete treatment + splitter = check_cv(self._n_splits, [0], classifier=self._discrete_treatment) + # if check_cv produced a new KFold or StratifiedKFold object, we need to set shuffle and random_state + if splitter != self._n_splits and isinstance(splitter, (KFold, StratifiedKFold)): + splitter.shuffle = True + splitter.random_state = self._random_state + + all_vars = [var if np.ndim(var)==2 else var.reshape(-1, 1) for var in [Z, W, X] if var is not None] + if all_vars: + all_vars = np.hstack(all_vars) + folds = splitter.split(all_vars, T) + else: + folds = splitter.split(np.ones((T.shape[0], 1)), T) + + if self._discrete_treatment: + T = self._label_encoder.fit_transform(T) + T = self._one_hot_encoder.fit_transform(reshape(T, (-1, 1)))[:, 1:] # drop first column since all columns sum to one + self._d_t = shape(T)[1:] + self.transformer = FunctionTransformer( + func=(lambda T: + self._one_hot_encoder.transform( + 
reshape(self._label_encoder.transform(T), (-1, 1)))[:, 1:]), + validate=False) + + nuisances, fitted_models = _crossfit(self._model_nuisance, folds, + Y, T, X=X, W=W, Z=Z,sample_weight=sample_weight) + self._models_nuisance = fitted_models + return nuisances + + def fit_final(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None, sample_var=None): + self._model_final.fit(Y, T, X=X, W=W, Z=Z, nuisances=nuisances, sample_weight=sample_weight, sample_var=sample_var) + + def const_marginal_effect(self, X=None): + """ + Calculate the constant marginal CATE θ(·). + + The marginal effect is conditional on a vector of + features on a set of m test samples {Xᵢ}. + + Parameters + ---------- + X: optional (m × dₓ) matrix + Features for each sample. + If X is None, it will be treated as a column of ones with a single row + + Returns + ------- + theta: (m × d_y × dₜ) matrix + Constant marginal CATE of each treatment on each outcome for each sample. + Note that when Y or T is a vector rather than a 2-dimensional array, + the corresponding singleton dimensions in the output will be collapsed + (e.g. 
if both are vectors, then the output of this method will also be a vector) + """ + self._check_fitted_dims(X) + return self._model_final.predict(X) + + def const_marginal_effect_interval(self, X=None, *, alpha=0.1): + self._check_fitted_dims(X) + return super().const_marginal_effect_interval(X, alpha=alpha) + + def effect_interval(self, X=None, T0=0, T1=1, *, alpha=0.1): + self._check_fitted_dims(X) + return super().effect_interval(X, T0=T0, T1=T1, alpha=alpha) + + def score(self, Y, T, X=None, W=None, Z=None): + n_splits = len(self._models_nuisance) + for idx, mdl in enumerate(self._models_nuisance): + nuisance_temp = mdl.predict(Y, T, X, W, Z) + if not isinstance(nuisance_temp, tuple): + nuisance_temp = (nuisance_temp,) + + if idx == 0: + nuisances = [np.zeros((n_splits,) + nuis.shape) for nuis in nuisance_temp] + + for it, nuis in enumerate(nuisance_temp): + nuisances[it][idx] = nuis + + for it in range(len(nuisances)): + nuisances[it] = np.mean(nuisances[it], axis=0) + + return self._model_final.score(Y, T, X=X, W=W, Z=Z, nuisances=tuple(nuisances)) + + @property + def model_final(self): + return self._model_final diff --git a/econml/_rlearner.py b/econml/_rlearner.py new file mode 100644 index 000000000..818077f19 --- /dev/null +++ b/econml/_rlearner.py @@ -0,0 +1,127 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +"""Double ML. + +"Double Machine Learning" is an algorithm that applies arbitrary machine learning methods +to fit the treatment and response, then uses a linear model to predict the response residuals +from the treatment residuals. 
+ +""" + +import numpy as np +import copy +from warnings import warn +from .utilities import (shape, reshape, ndim, hstack, cross_product, transpose, + broadcast_unit_treatments, reshape_treatmentwise_effects, + StatsModelsLinearRegression, LassoCVWrapper) +from sklearn.model_selection import KFold, StratifiedKFold, check_cv +from sklearn.linear_model import LinearRegression, LassoCV +from sklearn.preprocessing import (PolynomialFeatures, LabelEncoder, OneHotEncoder, + FunctionTransformer) +from sklearn.base import clone, TransformerMixin +from sklearn.pipeline import Pipeline +from sklearn.utils import check_random_state +from .cate_estimator import (BaseCateEstimator, LinearCateEstimator, + TreatmentExpansionMixin, StatsModelsCateEstimatorMixin) +from .inference import StatsModelsInference +from ._ortho_learner import _OrthoLearner + +class _RLearner(_OrthoLearner): + """ + Base class for orthogonal learners. + + Parameters + ---------- + model_y: estimator + The estimator for fitting the response to the features and controls. Must implement + `fit` and `predict` methods. Unlike sklearn estimators both methods must + take an extra second argument (the controls). + + model_t: estimator + The estimator for fitting the treatment to the features and controls. Must implement + `fit` and `predict` methods. Unlike sklearn estimators both methods must + take an extra second argument (the controls). + + model_final: estimator for fitting the response residuals to the features and treatment residuals + Must implement `fit` and `predict` methods. Unlike sklearn estimators the fit methods must + take an extra second argument (the treatment residuals). Predict, on the other hand, + should just take the features and return the constant marginal effect. 
+ + discrete_treatment: bool + Whether the treatment values should be treated as categorical, rather than continuous, quantities + + n_splits: int, cross-validation generator or an iterable, optional + Determines the cross-validation splitting strategy. + Possible inputs for cv are: + + - None, to use the default 3-fold cross-validation, + - integer, to specify the number of folds. + - :term:`CV splitter` + - An iterable yielding (train, test) splits as arrays of indices. + + For integer/None inputs, if the treatment is discrete + :class:`~sklearn.model_selection.StratifiedKFold` is used, else, + :class:`~sklearn.model_selection.KFold` is used + (with a random shuffle in either case). + + Unless an iterable is used, we call `split(X,T)` to generate the splits. + + random_state: int, :class:`~numpy.random.mtrand.RandomState` instance or None + If int, random_state is the seed used by the random number generator; + If :class:`~numpy.random.mtrand.RandomState` instance, random_state is the random number generator; + If None, the random number generator is the :class:`~numpy.random.mtrand.RandomState` instance used + by `np.random`. + """ + + def __init__(self, model_y, model_t, model_final, + discrete_treatment, n_splits, random_state): + self._model_y = clone(model_y, safe=False) + self._model_t = clone(model_t, safe=False) + self._models_y = [] + self._models_t = [] + self._model_final = clone(model_final, safe=False) + self._n_splits = n_splits + self._discrete_treatment = discrete_treatment + + + class ModelNuisance: + def __init__(self, model_y, model_t): + self._model_y = clone(model_y, safe=False) + self._model_t = clone(model_t, safe=False) + + def fit(self, Y, T, X=None, W=None, Z=None, sample_weight=None): + assert Z is None, "Cannot accept instrument!" 
+ self._model_t.fit(X, W, T, sample_weight=sample_weight) + self._model_y.fit(X, W, Y, sample_weight=sample_weight) + return self + + def predict(self, Y, T, X=None, W=None, Z=None, sample_weight=None): + Y_res = Y - self._model_y.predict(X, W).reshape(Y.shape) + T_res = T - self._model_t.predict(X, W) + return Y_res, T_res + + class ModelFinal: + def __init__(self, model_final): + self._model_final = clone(model_final, safe=False) + + def fit(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None, sample_var=None): + Y_res, T_res = nuisances + self._model_final.fit(X, T_res, Y_res, sample_weight=sample_weight, sample_var=sample_var) + return self + + def predict(self, X): + return self._model_final.predict(X) + + def score(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None, sample_var=None): + Y_res, T_res = nuisances + effects = self._model_final.predict(X).reshape(-1, shape(Y)[1], shape(T_res)[1]) + Y_res_pred = np.einsum('ijk,ik->ij', effects, T_res).reshape(shape(Y)) + return ((Y_res - Y_res_pred)**2).mean() + + super().__init__(ModelNuisance(model_y, model_t), + ModelFinal(model_final), discrete_treatment, n_splits, random_state) + + @property + def model_final(self): + return super().model_final._model_final diff --git a/econml/dml.py b/econml/dml.py index 249d7c22e..02d2cc105 100644 --- a/econml/dml.py +++ b/econml/dml.py @@ -25,243 +25,7 @@ from .cate_estimator import (BaseCateEstimator, LinearCateEstimator, TreatmentExpansionMixin, StatsModelsCateEstimatorMixin) from .inference import StatsModelsInference - - -class _RLearner(TreatmentExpansionMixin, LinearCateEstimator): - """ - Base class for orthogonal learners. - - Parameters - ---------- - model_y: estimator - The estimator for fitting the response to the features and controls. Must implement - `fit` and `predict` methods. Unlike sklearn estimators both methods must - take an extra second argument (the controls). 
- - model_t: estimator - The estimator for fitting the treatment to the features and controls. Must implement - `fit` and `predict` methods. Unlike sklearn estimators both methods must - take an extra second argument (the controls). - - model_final: estimator for fitting the response residuals to the features and treatment residuals - Must implement `fit` and `predict` methods. Unlike sklearn estimators the fit methods must - take an extra second argument (the treatment residuals). Predict, on the other hand, - should just take the features and return the constant marginal effect. - - discrete_treatment: bool - Whether the treatment values should be treated as categorical, rather than continuous, quantities - - n_splits: int, cross-validation generator or an iterable, optional - Determines the cross-validation splitting strategy. - Possible inputs for cv are: - - - None, to use the default 3-fold cross-validation, - - integer, to specify the number of folds. - - :term:`CV splitter` - - An iterable yielding (train, test) splits as arrays of indices. - - For integer/None inputs, if the treatment is discrete - :class:`~sklearn.model_selection.StratifiedKFold` is used, else, - :class:`~sklearn.model_selection.KFold` is used - (with a random shuffle in either case). - - Unless an iterable is used, we call `split(X,T)` to generate the splits. - - random_state: int, :class:`~numpy.random.mtrand.RandomState` instance or None - If int, random_state is the seed used by the random number generator; - If :class:`~numpy.random.mtrand.RandomState` instance, random_state is the random number generator; - If None, the random number generator is the :class:`~numpy.random.mtrand.RandomState` instance used - by `np.random`. 
- """ - - def __init__(self, model_y, model_t, model_final, - discrete_treatment, n_splits, random_state): - self._model_y = clone(model_y, safe=False) - self._model_t = clone(model_t, safe=False) - self._model_final = clone(model_final, safe=False) - self._n_splits = n_splits - self._discrete_treatment = discrete_treatment - self._random_state = check_random_state(random_state) - if discrete_treatment: - self._label_encoder = LabelEncoder() - self._one_hot_encoder = OneHotEncoder(categories='auto', sparse=False) - super().__init__() - - @staticmethod - def _check_X_W(X, W, Y): - if X is None: - X = np.ones((shape(Y)[0], 1)) - if W is None: - W = np.empty((shape(Y)[0], 0)) - return X, W - - @BaseCateEstimator._wrap_fit - def fit(self, Y, T, X=None, W=None, sample_weight=None, sample_var=None, inference=None): - """ - Estimate the counterfactual model from data, i.e. estimates functions τ(·,·,·), ∂τ(·,·). - - Parameters - ---------- - Y: (n × d_y) matrix or vector of length n - Outcomes for each sample - T: (n × dₜ) matrix or vector of length n - Treatments for each sample - X: optional (n × dₓ) matrix - Features for each sample - W: optional (n × d_w) matrix - Controls for each sample - sample_weight: optional (n,) vector - Weights for each row - inference: string, `Inference` instance, or None - Method for performing inference. This estimator supports 'bootstrap' - (or an instance of `BootstrapInference`). 
- - Returns - ------- - self - """ - X, W = self._check_X_W(X, W, Y) - assert shape(Y)[0] == shape(T)[0] == shape(X)[0] == shape(W)[0] - - self._d_x = shape(X)[1:] - - Y_res, T_res = self.fit_nuisances(Y, T, X, W, sample_weight=sample_weight) - - self.fit_final(X, Y_res, T_res, sample_weight=sample_weight, sample_var=sample_var) - - def fit_nuisances(self, Y, T, X, W, sample_weight=None): - self._models_y = [] - self._models_t = [] - # use a binary array to get stratified split in case of discrete treatment - splitter = check_cv(self._n_splits, [0], classifier=self._discrete_treatment) - # if check_cv produced a new KFold or StratifiedKFold object, we need to set shuffle and random_state - if splitter != self._n_splits and isinstance(splitter, (KFold, StratifiedKFold)): - splitter.shuffle = True - splitter.random_state = self._random_state - - folds = splitter.split(X, T) - - if self._discrete_treatment: - T = self._label_encoder.fit_transform(T) - T_out = self._one_hot_encoder.fit_transform(reshape(T, (-1, 1))) - T_out = T_out[:, 1:] # drop first column since all columns sum to one - self._d_t = shape(T_out)[1:] - self.transformer = FunctionTransformer( - func=(lambda T: - self._one_hot_encoder.transform( - reshape(self._label_encoder.transform(T), (-1, 1)))[:, 1:]), - validate=False) - else: - T_out = T - - Y_res = np.zeros(shape(Y)) - T_res = np.zeros(shape(T_out)) - for idx, (train_idxs, test_idxs) in enumerate(folds): - self._models_y.append(clone(self._model_y, safe=False)) - self._models_t.append(clone(self._model_t, safe=False)) - Y_train, Y_test = Y[train_idxs], Y[test_idxs] - T_train, T_test = T[train_idxs], T_out[test_idxs] - X_train, X_test = X[train_idxs], X[test_idxs] - W_train, W_test = W[train_idxs], W[test_idxs] - # TODO: If T is a vector rather than a 2-D array, then the model's fit must accept a vector... - # Do we want to reshape to an nx1, or just trust the user's choice of input? 
- # (Likewise for Y below) - if sample_weight is not None: - self._models_t[idx].fit(X_train, W_train, T_train, sample_weight=sample_weight[train_idxs]) - else: - self._models_t[idx].fit(X_train, W_train, T_train) - if self._discrete_treatment: - # TODO: can we easily detect and flag if the number of classes in the training and test sets differ? - # we'll eventually throw an unhelpful error below, but it would be better to detect eagerly - T_pred = self._models_t[idx].predict(X_test, W_test)[:, 1:] - else: - T_pred = self._models_t[idx].predict(X_test, W_test) - if shape(T_pred) != shape(T_test): - T_pred = reshape(T_pred, shape(T_test)) - T_res[test_idxs] = T_test - T_pred - if sample_weight is not None: - self._models_y[idx].fit(X_train, W_train, Y_train, sample_weight=sample_weight[train_idxs]) - else: - self._models_y[idx].fit(X_train, W_train, Y_train) - Y_pred = self._models_y[idx].predict(X_test, W_test) - if shape(Y_pred) != shape(Y_test): - Y_pred = reshape(Y_pred, shape(Y_test)) - Y_res[test_idxs] = Y_test - Y_pred - return Y_res, T_res - - def fit_final(self, X, Y_res, T_res, sample_weight=None, sample_var=None): - if sample_weight is not None: - if sample_var is None: - self._model_final.fit(X, T_res, Y_res, sample_weight=sample_weight) - else: - self._model_final.fit(X, T_res, Y_res, sample_weight=sample_weight, sample_var=sample_var) - else: - self._model_final.fit(X, T_res, Y_res) - - def const_marginal_effect(self, X=None): - """ - Calculate the constant marginal CATE θ(·). - - The marginal effect is conditional on a vector of - features on a set of m test samples {Xᵢ}. - - Parameters - ---------- - X: optional (m × dₓ) matrix - Features for each sample. - If X is None, it will be treated as a column of ones with a single row - - Returns - ------- - theta: (m × d_y × dₜ) matrix - Constant marginal CATE of each treatment on each outcome for each sample. 
- Note that when Y or T is a vector rather than a 2-dimensional array, - the corresponding singleton dimensions in the output will be collapsed - (e.g. if both are vectors, then the output of this method will also be a vector) - """ - if X is None: - assert self._d_x == (1,), "X was not None when fitting, so can't be none for effect" - X = np.ones((1, 1)) - return self._model_final.predict(X) - - def const_marginal_effect_interval(self, X=None, *, alpha=0.1): - if X is None: - assert self._d_x == (1,), "X was not None when fitting, so can't be none for effect" - X = np.ones((1, 1)) - return super().const_marginal_effect_interval(X, alpha=alpha) - - def effect_interval(self, X=None, T0=0, T1=1, *, alpha=0.1): - if X is None: - n_rows = shape(T0)[0] if ndim(T0) > 0 else 1 - assert self._d_x == (1,), "X was not None when fitting, so can't be none for effect" - X = np.ones((n_rows, 1)) - return super().effect_interval(X, T0=T0, T1=T1, alpha=alpha) - - def score(self, Y, T, X=None, W=None): - X, W = self._check_X_W(X, W, Y) - X, T = self._expand_treatments(X, T) - if T.ndim == 1: - T = reshape(T, (-1, 1)) - if Y.ndim == 1: - Y = reshape(Y, (-1, 1)) - n = len(self._models_t) - Y_test_pred = np.zeros(shape(Y) + (n,)) - T_test_pred = np.zeros(shape(T) + (n,)) - for ind in range(n): - if self._discrete_treatment: - T_test_pred[:, :, ind] = reshape(self._models_t[ind].predict(X, W)[:, 1:], shape(T)) - else: - T_test_pred[:, :, ind] = reshape(self._models_t[ind].predict(X, W), shape(T)) - Y_test_pred[:, :, ind] = reshape(self._models_y[ind].predict(X, W), shape(Y)) - Y_test_pred = Y_test_pred.mean(axis=2) - T_test_pred = T_test_pred.mean(axis=2) - Y_test_res = Y - Y_test_pred - T_test_res = T - T_test_pred - effects = reshape(self._model_final.predict(X), (-1, shape(Y)[1], shape(T)[1])) - Y_test_res_pred = reshape(np.einsum('ijk,ik->ij', effects, T_test_res), shape(Y)) - mse = ((Y_test_res - Y_test_res_pred)**2).mean() - return mse - +from ._rlearner import _RLearner class 
DMLCateEstimator(_RLearner): """ @@ -332,25 +96,39 @@ def __init__(self, model, is_Y): self._featurizer = clone(featurizer, safe=False) self._is_Y = is_Y - def _combine(self, X, W, fitting=True): + def _combine(self, X, W, n_samples, fitting=True): if self._is_Y and linear_first_stages: - F = self._featurizer.fit_transform(X) if fitting else self._featurizer.transform(X) + if X is not None: + F = self._featurizer.fit_transform(X) if fitting else self._featurizer.transform(X) + else: + X = np.ones((n_samples, 1)) + F = np.ones((n_samples, 1)) + if W is None: + W = np.empty((n_samples, 0)) XW = hstack([X, W]) return cross_product(XW, hstack([np.ones((shape(XW)[0], 1)), F, W])) else: + if X is None: + X = np.ones((n_samples, 1)) + if W is None: + W = np.empty((n_samples, 0)) return hstack([X, W]) def fit(self, X, W, Target, sample_weight=None): + if (not self._is_Y) and discrete_treatment: + Target = np.matmul(Target, np.arange(1, Target.shape[1] + 1)).flatten() + if sample_weight is not None: - self._model.fit(self._combine(X, W), Target, sample_weight=sample_weight) + self._model.fit(self._combine(X, W, Target.shape[0]), Target, sample_weight=sample_weight) else: - self._model.fit(self._combine(X, W), Target) + self._model.fit(self._combine(X, W, Target.shape[0]), Target) def predict(self, X, W): + n_samples = X.shape[0] if X is not None else (W.shape[0] if W is not None else 1) if (not self._is_Y) and discrete_treatment: - return self._model.predict_proba(self._combine(X, W, fitting=False)) + return self._model.predict_proba(self._combine(X, W, n_samples, fitting=False))[:, 1:] else: - return self._model.predict(self._combine(X, W, fitting=False)) + return self._model.predict(self._combine(X, W, n_samples, fitting=False)) class FinalWrapper: def __init__(self): @@ -361,7 +139,8 @@ def fit(self, X, T_res, Y_res, sample_weight=None, sample_var=None): # Track training dimensions to see if Y or T is a vector instead of a 2-dimensional array self._d_t = shape(T_res)[1:] 
self._d_y = shape(Y_res)[1:] - fts = self._combine(X, T_res) + F = self._featurizer.fit_transform(X) if X is not None else np.ones((T_res.shape[0], 1)) + fts = cross_product(F, T_res) if sample_weight is not None: if sample_var is not None: self._model.fit(fts, @@ -380,13 +159,10 @@ def fit(self, X, T_res, Y_res, sample_weight=None, sample_var=None): UserWarning) self._intercept = intercept - def _combine(self, X, T, fitting=True): - F = self._featurizer.fit_transform(X) if fitting else self._featurizer.transform(X) - return cross_product(F, T) - def predict(self, X): - X, T = broadcast_unit_treatments(X, self._d_t[0] if self._d_t else 1) - prediction = self._model.predict(self._combine(X, T, fitting=False)) + F = self._featurizer.transform(X) if X is not None else np.ones((1, 1)) + F, T = broadcast_unit_treatments(F, self._d_t[0] if self._d_t else 1) + prediction = self._model.predict(cross_product(F, T)) return reshape_treatmentwise_effects(prediction - self._intercept if self._intercept else prediction, self._d_t, self._d_y) @@ -404,7 +180,11 @@ def coef_(self): @property def featurizer(self): - return self._model_final._featurizer + return super().model_final._featurizer + + @property + def model_final(self): + return super().model_final._model class LinearDMLCateEstimator(StatsModelsCateEstimatorMixin, DMLCateEstimator): @@ -503,7 +283,7 @@ def fit(self, Y, T, X=None, W=None, sample_weight=None, sample_var=None, inferen @property def statsmodels(self): - return self._model_final._model + return self.model_final class SparseLinearDMLCateEstimator(DMLCateEstimator): diff --git a/econml/inference.py b/econml/inference.py index 76983e825..d5f987256 100644 --- a/econml/inference.py +++ b/econml/inference.py @@ -110,14 +110,18 @@ def fit(self, estimator, *args, **kwargs): def effect_interval(self, X, *, T0, T1, alpha=0.1): X, T0, T1 = self._est._expand_treatments(X, T0, T1) - if self.featurizer is not None: - X = self.featurizer.fit_transform(X) + if X is None: + X 
= np.ones((T0.shape[0], 1)) + elif self.featurizer is not None: + X = self.featurizer.transform(X) return self.statsmodels.predict_interval(cross_product(X, T1 - T0), alpha=alpha) def const_marginal_effect_interval(self, X, *, alpha=0.1): - X, T = broadcast_unit_treatments(X, self._d_t[0] if self._d_t else 1) - if self.featurizer is not None: + if X is None: + X = np.ones((1, 1)) + elif self.featurizer is not None: X = self.featurizer.fit_transform(X) + X, T = broadcast_unit_treatments(X, self._d_t[0] if self._d_t else 1) preds = self.statsmodels.predict_interval(cross_product(X, T), alpha=alpha) return tuple(reshape_treatmentwise_effects(pred, self._d_t, self._d_y) for pred in preds) From d9449e0aa6669354d46fdd110dc4e5a1147ecf0d Mon Sep 17 00:00:00 2001 From: Vasilis Date: Fri, 1 Nov 2019 21:44:36 -0400 Subject: [PATCH 02/64] notebook for ortholearner testing --- notebooks/OrthoLearner.ipynb | 377 +++++++++++++++++++++++++++++++++++ 1 file changed, 377 insertions(+) create mode 100644 notebooks/OrthoLearner.ipynb diff --git a/notebooks/OrthoLearner.ipynb b/notebooks/OrthoLearner.ipynb new file mode 100644 index 000000000..972a491a5 --- /dev/null +++ b/notebooks/OrthoLearner.ipynb @@ -0,0 +1,377 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "from econml._ortho_learner import _crossfit" + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 107, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from econml.dml import LinearDMLCateEstimator\n", + "from sklearn.preprocessing import PolynomialFeatures\n", + "from sklearn.linear_model import LinearRegression, LassoCV, Lasso\n", + "import numpy as np\n", + "X = np.random.normal(size=(100000, 3))\n", + "y = X[:, 0] + np.random.normal(size=(100000,))\n", + "est = 
LinearDMLCateEstimator(model_y=LinearRegression(), model_t=LinearRegression())\n", + "est.fit(y, X[:, 0], X[:, [1]], X[:, 2:], inference='statsmodels')" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 0.04347998, -0.15620713, 0.8605264 ]])" + ] + }, + "execution_count": 108, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X[:1]" + ] + }, + { + "cell_type": "code", + "execution_count": 109, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(array([0.99336201]), array([1.00388333]))" + ] + }, + "execution_count": 109, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "est.effect_interval(X[:1, [1]])" + ] + }, + { + "cell_type": "code", + "execution_count": 110, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([9.98645400e-01, 1.45519805e-04])" + ] + }, + "execution_count": 110, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "est.coef_" + ] + }, + { + "cell_type": "code", + "execution_count": 111, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(array([ 0.99344973, -0.00509725]), array([1.00384107, 0.00538829]))" + ] + }, + "execution_count": 111, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "est.coef__interval()" + ] + }, + { + "cell_type": "code", + "execution_count": 112, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0.99862267])" + ] + }, + "execution_count": 112, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "est.const_marginal_effect(X[:1, [1]])" + ] + }, + { + "cell_type": "code", + "execution_count": 113, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0.99862267])" + ] + }, + "execution_count": 113, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "est.effect(X[:1, 
[1]])" + ] + }, + { + "cell_type": "code", + "execution_count": 114, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([-4.99311334])" + ] + }, + "execution_count": 114, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "est.effect(X[:1, [1]], T0=5, T1=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "ename": "AttributeError", + "evalue": "'NoneType' object has no attribute 'shape'", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[0mX\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[0mX\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[1;31mAttributeError\u001b[0m: 'NoneType' object has no attribute 'shape'" + ] + } + ], + "source": [ + "X=None\n", + "X.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(1, array([[1., 1.],\n", + " [1., 1.],\n", + " [1., 1.],\n", + " [1., 1.],\n", + " [1., 1.]]), 3)" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import numpy as np\n", + "x = (1,np.zeros((5,2)),3)\n", + "x[1][:]=np.ones((5,2))\n", + "x" + ] + }, + { + "cell_type": "code", + "execution_count": 86, + "metadata": {}, + "outputs": [ + { + "ename": "AssertionError", + "evalue": "\nNot equal to tolerance rtol=0, atol=0.08\n\nMismatch: 33.3%\nMax absolute difference: 0.10025215\nMax relative difference: 1.\n x: array([1., 0., 0.])\n y: array([1.100252, 0.043722, 0.018593])", + 
"output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAssertionError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 24\u001b[0m \u001b[0mcoef_\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mzeros\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m 25\u001b[0m \u001b[0mcoef_\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m---> 26\u001b[0;31m \u001b[1;33m[\u001b[0m\u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtesting\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0massert_allclose\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcoef_\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmdl\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_model\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcoef_\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mrtol\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0matol\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m0.08\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mmdl\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mmodel_list\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 24\u001b[0m \u001b[0mcoef_\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mzeros\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m 25\u001b[0m 
\u001b[0mcoef_\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m---> 26\u001b[0;31m \u001b[1;33m[\u001b[0m\u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtesting\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0massert_allclose\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcoef_\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmdl\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_model\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcoef_\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mrtol\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0matol\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m0.08\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mmdl\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mmodel_list\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\numpy\\testing\\_private\\utils.py\u001b[0m in \u001b[0;36massert_allclose\u001b[0;34m(actual, desired, rtol, atol, equal_nan, err_msg, verbose)\u001b[0m\n\u001b[1;32m 1491\u001b[0m \u001b[0mheader\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;34m'Not equal to tolerance rtol=%g, atol=%g'\u001b[0m \u001b[1;33m%\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mrtol\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0matol\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m 1492\u001b[0m assert_array_compare(compare, actual, desired, err_msg=str(err_msg),\n\u001b[0;32m-> 1493\u001b[0;31m verbose=verbose, header=header, equal_nan=equal_nan)\n\u001b[0m\u001b[1;32m 1494\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m 1495\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[0;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\numpy\\testing\\_private\\utils.py\u001b[0m in \u001b[0;36massert_array_compare\u001b[0;34m(comparison, x, y, err_msg, verbose, header, precision, equal_nan, equal_inf)\u001b[0m\n\u001b[1;32m 817\u001b[0m 
\u001b[0mverbose\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mverbose\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mheader\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mheader\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m 818\u001b[0m names=('x', 'y'), precision=precision)\n\u001b[0;32m--> 819\u001b[0;31m \u001b[1;32mraise\u001b[0m \u001b[0mAssertionError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmsg\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 820\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mValueError\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m 821\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mtraceback\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[0;31mAssertionError\u001b[0m: \nNot equal to tolerance rtol=0, atol=0.08\n\nMismatch: 33.3%\nMax absolute difference: 0.10025215\nMax relative difference: 1.\n x: array([1., 0., 0.])\n y: array([1.100252, 0.043722, 0.018593])" + ] + } + ], + "source": [ + "from sklearn.linear_model import LinearRegression, LassoCV, Lasso\n", + "import numpy as np\n", + "from sklearn.model_selection import KFold\n", + "\n", + "class Wrapper:\n", + " def __init__(self, model):\n", + " self._model = model\n", + " def fit(self, X, y, W=None):\n", + " self._model.fit(X, y)\n", + " return self\n", + " def predict(self, X, y, W=None):\n", + " return self._model.predict(X), y - self._model.predict(X), X\n", + "\n", + "X = np.random.normal(size=(1000, 3))\n", + "y = X[:, 0] + np.random.normal(size=(1000,))\n", + "folds = list(KFold(2).split(X, y))\n", + "model = Lasso(alpha=0.01)\n", + "nuisance, model_list= _crossfit(Wrapper(model),\n", + " folds,\n", + " X, y, W=y, Z=None)\n", + "np.testing.assert_allclose(nuisance[0][folds[0][1]], model.fit(X[folds[0][0]], y[folds[0][0]]).predict(X[folds[0][1]]))\n", + "np.testing.assert_allclose(nuisance[0][folds[0][0]], model.fit(X[folds[0][1]], y[folds[0][1]]).predict(X[folds[0][0]]))\n", + "\n", + "coef_ = 
np.zeros(X.shape[1])\n", + "coef_[0] = 1\n", + "[np.testing.assert_allclose(coef_, mdl._model.coef_, rtol=0, atol=0.08) for mdl in model_list]" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXYAAAD8CAYAAABjAo9vAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJztnX10VOd957+/O7oSI0gk8XI2RgLj\npDm4wRBYKw5noenaTkwax4pibyExaXKatE7Wp3VwEhFwXCO89loJaUzc1tt4HWfrGrvIgSgQNwfH\ngbQr7+JYRLyEGNrYrg2DswXDEIMGaTTz7B+jO7pz53nuy9y3mdHvc44P1sydex/NjL739/xeSQgB\nhmEYpn7Q4l4AwzAMEyws7AzDMHUGCzvDMEydwcLOMAxTZ7CwMwzD1Bks7AzDMHUGCzvDMEydwcLO\nMAxTZ7CwMwzD1BkNcVx09uzZYsGCBXFcmmEYpmY5cODAGSHEHKfjYhH2BQsWYGhoKI5LMwzD1CxE\n9Jqb49gVwzAMU2ewsDMMw9QZLOwMwzB1Bgs7wzBMncHCzjAMU2ewsDMMw9QZsaQ7MgzDhMHAcApb\n9hzHqXQGc1uT6Fm1EN3L2uNeVuQEJuxElAAwBCAlhPhoUOdlGIZxw8BwCht3HkEmmwMApNIZbNx5\nBACmnLgH6Yr5IoCXAjwfwzCMa7bsOV4UdYNMNocte47HtKL4CETYiagDwI0AHg3ifAzDMF45lc54\neryeCcpi3wpgPYB8QOdjGIbxxNzWpKfH6xnfwk5EHwXw70KIAw7H3UZEQ0Q0dPr0ab+XZRiGKaFn\n1UIk9UTJY0k9gZ5VC0O/9sBwCiv69uKKDc9gRd9eDAynQr+mHUEET1cA6CKijwCYBuDtRPSEEOJT\n5oOEEI8AeAQAOjs7RQDXZRimzvGS5WI8HnVWTDUGbUmI4DSWiP4zgK84ZcV0dnYK7u7IMIwdVsEE\nChb4AzcvrqoslxV9e5GS+PHbW5N4fsN1gV6LiA4IITqdjuMCJYZhqpI4slwqcamogrOpdCY210yg\nwi6E+BnnsDMMEwRRZ7kYO4RUOgOBSZeKkyjbBWe9nCdI2GJnGKYqiTrLpdIdgixoayXqfHoWdoZh\nqpKos1wq3SF0L2vHAzcvRntrElTB+cOAe8UwDFOVRJ3lMrc1KQ2CWncIqkwdY12qYGqU+fQs7AzD\nKIm7qZZZMMOmZ9VCaRaOeYfgJrXRzXnChoWdYRgpKhEbeu0s9h07HarYx3FDcbNDsPPDG8fFlU9v\nJtA8drdwHjvDVD8qlwKhkO1hEHRueTXnr1+x4RnIFJMAvNp3Y+jX5zx2hmF8oQr2WYUt6IwPL9kp\nUZfy10o/GnbFMEzAROFGiOIaqmCijCAzPtxmp8RRyl8N/nM3sMXOMAFSaZFLtV0DkKcbqtL5grRY\n3VrFcVSmWlMb21uTVeEissI+doYJEDd9Q/xa21H2JrGu9dor52DHgVSo/m+Zj93w67eb3i+VvxsT\nx9XjeDy3PnZ2xTBMgDi5EYJwH0RZai9LN+y8fGaobiBzVkkqnSkJ1prfL5WriCaOsx5fL+LuBnbF\nMEyAOLkRgnAfxB3A617Wjuc3XIdX+27E8xuuC0UwjWu0JnVlsFblKgo7uFsLsMXOMAHiFFwLwtp2\nW0gTZx51pZjX3Z
LUkc5kpcel0hlpvrgq2DvVxuOxsDNMgDgVp7gtW/dzjWoc/OAG67pVog4ULPOB\n4VSZq6gayvnNxHWD5eApw0RIFMU3UQZXg0S1bhWy36eaipvCWAsHTxmmComi3NxLHng1uWu8uktk\nx0ddzm/3HrppPxAWLOwMEzFhN7Zy4+6pRneNl4IoAGht1ssei/Jm5fQeRj0oxAxnxTBMFVNJybxT\nH/OB4RS+3H8o8uIeJ9wMrDBz4dJ4yfsRVeGWgVOG09zWJLq0QQw23oFXmm7FYOMd6NIGI/H3s8XO\nMFWKV6t6YDiF3l1Hi0FHjYC8KC3qMc6ZU8TWgrImK7GcvWS5AEA2L0rcGlG7Pk6lM+jSBrG+oR9z\n6QxOidn4xvhq7E6vBABsfc+/4qoDjyJJYwCADjqDr+uP4pfvWQAg3FgHCzsTGdXm0612vAjVwHAK\nPU8fQjY/Kdh5AegJcvT7mnE7VMIOP24et1kuBuYbUdSuj8/M+DnWZx9Fs0m4+/RHMVNvBA5fxPuG\nNwJU+l4naQzve/mvAHw+lDUZsLAzkVCNPt1qx4tQbdlzvETUDbK5UqvWSeRS6QwWbHgGbc06blxy\nWUn7ALefWZCWsyxn34z5RhREKikO9wM/vRc4fxJo6QCuvwdYslp6zKbxEyBL85xmGsNG+l/A7lFA\nKG6g50+6X0+FsI+diYQ4GjbVOl4qTO0E2/ycW5E7N5LFE/tfr+gzU1nYXgKjBkbTrTZJoNRalOV7\nRurhfmD3HcD5EwAEcP4EMjv/DF+8a+NkfMN0jKohWlM2DWRtfteWDnfr8QELOxMJcWYI1CpehMpO\nsM3PeQ1QynD6zBJWM9bhcTsMV1B6JIu2Zh2tSV3ZVdFv58XR3T1lgpzEKHoa+ou7lZEf32Mv2k7o\nycIuIGTYFcNEQiDb5Bql0tiCl5zsnlULy3zswKSP3e6cXi1pp89MFZhVPa7C6r47N5JFUk/gwTVL\nle+fm1RS8+fR2qxDCOADo/vwbT0t7Us8l94EUNitTMv8xtPvUAIlgJseKnfthAALOxMJtTKgIGj8\nxhbc5rwbx5izYtqadWy6aVHZ663nXLDhGeV5ZWPwnD6zdsXNot3jTTwIX71T2+FzI4X3an1jf5m/\n3OCUmDX5//lZ6NDOePo9ABQs9YhEHWBhZyKiGgb8xkGUKXiVFj612jTbWrt8vufB1UHdxP2672Q3\n1W37X5f2cJ9LcrEWAvjG+KQYP9r4KfTSdzy4Y0gdhA0RFnYmMsKuuKxGgo4thJEy2tu1SOrGAYB9\nx057vobsJn7tlXOwZc9xrNt+EAki5IQoya+X0dqsFy1qM27dd7KbqvU3NPLQVd7/PICt+sNYL/qx\nFZ/A0htvBxKLJjJnTtgvoPNzwEe/5WqtQcPCzjAh4jW2YCfcQaeMWlvkEqFMSK3XcHtjsQ7LMFvK\nhq/dbv0DwylcuDRedl5rzMAOVQHRrnyhgKhLG0SfPpmHbkUIIDGh+B10Blvw16BUpiDWS1ZPZshY\nrXfSgKv/ODZRBzgrhmFC5dor57h+3KkkPsiUUeu10pmsVEjN1/BSsm8+Fii3lJ3Wr8rLn97Y4Pom\n9pkZP0ef/ig6tDPQCOjQzmCr/jDubXgMALC+oV8q6kIU1mv1uRMADD1WEHSgIO43PQS0zCs82zIP\nuPl/ApvOxSrqAAs7w4TKvmOnpY//6NAbZY85CXeQbh3ZtWRCar6GlxuLU4Wr9dxuHgOA89ZYwOF+\n4MGrgN7Wwr+G6AJYr28vE26NgD9qeA4Hp30e7Qq/Okg9tBsQBTeMwZLVwJ2/BHrThX8j9KPbwa4Y\nhgkRlUClM9nioAinY1PpDAaGU4GmjFaS4ujlxuLlZtOSLC8+cvxdD/cDP/4qkDk7+eT5EwXXyOv7\ngX99Fs2Z8psnUBDtVrylVG/HbPsIKkf9whY7w4SIneiaLd2B4RQ0mwKejTuP4Nor
5/irrDThtVio\nZ9VCT5WwXm42sqXYFmcZvm2zqBtkM8DQd50DmwpcZdpLKkcr6cIZJizsDBMidqJr9GX53b/4MXq+\nf8i2gCeTzWHfsdO+KivNeC0W6l7W7qkS1kuFa1qS+WJbRfrTe/1Vf6pomYez+Rn2x0gqR6NuF+wG\nHo3H1Cy10i1y2b3PStP2vEIAXu270f+C4H0MXYIIy9/Zhl+98Vbxd2lN6ujtWlSStWNNcTTnwF8c\nHbedY5ogwiffPw/3dS+2X0xvK1za1u5pmQfc+Uv03rcJ67MPl/jm86Kwq6CWedJ89ChHEfJoPKau\n8ZL6F/cNYNNNi2w7FLqlkhRJFbIiIl0r5JfLYqg5IfD8y6Wuj/OZLNZtP4gte46XVXSm0hnsOJAq\n2VHIZoBar/HE/tcBAPe98yV1l8WWjopdLUjOBMYuADlTUNVkhS+98Tbc84NxrBP/gLn0Jk6JWdiK\nT2Bl9+3K97Qa+yD5FnYimgfgcQDvQCGf/xEhxLf9npdh7HBb0VkN7YKtOd2VQJC7dSr5/YwbQSab\nKysW8rJOQ/9VFZ3Wz8Pt+3DhxaeAo49NulvOnwB2/inwg88DIl8QZ00H8hXsgv7g64V/FTeNwhpv\nx5o917u+UVZjHyTfrhgiugzAZUKIXxDR2wAcANAthPiV6jXsimH8csWGZ6Sbcau7QrVNbmvW0dzY\nEKkVb4yk8+rfBoBmXUMmmy9bq1c3gMxqTuoJPHBzwf1hWP5BOTpk7iMny32w8Y7K+rG4IdEIfOxv\nAk1LtHtPg/5OReaKEUK8AeCNif9/i4heAtAOQCnsDOMXt1aSajt8biRb9BVHYcU7jaRTQQAaNMJI\nNg+gsoHJZleNNmGhm8lkc+jddRSj43nf7iIrMqvVcYqTKr88CHJjhTTJAIW9GvsgBepjJ6IFAJYB\neEHy3G0AbgOA+fPnB3lZps6R+ZDdNppy25Y2zNmYfix1oLxwyLxWpxvc3QNHpOX8VuyCmm6xdoIk\nFG5EK/r2lgidk+/5lJiNjjDFXZYm6ZNq64MUWLojEc0AsAPAOiHEb63PCyEeEUJ0CiE658yRl1kz\njBVVKhkAV6l/XtLunASnklzlSi11A9WrjLXapSAODKeU3QyDhlDoBGm05jWLvPGZvbjrO8CDV+Hl\naWsx2HgHurTBsvN0aYOYTWlU6iEe1VvxG8xBXgSeN1NTBGKxE5GOgqhvE0LsDOKcDAPYB0mf33Cd\nq+HIxnmc0u7sgl2yIOW67QexefdRac9zu/UHgXmtTQ1a8RrmHuwr+va6EreknsA0XXOVktnWrONS\nttRlY4i6kaYo8/t/KPdPuOoX3wUwCg2Fvi19+qNAFiVNub6pP4JGkvescUIA+P7o+/D7GAaokKaY\nkNVhJWdWdP5aIoisGALwXQAvCSHi7XzD1B1BpJJZt8mqYJddMZFKoM+NZG3985VmwdihAUiPjEkH\nZFya8MUD7t4jIxcdgGNKZlJPYNNNhWPt/Mmy665v6EcSoyWPNdMY1jf0Y9dYQdh79ccrFnUAGEET\nPk7/VJKDLkRpZesYEjj0uxvwvoqvUhsE4YpZAeCPAFxHRAcn/vtIAOdlGE9l7G6pZDamnUiqGmEN\nDKcc+454nwJayCm+OCYXYPNa3LxH05saijc+63vyqQnXivU9MqpQjf4xRudHA+O6XdogBhvvwCtN\ntyobbrVrZ3Cg8Ta82nQr2nDB0/tQgp7Epbxe1vSLCBgXGvKCcDI/G18Z+zw+/eLlsZf8h00QWTGD\nqOz7yTCOBDlSr2CpH0bGZNU265qrDAanIKxM+LfsOe7oCgnDD2w0DZO9d6pjDcE2FxNt3n206J65\nODppSTvlzvesWojBHzyMe0nd67yIAGZpPgQdKM4Sbdt5m/RpDQLvHN02+UA+vEB5tcC9Ypiqxu/k\neYOB4RS+tP1giagDwEg2jy/1HyxacKoAqVMQ
VtahMM7KQ8P/f8vV7Y6zRu/cfhB3Dxwp/jwwnELP\n9w+V+NzTmSx6nj5UUtxkxrxT6F7WjvubnnAWdcgbgHlG5IElq3Ep+Q7p0+aZpcXHYvxsooBbCjBV\nTxCpZFv2HEde8VxeTHZadKrivLP/oDRjQyZQblMtrWmCZrSJIGAlnBvJFsv6AbUPXQDYtv91dF4+\nE93L2gtDLnLlF83mRdG3LuPUhPV/8JlHsCmbjm4fP9FtsfkP7sX4D/8cDblLxadGRGPJzFKDOKtC\no4AtdmZK4Jg77WKQRPeydmUanqxDodtUSzvdfvu08p2AF8w574bAq9YgG+hh9pMPNt6Bq3/7E8xt\nTZY9vrnhMQw23YGugffgnuzWYCxxN5i7LS5ZjYaP/RVGkpcVfeobsn9SzLoxULVnqIRqa9drwBY7\nMyVwsp7dDJKw/aMllA3OsKZaejW8CeWFQ23NOm5cchmeOfyG646RZj+6XZ8W4/c03ivrTNAOKqQo\nHmp+A+/NPFPy+KfpuVDFPA9A6/wcMH+5ujkYACxZjQ/942ykRtWf9drl8wPxr1dDHyIVbLEzU4Ke\nVQuVX3aN3A2S6N11VHl+IVDWg9tcMdvarNt6JqzP2bln9h07jfRI1tF3bmbjziO4e+AIRsbU6YTG\n72lYs7KZoM00hve9+UNp9kmQCAG8mZ9RtLzvHLt9coi0wyg6u92ZniB0Xu4+j93OIg9yBm3QsLAz\nU4LuZe341pqlSOqlX/lmXcO3Vi91HCQxMJxyLLs3/1EbAUijYvbcSNbWYjc/19asK489N5ItqcJ1\nSyabwxP7X7e18i+OjpfsOlQ9WxLKaEVwCABXjz2Cd45uw8qxhzD09g+5fq2d/zybE66F12mARjW2\n6zVgVwwzZXAKwqqaOQEobrGdMP6oN+8+Kg1AukHmrw+bLm0Q63P9mDtwBid/MBvdidXh92yx4ZSY\nXfx/r+mtTmmeboXXqTV0NbbrNWBhZ2qWMAZoWMV9y57jGBkbd90WwPij9jMxKeoeJzJf+rfo4YI7\nSJS7WcIOjI6KBP5GuxUEVPS5GseqGq+5FV4nizzIGougYWFnapKwAley83phZGy8ajIj3CLzpWsx\nlBwKAZwVM7B5/NP4SWIFHlxTXq9g3MxT6UzZkBBZ4NqP8DpZ5NXYrteAZ54yNYX5D1uG3zmTQcwn\nLfjpRVkxVLXyStOtsQi5lbygkgrRBBHyQpS5xGS7J9VgCz+7uigHaLiFZ54ydYfT5B3AX+BqYDgV\nyNDpTDaHtmYd4zlR1ks9Trq0Qaxv6C/p25JH9fQDsVaIGm6UVDqDnqcPYca0BuVnr+qn76e4rZot\ncidY2JmawO2wCj+BqyDT1NIjWTy4ZqmvARt2JDRCY4KKuwKiCX845D56qx+9eJ7AV+bMuNCgIV+y\nS1BViBpk88LxphtGNkq1DdBwCws7EymVbI3dDqvwG7gKUhg0Ity5/WBogdBcXiBj2g00EEFvmByh\nZ6ZLG8S39L9FA8XnGspP3HRSYnZRwL+q92MuvYk3MAt92dVlFaJeqYZslGqBhZ2JjEoDnm6GVcgC\naF5x29vFDWFY6XZk83K3j2GpxyXqQgCXRAJfHf98mXA/l/8ABMjTIJKknlD62KshG6Va4AIlJjIq\nrdSzs6STegJb1yx1NU3JiTiFIWg/t9HL5dv6w666LIYFEfAm2qTW+EhWPjxbFchta9aLnT6Ncxs0\nNbCUmWGLnYmMSiv1VJY0Ab4zFKyuIS+0JnW8dWk8EOtcAJjemFAO0FBhBETn0hnkoSGBPPIgaBDR\nNeJyYC696en4vCiU/psLvPQEFUf+dS9rLw7pNkhnSidZhVHjUEvwbY6JjEqnIfWsWghdYsY1SAda\nTuLUeU9WMq46o/XxpJ5Ab9ci5AN0uVQi6n36o+jQzkAjoIHyIAISVD2iDhSyXWSfnwoCsOZ980p6\n8G/5L+8t
GQIiG9Jt7P6cWgFMBTiPnYkMP3nBqvxyVd663bUA2ObCqzAXxCyYlcT+V85F7ks3M9h4\nBzq0eEr+zYwLDRoJnMtPx9voUsnc0hHRWGyda7x/brCrR5ANyzYwqlVlz/utcagGOI+dqTr85AWr\n+qek0hms6Ntbdj6VP/9L/QeRIKoovzwnBHSNsGBWEs+/fNbz64NG1aQrTKwtBszC3ZrU8YHRfROu\noTdxSszCN8Yns1283ATt3HN2z7lpvzwVYGFnIqXSvGC7jBXjcXOWjeqPOC/gy32SzYvIRb1LG0Sv\n/nhx2LNAvEVFb+ZnoI0ulgk3EfCTxO9j15i/tEXA3j1nF3PpWbVQuRubSumQ7GNnagK304gy2RzW\nbT8IrZqczBXSpQ3iQONt+Lb+MGbSBRAVxFOb+DeOX5EIyGBasZ2uOdslPZItmU9bKU6pi7LvAmFy\ngIZd++WpAlvsTOy4yWCwunE0B39tnL7vSjBnt5wSs/HT/FL8YeKfY01VVKHKcmlt1kt2ZE6+cNkn\n1JrU0du1qKL2ysbjtdwKICg4eMrESqUB1Ss2PBN5e9uwkJX7y9rlho0hBU7XPZmfjZVjD5U9rmuE\nLX9Ymr2ybvtB6TkIwINrlk5p8a0EDp4yNYFT0dLm3UeL2TBmay7IKtGosVrnrfRW6KPm3GC+phCT\nvnxrsFTV0yWbFyWNuLqXtZd8fmbmtiZrtg9LLcA+diZWVEHOVDqDnu8fKhGFdCaLnqcPFcbOufS5\nR4kbLe7SBrFF/04x97xDO4PpGA19bV4hKkwx+mL2dpzMzy7OHjUyYFRYb7abblo05f3dccAWOxMJ\ndw8cwVMvnEBOCCSI8Mn3z8N93YvRktSls0SJIB0tl80LfLn/EP5y9XvxwM2Li1v5lqSOi2PjFY+j\n80tbs44bl1yGJ0zVkDI2NTyOJirdoVRrnHcuvYndYqWnLBcCinNTjdhJJpuzHYrBBA8LO+MaN0FO\n2TFDr50tEbycEMWfVaJmF/rJCYGNO4/ggZsX4/kN1xWvmc5kPRXBBMlvM+N4Yv/r0KiQUmnFcL/M\npAuRr83A/LacwwwIAbTRReRB0iZhp8QsEIAGrTTvX08QdE3eSVJgsv2xOXZifCYXR8fLXsMEDwdP\nGVe4CXKqjrk0npMKtTEhx8830PABm8VUTxDG88L25hAVXdog7m/4LmbQaKyW+bjQ8KXsF6RuFFnw\n1lx4ZL5ZJXUN0/QE0iNZ5edmV/1ZOEe8U4hqGbfBU/axM65w05lRdYxKYHMTY89ktCZ16A69YICC\nhWi1kLO5+EW9SxvEL5oKOehv0+IV9RHRqBR1ANiVX4kN2T9R+tLN728mm8c5G1EH7Ks/C+fIoXfX\n0Up+FcYlLOyMK9yUaXst2U4QSYOgukZFH3s1zOL0yuP6/SVFRXEhBHBBNDkGPIGCuK8ce0haeOQF\nIzDqVOWZzmSnVFOuqGFhZ1zhpjOj6phmXf41++T756F7WXtJtWJrUgcIxWyYvCgIfaJKFN7oc/5K\n060YbLwDXdpgyfObGx7D72lHYw+ICgE8nvsgrhr9nu/JRG4wujAaLhY3WUtBjiJkSmEfO+MKPz72\nB25ejKHXzkqzYqyoqhVVlYpRYueLvlr7F6xN7EUC+dhFfUw04CvZ23wLuioQbMWuw+bXfnDEth1x\n+4TbxqlAaar3Vzdw62NnYWdcU2lWjJc/wGquKFW1yTXmecYl6HkBpDEDrShvzOWH1qSO0XH5lCMD\no4JU9Rl7aSugCqr6afdcb7CwM6ERpvWkEoK40hjNvNJ0a9X5/IUAvpi9HbvyK9HemkR6ZMzzwA4V\n5rJ/YwiJsDy/dvl86c7LwOuNWmb9q74T9dBf3SuRthQgog8D+DaABIBHhRB9QZyXqT4qHUjtlp5V\nC6XW2X+c3+KpXW57CC0HTonZ6IihB7qBtX9MXgB/n/tg0ToP+ve1lv073d
Ctz1975RzHZm1WZAF4\n7q/uHd/CTkQJAH8D4EMATgJ4kYh2CSF+5ffcTPVhl/boRtidxEHVmc9roM0oWb9z+0Hfrh2juKid\nzsTSnMtgFAmcybdJh1gEjazs3663i+yGr6rCTeoJTNM1ZQ8Z2WNTvb+6V4Kw2K8B8GshxCsAQET/\nAOBjAFjY6xC73i5XbHjG1jXj1tqXCcidii6BKnp3HcXBTTdg6LWz0vmYbnlcv78qslyygrA++/lI\nMlwA4JarvTXo6t111NYXb5AgKo4nlO3MZD1kVLs47jejJoh0x3YAJ0w/n5x4jKlD7Kwkp8HBboqc\nKrmuDKP/zH3di/HgmqVo9/B6I6Xx1aZbYxH1Qv75NLyZn1EsGPpy9r9GJuoA8NQLJ1znmQ8Mp6T9\nfmTkhSjeuM1pruZUSStejmUKBGGxy772ZQYSEd0G4DYAmD9/fgCXZeJAZj1ZUblm7Kx9o3GUimuv\nnOPYYMuKdRaqamSauY3uCKahGZdiC5Kag6FxYvTjAZxjJ17cZOYbtJe2vdzi1xtBCPtJAPNMP3cA\nOGU9SAjxCIBHgEJWTADXZWLA6gNXfZAyEbfrH7Jx5xEMvXYW+46dLvO/DwynsOOA9ypF6yxU2c1o\nc8Nj+HTiuaJVPgOXPF8nSFJidpmotyo6YIaNeTdlFxdxG8Rk90l0BCHsLwJ4NxFdASAF4BMAbg3g\nvEwVoAp2Oo0/k7lO7Kz9TDZX4gs3+99lLhyvmF9vttDjzD+3IhtiYbS5tXbIrARZ3ngun8eYTatj\n601RFhdR3bCnNybQ2tw45YuK4iCQPHYi+giArSikOz4mhLjf7njOY68N7h44UhZ4dFNtaghI+0TK\nm9kK9+pSMSoT/XxLzVktxTVWgZiPiQa8JaahjYItLJKR1BO45er2sh2RU9aQqn7AnEPOBUTRwQVK\njC8GhlPKP3prYYhh1cuKWKzoCfI0DMOpBayKarXKCyPnKHQhlyEbclHJwGnjuVf7biz+rBqkwgQL\nt+1lfLFlz3HX/vPuZe14fsN1aG9NOlrWXicctTbrOHfR2+g4o6eLMX6uWkQdKPjQ/XZQrPjakowl\nO5+3QGEylAyzq82IgRiWfU4I7DiQ4u6NMcLCzkixs5BVqYdhVAKeH8lKJ/XYafX6hv6y4dBRI0T5\nFCi7QdBRYU0v7V7WXuioKaGtWceFS+UTj/QEldwQ/KSxMuHAo/HqlEr7uQwMp7B5t3oIAmHSyrNe\nQzW/1A/lkl7AdtBDTGX/hpBfRBPuyn4OACbcQeFXinrBfAMeGE5JdzQaFWoBZJ7a8bzAndsPYsue\n4+hZtZBL/qsQFvY6pNJ+LrIgmJW1y+cXUxCt1zBmYWbd9HoNCWt/9ChJidlYOfZQyWNeBkFHReuE\ne8Xu87b7CA2xN75Xqhs6l/zHBwt7HVJpPxc3aYVGQEx2rFf/uR1JPYGmBs12B9ClDWJTw+NlA6Lj\n8KmH5WZpatAwOq7at9jzqeXzsf3FE2Wfy4VL48XdVhBppNN0DUk9wSX/VQT72OuQSrfGTs+by/LD\n3Ga3JnU8cPNi9HYtUn5Bu7RBfEv/H5ilFcbPmf+LCiEKlu0pzMZd487j5yph9owm23iCitakjvu6\nF2N6Y7ntls2LogstCNIjWS7SnFtfAAAamklEQVT5rzLYYq9DKu2G5zRZ3myBVZKC6IYubRB3iafx\njh+eAVo68MK0j+OpS8vLrHMBxFL2X0hXLLTw/cb4ahx4+4fQs2ohfrbrKBBCdagRv/D6Xn/0vZcB\nAM4r1lTpeWVY2/sy8cPCXgfI+mDvOJDyvDVWVYa2JnX0di0q+cN10zPGLeacc8C0jTx/An8h/hZX\nNvwKn0jsQxOVFkGFjTVwKFDof75p/LOTD6Yz6Pn+oUDdUGaMwLeqCEzFjgMpdF4+0/Ym7+UzNAqc\nvHyveJxdfLCw1ziyIOaOAylplaHTH5
WqF7qq4x4AfLn/kHKQgpupR7I5omaaaQxrE3vRQJX5mSvF\nGAZdIuIKwhJ1QzStn0tLUkc2l7edlGTEVOxa3prPm0pnip+XuWLYeDyTzWHfsdOuv1dhD2Rh7OHK\n0xrHz9iwICwqu9FnW01j1VSo5oiaiWq4hfGnkIOGbbnrXIl60JjbMahmyrq1so3qUD+pr5W2CuBx\nduEQ6Wi8WqOetoiVBkr9pESa37vWZl06CaetWUf3snbHARlucs6jEnW3Fnqo60C5RW3GSyaLEVOp\n1P/tZ1oW57bHy5TLijEELTXRWMpuMEQtoAqIOgVKVX+0dsVJsvfu3Ei2LIiZ1BPYdNMiAJM50yrS\nmGH7fFQQAddr3qY0hYWsanNgOGXb10XGtVfO8bUOP+Jc6feSCYYpJ+z1Vv7cs2ohknqi5DE3gVLV\nH+e5kSzuHjgifU5lLeZFIcAqS3UbtbEuu7RBtOKC8vmomUtvhnr+Bg9pPIaADwynsOzeZ7Fu+0HP\nGSz7jp32dLwVP+Jc6feSCYYp54qpty2il4CnGbtUt237X0fn5TNdT0ACgOlNDejtWoQte44Xy82v\nvXJOsc9LlzaIXv1xtGEyXTHsrouGz9ztNU6JWeEtBoVSfLckiDz502X4+U4PDKdwcbS8T4xbca70\ne8kEw5QT9nqceF6JD7Vn1UKsU/i/BSD1o9rdDGQDGbZN9F3v0gbxTf0RNNKkUESVgv547oNYm9iL\nBPLIgzAiGjGDRsvy4KuhQZeZnBCu/OnNuiZtkgY4u8FUqG4obc06Nt20yPV3jXPb42PKuWLqfYto\n+GKv2PAMVvTtVcYO7Lr6AXJrr2fVQqUgGylxZgz7dH1Df4moR8VZMQObxj+L3xl9AleMPol3jW7D\n4rHv4YrRJ7EueztO5mcXh0VvyIZTOVopxoARJ9qmNymfqzThTXVDaW5sYKGuEaacxV7PW0SvmS69\nXYuUwzRkO5juZe0Yeu2sdKqSVQhkU4uiZFQksHn809LnpjcmsGtsZVU26DIw55GraE3qts+rqk6d\nqDd35VRkygk7UL9bRC/paQPDKfTuOioVdWMHI0sLva97MTovn1ny+IJZSTz/8tni6zc3PIY/SjwX\nW8l/aqLcX2WB2xX2VAv7jp22dZcBhbYB1pusGePm7DW9tx7dlVONKSns9YpbS2tgOIWepw9J2+sa\n7QMA2Fr/hjDcPXAET+x/vcxCj6vDYrW5VAycWgBYOZXOoHtZOzbuPIyMxIferGvYd+y08pwaoXhz\n9lqvYFetytQGU87HXs+4TU/bsue4smf69KaCH9U2LfRwP/DgVRC9Ldg8/Ht4telWfFt/GB3amdA7\nLJb1b5mYVBSGnzyoX8OrqAOTn9kDNy+Bbtn66Brhv9+8xNY1kph4TSXpvd3L2rlbY43DFnud4CU9\nzU4QUumMbZuAzt/+BNj9PSBbGFydoGhbUlxEE9L5t4U+laitWcfwPTd4LgqSUck7ZBQX2cWE7No1\nZHP2rXmd/OX16q6cKrCw1wFe09Oc2rXaCdHGxqeBbDxBtLwA7sp+LnRXS0KjYuVskF0svfCjQ28U\nh5qoRNZpbXatedlfXt+wK6YO8Jqe1rNqYdn23g1JPYH/gHCyXJxS8/Ki0DJ3dwT+81xeYPPuo7h7\n4EjxvU1EHDRwMzvWcJmo1mZY9/Wc3svIYYu9DnCz3bZmRqy5Zh5+dOgNVwJCmBQJ+lkHcP5EUEuf\nvAYBOUHQJvYLF0QTsqSjFRdLXC7GLgSYbDdbiQ/biXMjWTwxUWAFFAqGdI0ACq9NbyUYN243rXnr\nLb2XUcPCXgc4bbfterbLZmJa+WbycXx89FloPyxkZ4TVRpcAXDH6pO0x50ay2LjzCB64eTGe33Bd\nID5wt0Q5pNvLhspJvNlfPvVgYa8DnNLTVJkRT77wesk0evMkozRmQAigjS6A8qVCHpZXwm2vFnNu\nvl
MQMAxrPgq83kNYvBkz7GOvA5zS01TiZxX1Pv1RdGhnoBEwky5glnYBWkQDovMCnnq1GL9Ti6It\nQluzjq1rlkYq6kG+Te0c3GR8wBZ7nWBnsclcNWbr/JSYjSQuKcfThY0RGPWS7TK3NVlI8RyT96C5\ncGnctrd8GDjdRPQElbm9mho0jI6XFiBxcJPxC1vsUwBrZkSXNoivm6zzDu0MZlK4fdGtWS95U2HR\nuuztnicXLZiVxObdR5XxgWxeSCc7xcm4ZK1WUW9N6lwMxPiGLfYpgDW4dlfj00ii1Dr3625x6n2e\nB+GN/KzACovMvWkqob01ibMXR6Xl+mHhxi1kVP4yjB9Y2Oudw/3AT+9F9/mT6G7pAG69B9gZfC46\nkToXXQjgidz1sc8TNfjU8vnovHymbYOtuOAOikwQsLCHQJjDsj2d+3A/sPuOyUrR8ycgdvwp8kRI\nyF/hCzFhrZuNdiGAY6K9akQdKHRO9Ds2Liy4IpQJAhb2gLHrpgf4KxTx3Knvp/eWlf8TAQmIinPR\nxcRMO9lLtZZ5eLl1BS7/t34kkEcOGrblrqsqUQcQWd67V9wETcM0Gpj6gYU9YFQ54727jmJ0PO+p\nfarbc8v6rQMAzp9UnsuPT33d2O34y+RjaMhdmnxQT+LFd/05PvF/5yEnuio/eQD4yV3XyHsOOVAQ\nZbcFXzLaXYh0JS14makJC3vAqHykstJ9W1H2cO6yx3/0JeDA9xBGaU5KzMYP8yvRRo3obdlRuHm0\ndODFd/05Pv3i5ciJ+IdYCBTSvbyGRWWToOwwbiCGKAPA9p+7b7dAANYun19s9uWE5xs7M2XxJexE\ntAXATQDGALwM4I+FEOkgFlarOHVOtOIlWKbKR7+38e+B3rdcn6dSzAOf/+7CNei9e3PxuXV9e5Hx\n2fXREMoEEXKVDuxEQWjfOJ/xdF8zxHnz7qPKNMmERnhbUwPOZ7JSN8iKvr2ObQeMWaaVuFF4ZB3j\nFr957D8BcJUQYgmAfwGw0f+SahtVN702xcR4L8EyWT76N/XvoBXhirpskIV13UGIiyGJb5vWAD1R\nma+IUHifvLhT2luTeH7DdQAKhU0yWpM6PnnNPExvUttCTjd04zqv9t2I5zdc59nKVn1XNCLH4eXM\n1MKXsAshnhVCGH8J+wF0+F9SbaMq79900yLf7VOt576r8Wk0UriuDwFgXfZ2rBx7qCjqsnUHmc2R\nzmQBUWgLQCiIqlvWLp+P7mXtrtvsWnvq2I0L3HEghVQ6A4FJ/7ZZSO2uGUQ1qcxoAAqdJ1VrYqYm\nQfrYPwtge4DnU1LtmQF25f1+111y7t61fpfqAIE6P4tr2/8MQw7r7lm1EHduP+ja+9GsaxixKQ7K\n5gWaGxswfM8NAIAFG55xPKdGQOflMwEAn3z/vJK2uzISRK566pzPZF35t+3cR0FUk1oLzTSJy4p9\n7gzgQtiJ6DkA75A89TUhxA8njvkagHEA22zOcxuA2wBg/vz5FS0WqO3MAOsfpjF3sqJ1H+4HSAOC\nCFZSAhB5INlW+DlzDmjpAK6/B1iyGt0u1ti9rN1Twc+oi8wRs9C2u4hd5AWKonZf92LsOHBSWVma\n1BNlYquKj2hEymu7WWN7azKw76b5xn6F4mbHPnfGUdiFEB+0e56IPgPgowCuF0JtsgghHgHwCAB0\ndnZWHBmr5cyAim5KE5WjRvYJ3n0DcPQHQMZfSX0RPQnc9BCwxH1nRRVuxNcg58IJbnbv9Kxa6OrG\nYRa1B25eIh0dZ7hWZLsO2fF2lrh1jXbtk4OGx94xKnz52InowwC+CqBLCDESzJLsqeXMAM8T443K\n0fMnAIjCv0PfrUjUhQDygvBmfgbOihkACGiZF5ioA2ofcCUYQVADtzdts6jJ4h1b1yzFwU03SM/n\nNGrOilW0ndonBw2PvWNU+PWx/zWAJgA/ocIfw34hxBd8r8qGWrZS
TqUzZe1yvzG+GrvTimZYksrR\nSkmJ2Vg59hCA0iyQIJFN8rn2yjl46oUTntMXBcrF3GlHIBO1SgZQuFkrAbjl6vJzRznwgsfeMSp8\nCbsQ4neCWohbot7uBslnZvwc67OPFvued9AZ9OmPYqbeCODG8hcENFvUOsTi4ug4BoZTjlWOlQiG\nIWzG67ftfx0tSR0Xx8Y9VWTKBk3YBWitgdBKMFxlbhCA634zYQb7eXISI6PmKk9r2UpZr29H83hp\nu9xmGsN6fTuAzaUHH+5HUIPdCChpkZvOZG19+34D1HcPHMG2/a8XV25U3U5vTGBkLIfWZt22V7r5\nRm0Vxf/0rpn4Py+fLXlXZIFQ8+/i9rsic5XZ4cb9V8vBfqZ2qTlhB2rXSmnO/Ebx+BvAg1dNBkiv\nv6fghgmoJcAbNLvsMbuAs58A9cBwqkTUzYyM5fDgmqXoXtZeJv4Gbc06Nt20qGj1W0Xx7MUxrF0+\nH/uOnXYUa6+i6jVO48b9V8vBfqZ2qUlhr1mSbYrAJ026Xc6fKG2164KslgTlR5EQ+bLOiyOiEX1Z\neXDUayDajfBt2XNceTsSKE1H7Lx8ZtGabm3WIQSQHskWg8kqUdx37LSrGIFXUbVrB2HdO7l1/9Vy\nsJ+pXXg0XlQc7gfGVOPnLFKYzRRyy6WUZmyMJ6Zh4/jn8DuXnsAVo0/ii2O342R+NvKCytoAWFFZ\nnE6PDwynsKJvr7SM3UmwzM93L2vH8xuuw9rl85EeySKdyZZUULrJHbdD9XrV46osk61rluLBNUsr\nynbx+h4zTBCwxR4VP70XyHkYFi1yhRxzs+WuJ4H33gr867NFt819F2/B98euKR6yK78Su8acR87Z\nWZx2AWon94ZTEzSroKlcN5lsTjmVyRhk7eQ7VzUTU6UzOsVvKnGd1HKwn6ldWNijwqY3upSWeZO+\ndrPv3ZJz/ncuSu2Bgu+6ubGhxO1x5/aD2LLneFG8zGLZ2qyjqUEr62S4om+v1L3x5f5DANRFPoBc\n0GxdNwLQE1SSTZPUE7j2yjmufOeqtEW7dEav8RunG0wtB/uZ2oWFPSpaOuTpi8mZwHim3DI3RNyh\neMhNm2A9QbYByY07j2DotbPYcSBVfPzcSBZJPVEMdhqo3CA5IbBx5xE8cPNiPHDzYmzZcxypdKZo\nNSeISoqxnPqzGIznBNqadaRHJm8wbn3ndiX+QeA2OFurwX6mdmEfe1gc7i9kuvS2Fv599w0FwTaj\nJ4E/+Hqh+rNlHiqpBnVV7WkyUFWi+NQLJ1xVxbYq2g+bjzd85//WdyP+cvV7kdQTRSvZ2oHQydcs\nUAiorl0+v9jq1m1AMuzKTM+VxAwTESzsYSBrBXDoyYJ/XCbgS1YDd/4S6E0X/vVQ4m8tY5f5j7N5\nURQblXWvck9YxdKpKDOVzpQEVZ3Ez82NSQDYtv91x5uB9fGwS/xVN5hUOsOtc5lYYVdMGPz4q+Xp\nitlMIeh55y8Dv5zbjn8DwyllyZMq0GgVy/OSEX9WzJktqoIfQxSNddtNLjLOaewGvAQkw3SD2LnB\nuAiJiRO22FVYXSmH+92/TtWky2sAtQJU1qwA8OX+Q1JRJxT6l6vcFub0Ri9ksjllBoq1WdfwPTdg\n65qltg24zDeDoCxxu9RNJ+x2G+ySYeKELXYZhivFsLqNoiHA2U3y03vVz7XIB0z57SVizWbRNZJO\nAlK5WwRQVjBkrAOAreXtRE6IsiHRdtY1AGU/GOvNwK817Lfc3zhG1U6Yi5CYuGCLXYasq2I2Yy/a\nBnZW+fX3lD1kiIvdyDU7rK8/N5IFyNs4OSNLxAh6mmdyeu2fIju3F+u6e1k71i6fD6vdHkbudxDB\nz+5l7cosGy5CYuKCLXYZKnF240qxS2uUWPt+e4nIXp/NCUxvasD5iUpOJ1LpDFb07ZXuFNxanW3N\nOi5l81LL3K11bd15CIGyPPog
Carcn4uQmGqDLXYZCpeJ8nEz19+jTmuU4Fdc7F7vxWJU7RTcnCOp\nJ7DppkW+/N6yncfoeB4Prlla3D0ETVDl/lEP2GAYJ6amxW4dN2et6Lz+nvJGXEbRkBPGeRwqRg38\nDg6xe71dFagM2U5Bdg5dI8yY1lBSNOSn7B6IpwtikJY2FyEx1cTUE3Y3gVGP4lyGi4pRA7/icu2V\nc8p6rZhdIACKVaBusO4AoiqJj6MLIpf7M/UK2cyfDo3Ozk4xNDQU+XUBTPQ9l/jAW+aFkmPuBlVW\njFO2jDWrAyikLq5dPh/3dS8uu4Yb6z2ssXlOrOjbqyz/j2M9DFONENEBIUSn03FTz2L3ExgNCdk2\n3k0qnsx9oRrZZrVOZePq4gz41UsAMswxeAzjlqkn7KqsFTeB0Qhx43P26r6w3kCqSYSidIuE9Xvz\nGDymWph6wu4nMBohbkTbb+C12gJ+UawnTPHlMXhMtTD10h2XrPbVTTEq3KTihd29sB4JsyMjj8Fj\nqoWpZ7EDnrJW4kLlc772yjlY0be36Ea45ep2V4OdmQJhiq/fHRTDBMXUFPYaQOZzvvbKOSXDMFLp\nDHYcSHExjAfCFN96CQAztQ8LexVj9TmrxtKxD9c9YYov58Uz1UJtCrtT5Widwj5c/4QtvtUWkGam\nJrUn7H5a6tY47MMNBhZfpt6pvawYPy11a5xayoLxM8CCYRh/1J7FXoWVo1FRKz7cai7UqaaiLIYJ\ni9oT9hqpHA2LWnAjqHLFe3cdjXXtfm84fFNgaoXac8Wo+p1XWeXoVEYVzE1nsrG6ZPwUJ/mddMUw\nUVJ7wl4jlaNTGbtgbpwDnv1kFYVZscowQVN7rhggkspR3nZXTs+qhVU54NlPVhGnmjK1RO1Z7BHA\n225/dC9rR1uzfJh2nKmZfrKKghqjxzBRwMIuoZq23aq0wWpPJ9x006KqS830M5u0llJNGSYQVwwR\nfQXAFgBzhBBngjhnnFTLtluVxTH02tmynjHVkk5oUK2pmZVmFVXr78MwMnwLOxHNA/AhAK/7X051\nUC0Vnqqdw1MvnEDOMtKwGnvG1EJqphfq7fdh6pcgXDEPAlgPIPrhqSFRLdtu1Q7BKupOxzMMM7Xw\nJexE1AUgJYQ45OLY24hoiIiGTp8un8lZTfjxxQaJaoeQIPJ0PMMwUwsSCuuveADRcwDeIXnqawDu\nAnCDEOI8Ef0bgE43PvbOzk4xNDRUwXKnFlYfO1DYOdxydXuJj914nPuyM0x9Q0QHhBCdTsc5+tiF\nEB9UXGAxgCsAHKKCBdkB4BdEdI0Q4jce18tIsAvYdV4+kwN5DMNIcbTYXZ+ILXaGYZhQCcxiZ2oL\nrphlGCYwYRdCLAjqXExlVHO7XIZhooMrT+uIaqqYZRgmPljY64hqqZhlGCZeWNjrCG5UxTAMwMJe\nV1RLxSzDMPHCWTF1BDeqYhgGYGGvO7hRFcMw7IphGIapM1jYGYZh6gwWdoZhmDqDhZ1hGKbOYGFn\nGIapM1jYGYZh6gxOdwwJ7rLIMExcsLCHAHdZZBgmTtgVEwLcZZFhmDhhYQ8B7rLIMEycsLCHAHdZ\nZBgmTljYQ4C7LDIMEyccPA0B7rLIMEycsLCHBHdZZBgmLtgVwzAMU2ewsDMMw9QZLOwMwzB1Bgs7\nwzBMncHCzjAMU2eQECL6ixKdBvBa5Bd2z2wAZ+JehA9qef21vHagttdfy2sHpsb6LxdCzHE6USzC\nXu0Q0ZAQojPudVRKLa+/ltcO1Pb6a3ntAK/fDLtiGIZh6gwWdoZhmDqDhV3OI3EvwCe1vP5aXjtQ\n2+uv5bUDvP4i7GNnGIapM9hiZxiGqTNY2BUQ0X8josNEdJCIniWiuXGvyS1EtIWIjk2s/wdE1Br3\nmrxARH9IREeJKE9ENZHlQEQfJqLjRPRrItoQ93q8QESPEdG/E9Ev415LJRDRPCLaR0QvTXxvvh
j3\nmtxCRNOI6OdEdGhi7ZsDOS+7YuQQ0duFEL+d+P87ALxHCPGFmJflCiK6AcBeIcQ4EX0dAIQQX415\nWa4hot8FkAfwHQBfEUIMxbwkW4goAeBfAHwIwEkALwL4pBDiV7EuzCVE9AEAFwA8LoS4Ku71eIWI\nLgNwmRDiF0T0NgAHAHTXwvtPRARguhDiAhHpAAYBfFEIsd/PedliV2CI+gTTAdTMHVAI8awQYnzi\nx/0AOuJcj1eEEC8JIWppQOw1AH4thHhFCDEG4B8AfCzmNblGCPHPAM7GvY5KEUK8IYT4xcT/vwXg\nJQA10TNbFLgw8aM+8Z9vrWFht4GI7ieiEwDWArgn7vVUyGcB/DjuRdQ57QBOmH4+iRoRlnqDiBYA\nWAbghXhX4h4iShDRQQD/DuAnQgjfa5/Swk5EzxHRLyX/fQwAhBBfE0LMA7ANwJ/Fu9pSnNY+cczX\nAIyjsP6qws36awiSPFYzO7x6gYhmANgBYJ1lx13VCCFyQoilKOysryEi3+6wKT1BSQjxQZeHPgng\nGQCbQlyOJ5zWTkSfAfBRANeLKgykeHjva4GTAOaZfu4AcCqmtUxJJvzTOwBsE0LsjHs9lSCESBPR\nzwB8GICvQPaUttjtIKJ3m37sAnAsrrV4hYg+DOCrALqEECNxr2cK8CKAdxPRFUTUCOATAHbFvKYp\nw0QA8rsAXhJCfCvu9XiBiOYYWWtElATwQQSgNZwVo4CIdgBYiEJ2xmsAviCESMW7KncQ0a8BNAF4\nc+Kh/bWS0QMARPRxAH8FYA6ANICDQohV8a7KHiL6CICtABIAHhNC3B/zklxDRE8B+M8odBf8fwA2\nCSG+G+uiPEBEKwH8bwBHUPh7BYC7hBD/GN+q3EFESwD8HQrfGw1AvxDiXt/nZWFnGIapL9gVwzAM\nU2ewsDMMw9QZLOwMwzB1Bgs7wzBMncHCzjAMU2ewsDMMw9QZLOwMwzB1Bgs7wzBMnfH/ATbjpsIf\nX0Q6AAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAD8CAYAAAB5Pm/hAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAADmBJREFUeJzt3XGonXd9x/H3Z21XhzpW6W2JSdgt\nkg1b0QiXzNF/nHU2s2LsoCNlk8AK8Y8WKghbqrA6RiDDqftj0xHXYmHVLqBiMN3a2HUUYbO97bKa\nNHYGm9lrQnOdG60MOpJ+98d9Os/iufece885Pff+eL/gcs/53eec55u2effJc55zkqpCktSun5v2\nAJKkyTL0ktQ4Qy9JjTP0ktQ4Qy9JjTP0ktQ4Qy9JjTP0ktQ4Qy9Jjbt02gMAXHnllTU7OzvtMSRp\nQ3nyySd/VFUzg7ZbF6GfnZ1lfn5+2mNI0oaS5N+H2c5TN5LUOEMvSY0z9JLUOEMvSY0z9JLUOEMv\nSY0z9JLUOEMvSY0z9JLUuHXxzlhpkNl9R6a279MHbpravqVx8Ihekhpn6CWpcYZekhrnOXppgGm9\nPuBrAxoXj+glqXGGXpIaZ+glqXGGXpIaZ+glqXEDQ59ka5JHk5xMciLJnd36J5P8MMmx7uv9PY+5\nK8mpJM8muXGSvwBJ0sqGubzyPPCxqnoqyRuBJ5Mc7X722ar6s96Nk1wL7AauA94MfDPJr1TVhXEO\nLkkazsAj+qo6W1VPdbdfAk4Cm1d4yC7ggap6uaqeA04BO8YxrCRp9VZ1jj7JLPBO4Nvd0h1Jnk5y\nb5IrurXNwPM9D1tg5f8xSJImaOjQJ3kD8BXgo1X1IvB54C3AduAs8OlXN+3z8OrzfHuTzCeZX1xc\nXPXgkqThDBX6JJexFPn7q+qrAFX1QlVdqKpXgC/w09MzC8DWnodvAc5c/JxVdbCq5qpqbmZmZpRf\ngyRpBcNcdRPgHuBkVX2mZ31Tz2Y3A8e724eB3UkuT3INsA14fHwjS5JWY5irbq4HPgx8J8mxbu3j\nwK1JtrN0WuY08BGAqjqR5BDwDEtX7NzuFTeSND0DQ19V36L/efcHV3jMfmD/CHNJksbEd8ZKUuMM\nvSQ1ztBLUuMMvSQ1ztBLUuMMvSQ1ztBLUuMMvSQ1ztBLUuMMvSQ1ztBLUuMMvSQ1ztBLUuMMvSQ1\nztBLUuMMvSQ1ztBLUuMMvSQ1ztBLUuMMvSQ1ztBLUuMMvSQ1ztBLUuMMvSQ1ztBLUuMMvSQ1ztBL\nUuMMvSQ1ztBLUuMMvSQ1bmDok2xN8miSk0lOJLmzW39TkqNJvtd9v6LnMXclOZXk2SQ3TvIXIEla\n2TBH9OeBj1XVW4F3AbcnuRbYBzxSVduAR7r7dD/bDVwH7AQ+l+SSSQwvSRpsYOir6mxVPdXdfgk4\nCWwGdgH3dZvdB3you70LeKCqXq6q54BTwI5xDy5JGs6qztEnmQXeCXwbuLqqzsLS/wyAq7rNNgPP\n9zxsoVu7+Ln2JplPMr+4uLj6ySVJQxk69EneAHwF+GhVvbjSpn3W6mcWqg5W1VxVzc3MzAw7hiRp\nlYYKfZLLWIr8/VX11W75hSSbup9vAs516wvA1p6HbwHOjGdcSdJqDXPVTYB7gJNV9ZmeHx0G9nS3\n9wBf71nfneTyJNcA24DHxzeyJGk1Lh1im+uBDwPfSXKsW/s4cAA4lOQ24AfALQBVdSLJIeAZlq7Y\nub2qLox9cknSUAaGvqq+Rf/z7gA3LPOY/cD+EeaSJI2J74yVpMYZeklqnKGXpMYZeklqnKGXpMYZ\neklqnKGXpMYZeklqnKGXpMYZeklqnK
GXpMYZeklqnKGXpMYZeklq3DCfRy/9n9l9R6Y9gqRV8ohe\nkhpn6CWpcYZekhpn6CWpcYZekhpn6CWpcYZekhpn6CWpcYZekhpn6CWpcYZekhpn6CWpcYZekhpn\n6CWpcQNDn+TeJOeSHO9Z+2SSHyY51n29v+dndyU5leTZJDdOanBJ0nCGOaL/IrCzz/pnq2p79/Ug\nQJJrgd3Add1jPpfkknENK0lavYGhr6rHgB8P+Xy7gAeq6uWqeg44BewYYT5J0ohGOUd/R5Knu1M7\nV3Rrm4Hne7ZZ6NYkSVOy1tB/HngLsB04C3y6W0+fbavfEyTZm2Q+yfzi4uIax5AkDbKm0FfVC1V1\noapeAb7AT0/PLABbezbdApxZ5jkOVtVcVc3NzMysZQxJ0hDWFPokm3ru3gy8ekXOYWB3ksuTXANs\nAx4fbURJ0iguHbRBki8D7wauTLIA3A28O8l2lk7LnAY+AlBVJ5IcAp4BzgO3V9WFyYwuSRrGwNBX\n1a19lu9ZYfv9wP5RhpIkjY/vjJWkxhl6SWqcoZekxhl6SWqcoZekxhl6SWqcoZekxhl6SWrcwDdM\nSZqO2X1Hprbv0wdumtq+NX4e0UtS4wy9JDXO0EtS4wy9JDXO0EtS4wy9JDXO0EtS4wy9JDXO0EtS\n4wy9JDXO0EtS4wy9JDXO0EtS4wy9JDXO0EtS4wy9JDXO0EtS4wy9JDXO0EtS4wy9JDXO0EtS4waG\nPsm9Sc4lOd6z9qYkR5N8r/t+Rc/P7kpyKsmzSW6c1OCSpOEMc0T/RWDnRWv7gEeqahvwSHefJNcC\nu4Hrusd8LsklY5tWkrRqA0NfVY8BP75oeRdwX3f7PuBDPesPVNXLVfUccArYMaZZJUlrsNZz9FdX\n1VmA7vtV3fpm4Pme7Ra6NUnSlIz7xdj0Wau+GyZ7k8wnmV9cXBzzGJKkV6019C8k2QTQfT/XrS8A\nW3u22wKc6fcEVXWwquaqam5mZmaNY0iSBllr6A8De7rbe4Cv96zvTnJ5kmuAbcDjo40oSRrFpYM2\nSPJl4N3AlUkWgLuBA8ChJLcBPwBuAaiqE0kOAc8A54Hbq+rChGaXJA1hYOir6tZlfnTDMtvvB/aP\nMpQkaXx8Z6wkNc7QS1LjDL0kNc7QS1LjDL0kNc7QS1LjDL0kNc7QS1LjBr5hSuvP7L4j0x5B0gbi\nEb0kNc7QS1LjDL0kNc7QS1LjDL0kNc7QS1LjDL0kNc7QS1LjDL0kNc7QS1LjDL0kNc7QS1LjDL0k\nNc7QS1LjDL0kNc7QS1LjDL0kNc7QS1LjDL0kNc7QS1LjDL0kNe7SUR6c5DTwEnABOF9Vc0neBPwt\nMAucBn6nqv5ztDElSWs1jiP636iq7VU1193fBzxSVduAR7r7kqQpmcSpm13Afd3t+4APTWAfkqQh\njRr6Ah5O8mSSvd3a1VV1FqD7flW/BybZm2Q+yfzi4uKIY0iSljPSOXrg+qo6k+Qq4GiS7w77wKo6\nCBwEmJubqxHnkCQtY6TQV9WZ7vu5JF8DdgAvJNlUVWeTbALOjWFOSa+h2X1HprLf0wdumsp+W7fm\nUzdJXp/kja/eBt4HHAcOA3u6zfYAXx91SEnS2o1yRH818LUkrz7Pl6rq75M8ARxKchvwA+CW0ceU\nJK3VmkNfVd8H3tFn/T+AG0YZSpI0Pr4zVpIaZ+glqXGGXpIaZ+glqXGGXpIaZ+glqXGGXpIaZ+gl\nqXGGXpIaZ+glqXGGXpIaZ+glqXGGXpIaZ+glqXGGXpIaZ+glqXGGXpIaZ+glqXGGXpIaZ+glqXFr\n/svBBbP7jkx7BEkayCN6SWqcoZekxhl6SWqc5+glrRvTet3r9IGbprLf14pH9JLUOEMvSY0z9JLU\nOEMvSY2bWOiT7EzybJJTSfZNaj+SpJVN5KqbJJcAfwn8JrAAPJHkcFU9M4n9+Q5VSVrepI7odwCn\nqu
r7VfU/wAPArgntS5K0gkldR78ZeL7n/gLwaxPalySNZJpnBV6La/gnFfr0Wav/t0GyF9jb3f1J\nkmcnNMsorgR+NO0hhrAR5nTG8dkIczrjkPKnK/540Iy/PMw+JhX6BWBrz/0twJneDarqIHBwQvsf\niyTzVTU37TkG2QhzOuP4bIQ5nXE8xjXjpM7RPwFsS3JNkp8HdgOHJ7QvSdIKJnJEX1Xnk9wBPARc\nAtxbVScmsS9J0som9qFmVfUg8OCknv81sq5PLfXYCHM64/hshDmdcTzGMmOqavBWkqQNy49AkKTG\nGfoBkvxJkqeTHEvycJI3T3umiyX5VJLvdnN+LckvTXumfpLckuREkleSrKurHTbCR3YkuTfJuSTH\npz1LP0m2Jnk0ycnu3/Od056pnySvS/J4kn/t5vzjac+0nCSXJPmXJN8Y5XkM/WCfqqq3V9V24BvA\nH017oD6OAm+rqrcD/wbcNeV5lnMc+G3gsWkP0qvnIzt+C7gWuDXJtdOdqq8vAjunPcQKzgMfq6q3\nAu8Cbl+n/xxfBt5TVe8AtgM7k7xryjMt507g5KhPYugHqKoXe+6+nove+LUeVNXDVXW+u/vPLL1v\nYd2pqpNVtR7fGLchPrKjqh4DfjztOZZTVWer6qnu9kssBWrzdKf6WbXkJ93dy7qvdff7OskW4Cbg\nr0d9LkM/hCT7kzwP/C7r84i+1+8DfzftITaYfh/Zse4CtZEkmQXeCXx7upP0150SOQacA45W1Xqc\n88+BPwBeGfWJDD2Q5JtJjvf52gVQVZ+oqq3A/cAd63HGbptPsPTH5/unMeOwc65DAz+yQ8NL8gbg\nK8BHL/oT8bpRVRe607FbgB1J3jbtmXol+QBwrqqeHMfz+ZeDA1X13iE3/RJwBLh7guP0NWjGJHuA\nDwA31BSvmV3FP8v1ZOBHdmg4SS5jKfL3V9VXpz3PIFX1X0n+kaXXPtbTi9zXAx9M8n7gdcAvJvmb\nqvq9tTyZR/QDJNnWc/eDwHenNctykuwE/hD4YFX997Tn2YD8yI4xSBLgHuBkVX1m2vMsJ8nMq1em\nJfkF4L2ss9/XVXVXVW2pqlmW/nv8h7VGHgz9MA50px6eBt7H0qvg681fAG8EjnaXgf7VtAfqJ8nN\nSRaAXweOJHlo2jPB0kd2sHRK7iGWXkA8tB4/siPJl4F/An41yUKS26Y900WuBz4MvKf77/BYd0S6\n3mwCHu1+Tz/B0jn6kS5fXO98Z6wkNc4jeklqnKGXpMYZeklqnKGXpMYZeklqnKGXpMYZeklqnKGX\npMb9L7j1oYoFZAnYAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXYAAAD8CAYAAABjAo9vAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAFHpJREFUeJzt3X2QnWV5x/HflcMGlgjdOkQlIWsc\nYaIt4WVmh+DQaRmFJqOUt0I1DR2nzpjhD6coNoWQjBA1Qs0U7ah/NBSmOqYIlHBAQxvCKGNlSCRx\nlywxRMEWkoMDsbjD21qSzdU/djddNmfPy3Pf5zxv389Mhpyz5zznOgP8cud6rud+zN0FACiOWWkX\nAACIi2AHgIIh2AGgYAh2ACgYgh0ACoZgB4CCIdgBoGCCg93MTjCzn5rZU2a2x8zWxSgMAJCMhV6g\nZGYmaY67v25mPZJ+Iuk6d98eo0AAQHuOCz2Aj//J8PrEw56JXw3/tDjllFN84cKFoR8NAKWya9eu\n37j73GavCw52STKziqRdkk6X9C1339Ho9QsXLtTOnTtjfDQAlIaZPd/K66KcPHX3MXc/R9Jpks4z\nszPrFLTSzHaa2c6DBw/G+FgAQB1Rp2LcfUTSY5KW1fnZRncfcPeBuXOb/k0CAJBQjKmYuWbWN/H7\nXkkXSXom9LgAgGRi9NhPlfTtiT77LEn3uvsPIhwXAJBAjKmY3ZLOjVALACACrjwFgIKJMu4IAJjZ\n2uqw7t6xX2Puqphp+ZIF+vLlizv2eQQ7AHRIdbCmmzbv1puHjhx9bsxd393+giR1LNxpxQBAB1QH\na1q9efhtoT7V3Tv2d+yzCXYA6IANW/dp9NDYjD8fC9ynqxFaMQAQSXWwpg1b9+nFkdHGG2ZJqph1\nrA6CHQAimGy9NFqlT7V8yYKO1UKwA0AEzVovk8ykFUv6mYoBgKx7cWR0xp+ZpHl9vVq1dJEuP3d+\nx2sh2AEggnl9varVCff5fb16/MYPd7UWpmIAIIJVSxept6fytud6eypatXRR12thxQ4ALZg68VKv\nrTL5+0av6RaCHQCamD7xUhsZ1erNw5J0TLinEeTT0YoBgCbqTbyMHhrThq37UqqoMVbsADBFvZbL\nTBMvjSZh0kSwA8CEFXc8ocefe+Xo48mWy+/19mhk9NAxr5/X19vN8lpGKwYANL617tRQnzR6aExm\nyszESysIdgClVh2s6YLbfnh0K916Rt48pFuvXKz5fb0yjc+m33rl4kycKK2HVgyA0lpbHdam7S80\n3bBrXl9vZiZeWkGwAyidejfAaCSrLZeZEOwASqM6WNOaB4b1xlut7cAoSRe8/525WalPItgBlMLa\n6nDDPvp03bg3aacQ7AAKa3Imvd7mXDMxSV/7+Dm5W6VPRbADKKRWT4xOt+L8/lyHuhRh3NHMFpjZ\nj8xsr5ntMbPrYhQGAElNtl3aDfVrzu/sDTC6JcaK/bCkz7v7z8zsJEm7zGybu/88wrEBoC3Trx5t\nRW/PLN165Vm5X6lPCg52d/+1pF9P/P41M9srab4kgh1AV8109ehMTuyZpa8UKNAnRe2xm9lCSedK\n2hHzuAAwk+pgTeu+v0e/ffPYvVxmMj/FvdK7IVqwm9k7JN0v6bPu/mqdn6+UtFKS+vv7Y30sgJIa\n3yN9t0ZbvMhIkubMrmjPF5d1sKpsiLJXjJn1aDzUN7n75nqvcfeN7j7g7gNz586N8bEASqo6WNP1\n9wy1FeqVWab1V+T/xGgrglfsZmaS7pS0191vDy8JAGbW7oVGkjS7YvrqVWcXtvUyXYxWzAWS/krS\nsJkNTTx3k7s/HOHYACCp/f1dpHxfPR
oixlTMTzR+sRYARJdkfxdJ+nrOrx4NwZWnADIryUy6NH6h\nUVlDXSLYAWTQkvXb9NJrb7X9vqJdaJQUwQ4gU5KE+pzZFa2/Irt3NOo2gh1AJiQ5OWomrVhSjP1d\nYiLYAaQuSS+9KBt2dQLBDiA1SWbSpfG7GhHqMyPYAXRd0kCXWKm3gmAH0FUhI4wEemsIdgBdUR2s\nadV9Q2rj3Kik4m6t20kEO4COC+mlb/r0hzpQUbER7AA66qyb/0Ov/m972wFUTPqHvyjvlgChCHYA\nHVEdrOmz9ww1f+E0rNLDEewAogqZeCnzxl0xEewAokm6x8sZ75qjbddfGL+gkiLYAQRL2naRWKV3\nAsEOILGke6VL9NI7iWAHkEjSXvpxJj1768c6UBEmRbmZNYByCZlLJ9Q7jxU7gJadvnqLDnv77+Pk\naHcR7ACaYtOufCHYATR08e2P6Zcvv9H2+zg5mh6CHUBdSUcYTdLXGGFMFcEO4BhJWy8nH1/R7nXL\nOlAR2kGwAzgqadtldsX01avOZpWeEVGC3czuknSJpJfd/cwYxwTQXUknXuilZ0+sFfu/SPqmpO9E\nOh6ALkm6SqeXnl1Rgt3df2xmC2McC0B3hIwwMpeebfTYgRJKcvOLScylZ1/Xgt3MVkpaKUn9/f3d\n+lgAU4TswkgvPT+6FuzuvlHSRkkaGBhIcIoGQIikJ0fppecPrRig4JKeHJXYKz2vYo073i3pQkmn\nmNkBSTe7+50xjg0guffduEVJ/nrMhUb5FmsqZnmM4wCIZ+GNWxK9j156/tGKAQom6cTLCRXTM+s/\n2oGK0G0EO1AQIXPp9NKLhWAHCmDJ+m166bW32n7fu0+arR1rLu5ARUgTwQ7kWMhcOqv04iLYgRyq\nDtb0uXuGEk28cHK0+Ah2IGdW3PGEHn/ulUTvZZVeDgQ7kBMhFxqxaVe5EOxADiSdSZdYpZcRwQ5k\nWMjJ0eNMevbWj0WuCHlAsAMZFbK17n/fRqCXGcEOZEzIhUZMvEAi2IFMCemls0rHpFlpFwBgvJee\nNNTffdJsQh1vw4odSBmrdMRGsAMpCZlLp5eORgh2IAVJV+km6b9YpaMJgh3oIiZe0A0EO9AFIYHO\nDTDQLoId6LAPrHlYvxtLsg8j2wEgGYId6JCQ7QDYtAshCHagA0K2A2CVjlAEOxARq3RkAcEORMKF\nRsgKgh0IFHJHI24mjU6IEuxmtkzSP0qqSPpnd78txnGBrDt99RYdTjDwwoVG6KTgYDeziqRvSbpY\n0gFJT5rZQ+7+89BjA1kVsh0AJ0fRaTFW7OdJetbdfyVJZvY9SZdJIthRSEl76dzRCN0SI9jnS9o/\n5fEBSUsiHBfIlCXrt+ml195K9N5rzu/Xly9fHLkioL4YwW51njum62hmKyWtlKT+/v4IHwt0T9JV\nOtsBIA0xgv2ApAVTHp8m6cXpL3L3jZI2StLAwECy66uBLkt6clRilY70xAj2JyWdYWbvk1ST9AlJ\nfxnhuEBqQjbt4kIjpC042N39sJl9RtJWjY873uXue4IrA1KSdDsA2i7Iiihz7O7+sKSHYxwLSEvI\nhUYnH1/R7nXLIlcEJMOVp4DYDgDFQrCj1EJW6ZwcRVYR7CgtVukoKoIdpROyHQCbdiEPCHaUStJV\nOpt2IU8IdpRCSC+duXTkDcGOQgu5o5FELx35RLCjsELuO8pcOvKMYEfhhGwHwAgjioBgR6GE9NK5\nAQaKgmBHIYQE+gXvf6c2ffpDkSsC0kOwI/dCQp2Toygigh25VR2sadV9Qzp0pP33skpHkRHsyKWk\nEy/cdxRlQLAjV0Lm0mm7oCwIduRG0ptJcwMMlA3BjswLWaUzwogyItiRaUl76ezCiDIj2JFJXD0K\nJEewI1NCAp22CzCOYEdmsB0AEAfBjtSxHQAQF8GOVIVsrctcOlAfwY7UJL1NHat0oLGgYDezqyXd\nIu
mDks5z950xikKxfWDNw/rdmLf9Pi40AlozK/D9T0u6UtKPI9SCgqsO1rTwxi2JQv2a8/sJdaBF\nQSt2d98rSWYWpxoUFtsBAN1Djx0ddfHtj+mXL7+R6L2MMALJNA12M3tU0nvq/GiNuz/Y6geZ2UpJ\nKyWpv7+/5QKRX2wHAKSjabC7+0UxPsjdN0raKEkDAwPtN1mRGyFXjzLCCISjFYOokvbSWaUD8YSO\nO14h6RuS5kraYmZD7r40SmXIFW6AAWRH6FTMA5IeiFQLcirpCdKTj69o97plHagIKDdaMUisOljT\n5+4ZUpITJky8AJ1DsCMRVulAdhHsaEvSiZcz3jVH266/MH5BAI5BsKMlIRcasWkX0F0EO5pKGuqM\nMALpINgxo+pgTavuG9KhI+2/l/uOAukh2FFX0lU6vXQgfQQ73iZkOwBW6UA2EOw4Ksm9R0/smaWv\nXHkWM+lAhhDskJRsjxemXYBsIthLrDpY04at+1QbGW37vYQ6kF0Ee0mNT7w8pUNH2tsQgJOjQPYR\n7CWU5ASpSfoa+7sAuUCwl0jSuXTaLkC+EOwlkHQXRq4cBfKJYC84tgMAyodgL6ikFxr19fbolkv/\nkF46kGMEewElWaXPmV3R+isWE+hAARDsBbPijifaDnVOjgLFQrAXRHWwptWbd2u0zZEXQh0oHoI9\n56qDNd3y0B6NjB5q+71s2gUUE8GeU9XBmtZ9f49++2b7gc6NpIFiI9hzKOlcOit0oBwI9hz6u397\nqq1Qp48OlEtQsJvZBkl/JuktSc9J+mt3H4lRGN4u6YVGrNKB8pkV+P5tks5097Mk/ULS6vCSMN2S\n9dvaDvXfP7FHX//4OYQ6UEJBK3Z3f2TKw+2SrgorB9OtrQ63dQOMnoppw1Vnc3IUKLGYPfZPSbon\n4vFKLcnUy+yK6auEOlB6TYPdzB6V9J46P1rj7g9OvGaNpMOSNjU4zkpJKyWpv78/UbFlwIVGAEI1\nDXZ3v6jRz83sk5IukfQRd59xWMPdN0raKEkDAwPtTuoVXtJNu5hJBzBd6FTMMkk3SPoTd38zTknl\nwx2NAMQU2mP/pqTjJW0zM0na7u7XBldVEkn66CZpBSOMABoInYo5PVYhZVMdrOnz9z2lsTZuJs1e\n6QBawZWnKagO1nT9vUNqI9O50AhAywj2LhufehluOdR7ZkkbrqaXDqB1BHuXVAdr2rB1n2ojoy29\nnl46gKQI9i5YcccTevy5V1p+PSOMAEKE7hWDJtZWh9sK9WvO7yfUAQRhxR5Z0jsaMfECIBaCPaLq\nYE3X3zOkdjYDmN/Xq1VLFxHoAKIh2CO65aE9bYU6vXQAnUCwRzA58dJO++WC97+TUAfQEQR7oMm5\n9NFDYy29vmKm5UsWMMYIoGMI9oTanUun7QKgWwj2BNpdpTPCCKCbCPYENmzd11KoM/ECIA0Eewsm\n2y4vjoxqXl9v0/ZLb09Ft165mEAHkAqCvYnpbZfayKhM0kx7eLFKB5A2gr2Jem0Xl44Jd1bpALKC\nvWKaeHGGtotrfHVuE/8k1AFkBSv2Jmbqqc/v69XjN344hYoAoDFW7E2sWrpIvT2Vtz3X21PRqqWL\nUqoIABpjxd7EZHtl6lQMJ0cBZBnB3oLLz51PkAPIDVoxAFAwBDsAFEwpWjHTrxylRw6gyAof7PWu\nHF29eViSCHcAhRTUijGzL5nZbjMbMrNHzGxerMJiqXfl6OihMW3Yui+ligCgs0J77Bvc/Sx3P0fS\nDyR9IUJNUc105ehMzwNA3gUFu7u/OuXhHM28N1Zq5vX1tvU8AORd8FSMma03s/2SVqjBit3MVprZ\nTjPbefDgwdCPbRlXjgIoG3NvvMg2s0clvafOj9a4+4NTXrda0gnufnOzDx0YGPCdO3e2W2tiTMUA\nKAIz2+XuA01f1yzY2/jA90ra4u5nNnttt4MdAIqg1WAPnYo5Y8rD
SyU9E3I8AEC40Dn228xskaQj\nkp6XdG14STNbWx3W3Tv2a8xdFTMtX7JAX758cSc/EgByJyjY3f3PYxXSzNrqsL67/YWjj8fcjz4m\n3AHg/+Vmr5i7d+xv63kAKKvcBPvYDCd5Z3oeAMoqN8FeMWvreQAoq9wE+/IlC9p6HgDKKje7O06e\nIGUqBgAai3aBUju4QAkA2teVC5QAANlDsANAwRDsAFAwBDsAFAzBDgAFQ7ADQMGkMu5oZgc1vhtk\nN5wi6Tdd+qxO47tkE98lm4r4Xd7r7nObvTiVYO8mM9vZytxnHvBdsonvkk1l/i60YgCgYAh2ACiY\nMgT7xrQLiIjvkk18l2wq7XcpfI8dAMqmDCt2ACiVUgS7mX3JzHab2ZCZPWJm89KuKSkz22Bmz0x8\nnwfMrC/tmpIys6vNbI+ZHTGz3E0vmNkyM9tnZs+a2Y1p1xPCzO4ys5fN7Om0awllZgvM7Edmtnfi\nv6/r0q4pKTM7wcx+amZPTXyXdS29rwytGDM72d1fnfj930j6A3e/NuWyEjGzP5X0Q3c/bGZ/L0nu\nfkPKZSViZh+UdETSP0n6W3fPzV7OZlaR9AtJF0s6IOlJScvd/eepFpaQmf2xpNclfcfdz0y7nhBm\ndqqkU939Z2Z2kqRdki7P478bMzNJc9z9dTPrkfQTSde5+/ZG7yvFin0y1CfMkZTbP83c/RF3Pzzx\ncLuk09KsJ4S773X3fWnXkdB5kp5191+5+1uSvifpspRrSszdfyzplbTriMHdf+3uP5v4/WuS9kqa\nn25Vyfi41yce9kz8appfpQh2STKz9Wa2X9IKSV9Iu55IPiXp39MuoqTmS9o/5fEB5TQ8iszMFko6\nV9KOdCtJzswqZjYk6WVJ29y96XcpTLCb2aNm9nSdX5dJkruvcfcFkjZJ+ky61TbW7LtMvGaNpMMa\n/z6Z1cp3yal6d1HP7d8Ei8jM3iHpfkmfnfa39lxx9zF3P0fjfzs/z8yatspyc8/TZtz9ohZf+q+S\ntki6uYPlBGn2Xczsk5IukfQRz/hJkjb+veTNAUlT76R+mqQXU6oF00z0o++XtMndN6ddTwzuPmJm\nj0laJqnhSe7CrNgbMbMzpjy8VNIzadUSysyWSbpB0qXu/mba9ZTYk5LOMLP3mdlsSZ+Q9FDKNUFH\nTzjeKWmvu9+edj0hzGzu5OSbmfVKukgt5FdZpmLul7RI4xMYz0u61t1r6VaVjJk9K+l4Sf8z8dT2\nHE/4XCHpG5LmShqRNOTuS9OtqnVm9lFJX5dUkXSXu69PuaTEzOxuSRdqfBfBlyTd7O53plpUQmb2\nR5L+U9Kwxv+fl6Sb3P3h9KpKxszOkvRtjf83NkvSve7+xabvK0OwA0CZlKIVAwBlQrADQMEQ7ABQ\nMAQ7ABQMwQ4ABUOwA0DBEOwAUDAEOwAUzP8BYxTuYmu3P3AAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import matplotlib.pyplot as plt\n", + "plt.scatter(X[:, 0], y)\n", + "plt.scatter(X[:, 0], nuisance[0])\n", + "plt.show()\n", + "plt.hist(nuisance[1])\n", + "plt.show()\n", + "plt.scatter(X[:, 2], nuisance[2][:, 2])\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 0.95540328, -0.05546807, -0.04144514, 0.02004237, -0.01020521])" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_list[1]._model.coef_" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.1" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 8c5194d37de481198392eea008cbd73dbc5ead32 Mon Sep 17 00:00:00 2001 From: Vasilis Date: Fri, 1 Nov 2019 21:48:56 -0400 Subject: [PATCH 03/64] check for non keyword argument being None in crossfit --- econml/_ortho_learner.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/econml/_ortho_learner.py b/econml/_ortho_learner.py index 4bbee1e4a..99ed85140 100644 --- a/econml/_ortho_learner.py +++ b/econml/_ortho_learner.py @@ -35,8 +35,8 @@ def _crossfit(model, folds, *args, **kwargs): args_train = () args_test = () for var in args: - args_train += (var[train_idxs],) - args_test += (var[test_idxs],) + args_train += (var[train_idxs],) if var is not None else (None,) + args_test += (var[test_idxs],) if var is not None else (None,) kwargs_train = 
{} kwargs_test = {} From 4a87052aea610ac5b57b965442d4f0a41ee06dac Mon Sep 17 00:00:00 2001 From: Vasilis Date: Fri, 1 Nov 2019 21:50:42 -0400 Subject: [PATCH 04/64] started tests for ortho learner --- econml/tests/test_ortho_learner.py | 40 ++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 econml/tests/test_ortho_learner.py diff --git a/econml/tests/test_ortho_learner.py b/econml/tests/test_ortho_learner.py new file mode 100644 index 000000000..193ffd58e --- /dev/null +++ b/econml/tests/test_ortho_learner.py @@ -0,0 +1,40 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +from econml._ortho_learner import _OrthoLearner, _crossfit +from sklearn.linear_model import LinearRegression +from sklearn.preprocessing import PolynomialFeatures +from sklearn.linear_model import LinearRegression, LassoCV, Lasso +from sklearn.model_selection import KFold +import numpy as np +import unittest +import joblib + + +class TestOrthoLearner(unittest.TestCase): + + def test_crossfit(self): + class Wrapper: + def __init__(self, model): + self._model = model + def fit(self, X, y, W=None): + self._model.fit(X, y) + return self + def predict(self, X, y, W=None): + return self._model.predict(X), y - self._model.predict(X), X + + X = np.random.normal(size=(1000, 3)) + y = X[:, 0] + np.random.normal(size=(1000,)) + folds = list(KFold(2).split(X, y)) + model = Lasso(alpha=0.01) + nuisance, model_list= _crossfit(Wrapper(model), + folds, + X, y, W=y, Z=None) + np.testing.assert_allclose(nuisance[0][folds[0][1]], model.fit(X[folds[0][0]], y[folds[0][0]]).predict(X[folds[0][1]])) + np.testing.assert_allclose(nuisance[0][folds[0][0]], model.fit(X[folds[0][1]], y[folds[0][1]]).predict(X[folds[0][0]])) + + coef_ = np.zeros(X.shape[1]) + coef_[0] = 1 + [np.testing.assert_allclose(coef_, mdl._model.coef_, rtol=0, atol=0.08) for mdl in model_list] + + From e44296892fa070ffd58a32884ff49ff2cf904f06 Mon Sep 17 00:00:00 2001 
From: Vasilis Date: Fri, 1 Nov 2019 22:03:27 -0400 Subject: [PATCH 05/64] linting style for ortho learner.py --- econml/_ortho_learner.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/econml/_ortho_learner.py b/econml/_ortho_learner.py index 99ed85140..f8ea393bd 100644 --- a/econml/_ortho_learner.py +++ b/econml/_ortho_learner.py @@ -60,6 +60,7 @@ def _crossfit(model, folds, *args, **kwargs): return nuisances, model_list + class _OrthoLearner(TreatmentExpansionMixin, LinearCateEstimator): """ Base class for all orthogonal learners. @@ -112,7 +113,7 @@ def __init__(self, model_nuisance, model_final, self._label_encoder = LabelEncoder() self._one_hot_encoder = OneHotEncoder(categories='auto', sparse=False) super().__init__() - + def _check_input_dims(self, Y, T, X=None, W=None, Z=None, sample_weight=None, sample_var=None): assert shape(Y)[0] == shape(T)[0], "Dimension mis-match!" assert (X is None) or (X.shape[0] == Y.shape[0]), "Dimension mis-match!" @@ -148,7 +149,7 @@ def fit(self, Y, T, X=None, W=None, Z=None, sample_weight=None, sample_var=None, sample_weight: optional (n,) vector Weights for each row sample_var: optional (n,) vector - Sample variance + Sample variance inference: string, `Inference` instance, or None Method for performing inference. This estimator supports 'bootstrap' (or an instance of `BootstrapInference`). 
@@ -170,7 +171,7 @@ def fit_nuisances(self, Y, T, X=None, W=None, Z=None, sample_weight=None): splitter.shuffle = True splitter.random_state = self._random_state - all_vars = [var if np.ndim(var)==2 else var.reshape(-1, 1) for var in [Z, W, X] if var is not None] + all_vars = [var if np.ndim(var) == 2 else var.reshape(-1, 1) for var in [Z, W, X] if var is not None] if all_vars: all_vars = np.hstack(all_vars) folds = splitter.split(all_vars, T) @@ -179,7 +180,8 @@ def fit_nuisances(self, Y, T, X=None, W=None, Z=None, sample_weight=None): if self._discrete_treatment: T = self._label_encoder.fit_transform(T) - T = self._one_hot_encoder.fit_transform(reshape(T, (-1, 1)))[:, 1:] # drop first column since all columns sum to one + # drop first column since all columns sum to one + T = self._one_hot_encoder.fit_transform(reshape(T, (-1, 1)))[:, 1:] self._d_t = shape(T)[1:] self.transformer = FunctionTransformer( func=(lambda T: @@ -188,12 +190,13 @@ def fit_nuisances(self, Y, T, X=None, W=None, Z=None, sample_weight=None): validate=False) nuisances, fitted_models = _crossfit(self._model_nuisance, folds, - Y, T, X=X, W=W, Z=Z,sample_weight=sample_weight) + Y, T, X=X, W=W, Z=Z, sample_weight=sample_weight) self._models_nuisance = fitted_models return nuisances def fit_final(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None, sample_var=None): - self._model_final.fit(Y, T, X=X, W=W, Z=Z, nuisances=nuisances, sample_weight=sample_weight, sample_var=sample_var) + self._model_final.fit(Y, T, X=X, W=W, Z=Z, nuisances=nuisances, + sample_weight=sample_weight, sample_var=sample_var) def const_marginal_effect(self, X=None): """ @@ -242,9 +245,9 @@ def score(self, Y, T, X=None, W=None, Z=None): for it in range(len(nuisances)): nuisances[it] = np.mean(nuisances[it], axis=0) - + return self._model_final.score(Y, T, X=X, W=W, Z=Z, nuisances=tuple(nuisances)) - + @property def model_final(self): return self._model_final From 3adb74854a0f97b54254992bd14c2ed2b77f8cae 
Mon Sep 17 00:00:00 2001 From: Vasilis Date: Fri, 1 Nov 2019 22:05:52 -0400 Subject: [PATCH 06/64] linting style for _rlearner.py --- econml/_rlearner.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/econml/_rlearner.py b/econml/_rlearner.py index 818077f19..9d029bbab 100644 --- a/econml/_rlearner.py +++ b/econml/_rlearner.py @@ -27,6 +27,7 @@ from .inference import StatsModelsInference from ._ortho_learner import _OrthoLearner + class _RLearner(_OrthoLearner): """ Base class for orthogonal learners. @@ -83,7 +84,6 @@ def __init__(self, model_y, model_t, model_final, self._model_final = clone(model_final, safe=False) self._n_splits = n_splits self._discrete_treatment = discrete_treatment - class ModelNuisance: def __init__(self, model_y, model_t): @@ -95,7 +95,7 @@ def fit(self, Y, T, X=None, W=None, Z=None, sample_weight=None): self._model_t.fit(X, W, T, sample_weight=sample_weight) self._model_y.fit(X, W, Y, sample_weight=sample_weight) return self - + def predict(self, Y, T, X=None, W=None, Z=None, sample_weight=None): Y_res = Y - self._model_y.predict(X, W).reshape(Y.shape) T_res = T - self._model_t.predict(X, W) @@ -121,7 +121,7 @@ def score(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None super().__init__(ModelNuisance(model_y, model_t), ModelFinal(model_final), discrete_treatment, n_splits, random_state) - + @property def model_final(self): return super().model_final._model_final From 51728b9252af17d08a9186aef6e1df5bd284c192 Mon Sep 17 00:00:00 2001 From: Vasilis Date: Fri, 1 Nov 2019 22:09:22 -0400 Subject: [PATCH 07/64] linting style for test_ortho_learner.py --- econml/tests/test_ortho_learner.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/econml/tests/test_ortho_learner.py b/econml/tests/test_ortho_learner.py index 193ffd58e..280e97d17 100644 --- a/econml/tests/test_ortho_learner.py +++ b/econml/tests/test_ortho_learner.py @@ -14,12 +14,16 @@ class 
TestOrthoLearner(unittest.TestCase): def test_crossfit(self): + class Wrapper: + def __init__(self, model): self._model = model + def fit(self, X, y, W=None): self._model.fit(X, y) return self + def predict(self, X, y, W=None): return self._model.predict(X), y - self._model.predict(X), X @@ -30,11 +34,11 @@ def predict(self, X, y, W=None): nuisance, model_list= _crossfit(Wrapper(model), folds, X, y, W=y, Z=None) - np.testing.assert_allclose(nuisance[0][folds[0][1]], model.fit(X[folds[0][0]], y[folds[0][0]]).predict(X[folds[0][1]])) - np.testing.assert_allclose(nuisance[0][folds[0][0]], model.fit(X[folds[0][1]], y[folds[0][1]]).predict(X[folds[0][0]])) + np.testing.assert_allclose(nuisance[0][folds[0][1]], + model.fit(X[folds[0][0]], y[folds[0][0]]).predict(X[folds[0][1]])) + np.testing.assert_allclose(nuisance[0][folds[0][0]], + model.fit(X[folds[0][1]], y[folds[0][1]]).predict(X[folds[0][0]])) coef_ = np.zeros(X.shape[1]) coef_[0] = 1 [np.testing.assert_allclose(coef_, mdl._model.coef_, rtol=0, atol=0.08) for mdl in model_list] - - From 2776d9eaf1f11dbd3e64c4e550a0abbf393dcc35 Mon Sep 17 00:00:00 2001 From: Vasilis Date: Fri, 1 Nov 2019 22:09:52 -0400 Subject: [PATCH 08/64] lintiny --- econml/dml.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/econml/dml.py b/econml/dml.py index 02d2cc105..0aa94bdf8 100644 --- a/econml/dml.py +++ b/econml/dml.py @@ -27,6 +27,7 @@ from .inference import StatsModelsInference from ._rlearner import _RLearner + class DMLCateEstimator(_RLearner): """ The base class for parametric Double ML estimators. 
@@ -181,7 +182,7 @@ def coef_(self): @property def featurizer(self): return super().model_final._featurizer - + @property def model_final(self): return super().model_final._model From e83ded96d983c5d327b8c5643d462478cf424f50 Mon Sep 17 00:00:00 2001 From: Vasilis Date: Fri, 1 Nov 2019 22:14:06 -0400 Subject: [PATCH 09/64] linting --- econml/tests/test_ortho_learner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/econml/tests/test_ortho_learner.py b/econml/tests/test_ortho_learner.py index 280e97d17..513c8e105 100644 --- a/econml/tests/test_ortho_learner.py +++ b/econml/tests/test_ortho_learner.py @@ -31,7 +31,7 @@ def predict(self, X, y, W=None): y = X[:, 0] + np.random.normal(size=(1000,)) folds = list(KFold(2).split(X, y)) model = Lasso(alpha=0.01) - nuisance, model_list= _crossfit(Wrapper(model), + nuisance, model_list = _crossfit(Wrapper(model), folds, X, y, W=y, Z=None) np.testing.assert_allclose(nuisance[0][folds[0][1]], From bec81284f5f375b734d0c871180c01d33a0c84a9 Mon Sep 17 00:00:00 2001 From: Vasilis Date: Fri, 1 Nov 2019 22:17:57 -0400 Subject: [PATCH 10/64] linting --- econml/tests/test_ortho_learner.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/econml/tests/test_ortho_learner.py b/econml/tests/test_ortho_learner.py index 513c8e105..ffac3e2b6 100644 --- a/econml/tests/test_ortho_learner.py +++ b/econml/tests/test_ortho_learner.py @@ -32,8 +32,8 @@ def predict(self, X, y, W=None): folds = list(KFold(2).split(X, y)) model = Lasso(alpha=0.01) nuisance, model_list = _crossfit(Wrapper(model), - folds, - X, y, W=y, Z=None) + folds, + X, y, W=y, Z=None) np.testing.assert_allclose(nuisance[0][folds[0][1]], model.fit(X[folds[0][0]], y[folds[0][0]]).predict(X[folds[0][1]])) np.testing.assert_allclose(nuisance[0][folds[0][0]], From c11d99d8436f73f32ae34de2749f1d051d5732fc Mon Sep 17 00:00:00 2001 From: Vasilis Date: Fri, 1 Nov 2019 23:23:07 -0400 Subject: [PATCH 11/64] fixed issues related to input output shapes. 
One mistake was in residualization inside the _RLearner when both X nad W are None --- econml/_ortho_learner.py | 1 + econml/_rlearner.py | 17 +++++++++++++---- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/econml/_ortho_learner.py b/econml/_ortho_learner.py index f8ea393bd..baf4be9bf 100644 --- a/econml/_ortho_learner.py +++ b/econml/_ortho_learner.py @@ -231,6 +231,7 @@ def effect_interval(self, X=None, T0=0, T1=1, *, alpha=0.1): return super().effect_interval(X, T0=T0, T1=T1, alpha=alpha) def score(self, Y, T, X=None, W=None, Z=None): + X, T = self._expand_treatments(X, T) n_splits = len(self._models_nuisance) for idx, mdl in enumerate(self._models_nuisance): nuisance_temp = mdl.predict(Y, T, X, W, Z) diff --git a/econml/_rlearner.py b/econml/_rlearner.py index 9d029bbab..5fe34691e 100644 --- a/econml/_rlearner.py +++ b/econml/_rlearner.py @@ -97,8 +97,13 @@ def fit(self, Y, T, X=None, W=None, Z=None, sample_weight=None): return self def predict(self, Y, T, X=None, W=None, Z=None, sample_weight=None): - Y_res = Y - self._model_y.predict(X, W).reshape(Y.shape) - T_res = T - self._model_t.predict(X, W) + Y_pred = self._model_y.predict(X, W) + T_pred = self._model_t.predict(X, W) + if (X is None) and (W is None): + Y_pred = np.tile(Y_pred, Y.shape[0]) + T_pred = np.tile(T_pred, T.shape[0]) + Y_res = Y - Y_pred.reshape(Y.shape) + T_res = T - T_pred.reshape(T.shape) return Y_res, T_res class ModelFinal: @@ -115,8 +120,12 @@ def predict(self, X): def score(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None, sample_var=None): Y_res, T_res = nuisances - effects = self._model_final.predict(X).reshape(-1, shape(Y)[1], shape(T_res)[1]) - Y_res_pred = np.einsum('ijk,ik->ij', effects, T_res).reshape(shape(Y)) + if Y_res.ndim == 1: + Y_res = Y_res.reshape((-1, 1)) + if T_res.ndim == 1: + T_res = T_res.reshape((-1, 1)) + effects = self._model_final.predict(X).reshape((-1, Y_res.shape[1], T_res.shape[1])) + Y_res_pred = 
np.einsum('ijk,ik->ij', effects, T_res).reshape(Y_res.shape) return ((Y_res - Y_res_pred)**2).mean() super().__init__(ModelNuisance(model_y, model_t), From 5cda0067a07ecba913c5c3e514e358da97f0ddc9 Mon Sep 17 00:00:00 2001 From: Vasilis Date: Fri, 1 Nov 2019 23:33:36 -0400 Subject: [PATCH 12/64] added coef to sparse linear dml cate and also to LassoCVWrapper --- econml/dml.py | 9 ++++----- econml/utilities.py | 4 ++++ 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/econml/dml.py b/econml/dml.py index 0aa94bdf8..c2fc79423 100644 --- a/econml/dml.py +++ b/econml/dml.py @@ -167,11 +167,6 @@ def predict(self, X): return reshape_treatmentwise_effects(prediction - self._intercept if self._intercept else prediction, self._d_t, self._d_y) - @property - def coef_(self): - # TODO: handle case where final model doesn't directly expose coef_? - return reshape(self._model.coef_, self._d_y + self._d_t + (-1,)) - super().__init__(model_y=FirstStageWrapper(model_y, is_Y=True), model_t=FirstStageWrapper(model_t, is_Y=False), model_final=FinalWrapper(), @@ -359,6 +354,10 @@ def __init__(self, n_splits=n_splits, random_state=random_state) + @property + def coef_(self): + return reshape(self.model_final.coef_, self._d_y + self._d_t + (-1,)) + class KernelDMLCateEstimator(LinearDMLCateEstimator): """ diff --git a/econml/utilities.py b/econml/utilities.py index ade20af46..f6afaf9f2 100644 --- a/econml/utilities.py +++ b/econml/utilities.py @@ -1801,3 +1801,7 @@ def fit(self, X, Y): def predict(self, X): predictions = self.model.predict(X) return reshape(predictions, (-1, 1)) if self.needs_unravel else predictions + + @property + def coef_(self): + return self.model.coef_ From 2450a9921daa1949d9ae72645c47e0fe29d6bd3e Mon Sep 17 00:00:00 2001 From: Vasilis Date: Fri, 1 Nov 2019 23:35:37 -0400 Subject: [PATCH 13/64] removed coef from within final wrapper of dmlcateestimator --- econml/dml.py | 4 ---- econml/utilities.py | 4 ---- 2 files changed, 8 deletions(-) diff --git 
a/econml/dml.py b/econml/dml.py index c2fc79423..b67a53593 100644 --- a/econml/dml.py +++ b/econml/dml.py @@ -354,10 +354,6 @@ def __init__(self, n_splits=n_splits, random_state=random_state) - @property - def coef_(self): - return reshape(self.model_final.coef_, self._d_y + self._d_t + (-1,)) - class KernelDMLCateEstimator(LinearDMLCateEstimator): """ diff --git a/econml/utilities.py b/econml/utilities.py index f6afaf9f2..ade20af46 100644 --- a/econml/utilities.py +++ b/econml/utilities.py @@ -1801,7 +1801,3 @@ def fit(self, X, Y): def predict(self, X): predictions = self.model.predict(X) return reshape(predictions, (-1, 1)) if self.needs_unravel else predictions - - @property - def coef_(self): - return self.model.coef_ From fde1b7c28e58bbb33e891ab46bc4040aaf006641 Mon Sep 17 00:00:00 2001 From: Vasilis Date: Sat, 2 Nov 2019 00:00:16 -0400 Subject: [PATCH 14/64] making ortho test deterministic --- econml/tests/test_ortho_learner.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/econml/tests/test_ortho_learner.py b/econml/tests/test_ortho_learner.py index ffac3e2b6..756cb315b 100644 --- a/econml/tests/test_ortho_learner.py +++ b/econml/tests/test_ortho_learner.py @@ -27,8 +27,9 @@ def fit(self, X, y, W=None): def predict(self, X, y, W=None): return self._model.predict(X), y - self._model.predict(X), X - X = np.random.normal(size=(1000, 3)) - y = X[:, 0] + np.random.normal(size=(1000,)) + np.random.seed(123) + X = np.random.normal(size=(5000, 3)) + y = X[:, 0] + np.random.normal(size=(5000,)) folds = list(KFold(2).split(X, y)) model = Lasso(alpha=0.01) nuisance, model_list = _crossfit(Wrapper(model), From 32e8cd422e18b75f4af69f72e54057811703a5a8 Mon Sep 17 00:00:00 2001 From: Vasileios Syrgkanis Date: Sat, 2 Nov 2019 07:56:29 -0400 Subject: [PATCH 15/64] testing noteboook --- notebooks/OrthoLearner.ipynb | 358 ++++++----------------------------- 1 file changed, 59 insertions(+), 299 deletions(-) diff --git a/notebooks/OrthoLearner.ipynb 
b/notebooks/OrthoLearner.ipynb index 972a491a5..df42644e1 100644 --- a/notebooks/OrthoLearner.ipynb +++ b/notebooks/OrthoLearner.ipynb @@ -2,8 +2,10 @@ "cells": [ { "cell_type": "code", - "execution_count": 14, - "metadata": {}, + "execution_count": 1, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "%load_ext autoreload\n", @@ -13,18 +15,20 @@ }, { "cell_type": "code", - "execution_count": 107, + "execution_count": 7, "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 107, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "[[ 1.30061979 1.91105845 -0.46179321]]\n", + "[1.00171855]\n", + "(array([0.99046797]), array([1.01296913]))\n", + "[0.99920984 0.00131274]\n", + "(array([ 0.99401435, -0.00390604]), array([1.00440533, 0.00653151]))\n", + "[1.00171855]\n" + ] } ], "source": [ @@ -35,312 +39,68 @@ "X = np.random.normal(size=(100000, 3))\n", "y = X[:, 0] + np.random.normal(size=(100000,))\n", "est = LinearDMLCateEstimator(model_y=LinearRegression(), model_t=LinearRegression())\n", - "est.fit(y, X[:, 0], X[:, [1]], X[:, 2:], inference='statsmodels')" - ] - }, - { - "cell_type": "code", - "execution_count": 108, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[ 0.04347998, -0.15620713, 0.8605264 ]])" - ] - }, - "execution_count": 108, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "X[:1]" - ] - }, - { - "cell_type": "code", - "execution_count": 109, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(array([0.99336201]), array([1.00388333]))" - ] - }, - "execution_count": 109, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "est.effect_interval(X[:1, [1]])" + "est.fit(y, X[:, 0], X[:, [1]], X[:, 2:], inference='statsmodels')\n", + "print(X[:1])\n", + "print(est.effect(X[:1, [1]]))\n", + "print(est.effect_interval(X[:1, [1]]))\n", + 
"print(est.coef_)\n", + "print(est.coef__interval())\n", + "print(est.const_marginal_effect(X[:1, [1]]))" ] }, { "cell_type": "code", - "execution_count": 110, + "execution_count": 25, "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "array([9.98645400e-01, 1.45519805e-04])" - ] - }, - "execution_count": 110, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "est.coef_" - ] - }, - { - "cell_type": "code", - "execution_count": 111, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(array([ 0.99344973, -0.00509725]), array([1.00384107, 0.00538829]))" - ] - }, - "execution_count": 111, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "est.coef__interval()" - ] - }, - { - "cell_type": "code", - "execution_count": 112, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([0.99862267])" - ] - }, - "execution_count": 112, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "est.const_marginal_effect(X[:1, [1]])" - ] - }, - { - "cell_type": "code", - "execution_count": 113, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([0.99862267])" - ] - }, - "execution_count": 113, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "est.effect(X[:1, [1]])" - ] - }, - { - "cell_type": "code", - "execution_count": 114, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([-4.99311334])" - ] - }, - "execution_count": 114, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "est.effect(X[:1, [1]], T0=5, T1=0)" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "metadata": {}, - "outputs": [ - { - "ename": "AttributeError", - "evalue": "'NoneType' object has no attribute 'shape'", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - 
"\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)", - "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[0mX\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[0mX\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[1;31mAttributeError\u001b[0m: 'NoneType' object has no attribute 'shape'" + "name": "stdout", + "output_type": "stream", + "text": [ + "[[-0.68287838 -0.80819115 -0.18035347 0.91400753 0.35464864 0.02063007\n", + " -2.98266229 0.97470114 1.21184005 0.46500733]]\n" ] - } - ], - "source": [ - "X=None\n", - "X.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ + }, { - "data": { - "text/plain": [ - "(1, array([[1., 1.],\n", - " [1., 1.],\n", - " [1., 1.],\n", - " [1., 1.],\n", - " [1., 1.]]), 3)" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import numpy as np\n", - "x = (1,np.zeros((5,2)),3)\n", - "x[1][:]=np.ones((5,2))\n", - "x" - ] - }, - { - "cell_type": "code", - "execution_count": 86, - "metadata": {}, - "outputs": [ + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/vasilis/Documents/EconML/econml/dml.py:160: UserWarning: The final model has a nonzero intercept for at least one outcome; it will be subtracted, but consider fitting a model without an intercept if possible.\n", + " UserWarning)\n" + ] + }, { - "ename": "AssertionError", - "evalue": "\nNot equal to tolerance rtol=0, atol=0.08\n\nMismatch: 33.3%\nMax absolute difference: 0.10025215\nMax relative difference: 1.\n x: array([1., 0., 0.])\n y: array([1.100252, 0.043722, 0.018593])", + "ename": "ValueError", + "evalue": "The truth value of an array with more than one element is ambiguous. 
Use a.any() or a.all()", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mAssertionError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 24\u001b[0m \u001b[0mcoef_\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mzeros\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m 25\u001b[0m \u001b[0mcoef_\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m---> 26\u001b[0;31m \u001b[1;33m[\u001b[0m\u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtesting\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0massert_allclose\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcoef_\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmdl\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_model\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcoef_\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mrtol\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0matol\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m0.08\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mmdl\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mmodel_list\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 24\u001b[0m \u001b[0mcoef_\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mzeros\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m 25\u001b[0m 
\u001b[0mcoef_\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m---> 26\u001b[0;31m \u001b[1;33m[\u001b[0m\u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtesting\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0massert_allclose\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcoef_\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmdl\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_model\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcoef_\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mrtol\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0matol\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m0.08\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mmdl\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mmodel_list\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\numpy\\testing\\_private\\utils.py\u001b[0m in \u001b[0;36massert_allclose\u001b[0;34m(actual, desired, rtol, atol, equal_nan, err_msg, verbose)\u001b[0m\n\u001b[1;32m 1491\u001b[0m \u001b[0mheader\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;34m'Not equal to tolerance rtol=%g, atol=%g'\u001b[0m \u001b[1;33m%\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mrtol\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0matol\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m 1492\u001b[0m assert_array_compare(compare, actual, desired, err_msg=str(err_msg),\n\u001b[0;32m-> 1493\u001b[0;31m verbose=verbose, header=header, equal_nan=equal_nan)\n\u001b[0m\u001b[1;32m 1494\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m 1495\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", - "\u001b[0;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\numpy\\testing\\_private\\utils.py\u001b[0m in \u001b[0;36massert_array_compare\u001b[0;34m(comparison, x, y, err_msg, verbose, header, precision, equal_nan, equal_inf)\u001b[0m\n\u001b[1;32m 817\u001b[0m 
\u001b[0mverbose\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mverbose\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mheader\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mheader\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m 818\u001b[0m names=('x', 'y'), precision=precision)\n\u001b[0;32m--> 819\u001b[0;31m \u001b[1;32mraise\u001b[0m \u001b[0mAssertionError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmsg\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 820\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mValueError\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m 821\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mtraceback\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", - "\u001b[0;31mAssertionError\u001b[0m: \nNot equal to tolerance rtol=0, atol=0.08\n\nMismatch: 33.3%\nMax absolute difference: 0.10025215\nMax relative difference: 1.\n x: array([1., 0., 0.])\n y: array([1.100252, 0.043722, 0.018593])" + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0mest\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhstack\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreshape\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreshape\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mX\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mdx\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdx\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 12\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mest\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0meffect\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mdx\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 13\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mest\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodel_final\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcoef_\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mest\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconst_marginal_effect\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mdx\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/Users/vasilis/Documents/EconML/econml/cate_estimator.py\u001b[0m in \u001b[0;36meffect\u001b[0;34m(self, X, T0, T1)\u001b[0m\n\u001b[1;32m 316\u001b[0m \u001b[0;32mdef\u001b[0m 
\u001b[0meffect\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mT0\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mT1\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 317\u001b[0m \u001b[0;31m# NOTE: don't explicitly expand treatments here, because it's done in the super call\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 318\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0msuper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0meffect\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mT0\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mT0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mT1\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mT1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 319\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 320\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/Users/vasilis/Documents/EconML/econml/cate_estimator.py\u001b[0m in \u001b[0;36meffect\u001b[0;34m(self, X, T0, T1)\u001b[0m\n\u001b[1;32m 234\u001b[0m \u001b[0;31m# TODO: what if input is sparse? 
- there's no equivalent to einsum,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 235\u001b[0m \u001b[0;31m# but tensordot can't be applied to this problem because we don't sum over m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 236\u001b[0;31m \u001b[0meff\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconst_marginal_effect\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 237\u001b[0m \u001b[0;31m# if X is None then the shape of const_marginal_effect will be wrong because the number\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 238\u001b[0m \u001b[0;31m# of rows of T was not taken into account\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/Users/vasilis/Documents/EconML/econml/_ortho_learner.py\u001b[0m in \u001b[0;36mconst_marginal_effect\u001b[0;34m(self, X)\u001b[0m\n\u001b[1;32m 221\u001b[0m \"\"\"\n\u001b[1;32m 222\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_check_fitted_dims\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 223\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_model_final\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 224\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 225\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mconst_marginal_effect_interval\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0malpha\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0.1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/Users/vasilis/Documents/EconML/econml/_rlearner.py\u001b[0m in \u001b[0;36mpredict\u001b[0;34m(self, X)\u001b[0m\n\u001b[1;32m 117\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 118\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 119\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_model_final\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 120\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 121\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mscore\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mY\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mT\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mW\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mZ\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnuisances\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msample_weight\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msample_var\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/Users/vasilis/Documents/EconML/econml/dml.py\u001b[0m in \u001b[0;36mpredict\u001b[0;34m(self, X)\u001b[0m\n\u001b[1;32m 165\u001b[0m \u001b[0mF\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mT\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mbroadcast_unit_treatments\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mF\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_d_t\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_d_t\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 166\u001b[0m \u001b[0mprediction\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_model\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcross_product\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mF\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mT\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 167\u001b[0;31m return reshape_treatmentwise_effects(prediction - self._intercept if self._intercept else prediction,\n\u001b[0m\u001b[1;32m 168\u001b[0m self._d_t, self._d_y)\n\u001b[1;32m 169\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mValueError\u001b[0m: The truth value of an array with more than one element is ambiguous. 
Use a.any() or a.all()" ] } ], "source": [ - "from sklearn.linear_model import LinearRegression, LassoCV, Lasso\n", + "from econml.dml import SparseLinearDMLCateEstimator\n", + "from sklearn.preprocessing import PolynomialFeatures\n", + "from sklearn.linear_model import LinearRegression, LassoCV, Lasso, MultiTaskLassoCV\n", "import numpy as np\n", - "from sklearn.model_selection import KFold\n", - "\n", - "class Wrapper:\n", - " def __init__(self, model):\n", - " self._model = model\n", - " def fit(self, X, y, W=None):\n", - " self._model.fit(X, y)\n", - " return self\n", - " def predict(self, X, y, W=None):\n", - " return self._model.predict(X), y - self._model.predict(X), X\n", - "\n", - "X = np.random.normal(size=(1000, 3))\n", + "X = np.random.normal(size=(1000, 10))\n", "y = X[:, 0] + np.random.normal(size=(1000,))\n", - "folds = list(KFold(2).split(X, y))\n", - "model = Lasso(alpha=0.01)\n", - "nuisance, model_list= _crossfit(Wrapper(model),\n", - " folds,\n", - " X, y, W=y, Z=None)\n", - "np.testing.assert_allclose(nuisance[0][folds[0][1]], model.fit(X[folds[0][0]], y[folds[0][0]]).predict(X[folds[0][1]]))\n", - "np.testing.assert_allclose(nuisance[0][folds[0][0]], model.fit(X[folds[0][1]], y[folds[0][1]]).predict(X[folds[0][0]]))\n", - "\n", - "coef_ = np.zeros(X.shape[1])\n", - "coef_[0] = 1\n", - "[np.testing.assert_allclose(coef_, mdl._model.coef_, rtol=0, atol=0.08) for mdl in model_list]" - ] - }, - { - "cell_type": "code", - "execution_count": 46, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXYAAAD8CAYAAABjAo9vAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJztnX10VOd957+/O7oSI0gk8XI2RgLj\npDm4wRBYKw5noenaTkwax4pibyExaXKatE7Wp3VwEhFwXCO89loJaUzc1tt4HWfrGrvIgSgQNwfH\ngbQr7+JYRLyEGNrYrg2DswXDEIMGaTTz7B+jO7pz53nuy9y3mdHvc44P1sydex/NjL739/xeSQgB\nhmEYpn7Q4l4AwzAMEyws7AzDMHUGCzvDMEydwcLOMAxTZ7CwMwzD1Bks7AzDMHUGCzvDMEydwcLO\nMAxTZ7CwMwzD1BkNcVx09uzZYsGCBXFcmmEYpmY5cODAGSHEHKfjYhH2BQsWYGhoKI5LMwzD1CxE\n9Jqb49gVwzAMU2ewsDMMw9QZLOwMwzB1Bgs7wzBMncHCzjAMU2ewsDMMw9QZsaQ7MgzDhMHAcApb\n9hzHqXQGc1uT6Fm1EN3L2uNeVuQEJuxElAAwBCAlhPhoUOdlGIZxw8BwCht3HkEmmwMApNIZbNx5\nBACmnLgH6Yr5IoCXAjwfwzCMa7bsOV4UdYNMNocte47HtKL4CETYiagDwI0AHg3ifAzDMF45lc54\neryeCcpi3wpgPYB8QOdjGIbxxNzWpKfH6xnfwk5EHwXw70KIAw7H3UZEQ0Q0dPr0ab+XZRiGKaFn\n1UIk9UTJY0k9gZ5VC0O/9sBwCiv69uKKDc9gRd9eDAynQr+mHUEET1cA6CKijwCYBuDtRPSEEOJT\n5oOEEI8AeAQAOjs7RQDXZRimzvGS5WI8HnVWTDUGbUmI4DSWiP4zgK84ZcV0dnYK7u7IMIwdVsEE\nChb4AzcvrqoslxV9e5GS+PHbW5N4fsN1gV6LiA4IITqdjuMCJYZhqpI4slwqcamogrOpdCY210yg\nwi6E+BnnsDMMEwRRZ7kYO4RUOgOBSZeKkyjbBWe9nCdI2GJnGKYqiTrLpdIdgixoayXqfHoWdoZh\nqpKos1wq3SF0L2vHAzcvRntrElTB+cOAe8UwDFOVRJ3lMrc1KQ2CWncIqkwdY12qYGqU+fQs7AzD\nKIm7qZZZMMOmZ9VCaRaOeYfgJrXRzXnChoWdYRgpKhEbeu0s9h07HarYx3FDcbNDsPPDG8fFlU9v\nJtA8drdwHjvDVD8qlwKhkO1hEHRueTXnr1+x4RnIFJMAvNp3Y+jX5zx2hmF8oQr2WYUt6IwPL9kp\nUZfy10o/GnbFMEzAROFGiOIaqmCijCAzPtxmp8RRyl8N/nM3sMXOMAFSaZFLtV0DkKcbqtL5grRY\n3VrFcVSmWlMb21uTVeEissI+doYJEDd9Q/xa21H2JrGu9dor52DHgVSo/m+Zj93w67eb3i+VvxsT\nx9XjeDy3PnZ2xTBMgDi5EYJwH0RZai9LN+y8fGaobiBzVkkqnSkJ1prfL5WriCaOsx5fL+LuBnbF\nMEyAOLkRgnAfxB3A617Wjuc3XIdX+27E8xuuC0UwjWu0JnVlsFblKgo7uFsLsMXOMAHiFFwLwtp2\nW0gTZx51pZjX3ZLUkc5kpcel0hlpvrgq2DvVxuOxsDNMgDgVp7gtW/dzjWoc/OAG67pVog4ULPOB\n4VSZq6gayvnNxHWD5eApw0RIFMU3UQZXg0S1bhWy36eaipvCWAsHTxmmComi3NxLHng1uWu8uktk\nx0ddzm/3HrppPxAWLOwMEzFhN7Zy4+6pRneNl4IoAGht1ssei/Jm5fQeRj0oxAxnxTBMFVNJybxT\nH/OB4RS+3H8o8uIeJ9wMrDBz4dJ4yfsRVeGWgVOG09zWJLq0Q
Qw23oFXmm7FYOMd6NIGI/H3s8XO\nMFWKV6t6YDiF3l1Hi0FHjYC8KC3qMc6ZU8TWgrImK7GcvWS5AEA2L0rcGlG7Pk6lM+jSBrG+oR9z\n6QxOidn4xvhq7E6vBABsfc+/4qoDjyJJYwCADjqDr+uP4pfvWQAg3FgHCzsTGdXm0612vAjVwHAK\nPU8fQjY/Kdh5AegJcvT7mnE7VMIOP24et1kuBuYbUdSuj8/M+DnWZx9Fs0m4+/RHMVNvBA5fxPuG\nNwJU+l4naQzve/mvAHw+lDUZsLAzkVCNPt1qx4tQbdlzvETUDbK5UqvWSeRS6QwWbHgGbc06blxy\nWUn7ALefWZCWsyxn34z5RhREKikO9wM/vRc4fxJo6QCuvwdYslp6zKbxEyBL85xmGsNG+l/A7lFA\nKG6g50+6X0+FsI+diYQ4GjbVOl4qTO0E2/ycW5E7N5LFE/tfr+gzU1nYXgKjBkbTrTZJoNRalOV7\nRurhfmD3HcD5EwAEcP4EMjv/DF+8a+NkfMN0jKohWlM2DWRtfteWDnfr8QELOxMJcWYI1CpehMpO\nsM3PeQ1QynD6zBJWM9bhcTsMV1B6JIu2Zh2tSV3ZVdFv58XR3T1lgpzEKHoa+ou7lZEf32Mv2k7o\nycIuIGTYFcNEQiDb5Bql0tiCl5zsnlULy3zswKSP3e6cXi1pp89MFZhVPa7C6r47N5JFUk/gwTVL\nle+fm1RS8+fR2qxDCOADo/vwbT0t7Us8l94EUNitTMv8xtPvUAIlgJseKnfthAALOxMJtTKgIGj8\nxhbc5rwbx5izYtqadWy6aVHZ663nXLDhGeV5ZWPwnD6zdsXNot3jTTwIX71T2+FzI4X3an1jf5m/\n3OCUmDX5//lZ6NDOePo9ABQs9YhEHWBhZyKiGgb8xkGUKXiVFj612jTbWrt8vufB1UHdxP2672Q3\n1W37X5f2cJ9LcrEWAvjG+KQYP9r4KfTSdzy4Y0gdhA0RFnYmMsKuuKxGgo4thJEy2tu1SOrGAYB9\nx057vobsJn7tlXOwZc9xrNt+EAki5IQoya+X0dqsFy1qM27dd7KbqvU3NPLQVd7/PICt+sNYL/qx\nFZ/A0htvBxKLJjJnTtgvoPNzwEe/5WqtQcPCzjAh4jW2YCfcQaeMWlvkEqFMSK3XcHtjsQ7LMFvK\nhq/dbv0DwylcuDRedl5rzMAOVQHRrnyhgKhLG0SfPpmHbkUIIDGh+B10Blvw16BUpiDWS1ZPZshY\nrXfSgKv/ODZRBzgrhmFC5dor57h+3KkkPsiUUeu10pmsVEjN1/BSsm8+Fii3lJ3Wr8rLn97Y4Pom\n9pkZP0ef/ig6tDPQCOjQzmCr/jDubXgMALC+oV8q6kIU1mv1uRMADD1WEHSgIO43PQS0zCs82zIP\nuPl/ApvOxSrqAAs7w4TKvmOnpY//6NAbZY85CXeQbh3ZtWRCar6GlxuLU4Wr9dxuHgOA89ZYwOF+\n4MGrgN7Wwr+G6AJYr28vE26NgD9qeA4Hp30e7Qq/Okg9tBsQBTeMwZLVwJ2/BHrThX8j9KPbwa4Y\nhgkRlUClM9nioAinY1PpDAaGU4GmjFaS4ujlxuLlZtOSLC8+cvxdD/cDP/4qkDk7+eT5EwXXyOv7\ngX99Fs2Z8psnUBDtVrylVG/HbPsIKkf9whY7w4SIneiaLd2B4RQ0mwKejTuP4Nor5/irrDThtVio\nZ9VCT5WwXm42sqXYFmcZvm2zqBtkM8DQd50DmwpcZdpLKkcr6cIZJizsDBMidqJr9GX53b/4MXq+\nf8i2gCeTzWHfsdO+KivNeC0W6l7W7qkS1kuFa1qS+WJbRfrTe/1Vf6pomYez+Rn2x0gqR6NuF+wG\nHo3H1Cy10i1y2b3PStP2vEIAXu270f+C4H0MXYIIy9/Zhl+98Vbxd2lN6ujtWlSStWNNcTTnwF8c\nHbedY5ogwiffPw/3dS+2X
0xvK1za1u5pmQfc+Uv03rcJ67MPl/jm86Kwq6CWedJ89ChHEfJoPKau\n8ZL6F/cNYNNNi2w7FLqlkhRJFbIiIl0r5JfLYqg5IfD8y6Wuj/OZLNZtP4gte46XVXSm0hnsOJAq\n2VHIZoBar/HE/tcBAPe98yV1l8WWjopdLUjOBMYuADlTUNVkhS+98Tbc84NxrBP/gLn0Jk6JWdiK\nT2Bl9+3K97Qa+yD5FnYimgfgcQDvQCGf/xEhxLf9npdh7HBb0VkN7YKtOd2VQJC7dSr5/YwbQSab\nKysW8rJOQ/9VFZ3Wz8Pt+3DhxaeAo49NulvOnwB2/inwg88DIl8QZ00H8hXsgv7g64V/FTeNwhpv\nx5o917u+UVZjHyTfrhgiugzAZUKIXxDR2wAcANAthPiV6jXsimH8csWGZ6Sbcau7QrVNbmvW0dzY\nEKkVb4yk8+rfBoBmXUMmmy9bq1c3gMxqTuoJPHBzwf1hWP5BOTpk7iMny32w8Y7K+rG4IdEIfOxv\nAk1LtHtPg/5OReaKEUK8AeCNif9/i4heAtAOQCnsDOMXt1aSajt8biRb9BVHYcU7jaRTQQAaNMJI\nNg+gsoHJZleNNmGhm8lkc+jddRSj43nf7iIrMqvVcYqTKr88CHJjhTTJAIW9GvsgBepjJ6IFAJYB\neEHy3G0AbgOA+fPnB3lZps6R+ZDdNppy25Y2zNmYfix1oLxwyLxWpxvc3QNHpOX8VuyCmm6xdoIk\nFG5EK/r2lgidk+/5lJiNjjDFXZYm6ZNq64MUWLojEc0AsAPAOiHEb63PCyEeEUJ0CiE658yRl1kz\njBVVKhkAV6l/XtLunASnklzlSi11A9WrjLXapSAODKeU3QyDhlDoBGm05jWLvPGZvbjrO8CDV+Hl\naWsx2HgHurTBsvN0aYOYTWlU6iEe1VvxG8xBXgSeN1NTBGKxE5GOgqhvE0LsDOKcDAPYB0mf33Cd\nq+HIxnmc0u7sgl2yIOW67QexefdRac9zu/UHgXmtTQ1a8RrmHuwr+va6EreknsA0XXOVktnWrONS\nttRlY4i6kaYo8/t/KPdPuOoX3wUwCg2Fvi19+qNAFiVNub6pP4JGkvescUIA+P7o+/D7GAaokKaY\nkNVhJWdWdP5aIoisGALwXQAvCSHi7XzD1B1BpJJZt8mqYJddMZFKoM+NZG3985VmwdihAUiPjEkH\nZFya8MUD7t4jIxcdgGNKZlJPYNNNhWPt/Mmy665v6EcSoyWPNdMY1jf0Y9dYQdh79ccrFnUAGEET\nPk7/VJKDLkRpZesYEjj0uxvwvoqvUhsE4YpZAeCPAFxHRAcn/vtIAOdlGE9l7G6pZDamnUiqGmEN\nDKcc+454nwJayCm+OCYXYPNa3LxH05saijc+63vyqQnXivU9MqpQjf4xRudHA+O6XdogBhvvwCtN\ntyobbrVrZ3Cg8Ta82nQr2nDB0/tQgp7Epbxe1vSLCBgXGvKCcDI/G18Z+zw+/eLlsZf8h00QWTGD\nqOz7yTCOBDlSr2CpH0bGZNU265qrDAanIKxM+LfsOe7oCgnDD2w0DZO9d6pjDcE2FxNt3n206J65\nODppSTvlzvesWojBHzyMe0nd67yIAGZpPgQdKM4Sbdt5m/RpDQLvHN02+UA+vEB5tcC9Ypiqxu/k\neYOB4RS+tP1giagDwEg2jy/1HyxacKoAqVMQVtahMM7KQ8P/f8vV7Y6zRu/cfhB3Dxwp/jwwnELP\n9w+V+NzTmSx6nj5UUtxkxrxT6F7WjvubnnAWdcgbgHlG5IElq3Ep+Q7p0+aZpcXHYvxsooBbCjBV\nTxCpZFv2HEde8VxeTHZadKrivLP/oDRjQyZQblMtrWmCZrSJIGAlnBvJFsv6AbUPXQDYtv91dF4+\nE93L2gtDLnLlF83mRdG3LuPUhPV/8JlHsCmbjm4fP9FtsfkP7sX4D/8cDblLxadGRGPJzFK
DOKtC\no4AtdmZK4Jg77WKQRPeydmUanqxDodtUSzvdfvu08p2AF8w574bAq9YgG+hh9pMPNt6Bq3/7E8xt\nTZY9vrnhMQw23YGugffgnuzWYCxxN5i7LS5ZjYaP/RVGkpcVfeobsn9SzLoxULVnqIRqa9drwBY7\nMyVwsp7dDJKw/aMllA3OsKZaejW8CeWFQ23NOm5cchmeOfyG646RZj+6XZ8W4/c03ivrTNAOKqQo\nHmp+A+/NPFPy+KfpuVDFPA9A6/wcMH+5ujkYACxZjQ/942ykRtWf9drl8wPxr1dDHyIVbLEzU4Ke\nVQuVX3aN3A2S6N11VHl+IVDWg9tcMdvarNt6JqzP2bln9h07jfRI1tF3bmbjziO4e+AIRsbU6YTG\n72lYs7KZoM00hve9+UNp9kmQCAG8mZ9RtLzvHLt9coi0wyg6u92ZniB0Xu4+j93OIg9yBm3QsLAz\nU4LuZe341pqlSOqlX/lmXcO3Vi91HCQxMJxyLLs3/1EbAUijYvbcSNbWYjc/19asK489N5ItqcJ1\nSyabwxP7X7e18i+OjpfsOlQ9WxLKaEVwCABXjz2Cd45uw8qxhzD09g+5fq2d/zybE66F12mARjW2\n6zVgVwwzZXAKwqqaOQEobrGdMP6oN+8+Kg1AukHmrw+bLm0Q63P9mDtwBid/MBvdidXh92yx4ZSY\nXfx/r+mtTmmeboXXqTV0NbbrNWBhZ2qWMAZoWMV9y57jGBkbd90WwPij9jMxKeoeJzJf+rfo4YI7\nSJS7WcIOjI6KBP5GuxUEVPS5GseqGq+5FV4nizzIGougYWFnapKwAley83phZGy8ajIj3CLzpWsx\nlBwKAZwVM7B5/NP4SWIFHlxTXq9g3MxT6UzZkBBZ4NqP8DpZ5NXYrteAZ54yNYX5D1uG3zmTQcwn\nLfjpRVkxVLXyStOtsQi5lbygkgrRBBHyQpS5xGS7J9VgCz+7uigHaLiFZ54ydYfT5B3AX+BqYDgV\nyNDpTDaHtmYd4zlR1ks9Trq0Qaxv6C/p25JH9fQDsVaIGm6UVDqDnqcPYca0BuVnr+qn76e4rZot\ncidY2JmawO2wCj+BqyDT1NIjWTy4ZqmvARt2JDRCY4KKuwKiCX845D56qx+9eJ7AV+bMuNCgIV+y\nS1BViBpk88LxphtGNkq1DdBwCws7EymVbI3dDqvwG7gKUhg0Ity5/WBogdBcXiBj2g00EEFvmByh\nZ6ZLG8S39L9FA8XnGspP3HRSYnZRwL+q92MuvYk3MAt92dVlFaJeqYZslGqBhZ2JjEoDnm6GVcgC\naF5x29vFDWFY6XZk83K3j2GpxyXqQgCXRAJfHf98mXA/l/8ABMjTIJKknlD62KshG6Va4AIlJjIq\nrdSzs6STegJb1yx1NU3JiTiFIWg/t9HL5dv6w666LIYFEfAm2qTW+EhWPjxbFchta9aLnT6Ncxs0\nNbCUmWGLnYmMSiv1VJY0Ab4zFKyuIS+0JnW8dWk8EOtcAJjemFAO0FBhBETn0hnkoSGBPPIgaBDR\nNeJyYC696en4vCiU/psLvPQEFUf+dS9rLw7pNkhnSidZhVHjUEvwbY6JjEqnIfWsWghdYsY1SAda\nTuLUeU9WMq46o/XxpJ5Ab9ci5AN0uVQi6n36o+jQzkAjoIHyIAISVD2iDhSyXWSfnwoCsOZ980p6\n8G/5L+8tGQIiG9Jt7P6cWgFMBTiPnYkMP3nBqvxyVd663bUA2ObCqzAXxCyYlcT+V85F7ks3M9h4\nBzq0eEr+zYwLDRoJnMtPx9voUsnc0hHRWGyda7x/brCrR5ANyzYwqlVlz/utcagGOI+dqTr85AWr\n+qek0hms6Ntbdj6VP/9L/QeRIKoovzwnBHSNsGBWEs+/fNbz64NG1aQrTKwtBszC3ZrU8YHRfROu\noTdxSszCN8Yns1283ATt3HN2z7lpvzwVYGFnIqXSvGC
7jBXjcXOWjeqPOC/gy32SzYvIRb1LG0Sv\n/nhx2LNAvEVFb+ZnoI0ulgk3EfCTxO9j15i/tEXA3j1nF3PpWbVQuRubSumQ7GNnagK304gy2RzW\nbT8IrZqczBXSpQ3iQONt+Lb+MGbSBRAVxFOb+DeOX5EIyGBasZ2uOdslPZItmU9bKU6pi7LvAmFy\ngIZd++WpAlvsTOy4yWCwunE0B39tnL7vSjBnt5wSs/HT/FL8YeKfY01VVKHKcmlt1kt2ZE6+cNkn\n1JrU0du1qKL2ysbjtdwKICg4eMrESqUB1Ss2PBN5e9uwkJX7y9rlho0hBU7XPZmfjZVjD5U9rmuE\nLX9Ymr2ybvtB6TkIwINrlk5p8a0EDp4yNYFT0dLm3UeL2TBmay7IKtGosVrnrfRW6KPm3GC+phCT\nvnxrsFTV0yWbFyWNuLqXtZd8fmbmtiZrtg9LLcA+diZWVEHOVDqDnu8fKhGFdCaLnqcPFcbOufS5\nR4kbLe7SBrFF/04x97xDO4PpGA19bV4hKkwx+mL2dpzMzy7OHjUyYFRYb7abblo05f3dccAWOxMJ\ndw8cwVMvnEBOCCSI8Mn3z8N93YvRktSls0SJIB0tl80LfLn/EP5y9XvxwM2Li1v5lqSOi2PjFY+j\n80tbs44bl1yGJ0zVkDI2NTyOJirdoVRrnHcuvYndYqWnLBcCinNTjdhJJpuzHYrBBA8LO+MaN0FO\n2TFDr50tEbycEMWfVaJmF/rJCYGNO4/ggZsX4/kN1xWvmc5kPRXBBMlvM+N4Yv/r0KiQUmnFcL/M\npAuRr83A/LacwwwIAbTRReRB0iZhp8QsEIAGrTTvX08QdE3eSVJgsv2xOXZifCYXR8fLXsMEDwdP\nGVe4CXKqjrk0npMKtTEhx8830PABm8VUTxDG88L25hAVXdog7m/4LmbQaKyW+bjQ8KXsF6RuFFnw\n1lx4ZL5ZJXUN0/QE0iNZ5edmV/1ZOEe8U4hqGbfBU/axM65w05lRdYxKYHMTY89ktCZ16A69YICC\nhWi1kLO5+EW9SxvEL5oKOehv0+IV9RHRqBR1ANiVX4kN2T9R+tLN728mm8c5G1EH7Ks/C+fIoXfX\n0Up+FcYlLOyMK9yUaXst2U4QSYOgukZFH3s1zOL0yuP6/SVFRXEhBHBBNDkGPIGCuK8ce0haeOQF\nIzDqVOWZzmSnVFOuqGFhZ1zhpjOj6phmXf41++T756F7WXtJtWJrUgcIxWyYvCgIfaJKFN7oc/5K\n060YbLwDXdpgyfObGx7D72lHYw+ICgE8nvsgrhr9nu/JRG4wujAaLhY3WUtBjiJkSmEfO+MKPz72\nB25ejKHXzkqzYqyoqhVVlYpRYueLvlr7F6xN7EUC+dhFfUw04CvZ23wLuioQbMWuw+bXfnDEth1x\n+4TbxqlAaar3Vzdw62NnYWdcU2lWjJc/wGquKFW1yTXmecYl6HkBpDEDrShvzOWH1qSO0XH5lCMD\no4JU9Rl7aSugCqr6afdcb7CwM6ERpvWkEoK40hjNvNJ0a9X5/IUAvpi9HbvyK9HemkR6ZMzzwA4V\n5rJ/YwiJsDy/dvl86c7LwOuNWmb9q74T9dBf3SuRthQgog8D+DaABIBHhRB9QZyXqT4qHUjtlp5V\nC6XW2X+c3+KpXW57CC0HTonZ6IihB7qBtX9MXgB/n/tg0ToP+ve1lv073dCtz1975RzHZm1WZAF4\n7q/uHd/CTkQJAH8D4EMATgJ4kYh2CSF+5ffcTPVhl/boRtidxEHVmc9roM0oWb9z+0Hfrh2juKid\nzsTSnMtgFAmcybdJh1gEjazs3663i+yGr6rCTeoJTNM1ZQ8Z2WNTvb+6V4Kw2K8B8GshxCsAQET/\nAOBjAFjY6xC73i5XbHjG1jXj1tqXCcidii6BKnp3HcXBTTdg6LWz0vmYbnlcv78qslyygrA++/lI\nMlwA4JarvTXo6t1
11NYXb5AgKo4nlO3MZD1kVLs47jejJoh0x3YAJ0w/n5x4jKlD7Kwkp8HBboqc\nKrmuDKP/zH3di/HgmqVo9/B6I6Xx1aZbYxH1Qv75NLyZn1EsGPpy9r9GJuoA8NQLJ1znmQ8Mp6T9\nfmTkhSjeuM1pruZUSStejmUKBGGxy772ZQYSEd0G4DYAmD9/fgCXZeJAZj1ZUblm7Kx9o3GUimuv\nnOPYYMuKdRaqamSauY3uCKahGZdiC5Kag6FxYvTjAZxjJ17cZOYbtJe2vdzi1xtBCPtJAPNMP3cA\nOGU9SAjxCIBHgEJWTADXZWLA6gNXfZAyEbfrH7Jx5xEMvXYW+46dLvO/DwynsOOA9ypF6yxU2c1o\nc8Nj+HTiuaJVPgOXPF8nSFJidpmotyo6YIaNeTdlFxdxG8Rk90l0BCHsLwJ4NxFdASAF4BMAbg3g\nvEwVoAp2Oo0/k7lO7Kz9TDZX4gs3+99lLhyvmF9vttDjzD+3IhtiYbS5tXbIrARZ3ngun8eYTatj\n601RFhdR3bCnNybQ2tw45YuK4iCQPHYi+giArSikOz4mhLjf7njOY68N7h44UhZ4dFNtaghI+0TK\nm9kK9+pSMSoT/XxLzVktxTVWgZiPiQa8JaahjYItLJKR1BO45er2sh2RU9aQqn7AnEPOBUTRwQVK\njC8GhlPKP3prYYhh1cuKWKzoCfI0DMOpBayKarXKCyPnKHQhlyEbclHJwGnjuVf7biz+rBqkwgQL\nt+1lfLFlz3HX/vPuZe14fsN1aG9NOlrWXicctTbrOHfR2+g4o6eLMX6uWkQdKPjQ/XZQrPjakowl\nO5+3QGEylAyzq82IgRiWfU4I7DiQ4u6NMcLCzkixs5BVqYdhVAKeH8lKJ/XYafX6hv6y4dBRI0T5\nFCi7QdBRYU0v7V7WXuioKaGtWceFS+UTj/QEldwQ/KSxMuHAo/HqlEr7uQwMp7B5t3oIAmHSyrNe\nQzW/1A/lkl7AdtBDTGX/hpBfRBPuyn4OACbcQeFXinrBfAMeGE5JdzQaFWoBZJ7a8bzAndsPYsue\n4+hZtZBL/qsQFvY6pNJ+LrIgmJW1y+cXUxCt1zBmYWbd9HoNCWt/9ChJidlYOfZQyWNeBkFHReuE\ne8Xu87b7CA2xN75Xqhs6l/zHBwt7HVJpPxc3aYVGQEx2rFf/uR1JPYGmBs12B9ClDWJTw+NlA6Lj\n8KmH5WZpatAwOq7at9jzqeXzsf3FE2Wfy4VL48XdVhBppNN0DUk9wSX/VQT72OuQSrfGTs+by/LD\n3Ga3JnU8cPNi9HYtUn5Bu7RBfEv/H5ilFcbPmf+LCiEKlu0pzMZd487j5yph9owm23iCitakjvu6\nF2N6Y7ntls2LogstCNIjWS7SnFtfAAAamklEQVT5rzLYYq9DKu2G5zRZ3myBVZKC6IYubRB3iafx\njh+eAVo68MK0j+OpS8vLrHMBxFL2X0hXLLTw/cb4ahx4+4fQs2ohfrbrKBBCdagRv/D6Xn/0vZcB\nAM4r1lTpeWVY2/sy8cPCXgfI+mDvOJDyvDVWVYa2JnX0di0q+cN10zPGLeacc8C0jTx/An8h/hZX\nNvwKn0jsQxOVFkGFjTVwKFDof75p/LOTD6Yz6Pn+oUDdUGaMwLeqCEzFjgMpdF4+0/Ym7+UzNAqc\nvHyveJxdfLCw1ziyIOaOAylplaHTH5WqF7qq4x4AfLn/kHKQgpupR7I5omaaaQxrE3vRQJX5mSvF\nGAZdIuIKwhJ1QzStn0tLUkc2l7edlGTEVOxa3prPm0pnip+XuWLYeDyTzWHfsdOuv1dhD2Rh7OHK\n0xrHz9iwICwqu9FnW01j1VSo5oiaiWq4hfGnkIOGbbnrXIl60JjbMahmyrq1so3qUD+pr5W2CuBx\nduEQ6Wi8WqOetoiVBkr9pESa37vWZl06CaetWUf3snbHARlucs6jEnW3Fnqo60C5R
W3GSyaLEVOp\n1P/tZ1oW57bHy5TLijEELTXRWMpuMEQtoAqIOgVKVX+0dsVJsvfu3Ei2LIiZ1BPYdNMiAJM50yrS\nmGH7fFQQAddr3qY0hYWsanNgOGXb10XGtVfO8bUOP+Jc6feSCYYpJ+z1Vv7cs2ohknqi5DE3gVLV\nH+e5kSzuHjgifU5lLeZFIcAqS3UbtbEuu7RBtOKC8vmomUtvhnr+Bg9pPIaADwynsOzeZ7Fu+0HP\nGSz7jp32dLwVP+Jc6feSCYYp54qpty2il4CnGbtUt237X0fn5TNdT0ACgOlNDejtWoQte44Xy82v\nvXJOsc9LlzaIXv1xtGEyXTHsrouGz9ztNU6JWeEtBoVSfLckiDz502X4+U4PDKdwcbS8T4xbca70\ne8kEw5QT9nqceF6JD7Vn1UKsU/i/BSD1o9rdDGQDGbZN9F3v0gbxTf0RNNKkUESVgv547oNYm9iL\nBPLIgzAiGjGDRsvy4KuhQZeZnBCu/OnNuiZtkgY4u8FUqG4obc06Nt20yPV3jXPb42PKuWLqfYto\n+GKv2PAMVvTtVcYO7Lr6AXJrr2fVQqUgGylxZgz7dH1Df4moR8VZMQObxj+L3xl9AleMPol3jW7D\n4rHv4YrRJ7EueztO5mcXh0VvyIZTOVopxoARJ9qmNymfqzThTXVDaW5sYKGuEaacxV7PW0SvmS69\nXYuUwzRkO5juZe0Yeu2sdKqSVQhkU4uiZFQksHn809LnpjcmsGtsZVU26DIw55GraE3qts+rqk6d\nqDd35VRkygk7UL9bRC/paQPDKfTuOioVdWMHI0sLva97MTovn1ny+IJZSTz/8tni6zc3PIY/SjwX\nW8l/aqLcX2WB2xX2VAv7jp22dZcBhbYB1pusGePm7DW9tx7dlVONKSns9YpbS2tgOIWepw9J2+sa\n7QMA2Fr/hjDcPXAET+x/vcxCj6vDYrW5VAycWgBYOZXOoHtZOzbuPIyMxIferGvYd+y08pwaoXhz\n9lqvYFetytQGU87HXs+4TU/bsue4smf69KaCH9U2LfRwP/DgVRC9Ldg8/Ht4telWfFt/GB3amdA7\nLJb1b5mYVBSGnzyoX8OrqAOTn9kDNy+Bbtn66Brhv9+8xNY1kph4TSXpvd3L2rlbY43DFnud4CU9\nzU4QUumMbZuAzt/+BNj9PSBbGFydoGhbUlxEE9L5t4U+laitWcfwPTd4LgqSUck7ZBQX2cWE7No1\nZHP2rXmd/OX16q6cKrCw1wFe09Oc2rXaCdHGxqeBbDxBtLwA7sp+LnRXS0KjYuVskF0svfCjQ28U\nh5qoRNZpbXatedlfXt+wK6YO8Jqe1rNqYdn23g1JPYH/gHCyXJxS8/Ki0DJ3dwT+81xeYPPuo7h7\n4EjxvU1EHDRwMzvWcJmo1mZY9/Wc3svIYYu9DnCz3bZmRqy5Zh5+dOgNVwJCmBQJ+lkHcP5EUEuf\nvAYBOUHQJvYLF0QTsqSjFRdLXC7GLgSYbDdbiQ/biXMjWTwxUWAFFAqGdI0ACq9NbyUYN243rXnr\nLb2XUcPCXgc4bbfterbLZmJa+WbycXx89FloPyxkZ4TVRpcAXDH6pO0x50ay2LjzCB64eTGe33Bd\nID5wt0Q5pNvLhspJvNlfPvVgYa8DnNLTVJkRT77wesk0evMkozRmQAigjS6A8qVCHpZXwm2vFnNu\nvlMQMAxrPgq83kNYvBkz7GOvA5zS01TiZxX1Pv1RdGhnoBEwky5glnYBWkQDovMCnnq1GL9Ti6It\nQluzjq1rlkYq6kG+Te0c3GR8wBZ7nWBnsclcNWbr/JSYjSQuKcfThY0RGPWS7TK3NVlI8RyT96C5\ncGnctrd8GDjdRPQElbm9mho0jI6XFiBxcJPxC1vsUwBrZkSXNoivm6zzDu0MZlK4fdGtWS95U2HR\nuuztnicXLZiVxObdR5XxgWxeSCc7xcm4ZK1WU
W9N6lwMxPiGLfYpgDW4dlfj00ii1Dr3625x6n2e\nB+GN/KzACovMvWkqob01ibMXR6Xl+mHhxi1kVP4yjB9Y2Oudw/3AT+9F9/mT6G7pAG69B9gZfC46\nkToXXQjgidz1sc8TNfjU8vnovHymbYOtuOAOikwQsLCHQJjDsj2d+3A/sPuOyUrR8ycgdvwp8kRI\nyF/hCzFhrZuNdiGAY6K9akQdKHRO9Ds2Liy4IpQJAhb2gLHrpgf4KxTx3Knvp/eWlf8TAQmIinPR\nxcRMO9lLtZZ5eLl1BS7/t34kkEcOGrblrqsqUQcQWd67V9wETcM0Gpj6gYU9YFQ54727jmJ0PO+p\nfarbc8v6rQMAzp9UnsuPT33d2O34y+RjaMhdmnxQT+LFd/05PvF/5yEnuio/eQD4yV3XyHsOOVAQ\nZbcFXzLaXYh0JS14makJC3vAqHykstJ9W1H2cO6yx3/0JeDA9xBGaU5KzMYP8yvRRo3obdlRuHm0\ndODFd/05Pv3i5ciJ+IdYCBTSvbyGRWWToOwwbiCGKAPA9p+7b7dAANYun19s9uWE5xs7M2XxJexE\ntAXATQDGALwM4I+FEOkgFlarOHVOtOIlWKbKR7+38e+B3rdcn6dSzAOf/+7CNei9e3PxuXV9e5Hx\n2fXREMoEEXKVDuxEQWjfOJ/xdF8zxHnz7qPKNMmERnhbUwPOZ7JSN8iKvr2ObQeMWaaVuFF4ZB3j\nFr957D8BcJUQYgmAfwGw0f+SahtVN702xcR4L8EyWT76N/XvoBXhirpskIV13UGIiyGJb5vWAD1R\nma+IUHifvLhT2luTeH7DdQAKhU0yWpM6PnnNPExvUttCTjd04zqv9t2I5zdc59nKVn1XNCLH4eXM\n1MKXsAshnhVCGH8J+wF0+F9SbaMq79900yLf7VOt576r8Wk0UriuDwFgXfZ2rBx7qCjqsnUHmc2R\nzmQBUWgLQCiIqlvWLp+P7mXtrtvsWnvq2I0L3HEghVQ6A4FJ/7ZZSO2uGUQ1qcxoAAqdJ1VrYqYm\nQfrYPwtge4DnU1LtmQF25f1+111y7t61fpfqAIE6P4tr2/8MQw7r7lm1EHduP+ja+9GsaxixKQ7K\n5gWaGxswfM8NAIAFG55xPKdGQOflMwEAn3z/vJK2uzISRK566pzPZF35t+3cR0FUk1oLzTSJy4p9\n7gzgQtiJ6DkA75A89TUhxA8njvkagHEA22zOcxuA2wBg/vz5FS0WqO3MAOsfpjF3sqJ1H+4HSAOC\nCFZSAhB5INlW+DlzDmjpAK6/B1iyGt0u1ti9rN1Twc+oi8wRs9C2u4hd5AWKonZf92LsOHBSWVma\n1BNlYquKj2hEymu7WWN7azKw76b5xn6F4mbHPnfGUdiFEB+0e56IPgPgowCuF0JtsgghHgHwCAB0\ndnZWHBmr5cyAim5KE5WjRvYJ3n0DcPQHQMZfSX0RPQnc9BCwxH1nRRVuxNcg58IJbnbv9Kxa6OrG\nYRa1B25eIh0dZ7hWZLsO2fF2lrh1jXbtk4OGx94xKnz52InowwC+CqBLCDESzJLsqeXMAM8T443K\n0fMnAIjCv0PfrUjUhQDygvBmfgbOihkACGiZF5ioA2ofcCUYQVADtzdts6jJ4h1b1yzFwU03SM/n\nNGrOilW0ndonBw2PvWNU+PWx/zWAJgA/ocIfw34hxBd8r8qGWrZSTqUzZe1yvzG+GrvTimZYksrR\nSkmJ2Vg59hCA0iyQIJFN8rn2yjl46oUTntMXBcrF3GlHIBO1SgZQuFkrAbjl6vJzRznwgsfeMSp8\nCbsQ4neCWohbot7uBslnZvwc67OPFvued9AZ9OmPYqbeCODG8hcENFvUOsTi4ug4BoZTjlWOlQiG\nIWzG67ftfx0tSR0Xx8Y9VWTKBk3YBWitgdBKMFxlbhCA634zYQb7eXISI6PmKk9r2UpZr29H83hp\nu9xmGsN6f
TuAzaUHH+5HUIPdCChpkZvOZG19+34D1HcPHMG2/a8XV25U3U5vTGBkLIfWZt22V7r5\nRm0Vxf/0rpn4Py+fLXlXZIFQ8+/i9rsic5XZ4cb9V8vBfqZ2qTlhB2rXSmnO/Ebx+BvAg1dNBkiv\nv6fghgmoJcAbNLvsMbuAs58A9cBwqkTUzYyM5fDgmqXoXtZeJv4Gbc06Nt20qGj1W0Xx7MUxrF0+\nH/uOnXYUa6+i6jVO48b9V8vBfqZ2qUlhr1mSbYrAJ026Xc6fKG2164KslgTlR5EQ+bLOiyOiEX1Z\neXDUayDajfBt2XNceTsSKE1H7Lx8ZtGabm3WIQSQHskWg8kqUdx37LSrGIFXUbVrB2HdO7l1/9Vy\nsJ+pXXg0XlQc7gfGVOPnLFKYzRRyy6WUZmyMJ6Zh4/jn8DuXnsAVo0/ii2O342R+NvKCytoAWFFZ\nnE6PDwynsKJvr7SM3UmwzM93L2vH8xuuw9rl85EeySKdyZZUULrJHbdD9XrV46osk61rluLBNUsr\nynbx+h4zTBCwxR4VP70XyHkYFi1yhRxzs+WuJ4H33gr867NFt819F2/B98euKR6yK78Su8acR87Z\nWZx2AWon94ZTEzSroKlcN5lsTjmVyRhk7eQ7VzUTU6UzOsVvKnGd1HKwn6ldWNijwqY3upSWeZO+\ndrPv3ZJz/ncuSu2Bgu+6ubGhxO1x5/aD2LLneFG8zGLZ2qyjqUEr62S4om+v1L3x5f5DANRFPoBc\n0GxdNwLQE1SSTZPUE7j2yjmufOeqtEW7dEav8RunG0wtB/uZ2oWFPSpaOuTpi8mZwHim3DI3RNyh\neMhNm2A9QbYByY07j2DotbPYcSBVfPzcSBZJPVEMdhqo3CA5IbBx5xE8cPNiPHDzYmzZcxypdKZo\nNSeISoqxnPqzGIznBNqadaRHJm8wbn3ndiX+QeA2OFurwX6mdmEfe1gc7i9kuvS2Fv599w0FwTaj\nJ4E/+Hqh+rNlHiqpBnVV7WkyUFWi+NQLJ1xVxbYq2g+bjzd85//WdyP+cvV7kdQTRSvZ2oHQydcs\nUAiorl0+v9jq1m1AMuzKTM+VxAwTESzsYSBrBXDoyYJ/XCbgS1YDd/4S6E0X/vVQ4m8tY5f5j7N5\nURQblXWvck9YxdKpKDOVzpQEVZ3Ez82NSQDYtv91x5uB9fGwS/xVN5hUOsOtc5lYYVdMGPz4q+Xp\nitlMIeh55y8Dv5zbjn8DwyllyZMq0GgVy/OSEX9WzJktqoIfQxSNddtNLjLOaewGvAQkw3SD2LnB\nuAiJiRO22FVYXSmH+92/TtWky2sAtQJU1qwA8OX+Q1JRJxT6l6vcFub0Ri9ksjllBoq1WdfwPTdg\n65qltg24zDeDoCxxu9RNJ+x2G+ySYeKELXYZhivFsLqNoiHA2U3y03vVz7XIB0z57SVizWbRNZJO\nAlK5WwRQVjBkrAOAreXtRE6IsiHRdtY1AGU/GOvNwK817Lfc3zhG1U6Yi5CYuGCLXYasq2I2Yy/a\nBnZW+fX3lD1kiIvdyDU7rK8/N5IFyNs4OSNLxAh6mmdyeu2fIju3F+u6e1k71i6fD6vdHkbudxDB\nz+5l7cosGy5CYuKCLXYZKnF240qxS2uUWPt+e4nIXp/NCUxvasD5iUpOJ1LpDFb07ZXuFNxanW3N\nOi5l81LL3K11bd15CIGyPPogCarcn4uQmGqDLXYZCpeJ8nEz19+jTmuU4Fdc7F7vxWJU7RTcnCOp\nJ7DppkW+/N6yncfoeB4Prlla3D0ETVDl/lEP2GAYJ6amxW4dN2et6Lz+nvJGXEbRkBPGeRwqRg38\nDg6xe71dFagM2U5Bdg5dI8yY1lBSNOSn7B6IpwtikJY2FyEx1cTUE3Y3gVGP4lyGi4pRA7/icu2V\nc8p6rZhdIACKVaBusO4AoiqJj6MLIpf7M/UK2cyfDo3Ozk4xNDQU+XUBTPQ
9l/jAW+aFkmPuBlVW\njFO2jDWrAyikLq5dPh/3dS8uu4Yb6z2ssXlOrOjbqyz/j2M9DFONENEBIUSn03FTz2L3ExgNCdk2\n3k0qnsx9oRrZZrVOZePq4gz41UsAMswxeAzjlqkn7KqsFTeB0Qhx43P26r6w3kCqSYSidIuE9Xvz\nGDymWph6wu4nMBohbkTbb+C12gJ+UawnTPHlMXhMtTD10h2XrPbVTTEq3KTihd29sB4JsyMjj8Fj\nqoWpZ7EDnrJW4kLlc772yjlY0be36Ea45ep2V4OdmQJhiq/fHRTDBMXUFPYaQOZzvvbKOSXDMFLp\nDHYcSHExjAfCFN96CQAztQ8LexVj9TmrxtKxD9c9YYov58Uz1UJtCrtT5Widwj5c/4QtvtUWkGam\nJrUn7H5a6tY47MMNBhZfpt6pvawYPy11a5xayoLxM8CCYRh/1J7FXoWVo1FRKz7cai7UqaaiLIYJ\ni9oT9hqpHA2LWnAjqHLFe3cdjXXtfm84fFNgaoXac8Wo+p1XWeXoVEYVzE1nsrG6ZPwUJ/mddMUw\nUVJ7wl4jlaNTGbtgbpwDnv1kFYVZscowQVN7rhggkspR3nZXTs+qhVU54NlPVhGnmjK1RO1Z7BHA\n225/dC9rR1uzfJh2nKmZfrKKghqjxzBRwMIuoZq23aq0wWpPJ9x006KqS830M5u0llJNGSYQVwwR\nfQXAFgBzhBBngjhnnFTLtluVxTH02tmynjHVkk5oUK2pmZVmFVXr78MwMnwLOxHNA/AhAK/7X051\nUC0Vnqqdw1MvnEDOMtKwGnvG1EJqphfq7fdh6pcgXDEPAlgPIPrhqSFRLdtu1Q7BKupOxzMMM7Xw\nJexE1AUgJYQ45OLY24hoiIiGTp8un8lZTfjxxQaJaoeQIPJ0PMMwUwsSCuuveADRcwDeIXnqawDu\nAnCDEOI8Ef0bgE43PvbOzk4xNDRUwXKnFlYfO1DYOdxydXuJj914nPuyM0x9Q0QHhBCdTsc5+tiF\nEB9UXGAxgCsAHKKCBdkB4BdEdI0Q4jce18tIsAvYdV4+kwN5DMNIcbTYXZ+ILXaGYZhQCcxiZ2oL\nrphlGCYwYRdCLAjqXExlVHO7XIZhooMrT+uIaqqYZRgmPljY64hqqZhlGCZeWNjrCG5UxTAMwMJe\nV1RLxSzDMPHCWTF1BDeqYhgGYGGvO7hRFcMw7IphGIapM1jYGYZh6gwWdoZhmDqDhZ1hGKbOYGFn\nGIapM1jYGYZh6gxOdwwJ7rLIMExcsLCHAHdZZBgmTtgVEwLcZZFhmDhhYQ8B7rLIMEycsLCHAHdZ\nZBgmTljYQ4C7LDIMEyccPA0B7rLIMEycsLCHBHdZZBgmLtgVwzAMU2ewsDMMw9QZLOwMwzB1Bgs7\nwzBMncHCzjAMU2eQECL6ixKdBvBa5Bd2z2wAZ+JehA9qef21vHagttdfy2sHpsb6LxdCzHE6USzC\nXu0Q0ZAQojPudVRKLa+/ltcO1Pb6a3ntAK/fDLtiGIZh6gwWdoZhmDqDhV3OI3EvwCe1vP5aXjtQ\n2+uv5bUDvP4i7GNnGIapM9hiZxiGqTNY2BUQ0X8josNEdJCIniWiuXGvyS1EtIWIjk2s/wdE1Br3\nmrxARH9IREeJKE9ENZHlQEQfJqLjRPRrItoQ93q8QESPEdG/E9Ev415LJRDRPCLaR0QvTXxvvhj3\nmtxCRNOI6OdEdGhi7ZsDOS+7YuQQ0duFEL+d+P87ALxHCPGFmJflCiK6AcBeIcQ4EX0dAIQQX415\nWa4hot8FkAfwHQBfEUIMxbwkW4goAeBfAHwIwEkALwL4pBDiV7EuzCVE9AEAFwA8LoS4Ku71eIWI\nLgNwmRDiF0T0NgAHAHTXwvtPRARguhDiAhHpAAYBfFEIsd/PedliV2CI+gTTAdTMHVAI8awQYnzi\nx/0AOuJcj1eEEC8JIWppQOw1AH4thHh
FCDEG4B8AfCzmNblGCPHPAM7GvY5KEUK8IYT4xcT/vwXg\nJQA10TNbFLgw8aM+8Z9vrWFht4GI7ieiEwDWArgn7vVUyGcB/DjuRdQ57QBOmH4+iRoRlnqDiBYA\nWAbghXhX4h4iShDRQQD/DuAnQgjfa5/Swk5EzxHRLyX/fQwAhBBfE0LMA7ANwJ/Fu9pSnNY+cczX\nAIyjsP6qws36awiSPFYzO7x6gYhmANgBYJ1lx13VCCFyQoilKOysryEi3+6wKT1BSQjxQZeHPgng\nGQCbQlyOJ5zWTkSfAfBRANeLKgykeHjva4GTAOaZfu4AcCqmtUxJJvzTOwBsE0LsjHs9lSCESBPR\nzwB8GICvQPaUttjtIKJ3m37sAnAsrrV4hYg+DOCrALqEECNxr2cK8CKAdxPRFUTUCOATAHbFvKYp\nw0QA8rsAXhJCfCvu9XiBiOYYWWtElATwQQSgNZwVo4CIdgBYiEJ2xmsAviCESMW7KncQ0a8BNAF4\nc+Kh/bWS0QMARPRxAH8FYA6ANICDQohV8a7KHiL6CICtABIAHhNC3B/zklxDRE8B+M8odBf8fwA2\nCSG+G+uiPEBEKwH8bwBHUPh7BYC7hBD/GN+q3EFESwD8HQrfGw1AvxDiXt/nZWFnGIapL9gVwzAM\nU2ewsDMMw9QZLOwMwzB1Bgs7wzBMncHCzjAMU2ewsDMMw9QZLOwMwzB1Bgs7wzBMnfH/ATbjpsIf\nX0Q6AAAAAElFTkSuQmCC\n", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAD8CAYAAAB5Pm/hAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAADmBJREFUeJzt3XGonXd9x/H3Z21XhzpW6W2JSdgt\nkg1b0QiXzNF/nHU2s2LsoCNlk8AK8Y8WKghbqrA6RiDDqftj0xHXYmHVLqBiMN3a2HUUYbO97bKa\nNHYGm9lrQnOdG60MOpJ+98d9Os/iufece885Pff+eL/gcs/53eec55u2effJc55zkqpCktSun5v2\nAJKkyTL0ktQ4Qy9JjTP0ktQ4Qy9JjTP0ktQ4Qy9JjTP0ktQ4Qy9Jjbt02gMAXHnllTU7OzvtMSRp\nQ3nyySd/VFUzg7ZbF6GfnZ1lfn5+2mNI0oaS5N+H2c5TN5LUOEMvSY0z9JLUOEMvSY0z9JLUOEMv\nSY0z9JLUOEMvSY0z9JLUuHXxzlhpkNl9R6a279MHbpravqVx8Ihekhpn6CWpcYZekhrnOXppgGm9\nPuBrAxoXj+glqXGGXpIaZ+glqXGGXpIaZ+glqXEDQ59ka5JHk5xMciLJnd36J5P8MMmx7uv9PY+5\nK8mpJM8muXGSvwBJ0sqGubzyPPCxqnoqyRuBJ5Mc7X722ar6s96Nk1wL7AauA94MfDPJr1TVhXEO\nLkkazsAj+qo6W1VPdbdfAk4Cm1d4yC7ggap6uaqeA04BO8YxrCRp9VZ1jj7JLPBO4Nvd0h1Jnk5y\nb5IrurXNwPM9D1tg5f8xSJImaOjQJ3kD8BXgo1X1IvB54C3AduAs8OlXN+3z8OrzfHuTzCeZX1xc\nXPXgkqThDBX6JJexFPn7q+qrAFX1QlVdqKpXgC/w09MzC8DWnodvAc5c/JxVdbCq5qpqbmZmZpRf\ngyRpBcNcdRPgHuBkVX2mZ31Tz2Y3A8e724eB3UkuT3INsA14fHwjS5JWY5irbq4HPgx8J8mxbu3j\nwK1JtrN0WuY08BGAqjqR5BDwDEtX7NzuFTeSND0DQ19V36L/efcHV3jMfmD/CHNJksbEd8ZKUuMM\nvSQ1ztBLUuMMvSQ1ztBLUuMMvSQ1ztBLUuMMvSQ1ztBLUuMMvSQ1ztBLUuMMvSQ1ztBLUuMMvSQ1\nztBLUuMMvSQ1ztBLUuMMvSQ1ztBLUuMMvSQ1ztBLUuMMvSQ1ztBLUuMMvSQ1ztBLUuMMvSQ1ztBL\nUuMMvSQ1ztBLUuMMvSQ1bmDok2xN8miSk0lOJLmzW39TkqNJvtd9v6LnMXclOZXk2SQ3TvIXIEla\n2TBH9OeBj1XVW4F3AbcnuRbYBzxSVduAR7r7dD/bDVwH7AQ+l+SSSQwvSRpsYOir6mxVPdXdfgk4\nCWwGdgH3dZvdB3you70LeKCqXq6q54BTwI5xDy5JGs6qztEnmQXeCXwbuLqqzsLS/wyAq7rNNgPP\n9zxsoVu7+Ln2JplPMr+4uLj6ySVJQxk69EneAHwF+GhVvbjSpn3W6mcWqg5W1VxVzc3MzAw7hiRp\nlYYKfZLLWIr8/VX11W75hSSbup9vAs516wvA1p6HbwHOjGdcSdJqDXPVTYB7gJNV9ZmeHx0G9nS3\n9wBf71nfneTyJNcA24DHxzeyJGk1Lh1im+uBDwPfSXKsW/s4cAA4lOQ24AfALQBVdSLJIeAZlq7Y\nub2qLox9cknSUAaGvqq+Rf/z7gA3LPOY/cD+EeaSJI2J74yVpMYZeklqnKGXpMYZeklqnKGXpMYZ\neklqnKGXpMYZeklqnKGXpMYZeklqnK
GXpMYZeklqnKGXpMYZeklq3DCfRy/9n9l9R6Y9gqRV8ohe\nkhpn6CWpcYZekhpn6CWpcYZekhpn6CWpcYZekhpn6CWpcYZekhpn6CWpcYZekhpn6CWpcYZekhpn\n6CWpcQNDn+TeJOeSHO9Z+2SSHyY51n29v+dndyU5leTZJDdOanBJ0nCGOaL/IrCzz/pnq2p79/Ug\nQJJrgd3Add1jPpfkknENK0lavYGhr6rHgB8P+Xy7gAeq6uWqeg44BewYYT5J0ohGOUd/R5Knu1M7\nV3Rrm4Hne7ZZ6NYkSVOy1tB/HngLsB04C3y6W0+fbavfEyTZm2Q+yfzi4uIax5AkDbKm0FfVC1V1\noapeAb7AT0/PLABbezbdApxZ5jkOVtVcVc3NzMysZQxJ0hDWFPokm3ru3gy8ekXOYWB3ksuTXANs\nAx4fbURJ0iguHbRBki8D7wauTLIA3A28O8l2lk7LnAY+AlBVJ5IcAp4BzgO3V9WFyYwuSRrGwNBX\n1a19lu9ZYfv9wP5RhpIkjY/vjJWkxhl6SWqcoZekxhl6SWqcoZekxhl6SWqcoZekxhl6SWrcwDdM\nSZqO2X1Hprbv0wdumtq+NX4e0UtS4wy9JDXO0EtS4wy9JDXO0EtS4wy9JDXO0EtS4wy9JDXO0EtS\n4wy9JDXO0EtS4wy9JDXO0EtS4wy9JDXO0EtS4wy9JDXO0EtS4wy9JDXO0EtS4wy9JDXO0EtS4waG\nPsm9Sc4lOd6z9qYkR5N8r/t+Rc/P7kpyKsmzSW6c1OCSpOEMc0T/RWDnRWv7gEeqahvwSHefJNcC\nu4Hrusd8LsklY5tWkrRqA0NfVY8BP75oeRdwX3f7PuBDPesPVNXLVfUccArYMaZZJUlrsNZz9FdX\n1VmA7vtV3fpm4Pme7Ra6NUnSlIz7xdj0Wau+GyZ7k8wnmV9cXBzzGJKkV6019C8k2QTQfT/XrS8A\nW3u22wKc6fcEVXWwquaqam5mZmaNY0iSBllr6A8De7rbe4Cv96zvTnJ5kmuAbcDjo40oSRrFpYM2\nSPJl4N3AlUkWgLuBA8ChJLcBPwBuAaiqE0kOAc8A54Hbq+rChGaXJA1hYOir6tZlfnTDMtvvB/aP\nMpQkaXx8Z6wkNc7QS1LjDL0kNc7QS1LjDL0kNc7QS1LjDL0kNc7QS1LjBr5hSuvP7L4j0x5B0gbi\nEb0kNc7QS1LjDL0kNc7QS1LjDL0kNc7QS1LjDL0kNc7QS1LjDL0kNc7QS1LjDL0kNc7QS1LjDL0k\nNc7QS1LjDL0kNc7QS1LjDL0kNc7QS1LjDL0kNc7QS1LjDL0kNe7SUR6c5DTwEnABOF9Vc0neBPwt\nMAucBn6nqv5ztDElSWs1jiP636iq7VU1193fBzxSVduAR7r7kqQpmcSpm13Afd3t+4APTWAfkqQh\njRr6Ah5O8mSSvd3a1VV1FqD7flW/BybZm2Q+yfzi4uKIY0iSljPSOXrg+qo6k+Qq4GiS7w77wKo6\nCBwEmJubqxHnkCQtY6TQV9WZ7vu5JF8DdgAvJNlUVWeTbALOjWFOSa+h2X1HprLf0wdumsp+W7fm\nUzdJXp/kja/eBt4HHAcOA3u6zfYAXx91SEnS2o1yRH818LUkrz7Pl6rq75M8ARxKchvwA+CW0ceU\nJK3VmkNfVd8H3tFn/T+AG0YZSpI0Pr4zVpIaZ+glqXGGXpIaZ+glqXGGXpIaZ+glqXGGXpIaZ+gl\nqXGGXpIaZ+glqXGGXpIaZ+glqXGGXpIaZ+glqXGGXpIaZ+glqXGGXpIaZ+glqXGGXpIaZ+glqXFr\n/svBBbP7jkx7BEkayCN6SWqcoZekxhl6SWqc5+glrRvTet3r9IGbprLf14pH9JLUOEMvSY0z9JLU\nOEMvSY2bWOiT7EzybJJTSfZNaj+SpJVN5KqbJJcAfwn8JrAAPJHkcFU9M4n9+Q5VSVrepI7odwCn\nqu
r7VfU/wAPArgntS5K0gkldR78ZeL7n/gLwaxPalySNZJpnBV6La/gnFfr0Wav/t0GyF9jb3f1J\nkmcnNMsorgR+NO0hhrAR5nTG8dkIczrjkPKnK/540Iy/PMw+JhX6BWBrz/0twJneDarqIHBwQvsf\niyTzVTU37TkG2QhzOuP4bIQ5nXE8xjXjpM7RPwFsS3JNkp8HdgOHJ7QvSdIKJnJEX1Xnk9wBPARc\nAtxbVScmsS9J0som9qFmVfUg8OCknv81sq5PLfXYCHM64/hshDmdcTzGMmOqavBWkqQNy49AkKTG\nGfoBkvxJkqeTHEvycJI3T3umiyX5VJLvdnN+LckvTXumfpLckuREkleSrKurHTbCR3YkuTfJuSTH\npz1LP0m2Jnk0ycnu3/Od056pnySvS/J4kn/t5vzjac+0nCSXJPmXJN8Y5XkM/WCfqqq3V9V24BvA\nH017oD6OAm+rqrcD/wbcNeV5lnMc+G3gsWkP0qvnIzt+C7gWuDXJtdOdqq8vAjunPcQKzgMfq6q3\nAu8Cbl+n/xxfBt5TVe8AtgM7k7xryjMt507g5KhPYugHqKoXe+6+nove+LUeVNXDVXW+u/vPLL1v\nYd2pqpNVtR7fGLchPrKjqh4DfjztOZZTVWer6qnu9kssBWrzdKf6WbXkJ93dy7qvdff7OskW4Cbg\nr0d9LkM/hCT7kzwP/C7r84i+1+8DfzftITaYfh/Zse4CtZEkmQXeCXx7upP0150SOQacA45W1Xqc\n88+BPwBeGfWJDD2Q5JtJjvf52gVQVZ+oqq3A/cAd63HGbptPsPTH5/unMeOwc65DAz+yQ8NL8gbg\nK8BHL/oT8bpRVRe607FbgB1J3jbtmXol+QBwrqqeHMfz+ZeDA1X13iE3/RJwBLh7guP0NWjGJHuA\nDwA31BSvmV3FP8v1ZOBHdmg4SS5jKfL3V9VXpz3PIFX1X0n+kaXXPtbTi9zXAx9M8n7gdcAvJvmb\nqvq9tTyZR/QDJNnWc/eDwHenNctykuwE/hD4YFX997Tn2YD8yI4xSBLgHuBkVX1m2vMsJ8nMq1em\nJfkF4L2ss9/XVXVXVW2pqlmW/nv8h7VGHgz9MA50px6eBt7H0qvg681fAG8EjnaXgf7VtAfqJ8nN\nSRaAXweOJHlo2jPB0kd2sHRK7iGWXkA8tB4/siPJl4F/An41yUKS26Y900WuBz4MvKf77/BYd0S6\n3mwCHu1+Tz/B0jn6kS5fXO98Z6wkNc4jeklqnKGXpMYZeklqnKGXpMYZeklqnKGXpMYZeklqnKGX\npMb9L7j1oYoFZAnYAAAAAElFTkSuQmCC\n", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXYAAAD8CAYAAABjAo9vAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAFHpJREFUeJzt3X2QnWV5x/HflcMGlgjdOkQlIWsc\nYaIt4WVmh+DQaRmFJqOUt0I1DR2nzpjhD6coNoWQjBA1Qs0U7ah/NBSmOqYIlHBAQxvCKGNlSCRx\nlywxRMEWkoMDsbjD21qSzdU/djddNmfPy3Pf5zxv389Mhpyz5zznOgP8cud6rud+zN0FACiOWWkX\nAACIi2AHgIIh2AGgYAh2ACgYgh0ACoZgB4CCIdgBoGCCg93MTjCzn5rZU2a2x8zWxSgMAJCMhV6g\nZGYmaY67v25mPZJ+Iuk6d98eo0AAQHuOCz2Aj//J8PrEw56JXw3/tDjllFN84cKFoR8NAKWya9eu\n37j73GavCw52STKziqRdkk6X9C1339Ho9QsXLtTOnTtjfDQAlIaZPd/K66KcPHX3MXc/R9Jpks4z\nszPrFLTSzHaa2c6DBw/G+FgAQB1Rp2LcfUTSY5KW1fnZRncfcPeBuXOb/k0CAJBQjKmYuWbWN/H7\nXkkXSXom9LgAgGRi9NhPlfTtiT77LEn3uvsPIhwXAJBAjKmY3ZLOjVALACACrjwFgIKJMu4IAJjZ\n2uqw7t6xX2Puqphp+ZIF+vLlizv2eQQ7AHRIdbCmmzbv1puHjhx9bsxd393+giR1LNxpxQBAB1QH\na1q9efhtoT7V3Tv2d+yzCXYA6IANW/dp9NDYjD8fC9ynqxFaMQAQSXWwpg1b9+nFkdHGG2ZJqph1\nrA6CHQAimGy9NFqlT7V8yYKO1UKwA0AEzVovk8ykFUv6mYoBgKx7cWR0xp+ZpHl9vVq1dJEuP3d+\nx2sh2AEggnl9varVCff5fb16/MYPd7UWpmIAIIJVSxept6fytud6eypatXRR12thxQ4ALZg68VKv\nrTL5+0av6RaCHQCamD7xUhsZ1erNw5J0TLinEeTT0YoBgCbqTbyMHhrThq37UqqoMVbsADBFvZbL\nTBMvjSZh0kSwA8CEFXc8ocefe+Xo48mWy+/19mhk9NAxr5/X19vN8lpGKwYANL617tRQnzR6aExm\nyszESysIdgClVh2s6YLbfnh0K916Rt48pFuvXKz5fb0yjc+m33rl4kycKK2HVgyA0lpbHdam7S80\n3bBrXl9vZiZeWkGwAyidejfAaCSrLZeZEOwASqM6WNOaB4b1xlut7cAoSRe8/525WalPItgBlMLa\n6nDDPvp03bg3aacQ7AAKa3Imvd7mXDMxSV/7+Dm5W6VPRbADKKRWT4xOt+L8/lyHuhRh3NHMFpjZ\nj8xsr5ntMbPrYhQGAElNtl3aDfVrzu/sDTC6JcaK/bCkz7v7z8zsJEm7zGybu/88wrEBoC3Trx5t\nRW/PLN165Vm5X6lPCg52d/+1pF9P/P41M9srab4kgh1AV8109ehMTuyZpa8UKNAnRe2xm9lCSedK\n2hHzuAAwk+pgTeu+v0e/ffPYvVxmMj/FvdK7IVqwm9k7JN0v6bPu/mqdn6+UtFKS+vv7Y30sgJIa\n3yN9t0ZbvMhIkubMrmjPF5d1sKpsiLJXjJn1aDzUN7n75nqvcfeN7j7g7gNz586N8bEASqo6WNP1\n9wy1FeqVWab1V+T/xGgrglfsZmaS7pS0191vDy8JAGbW7oVGkjS7YvrqVWcXtvUyXYxWzAWS/krS\nsJkNTTx3k7s/HOHYACCp/f1dpHxfPR
oixlTMTzR+sRYARJdkfxdJ+nrOrx4NwZWnADIryUy6NH6h\nUVlDXSLYAWTQkvXb9NJrb7X9vqJdaJQUwQ4gU5KE+pzZFa2/Irt3NOo2gh1AJiQ5OWomrVhSjP1d\nYiLYAaQuSS+9KBt2dQLBDiA1SWbSpfG7GhHqMyPYAXRd0kCXWKm3gmAH0FUhI4wEemsIdgBdUR2s\nadV9Q2rj3Kik4m6t20kEO4COC+mlb/r0hzpQUbER7AA66qyb/0Ov/m972wFUTPqHvyjvlgChCHYA\nHVEdrOmz9ww1f+E0rNLDEewAogqZeCnzxl0xEewAokm6x8sZ75qjbddfGL+gkiLYAQRL2naRWKV3\nAsEOILGke6VL9NI7iWAHkEjSXvpxJj1768c6UBEmRbmZNYByCZlLJ9Q7jxU7gJadvnqLDnv77+Pk\naHcR7ACaYtOufCHYATR08e2P6Zcvv9H2+zg5mh6CHUBdSUcYTdLXGGFMFcEO4BhJWy8nH1/R7nXL\nOlAR2kGwAzgqadtldsX01avOZpWeEVGC3czuknSJpJfd/cwYxwTQXUknXuilZ0+sFfu/SPqmpO9E\nOh6ALkm6SqeXnl1Rgt3df2xmC2McC0B3hIwwMpeebfTYgRJKcvOLScylZ1/Xgt3MVkpaKUn9/f3d\n+lgAU4TswkgvPT+6FuzuvlHSRkkaGBhIcIoGQIikJ0fppecPrRig4JKeHJXYKz2vYo073i3pQkmn\nmNkBSTe7+50xjg0guffduEVJ/nrMhUb5FmsqZnmM4wCIZ+GNWxK9j156/tGKAQom6cTLCRXTM+s/\n2oGK0G0EO1AQIXPp9NKLhWAHCmDJ+m166bW32n7fu0+arR1rLu5ARUgTwQ7kWMhcOqv04iLYgRyq\nDtb0uXuGEk28cHK0+Ah2IGdW3PGEHn/ulUTvZZVeDgQ7kBMhFxqxaVe5EOxADiSdSZdYpZcRwQ5k\nWMjJ0eNMevbWj0WuCHlAsAMZFbK17n/fRqCXGcEOZEzIhUZMvEAi2IFMCemls0rHpFlpFwBgvJee\nNNTffdJsQh1vw4odSBmrdMRGsAMpCZlLp5eORgh2IAVJV+km6b9YpaMJgh3oIiZe0A0EO9AFIYHO\nDTDQLoId6LAPrHlYvxtLsg8j2wEgGYId6JCQ7QDYtAshCHagA0K2A2CVjlAEOxARq3RkAcEORMKF\nRsgKgh0IFHJHI24mjU6IEuxmtkzSP0qqSPpnd78txnGBrDt99RYdTjDwwoVG6KTgYDeziqRvSbpY\n0gFJT5rZQ+7+89BjA1kVsh0AJ0fRaTFW7OdJetbdfyVJZvY9SZdJIthRSEl76dzRCN0SI9jnS9o/\n5fEBSUsiHBfIlCXrt+ml195K9N5rzu/Xly9fHLkioL4YwW51njum62hmKyWtlKT+/v4IHwt0T9JV\nOtsBIA0xgv2ApAVTHp8m6cXpL3L3jZI2StLAwECy66uBLkt6clRilY70xAj2JyWdYWbvk1ST9AlJ\nfxnhuEBqQjbt4kIjpC042N39sJl9RtJWjY873uXue4IrA1KSdDsA2i7Iiihz7O7+sKSHYxwLSEvI\nhUYnH1/R7nXLIlcEJMOVp4DYDgDFQrCj1EJW6ZwcRVYR7CgtVukoKoIdpROyHQCbdiEPCHaUStJV\nOpt2IU8IdpRCSC+duXTkDcGOQgu5o5FELx35RLCjsELuO8pcOvKMYEfhhGwHwAgjioBgR6GE9NK5\nAQaKgmBHIYQE+gXvf6c2ffpDkSsC0kOwI/dCQp2Toygigh25VR2sadV9Qzp0pP33skpHkRHsyKWk\nEy/cdxRlQLAjV0Lm0mm7oCwIduRG0ptJcwMMlA3BjswLWaUzwogyItiRaUl76ezCiDIj2JFJXD0K\nJEewI1NCAp22CzCOYEdmsB0AEAfBjtSxHQAQF8GOVIVsrctcOlAfwY7UJL1NHat0oLGgYDezqyXd\nIu
mDks5z950xikKxfWDNw/rdmLf9Pi40AlozK/D9T0u6UtKPI9SCgqsO1rTwxi2JQv2a8/sJdaBF\nQSt2d98rSWYWpxoUFtsBAN1Djx0ddfHtj+mXL7+R6L2MMALJNA12M3tU0nvq/GiNuz/Y6geZ2UpJ\nKyWpv7+/5QKRX2wHAKSjabC7+0UxPsjdN0raKEkDAwPtN1mRGyFXjzLCCISjFYOokvbSWaUD8YSO\nO14h6RuS5kraYmZD7r40SmXIFW6AAWRH6FTMA5IeiFQLcirpCdKTj69o97plHagIKDdaMUisOljT\n5+4ZUpITJky8AJ1DsCMRVulAdhHsaEvSiZcz3jVH266/MH5BAI5BsKMlIRcasWkX0F0EO5pKGuqM\nMALpINgxo+pgTavuG9KhI+2/l/uOAukh2FFX0lU6vXQgfQQ73iZkOwBW6UA2EOw4Ksm9R0/smaWv\nXHkWM+lAhhDskJRsjxemXYBsIthLrDpY04at+1QbGW37vYQ6kF0Ee0mNT7w8pUNH2tsQgJOjQPYR\n7CWU5ASpSfoa+7sAuUCwl0jSuXTaLkC+EOwlkHQXRq4cBfKJYC84tgMAyodgL6ikFxr19fbolkv/\nkF46kGMEewElWaXPmV3R+isWE+hAARDsBbPijifaDnVOjgLFQrAXRHWwptWbd2u0zZEXQh0oHoI9\n56qDNd3y0B6NjB5q+71s2gUUE8GeU9XBmtZ9f49++2b7gc6NpIFiI9hzKOlcOit0oBwI9hz6u397\nqq1Qp48OlEtQsJvZBkl/JuktSc9J+mt3H4lRGN4u6YVGrNKB8pkV+P5tks5097Mk/ULS6vCSMN2S\n9dvaDvXfP7FHX//4OYQ6UEJBK3Z3f2TKw+2SrgorB9OtrQ63dQOMnoppw1Vnc3IUKLGYPfZPSbon\n4vFKLcnUy+yK6auEOlB6TYPdzB6V9J46P1rj7g9OvGaNpMOSNjU4zkpJKyWpv78/UbFlwIVGAEI1\nDXZ3v6jRz83sk5IukfQRd59xWMPdN0raKEkDAwPtTuoVXtJNu5hJBzBd6FTMMkk3SPoTd38zTknl\nwx2NAMQU2mP/pqTjJW0zM0na7u7XBldVEkn66CZpBSOMABoInYo5PVYhZVMdrOnz9z2lsTZuJs1e\n6QBawZWnKagO1nT9vUNqI9O50AhAywj2LhufehluOdR7ZkkbrqaXDqB1BHuXVAdr2rB1n2ojoy29\nnl46gKQI9i5YcccTevy5V1p+PSOMAEKE7hWDJtZWh9sK9WvO7yfUAQRhxR5Z0jsaMfECIBaCPaLq\nYE3X3zOkdjYDmN/Xq1VLFxHoAKIh2CO65aE9bYU6vXQAnUCwRzA58dJO++WC97+TUAfQEQR7oMm5\n9NFDYy29vmKm5UsWMMYIoGMI9oTanUun7QKgWwj2BNpdpTPCCKCbCPYENmzd11KoM/ECIA0Eewsm\n2y4vjoxqXl9v0/ZLb09Ft165mEAHkAqCvYnpbZfayKhM0kx7eLFKB5A2gr2Jem0Xl44Jd1bpALKC\nvWKaeHGGtotrfHVuE/8k1AFkBSv2Jmbqqc/v69XjN344hYoAoDFW7E2sWrpIvT2Vtz3X21PRqqWL\nUqoIABpjxd7EZHtl6lQMJ0cBZBnB3oLLz51PkAPIDVoxAFAwBDsAFEwpWjHTrxylRw6gyAof7PWu\nHF29eViSCHcAhRTUijGzL5nZbjMbMrNHzGxerMJiqXfl6OihMW3Yui+ligCgs0J77Bvc/Sx3P0fS\nDyR9IUJNUc105ehMzwNA3gUFu7u/OuXhHM28N1Zq5vX1tvU8AORd8FSMma03s/2SVqjBit3MVprZ\nTjPbefDgwdCPbRlXjgIoG3NvvMg2s0clvafOj9a4+4NTXrda0gnufnOzDx0YGPCdO3e2W2tiTMUA\nKAIz2+XuA01f1yzY2/jA90ra4u5nNnttt4MdAIqg1WAPnYo5Y8rD
SyU9E3I8AEC40Dn228xskaQj\nkp6XdG14STNbWx3W3Tv2a8xdFTMtX7JAX758cSc/EgByJyjY3f3PYxXSzNrqsL67/YWjj8fcjz4m\n3AHg/+Vmr5i7d+xv63kAKKvcBPvYDCd5Z3oeAMoqN8FeMWvreQAoq9wE+/IlC9p6HgDKKje7O06e\nIGUqBgAai3aBUju4QAkA2teVC5QAANlDsANAwRDsAFAwBDsAFAzBDgAFQ7ADQMGkMu5oZgc1vhtk\nN5wi6Tdd+qxO47tkE98lm4r4Xd7r7nObvTiVYO8mM9vZytxnHvBdsonvkk1l/i60YgCgYAh2ACiY\nMgT7xrQLiIjvkk18l2wq7XcpfI8dAMqmDCt2ACiVUgS7mX3JzHab2ZCZPWJm89KuKSkz22Bmz0x8\nnwfMrC/tmpIys6vNbI+ZHTGz3E0vmNkyM9tnZs+a2Y1p1xPCzO4ys5fN7Om0awllZgvM7Edmtnfi\nv6/r0q4pKTM7wcx+amZPTXyXdS29rwytGDM72d1fnfj930j6A3e/NuWyEjGzP5X0Q3c/bGZ/L0nu\nfkPKZSViZh+UdETSP0n6W3fPzV7OZlaR9AtJF0s6IOlJScvd/eepFpaQmf2xpNclfcfdz0y7nhBm\ndqqkU939Z2Z2kqRdki7P478bMzNJc9z9dTPrkfQTSde5+/ZG7yvFin0y1CfMkZTbP83c/RF3Pzzx\ncLuk09KsJ4S773X3fWnXkdB5kp5191+5+1uSvifpspRrSszdfyzplbTriMHdf+3uP5v4/WuS9kqa\nn25Vyfi41yce9kz8appfpQh2STKz9Wa2X9IKSV9Iu55IPiXp39MuoqTmS9o/5fEB5TQ8iszMFko6\nV9KOdCtJzswqZjYk6WVJ29y96XcpTLCb2aNm9nSdX5dJkruvcfcFkjZJ+ky61TbW7LtMvGaNpMMa\n/z6Z1cp3yal6d1HP7d8Ei8jM3iHpfkmfnfa39lxx9zF3P0fjfzs/z8yatspyc8/TZtz9ohZf+q+S\ntki6uYPlBGn2Xczsk5IukfQRz/hJkjb+veTNAUlT76R+mqQXU6oF00z0o++XtMndN6ddTwzuPmJm\nj0laJqnhSe7CrNgbMbMzpjy8VNIzadUSysyWSbpB0qXu/mba9ZTYk5LOMLP3mdlsSZ+Q9FDKNUFH\nTzjeKWmvu9+edj0hzGzu5OSbmfVKukgt5FdZpmLul7RI4xMYz0u61t1r6VaVjJk9K+l4Sf8z8dT2\nHE/4XCHpG5LmShqRNOTuS9OtqnVm9lFJX5dUkXSXu69PuaTEzOxuSRdqfBfBlyTd7O53plpUQmb2\nR5L+U9Kwxv+fl6Sb3P3h9KpKxszOkvRtjf83NkvSve7+xabvK0OwA0CZlKIVAwBlQrADQMEQ7ABQ\nMAQ7ABQMwQ4ABUOwA0DBEOwAUDAEOwAUzP8BYxTuYmu3P3AAAAAASUVORK5CYII=\n", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "import matplotlib.pyplot as plt\n", - "plt.scatter(X[:, 0], y)\n", - "plt.scatter(X[:, 0], nuisance[0])\n", - "plt.show()\n", - "plt.hist(nuisance[1])\n", - "plt.show()\n", - "plt.scatter(X[:, 2], nuisance[2][:, 2])\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([ 0.95540328, -0.05546807, -0.04144514, 0.02004237, -0.01020521])" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "model_list[1]._model.coef_" + "est = SparseLinearDMLCateEstimator(model_y=MultiTaskLassoCV(cv=3), model_t=LassoCV(cv=3),\n", + " model_final=MultiTaskLassoCV(cv=3))\n", + "dx = 8\n", + "est.fit(np.hstack([y.reshape(-1,1), y.reshape(-1,1)]), X[:, 0], X[:, 1:dx], X[:, dx:])\n", + "print(X[:1])\n", + "print(est.effect(X[:1, 1:dx]))\n", + "print(est.model_final.coef_)\n", + "print(est.const_marginal_effect(X[:1, 1:dx]))" ] }, { @@ -369,7 +129,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.1" + "version": "3.6.1" } }, "nbformat": 4, From b0e839053b237351969416b4837b25a2ce1a979c Mon Sep 17 00:00:00 2001 From: Vasileios Syrgkanis Date: Sat, 2 Nov 2019 08:10:21 -0400 Subject: [PATCH 16/64] fixed intercept problem in model_final with multidim output --- econml/dml.py | 2 +- notebooks/OrthoLearner.ipynb | 28 +++++++++------------------- 2 files changed, 10 insertions(+), 20 deletions(-) diff --git a/econml/dml.py b/econml/dml.py index b67a53593..fc8af9950 100644 --- a/econml/dml.py +++ b/econml/dml.py @@ -164,7 +164,7 @@ def predict(self, X): F = self._featurizer.transform(X) if X is not None else np.ones((1, 1)) F, T = broadcast_unit_treatments(F, self._d_t[0] if self._d_t else 1) prediction = self._model.predict(cross_product(F, T)) - return reshape_treatmentwise_effects(prediction - 
self._intercept if self._intercept else prediction, + return reshape_treatmentwise_effects(prediction - self._intercept if self._intercept is not None else prediction, self._d_t, self._d_y) super().__init__(model_y=FirstStageWrapper(model_y, is_Y=True), diff --git a/notebooks/OrthoLearner.ipynb b/notebooks/OrthoLearner.ipynb index df42644e1..600a6feac 100644 --- a/notebooks/OrthoLearner.ipynb +++ b/notebooks/OrthoLearner.ipynb @@ -50,15 +50,21 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 26, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[[-0.68287838 -0.80819115 -0.18035347 0.91400753 0.35464864 0.02063007\n", - " -2.98266229 0.97470114 1.21184005 0.46500733]]\n" + "[[ 1.31195309 0.60137619 -0.54273135 0.13270353 -0.37173155 -0.91041002\n", + " 0.69958168 -0.73883975 -0.76040264 0.984971 ]]\n", + "[[0.94534005 0.94534005]]\n", + "[[ 0.94377282 0. -0.00288768 0. 0. 0.\n", + " 0. 0. ]\n", + " [ 0.94377282 0. -0.00288768 0. 0. 0.\n", + " 0. 0. ]]\n", + "[[0.94534005 0.94534005]]\n" ] }, { @@ -68,22 +74,6 @@ "/Users/vasilis/Documents/EconML/econml/dml.py:160: UserWarning: The final model has a nonzero intercept for at least one outcome; it will be subtracted, but consider fitting a model without an intercept if possible.\n", " UserWarning)\n" ] - }, - { - "ename": "ValueError", - "evalue": "The truth value of an array with more than one element is ambiguous. 
Use a.any() or a.all()", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0mest\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhstack\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreshape\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreshape\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mdx\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdx\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 12\u001b[0;31m 
\u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mest\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0meffect\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mdx\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 13\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mest\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodel_final\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcoef_\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mest\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconst_marginal_effect\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mdx\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/Users/vasilis/Documents/EconML/econml/cate_estimator.py\u001b[0m in \u001b[0;36meffect\u001b[0;34m(self, X, T0, T1)\u001b[0m\n\u001b[1;32m 316\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0meffect\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mT0\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mT1\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 317\u001b[0m \u001b[0;31m# NOTE: don't explicitly expand treatments here, because it's done in the super call\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 318\u001b[0;31m \u001b[0;32mreturn\u001b[0m 
\u001b[0msuper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0meffect\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mT0\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mT0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mT1\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mT1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 319\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 320\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/Users/vasilis/Documents/EconML/econml/cate_estimator.py\u001b[0m in \u001b[0;36meffect\u001b[0;34m(self, X, T0, T1)\u001b[0m\n\u001b[1;32m 234\u001b[0m \u001b[0;31m# TODO: what if input is sparse? - there's no equivalent to einsum,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 235\u001b[0m \u001b[0;31m# but tensordot can't be applied to this problem because we don't sum over m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 236\u001b[0;31m \u001b[0meff\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconst_marginal_effect\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 237\u001b[0m \u001b[0;31m# if X is None then the shape of const_marginal_effect will be wrong because the number\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 238\u001b[0m \u001b[0;31m# of rows of T was not taken into account\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/Users/vasilis/Documents/EconML/econml/_ortho_learner.py\u001b[0m in \u001b[0;36mconst_marginal_effect\u001b[0;34m(self, X)\u001b[0m\n\u001b[1;32m 221\u001b[0m \"\"\"\n\u001b[1;32m 222\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_check_fitted_dims\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 
223\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_model_final\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 224\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 225\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mconst_marginal_effect_interval\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0malpha\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0.1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/Users/vasilis/Documents/EconML/econml/_rlearner.py\u001b[0m in \u001b[0;36mpredict\u001b[0;34m(self, X)\u001b[0m\n\u001b[1;32m 117\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 118\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 119\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_model_final\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 120\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 121\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mscore\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mY\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mT\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mW\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mZ\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnuisances\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msample_weight\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msample_var\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/Users/vasilis/Documents/EconML/econml/dml.py\u001b[0m in \u001b[0;36mpredict\u001b[0;34m(self, X)\u001b[0m\n\u001b[1;32m 165\u001b[0m \u001b[0mF\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mT\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mbroadcast_unit_treatments\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mF\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_d_t\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_d_t\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 166\u001b[0m \u001b[0mprediction\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_model\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcross_product\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mF\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mT\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 167\u001b[0;31m return reshape_treatmentwise_effects(prediction - self._intercept if self._intercept else prediction,\n\u001b[0m\u001b[1;32m 168\u001b[0m self._d_t, self._d_y)\n\u001b[1;32m 169\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mValueError\u001b[0m: The truth value of an array with more than one element is ambiguous. 
Use a.any() or a.all()" - ] } ], "source": [ From 5e8e0c9d763ec9a9329148cc3a6a1c806275c5c4 Mon Sep 17 00:00:00 2001 From: Vasileios Syrgkanis Date: Sat, 2 Nov 2019 13:12:27 -0400 Subject: [PATCH 17/64] comments on the crossfit function --- econml/_ortho_learner.py | 65 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 64 insertions(+), 1 deletion(-) diff --git a/econml/_ortho_learner.py b/econml/_ortho_learner.py index baf4be9bf..c4143476f 100644 --- a/econml/_ortho_learner.py +++ b/econml/_ortho_learner.py @@ -28,6 +28,69 @@ def _crossfit(model, folds, *args, **kwargs): + """ + General crossfit based calculation of nuisance parameters. + + Parameters + ---------- + model : object + An object that supports fit and predict. Fit must accept all the args + and the keyword arguments kwargs. Similarly predict must all accept + all the args as arguments and kwards as keyword arguments. The fit + function estimates a model of the nuisance function, based on the input + data to fit. Predict evaluates the fitted nuisance function on the input + data to predict. + folds : list of tuples + The crossfitting fold structure. Every entry in the list is a tuple whose + first element are the training indices of the args and kwargs data and + the second entry are the test indices. If the union of the test indices + is not the full set of all indices, then the remaining nuisance parameters + for the missing indices have value NaN. + args : a sequence of (numpy matrices or None) + Each matrix is a data variable whose first index corresponds to a sample + kwargs : a sequence of key-value args, with values being (numpy matrices or None) + Each keyword argument is of the form Var=x, with x a numpy array. Each + of these arrays are data variables. The model fit and predict will be + called with signature: `model.fit(*args, **kwargs)` and + `model.predict(*args, **kwargs)`. Key-value arguments that have value + None, are ommitted from the two calls. 
So all the args and the non None + kwargs variables must be part of the models signature. + + Returns + ------- + nuisances : tuple of numpy matrices + Each entry in the tuple is a nuisance parameter matrix. Each row i-th in the + matric corresponds to the value of the nuisancee parameter for the i-th input + sample. + model_list : list of objects of same type as input model + The cloned and fitted models for each fold. Can be used for inspection of the + variability of the fitted models across folds. + + Examples + -------- + >>> import numpy as np + >>> from sklearn.model_selection import KFold + >>> from sklearn.linear_model import Lasso + >>> class Wrapper: + >>> def __init__(self, model): + >>> self._model = model + >>> def fit(self, X, y, W=None): + >>> self._model.fit(X, y) + >>> return self + >>> def predict(self, X, y, W=None): + >>> return self._model.predict(X) + >>> np.random.seed(123) + >>> X = np.random.normal(size=(5000, 3)) + >>> y = X[:, 0] + np.random.normal(size=(5000,)) + >>> folds = list(KFold(2).split(X, y)) + >>> model = Lasso(alpha=0.01) + >>> nuisance, model_list = _crossfit(Wrapper(model), folds, X, y, W=y, Z=None) + >>> nuisance + (array([-1.1057289 , -1.53756637, -2.4518278 , ..., 1.10628792, + -1.82966233, -1.78227335]),) + >>> model_list + [<__main__.Wrapper object at 0x12f41e518>, <__main__.Wrapper object at 0x12f41e6d8>] + """ model_list = [] for idx, (train_idxs, test_idxs) in enumerate(folds): model_list.append(clone(model, safe=False)) @@ -53,7 +116,7 @@ def _crossfit(model, folds, *args, **kwargs): nuisance_temp = (nuisance_temp,) if idx == 0: - nuisances = tuple([np.zeros((args[0].shape[0],) + nuis.shape[1:]) for nuis in nuisance_temp]) + nuisances = tuple([np.full((args[0].shape[0],) + nuis.shape[1:], np.nan) for nuis in nuisance_temp]) for it, nuis in enumerate(nuisance_temp): nuisances[it][test_idxs] = nuis From dc129d356b4d2f5f9447ae86228ff04463a4d0ed Mon Sep 17 00:00:00 2001 From: Vasileios Syrgkanis Date: Sat, 2 Nov 2019 
15:12:36 -0400 Subject: [PATCH 18/64] handling the case where the test folds in a custom splitter in ortho learner do not contain all the indices. We now call fit model final only on that subset of indices for which we have calculated their nuisance values. --- econml/_ortho_learner.py | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/econml/_ortho_learner.py b/econml/_ortho_learner.py index c4143476f..93b476cd7 100644 --- a/econml/_ortho_learner.py +++ b/econml/_ortho_learner.py @@ -65,7 +65,11 @@ def _crossfit(model, folds, *args, **kwargs): model_list : list of objects of same type as input model The cloned and fitted models for each fold. Can be used for inspection of the variability of the fitted models across folds. - + fitted_inds : np array1d + The indices of the arrays for which the nuisance value was calculated. This + corresponds to the union of the indices of the test part of each fold in + the input fold list. + Examples -------- >>> import numpy as np @@ -92,8 +96,10 @@ def _crossfit(model, folds, *args, **kwargs): [<__main__.Wrapper object at 0x12f41e518>, <__main__.Wrapper object at 0x12f41e6d8>] """ model_list = [] + fitted_inds = [] for idx, (train_idxs, test_idxs) in enumerate(folds): model_list.append(clone(model, safe=False)) + fitted_inds = np.concatenate((fitted_inds, test_idxs)) args_train = () args_test = () @@ -121,7 +127,7 @@ def _crossfit(model, folds, *args, **kwargs): for it, nuis in enumerate(nuisance_temp): nuisances[it][test_idxs] = nuis - return nuisances, model_list + return nuisances, model_list, np.sort(fitted_inds.astype(int)) class _OrthoLearner(TreatmentExpansionMixin, LinearCateEstimator): @@ -192,6 +198,9 @@ def _check_fitted_dims(self, X): else: assert self._d_x == X.shape[1:], "Dimension mis-match of X with fitted X" + def _subinds_check_none(self, var, inds): + return var[inds] if var is not None else None + @BaseCateEstimator._wrap_fit def fit(self, Y, T, X=None, W=None, 
Z=None, sample_weight=None, sample_var=None, inference=None): """ @@ -222,8 +231,15 @@ def fit(self, Y, T, X=None, W=None, Z=None, sample_weight=None, sample_var=None, self """ self._check_input_dims(Y, T, X, W, Z, sample_weight, sample_var) - nuisances = self.fit_nuisances(Y, T, X, W, Z, sample_weight=sample_weight) - self.fit_final(Y, T, X, W, Z, nuisances, sample_weight=sample_weight, sample_var=sample_var) + nuisances, fitted_inds = self.fit_nuisances(Y, T, X, W, Z, sample_weight=sample_weight) + self.fit_final(self._subinds_check_none(Y, fitted_inds), + self._subinds_check_none(T, fitted_inds), + self._subinds_check_none(X, fitted_inds), + self._subinds_check_none(W, fitted_inds), + self._subinds_check_none(Z, fitted_inds), + tuple([self._subinds_check_none(nuis, fitted_inds) for nuis in nuisances]), + sample_weight=self._subinds_check_none(sample_weight, fitted_inds), + sample_var=self._subinds_check_none(sample_var, fitted_inds)) return self def fit_nuisances(self, Y, T, X=None, W=None, Z=None, sample_weight=None): @@ -252,10 +268,10 @@ def fit_nuisances(self, Y, T, X=None, W=None, Z=None, sample_weight=None): reshape(self._label_encoder.transform(T), (-1, 1)))[:, 1:]), validate=False) - nuisances, fitted_models = _crossfit(self._model_nuisance, folds, - Y, T, X=X, W=W, Z=Z, sample_weight=sample_weight) + nuisances, fitted_models, fitted_inds = _crossfit(self._model_nuisance, folds, + Y, T, X=X, W=W, Z=Z, sample_weight=sample_weight) self._models_nuisance = fitted_models - return nuisances + return nuisances, fitted_inds def fit_final(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None, sample_var=None): self._model_final.fit(Y, T, X=X, W=W, Z=Z, nuisances=nuisances, From 7679328ec7890cc374cced78c1e7fe7963192b2b Mon Sep 17 00:00:00 2001 From: Vasileios Syrgkanis Date: Sat, 2 Nov 2019 15:13:50 -0400 Subject: [PATCH 19/64] handling the case where the test folds in a custom splitter in ortho learner do not contain all the indices. 
We now call fit model final only on that subset of indices for which we have calculated their nuisance values. --- econml/_ortho_learner.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/econml/_ortho_learner.py b/econml/_ortho_learner.py index 93b476cd7..c1282291e 100644 --- a/econml/_ortho_learner.py +++ b/econml/_ortho_learner.py @@ -88,12 +88,15 @@ def _crossfit(model, folds, *args, **kwargs): >>> y = X[:, 0] + np.random.normal(size=(5000,)) >>> folds = list(KFold(2).split(X, y)) >>> model = Lasso(alpha=0.01) - >>> nuisance, model_list = _crossfit(Wrapper(model), folds, X, y, W=y, Z=None) + >>> nuisance, model_list, fitted_inds = _crossfit(Wrapper(model), folds, X, y, W=y, Z=None) >>> nuisance (array([-1.1057289 , -1.53756637, -2.4518278 , ..., 1.10628792, -1.82966233, -1.78227335]),) >>> model_list [<__main__.Wrapper object at 0x12f41e518>, <__main__.Wrapper object at 0x12f41e6d8>] + >>> fitted_inds + array([ 0, 1, 2, ..., 4997, 4998, 4999]) + """ model_list = [] fitted_inds = [] From c67f745bd054eb13a6fa4a6486e679039ed2f208 Mon Sep 17 00:00:00 2001 From: Vasileios Syrgkanis Date: Sat, 2 Nov 2019 15:15:49 -0400 Subject: [PATCH 20/64] testing notebook updates --- notebooks/OrthoLearner.ipynb | 100 ++++++++++++++++++++++++----------- 1 file changed, 70 insertions(+), 30 deletions(-) diff --git a/notebooks/OrthoLearner.ipynb b/notebooks/OrthoLearner.ipynb index 600a6feac..b8624537b 100644 --- a/notebooks/OrthoLearner.ipynb +++ b/notebooks/OrthoLearner.ipynb @@ -3,9 +3,7 @@ { "cell_type": "code", "execution_count": 1, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "%load_ext autoreload\n", @@ -15,19 +13,69 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(array([-1.1057289 , -1.53756637, -2.4518278 , ..., 1.10628792,\n", + " -1.82966233, -1.78227335]),)\n", + 
"[<__main__.Wrapper object at 0x1273b70f0>, <__main__.Wrapper object at 0x1273b77b8>]\n" + ] + }, + { + "data": { + "text/plain": [ + "array([ 0, 1, 2, ..., 4997, 4998, 4999])" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import numpy as np\n", + "from sklearn.model_selection import KFold\n", + "from sklearn.linear_model import Lasso\n", + "class Wrapper:\n", + " def __init__(self, model):\n", + " self._model = model\n", + " def fit(self, X, y, W=None):\n", + " self._model.fit(X, y)\n", + " return self\n", + " def predict(self, X, y, W=None):\n", + " return self._model.predict(X)\n", + "np.random.seed(123)\n", + "X = np.random.normal(size=(5000, 3))\n", + "y = X[:, 0] + np.random.normal(size=(5000,))\n", + "folds = list(KFold(2).split(X, y))\n", + "model = Lasso(alpha=0.01)\n", + "nuisance, model_list, fitted_inds = _crossfit(Wrapper(model),\n", + " folds,\n", + " X, y, W=y, Z=None)\n", + "print(nuisance)\n", + "print(model_list)\n", + "fitted_inds" + ] + }, + { + "cell_type": "code", + "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[[ 1.30061979 1.91105845 -0.46179321]]\n", - "[1.00171855]\n", - "(array([0.99046797]), array([1.01296913]))\n", - "[0.99920984 0.00131274]\n", - "(array([ 0.99401435, -0.00390604]), array([1.00440533, 0.00653151]))\n", - "[1.00171855]\n" + "[[ 0.731492 0.27402924 -2.20337664]]\n", + "[1.00000239]\n", + "(array([0.99461543]), array([1.00538936]))\n", + "[ 1.00035091 -0.00127181]\n", + "(array([ 0.99514528, -0.00641798]), array([1.00555653, 0.00387436]))\n", + "[1.00000239]\n" ] } ], @@ -50,29 +98,21 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[[ 1.31195309 0.60137619 -0.54273135 0.13270353 -0.37173155 -0.91041002\n", - " 0.69958168 -0.73883975 -0.76040264 0.984971 ]]\n", - "[[0.94534005 
0.94534005]]\n", - "[[ 0.94377282 0. -0.00288768 0. 0. 0.\n", - " 0. 0. ]\n", - " [ 0.94377282 0. -0.00288768 0. 0. 0.\n", - " 0. 0. ]]\n", - "[[0.94534005 0.94534005]]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/vasilis/Documents/EconML/econml/dml.py:160: UserWarning: The final model has a nonzero intercept for at least one outcome; it will be subtracted, but consider fitting a model without an intercept if possible.\n", - " UserWarning)\n" + "[[ 0.0379858 0.52725655 -0.92624606 -0.0813923 -3.68794624 0.58108286\n", + " -0.25362371 -1.84854315 0.28964067 0.24566678]]\n", + "[[0.99416019 0.99416019]]\n", + "[[0.99416019 0. 0. 0. 0. 0.\n", + " 0. 0. ]\n", + " [0.99416019 0. 0. 0. 0. 0.\n", + " 0. 0. ]]\n", + "[[0.99416019 0.99416019]]\n" ] } ], @@ -81,10 +121,10 @@ "from sklearn.preprocessing import PolynomialFeatures\n", "from sklearn.linear_model import LinearRegression, LassoCV, Lasso, MultiTaskLassoCV\n", "import numpy as np\n", - "X = np.random.normal(size=(1000, 10))\n", - "y = X[:, 0] + np.random.normal(size=(1000,))\n", + "X = np.random.normal(size=(5000, 10))\n", + "y = X[:, 0] + np.random.normal(size=(5000,))\n", "est = SparseLinearDMLCateEstimator(model_y=MultiTaskLassoCV(cv=3), model_t=LassoCV(cv=3),\n", - " model_final=MultiTaskLassoCV(cv=3))\n", + " model_final=MultiTaskLassoCV(cv=3, fit_intercept=False))\n", "dx = 8\n", "est.fit(np.hstack([y.reshape(-1,1), y.reshape(-1,1)]), X[:, 0], X[:, 1:dx], X[:, dx:])\n", "print(X[:1])\n", From 23ef44dc5bdf56a935f2690938de920e4f5fead5 Mon Sep 17 00:00:00 2001 From: Vasileios Syrgkanis Date: Sat, 2 Nov 2019 16:03:14 -0400 Subject: [PATCH 21/64] updated crossfit test to include fitted_inds --- econml/dml.py | 3 ++- econml/tests/test_ortho_learner.py | 7 ++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/econml/dml.py b/econml/dml.py index fc8af9950..c064c1e4b 100644 --- a/econml/dml.py +++ b/econml/dml.py @@ -164,7 +164,8 @@ def predict(self, X): F = 
self._featurizer.transform(X) if X is not None else np.ones((1, 1)) F, T = broadcast_unit_treatments(F, self._d_t[0] if self._d_t else 1) prediction = self._model.predict(cross_product(F, T)) - return reshape_treatmentwise_effects(prediction - self._intercept if self._intercept is not None else prediction, + prediction -= self._intercept if self._intercept is not None else 0 + return reshape_treatmentwise_effects(prediction, self._d_t, self._d_y) super().__init__(model_y=FirstStageWrapper(model_y, is_Y=True), diff --git a/econml/tests/test_ortho_learner.py b/econml/tests/test_ortho_learner.py index 756cb315b..5154d963c 100644 --- a/econml/tests/test_ortho_learner.py +++ b/econml/tests/test_ortho_learner.py @@ -32,9 +32,9 @@ def predict(self, X, y, W=None): y = X[:, 0] + np.random.normal(size=(5000,)) folds = list(KFold(2).split(X, y)) model = Lasso(alpha=0.01) - nuisance, model_list = _crossfit(Wrapper(model), - folds, - X, y, W=y, Z=None) + nuisance, model_list, fitted_inds = _crossfit(Wrapper(model), + folds, + X, y, W=y, Z=None) np.testing.assert_allclose(nuisance[0][folds[0][1]], model.fit(X[folds[0][0]], y[folds[0][0]]).predict(X[folds[0][1]])) np.testing.assert_allclose(nuisance[0][folds[0][0]], @@ -43,3 +43,4 @@ def predict(self, X, y, W=None): coef_ = np.zeros(X.shape[1]) coef_[0] = 1 [np.testing.assert_allclose(coef_, mdl._model.coef_, rtol=0, atol=0.08) for mdl in model_list] + np.testing.assert_array_equal(fitted_inds, np.arange(X.shape[0])) From de8a1591f14623b8192f00eab710cd3ab72e2e2a Mon Sep 17 00:00:00 2001 From: Vasileios Syrgkanis Date: Sat, 2 Nov 2019 16:25:44 -0400 Subject: [PATCH 22/64] comments in ortho learner --- econml/_ortho_learner.py | 35 +++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/econml/_ortho_learner.py b/econml/_ortho_learner.py index c1282291e..b85d8875c 100644 --- a/econml/_ortho_learner.py +++ b/econml/_ortho_learner.py @@ -135,17 +135,44 @@ def _crossfit(model, folds, *args, 
**kwargs): class _OrthoLearner(TreatmentExpansionMixin, LinearCateEstimator): """ - Base class for all orthogonal learners. + Base class for all orthogonal learners. This class is a parent class to any method that has + the following architecture: + 1) The CATE $\theta(X)$ is either the minimizer of some expected loss function $E[L(V; \theta(X), h(V))] + where V are all the random variables and h is a vector of nuisance functions. + 2) To estimate $\theta(X)$ we first fit the h functions can calculate $h(V_i)$ for each sample $i$ + in a crossfit manner: + - Estimate a model $\hat{h}$ for h using half of the data + - Evaluate the learned $\hat{h}$ model on the other half + Or more generally in a KFold fit/predict approach with more folds + 3) Estimate the model for theta(X) by minimizing the empirical (regularized) plugin loss: + $E_n[L(V; \theta(X), \hat{h}(V))]$ + + The method is a bit more general in that the final step does not need to be a loss minimization step. + The class takes as input a model for fitting an estimate of the nuisance h given a set of samples + and predicting the value of the learned nuisance model on any other set of samples. It also + takes as input a model for the final estimation, that takes as input the data and their associated + estimated nuisance values from the first stage and fits a model for the CATE $\theta(X)$. Then + at predict time, the final model given any set of samples of the X variable, returns the estimated + $\theta(X)$. + + The method essentially implements all the crossfit and plugin logic, so that any child classes need + to only implement the appropriate `model_nuisance` and `model_final` and essentially nothing more. + It also implements the basic preprocessing logic behind the expansion of discrete treatments into + one-hot encodings. Parameters ---------- model_nuisance: estimator The estimator for fitting the nuisance function. Must implement - `fit` and `predict` methods that both take as input Y, T, X, W, Z. 
+ `fit` and `predict` methods that both have signatures: + `model_nuisance.fit(Y, T, X=X, W=W, Z=Z, sample_weight=sample_weight, sample_var=sample_var)` + `model_nuisance.predict(Y, T, X=X, W=W, Z=Z, sample_weight=sample_weight, sample_var=sample_var)` model_final: estimator for fitting the response residuals to the features and treatment residuals - Must implement `fit` and `predict` methods. The fit method takes as input, Y, T, X, W, Z, nuisances. - Predict, on the other hand, should just take the features X and return the constant marginal effect. + Must implement `fit` and `predict` methods that must have signatures: + `model_final.fit(Y, T, X=X, W=W, Z=Z, nuisances=nuisances, sample_weight=sample_weight, sample_var=sample_var)` + `model_nuisance.predict(X)` + Predict, should just take the features X and return the constant marginal effect. discrete_treatment: bool Whether the treatment values should be treated as categorical, rather than continuous, quantities From 1e3825a4c93f0dd804299f260bf6130ece2b9c33 Mon Sep 17 00:00:00 2001 From: Vasileios Syrgkanis Date: Sat, 2 Nov 2019 20:32:06 -0400 Subject: [PATCH 23/64] documentation related changes to include the new python files. --- doc/_templates/autosummary/module.rst | 1 + doc/reference.rst | 2 ++ econml/_ortho_learner.py | 34 ++++++++++++++++++--------- 3 files changed, 26 insertions(+), 11 deletions(-) diff --git a/doc/_templates/autosummary/module.rst b/doc/_templates/autosummary/module.rst index 6090b5e35..12f1a763f 100644 --- a/doc/_templates/autosummary/module.rst +++ b/doc/_templates/autosummary/module.rst @@ -3,3 +3,4 @@ .. 
automodule:: {{ fullname }} :members: + :private-members: diff --git a/doc/reference.rst b/doc/reference.rst index 672cce42d..5f0fe5821 100644 --- a/doc/reference.rst +++ b/doc/reference.rst @@ -8,6 +8,8 @@ Module reference econml.cate_estimator econml.deepiv econml.dgp + econml._ortho_learner + econml._rlearner econml.dml econml.inference econml.ortho_forest diff --git a/econml/_ortho_learner.py b/econml/_ortho_learner.py index b85d8875c..e5228c4b3 100644 --- a/econml/_ortho_learner.py +++ b/econml/_ortho_learner.py @@ -137,23 +137,34 @@ class _OrthoLearner(TreatmentExpansionMixin, LinearCateEstimator): """ Base class for all orthogonal learners. This class is a parent class to any method that has the following architecture: - 1) The CATE $\theta(X)$ is either the minimizer of some expected loss function $E[L(V; \theta(X), h(V))] - where V are all the random variables and h is a vector of nuisance functions. - 2) To estimate $\theta(X)$ we first fit the h functions can calculate $h(V_i)$ for each sample $i$ - in a crossfit manner: - - Estimate a model $\hat{h}$ for h using half of the data - - Evaluate the learned $\hat{h}$ model on the other half + + 1. The CATE :math:`\\theta(X)` is either the minimizer of some expected loss function + + .. math :: + \\mathbb{E}[\\ell(V; \\theta(X), h(V))] + + where :math:`V` are all the random variables and h is a vector of nuisance functions. + + 2. To estimate :math:`\\theta(X)` we first fit the h functions can calculate :math:`h(V_i)` for each sample + :math:`i` in a crossfit manner: + + - Estimate a model :math:`\\hat{h}` for h using half of the data + - Evaluate the learned :math:`\\hat{h}` model on the other half + Or more generally in a KFold fit/predict approach with more folds - 3) Estimate the model for theta(X) by minimizing the empirical (regularized) plugin loss: - $E_n[L(V; \theta(X), \hat{h}(V))]$ + + 3. Estimate the model for :math:`\\theta(X)` by minimizing the empirical (regularized) plugin loss: + + .. 
math :: + \\mathbb{E}_n[\\ell(V; \\theta(X), \\hat{h}(V))] The method is a bit more general in that the final step does not need to be a loss minimization step. The class takes as input a model for fitting an estimate of the nuisance h given a set of samples and predicting the value of the learned nuisance model on any other set of samples. It also takes as input a model for the final estimation, that takes as input the data and their associated - estimated nuisance values from the first stage and fits a model for the CATE $\theta(X)$. Then + estimated nuisance values from the first stage and fits a model for the CATE :math:`\\theta(X)`. Then at predict time, the final model given any set of samples of the X variable, returns the estimated - $\theta(X)$. + :math:`\\theta(X)`. The method essentially implements all the crossfit and plugin logic, so that any child classes need to only implement the appropriate `model_nuisance` and `model_final` and essentially nothing more. @@ -198,6 +209,7 @@ class _OrthoLearner(TreatmentExpansionMixin, LinearCateEstimator): If :class:`~numpy.random.mtrand.RandomState` instance, random_state is the random number generator; If None, the random number generator is the :class:`~numpy.random.mtrand.RandomState` instance used by `np.random`. + """ def __init__(self, model_nuisance, model_final, @@ -309,7 +321,7 @@ def fit_final(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight= def const_marginal_effect(self, X=None): """ - Calculate the constant marginal CATE θ(·). + Calculate the constant marginal CATE :math:`\\theta(·)`. The marginal effect is conditional on a vector of features on a set of m test samples {Xᵢ}. From 3ddb59a13e27dd140873e0afff3b81a4ef57e2a3 Mon Sep 17 00:00:00 2001 From: Vasileios Syrgkanis Date: Sat, 2 Nov 2019 21:42:29 -0400 Subject: [PATCH 24/64] more docstring for _OrthoLearner. 
Some small fixes to allow for child classes and models of child classes to omit keyword arguments in their signature if those will always be equal to None. --- econml/_ortho_learner.py | 131 +++++++++++++++++++++++++++-------- econml/_rlearner.py | 2 +- notebooks/OrthoLearner.ipynb | 107 +++++++++++++++++++++++----- 3 files changed, 191 insertions(+), 49 deletions(-) diff --git a/econml/_ortho_learner.py b/econml/_ortho_learner.py index e5228c4b3..045f15cda 100644 --- a/econml/_ortho_learner.py +++ b/econml/_ortho_learner.py @@ -72,23 +72,28 @@ def _crossfit(model, folds, *args, **kwargs): Examples -------- - >>> import numpy as np - >>> from sklearn.model_selection import KFold - >>> from sklearn.linear_model import Lasso - >>> class Wrapper: - >>> def __init__(self, model): - >>> self._model = model - >>> def fit(self, X, y, W=None): - >>> self._model.fit(X, y) - >>> return self - >>> def predict(self, X, y, W=None): - >>> return self._model.predict(X) - >>> np.random.seed(123) - >>> X = np.random.normal(size=(5000, 3)) - >>> y = X[:, 0] + np.random.normal(size=(5000,)) - >>> folds = list(KFold(2).split(X, y)) - >>> model = Lasso(alpha=0.01) - >>> nuisance, model_list, fitted_inds = _crossfit(Wrapper(model), folds, X, y, W=y, Z=None) + + .. highlight:: python + .. 
code-block:: python + + import numpy as np + from sklearn.model_selection import KFold + from sklearn.linear_model import Lasso + class Wrapper: + def __init__(self, model): + self._model = model + def fit(self, X, y, W=None): + self._model.fit(X, y) + return self + def predict(self, X, y, W=None): + return self._model.predict(X) + np.random.seed(123) + X = np.random.normal(size=(5000, 3)) + y = X[:, 0] + np.random.normal(size=(5000,)) + folds = list(KFold(2).split(X, y)) + model = Lasso(alpha=0.01) + nuisance, model_list, fitted_inds = _crossfit(Wrapper(model), folds, X, y, W=y, Z=None) + >>> nuisance (array([-1.1057289 , -1.53756637, -2.4518278 , ..., 1.10628792, -1.82966233, -1.78227335]),) @@ -140,12 +145,12 @@ class _OrthoLearner(TreatmentExpansionMixin, LinearCateEstimator): 1. The CATE :math:`\\theta(X)` is either the minimizer of some expected loss function - .. math :: + .. math :: \\mathbb{E}[\\ell(V; \\theta(X), h(V))] where :math:`V` are all the random variables and h is a vector of nuisance functions. - 2. To estimate :math:`\\theta(X)` we first fit the h functions can calculate :math:`h(V_i)` for each sample + 2. To estimate :math:`\\theta(X)` we first fit the h functions can calculate :math:`h(V_i)` for each sample :math:`i` in a crossfit manner: - Estimate a model :math:`\\hat{h}` for h using half of the data @@ -178,12 +183,19 @@ class _OrthoLearner(TreatmentExpansionMixin, LinearCateEstimator): `fit` and `predict` methods that both have signatures: `model_nuisance.fit(Y, T, X=X, W=W, Z=Z, sample_weight=sample_weight, sample_var=sample_var)` `model_nuisance.predict(Y, T, X=X, W=W, Z=Z, sample_weight=sample_weight, sample_var=sample_var)` + In fact we allow for the model method signatures to skip any of the keyword arguments + as long as the class is always called with the omitted keyword argument set to `None`. This can be enforced + in child classes by re-implementing the fit and the various effect methods. 
model_final: estimator for fitting the response residuals to the features and treatment residuals Must implement `fit` and `predict` methods that must have signatures: `model_final.fit(Y, T, X=X, W=W, Z=Z, nuisances=nuisances, sample_weight=sample_weight, sample_var=sample_var)` - `model_nuisance.predict(X)` - Predict, should just take the features X and return the constant marginal effect. + `model_nuisance.predict(X=X)` + Predict, should just take the features X and return the constant marginal effect. In fact we allow for the model + method signatures to skip any of the keyword arguments as long as the class is always called with the omitted keyword + argument set to `None`. Moreover, the predict function of the final model can take no argument if the class + is always called with `X=None`. This can be enforced in child classes by re-implementing the fit and + the various effect methods. discrete_treatment: bool Whether the treatment values should be treated as categorical, rather than continuous, quantities @@ -210,6 +222,54 @@ class _OrthoLearner(TreatmentExpansionMixin, LinearCateEstimator): If None, the random number generator is the :class:`~numpy.random.mtrand.RandomState` instance used by `np.random`. + Examples + -------- + + The example code below implements a very simple version of the double machine learning + method on top of the :py:class:`~econml._ortho_learner._OrthoLearner` class, for expository purposes. + For a more elaborate implementation of a Double Machine Learning child class of the class + :py:class:`~econml._ortho_learner._OrthoLearner` checkout :py:class:`~econml.dml.DMLCateEstimator` + and its child classes. + + .. highlight:: python + .. 
code-block:: python + + import numpy as np + from sklearn.linear_model import LinearRegression + from econml._ortho_learner import _OrthoLearner + class ModelNuisance: + def __init__(self, model_t, model_y): + self._model_t = model_t + self._model_y = model_y + def fit(self, Y, T, W=None): + self._model_t.fit(W, T) + self._model_y.fit(W, Y) + return self + def predict(self, Y, T, W=None): + return Y - self._model_y.predict(W), T - self._model_t.predict(W) + class ModelFinal: + def __init__(self): + return + def fit(self, Y, T, W=None, nuisances=None): + Y_res, T_res = nuisances + self.model = LinearRegression().fit(T_res.reshape(-1, 1), Y_res) + return self + def predict(self): + return self.model.coef_[0] + def score(self, Y, T, W=None, nuisances=None): + Y_res, T_res = nuisances + return np.mean(Y_res - self.model.coef_[0]*T_res)**2 + np.random.seed(123) + X = np.random.normal(size=(100, 3)) + y = X[:, 0] + X[:, 1] + np.random.normal(size=(100,)) + est = _OrthoLearner(ModelNuisance(LinearRegression(), LinearRegression()), ModelFinal(), + n_splits=2, discrete_treatment=False, random_state=None) + est.fit(y, X[:, 0], W=X[:, 1:]) + + >>> est.effect() + array([1.23440172]) + >>> est.score(y, X[:, 0], W=X[:, 1:]) + 0.0003880489502537651 """ def __init__(self, model_nuisance, model_final, @@ -243,6 +303,13 @@ def _check_fitted_dims(self, X): def _subinds_check_none(self, var, inds): return var[inds] if var is not None else None + def _filter_none_kwargs(self, **kwargs): + non_none_kwargs = {} + for key, value in kwargs.items(): + if value is not None: + non_none_kwargs[key] = value + return non_none_kwargs + @BaseCateEstimator._wrap_fit def fit(self, Y, T, X=None, W=None, Z=None, sample_weight=None, sample_var=None, inference=None): """ @@ -276,10 +343,10 @@ def fit(self, Y, T, X=None, W=None, Z=None, sample_weight=None, sample_var=None, nuisances, fitted_inds = self.fit_nuisances(Y, T, X, W, Z, sample_weight=sample_weight) self.fit_final(self._subinds_check_none(Y, 
fitted_inds), self._subinds_check_none(T, fitted_inds), - self._subinds_check_none(X, fitted_inds), - self._subinds_check_none(W, fitted_inds), - self._subinds_check_none(Z, fitted_inds), - tuple([self._subinds_check_none(nuis, fitted_inds) for nuis in nuisances]), + X=self._subinds_check_none(X, fitted_inds), + W=self._subinds_check_none(W, fitted_inds), + Z=self._subinds_check_none(Z, fitted_inds), + nuisances=tuple([self._subinds_check_none(nuis, fitted_inds) for nuis in nuisances]), sample_weight=self._subinds_check_none(sample_weight, fitted_inds), sample_var=self._subinds_check_none(sample_var, fitted_inds)) return self @@ -316,8 +383,9 @@ def fit_nuisances(self, Y, T, X=None, W=None, Z=None, sample_weight=None): return nuisances, fitted_inds def fit_final(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None, sample_var=None): - self._model_final.fit(Y, T, X=X, W=W, Z=Z, nuisances=nuisances, - sample_weight=sample_weight, sample_var=sample_var) + self._model_final.fit(Y, T, **self._filter_none_kwargs(X=X, W=W, Z=Z, + nuisances=nuisances, sample_weight=sample_weight, + sample_var=sample_var)) def const_marginal_effect(self, X=None): """ @@ -341,7 +409,10 @@ def const_marginal_effect(self, X=None): (e.g. 
if both are vectors, then the output of this method will also be a vector) """ self._check_fitted_dims(X) - return self._model_final.predict(X) + if X is None: + return self._model_final.predict() + else: + return self._model_final.predict(X) def const_marginal_effect_interval(self, X=None, *, alpha=0.1): self._check_fitted_dims(X) @@ -355,7 +426,7 @@ def score(self, Y, T, X=None, W=None, Z=None): X, T = self._expand_treatments(X, T) n_splits = len(self._models_nuisance) for idx, mdl in enumerate(self._models_nuisance): - nuisance_temp = mdl.predict(Y, T, X, W, Z) + nuisance_temp = mdl.predict(Y, T, **self._filter_none_kwargs(X=X, W=W, Z=Z)) if not isinstance(nuisance_temp, tuple): nuisance_temp = (nuisance_temp,) @@ -368,7 +439,7 @@ def score(self, Y, T, X=None, W=None, Z=None): for it in range(len(nuisances)): nuisances[it] = np.mean(nuisances[it], axis=0) - return self._model_final.score(Y, T, X=X, W=W, Z=Z, nuisances=tuple(nuisances)) + return self._model_final.score(Y, T, **self._filter_none_kwargs(X=X, W=W, Z=Z, nuisances=nuisances)) @property def model_final(self): diff --git a/econml/_rlearner.py b/econml/_rlearner.py index 5fe34691e..b1c62d810 100644 --- a/econml/_rlearner.py +++ b/econml/_rlearner.py @@ -115,7 +115,7 @@ def fit(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None, self._model_final.fit(X, T_res, Y_res, sample_weight=sample_weight, sample_var=sample_var) return self - def predict(self, X): + def predict(self, X=None): return self._model_final.predict(X) def score(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None, sample_var=None): diff --git a/notebooks/OrthoLearner.ipynb b/notebooks/OrthoLearner.ipynb index b8624537b..f2cf39c2f 100644 --- a/notebooks/OrthoLearner.ipynb +++ b/notebooks/OrthoLearner.ipynb @@ -13,7 +13,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -22,7 +22,7 @@ "text": [ "(array([-1.1057289 , -1.53756637, -2.4518278 , 
..., 1.10628792,\n", " -1.82966233, -1.78227335]),)\n", - "[<__main__.Wrapper object at 0x1273b70f0>, <__main__.Wrapper object at 0x1273b77b8>]\n" + "[<__main__.Wrapper object at 0x12fd9d438>, <__main__.Wrapper object at 0x118e252b0>]\n" ] }, { @@ -31,7 +31,7 @@ "array([ 0, 1, 2, ..., 4997, 4998, 4999])" ] }, - "execution_count": 14, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -61,6 +61,77 @@ "fitted_inds" ] }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([1.23440172])" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import numpy as np\n", + "from sklearn.linear_model import LinearRegression\n", + "from econml._ortho_learner import _OrthoLearner\n", + "class ModelNuisance:\n", + " def __init__(self, model_t, model_y):\n", + " self._model_t = model_t\n", + " self._model_y = model_y\n", + " def fit(self, Y, T, W=None):\n", + " self._model_t.fit(W, T)\n", + " self._model_y.fit(W, Y)\n", + " return self\n", + " def predict(self, Y, T, W=None):\n", + " return Y - self._model_y.predict(W), T - self._model_t.predict(W)\n", + "class ModelFinal:\n", + " def __init__(self):\n", + " return\n", + " def fit(self, Y, T, W=None, nuisances=None):\n", + " Y_res, T_res = nuisances\n", + " self.model = LinearRegression().fit(T_res.reshape(-1, 1), Y_res)\n", + " return self\n", + " def predict(self, X=None):\n", + " return self.model.coef_[0]\n", + " def score(self, Y, T, W=None, nuisances=None):\n", + " Y_res, T_res = nuisances\n", + " return np.mean(Y_res - self.model.coef_[0]*T_res)**2\n", + "np.random.seed(123)\n", + "X = np.random.normal(size=(100, 3))\n", + "y = X[:, 0] + X[:, 1] + np.random.normal(size=(100,))\n", + "est = _OrthoLearner(ModelNuisance(LinearRegression(), LinearRegression()), ModelFinal(),\n", + " n_splits=2, discrete_treatment=False, random_state=None)\n", + "est.fit(y, X[:, 0], W=X[:, 
1:])\n", + "est.effect()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.0003880489502537651" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "est.score(y, X[:, 0], W=X[:, 1:])" + ] + }, { "cell_type": "code", "execution_count": 15, @@ -70,12 +141,12 @@ "name": "stdout", "output_type": "stream", "text": [ - "[[ 0.731492 0.27402924 -2.20337664]]\n", - "[1.00000239]\n", - "(array([0.99461543]), array([1.00538936]))\n", - "[ 1.00035091 -0.00127181]\n", - "(array([ 0.99514528, -0.00641798]), array([1.00555653, 0.00387436]))\n", - "[1.00000239]\n" + "[[ 1.0613513 0.0760928 -0.53482069]]\n", + "[0.99780349]\n", + "(array([0.99255731]), array([1.00304966]))\n", + "[ 0.99804439 -0.00316585]\n", + "(array([ 0.99281255, -0.00838755]), array([1.00327622, 0.00205585]))\n", + "[0.99780349]\n" ] } ], @@ -98,21 +169,21 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[[ 0.0379858 0.52725655 -0.92624606 -0.0813923 -3.68794624 0.58108286\n", - " -0.25362371 -1.84854315 0.28964067 0.24566678]]\n", - "[[0.99416019 0.99416019]]\n", - "[[0.99416019 0. 0. 0. 0. 0.\n", - " 0. 0. ]\n", - " [0.99416019 0. 0. 0. 0. 0.\n", - " 0. 0. 
]]\n", - "[[0.99416019 0.99416019]]\n" + "[[-0.88666276 0.14006212 0.72897065 1.41017893 0.31083606 0.32659312\n", + " 0.53522834 1.21111218 -0.74004013 -0.6584672 ]]\n", + "[[0.99937549 0.99937549]]\n", + "[[9.99291159e-01 0.00000000e+00 0.00000000e+00 0.00000000e+00\n", + " 2.71306507e-04 0.00000000e+00 0.00000000e+00 0.00000000e+00]\n", + " [9.99291159e-01 0.00000000e+00 0.00000000e+00 0.00000000e+00\n", + " 2.71306507e-04 0.00000000e+00 0.00000000e+00 0.00000000e+00]]\n", + "[[0.99937549 0.99937549]]\n" ] } ], From 9b0726c7878fbd98daab255ef073e5829d30358d Mon Sep 17 00:00:00 2001 From: Vasileios Syrgkanis Date: Sat, 2 Nov 2019 21:54:37 -0400 Subject: [PATCH 25/64] pylint errors --- econml/_ortho_learner.py | 36 +++++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/econml/_ortho_learner.py b/econml/_ortho_learner.py index 045f15cda..231b1c7c5 100644 --- a/econml/_ortho_learner.py +++ b/econml/_ortho_learner.py @@ -181,21 +181,35 @@ class _OrthoLearner(TreatmentExpansionMixin, LinearCateEstimator): model_nuisance: estimator The estimator for fitting the nuisance function. Must implement `fit` and `predict` methods that both have signatures: - `model_nuisance.fit(Y, T, X=X, W=W, Z=Z, sample_weight=sample_weight, sample_var=sample_var)` - `model_nuisance.predict(Y, T, X=X, W=W, Z=Z, sample_weight=sample_weight, sample_var=sample_var)` + + .. highlight:: python + .. code-block:: python + + model_nuisance.fit(Y, T, X=X, W=W, Z=Z, + sample_weight=sample_weight, sample_var=sample_var) + model_nuisance.predict(Y, T, X=X, W=W, Z=Z, + sample_weight=sample_weight, sample_var=sample_var) + In fact we allow for the model method signatures to skip any of the keyword arguments - as long as the class is always called with the omitted keyword argument set to `None`. This can be enforced - in child classes by re-implementing the fit and the various effect methods. 
+ as long as the class is always called with the omitted keyword argument set to `None`. + This can be enforced in child classes by re-implementing the fit and the various effect + methods. model_final: estimator for fitting the response residuals to the features and treatment residuals Must implement `fit` and `predict` methods that must have signatures: - `model_final.fit(Y, T, X=X, W=W, Z=Z, nuisances=nuisances, sample_weight=sample_weight, sample_var=sample_var)` - `model_nuisance.predict(X=X)` - Predict, should just take the features X and return the constant marginal effect. In fact we allow for the model - method signatures to skip any of the keyword arguments as long as the class is always called with the omitted keyword - argument set to `None`. Moreover, the predict function of the final model can take no argument if the class - is always called with `X=None`. This can be enforced in child classes by re-implementing the fit and - the various effect methods. + + .. highlight:: python + .. code-block:: python + + model_final.fit(Y, T, X=X, W=W, Z=Z, nuisances=nuisances, + sample_weight=sample_weight, sample_var=sample_var) + model_nuisance.predict(X=X) + + Predict, should just take the features X and return the constant marginal effect. In fact we allow for the + model method signatures to skip any of the keyword arguments as long as the class is always called with the + omitted keyword argument set to `None`. Moreover, the predict function of the final model can take no argument + if the class is always called with `X=None`. This can be enforced in child classes by re-implementing the fit + and the various effect methods. 
discrete_treatment: bool Whether the treatment values should be treated as categorical, rather than continuous, quantities From 23cf7c2ec5359911e5246c24c176647a21026fb4 Mon Sep 17 00:00:00 2001 From: Vasileios Syrgkanis Date: Sat, 2 Nov 2019 21:59:04 -0400 Subject: [PATCH 26/64] docstring typo --- econml/_ortho_learner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/econml/_ortho_learner.py b/econml/_ortho_learner.py index 231b1c7c5..e5f065e80 100644 --- a/econml/_ortho_learner.py +++ b/econml/_ortho_learner.py @@ -143,7 +143,7 @@ class _OrthoLearner(TreatmentExpansionMixin, LinearCateEstimator): Base class for all orthogonal learners. This class is a parent class to any method that has the following architecture: - 1. The CATE :math:`\\theta(X)` is either the minimizer of some expected loss function + 1. The CATE :math:`\\theta(X)` is the minimizer of some expected loss function .. math :: \\mathbb{E}[\\ell(V; \\theta(X), h(V))] From c8b6a91e75db4e347a5254a7f3cd3a39ddf7edf9 Mon Sep 17 00:00:00 2001 From: Vasileios Syrgkanis Date: Sat, 2 Nov 2019 22:35:58 -0400 Subject: [PATCH 27/64] docstrings --- econml/_ortho_learner.py | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/econml/_ortho_learner.py b/econml/_ortho_learner.py index e5f065e80..27cfac9fa 100644 --- a/econml/_ortho_learner.py +++ b/econml/_ortho_learner.py @@ -1,11 +1,26 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. -"""Double ML. +""" + +Orthogonal Machine Learning is a general approach to estimating causal models +by formulating them as minimizers of some loss function that depends on +auxiliary regression models that also need to be estimated from data. The +class in this module implements the general logic in a very versatile way +so that various child classes can simply instantiate the appropriate models +and save a lot of code repetition. 
+ +References +---------- + +Dylan Foster, Vasilis Syrgkanis (2019). Orthogonal Statistical Learning. + ACM Conference on Learning Theory. https://arxiv.org/abs/1901.09036 + +Xinkun Nie, Stefan Wager (2017). Quasi-Oracle Estimation of Heterogeneous Treatment Effects. + https://arxiv.org/abs/1712.04912 -"Double Machine Learning" is an algorithm that applies arbitrary machine learning methods -to fit the treatment and response, then uses a linear model to predict the response residuals -from the treatment residuals. +Chernozhukov et al. (2017). Double/debiased machine learning for treatment and structural parameters. + The Econometrics Journal. https://arxiv.org/abs/1608.00060 """ From c93b8b1079b0b9dc2b34cfb0c110c4b3f8839140 Mon Sep 17 00:00:00 2001 From: Vasileios Syrgkanis Date: Sat, 2 Nov 2019 22:38:36 -0400 Subject: [PATCH 28/64] docstring typo --- econml/_ortho_learner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/econml/_ortho_learner.py b/econml/_ortho_learner.py index 27cfac9fa..4cbed683e 100644 --- a/econml/_ortho_learner.py +++ b/econml/_ortho_learner.py @@ -218,7 +218,7 @@ class _OrthoLearner(TreatmentExpansionMixin, LinearCateEstimator): model_final.fit(Y, T, X=X, W=W, Z=Z, nuisances=nuisances, sample_weight=sample_weight, sample_var=sample_var) - model_nuisance.predict(X=X) + model_final.predict(X=X) Predict, should just take the features X and return the constant marginal effect. In fact we allow for the model method signatures to skip any of the keyword arguments as long as the class is always called with the From e6e959368ce1b9cf8c9dbbc5517b42ee2b7c8d05 Mon Sep 17 00:00:00 2001 From: Vasileios Syrgkanis Date: Sun, 3 Nov 2019 18:08:12 -0500 Subject: [PATCH 29/64] many changes to the docstrings of the _OrthoLearner. Addition of score_ attribute, available after fit of the _OrthoLearner. Changing to the sphinx module template, so that private methods are printed and methods are inheritted from parents. 
Added all tests to the _OrthoLearner. Added docstring to the effect function that is replaced in the DiscreteTreatmentExpansionMixin. --- doc/_templates/autosummary/module.rst | 2 + econml/_ortho_learner.py | 236 ++++++++++++----- econml/_rlearner.py | 5 +- econml/cate_estimator.py | 13 +- econml/tests/test_ortho_learner.py | 179 +++++++++++++ notebooks/OrthoLearner.ipynb | 361 +++++++++++++++++++++++--- 6 files changed, 693 insertions(+), 103 deletions(-) diff --git a/doc/_templates/autosummary/module.rst b/doc/_templates/autosummary/module.rst index 12f1a763f..83c4f1748 100644 --- a/doc/_templates/autosummary/module.rst +++ b/doc/_templates/autosummary/module.rst @@ -4,3 +4,5 @@ .. automodule:: {{ fullname }} :members: :private-members: + :inherited-members: + :show-inheritance: diff --git a/econml/_ortho_learner.py b/econml/_ortho_learner.py index 4cbed683e..51c875e21 100644 --- a/econml/_ortho_learner.py +++ b/econml/_ortho_learner.py @@ -158,33 +158,35 @@ class _OrthoLearner(TreatmentExpansionMixin, LinearCateEstimator): Base class for all orthogonal learners. This class is a parent class to any method that has the following architecture: - 1. The CATE :math:`\\theta(X)` is the minimizer of some expected loss function + 1. The CATE :math:`\\theta(X)` is the minimizer of some expected loss function - .. math :: - \\mathbb{E}[\\ell(V; \\theta(X), h(V))] + .. math :: + \\mathbb{E}[\\ell(V; \\theta(X), h(V))] - where :math:`V` are all the random variables and h is a vector of nuisance functions. + where :math:`V` are all the random variables and h is a vector of nuisance functions. Alternatively, + the class would also work if :math:`\\theta(X)` is the solution to a set of moment equations that + also depend on nuisance functions :math:`h`. - 2. To estimate :math:`\\theta(X)` we first fit the h functions can calculate :math:`h(V_i)` for each sample - :math:`i` in a crossfit manner: + 2. 
To estimate :math:`\\theta(X)` we first fit the h functions can calculate :math:`h(V_i)` for each sample + :math:`i` in a crossfit manner: - - Estimate a model :math:`\\hat{h}` for h using half of the data - - Evaluate the learned :math:`\\hat{h}` model on the other half + - Estimate a model :math:`\\hat{h}` for h using half of the data + - Evaluate the learned :math:`\\hat{h}` model on the other half - Or more generally in a KFold fit/predict approach with more folds + Or more generally in a KFold fit/predict approach with more folds - 3. Estimate the model for :math:`\\theta(X)` by minimizing the empirical (regularized) plugin loss: + 3. Estimate the model for :math:`\\theta(X)` by minimizing the empirical (regularized) plugin loss: - .. math :: - \\mathbb{E}_n[\\ell(V; \\theta(X), \\hat{h}(V))] + .. math :: + \\mathbb{E}_n[\\ell(V; \\theta(X), \\hat{h}(V))] - The method is a bit more general in that the final step does not need to be a loss minimization step. - The class takes as input a model for fitting an estimate of the nuisance h given a set of samples - and predicting the value of the learned nuisance model on any other set of samples. It also - takes as input a model for the final estimation, that takes as input the data and their associated - estimated nuisance values from the first stage and fits a model for the CATE :math:`\\theta(X)`. Then - at predict time, the final model given any set of samples of the X variable, returns the estimated - :math:`\\theta(X)`. + The method is a bit more general in that the final step does not need to be a loss minimization step. + The class takes as input a model for fitting an estimate of the nuisance h given a set of samples + and predicting the value of the learned nuisance model on any other set of samples. It also + takes as input a model for the final estimation, that takes as input the data and their associated + estimated nuisance values from the first stage and fits a model for the CATE :math:`\\theta(X)`. 
Then + at predict time, the final model given any set of samples of the X variable, returns the estimated + :math:`\\theta(X)`. The method essentially implements all the crossfit and plugin logic, so that any child classes need to only implement the appropriate `model_nuisance` and `model_final` and essentially nothing more. @@ -208,7 +210,8 @@ class _OrthoLearner(TreatmentExpansionMixin, LinearCateEstimator): In fact we allow for the model method signatures to skip any of the keyword arguments as long as the class is always called with the omitted keyword argument set to `None`. This can be enforced in child classes by re-implementing the fit and the various effect - methods. + methods. If `discrete_treatment=True`, then the input `T` to both above calls will be the + one-hot encoding of the original input `T`, excluding the first column of the one-hot. model_final: estimator for fitting the response residuals to the features and treatment residuals Must implement `fit` and `predict` methods that must have signatures: @@ -224,7 +227,8 @@ class _OrthoLearner(TreatmentExpansionMixin, LinearCateEstimator): model method signatures to skip any of the keyword arguments as long as the class is always called with the omitted keyword argument set to `None`. Moreover, the predict function of the final model can take no argument if the class is always called with `X=None`. This can be enforced in child classes by re-implementing the fit - and the various effect methods. + and the various effect methods. If `discrete_treatment=True`, then the input `T` to both above calls will be the + one-hot encoding of the original input `T`, excluding the first column of the one-hot. 
discrete_treatment: bool Whether the treatment values should be treated as categorical, rather than continuous, quantities @@ -281,13 +285,14 @@ def __init__(self): return def fit(self, Y, T, W=None, nuisances=None): Y_res, T_res = nuisances - self.model = LinearRegression().fit(T_res.reshape(-1, 1), Y_res) + self.model = LinearRegression(fit_intercept=False).fit(T_res.reshape(-1, 1), Y_res) + self.score_ = self.score(Y, T, W=W, nuisances=nuisances) return self - def predict(self): + def predict(self, X=None): return self.model.coef_[0] def score(self, Y, T, W=None, nuisances=None): Y_res, T_res = nuisances - return np.mean(Y_res - self.model.coef_[0]*T_res)**2 + return np.mean(Y_res - self.model.predict(T_res.reshape(-1, 1)))**2 np.random.seed(123) X = np.random.normal(size=(100, 3)) y = X[:, 0] + X[:, 1] + np.random.normal(size=(100,)) @@ -295,10 +300,88 @@ def score(self, Y, T, W=None, nuisances=None): n_splits=2, discrete_treatment=False, random_state=None) est.fit(y, X[:, 0], W=X[:, 1:]) + >>> est.score_ + 0.0015439892272404935 + >>> est.const_marginal_effect() + 1.2344017222060417 >>> est.effect() array([1.23440172]) + >>> est.effect(T0=0, T1=10) + array([12.34401722]) >>> est.score(y, X[:, 0], W=X[:, 1:]) 0.0003880489502537651 + >>> est.model_final.model + LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, + normalize=False) + >>> est.model_final.model.coef_ + array([1.23440172]) + + The following example shows how to do double machine learning with discrete treatments, using + the _OrthoLearner. + + .. highlight:: python + .. 
code-block:: python + + class ModelNuisance: + def __init__(self, model_t, model_y): + self._model_t = model_t + self._model_y = model_y + + def fit(self, Y, T, W=None): + self._model_t.fit(W, np.matmul(T, np.arange(1, T.shape[1]+1))) + self._model_y.fit(W, Y) + return self + + def predict(self, Y, T, W=None): + return Y - self._model_y.predict(W), T - self._model_t.predict_proba(W)[:, 1:] + + class ModelFinal: + + def __init__(self): + return + + def fit(self, Y, T, W=None, nuisances=None): + Y_res, T_res = nuisances + self.model = LinearRegression(fit_intercept=False).fit(T_res.reshape(-1, 1), Y_res) + return self + + def predict(self): + # theta needs to be of dimension (1, d_t) if T is (n, d_t) + return np.array([[self.model.coef_[0]]]) + + def score(self, Y, T, W=None, nuisances=None): + Y_res, T_res = nuisances + return np.mean((Y_res - self.model.predict(T_res.reshape(-1, 1)))**2) + + np.random.seed(123) + X = np.random.normal(size=(100, 3)) + import scipy.special + from sklearn.linear_model import LogisticRegression + T = np.random.binomial(1, scipy.special.expit(X[:, 0])) + y = T + X[:, 0] + np.random.normal(0, 0.01, size=(100,)) + est = _OrthoLearner(ModelNuisance(LogisticRegression(solver='lbfgs'), LinearRegression()), + ModelFinal(), n_splits=2, discrete_treatment=True, random_state=None) + est.fit(y, T, W=X) + + >>> est.score_ + 0.0031604059708364245 + >>> est.const_marginal_effect() + array([[1.00123159]]) + >>> est.effect() + array([1.00123159]) + >>> est.score(y, T, W=X) + 0.002569588332146612 + >>> est.model_final.model.coef_[0] + 1.0012315874866917 + + Attributes + ---------- + model_final : object of type(model_final) + An instance of the model_final object that was fitted after calling fit. + score_ : float or array of floats + If the model_final has a score method, then `score_` contains the outcome of the final model + score when evaluated on the fitted nuisances from the first stage. Represents goodness of fit, + of the final CATE model. 
""" def __init__(self, model_nuisance, model_final, @@ -340,47 +423,47 @@ def _filter_none_kwargs(self, **kwargs): return non_none_kwargs @BaseCateEstimator._wrap_fit - def fit(self, Y, T, X=None, W=None, Z=None, sample_weight=None, sample_var=None, inference=None): + def fit(self, Y, T, X=None, W=None, Z=None, sample_weight=None, sample_var=None, *, inference=None): """ - Estimate the counterfactual model from data, i.e. estimates functions τ(·,·,·), ∂τ(·,·). + Estimate the counterfactual model from data, i.e. estimates function :math:`\\theta(\\cdot)`. Parameters ---------- - Y: (n × d_y) matrix or vector of length n + Y: :math:`(n \\times d_y)` matrix or vector of length n Outcomes for each sample - T: (n × dₜ) matrix or vector of length n + T: :math:`(n \\times d_t)` matrix or vector of length n Treatments for each sample - X: optional (n × dₓ) matrix + X: optional :math:`(n \\times d_x)` matrix or None (Default=None) Features for each sample - W: optional (n × d_w) matrix + W: optional :math:`(n \\times d_w)` matrix or None (Default=None) Controls for each sample - Z: optional (n × d_z) matrix + Z: optional :math:`(n \\times d_z)` matrix or None (Default=None) Instruments for each sample - sample_weight: optional (n,) vector - Weights for each row - sample_var: optional (n,) vector - Sample variance + sample_weight: optional (n,) vector or None (Default=None) + Weights for each samples + sample_var: optional (n,) vector or None (Default=None) + Sample variance for each sample inference: string, `Inference` instance, or None Method for performing inference. This estimator supports 'bootstrap' (or an instance of `BootstrapInference`). 
Returns ------- - self + self : _OrthoLearner instance """ self._check_input_dims(Y, T, X, W, Z, sample_weight, sample_var) - nuisances, fitted_inds = self.fit_nuisances(Y, T, X, W, Z, sample_weight=sample_weight) - self.fit_final(self._subinds_check_none(Y, fitted_inds), - self._subinds_check_none(T, fitted_inds), - X=self._subinds_check_none(X, fitted_inds), - W=self._subinds_check_none(W, fitted_inds), - Z=self._subinds_check_none(Z, fitted_inds), - nuisances=tuple([self._subinds_check_none(nuis, fitted_inds) for nuis in nuisances]), - sample_weight=self._subinds_check_none(sample_weight, fitted_inds), - sample_var=self._subinds_check_none(sample_var, fitted_inds)) + nuisances, fitted_inds = self._fit_nuisances(Y, T, X, W, Z, sample_weight=sample_weight) + self._fit_final(self._subinds_check_none(Y, fitted_inds), + self._subinds_check_none(T, fitted_inds), + X=self._subinds_check_none(X, fitted_inds), + W=self._subinds_check_none(W, fitted_inds), + Z=self._subinds_check_none(Z, fitted_inds), + nuisances=tuple([self._subinds_check_none(nuis, fitted_inds) for nuis in nuisances]), + sample_weight=self._subinds_check_none(sample_weight, fitted_inds), + sample_var=self._subinds_check_none(sample_var, fitted_inds)) return self - def fit_nuisances(self, Y, T, X=None, W=None, Z=None, sample_weight=None): + def _fit_nuisances(self, Y, T, X=None, W=None, Z=None, sample_weight=None): # use a binary array to get stratified split in case of discrete treatment splitter = check_cv(self._n_splits, [0], classifier=self._discrete_treatment) # if check_cv produced a new KFold or StratifiedKFold object, we need to set shuffle and random_state @@ -411,47 +494,66 @@ def fit_nuisances(self, Y, T, X=None, W=None, Z=None, sample_weight=None): self._models_nuisance = fitted_models return nuisances, fitted_inds - def fit_final(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None, sample_var=None): + def _fit_final(self, Y, T, X=None, W=None, Z=None, nuisances=None, 
sample_weight=None, sample_var=None): self._model_final.fit(Y, T, **self._filter_none_kwargs(X=X, W=W, Z=Z, nuisances=nuisances, sample_weight=sample_weight, sample_var=sample_var)) + self.score_ = None + if hasattr(self._model_final, 'score'): + self.score_ = self._model_final.score(Y, T, **self._filter_none_kwargs(X=X, W=W, Z=Z, + nuisances=nuisances, + sample_weight=sample_weight, + sample_var=sample_var)) def const_marginal_effect(self, X=None): - """ - Calculate the constant marginal CATE :math:`\\theta(·)`. - - The marginal effect is conditional on a vector of - features on a set of m test samples {Xᵢ}. - - Parameters - ---------- - X: optional (m × dₓ) matrix - Features for each sample. - If X is None, it will be treated as a column of ones with a single row - - Returns - ------- - theta: (m × d_y × dₜ) matrix - Constant marginal CATE of each treatment on each outcome for each sample. - Note that when Y or T is a vector rather than a 2-dimensional array, - the corresponding singleton dimensions in the output will be collapsed - (e.g. if both are vectors, then the output of this method will also be a vector) - """ self._check_fitted_dims(X) if X is None: return self._model_final.predict() else: return self._model_final.predict(X) + const_marginal_effect.__doc__ = LinearCateEstimator.const_marginal_effect.__doc__ def const_marginal_effect_interval(self, X=None, *, alpha=0.1): self._check_fitted_dims(X) return super().const_marginal_effect_interval(X, alpha=alpha) + const_marginal_effect_interval.__doc__ = LinearCateEstimator.const_marginal_effect_interval.__doc__ def effect_interval(self, X=None, T0=0, T1=1, *, alpha=0.1): self._check_fitted_dims(X) return super().effect_interval(X, T0=T0, T1=T1, alpha=alpha) + effect_interval.__doc__ = LinearCateEstimator.effect_interval.__doc__ def score(self, Y, T, X=None, W=None, Z=None): + """ + Score the fitted CATE model on a new data set. 
Generates nuisance parameters + for the new data set based on the fitted nuisance models created at fit time. + It uses the mean prediction of the models fitted by the different crossfit folds. + Then calls the score function of the model_final and returns the calculated score. + The model_final model must have a score method. + + If model_final does not have a score method, then it raises an `AttributeError` + + Parameters + ---------- + Y: :math:`(n \\times d_y)` matrix or vector of length n + Outcomes for each sample + T: :math:`(n \\times d_t)` matrix or vector of length n + Treatments for each sample + X: optional :math:`(n \\times d_x)` matrix or None (Default=None) + Features for each sample + W: optional :math:`(n \\times d_w)` matrix or None (Default=None) + Controls for each sample + Z: optional :math:`(n \\times d_z)` matrix or None (Default=None) + Instruments for each sample + + Returns + ------- + score : float or (array of float) + The score of the final CATE model on the new data. Same type as the return + type of the model_final.score method. 
+ """ + if not hasattr(self._model_final, 'score'): + raise AttributeError("Final model does not have a score method!") X, T = self._expand_treatments(X, T) n_splits = len(self._models_nuisance) for idx, mdl in enumerate(self._models_nuisance): diff --git a/econml/_rlearner.py b/econml/_rlearner.py index b1c62d810..78ac7d103 100644 --- a/econml/_rlearner.py +++ b/econml/_rlearner.py @@ -126,7 +126,10 @@ def score(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None T_res = T_res.reshape((-1, 1)) effects = self._model_final.predict(X).reshape((-1, Y_res.shape[1], T_res.shape[1])) Y_res_pred = np.einsum('ijk,ik->ij', effects, T_res).reshape(Y_res.shape) - return ((Y_res - Y_res_pred)**2).mean() + if sample_weight is not None: + return np.average((Y_res - Y_res_pred)**2, weights=sample_weight) + else: + return np.mean((Y_res - Y_res_pred)**2) super().__init__(ModelNuisance(model_y, model_t), ModelFinal(model_final), discrete_treatment, n_splits, random_state) diff --git a/econml/cate_estimator.py b/econml/cate_estimator.py index c5f696a81..3ae08827a 100644 --- a/econml/cate_estimator.py +++ b/econml/cate_estimator.py @@ -185,20 +185,20 @@ class LinearCateEstimator(BaseCateEstimator): @abc.abstractmethod def const_marginal_effect(self, X=None): """ - Calculate the constant marginal CATE θ(·). + Calculate the constant marginal CATE :math:`\\theta(·)`. The marginal effect is conditional on a vector of - features on a set of m test samples {Xᵢ}. + features on a set of m test samples X[i]. Parameters ---------- - X: optional (m × dₓ) matrix - Features for each sample + X: optional :math:`(m \\times d_x)` matrix or None (Default=None) + Features for each sample. Returns ------- - theta: (m × d_y × dₜ) matrix - Constant marginal CATE of each treatment on each outcome for each sample. 
+ theta: :math:`(m \\times d_y \\times d_t)` matrix or :math:`(d_y \\times d_t)` matrix if X is None + Constant marginal CATE of each treatment on each outcome for each sample X[i]. Note that when Y or T is a vector rather than a 2-dimensional array, the corresponding singleton dimensions in the output will be collapsed (e.g. if both are vectors, then the output of this method will also be a vector) @@ -316,6 +316,7 @@ def _expand_treatments(self, X=None, *Ts): def effect(self, X=None, T0=0, T1=1): # NOTE: don't explicitly expand treatments here, because it's done in the super call return super().effect(X, T0=T0, T1=T1) + effect.__doc__ = BaseCateEstimator.effect.__doc__ class StatsModelsCateEstimatorMixin(BaseCateEstimator): diff --git a/econml/tests/test_ortho_learner.py b/econml/tests/test_ortho_learner.py index 5154d963c..ad8595f77 100644 --- a/econml/tests/test_ortho_learner.py +++ b/econml/tests/test_ortho_learner.py @@ -44,3 +44,182 @@ def predict(self, X, y, W=None): coef_[0] = 1 [np.testing.assert_allclose(coef_, mdl._model.coef_, rtol=0, atol=0.08) for mdl in model_list] np.testing.assert_array_equal(fitted_inds, np.arange(X.shape[0])) + + def test_ol(self): + + class ModelNuisance: + def __init__(self, model_t, model_y): + self._model_t = model_t + self._model_y = model_y + + def fit(self, Y, T, W=None): + self._model_t.fit(W, T) + self._model_y.fit(W, Y) + return self + + def predict(self, Y, T, W=None): + return Y - self._model_y.predict(W), T - self._model_t.predict(W) + + class ModelFinal: + + def __init__(self): + return + + def fit(self, Y, T, W=None, nuisances=None): + Y_res, T_res = nuisances + self.model = LinearRegression(fit_intercept=False).fit(T_res.reshape(-1, 1), Y_res) + return self + + def predict(self, X=None): + return self.model.coef_[0] + + def score(self, Y, T, W=None, nuisances=None): + Y_res, T_res = nuisances + return np.mean((Y_res - self.model.predict(T_res.reshape(-1, 1)))**2) + + np.random.seed(123) + X = 
np.random.normal(size=(10000, 3)) + sigma = 0.1 + y = X[:, 0] + X[:, 1] + np.random.normal(0, sigma, size=(10000,)) + est = _OrthoLearner(ModelNuisance(LinearRegression(), LinearRegression()), ModelFinal(), + n_splits=2, discrete_treatment=False, random_state=None) + est.fit(y, X[:, 0], W=X[:, 1:]) + np.testing.assert_almost_equal(est.const_marginal_effect(), 1, decimal=3) + np.testing.assert_array_almost_equal(est.effect(), np.ones(1), decimal=3) + np.testing.assert_array_almost_equal(est.effect(T0=0, T1=10), np.ones(1) * 10, decimal=2) + np.testing.assert_almost_equal(est.score(y, X[:, 0], W=X[:, 1:]), sigma**2, decimal=3) + np.testing.assert_almost_equal(est.score_, sigma**2, decimal=3) + np.testing.assert_almost_equal(est.model_final.model.coef_[0], 1, decimal=3) + + # Test non keyword based calls to fit + np.random.seed(123) + X = np.random.normal(size=(10000, 3)) + sigma = 0.1 + y = X[:, 0] + X[:, 1] + np.random.normal(0, sigma, size=(10000,)) + est = _OrthoLearner(ModelNuisance(LinearRegression(), LinearRegression()), ModelFinal(), + n_splits=2, discrete_treatment=False, random_state=None) + est.fit(y, X[:, 0], None, X[:, 1:]) + np.testing.assert_almost_equal(est.const_marginal_effect(), 1, decimal=3) + np.testing.assert_array_almost_equal(est.effect(), np.ones(1), decimal=3) + np.testing.assert_array_almost_equal(est.effect(T0=0, T1=10), np.ones(1) * 10, decimal=2) + np.testing.assert_almost_equal(est.score(y, X[:, 0], None, X[:, 1:]), sigma**2, decimal=3) + np.testing.assert_almost_equal(est.score_, sigma**2, decimal=3) + np.testing.assert_almost_equal(est.model_final.model.coef_[0], 1, decimal=3) + + # Test custom splitter + np.random.seed(123) + X = np.random.normal(size=(10000, 3)) + sigma = 0.1 + y = X[:, 0] + X[:, 1] + np.random.normal(0, sigma, size=(10000,)) + est = _OrthoLearner(ModelNuisance(LinearRegression(), LinearRegression()), ModelFinal(), + n_splits=KFold(n_splits=3), discrete_treatment=False, random_state=None) + est.fit(y, X[:, 0], None, 
X[:, 1:]) + np.testing.assert_almost_equal(est.const_marginal_effect(), 1, decimal=3) + np.testing.assert_array_almost_equal(est.effect(), np.ones(1), decimal=3) + np.testing.assert_array_almost_equal(est.effect(T0=0, T1=10), np.ones(1) * 10, decimal=2) + np.testing.assert_almost_equal(est.score(y, X[:, 0], W=X[:, 1:]), sigma**2, decimal=3) + np.testing.assert_almost_equal(est.score_, sigma**2, decimal=3) + np.testing.assert_almost_equal(est.model_final.model.coef_[0], 1, decimal=3) + + # Test incomplete set of test folds + np.random.seed(123) + X = np.random.normal(size=(10000, 3)) + sigma = 0.1 + y = X[:, 0] + X[:, 1] + np.random.normal(0, sigma, size=(10000,)) + folds = [(np.arange(X.shape[0] // 2), np.arange(X.shape[0] // 2, X.shape[0]))] + est = _OrthoLearner(ModelNuisance(LinearRegression(), LinearRegression()), ModelFinal(), + n_splits=KFold(n_splits=3), discrete_treatment=False, random_state=None) + est.fit(y, X[:, 0], None, X[:, 1:]) + np.testing.assert_almost_equal(est.const_marginal_effect(), 1, decimal=3) + np.testing.assert_array_almost_equal(est.effect(), np.ones(1), decimal=3) + np.testing.assert_array_almost_equal(est.effect(T0=0, T1=10), np.ones(1) * 10, decimal=2) + np.testing.assert_almost_equal(est.score(y, X[:, 0], W=X[:, 1:]), sigma**2, decimal=3) + np.testing.assert_almost_equal(est.score_, sigma**2, decimal=3) + np.testing.assert_almost_equal(est.model_final.model.coef_[0], 1, decimal=3) + + def test_ol_no_score_final(self): + class ModelNuisance: + def __init__(self, model_t, model_y): + self._model_t = model_t + self._model_y = model_y + + def fit(self, Y, T, W=None): + self._model_t.fit(W, T) + self._model_y.fit(W, Y) + return self + + def predict(self, Y, T, W=None): + return Y - self._model_y.predict(W), T - self._model_t.predict(W) + + class ModelFinal: + + def __init__(self): + return + + def fit(self, Y, T, W=None, nuisances=None): + Y_res, T_res = nuisances + self.model = LinearRegression(fit_intercept=False).fit(T_res.reshape(-1, 
1), Y_res) + return self + + def predict(self, X=None): + return self.model.coef_[0] + + np.random.seed(123) + X = np.random.normal(size=(10000, 3)) + sigma = 0.1 + y = X[:, 0] + X[:, 1] + np.random.normal(0, sigma, size=(10000,)) + est = _OrthoLearner(ModelNuisance(LinearRegression(), LinearRegression()), ModelFinal(), + n_splits=2, discrete_treatment=False, random_state=None) + est.fit(y, X[:, 0], W=X[:, 1:]) + np.testing.assert_almost_equal(est.const_marginal_effect(), 1, decimal=3) + np.testing.assert_array_almost_equal(est.effect(), np.ones(1), decimal=3) + np.testing.assert_array_almost_equal(est.effect(T0=0, T1=10), np.ones(1) * 10, decimal=2) + assert est.score_ is None + np.testing.assert_almost_equal(est.model_final.model.coef_[0], 1, decimal=3) + + def test_ol_discrete_treatment(self): + class ModelNuisance: + def __init__(self, model_t, model_y): + self._model_t = model_t + self._model_y = model_y + + def fit(self, Y, T, W=None): + self._model_t.fit(W, np.matmul(T, np.arange(1, T.shape[1] + 1))) + self._model_y.fit(W, Y) + return self + + def predict(self, Y, T, W=None): + return Y - self._model_y.predict(W), T - self._model_t.predict_proba(W)[:, 1:] + + class ModelFinal: + + def __init__(self): + return + + def fit(self, Y, T, W=None, nuisances=None): + Y_res, T_res = nuisances + self.model = LinearRegression(fit_intercept=False).fit(T_res.reshape(-1, 1), Y_res) + return self + + def predict(self): + # theta needs to be of dimension (1, d_t) if T is (n, d_t) + return np.array([[self.model.coef_[0]]]) + + def score(self, Y, T, W=None, nuisances=None): + Y_res, T_res = nuisances + return np.mean((Y_res - self.model.predict(T_res.reshape(-1, 1)))**2) + + np.random.seed(123) + X = np.random.normal(size=(10000, 3)) + import scipy.special + from sklearn.linear_model import LogisticRegression + T = np.random.binomial(1, scipy.special.expit(X[:, 0])) + sigma = 0.01 + y = T + X[:, 0] + np.random.normal(0, sigma, size=(10000,)) + est = 
_OrthoLearner(ModelNuisance(LogisticRegression(solver='lbfgs'), LinearRegression()), ModelFinal(), + n_splits=2, discrete_treatment=True, random_state=None) + est.fit(y, T, W=X) + np.testing.assert_almost_equal(est.const_marginal_effect(), 1, decimal=3) + np.testing.assert_array_almost_equal(est.effect(), np.ones(1), decimal=3) + np.testing.assert_almost_equal(est.score(y, T, W=X), sigma**2, decimal=3) + np.testing.assert_almost_equal(est.model_final.model.coef_[0], 1, decimal=3) diff --git a/notebooks/OrthoLearner.ipynb b/notebooks/OrthoLearner.ipynb index f2cf39c2f..1f80a554c 100644 --- a/notebooks/OrthoLearner.ipynb +++ b/notebooks/OrthoLearner.ipynb @@ -13,7 +13,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -22,7 +22,7 @@ "text": [ "(array([-1.1057289 , -1.53756637, -2.4518278 , ..., 1.10628792,\n", " -1.82966233, -1.78227335]),)\n", - "[<__main__.Wrapper object at 0x12fd9d438>, <__main__.Wrapper object at 0x118e252b0>]\n" + "[<__main__.Wrapper object at 0x131b74dd8>, <__main__.Wrapper object at 0x11abf0be0>]\n" ] }, { @@ -31,7 +31,7 @@ "array([ 0, 1, 2, ..., 4997, 4998, 4999])" ] }, - "execution_count": 3, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -63,16 +63,16 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "array([1.23440172])" + "" ] }, - "execution_count": 13, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -96,22 +96,264 @@ " return\n", " def fit(self, Y, T, W=None, nuisances=None):\n", " Y_res, T_res = nuisances\n", - " self.model = LinearRegression().fit(T_res.reshape(-1, 1), Y_res)\n", + " self.model = LinearRegression(fit_intercept=False).fit(T_res.reshape(-1, 1), Y_res)\n", + " self.score_ = self.score(Y, T, W=W, nuisances=nuisances)\n", " return self\n", " def predict(self, X=None):\n", " return self.model.coef_[0]\n", " def 
score(self, Y, T, W=None, nuisances=None):\n", " Y_res, T_res = nuisances\n", - " return np.mean(Y_res - self.model.coef_[0]*T_res)**2\n", + " return np.mean((Y_res - self.model.predict(T_res.reshape(-1, 1)))**2)\n", "np.random.seed(123)\n", - "X = np.random.normal(size=(100, 3))\n", - "y = X[:, 0] + X[:, 1] + np.random.normal(size=(100,))\n", - "est = _OrthoLearner(ModelNuisance(LinearRegression(), LinearRegression()), ModelFinal(),\n", + "X = np.random.normal(size=(10000, 3))\n", + "y = X[:, 0] + X[:, 1] + np.random.normal(0, 0.01, size=(10000,))\n", + "est = _OrthoLearner(ModelNuisance(LinearRegression(), LinearRegression()),\n", + " ModelFinal(),\n", " n_splits=2, discrete_treatment=False, random_state=None)\n", - "est.fit(y, X[:, 0], W=X[:, 1:])\n", + "est.fit(y, X[:, 0], W=X[:, 1:])" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1.000143376851309" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "est.const_marginal_effect()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([1.00014338])" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "est.effect(T0=0, T1=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.00010033038763819668" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "est.score(y, X[:, 0], W=X[:, 1:])" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "LinearRegression(copy_X=True, fit_intercept=False, n_jobs=None,\n", + " normalize=False)" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + 
"source": [ + "est.model_final.model" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([1.00014338])" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "est.model_final.model.coef_" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.00010038314872390833" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "est.score_" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "class ModelNuisance:\n", + " def __init__(self, model_t, model_y):\n", + " self._model_t = model_t\n", + " self._model_y = model_y\n", + "\n", + " def fit(self, Y, T, W=None):\n", + " self._model_t.fit(W, np.matmul(T, np.arange(1, T.shape[1]+1)))\n", + " self._model_y.fit(W, Y)\n", + " return self\n", + "\n", + " def predict(self, Y, T, W=None):\n", + " return Y - self._model_y.predict(W), T - self._model_t.predict_proba(W)[:, 1:]\n", + "\n", + "class ModelFinal:\n", + "\n", + " def __init__(self):\n", + " return\n", + "\n", + " def fit(self, Y, T, W=None, nuisances=None):\n", + " Y_res, T_res = nuisances\n", + " self.model = LinearRegression(fit_intercept=False).fit(T_res.reshape(-1, 1), Y_res)\n", + " return self\n", + "\n", + " def predict(self):\n", + " # theta needs to be of dimension (1, d_t) if T is (n, d_t)\n", + " return np.array([[self.model.coef_[0]]])\n", + "\n", + " def score(self, Y, T, W=None, nuisances=None):\n", + " Y_res, T_res = nuisances\n", + " return np.mean((Y_res - self.model.predict(T_res.reshape(-1, 1)))**2)\n", + "\n", + "np.random.seed(123)\n", + "X = np.random.normal(size=(100, 3))\n", + "import 
scipy.special\n", + "from sklearn.linear_model import LogisticRegression\n", + "T = np.random.binomial(1, scipy.special.expit(X[:, 0]))\n", + "sigma = 0.01\n", + "y = T + X[:, 0] + np.random.normal(0, sigma, size=(100,))\n", + "est = _OrthoLearner(ModelNuisance(LogisticRegression(solver='lbfgs'), LinearRegression()), ModelFinal(),\n", + " n_splits=2, discrete_treatment=True, random_state=None)\n", + "est.fit(y, T, W=X)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[1.00123159]])" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "est.const_marginal_effect()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([1.00123159])" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ "est.effect()" ] }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.002569588332146612" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "est.score(y, T, W=X)" + ] + }, { "cell_type": "code", "execution_count": 14, @@ -120,7 +362,7 @@ { "data": { "text/plain": [ - "0.0003880489502537651" + "1.0012315874866917" ] }, "execution_count": 14, @@ -129,24 +371,85 @@ } ], "source": [ - "est.score(y, X[:, 0], W=X[:, 1:])" + "est.model_final.model.coef_[0]" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "LinearRegression(copy_X=True, fit_intercept=False, n_jobs=None,\n", + " normalize=False)" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "est.model_final.model" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + 
"outputs": [ + { + "data": { + "text/plain": [ + "array([1.00123159])" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "est.model_final.model.coef_" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.0031604059708364245" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "est.score_" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[[ 1.0613513 0.0760928 -0.53482069]]\n", - "[0.99780349]\n", - "(array([0.99255731]), array([1.00304966]))\n", - "[ 0.99804439 -0.00316585]\n", - "(array([ 0.99281255, -0.00838755]), array([1.00327622, 0.00205585]))\n", - "[0.99780349]\n" + "[[1.64721067 1.68104532 0.07458696]]\n", + "[1.00979079]\n", + "(array([0.99962896]), array([1.01995263]))\n", + "[1.00096897 0.00524782]\n", + "(array([9.95750740e-01, 5.09187652e-05]), array([1.0061872 , 0.01044472]))\n", + "[1.00979079]\n" ] } ], @@ -169,21 +472,21 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 19, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[[-0.88666276 0.14006212 0.72897065 1.41017893 0.31083606 0.32659312\n", - " 0.53522834 1.21111218 -0.74004013 -0.6584672 ]]\n", - "[[0.99937549 0.99937549]]\n", - "[[9.99291159e-01 0.00000000e+00 0.00000000e+00 0.00000000e+00\n", - " 2.71306507e-04 0.00000000e+00 0.00000000e+00 0.00000000e+00]\n", - " [9.99291159e-01 0.00000000e+00 0.00000000e+00 0.00000000e+00\n", - " 2.71306507e-04 0.00000000e+00 0.00000000e+00 0.00000000e+00]]\n", - "[[0.99937549 0.99937549]]\n" + "[[ 0.89256841 0.27329632 1.20577242 -1.19175328 0.15390398 -0.23511674\n", + " -0.55378668 -2.64852199 0.73121095 -0.73312251]]\n", + "[[0.99664838 0.99664838]]\n", + "[[ 0.99040771 0.00504131 0. 
-0.0034206 -0.01300061 -0.01033077\n", + " -0.01303745 0.00259075]\n", + " [ 0.99040771 0.00504131 0. -0.0034206 -0.01300061 -0.01033077\n", + " -0.01303745 0.00259075]]\n", + "[[0.99664838 0.99664838]]\n" ] } ], From 8fe4de9f21f18835444e9652fd4f8c964c7c8a49 Mon Sep 17 00:00:00 2001 From: Vasileios Syrgkanis Date: Sun, 3 Nov 2019 18:11:14 -0500 Subject: [PATCH 30/64] fixed bug in score of _rlearner with multidim outcome, to average first across samples and then take the mean across outcome dimensions --- econml/_rlearner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/econml/_rlearner.py b/econml/_rlearner.py index 78ac7d103..fe48489bb 100644 --- a/econml/_rlearner.py +++ b/econml/_rlearner.py @@ -127,7 +127,7 @@ def score(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None effects = self._model_final.predict(X).reshape((-1, Y_res.shape[1], T_res.shape[1])) Y_res_pred = np.einsum('ijk,ik->ij', effects, T_res).reshape(Y_res.shape) if sample_weight is not None: - return np.average((Y_res - Y_res_pred)**2, weights=sample_weight) + return np.mean(np.average((Y_res - Y_res_pred)**2, weights=sample_weight, axis=0)) else: return np.mean((Y_res - Y_res_pred)**2) From 3af072d0bf909649701957afcba71fa4b366ba01 Mon Sep 17 00:00:00 2001 From: Vasileios Syrgkanis Date: Sun, 3 Nov 2019 19:00:19 -0500 Subject: [PATCH 31/64] linting --- econml/_ortho_learner.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/econml/_ortho_learner.py b/econml/_ortho_learner.py index 51c875e21..4ce34eb47 100644 --- a/econml/_ortho_learner.py +++ b/econml/_ortho_learner.py @@ -227,8 +227,8 @@ class _OrthoLearner(TreatmentExpansionMixin, LinearCateEstimator): model method signatures to skip any of the keyword arguments as long as the class is always called with the omitted keyword argument set to `None`. Moreover, the predict function of the final model can take no argument if the class is always called with `X=None`. 
This can be enforced in child classes by re-implementing the fit - and the various effect methods. If `discrete_treatment=True`, then the input `T` to both above calls will be the - one-hot encoding of the original input `T`, excluding the first column of the one-hot. + and the various effect methods. If `discrete_treatment=True`, then the input `T` to both above calls will be + the one-hot encoding of the original input `T`, excluding the first column of the one-hot. discrete_treatment: bool Whether the treatment values should be treated as categorical, rather than continuous, quantities From 077770581f65a4a880043e482168659ae1bff97e Mon Sep 17 00:00:00 2001 From: Vasileios Syrgkanis Date: Sun, 3 Nov 2019 19:02:15 -0500 Subject: [PATCH 32/64] dosctring small changes in utilities --- econml/utilities.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/econml/utilities.py b/econml/utilities.py index ade20af46..2e4f97f3f 100644 --- a/econml/utilities.py +++ b/econml/utilities.py @@ -1636,6 +1636,7 @@ def intercept_(self): @property def _param_var(self): """ + Returns ------- var : {(d (+1), d (+1)), (p, d (+1), d (+1))} nd array like @@ -1651,6 +1652,7 @@ def _param_var(self): @property def _param_stderr(self): """ + Returns ------- _param_stderr : {(d (+1),) (d (+1), p)} nd array like From 7524cda0c1d70c34cfd0c9c59201cc64a68cfc29 Mon Sep 17 00:00:00 2001 From: Vasileios Syrgkanis Date: Sun, 3 Nov 2019 19:16:58 -0500 Subject: [PATCH 33/64] doc string updates to base cate estimator. 
--- econml/_ortho_learner.py | 20 +++++++-------- econml/cate_estimator.py | 53 ++++++++++++++++++++-------------------- 2 files changed, 37 insertions(+), 36 deletions(-) diff --git a/econml/_ortho_learner.py b/econml/_ortho_learner.py index 4ce34eb47..df6f4e945 100644 --- a/econml/_ortho_learner.py +++ b/econml/_ortho_learner.py @@ -429,15 +429,15 @@ def fit(self, Y, T, X=None, W=None, Z=None, sample_weight=None, sample_var=None, Parameters ---------- - Y: :math:`(n \\times d_y)` matrix or vector of length n + Y: (n, d_y) matrix or vector of length n Outcomes for each sample - T: :math:`(n \\times d_t)` matrix or vector of length n + T: (n, d_t) matrix or vector of length n Treatments for each sample - X: optional :math:`(n \\times d_x)` matrix or None (Default=None) + X: optional (n, d_x) matrix or None (Default=None) Features for each sample - W: optional :math:`(n \\times d_w)` matrix or None (Default=None) + W: optional (n, d_w) matrix or None (Default=None) Controls for each sample - Z: optional :math:`(n \\times d_z)` matrix or None (Default=None) + Z: optional (n, d_z) matrix or None (Default=None) Instruments for each sample sample_weight: optional (n,) vector or None (Default=None) Weights for each samples @@ -535,15 +535,15 @@ def score(self, Y, T, X=None, W=None, Z=None): Parameters ---------- - Y: :math:`(n \\times d_y)` matrix or vector of length n + Y: (n, d_y) matrix or vector of length n Outcomes for each sample - T: :math:`(n \\times d_t)` matrix or vector of length n + T: (n, d_t) matrix or vector of length n Treatments for each sample - X: optional :math:`(n \\times d_x)` matrix or None (Default=None) + X: optional (n, d_x) matrix or None (Default=None) Features for each sample - W: optional :math:`(n \\times d_w)` matrix or None (Default=None) + W: optional (n, d_w) matrix or None (Default=None) Controls for each sample - Z: optional :math:`(n \\times d_z)` matrix or None (Default=None) + Z: optional (n, d_z) matrix or None (Default=None) 
Instruments for each sample Returns diff --git a/econml/cate_estimator.py b/econml/cate_estimator.py index 3ae08827a..8025eec7b 100644 --- a/econml/cate_estimator.py +++ b/econml/cate_estimator.py @@ -48,22 +48,23 @@ def _prefit(self, Y, T, *args, **kwargs): @abc.abstractmethod def fit(self, *args, inference=None, **kwargs): """ - Estimate the counterfactual model from data, i.e. estimates functions τ(·,·,·), ∂τ(·,·). + Estimate the counterfactual model from data, i.e. estimates functions + tau(X, T0, T1), marginal_tau(T, X). Note that the signature of this method may vary in subclasses (e.g. classes that don't support instruments will not allow a `Z` argument) Parameters ---------- - Y: (n × d_y) matrix or vector of length n + Y: (n, d_y) matrix or vector of length n Outcomes for each sample - T: (n × dₜ) matrix or vector of length n + T: (n, d_t) matrix or vector of length n Treatments for each sample - X: optional (n × dₓ) matrix + X: optional (n, d_x) matrix Features for each sample - W: optional (n × d_w) matrix + W: optional (n, d_w) matrix Controls for each sample - Z: optional (n × d_z) matrix + Z: optional (n, d_z) matrix Instruments for each sample inference: optional string, `Inference` instance, or None Method for performing inference. All estimators support 'bootstrap' @@ -95,23 +96,23 @@ def call(self, Y, T, *args, inference=None, **kwargs): @abc.abstractmethod def effect(self, X=None, *, T0, T1): """ - Calculate the heterogeneous treatment effect τ(·,·,·). + Calculate the heterogeneous treatment effect tau(X, T0, T1). The effect is calculated between the two treatment points conditional on a vector of features on a set of m test samples {T0ᵢ, T1ᵢ, Xᵢ}. 
Parameters ---------- - T0: (m × dₜ) matrix or vector of length m + T0: (m, d_t) matrix or vector of length m Base treatments for each sample - T1: (m × dₜ) matrix or vector of length m + T1: (m, d_t) matrix or vector of length m Target treatments for each sample - X: optional (m × dₓ) matrix + X: optional (m, d_x) matrix Features for each sample Returns ------- - τ: (m × d_y) matrix + τ: (m, d_y) matrix Heterogeneous treatment effects on each outcome for each sample Note that when Y is a vector rather than a 2-dimensional array, the corresponding singleton dimension will be collapsed (so this method will return a vector) @@ -121,21 +122,21 @@ def effect(self, X=None, *, T0, T1): @abc.abstractmethod def marginal_effect(self, T, X=None): """ - Calculate the heterogeneous marginal effect ∂τ(·,·). + Calculate the heterogeneous marginal effect marginal(T, X). The marginal effect is calculated around a base treatment point conditional on a vector of features on a set of m test samples {Tᵢ, Xᵢ}. Parameters ---------- - T: (m × dₜ) matrix + T: (m, d_t) matrix Base treatments for each sample - X: optional (m × dₓ) matrix + X: optional (m, d_x) matrix Features for each sample Returns ------- - grad_tau: (m × d_y × dₜ) array + grad_tau: (m, d_y, d_t) array Heterogeneous marginal effects on each outcome for each sample Note that when Y or T is a vector rather than a 2-dimensional array, the corresponding singleton dimensions in the output will be collapsed @@ -149,9 +150,9 @@ def _expand_treatments(self, X=None, *Ts): Parameters ---------- - X : optional (m × dₓ) matrix + X: optional (m, d_x) matrix Features for each sample, or None - Ts: sequence of (m × dₜ) matrices + Ts: sequence of (m, d_t) matrices Base treatments for each sample Returns @@ -192,12 +193,12 @@ def const_marginal_effect(self, X=None): Parameters ---------- - X: optional :math:`(m \\times d_x)` matrix or None (Default=None) + X: optional (m, d_x) matrix or None (Default=None) Features for each sample. 
Returns ------- - theta: :math:`(m \\times d_y \\times d_t)` matrix or :math:`(d_y \\times d_t)` matrix if X is None + theta: (m, d_y, d_t) matrix or (d_y, d_t) matrix if X is None Constant marginal CATE of each treatment on each outcome for each sample X[i]. Note that when Y or T is a vector rather than a 2-dimensional array, the corresponding singleton dimensions in the output will be collapsed @@ -216,16 +217,16 @@ def effect(self, X=None, *, T0, T1): Parameters ---------- - T0: (m × dₜ) matrix + T0: (m, d_t) matrix Base treatments for each sample - T1: (m × dₜ) matrix + T1: (m, d_t) matrix Target treatments for each sample - X: optional (m × dₓ) matrix + X: optional (m, d_x) matrix Features for each sample Returns ------- - τ: (m × d_y) matrix (or length m vector if Y was a vector) + effect: (m, d_y) matrix (or length m vector if Y was a vector) Heterogeneous treatment effects on each outcome for each sample. Note that when Y is a vector rather than a 2-dimensional array, the corresponding singleton dimension will be collapsed (so this method will return a vector) @@ -257,14 +258,14 @@ def marginal_effect(self, T, X=None): Parameters ---------- - T: (m × dₜ) matrix + T: (m, d_t) matrix Base treatments for each sample - X: optional (m × dₓ) matrix + X: optional (m, d_x) matrix Features for each sample Returns ------- - grad_tau: (m × d_y × dₜ) array + grad_tau: (m, d_y, d_t) array Heterogeneous marginal effects on each outcome for each sample Note that when Y or T is a vector rather than a 2-dimensional array, the corresponding singleton dimensions in the output will be collapsed From e93f7b1603d0b4351ffa39bb025663c0acdf1720 Mon Sep 17 00:00:00 2001 From: Vasileios Syrgkanis Date: Sun, 3 Nov 2019 20:32:12 -0500 Subject: [PATCH 34/64] better access to fitted nuisance mdoells in _OrthoLearner and _RLearner. 
Improved docstrings in both --- econml/_ortho_learner.py | 11 ++- econml/_rlearner.py | 149 ++++++++++++++++++++++++++++++----- notebooks/OrthoLearner.ipynb | 103 +++++++++++++++--------- 3 files changed, 205 insertions(+), 58 deletions(-) diff --git a/econml/_ortho_learner.py b/econml/_ortho_learner.py index df6f4e945..e7b2344fc 100644 --- a/econml/_ortho_learner.py +++ b/econml/_ortho_learner.py @@ -376,7 +376,10 @@ def score(self, Y, T, W=None, nuisances=None): Attributes ---------- - model_final : object of type(model_final) + models_nuisance: list of objects of type(model_nuisance) + A list of instances of the model_nuisance object. Each element corresponds to a crossfitting + fold and is the model instance that was fitted for that training fold. + model_final: object of type(model_final) An instance of the model_final object that was fitted after calling fit. score_ : float or array of floats If the model_final has a score method, then `score_` contains the outcome of the final model @@ -387,7 +390,7 @@ def score(self, Y, T, W=None, nuisances=None): def __init__(self, model_nuisance, model_final, discrete_treatment, n_splits, random_state): self._model_nuisance = clone(model_nuisance, safe=False) - self._models_nuisance = [] + self._models_nuisance = None self._model_final = clone(model_final, safe=False) self._n_splits = n_splits self._discrete_treatment = discrete_treatment @@ -575,3 +578,7 @@ def score(self, Y, T, X=None, W=None, Z=None): @property def model_final(self): return self._model_final + + @property + def models_nuisance(self): + return self._models_nuisance diff --git a/econml/_rlearner.py b/econml/_rlearner.py index fe48489bb..6280e6103 100644 --- a/econml/_rlearner.py +++ b/econml/_rlearner.py @@ -1,12 +1,28 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. -"""Double ML. 
+""" + +The R Learner is an approach for estimating flexible non-parametric models +of conditional average treatment effects in the setting with no unobserved confounders. +The method is based on the idea of Neyman orthogonality and estimates a CATE +whose mean squared error is robust to the estimation errors of auxiliary submodels +that also need to be estimated from data: + + 1) the outcome or regression model + 2) the treatment or propensity or policy or logging policy model + +References +---------- + +Xinkun Nie, Stefan Wager (2017). Quasi-Oracle Estimation of Heterogeneous Treatment Effects. + https://arxiv.org/abs/1712.04912 -"Double Machine Learning" is an algorithm that applies arbitrary machine learning methods -to fit the treatment and response, then uses a linear model to predict the response residuals -from the treatment residuals. +Dylan Foster, Vasilis Syrgkanis (2019). Orthogonal Statistical Learning. + ACM Conference on Learning Theory. https://arxiv.org/abs/1901.09036 +Chernozhukov et al. (2017). Double/debiased machine learning for treatment and structural parameters. + The Econometrics Journal. https://arxiv.org/abs/1608.00060 """ import numpy as np @@ -34,20 +50,39 @@ class _RLearner(_OrthoLearner): Parameters ---------- - model_y: estimator + model_y: estimator of E[Y | X, W] The estimator for fitting the response to the features and controls. Must implement `fit` and `predict` methods. Unlike sklearn estimators both methods must - take an extra second argument (the controls). + take an extra second argument (the controls), i.e. : - model_t: estimator + .. highlight:: python + .. code-block:: python + + model_y.fit(X, W, Y, sample_weight=sample_weight) + model_y.predict(X, W) + + model_t: estimator of E[T | X, W] The estimator for fitting the treatment to the features and controls. Must implement `fit` and `predict` methods. Unlike sklearn estimators both methods must - take an extra second argument (the controls). 
+ take an extra second argument (the controls), i.e. : + + .. highlight:: python + .. code-block:: python + + model_t.fit(X, W, T, sample_weight=sample_weight) + model_t.predict(X, W) model_final: estimator for fitting the response residuals to the features and treatment residuals Must implement `fit` and `predict` methods. Unlike sklearn estimators the fit methods must - take an extra second argument (the treatment residuals). Predict, on the other hand, - should just take the features and return the constant marginal effect. + take an extra second argument (the treatment residuals). Predict, on the other hand, + should just take the features and return the constant marginal effect. More, concretely: + + .. highlight:: python + .. code-block:: python + + model_final.fit(X, T_res, Y_res, + sample_weight=sample_weight, sample_var=sample_var) + model_final.predict(X) discrete_treatment: bool Whether the treatment values should be treated as categorical, rather than continuous, quantities @@ -73,18 +108,29 @@ class _RLearner(_OrthoLearner): If :class:`~numpy.random.mtrand.RandomState` instance, random_state is the random number generator; If None, the random number generator is the :class:`~numpy.random.mtrand.RandomState` instance used by `np.random`. + + Attributes + ---------- + models_y: list of objects of type(model_y) + A list of instances of the model_y object. Each element corresponds to a crossfitting + fold and is the model instance that was fitted for that training fold. + models_t: list of objects of type(model_t) + A list of instances of the model_t object. Each element corresponds to a crossfitting + fold and is the model instance that was fitted for that training fold. + model_final : object of type(model_final) + An instance of the model_final object that was fitted after calling fit. + score_ : float + The MSE in the final residual on residual regression, i.e. + + .. 
math:: + \\frac{1}{n} \\sum_{i=1}^n (Y_i - \\hat{E}[Y|X_i, W_i] - \\hat{\\theta}(X_i)\cdot (T_i - \\hat{E}[T|X_i, W_i]))^2 + + If `sample_weight` is not None at fit time, then a weighted average is returned. If the outcome Y + is multidimensional, then the average of the MSEs for each dimension of Y is returned. """ def __init__(self, model_y, model_t, model_final, discrete_treatment, n_splits, random_state): - self._model_y = clone(model_y, safe=False) - self._model_t = clone(model_t, safe=False) - self._models_y = [] - self._models_t = [] - self._model_final = clone(model_final, safe=False) - self._n_splits = n_splits - self._discrete_treatment = discrete_treatment - class ModelNuisance: def __init__(self, model_y, model_t): self._model_y = clone(model_y, safe=False) @@ -99,7 +145,7 @@ def fit(self, Y, T, X=None, W=None, Z=None, sample_weight=None): def predict(self, Y, T, X=None, W=None, Z=None, sample_weight=None): Y_pred = self._model_y.predict(X, W) T_pred = self._model_t.predict(X, W) - if (X is None) and (W is None): + if (X is None) and (W is None): # In this case predict above returns a single row Y_pred = np.tile(Y_pred, Y.shape[0]) T_pred = np.tile(T_pred, T.shape[0]) Y_res = Y - Y_pred.reshape(Y.shape) @@ -134,6 +180,71 @@ def score(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None super().__init__(ModelNuisance(model_y, model_t), ModelFinal(model_final), discrete_treatment, n_splits, random_state) + def fit(self, Y, T, X=None, W=None, sample_weight=None, sample_var=None, *, inference=None): + """ + Estimate the counterfactual model from data, i.e. estimates function :math:`\\theta(\\cdot)`. 
+ + Parameters + ---------- + Y: (n, d_y) matrix or vector of length n + Outcomes for each sample + T: (n, d_t) matrix or vector of length n + Treatments for each sample + X: optional (n, d_x) matrix or None (Default=None) + Features for each sample + W: optional (n, d_w) matrix or None (Default=None) + Controls for each sample + sample_weight: optional (n,) vector or None (Default=None) + Weights for each samples + sample_var: optional (n,) vector or None (Default=None) + Sample variance for each sample + inference: string, `Inference` instance, or None + Method for performing inference. This estimator supports 'bootstrap' + (or an instance of `BootstrapInference`). + + Returns + ------- + self : _RLearner instance + """ + # Replacing fit from _OrthoLearner, to enforce Z=None and improve the docstring + return super().fit(Y, T, X=X, W=W, sample_weight=sample_weight, sample_var=sample_var, inference=inference) + + def score(self, Y, T, X=None, W=None): + """ + Score the fitted CATE model on a new data set. Generates nuisance parameters + for the new data set based on the fitted residual nuisance models created at fit time. + It uses the mean prediction of the models fitted by the different crossfit folds. + Then calculates the MSE of the final residual Y on residual T regression. + + If model_final does not have a score method, then it raises an `AttributeError` + + Parameters + ---------- + Y: (n, d_y) matrix or vector of length n + Outcomes for each sample + T: (n, d_t) matrix or vector of length n + Treatments for each sample + X: optional (n, d_x) matrix or None (Default=None) + Features for each sample + W: optional (n, d_w) matrix or None (Default=None) + Controls for each sample + + Returns + ------- + score : float + The MSE of the final CATE model on the new data. 
+ """ + # Replacing score from _OrthoLearner, to enforce Z=None and improve the docstring + return super().score(Y, T, X=X, W=W) + @property def model_final(self): return super().model_final._model_final + + @property + def models_y(self): + return [mdl._model_y for mdl in super().models_nuisance] + + @property + def models_t(self): + return [mdl._model_t for mdl in super().models_nuisance] diff --git a/notebooks/OrthoLearner.ipynb b/notebooks/OrthoLearner.ipynb index 1f80a554c..1fdaee023 100644 --- a/notebooks/OrthoLearner.ipynb +++ b/notebooks/OrthoLearner.ipynb @@ -2,9 +2,18 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The autoreload extension is already loaded. To reload it, use:\n", + " %reload_ext autoreload\n" + ] + } + ], "source": [ "%load_ext autoreload\n", "%autoreload 2\n", @@ -13,7 +22,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -22,7 +31,7 @@ "text": [ "(array([-1.1057289 , -1.53756637, -2.4518278 , ..., 1.10628792,\n", " -1.82966233, -1.78227335]),)\n", - "[<__main__.Wrapper object at 0x131b74dd8>, <__main__.Wrapper object at 0x11abf0be0>]\n" + "[<__main__.Wrapper object at 0x1297feef0>, <__main__.Wrapper object at 0x112869c50>]\n" ] }, { @@ -31,7 +40,7 @@ "array([ 0, 1, 2, ..., 4997, 4998, 4999])" ] }, - "execution_count": 2, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -63,16 +72,16 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 3, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -115,7 +124,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -124,7 +133,7 @@ "1.000143376851309" ] }, - 
"execution_count": 4, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -135,7 +144,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -144,7 +153,7 @@ "array([1.00014338])" ] }, - "execution_count": 5, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -155,7 +164,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -164,7 +173,7 @@ "0.00010033038763819668" ] }, - "execution_count": 6, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -175,7 +184,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -185,7 +194,7 @@ " normalize=False)" ] }, - "execution_count": 7, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -196,7 +205,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -205,7 +214,7 @@ "array([1.00014338])" ] }, - "execution_count": 8, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -216,7 +225,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -225,7 +234,7 @@ "0.00010038314872390833" ] }, - "execution_count": 9, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -236,16 +245,16 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 10, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -296,7 +305,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -305,7 +314,7 @@ "array([[1.00123159]])" ] }, - "execution_count": 11, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } 
@@ -316,7 +325,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -325,7 +334,7 @@ "array([1.00123159])" ] }, - "execution_count": 12, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -336,7 +345,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -345,7 +354,7 @@ "0.002569588332146612" ] }, - "execution_count": 13, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -356,7 +365,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -365,7 +374,7 @@ "1.0012315874866917" ] }, - "execution_count": 14, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -376,7 +385,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -386,7 +395,7 @@ " normalize=False)" ] }, - "execution_count": 15, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -397,7 +406,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -406,7 +415,7 @@ "array([1.00123159])" ] }, - "execution_count": 16, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -417,7 +426,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 18, "metadata": {}, "outputs": [ { @@ -426,7 +435,7 @@ "0.0031604059708364245" ] }, - "execution_count": 17, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -435,6 +444,26 @@ "est.score_" ] }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([1.28171346, 0.03749846, 0.10120681])" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + 
"est.models_nuisance[0]._model_y.coef_" + ] + }, { "cell_type": "code", "execution_count": 18, From f8fdbb2984ba3924c82ebdea60f3e014ad05783a Mon Sep 17 00:00:00 2001 From: Vasileios Syrgkanis Date: Sun, 3 Nov 2019 20:50:59 -0500 Subject: [PATCH 35/64] improved docstrings --- econml/_rlearner.py | 37 ++++++++++++++++++++++++++---------- notebooks/OrthoLearner.ipynb | 8 ++++---- 2 files changed, 31 insertions(+), 14 deletions(-) diff --git a/econml/_rlearner.py b/econml/_rlearner.py index 6280e6103..3b8530794 100644 --- a/econml/_rlearner.py +++ b/econml/_rlearner.py @@ -123,7 +123,7 @@ class _RLearner(_OrthoLearner): The MSE in the final residual on residual regression, i.e. .. math:: - \\frac{1}{n} \\sum_{i=1}^n (Y_i - \\hat{E}[Y|X_i, W_i] - \\hat{\\theta}(X_i)\cdot (T_i - \\hat{E}[T|X_i, W_i]))^2 + \\frac{1}{n} \\sum_{i=1}^n (Y_i - \\hat{E}[Y|X_i, W_i] - \\hat{\\theta}(X_i)\\cdot (T_i - \\hat{E}[T|X_i, W_i]))^2 If `sample_weight` is not None at fit time, then a weighted average is returned. If the outcome Y is multidimensional, then the average of the MSEs for each dimension of Y is returned. @@ -132,6 +132,12 @@ class _RLearner(_OrthoLearner): def __init__(self, model_y, model_t, model_final, discrete_treatment, n_splits, random_state): class ModelNuisance: + """ + Nuisance model fits the model_y and model_t at fit time and at predict time + calculates the residual Y and residual T based on the fitted models and returns + the residuals as two nuisance parameters. + """ + def __init__(self, model_y, model_t): self._model_y = clone(model_y, safe=False) self._model_t = clone(model_t, safe=False) @@ -153,6 +159,17 @@ def predict(self, Y, T, X=None, W=None, Z=None, sample_weight=None): return Y_res, T_res class ModelFinal: + """ + Final model at fit time, fits a residual on residual regression with a heterogeneous coefficient + that depends on X, i.e. + + .. 
math :: + Y - E[Y | X, W] = \\theta(X) \\cdot (T - E[T | X, W]) + \\epsilon + + and at predict time returns :math:`\\theta(X)`. The score method returns the MSE of this final + residual on residual regression. + """ + def __init__(self, model_final): self._model_final = clone(model_final, safe=False) @@ -182,7 +199,7 @@ def score(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None def fit(self, Y, T, X=None, W=None, sample_weight=None, sample_var=None, *, inference=None): """ - Estimate the counterfactual model from data, i.e. estimates function :math:`\\theta(\\cdot)`. + Estimate the counterfactual model from data, i.e. estimates function: math: `\\theta(\\cdot)`. Parameters ---------- @@ -190,13 +207,13 @@ def fit(self, Y, T, X=None, W=None, sample_weight=None, sample_var=None, *, infe Outcomes for each sample T: (n, d_t) matrix or vector of length n Treatments for each sample - X: optional (n, d_x) matrix or None (Default=None) + X: optional(n, d_x) matrix or None (Default=None) Features for each sample - W: optional (n, d_w) matrix or None (Default=None) + W: optional(n, d_w) matrix or None (Default=None) Controls for each sample - sample_weight: optional (n,) vector or None (Default=None) + sample_weight: optional(n,) vector or None (Default=None) Weights for each samples - sample_var: optional (n,) vector or None (Default=None) + sample_var: optional(n,) vector or None (Default=None) Sample variance for each sample inference: string, `Inference` instance, or None Method for performing inference. 
This estimator supports 'bootstrap' @@ -204,7 +221,7 @@ def fit(self, Y, T, X=None, W=None, sample_weight=None, sample_var=None, *, infe Returns ------- - self : _RLearner instance + self: _RLearner instance """ # Replacing fit from _OrthoLearner, to enforce Z=None and improve the docstring return super().fit(Y, T, X=X, W=W, sample_weight=sample_weight, sample_var=sample_var, inference=inference) @@ -224,14 +241,14 @@ def score(self, Y, T, X=None, W=None): Outcomes for each sample T: (n, d_t) matrix or vector of length n Treatments for each sample - X: optional (n, d_x) matrix or None (Default=None) + X: optional(n, d_x) matrix or None (Default=None) Features for each sample - W: optional (n, d_w) matrix or None (Default=None) + W: optional(n, d_w) matrix or None (Default=None) Controls for each sample Returns ------- - score : float + score: float The MSE of the final CATE model on the new data. """ # Replacing score from _OrthoLearner, to enforce Z=None and improve the docstring diff --git a/notebooks/OrthoLearner.ipynb b/notebooks/OrthoLearner.ipynb index 1fdaee023..594e7a9b0 100644 --- a/notebooks/OrthoLearner.ipynb +++ b/notebooks/OrthoLearner.ipynb @@ -446,7 +446,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 30, "metadata": {}, "outputs": [ { @@ -455,7 +455,7 @@ "array([1.28171346, 0.03749846, 0.10120681])" ] }, - "execution_count": 29, + "execution_count": 30, "metadata": {}, "output_type": "execute_result" } @@ -466,7 +466,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 31, "metadata": {}, "outputs": [ { @@ -501,7 +501,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 32, "metadata": {}, "outputs": [ { From b5d5e142620778de453e444cae0e5205ba645024 Mon Sep 17 00:00:00 2001 From: Vasileios Syrgkanis Date: Sun, 3 Nov 2019 21:01:42 -0500 Subject: [PATCH 36/64] linting --- econml/_rlearner.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git 
a/econml/_rlearner.py b/econml/_rlearner.py index 3b8530794..3a40e208f 100644 --- a/econml/_rlearner.py +++ b/econml/_rlearner.py @@ -123,7 +123,8 @@ class _RLearner(_OrthoLearner): The MSE in the final residual on residual regression, i.e. .. math:: - \\frac{1}{n} \\sum_{i=1}^n (Y_i - \\hat{E}[Y|X_i, W_i] - \\hat{\\theta}(X_i)\\cdot (T_i - \\hat{E}[T|X_i, W_i]))^2 + \\frac{1}{n} \\sum_{i=1}^n (Y_i - \\hat{E}[Y|X_i, W_i]\ + - \\hat{\\theta}(X_i)\\cdot (T_i - \\hat{E}[T|X_i, W_i]))^2 If `sample_weight` is not None at fit time, then a weighted average is returned. If the outcome Y is multidimensional, then the average of the MSEs for each dimension of Y is returned. From 96bd4eb2eb17047326cfbf4d8ff0e77506846f60 Mon Sep 17 00:00:00 2001 From: Vasileios Syrgkanis Date: Sun, 3 Nov 2019 21:35:48 -0500 Subject: [PATCH 37/64] docstring example for RLearner --- econml/_ortho_learner.py | 18 ++++++------ econml/_rlearner.py | 62 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+), 9 deletions(-) diff --git a/econml/_ortho_learner.py b/econml/_ortho_learner.py index e7b2344fc..9f3484f1c 100644 --- a/econml/_ortho_learner.py +++ b/econml/_ortho_learner.py @@ -286,35 +286,35 @@ def __init__(self): def fit(self, Y, T, W=None, nuisances=None): Y_res, T_res = nuisances self.model = LinearRegression(fit_intercept=False).fit(T_res.reshape(-1, 1), Y_res) - self.score_ = self.score(Y, T, W=W, nuisances=nuisances) return self def predict(self, X=None): return self.model.coef_[0] def score(self, Y, T, W=None, nuisances=None): Y_res, T_res = nuisances - return np.mean(Y_res - self.model.predict(T_res.reshape(-1, 1)))**2 + return np.mean((Y_res - self.model.predict(T_res.reshape(-1, 1)))**2) np.random.seed(123) X = np.random.normal(size=(100, 3)) - y = X[:, 0] + X[:, 1] + np.random.normal(size=(100,)) - est = _OrthoLearner(ModelNuisance(LinearRegression(), LinearRegression()), ModelFinal(), + y = X[:, 0] + X[:, 1] + np.random.normal(0, 0.1, size=(100,)) + est = 
_OrthoLearner(ModelNuisance(LinearRegression(), LinearRegression()), + ModelFinal(), n_splits=2, discrete_treatment=False, random_state=None) est.fit(y, X[:, 0], W=X[:, 1:]) >>> est.score_ - 0.0015439892272404935 + 0.007568302109999707 >>> est.const_marginal_effect() - 1.2344017222060417 + 1.0236499258047582 >>> est.effect() - array([1.23440172]) + array([1.02364993]) >>> est.effect(T0=0, T1=10) array([12.34401722]) >>> est.score(y, X[:, 0], W=X[:, 1:]) - 0.0003880489502537651 + 0.00727995424098179 >>> est.model_final.model LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False) >>> est.model_final.model.coef_ - array([1.23440172]) + array([1.02364993]) The following example shows how to do double machine learning with discrete treatments, using the _OrthoLearner. diff --git a/econml/_rlearner.py b/econml/_rlearner.py index 3a40e208f..dc9eec559 100644 --- a/econml/_rlearner.py +++ b/econml/_rlearner.py @@ -109,6 +109,68 @@ class _RLearner(_OrthoLearner): If None, the random number generator is the :class:`~numpy.random.mtrand.RandomState` instance used by `np.random`. + Examples + -------- + The example code below implements a very simple version of the double machine learning + method on top of the :py:class:`~econml._ortho_learner._RLearner` class, for expository purposes. + For a more elaborate implementation of a Double Machine Learning child class of the class + checkout :py:class:`~econml.dml.DMLCateEstimator` and its child classes. + + .. highlight:: python + .. 
code-block:: python + + import numpy as np + from sklearn.linear_model import LinearRegression + from econml._rlearner import _RLearner + from sklearn.base import clone + class ModelFirst: + def __init__(self, model): + self._model = clone(model, safe=False) + def fit(self, X, W, Y, sample_weight=None): + self._model.fit(np.hstack([X, W]), Y) + return self + def predict(self, X, W): + return self._model.predict(np.hstack([X, W])) + class ModelFinal: + def fit(self, X, T_res, Y_res, sample_weight=None, sample_var=None): + self.model = LinearRegression(fit_intercept=False).fit(X * T_res.reshape(-1, 1), + Y_res) + return self + def predict(self, X): + return self.model.predict(X) + np.random.seed(123) + X = np.random.normal(size=(1000, 3)) + y = X[:, 0] + X[:, 1] + np.random.normal(0, 0.01, size=(1000,)) + est = _RLearner(ModelFirst(LinearRegression()), + ModelFirst(LinearRegression()), + ModelFinal(), + n_splits=2, discrete_treatment=False, random_state=None) + est.fit(y, X[:, 0], X=np.ones((X.shape[0], 1)), W=X[:, 1:]) + + >>> est.const_marginal_effect(np.ones((1,1))) + array([0.99963147]) + >>> est.effect(np.ones((1,1)), T0=0, T1=10) + array([9.99631472]) + >>> est.score(y, X[:, 0], X=np.ones((X.shape[0], 1)), W=X[:, 1:]) + 9.736380060274913e-05 + >>> est.model_final.model + LinearRegression(copy_X=True, fit_intercept=False, n_jobs=None, + normalize=False) + >>> est.model_final.model.coef_ + array([0.99963147]) + >>> est.score_ + 9.826232040878233e-05 + >>> [mdl._model for mdl in est.models_y] + [LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, + normalize=False), + LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, + normalize=False)] + >>> [mdl._model for mdl in est.models_t] + [LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, + normalize=False), + LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, + normalize=False)] + Attributes ---------- models_y: list of objects of type(model_y) From 
919bc064a724f1dbbbe9d35fed885277050fe5fa Mon Sep 17 00:00:00 2001 From: Vasileios Syrgkanis Date: Sun, 3 Nov 2019 21:36:07 -0500 Subject: [PATCH 38/64] testing notebook --- notebooks/OrthoLearner.ipynb | 257 +++++++++++++++++++++++++++++++---- 1 file changed, 234 insertions(+), 23 deletions(-) diff --git a/notebooks/OrthoLearner.ipynb b/notebooks/OrthoLearner.ipynb index 594e7a9b0..06f0a14f6 100644 --- a/notebooks/OrthoLearner.ipynb +++ b/notebooks/OrthoLearner.ipynb @@ -72,16 +72,16 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 71, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 4, + "execution_count": 71, "metadata": {}, "output_type": "execute_result" } @@ -106,7 +106,6 @@ " def fit(self, Y, T, W=None, nuisances=None):\n", " Y_res, T_res = nuisances\n", " self.model = LinearRegression(fit_intercept=False).fit(T_res.reshape(-1, 1), Y_res)\n", - " self.score_ = self.score(Y, T, W=W, nuisances=nuisances)\n", " return self\n", " def predict(self, X=None):\n", " return self.model.coef_[0]\n", @@ -114,8 +113,8 @@ " Y_res, T_res = nuisances\n", " return np.mean((Y_res - self.model.predict(T_res.reshape(-1, 1)))**2)\n", "np.random.seed(123)\n", - "X = np.random.normal(size=(10000, 3))\n", - "y = X[:, 0] + X[:, 1] + np.random.normal(0, 0.01, size=(10000,))\n", + "X = np.random.normal(size=(100, 3))\n", + "y = X[:, 0] + X[:, 1] + np.random.normal(0, 0.1, size=(100,))\n", "est = _OrthoLearner(ModelNuisance(LinearRegression(), LinearRegression()),\n", " ModelFinal(),\n", " n_splits=2, discrete_treatment=False, random_state=None)\n", @@ -124,16 +123,16 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 72, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "1.000143376851309" + "1.0236499258047582" ] }, - "execution_count": 5, + "execution_count": 72, "metadata": {}, "output_type": "execute_result" } @@ -144,16 +143,16 @@ }, { "cell_type": "code", - "execution_count": 6, + 
"execution_count": 73, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "array([1.00014338])" + "array([1.02364993])" ] }, - "execution_count": 6, + "execution_count": 73, "metadata": {}, "output_type": "execute_result" } @@ -164,16 +163,16 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 74, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "0.00010033038763819668" + "0.00727995424098179" ] }, - "execution_count": 7, + "execution_count": 74, "metadata": {}, "output_type": "execute_result" } @@ -184,7 +183,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 75, "metadata": {}, "outputs": [ { @@ -194,7 +193,7 @@ " normalize=False)" ] }, - "execution_count": 8, + "execution_count": 75, "metadata": {}, "output_type": "execute_result" } @@ -205,16 +204,16 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 76, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "array([1.00014338])" + "array([1.02364993])" ] }, - "execution_count": 9, + "execution_count": 76, "metadata": {}, "output_type": "execute_result" } @@ -225,16 +224,16 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 77, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "0.00010038314872390833" + "0.007568302109999707" ] }, - "execution_count": 10, + "execution_count": 77, "metadata": {}, "output_type": "execute_result" } @@ -536,6 +535,218 @@ "print(est.const_marginal_effect(X[:1, 1:dx]))" ] }, + { + "cell_type": "code", + "execution_count": 101, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 101, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import numpy as np\n", + "from sklearn.linear_model import LinearRegression\n", + "from econml._rlearner import _RLearner\n", + "from sklearn.base import clone\n", + "class ModelFirst:\n", + " def __init__(self, model):\n", + " self._model = clone(model, 
safe=False)\n", + " def fit(self, X, W, Y, sample_weight=None):\n", + " self._model.fit(np.hstack([X, W]), Y)\n", + " return self\n", + " def predict(self, X, W):\n", + " return self._model.predict(np.hstack([X, W]))\n", + "class ModelFinal:\n", + " def fit(self, X, T_res, Y_res, sample_weight=None, sample_var=None):\n", + " self.model = LinearRegression(fit_intercept=False).fit(X * T_res.reshape(-1, 1), Y_res)\n", + " return self\n", + " def predict(self, X):\n", + " return self.model.predict(X)\n", + "np.random.seed(123)\n", + "X = np.random.normal(size=(1000, 3))\n", + "y = X[:, 0] + X[:, 1] + np.random.normal(0, 0.01, size=(1000,))\n", + "est = _RLearner(ModelFirst(LinearRegression()),\n", + " ModelFirst(LinearRegression()),\n", + " ModelFinal(),\n", + " n_splits=2, discrete_treatment=False, random_state=None)\n", + "est.fit(y, X[:, 0], X=np.ones((X.shape[0], 1)), W=X[:, 1:])" + ] + }, + { + "cell_type": "code", + "execution_count": 102, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0.99963147])" + ] + }, + "execution_count": 102, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "est.const_marginal_effect(np.ones((1,1)))" + ] + }, + { + "cell_type": "code", + "execution_count": 110, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([9.99631472])" + ] + }, + "execution_count": 110, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "est.effect(np.ones((1,1)), T0=0, T1=10)" + ] + }, + { + "cell_type": "code", + "execution_count": 111, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "9.736380060274913e-05" + ] + }, + "execution_count": 111, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "est.score(y, X[:, 0], X=np.ones((X.shape[0], 1)), W=X[:, 1:])" + ] + }, + { + "cell_type": "code", + "execution_count": 105, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + 
"LinearRegression(copy_X=True, fit_intercept=False, n_jobs=None,\n", + " normalize=False)" + ] + }, + "execution_count": 105, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "est.model_final.model" + ] + }, + { + "cell_type": "code", + "execution_count": 106, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0.99963147])" + ] + }, + "execution_count": 106, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "est.model_final.model.coef_" + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "9.826232040878233e-05" + ] + }, + "execution_count": 107, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "est.score_" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,\n", + " normalize=False),\n", + " LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,\n", + " normalize=False)]" + ] + }, + "execution_count": 108, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "[mdl._model for mdl in est.models_y]" + ] + }, + { + "cell_type": "code", + "execution_count": 109, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,\n", + " normalize=False),\n", + " LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,\n", + " normalize=False)]" + ] + }, + "execution_count": 109, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "[mdl._model for mdl in est.models_t]" + ] + }, { "cell_type": "code", "execution_count": null, From de3c81bd23e30eedb81bc9cdf3d1191a65e9fc96 Mon Sep 17 00:00:00 2001 From: Vasileios Syrgkanis Date: Sun, 3 Nov 2019 21:40:16 -0500 Subject: [PATCH 39/64] accessing fitted models_y and models_t in 
DMLCateEstimator --- econml/dml.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/econml/dml.py b/econml/dml.py index c064c1e4b..e13ce369a 100644 --- a/econml/dml.py +++ b/econml/dml.py @@ -183,6 +183,14 @@ def featurizer(self): def model_final(self): return super().model_final._model + @property + def models_y(self): + return [mdl._model for mdl in super().models_y] + + @property + def models_t(self): + return [mdl._model for mdl in super().models_t] + class LinearDMLCateEstimator(StatsModelsCateEstimatorMixin, DMLCateEstimator): """ From ea7680a8e29fb8e7a11f3cd5c5d4d1a942adac33 Mon Sep 17 00:00:00 2001 From: Vasileios Syrgkanis Date: Sun, 3 Nov 2019 21:47:52 -0500 Subject: [PATCH 40/64] notebook --- notebooks/OrthoLearner.ipynb | 173 +++++++++++++++++++++++++++++++++++ 1 file changed, 173 insertions(+) diff --git a/notebooks/OrthoLearner.ipynb b/notebooks/OrthoLearner.ipynb index 06f0a14f6..1f0a816db 100644 --- a/notebooks/OrthoLearner.ipynb +++ b/notebooks/OrthoLearner.ipynb @@ -747,6 +747,179 @@ "[mdl._model for mdl in est.models_t]" ] }, + { + "cell_type": "code", + "execution_count": 114, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 114, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from econml.dml import LinearDMLCateEstimator\n", + "\n", + "np.random.seed(123)\n", + "X = np.random.normal(size=(1000, 3))\n", + "y = X[:, 0] + X[:, 1] + np.random.normal(0, 0.01, size=(1000,))\n", + "est = LinearDMLCateEstimator(model_y=LinearRegression(),\n", + " model_t=LinearRegression())\n", + "est.fit(y, X[:, 0], W=X[:, 1:], inference='statsmodels')" + ] + }, + { + "cell_type": "code", + "execution_count": 115, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([1.00089549])" + ] + }, + "execution_count": 115, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "est.effect()" + ] + }, + { + "cell_type": "code", + 
"execution_count": 116, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(array([0.99404817]), array([1.0077428]))" + ] + }, + "execution_count": 116, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "est.effect_interval()" + ] + }, + { + "cell_type": "code", + "execution_count": 117, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,\n", + " normalize=False),\n", + " LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,\n", + " normalize=False)]" + ] + }, + "execution_count": 117, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "est.models_y" + ] + }, + { + "cell_type": "code", + "execution_count": 118, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,\n", + " normalize=False),\n", + " LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,\n", + " normalize=False)]" + ] + }, + "execution_count": 118, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "est.models_t" + ] + }, + { + "cell_type": "code", + "execution_count": 119, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([1.00089549])" + ] + }, + "execution_count": 119, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "est.coef_" + ] + }, + { + "cell_type": "code", + "execution_count": 121, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(array([0.99404817]), array([1.0077428]))" + ] + }, + "execution_count": 121, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "est.coef__interval()" + ] + }, + { + "cell_type": "code", + "execution_count": 127, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0.00416287])" + ] + }, + "execution_count": 127, + "metadata": {}, + "output_type": "execute_result" + } + 
], + "source": [ + "est.model_final._param_stderr" + ] + }, { "cell_type": "code", "execution_count": null, From 187f32bfe9cde2df32d244e074f80c723159ace0 Mon Sep 17 00:00:00 2001 From: Vasileios Syrgkanis Date: Mon, 4 Nov 2019 08:24:03 -0500 Subject: [PATCH 41/64] added draft example implementation of DRLearner based on the _OrthoLearner in the OrthoLearner notebook --- notebooks/OrthoLearner.ipynb | 458 ++++++++++++++++++++++++----------- 1 file changed, 315 insertions(+), 143 deletions(-) diff --git a/notebooks/OrthoLearner.ipynb b/notebooks/OrthoLearner.ipynb index 1f0a816db..89b03a773 100644 --- a/notebooks/OrthoLearner.ipynb +++ b/notebooks/OrthoLearner.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 2, + "execution_count": 64, "metadata": {}, "outputs": [ { @@ -16,13 +16,19 @@ ], "source": [ "%load_ext autoreload\n", - "%autoreload 2\n", - "from econml._ortho_learner import _crossfit" + "%autoreload 2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Testing _crossfit Function" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 65, "metadata": {}, "outputs": [ { @@ -31,7 +37,7 @@ "text": [ "(array([-1.1057289 , -1.53756637, -2.4518278 , ..., 1.10628792,\n", " -1.82966233, -1.78227335]),)\n", - "[<__main__.Wrapper object at 0x1297feef0>, <__main__.Wrapper object at 0x112869c50>]\n" + "[<__main__.Wrapper object at 0x12b7c2940>, <__main__.Wrapper object at 0x12b85ef28>]\n" ] }, { @@ -40,7 +46,7 @@ "array([ 0, 1, 2, ..., 4997, 4998, 4999])" ] }, - "execution_count": 3, + "execution_count": 65, "metadata": {}, "output_type": "execute_result" } @@ -49,6 +55,8 @@ "import numpy as np\n", "from sklearn.model_selection import KFold\n", "from sklearn.linear_model import Lasso\n", + "from econml._ortho_learner import _crossfit\n", + "\n", "class Wrapper:\n", " def __init__(self, model):\n", " self._model = model\n", @@ -70,18 +78,25 @@ "fitted_inds" ] }, + { + "cell_type": "markdown", + 
"metadata": {}, + "source": [ + "# Simple DML with the _OrthoLearner" + ] + }, { "cell_type": "code", - "execution_count": 71, + "execution_count": 66, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 71, + "execution_count": 66, "metadata": {}, "output_type": "execute_result" } @@ -123,7 +138,7 @@ }, { "cell_type": "code", - "execution_count": 72, + "execution_count": 67, "metadata": {}, "outputs": [ { @@ -132,7 +147,7 @@ "1.0236499258047582" ] }, - "execution_count": 72, + "execution_count": 67, "metadata": {}, "output_type": "execute_result" } @@ -143,7 +158,7 @@ }, { "cell_type": "code", - "execution_count": 73, + "execution_count": 68, "metadata": {}, "outputs": [ { @@ -152,7 +167,7 @@ "array([1.02364993])" ] }, - "execution_count": 73, + "execution_count": 68, "metadata": {}, "output_type": "execute_result" } @@ -163,7 +178,7 @@ }, { "cell_type": "code", - "execution_count": 74, + "execution_count": 69, "metadata": {}, "outputs": [ { @@ -172,7 +187,7 @@ "0.00727995424098179" ] }, - "execution_count": 74, + "execution_count": 69, "metadata": {}, "output_type": "execute_result" } @@ -183,7 +198,7 @@ }, { "cell_type": "code", - "execution_count": 75, + "execution_count": 70, "metadata": {}, "outputs": [ { @@ -193,7 +208,7 @@ " normalize=False)" ] }, - "execution_count": 75, + "execution_count": 70, "metadata": {}, "output_type": "execute_result" } @@ -204,7 +219,7 @@ }, { "cell_type": "code", - "execution_count": 76, + "execution_count": 71, "metadata": {}, "outputs": [ { @@ -213,7 +228,7 @@ "array([1.02364993])" ] }, - "execution_count": 76, + "execution_count": 71, "metadata": {}, "output_type": "execute_result" } @@ -224,7 +239,7 @@ }, { "cell_type": "code", - "execution_count": 77, + "execution_count": 72, "metadata": {}, "outputs": [ { @@ -233,7 +248,7 @@ "0.007568302109999707" ] }, - "execution_count": 77, + "execution_count": 72, "metadata": {}, "output_type": "execute_result" } @@ -242,18 +257,25 @@ 
"est.score_" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Simple DML with Discrete Treatments with the _OrthoLearner" + ] + }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 73, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 11, + "execution_count": 73, "metadata": {}, "output_type": "execute_result" } @@ -304,7 +326,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 74, "metadata": {}, "outputs": [ { @@ -313,7 +335,7 @@ "array([[1.00123159]])" ] }, - "execution_count": 12, + "execution_count": 74, "metadata": {}, "output_type": "execute_result" } @@ -324,7 +346,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 75, "metadata": {}, "outputs": [ { @@ -333,7 +355,7 @@ "array([1.00123159])" ] }, - "execution_count": 13, + "execution_count": 75, "metadata": {}, "output_type": "execute_result" } @@ -344,7 +366,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 76, "metadata": {}, "outputs": [ { @@ -353,7 +375,7 @@ "0.002569588332146612" ] }, - "execution_count": 14, + "execution_count": 76, "metadata": {}, "output_type": "execute_result" } @@ -364,7 +386,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 77, "metadata": {}, "outputs": [ { @@ -373,7 +395,7 @@ "1.0012315874866917" ] }, - "execution_count": 15, + "execution_count": 77, "metadata": {}, "output_type": "execute_result" } @@ -384,7 +406,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 78, "metadata": {}, "outputs": [ { @@ -394,7 +416,7 @@ " normalize=False)" ] }, - "execution_count": 16, + "execution_count": 78, "metadata": {}, "output_type": "execute_result" } @@ -405,7 +427,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 79, "metadata": {}, "outputs": [ { @@ -414,7 +436,7 @@ "array([1.00123159])" ] }, - "execution_count": 17, + "execution_count": 79, 
"metadata": {}, "output_type": "execute_result" } @@ -425,7 +447,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 80, "metadata": {}, "outputs": [ { @@ -434,7 +456,7 @@ "0.0031604059708364245" ] }, - "execution_count": 18, + "execution_count": 80, "metadata": {}, "output_type": "execute_result" } @@ -445,7 +467,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 81, "metadata": {}, "outputs": [ { @@ -454,7 +476,7 @@ "array([1.28171346, 0.03749846, 0.10120681])" ] }, - "execution_count": 30, + "execution_count": 81, "metadata": {}, "output_type": "execute_result" } @@ -464,89 +486,24 @@ ] }, { - "cell_type": "code", - "execution_count": 31, + "cell_type": "markdown", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[[1.64721067 1.68104532 0.07458696]]\n", - "[1.00979079]\n", - "(array([0.99962896]), array([1.01995263]))\n", - "[1.00096897 0.00524782]\n", - "(array([9.95750740e-01, 5.09187652e-05]), array([1.0061872 , 0.01044472]))\n", - "[1.00979079]\n" - ] - } - ], "source": [ - "from econml.dml import LinearDMLCateEstimator\n", - "from sklearn.preprocessing import PolynomialFeatures\n", - "from sklearn.linear_model import LinearRegression, LassoCV, Lasso\n", - "import numpy as np\n", - "X = np.random.normal(size=(100000, 3))\n", - "y = X[:, 0] + np.random.normal(size=(100000,))\n", - "est = LinearDMLCateEstimator(model_y=LinearRegression(), model_t=LinearRegression())\n", - "est.fit(y, X[:, 0], X[:, [1]], X[:, 2:], inference='statsmodels')\n", - "print(X[:1])\n", - "print(est.effect(X[:1, [1]]))\n", - "print(est.effect_interval(X[:1, [1]]))\n", - "print(est.coef_)\n", - "print(est.coef__interval())\n", - "print(est.const_marginal_effect(X[:1, [1]]))" + "# Simple DML with the _RLearner" ] }, { "cell_type": "code", - "execution_count": 32, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[[ 0.89256841 0.27329632 
1.20577242 -1.19175328 0.15390398 -0.23511674\n", - " -0.55378668 -2.64852199 0.73121095 -0.73312251]]\n", - "[[0.99664838 0.99664838]]\n", - "[[ 0.99040771 0.00504131 0. -0.0034206 -0.01300061 -0.01033077\n", - " -0.01303745 0.00259075]\n", - " [ 0.99040771 0.00504131 0. -0.0034206 -0.01300061 -0.01033077\n", - " -0.01303745 0.00259075]]\n", - "[[0.99664838 0.99664838]]\n" - ] - } - ], - "source": [ - "from econml.dml import SparseLinearDMLCateEstimator\n", - "from sklearn.preprocessing import PolynomialFeatures\n", - "from sklearn.linear_model import LinearRegression, LassoCV, Lasso, MultiTaskLassoCV\n", - "import numpy as np\n", - "X = np.random.normal(size=(5000, 10))\n", - "y = X[:, 0] + np.random.normal(size=(5000,))\n", - "est = SparseLinearDMLCateEstimator(model_y=MultiTaskLassoCV(cv=3), model_t=LassoCV(cv=3),\n", - " model_final=MultiTaskLassoCV(cv=3, fit_intercept=False))\n", - "dx = 8\n", - "est.fit(np.hstack([y.reshape(-1,1), y.reshape(-1,1)]), X[:, 0], X[:, 1:dx], X[:, dx:])\n", - "print(X[:1])\n", - "print(est.effect(X[:1, 1:dx]))\n", - "print(est.model_final.coef_)\n", - "print(est.const_marginal_effect(X[:1, 1:dx]))" - ] - }, - { - "cell_type": "code", - "execution_count": 101, + "execution_count": 82, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 101, + "execution_count": 82, "metadata": {}, "output_type": "execute_result" } @@ -582,7 +539,7 @@ }, { "cell_type": "code", - "execution_count": 102, + "execution_count": 83, "metadata": {}, "outputs": [ { @@ -591,7 +548,7 @@ "array([0.99963147])" ] }, - "execution_count": 102, + "execution_count": 83, "metadata": {}, "output_type": "execute_result" } @@ -602,7 +559,7 @@ }, { "cell_type": "code", - "execution_count": 110, + "execution_count": 84, "metadata": {}, "outputs": [ { @@ -611,7 +568,7 @@ "array([9.99631472])" ] }, - "execution_count": 110, + "execution_count": 84, "metadata": {}, "output_type": "execute_result" } @@ -622,7 +579,7 @@ }, { 
"cell_type": "code", - "execution_count": 111, + "execution_count": 85, "metadata": {}, "outputs": [ { @@ -631,7 +588,7 @@ "9.736380060274913e-05" ] }, - "execution_count": 111, + "execution_count": 85, "metadata": {}, "output_type": "execute_result" } @@ -642,7 +599,7 @@ }, { "cell_type": "code", - "execution_count": 105, + "execution_count": 86, "metadata": {}, "outputs": [ { @@ -652,7 +609,7 @@ " normalize=False)" ] }, - "execution_count": 105, + "execution_count": 86, "metadata": {}, "output_type": "execute_result" } @@ -663,7 +620,7 @@ }, { "cell_type": "code", - "execution_count": 106, + "execution_count": 87, "metadata": {}, "outputs": [ { @@ -672,7 +629,7 @@ "array([0.99963147])" ] }, - "execution_count": 106, + "execution_count": 87, "metadata": {}, "output_type": "execute_result" } @@ -683,7 +640,7 @@ }, { "cell_type": "code", - "execution_count": 107, + "execution_count": 88, "metadata": {}, "outputs": [ { @@ -692,7 +649,7 @@ "9.826232040878233e-05" ] }, - "execution_count": 107, + "execution_count": 88, "metadata": {}, "output_type": "execute_result" } @@ -703,7 +660,7 @@ }, { "cell_type": "code", - "execution_count": 108, + "execution_count": 89, "metadata": {}, "outputs": [ { @@ -715,7 +672,7 @@ " normalize=False)]" ] }, - "execution_count": 108, + "execution_count": 89, "metadata": {}, "output_type": "execute_result" } @@ -726,7 +683,7 @@ }, { "cell_type": "code", - "execution_count": 109, + "execution_count": 90, "metadata": {}, "outputs": [ { @@ -738,7 +695,7 @@ " normalize=False)]" ] }, - "execution_count": 109, + "execution_count": 90, "metadata": {}, "output_type": "execute_result" } @@ -747,18 +704,25 @@ "[mdl._model for mdl in est.models_t]" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Checking All Good with LinearDMLCateEstimator" + ] + }, { "cell_type": "code", - "execution_count": 114, + "execution_count": 91, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 114, + 
"execution_count": 91, "metadata": {}, "output_type": "execute_result" } @@ -776,7 +740,7 @@ }, { "cell_type": "code", - "execution_count": 115, + "execution_count": 92, "metadata": {}, "outputs": [ { @@ -785,7 +749,7 @@ "array([1.00089549])" ] }, - "execution_count": 115, + "execution_count": 92, "metadata": {}, "output_type": "execute_result" } @@ -796,7 +760,7 @@ }, { "cell_type": "code", - "execution_count": 116, + "execution_count": 93, "metadata": {}, "outputs": [ { @@ -805,7 +769,7 @@ "(array([0.99404817]), array([1.0077428]))" ] }, - "execution_count": 116, + "execution_count": 93, "metadata": {}, "output_type": "execute_result" } @@ -816,7 +780,7 @@ }, { "cell_type": "code", - "execution_count": 117, + "execution_count": 94, "metadata": {}, "outputs": [ { @@ -828,7 +792,7 @@ " normalize=False)]" ] }, - "execution_count": 117, + "execution_count": 94, "metadata": {}, "output_type": "execute_result" } @@ -839,7 +803,7 @@ }, { "cell_type": "code", - "execution_count": 118, + "execution_count": 95, "metadata": {}, "outputs": [ { @@ -851,7 +815,7 @@ " normalize=False)]" ] }, - "execution_count": 118, + "execution_count": 95, "metadata": {}, "output_type": "execute_result" } @@ -862,7 +826,7 @@ }, { "cell_type": "code", - "execution_count": 119, + "execution_count": 96, "metadata": {}, "outputs": [ { @@ -871,7 +835,7 @@ "array([1.00089549])" ] }, - "execution_count": 119, + "execution_count": 96, "metadata": {}, "output_type": "execute_result" } @@ -882,7 +846,7 @@ }, { "cell_type": "code", - "execution_count": 121, + "execution_count": 97, "metadata": {}, "outputs": [ { @@ -891,7 +855,7 @@ "(array([0.99404817]), array([1.0077428]))" ] }, - "execution_count": 121, + "execution_count": 97, "metadata": {}, "output_type": "execute_result" } @@ -902,7 +866,27 @@ }, { "cell_type": "code", - "execution_count": 127, + "execution_count": 98, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 98, + "metadata": {}, + 
"output_type": "execute_result" + } + ], + "source": [ + "est.model_final" + ] + }, + { + "cell_type": "code", + "execution_count": 99, "metadata": {}, "outputs": [ { @@ -911,7 +895,7 @@ "array([0.00416287])" ] }, - "execution_count": 127, + "execution_count": 99, "metadata": {}, "output_type": "execute_result" } @@ -920,6 +904,194 @@ "est.model_final._param_stderr" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# DR Learner Based on _OrthoLearner" + ] + }, + { + "cell_type": "code", + "execution_count": 100, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 100, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import numpy as np\n", + "from sklearn.linear_model import LinearRegression\n", + "from econml._ortho_learner import _OrthoLearner\n", + "class ModelNuisance:\n", + " def __init__(self, model_t, model_y):\n", + " self._model_t = model_t\n", + " self._model_y = model_y\n", + "\n", + " def fit(self, Y, T, X=None, W=None):\n", + " self._model_t.fit(np.hstack([X, W]), np.matmul(T, np.arange(1, T.shape[1]+1)))\n", + " self._model_y.fit(np.hstack([T, X, W]), Y)\n", + " return self\n", + "\n", + " def predict(self, Y, T, X=None, W=None):\n", + " propensities = self._model_t.predict_proba(np.hstack([X, W]))\n", + " Y_pred = np.zeros((T.shape[0], T.shape[1] + 1))\n", + " T_counter = np.zeros(T.shape)\n", + " Y_pred[:, 0] = self._model_y.predict(np.hstack([T_counter, X, W]))\n", + " Y_pred[:, 0] += (Y - Y_pred[:, 0]) * np.all(T==0, axis=1) / propensities[:, 0]\n", + " for t in np.arange(T.shape[1]):\n", + " T_counter = np.zeros(T.shape)\n", + " T_counter[:, t] = 1\n", + " Y_pred[:, t + 1] = self._model_y.predict(np.hstack([T_counter, X, W]))\n", + " Y_pred[:, t + 1] += (Y - Y_pred[:, t + 1]) * (T[:, t] == 1) / propensities[:, t + 1]\n", + " return Y_pred\n", + "\n", + "class ModelFinal:\n", + "\n", + " def __init__(self):\n", + " return\n", + "\n", + " def 
fit(self, Y, T, X=None, W=None, nuisances=None):\n", + " Y_pred, = nuisances\n", + " self.models_cate = [LinearRegression().fit(X, Y_pred[:, t] - Y_pred[:, 0])\n", + " for t in np.arange(1, Y_pred.shape[1])]\n", + " return self\n", + "\n", + " def predict(self, X=None):\n", + " # theta needs to be of dimension (1, d_t) if T is (n, d_t)\n", + " return np.array([mdl.predict(X) for mdl in self.models_cate]).T\n", + "\n", + "np.random.seed(123)\n", + "X = np.random.normal(size=(1000, 3))\n", + "import scipy.special\n", + "from sklearn.linear_model import LogisticRegression\n", + "T = np.random.binomial(1, scipy.special.expit(X[:, 0]))\n", + "sigma = 0.01\n", + "y = (1 + .5*X[:, 0]) * T + X[:, 0] + np.random.normal(0, sigma, size=(1000,))\n", + "est = _OrthoLearner(ModelNuisance(LogisticRegression(solver='lbfgs'), LinearRegression()), ModelFinal(),\n", + " n_splits=2, discrete_treatment=True, random_state=None)\n", + "est.fit(y, T, X=X[:, [0]], W=X[:, 1:])" + ] + }, + { + "cell_type": "code", + "execution_count": 101, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 0.43111746],\n", + " [ 0.21377249],\n", + " [-0.26176354],\n", + " [ 0.54421168],\n", + " [ 1.76258919],\n", + " [ 0.76761463],\n", + " [ 1.51079693],\n", + " [ 1.76224943],\n", + " [ 0.34418752],\n", + " [ 0.2538734 ]])" + ] + }, + "execution_count": 101, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "est.const_marginal_effect(X[:10, [0]])" + ] + }, + { + "cell_type": "code", + "execution_count": 102, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[array([[ 1.07859458, -0.09378512, -0.16819498]]),\n", + " array([[ 0.87520635, -0.07950399, 0.06037872]])]" + ] + }, + "execution_count": 102, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "[mdl._model_t.coef_ for mdl in est.models_nuisance]" + ] + }, + { + "cell_type": "code", + "execution_count": 103, + "metadata": {}, + "outputs": [ + { + "data": 
{ + "text/plain": [ + "[array([ 0.99703455, 1.25799456, -0.00554411, 0.00216083]),\n", + " array([ 1.04547656e+00, 1.21020962e+00, 7.94069500e-04, -9.68240609e-03])]" + ] + }, + "execution_count": 103, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "[mdl._model_y.coef_ for mdl in est.models_nuisance]" + ] + }, + { + "cell_type": "code", + "execution_count": 104, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0.51667104])" + ] + }, + "execution_count": 104, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "est.model_final.models_cate[0].coef_" + ] + }, + { + "cell_type": "code", + "execution_count": 105, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.9920313527587243" + ] + }, + "execution_count": 105, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "est.model_final.models_cate[0].intercept_" + ] + }, { "cell_type": "code", "execution_count": null, From d5bc747f3d0d5da02b8dcf246eccae38106e661a Mon Sep 17 00:00:00 2001 From: Vasilis Date: Mon, 4 Nov 2019 19:32:36 -0500 Subject: [PATCH 42/64] improving docstring by removing unnecessary highlight of code --- econml/_ortho_learner.py | 20 ++++---------------- econml/_rlearner.py | 20 ++++---------------- 2 files changed, 8 insertions(+), 32 deletions(-) diff --git a/econml/_ortho_learner.py b/econml/_ortho_learner.py index 9f3484f1c..3faa667f0 100644 --- a/econml/_ortho_learner.py +++ b/econml/_ortho_learner.py @@ -197,10 +197,7 @@ class _OrthoLearner(TreatmentExpansionMixin, LinearCateEstimator): ---------- model_nuisance: estimator The estimator for fitting the nuisance function. Must implement - `fit` and `predict` methods that both have signatures: - - .. highlight:: python - ..
code-block:: python + `fit` and `predict` methods that both have signatures:: model_nuisance.fit(Y, T, X=X, W=W, Z=Z, sample_weight=sample_weight, sample_var=sample_var) @@ -214,10 +211,7 @@ class _OrthoLearner(TreatmentExpansionMixin, LinearCateEstimator): one-hot encoding of the original input `T`, excluding the first column of the one-hot. model_final: estimator for fitting the response residuals to the features and treatment residuals - Must implement `fit` and `predict` methods that must have signatures: - - .. highlight:: python - .. code-block:: python + Must implement `fit` and `predict` methods that must have signatures:: model_final.fit(Y, T, X=X, W=W, Z=Z, nuisances=nuisances, sample_weight=sample_weight, sample_var=sample_var) @@ -262,10 +256,7 @@ class _OrthoLearner(TreatmentExpansionMixin, LinearCateEstimator): method on top of the :py:class:`~econml._ortho_learner._OrthoLearner` class, for expository purposes. For a more elaborate implementation of a Double Machine Learning child class of the class :py:class:`~econml._ortho_learner._OrthoLearner` checkout :py:class:`~econml.dml.DMLCateEstimator` - and its child classes. - - .. highlight:: python - .. code-block:: python + and its child classes:: import numpy as np from sklearn.linear_model import LinearRegression @@ -317,10 +308,7 @@ def score(self, Y, T, W=None, nuisances=None): array([1.02364993]) The following example shows how to do double machine learning with discrete treatments, using - the _OrthoLearner. - - .. highlight:: python - .. code-block:: python + the _OrthoLearner:: class ModelNuisance: def __init__(self, model_t, model_y): diff --git a/econml/_rlearner.py b/econml/_rlearner.py index dc9eec559..2c4881531 100644 --- a/econml/_rlearner.py +++ b/econml/_rlearner.py @@ -53,10 +53,7 @@ class _RLearner(_OrthoLearner): model_y: estimator of E[Y | X, W] The estimator for fitting the response to the features and controls. Must implement `fit` and `predict` methods. 
Unlike sklearn estimators both methods must - take an extra second argument (the controls), i.e. : - - .. highlight:: python - .. code-block:: python + take an extra second argument (the controls), i.e. :: model_y.fit(X, W, Y, sample_weight=sample_weight) model_y.predict(X, W) @@ -64,10 +61,7 @@ class _RLearner(_OrthoLearner): model_t: estimator of E[T | X, W] The estimator for fitting the treatment to the features and controls. Must implement `fit` and `predict` methods. Unlike sklearn estimators both methods must - take an extra second argument (the controls), i.e. : - - .. highlight:: python - .. code-block:: python + take an extra second argument (the controls), i.e. :: model_t.fit(X, W, T, sample_weight=sample_weight) model_t.predict(X, W) @@ -75,10 +69,7 @@ class _RLearner(_OrthoLearner): model_final: estimator for fitting the response residuals to the features and treatment residuals Must implement `fit` and `predict` methods. Unlike sklearn estimators the fit methods must take an extra second argument (the treatment residuals). Predict, on the other hand, - should just take the features and return the constant marginal effect. More, concretely: - - .. highlight:: python - .. code-block:: python + should just take the features and return the constant marginal effect. More, concretely:: model_final.fit(X, T_res, Y_res, sample_weight=sample_weight, sample_var=sample_var) @@ -114,10 +105,7 @@ class _RLearner(_OrthoLearner): The example code below implements a very simple version of the double machine learning method on top of the :py:class:`~econml._ortho_learner._RLearner` class, for expository purposes. For a more elaborate implementation of a Double Machine Learning child class of the class - checkout :py:class:`~econml.dml.DMLCateEstimator` and its child classes. - - .. highlight:: python - .. 
code-block:: python + checkout :py:class:`~econml.dml.DMLCateEstimator` and its child classes:: import numpy as np from sklearn.linear_model import LinearRegression From b56048b27592b285256e291f1178b4c84be272ba Mon Sep 17 00:00:00 2001 From: Vasilis Date: Mon, 4 Nov 2019 19:34:06 -0500 Subject: [PATCH 43/64] made sample weight and sample var keyword only in the _RLearner --- econml/_rlearner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/econml/_rlearner.py b/econml/_rlearner.py index 2c4881531..f4a9e08be 100644 --- a/econml/_rlearner.py +++ b/econml/_rlearner.py @@ -248,7 +248,7 @@ def score(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None super().__init__(ModelNuisance(model_y, model_t), ModelFinal(model_final), discrete_treatment, n_splits, random_state) - def fit(self, Y, T, X=None, W=None, sample_weight=None, sample_var=None, *, inference=None): + def fit(self, Y, T, X=None, W=None, *, sample_weight=None, sample_var=None, inference=None): """ Estimate the counterfactual model from data, i.e. estimates function: math: `\\theta(\\cdot)`. From 5d2288fc37f672e76c90c90938b63a70bf864e2e Mon Sep 17 00:00:00 2001 From: Vasilis Date: Mon, 4 Nov 2019 19:37:15 -0500 Subject: [PATCH 44/64] improved docstring in cate estimator --- econml/cate_estimator.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/econml/cate_estimator.py b/econml/cate_estimator.py index 8025eec7b..2faed1dce 100644 --- a/econml/cate_estimator.py +++ b/econml/cate_estimator.py @@ -122,7 +122,7 @@ def effect(self, X=None, *, T0, T1): @abc.abstractmethod def marginal_effect(self, T, X=None): """ - Calculate the heterogeneous marginal effect marginal(T, X). + Calculate the heterogeneous marginal effect marginal_tau(T, X). The marginal effect is calculated around a base treatment point conditional on a vector of features on a set of m test samples {Tᵢ, Xᵢ}. 
@@ -208,7 +208,7 @@ def const_marginal_effect(self, X=None): def effect(self, X=None, *, T0, T1): """ - Calculate the heterogeneous treatment effect τ(·,·,·). + Calculate the heterogeneous treatment effect tau(X, T0, T1). The effect is calculatred between the two treatment points conditional on a vector of features on a set of m test samples {T0ᵢ, T1ᵢ, Xᵢ}. @@ -250,7 +250,7 @@ def effect(self, X=None, *, T0, T1): def marginal_effect(self, T, X=None): """ - Calculate the heterogeneous marginal effect ∂τ(·,·). + Calculate the heterogeneous marginal effect marginal_tau(T, X). The marginal effect is calculated around a base treatment point conditional on a vector of features on a set of m test samples {Tᵢ, Xᵢ}. From fafbb29628883a0aa06d3130160e39115516f493 Mon Sep 17 00:00:00 2001 From: Vasilis Date: Mon, 4 Nov 2019 19:41:45 -0500 Subject: [PATCH 45/64] added comment regarding going back from the one-hot encoding to the label version. --- econml/dml.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/econml/dml.py b/econml/dml.py index e13ce369a..5194cc28c 100644 --- a/econml/dml.py +++ b/econml/dml.py @@ -117,6 +117,9 @@ def _combine(self, X, W, n_samples, fitting=True): def fit(self, X, W, Target, sample_weight=None): if (not self._is_Y) and discrete_treatment: + # In this case, the Target is the one-hot-encoding of the treatment variable + # We need to go back to the label representation of the one-hot so as to call + # the classifier. 
Target = np.matmul(Target, np.arange(1, Target.shape[1] + 1)).flatten() if sample_weight is not None: From 5bd9c24e41ea8f5d0cc0ea1a7401e06f582cb1b8 Mon Sep 17 00:00:00 2001 From: Vasilis Date: Mon, 4 Nov 2019 19:43:04 -0500 Subject: [PATCH 46/64] improved formatting of conditional subtraction --- econml/dml.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/econml/dml.py b/econml/dml.py index 5194cc28c..9a4db027e 100644 --- a/econml/dml.py +++ b/econml/dml.py @@ -167,7 +167,8 @@ def predict(self, X): F = self._featurizer.transform(X) if X is not None else np.ones((1, 1)) F, T = broadcast_unit_treatments(F, self._d_t[0] if self._d_t else 1) prediction = self._model.predict(cross_product(F, T)) - prediction -= self._intercept if self._intercept is not None else 0 + if self._intercept is not None: + prediction -= self._intercept return reshape_treatmentwise_effects(prediction, self._d_t, self._d_y) From cfe03c164563276cf4d0dec170c6684dd986f7b3 Mon Sep 17 00:00:00 2001 From: Vasilis Date: Mon, 4 Nov 2019 19:51:30 -0500 Subject: [PATCH 47/64] typo in ortho learner docstring --- econml/_ortho_learner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/econml/_ortho_learner.py b/econml/_ortho_learner.py index 3faa667f0..de64a83fd 100644 --- a/econml/_ortho_learner.py +++ b/econml/_ortho_learner.py @@ -167,7 +167,7 @@ class _OrthoLearner(TreatmentExpansionMixin, LinearCateEstimator): the class would also work if :math:`\\theta(X)` is the solution to a set of moment equations that also depend on nuisance functions :math:`h`. - 2. To estimate :math:`\\theta(X)` we first fit the h functions can calculate :math:`h(V_i)` for each sample + 2. 
To estimate :math:`\\theta(X)` we first fit the h functions and calculate :math:`h(V_i)` for each sample :math:`i` in a crossfit manner: - Estimate a model :math:`\\hat{h}` for h using half of the data From 0b7b932d7f3143c6bc17a6f9b489ef4ce7eda9b0 Mon Sep 17 00:00:00 2001 From: Vasilis Date: Mon, 4 Nov 2019 20:15:14 -0500 Subject: [PATCH 48/64] docstring updated to improve on crossfit training description --- econml/_ortho_learner.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/econml/_ortho_learner.py b/econml/_ortho_learner.py index de64a83fd..ddfc09e9f 100644 --- a/econml/_ortho_learner.py +++ b/econml/_ortho_learner.py @@ -170,15 +170,26 @@ class _OrthoLearner(TreatmentExpansionMixin, LinearCateEstimator): 2. To estimate :math:`\\theta(X)` we first fit the h functions and calculate :math:`h(V_i)` for each sample :math:`i` in a crossfit manner: - - Estimate a model :math:`\\hat{h}` for h using half of the data - - Evaluate the learned :math:`\\hat{h}` model on the other half - - Or more generally in a KFold fit/predict approach with more folds + - Let (F1_train, F1_test), ..., (Fk_train, Fk_test) be any KFold partition + of the data, where Ft_train, Ft_test are subsets of indices of the input samples and such that + F1_train is disjoint from F1_test. Typically, the sets F1_test, ..., Fk_test will form a partition + of all the data-sets (i.e. they will be disjoint and their union will be the set of all input indices). + However, this is not enforced and does not need to be the case. For instance, in a time series split + F0_train could be a prefix of the data and F0_test the suffix. Moreover, for simplicity of code we + even allow the F1_test to not be disjoint. In that case, the model trained on the last fold whose + Ft_test contains index i will be used to calculate the nuisance. Typically, Ft_test will be created + by a KFold split, i.e. 
if S1, ..., Sk is any partition of the data, then Ft_train is the set of + all indices except St and Ft_test = St. + - Then for each t in [1, ..., k] + - Estimate a model :math:`\\hat{h}` for h using Ft_train + - Evaluate the learned :math:`\\hat{h}` model on the data in Ft_test and use that value + as the nuisance value :math:`\\hat{h}(V_i)` for the indices i in Ft_test 3. Estimate the model for :math:`\\theta(X)` by minimizing the empirical (regularized) plugin loss: .. math :: - \\mathbb{E}_n[\\ell(V; \\theta(X), \\hat{h}(V))] + \\mathbb{E}_n[\\ell(V; \\theta(X), \\hat{h}(V))]\ + = \\frac{1}{n} \\sum_{i=1}^n \\sum_i \\ell(V_i; \\theta(X_i), \\hat{h}(V_i)) The method is a bit more general in that the final step does not need to be a loss minimization step. The class takes as input a model for fitting an estimate of the nuisance h given a set of samples From c78a78de04f95de90ac456e6caac6245aaca5021 Mon Sep 17 00:00:00 2001 From: Vasilis Date: Mon, 4 Nov 2019 20:35:24 -0500 Subject: [PATCH 49/64] added checks that crossfit fold structure is valid and raising appropriate error messages. Added tests that check that these messages are correctly raised. --- econml/_ortho_learner.py | 9 ++++++++- econml/tests/test_ortho_learner.py | 22 ++++++++++++++++++++++ 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/econml/_ortho_learner.py b/econml/_ortho_learner.py index ddfc09e9f..28840826a 100644 --- a/econml/_ortho_learner.py +++ b/econml/_ortho_learner.py @@ -122,6 +122,11 @@ def predict(self, X, y, W=None): fitted_inds = [] for idx, (train_idxs, test_idxs) in enumerate(folds): model_list.append(clone(model, safe=False)) + if len(np.intersect1d(train_idxs, test_idxs)) > 0: + raise AttributeError("Invalid crossfitting fold structure." + + "Train and test indices of each fold must be disjoint.") + if len(np.intersect1d(fitted_inds, test_idxs)) > 0: + raise AttributeError("Invalid crossfitting fold structure. 
The same index appears in two test folds.") fitted_inds = np.concatenate((fitted_inds, test_idxs)) args_train = () @@ -179,7 +184,9 @@ class _OrthoLearner(TreatmentExpansionMixin, LinearCateEstimator): even allow the F1_test to not be disjoint. In that case, the model trained on the last fold whose Ft_test contains index i will be used to calculate the nuisance. Typically, Ft_test will be created by a KFold split, i.e. if S1, ..., Sk is any partition of the data, then Ft_train is the set of - all indices except St and Ft_test = St. + all indices except St and Ft_test = St. If the union of the Ft_test is not all the data, then only the + subset of the data in the union of the Ft_test sets will be used in the final stage calculation for + :math:`\\theta(X)`. - Then for each t in [1, ..., k] - Estimate a model :math:`\\hat{h}` for h using Ft_train - Evaluate the learned :math:`\\hat{h}` model on the data in Ft_test and use that value diff --git a/econml/tests/test_ortho_learner.py b/econml/tests/test_ortho_learner.py index ad8595f77..293c2b68a 100644 --- a/econml/tests/test_ortho_learner.py +++ b/econml/tests/test_ortho_learner.py @@ -9,6 +9,7 @@ import numpy as np import unittest import joblib +import pytest class TestOrthoLearner(unittest.TestCase): @@ -45,6 +46,27 @@ def predict(self, X, y, W=None): [np.testing.assert_allclose(coef_, mdl._model.coef_, rtol=0, atol=0.08) for mdl in model_list] np.testing.assert_array_equal(fitted_inds, np.arange(X.shape[0])) + np.random.seed(123) + X = np.random.normal(size=(5000, 3)) + y = X[:, 0] + np.random.normal(size=(5000,)) + folds = [(np.arange(X.shape[0] // 2), np.arange(X.shape[0] // 2, X.shape[0])), + (np.arange(X.shape[0] // 2), np.arange(X.shape[0] // 2, X.shape[0]))] + model = Lasso(alpha=0.01) + with pytest.raises(AttributeError) as e_info: + nuisance, model_list, fitted_inds = _crossfit(Wrapper(model), + folds, + X, y, W=y, Z=None) + + np.random.seed(123) + X = np.random.normal(size=(5000, 3)) + y = X[:, 0] + 
np.random.normal(size=(5000,)) + folds = [(np.arange(X.shape[0]), np.arange(X.shape[0]))] + model = Lasso(alpha=0.01) + with pytest.raises(AttributeError) as e_info: + nuisance, model_list, fitted_inds = _crossfit(Wrapper(model), + folds, + X, y, W=y, Z=None) + def test_ol(self): class ModelNuisance: From 42384d066a4aaf3db773d3eb4f61492d8c592b62 Mon Sep 17 00:00:00 2001 From: Vasilis Date: Mon, 4 Nov 2019 20:45:04 -0500 Subject: [PATCH 50/64] better docstring in ortho learner --- econml/_ortho_learner.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/econml/_ortho_learner.py b/econml/_ortho_learner.py index 28840826a..6998098cd 100644 --- a/econml/_ortho_learner.py +++ b/econml/_ortho_learner.py @@ -88,7 +88,6 @@ def _crossfit(model, folds, *args, **kwargs): Examples -------- - .. highlight:: python .. code-block:: python import numpy as np @@ -273,7 +272,7 @@ class _OrthoLearner(TreatmentExpansionMixin, LinearCateEstimator): The example code below implements a very simple version of the double machine learning method on top of the :py:class:`~econml._ortho_learner._OrthoLearner` class, for expository purposes. 
For a more elaborate implementation of a Double Machine Learning child class of the class - :py:class:`~econml._ortho_learner._OrthoLearner` checkout :py:class:`~econml.dml.DMLCateEstimator` + :py:class:`~econml._ortho_learner._OrthoLearner` check out :py:class:`~econml.dml.DMLCateEstimator` and its child classes:: import numpy as np From c4e0cf4ec26d4ca9755883b6b10c702e7182657a Mon Sep 17 00:00:00 2001 From: Vasilis Date: Mon, 4 Nov 2019 20:46:43 -0500 Subject: [PATCH 51/64] W in example in ortholearner docstring --- econml/_ortho_learner.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/econml/_ortho_learner.py b/econml/_ortho_learner.py index 6998098cd..4ea72608b 100644 --- a/econml/_ortho_learner.py +++ b/econml/_ortho_learner.py @@ -359,14 +359,14 @@ def score(self, Y, T, W=None, nuisances=None): return np.mean((Y_res - self.model.predict(T_res.reshape(-1, 1)))**2) np.random.seed(123) - X = np.random.normal(size=(100, 3)) + W = np.random.normal(size=(100, 3)) import scipy.special from sklearn.linear_model import LogisticRegression - T = np.random.binomial(1, scipy.special.expit(X[:, 0])) - y = T + X[:, 0] + np.random.normal(0, 0.01, size=(100,)) + T = np.random.binomial(1, scipy.special.expit(W[:, 0])) + y = T + W[:, 0] + np.random.normal(0, 0.01, size=(100,)) est = _OrthoLearner(ModelNuisance(LogisticRegression(solver='lbfgs'), LinearRegression()), ModelFinal(), n_splits=2, discrete_treatment=True, random_state=None) - est.fit(y, T, W=X) + est.fit(y, T, W=W) >>> est.score_ 0.0031604059708364245 @@ -374,7 +374,7 @@ def score(self, Y, T, W=None, nuisances=None): array([[1.00123159]]) >>> est.effect() array([1.00123159]) - >>> est.score(y, T, W=X) + >>> est.score(y, T, W=W) 0.002569588332146612 >>> est.model_final.model.coef_[0] 1.0012315874866917 From 4ad02cc0b67ad05ae349fcfc65fbb3d696738f3b Mon Sep 17 00:00:00 2001 From: Vasilis Date: Mon, 4 Nov 2019 20:51:00 -0500 Subject: [PATCH 52/64] simplified input checks code in ortho 
learner --- econml/_ortho_learner.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/econml/_ortho_learner.py b/econml/_ortho_learner.py index 4ea72608b..9f2a31abc 100644 --- a/econml/_ortho_learner.py +++ b/econml/_ortho_learner.py @@ -407,11 +407,8 @@ def __init__(self, model_nuisance, model_final, def _check_input_dims(self, Y, T, X=None, W=None, Z=None, sample_weight=None, sample_var=None): assert shape(Y)[0] == shape(T)[0], "Dimension mis-match!" - assert (X is None) or (X.shape[0] == Y.shape[0]), "Dimension mis-match!" - assert (W is None) or (W.shape[0] == Y.shape[0]), "Dimension mis-match!" - assert (Z is None) or (Z.shape[0] == Y.shape[0]), "Dimension mis-match!" - assert (sample_weight is None) or (sample_weight.shape[0] == Y.shape[0]), "Dimension mis-match!" - assert (sample_var is None) or (sample_var.shape[0] == Y.shape[0]), "Dimension mis-match!" + for arr in [X, W, Z, sample_weight, sample_var]: + assert (arr is None) or (arr.shape[0] == Y.shape[0]), "Dimension mismatch" self._d_x = X.shape[1:] if X is not None else None def _check_fitted_dims(self, X): From 1268bb33c3f6d9c48d1f98190dd6d4b7f2cf2005 Mon Sep 17 00:00:00 2001 From: Vasilis Date: Mon, 4 Nov 2019 20:54:54 -0500 Subject: [PATCH 53/64] updated docstring regarding how we call the split method with all the variables in the ortho learner --- econml/_ortho_learner.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/econml/_ortho_learner.py b/econml/_ortho_learner.py index 9f2a31abc..780cd115a 100644 --- a/econml/_ortho_learner.py +++ b/econml/_ortho_learner.py @@ -258,7 +258,8 @@ class _OrthoLearner(TreatmentExpansionMixin, LinearCateEstimator): :class:`~sklearn.model_selection.KFold` is used (with a random shuffle in either case). - Unless an iterable is used, we call `split(X,T)` to generate the splits. + Unless an iterable is used, we call `split(concat[Z, W, X], T)` to generate the splits.
If all + Z, W, X are None, then we call `split(ones((T.shape[0], 1)), T)`. random_state: int, :class:`~numpy.random.mtrand.RandomState` instance or None If int, random_state is the seed used by the random number generator; From 17f35323f083fff86b574acad875a8c6258b7c78 Mon Sep 17 00:00:00 2001 From: Vasilis Date: Mon, 4 Nov 2019 21:02:48 -0500 Subject: [PATCH 54/64] added some more tests related to input None's in _crossfit --- econml/tests/test_ortho_learner.py | 51 ++++++++++++++++++++++++++++-- 1 file changed, 48 insertions(+), 3 deletions(-) diff --git a/econml/tests/test_ortho_learner.py b/econml/tests/test_ortho_learner.py index 293c2b68a..42c1c2916 100644 --- a/econml/tests/test_ortho_learner.py +++ b/econml/tests/test_ortho_learner.py @@ -21,11 +21,11 @@ class Wrapper: def __init__(self, model): self._model = model - def fit(self, X, y, W=None): + def fit(self, X, y, Q, W=None): self._model.fit(X, y) return self - def predict(self, X, y, W=None): + def predict(self, X, y, Q, W=None): return self._model.predict(X), y - self._model.predict(X), X np.random.seed(123) @@ -35,17 +35,62 @@ def predict(self, X, y, W=None): model = Lasso(alpha=0.01) nuisance, model_list, fitted_inds = _crossfit(Wrapper(model), folds, - X, y, W=y, Z=None) + X, y, y, W=y, Z=None) np.testing.assert_allclose(nuisance[0][folds[0][1]], model.fit(X[folds[0][0]], y[folds[0][0]]).predict(X[folds[0][1]])) np.testing.assert_allclose(nuisance[0][folds[0][0]], model.fit(X[folds[0][1]], y[folds[0][1]]).predict(X[folds[0][0]])) + coef_ = np.zeros(X.shape[1]) + coef_[0] = 1 + [np.testing.assert_allclose(coef_, mdl._model.coef_, rtol=0, atol=0.08) for mdl in model_list] + np.testing.assert_array_equal(fitted_inds, np.arange(X.shape[0])) + np.random.seed(123) + X = np.random.normal(size=(5000, 3)) + y = X[:, 0] + np.random.normal(size=(5000,)) + folds = list(KFold(2).split(X, y)) + model = Lasso(alpha=0.01) + nuisance, model_list, fitted_inds = _crossfit(Wrapper(model), + folds, + X, y, None, W=y, 
Z=None) + np.testing.assert_allclose(nuisance[0][folds[0][1]], + model.fit(X[folds[0][0]], y[folds[0][0]]).predict(X[folds[0][1]])) + np.testing.assert_allclose(nuisance[0][folds[0][0]], + model.fit(X[folds[0][1]], y[folds[0][1]]).predict(X[folds[0][0]])) coef_ = np.zeros(X.shape[1]) coef_[0] = 1 [np.testing.assert_allclose(coef_, mdl._model.coef_, rtol=0, atol=0.08) for mdl in model_list] np.testing.assert_array_equal(fitted_inds, np.arange(X.shape[0])) + np.random.seed(123) + X = np.random.normal(size=(5000, 3)) + y = X[:, 0] + np.random.normal(size=(5000,)) + folds = list(KFold(2).split(X, y)) + model = Lasso(alpha=0.01) + nuisance, model_list, fitted_inds = _crossfit(Wrapper(model), + folds, + X, y, None, W=None, Z=None) + np.testing.assert_allclose(nuisance[0][folds[0][1]], + model.fit(X[folds[0][0]], y[folds[0][0]]).predict(X[folds[0][1]])) + np.testing.assert_allclose(nuisance[0][folds[0][0]], + model.fit(X[folds[0][1]], y[folds[0][1]]).predict(X[folds[0][0]])) + coef_ = np.zeros(X.shape[1]) + coef_[0] = 1 + [np.testing.assert_allclose(coef_, mdl._model.coef_, rtol=0, atol=0.08) for mdl in model_list] + np.testing.assert_array_equal(fitted_inds, np.arange(X.shape[0])) + + class Wrapper: + + def __init__(self, model): + self._model = model + + def fit(self, X, y, W=None): + self._model.fit(X, y) + return self + + def predict(self, X, y, W=None): + return self._model.predict(X), y - self._model.predict(X), X + np.random.seed(123) X = np.random.normal(size=(5000, 3)) y = X[:, 0] + np.random.normal(size=(5000,)) From 3bb63d238e2be6a55a07f460559543fd95a13e52 Mon Sep 17 00:00:00 2001 From: Vasilis Date: Mon, 4 Nov 2019 21:08:22 -0500 Subject: [PATCH 55/64] added description in docstring of _param_var of StatsModelsLinearRegression in utilities --- econml/utilities.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/econml/utilities.py b/econml/utilities.py index 2e4f97f3f..fa7ba087d 100644 --- a/econml/utilities.py +++ b/econml/utilities.py @@ 
-1635,7 +1635,8 @@ def intercept_(self): @property def _param_var(self): - """ + """The covariance matrix of all the parameters in the regression (including the intercept + as the first parameter). Returns ------- From d8d524760af7bd1cf10da5f62c3948a05e701dbe Mon Sep 17 00:00:00 2001 From: Vasilis Date: Mon, 4 Nov 2019 21:12:02 -0500 Subject: [PATCH 56/64] docstrings in utilities --- econml/utilities.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/econml/utilities.py b/econml/utilities.py index fa7ba087d..7b0816134 100644 --- a/econml/utilities.py +++ b/econml/utilities.py @@ -1601,7 +1601,8 @@ def predict(self, X): @property def coef_(self): - """Get the model's coefficients on the covariates. + """ + Get the model's coefficients on the covariates. Returns ------- @@ -1623,7 +1624,8 @@ def coef_(self): @property def intercept_(self): - """Get the intercept(s) (or 0 if no intercept was fit). + """ + Get the intercept(s) (or 0 if no intercept was fit). Returns ------- @@ -1635,7 +1637,8 @@ def intercept_(self): @property def _param_var(self): - """The covariance matrix of all the parameters in the regression (including the intercept + """ + The covariance matrix of all the parameters in the regression (including the intercept as the first parameter). Returns @@ -1653,6 +1656,7 @@ def _param_var(self): @property def _param_stderr(self): """ + The standard error of each parameter that was estimated. Returns ------- From 6c38a0be076a30537e431aa9096fd18c78356164 Mon Sep 17 00:00:00 2001 From: Vasilis Date: Mon, 4 Nov 2019 21:47:28 -0500 Subject: [PATCH 57/64] removed private members from doc autosummary template. --- doc/_templates/autosummary/module.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/_templates/autosummary/module.rst b/doc/_templates/autosummary/module.rst index 83c4f1748..2a255286c 100644 --- a/doc/_templates/autosummary/module.rst +++ b/doc/_templates/autosummary/module.rst @@ -3,6 +3,5 @@ .. 
automodule:: {{ fullname }} :members: - :private-members: :inherited-members: :show-inheritance: From a046515ce8737dc96bc35952969638ea86870c6f Mon Sep 17 00:00:00 2001 From: Vasilis Date: Mon, 4 Nov 2019 21:56:25 -0500 Subject: [PATCH 58/64] hardcoding the autosummary template for ortho_learner and rlearner for now until better solution is found. Also creating separate headers for private and public modules on sidebar of docs --- doc/_autosummary/econml._ortho_learner.rst | 8 ++++++++ doc/_autosummary/econml._rlearner.rst | 8 ++++++++ doc/reference.rst | 15 +++++++++++---- 3 files changed, 27 insertions(+), 4 deletions(-) create mode 100644 doc/_autosummary/econml._ortho_learner.rst create mode 100644 doc/_autosummary/econml._rlearner.rst diff --git a/doc/_autosummary/econml._ortho_learner.rst b/doc/_autosummary/econml._ortho_learner.rst new file mode 100644 index 000000000..3af8d80fd --- /dev/null +++ b/doc/_autosummary/econml._ortho_learner.rst @@ -0,0 +1,8 @@ +econml._ortho_learner +===================== + +.. automodule:: econml._ortho_learner + :members: + :private-members: + :inherited-members: + :show-inheritance: \ No newline at end of file diff --git a/doc/_autosummary/econml._rlearner.rst b/doc/_autosummary/econml._rlearner.rst new file mode 100644 index 000000000..f848dbe1c --- /dev/null +++ b/doc/_autosummary/econml._rlearner.rst @@ -0,0 +1,8 @@ +econml._rlearner +================ + +.. automodule:: econml._rlearner + :members: + :private-members: + :inherited-members: + :show-inheritance: \ No newline at end of file diff --git a/doc/reference.rst b/doc/reference.rst index 5f0fe5821..65f6464b0 100644 --- a/doc/reference.rst +++ b/doc/reference.rst @@ -1,5 +1,5 @@ -Module reference -================ +Public Module Reference +======================= .. 
autosummary:: :toctree: _autosummary @@ -8,8 +8,6 @@ Module reference econml.cate_estimator econml.deepiv econml.dgp - econml._ortho_learner - econml._rlearner econml.dml econml.inference econml.ortho_forest @@ -17,3 +15,12 @@ Module reference econml.metalearners econml.two_stage_least_squares econml.utilities + +Private Module Reference +======================== + +.. autosummary:: + :toctree: _autosummary + + econml._ortho_learner + econml._rlearner \ No newline at end of file From 6ee1944fa7dad09d6278733c6335a6ad9d47e9f6 Mon Sep 17 00:00:00 2001 From: Vasilis Date: Mon, 4 Nov 2019 22:01:21 -0500 Subject: [PATCH 59/64] removed the OrthoLearner testing notebook --- notebooks/OrthoLearner.ipynb | 1126 ---------------------------------- 1 file changed, 1126 deletions(-) delete mode 100644 notebooks/OrthoLearner.ipynb diff --git a/notebooks/OrthoLearner.ipynb b/notebooks/OrthoLearner.ipynb deleted file mode 100644 index 89b03a773..000000000 --- a/notebooks/OrthoLearner.ipynb +++ /dev/null @@ -1,1126 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 64, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The autoreload extension is already loaded. 
To reload it, use:\n", - " %reload_ext autoreload\n" - ] - } - ], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Testing _crossfit Function" - ] - }, - { - "cell_type": "code", - "execution_count": 65, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(array([-1.1057289 , -1.53756637, -2.4518278 , ..., 1.10628792,\n", - " -1.82966233, -1.78227335]),)\n", - "[<__main__.Wrapper object at 0x12b7c2940>, <__main__.Wrapper object at 0x12b85ef28>]\n" - ] - }, - { - "data": { - "text/plain": [ - "array([ 0, 1, 2, ..., 4997, 4998, 4999])" - ] - }, - "execution_count": 65, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import numpy as np\n", - "from sklearn.model_selection import KFold\n", - "from sklearn.linear_model import Lasso\n", - "from econml._ortho_learner import _crossfit\n", - "\n", - "class Wrapper:\n", - " def __init__(self, model):\n", - " self._model = model\n", - " def fit(self, X, y, W=None):\n", - " self._model.fit(X, y)\n", - " return self\n", - " def predict(self, X, y, W=None):\n", - " return self._model.predict(X)\n", - "np.random.seed(123)\n", - "X = np.random.normal(size=(5000, 3))\n", - "y = X[:, 0] + np.random.normal(size=(5000,))\n", - "folds = list(KFold(2).split(X, y))\n", - "model = Lasso(alpha=0.01)\n", - "nuisance, model_list, fitted_inds = _crossfit(Wrapper(model),\n", - " folds,\n", - " X, y, W=y, Z=None)\n", - "print(nuisance)\n", - "print(model_list)\n", - "fitted_inds" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Simple DML with the _OrthoLearner" - ] - }, - { - "cell_type": "code", - "execution_count": 66, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 66, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import numpy as np\n", - "from sklearn.linear_model 
import LinearRegression\n", - "from econml._ortho_learner import _OrthoLearner\n", - "class ModelNuisance:\n", - " def __init__(self, model_t, model_y):\n", - " self._model_t = model_t\n", - " self._model_y = model_y\n", - " def fit(self, Y, T, W=None):\n", - " self._model_t.fit(W, T)\n", - " self._model_y.fit(W, Y)\n", - " return self\n", - " def predict(self, Y, T, W=None):\n", - " return Y - self._model_y.predict(W), T - self._model_t.predict(W)\n", - "class ModelFinal:\n", - " def __init__(self):\n", - " return\n", - " def fit(self, Y, T, W=None, nuisances=None):\n", - " Y_res, T_res = nuisances\n", - " self.model = LinearRegression(fit_intercept=False).fit(T_res.reshape(-1, 1), Y_res)\n", - " return self\n", - " def predict(self, X=None):\n", - " return self.model.coef_[0]\n", - " def score(self, Y, T, W=None, nuisances=None):\n", - " Y_res, T_res = nuisances\n", - " return np.mean((Y_res - self.model.predict(T_res.reshape(-1, 1)))**2)\n", - "np.random.seed(123)\n", - "X = np.random.normal(size=(100, 3))\n", - "y = X[:, 0] + X[:, 1] + np.random.normal(0, 0.1, size=(100,))\n", - "est = _OrthoLearner(ModelNuisance(LinearRegression(), LinearRegression()),\n", - " ModelFinal(),\n", - " n_splits=2, discrete_treatment=False, random_state=None)\n", - "est.fit(y, X[:, 0], W=X[:, 1:])" - ] - }, - { - "cell_type": "code", - "execution_count": 67, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "1.0236499258047582" - ] - }, - "execution_count": 67, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "est.const_marginal_effect()" - ] - }, - { - "cell_type": "code", - "execution_count": 68, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([1.02364993])" - ] - }, - "execution_count": 68, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "est.effect(T0=0, T1=1)" - ] - }, - { - "cell_type": "code", - "execution_count": 69, - "metadata": {}, - "outputs": [ - { - "data": { - 
"text/plain": [ - "0.00727995424098179" - ] - }, - "execution_count": 69, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "est.score(y, X[:, 0], W=X[:, 1:])" - ] - }, - { - "cell_type": "code", - "execution_count": 70, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "LinearRegression(copy_X=True, fit_intercept=False, n_jobs=None,\n", - " normalize=False)" - ] - }, - "execution_count": 70, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "est.model_final.model" - ] - }, - { - "cell_type": "code", - "execution_count": 71, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([1.02364993])" - ] - }, - "execution_count": 71, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "est.model_final.model.coef_" - ] - }, - { - "cell_type": "code", - "execution_count": 72, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0.007568302109999707" - ] - }, - "execution_count": 72, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "est.score_" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Simple DML with Discrete Treatments with the _OrthoLearner" - ] - }, - { - "cell_type": "code", - "execution_count": 73, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 73, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "class ModelNuisance:\n", - " def __init__(self, model_t, model_y):\n", - " self._model_t = model_t\n", - " self._model_y = model_y\n", - "\n", - " def fit(self, Y, T, W=None):\n", - " self._model_t.fit(W, np.matmul(T, np.arange(1, T.shape[1]+1)))\n", - " self._model_y.fit(W, Y)\n", - " return self\n", - "\n", - " def predict(self, Y, T, W=None):\n", - " return Y - self._model_y.predict(W), T - self._model_t.predict_proba(W)[:, 1:]\n", - "\n", - "class ModelFinal:\n", - "\n", - " def 
__init__(self):\n", - " return\n", - "\n", - " def fit(self, Y, T, W=None, nuisances=None):\n", - " Y_res, T_res = nuisances\n", - " self.model = LinearRegression(fit_intercept=False).fit(T_res.reshape(-1, 1), Y_res)\n", - " return self\n", - "\n", - " def predict(self):\n", - " # theta needs to be of dimension (1, d_t) if T is (n, d_t)\n", - " return np.array([[self.model.coef_[0]]])\n", - "\n", - " def score(self, Y, T, W=None, nuisances=None):\n", - " Y_res, T_res = nuisances\n", - " return np.mean((Y_res - self.model.predict(T_res.reshape(-1, 1)))**2)\n", - "\n", - "np.random.seed(123)\n", - "X = np.random.normal(size=(100, 3))\n", - "import scipy.special\n", - "from sklearn.linear_model import LogisticRegression\n", - "T = np.random.binomial(1, scipy.special.expit(X[:, 0]))\n", - "sigma = 0.01\n", - "y = T + X[:, 0] + np.random.normal(0, sigma, size=(100,))\n", - "est = _OrthoLearner(ModelNuisance(LogisticRegression(solver='lbfgs'), LinearRegression()), ModelFinal(),\n", - " n_splits=2, discrete_treatment=True, random_state=None)\n", - "est.fit(y, T, W=X)" - ] - }, - { - "cell_type": "code", - "execution_count": 74, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[1.00123159]])" - ] - }, - "execution_count": 74, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "est.const_marginal_effect()" - ] - }, - { - "cell_type": "code", - "execution_count": 75, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([1.00123159])" - ] - }, - "execution_count": 75, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "est.effect()" - ] - }, - { - "cell_type": "code", - "execution_count": 76, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0.002569588332146612" - ] - }, - "execution_count": 76, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "est.score(y, T, W=X)" - ] - }, - { - "cell_type": "code", - "execution_count": 77, 
- "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "1.0012315874866917" - ] - }, - "execution_count": 77, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "est.model_final.model.coef_[0]" - ] - }, - { - "cell_type": "code", - "execution_count": 78, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "LinearRegression(copy_X=True, fit_intercept=False, n_jobs=None,\n", - " normalize=False)" - ] - }, - "execution_count": 78, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "est.model_final.model" - ] - }, - { - "cell_type": "code", - "execution_count": 79, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([1.00123159])" - ] - }, - "execution_count": 79, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "est.model_final.model.coef_" - ] - }, - { - "cell_type": "code", - "execution_count": 80, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0.0031604059708364245" - ] - }, - "execution_count": 80, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "est.score_" - ] - }, - { - "cell_type": "code", - "execution_count": 81, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([1.28171346, 0.03749846, 0.10120681])" - ] - }, - "execution_count": 81, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "est.models_nuisance[0]._model_y.coef_" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Simple DML with the _RLearner" - ] - }, - { - "cell_type": "code", - "execution_count": 82, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 82, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import numpy as np\n", - "from sklearn.linear_model import LinearRegression\n", - "from econml._rlearner import _RLearner\n", - "from sklearn.base import 
clone\n", - "class ModelFirst:\n", - " def __init__(self, model):\n", - " self._model = clone(model, safe=False)\n", - " def fit(self, X, W, Y, sample_weight=None):\n", - " self._model.fit(np.hstack([X, W]), Y)\n", - " return self\n", - " def predict(self, X, W):\n", - " return self._model.predict(np.hstack([X, W]))\n", - "class ModelFinal:\n", - " def fit(self, X, T_res, Y_res, sample_weight=None, sample_var=None):\n", - " self.model = LinearRegression(fit_intercept=False).fit(X * T_res.reshape(-1, 1), Y_res)\n", - " return self\n", - " def predict(self, X):\n", - " return self.model.predict(X)\n", - "np.random.seed(123)\n", - "X = np.random.normal(size=(1000, 3))\n", - "y = X[:, 0] + X[:, 1] + np.random.normal(0, 0.01, size=(1000,))\n", - "est = _RLearner(ModelFirst(LinearRegression()),\n", - " ModelFirst(LinearRegression()),\n", - " ModelFinal(),\n", - " n_splits=2, discrete_treatment=False, random_state=None)\n", - "est.fit(y, X[:, 0], X=np.ones((X.shape[0], 1)), W=X[:, 1:])" - ] - }, - { - "cell_type": "code", - "execution_count": 83, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([0.99963147])" - ] - }, - "execution_count": 83, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "est.const_marginal_effect(np.ones((1,1)))" - ] - }, - { - "cell_type": "code", - "execution_count": 84, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([9.99631472])" - ] - }, - "execution_count": 84, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "est.effect(np.ones((1,1)), T0=0, T1=10)" - ] - }, - { - "cell_type": "code", - "execution_count": 85, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "9.736380060274913e-05" - ] - }, - "execution_count": 85, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "est.score(y, X[:, 0], X=np.ones((X.shape[0], 1)), W=X[:, 1:])" - ] - }, - { - "cell_type": "code", - "execution_count": 86, - 
"metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "LinearRegression(copy_X=True, fit_intercept=False, n_jobs=None,\n", - " normalize=False)" - ] - }, - "execution_count": 86, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "est.model_final.model" - ] - }, - { - "cell_type": "code", - "execution_count": 87, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([0.99963147])" - ] - }, - "execution_count": 87, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "est.model_final.model.coef_" - ] - }, - { - "cell_type": "code", - "execution_count": 88, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "9.826232040878233e-05" - ] - }, - "execution_count": 88, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "est.score_" - ] - }, - { - "cell_type": "code", - "execution_count": 89, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,\n", - " normalize=False),\n", - " LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,\n", - " normalize=False)]" - ] - }, - "execution_count": 89, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "[mdl._model for mdl in est.models_y]" - ] - }, - { - "cell_type": "code", - "execution_count": 90, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,\n", - " normalize=False),\n", - " LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,\n", - " normalize=False)]" - ] - }, - "execution_count": 90, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "[mdl._model for mdl in est.models_t]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Checking All Good with LinearDMLCateEstimator" - ] - }, - { - "cell_type": "code", - "execution_count": 91, - 
"metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 91, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from econml.dml import LinearDMLCateEstimator\n", - "\n", - "np.random.seed(123)\n", - "X = np.random.normal(size=(1000, 3))\n", - "y = X[:, 0] + X[:, 1] + np.random.normal(0, 0.01, size=(1000,))\n", - "est = LinearDMLCateEstimator(model_y=LinearRegression(),\n", - " model_t=LinearRegression())\n", - "est.fit(y, X[:, 0], W=X[:, 1:], inference='statsmodels')" - ] - }, - { - "cell_type": "code", - "execution_count": 92, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([1.00089549])" - ] - }, - "execution_count": 92, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "est.effect()" - ] - }, - { - "cell_type": "code", - "execution_count": 93, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(array([0.99404817]), array([1.0077428]))" - ] - }, - "execution_count": 93, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "est.effect_interval()" - ] - }, - { - "cell_type": "code", - "execution_count": 94, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,\n", - " normalize=False),\n", - " LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,\n", - " normalize=False)]" - ] - }, - "execution_count": 94, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "est.models_y" - ] - }, - { - "cell_type": "code", - "execution_count": 95, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,\n", - " normalize=False),\n", - " LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,\n", - " normalize=False)]" - ] - }, - "execution_count": 95, - "metadata": {}, - "output_type": "execute_result" - } - ], - 
"source": [ - "est.models_t" - ] - }, - { - "cell_type": "code", - "execution_count": 96, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([1.00089549])" - ] - }, - "execution_count": 96, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "est.coef_" - ] - }, - { - "cell_type": "code", - "execution_count": 97, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(array([0.99404817]), array([1.0077428]))" - ] - }, - "execution_count": 97, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "est.coef__interval()" - ] - }, - { - "cell_type": "code", - "execution_count": 98, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 98, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "est.model_final" - ] - }, - { - "cell_type": "code", - "execution_count": 99, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([0.00416287])" - ] - }, - "execution_count": 99, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "est.model_final._param_stderr" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# DR Learner Based on _OrthoLearner" - ] - }, - { - "cell_type": "code", - "execution_count": 100, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 100, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import numpy as np\n", - "from sklearn.linear_model import LinearRegression\n", - "from econml._ortho_learner import _OrthoLearner\n", - "class ModelNuisance:\n", - " def __init__(self, model_t, model_y):\n", - " self._model_t = model_t\n", - " self._model_y = model_y\n", - "\n", - " def fit(self, Y, T, X=None, W=None):\n", - " self._model_t.fit(np.hstack([X, W]), np.matmul(T, np.arange(1, T.shape[1]+1)))\n", - " self._model_y.fit(np.hstack([T, X, W]), Y)\n", - " 
return self\n", - "\n", - " def predict(self, Y, T, X=None, W=None):\n", - " propensities = self._model_t.predict_proba(np.hstack([X, W]))\n", - " Y_pred = np.zeros((T.shape[0], T.shape[1] + 1))\n", - " T_counter = np.zeros(T.shape)\n", - " Y_pred[:, 0] = self._model_y.predict(np.hstack([T_counter, X, W]))\n", - " Y_pred[:, 0] += (Y - Y_pred[:, 0]) * np.all(T==0, axis=1) / propensities[:, 0]\n", - " for t in np.arange(T.shape[1]):\n", - " T_counter = np.zeros(T.shape)\n", - " T_counter[:, t] = 1\n", - " Y_pred[:, t + 1] = self._model_y.predict(np.hstack([T_counter, X, W]))\n", - " Y_pred[:, t + 1] += (Y - Y_pred[:, t + 1]) * (T[:, t] == 1) / propensities[:, t + 1]\n", - " return Y_pred\n", - "\n", - "class ModelFinal:\n", - "\n", - " def __init__(self):\n", - " return\n", - "\n", - " def fit(self, Y, T, X=None, W=None, nuisances=None):\n", - " Y_pred, = nuisances\n", - " self.models_cate = [LinearRegression().fit(X, Y_pred[:, t] - Y_pred[:, 0])\n", - " for t in np.arange(1, Y_pred.shape[1])]\n", - " return self\n", - "\n", - " def predict(self, X=None):\n", - " # theta needs to be of dimension (1, d_t) if T is (n, d_t)\n", - " return np.array([mdl.predict(X) for mdl in self.models_cate]).T\n", - "\n", - "np.random.seed(123)\n", - "X = np.random.normal(size=(1000, 3))\n", - "import scipy.special\n", - "from sklearn.linear_model import LogisticRegression\n", - "T = np.random.binomial(1, scipy.special.expit(X[:, 0]))\n", - "sigma = 0.01\n", - "y = (1 + .5*X[:, 0]) * T + X[:, 0] + np.random.normal(0, sigma, size=(1000,))\n", - "est = _OrthoLearner(ModelNuisance(LogisticRegression(solver='lbfgs'), LinearRegression()), ModelFinal(),\n", - " n_splits=2, discrete_treatment=True, random_state=None)\n", - "est.fit(y, T, X=X[:, [0]], W=X[:, 1:])" - ] - }, - { - "cell_type": "code", - "execution_count": 101, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[ 0.43111746],\n", - " [ 0.21377249],\n", - " [-0.26176354],\n", - " [ 0.54421168],\n", - " [ 
1.76258919],\n", - " [ 0.76761463],\n", - " [ 1.51079693],\n", - " [ 1.76224943],\n", - " [ 0.34418752],\n", - " [ 0.2538734 ]])" - ] - }, - "execution_count": 101, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "est.const_marginal_effect(X[:10, [0]])" - ] - }, - { - "cell_type": "code", - "execution_count": 102, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[array([[ 1.07859458, -0.09378512, -0.16819498]]),\n", - " array([[ 0.87520635, -0.07950399, 0.06037872]])]" - ] - }, - "execution_count": 102, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "[mdl._model_t.coef_ for mdl in est.models_nuisance]" - ] - }, - { - "cell_type": "code", - "execution_count": 103, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[array([ 0.99703455, 1.25799456, -0.00554411, 0.00216083]),\n", - " array([ 1.04547656e+00, 1.21020962e+00, 7.94069500e-04, -9.68240609e-03])]" - ] - }, - "execution_count": 103, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "[mdl._model_y.coef_ for mdl in est.models_nuisance]" - ] - }, - { - "cell_type": "code", - "execution_count": 104, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([0.51667104])" - ] - }, - "execution_count": 104, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "est.model_final.models_cate[0].coef_" - ] - }, - { - "cell_type": "code", - "execution_count": 105, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0.9920313527587243" - ] - }, - "execution_count": 105, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "est.model_final.models_cate[0].intercept_" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - 
}, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.1" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} From 203f9e5bd58f033a6a84e2c5f94e2529cf980fbd Mon Sep 17 00:00:00 2001 From: Date: Tue, 5 Nov 2019 11:24:04 -0500 Subject: [PATCH 60/64] improved cate estimator docstrings --- econml/cate_estimator.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/econml/cate_estimator.py b/econml/cate_estimator.py index 2faed1dce..c746343e8 100644 --- a/econml/cate_estimator.py +++ b/econml/cate_estimator.py @@ -11,7 +11,7 @@ from .bootstrap import BootstrapEstimator from .inference import BootstrapInference from .utilities import tensordot, ndim, reshape, shape -from .inference import StatsModelsInference +from .inference import StatsModelsInference, StatsModelsInferenceDiscrete class BaseCateEstimator(metaclass=abc.ABCMeta): @@ -96,10 +96,10 @@ def call(self, Y, T, *args, inference=None, **kwargs): @abc.abstractmethod def effect(self, X=None, *, T0, T1): """ - Calculate the heterogeneous treatment effect tau(X, T0, T1). + Calculate the heterogeneous treatment effect :mat:`\\tau(X, T0, T1)`. The effect is calculated between the two treatment points - conditional on a vector of features on a set of m test samples {T0ᵢ, T1ᵢ, Xᵢ}. + conditional on a vector of features on a set of m test samples :math:`{T0_i, T1_i, X_i}`. Parameters ---------- @@ -122,10 +122,10 @@ def effect(self, X=None, *, T0, T1): @abc.abstractmethod def marginal_effect(self, T, X=None): """ - Calculate the heterogeneous marginal effect marginal_tau(T, X). + Calculate the heterogeneous marginal effect :math:`\\partial\\tau(T, X)`. The marginal effect is calculated around a base treatment - point conditional on a vector of features on a set of m test samples {Tᵢ, Xᵢ}. 
+ point conditional on a vector of features on a set of m test samples :math:`\{T_i, X_i\}`. Parameters ---------- @@ -208,10 +208,10 @@ def const_marginal_effect(self, X=None): def effect(self, X=None, *, T0, T1): """ - Calculate the heterogeneous treatment effect tau(X, T0, T1). + Calculate the heterogeneous treatment effect :mat:`\\tau(X, T0, T1)`. The effect is calculatred between the two treatment points - conditional on a vector of features on a set of m test samples {T0ᵢ, T1ᵢ, Xᵢ}. + conditional on a vector of features on a set of m test samples :math:`{T0_i, T1_i, X_i}`. Since this class assumes a linear effect, only the difference between T0ᵢ and T1ᵢ matters for this computation. @@ -250,10 +250,10 @@ def effect(self, X=None, *, T0, T1): def marginal_effect(self, T, X=None): """ - Calculate the heterogeneous marginal effect marginal_tau(T, X). + Calculate the heterogeneous marginal effect :math:`\\partial\\tau(T, X)`. The marginal effect is calculated around a base treatment - point conditional on a vector of features on a set of m test samples {Tᵢ, Xᵢ}. + point conditional on a vector of features on a set of m test samples :math:`\{T_i, X_i\}`. Since this class assumes a linear model, the base treatment is ignored in this calculation. 
Parameters From 2628ac1a05e13609164b747cf0ae0e5b8cd4d451 Mon Sep 17 00:00:00 2001 From: Date: Tue, 5 Nov 2019 11:26:50 -0500 Subject: [PATCH 61/64] cate estimator small bug between branches --- econml/cate_estimator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/econml/cate_estimator.py b/econml/cate_estimator.py index c746343e8..b19133285 100644 --- a/econml/cate_estimator.py +++ b/econml/cate_estimator.py @@ -11,7 +11,7 @@ from .bootstrap import BootstrapEstimator from .inference import BootstrapInference from .utilities import tensordot, ndim, reshape, shape -from .inference import StatsModelsInference, StatsModelsInferenceDiscrete +from .inference import StatsModelsInference class BaseCateEstimator(metaclass=abc.ABCMeta): From 66084b378ae10b0f32be16f9bfe1c4a7c06a73a5 Mon Sep 17 00:00:00 2001 From: Date: Tue, 5 Nov 2019 11:27:56 -0500 Subject: [PATCH 62/64] cate estimator small bug between branches --- econml/cate_estimator.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/econml/cate_estimator.py b/econml/cate_estimator.py index b19133285..62e90b148 100644 --- a/econml/cate_estimator.py +++ b/econml/cate_estimator.py @@ -96,7 +96,7 @@ def call(self, Y, T, *args, inference=None, **kwargs): @abc.abstractmethod def effect(self, X=None, *, T0, T1): """ - Calculate the heterogeneous treatment effect :mat:`\\tau(X, T0, T1)`. + Calculate the heterogeneous treatment effect :math:`\\tau(X, T0, T1)`. The effect is calculated between the two treatment points conditional on a vector of features on a set of m test samples :math:`{T0_i, T1_i, X_i}`. @@ -208,7 +208,7 @@ def const_marginal_effect(self, X=None): def effect(self, X=None, *, T0, T1): """ - Calculate the heterogeneous treatment effect :mat:`\\tau(X, T0, T1)`. + Calculate the heterogeneous treatment effect :math:`\\tau(X, T0, T1)`. 
The effect is calculatred between the two treatment points conditional on a vector of features on a set of m test samples :math:`{T0_i, T1_i, X_i}`. From dbd0f0c0b70525c381d0d9b03554a30dbf84dd84 Mon Sep 17 00:00:00 2001 From: Date: Tue, 5 Nov 2019 11:59:05 -0500 Subject: [PATCH 63/64] linting errors --- econml/cate_estimator.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/econml/cate_estimator.py b/econml/cate_estimator.py index 62e90b148..68527b5c0 100644 --- a/econml/cate_estimator.py +++ b/econml/cate_estimator.py @@ -99,7 +99,7 @@ def effect(self, X=None, *, T0, T1): Calculate the heterogeneous treatment effect :math:`\\tau(X, T0, T1)`. The effect is calculated between the two treatment points - conditional on a vector of features on a set of m test samples :math:`{T0_i, T1_i, X_i}`. + conditional on a vector of features on a set of m test samples :math:`\\{T0_i, T1_i, X_i\\}`. Parameters ---------- @@ -125,7 +125,7 @@ def marginal_effect(self, T, X=None): Calculate the heterogeneous marginal effect :math:`\\partial\\tau(T, X)`. The marginal effect is calculated around a base treatment - point conditional on a vector of features on a set of m test samples :math:`\{T_i, X_i\}`. + point conditional on a vector of features on a set of m test samples :math:`\\{T_i, X_i\\}`. Parameters ---------- @@ -211,7 +211,7 @@ def effect(self, X=None, *, T0, T1): Calculate the heterogeneous treatment effect :math:`\\tau(X, T0, T1)`. The effect is calculatred between the two treatment points - conditional on a vector of features on a set of m test samples :math:`{T0_i, T1_i, X_i}`. + conditional on a vector of features on a set of m test samples :math:`\\{T0_i, T1_i, X_i\\}`. Since this class assumes a linear effect, only the difference between T0ᵢ and T1ᵢ matters for this computation. @@ -253,7 +253,7 @@ def marginal_effect(self, T, X=None): Calculate the heterogeneous marginal effect :math:`\\partial\\tau(T, X)`. 
The marginal effect is calculated around a base treatment - point conditional on a vector of features on a set of m test samples :math:`\{T_i, X_i\}`. + point conditional on a vector of features on a set of m test samples :math:`\\{T_i, X_i\\}`. Since this class assumes a linear model, the base treatment is ignored in this calculation. Parameters From a27ccccf5436051823acb38a61d9ee4f7674c143 Mon Sep 17 00:00:00 2001 From: Date: Tue, 5 Nov 2019 12:02:29 -0500 Subject: [PATCH 64/64] linting errors --- econml/cate_estimator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/econml/cate_estimator.py b/econml/cate_estimator.py index 68527b5c0..37c4cf020 100644 --- a/econml/cate_estimator.py +++ b/econml/cate_estimator.py @@ -49,7 +49,7 @@ def _prefit(self, Y, T, *args, **kwargs): def fit(self, *args, inference=None, **kwargs): """ Estimate the counterfactual model from data, i.e. estimates functions - tau(X, T0, T1), marginal_tau(T, X). + :math:`\\tau(X, T0, T1)`, :math:`\\partial \\tau(T, X)`. Note that the signature of this method may vary in subclasses (e.g. classes that don't support instruments will not allow a `Z` argument)