py-why · vsyrgkanis · Mar 9, 2021 · Jan 21, 2021 · Jan 21, 2021 · Jan 23, 2021
diff --git a/econml/_ortho_learner.py b/econml/_ortho_learner.py
@@ -28,6 +28,9 @@ class in this module implements the general logic in a very versatile way
 from collections import namedtuple
 from warnings import warn
 from abc import abstractmethod
+import inspect
+from collections import defaultdict
+import re
 
 import numpy as np
 from sklearn.base import clone
@@ -436,6 +439,33 @@ def __init__(self, *,
  self.mc_agg = mc_agg
  super().__init__()
 
+ @classmethod
+ def _get_param_names(cls):
+ """Get parameter names for the estimator"""
+ # fetch the constructor or the original constructor before
+ # deprecation wrapping if any
+ init = cls.__init__
+ if init is object.__init__:
+ # No explicit constructor to introspect
+ return []
+
+ # introspect the constructor arguments to find the model parameters
+ # to represent
+ init_signature = inspect.signature(init)
+ # Consider the constructor parameters excluding 'self'
+ parameters = [p for p in init_signature.parameters.values()
+ if p.name != 'self' and p.kind != p.VAR_KEYWORD]
+ for p in parameters:
+ if p.kind == p.VAR_POSITIONAL:
+ raise RuntimeError("ortho learner cate estimators should always "
+ "specify their parameters in the signature"
+ " of their __init__ (no varargs)."
+ " %s with constructor %s doesn't "
+ " follow this convention."
+ % (cls, init_signature))
+ # Extract and sort argument names excluding 'self'
+ return sorted([p.name for p in parameters])
+
  @abstractmethod
  def _gen_ortho_learner_model_nuisance(self):
  """ Must return a fresh instance of a nuisance model

diff --git a/econml/dml/causal_forest.py b/econml/dml/causal_forest.py
@@ -4,21 +4,24 @@
 from warnings import warn
 
 import numpy as np
+from sklearn.linear_model import LogisticRegressionCV
+from sklearn.base import clone, BaseEstimator
+from sklearn.preprocessing import FunctionTransformer
+from sklearn.pipeline import Pipeline
+from sklearn.model_selection import train_test_split
+from itertools import product
 from .dml import _BaseDML
 from .dml import _FirstStageWrapper, _FinalWrapper
 from ..sklearn_extensions.linear_model import WeightedLassoCVWrapper
 from ..sklearn_extensions.model_selection import WeightedStratifiedKFold
 from ..inference import NormalInferenceResults
 from ..inference._inference import Inference
-from sklearn.linear_model import LogisticRegressionCV
-from sklearn.base import clone, BaseEstimator
-from sklearn.preprocessing import FunctionTransformer
-from sklearn.pipeline import Pipeline
 from ..utilities import add_intercept, shape, check_inputs, _deprecate_positional
 from ..grf import CausalForest, MultiOutputGRF
 from .._cate_estimator import LinearCateEstimator
 from .._shap import _shap_explain_multitask_model_cate
 from .._ortho_learner import _OrthoLearner
+from ..score import RScorer
 
 
 class _CausalForestFinalWrapper(_FinalWrapper):
@@ -173,7 +176,7 @@ class CausalForestDML(_BaseDML):
 
  sum_{child} E[(Y - <theta(child), T> - beta(child))^2 | X=child] weight(child)
 
- Internally, for the case of more than two treatments or for the case of one treatment with
+ Internally, for the case of more than two treatments or for the case of two treatments with
  ``fit_intercept=True`` then this criterion is approximated by computationally simpler variants for
  computationaly purposes. In particular, it is replaced by:
 
@@ -394,7 +397,7 @@ def __init__(self, *,
  min_samples_leaf=5,
  min_weight_fraction_leaf=0.,
  min_var_fraction_leaf=None,
- min_var_leaf_on_val=True,
+ min_var_leaf_on_val=False,
  max_features="auto",
  min_impurity_decrease=0.,
  max_samples=.45,
@@ -498,7 +501,103 @@ def _gen_model_final(self):
  def _gen_rlearner_model_final(self):
  return _CausalForestFinalWrapper(self._gen_model_final(), False, self._gen_featurizer(), False)
 
+ def tune(self, Y, T, *, X=None, W=None,
+ sample_weight=None, sample_var=None, groups=None,
+ params='auto'):
+ """
+ Tunes the major hyperparameters of the final stage causal forest based on out-of-sample R-score
+ performance. It trains small forests of size 100 trees on a grid of parameters and tests the
+ out of sample R-score. After the function is called, then all parameters of `self` have been
+ set to the optimal hyperparameters found.
+
+ Parameters
+ ----------
+ Y: (n × d_y) matrix or vector of length n
+ Outcomes for each sample
+ T: (n × dₜ) matrix or vector of length n
+ Treatments for each sample
+ X: (n × dₓ) matrix
+ Features for each sample
+ W: optional (n × d_w) matrix
+ Controls for each sample
+ sample_weight: optional (n,) vector
+ Weights for each row
+ sample_var: optional (n, n_y) vector
+ Variance of sample, in case it corresponds to summary of many samples. Currently
+ not in use by this method (as inference method does not require sample variance info).
+ groups: (n,) vector, optional
+ All rows corresponding to the same group will be kept together during splitting.
+ If groups is not None, the `cv` argument passed to this class's initializer
+ must support a 'groups' argument to its split method.
+ params: dict or 'auto', optional (default='auto')
+ A dictionary that contains the grid of hyperparameters to try, i.e.
+ {'param1': [value1, value2, ...], 'param2': [value1, value2, ...], ...}
+ If `params='auto'`, then a default grid is used.
+
+ Returns
+ -------
+ self : CausalForestDML object
+ The tuned causal forest object. This is the same object (not a copy) as the original one, but where
+ all parameters of the object have been set to the best performing parameters from the tuning grid.
+ """
+ if params == 'auto':
+ params = {'max_samples': [.3, .5],
+ 'min_balancedness_tol': [.3, .5],
+ 'min_samples_leaf': [5, 50],
+ 'max_depth': [3, None],
+ 'min_var_fraction_leaf': [None, .01]}
+ else:
+ # If custom param grid, check that only estimator parameters are being altered
+ estimator_param_names = self._get_param_names()
+ for key in params.keys():
+ if key not in estimator_param_names:
+ raise ValueError("Parameter `{}` is not an estimator parameter.".format(key))
+
+ train, test = train_test_split(np.arange(Y.shape[0]), train_size=.7, random_state=self.random_state)
+ ytrain, yval, Ttrain, Tval = Y[train], Y[test], T[train], T[test]
+ Xtrain, Xval = (X[train], X[test]) if X is not None else (None, None)
+ Wtrain, Wval = (W[train], W[test]) if W is not None else (None, None)
+ groups_train, groups_val = (groups[train], groups[test]) if groups is not None else (None, None)
+ if sample_weight is not None:
+ sample_weight_train, sample_weight_val = sample_weight[train], sample_weight[test]
+ else:
+ sample_weight_train, sample_weight_val = None, None
+ if sample_var is not None:
+ sample_var_train, _ = sample_var[train], sample_var[test]
+ else:
+ sample_var_train, _ = None, None
+
+ est = clone(self, safe=False)
+ est.n_estimators = 100
+ est.inference = False
+
+ scorer = RScorer(model_y=est.model_y, model_t=est.model_t,
+ discrete_treatment=est.discrete_treatment, categories=est.categories,
+ cv=est.cv, mc_iters=est.mc_iters, mc_agg=est.mc_agg,
+ random_state=est.random_state)
+ scorer.fit(yval, Tval, X=Xval, W=Wval, sample_weight=sample_weight_val, groups=groups_val)
+
+ names = params.keys()
+ scores = []
+ for it, values in enumerate(product(*params.values())):
+ for key, value in zip(names, values):
+ setattr(est, key, value)
+ if it == 0:
+ est.fit(ytrain, Ttrain, X=Xtrain, W=Wtrain, sample_weight=sample_weight_train,
+ sample_var=sample_var_train, groups=groups_train, cache_values=True)
+ else:
+ est.refit_final()
+ scores.append((scorer.score(est), tuple(zip(names, values))))
+
+ bestind = np.argmax([s[0] for s in scores])
+ _, best_params = scores[bestind]
+ for key, value in best_params:
+ setattr(self, key, value)
+
+ return self
+
  # override only so that we can update the docstring to indicate support for `blb`
+
  @_deprecate_positional("X and W should be passed by keyword only. In a future release "
  "we will disallow passing X and W by position.", ['X', 'W'])
  def fit(self, Y, T, X=None, W=None, *, sample_weight=None, sample_var=None, groups=None,

diff --git a/econml/grf/classes.py b/econml/grf/classes.py
@@ -107,7 +107,7 @@ class CausalForest(BaseGRF):
 
  sum_{child} E[(Y - <theta(child), T> - beta(child))^2 | X=child] weight(child)
 
- Internally, for the case of more than two treatments or for the case of one treatment with
+ Internally, for the case of more than two treatments or for the case of two treatments with
  ``fit_intercept=True`` then this criterion is approximated by computationally simpler variants for
  computationaly purposes. In particular, it is replaced by::
 

diff --git a/econml/tests/test_dml.py b/econml/tests/test_dml.py
@@ -610,7 +610,7 @@ def true_fn(x):
  y_sum = np.concatenate((y1_sum, y2_sum)) # outcome
  n_sum = np.concatenate((n1_sum, n2_sum)) # number of summarized points
  var_sum = np.concatenate((var1_sum, var2_sum)) # variance of the summarized points
- for summarized, min_samples_leaf in [(False, 20), (True, 1)]:
+ for summarized, min_samples_leaf, tune in [(False, 20, False), (True, 1, False), (False, 20, True)]:
  est = CausalForestDML(model_y=GradientBoostingRegressor(n_estimators=30, min_samples_leaf=30),
  model_t=GradientBoostingClassifier(n_estimators=30, min_samples_leaf=30),
  discrete_treatment=True,
@@ -626,6 +626,8 @@ def true_fn(x):
  est.fit(y_sum, T_sum, X=X_sum[:, :4], W=X_sum[:, 4:],
  sample_weight=n_sum)
  else:
+ if tune:
+ est.tune(y, T, X=X[:, :4], W=X[:, 4:])
  est.fit(y, T, X=X[:, :4], W=X[:, 4:])
  X_test = np.array(list(itertools.product([0, 1], repeat=4)))
  point = est.effect(X_test)
@@ -650,6 +652,8 @@ def true_fn(x):
  est.fit(y_sum, T_sum, X=X_sum[:, :4], W=X_sum[:, 4:],
  sample_weight=n_sum)
  else:
+ if tune:
+ est.tune(y, T, X=X[:, :4], W=X[:, 4:], params={'max_samples': [.1, .3]})
  est.fit(y, T, X=X[:, :4], W=X[:, 4:])
  X_test = np.array(list(itertools.product([0, 1], repeat=4)))
  point = est.effect(X_test)

diff --git a/notebooks/Causal Forest and Orthogonal Random Forest Examples.ipynb b/notebooks/Causal Forest and Orthogonal Random Forest Examples.ipynb
diff --git a/notebooks/Double Machine Learning Examples.ipynb b/notebooks/Double Machine Learning Examples.ipynb
diff --git a/notebooks/ForestLearners Basic Example.ipynb b/notebooks/ForestLearners Basic Example.ipynb