Commit: Fix bug in marginal effect for 2SLS

kbattocchi committed Feb 12, 2020
1 parent 70cd6d3, commit 827100e
Showing 2 changed files with 141 additions and 7 deletions.
econml/tests/test_two_stage_least_squares.py (57 additions, 3 deletions)
@@ -9,8 +9,9 @@
 import pytest
 
 from econml.utilities import shape, reshape
-from econml.two_stage_least_squares import NonparametricTwoStageLeastSquares, HermiteFeatures
+from econml.two_stage_least_squares import (NonparametricTwoStageLeastSquares, HermiteFeatures, DPolynomialFeatures)
 from sklearn.linear_model import LinearRegression
+from sklearn.preprocessing import PolynomialFeatures
 
 
 class Test2SLS(unittest.TestCase):
@@ -53,11 +54,64 @@ def err(k, j):
             hf = HermiteFeatures(k, joint=j)
             m = LinearRegression()
             m.fit(hf.fit_transform(x[:n // 2, :]), y[:n // 2])
-            return ((y[n // 2:] - m.predict(hf.fit_transform(x[n // 2:, :])))**2).mean()
+            return ((y[n // 2:] - m.predict(hf.fit_transform(x[n // 2:, :]))) ** 2).mean()
         # TODO: test something rather than just print...
         print([(k, j, err(k, j)) for k in range(2, 15) for j in [False, True]])
 
     def test_2sls_shape(self):
-        pass
+        n = 100
+
+        def make_random(d):
+            sz = (n, d) if d >= 0 else (n,)
+            return np.random.normal(size=sz)
+
+        for d_t in [1, 2]:
+            n_t = d_t if d_t > 0 else 1
+            for d_y in [1, 2]:
+                for d_x in [1, 5]:
+                    for d_z in [1, 2]:
+                        d_w = 1
+                        if d_z >= n_t:
+                            T, Y, X, Z, W = [make_random(d) for d in [d_t, d_y, d_x, d_z, d_w]]
+                            est = NonparametricTwoStageLeastSquares(
+                                t_featurizer=PolynomialFeatures(),
+                                x_featurizer=PolynomialFeatures(),
+                                z_featurizer=PolynomialFeatures(),
+                                dt_featurizer=DPolynomialFeatures())
+
+                            est.fit(Y, T, X, W, Z)
+                            eff = est.effect(X)
+                            marg_eff = est.marginal_effect(T, X)
+
+    def test_marg_eff(self):
+        X = np.random.normal(size=(5000, 2))
+        Z = np.random.normal(size=(5000, 2))
+        W = np.random.normal(size=(5000, 1))
+        # Note: no noise, just testing that we can exactly recover when we ought to be able to
+        T = np.hstack([np.cross(X, Z).reshape(-1, 1) + W, (np.prod(X, axis=1) + np.prod(Z, axis=1)).reshape(-1, 1)])
+        Y = X * T + X**2
+
+        est = NonparametricTwoStageLeastSquares(
+            t_featurizer=PolynomialFeatures(degree=2, interaction_only=False, include_bias=True),
+            x_featurizer=PolynomialFeatures(degree=2, interaction_only=False, include_bias=True),
+            z_featurizer=PolynomialFeatures(degree=2, interaction_only=False, include_bias=True),
+            dt_featurizer=DPolynomialFeatures(degree=2, interaction_only=False, include_bias=True))
+
+        est.fit(Y, T, X, W, Z)
+
+        # pick some arbitrary X
+        X_test = np.array([[0.3, 0.7],
+                           [0.2, 0.1]])
+        eff = est.effect(X_test)  # effect = (X * 1 + X^2) - (X * 0 + X^2) = X
+        np.testing.assert_almost_equal(eff, X_test)
+
+        # pick some arbitrary T
+        T_test = np.array([[-0.3, 0.1],
+                           [0.6, -1.2]])
+        marg_eff = est.marginal_effect(T_test, X_test)  # marg_eff[i, j, k] = X_test[i, j] if j == k, 0 otherwise
+        marg_eff_truth = np.zeros((X_test.shape[0], Y.shape[1], T.shape[1]))
+        marg_eff_truth[:, range(X.shape[1]), range(X.shape[1])] = X_test[:, :]
+        np.testing.assert_almost_equal(marg_eff, marg_eff_truth)
 
     # TODO: this tests that we can run the method; how do we test that the results are reasonable?
     def test_2sls(self):
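The algebra behind test_marg_eff is easy to verify independently: since Y = X * T + X**2 elementwise, the Jacobian ∂Y[:, j]/∂T[:, k] equals X[:, j] when j == k and zero otherwise, which is exactly the marg_eff_truth tensor the test builds. A self-contained finite-difference sketch of that identity (illustrative names, independent of econml):

```python
import numpy as np

# Finite-difference check of the closed form used in test_marg_eff:
# with Y = X * T + X**2 (elementwise), dY[:, j]/dT[:, k] = X[:, j] if j == k else 0.
def structural_y(X, T):
    return X * T + X ** 2

X_test = np.array([[0.3, 0.7], [0.2, 0.1]])
T_test = np.array([[-0.3, 0.1], [0.6, -1.2]])

eps = 1e-6
jac = np.zeros((2, 2, 2))  # (samples, outcomes, treatments)
for k in range(2):
    bump = np.zeros_like(T_test)
    bump[:, k] = eps
    jac[:, :, k] = (structural_y(X_test, T_test + bump) - structural_y(X_test, T_test)) / eps

truth = np.zeros((2, 2, 2))
truth[:, range(2), range(2)] = X_test
np.testing.assert_allclose(jac, truth, atol=1e-5)
```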
econml/two_stage_least_squares.py (84 additions, 4 deletions)
@@ -1,7 +1,7 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # Licensed under the MIT License.
 
-"""Provides a non - parametric two - stage least squares instrumental variable estimator."""
+"""Provides a non-parametric two-stage least squares instrumental variable estimator."""
 
 import numpy as np
 from copy import deepcopy
@@ -11,6 +11,7 @@
 from .cate_estimator import BaseCateEstimator, LinearCateEstimator
 from numpy.polynomial.hermite_e import hermeval
 from sklearn.base import TransformerMixin
+from sklearn.preprocessing import PolynomialFeatures
 from itertools import product
 
 
@@ -85,6 +86,77 @@ def transform(self, X):
         return reshape(np.hstack(columns), (n,) + (ncols,) * self._shift + (-1,))
 
 
+class DPolynomialFeatures(TransformerMixin):
+    """
+    Featurizer that returns the derivatives of :class:`~sklearn.preprocessing.PolynomialFeatures` features in
+    a way that's compatible with the expectations of :class:`.NonparametricTwoStageLeastSquares`'s
+    `dt_featurizer` parameter.
+
+    If the input has shape `(n, x)` and
+    :meth:`PolynomialFeatures.transform<sklearn.preprocessing.PolynomialFeatures.transform>` returns an output
+    of shape `(n, f)`, then :meth:`.transform` will return an array of shape `(n, x, f)`.
+
+    Parameters
+    ----------
+    degree: integer, default = 2
+        The degree of the polynomial features.
+    interaction_only: boolean, default = False
+        If true, only derivatives of interaction features are produced: features that are products of at most
+        degree distinct input features (so not `x[1] ** 2`, `x[0] * x[2] ** 3`, etc.).
+    include_bias: boolean, default = True
+        If True (default), then include the derivative of a bias column, the feature in which all polynomial
+        powers are zero.
+    """
+
+    def __init__(self, degree=2, interaction_only=False, include_bias=True):
+        self.F = PolynomialFeatures(degree=degree, interaction_only=interaction_only, include_bias=include_bias)
+
+    def fit(self, X, y=None):
+        """
+        Compute number of output features.
+
+        Parameters
+        ----------
+        X : array-like, shape (n_samples, n_features)
+            The data.
+        y : array, optional
+            Not used
+
+        Returns
+        -------
+        self : instance
+        """
+        return self
+
+    def transform(self, X):
+        """
+        Transform data to derivatives of polynomial features.
+
+        Parameters
+        ----------
+        X: array-like, shape (n_samples, n_features)
+            The data to transform, row by row.
+
+        Returns
+        -------
+        XP: array-like, shape (n_samples, n_features, n_output_features)
+            The matrix of features, where `n_output_features` is the number of features that
+            would be returned from :class:`~sklearn.preprocessing.PolynomialFeatures`.
+        """
+        self.F.fit(X)
+        powers = self.F.powers_
+        result = np.zeros(X.shape + (self.F.n_output_features_,))
+        for i in range(X.shape[1]):
+            p = powers.copy()
+            c = powers[:, i]
+            p[:, i] -= 1
+            M = np.float_power(X[:, np.newaxis, :], p[np.newaxis, :, :])
+            result[:, i, :] = c[np.newaxis, :] * np.prod(M, axis=-1)
+        return result
+
+
 def _add_ones(arr):
     """Add a column of ones to the front of an array."""
     return np.hstack([np.ones((shape(arr)[0], 1)), arr])
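To make the new featurizer concrete: transform differentiates each polynomial feature with respect to each input column by bringing the exponent down as a coefficient and reducing it by one. A small usage sketch (assuming the import path used by the tests above; the expected values follow from differentiating the degree-2 feature map [1, x0, x1, x0², x0·x1, x1²] by hand):

```python
import numpy as np
from econml.two_stage_least_squares import DPolynomialFeatures

X = np.array([[2.0, 3.0]])
dF = DPolynomialFeatures(degree=2).fit_transform(X)
print(dF.shape)  # (1, 2, 6): (samples, inputs, output features)
print(dF[0, 0])  # d/dx0 of [1, x0, x1, x0^2, x0*x1, x1^2] -> [0, 1, 0, 4, 3, 0]
print(dF[0, 1])  # d/dx1                                   -> [0, 0, 1, 0, 2, 6]
```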
@@ -166,13 +238,21 @@ def fit(self, Y, T, X, W, Z, inference=None):
         # store number of columns of W so that we can create correctly shaped zero array in effect and marginal effect
         self._d_w = shape(W)[1]
         # store number of columns of T so that we can pass scalars to effect
+        # TODO: support vector T and Y
         self._d_t = shape(T)[1]
 
         # two stage approximation
         # first, get basis expansions of T, X, and Z
         ft_X = self._x_featurizer.fit_transform(X)
         ft_Z = self._z_featurizer.fit_transform(Z)
         ft_T = self._t_featurizer.fit_transform(T)
+        # TODO: is it right that the effective number of instruments is the
+        # product of ft_X and ft_Z, not just ft_Z?
+        assert shape(ft_T)[1] <= shape(ft_X)[1] * shape(ft_Z)[1], ("There can be no more T features than the product "
+                                                                   "of the number of X and Z features; otherwise "
+                                                                   "there is not enough information to identify their "
+                                                                   "structure")
 
         # regress T expansion on X,Z expansions concatenated with W
         features = _add_ones(np.hstack([W, cross_product(ft_X, ft_Z)]))
         self._model_T.fit(features, ft_T)
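The new assert encodes the first-stage identification requirement: ft_T is regressed on cross_product(ft_X, ft_Z), so there must be at least as many instrument columns as treatment-feature columns. A quick count for degree-2 featurizers, assuming one-column T, X, and Z purely for illustration:

```python
import numpy as np
from sklearn.preprocessing import PolynomialFeatures

T = np.random.normal(size=(100, 1))
ft_T = PolynomialFeatures(degree=2).fit_transform(T)
print(ft_T.shape[1])  # 3 features: [1, t, t^2]
# With the same featurizer applied to one-column X and Z, the first stage
# has 3 * 3 = 9 cross-product instrument columns, so 3 <= 9 and the check passes.
```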
@@ -223,7 +303,7 @@ def effect(self, X=None, T0=0, T1=1):
 
     def marginal_effect(self, T, X=None):
        """
-        Calculate the heterogeneous marginal effect ∂τ(·,·).
+        Calculate the heterogeneous marginal effect ∂τ(·, ·).
 
         The marginal effect is calculated around a base treatment
         point conditional on a vector of features on a set of m test samples {Tᵢ, Xᵢ}.
@@ -232,7 +312,7 @@
         ----------
         T: (m × dₜ) matrix
             Base treatments for each sample
-        X: optional (m × dₓ) matrix
+        X: optional(m × dₓ) matrix
             Features for each sample
 
         Returns
@@ -250,7 +330,7 @@
         ft_X = self._x_featurizer.fit_transform(X)
         n = shape(T)[0]
         dT = self._dt_featurizer.fit_transform(T)
-        W = np.zeros((n, self._d_w))
+        W = np.zeros((size(T), self._d_w))
         # dT should be an n×dₜ×fₜ array (but if T was a vector, or if there is only one feature,
         # dT may be only 2-dimensional)
         # promote dT to 3D if necessary (e.g. if T was a vector)
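The diff is truncated just after the comments about promoting dT to 3D, so the actual promotion code is not shown here. A hypothetical, self-contained sketch of the kind of step those comments describe (assumed behavior, not the file's actual code):

```python
import numpy as np

# If T was a vector, the dt_featurizer may return a 2-D (n, f_t) array;
# inserting a singleton treatment axis gives the expected (n, 1, f_t) layout.
dT = np.arange(12.0).reshape(4, 3)  # stand-in for a 2-D derivative array
if dT.ndim == 2:
    dT = dT[:, np.newaxis, :]
print(dT.shape)  # (4, 1, 3)
```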
