Commit

Fix bug in marginal effect for 2SLS

kbattocchi committed Feb 14, 2020
1 parent 70cd6d3 commit aad78c1
Showing 2 changed files with 141 additions and 7 deletions.
60 changes: 57 additions & 3 deletions econml/tests/test_two_stage_least_squares.py
@@ -9,8 +9,9 @@
import pytest

from econml.utilities import shape, reshape
- from econml.two_stage_least_squares import NonparametricTwoStageLeastSquares, HermiteFeatures
+ from econml.two_stage_least_squares import (NonparametricTwoStageLeastSquares, HermiteFeatures, DPolynomialFeatures)
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures


class Test2SLS(unittest.TestCase):
@@ -53,11 +54,64 @@ def err(k, j):
hf = HermiteFeatures(k, joint=j)
m = LinearRegression()
m.fit(hf.fit_transform(x[:n // 2, :]), y[:n // 2])
- return ((y[n // 2:] - m.predict(hf.fit_transform(x[n // 2:, :])))**2).mean()
+ return ((y[n // 2:] - m.predict(hf.fit_transform(x[n // 2:, :]))) ** 2).mean()
# TODO: test something rather than just print...
print([(k, j, err(k, j)) for k in range(2, 15) for j in [False, True]])

def test_2sls_shape(self):
- pass
n = 100

def make_random(d):
sz = (n, d) if d >= 0 else (n,)
return np.random.normal(size=sz)

for d_t in [1, 2]:
n_t = d_t if d_t > 0 else 1
for d_y in [1, 2]:
for d_x in [1, 5]:
for d_z in [1, 2]:
d_w = 1
if d_z >= n_t:
T, Y, X, Z, W = [make_random(d) for d in [d_t, d_y, d_x, d_z, d_w]]
est = NonparametricTwoStageLeastSquares(
t_featurizer=PolynomialFeatures(),
x_featurizer=PolynomialFeatures(),
z_featurizer=PolynomialFeatures(),
dt_featurizer=DPolynomialFeatures())

est.fit(Y, T, X, W, Z)
eff = est.effect(X)
marg_eff = est.marginal_effect(T, X)
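# Hypothetical extension (not part of this commit): the calls above only
# exercise the API; under the usual CATE conventions for 2-D Y and T, one
# could also pin down the output shapes, e.g.
#     assert shape(eff) == (n, d_y)
#     assert shape(marg_eff) == (n, d_y, d_t)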

def test_marg_eff(self):
X = np.random.normal(size=(5000, 2))
Z = np.random.normal(size=(5000, 2))
W = np.random.normal(size=(5000, 1))
# Note: no noise, just testing that we can exactly recover when we ought to be able to
T = np.hstack([np.cross(X, Z).reshape(-1, 1) + W, (np.prod(X, axis=1) + np.prod(Z, axis=1)).reshape(-1, 1)])
Y = X * T + X**2

est = NonparametricTwoStageLeastSquares(
t_featurizer=PolynomialFeatures(degree=2, interaction_only=False, include_bias=True),
x_featurizer=PolynomialFeatures(degree=2, interaction_only=False, include_bias=True),
z_featurizer=PolynomialFeatures(degree=2, interaction_only=False, include_bias=True),
dt_featurizer=DPolynomialFeatures(degree=2, interaction_only=False, include_bias=True))

est.fit(Y, T, X, W, Z)

# pick some arbitrary X
X_test = np.array([[0.3, 0.7],
[0.2, 0.1]])
eff = est.effect(X_test) # effect = (X * 1 + X^2) - (X * 0 + X^2) = X
np.testing.assert_almost_equal(eff, X_test)

# pick some arbitrary T
T_test = np.array([[-0.3, 0.1],
[0.6, -1.2]])
marg_eff = est.marginal_effect(T_test, X_test) # marg effect_{i,j} = X_i if i=j, 0 otherwise
marg_eff_truth = np.zeros((X_test.shape[0], Y.shape[1], T.shape[1]))
marg_eff_truth[:, range(X.shape[1]), range(X.shape[1])] = X_test[:, :]
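# (The paired range(...) index arrays broadcast together, so this assignment
# fills only the j == k diagonal of each sample's d_y × d_t Jacobian while
# the off-diagonal entries stay zero.)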
np.testing.assert_almost_equal(marg_eff, marg_eff_truth)

# TODO: this tests that we can run the method; how do we test that the results are reasonable?
def test_2sls(self):
88 changes: 84 additions & 4 deletions econml/two_stage_least_squares.py
@@ -1,7 +1,7 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

"""Provides a non - parametric two - stage least squares instrumental variable estimator."""
"""Provides a non-parametric two-stage least squares instrumental variable estimator."""

import numpy as np
from copy import deepcopy
@@ -11,6 +11,7 @@
from .cate_estimator import BaseCateEstimator, LinearCateEstimator
from numpy.polynomial.hermite_e import hermeval
from sklearn.base import TransformerMixin
from sklearn.preprocessing import PolynomialFeatures
from itertools import product


@@ -85,6 +86,77 @@ def transform(self, X):
return reshape(np.hstack(columns), (n,) + (ncols,) * self._shift + (-1,))


class DPolynomialFeatures(TransformerMixin):
"""
Featurizer that returns the derivatives of :class:`~sklearn.preprocessing.PolynomialFeatures` features in
a way that's compatible with the expectations of :class:`.NonparametricTwoStageLeastSquares`'s
`dt_featurizer` parameter.
If the input has shape `(n, x)` and
:meth:`PolynomialFeatures.transform<sklearn.preprocessing.PolynomialFeatures.transform>` returns an output
of shape `(n, f)`, then :meth:`.transform` will return an array of shape `(n, x, f)`.
Parameters
----------
degree: integer, default = 2
The degree of the polynomial features.
interaction_only: boolean, default = False
If true, only derivatives of interaction features are produced: features that are products of at most degree
distinct input features (so not `x[1] ** 2`, `x[0] * x[2] ** 3`, etc.).
include_bias: boolean, default = True
If True (default), then include the derivative of a bias column, the feature in which all polynomial powers
are zero.
"""

def __init__(self, degree=2, interaction_only=False, include_bias=True):
self.F = PolynomialFeatures(degree=degree, interaction_only=interaction_only, include_bias=include_bias)

def fit(self, X, y=None):
"""
Compute number of output features.
Parameters
----------
X : array-like, shape (n_samples, n_features)
The data.
y : array, optional
Not used
Returns
-------
self : instance
"""
return self

def transform(self, X):
"""
Transform data to derivatives of polynomial features
Parameters
----------
X: array-like, shape (n_samples, n_features)
The data to transform, row by row.
Returns
-------
XP: array-like, shape (n_samples, n_features, n_output_features)
The matrix of features, where `n_output_features` is the number of features that
would be returned from :class:`~sklearn.preprocessing.PolynomialFeatures`.
"""
self.F.fit(X)
powers = self.F.powers_
result = np.zeros(X.shape + (self.F.n_output_features_,))
for i in range(X.shape[1]):
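# Each output feature is a monomial prod_j x[j] ** p[j]; its partial
# derivative w.r.t. x[i] is p[i] * x[i] ** (p[i] - 1) * prod_{j != i} x[j] ** p[j],
# which is exactly c times the product over the reduced-power matrix below.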
p = powers.copy()
c = powers[:, i]
p[:, i] -= 1
M = np.float_power(X[:, np.newaxis, :], p[np.newaxis, :, :])
result[:, i, :] = c[np.newaxis, :] * np.prod(M, axis=-1)
return result
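
As a quick, hypothetical sanity check (not part of the commit): with two inputs and degree 2, scikit-learn orders the expansion as [1, x0, x1, x0², x0·x1, x1²], so the derivatives at a sample point can be verified by hand:

import numpy as np
X = np.array([[2.0, 3.0]])
D = DPolynomialFeatures(degree=2).fit_transform(X)  # shape (1, 2, 6)
# d/dx0 of [1, x0, x1, x0^2, x0*x1, x1^2] at (2, 3)
np.testing.assert_allclose(D[0, 0], [0., 1., 0., 4., 3., 0.])
# d/dx1 of the same features at (2, 3)
np.testing.assert_allclose(D[0, 1], [0., 0., 1., 0., 2., 6.])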


def _add_ones(arr):
"""Add a column of ones to the front of an array."""
return np.hstack([np.ones((shape(arr)[0], 1)), arr])
@@ -166,13 +238,21 @@ def fit(self, Y, T, X, W, Z, inference=None):
# store number of columns of W so that we can create correctly shaped zero array in effect and marginal effect
self._d_w = shape(W)[1]
# store number of columns of T so that we can pass scalars to effect
# TODO: support vector T and Y
self._d_t = shape(T)[1]

# two stage approximation
# first, get basis expansions of T, X, and Z
ft_X = self._x_featurizer.fit_transform(X)
ft_Z = self._z_featurizer.fit_transform(Z)
ft_T = self._t_featurizer.fit_transform(T)
# TODO: is it right that the effective number of instruments is the
# product of ft_X and ft_Z, not just ft_Z?
assert shape(ft_T)[1] <= shape(ft_X)[1] * shape(ft_Z)[1], ("There can be no more T features than the product "
"of the number of X and Z features; otherwise "
"there is not enough information to identify their "
"structure")

# regress T expansion on X,Z expansions concatenated with W
features = _add_ones(np.hstack([W, cross_product(ft_X, ft_Z)]))
self._model_T.fit(features, ft_T)
@@ -223,7 +303,7 @@ def effect(self, X=None, T0=0, T1=1):

def marginal_effect(self, T, X=None):
"""
- Calculate the heterogeneous marginal effect ∂τ(·,·).
+ Calculate the heterogeneous marginal effect ∂τ(·, ·).
The marginal effect is calculated around a base treatment
point conditional on a vector of features on a set of m test samples {Tᵢ, Xᵢ}.
@@ -232,7 +312,7 @@
----------
T: (m × dₜ) matrix
Base treatments for each sample
- X: optional (m × dₓ) matrix
+ X: optional(m × dₓ) matrix
Features for each sample
Returns
@@ -250,7 +330,7 @@ def marginal_effect(self, T, X=None):
ft_X = self._x_featurizer.fit_transform(X)
n = shape(T)[0]
dT = self._dt_featurizer.fit_transform(T)
- W = np.zeros((size(T), self._d_w))
+ W = np.zeros((n, self._d_w))
# dT should be an n×dₜ×fₜ array (but if T was a vector, or if there is only one feature,
# dT may be only 2-dimensional)
# promote dT to 3D if necessary (e.g. if T was a vector)
Expand Down
