Fix bug in marginal effect for 2SLS #220

Merged · 1 commit · Feb 14, 2020
60 changes: 57 additions & 3 deletions econml/tests/test_two_stage_least_squares.py
@@ -9,8 +9,9 @@
 import pytest

 from econml.utilities import shape, reshape
-from econml.two_stage_least_squares import NonparametricTwoStageLeastSquares, HermiteFeatures
+from econml.two_stage_least_squares import (NonparametricTwoStageLeastSquares, HermiteFeatures, DPolynomialFeatures)
 from sklearn.linear_model import LinearRegression
+from sklearn.preprocessing import PolynomialFeatures


 class Test2SLS(unittest.TestCase):
@@ -53,11 +54,64 @@ def err(k, j):
             hf = HermiteFeatures(k, joint=j)
             m = LinearRegression()
             m.fit(hf.fit_transform(x[:n // 2, :]), y[:n // 2])
-            return ((y[n // 2:] - m.predict(hf.fit_transform(x[n // 2:, :])))**2).mean()
+            return ((y[n // 2:] - m.predict(hf.fit_transform(x[n // 2:, :]))) ** 2).mean()
         # TODO: test something rather than just print...
         print([(k, j, err(k, j)) for k in range(2, 15) for j in [False, True]])

     def test_2sls_shape(self):
-        pass
+        n = 100
+
+        def make_random(d):
+            sz = (n, d) if d >= 0 else (n,)
+            return np.random.normal(size=sz)
+
+        for d_t in [1, 2]:
+            n_t = d_t if d_t > 0 else 1
+            for d_y in [1, 2]:
+                for d_x in [1, 5]:
+                    for d_z in [1, 2]:
+                        d_w = 1
+                        if d_z >= n_t:
+                            T, Y, X, Z, W = [make_random(d) for d in [d_t, d_y, d_x, d_z, d_w]]
+                            est = NonparametricTwoStageLeastSquares(
+                                t_featurizer=PolynomialFeatures(),
+                                x_featurizer=PolynomialFeatures(),
+                                z_featurizer=PolynomialFeatures(),
+                                dt_featurizer=DPolynomialFeatures())
+
+                            est.fit(Y, T, X, W, Z)
+                            eff = est.effect(X)
+                            marg_eff = est.marginal_effect(T, X)
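An aside on the `if d_z >= n_t` guard above (my reading, not stated in the PR): this is the usual instrumental-variables order condition, so the estimator is only fit when there are at least as many instrument columns as treatment columns; with fewer, the first stage cannot pin down all treatment features.

```python
# Illustrative only: which (d_t, d_z) combinations the loop above actually fits.
for d_t in [1, 2]:
    for d_z in [1, 2]:
        print(d_t, d_z, "fit" if d_z >= d_t else "skipped")  # only (2, 1) is skipped
```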

+    def test_marg_eff(self):
+        X = np.random.normal(size=(5000, 2))
+        Z = np.random.normal(size=(5000, 2))
+        W = np.random.normal(size=(5000, 1))
+        # Note: no noise, just testing that we can exactly recover when we ought to be able to
+        T = np.hstack([np.cross(X, Z).reshape(-1, 1) + W, (np.prod(X, axis=1) + np.prod(Z, axis=1)).reshape(-1, 1)])
+        Y = X * T + X**2
+
+        est = NonparametricTwoStageLeastSquares(
+            t_featurizer=PolynomialFeatures(degree=2, interaction_only=False, include_bias=True),
+            x_featurizer=PolynomialFeatures(degree=2, interaction_only=False, include_bias=True),
+            z_featurizer=PolynomialFeatures(degree=2, interaction_only=False, include_bias=True),
+            dt_featurizer=DPolynomialFeatures(degree=2, interaction_only=False, include_bias=True))
+
+        est.fit(Y, T, X, W, Z)
+
+        # pick some arbitrary X
+        X_test = np.array([[0.3, 0.7],
+                           [0.2, 0.1]])
+        eff = est.effect(X_test)  # effect = (X * 1 + X^2) - (X * 0 + X^2) = X
+        np.testing.assert_almost_equal(eff, X_test)
+
+        # pick some arbitrary T
+        T_test = np.array([[-0.3, 0.1],
+                           [0.6, -1.2]])
+        marg_eff = est.marginal_effect(T_test, X_test)  # marg effect_{i,j} = X_i if i=j, 0 otherwise
+        marg_eff_truth = np.zeros((X_test.shape[0], Y.shape[1], T.shape[1]))
+        marg_eff_truth[:, range(X.shape[1]), range(X.shape[1])] = X_test[:, :]
+        np.testing.assert_almost_equal(marg_eff, marg_eff_truth)

     # TODO: this tests that we can run the method; how do we test that the results are reasonable?
     def test_2sls(self):
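A quick numerical sanity check of the hand-derived truth used in `test_marg_eff` (illustrative, not part of the commit): for Y = X * T + X**2 taken elementwise, ∂Yⱼ/∂Tₖ = Xⱼ when j = k and 0 otherwise, which is exactly the diagonal structure asserted for `marg_eff_truth`.

```python
# Finite-difference check that dY_j/dT_k = X_j if j == k else 0
# for the structural equation Y = X * T + X**2 used in the test.
import numpy as np

X0 = np.array([0.3, 0.7])
T0 = np.array([-0.3, 0.1])

def y(t):
    return X0 * t + X0 ** 2

eps = 1e-6
for k in range(2):
    step = np.zeros(2)
    step[k] = eps
    grad = (y(T0 + step) - y(T0)) / eps  # numerical dY/dT_k
    expected = np.zeros(2)
    expected[k] = X0[k]
    np.testing.assert_allclose(grad, expected, atol=1e-5)
```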
88 changes: 84 additions & 4 deletions econml/two_stage_least_squares.py
@@ -1,7 +1,7 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

"""Provides a non - parametric two - stage least squares instrumental variable estimator."""
"""Provides a non-parametric two-stage least squares instrumental variable estimator."""

import numpy as np
from copy import deepcopy
@@ -11,6 +11,7 @@
 from .cate_estimator import BaseCateEstimator, LinearCateEstimator
 from numpy.polynomial.hermite_e import hermeval
 from sklearn.base import TransformerMixin
+from sklearn.preprocessing import PolynomialFeatures
 from itertools import product

@@ -85,6 +86,77 @@ def transform(self, X):
         return reshape(np.hstack(columns), (n,) + (ncols,) * self._shift + (-1,))


+class DPolynomialFeatures(TransformerMixin):
+    """
+    Featurizer that returns the derivatives of :class:`~sklearn.preprocessing.PolynomialFeatures` features in
+    a way that's compatible with the expectations of :class:`.NonparametricTwoStageLeastSquares`'s
+    `dt_featurizer` parameter.
+
+    If the input has shape `(n, x)` and
+    :meth:`PolynomialFeatures.transform<sklearn.preprocessing.PolynomialFeatures.transform>` returns an output
+    of shape `(n, f)`, then :meth:`.transform` will return an array of shape `(n, x, f)`.
+
+    Parameters
+    ----------
+    degree: integer, default = 2
+        The degree of the polynomial features.
+
+    interaction_only: boolean, default = False
+        If true, only derivatives of interaction features are produced: features that are products of at most
+        degree distinct input features (so not `x[1] ** 2`, `x[0] * x[2] ** 3`, etc.).
+
+    include_bias: boolean, default = True
+        If True (default), then include the derivative of a bias column, the feature in which all polynomial
+        powers are zero.
+    """
+
+    def __init__(self, degree=2, interaction_only=False, include_bias=True):
+        self.F = PolynomialFeatures(degree=degree, interaction_only=interaction_only, include_bias=include_bias)
+
+    def fit(self, X, y=None):
+        """
+        Compute number of output features.
+
+        Parameters
+        ----------
+        X : array-like, shape (n_samples, n_features)
+            The data.
+        y : array, optional
+            Not used.
+
+        Returns
+        -------
+        self : instance
+        """
+        return self
+
+    def transform(self, X):
+        """
+        Transform data to derivatives of polynomial features.
+
+        Parameters
+        ----------
+        X: array-like, shape (n_samples, n_features)
+            The data to transform, row by row.
+
+        Returns
+        -------
+        XP: array-like, shape (n_samples, n_features, n_output_features)
+            The matrix of features, where `n_output_features` is the number of features that
+            would be returned from :class:`~sklearn.preprocessing.PolynomialFeatures`.
+        """
+        self.F.fit(X)
+        powers = self.F.powers_
+        result = np.zeros(X.shape + (self.F.n_output_features_,))
+        for i in range(X.shape[1]):
+            p = powers.copy()
+            c = powers[:, i]
+            p[:, i] -= 1
+            M = np.float_power(X[:, np.newaxis, :], p[np.newaxis, :, :])
+            result[:, i, :] = c[np.newaxis, :] * np.prod(M, axis=-1)
+        return result


 def _add_ones(arr):
     """Add a column of ones to the front of an array."""
     return np.hstack([np.ones((shape(arr)[0], 1)), arr])
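A small usage sketch of the new featurizer (assuming the import path used in the test file above; the expected values are hand-checked rather than taken from the PR): sklearn's `PolynomialFeatures` orders degree-2 features of `(x0, x1)` as `[1, x0, x1, x0**2, x0*x1, x1**2]`, so each derivative slice of the output can be verified directly.

```python
# Illustrative check of DPolynomialFeatures (assumed import path, per the tests).
# For features [1, x0, x1, x0**2, x0*x1, x1**2]:
#   d/dx0 = [0, 1, 0, 2*x0, x1, 0] and d/dx1 = [0, 0, 1, 0, x0, 2*x1].
import numpy as np
from econml.two_stage_least_squares import DPolynomialFeatures

X = np.array([[2.0, 3.0]])
D = DPolynomialFeatures(degree=2).fit(X).transform(X)
assert D.shape == (1, 2, 6)  # (n_samples, n_features, n_output_features)
np.testing.assert_allclose(D[0, 0], [0., 1., 0., 4., 3., 0.])  # d/dx0 at (2, 3)
np.testing.assert_allclose(D[0, 1], [0., 0., 1., 0., 2., 6.])  # d/dx1 at (2, 3)
```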
@@ -166,13 +238,21 @@ def fit(self, Y, T, X, W, Z, inference=None):
         # store number of columns of W so that we can create correctly shaped zero array in effect and marginal effect
         self._d_w = shape(W)[1]
         # store number of columns of T so that we can pass scalars to effect
+        # TODO: support vector T and Y
         self._d_t = shape(T)[1]

         # two stage approximation
         # first, get basis expansions of T, X, and Z
         ft_X = self._x_featurizer.fit_transform(X)
         ft_Z = self._z_featurizer.fit_transform(Z)
         ft_T = self._t_featurizer.fit_transform(T)
+        # TODO: is it right that the effective number of instruments is the
+        # product of ft_X and ft_Z, not just ft_Z?
+        assert shape(ft_T)[1] <= shape(ft_X)[1] * shape(ft_Z)[1], ("There can be no more T features than the product "
+                                                                   "of the number of X and Z features; otherwise "
+                                                                   "there is not enough information to identify their "
+                                                                   "structure")
+
         # regress T expansion on X,Z expansions concatenated with W
         features = _add_ones(np.hstack([W, cross_product(ft_X, ft_Z)]))
         self._model_T.fit(features, ft_T)
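A rough numeric illustration of this identification check (assuming degree-2 `PolynomialFeatures` for all three featurizers, as in the new tests): two-column T, X, and Z each expand to 6 features, and 6 ≤ 6 × 6, so the assertion passes.

```python
# Counting featurized columns for the assertion above (illustrative).
import numpy as np
from sklearn.preprocessing import PolynomialFeatures

T, X, Z = (np.random.normal(size=(100, 2)) for _ in range(3))
f_t = PolynomialFeatures(degree=2).fit_transform(T).shape[1]
f_x = PolynomialFeatures(degree=2).fit_transform(X).shape[1]
f_z = PolynomialFeatures(degree=2).fit_transform(Z).shape[1]
assert (f_t, f_x, f_z) == (6, 6, 6)
assert f_t <= f_x * f_z  # 6 <= 36: enough instrument features to identify the T features
```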
@@ -223,7 +303,7 @@ def effect(self, X=None, T0=0, T1=1):

     def marginal_effect(self, T, X=None):
         """
-        Calculate the heterogeneous marginal effect ∂τ(·,·).
+        Calculate the heterogeneous marginal effect ∂τ(·, ·).

         The marginal effect is calculated around a base treatment
         point conditional on a vector of features on a set of m test samples {Tᵢ, Xᵢ}.
@@ -232,7 +312,7 @@
         ----------
         T: (m × dₜ) matrix
             Base treatments for each sample
-        X: optional (m × dₓ) matrix
+        X: optional(m × dₓ) matrix
             Features for each sample

         Returns
@@ -250,7 +330,7 @@
         ft_X = self._x_featurizer.fit_transform(X)
         n = shape(T)[0]
         dT = self._dt_featurizer.fit_transform(T)
-        W = np.zeros((n, self._d_w))
+        W = np.zeros((size(T), self._d_w))
@kbattocchi (Collaborator, Author) commented on lines -253 to +333 on Feb 12, 2020:
This is the only real change in this PR - the shape of W was wrong when there was more than a single treatment.

The other changes are shoring up our tests for this class and adding a DPolynomialFeatures class that can be used for dt_featurizer when t_featurizer is a PolynomialFeatures instance.
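Shape arithmetic behind the fix, as I read the comment above (illustrative): `size(T)` is n · dₜ, so with more than one treatment the zero-padded W now has one row per (sample, treatment) pair rather than one per sample.

```python
# Illustrative: why np.zeros((n, d_w)) was the wrong shape when d_t > 1.
import numpy as np

n, d_t, d_w = 100, 2, 1
T = np.zeros((n, d_t))
W_old = np.zeros((n, d_w))             # before the fix: 100 rows
W_fixed = np.zeros((np.size(T), d_w))  # after the fix: n * d_t = 200 rows
assert W_fixed.shape[0] == n * d_t     # the two differ exactly when d_t > 1
```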

         # dT should be an n×dₜ×fₜ array (but if T was a vector, or if there is only one feature,
         # dT may be only 2-dimensional)
         # promote dT to 3D if necessary (e.g. if T was a vector)
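A minimal sketch of the promotion these comments describe (assumption: the featurizer returned a 2-D array because T was a vector):

```python
# If dT comes back as (n, f_t), insert a singleton treatment axis so the
# rest of the code can rely on an (n, d_t, f_t) layout.
import numpy as np

dT = np.ones((100, 4))         # (n, f_t): T was a vector
if dT.ndim < 3:
    dT = dT[:, np.newaxis, :]  # -> (n, 1, f_t)
assert dT.shape == (100, 1, 4)
```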