diff --git a/README.md b/README.md index c98c099cb..21ce547ab 100644 --- a/README.md +++ b/README.md @@ -3,8 +3,6 @@ [![PyPI wheel](https://img.shields.io/pypi/wheel/econml.svg)](https://pypi.org/project/econml/) [![Supported Python versions](https://img.shields.io/pypi/pyversions/econml.svg)](https://pypi.org/project/econml/) - -

econml-logo @@ -51,6 +49,8 @@ For information on use cases and background material on causal inference and het # News +If you'd like to contribute to this project, see the [Help Wanted](#help-wanted) section below. + **February 12, 2024:** Release v0.15.0, see release notes [here](https://github.com/py-why/EconML/releases/tag/v0.15.0)
Previous releases @@ -665,11 +665,15 @@ You can get started by cloning this repository. We use We rely on some recent features of setuptools, so make sure to upgrade to a recent version with `pip install setuptools --upgrade`. Then from your local copy of the repository you can run `pip install -e .` to get started (but depending on what you're doing you might want to install with extras instead, like `pip install -e .[plt]` if you want to use matplotlib integration, or you can use `pip install -e .[all]` to include all extras). +## Help wanted + +If you're looking to contribute to the project, we have a number of issues tagged with the [`help wanted`](https://github.com/py-why/EconML/issues?q=is%3Aopen+is%3Aissue+label%3A%22help+wanted%22) label. These are valuable improvements to the library that our team currently does not have time to prioritize, and we would greatly appreciate community-initiated PRs for them. + ## Running the tests -This project uses [pytest](https://docs.pytest.org/) for testing. To run tests locally after installing the package, you can use `pip install pytest-runner` followed by `python setup.py pytest`. +This project uses [pytest](https://docs.pytest.org/) for testing. To run all tests locally after installing the package, you can use `pip install pytest-runner` followed by `python setup.py pytest`. -We have added pytest marks to some tests to make it easier to run a subset, and you can set the PYTEST_ADDOPTS environment variable to take advantage of this. For instance, you can set it to `-m "not (notebook or automl)"` to skip notebook and automl tests that have some additional dependencies. +However, running all tests can be very time-consuming, so you may prefer to run just a relevant subset of tests when developing locally. 
The easiest way to do this is to rely on `pytest`'s compatibility with `unittest`, so you can just run `python -m unittest econml.tests.test_module` to run all tests in a given module, or `python -m unittest econml.tests.test_module.TestClass` to run all tests in a given class. You can also run `python -m unittest econml.tests.test_module.TestClass.test_method` to run a single test method. ## Generating the documentation @@ -691,6 +695,8 @@ We use GitHub Actions to build and publish the package and documentation. To cr # Blogs and Publications +* May 2021: [Be Careful When Interpreting Predictive Models in Search of Causal Insights](https://towardsdatascience.com/be-careful-when-interpreting-predictive-models-in-search-of-causal-insights-e68626e664b6) + * June 2019: [Treatment Effects with Instruments paper](https://arxiv.org/pdf/1905.10176.pdf) * May 2019: [Open Data Science Conference Workshop](https://odsc.com/speakers/machine-learning-estimation-of-heterogeneous-treatment-effect-the-microsoft-econml-library/) diff --git a/econml/_ensemble/_ensemble.py b/econml/_ensemble/_ensemble.py index cdc23da4c..fc37bac59 100644 --- a/econml/_ensemble/_ensemble.py +++ b/econml/_ensemble/_ensemble.py @@ -13,9 +13,14 @@ import numpy as np from abc import ABCMeta, abstractmethod from sklearn.base import BaseEstimator, clone -from sklearn.utils import _print_elapsed_time from sklearn.utils import check_random_state from joblib import effective_n_jobs +from packaging.version import parse +import sklearn +if parse(sklearn.__version__) < parse("1.5"): + from sklearn.utils import _print_elapsed_time +else: + from sklearn.utils._user_interface import _print_elapsed_time def _fit_single_estimator(estimator, X, y, sample_weight=None, diff --git a/econml/dml/causal_forest.py b/econml/dml/causal_forest.py index bbe44008f..8babc6d18 100644 --- a/econml/dml/causal_forest.py +++ b/econml/dml/causal_forest.py @@ -567,7 +567,7 @@ class CausalForestDML(_BaseDML): ate_ : ndarray of shape 
(n_outcomes, n_treatments) The average constant marginal treatment effect of each treatment for each outcome, averaged over the training data and with a doubly robust correction. Available only - when `discrete_treatment=True` and `oob=True`. + when `discrete_treatment=True` and `drate=True`. ate_stderr_ : ndarray of shape (n_outcomes, n_treatments) The standard error of the `ate_` attribute. feature_importances_ : ndarray of shape (n_features,) @@ -997,7 +997,7 @@ def att__inference(self, *, T): Inference results information for the `att_` attribute, which is the average constant marginal treatment effect of each treatment for each outcome, averaged over the training data treated with treatment T and with a doubly robust correction. - Available only when `discrete_treatment=True` and `oob=True`. + Available only when `discrete_treatment=True` and `drate=True`. """ return NormalInferenceResults(d_t=self._d_t[0] if self._d_t else 1, d_y=self._d_y[0] if self._d_y else 1, diff --git a/econml/policy/_drlearner.py b/econml/policy/_drlearner.py index a3018d197..66f5913c4 100644 --- a/econml/policy/_drlearner.py +++ b/econml/policy/_drlearner.py @@ -854,7 +854,8 @@ def _gen_drpolicy_learner(self): cv=self.cv, mc_iters=self.mc_iters, mc_agg=self.mc_agg, - model_final=PolicyForest(max_depth=self.max_depth, + model_final=PolicyForest(n_estimators=self.n_estimators, + max_depth=self.max_depth, min_samples_split=self.min_samples_split, min_samples_leaf=self.min_samples_leaf, min_weight_fraction_leaf=self.min_weight_fraction_leaf, diff --git a/econml/solutions/causal_analysis/_causal_analysis.py b/econml/solutions/causal_analysis/_causal_analysis.py index 422572b22..7a1e72df9 100644 --- a/econml/solutions/causal_analysis/_causal_analysis.py +++ b/econml/solutions/causal_analysis/_causal_analysis.py @@ -30,7 +30,12 @@ # TODO: this utility is documented but internal; reimplement? from sklearn.utils import _safe_indexing # TODO: this utility is even less public... 
-from sklearn.utils import _get_column_indices +from packaging.version import parse +import sklearn +if parse(sklearn.__version__) < parse("1.5"): + from sklearn.utils import _get_column_indices +else: + from sklearn.utils._indexing import _get_column_indices class _CausalInsightsConstants: diff --git a/econml/tests/test_policy_forest.py b/econml/tests/test_policy_forest.py index 762c7a55c..caf2016e8 100644 --- a/econml/tests/test_policy_forest.py +++ b/econml/tests/test_policy_forest.py @@ -321,8 +321,8 @@ def test_non_standard_input(self,): model_propensity=DummyClassifier(strategy='uniform'), featurizer=PolynomialFeatures(degree=1, include_bias=False), cv=GroupKFold(n_splits=2), - n_estimators=20, n_jobs=1, random_state=123).fit(y, T, X=X, - groups=groups) + n_estimators=100, n_jobs=1, random_state=123).fit(y, T, X=X, + groups=groups) mask = np.abs(Xraw[:, 0]) > .1 np.testing.assert_allclose(pred[mask], forest.predict(Xraw[mask])) np.testing.assert_allclose(pred_val[mask, 1] - pred_val[mask, 0], diff --git a/pyproject.toml b/pyproject.toml index b7114e94f..19802ad33 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,9 +20,9 @@ classifiers = [ "Operating System :: POSIX :: Linux" ] dependencies = [ - "numpy", + "numpy<2", "scipy > 1.4.0", - "scikit-learn >= 1.0, < 1.5", + "scikit-learn >= 1.0, < 1.6", "sparse", "joblib >= 0.13.0", "statsmodels >= 0.10",