Skip to content

Commit

Permalink
Move fixed propensity model to utils. (#72)
Browse files Browse the repository at this point in the history
* Move fixed propensity model to utils.

* Add changelog entry.

* Add tests.

* Fix typo.

* Fix class reference.
  • Loading branch information
kklein authored Aug 2, 2024
1 parent 3cad00e commit 6d8bc68
Show file tree
Hide file tree
Showing 4 changed files with 120 additions and 45 deletions.
6 changes: 4 additions & 2 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,17 @@ Changelog

**New features**

* Added :meth:`metalearners.metalearner.MetaLearner.init_params`.
* Add :meth:`metalearners.metalearner.MetaLearner.init_params`.

* Add :class:`metalearners.utils.FixedBinaryPropensity`.


0.8.0 (2024-07-22)
------------------

**New features**

* Added :meth:`metalearners.metalearner.MetaLearner.fit_all_nuisance` and
* Add :meth:`metalearners.metalearner.MetaLearner.fit_all_nuisance` and
:meth:`metalearners.metalearner.MetaLearner.fit_all_treatment`.

* Add optional ``store_raw_results`` and ``store_results`` parameters to :class:`metalearners.grid_search.MetaLearnerGridSearch`.
Expand Down
66 changes: 24 additions & 42 deletions docs/examples/example_propensity.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,8 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Creating our own estimator\n",
"--------------------------\n",
"Using a dummy estimator\n",
"-----------------------\n",
"\n",
"In this tutorial we will assume that we know that all observations were assigned to the\n",
"treatment with a fixed probability of 0.3, which is close to the fraction of the observations\n",
Expand Down Expand Up @@ -89,43 +89,10 @@
"dataset, we just use it for illustrational purposes.\n",
"```\n",
"\n",
"Now we can define our custom ``sklearn``-like classifier. We recommend inheriting from\n",
"the ``sklearn`` base classes and following the rules explained in the\n",
"[sklearn documentation](https://scikit-learn.org/stable/developers/develop.html) to avoid\n",
"having to define helper functions and ensure the correct functionality of the ``metalearners``\n",
"library."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"vscode": {
"languageId": "plaintext"
}
},
"outputs": [],
"source": [
"from sklearn.base import BaseEstimator, ClassifierMixin\n",
"from typing import Any\n",
"from typing_extensions import Self\n",
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"\n",
"class FixedPropensityModel(ClassifierMixin, BaseEstimator):\n",
" def __init__(self, propensity_score: float) -> None:\n",
" self.propensity_score = propensity_score\n",
"\n",
" def fit(self, X: pd.DataFrame, y: pd.Series) -> Self:\n",
" self.classes_ = np.unique(y.to_numpy()) # sklearn requires this\n",
" return self\n",
"\n",
" def predict(self, X: pd.DataFrame) -> np.ndarray[Any, Any]:\n",
" return np.argmax(self.predict_proba(X), axis=1)\n",
"\n",
" def predict_proba(self, X: pd.DataFrame) -> np.ndarray[Any, Any]:\n",
" return np.full((len(X), 2), [1 - self.propensity_score, self.propensity_score])"
"Now we can use a custom ``sklearn``-like classifier: {class}`~metalearners.utils.FixedBinaryPropensity`.\n",
"The latter can be used like any ``sklearn`` classifier but will always return the same propensity,\n",
"independently of the observed covariates. This propensity has to be provided at initialization via the\n",
"``propensity_score`` parameter."
]
},
{
Expand All @@ -149,11 +116,12 @@
"outputs": [],
"source": [
"from metalearners import RLearner\n",
"from metalearners.utils import FixedBinaryPropensity\n",
"from lightgbm import LGBMRegressor\n",
"\n",
"rlearner = RLearner(\n",
" nuisance_model_factory=LGBMRegressor,\n",
" propensity_model_factory=FixedPropensityModel,\n",
" propensity_model_factory=FixedBinaryPropensity,\n",
" treatment_model_factory=LGBMRegressor,\n",
" nuisance_model_params={\"verbose\": -1},\n",
" propensity_model_params={\"propensity_score\": 0.3},\n",
Expand Down Expand Up @@ -205,10 +173,24 @@
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python"
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
"nbformat_minor": 4
}
32 changes: 32 additions & 0 deletions metalearners/utils.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,14 @@
# Copyright (c) QuantCo 2024-2024
# SPDX-License-Identifier: BSD-3-Clause

from typing import Any

import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, ClassifierMixin
from typing_extensions import Self

from metalearners._typing import Matrix, Vector
from metalearners.drlearner import DRLearner
from metalearners.metalearner import MetaLearner
from metalearners.rlearner import RLearner
Expand Down Expand Up @@ -73,3 +79,29 @@ def simplify_output(tensor: np.ndarray) -> np.ndarray:
if n_outputs == 2:
return tensor[:, :, 1].reshape(n_obs, n_variants)
return tensor


class FixedBinaryPropensity(ClassifierMixin, BaseEstimator):
    """Binary classifier propensity dummy model which outputs a fixed propensity,
    independently of covariates.

    ``propensity_score`` is the probability reported in the second column of
    :meth:`predict_proba`; the first column holds ``1 - propensity_score``.
    """

    def __init__(self, propensity_score: float) -> None:
        """Store the fixed propensity score.

        Raises a ``ValueError`` if ``propensity_score`` does not lie in ``[0, 1]``.
        """
        # NOTE(review): the range is only checked here; a value assigned after
        # construction (e.g. presumably via ``set_params``) is not re-validated.
        if not 0 <= propensity_score <= 1:
            raise ValueError(
                f"Expected a propensity score between 0 and 1 but got {propensity_score}."
            )
        self.propensity_score = propensity_score

    def fit(self, X: Matrix, y: Vector) -> Self:
        """Record the distinct values of ``y`` as ``classes_`` and return ``self``.

        ``X`` is not used. Raises a ``ValueError`` if ``y`` contains more than
        two distinct values.
        """
        self.classes_ = np.unique(y)  # sklearn requires this
        if (n_classes := len(self.classes_)) > 2:
            # Derive the name from the instance so that the message cannot
            # drift away from the actual class name again.
            raise ValueError(
                f"{type(self).__name__} only supports binary outcomes but "
                f"{n_classes} classes were provided."
            )
        return self

    def predict(self, X: Matrix) -> np.ndarray[Any, Any]:
        """Return, for every row of ``X``, the column index of the larger probability.

        The result holds column indices of :meth:`predict_proba` (0 or 1), not
        entries of ``classes_``. ``np.argmax`` picks the first column on ties,
        so a ``propensity_score`` of exactly 0.5 yields 0.
        """
        return np.argmax(self.predict_proba(X), axis=1)

    def predict_proba(self, X: Matrix) -> np.ndarray[Any, Any]:
        """Return an array of shape ``(len(X), 2)`` in which every row equals
        ``[1 - propensity_score, propensity_score]``.

        Only the length of ``X`` is used.
        """
        return np.full((len(X), 2), [1 - self.propensity_score, self.propensity_score])
61 changes: 60 additions & 1 deletion tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,16 @@
# SPDX-License-Identifier: BSD-3-Clause

import numpy as np
import pandas as pd
import pytest
from lightgbm import LGBMRegressor

from metalearners.metalearner import MetaLearner
from metalearners.utils import metalearner_factory, simplify_output
from metalearners.utils import (
FixedBinaryPropensity,
metalearner_factory,
simplify_output,
)


@pytest.mark.parametrize("prefix", ["T"])
Expand Down Expand Up @@ -52,3 +57,57 @@ def test_simplify_output(input, expected):
def test_simplify_output_raises(input):
with pytest.raises(ValueError, match="needs to be 3-dimensional"):
simplify_output(input)


@pytest.mark.parametrize("use_pd", [True, False])
def test_fixed_binary_propensity(use_pd):
    """A fitted model reports the fixed propensity for every test observation,
    for both numpy and pandas training data."""
    n_train, n_test = 5, 3
    score = 0.3
    # Boolean which acts as 0/1 when multiplied with the array of ones below.
    majority_class = score >= 0.5

    estimator = FixedBinaryPropensity(propensity_score=score)

    features = np.ones((n_train, 5))
    labels = np.ones(n_train)
    if use_pd:
        features, labels = pd.DataFrame(features), pd.Series(labels)

    estimator.fit(features, labels)

    X_test = np.zeros(n_test)

    expected_classes = np.array(np.ones(n_test) * majority_class)
    assert np.array_equal(estimator.predict(X_test), expected_classes)

    expected_probabilities = np.column_stack(
        (
            np.ones(n_test) * (1 - score),
            np.ones(n_test) * score,
        )
    )
    assert np.array_equal(estimator.predict_proba(X_test), expected_probabilities)


@pytest.mark.parametrize("propensity_score", [-1, 100, 1.1])
def test_fixed_binary_propensity_not_a_propbability(propensity_score):
    """Scores outside of [0, 1] are rejected when the model is constructed."""
    expected_message = "between 0 and 1 but got"
    with pytest.raises(ValueError, match=expected_message):
        FixedBinaryPropensity(propensity_score=propensity_score)


def test_fixed_binary_propensity_non_binary():
    """Fitting on an outcome with more than two distinct values raises."""
    n_samples = 5
    features = np.ones((n_samples, 5))
    # One distinct label per observation, i.e. five classes.
    labels = np.arange(n_samples, dtype=int)

    estimator = FixedBinaryPropensity(propensity_score=0.3)

    with pytest.raises(ValueError, match="only supports binary outcomes"):
        estimator.fit(features, labels)

0 comments on commit 6d8bc68

Please sign in to comment.