Global Sensitivity Analysis for Categorical Features (#2357)
Summary:
Pull Request resolved: #2357

Sobol sensitivity analysis and the sensitivity plots currently do not support categorical features. The plots in particular error out when categorical features are present, because the sensitivity analysis (both first order and gradient-based) does not handle them.

This commit adds support for categorical features by
- introducing a uniformly random integer-valued point distribution for categorical and ordinal features in `SobolSensitivity`, in line with the usual integer-valued encoding,
- ignoring the sign of the derivative-based sensitivity analysis for categorical features, since the `CategoricalKernel` is non-differentiable and the "direction" of a categorical feature is not well defined,
- adding a separate case for categorical features to the sensitivity analysis plot, stating that the categorical features "affect" but don't "increase" or "decrease" the metric.

Note that the results for the categorical features are still first order or total Sobol indices, so all the results in the plot are on the same scale.
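
As an illustration of the first point, here is a minimal sketch of the integer-valued uniform sampling (the helper name `sample_inputs` is hypothetical; the actual change lives in `SobolSensitivity.__init__` below):

import torch

def sample_inputs(bounds, num_mc_samples, discrete_features=None):
    # continuous features: uniform over [lower, upper] in each dimension
    lower, upper = bounds[0], bounds[1]
    X = lower + (upper - lower) * torch.rand(num_mc_samples, bounds.shape[-1])
    for i in discrete_features or []:
        low, high = int(bounds[0, i]), int(bounds[1, i])
        # discrete features: uniform over the integers {low, ..., high},
        # in line with the usual integer-valued encoding
        X[:, i] = torch.randint(low=low, high=high + 1, size=(num_mc_samples,))
    return X

# e.g. one continuous feature on [0, 1] and one categorical encoded as {0, 1, 2}:
# X = sample_inputs(torch.tensor([[0.0, 0.0], [1.0, 2.0]]), 8, discrete_features=[1])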

Reviewed By: dme65

Differential Revision: D56070326

fbshipit-source-id: 5ace4cfda0468bd42f71cbe523304a443b7acac2
SebastianAment authored and facebook-github-bot committed Apr 16, 2024
1 parent bedc207 commit 43cef62
Showing 3 changed files with 137 additions and 33 deletions.
31 changes: 26 additions & 5 deletions ax/plot/feature_importances.py
@@ -12,6 +12,7 @@
import numpy as np
import pandas as pd
import plotly.graph_objs as go
from ax.core.parameter import ChoiceParameter
from ax.exceptions.core import NoDataError
from ax.modelbridge import ModelBridge
from ax.plot.base import AxPlotConfig, AxPlotTypes
@@ -143,6 +144,14 @@ def plot_feature_importance_by_feature_plotly(
}
traces = []
dropdown = []
categorical_features = []
if model is not None:
categorical_features = [
name
for name, par in model.model_space.parameters.items()
if isinstance(par, ChoiceParameter)
]

for i, metric_name in enumerate(sorted(sensitivity_values.keys())):
importances = sensitivity_values[metric_name]
factor_col = "Factor"
@@ -157,7 +166,11 @@
factor_col: factor,
importance_col: np.asarray(importance)[0],
importance_col_se: np.asarray(importance)[2],
sign_col: np.sign(np.asarray(importance)[0]).astype(int),
sign_col: (
0
if factor in categorical_features
else 2 * (np.asarray(importance)[0] >= 0).astype(int) - 1
),
}
for factor, importance in importances.items()
]
@@ -172,7 +185,11 @@
{
factor_col: factor,
importance_col: importance,
sign_col: np.sign(importance).astype(int),
sign_col: (
0
if factor in categorical_features
else 2 * (importance >= 0).astype(int) - 1
),
}
for factor, importance in importances.items()
]
@@ -183,9 +200,13 @@
if relative:
df[importance_col] = df[importance_col].div(df[importance_col].sum())

colors = {-1: "darkorange", 1: "steelblue"}
names = {-1: "Decreases metric", 1: "Increases metric"}
legend_counter = {-1: 0, 1: 0}
colors = {-1: "darkorange", 0: "gray", 1: "steelblue"}
names = {
-1: "Decreases metric",
0: "Affects metric (categorical choice)",
1: "Increases metric",
}
legend_counter = {-1: 0, 0: 0, 1: 0}
all_positive = all(df[sign_col] >= 0)
for _, row in df.iterrows():
traces.append(
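
The sign-column logic above reduces to a three-way encoding that the traces map to colors and legend entries; a standalone sketch (`importance_sign` is a hypothetical helper, only the mapping mirrors the diff):

def importance_sign(factor, importance, categorical_features):
    # categorical features have no direction: sign 0 ("affects metric");
    # all other features get +1 ("increases") or -1 ("decreases")
    if factor in categorical_features:
        return 0
    return 1 if importance >= 0 else -1

# the plot keys colors and legend names off this sign, e.g.
# colors = {-1: "darkorange", 0: "gray", 1: "steelblue"}
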
39 changes: 39 additions & 0 deletions ax/utils/sensitivity/sobol_measures.py
@@ -6,6 +6,7 @@
# pyre-strict

from copy import deepcopy
from functools import partial

from typing import Any, Callable, Dict, List, Optional, Union

@@ -36,6 +37,7 @@ def __init__(
second_order: bool = False,
num_bootstrap_samples: int = 1,
bootstrap_array: bool = False,
discrete_features: Optional[List[int]] = None,
) -> None:
r"""Computes three types of Sobol indices:
first order indices, total indices and second order indices (if specified).
@@ -51,6 +53,9 @@
to be specified.
bootstrap_array: If true, all the num_bootstrap_samples estimated indices
are returned instead of their mean and variance.
discrete_features: If specified, the inputs associated with the indices in
this list are generated using an integer-valued uniform distribution,
rather than the default (pseudo-)random continuous uniform distribution.
"""
self.input_function = input_function
self.dim: int = bounds.shape[-1]
@@ -71,6 +76,16 @@ def __init__(
else:
self.A = unnormalize(torch.rand(num_mc_samples, self.dim), bounds=bounds)
self.B = unnormalize(torch.rand(num_mc_samples, self.dim), bounds=bounds)

# uniform integer distribution for discrete features
if discrete_features is not None:
all_low = bounds[0, discrete_features].to(dtype=torch.int).tolist()
all_high = (bounds[1, discrete_features]).to(dtype=torch.int).tolist()
for i, low, high in zip(discrete_features, all_low, all_high):
randint = partial(torch.randint, low=low, high=high + 1)
self.A[:, i] = randint(size=self.A.shape[:-1])
self.B[:, i] = randint(size=self.B.shape[:-1])

# pyre-fixme[4]: Attribute must be annotated.
self.A_B_ABi = self.generate_all_input_matrix().to(torch.double)

@@ -395,6 +410,7 @@ def __init__(
[torch.Tensor, torch.Tensor], torch.Tensor
] = GaussianLinkMean,
mini_batch_size: int = 128,
discrete_features: Optional[List[int]] = None,
) -> None:
r"""Computes three types of Sobol indices:
first order indices, total indices and second order indices (if specified).
@@ -411,6 +427,9 @@
to be specified.
mini_batch_size: The size of the mini-batches used while evaluating the
model posterior. Increasing this will increase the memory usage.
discrete_features: If specified, the inputs associated with the indices in
this list are generated using an integer-valued uniform distribution,
rather than the default (pseudo-)random continuous uniform distribution.
"""
self.model = model
self.second_order = second_order
@@ -438,6 +457,7 @@ def input_function(x: Tensor) -> Tensor:
second_order=self.second_order,
input_qmc=self.input_qmc,
num_bootstrap_samples=self.num_bootstrap_samples,
discrete_features=discrete_features,
)
self.sensitivity.evalute_function()

@@ -486,6 +506,7 @@ def __init__(
input_qmc: bool = False,
gp_sample_qmc: bool = False,
num_bootstrap_samples: int = 1,
discrete_features: Optional[List[int]] = None,
) -> None:
r"""Computes three types of Sobol indices:
first order indices, total indices and second order indices (if specified).
@@ -502,6 +523,9 @@
SobolQMCNormalSampler.
num_bootstrap_samples: If bootstrap is true, the number of bootstraps has
to be specified.
discrete_features: If specified, the inputs associated with the indices in
this list are generated using an integer-valued uniform distribution,
rather than the default (pseudo-)random continuous uniform distribution.
"""
self.model = model
self.second_order = second_order
@@ -519,6 +543,7 @@ def __init__(
input_qmc=self.input_qmc,
num_bootstrap_samples=self.num_bootstrap_samples,
bootstrap_array=True,
discrete_features=discrete_features,
)
# TODO: Ideally, we would reduce the memory consumption here as well
# but this is tricky since it uses joint posterior sampling.
@@ -717,6 +742,7 @@ def compute_sobol_indices_from_model_list(
model_list: List[Model],
bounds: Tensor,
order: str = "first",
discrete_features: Optional[List[int]] = None,
**sobol_kwargs: Any,
) -> Tensor:
"""
@@ -728,6 +754,9 @@
bounds: A 2 x d Tensor of lower and upper bounds of the domain of the models.
order: A string specifying the order of the Sobol indices to be computed.
Supports "first" and "total" and defaults to "first".
discrete_features: If specified, the inputs associated with the indices in
this list are generated using an integer-valued uniform distribution,
rather than the default (pseudo-)random continuous uniform distribution.
sobol_kwargs: keyword arguments passed on to SobolSensitivityGPMean.
Returns:
@@ -739,6 +768,7 @@
sens_class = SobolSensitivityGPMean(
model=model,
bounds=bounds,
discrete_features=discrete_features,
**sobol_kwargs,
)
indices.append(method(sens_class))
@@ -789,6 +819,7 @@ def ax_parameter_sens(
model_list=model_list,
bounds=bounds,
order=order,
discrete_features=digest.categorical_features + digest.ordinal_features,
**sobol_kwargs,
)
if signed:
@@ -797,6 +828,14 @@
bounds=bounds,
**sobol_kwargs,
)
# categorical features don't have a direction, so we set the derivative to 1.0
# in order not to zero out their sensitivity. We treat categorical features
# separately in the sensitivity analysis plot as well, to make clear that they
# affect the metric but neither increase nor decrease it. Note that the
# ordinal variables have a well-defined direction, so we do not need to treat
# them differently here.
for i in digest.categorical_features:
ind_deriv[:, i] = 1.0
ind *= torch.sign(ind_deriv)
return _array_with_string_indices_to_dict(
rows=metrics, cols=digest.feature_names, A=ind.numpy()
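
For context, a hedged usage sketch of the signed analysis on a search space with a categorical parameter (the `model_bridge` setup is assumed and not shown; the call mirrors the updated tests below):

from ax.utils.sensitivity.sobol_measures import ax_parameter_sens

# model_bridge: a fitted Ax ModelBridge whose search space contains a
# categorical parameter (setup assumed, not shown here)
ind_dict = ax_parameter_sens(
    model_bridge,
    order="total",   # or "first"
    signed=True,     # categorical features keep a fixed sign of +1
    input_qmc=True,  # forwarded to SobolSensitivityGPMean
    num_mc_samples=128,
)
# returns {metric_name: {feature_name: signed_index, ...}};
# entries for categorical features are always non-negative
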
100 changes: 72 additions & 28 deletions ax/utils/sensitivity/tests/test_sensitivity.py
@@ -6,6 +6,7 @@
# pyre-strict


import copy
import math
from typing import cast
from unittest.mock import patch, PropertyMock
@@ -270,35 +271,52 @@ def test_SobolGPMean(self) -> None:
)
# Test with signed
model_bridge = get_modelbridge(modular=True)
# Unsigned

# adding a categorical feature
cat_model_bridge = copy.deepcopy(model_bridge)
digest = cat_model_bridge.model.search_space_digest
digest.categorical_features = [0]

sobol_kwargs = {"input_qmc": True, "num_mc_samples": 10}
ind_dict = ax_parameter_sens(
model_bridge, # pyre-ignore
order="total",
signed=False,
**sobol_kwargs, # pyre-ignore
)
ind_deriv = compute_derivatives_from_model_list(
model_list=[model_bridge.model.surrogate.model],
bounds=torch.tensor(model_bridge.model.search_space_digest.bounds).T,
**sobol_kwargs,
)
ind_dict_signed = ax_parameter_sens(
model_bridge, # pyre-ignore
order="total",
# signed=True
**sobol_kwargs, # pyre-ignore
)
for i, pname in enumerate(["x1", "x2"]):
self.assertEqual(
torch.sign(ind_deriv[0, i]).item(),
math.copysign(1, ind_dict_signed["branin"][pname]),
)
self.assertAlmostEqual(
(torch.sign(ind_deriv[0, i]) * ind_dict["branin"][pname]).item(),
ind_dict_signed["branin"][pname],
) # signed
self.assertTrue(ind_dict["branin"][pname] >= 0) # unsigned
seed = 1234
for bridge in [model_bridge, cat_model_bridge]:
with self.subTest(model_bridge=bridge):
torch.manual_seed(seed)
# Unsigned
ind_dict = ax_parameter_sens(
bridge, # pyre-ignore
order="total",
signed=False,
**sobol_kwargs, # pyre-ignore
)
ind_deriv = compute_derivatives_from_model_list(
model_list=[bridge.model.surrogate.model],
bounds=torch.tensor(bridge.model.search_space_digest.bounds).T,
**sobol_kwargs,
)
torch.manual_seed(seed) # reset seed to keep discrete features the same
cat_indices = bridge.model.search_space_digest.categorical_features
ind_dict_signed = ax_parameter_sens(
bridge, # pyre-ignore
order="total",
# signed=True
**sobol_kwargs, # pyre-ignore
)
for i, pname in enumerate(["x1", "x2"]):
if i in cat_indices: # special case for categorical features
expected_sign = 1
else:
expected_sign = torch.sign(ind_deriv[0, i]).item()

self.assertEqual(
expected_sign,
math.copysign(1, ind_dict_signed["branin"][pname]),
)
self.assertAlmostEqual(
(expected_sign * ind_dict["branin"][pname]).item(),
ind_dict_signed["branin"][pname],
) # signed
self.assertTrue(ind_dict["branin"][pname] >= 0) # unsigned

def test_SobolGPSampling(self) -> None:
bounds = torch.tensor([(0.0, 1.0) for _ in range(2)]).t()
@@ -349,6 +367,32 @@ def test_SobolGPSampling(self) -> None:
total_order = sensitivity_sampling.total_order_indices()
second_order = sensitivity_sampling.second_order_indices()

discrete_feature = 0
sensitivity_sampling_discrete = SobolSensitivityGPSampling(
self.model,
num_mc_samples=10,
num_gp_samples=10,
bounds=bounds,
second_order=True,
discrete_features=[discrete_feature],
)
sens = sensitivity_sampling_discrete.sensitivity
A = sens.A
B = sens.B
Arnd = A.round()
Brnd = B.round()
# testing that the discrete feature is integer valued
self.assertTrue(
torch.allclose(Arnd[:, discrete_feature], A[:, discrete_feature])
)
self.assertTrue(
torch.allclose(Brnd[:, discrete_feature], B[:, discrete_feature])
)

# testing that the other features are not integer valued
self.assertFalse(torch.allclose(Arnd, A))
self.assertFalse(torch.allclose(Brnd, B))

def test_DerivativeGp(self) -> None:
test_x = torch.rand(2, 2)
posterior = posterior_derivative(self.model, test_x, kernel_type="matern")
