diff --git a/ax/modelbridge/registry.py b/ax/modelbridge/registry.py index 919d8263d57..a07c905dc6d 100644 --- a/ax/modelbridge/registry.py +++ b/ax/modelbridge/registry.py @@ -42,15 +42,21 @@ from ax.modelbridge.transforms.ivw import IVW from ax.modelbridge.transforms.log import Log from ax.modelbridge.transforms.logit import Logit +from ax.modelbridge.transforms.merge_repeated_measurements import ( + MergeRepeatedMeasurements, +) from ax.modelbridge.transforms.one_hot import OneHot +from ax.modelbridge.transforms.relativize import Relativize from ax.modelbridge.transforms.remove_fixed import RemoveFixed from ax.modelbridge.transforms.search_space_to_choice import SearchSpaceToChoice from ax.modelbridge.transforms.standardize_y import StandardizeY from ax.modelbridge.transforms.stratified_standardize_y import StratifiedStandardizeY from ax.modelbridge.transforms.task_encode import TaskChoiceToIntTaskChoice +from ax.modelbridge.transforms.transform_to_new_sq import TransformToNewSQ from ax.modelbridge.transforms.trial_as_task import TrialAsTask from ax.modelbridge.transforms.unit_x import UnitX from ax.models.base import Model +from ax.models.discrete.eb_ashr import EBAshr from ax.models.discrete.eb_thompson import EmpiricalBayesThompsonSampler from ax.models.discrete.full_factorial import FullFactorialGenerator from ax.models.discrete.thompson import ThompsonSampler @@ -108,6 +114,18 @@ Discrete_X_trans: list[type[Transform]] = [IntRangeToChoice] +EB_ashr_trans: list[type[Transform]] = [ + TransformToNewSQ, + MergeRepeatedMeasurements, + SearchSpaceToChoice, +] + +rel_EB_ashr_trans: list[type[Transform]] = [ + Relativize, + MergeRepeatedMeasurements, + SearchSpaceToChoice, +] + # This is a modification of Cont_X_trans that replaces OneHot and # OrderedChoiceToIntegerRange with ChoiceToNumericChoice. This results in retaining # all choice parameters as discrete, while using continuous relaxation for integer @@ -179,6 +197,11 @@ class ModelSetup(NamedTuple): model_class=EmpiricalBayesThompsonSampler, transforms=TS_trans, ), + "EB_Ashr": ModelSetup( + bridge_class=DiscreteModelBridge, + model_class=EBAshr, + transforms=EB_ashr_trans, + ), "Factorial": ModelSetup( bridge_class=DiscreteModelBridge, model_class=FullFactorialGenerator, @@ -424,6 +447,7 @@ class Models(ModelRegistryBase): LEGACY_BOTORCH = "Legacy_GPEI" BOTORCH_MODULAR = "BoTorch" EMPIRICAL_BAYES_THOMPSON = "EB" + EB_ASHR = "EB_Ashr" UNIFORM = "Uniform" ST_MTGP = "ST_MTGP" BO_MIXED = "BO_MIXED" diff --git a/ax/models/discrete/ashr_utils.py b/ax/models/discrete/ashr_utils.py new file mode 100644 index 00000000000..7753a41ad7d --- /dev/null +++ b/ax/models/discrete/ashr_utils.py @@ -0,0 +1,355 @@ +#!/usr/bin/env python3 +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +# pyre-strict + +from __future__ import annotations + +import warnings + +import numpy as np +import numpy.typing as npt +from ax.exceptions.core import UserInputError +from scipy.stats import norm + + +class Ashr: + r""" + An empirical Bayes model for estimating the effect sizes. Given the observations + Y_i and their variances Yvar_i, Ashr estimates the underlying effect sizes mu_i + by placing a mixture of Gaussians prior on the effect sizes. The prior consists of + a point mass at zero and a set of centered Gaussians with specified variaces. 
+    The mixture proportions in the prior as well as the variances of the Gaussians
+    in the mixture are learned based on observed outcome data.
+    Methodology is based on the paper: False discovery rates: a new deal by
+    M. Stephens https://academic.oup.com/biostatistics/article/18/2/275/2557030.
+    """
+
+    def __init__(
+        self,
+        Y: npt.NDArray,
+        Yvar: npt.NDArray,
+        prior_vars: npt.NDArray | None = None,
+        eb_grid_param: float = 2.0,
+    ) -> None:
+        r"""
+        Args:
+            Y: A length n array denoting the observed treatment effects.
+            Yvar: A length n array denoting the variances
+                of the observed values.
+            prior_vars: A length k array denoting the variances
+                of normal distributions in the mixture of Gaussians prior.
+                In case of None, the variances are estimated based on data
+                using the provided grid parameter.
+            eb_grid_param: A grid parameter for estimating the prior variances
+                based on data in case none were given.
+        """
+        self.Y: npt.NDArray = Y
+        self.Yvar: npt.NDArray = Yvar
+        if prior_vars is None:
+            prior_stds = prior_grid(Y=Y, Yvar=Yvar, grid_param=eb_grid_param)
+            prior_vars = prior_stds**2
+        self.prior_vars: npt.NDArray = prior_vars
+        self.ll: npt.NDArray = marginal_densities(Y=Y, Yvar=Yvar, prior_vars=prior_vars)
+
+    def posterior(self, w: npt.NDArray) -> GaussianMixture:
+        r"""
+        The posterior for mu_i can be calculated via the following rules.
+        For the normal prior mu_i~N(0, sigma_k^2) and
+        likelihood hat{mu}_i~N(mu_i, s_i^2),
+        posterior for mu_i is N(sigma_1^2*hat{mu}_i/s_i^2, sigma_1^2),
+        where sigma_1^2=sigma_k^2*s_i^2/(sigma_k^2+s_i^2).
+
+        Args:
+            w: (n,k) dim matrix containing the weights of the posterior mixture.
+
+        Returns:
+            Mixture of Gaussians distribution.
+        """
+
+        # (n, k) matrix of normal variances
+        # a variance per mixture component and per observation
+        normal_vars_by_class = np.divide(
+            np.multiply.outer(self.Yvar, self.prior_vars),
+            np.add.outer(self.Yvar, self.prior_vars),
+        )
+        # (n, k) matrix of normal means
+        # a mean per mixture component and per observation
+        normal_means_by_class = normal_vars_by_class * (self.Y / self.Yvar)[:, None]
+
+        return GaussianMixture(
+            normal_means_by_class=normal_means_by_class,
+            normal_vars_by_class=normal_vars_by_class,
+            weights=w,
+        )
+
+    def fit(
+        self,
+        lambdas: npt.NDArray | None = None,
+        threshold: float = 10e-4,
+        nsteps: int = 1000,
+    ) -> dict[str, npt.NDArray]:
+        r"""
+        Fit Ashr to estimate prior proportions pi, posterior weights and lfdr.
+
+        Args:
+            lambdas: A length k array of penalty levels corresponding to each of
+                k classes with entry equal to one meaning no penalty
+                for the corresponding class.
+            threshold: The threshold used in the EM stopping rule.
+                If the difference between two consecutive estimates of
+                weights in the EM algorithm is smaller than the threshold,
+                the algorithm stops.
+            nsteps: The maximum number of steps in the EM algorithm.
+
+        Returns:
+            A dict containing
+            - weights: (n, k)-dim estimated weight matrix for computing posteriors,
+            - pi: length k array of estimated prior mixture proportions, and
+            - lfdr: length n local False Discovery Rate (FDR) sequence. lfdr equals
+              the posterior probability of true parameter being zero.
+        """
+        k = len(self.prior_vars)  # total number of classes
+        if lambdas is None:
+            lambdas = np.ones(k)  # no penalty
+        if len(lambdas) != k:
+            raise ValueError(
+                "The length of the penalty sequence should be the number of "
+                "prior classes."
+ ) + lambdas = lambdas.astype(np.float64) + + results = fit_ashr_em( + ll=self.ll, lambdas=lambdas, threshold=threshold, nsteps=nsteps + ) + + return results + + +class GaussianMixture: + r""" + A weighted mixure of Gaussian distributions. The class computes the mean, + standard errors and tails of each of n random variables + from a mixture of k Gaussians. + The Gaussians in the mixture are allowed to be degenerate, + i.e., have zero variance. This is used in the Ashr model since one of the + distributions in the prior mixture is a point mass at zero. + """ + + def __init__( + self, + normal_means_by_class: npt.NDArray, + normal_vars_by_class: npt.NDArray, + weights: npt.NDArray, + ) -> None: + r""" + Args: + normal_means_by_class: (n, k)-dim matrix of normal means for + each of the classes. Each of the n random variables comes from + a mixture of k normal distributions. + normal_var_by_class: (n, k)-dim matrix of normal variances + for each of the k classes. The first column is all zeros, + the variance of the null class. + weights: (n, k)-dim matrix of weights on individual distributions + per prior class. + """ + self.normal_means_by_class = normal_means_by_class + self.normal_vars_by_class = normal_vars_by_class + self.weights = weights + + def tail_probabilities( + self, left_tail: bool = True, threshold: float = 0.0 + ) -> npt.NDArray: + r""" + Args: + left_tail: An indicator for the tail probability to calculate. + Note that neither tail includes null class. + + threshold: For left tail, the returned value measures probability of the + effect being less than the threshold. For right tail, it is the + probability of the effect being larger than the threshold. + + Returns: + Length n array of tail probabilities for each of n rvs. + """ + tails_by_class = np.zeros_like(self.normal_means_by_class) + + # normal left tails + for i in range(tails_by_class.shape[0]): + for j in range(tails_by_class.shape[1]): + tails_by_class[i, j] = ( + norm.cdf( + -np.divide( + self.normal_means_by_class[i, j] - threshold, + np.sqrt(self.normal_vars_by_class[i, j]), + ) + ) + if self.normal_vars_by_class[i, j] > 0 + else 0.0 + ) + # correcting the normal tails in case of a right tail + if left_tail is False and self.normal_vars_by_class[i, j] > 0: + tails_by_class[i, j] = 1.0 - tails_by_class[i, j] + + return np.multiply(tails_by_class, self.weights).sum(axis=1) + + @property + def means(self) -> npt.NDArray: + r""" + Returns: + Length n array of final means for each rv. + """ + return np.multiply(self.weights, self.normal_means_by_class).sum(axis=1) + + @property + def vars(self) -> npt.NDArray: + r""" " + Returns: + Length n array of final standard deviations for each effect. + """ + # standard errors of the mixture distributions + # https://en.wikipedia.org/wiki/Mixture_distribution#Moments + return ( + np.multiply( + self.weights, + self.normal_means_by_class**2 + self.normal_vars_by_class, + ).sum(axis=1) + - self.means**2 + ) + + +def prior_grid( + Y: npt.NDArray, Yvar: npt.NDArray, grid_param: float = 2.0 +) -> npt.NDArray: + r""" + Produces the grid of standard deviations for each of the Gaussians in the prior + mixture based on the observed data. + + Args: + Y: A length n array of the observed treatment effects. + Yvar: A length n array of observed variances of the above observations. + grid_param: A grid parameter. Default 2.0 recommended in the paper to control + the number of Gaussians in the mixture. 
+
+    Returns:
+        An array of the standard deviations of the centered Gaussians in the
+        mixture of Gaussians prior; its first entry is zero for the point mass.
+    """
+    m = np.sqrt(grid_param)
+    sigma_lower = np.min(np.sqrt(Yvar)) / 10.0
+    sigma_upper = np.max(Y**2 - Yvar)
+    sigma_upper = 2 * np.sqrt(sigma_upper) if sigma_upper > 0 else 8 * sigma_lower
+    max_power = int(np.ceil(np.log(sigma_upper / sigma_lower) / np.log(m)))
+    return np.array([0] + [sigma_lower * (m**power) for power in range(max_power + 1)])
+
+
+def marginal_densities(
+    Y: npt.NDArray,
+    Yvar: npt.NDArray,
+    prior_vars: npt.NDArray,
+) -> npt.NDArray:
+    r"""
+    Evaluates marginal densities for each observed statistic and each prior class.
+
+    Args:
+        Y: A length n array denoting the observed treatment effects.
+        Yvar: A length n array denoting the variances
+            of the observed values.
+        prior_vars: A length k array denoting the variances
+            of prior classes.
+
+    Returns:
+        (n, k) dim matrix ll, where ll_{jk} is the marginal density of the j-th
+        statistic evaluated at its observed value, assuming the prior comes from
+        the k-th class.
+    """
+    k = len(prior_vars)  # total number of classes
+    n = len(Y)  # total number of observations
+    if prior_vars[0] != 0:
+        raise UserInputError(
+            "Ashr prior consists of a mixture of Gaussians where the "
+            "first Gaussian in the prior mixture should be a point mass at zero. \
+            This degenerate Gaussian represents the prior on the effects being null."
+        )
+    ll = np.zeros((n, k), dtype=np.float64)
+
+    # marginal densities when prior is mass at zero
+    ll[:, 0] = norm.pdf(Y, loc=0, scale=np.sqrt(Yvar))
+
+    for i in range(1, k):
+        ll[:, i] = norm.pdf(
+            Y,
+            loc=0.0,
+            scale=np.sqrt(prior_vars[i] + Yvar),
+        )
+    return ll
+
+
+def compute_weights(ll: npt.NDArray, pi: npt.NDArray) -> npt.NDArray:
+    r"""
+    Compute posterior weights based on marginal densities and prior probabilities.
+
+    Args:
+        ll: (n,k)-dim matrix of marginal densities evaluated at each observation,
+        pi: length k vector of prior mixture proportions.
+
+    Returns:
+        (n,k)-dim matrix of weights.
+    """
+    # multiply each row of ll with the corresponding element of pi vector
+    w = np.multiply(ll, pi)
+    # divide weights by the sum across each row
+    w = w / w.sum(axis=1, keepdims=True)
+    return w
+
+
+def fit_ashr_em(
+    ll: npt.NDArray,
+    lambdas: npt.NDArray,
+    threshold: float = 10e-4,
+    nsteps: int = 1000,
+) -> dict[str, npt.NDArray]:
+    r"""
+    Estimates prior proportions and posterior weights via an
+    Expectation Maximization (EM) algorithm.
+
+    Args:
+        ll: (n,k)-dim matrix of marginal densities evaluated at each observation,
+            marginalizing over the true effects.
+        lambdas: A length k array of penalty levels.
+        threshold: If the difference between two consecutive estimates of
+            weights is smaller than the threshold, the algorithm stops.
+        nsteps: The maximum number of steps in the EM algorithm.
+
+    Returns:
+        A dictionary containing:
+        - weights: (n,k)-dim matrix of weights,
+        - pi: length k vector of estimates of prior mixture proportions, and
+        - lfdr: length n local False Discovery Rate (FDR) sequence. lfdr equals
+          the posterior probability of true parameter being zero.
+ """ + n, k = ll.shape + # initializing pi vector + if k - 1 < n: + pi = np.ones(k) / n + pi[0] = 1 - (k - 1) / n + else: + pi = np.ones(k) / k + + w = np.zeros_like(ll) + + for _ in range(nsteps): + # E-step: compute weight matrix w; size of w: (n, k) + w = compute_weights(ll=ll, pi=pi) + # M-step: update pi + ns = w.sum(axis=0).squeeze() + lambdas - 1.0 # length k + pi_new = ns / ns.sum() # length k + if sum(abs(pi - pi_new)) <= threshold: + w = compute_weights(ll=ll, pi=pi_new) + return {"weights": w, "pi": pi_new, "lfdr": w[:, 0]} + pi = pi_new + + warnings.warn("EM did not converge.", stacklevel=2) + return {"weights": w, "pi": pi, "lfdr": w[:, 0]} diff --git a/ax/models/discrete/eb_ashr.py b/ax/models/discrete/eb_ashr.py new file mode 100644 index 00000000000..6909b15b7d9 --- /dev/null +++ b/ax/models/discrete/eb_ashr.py @@ -0,0 +1,385 @@ +#!/usr/bin/env python3 +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +# pyre-strict + +import functools +import warnings +from collections.abc import Callable, Mapping, Sequence + +import numpy as np +import numpy.typing as npt +from ax.core.types import TGenMetadata, TParamValue, TParamValueList +from ax.models.discrete.ashr_utils import Ashr, GaussianMixture +from ax.models.discrete.thompson import ThompsonSampler +from ax.models.discrete_base import DiscreteModel +from ax.models.types import TConfig +from ax.utils.common.docutils import copy_doc +from pyre_extensions import none_throws + + +class EBAshr(ThompsonSampler): + """ + Determines feasible arms and sorts them by the objective + improvements. Feasibile arms are required to have probability + of feasibility with respect to all metrics in objective and constraints + above 1-regression_prob_threshold, otherwise they are deemed infeasible. + These probabilities are computed via the discrete emprirical Bayes Ashr model. + Shrinkage: Ashr model provides shrinkage to metric outcomes across arms + with model fitted separately for each metric. + Prior: Ashr model uses a prior consisting of mixture of Gaussian + distributions, all of which are centered at zero. The number of Gaussians + in the prior and their variances are chosen based on observed data to cover + the whole data range. One of the variances is chosen to be zero so that + one of the Gaussians in the prior mixture is a point mass at zero. + The mixture proportions in the prior are learned based on observed outcome + data (Ys) and their standard errors (Yvars) via standard empirical Bayes + methodology. The method operates in the outcome space and it doesn't use + the space of features (Xs). + """ + + def __init__( + self, + eb_penalty_param: float = 1.0, + eb_nsteps: int = 1000, + eb_threshold: float = 10e-4, + eb_grid_param: float = 2, + min_variance_threshold: float = 10e-16, + ) -> None: + r""" + Args: + eb_penalty_param: Penalty parameter, part of Ashr model log-likelihood, + represents the amount of shrinkage towards zero. Penalty parameter takes + real values no smaller than 1. The default 1 means there will be regular + shrinkage from the Bayes model and the prior but not additional + shrinkage. Parameter value above one encourages overestimation of the + null (zero) class so there is additional shrinkage. Recommended + parameter range is [1, 10]. + eb_nsteps: The number of steps in Ashr fitting EM algorithm. + eb_threshold: The precision threshold in Ashr fitting EM algorithm. 
+ eb_grid_param: The grid parameter affecting the number of Gaaussians in the + prior mixture for Ashr prior distribution. + min_variance_thresold: All observed test groups/arms with variance below + this threshold will be considered fixed variables with zero variance. + """ + self.X: Sequence[Sequence[TParamValue]] | None = None + self.Ys: Sequence[Sequence[float]] | None = None + self.Yvars: Sequence[Sequence[float]] | None = None + self.X_to_Ys_and_Yvars: ( + list[dict[TParamValueList, tuple[float, float]]] | None + ) = None + # for each metric, posterior_feasibility gives + # posterior probabilities of feasibility across all arms + self.posterior_feasibility: ( + list[Callable[[float, float], npt.NDArray]] | None + ) = None + self.eb_penalty_param: float = eb_penalty_param + self.eb_nsteps: int = eb_nsteps + self.eb_threshold: float = eb_threshold + self.eb_grid_param: float = eb_grid_param + self.min_variance_threshold: float = min_variance_threshold + + def _fit_Ys_and_Yvars( + self, + Ys: Sequence[Sequence[float]], + Yvars: Sequence[Sequence[float]], + outcome_names: Sequence[str], + ) -> tuple[list[list[float]], list[list[float]]]: + r""" + Args: + Ys: List of length m of measured metrics across k arms. The outcomes + are assumed to be relativized with respect to status quo. + Yvars: List of length m of measured metrics variances across arms. + outcome_names: List of m metric names. + + Returns: A tuple containing + - shrinken outcome estimates and + - their variances. + """ + newYs = [] + newYvars = [] + self.posterior_feasibility = [] + + for Y, Yvar in zip(Ys, Yvars, strict=True): + newY: npt.NDArray = np.array(Y, dtype=float) + newYvar = np.array(Yvar, dtype=float) + + # excluding arms with zero variance, e.g. status quo + nonconstant_rvs = np.abs(newYvar) > self.min_variance_threshold + # Case where the standard deviations are not infinitesimal, so we do + # shrinkage. + if nonconstant_rvs.any(): + # Ashr model applied to arms with non-zero variance + stats = newY[nonconstant_rvs] + variances = newYvar[nonconstant_rvs] + + # setting up the Ashr model + model = Ashr(Y=stats, Yvar=variances, eb_grid_param=self.eb_grid_param) + k = len(model.prior_vars) + # run Ashr fitting procedure + model_fit = model.fit( + lambdas=np.array([self.eb_penalty_param] + [1.0] * (k - 1)), + nsteps=self.eb_nsteps, + threshold=self.eb_threshold, + ) + + # Ashr model posterior + posterior = model.posterior(model_fit["weights"]) + newY[nonconstant_rvs] = posterior.means + newYvar[nonconstant_rvs] = posterior.vars + + f_posterior_feas: Callable[[float, float], npt.NDArray] = ( + functools.partial( + posterior_feasibility_util, + posterior=posterior, + nonconstant_rvs=nonconstant_rvs, + Y=newY, + ) + ) + else: + f_posterior_feas: Callable[[float, float], npt.NDArray] = ( + functools.partial(no_model_feasibility_util, Y=newY) + ) + + none_throws(self.posterior_feasibility).append(f_posterior_feas) + + newYs.append(list(newY)) + newYvars.append(list(newYvar)) + + return newYs, newYvars + + def _check_metric_direction( + self, + objective_weights: npt.NDArray, + outcome_constraints: tuple[npt.NDArray, npt.NDArray] | None = None, + ) -> npt.NDArray: + r""" + Args: + objective_weights: A length m array corresponding to the metrics weights in + a maximization problem. A positive weight means the corresponding metric + succeeding in the positive direction. A negative weight means + the corresponding metric succeeding in the negative direction. 
+ Outcomes that are modeled but not part of the objective get objective + weight 0. + outcome_constraints: A tuple of (A, b), where A is of size + (num of constraints x m), m is the number of outputs/metrics, + and b is of size (num of constraints x 1). + + Returns: + A boolean array of length m indicating the direction of success + for each metric. + """ + # metrics in the objective + upper_is_better = objective_weights > 0 + + # constraints + if outcome_constraints: + A, b = outcome_constraints + upper_is_better_constraints = -np.apply_along_axis( + np.sum, 0, A + ) # column-wise (arm-wise) + objectives = objective_weights != 0 + upper_is_better[~objectives] = upper_is_better_constraints[~objectives] + + return upper_is_better + + def _get_regression_indicator( + self, + objective_weights: npt.NDArray, + outcome_constraints: tuple[npt.NDArray, npt.NDArray] | None = None, + regression_prob_threshold: float = 0.90, + ) -> tuple[npt.NDArray, npt.NDArray]: + r""" + Args: + regression_prob_threshold: Arms having a regression probability with respect + to any metric (in either objective or constraints) above a given + threshold are marked as regressions. + + Returns: A tuple containing: + - A length k array containing regression indicators for each of the k arms. + - (num of arms, num of metrics) matrix of arm, metric pairs, (i, j)-entry + corresponds to the probability of infeasibility or i-th arm and j-th metric. + """ + num_metrics = len(none_throws(self.Ys)) + num_arms = len(none_throws(self.Ys)[0]) + prob_infeasibility = np.zeros( + (num_arms, num_metrics), dtype=float + ) # (num of arms) x (num of metrics) + + upper_is_better = self._check_metric_direction( + objective_weights=objective_weights, outcome_constraints=outcome_constraints + ) + if np.sum(abs(objective_weights) > 0) > 1: + warnings.warn( + "In case of multi-objective adding metric values together might" + " not lead to a meaningful result.", + stacklevel=2, + ) + # compute probabilities of feasibility for metrics in the objective + # equals regression probabilities in this case + for i in range(num_metrics): + if objective_weights[i] != 0.0: + lb = 0.0 if upper_is_better[i] else -np.inf + ub = np.inf if upper_is_better[i] else 0.0 + prob_infeasibility[:, i] = 1.0 - none_throws( + self.posterior_feasibility + )[i](lb, ub) # per i-th metric, across arms + + # compute probabilities of feasibility for constraints + if outcome_constraints: + A, b = outcome_constraints + + if np.any( + np.apply_along_axis(lambda x: np.linalg.norm(x, ord=0) > 1, 1, A) + ): + raise ValueError( + "Only one metric per constraint allowed. Scalarized " + " OutcomeConstraint with multiple metrics per constraint not " + "supported." 
+ ) + + for i in range(A.shape[1]): + if np.linalg.norm(A[:, i], ord=1) == 0: + continue + upper = A[:, i] > 0 + lower = A[:, i] < 0 + ub = np.min(b[upper] / A[upper, i]) if np.any(upper) else np.inf + lb = np.max(b[lower] / A[lower, i]) if np.any(lower) else -np.inf + prob_infeasibility[:, i] = 1.0 - none_throws( + self.posterior_feasibility + )[i]( + lb, ub + ) # per i-th metric, across arms; probability a metric strictly + # smaller than lb or strictly larger than ub + + # for each arm check if it is infeasible with respect to any metric + regressions = np.apply_along_axis( + np.any, 1, prob_infeasibility >= regression_prob_threshold + ) + + return regressions, prob_infeasibility + + def _get_success_measurement(self, objective_weights: npt.NDArray) -> npt.NDArray: + r""" + Returns: + A length k array returning a measure of success for each of k arms. + """ + + posterior_means = np.array(none_throws(self.Ys)).T + success = np.dot(posterior_means, objective_weights) + return success + + @copy_doc(DiscreteModel.gen) + def gen( + self, + n: int, + parameter_values: Sequence[Sequence[TParamValue]], + objective_weights: npt.NDArray | None, + outcome_constraints: tuple[npt.NDArray, npt.NDArray] | None = None, + fixed_features: Mapping[int, TParamValue] | None = None, + pending_observations: Sequence[Sequence[Sequence[TParamValue]]] | None = None, + model_gen_options: TConfig | None = None, + ) -> tuple[list[Sequence[TParamValue]], list[float], TGenMetadata]: + if objective_weights is None: + raise ValueError("EB Ashr model requires objective weights.") + + model_gen_options = model_gen_options or {} + regression_prob_threshold = model_gen_options.get( + "regression_prob_threshold", 0.90 + ) + if not isinstance(regression_prob_threshold, float): + raise TypeError( + "`regression_prob_threshold` is required among `model_gen_kwargs` \ + and must be set to a float." + ) + + # prob_infeasibility: matrix containing probabilities of infeasibilities + # for each arm, metric pair + regression, prob_infeasibility = self._get_regression_indicator( + objective_weights=objective_weights, + outcome_constraints=outcome_constraints, + regression_prob_threshold=regression_prob_threshold, + ) + success = self._get_success_measurement(objective_weights=objective_weights) + + arms = none_throws(self.X) + sorted_arms = sorted( + zip(arms, list(success), list(regression), np.arange(len(arms))), + key=lambda x: x[1] if not x[2] else -1.0, + reverse=True, + ) + + top_arms = [sorted_arms[i][0] for i in range(n)] + + return ( + top_arms, + [1.0] * len(top_arms), # uniform weights + { + "regression": regression, + "success": success, + "prob_infeasibility": prob_infeasibility, + "best_x": sorted_arms[0][0], + }, + ) + + +def posterior_feasibility_util( + lb: float, + ub: float, + posterior: GaussianMixture, + nonconstant_rvs: npt.NDArray, + Y: npt.NDArray, +) -> npt.NDArray: + r""" + Probabilities of a posterior rv being inside the given interval [lb, up] + with bounds included per single metric across arms. + + Args: + lb: Lower bound on the random variable. + ub: Upper bound on the random variable. + posterior: Posterior classs computing posterior distribution for all + non-degenerate (non-constant) arms. + nonconstant_rvs: Length k boolean vector indicating which variables + are constants. + Y: Length k outcome vector. + + Returns: + Length k array of probabilities of feasibility across k arms. 
+ """ + # Pr(m>ub) + # TODO: jmarkovic simplify this directly in Ashr + upper_tail = ( + posterior.tail_probabilities(left_tail=False, threshold=ub) + + posterior.weights[:, 0] * (ub < 0) + if ub < np.inf + else 0 + ) + # Pr(m 0) + if lb > -np.inf + else 0 + ) + + prob_feas = np.zeros_like(nonconstant_rvs, dtype=float) + # pyre-fixme[58]: `-` is not supported for operand types `float` and + # `Union[np.ndarray[typing.Any, np.dtype[typing.Any]], int]`. + prob_feas[nonconstant_rvs] = 1.0 - upper_tail - lower_tail + prob_feas[~nonconstant_rvs] = (Y[~nonconstant_rvs] >= lb) & ( + Y[~nonconstant_rvs] <= ub + ) + return prob_feas + + +def no_model_feasibility_util(lb: float, ub: float, Y: npt.NDArray) -> npt.NDArray: + r""" + Probabilities (0 or 1) of whether Y is inside [lb, ub]. + + This covers the degenerate case when all arms have zero variance, and + `posterior_feasibility_util` is not usable because there is no posterior. + """ + return (Y >= lb) & (Y <= ub) diff --git a/ax/models/tests/test_ashr_utils.py b/ax/models/tests/test_ashr_utils.py new file mode 100644 index 00000000000..d125f01615b --- /dev/null +++ b/ax/models/tests/test_ashr_utils.py @@ -0,0 +1,84 @@ +#!/usr/bin/env python3 +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +# pyre-strict + +import numpy as np +from ax.models.discrete.ashr_utils import ( + Ashr, + fit_ashr_em, + GaussianMixture, + marginal_densities, + prior_grid, +) +from ax.utils.common.testutils import TestCase + + +class AshrModelTest(TestCase): + def test_gaussian_mixture(self) -> None: + normal_vars_by_class = np.array( + [[0.0, 1.0, 1.0], [0.0, 1.0, 0.25], [0.0, 2.25, 1.0], [0.0, 4.0, 4.0]] + ) + + gm = GaussianMixture( + normal_means_by_class=np.zeros((4, 3)), + normal_vars_by_class=normal_vars_by_class, + weights=np.ones((4, 3)) / 3.0, + ) + + variances = normal_vars_by_class.sum(axis=1) / 3.0 + self.assertTrue(np.allclose(gm.means, np.zeros(4))) + self.assertTrue(np.allclose(gm.vars, variances)) + self.assertTrue( + np.allclose(gm.tail_probabilities(left_tail=True), np.ones(4) / 3) + ) + self.assertTrue( + np.allclose(gm.tail_probabilities(left_tail=False), np.ones(4) / 3) + ) + + def test_prior_grid(self) -> None: + prior_stds = prior_grid( + Y=np.array([1, 2]), Yvar=np.array([1, 1]), grid_param=16.0 + ) + self.assertTrue((prior_stds == np.array([0.0, 0.1, 0.4, 1.6, 6.4])).all()) + + def test_marginal_densities(self) -> None: + ll = marginal_densities( + Y=np.zeros(2), Yvar=np.array([1, 2]), prior_vars=np.array([0, 1]) + ) + self.assertTrue( + ( + ll + == np.array( + [ + [1 / np.sqrt(2 * np.pi), 1 / np.sqrt(2 * 2 * np.pi)], + [1 / np.sqrt(2 * 2 * np.pi), 1 / np.sqrt(3 * 2 * np.pi)], + ] + ) + ).all() + ) + + def test_fit_ashr_em(self) -> None: + results = fit_ashr_em( + ll=np.array([[1, 2], [0, 3]]), lambdas=np.ones(2), threshold=1.0, nsteps=1 + ) + self.assertTrue(np.allclose(results["pi"], np.array([1 / 6, 5 / 6]))) + self.assertTrue( + np.allclose( + results["weights"], np.array([[1.0 / 11, 10.0 / 11], [0.0, 1.0]]) + ) + ) + self.assertTrue(np.allclose(results["lfdr"], np.array([1.0 / 11, 0.0]))) + + def test_ashr_posterior(self) -> None: + a = Ashr(Y=np.ones(2), Yvar=np.ones(2), prior_vars=np.array([0, 1])) + gm = a.posterior(w=np.ones((2, 2))) + self.assertTrue( + (gm.normal_means_by_class == np.array([[0, 0.5], [0, 0.5]])).all() + ) + self.assertTrue( + (gm.normal_vars_by_class == np.array([[0, 0.5], [0, 
0.5]])).all() + ) diff --git a/ax/models/tests/test_eb_ashr.py b/ax/models/tests/test_eb_ashr.py new file mode 100644 index 00000000000..97d70eee8ca --- /dev/null +++ b/ax/models/tests/test_eb_ashr.py @@ -0,0 +1,107 @@ +#!/usr/bin/env python3 +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +# pyre-strict + +from warnings import catch_warnings + +import numpy as np +from ax.models.discrete.eb_ashr import EBAshr, no_model_feasibility_util +from ax.utils.common.testutils import TestCase + + +class EBAshrTest(TestCase): + def setUp(self) -> None: + super().setUp() + self.Xs = [ + [[1, 1], [2, 2], [3, 3], [4, 4], [5, 5]] + ] # 5 arms, each of dimensionality 2 + self.Ys = [ + [1, -1, 2, -2, 0], + [-1, 1, -2, 2, 0], + ] # two metrics evaluated on each of the five arms + self.Yvars = [[1, 1, 1, 1, 1], [1, 1, 1, 1, 1]] + self.parameter_values = [ + [1, 2, 3, 4, 5], + [1, 2, 3, 4, 5], + ] # not used in best arm selection + self.outcome_names = ["x", "y"] # metric names, not used in best arm selection + + def test_EBAshr(self) -> None: + generator = EBAshr() + generator.fit( + Xs=self.Xs, + Ys=self.Ys, + Yvars=self.Yvars, + parameter_values=self.parameter_values, + outcome_names=self.outcome_names, + ) + arms, weights, gen_metadata = generator.gen( + n=3, + parameter_values=self.parameter_values, + outcome_constraints=(np.array([[0.0, -1.0]]), np.array([[0.0]])), + objective_weights=np.array([1, 0]), + ) + + self.assertEqual(arms, [[1, 1], [5, 5], [2, 2]]) + self.assertEqual( + list(gen_metadata["regression"]), [False, False, True, True, False] + ) + self.assertTrue(gen_metadata["prob_infeasibility"][0, 0] < 0.9) + self.assertTrue(gen_metadata["prob_infeasibility"][1, 0] < 0.9) + self.assertTrue(gen_metadata["prob_infeasibility"][2, 1] > 0.9) + self.assertTrue(gen_metadata["prob_infeasibility"][3, 0] > 0.9) + self.assertTrue(gen_metadata["prob_infeasibility"][4, 0] < 0.9) + self.assertEqual(weights, [1.0] * 3) + self.assertEqual(gen_metadata["best_x"], arms[0]) + + with self.subTest("No variance"): + generator.fit( + Xs=self.Xs, + Ys=self.Ys, + Yvars=[[0] * len(self.Ys[0]) for _ in self.Ys], + parameter_values=self.parameter_values, + outcome_names=self.outcome_names, + ) + arms, weights, gen_metadata = generator.gen( + n=3, + parameter_values=self.parameter_values, + outcome_constraints=(np.array([[0.0, -1.0]]), np.array([[0.0]])), + objective_weights=np.array([1, 0]), + ) + self.assertEqual(sorted([tuple(a) for a in arms]), [(1, 1), (2, 2), (5, 5)]) + + self.assertEqual( + list(gen_metadata["regression"]), [True, True, True, True, False] + ) + prob_infeasibility = gen_metadata["prob_infeasibility"] + self.assertEqual(prob_infeasibility[0, 0], 0) + self.assertEqual(prob_infeasibility[1, 0], 1) + self.assertEqual(prob_infeasibility[2, 1], 1) + self.assertEqual(prob_infeasibility[3, 0], 1) + self.assertEqual(prob_infeasibility[4, 0], 0) + + self.assertEqual(weights, [1.0] * 3) + self.assertEqual(gen_metadata["best_x"], arms[0]) + + with catch_warnings(record=True) as warning_list: + arms, weights, gen_metadata = generator.gen( + n=3, + parameter_values=self.parameter_values, + outcome_constraints=None, + objective_weights=np.array([1, -1]), + ) + self.assertEqual( + "In case of multi-objective adding metric values together might" + " not lead to a meaningful result.", + str(warning_list[0].message), + ) + + def test_no_model_feasibility_util(self) -> None: + 
+        probabilities = no_model_feasibility_util(
+            lb=1.0, ub=2.0, Y=np.array([0.0, 1.0, 1.5, 2.0, 3.0])
+        )
+        self.assertTrue((probabilities == [False, True, True, True, False]).all())
diff --git a/sphinx/source/models.rst b/sphinx/source/models.rst
index 04f9386b26f..113632d5bfe 100644
--- a/sphinx/source/models.rst
+++ b/sphinx/source/models.rst
@@ -87,6 +87,21 @@ ax.models.discrete.thompson module
     :undoc-members:
     :show-inheritance:
 
+ax.models.discrete.eb\_ashr module
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. automodule:: ax.models.discrete.eb_ashr
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+ax.models.discrete.ashr\_utils module
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. automodule:: ax.models.discrete.ashr_utils
+    :members:
+    :undoc-members:
+    :show-inheritance:
 
 Random Models
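Note for reviewers (not part of the patch): below is a minimal, self-contained sketch of how the new `EBAshr` model is exercised directly, reusing the toy inputs from `test_eb_ashr.py` above. In a full Ax workflow the model would normally be constructed through the new `Models.EB_ASHR` registry entry (e.g. `Models.EB_ASHR(experiment=..., data=...)`), with the `EB_ashr_trans` transform chain supplying relativized outcomes; the raw arrays here stand in for that step and are illustrative only.

```python
import numpy as np

from ax.models.discrete.eb_ashr import EBAshr

# Five arms (each a 2-d parameterization) and two metrics; outcomes are assumed
# to already be relativized against the status quo, which is what the
# EB_ashr_trans transforms produce before the model sees the data.
Xs = [[[1, 1], [2, 2], [3, 3], [4, 4], [5, 5]]]
Ys = [[1, -1, 2, -2, 0], [-1, 1, -2, 2, 0]]
Yvars = [[1, 1, 1, 1, 1], [1, 1, 1, 1, 1]]
parameter_values = [[1, 2, 3, 4, 5], [1, 2, 3, 4, 5]]

model = EBAshr()
model.fit(
    Xs=Xs,
    Ys=Ys,
    Yvars=Yvars,
    parameter_values=parameter_values,
    outcome_names=["x", "y"],
)

# Maximize the first metric; the second metric only enters via the constraint.
arms, weights, gen_metadata = model.gen(
    n=3,
    parameter_values=parameter_values,
    objective_weights=np.array([1, 0]),
    outcome_constraints=(np.array([[0.0, -1.0]]), np.array([[0.0]])),
)

print(arms)                                # top-ranked feasible arms
print(gen_metadata["regression"])          # per-arm regression flags
print(gen_metadata["prob_infeasibility"])  # (num arms) x (num metrics) matrix
```

The `gen_metadata` dict is the main addition over the plain Thompson sampler: it surfaces the per-arm regression flags and the (arms x metrics) infeasibility probabilities that drive the feasibility-aware ranking in `EBAshr.gen`.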