diff --git a/botorch/acquisition/__init__.py b/botorch/acquisition/__init__.py index 7ff09b30c5..276cbd3aa0 100644 --- a/botorch/acquisition/__init__.py +++ b/botorch/acquisition/__init__.py @@ -16,6 +16,8 @@ AnalyticAcquisitionFunction, ConstrainedExpectedImprovement, ExpectedImprovement, + LogExpectedImprovement, + LogNoisyExpectedImprovement, NoisyExpectedImprovement, PosteriorMean, ProbabilityOfImprovement, @@ -32,6 +34,11 @@ qKnowledgeGradient, qMultiFidelityKnowledgeGradient, ) +from botorch.acquisition.logei import ( + LogImprovementMCAcquisitionFunction, + qLogExpectedImprovement, + qLogNoisyExpectedImprovement, +) from botorch.acquisition.max_value_entropy_search import ( MaxValueBase, qLowerBoundMaxValueEntropy, @@ -46,6 +53,7 @@ qProbabilityOfImprovement, qSimpleRegret, qUpperConfidenceBound, + SampleReducingMCAcquisitionFunction, ) from botorch.acquisition.multi_step_lookahead import qMultiStepLookahead from botorch.acquisition.objective import ( @@ -71,6 +79,8 @@ "AnalyticExpectedUtilityOfBestOption", "ConstrainedExpectedImprovement", "ExpectedImprovement", + "LogExpectedImprovement", + "LogNoisyExpectedImprovement", "FixedFeatureAcquisitionFunction", "GenericCostAwareUtility", "InverseCostWeightedUtility", @@ -85,6 +95,9 @@ "UpperConfidenceBound", "qAnalyticProbabilityOfImprovement", "qExpectedImprovement", + "LogImprovementMCAcquisitionFunction", + "qLogExpectedImprovement", + "qLogNoisyExpectedImprovement", "qKnowledgeGradient", "MaxValueBase", "qMultiFidelityKnowledgeGradient", @@ -104,6 +117,7 @@ "LearnedObjective", "LinearMCObjective", "MCAcquisitionFunction", + "SampleReducingMCAcquisitionFunction", "MCAcquisitionObjective", "ScalarizedPosteriorTransform", "get_acquisition_function", diff --git a/botorch/acquisition/fixed_feature.py b/botorch/acquisition/fixed_feature.py index 0f3b85faa7..31a746b954 100644 --- a/botorch/acquisition/fixed_feature.py +++ b/botorch/acquisition/fixed_feature.py @@ -20,6 +20,35 @@ from torch.nn import Module +def get_dtype_of_sequence(values: Sequence[Union[Tensor, float]]) -> torch.dtype: + """ + Return torch.float32 if everything is single-precision and torch.float64 + otherwise. + + Numbers (non-tensors) are double-precision. + """ + + def _is_single(value: Union[Tensor, float]) -> bool: + return isinstance(value, Tensor) and value.dtype == torch.float32 + + all_single_precision = all(_is_single(value) for value in values) + return torch.float32 if all_single_precision else torch.float64 + + +def get_device_of_sequence(values: Sequence[Union[Tensor, float]]) -> torch.device: + """ + Return the CPU device if everything is on the CPU and the CUDA device otherwise. + + Numbers (non-tensors) are considered to be on the CPU. + """ + + def _is_cuda(value: Union[Tensor, float]) -> bool: + return hasattr(value, "device") and value.device.type == "cuda" + + any_cuda = any(_is_cuda(value) for value in values) + return torch.device("cuda") if any_cuda else torch.device("cpu") + + class FixedFeatureAcquisitionFunction(AcquisitionFunction): """A wrapper around AquisitionFunctions to fix a subset of features.
@@ -58,27 +87,25 @@ def __init__( """ Module.__init__(self) self.acq_func = acq_function - dtype = torch.float - device = torch.device("cpu") self.d = d + if isinstance(values, Tensor): new_values = values.detach().clone() else: + + dtype = get_dtype_of_sequence(values) + device = get_device_of_sequence(values) + new_values = [] for value in values: if isinstance(value, Number): - new_values.append(torch.tensor([float(value)])) + value = torch.tensor([value], dtype=dtype) else: - # if any value uses double, use double for all values - # likewise if any value uses cuda, use cuda for all values - dtype = value.dtype if value.dtype == torch.double else dtype - device = value.device if value.device.type == "cuda" else device if value.ndim == 0: # since we can't broadcast with zero-d tensors value = value.unsqueeze(0) - new_values.append(value.detach().clone()) - # move all values to same device - for i, val in enumerate(new_values): - new_values[i] = val.to(dtype=dtype, device=device) + value = value.detach().clone() + + new_values.append(value.to(dtype=dtype, device=device)) # There are 3 cases for when `values` is a `Sequence`. # 1) `values` == list of floats as earlier. diff --git a/botorch/acquisition/input_constructors.py b/botorch/acquisition/input_constructors.py index 6e6f7dcd62..c1b3fa02f2 100644 --- a/botorch/acquisition/input_constructors.py +++ b/botorch/acquisition/input_constructors.py @@ -47,6 +47,12 @@ qKnowledgeGradient, qMultiFidelityKnowledgeGradient, ) +from botorch.acquisition.logei import ( + qLogExpectedImprovement, + qLogNoisyExpectedImprovement, + TAU_MAX, + TAU_RELU, +) from botorch.acquisition.max_value_entropy_search import ( qMaxValueEntropy, qMultiFidelityMaxValueEntropy, @@ -78,6 +84,7 @@ from botorch.acquisition.preference import AnalyticExpectedUtilityOfBestOption from botorch.acquisition.risk_measures import RiskMeasureMCObjective from botorch.acquisition.utils import ( + compute_best_feasible_objective, expand_trace_observations, get_optimal_samples, project_to_target_fidelity, @@ -457,7 +464,9 @@ def construct_inputs_qEI( X_pending: Optional[Tensor] = None, sampler: Optional[MCSampler] = None, best_f: Optional[Union[float, Tensor]] = None, - **kwargs: Any, + constraints: Optional[List[Callable[[Tensor], Tensor]]] = None, + eta: Union[Tensor, float] = 1e-3, + **ignored: Any, ) -> Dict[str, Any]: r"""Construct kwargs for the `qExpectedImprovement` constructor. @@ -473,7 +482,15 @@ def construct_inputs_qEI( sampler: The sampler used to draw base samples. If omitted, uses the acquisition functions's default sampler. best_f: Threshold above (or below) which improvement is defined. - kwargs: Not used. + constraints: A list of constraint callables which map a Tensor of posterior + samples of dimension `sample_shape x batch-shape x q x m`-dim to a + `sample_shape x batch-shape x q`-dim Tensor. The associated constraints + are considered satisfied if the output is less than zero. + eta: Temperature parameter(s) governing the smoothness of the sigmoid + approximation to the constraint indicators. For more details, on this + parameter, see the docs of `compute_smoothed_feasibility_indicator`. + ignored: Not used. + Returns: A dict mapping kwarg names of the constructor to values. 
""" @@ -489,9 +506,77 @@ def construct_inputs_qEI( training_data=training_data, objective=objective, posterior_transform=posterior_transform, + constraints=constraints, + model=model, ) - return {**base_inputs, "best_f": best_f} + return {**base_inputs, "best_f": best_f, "constraints": constraints, "eta": eta} + + +@acqf_input_constructor(qLogExpectedImprovement) +def construct_inputs_qLogEI( + model: Model, + training_data: MaybeDict[SupervisedDataset], + objective: Optional[MCAcquisitionObjective] = None, + posterior_transform: Optional[PosteriorTransform] = None, + X_pending: Optional[Tensor] = None, + sampler: Optional[MCSampler] = None, + best_f: Optional[Union[float, Tensor]] = None, + constraints: Optional[List[Callable[[Tensor], Tensor]]] = None, + eta: Union[Tensor, float] = 1e-3, + fat: bool = True, + tau_max: float = TAU_MAX, + tau_relu: float = TAU_RELU, + **ignored: Any, +) -> Dict[str, Any]: + r"""Construct kwargs for the `qExpectedImprovement` constructor. + + Args: + model: The model to be used in the acquisition function. + training_data: Dataset(s) used to train the model. + objective: The objective to be used in the acquisition function. + posterior_transform: The posterior transform to be used in the + acquisition function. + X_pending: A `m x d`-dim Tensor of `m` design points that have been + submitted for function evaluation but have not yet been evaluated. + Concatenated into X upon forward call. + sampler: The sampler used to draw base samples. If omitted, uses + the acquisition functions's default sampler. + best_f: Threshold above (or below) which improvement is defined. + constraints: A list of constraint callables which map a Tensor of posterior + samples of dimension `sample_shape x batch-shape x q x m`-dim to a + `sample_shape x batch-shape x q`-dim Tensor. The associated constraints + are considered satisfied if the output is less than zero. + eta: Temperature parameter(s) governing the smoothness of the sigmoid + approximation to the constraint indicators. For more details, on this + parameter, see the docs of `compute_smoothed_feasibility_indicator`. + fat: Toggles the logarithmic / linear asymptotic behavior of the smooth + approximation to the ReLU. + tau_max: Temperature parameter controlling the sharpness of the smooth + approximations to max. + tau_relu: Temperature parameter controlling the sharpness of the smooth + approximations to ReLU. + ignored: Not used. + + Returns: + A dict mapping kwarg names of the constructor to values. + """ + return { + **construct_inputs_qEI( + model=model, + training_data=training_data, + objective=objective, + posterior_transform=posterior_transform, + X_pending=X_pending, + sampler=sampler, + best_f=best_f, + constraints=constraints, + eta=eta, + ), + "fat": fat, + "tau_max": tau_max, + "tau_relu": tau_relu, + } @acqf_input_constructor(qNoisyExpectedImprovement) @@ -505,7 +590,9 @@ def construct_inputs_qNEI( X_baseline: Optional[Tensor] = None, prune_baseline: Optional[bool] = True, cache_root: Optional[bool] = True, - **kwargs: Any, + constraints: Optional[List[Callable[[Tensor], Tensor]]] = None, + eta: Union[Tensor, float] = 1e-3, + **ignored: Any, ) -> Dict[str, Any]: r"""Construct kwargs for the `qNoisyExpectedImprovement` constructor. @@ -527,7 +614,14 @@ def construct_inputs_qNEI( prune_baseline: If True, remove points in `X_baseline` that are highly unlikely to be the best point. This can significantly improve performance and is generally recommended. - kwargs: Not used. 
+ constraints: A list of constraint callables which map a Tensor of posterior + samples of dimension `sample_shape x batch-shape x q x m`-dim to a + `sample_shape x batch-shape x q`-dim Tensor. The associated constraints + are considered satisfied if the output is less than zero. + eta: Temperature parameter(s) governing the smoothness of the sigmoid + approximation to the constraint indicators. For more details, on this + parameter, see the docs of `compute_smoothed_feasibility_indicator`. + ignored: Not used. Returns: A dict mapping kwarg names of the constructor to values. @@ -547,12 +641,89 @@ def construct_inputs_qNEI( assert_shared=True, first_only=True, ) - return { **base_inputs, "X_baseline": X_baseline, "prune_baseline": prune_baseline, "cache_root": cache_root, + "constraints": constraints, + "eta": eta, + } + + +@acqf_input_constructor(qLogNoisyExpectedImprovement) +def construct_inputs_qLogNEI( + model: Model, + training_data: MaybeDict[SupervisedDataset], + objective: Optional[MCAcquisitionObjective] = None, + posterior_transform: Optional[PosteriorTransform] = None, + X_pending: Optional[Tensor] = None, + sampler: Optional[MCSampler] = None, + X_baseline: Optional[Tensor] = None, + prune_baseline: Optional[bool] = True, + cache_root: Optional[bool] = True, + constraints: Optional[List[Callable[[Tensor], Tensor]]] = None, + eta: Union[Tensor, float] = 1e-3, + fat: bool = True, + tau_max: float = TAU_MAX, + tau_relu: float = TAU_RELU, + **ignored: Any, +): + r"""Construct kwargs for the `qLogNoisyExpectedImprovement` constructor. + + Args: + model: The model to be used in the acquisition function. + training_data: Dataset(s) used to train the model. + objective: The objective to be used in the acquisition function. + posterior_transform: The posterior transform to be used in the + acquisition function. + X_pending: A `m x d`-dim Tensor of `m` design points that have been + submitted for function evaluation but have not yet been evaluated. + Concatenated into X upon forward call. + sampler: The sampler used to draw base samples. If omitted, uses + the acquisition functions's default sampler. + X_baseline: A `batch_shape x r x d`-dim Tensor of `r` design points + that have already been observed. These points are considered as + the potential best design point. If omitted, checks that all + training_data have the same input features and take the first `X`. + prune_baseline: If True, remove points in `X_baseline` that are + highly unlikely to be the best point. This can significantly + improve performance and is generally recommended. + constraints: A list of constraint callables which map a Tensor of posterior + samples of dimension `sample_shape x batch-shape x q x m`-dim to a + `sample_shape x batch-shape x q`-dim Tensor. The associated constraints + are considered satisfied if the output is less than zero. + eta: Temperature parameter(s) governing the smoothness of the sigmoid + approximation to the constraint indicators. For more details, on this + parameter, see the docs of `compute_smoothed_feasibility_indicator`. + fat: Toggles the logarithmic / linear asymptotic behavior of the smooth + approximation to the ReLU. + tau_max: Temperature parameter controlling the sharpness of the smooth + approximations to max. + tau_relu: Temperature parameter controlling the sharpness of the smooth + approximations to ReLU. + ignored: Not used. + + Returns: + A dict mapping kwarg names of the constructor to values.
+ """ + return { + **construct_inputs_qNEI( + model=model, + training_data=training_data, + objective=objective, + posterior_transform=posterior_transform, + X_pending=X_pending, + sampler=sampler, + X_baseline=X_baseline, + prune_baseline=prune_baseline, + cache_root=cache_root, + constraint=constraints, + eta=eta, + ), + "fat": fat, + "tau_max": tau_max, + "tau_relu": tau_relu, } @@ -566,7 +737,9 @@ def construct_inputs_qPI( sampler: Optional[MCSampler] = None, tau: float = 1e-3, best_f: Optional[Union[float, Tensor]] = None, - **kwargs: Any, + constraints: Optional[List[Callable[[Tensor], Tensor]]] = None, + eta: Union[Tensor, float] = 1e-3, + **ignored: Any, ) -> Dict[str, Any]: r"""Construct kwargs for the `qProbabilityOfImprovement` constructor. @@ -588,13 +761,26 @@ def construct_inputs_qPI( best_f: The best objective value observed so far (assumed noiseless). Can be a `batch_shape`-shaped tensor, which in case of a batched model specifies potentially different values for each element of the batch. - kwargs: Not used. + constraints: A list of constraint callables which map a Tensor of posterior + samples of dimension `sample_shape x batch-shape x q x m`-dim to a + `sample_shape x batch-shape x q`-dim Tensor. The associated constraints + are considered satisfied if the output is less than zero. + eta: Temperature parameter(s) governing the smoothness of the sigmoid + approximation to the constraint indicators. For more details, on this + parameter, see the docs of `compute_smoothed_feasibility_indicator`. + ignored: Not used. + Returns: A dict mapping kwarg names of the constructor to values. """ if best_f is None: - best_f = get_best_f_mc(training_data=training_data, objective=objective) - + best_f = get_best_f_mc( + training_data=training_data, + objective=objective, + posterior_transform=posterior_transform, + constraints=constraints, + model=model, + ) base_inputs = _construct_inputs_mc_base( model=model, objective=objective, @@ -603,7 +789,13 @@ def construct_inputs_qPI( X_pending=X_pending, ) - return {**base_inputs, "tau": tau, "best_f": best_f} + return { + **base_inputs, + "tau": tau, + "best_f": best_f, + "constraints": constraints, + "eta": eta, + } @acqf_input_constructor(qUpperConfidenceBound) @@ -615,7 +807,7 @@ def construct_inputs_qUCB( X_pending: Optional[Tensor] = None, sampler: Optional[MCSampler] = None, beta: float = 0.2, - **kwargs: Any, + **ignored: Any, ) -> Dict[str, Any]: r"""Construct kwargs for the `qUpperConfidenceBound` constructor. @@ -631,7 +823,7 @@ def construct_inputs_qUCB( sampler: The sampler used to draw base samples. If omitted, uses the acquisition functions's default sampler. beta: Controls tradeoff between mean and standard deviation in UCB. - kwargs: Not used. + ignored: Not used. Returns: A dict mapping kwarg names of the constructor to values. 
@@ -1083,18 +1275,28 @@ def get_best_f_mc( training_data: MaybeDict[SupervisedDataset], objective: Optional[MCAcquisitionObjective] = None, posterior_transform: Optional[PosteriorTransform] = None, + constraints: Optional[List[Callable[[Tensor], Tensor]]] = None, + model: Optional[Model] = None, ) -> Tensor: if isinstance(training_data, dict) and not _field_is_shared( training_data, fieldname="X" ): raise NotImplementedError("Currently only block designs are supported.") + X_baseline = _get_dataset_field( + training_data, + fieldname="X", + transform=lambda field: field(), + assert_shared=True, + first_only=True, + ) + Y = _get_dataset_field( training_data, fieldname="Y", transform=lambda field: field(), join_rule=lambda field_tensors: torch.cat(field_tensors, dim=-1), - ) + ) # batch_shape x n x d if posterior_transform is not None: # retain the original tensor dimension since objective expects explicit @@ -1111,7 +1313,16 @@ def get_best_f_mc( "acquisition functions)." ) objective = IdentityMCObjective() - return objective(Y).max(-1).values + obj = objective(Y, X=X_baseline) # batch_shape x n + return compute_best_feasible_objective( + samples=Y, + obj=obj, + constraints=constraints, + model=model, + objective=objective, + posterior_transform=posterior_transform, + X_baseline=X_baseline, + ) def optimize_objective( diff --git a/botorch/acquisition/logei.py b/botorch/acquisition/logei.py new file mode 100644 index 0000000000..e12228c1a5 --- /dev/null +++ b/botorch/acquisition/logei.py @@ -0,0 +1,521 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +r""" +Batch implementations of the LogEI family of improvements-based acquisition functions. +""" + +from __future__ import annotations + +from copy import deepcopy + +from functools import partial + +from typing import Any, Callable, List, Optional, Tuple, TypeVar, Union + +import torch +from botorch.acquisition.cached_cholesky import CachedCholeskyMCAcquisitionFunction +from botorch.acquisition.monte_carlo import SampleReducingMCAcquisitionFunction +from botorch.acquisition.objective import ( + ConstrainedMCObjective, + MCAcquisitionObjective, + PosteriorTransform, +) +from botorch.acquisition.utils import ( + compute_best_feasible_objective, + prune_inferior_points, +) +from botorch.exceptions.errors import BotorchError +from botorch.models.model import Model +from botorch.sampling.base import MCSampler +from botorch.utils.safe_math import ( + fatmax, + log_fatplus, + log_softplus, + logmeanexp, + smooth_amax, +) +from botorch.utils.transforms import match_batch_shape +from torch import Tensor + +""" +NOTE: On the default temperature parameters: + +tau_relu: It is generally important to set `tau_relu` to be very small, in particular, +smaller than the expected improvement value. Otherwise, the optimization can stagnate. +By setting `tau_relu=1e-6` by default, stagnation is exceedingly unlikely to occur due +to the smooth ReLU approximation for practical applications of BO. +IDEA: We could consider shrinking `tau_relu` with the progression of the optimization. + +tau_max: This is only relevant for the batch (`q > 1`) case, and `tau_max=1e-2` is +sufficient to get a good approximation to the maximum improvement in the batch of +candidates. If `fat=False`, the smooth approximation to the maximum can saturate +numerically. 
It is therefore recommended to use `fat=True` when optimizing batches +of `q > 1` points. +""" +TAU_RELU = 1e-6 +TAU_MAX = 1e-2 +FloatOrTensor = TypeVar("FloatOrTensor", float, Tensor) + + +class LogImprovementMCAcquisitionFunction(SampleReducingMCAcquisitionFunction): + r""" + Abstract base class for Monte-Carlo-based batch LogEI acquisition functions. + + :meta private: + """ + + _log: bool = True + + def __init__( + self, + model: Model, + sampler: Optional[MCSampler] = None, + objective: Optional[MCAcquisitionObjective] = None, + posterior_transform: Optional[PosteriorTransform] = None, + X_pending: Optional[Tensor] = None, + constraints: Optional[List[Callable[[Tensor], Tensor]]] = None, + eta: Union[Tensor, float] = 1e-3, + fat: bool = True, + tau_max: float = TAU_MAX, + ) -> None: + r""" + Args: + model: A fitted model. + sampler: The sampler used to draw base samples. If not given, + a sampler is generated using `get_sampler`. + NOTE: For posteriors that do not support base samples, + a sampler compatible with intended use case must be provided. + See `ForkedRNGSampler` and `StochasticSampler` as examples. + objective: The MCAcquisitionObjective under which the samples are + evaluated. Defaults to `IdentityMCObjective()`. + posterior_transform: A PosteriorTransform (optional). + X_pending: A `batch_shape x m x d`-dim Tensor of `m` design points + that have been submitted for function evaluation + but have not yet been evaluated. + constraints: A list of constraint callables which map a Tensor of posterior + samples of dimension `sample_shape x batch-shape x q x m`-dim to a + `sample_shape x batch-shape x q`-dim Tensor. The associated constraints + are satisfied if `constraint(samples) < 0`. + eta: Temperature parameter(s) governing the smoothness of the sigmoid + approximation to the constraint indicators. See the docs of + `compute_(log_)constraint_indicator` for more details on this parameter. + fat: Toggles the logarithmic / linear asymptotic behavior of the smooth + approximation to the ReLU. + tau_max: Temperature parameter controlling the sharpness of the + approximation to the `max` operator over the `q` candidate points. + """ + if isinstance(objective, ConstrainedMCObjective): + raise BotorchError( + "Log-Improvement should not be used with `ConstrainedMCObjective`. " + "Please pass the `constraints` directly to the constructor of the " + "acquisition function." + ) + q_reduction = partial(fatmax if fat else smooth_amax, tau=tau_max) + super().__init__( + model=model, + sampler=sampler, + objective=objective, + posterior_transform=posterior_transform, + X_pending=X_pending, + sample_reduction=logmeanexp, + q_reduction=q_reduction, + constraints=constraints, + eta=eta, + fat=fat, + ) + self.tau_max = tau_max + + +class qLogExpectedImprovement(LogImprovementMCAcquisitionFunction): + r"""MC-based batch Log Expected Improvement. + + This computes qLogEI by + (1) sampling the joint posterior over q points, + (2) evaluating the smoothed log improvement over the current best for each sample, + (3) smoothly maximizing over q, and + (4) averaging over the samples in log space. + + `qLogEI(X) ~ log(qEI(X)) = log(E(max(max Y - best_f, 0)))`, + + where `Y ~ f(X)`, and `X = (x_1,...,x_q)`.
+ + Example: + >>> model = SingleTaskGP(train_X, train_Y) + >>> best_f = train_Y.max() + >>> sampler = SobolQMCNormalSampler(1024) + >>> qLogEI = qLogExpectedImprovement(model, best_f, sampler) + >>> acqval = qLogEI(test_X) + """ + + def __init__( + self, + model: Model, + best_f: Union[float, Tensor], + sampler: Optional[MCSampler] = None, + objective: Optional[MCAcquisitionObjective] = None, + posterior_transform: Optional[PosteriorTransform] = None, + X_pending: Optional[Tensor] = None, + constraints: Optional[List[Callable[[Tensor], Tensor]]] = None, + eta: Union[Tensor, float] = 1e-3, + fat: bool = True, + tau_max: float = TAU_MAX, + tau_relu: float = TAU_RELU, + ) -> None: + r"""q-Log Expected Improvement. + + Args: + model: A fitted model. + best_f: The best objective value observed so far (assumed noiseless). Can be + a `batch_shape`-shaped tensor, which in case of a batched model + specifies potentially different values for each element of the batch. + sampler: The sampler used to draw base samples. See `MCAcquisitionFunction` + for more details. + objective: The MCAcquisitionObjective under which the samples are evaluated. + Defaults to `IdentityMCObjective()`. + posterior_transform: A PosteriorTransform (optional). + X_pending: A `m x d`-dim Tensor of `m` design points that have been + submitted for function evaluation but have not yet been evaluated. + Concatenated into `X` upon forward call. Copied and set to have no + gradient. + constraints: A list of constraint callables which map a Tensor of posterior + samples of dimension `sample_shape x batch-shape x q x m`-dim to a + `sample_shape x batch-shape x q`-dim Tensor. The associated constraints + are satisfied if `constraint(samples) < 0`. + eta: Temperature parameter(s) governing the smoothness of the sigmoid + approximation to the constraint indicators. See the docs of + `compute_(log_)smoothed_constraint_indicator` for details. + fat: Toggles the logarithmic / linear asymptotic behavior of the smooth + approximation to the ReLU. + tau_max: Temperature parameter controlling the sharpness of the smooth + approximations to max. + tau_relu: Temperature parameter controlling the sharpness of the smooth + approximations to ReLU. + """ + super().__init__( + model=model, + sampler=sampler, + objective=objective, + posterior_transform=posterior_transform, + X_pending=X_pending, + constraints=constraints, + eta=eta, + tau_max=check_tau(tau_max, name="tau_max"), + fat=fat, + ) + self.register_buffer("best_f", torch.as_tensor(best_f)) + self.tau_relu = check_tau(tau_relu, name="tau_relu") + + def _sample_forward(self, obj: Tensor) -> Tensor: + r"""Evaluate qLogExpectedImprovement on the candidate set `X`. + + Args: + obj: `mc_shape x batch_shape x q`-dim Tensor of MC objective values. + + Returns: + A `mc_shape x batch_shape x q`-dim Tensor of log expected improvement values. + """ + li = _log_improvement( + Y=obj, + best_f=self.best_f, + tau=self.tau_relu, + fat=self._fat, + ) + return li + + +class qLogNoisyExpectedImprovement( + LogImprovementMCAcquisitionFunction, CachedCholeskyMCAcquisitionFunction +): + r"""MC-based batch Log Noisy Expected Improvement. + + This function does not assume a `best_f` is known (which would require + noiseless observations). Instead, it uses samples from the joint posterior + over the `q` test points and previously observed points. A smooth approximation + to the canonical improvement over previously observed points is computed + for each sample and the logarithm of the average is returned.
+ + `qLogNEI(X) ~ log(qNEI(X)) = log(E(max(max Y - max Y_baseline, 0)))`, where + `(Y, Y_baseline) ~ f((X, X_baseline)), X = (x_1,...,x_q)`. + + Example: + >>> model = SingleTaskGP(train_X, train_Y) + >>> sampler = SobolQMCNormalSampler(1024) + >>> qLogNEI = qLogNoisyExpectedImprovement(model, train_X, sampler) + >>> acqval = qLogNEI(test_X) + """ + + def __init__( + self, + model: Model, + X_baseline: Tensor, + sampler: Optional[MCSampler] = None, + objective: Optional[MCAcquisitionObjective] = None, + posterior_transform: Optional[PosteriorTransform] = None, + X_pending: Optional[Tensor] = None, + constraints: Optional[List[Callable[[Tensor], Tensor]]] = None, + eta: Union[Tensor, float] = 1e-3, + fat: bool = True, + prune_baseline: bool = False, + cache_root: bool = True, + tau_max: float = TAU_MAX, + tau_relu: float = TAU_RELU, + **kwargs: Any, + ) -> None: + r"""q-Log Noisy Expected Improvement. + + Args: + model: A fitted model. + X_baseline: A `batch_shape x r x d`-dim Tensor of `r` design points + that have already been observed. These points are considered as + the potential best design point. + sampler: The sampler used to draw base samples. See `MCAcquisitionFunction` + for more details. + objective: The MCAcquisitionObjective under which the samples are + evaluated. Defaults to `IdentityMCObjective()`. + posterior_transform: A PosteriorTransform (optional). + X_pending: A `batch_shape x m x d`-dim Tensor of `m` design points + that have been submitted for function evaluation + but have not yet been evaluated. Concatenated into `X` upon + forward call. Copied and set to have no gradient. + constraints: A list of constraint callables which map a Tensor of posterior + samples of dimension `sample_shape x batch-shape x q x m`-dim to a + `sample_shape x batch-shape x q`-dim Tensor. The associated constraints + are satisfied if `constraint(samples) < 0`. + eta: Temperature parameter(s) governing the smoothness of the sigmoid + approximation to the constraint indicators. See the docs of + `compute_(log_)smoothed_constraint_indicator` for details. + fat: Toggles the logarithmic / linear asymptotic behavior of the smooth + approximation to the ReLU. + prune_baseline: If True, remove points in `X_baseline` that are + highly unlikely to be the best point. This can significantly + improve performance and is generally recommended. In order to + customize pruning parameters, instead manually call + `botorch.acquisition.utils.prune_inferior_points` on `X_baseline` + before instantiating the acquisition function. + cache_root: A boolean indicating whether to cache the root + decomposition over `X_baseline` and use low-rank updates. + tau_max: Temperature parameter controlling the sharpness of the smooth + approximations to max. + tau_relu: Temperature parameter controlling the sharpness of the smooth + approximations to ReLU. + kwargs: Here for compatibility with qNEI. + + TODO: similar to qNEHVI, when we are using sequential greedy candidate + selection, we could incorporate pending points X_baseline and compute + the incremental q(Log)NEI from the new point. This would greatly increase + efficiency for large batches. + """ + # TODO: separate out baseline variables initialization and other functions + # in qNEI to avoid duplication of both code and work at runtime.
+ super().__init__( + model=model, + sampler=sampler, + objective=objective, + posterior_transform=posterior_transform, + X_pending=X_pending, + constraints=constraints, + eta=eta, + fat=fat, + tau_max=tau_max, + ) + self.tau_relu = tau_relu + self._init_baseline( + model=model, + X_baseline=X_baseline, + sampler=sampler, + objective=objective, + posterior_transform=posterior_transform, + prune_baseline=prune_baseline, + cache_root=cache_root, + **kwargs, + ) + + def _sample_forward(self, obj: Tensor) -> Tensor: + r"""Evaluate qLogNoisyExpectedImprovement per sample on the candidate set `X`. + + Args: + obj: `mc_shape x batch_shape x q`-dim Tensor of MC objective values. + + Returns: + A `sample_shape x batch_shape x q`-dim Tensor of log noisy expected smoothed + improvement values. + """ + return _log_improvement( + Y=obj, + best_f=self.compute_best_f(obj), + tau=self.tau_relu, + fat=self._fat, + ) + + def _init_baseline( + self, + model: Model, + X_baseline: Tensor, + sampler: Optional[MCSampler] = None, + objective: Optional[MCAcquisitionObjective] = None, + posterior_transform: Optional[PosteriorTransform] = None, + prune_baseline: bool = False, + cache_root: bool = True, + **kwargs: Any, + ) -> None: + # setup of CachedCholeskyMCAcquisitionFunction + self._setup(model=model, cache_root=cache_root) + if prune_baseline: + X_baseline = prune_inferior_points( + model=model, + X=X_baseline, + objective=objective, + posterior_transform=posterior_transform, + marginalize_dim=kwargs.get("marginalize_dim"), + ) + self.register_buffer("X_baseline", X_baseline) + # registering buffers for _get_samples_and_objectives in the next `if` block + self.register_buffer("baseline_samples", None) + self.register_buffer("baseline_obj", None) + if self._cache_root: + self.q_in = -1 + # set baseline samples + with torch.no_grad(): # this is _get_samples_and_objectives(X_baseline) + posterior = self.model.posterior( + X_baseline, posterior_transform=self.posterior_transform + ) + # Note: The root decomposition is cached in two different places. It + # may be confusing to have two different caches, but this is not + # trivial to change since each is needed for a different reason: + # - LinearOperator caching to `posterior.mvn` allows for reuse within + # this function, which may be helpful if the same root decomposition + # is produced by the calls to `self.base_sampler` and + # `self._cache_root_decomposition`. + # - self._baseline_L allows a root decomposition to be persisted outside + # this method. + self.baseline_samples = self.get_posterior_samples(posterior) + self.baseline_obj = self.objective(self.baseline_samples, X=X_baseline) + + # We make a copy here because we will write an attribute `base_samples` + # to `self.base_sampler.base_samples`, and we don't want to mutate + # `self.sampler`. + self.base_sampler = deepcopy(self.sampler) + self.register_buffer( + "_baseline_best_f", + self._compute_best_feasible_objective( + samples=self.baseline_samples, obj=self.baseline_obj + ), + ) + self._baseline_L = self._compute_root_decomposition(posterior=posterior) + + def compute_best_f(self, obj: Tensor) -> Tensor: + """Computes the best (feasible) noisy objective value. + + Args: + obj: `sample_shape x batch_shape x q`-dim Tensor of objectives in forward. + + Returns: + A `sample_shape x batch_shape x 1`-dim Tensor of best feasible objectives. 
+ """ + if self._cache_root: + val = self._baseline_best_f + else: + val = self._compute_best_feasible_objective( + samples=self.baseline_samples, obj=self.baseline_obj + ) + # ensuring shape, dtype, device compatibility with obj + n_sample_dims = len(self.sample_shape) + view_shape = torch.Size( + [ + *val.shape[:n_sample_dims], # sample dimensions + *(1,) * (obj.ndim - val.ndim), # pad to match obj + *val.shape[n_sample_dims:], # the rest + ] + ) + return val.view(view_shape).to(obj) + + def _get_samples_and_objectives(self, X: Tensor) -> Tuple[Tensor, Tensor]: + r"""Compute samples at new points, using the cached root decomposition. + + Args: + X: A `batch_shape x q x d`-dim tensor of inputs. + + Returns: + A two-tuple `(samples, obj)`, where `samples` is a tensor of posterior + samples with shape `sample_shape x batch_shape x q x m`, and `obj` is a + tensor of MC objective values with shape `sample_shape x batch_shape x q`. + """ + n_baseline, q = self.X_baseline.shape[-2], X.shape[-2] + X_full = torch.cat([match_batch_shape(self.X_baseline, X), X], dim=-2) + # TODO: Implement more efficient way to compute posterior over both training and + # test points in GPyTorch (https://github.com/cornellius-gp/gpytorch/issues/567) + posterior = self.model.posterior( + X_full, posterior_transform=self.posterior_transform + ) + if not self._cache_root: + samples_full = super().get_posterior_samples(posterior) + obj_full = self.objective(samples_full, X=X_full) + # assigning baseline buffers so `best_f` can be computed in _sample_forward + self.baseline_samples, samples = samples_full.split([n_baseline, q], dim=-2) + self.baseline_obj, obj = obj_full.split([n_baseline, q], dim=-1) + return samples, obj + + # handle one-to-many input transforms + n_plus_q = X_full.shape[-2] + n_w = posterior._extended_shape()[-2] // n_plus_q + q_in = q * n_w + self._set_sampler(q_in=q_in, posterior=posterior) + samples = self._get_f_X_samples(posterior=posterior, q_in=q_in) + obj = self.objective(samples, X=X_full[..., -q:, :]) + return samples, obj + + def _compute_best_feasible_objective(self, samples: Tensor, obj: Tensor) -> Tensor: + return compute_best_feasible_objective( + samples=samples, + obj=obj, + constraints=self._constraints, + model=self.model, + objective=self.objective, + posterior_transform=self.posterior_transform, + X_baseline=self.X_baseline, + ) + + +""" +###################################### utils ########################################## +""" + + +def _log_improvement( + Y: Tensor, + best_f: Tensor, + tau: Union[float, Tensor], + fat: bool, +) -> Tensor: + """Computes the logarithm of the softplus-smoothed improvement, i.e. + `log_softplus(Y - best_f, beta=(1 / tau))`. + Note that softplus is an approximation to the regular ReLU objective whose maximum + pointwise approximation error is linear with respect to tau as tau goes to zero. + + Args: + obj: `mc_samples x batch_shape x q`-dim Tensor of output samples. + best_f: Best previously observed objective value(s), broadcastable with `obj`. + tau: Temperature parameter for smooth approximation of ReLU. + as `tau -> 0`, maximum pointwise approximation error is linear w.r.t. `tau`. + fat: Toggles the logarithmic / linear asymptotic behavior of the + smooth approximation to ReLU. + + Returns: + A `mc_samples x batch_shape x q`-dim Tensor of improvement values. 
+ """ + log_soft_clamp = log_fatplus if fat else log_softplus + Z = Y - best_f.to(Y) + return log_soft_clamp(Z, tau=tau) # ~ ((Y - best_f) / Y_std).clamp(0) + + +def check_tau(tau: FloatOrTensor, name: str) -> FloatOrTensor: + """Checks the validity of the tau arguments of the functions below, and returns + `tau` if it is valid.""" + if isinstance(tau, Tensor) and tau.numel() != 1: + raise ValueError(name + f" is not a scalar: {tau.numel() = }.") + if not (tau > 0): + raise ValueError(name + f" is non-positive: {tau = }.") + return tau diff --git a/botorch/acquisition/monte_carlo.py b/botorch/acquisition/monte_carlo.py index 330c4bdcef..5d58f8d332 100644 --- a/botorch/acquisition/monte_carlo.py +++ b/botorch/acquisition/monte_carlo.py @@ -27,7 +27,6 @@ from typing import Any, Callable, List, Optional, Protocol, Tuple, Union import torch -from botorch import acquisition from botorch.acquisition.acquisition import AcquisitionFunction, MCSamplerMixin from botorch.acquisition.cached_cholesky import CachedCholeskyMCAcquisitionFunction from botorch.acquisition.objective import ( @@ -36,11 +35,14 @@ MCAcquisitionObjective, PosteriorTransform, ) -from botorch.acquisition.utils import prune_inferior_points +from botorch.acquisition.utils import ( + compute_best_feasible_objective, + prune_inferior_points, +) from botorch.exceptions.errors import UnsupportedError from botorch.models.model import Model from botorch.sampling.base import MCSampler -from botorch.utils.objective import compute_smoothed_constraint_indicator +from botorch.utils.objective import compute_smoothed_feasibility_indicator from botorch.utils.transforms import ( concatenate_pending_points, match_batch_shape, @@ -168,6 +170,8 @@ class SampleReducingMCAcquisitionFunction(MCAcquisitionFunction): forward pass. These problems are circumvented by the design of this class. """ + _log: bool = False # whether the acquisition utilities are in log-space + def __init__( self, model: Model, @@ -179,6 +183,7 @@ def __init__( q_reduction: SampleReductionProtocol = torch.amax, constraints: Optional[List[Callable[[Tensor], Tensor]]] = None, eta: Union[Tensor, float] = 1e-3, + fat: bool = False, ): r"""Constructor of SampleReducingMCAcquisitionFunction. @@ -213,7 +218,9 @@ def __init__( acquistion utilities, e.g. all improvement-based acquisition functions. eta: Temperature parameter(s) governing the smoothness of the sigmoid approximation to the constraint indicators. For more details, on this - parameter, see the docs of `compute_smoothed_constraint_indicator`. + parameter, see the docs of `compute_smoothed_feasibility_indicator`. + fat: Wether to apply a fat-tailed smooth approximation to the feasibility + indicator or the canonical sigmoid approximation. """ if constraints is not None and isinstance(objective, ConstrainedMCObjective): raise ValueError( @@ -234,6 +241,7 @@ def __init__( self._q_reduction = partial(q_reduction, dim=-1) self._constraints = constraints self._eta = eta + self._fat = fat @concatenate_pending_points @t_batch_mode_transform() @@ -255,10 +263,22 @@ def forward(self, X: Tensor) -> Tensor: A Tensor with shape `batch_shape'`, where `batch_shape'` is the broadcasted batch shape of model and input `X`. """ + non_reduced_acqval = self._non_reduced_forward(X=X) + return self._sample_reduction(self._q_reduction(non_reduced_acqval)) + + def _non_reduced_forward(self, X: Tensor) -> Tensor: + """Compute the constrained acquisition values at the MC-sample, q level. 
+ + Args: + X: A `batch_shape x q x d` Tensor of t-batches with `q` `d`-dim + design points each. + + Returns: + A Tensor with shape `sample_sample x batch_shape x q`. + """ samples, obj = self._get_samples_and_objectives(X) acqval = self._sample_forward(obj) # `sample_sample x batch_shape x q` - weighted_acqval = self._apply_constraints(acqval=acqval, samples=samples) - return self._sample_reduction(self._q_reduction(weighted_acqval)) + return self._apply_constraints(acqval=acqval, samples=samples) @abstractmethod def _sample_forward(self, obj: Tensor) -> Tensor: @@ -286,14 +306,19 @@ def _apply_constraints(self, acqval: Tensor, samples: Tensor) -> Tensor: multiplied by a smoothed constraint indicator per sample. """ if self._constraints is not None: - if (acqval < 0).any(): + if not self._log and (acqval < 0).any(): raise ValueError( "Constraint-weighting requires unconstrained " "acquisition values to be non-negative." ) - acqval = acqval * compute_smoothed_constraint_indicator( - constraints=self._constraints, samples=samples, eta=self._eta + ind = compute_smoothed_feasibility_indicator( + constraints=self._constraints, + samples=samples, + eta=self._eta, + log=self._log, + fat=self._fat, ) + acqval = acqval.add(ind) if self._log else acqval.mul(ind) return acqval @@ -352,7 +377,7 @@ def __init__( are considered satisfied if the output is less than zero. eta: Temperature parameter(s) governing the smoothness of the sigmoid approximation to the constraint indicators. For more details, on this - parameter, see the docs of `compute_smoothed_constraint_indicator`. + parameter, see the docs of `compute_smoothed_feasibility_indicator`. """ super().__init__( model=model, @@ -443,7 +468,7 @@ def __init__( are considered satisfied if the output is less than zero. eta: Temperature parameter(s) governing the smoothness of the sigmoid approximation to the constraint indicators. For more details, on this - parameter, see the docs of `compute_smoothed_constraint_indicator`. + parameter, see the docs of `compute_smoothed_feasibility_indicator`. TODO: similar to qNEHVI, when we are using sequential greedy candidate selection, we could incorporate pending points X_baseline and compute @@ -579,7 +604,8 @@ def _get_samples_and_objectives(self, X: Tensor) -> Tuple[Tensor, Tensor]: return samples, obj def _compute_best_feasible_objective(self, samples: Tensor, obj: Tensor) -> Tensor: - """ + r"""Computes best feasible objective value from samples. + Args: samples: `sample_shape x batch_shape x q x m`-dim posterior samples. obj: A `sample_shape x batch_shape x q`-dim Tensor of MC objective values. @@ -587,38 +613,15 @@ def _compute_best_feasible_objective(self, samples: Tensor, obj: Tensor) -> Tens Returns: A `sample_shape x batch_shape x 1`-dim Tensor of best feasible objectives. """ - if self._constraints is not None: - # is_feasible is sample_shape x batch_shape x q - is_feasible = compute_smoothed_constraint_indicator( - constraints=self._constraints, samples=samples, eta=self._eta - ) - is_feasible = is_feasible > 0.5 # due to smooth approximation - if is_feasible.any(): - obj = torch.where(is_feasible, obj, -torch.inf) - else: # if there are no feasible observations, estimate a lower - # bound on the objective by sampling convex combinations of X_baseline. 
- convex_weights = torch.rand( - 32, - self.X_baseline.shape[-2], - dtype=self.X_baseline.dtype, - device=self.X_baseline.device, - ) - weights_sum = convex_weights.sum(dim=0, keepdim=True) - convex_weights = convex_weights / weights_sum - # infeasible cost M is such that -M < min_x f(x), thus - # 0 < min_x f(x) - (-M), so we should take -M as a lower - # bound on the best feasible objective - return -acquisition.utils.get_infeasible_cost( - X=convex_weights @ self.X_baseline, - model=self.model, - objective=self.objective, - posterior_transform=self.posterior_transform, - ).expand(*obj.shape[:-1], 1) - - # we don't need to differentiate through X_baseline for now, so taking - # the regular max over the n points to get best_f is fine - with torch.no_grad(): - return obj.amax(dim=-1, keepdim=True) + return compute_best_feasible_objective( + samples=samples, + obj=obj, + constraints=self._constraints, + model=self.model, + objective=self.objective, + posterior_transform=self.posterior_transform, + X_baseline=self.X_baseline, + ) class qProbabilityOfImprovement(SampleReducingMCAcquisitionFunction): @@ -679,7 +682,7 @@ def __init__( scalar is less than zero. eta: Temperature parameter(s) governing the smoothness of the sigmoid approximation to the constraint indicators. For more details, on this - parameter, see the docs of `compute_smoothed_constraint_indicator`. + parameter, see the docs of `compute_smoothed_feasibility_indicator`. """ super().__init__( model=model, diff --git a/botorch/acquisition/multi_objective/monte_carlo.py b/botorch/acquisition/multi_objective/monte_carlo.py index 8c4e857c1b..36e119fcae 100644 --- a/botorch/acquisition/multi_objective/monte_carlo.py +++ b/botorch/acquisition/multi_objective/monte_carlo.py @@ -57,7 +57,7 @@ from botorch.utils.multi_objective.box_decompositions.utils import ( _pad_batch_pareto_frontier, ) -from botorch.utils.objective import compute_smoothed_constraint_indicator +from botorch.utils.objective import compute_smoothed_feasibility_indicator from botorch.utils.torch import BufferDict from botorch.utils.transforms import ( concatenate_pending_points, @@ -279,7 +279,7 @@ def _compute_qehvi(self, samples: Tensor, X: Optional[Tensor] = None) -> Tensor: obj = self.objective(samples, X=X) q = obj.shape[-2] if self.constraints is not None: - feas_weights = compute_smoothed_constraint_indicator( + feas_weights = compute_smoothed_feasibility_indicator( constraints=self.constraints, samples=samples, eta=self.eta ) # `sample_shape x batch-shape x q` self._cache_q_subset_indices(q_out=q) @@ -414,7 +414,7 @@ def __init__( tensor the length of the tensor must match the number of provided constraints. The i-th constraint is then estimated with the i-th eta value. For more details, on this parameter, see the docs of - `compute_smoothed_constraint_indicator`. + `compute_smoothed_feasibility_indicator`. prune_baseline: If True, remove points in `X_baseline` that are highly unlikely to be the pareto optimal and better than the reference point. 
This can significantly improve computation time and diff --git a/botorch/acquisition/penalized.py b/botorch/acquisition/penalized.py index 6b929a8c96..304c3c2f8a 100644 --- a/botorch/acquisition/penalized.py +++ b/botorch/acquisition/penalized.py @@ -176,6 +176,29 @@ def __call__(self, X: Tensor) -> Tensor: return nnz_approx(X=X, target_point=self.target_point, a=self.a) +class L0PenaltyApprox(L0Approximation): + r"""Differentiable relaxation of the L0 norm to be added to any arbitrary + acquisition function to construct a PenalizedAcquisitionFunction.""" + + def __init__(self, target_point: Tensor, a: float = 1.0, **tkwargs: Any) -> None: + r"""Initializing L0 penalty with differentiable relaxation. + + Args: + target_point: A tensor corresponding to the target point. + a: A hyperparameter that controls the differentiable relaxation. + """ + super().__init__(target_point=target_point, a=a, **tkwargs) + + def __call__(self, X: Tensor) -> Tensor: + r""" + Args: + X: A "batch_shape x q x dim" representing the points to be evaluated. + Returns: + A tensor of size "batch_shape" representing the acqfn for each q-batch. + """ + return super().__call__(X=X).squeeze(dim=-1).min(dim=-1).values + + class PenalizedAcquisitionFunction(AcquisitionFunction): r"""Single-outcome acquisition function regularized by the given penalty. @@ -297,6 +320,7 @@ def __init__( objective: Callable[[Tensor, Optional[Tensor]], Tensor], penalty_objective: torch.nn.Module, regularization_parameter: float, + expand_dim: Optional[int] = None, ) -> None: r"""Penalized MC objective. @@ -309,10 +333,13 @@ def __init__( `batch-shape x q x d`-dim Tensor `X` and outputs a `1 x batch-shape x q`-dim Tensor of penalty objective values. regularization_parameter: weight of the penalty (regularization) term + expand_dim: dim to expand penalty_objective to match with objective when + fully bayesian model is used. If None, no expansion is performed. """ super().__init__(objective=objective) self.penalty_objective = penalty_objective self.regularization_parameter = regularization_parameter + self.expand_dim = expand_dim def forward(self, samples: Tensor, X: Optional[Tensor] = None) -> Tensor: r"""Evaluate the penalized objective on the samples. @@ -329,4 +356,36 @@ def forward(self, samples: Tensor, X: Optional[Tensor] = None) -> Tensor: """ obj = super().forward(samples=samples, X=X) penalty_obj = self.penalty_objective(X) + # when fully bayesian model is used, we pass unmarginalize_dim to match the + # shape between obj `sample_shape x batch-shape x mcmc_samples x q` and + # penalty_obj `1 x batch-shape x q` + if self.expand_dim is not None: + # reshape penalty_obj to match the dim + penalty_obj = penalty_obj.unsqueeze(self.expand_dim) return obj - self.regularization_parameter * penalty_obj + + +class L0PenaltyApproxObjective(L0Approximation): + r"""Differentiable relaxation of the L0 norm penalty objective class. + An instance of this class can be added to any arbitrary objective to + construct a PenalizedMCObjective. + """ + + def __init__(self, target_point: Tensor, a: float = 1.0, **tkwargs: Any) -> None: + r"""Initializing L0 penalty with differentiable relaxation. + + Args: + target_point: A tensor corresponding to the target point. + a: A hyperparameter that controls the differentiable relaxation. + """ + super().__init__(target_point=target_point, a=a, **tkwargs) + + def __call__(self, X: Tensor) -> Tensor: + r""" + Args: + X: A "batch_shape x q x dim" representing the points to be evaluated. 
+ Returns: + A "1 x batch_shape x q" tensor representing the penalty for each point. + The first dimension corresponds to the dimension of MC samples. + """ + return super().__call__(X=X).squeeze(dim=-1).unsqueeze(dim=0) diff --git a/botorch/acquisition/prior_guided.py b/botorch/acquisition/prior_guided.py index 72d3407631..fd5fe97d11 100644 --- a/botorch/acquisition/prior_guided.py +++ b/botorch/acquisition/prior_guided.py @@ -20,6 +20,8 @@ from typing import Optional from botorch.acquisition.acquisition import AcquisitionFunction +from botorch.acquisition.monte_carlo import SampleReducingMCAcquisitionFunction +from botorch.utils.transforms import concatenate_pending_points, t_batch_mode_transform from torch import Tensor from torch.nn import Module @@ -28,6 +30,9 @@ class PriorGuidedAcquisitionFunction(AcquisitionFunction): r"""Class for weighting acquisition functions by a prior distribution. + Supports MC and batch acquisition functions via + SampleReducingAcquisitionFunction. + See [Hvarfner2022]_ for details. """ @@ -37,6 +42,7 @@ def __init__( prior_module: Module, log: bool = False, prior_exponent: float = 1.0, + X_pending: Optional[Tensor] = None, ) -> None: r"""Initialize the prior-guided acquisition function. @@ -44,36 +50,50 @@ def __init__( acq_function: The base acquisition function. prior_module: A Module that computes the probability (or log probability) for the provided inputs. + `prior_module.forward` should take a `batch_shape x q`-dim + tensor of inputs and return a `batch_shape x q`-dim tensor + of probabilities. log: A boolean that should be true if the acquisition function emits a log-transformed value and the prior module emits a log probability. prior_exponent: The exponent applied to the prior. This can be used for example to decay the effect the prior over time as in [Hvarfner2022]_. + X_pending: `n x d` Tensor with `n` `d`-dim design points that have + been submitted for evaluation but have not yet been evaluated. """ - Module.__init__(self) + super().__init__(model=acq_function.model) self.acq_func = acq_function self.prior_module = prior_module self._log = log self._prior_exponent = prior_exponent + self._is_sample_reducing_af = isinstance( + acq_function, SampleReducingMCAcquisitionFunction + ) + self.set_X_pending(X_pending=X_pending) - @property - def X_pending(self): - r"""Return the `X_pending` of the base acquisition function.""" - try: - return self.acq_func.X_pending - except (ValueError, AttributeError): - raise ValueError( - f"Base acquisition function {type(self.acq_func).__name__} " - "does not have an `X_pending` attribute." - ) - - @X_pending.setter - def X_pending(self, X_pending: Optional[Tensor]): - r"""Sets the `X_pending` of the base acquisition function.""" - self.acq_func.X_pending = X_pending - + @concatenate_pending_points + @t_batch_mode_transform() def forward(self, X: Tensor) -> Tensor: r"""Compute the acquisition function weighted by the prior.""" + # batch_shape x q + prior = self.prior_module(X) + if self._is_sample_reducing_af: + # sample_shape x batch_shape x q + af_val = self.acq_func._non_reduced_forward(X) + else: + if prior.shape[-1] > 1: + raise NotImplementedError( + "q-batches with q>1 are only supported using " + "SampleReducingMCAcquisitionFunction." 
+ ) + # batch_shape x q + af_val = self.acq_func(X).unsqueeze(-1) if self._log: - return self.acq_func(X) + self.prior_module(X) * self._prior_exponent - return self.acq_func(X) * self.prior_module(X).pow(self._prior_exponent) + weighted_af_val = af_val + prior * self._prior_exponent + else: + weighted_af_val = af_val * prior.pow(self._prior_exponent) + if self._is_sample_reducing_af: + return self.acq_func._sample_reduction( + self.acq_func._q_reduction(weighted_af_val) + ) + return weighted_af_val.squeeze(-1) # squeeze q-dim diff --git a/botorch/acquisition/utils.py b/botorch/acquisition/utils.py index 595bd6750c..a683896840 100644 --- a/botorch/acquisition/utils.py +++ b/botorch/acquisition/utils.py @@ -32,6 +32,7 @@ FastNondominatedPartitioning, NondominatedPartitioning, ) +from botorch.utils.objective import compute_feasibility_indicator from botorch.utils.sampling import optimize_posterior_samples from botorch.utils.transforms import is_fully_bayesian from torch import Tensor @@ -100,10 +101,19 @@ def get_acquisition_function( ) # instantiate and return the requested acquisition function if acquisition_function_name in ("qEI", "qPI"): - obj = objective( - model.posterior(X_observed, posterior_transform=posterior_transform).mean + # Since these are the non-noisy variants, use the posterior mean at the observed + # inputs directly to compute the best feasible value without sampling. + Y = model.posterior(X_observed, posterior_transform=posterior_transform).mean + obj = objective(samples=Y, X=X_observed) + best_f = compute_best_feasible_objective( + samples=Y, + obj=obj, + constraints=constraints, + model=model, + objective=objective, + posterior_transform=posterior_transform, + X_baseline=X_observed, ) - best_f = obj.max(dim=-1).values if acquisition_function_name == "qEI": return monte_carlo.qExpectedImprovement( model=model, @@ -112,6 +122,8 @@ def get_acquisition_function( objective=objective, posterior_transform=posterior_transform, X_pending=X_pending, + constraints=constraints, + eta=eta, ) elif acquisition_function_name == "qPI": return monte_carlo.qProbabilityOfImprovement( @@ -122,6 +134,8 @@ def get_acquisition_function( posterior_transform=posterior_transform, X_pending=X_pending, tau=kwargs.get("tau", 1e-3), + constraints=constraints, + eta=eta, ) elif acquisition_function_name == "qNEI": return monte_carlo.qNoisyExpectedImprovement( @@ -134,6 +148,8 @@ def get_acquisition_function( prune_baseline=kwargs.get("prune_baseline", True), marginalize_dim=kwargs.get("marginalize_dim"), cache_root=kwargs.get("cache_root", True), + constraints=constraints, + eta=eta, ) elif acquisition_function_name == "qSR": return monte_carlo.qSimpleRegret( @@ -213,6 +229,113 @@ def get_acquisition_function( ) +def compute_best_feasible_objective( + samples: Tensor, + obj: Tensor, + constraints: Optional[List[Callable[[Tensor], Tensor]]], + model: Optional[Model] = None, + objective: Optional[MCAcquisitionObjective] = None, + posterior_transform: Optional[PosteriorTransform] = None, + X_baseline: Optional[Tensor] = None, + infeasible_obj: Optional[Tensor] = None, +) -> Tensor: + """Computes the largest `obj` value that is feasible under the `constraints`. If + `constraints` is None, returns the best unconstrained objective value. + + When no feasible observations exist and `infeasible_obj` is not `None`, returns + `infeasible_obj` (potentially reshaped). 
When no feasible observations exist and + `infeasible_obj` is `None`, uses `model`, `objective`, `posterior_transform`, and + `X_baseline` to infer and return an `infeasible_obj` `M` s.t. `M < min_x f(x)`. + + Args: + samples: `(sample_shape) x batch_shape x q x m`-dim posterior samples. + obj: A `(sample_shape) x batch_shape x q`-dim Tensor of MC objective values. + constraints: A list of constraint callables which map posterior samples to + a scalar. The associated constraint is considered satisfied if this + scalar is less than zero. + model: A Model, only required when there are no feasible observations. + objective: An MCAcquisitionObjective, only optionally used when there are no + feasible observations. + posterior_transform: A PosteriorTransform, only optionally used when there are + no feasible observations. + X_baseline: A `batch_shape x d`-dim Tensor of baseline points, only required + when there are no feasible observations. + infeasible_obj: A Tensor to be returned when no feasible points exist. + + Returns: + A `(sample_shape) x batch_shape x 1`-dim Tensor of best feasible objectives. + """ + if constraints is None: # unconstrained case + # we don't need to differentiate through X_baseline for now, so taking + # the regular max over the n points to get best_f is fine + with torch.no_grad(): + return obj.amax(dim=-1, keepdim=True) + + is_feasible = compute_feasibility_indicator( + constraints=constraints, samples=samples + ) # sample_shape x batch_shape x q + if is_feasible.any(): + obj = torch.where(is_feasible, obj, -torch.inf) + with torch.no_grad(): + return obj.amax(dim=-1, keepdim=True) + + elif infeasible_obj is not None: + return infeasible_obj.expand(*obj.shape[:-1], 1) + + else: + if model is None: + raise ValueError( + "Must specify `model` when no feasible observation exists." + ) + if X_baseline is None: + raise ValueError( + "Must specify `X_baseline` when no feasible observation exists." + ) + return _estimate_objective_lower_bound( + model=model, + objective=objective, + posterior_transform=posterior_transform, + X=X_baseline, + ).expand(*obj.shape[:-1], 1) + + +def _estimate_objective_lower_bound( + model: Model, + objective: Optional[MCAcquisitionObjective], + posterior_transform: Optional[PosteriorTransform], + X: Tensor, +) -> Tensor: + """Estimates a lower bound on the objective values by evaluating the model at convex + combinations of `X`, returning the 6-sigma lower bound of the computed statistics. + + Args: + model: A fitted model. + objective: An MCAcquisitionObjective with `m` outputs. + posterior_transform: A PosteriorTransform. + X: A `n x d`-dim Tensor of design points from which to draw convex combinations. + + Returns: + A `m`-dimensional Tensor of lower bounds of the objectives. 
+ """ + convex_weights = torch.rand( + 32, + X.shape[-2], + dtype=X.dtype, + device=X.device, + ) + weights_sum = convex_weights.sum(dim=0, keepdim=True) + convex_weights = convex_weights / weights_sum + # infeasible cost M is such that -M < min_x f(x), thus + # 0 < min_x f(x) - (-M), so we should take -M as a lower + # bound on the best feasible objective + return -get_infeasible_cost( + X=convex_weights @ X, + model=model, + objective=objective, + posterior_transform=posterior_transform, + ) + + def get_infeasible_cost( X: Tensor, model: Model, diff --git a/botorch/models/model.py b/botorch/models/model.py index 0b5112ef0b..8b20ff0edd 100644 --- a/botorch/models/model.py +++ b/botorch/models/model.py @@ -32,10 +32,11 @@ import numpy as np import torch from botorch import settings -from botorch.exceptions.errors import InputDataError +from botorch.exceptions.errors import BotorchTensorDimensionError, InputDataError from botorch.models.utils.assorted import fantasize as fantasize_flag from botorch.posteriors import Posterior, PosteriorList from botorch.sampling.base import MCSampler +from botorch.sampling.list_sampler import ListSampler from botorch.utils.datasets import BotorchDataset from botorch.utils.transforms import is_fully_bayesian from torch import Tensor @@ -327,6 +328,19 @@ def fantasize( Returns: The constructed fantasy model. """ + # if the inputs are empty, expand the inputs + if X.shape[-2] == 0: + output_shape = ( + sampler.sample_shape + + X.shape[:-2] + + self.batch_shape + + torch.Size([0, self.num_outputs]) + ) + return self.condition_on_observations( + X=self.transform_inputs(X), + Y=torch.empty(output_shape, dtype=X.dtype, device=X.device), + **kwargs, + ) propagate_grads = kwargs.pop("propagate_grads", False) with fantasize_flag(): with settings.propagate_grads(propagate_grads): @@ -529,6 +543,72 @@ def load_state_dict( m.load_state_dict(filtered_dict) super().load_state_dict(state_dict=state_dict, strict=strict) + def fantasize( + self, + X: Tensor, + sampler: MCSampler, + observation_noise: bool = True, + evaluation_mask: Optional[Tensor] = None, + **kwargs: Any, + ) -> Model: + r"""Construct a fantasy model. + + Constructs a fantasy model in the following fashion: + (1) compute the model posterior at `X` (including observation noise if + `observation_noise=True`). + (2) sample from this posterior (using `sampler`) to generate "fake" + observations. + (3) condition the model on the new fake observations. + + Args: + X: A `batch_shape x n' x d`-dim Tensor, where `d` is the dimension of + the feature space, `n'` is the number of points per batch, and + `batch_shape` is the batch shape (must be compatible with the + batch shape of the model). + sampler: The sampler used for sampling from the posterior at `X`. If + evaluation_mask is not None, this must be a `ListSampler`. + observation_noise: If True, include observation noise. + evaluation_mask: A `n' x m`-dim tensor of booleans indicating which + outputs should be fantasized for a given design. This uses the same + evaluation mask for all batches. + + Returns: + The constructed fantasy model. + """ + if evaluation_mask is not None: + if evaluation_mask.ndim != 2 or evaluation_mask.shape != torch.Size( + [X.shape[-2], self.num_outputs] + ): + raise BotorchTensorDimensionError( + f"Expected evaluation_mask of shape `{X.shape[0]} " + f"x {self.num_outputs}`, but got `" + f"{' x '.join(str(i) for i in evaluation_mask.shape)}`." 
+ ) + if not isinstance(sampler, ListSampler): + raise ValueError("Decoupled fantasization requires a list of samplers.") + + fant_models = [] + X_i = X + for i in range(self.num_outputs): + # get the inputs to fantasize at for output i + if evaluation_mask is not None: + mask_i = evaluation_mask[:, i] + X_i = X[..., mask_i, :] + # TODO (T158701749): implement a QMC DecoupledSampler that draws all + # samples from a single Sobol sequence or consider requiring that the + # sampling is IID to ensure good coverage. + sampler_i = sampler.samplers[i] + else: + sampler_i = sampler + fant_model = self.models[i].fantasize( + X=X_i, + sampler=sampler_i, + observation_noise=observation_noise, + **kwargs, + ) + fant_models.append(fant_model) + return self.__class__(*fant_models) + class ModelDict(ModuleDict): r"""A lightweight container mapping model names to models.""" diff --git a/botorch/optim/optimize.py b/botorch/optim/optimize.py index 3431f3e32f..5ec71767ba 100644 --- a/botorch/optim/optimize.py +++ b/botorch/optim/optimize.py @@ -305,14 +305,13 @@ def _optimize_batch_candidates() -> Tuple[Tensor, Tensor, List[Warning]]: # only add parameter constraints to gen_kwargs if they are specified # to avoid unnecessary warnings in _filter_kwargs - if opt_inputs.inequality_constraints is not None: - gen_kwargs["inequality_constraints"] = opt_inputs.inequality_constraints - if opt_inputs.equality_constraints is not None: - gen_kwargs["equality_constraints"] = opt_inputs.equality_constraints - if opt_inputs.nonlinear_inequality_constraints is not None: - gen_kwargs[ - "nonlinear_inequality_constraints" - ] = opt_inputs.nonlinear_inequality_constraints + for constraint_name in [ + "inequality_constraints", + "equality_constraints", + "nonlinear_inequality_constraints", + ]: + if (constraint := getattr(opt_inputs, constraint_name)) is not None: + gen_kwargs[constraint_name] = constraint filtered_gen_kwargs = _filter_kwargs(opt_inputs.gen_candidates, **gen_kwargs) diff --git a/botorch/utils/datasets.py b/botorch/utils/datasets.py index c33a040d32..bfe12aa047 100644 --- a/botorch/utils/datasets.py +++ b/botorch/utils/datasets.py @@ -37,28 +37,29 @@ def __call__(cls, *args: Any, **kwargs: Any): r"""Converts Tensor-valued fields to DenseContainer under the assumption that said fields house collections of feature vectors.""" hints = get_type_hints(cls) - f_iter = filter(lambda f: f.init, fields(cls)) + fields_iter = (item for item in fields(cls) if item.init is not None) f_dict = {} - for obj, f in chain( - zip(args, f_iter), ((kwargs.pop(f.name, MISSING), f) for f in f_iter) + for value, field in chain( + zip(args, fields_iter), + ((kwargs.pop(field.name, MISSING), field) for field in fields_iter), ): - if obj is MISSING: - if f.default is not MISSING: - obj = f.default - elif f.default_factory is not MISSING: - obj = f.default_factory() + if value is MISSING: + if field.default is not MISSING: + value = field.default + elif field.default_factory is not MISSING: + value = field.default_factory() else: - raise RuntimeError(f"Missing required field `{f.name}`.") + raise RuntimeError(f"Missing required field `{field.name}`.") - if issubclass(hints[f.name], BotorchContainer): - if isinstance(obj, Tensor): - obj = DenseContainer(obj, event_shape=obj.shape[-1:]) - elif not isinstance(obj, BotorchContainer): + if issubclass(hints[field.name], BotorchContainer): + if isinstance(value, Tensor): + value = DenseContainer(value, event_shape=value.shape[-1:]) + elif not isinstance(value, BotorchContainer): raise TypeError( 
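A rough usage sketch of the decoupled fantasization added above. Assumptions: `ListSampler` accepts one sampler per output (consistent with the `sampler.samplers[i]` access above), and the toy models, shapes, and mask below are illustrative only:

    import torch
    from botorch.models import ModelListGP, SingleTaskGP
    from botorch.sampling.list_sampler import ListSampler
    from botorch.sampling.normal import IIDNormalSampler

    # hypothetical two-output model list
    m1 = SingleTaskGP(torch.rand(5, 2, dtype=torch.double), torch.rand(5, 1, dtype=torch.double))
    m2 = SingleTaskGP(torch.rand(5, 2, dtype=torch.double), torch.rand(5, 1, dtype=torch.double))
    model = ModelListGP(m1, m2)

    X = torch.rand(3, 2, dtype=torch.double)  # n' x d fantasy locations
    # n' x m mask: fantasize output 0 at all three points, output 1 only at the first two
    mask = torch.tensor([[True, True], [True, True], [True, False]])
    sampler = ListSampler(
        IIDNormalSampler(sample_shape=torch.Size([4])),
        IIDNormalSampler(sample_shape=torch.Size([4])),
    )
    fant_model = model.fantasize(X, sampler=sampler, evaluation_mask=mask)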
- f"Expected for field `{f.name}` " - f"but was {type(obj)}." + "Expected for field " + f"`{field.name}` but was {type(value)}." ) - f_dict[f.name] = obj + f_dict[field.name] = value return super().__call__(**f_dict, **kwargs) diff --git a/botorch/utils/objective.py b/botorch/utils/objective.py index 0738812287..c7ce67794d 100644 --- a/botorch/utils/objective.py +++ b/botorch/utils/objective.py @@ -10,9 +10,12 @@ from __future__ import annotations +import warnings + from typing import Callable, List, Optional, Union import torch +from botorch.utils.safe_math import log_fatmoid, logexpit from torch import Tensor @@ -87,7 +90,7 @@ def apply_constraints_nonnegative_soft( Returns: A `n_samples x b x q (x m')`-dim tensor of feasibility-weighted objectives. """ - w = compute_smoothed_constraint_indicator( + w = compute_smoothed_feasibility_indicator( constraints=constraints, samples=samples, eta=eta ) if obj.dim() == samples.dim(): @@ -95,14 +98,42 @@ def apply_constraints_nonnegative_soft( return obj.clamp_min(0).mul(w) # Enforce non-negativity of obj, apply constraints. -def compute_smoothed_constraint_indicator( +def compute_feasibility_indicator( + constraints: Optional[List[Callable[[Tensor], Tensor]]], + samples: Tensor, +) -> Tensor: + r"""Computes the feasibility of a list of constraints given posterior samples. + + Args: + constraints: A list of callables, each mapping a batch_shape x q x m`-dim Tensor + to a `batch_shape x q`-dim Tensor, where negative values imply feasibility. + samples: A batch_shape x q x m`-dim Tensor of posterior samples. + + Returns: + A `batch_shape x q`-dim tensor of Boolean feasibility values. + """ + ind = torch.ones(samples.shape[:-1], dtype=torch.bool, device=samples.device) + if constraints is not None: + for constraint in constraints: + ind = ind.logical_and(constraint(samples) < 0) + return ind + + +def compute_smoothed_feasibility_indicator( constraints: List[Callable[[Tensor], Tensor]], samples: Tensor, eta: Union[Tensor, float], + log: bool = False, + fat: bool = False, ) -> Tensor: - r"""Computes the feasibility indicator of a list of constraints given posterior - samples, using a sigmoid to smoothly approximate the feasibility indicator - of each individual constraint to ensure differentiability and high gradient signal. + r"""Computes the smoothed feasibility indicator of a list of constraints. + + Given posterior samples, using a sigmoid to smoothly approximate the feasibility + indicator of each individual constraint to ensure differentiability and high + gradient signal. The `fat` and `log` options improve the numerical behavior of + the smooth approximation. + + NOTE: *Negative* constraint values are associated with feasibility. Args: constraints: A list of callables, each mapping a Tensor of size `b x q x m` @@ -115,6 +146,8 @@ def compute_smoothed_constraint_indicator( constraint in constraints. In case of a tensor the length of the tensor must match the number of provided constraints. The i-th constraint is then estimated with the i-th eta value. + log: Toggles the computation of the log-feasibility indicator. + fat: Toggles the computation of the fat-tailed feasibility indicator. Returns: A `n_samples x b x q`-dim tensor of feasibility indicator values. @@ -125,14 +158,17 @@ def compute_smoothed_constraint_indicator( raise ValueError( "Number of provided constraints and number of provided etas do not match." 
) - is_feasible = torch.ones_like(samples[..., 0]) + if not (eta > 0).all(): + raise ValueError("eta must be positive.") + is_feasible = torch.zeros_like(samples[..., 0]) + log_sigmoid = log_fatmoid if fat else logexpit for constraint, e in zip(constraints, eta): - w = soft_eval_constraint(constraint(samples), eta=e) - is_feasible = is_feasible.mul(w) # TODO: add log version. + is_feasible = is_feasible + log_sigmoid(-constraint(samples) / e) - return is_feasible + return is_feasible if log else is_feasible.exp() +# TODO: deprecate this function def soft_eval_constraint(lhs: Tensor, eta: float = 1e-3) -> Tensor: r"""Element-wise evaluation of a constraint in a 'soft' fashion @@ -148,8 +184,13 @@ def soft_eval_constraint(lhs: Tensor, eta: float = 1e-3) -> Tensor: For each element `x`, `value(x) -> 0` as `x` becomes positive, and `value(x) -> 1` as x becomes negative. """ + warnings.warn( + "`soft_eval_constraint` is deprecated. Please consider `torch.utils.sigmoid` " + + "with its `fat` and `log` options to compute feasibility indicators.", + DeprecationWarning, + ) if eta <= 0: - raise ValueError("eta must be positive") + raise ValueError("eta must be positive.") return torch.sigmoid(-lhs / eta) diff --git a/botorch/utils/probability/mvnxpb.py b/botorch/utils/probability/mvnxpb.py index ee479fa40f..4d6edb3539 100644 --- a/botorch/utils/probability/mvnxpb.py +++ b/botorch/utils/probability/mvnxpb.py @@ -176,18 +176,20 @@ def solve(self, num_steps: Optional[int] = None, eps: float = 1e-10) -> Tensor: if pivot is not None and torch.any(pivot > i): self.pivot_(pivot=pivot) - # Initialize `i`-th plug-in value as univariate conditional expectation + # Compute whitened bounds conditional on preceding plug-ins Lii = L[..., i, i].clone() if should_update_chol: - Lii = Lii.clip(min=0).sqrt() + Lii = Lii.clip(min=0).sqrt() # conditional stddev inv_Lii = Lii.reciprocal() - if i == 0: - lb, ub = bounds[..., i, :].clone().unbind(dim=-1) - else: - db = (L[..., i, :i].clone() * y[..., :i].clone()).sum(-1, keepdim=True) - lb, ub = (bounds[..., i, :].clone() - db).unbind(dim=-1) + bounds_i = bounds[..., i, :].clone() + if i != 0: + bounds_i = bounds_i - torch.sum( + L[..., i, :i].clone() * y[..., :i].clone(), dim=-1, keepdim=True + ) + lb, ub = (inv_Lii.unsqueeze(-1) * bounds_i).unbind(dim=-1) - Phi_i = Phi(inv_Lii * ub) - Phi(inv_Lii * lb) + # Initialize `i`-th plug-in value as univariate conditional expectation + Phi_i = Phi(ub) - Phi(lb) small = Phi_i <= i * eps y[..., i] = case_dispatcher( # used to select next pivot out=(phi(lb) - phi(ub)) / Phi_i, @@ -224,7 +226,7 @@ def solve(self, num_steps: Optional[int] = None, eps: float = 1e-10) -> Tensor: # Replace 1D expectations with 2D ones `L[blk, blk]^{-1} y[..., blk]` mask = blk_prob > zero y[..., h] = torch.where(mask, zh, zero) - y[..., i] = torch.where(mask, (std_i * zi - Lih * zh) / Lii, zero) + y[..., i] = torch.where(mask, inv_Lii * (std_i * zi - Lih * zh), zero) # Update running approximation to log probability self.log_prob = self.log_prob + safe_log(blk_prob) diff --git a/botorch/utils/safe_math.py b/botorch/utils/safe_math.py index 83a2155a68..72b2951773 100644 --- a/botorch/utils/safe_math.py +++ b/botorch/utils/safe_math.py @@ -20,10 +20,13 @@ from typing import Tuple, Union import torch +from botorch.exceptions import UnsupportedError from botorch.utils.constants import get_constants_like from torch import finfo, Tensor +from torch.nn.functional import softplus _log2 = math.log(2) +_inv_sqrt_3 = math.sqrt(1 / 3) # Unary ops @@ -76,6 +79,23 @@ 
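The smoothed indicator above is a product of per-constraint sigmoids, accumulated in log space for numerical stability. A minimal stand-alone sketch of the `fat=False` path (negative constraint values mean feasible); the fat-tailed variant simply swaps `logsigmoid` for `log_fatmoid`:

    import torch
    from torch.nn.functional import logsigmoid

    def smoothed_feasibility(constraint_vals, eta=1e-3, log=False):
        # constraint_vals: list of `... x q` tensors of constraint slack values
        log_feas = sum(logsigmoid(-c / eta) for c in constraint_vals)
        return log_feas if log else log_feas.exp()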
def log1mexp(x: Tensor) -> Tensor: ) +def log1pexp(x: Tensor) -> Tensor: + """Numerically accurate evaluation of log(1 + exp(x)). + See [Maechler2012accurate]_ for details. + """ + mask = x <= 18 + return torch.where( + mask, + (lambda z: z.exp().log1p())(x.masked_fill(~mask, 0)), + (lambda z: z + (-z).exp())(x.masked_fill(mask, 0)), + ) + + +def logexpit(X: Tensor) -> Tensor: + """Computes the logarithm of the expit (a.k.a. sigmoid) function.""" + return -log1pexp(-X) + + def logdiffexp(log_a: Tensor, log_b: Tensor) -> Tensor: """Computes log(b - a) accurately given log(a) and log(b). Assumes, log_b > log_a, i.e. b > a > 0. @@ -93,7 +113,7 @@ def logdiffexp(log_a: Tensor, log_b: Tensor) -> Tensor: def logmeanexp( X: Tensor, dim: Union[int, Tuple[int, ...]], keepdim: bool = False ) -> Tensor: - """Computes log(mean(exp(X), dim=dim, keepdim=keepdim)). + """Computes `log(mean(exp(X), dim=dim, keepdim=keepdim))`. Args: X: Values of which to compute the logmeanexp. @@ -101,7 +121,156 @@ def logmeanexp( keepdim: If True, keeps the reduced dimensions. Returns: - A Tensor of values corresponding to log(mean(exp(X), dim=dim)). + A Tensor of values corresponding to `log(mean(exp(X), dim=dim))`. """ n = X.shape[dim] if isinstance(dim, int) else math.prod(X.shape[i] for i in dim) return torch.logsumexp(X, dim=dim, keepdim=keepdim) - math.log(n) + + +def log_softplus(x: Tensor, tau: Union[float, Tensor] = 1.0) -> Tensor: + """Computes the logarithm of the softplus function with high numerical accuracy. + + Args: + x: Input tensor, should have single or double precision floats. + tau: Decreasing tau increases the tightness of the + approximation to ReLU. Non-negative and defaults to 1.0. + + Returns: + Tensor corresponding to `log(softplus(x))`. + """ + check_dtype_float32_or_float64(x) + tau = torch.as_tensor(tau, dtype=x.dtype, device=x.device) + # cutoff chosen to achieve accuracy to machine epsilon + upper = 16 if x.dtype == torch.float32 else 32 + lower = -15 if x.dtype == torch.float32 else -35 + mask = x / tau > lower + return torch.where( + mask, + softplus(x.masked_fill(~mask, lower), beta=(1 / tau), threshold=upper).log(), + x / tau + tau.log(), + ) + + +def smooth_amax(X: Tensor, tau: Union[float, Tensor] = 1e-3, dim: int = -1) -> Tensor: + """Computes a smooth approximation to `max(X, dim=dim)`, i.e the maximum value of + `X` over dimension `dim`, using the logarithm of the `l_(1/tau)` norm of `exp(X)`. + Note that when `X = log(U)` is the *logarithm* of an acquisition utility `U`, + + `logsumexp(log(U) / tau) * tau = log(sum(U^(1/tau))^tau) = log(norm(U, ord=(1/tau))` + + Args: + X: A Tensor from which to compute the smoothed amax. + tau: Temperature parameter controlling the smooth approximation + to max operator, becomes tighter as tau goes to 0. Needs to be positive. + + Returns: + A Tensor of smooth approximations to `max(X, dim=dim)`. + """ + # consider normalizing by log_n = math.log(X.shape[dim]) to reduce error + return torch.logsumexp(X / tau, dim=dim) * tau # ~ X.amax(dim=dim) + + +def check_dtype_float32_or_float64(X: Tensor) -> None: + if X.dtype != torch.float32 and X.dtype != torch.float64: + raise UnsupportedError( + f"Only dtypes float32 and float64 are supported, but received {X.dtype}." + ) + + +def log_fatplus(x: Tensor, tau: Union[float, Tensor] = 1.0) -> Tensor: + """Computes the logarithm of the fat-tailed softplus. + + NOTE: Separated out in case the complexity of the `log` implementation increases + in the future. 
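To see how tight the `smooth_amax` approximation above is: `logsumexp(X / tau) * tau` never falls below the true maximum and exceeds it by at most `tau * log(n)`, with the worst case attained when all entries are equal. A quick numerical check, purely for illustration:

    import torch

    tau = 1e-2
    X = torch.tensor([0.0, 1.0, 2.0])
    smooth = torch.logsumexp(X / tau, dim=-1) * tau  # ~2.0, gap is negligible here

    X_equal = torch.ones(3)
    smooth_eq = torch.logsumexp(X_equal / tau, dim=-1) * tau  # 1.0 + tau * log(3) ~ 1.011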
+ """ + return fatplus(x, tau=tau).log() + + +def fatplus(x: Tensor, tau: Union[float, Tensor] = 1.0) -> Tensor: + """Computes a fat-tailed approximation to `ReLU(x) = max(x, 0)` by linearly + combining a regular softplus function and the density function of a Cauchy + distribution. The coefficient `alpha` of the Cauchy density is chosen to guarantee + monotonicity and convexity. + + Args: + x: A Tensor on whose values to compute the smoothed function. + + Returns: + A Tensor of values of the fat-tailed softplus. + """ + + def _fatplus(x: Tensor) -> Tensor: + alpha = 1e-1 # guarantees monotonicity and convexity (TODO: ref + Lemma 4) + return softplus(x) + alpha * cauchy(x) + + return tau * _fatplus(x / tau) + + +def fatmax(X: Tensor, dim: int, tau: Union[float, Tensor] = 1.0) -> Tensor: + """Computes a smooth approximation to amax(X, dim=dim) with a fat tail. + + Args: + X: A Tensor from which to compute the smoothed amax. + tau: Temperature parameter controlling the smooth approximation + to max operator, becomes tighter as tau goes to 0. Needs to be positive. + standardize: Toggles the temperature standardization of the smoothed function. + + Returns: + A Tensor of smooth approximations to `max(X, dim=dim)` with a fat tail. + """ + if X.shape[dim] == 1: + return X.squeeze(dim) + + M = X.amax(dim=dim, keepdim=True) + Y = (X - M) / tau # NOTE: this would cause NaNs when X has Infs. + M = M.squeeze(dim) + return M + tau * cauchy(Y).sum(dim=dim).log() # could change to mean + + +def log_fatmoid(X: Tensor, tau: Union[float, Tensor] = 1.0) -> Tensor: + """Computes the logarithm of the fatmoid. Separated out in case the implementation + of the logarithm becomes more complex in the future to ensure numerical stability. + """ + return fatmoid(X, tau=tau).log() + + +def fatmoid(X: Tensor, tau: Union[float, Tensor] = 1.0) -> Tensor: + """Computes a twice continuously differentiable approximation to the Heaviside + step function with a fat tail, i.e. `O(1 / x^2)` as `x` goes to -inf. + + Args: + X: A Tensor from which to compute the smoothed step function. + tau: Temperature parameter controlling the smoothness of the approximation. + + Returns: + A tensor of fat-tailed approximations to the Heaviside step function. + """ + X = X / tau + m = _inv_sqrt_3 # this defines the inflection point + return torch.where( + X < 0, + 2 / 3 * cauchy(X - m), + 1 - 2 / 3 * cauchy(X + m), + ) + + +def cauchy(x: Tensor) -> Tensor: + """Computes a Lorentzian, i.e. an un-normalized Cauchy density function.""" + return 1 / (1 + x.square()) + + +def sigmoid(X: Tensor, log: bool = False, fat: bool = False) -> Tensor: + """A sigmoid function with an optional fat tail and evaluation in log space for + better numerical behavior. Notably, the fat-tailed sigmoid can be used to remedy + numerical underflow problems in the value and gradient of the canonical sigmoid. + + Args: + X: The Tensor on which to evaluate the sigmoid. + log: Toggles the evaluation of the log sigmoid. + fat: Toggles the evaluation of the fat-tailed sigmoid. + + Returns: + A Tensor of (log-)sigmoid values. + """ + Y = log_fatmoid(X) if fat else logexpit(X) + return Y if log else Y.exp() diff --git a/sphinx/source/acquisition.rst b/sphinx/source/acquisition.rst index 3aae7e277b..c46c906a89 100644 --- a/sphinx/source/acquisition.rst +++ b/sphinx/source/acquisition.rst @@ -60,6 +60,9 @@ Monte-Carlo Acquisition Functions :members: :exclude-members: MCAcquisitionFunction +.. 
automodule:: botorch.acquisition.logei + :members: + Multi-Objective Analytic Acquisition Functions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. automodule:: botorch.acquisition.multi_objective.analytic diff --git a/sphinx/source/optim.rst b/sphinx/source/optim.rst index e6b9b3e11a..d3bf9e2db8 100644 --- a/sphinx/source/optim.rst +++ b/sphinx/source/optim.rst @@ -36,6 +36,11 @@ Stopping Criteria .. automodule:: botorch.optim.stopping :members: +Acquisition Function Optimization with Homotopy +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. automodule:: botorch.optim.optimize_homotopy + :members: + Closures ------------------------------------------- @@ -87,3 +92,8 @@ Parameter Constraint Utilities ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. automodule:: botorch.optim.parameter_constraints :members: + +Homotopy Utilities +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. automodule:: botorch.optim.homotopy + :members: diff --git a/test/acquisition/test_fixed_feature.py b/test/acquisition/test_fixed_feature.py index 8dcc02f1df..1f5218d228 100644 --- a/test/acquisition/test_fixed_feature.py +++ b/test/acquisition/test_fixed_feature.py @@ -6,14 +6,18 @@ import torch from botorch.acquisition.analytic import ExpectedImprovement -from botorch.acquisition.fixed_feature import FixedFeatureAcquisitionFunction +from botorch.acquisition.fixed_feature import ( + FixedFeatureAcquisitionFunction, + get_device_of_sequence, + get_dtype_of_sequence, +) from botorch.acquisition.monte_carlo import qExpectedImprovement from botorch.models import SingleTaskGP -from botorch.utils.testing import BotorchTestCase +from botorch.utils.testing import BotorchTestCase, MockAcquisitionFunction class TestFixedFeatureAcquisitionFunction(BotorchTestCase): - def test_fixed_features(self): + def test_fixed_features(self) -> None: train_X = torch.rand(5, 3, device=self.device) train_Y = train_X.norm(dim=-1, keepdim=True) model = SingleTaskGP(train_X, train_Y).to(device=self.device).eval() @@ -132,3 +136,63 @@ def test_fixed_features(self): ) with self.assertRaises(ValueError): EI_ff.X_pending + + def test_values_dtypes(self) -> None: + acqf = MockAcquisitionFunction() + + for input, d, expected_dtype in [ + (torch.tensor([0.0], dtype=torch.float32), 1, torch.float32), + (torch.tensor([0.0], dtype=torch.float64), 1, torch.float64), + ( + [ + torch.tensor([0.0], dtype=torch.float32), + torch.tensor([0.0], dtype=torch.float64), + ], + 2, + torch.float64, + ), + ([0.0], 1, torch.float64), + ([torch.tensor(0.0, dtype=torch.float32), 0.0], 2, torch.float64), + ]: + with self.subTest(input=input, d=d, expected_dtype=expected_dtype): + self.assertEqual(get_dtype_of_sequence(input), expected_dtype) + ff = FixedFeatureAcquisitionFunction( + acqf, d=d, columns=[2], values=input + ) + self.assertEqual(ff.values.dtype, expected_dtype) + + def test_values_devices(self) -> None: + + acqf = MockAcquisitionFunction() + cpu = torch.device("cpu") + cuda = torch.device("cuda") + + test_cases = [ + (torch.tensor([0.0], device=cpu), 1, cpu), + ([0.0], 1, cpu), + ([0.0, torch.tensor([0.0], device=cpu)], 2, cpu), + ] + + # Can only properly test this when running CUDA tests + if self.device == torch.cuda: + test_cases = test_cases + [ + (torch.tensor([0.0], device=cuda), 1, cuda), + ( + [ + torch.tensor([0.0], dtype=cpu), + torch.tensor([0.0], dtype=cuda), + ], + 2, + cuda, + ), + ([0.0], 1, cpu), + ([torch.tensor(0.0, dtype=cuda), 0.0], 2, cuda), + ] + + for input, d, expected_device in test_cases: + with self.subTest(input=input, d=d, 
expected_device=expected_device): + self.assertEqual(get_device_of_sequence(input), expected_device) + ff = FixedFeatureAcquisitionFunction( + acqf, d=d, columns=[2], values=input + ) + self.assertEqual(ff.values.device, expected_device) diff --git a/test/acquisition/test_input_constructors.py b/test/acquisition/test_input_constructors.py index dce2e5e5e7..ea8f95b81e 100644 --- a/test/acquisition/test_input_constructors.py +++ b/test/acquisition/test_input_constructors.py @@ -31,6 +31,12 @@ qKnowledgeGradient, qMultiFidelityKnowledgeGradient, ) +from botorch.acquisition.logei import ( + qLogExpectedImprovement, + qLogNoisyExpectedImprovement, + TAU_MAX, + TAU_RELU, +) from botorch.acquisition.max_value_entropy_search import ( qMaxValueEntropy, qMultiFidelityMaxValueEntropy, @@ -139,13 +145,13 @@ def test_get_best_f_mc(self): best_f = get_best_f_mc(training_data=self.blockX_multiY, objective=obj) multi_Y = torch.cat([d.Y() for d in self.blockX_multiY.values()], dim=-1) - best_f_expected = (multi_Y @ obj.weights).max() + best_f_expected = (multi_Y @ obj.weights).amax(dim=-1, keepdim=True) self.assertEqual(best_f, best_f_expected) post_tf = ScalarizedPosteriorTransform(weights=torch.ones(2)) best_f = get_best_f_mc( training_data=self.blockX_multiY, posterior_transform=post_tf ) - best_f_expected = (multi_Y.sum(dim=-1)).max() + best_f_expected = (multi_Y.sum(dim=-1)).amax(dim=-1, keepdim=True) self.assertEqual(best_f, best_f_expected) @mock.patch("botorch.acquisition.input_constructors.optimize_acqf") @@ -350,6 +356,9 @@ def test_construct_inputs_qEI(self): self.assertIsNone(kwargs["objective"]) self.assertIsNone(kwargs["X_pending"]) self.assertIsNone(kwargs["sampler"]) + self.assertIsNone(kwargs["constraints"]) + self.assertIsInstance(kwargs["eta"], float) + self.assertTrue(kwargs["eta"] < 1) X_pending = torch.rand(2, 2) objective = LinearMCObjective(torch.rand(2)) kwargs = c( @@ -362,6 +371,9 @@ def test_construct_inputs_qEI(self): self.assertTrue(torch.equal(kwargs["objective"].weights, objective.weights)) self.assertTrue(torch.equal(kwargs["X_pending"], X_pending)) self.assertIsNone(kwargs["sampler"]) + self.assertIsNone(kwargs["constraints"]) + self.assertIsInstance(kwargs["eta"], float) + self.assertTrue(kwargs["eta"] < 1) multi_Y = torch.cat([d.Y() for d in self.blockX_multiY.values()], dim=-1) best_f_expected = objective(multi_Y).max() self.assertEqual(kwargs["best_f"], best_f_expected) @@ -376,6 +388,23 @@ def test_construct_inputs_qEI(self): ) self.assertEqual(kwargs["best_f"], best_f_expected) + # testing qLogEI input constructor + log_constructor = get_acqf_input_constructor(qLogExpectedImprovement) + log_kwargs = log_constructor( + model=mock_model, + training_data=self.blockX_blockY, + objective=objective, + X_pending=X_pending, + best_f=best_f_expected, + ) + # includes strict superset of kwargs tested above + self.assertTrue(kwargs.items() <= log_kwargs.items()) + self.assertTrue("fat" in log_kwargs) + self.assertTrue("tau_max" in log_kwargs) + self.assertEqual(log_kwargs["tau_max"], TAU_MAX) + self.assertTrue("tau_relu" in log_kwargs) + self.assertEqual(log_kwargs["tau_relu"], TAU_RELU) + def test_construct_inputs_qNEI(self): c = get_acqf_input_constructor(qNoisyExpectedImprovement) mock_model = mock.Mock() @@ -386,6 +415,10 @@ def test_construct_inputs_qNEI(self): self.assertIsNone(kwargs["sampler"]) self.assertTrue(kwargs["prune_baseline"]) self.assertTrue(torch.equal(kwargs["X_baseline"], self.blockX_blockY[0].X())) + self.assertIsNone(kwargs["constraints"]) + 
self.assertIsInstance(kwargs["eta"], float) + self.assertTrue(kwargs["eta"] < 1) + with self.assertRaisesRegex(ValueError, "Field `X` must be shared"): c(model=mock_model, training_data=self.multiX_multiY) X_baseline = torch.rand(2, 2) @@ -401,6 +434,25 @@ def test_construct_inputs_qNEI(self): self.assertIsNone(kwargs["sampler"]) self.assertFalse(kwargs["prune_baseline"]) self.assertTrue(torch.equal(kwargs["X_baseline"], X_baseline)) + self.assertIsNone(kwargs["constraints"]) + self.assertIsInstance(kwargs["eta"], float) + self.assertTrue(kwargs["eta"] < 1) + + # testing qLogNEI input constructor + log_constructor = get_acqf_input_constructor(qLogNoisyExpectedImprovement) + log_kwargs = log_constructor( + model=mock_model, + training_data=self.blockX_blockY, + X_baseline=X_baseline, + prune_baseline=False, + ) + # includes strict superset of kwargs tested above + self.assertTrue(kwargs.items() <= log_kwargs.items()) + self.assertTrue("fat" in log_kwargs) + self.assertTrue("tau_max" in log_kwargs) + self.assertEqual(log_kwargs["tau_max"], TAU_MAX) + self.assertTrue("tau_relu" in log_kwargs) + self.assertEqual(log_kwargs["tau_relu"], TAU_RELU) def test_construct_inputs_qPI(self): c = get_acqf_input_constructor(qProbabilityOfImprovement) @@ -411,6 +463,9 @@ def test_construct_inputs_qPI(self): self.assertIsNone(kwargs["X_pending"]) self.assertIsNone(kwargs["sampler"]) self.assertEqual(kwargs["tau"], 1e-3) + self.assertIsNone(kwargs["constraints"]) + self.assertIsInstance(kwargs["eta"], float) + self.assertTrue(kwargs["eta"] < 1) X_pending = torch.rand(2, 2) objective = LinearMCObjective(torch.rand(2)) kwargs = c( @@ -425,6 +480,9 @@ def test_construct_inputs_qPI(self): self.assertTrue(torch.equal(kwargs["X_pending"], X_pending)) self.assertIsNone(kwargs["sampler"]) self.assertEqual(kwargs["tau"], 1e-2) + self.assertIsNone(kwargs["constraints"]) + self.assertIsInstance(kwargs["eta"], float) + self.assertTrue(kwargs["eta"] < 1) multi_Y = torch.cat([d.Y() for d in self.blockX_multiY.values()], dim=-1) best_f_expected = objective(multi_Y).max() self.assertEqual(kwargs["best_f"], best_f_expected) diff --git a/test/acquisition/test_logei.py b/test/acquisition/test_logei.py new file mode 100644 index 0000000000..83b66b8bf0 --- /dev/null +++ b/test/acquisition/test_logei.py @@ -0,0 +1,694 @@ +#!/usr/bin/env python3 +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
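Before the tests, a minimal usage sketch of the new acquisition function on synthetic data, mirroring the constructor calls exercised in this file; the exact numerical gap between the two is governed by the `tau_relu`/`tau_max` smoothing parameters:

    import torch
    from botorch.acquisition import qExpectedImprovement, qLogExpectedImprovement
    from botorch.models import SingleTaskGP

    train_X = torch.rand(8, 2, dtype=torch.double)
    train_Y = train_X.sum(dim=-1, keepdim=True)
    model = SingleTaskGP(train_X, train_Y)
    best_f = train_Y.max()
    X = torch.rand(4, 1, 2, dtype=torch.double)  # 4 t-batches of q=1 candidates

    ei = qExpectedImprovement(model, best_f=best_f)
    log_ei = qLogExpectedImprovement(model, best_f=best_f)
    # exp(qLogEI) closely tracks qEI where qEI is not vanishingly small, but qLogEI
    # retains informative values and gradients where qEI underflows to zero.
    print(ei(X), log_ei(X).exp())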
+ +import warnings +from copy import deepcopy +from itertools import product +from math import pi +from unittest import mock + +import torch +from botorch import settings +from botorch.acquisition import ( + LogImprovementMCAcquisitionFunction, + qLogExpectedImprovement, + qLogNoisyExpectedImprovement, +) +from botorch.acquisition.input_constructors import ACQF_INPUT_CONSTRUCTOR_REGISTRY +from botorch.acquisition.monte_carlo import ( + qExpectedImprovement, + qNoisyExpectedImprovement, +) + +from botorch.acquisition.objective import ( + ConstrainedMCObjective, + GenericMCObjective, + IdentityMCObjective, + PosteriorTransform, + ScalarizedPosteriorTransform, +) +from botorch.exceptions import BotorchWarning, UnsupportedError +from botorch.exceptions.errors import BotorchError +from botorch.models import SingleTaskGP +from botorch.sampling.normal import IIDNormalSampler, SobolQMCNormalSampler +from botorch.utils.low_rank import sample_cached_cholesky +from botorch.utils.testing import BotorchTestCase, MockModel, MockPosterior + +from botorch.utils.transforms import standardize +from torch import Tensor + + +def infeasible_con(samples: Tensor) -> Tensor: + return torch.ones_like(samples[..., 0]) + + +def feasible_con(samples: Tensor) -> Tensor: + return -torch.ones_like(samples[..., 0]) + + +class DummyLogImprovementAcquisitionFunction(LogImprovementMCAcquisitionFunction): + def _sample_forward(self, X): + pass + + +class DummyNonScalarizingPosteriorTransform(PosteriorTransform): + scalarize = False + + def evaluate(self, Y): + pass # pragma: no cover + + def forward(self, posterior): + pass # pragma: no cover + + +class TestLogImprovementAcquisitionFunction(BotorchTestCase): + def test_abstract_raises(self): + with self.assertRaises(TypeError): + LogImprovementMCAcquisitionFunction() + # raise if model is multi-output, but no outcome transform or objective + # are given + no = "botorch.utils.testing.MockModel.num_outputs" + with mock.patch(no, new_callable=mock.PropertyMock) as mock_num_outputs: + mock_num_outputs.return_value = 2 + mm = MockModel(MockPosterior()) + with self.assertRaises(UnsupportedError): + DummyLogImprovementAcquisitionFunction(model=mm) + # raise if model is multi-output, but outcome transform does not + # scalarize and no objetive is given + with mock.patch(no, new_callable=mock.PropertyMock) as mock_num_outputs: + mock_num_outputs.return_value = 2 + mm = MockModel(MockPosterior()) + ptf = DummyNonScalarizingPosteriorTransform() + with self.assertRaises(UnsupportedError): + DummyLogImprovementAcquisitionFunction( + model=mm, posterior_transform=ptf + ) + + mm = MockModel(MockPosterior()) + objective = ConstrainedMCObjective( + IdentityMCObjective(), + constraints=[lambda samples: torch.zeros_like(samples[..., 0])], + ) + with self.assertRaisesRegex( + BotorchError, + "Log-Improvement should not be used with `ConstrainedMCObjective`.", + ): + DummyLogImprovementAcquisitionFunction(model=mm, objective=objective) + + +class TestQLogExpectedImprovement(BotorchTestCase): + def test_q_log_expected_improvement(self): + self.assertIn(qLogExpectedImprovement, ACQF_INPUT_CONSTRUCTOR_REGISTRY.keys()) + for dtype in (torch.float, torch.double): + tkwargs = {"device": self.device, "dtype": dtype} + # the event shape is `b x q x t` = 1 x 1 x 1 + samples = torch.zeros(1, 1, 1, **tkwargs) + mm = MockModel(MockPosterior(samples=samples)) + # X is `q x d` = 1 x 1. 
X is a dummy and unused b/c of mocking + X = torch.zeros(1, 1, **tkwargs) + + # basic test + sampler = IIDNormalSampler(sample_shape=torch.Size([2])) + acqf = qExpectedImprovement(model=mm, best_f=0, sampler=sampler) + log_acqf = qLogExpectedImprovement(model=mm, best_f=0, sampler=sampler) + self.assertFalse(acqf._fat) # different default behavior + self.assertTrue(log_acqf._fat) + # test initialization + for k in ["objective", "sampler"]: + self.assertIn(k, acqf._modules) + self.assertIn(k, log_acqf._modules) + + res = acqf(X).item() + self.assertEqual(res, 0.0) + exp_log_res = log_acqf(X).exp().item() + # Due to the smooth approximation, the value at zero should be close to, but + # not exactly zero, and upper-bounded by the tau hyperparameter. + self.assertTrue(0 < exp_log_res) + self.assertTrue(exp_log_res <= log_acqf.tau_relu) + + # test shifting best_f value downward to see non-zero improvement + best_f = -1 + acqf = qExpectedImprovement(model=mm, best_f=best_f, sampler=sampler) + log_acqf = qLogExpectedImprovement(model=mm, best_f=best_f, sampler=sampler) + res, exp_log_res = acqf(X), log_acqf(X).exp() + expected_val = -best_f + + self.assertEqual(res.dtype, dtype) + self.assertEqual(res.device.type, self.device.type) + self.assertEqual(res.item(), expected_val) + # Further away from zero, the value is numerically indistinguishable with + # single precision arithmetic. + self.assertEqual(exp_log_res.dtype, dtype) + self.assertEqual(exp_log_res.device.type, self.device.type) + self.assertTrue(expected_val <= exp_log_res.item()) + self.assertTrue(exp_log_res.item() <= expected_val + log_acqf.tau_relu) + + # test shifting best_f value upward to see advantage of LogEI + best_f = 1 + acqf = qExpectedImprovement(model=mm, best_f=best_f, sampler=sampler) + log_acqf = qLogExpectedImprovement(model=mm, best_f=best_f, sampler=sampler) + res, log_res = acqf(X), log_acqf(X) + exp_log_res = log_res.exp() + expected_val = 0 + self.assertEqual(res.item(), expected_val) + self.assertTrue(expected_val <= exp_log_res.item()) + self.assertTrue(exp_log_res.item() <= expected_val + log_acqf.tau_relu) + # However, the log value is large and negative with non-vanishing gradients + self.assertGreater(-1, log_res.item()) + self.assertGreater(log_res.item(), -100) + + # NOTE: The following tests are adapted from the qEI tests. 
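The `0 < exp(qLogEI) <= tau_relu` checks above follow from the smoothed ReLU: at zero improvement, the plain softplus with temperature `tau_relu` evaluates to `tau_relu * log(2)` and the fat-tailed `fatplus` to roughly `tau_relu * (log(2) + 0.1)`, both strictly positive and below `tau_relu`. A quick check, for illustration only:

    import torch
    from torch.nn.functional import softplus

    tau_relu = 1e-6
    zero = torch.tensor(0.0)
    plain = softplus(zero, beta=1.0 / tau_relu)                # tau_relu * log(2)  ~ 0.69 * tau_relu
    fat = tau_relu * (softplus(zero) + 0.1 / (1 + zero.square()))  # ~ 0.79 * tau_relu
    assert 0.0 < plain.item() <= tau_relu and 0.0 < fat.item() <= tau_relu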
+ # basic test, no resample + sampler = IIDNormalSampler(sample_shape=torch.Size([2]), seed=12345) + acqf = qLogExpectedImprovement(model=mm, best_f=0, sampler=sampler) + res = acqf(X) + self.assertTrue(0 < res.exp().item()) + self.assertTrue(res.exp().item() < acqf.tau_relu) + self.assertEqual(acqf.sampler.base_samples.shape, torch.Size([2, 1, 1, 1])) + bs = acqf.sampler.base_samples.clone() + res = acqf(X) + self.assertTrue(torch.equal(acqf.sampler.base_samples, bs)) + + # basic test, qmc + sampler = SobolQMCNormalSampler(sample_shape=torch.Size([2])) + acqf = qLogExpectedImprovement(model=mm, best_f=0, sampler=sampler) + res = acqf(X) + self.assertTrue(0 < res.exp().item()) + self.assertTrue(res.exp().item() < acqf.tau_relu) + self.assertEqual(acqf.sampler.base_samples.shape, torch.Size([2, 1, 1, 1])) + bs = acqf.sampler.base_samples.clone() + acqf(X) + self.assertTrue(torch.equal(acqf.sampler.base_samples, bs)) + + # basic test for X_pending and warning + acqf.set_X_pending() + self.assertIsNone(acqf.X_pending) + acqf.set_X_pending(None) + self.assertIsNone(acqf.X_pending) + acqf.set_X_pending(X) + self.assertEqual(acqf.X_pending, X) + mm._posterior._samples = torch.zeros(1, 2, 1, **tkwargs) + res = acqf(X) + X2 = torch.zeros(1, 1, 1, **tkwargs, requires_grad=True) + with warnings.catch_warnings(record=True) as ws, settings.debug(True): + acqf.set_X_pending(X2) + self.assertEqual(acqf.X_pending, X2) + self.assertEqual( + sum(issubclass(w.category, BotorchWarning) for w in ws), 1 + ) + + # testing with illegal taus + with self.assertRaisesRegex(ValueError, "tau_max is not a scalar:"): + qLogExpectedImprovement( + model=mm, best_f=0, tau_max=torch.tensor([1, 2]) + ) + with self.assertRaisesRegex(ValueError, "tau_relu is non-positive:"): + qLogExpectedImprovement(model=mm, best_f=0, tau_relu=-2) + + def test_q_log_expected_improvement_batch(self): + for dtype in (torch.float, torch.double): + # the event shape is `b x q x t` = 2 x 2 x 1 + samples = torch.zeros(2, 2, 1, device=self.device, dtype=dtype) + samples[0, 0, 0] = 1.0 + mm = MockModel(MockPosterior(samples=samples)) + + # X is a dummy and unused b/c of mocking + X = torch.zeros(2, 2, 1, device=self.device, dtype=dtype) + + # test batch mode + sampler = IIDNormalSampler(sample_shape=torch.Size([2])) + acqf = qLogExpectedImprovement(model=mm, best_f=0, sampler=sampler) + exp_log_res = acqf(X).exp() + # with no approximations (qEI): self.assertEqual(res[0].item(), 1.0) + # in the batch case, the values get adjusted toward + self.assertEqual(exp_log_res.dtype, dtype) + self.assertEqual(exp_log_res.device.type, self.device.type) + self.assertTrue(1.0 <= exp_log_res[0].item()) + self.assertTrue(exp_log_res[0].item() <= 1.0 + acqf.tau_relu) + # self.assertAllClose(exp_log_res[0], torch.ones_like(exp_log_res[0]), ) + + # with no approximations (qEI): self.assertEqual(res[1].item(), 0.0) + self.assertTrue(0 < exp_log_res[1].item()) + self.assertTrue(exp_log_res[1].item() <= acqf.tau_relu) + + # test batch model, batched best_f values + sampler = IIDNormalSampler(sample_shape=torch.Size([3])) + acqf = qLogExpectedImprovement( + model=mm, best_f=torch.Tensor([0, 0]), sampler=sampler + ) + exp_log_res = acqf(X).exp() + # with no approximations (qEI): self.assertEqual(res[0].item(), 1.0) + self.assertTrue(1.0 <= exp_log_res[0].item()) + self.assertTrue(exp_log_res[0].item() <= 1.0 + acqf.tau_relu) + # with no approximations (qEI): self.assertEqual(res[1].item(), 0.0) + self.assertTrue(0 < exp_log_res[1].item()) + 
self.assertTrue(exp_log_res[1].item() <= acqf.tau_relu) + + # test shifting best_f value + acqf = qLogExpectedImprovement(model=mm, best_f=-1, sampler=sampler) + exp_log_res = acqf(X).exp() + # with no approximations (qEI): self.assertEqual(res[0].item(), 2.0) + # TODO: figure out numerically stable tests and principled tolerances + # With q > 1, maximum value can get moved down due to L_q-norm approximation + # of the maximum over the q-batch. + safe_upper_lower_bound = 1.999 + self.assertTrue(safe_upper_lower_bound <= exp_log_res[0].item()) + self.assertTrue(exp_log_res[0].item() <= 2.0 + acqf.tau_relu + acqf.tau_max) + # with no approximations (qEI): self.assertEqual(res[1].item(), 1.0) + self.assertTrue(1.0 <= exp_log_res[1].item()) + # ocurring ~tau_max error when all candidates in a q-batch have the + # acquisition value + self.assertTrue(exp_log_res[1].item() <= 1.0 + acqf.tau_relu + acqf.tau_max) + + # test batch mode + sampler = IIDNormalSampler(sample_shape=torch.Size([2]), seed=12345) + acqf = qLogExpectedImprovement(model=mm, best_f=0, sampler=sampler) + # res = acqf(X) # 1-dim batch + exp_log_res = acqf(X).exp() # 1-dim batch + # with no approximations (qEI): self.assertEqual(res[0].item(), 1.0) + safe_upper_lower_bound = 0.999 + self.assertTrue(safe_upper_lower_bound <= exp_log_res[0].item()) + self.assertTrue(exp_log_res[0].item() <= 1.0 + acqf.tau_relu) + # with no approximations (qEI): self.assertEqual(res[1].item(), 0.0) + self.assertTrue(0.0 <= exp_log_res[1].item()) + self.assertTrue(exp_log_res[1].item() <= 0.0 + acqf.tau_relu) + self.assertEqual(acqf.sampler.base_samples.shape, torch.Size([2, 1, 2, 1])) + bs = acqf.sampler.base_samples.clone() + acqf(X) + self.assertTrue(torch.equal(acqf.sampler.base_samples, bs)) + exp_log_res = acqf(X.expand(2, 2, 1)).exp() # 2-dim batch + # self.assertEqual(res[0].item(), 1.0) + safe_upper_lower_bound = 0.999 + self.assertTrue(safe_upper_lower_bound <= exp_log_res[0].item()) + self.assertTrue(exp_log_res[0].item() <= 1.0 + acqf.tau_relu) + # self.assertEqual(res[1].item(), 0.0) + self.assertTrue(0.0 <= exp_log_res[1].item()) + self.assertTrue(exp_log_res[1].item() <= 0.0 + acqf.tau_relu) + # the base samples should have the batch dim collapsed + self.assertEqual(acqf.sampler.base_samples.shape, torch.Size([2, 1, 2, 1])) + bs = acqf.sampler.base_samples.clone() + acqf(X.expand(2, 2, 1)) + self.assertTrue(torch.equal(acqf.sampler.base_samples, bs)) + + # test batch mode, qmc + sampler = SobolQMCNormalSampler(sample_shape=torch.Size([2])) + acqf = qLogExpectedImprovement(model=mm, best_f=0, sampler=sampler) + exp_log_res = acqf(X).exp() + # self.assertEqual(res[0].item(), 1.0) + safe_upper_lower_bound = 0.999 + self.assertTrue(safe_upper_lower_bound <= exp_log_res[0].item()) + self.assertTrue(exp_log_res[0].item() <= 1.0 + acqf.tau_relu) + # self.assertEqual(res[1].item(), 0.0) + self.assertTrue(0.0 <= exp_log_res[1].item()) + self.assertTrue(exp_log_res[1].item() <= 0.0 + acqf.tau_relu) + self.assertEqual(acqf.sampler.base_samples.shape, torch.Size([2, 1, 2, 1])) + bs = acqf.sampler.base_samples.clone() + acqf(X) + self.assertTrue(torch.equal(acqf.sampler.base_samples, bs)) + + # # TODO: Test different objectives (incl. 
constraints) + + +class TestQLogNoisyExpectedImprovement(BotorchTestCase): + def test_q_log_noisy_expected_improvement(self): + self.assertIn( + qLogNoisyExpectedImprovement, ACQF_INPUT_CONSTRUCTOR_REGISTRY.keys() + ) + for dtype in (torch.float, torch.double): + # the event shape is `b x q x t` = 1 x 2 x 1 + samples_noisy = torch.tensor([0.0, 1.0], device=self.device, dtype=dtype) + samples_noisy = samples_noisy.view(1, 2, 1) + # X_baseline is `q' x d` = 1 x 1 + X_baseline = torch.zeros(1, 1, device=self.device, dtype=dtype) + mm_noisy = MockModel(MockPosterior(samples=samples_noisy)) + # X is `q x d` = 1 x 1 + X = torch.zeros(1, 1, device=self.device, dtype=dtype) + + # basic test + sampler = IIDNormalSampler(sample_shape=torch.Size([2])) + kwargs = { + "model": mm_noisy, + "X_baseline": X_baseline, + "sampler": sampler, + "prune_baseline": False, + "cache_root": False, + } + acqf = qNoisyExpectedImprovement(**kwargs) + log_acqf = qLogNoisyExpectedImprovement(**kwargs) + + res = acqf(X) + self.assertEqual(res.item(), 1.0) + log_res = log_acqf(X) + self.assertEqual(log_res.dtype, dtype) + self.assertEqual(log_res.device.type, self.device.type) + self.assertAllClose(log_res.exp().item(), 1.0) + + # basic test + sampler = IIDNormalSampler(sample_shape=torch.Size([2]), seed=12345) + kwargs = { + "model": mm_noisy, + "X_baseline": X_baseline, + "sampler": sampler, + "prune_baseline": False, + "cache_root": False, + } + log_acqf = qLogNoisyExpectedImprovement(**kwargs) + log_res = log_acqf(X) + self.assertEqual(log_res.exp().item(), 1.0) + self.assertEqual( + log_acqf.sampler.base_samples.shape, torch.Size([2, 1, 2, 1]) + ) + bs = log_acqf.sampler.base_samples.clone() + log_acqf(X) + self.assertTrue(torch.equal(log_acqf.sampler.base_samples, bs)) + + # basic test, qmc + sampler = SobolQMCNormalSampler(sample_shape=torch.Size([2])) + kwargs = { + "model": mm_noisy, + "X_baseline": X_baseline, + "sampler": sampler, + "prune_baseline": False, + "cache_root": False, + } + log_acqf = qLogNoisyExpectedImprovement(**kwargs) + log_res = log_acqf(X) + self.assertEqual(log_res.exp().item(), 1.0) + self.assertEqual( + log_acqf.sampler.base_samples.shape, torch.Size([2, 1, 2, 1]) + ) + bs = log_acqf.sampler.base_samples.clone() + log_acqf(X) + self.assertTrue(torch.equal(log_acqf.sampler.base_samples, bs)) + + # basic test for X_pending and warning + sampler = SobolQMCNormalSampler(sample_shape=torch.Size([2])) + samples_noisy_pending = torch.tensor( + [1.0, 0.0, 0.0], device=self.device, dtype=dtype + ) + samples_noisy_pending = samples_noisy_pending.view(1, 3, 1) + mm_noisy_pending = MockModel(MockPosterior(samples=samples_noisy_pending)) + kwargs = { + "model": mm_noisy_pending, + "X_baseline": X_baseline, + "sampler": sampler, + "prune_baseline": False, + "cache_root": False, + } + # copy for log version + log_acqf = qLogNoisyExpectedImprovement(**kwargs) + log_acqf.set_X_pending() + self.assertIsNone(log_acqf.X_pending) + log_acqf.set_X_pending(None) + self.assertIsNone(log_acqf.X_pending) + log_acqf.set_X_pending(X) + self.assertEqual(log_acqf.X_pending, X) + log_acqf(X) + X2 = torch.zeros( + 1, 1, 1, device=self.device, dtype=dtype, requires_grad=True + ) + with warnings.catch_warnings(record=True) as ws, settings.debug(True): + log_acqf.set_X_pending(X2) + self.assertEqual(log_acqf.X_pending, X2) + self.assertEqual( + sum(issubclass(w.category, BotorchWarning) for w in ws), 1 + ) + + def test_q_noisy_expected_improvement_batch(self): + for dtype in (torch.float, torch.double): + # the event shape 
is `b x q x t` = 2 x 3 x 1 + samples_noisy = torch.zeros(2, 3, 1, device=self.device, dtype=dtype) + samples_noisy[0, -1, 0] = 1.0 + mm_noisy = MockModel(MockPosterior(samples=samples_noisy)) + # X is `q x d` = 1 x 1 + X = torch.zeros(2, 2, 1, device=self.device, dtype=dtype) + X_baseline = torch.zeros(1, 1, device=self.device, dtype=dtype) + + # test batch mode + sampler = IIDNormalSampler(sample_shape=torch.Size([2])) + kwargs = { + "model": mm_noisy, + "X_baseline": X_baseline, + "sampler": sampler, + "prune_baseline": False, + "cache_root": False, + } + acqf = qLogNoisyExpectedImprovement(**kwargs) + res = acqf(X).exp() + expected_res = torch.tensor([1.0, 0.0], dtype=dtype, device=self.device) + self.assertAllClose(res, expected_res, atol=acqf.tau_relu) + self.assertGreater(res[1].item(), 0.0) + self.assertGreater(acqf.tau_relu, res[1].item()) + + # test batch mode + sampler = IIDNormalSampler(sample_shape=torch.Size([2]), seed=12345) + acqf = qLogNoisyExpectedImprovement( + model=mm_noisy, + X_baseline=X_baseline, + sampler=sampler, + prune_baseline=False, + cache_root=False, + ) + res = acqf(X).exp() # 1-dim batch + expected_res = torch.tensor([1.0, 0.0], dtype=dtype, device=self.device) + self.assertAllClose(res, expected_res, atol=acqf.tau_relu) + self.assertGreater(res[1].item(), 0.0) + self.assertGreater(acqf.tau_relu, res[1].item()) + self.assertEqual(acqf.sampler.base_samples.shape, torch.Size([2, 1, 3, 1])) + bs = acqf.sampler.base_samples.clone() + acqf(X) + self.assertTrue(torch.equal(acqf.sampler.base_samples, bs)) + res = acqf(X.expand(2, 2, 1)).exp() # 2-dim batch + expected_res = torch.tensor([1.0, 0.0], dtype=dtype, device=self.device) + self.assertAllClose(res, expected_res, atol=acqf.tau_relu) + self.assertGreater(res[1].item(), 0.0) + self.assertGreater(acqf.tau_relu, res[1].item()) + # the base samples should have the batch dim collapsed + self.assertEqual(acqf.sampler.base_samples.shape, torch.Size([2, 1, 3, 1])) + bs = acqf.sampler.base_samples.clone() + acqf(X.expand(2, 2, 1)) + self.assertTrue(torch.equal(acqf.sampler.base_samples, bs)) + + # test batch mode, qmc + sampler = SobolQMCNormalSampler(sample_shape=torch.Size([2])) + acqf = qLogNoisyExpectedImprovement( + model=mm_noisy, + X_baseline=X_baseline, + sampler=sampler, + prune_baseline=False, + cache_root=False, + ) + res = acqf(X).exp() + expected_res = torch.tensor([1.0, 0.0], dtype=dtype, device=self.device) + self.assertAllClose(res, expected_res, atol=acqf.tau_relu) + self.assertGreater(res[1].item(), 0.0) + self.assertGreater(acqf.tau_relu, res[1].item()) + self.assertEqual(acqf.sampler.base_samples.shape, torch.Size([2, 1, 3, 1])) + bs = acqf.sampler.base_samples.clone() + acqf(X) + self.assertTrue(torch.equal(acqf.sampler.base_samples, bs)) + + def test_prune_baseline(self): + no = "botorch.utils.testing.MockModel.num_outputs" + prune = "botorch.acquisition.logei.prune_inferior_points" + for dtype in (torch.float, torch.double): + X_baseline = torch.zeros(1, 1, device=self.device, dtype=dtype) + X_pruned = torch.rand(1, 1, device=self.device, dtype=dtype) + with mock.patch(no, new_callable=mock.PropertyMock) as mock_num_outputs: + mock_num_outputs.return_value = 1 + mm = MockModel(MockPosterior(samples=X_baseline)) + with mock.patch(prune, return_value=X_pruned) as mock_prune: + acqf = qLogNoisyExpectedImprovement( + model=mm, + X_baseline=X_baseline, + prune_baseline=True, + cache_root=False, + ) + mock_prune.assert_called_once() + self.assertTrue(torch.equal(acqf.X_baseline, X_pruned)) + with 
mock.patch(prune, return_value=X_pruned) as mock_prune: + acqf = qLogNoisyExpectedImprovement( + model=mm, + X_baseline=X_baseline, + prune_baseline=True, + marginalize_dim=-3, + cache_root=False, + ) + _, kwargs = mock_prune.call_args + self.assertEqual(kwargs["marginalize_dim"], -3) + + def test_cache_root(self): + sample_cached_path = ( + "botorch.acquisition.cached_cholesky.sample_cached_cholesky" + ) + raw_state_dict = { + "likelihood.noise_covar.raw_noise": torch.tensor( + [[0.0895], [0.2594]], dtype=torch.float64 + ), + "mean_module.raw_constant": torch.tensor( + [-0.4545, -0.1285], dtype=torch.float64 + ), + "covar_module.raw_outputscale": torch.tensor( + [1.4876, 1.4897], dtype=torch.float64 + ), + "covar_module.base_kernel.raw_lengthscale": torch.tensor( + [[[-0.7202, -0.2868]], [[-0.8794, -1.2877]]], dtype=torch.float64 + ), + } + # test batched models (e.g. for MCMC) + for train_batch_shape, m, dtype in product( + (torch.Size([]), torch.Size([3])), (1, 2), (torch.float, torch.double) + ): + state_dict = deepcopy(raw_state_dict) + for k, v in state_dict.items(): + if m == 1: + v = v[0] + if len(train_batch_shape) > 0: + v = v.unsqueeze(0).expand(*train_batch_shape, *v.shape) + state_dict[k] = v + tkwargs = {"device": self.device, "dtype": dtype} + if m == 2: + objective = GenericMCObjective(lambda Y, X: Y.sum(dim=-1)) + else: + objective = None + for k, v in state_dict.items(): + state_dict[k] = v.to(**tkwargs) + all_close_kwargs = ( + { + "atol": 1e-1, + "rtol": 0.0, + } + if dtype == torch.float + else {"atol": 1e-4, "rtol": 0.0} + ) + torch.manual_seed(1234) + train_X = torch.rand(*train_batch_shape, 3, 2, **tkwargs) + train_Y = ( + torch.sin(train_X * 2 * pi) + + torch.randn(*train_batch_shape, 3, 2, **tkwargs) + )[..., :m] + train_Y = standardize(train_Y) + model = SingleTaskGP( + train_X, + train_Y, + ) + if len(train_batch_shape) > 0: + X_baseline = train_X[0] + else: + X_baseline = train_X + model.load_state_dict(state_dict, strict=False) + sampler = IIDNormalSampler(sample_shape=torch.Size([5]), seed=0) + torch.manual_seed(0) + acqf = qLogNoisyExpectedImprovement( + model=model, + X_baseline=X_baseline, + sampler=sampler, + objective=objective, + prune_baseline=False, + cache_root=True, + ) + + orig_base_samples = acqf.base_sampler.base_samples.detach().clone() + sampler2 = IIDNormalSampler(sample_shape=torch.Size([5]), seed=0) + sampler2.base_samples = orig_base_samples + torch.manual_seed(0) + acqf_no_cache = qLogNoisyExpectedImprovement( + model=model, + X_baseline=X_baseline, + sampler=sampler2, + objective=objective, + prune_baseline=False, + cache_root=False, + ) + for q, batch_shape in product( + (1, 3), (torch.Size([]), torch.Size([3]), torch.Size([4, 3])) + ): + acqf.q_in = -1 + acqf_no_cache.q_in = -1 + test_X = ( + 0.3 + 0.05 * torch.randn(*batch_shape, q, 2, **tkwargs) + ).requires_grad_(True) + with mock.patch( + sample_cached_path, wraps=sample_cached_cholesky + ) as mock_sample_cached: + torch.manual_seed(0) + val = acqf(test_X).exp() + mock_sample_cached.assert_called_once() + val.sum().backward() + base_samples = acqf.sampler.base_samples.detach().clone() + X_grad = test_X.grad.clone() + test_X2 = test_X.detach().clone().requires_grad_(True) + acqf_no_cache.sampler.base_samples = base_samples + with mock.patch( + sample_cached_path, wraps=sample_cached_cholesky + ) as mock_sample_cached: + torch.manual_seed(0) + val2 = acqf_no_cache(test_X2).exp() + mock_sample_cached.assert_not_called() + self.assertAllClose(val, val2, **all_close_kwargs) + 
val2.sum().backward() + self.assertAllClose(X_grad, test_X2.grad, **all_close_kwargs) + # test we fall back to standard sampling for + # ill-conditioned covariances + acqf._baseline_L = torch.zeros_like(acqf._baseline_L) + with warnings.catch_warnings(record=True) as ws, settings.debug(True): + with torch.no_grad(): + acqf(test_X) + self.assertEqual(sum(issubclass(w.category, BotorchWarning) for w in ws), 1) + + # test w/ posterior transform + X_baseline = torch.rand(2, 1) + model = SingleTaskGP(X_baseline, torch.randn(2, 1)) + pt = ScalarizedPosteriorTransform(weights=torch.tensor([-1])) + with mock.patch.object( + qLogNoisyExpectedImprovement, + "_compute_root_decomposition", + ) as mock_cache_root: + acqf = qLogNoisyExpectedImprovement( + model=model, + X_baseline=X_baseline, + sampler=IIDNormalSampler(sample_shape=torch.Size([1])), + posterior_transform=pt, + prune_baseline=False, + cache_root=True, + ) + tf_post = model.posterior(X_baseline, posterior_transform=pt) + self.assertTrue( + torch.allclose( + tf_post.mean, mock_cache_root.call_args[-1]["posterior"].mean + ) + ) + + # testing constraints + n, d, m = 8, 1, 3 + X_baseline = torch.rand(n, d) + model = SingleTaskGP(X_baseline, torch.randn(n, m)) # batched model + nei_args = { + "model": model, + "X_baseline": X_baseline, + "prune_baseline": False, + "cache_root": True, + "posterior_transform": ScalarizedPosteriorTransform(weights=torch.ones(m)), + "sampler": SobolQMCNormalSampler(torch.Size([5])), + } + acqf = qLogNoisyExpectedImprovement(**nei_args) + X = torch.randn_like(X_baseline) + for con in [feasible_con, infeasible_con]: + with self.subTest(con=con): + target = "botorch.acquisition.utils.get_infeasible_cost" + infcost = torch.tensor([3], device=self.device, dtype=dtype) + with mock.patch(target, return_value=infcost): + cacqf = qLogNoisyExpectedImprovement(**nei_args, constraints=[con]) + + _, obj = cacqf._get_samples_and_objectives(X) + best_feas_f = cacqf.compute_best_f(obj) + if con is feasible_con: + self.assertAllClose(best_feas_f, acqf.compute_best_f(obj)) + else: + self.assertAllClose( + best_feas_f, torch.full_like(obj[..., [0]], -infcost.item()) + ) + # TODO: Test different objectives (incl. 
constraints) diff --git a/test/acquisition/test_penalized.py b/test/acquisition/test_penalized.py index 67578ef975..ae451645fc 100644 --- a/test/acquisition/test_penalized.py +++ b/test/acquisition/test_penalized.py @@ -12,6 +12,8 @@ group_lasso_regularizer, GroupLassoPenalty, L0Approximation, + L0PenaltyApprox, + L0PenaltyApproxObjective, L1Penalty, L1PenaltyObjective, L2Penalty, @@ -151,6 +153,58 @@ def test_L0Approximation(self): rtol=1e-04, ) + def test_L0PenaltyApproxObjective(self): + for dtype in (torch.float, torch.double): + tkwargs = {"device": self.device, "dtype": dtype} + target_point = torch.zeros(2, **tkwargs) + + # test init + l0_obj = L0PenaltyApproxObjective(target_point=target_point, **tkwargs) + self.assertTrue(torch.equal(l0_obj.target_point, target_point)) + self.assertAllClose(l0_obj.a.data, torch.tensor(1.0, **tkwargs)) + + # check two-dim input tensors X + self.assertTrue( + torch.equal( + l0_obj(torch.zeros(3, 2, **tkwargs)).data, + torch.zeros(1, 3, **tkwargs), + ) + ) + # check "batch_shape x q x dim" input tensors X + batch_shape = 16 + self.assertTrue( + torch.equal( + l0_obj(torch.zeros(batch_shape, 3, 2, **tkwargs)).data, + torch.zeros(1, batch_shape, 3, **tkwargs), + ) + ) + + def test_L0PenaltyApprox(self): + for dtype in (torch.float, torch.double): + tkwargs = {"device": self.device, "dtype": dtype} + target_point = torch.zeros(2, **tkwargs) + + # test init + l0_acqf = L0PenaltyApprox(target_point=target_point, **tkwargs) + self.assertTrue(torch.equal(l0_acqf.target_point, target_point)) + self.assertAllClose(l0_acqf.a.data, torch.tensor(1.0, **tkwargs)) + + # check two-dim input tensors X + self.assertTrue( + torch.equal( + l0_acqf(torch.zeros(3, 2, **tkwargs)).data, + torch.tensor(0, **tkwargs), + ) + ) + # check "batch_shape x q x dim" input tensors X + batch_shape = 16 + self.assertTrue( + torch.equal( + l0_acqf(torch.zeros(batch_shape, 3, 2, **tkwargs)).data, + torch.zeros(batch_shape, **tkwargs), + ) + ) + class TestPenalizedAcquisitionFunction(BotorchTestCase): def test_penalized_acquisition_function(self): @@ -231,6 +285,8 @@ def test_penalized_mc_objective(self): penalty_objective=l1_penalty_obj, regularization_parameter=0.1, ) + # test self.expand_dim + self.assertIsNone(obj.expand_dim) # test 'd' Tensor X samples = torch.randn(4, 3, device=self.device, dtype=dtype) X = torch.randn(4, 5, device=self.device, dtype=dtype) @@ -246,3 +302,42 @@ def test_penalized_mc_objective(self): X = torch.randn(3, 2, 5, device=self.device, dtype=dtype) penalized_obj = generic_obj(samples) - 0.1 * l1_penalty_obj(X) self.assertTrue(torch.equal(obj(samples, X), penalized_obj)) + + # test passing expand_dim + expand_dim = -2 + obj2 = PenalizedMCObjective( + objective=generic_obj, + penalty_objective=l1_penalty_obj, + regularization_parameter=0.1, + expand_dim=expand_dim, + ) + self.assertEqual(obj2.expand_dim, -2) + # test 'd' Tensor X + mcmc_samples = 8 + # MCMC_dim = -3 + samples = torch.randn(mcmc_samples, 4, 3, device=self.device, dtype=dtype) + X = torch.randn(4, 5, device=self.device, dtype=dtype) + penalized_obj = generic_obj(samples) - 0.1 * l1_penalty_obj(X).unsqueeze( + expand_dim + ) + self.assertTrue(torch.equal(obj2(samples, X), penalized_obj)) + # test 'q x d' Tensor X + # MCMC_dim = -3 + samples = torch.randn( + 4, mcmc_samples, 2, 3, device=self.device, dtype=dtype + ) + X = torch.randn(2, 5, device=self.device, dtype=dtype) + penalized_obj = generic_obj(samples) - 0.1 * l1_penalty_obj(X).unsqueeze( + expand_dim + ) + 
self.assertTrue(torch.equal(obj2(samples, X), penalized_obj)) + # test 'batch-shape x q x d' Tensor X + # MCMC_dim = -3 + samples = torch.randn( + 4, 3, mcmc_samples, 2, 3, device=self.device, dtype=dtype + ) + X = torch.randn(3, 2, 5, device=self.device, dtype=dtype) + penalized_obj = generic_obj(samples) - 0.1 * l1_penalty_obj(X).unsqueeze( + expand_dim + ) + self.assertTrue(torch.equal(obj2(samples, X), penalized_obj)) diff --git a/test/acquisition/test_prior_guided.py b/test/acquisition/test_prior_guided.py index fe48449d68..2a92af6117 100644 --- a/test/acquisition/test_prior_guided.py +++ b/test/acquisition/test_prior_guided.py @@ -12,57 +12,112 @@ from botorch.acquisition.prior_guided import PriorGuidedAcquisitionFunction from botorch.models import SingleTaskGP from botorch.utils.testing import BotorchTestCase +from botorch.utils.transforms import match_batch_shape from torch.nn import Module class DummyPrior(Module): def forward(self, X): p = torch.distributions.Normal(0, 1) - # sum over d and q dimensions - return p.log_prob(X).sum(dim=-1).sum(dim=-1).exp() + # sum over d dimensions + return p.log_prob(X).sum(dim=-1).exp() + + +def get_val_prob(test_X, test_X_exp, af, prior): + with torch.no_grad(): + val = af(test_X) + prob = prior(test_X_exp) + + return val, prob + + +def get_weighted_val(ei_val, prob, exponent, use_log): + if use_log: + return prob * exponent + ei_val + return prob.pow(exponent) * ei_val class TestPriorGuidedAcquisitionFunction(BotorchTestCase): - def test_prior_guided_acquisition_function(self): - prior = DummyPrior() + def setUp(self): + self.prior = DummyPrior() + self.train_X = torch.rand(5, 3, dtype=torch.double, device=self.device) + self.train_Y = self.train_X.norm(dim=-1, keepdim=True) + + def test_prior_guided_analytic_acquisition_function(self): for dtype in (torch.float, torch.double): - train_X = torch.rand(5, 3, dtype=dtype, device=self.device) - train_Y = train_X.norm(dim=-1, keepdim=True) - model = SingleTaskGP(train_X, train_Y).eval() - qEI = qExpectedImprovement(model, best_f=0.0) + model = SingleTaskGP( + self.train_X.to(dtype=dtype), self.train_Y.to(dtype=dtype) + ) + ei = ExpectedImprovement(model, best_f=0.0) + for batch_shape, use_log, exponent in product( + ([], [2]), + (False, True), + (1.0, 2.0), + ): + af = PriorGuidedAcquisitionFunction( + acq_function=ei, + prior_module=self.prior, + log=use_log, + prior_exponent=exponent, + ) + test_X = torch.rand(*batch_shape, 1, 3, dtype=dtype, device=self.device) + test_X_exp = test_X.unsqueeze(0) if batch_shape == [] else test_X + with torch.no_grad(): + ei_val = ei(test_X_exp).unsqueeze(-1) + val, prob = get_val_prob(test_X, test_X_exp, af, self.prior) + weighted_val = get_weighted_val(ei_val, prob, exponent, use_log) + expected_val = weighted_val.squeeze(-1) + self.assertTrue(torch.allclose(val, expected_val)) + # test that q>1 and a non SampleReducing AF raises an exception + msg = ( + "q-batches with q>1 are only supported using " + "SampleReducingMCAcquisitionFunction." 
+ ) + test_X = torch.rand(2, 3, dtype=dtype, device=self.device) + with self.assertRaisesRegex(NotImplementedError, msg): + af(test_X) + + def test_prior_guided_mc_acquisition_function(self): + for dtype in (torch.float, torch.double): + model = SingleTaskGP( + self.train_X.to(dtype=dtype), self.train_Y.to(dtype=dtype) + ) + ei = qExpectedImprovement(model, best_f=0.0) for batch_shape, q, use_log, exponent in product( - ([], [2]), (1, 2), (False, True), (1.0, 2.0) + ([], [2]), + (1, 2), + (False, True), + (1.0, 2.0), ): af = PriorGuidedAcquisitionFunction( - acq_function=qEI, - prior_module=prior, + acq_function=ei, + prior_module=self.prior, log=use_log, prior_exponent=exponent, ) test_X = torch.rand(*batch_shape, q, 3, dtype=dtype, device=self.device) + test_X_exp = test_X.unsqueeze(0) if batch_shape == [] else test_X + val, prob = get_val_prob(test_X, test_X_exp, af, self.prior) + ei_val = ei._non_reduced_forward(test_X_exp) + weighted_val = get_weighted_val(ei_val, prob, exponent, use_log) + expected_val = ei._sample_reduction(ei._q_reduction(weighted_val)) + self.assertTrue(torch.allclose(val, expected_val)) + # test set_X_pending + X_pending = torch.rand(2, 3, dtype=dtype, device=self.device) + af.X_pending = X_pending + self.assertTrue(torch.equal(X_pending, af.X_pending)) + # unsqueeze batch dim + test_X_exp_with_pending = torch.cat( + [test_X_exp, match_batch_shape(X_pending, test_X_exp)], dim=-2 + ) with torch.no_grad(): val = af(test_X) - prob = prior(test_X) - ei = qEI(test_X) + prob = self.prior(test_X_exp_with_pending) + ei_val = ei._non_reduced_forward(test_X_exp_with_pending) if use_log: - expected_val = prob * exponent + ei + weighted_val = prob * exponent + ei_val else: - expected_val = prob.pow(exponent) * ei + weighted_val = prob.pow(exponent) * ei_val + expected_val = ei._sample_reduction(ei._q_reduction(weighted_val)) + self.assertTrue(torch.equal(val, expected_val)) - # test set_X_pending - X_pending = torch.rand(2, 3, dtype=dtype, device=self.device) - af.X_pending = X_pending - self.assertTrue(torch.equal(X_pending, af.acq_func.X_pending)) - self.assertTrue(torch.equal(X_pending, af.X_pending)) - # test exception when base AF does not support X_pending - ei = ExpectedImprovement(model, best_f=0.0) - af = PriorGuidedAcquisitionFunction( - acq_function=ei, - prior_module=prior, - ) - msg = ( - "Base acquisition function ExpectedImprovement " - "does not have an `X_pending` attribute." - ) - with self.assertRaisesRegex(ValueError, msg): - af.X_pending diff --git a/test/acquisition/test_utils.py b/test/acquisition/test_utils.py index 41b4384e85..4fdcf33487 100644 --- a/test/acquisition/test_utils.py +++ b/test/acquisition/test_utils.py @@ -5,6 +5,7 @@ # LICENSE file in the root directory of this source tree. 
import itertools +import math from unittest import mock import torch @@ -19,6 +20,7 @@ ScalarizedPosteriorTransform, ) from botorch.acquisition.utils import ( + compute_best_feasible_objective, expand_trace_observations, get_acquisition_function, get_infeasible_cost, @@ -59,12 +61,15 @@ def setUp(self): self.qmc = True self.ref_point = [0.0, 0.0] self.mo_objective = DummyMCMultiOutputObjective() - self.Y = torch.tensor([[1.0, 2.0]]) + self.Y = torch.tensor([[1.0, 2.0]]) # (1 x 2)-dim multi-objective outcomes self.seed = 1 @mock.patch(f"{monte_carlo.__name__}.qExpectedImprovement") def test_GetQEI(self, mock_acqf): - self.model = MockModel(MockPosterior(mean=torch.zeros(1, 2))) + n = len(self.X_observed) + mean = torch.arange(n, dtype=torch.double).view(-1, 1) + var = torch.ones_like(mean) + self.model = MockModel(MockPosterior(mean=mean, variance=var)) acqf = get_acquisition_function( acquisition_function_name="qEI", model=self.model, @@ -84,6 +89,8 @@ def test_GetQEI(self, mock_acqf): objective=self.objective, posterior_transform=None, X_pending=self.X_pending, + constraints=None, + eta=1e-3, ) # test batched model self.model = MockModel(MockPosterior(mean=torch.zeros(1, 2, 1))) @@ -124,10 +131,50 @@ def test_GetQEI(self, mock_acqf): ) self.assertEqual(mock_acqf.call_args[-1]["best_f"].item(), -1.0) + # with constraints + upper_bound = self.Y[0, 0] + 1 / 2 # = 1.5 + constraints = [lambda samples: samples[..., 0] - upper_bound] + eta = math.pi * 1e-2 # testing non-standard eta + + acqf = get_acquisition_function( + acquisition_function_name="qEI", + model=self.model, + objective=self.objective, + X_observed=self.X_observed, + X_pending=self.X_pending, + mc_samples=self.mc_samples, + seed=self.seed, + marginalize_dim=0, + constraints=constraints, + eta=eta, + ) + self.assertEqual(acqf, mock_acqf.return_value) + best_feasible_f = compute_best_feasible_objective( + samples=mean, + obj=self.objective(mean), + constraints=constraints, + model=self.model, + objective=self.objective, + X_baseline=self.X_observed, + ) + mock_acqf.assert_called_with( + model=self.model, + best_f=best_feasible_f, + sampler=mock.ANY, + objective=self.objective, + posterior_transform=None, + X_pending=self.X_pending, + constraints=constraints, + eta=eta, + ) + @mock.patch(f"{monte_carlo.__name__}.qProbabilityOfImprovement") def test_GetQPI(self, mock_acqf): # basic test - self.model = MockModel(MockPosterior(mean=torch.zeros(1, 2))) + n = len(self.X_observed) + mean = torch.arange(n, dtype=torch.double).view(-1, 1) + var = torch.ones_like(mean) + self.model = MockModel(MockPosterior(mean=mean, variance=var)) acqf = get_acquisition_function( acquisition_function_name="qPI", model=self.model, @@ -147,6 +194,8 @@ def test_GetQPI(self, mock_acqf): posterior_transform=None, X_pending=self.X_pending, tau=1e-3, + constraints=None, + eta=1e-3, ) args, kwargs = mock_acqf.call_args self.assertEqual(args, ()) @@ -196,9 +245,54 @@ def test_GetQPI(self, mock_acqf): ) self.assertTrue(acqf == mock_acqf.return_value) + # with constraints + n = len(self.X_observed) + mean = torch.arange(n, dtype=torch.double).view(-1, 1) + var = torch.ones_like(mean) + self.model = MockModel(MockPosterior(mean=mean, variance=var)) + upper_bound = self.Y[0, 0] + 1 / 2 # = 1.5 + constraints = [lambda samples: samples[..., 0] - upper_bound] + eta = math.pi * 1e-2 # testing non-standard eta + acqf = get_acquisition_function( + acquisition_function_name="qPI", + model=self.model, + objective=self.objective, + X_observed=self.X_observed, +
X_pending=self.X_pending, + mc_samples=self.mc_samples, + seed=self.seed, + marginalize_dim=0, + constraints=constraints, + eta=eta, + ) + self.assertEqual(acqf, mock_acqf.return_value) + best_feasible_f = compute_best_feasible_objective( + samples=mean, + obj=self.objective(mean), + constraints=constraints, + model=self.model, + objective=self.objective, + X_baseline=self.X_observed, + ) + mock_acqf.assert_called_with( + model=self.model, + best_f=best_feasible_f, + sampler=mock.ANY, + objective=self.objective, + posterior_transform=None, + X_pending=self.X_pending, + tau=1e-3, + constraints=constraints, + eta=eta, + ) + @mock.patch(f"{monte_carlo.__name__}.qNoisyExpectedImprovement") def test_GetQNEI(self, mock_acqf): # basic test + n = len(self.X_observed) + mean = torch.arange(n, dtype=torch.double).view(-1, 1) + var = torch.ones_like(mean) + self.model = MockModel(MockPosterior(mean=mean, variance=var)) acqf = get_acquisition_function( acquisition_function_name="qNEI", model=self.model, @@ -256,6 +350,38 @@ def test_GetQNEI(self, mock_acqf): self.assertEqual(sampler.seed, 2) self.assertTrue(torch.equal(kwargs["X_baseline"], self.X_observed)) + # with constraints + upper_bound = self.Y[0, 0] + 1 / 2 # = 1.5 + constraints = [lambda samples: samples[..., 0] - upper_bound] + eta = math.pi * 1e-2 # testing non-standard eta + + acqf = get_acquisition_function( + acquisition_function_name="qNEI", + model=self.model, + objective=self.objective, + X_observed=self.X_observed, + X_pending=self.X_pending, + mc_samples=self.mc_samples, + seed=self.seed, + marginalize_dim=0, + constraints=constraints, + eta=eta, + ) + self.assertEqual(acqf, mock_acqf.return_value) + mock_acqf.assert_called_with( + model=self.model, + X_baseline=self.X_observed, + sampler=mock.ANY, + objective=self.objective, + posterior_transform=None, + X_pending=self.X_pending, + prune_baseline=True, + marginalize_dim=0, + cache_root=True, + constraints=constraints, + eta=eta, + ) + @mock.patch(f"{monte_carlo.__name__}.qSimpleRegret") def test_GetQSR(self, mock_acqf): # basic test @@ -575,7 +701,83 @@ def test_GetUnknownAcquisitionFunction(self): ) -class TestGetInfeasibleCost(BotorchTestCase): +class TestConstraintUtils(BotorchTestCase): + def test_compute_best_feasible_objective(self): + for dtype in (torch.float, torch.double): + with self.subTest(dtype=dtype): + tkwargs = {"dtype": dtype, "device": self.device} + n = 5 + X = torch.arange(n, **tkwargs).view(-1, 1) + means = torch.arange(n, **tkwargs).view(-1, 1) + samples = means + variances = torch.tensor( + [0.09, 0.25, 0.36, 0.25, 0.09], **tkwargs + ).view(-1, 1) + mm = MockModel( + MockPosterior(mean=means, variance=variances, samples=samples) + ) + + # testing all feasible points + obj = means.squeeze(-1) + constraints = [lambda samples: -torch.ones_like(samples[..., 0])] + best_f = compute_best_feasible_objective( + samples=means, obj=obj, constraints=constraints + ) + self.assertAllClose(best_f, obj.amax(dim=-1, keepdim=True)) + + # testing with some infeasible points + con_cutoff = 3.0 + best_f = compute_best_feasible_objective( + samples=means, + obj=obj, + constraints=[ + lambda samples: samples[..., 0] - (con_cutoff + 1 / 2) + ], + ) + # only the first four points are feasible, so the best is `con_cutoff` + self.assertAllClose(best_f, torch.tensor([con_cutoff], **tkwargs)) + + # testing with no feasible points and infeasible obj + infeasible_obj = torch.tensor(torch.pi, **tkwargs) + best_f = compute_best_feasible_objective( + samples=means, + obj=obj, + constraints=[lambda X:
torch.ones_like(X[..., 0])], + infeasible_obj=infeasible_obj, + ) + self.assertAllClose(best_f, infeasible_obj.unsqueeze(0)) + + # testing with no feasible points and not infeasible obj + def objective(Y, X): + return Y.squeeze(-1) - 5.0 + + best_f = compute_best_feasible_objective( + samples=means, + obj=obj, + constraints=[lambda X: torch.ones_like(X[..., 0])], + model=mm, + X_baseline=X, + objective=objective, + ) + self.assertAllClose( + best_f, -get_infeasible_cost(X=X, model=mm, objective=objective) + ) + + with self.assertRaisesRegex(ValueError, "Must specify `model`"): + best_f = compute_best_feasible_objective( + samples=means, + obj=obj, + constraints=[lambda X: torch.ones_like(X[..., 0])], + X_baseline=X, + ) + with self.assertRaisesRegex(ValueError, "Must specify `X_baseline`"): + best_f = compute_best_feasible_objective( + samples=means, + obj=obj, + constraints=[lambda X: torch.ones_like(X[..., 0])], + model=mm, + ) + def test_get_infeasible_cost(self): for dtype in (torch.float, torch.double): tkwargs = {"dtype": dtype, "device": self.device} diff --git a/test/models/test_fully_bayesian.py b/test/models/test_fully_bayesian.py index 5994558026..52897adb54 100644 --- a/test/models/test_fully_bayesian.py +++ b/test/models/test_fully_bayesian.py @@ -18,6 +18,10 @@ ProbabilityOfImprovement, UpperConfidenceBound, ) +from botorch.acquisition.logei import ( + qLogExpectedImprovement, + qLogNoisyExpectedImprovement, +) from botorch.acquisition.monte_carlo import ( qExpectedImprovement, qNoisyExpectedImprovement, @@ -411,6 +415,7 @@ def test_acquisition_functions(self): model, warmup_steps=8, num_samples=5, thinning=2, disable_progbar=True ) deterministic = GenericDeterministicModel(f=lambda x: x[..., :1]) + # due to ModelList type, setting cache_root=False for all noisy EI variants list_gp = ModelListGP(model, model) mixed_list = ModelList(deterministic, model) simple_sampler = get_sampler( @@ -427,11 +432,23 @@ def test_acquisition_functions(self): ProbabilityOfImprovement(model=model, best_f=train_Y.max()), PosteriorMean(model=model), UpperConfidenceBound(model=model, beta=4), + qLogExpectedImprovement( + model=model, best_f=train_Y.max(), sampler=simple_sampler + ), qExpectedImprovement( model=model, best_f=train_Y.max(), sampler=simple_sampler ), + qLogNoisyExpectedImprovement( + model=model, + X_baseline=train_X, + sampler=simple_sampler, + cache_root=False, + ), qNoisyExpectedImprovement( - model=model, X_baseline=train_X, sampler=simple_sampler + model=model, + X_baseline=train_X, + sampler=simple_sampler, + cache_root=False, ), qProbabilityOfImprovement( model=model, best_f=train_Y.max(), sampler=simple_sampler @@ -443,6 +460,7 @@ def test_acquisition_functions(self): X_baseline=train_X, ref_point=torch.zeros(2, **tkwargs), sampler=list_gp_sampler, + cache_root=False, ), qExpectedHypervolumeImprovement( model=list_gp, @@ -458,6 +476,7 @@ def test_acquisition_functions(self): X_baseline=train_X, ref_point=torch.zeros(2, **tkwargs), sampler=mixed_list_sampler, + cache_root=False, ), qExpectedHypervolumeImprovement( model=mixed_list, diff --git a/test/models/test_model_list_gp_regression.py b/test/models/test_model_list_gp_regression.py index a16c013a22..09b058a521 100644 --- a/test/models/test_model_list_gp_regression.py +++ b/test/models/test_model_list_gp_regression.py @@ -6,6 +6,7 @@ import itertools import warnings +from typing import Optional import torch from botorch.acquisition.objective import ScalarizedPosteriorTransform @@ -18,6 +19,8 @@ from 
botorch.models.transforms.input import Normalize from botorch.models.transforms.outcome import ChainedOutcomeTransform, Log, Standardize from botorch.posteriors import GPyTorchPosterior, PosteriorList, TransformedPosterior +from botorch.sampling.base import MCSampler +from botorch.sampling.list_sampler import ListSampler from botorch.sampling.normal import IIDNormalSampler from botorch.utils.testing import _get_random_data, BotorchTestCase from gpytorch.distributions import MultitaskMultivariateNormal, MultivariateNormal @@ -27,6 +30,7 @@ from gpytorch.mlls import SumMarginalLogLikelihood from gpytorch.mlls.exact_marginal_log_likelihood import ExactMarginalLogLikelihood from gpytorch.priors import GammaPrior +from torch import Tensor def _get_model( @@ -386,6 +390,26 @@ def test_fantasize(self): self.assertEqual(fm_i.train_inputs[0].shape, torch.Size([2, 8, 2])) self.assertEqual(fm_i.train_targets.shape, torch.Size([2, 8])) + # test decoupled + sampler1 = IIDNormalSampler(2) + sampler2 = IIDNormalSampler(2) + eval_mask = torch.tensor( + [[1, 0], [0, 1], [1, 0]], + dtype=torch.bool, + ) + fm = modellist.fantasize( + torch.rand(3, 2), + sampler=ListSampler(sampler1, sampler2), + evaluation_mask=eval_mask, + ) + self.assertIsInstance(fm, ModelListGP) + for i in range(2): + fm_i = fm.models[i] + self.assertIsInstance(fm_i, SingleTaskGP) + num_points = 7 - i + self.assertEqual(fm_i.train_inputs[0].shape, torch.Size([2, num_points, 2])) + self.assertEqual(fm_i.train_targets.shape, torch.Size([2, num_points])) + def test_fantasize_with_outcome_transform(self) -> None: """ Check that fantasized posteriors from a `ModelListGP` with transforms @@ -403,42 +427,108 @@ def test_fantasize_with_outcome_transform(self) -> None: with self.subTest(dtype=dtype): tkwargs = {"device": self.device, "dtype": dtype} X = torch.linspace(0, 1, 20, **tkwargs)[:, None] - Y = 10 * torch.linspace(0, 1, 20, **tkwargs)[:, None] + Y1 = 10 * torch.linspace(0, 1, 20, **tkwargs)[:, None] + Y2 = 2 * Y1 + Y = torch.cat([Y1, Y2], dim=-1) target_x = torch.tensor([[0.5]], **tkwargs) model_with_transform = ModelListGP( - SingleTaskGP(X, Y, outcome_transform=Standardize(m=1)) + SingleTaskGP(X, Y1, outcome_transform=Standardize(m=1)), + SingleTaskGP(X, Y2, outcome_transform=Standardize(m=1)), ) - y_standardized, _ = Standardize(m=1).forward(Y) + outcome_transform = Standardize(m=2) + y_standardized, _ = outcome_transform(Y) + outcome_transform.eval() model_manually_transformed = ModelListGP( - SingleTaskGP(X, y_standardized) + SingleTaskGP(X, y_standardized[:, :1]), + SingleTaskGP(X, y_standardized[:, 1:]), ) - def _get_fant_mean(model: ModelListGP) -> float: + def _get_fant_mean( + model: ModelListGP, + sampler: MCSampler, + eval_mask: Optional[Tensor] = None, + ) -> Tensor: fant = model.fantasize( - target_x, sampler=IIDNormalSampler(10, seed=0) + target_x, + sampler=sampler, + evaluation_mask=eval_mask, ) - return fant.posterior(target_x).mean.mean().item() + return fant.posterior(target_x).mean.mean(dim=(-2, -3)) - outcome_transform = model_with_transform.models[0].outcome_transform # ~0 + sampler = IIDNormalSampler(10, seed=0) fant_mean_with_manual_transform = _get_fant_mean( - model_manually_transformed + model_manually_transformed, sampler=sampler ) # Inexact since this is an MC test and we don't want it flaky - self.assertAlmostEqual(fant_mean_with_manual_transform, 0.0, delta=0.1) - manually_rescaled_mean, _ = outcome_transform.untransform( + self.assertLessEqual( + (fant_mean_with_manual_transform - 0.0).abs().max().item(),
0.1 + ) + manually_rescaled_mean = outcome_transform.untransform( fant_mean_with_manual_transform + )[0].view(-1) + fant_mean_with_native_transform = _get_fant_mean( + model_with_transform, sampler=sampler ) - fant_mean_with_native_transform = _get_fant_mean(model_with_transform) # Inexact since this is an MC test and we don't want it flaky - self.assertAlmostEqual(fant_mean_with_native_transform, 5.0, delta=0.5) + self.assertLessEqual( + ( + fant_mean_with_native_transform + - torch.tensor([5.0, 10.0], **tkwargs) + ) + .abs() + .max() + .item(), + 0.5, + ) # tighter tolerance here since the models should use the same samples - self.assertAlmostEqual( - manually_rescaled_mean.item(), + self.assertAllClose( + manually_rescaled_mean, + fant_mean_with_native_transform, + ) + # test decoupled + sampler = ListSampler( + IIDNormalSampler(10, seed=0), + IIDNormalSampler(10, seed=0), + ) + fant_mean_with_manual_transform = _get_fant_mean( + model_manually_transformed, + sampler=sampler, + eval_mask=torch.tensor( + [[0, 1]], dtype=torch.bool, device=tkwargs["device"] + ), + ) + # Inexact since this is an MC test and we don't want it flaky + self.assertLessEqual( + (fant_mean_with_manual_transform - 0.0).abs().max().item(), 0.1 + ) + manually_rescaled_mean = outcome_transform.untransform( + fant_mean_with_manual_transform + )[0].view(-1) + fant_mean_with_native_transform = _get_fant_mean( + model_with_transform, + sampler=sampler, + eval_mask=torch.tensor( + [[0, 1]], dtype=torch.bool, device=tkwargs["device"] + ), + ) + # Inexact since this is an MC test and we don't want it flaky + self.assertLessEqual( + ( + fant_mean_with_native_transform + - torch.tensor([5.0, 10.0], **tkwargs) + ) + .abs() + .max() + .item(), + 0.5, + ) + # tighter tolerance here since the models should use the same samples + self.assertAllClose( + manually_rescaled_mean, fant_mean_with_native_transform, - delta=1e-6, ) def test_fantasize_with_outcome_transform_fixed_noise(self) -> None: @@ -458,18 +548,78 @@ def test_fantasize_with_outcome_transform_fixed_noise(self) -> None: tkwargs = {"device": self.device, "dtype": dtype} X = torch.tensor([[0.0], [1.0]], **tkwargs) Y = torch.tensor([[y_at_low_x], [y_at_high_x]], **tkwargs) + Y2 = 2 * Y yvar = torch.full_like(Y, 1e-4) + yvar2 = 2 * yvar model = ModelListGP( - FixedNoiseGP(X, Y, yvar, outcome_transform=Standardize(m=1)) + FixedNoiseGP(X, Y, yvar, outcome_transform=Standardize(m=1)), + FixedNoiseGP(X, Y2, yvar2, outcome_transform=Standardize(m=1)), ) - - model.posterior(torch.zeros((1, 1), **tkwargs)) - - fant = model.fantasize( - X, sampler=IIDNormalSampler(n_fants, seed=0), noise=yvar + # test exceptions + eval_mask = torch.zeros( + 3, 2, 2, dtype=torch.bool, device=tkwargs["device"] ) - - fant_mean = fant.posterior(X).mean.mean(0).flatten().tolist() - self.assertAlmostEqual(fant_mean[0], y_at_low_x, delta=1) - # delta=1 is a 1% error (since y_at_low_x = 100) - self.assertAlmostEqual(fant_mean[1], y_at_high_x, delta=1) + msg = ( + f"Expected evaluation_mask of shape `{X.shape[0]} x " + f"{model.num_outputs}`, but got `" + f"{' x '.join(str(i) for i in eval_mask.shape)}`." + ) + with self.assertRaisesRegex(BotorchTensorDimensionError, msg): + + model.fantasize( + X, + evaluation_mask=eval_mask, + sampler=ListSampler( + IIDNormalSampler(n_fants, seed=0), + IIDNormalSampler(n_fants, seed=0), + ), + ) + msg = "Decoupled fantasization requires a list of samplers." 
+ with self.assertRaisesRegex(ValueError, msg): + model.fantasize( + X, + evaluation_mask=eval_mask[0], + sampler=IIDNormalSampler(n_fants, seed=0), + ) + model.posterior(torch.zeros((1, 1), **tkwargs)) + for decoupled in (False, True): + if decoupled: + kwargs = { + "sampler": ListSampler( + IIDNormalSampler(n_fants, seed=0), + IIDNormalSampler(n_fants, seed=0), + ), + "evaluation_mask": torch.tensor( + [[0, 1], [1, 0]], + dtype=torch.bool, + device=tkwargs["device"], + ), + } + else: + kwargs = { + "sampler": IIDNormalSampler(n_fants, seed=0), + } + fant = model.fantasize(X, **kwargs) + + fant_mean = fant.posterior(X).mean.mean(0) + self.assertAlmostEqual(fant_mean[0, 0].item(), y_at_low_x, delta=1) + self.assertAlmostEqual( + fant_mean[0, 1].item(), 2 * y_at_low_x, delta=1 + ) + # delta=1 is a 1% error (since y_at_low_x = 100) + self.assertAlmostEqual(fant_mean[1, 0].item(), y_at_high_x, delta=1) + self.assertAlmostEqual( + fant_mean[1, 1].item(), 2 * y_at_high_x, delta=1 + ) + for i, fm_i in enumerate(fant.models): + n_points = 3 if decoupled else 4 + self.assertEqual( + fm_i.train_inputs[0].shape, torch.Size([20, n_points, 1]) + ) + self.assertEqual( + fm_i.train_targets.shape, torch.Size([20, n_points]) + ) + if decoupled: + self.assertTrue( + torch.equal(fm_i.train_inputs[0][0][-1], X[1 - i]) + ) diff --git a/test/optim/test_optimize.py b/test/optim/test_optimize.py index d29b63e05c..a17a28c60a 100644 --- a/test/optim/test_optimize.py +++ b/test/optim/test_optimize.py @@ -1763,3 +1763,23 @@ def test_optimize_acqf_discrete_local_search(self): ) self.assertEqual(len(X), 20) self.assertAllClose(torch.unique(X, dim=0), X) + + def test_no_precision_loss_with_fixed_features(self) -> None: + + acqf = SquaredAcquisitionFunction() + + val = 1e-1 + fixed_features_list = [{0: val}] + + bounds = torch.stack( + [torch.zeros(2, dtype=torch.float64), torch.ones(2, dtype=torch.float64)] + ) + candidate, _ = optimize_acqf_mixed( + acqf, + bounds=bounds, + q=1, + num_restarts=1, + raw_samples=1, + fixed_features_list=fixed_features_list, + ) + self.assertEqual(candidate[0, 0].item(), val) diff --git a/test/utils/test_datasets.py b/test/utils/test_datasets.py index fd550a2846..02c9234f55 100644 --- a/test/utils/test_datasets.py +++ b/test/utils/test_datasets.py @@ -30,14 +30,15 @@ def test_base(self): def test_supervised_meta(self): X = rand(3, 2) Y = rand(3, 1) - A = DenseContainer(rand(3, 5), event_shape=Size([5])) + t = rand(3, 5) + A = DenseContainer(t, event_shape=Size([5])) B = rand(2, 1) SupervisedDatasetWithDefaults = make_dataclass( cls_name="SupervisedDatasetWithDefaults", bases=(SupervisedDataset,), fields=[ - ("default", DenseContainer, field(default=A)), + ("default", DenseContainer, field(default=t)), ("factory", DenseContainer, field(default_factory=lambda: A)), ("other", Tensor, field(default_factory=lambda: B)), ], @@ -55,6 +56,7 @@ def test_supervised_meta(self): # Check handling of default values and factories dataset = SupervisedDatasetWithDefaults(X=X, Y=Y) + self.assertIsInstance(dataset.default, DenseContainer) self.assertEqual(dataset.default, A) self.assertEqual(dataset.factory, A) self.assertTrue(dataset.other is B) diff --git a/test/utils/test_objective.py b/test/utils/test_objective.py index 3975d6682c..7dabe28260 100644 --- a/test/utils/test_objective.py +++ b/test/utils/test_objective.py @@ -7,6 +7,11 @@ import torch from botorch.utils import apply_constraints, get_objective_weights_transform +from botorch.utils.objective import ( + compute_feasibility_indicator, + 
compute_smoothed_feasibility_indicator, + soft_eval_constraint, +) from botorch.utils.testing import BotorchTestCase from torch import Tensor @@ -61,7 +66,7 @@ def test_apply_constraints(self): # nonnegative objective, one constraint, eta = 0 samples = torch.randn(1) obj = ones_f(samples) - with self.assertRaises(ValueError): + with self.assertRaisesRegex(ValueError, "eta must be positive."): apply_constraints( obj=obj, constraints=[zeros_f], @@ -70,6 +75,15 @@ def test_apply_constraints(self): eta=0.0, ) + # soft_eval_constraint is not in the path of apply_constraints, adding this test + # for coverage. + with self.assertRaisesRegex(ValueError, "eta must be positive."): + soft_eval_constraint(lhs=obj, eta=0.0) + ind = soft_eval_constraint(lhs=ones_f(samples), eta=1e-6) + self.assertAllClose(ind, torch.zeros_like(ind)) + ind = soft_eval_constraint(lhs=-ones_f(samples), eta=1e-6) + self.assertAllClose(ind, torch.ones_like(ind)) + def test_apply_constraints_multi_output(self): # nonnegative objective, one constraint tkwargs = {"device": self.device} @@ -153,7 +167,7 @@ def test_apply_constraints_multi_output(self): self.assertAllClose(obj, samples.clamp_min(-1.0) * 0.5 - 1.0) # nonnegative objective, one constraint, eta = 0 obj = samples - with self.assertRaises(ValueError): + with self.assertRaisesRegex(ValueError, "eta must be positive"): apply_constraints( obj=obj, constraints=[zeros_f], @@ -168,7 +182,7 @@ def test_apply_constraints_wrong_eta_dim(self): tkwargs["dtype"] = dtype samples = torch.rand(3, 2, **tkwargs) obj = samples.clone() - with self.assertRaises(ValueError): + with self.assertRaisesRegex(ValueError, "Number of provided constraints"): obj = apply_constraints( obj=obj, constraints=[zeros_f, zeros_f], @@ -176,7 +190,7 @@ def test_apply_constraints_wrong_eta_dim(self): infeasible_cost=0.0, eta=torch.tensor([0.1]).to(**tkwargs), ) - with self.assertRaises(ValueError): + with self.assertRaisesRegex(ValueError, "Number of provided constraints"): obj = apply_constraints( obj=obj, constraints=[zeros_f, zeros_f], @@ -185,6 +199,47 @@ def test_apply_constraints_wrong_eta_dim(self): eta=torch.tensor([0.1, 0.1, 0.3]).to(**tkwargs), ) + def test_constraint_indicators(self): + # nonnegative objective, one constraint + samples = torch.randn(1) + ind = compute_feasibility_indicator(constraints=[zeros_f], samples=samples) + self.assertAllClose(ind, torch.zeros_like(ind)) + self.assertEqual(ind.dtype, torch.bool) + + smoothed_ind = compute_smoothed_feasibility_indicator( + constraints=[zeros_f], samples=samples, eta=1e-3 + ) + self.assertAllClose(smoothed_ind, ones_f(samples) / 2) + + # two constraints + samples = torch.randn(1) + smoothed_ind = compute_smoothed_feasibility_indicator( + constraints=[zeros_f, zeros_f], + samples=samples, + eta=1e-3, + ) + self.assertAllClose(smoothed_ind, ones_f(samples) * 0.5 * 0.5) + + # feasible + samples = torch.randn(1) + ind = compute_feasibility_indicator( + constraints=[minus_one_f], + samples=samples, + ) + self.assertAllClose(ind, torch.ones_like(ind)) + + smoothed_ind = compute_smoothed_feasibility_indicator( + constraints=[minus_one_f], samples=samples, eta=1e-3 + ) + self.assertTrue((smoothed_ind > 3 / 4).all()) + + with self.assertRaisesRegex(ValueError, "Number of provided constraints"): + compute_smoothed_feasibility_indicator( + constraints=[zeros_f, zeros_f], + samples=samples, + eta=torch.tensor([0.1], device=self.device), + ) + class TestGetObjectiveWeightsTransform(BotorchTestCase): def test_NoWeights(self): diff --git 
a/test/utils/test_safe_math.py b/test/utils/test_safe_math.py index 3167928f71..5000d0f27c 100644 --- a/test/utils/test_safe_math.py +++ b/test/utils/test_safe_math.py @@ -12,15 +12,42 @@ from typing import Callable import torch +from botorch.exceptions import UnsupportedError from botorch.utils import safe_math from botorch.utils.constants import get_constants_like -from botorch.utils.safe_math import logmeanexp +from botorch.utils.objective import compute_smoothed_feasibility_indicator +from botorch.utils.safe_math import ( + cauchy, + fatmax, + fatmoid, + fatplus, + log_fatmoid, + log_fatplus, + log_softplus, + logexpit, + logmeanexp, + sigmoid, + smooth_amax, +) from botorch.utils.testing import BotorchTestCase from torch import finfo, Tensor +from torch.nn.functional import softplus INF = float("inf") +def sum_constraint(samples: Tensor) -> Tensor: + """Represents the constraint `samples.sum(dim=-1) > 0`. + + Args: + samples: A `b x q x m`-dim Tensor. + + Returns: + A `b x q`-dim Tensor representing constraint feasibility. + """ + return -samples.sum(dim=-1) + + class UnaryOpTestMixin: op: Callable[[Tensor], Tensor] safe_op: Callable[[Tensor], Tensor] @@ -233,3 +260,152 @@ def test_log_mean_exp(self): logmeanexp(X.log(), dim=(0, -1), keepdim=True).exp(), X.mean(dim=(0, -1), keepdim=True), ) + + +class TestSmoothNonLinearities(BotorchTestCase): + def test_smooth_non_linearities(self): + for dtype in (torch.float, torch.double): + tkwargs = {"dtype": dtype, "device": self.device} + n = 17 + X = torch.randn(n, **tkwargs) + self.assertAllClose(cauchy(X), 1 / (X.square() + 1)) + + # testing softplus and fatplus + tau = 1e-2 + fatplus_X = fatplus(X, tau=tau) + self.assertAllClose(fatplus_X, X.clamp(0), atol=tau) + self.assertTrue((fatplus_X > 0).all()) + self.assertAllClose(fatplus_X.log(), log_fatplus(X, tau=tau)) + self.assertAllClose( + softplus(X, beta=1 / tau), log_softplus(X, tau=tau).exp() + ) + + # testing fatplus differentiability + X = torch.randn(n, **tkwargs) + X.requires_grad = True + log_fatplus(X, tau=tau).sum().backward() + self.assertFalse(X.grad.isinf().any()) + self.assertFalse(X.grad.isnan().any()) + # always increasing, could also test convexity (mathematically guaranteed) + self.assertTrue((X.grad > 0).all()) + + X_soft = X.detach().clone() + X_soft.requires_grad = True + log_softplus(X_soft, tau=tau).sum().backward() + + # for positive values away from zero, log_softplus and log_fatplus are close + is_positive = X > 100 * tau # i.e. 1 for tau = 1e-2 + self.assertAllClose(X.grad[is_positive], 1 / X[is_positive], atol=tau) + self.assertAllClose(X_soft.grad[is_positive], 1 / X[is_positive], atol=tau) + + is_negative = X < -100 * tau # i.e. -1 + # the softplus has very large gradients, which can saturate the smooth + # approximation to the maximum over the q-batch. + asym_val = torch.full_like(X_soft.grad[is_negative], 1 / tau) + self.assertAllClose(X_soft.grad[is_negative], asym_val, atol=tau, rtol=tau) + # the fatplus on the other hand has smaller, though non-vanishing gradients. 
+ self.assertTrue((X_soft.grad[is_negative] > X.grad[is_negative]).all()) + + # testing smoothmax and fatmax + d = 3 + X = torch.randn(n, d, **tkwargs) + fatmax_X = fatmax(X, dim=-1, tau=tau) + true_max = X.amax(dim=-1) + self.assertAllClose(fatmax_X, true_max, atol=tau) + self.assertAllClose(smooth_amax(X, dim=-1, tau=tau), true_max, atol=tau) + + # special case for d = 1 + d = 1 + X = torch.randn(n, d, **tkwargs) + fatmax_X = fatmax(X, dim=-1, tau=tau) + self.assertAllClose(fatmax_X, X[..., 0]) + + # testing fatmax differentiability + X = torch.randn(n, **tkwargs) + X.requires_grad = True + fatmax(X, dim=-1, tau=tau).sum().backward() + + self.assertFalse(X.grad.isinf().any()) + self.assertFalse(X.grad.isnan().any()) + self.assertTrue(X.grad.min() > 0) + + # testing fatmoid + X = torch.randn(n, **tkwargs) + fatmoid_X = fatmoid(X, tau=tau) + # output is in [0, 1] + self.assertTrue((fatmoid_X > 0).all()) + self.assertTrue((fatmoid_X < 1).all()) + # skew symmetry + atol = 1e-6 if dtype == torch.float32 else 1e-12 + self.assertAllClose(1 - fatmoid_X, fatmoid(-X, tau=tau), atol=atol) + zero = torch.tensor(0.0, **tkwargs) + half = torch.tensor(0.5, **tkwargs) + self.assertAllClose(fatmoid(zero), half, atol=atol) + self.assertAllClose(fatmoid_X.log(), log_fatmoid(X, tau=tau)) + + is_center = X.abs() < 100 * tau + self.assertAllClose( + fatmoid_X[~is_center], (X[~is_center] > 0).to(fatmoid_X), atol=1e-3 + ) + + # testing differentiability + X.requires_grad = True + log_fatmoid(X, tau=tau).sum().backward() + self.assertFalse(X.grad.isinf().any()) + self.assertFalse(X.grad.isnan().any()) + self.assertTrue((X.grad > 0).all()) + + # testing constraint indicator + constraints = [sum_constraint] + b = 3 + q = 4 + m = 5 + samples = torch.randn(b, q, m, **tkwargs) + eta = 1e-3 + fat = True + log_feas_vals = compute_smoothed_feasibility_indicator( + constraints=constraints, + samples=samples, + eta=eta, + log=True, + fat=fat, + ) + self.assertTrue(log_feas_vals.shape == torch.Size([b, q])) + expected_feas_vals = sum_constraint(samples) < 0 + hard_feas_vals = log_feas_vals.exp() > 1 / 2 + self.assertAllClose(hard_feas_vals, expected_feas_vals) + + # with deterministic inputs: + samples = torch.ones(1, 1, m, **tkwargs) # sum is greater than 0 + log_feas_vals = compute_smoothed_feasibility_indicator( + constraints=constraints, + samples=samples, + eta=eta, + log=True, + fat=fat, + ) + self.assertTrue((log_feas_vals.exp() > 1 / 2).item()) + + # with deterministic inputs: + samples = -torch.ones(1, 1, m, **tkwargs) # sum is smaller than 0 + log_feas_vals = compute_smoothed_feasibility_indicator( + constraints=constraints, + samples=samples, + eta=eta, + log=True, + fat=fat, + ) + self.assertFalse((log_feas_vals.exp() > 1 / 2).item()) + + # testing sigmoid wrapper function + X = torch.randn(3, 4, 5, **tkwargs) + sigmoid_X = torch.sigmoid(X) + self.assertAllClose(sigmoid(X), sigmoid_X) + self.assertAllClose(sigmoid(X, log=True), logexpit(X)) + self.assertAllClose(sigmoid(X, log=True).exp(), sigmoid_X) + fatmoid_X = fatmoid(X) + self.assertAllClose(sigmoid(X, fat=True), fatmoid_X) + self.assertAllClose(sigmoid(X, log=True, fat=True).exp(), fatmoid_X) + + with self.assertRaisesRegex(UnsupportedError, "Only dtypes"): + log_softplus(torch.randn(2, dtype=torch.float16)) diff --git a/website/yarn.lock b/website/yarn.lock index e5cbdaaaa6..dde5b2098a 100644 --- a/website/yarn.lock +++ b/website/yarn.lock @@ -3173,9 +3173,9 @@ fast-json-stable-stringify@^2.0.0: integrity 
sha512-lhd/wF+Lk98HZoTCtlVraHtfh5XYijIjalXck7saUtuanSDyLMxnHhSXEDJqHxD7msR8D0uCmqlkwjCV8xvwHw== fast-xml-parser@^4.1.3: - version "4.2.4" - resolved "https://registry.yarnpkg.com/fast-xml-parser/-/fast-xml-parser-4.2.4.tgz#6e846ede1e56ad9e5ef07d8720809edf0ed07e9b" - integrity sha512-fbfMDvgBNIdDJLdLOwacjFAPYt67tr31H9ZhWSm45CDAxvd0I6WTlSOUo7K2P/K5sA5JgMKG64PI3DMcaFdWpQ== + version "4.2.5" + resolved "https://registry.yarnpkg.com/fast-xml-parser/-/fast-xml-parser-4.2.5.tgz#a6747a09296a6cb34f2ae634019bf1738f3b421f" + integrity sha512-B9/wizE4WngqQftFPmdaMYlXoJlJOYxGQOanC77fq9k8+Z0v5dDSVh+3glErdIROP//s/jgb7ZuxKfB8nVyo0g== dependencies: strnum "^1.0.5" @@ -6489,14 +6489,14 @@ semver-truncate@^1.1.2: semver "^5.3.0" "semver@2 || 3 || 4 || 5", semver@^5.3.0, semver@^5.5.0, semver@^5.6.0, semver@^5.7.0, semver@^5.7.1: - version "5.7.1" - resolved "https://registry.yarnpkg.com/semver/-/semver-5.7.1.tgz#a954f931aeba508d307bbf069eff0c01c96116f7" - integrity sha512-sauaDf/PZdVgrLTNYHRtpXa1iRiKcaebiKQ1BJdpQlWH2lCvexQdX55snPFyK7QzpudqbCI0qXFfOasHdyNDGQ== + version "5.7.2" + resolved "https://registry.yarnpkg.com/semver/-/semver-5.7.2.tgz#48d55db737c3287cd4835e17fa13feace1c41ef8" + integrity sha512-cBznnQ9KjJqU67B52RMC65CMarK2600WFnbkcaiwWq3xy/5haFJlshgnpjovMVJ+Hff49d8GEn0b87C5pDQ10g== semver@^6.1.1, semver@^6.1.2, semver@^6.3.0: - version "6.3.0" - resolved "https://registry.yarnpkg.com/semver/-/semver-6.3.0.tgz#ee0a64c8af5e8ceea67687b133761e1becbd1d3d" - integrity sha512-b39TBaTSfV6yBrapU89p5fKekE2m/NwnDocOVruQFS1/veMgdzuPcnOM34M6CwxW8jH/lxEa5rBoDeUwu5HHTw== + version "6.3.1" + resolved "https://registry.yarnpkg.com/semver/-/semver-6.3.1.tgz#556d2ef8689146e46dcea4bfdd095f3434dffcb4" + integrity sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA== send@0.18.0: version "0.18.0"