From 1d077c99ca39de15a54c92ace1dbf8ec8c9fe9f5 Mon Sep 17 00:00:00 2001 From: Sebastian Ament Date: Mon, 10 Jul 2023 13:05:07 -0700 Subject: [PATCH 01/18] Loopifying conditional kwarg processing in `_optimize_acqf_batch` (#1923) Summary: Pull Request resolved: https://github.com/pytorch/botorch/pull/1923 Small code simplification, pattern could be reused elsewhere for `kwarg` processing, particularly where a number of fields have to be check. Reviewed By: esantorella Differential Revision: D47297929 fbshipit-source-id: 07574fef862b9e0b06d0f66b1004738e9dcd061f --- botorch/optim/optimize.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/botorch/optim/optimize.py b/botorch/optim/optimize.py index 3431f3e32f..5ec71767ba 100644 --- a/botorch/optim/optimize.py +++ b/botorch/optim/optimize.py @@ -305,14 +305,13 @@ def _optimize_batch_candidates() -> Tuple[Tensor, Tensor, List[Warning]]: # only add parameter constraints to gen_kwargs if they are specified # to avoid unnecessary warnings in _filter_kwargs - if opt_inputs.inequality_constraints is not None: - gen_kwargs["inequality_constraints"] = opt_inputs.inequality_constraints - if opt_inputs.equality_constraints is not None: - gen_kwargs["equality_constraints"] = opt_inputs.equality_constraints - if opt_inputs.nonlinear_inequality_constraints is not None: - gen_kwargs[ - "nonlinear_inequality_constraints" - ] = opt_inputs.nonlinear_inequality_constraints + for constraint_name in [ + "inequality_constraints", + "equality_constraints", + "nonlinear_inequality_constraints", + ]: + if (constraint := getattr(opt_inputs, constraint_name)) is not None: + gen_kwargs[constraint_name] = constraint filtered_gen_kwargs = _filter_kwargs(opt_inputs.gen_candidates, **gen_kwargs) From 56198cdb39f05dc5c32629530ee579dc1d905421 Mon Sep 17 00:00:00 2001 From: Elizabeth Santorella Date: Mon, 10 Jul 2023 16:09:09 -0700 Subject: [PATCH 02/18] Python 3.11 compliance for dataclasses: Don't use mutable defaults (#1927) Summary: ## Motivation See error here: https://github.com/pytorch/botorch/actions/runs/5508336054/jobs/10039481686?pr=1924 * Tested with a tensor instead of a `DenseContainer` since we needed something non-mutable * Added a test that the tensor is properly cast to a `DenseContainer` * made code more readable (for me anyway) ### Have you read the [Contributing Guidelines on pull requests](https://github.com/pytorch/botorch/blob/main/CONTRIBUTING.md#pull-requests)? 
Yes Pull Request resolved: https://github.com/pytorch/botorch/pull/1927 Test Plan: Units ## Related PRs Unblocks https://github.com/pytorch/botorch/issues/1924 Reviewed By: saitcakmak Differential Revision: D47341881 Pulled By: esantorella fbshipit-source-id: 64c497504bffde8cc227dff8eaaa85e17aa36b95 --- botorch/utils/datasets.py | 33 +++++++++++++++++---------------- test/utils/test_datasets.py | 6 ++++-- 2 files changed, 21 insertions(+), 18 deletions(-) diff --git a/botorch/utils/datasets.py b/botorch/utils/datasets.py index c33a040d32..bfe12aa047 100644 --- a/botorch/utils/datasets.py +++ b/botorch/utils/datasets.py @@ -37,28 +37,29 @@ def __call__(cls, *args: Any, **kwargs: Any): r"""Converts Tensor-valued fields to DenseContainer under the assumption that said fields house collections of feature vectors.""" hints = get_type_hints(cls) - f_iter = filter(lambda f: f.init, fields(cls)) + fields_iter = (item for item in fields(cls) if item.init is not None) f_dict = {} - for obj, f in chain( - zip(args, f_iter), ((kwargs.pop(f.name, MISSING), f) for f in f_iter) + for value, field in chain( + zip(args, fields_iter), + ((kwargs.pop(field.name, MISSING), field) for field in fields_iter), ): - if obj is MISSING: - if f.default is not MISSING: - obj = f.default - elif f.default_factory is not MISSING: - obj = f.default_factory() + if value is MISSING: + if field.default is not MISSING: + value = field.default + elif field.default_factory is not MISSING: + value = field.default_factory() else: - raise RuntimeError(f"Missing required field `{f.name}`.") + raise RuntimeError(f"Missing required field `{field.name}`.") - if issubclass(hints[f.name], BotorchContainer): - if isinstance(obj, Tensor): - obj = DenseContainer(obj, event_shape=obj.shape[-1:]) - elif not isinstance(obj, BotorchContainer): + if issubclass(hints[field.name], BotorchContainer): + if isinstance(value, Tensor): + value = DenseContainer(value, event_shape=value.shape[-1:]) + elif not isinstance(value, BotorchContainer): raise TypeError( - f"Expected for field `{f.name}` " - f"but was {type(obj)}." + "Expected for field " + f"`{field.name}` but was {type(value)}." 
) - f_dict[f.name] = obj + f_dict[field.name] = value return super().__call__(**f_dict, **kwargs) diff --git a/test/utils/test_datasets.py b/test/utils/test_datasets.py index fd550a2846..02c9234f55 100644 --- a/test/utils/test_datasets.py +++ b/test/utils/test_datasets.py @@ -30,14 +30,15 @@ def test_base(self): def test_supervised_meta(self): X = rand(3, 2) Y = rand(3, 1) - A = DenseContainer(rand(3, 5), event_shape=Size([5])) + t = rand(3, 5) + A = DenseContainer(t, event_shape=Size([5])) B = rand(2, 1) SupervisedDatasetWithDefaults = make_dataclass( cls_name="SupervisedDatasetWithDefaults", bases=(SupervisedDataset,), fields=[ - ("default", DenseContainer, field(default=A)), + ("default", DenseContainer, field(default=t)), ("factory", DenseContainer, field(default_factory=lambda: A)), ("other", Tensor, field(default_factory=lambda: B)), ], @@ -55,6 +56,7 @@ def test_supervised_meta(self): # Check handling of default values and factories dataset = SupervisedDatasetWithDefaults(X=X, Y=Y) + self.assertIsInstance(dataset.default, DenseContainer) self.assertEqual(dataset.default, A) self.assertEqual(dataset.factory, A) self.assertTrue(dataset.other is B) From ad40f8736b85c6e92c3cce411301e46066221c4a Mon Sep 17 00:00:00 2001 From: Qing Feng Date: Mon, 10 Jul 2023 18:03:42 -0700 Subject: [PATCH 03/18] add Homotopy to docs (#1928) Summary: Pull Request resolved: https://github.com/pytorch/botorch/pull/1928 As title. Fix build docs in T157797426 Reviewed By: dme65, Balandat Differential Revision: D47349095 fbshipit-source-id: 7071369a3a87d56e2973f3525e5ab846cae95af1 --- sphinx/source/optim.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sphinx/source/optim.rst b/sphinx/source/optim.rst index e6b9b3e11a..d3bf9e2db8 100644 --- a/sphinx/source/optim.rst +++ b/sphinx/source/optim.rst @@ -36,6 +36,11 @@ Stopping Criteria .. automodule:: botorch.optim.stopping :members: +Acquisition Function Optimization with Homotopy +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. automodule:: botorch.optim.optimize_homotopy + :members: + Closures ------------------------------------------- @@ -87,3 +92,8 @@ Parameter Constraint Utilities ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. automodule:: botorch.optim.parameter_constraints :members: + +Homotopy Utilities +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. automodule:: botorch.optim.homotopy + :members: From edf1b1b95111ef1942e382a8cf7d107e26499268 Mon Sep 17 00:00:00 2001 From: Sam Daulton Date: Mon, 10 Jul 2023 20:29:57 -0700 Subject: [PATCH 04/18] Add method for computing non-reduced AF vals (#1926) Summary: Pull Request resolved: https://github.com/pytorch/botorch/pull/1926 See title. This is used in the stacked diff. Reviewed By: SebastianAment Differential Revision: D47296866 fbshipit-source-id: 26d7d98b50a54edcd5f6f6a2e7cd6ed6d43d87cb --- botorch/acquisition/monte_carlo.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/botorch/acquisition/monte_carlo.py b/botorch/acquisition/monte_carlo.py index 330c4bdcef..f0bd2be915 100644 --- a/botorch/acquisition/monte_carlo.py +++ b/botorch/acquisition/monte_carlo.py @@ -255,10 +255,22 @@ def forward(self, X: Tensor) -> Tensor: A Tensor with shape `batch_shape'`, where `batch_shape'` is the broadcasted batch shape of model and input `X`. 
""" + non_reduced_acqval = self._non_reduced_forward(X=X) + return self._sample_reduction(self._q_reduction(non_reduced_acqval)) + + def _non_reduced_forward(self, X: Tensor) -> Tensor: + """Compute the constrained acquisition values at the MC-sample, q level. + + Args: + X: A `batch_shape x q x d` Tensor of t-batches with `q` `d`-dim + design points each. + + Returns: + A Tensor with shape `sample_sample x batch_shape x q`. + """ samples, obj = self._get_samples_and_objectives(X) acqval = self._sample_forward(obj) # `sample_sample x batch_shape x q` - weighted_acqval = self._apply_constraints(acqval=acqval, samples=samples) - return self._sample_reduction(self._q_reduction(weighted_acqval)) + return self._apply_constraints(acqval=acqval, samples=samples) @abstractmethod def _sample_forward(self, obj: Tensor) -> Tensor: From 48afca10034fd3ffd10007ed4adcabf09b19d9f4 Mon Sep 17 00:00:00 2001 From: Sam Daulton Date: Mon, 10 Jul 2023 20:29:57 -0700 Subject: [PATCH 05/18] Batch-MC Prior-Guided AF (#1925) Summary: Pull Request resolved: https://github.com/pytorch/botorch/pull/1925 see title. This weights the AF values before reduction over the sample and q-dims. This also handles `X_pending` directly, rather than on the underlying AF. This ensures, `X_pending` is concatenated with `X` and passed to both the AF an the prior. This will only work for AFs that support concatenating `X_pending`. This would need revision to work with for example NEHVI with cached box decompositions. Reviewed By: Balandat Differential Revision: D47307059 fbshipit-source-id: 3532388a44aae0bfdbb45d413ca36d3e472173fa --- botorch/acquisition/prior_guided.py | 58 +++++++++---- test/acquisition/test_prior_guided.py | 119 +++++++++++++++++++------- 2 files changed, 126 insertions(+), 51 deletions(-) diff --git a/botorch/acquisition/prior_guided.py b/botorch/acquisition/prior_guided.py index 72d3407631..fd5fe97d11 100644 --- a/botorch/acquisition/prior_guided.py +++ b/botorch/acquisition/prior_guided.py @@ -20,6 +20,8 @@ from typing import Optional from botorch.acquisition.acquisition import AcquisitionFunction +from botorch.acquisition.monte_carlo import SampleReducingMCAcquisitionFunction +from botorch.utils.transforms import concatenate_pending_points, t_batch_mode_transform from torch import Tensor from torch.nn import Module @@ -28,6 +30,9 @@ class PriorGuidedAcquisitionFunction(AcquisitionFunction): r"""Class for weighting acquisition functions by a prior distribution. + Supports MC and batch acquisition functions via + SampleReducingAcquisitionFunction. + See [Hvarfner2022]_ for details. """ @@ -37,6 +42,7 @@ def __init__( prior_module: Module, log: bool = False, prior_exponent: float = 1.0, + X_pending: Optional[Tensor] = None, ) -> None: r"""Initialize the prior-guided acquisition function. @@ -44,36 +50,50 @@ def __init__( acq_function: The base acquisition function. prior_module: A Module that computes the probability (or log probability) for the provided inputs. + `prior_module.forward` should take a `batch_shape x q`-dim + tensor of inputs and return a `batch_shape x q`-dim tensor + of probabilities. log: A boolean that should be true if the acquisition function emits a log-transformed value and the prior module emits a log probability. prior_exponent: The exponent applied to the prior. This can be used for example to decay the effect the prior over time as in [Hvarfner2022]_. + X_pending: `n x d` Tensor with `n` `d`-dim design points that have + been submitted for evaluation but have not yet been evaluated. 
""" - Module.__init__(self) + super().__init__(model=acq_function.model) self.acq_func = acq_function self.prior_module = prior_module self._log = log self._prior_exponent = prior_exponent + self._is_sample_reducing_af = isinstance( + acq_function, SampleReducingMCAcquisitionFunction + ) + self.set_X_pending(X_pending=X_pending) - @property - def X_pending(self): - r"""Return the `X_pending` of the base acquisition function.""" - try: - return self.acq_func.X_pending - except (ValueError, AttributeError): - raise ValueError( - f"Base acquisition function {type(self.acq_func).__name__} " - "does not have an `X_pending` attribute." - ) - - @X_pending.setter - def X_pending(self, X_pending: Optional[Tensor]): - r"""Sets the `X_pending` of the base acquisition function.""" - self.acq_func.X_pending = X_pending - + @concatenate_pending_points + @t_batch_mode_transform() def forward(self, X: Tensor) -> Tensor: r"""Compute the acquisition function weighted by the prior.""" + # batch_shape x q + prior = self.prior_module(X) + if self._is_sample_reducing_af: + # sample_shape x batch_shape x q + af_val = self.acq_func._non_reduced_forward(X) + else: + if prior.shape[-1] > 1: + raise NotImplementedError( + "q-batches with q>1 are only supported using " + "SampleReducingMCAcquisitionFunction." + ) + # batch_shape x q + af_val = self.acq_func(X).unsqueeze(-1) if self._log: - return self.acq_func(X) + self.prior_module(X) * self._prior_exponent - return self.acq_func(X) * self.prior_module(X).pow(self._prior_exponent) + weighted_af_val = af_val + prior * self._prior_exponent + else: + weighted_af_val = af_val * prior.pow(self._prior_exponent) + if self._is_sample_reducing_af: + return self.acq_func._sample_reduction( + self.acq_func._q_reduction(weighted_af_val) + ) + return weighted_af_val.squeeze(-1) # squeeze q-dim diff --git a/test/acquisition/test_prior_guided.py b/test/acquisition/test_prior_guided.py index fe48449d68..2a92af6117 100644 --- a/test/acquisition/test_prior_guided.py +++ b/test/acquisition/test_prior_guided.py @@ -12,57 +12,112 @@ from botorch.acquisition.prior_guided import PriorGuidedAcquisitionFunction from botorch.models import SingleTaskGP from botorch.utils.testing import BotorchTestCase +from botorch.utils.transforms import match_batch_shape from torch.nn import Module class DummyPrior(Module): def forward(self, X): p = torch.distributions.Normal(0, 1) - # sum over d and q dimensions - return p.log_prob(X).sum(dim=-1).sum(dim=-1).exp() + # sum over d dimensions + return p.log_prob(X).sum(dim=-1).exp() + + +def get_val_prob(test_X, test_X_exp, af, prior): + with torch.no_grad(): + val = af(test_X) + prob = prior(test_X_exp) + + return val, prob + + +def get_weighted_val(ei_val, prob, exponent, use_log): + if use_log: + return prob * exponent + ei_val + return prob.pow(exponent) * ei_val class TestPriorGuidedAcquisitionFunction(BotorchTestCase): - def test_prior_guided_acquisition_function(self): - prior = DummyPrior() + def setUp(self): + self.prior = DummyPrior() + self.train_X = torch.rand(5, 3, dtype=torch.double, device=self.device) + self.train_Y = self.train_X.norm(dim=-1, keepdim=True) + + def test_prior_guided_analytic_acquisition_function(self): for dtype in (torch.float, torch.double): - train_X = torch.rand(5, 3, dtype=dtype, device=self.device) - train_Y = train_X.norm(dim=-1, keepdim=True) - model = SingleTaskGP(train_X, train_Y).eval() - qEI = qExpectedImprovement(model, best_f=0.0) + model = SingleTaskGP( + self.train_X.to(dtype=dtype), 
self.train_Y.to(dtype=dtype) + ) + ei = ExpectedImprovement(model, best_f=0.0) + for batch_shape, use_log, exponent in product( + ([], [2]), + (False, True), + (1.0, 2.0), + ): + af = PriorGuidedAcquisitionFunction( + acq_function=ei, + prior_module=self.prior, + log=use_log, + prior_exponent=exponent, + ) + test_X = torch.rand(*batch_shape, 1, 3, dtype=dtype, device=self.device) + test_X_exp = test_X.unsqueeze(0) if batch_shape == [] else test_X + with torch.no_grad(): + ei_val = ei(test_X_exp).unsqueeze(-1) + val, prob = get_val_prob(test_X, test_X_exp, af, self.prior) + weighted_val = get_weighted_val(ei_val, prob, exponent, use_log) + expected_val = weighted_val.squeeze(-1) + self.assertTrue(torch.allclose(val, expected_val)) + # test that q>1 and a non SampleReducing AF raises an exception + msg = ( + "q-batches with q>1 are only supported using " + "SampleReducingMCAcquisitionFunction." + ) + test_X = torch.rand(2, 3, dtype=dtype, device=self.device) + with self.assertRaisesRegex(NotImplementedError, msg): + af(test_X) + + def test_prior_guided_mc_acquisition_function(self): + for dtype in (torch.float, torch.double): + model = SingleTaskGP( + self.train_X.to(dtype=dtype), self.train_Y.to(dtype=dtype) + ) + ei = qExpectedImprovement(model, best_f=0.0) for batch_shape, q, use_log, exponent in product( - ([], [2]), (1, 2), (False, True), (1.0, 2.0) + ([], [2]), + (1, 2), + (False, True), + (1.0, 2.0), ): af = PriorGuidedAcquisitionFunction( - acq_function=qEI, - prior_module=prior, + acq_function=ei, + prior_module=self.prior, log=use_log, prior_exponent=exponent, ) test_X = torch.rand(*batch_shape, q, 3, dtype=dtype, device=self.device) + test_X_exp = test_X.unsqueeze(0) if batch_shape == [] else test_X + val, prob = get_val_prob(test_X, test_X_exp, af, self.prior) + ei_val = ei._non_reduced_forward(test_X_exp) + weighted_val = get_weighted_val(ei_val, prob, exponent, use_log) + expected_val = ei._sample_reduction(ei._q_reduction(weighted_val)) + self.assertTrue(torch.allclose(val, expected_val)) + # test set_X_pending + X_pending = torch.rand(2, 3, dtype=dtype, device=self.device) + af.X_pending = X_pending + self.assertTrue(torch.equal(X_pending, af.X_pending)) + # unsqueeze batch dim + test_X_exp_with_pending = torch.cat( + [test_X_exp, match_batch_shape(X_pending, test_X_exp)], dim=-2 + ) with torch.no_grad(): val = af(test_X) - prob = prior(test_X) - ei = qEI(test_X) + prob = self.prior(test_X_exp_with_pending) + ei_val = ei._non_reduced_forward(test_X_exp_with_pending) if use_log: - expected_val = prob * exponent + ei + weighted_val = prob * exponent + ei_val else: - expected_val = prob.pow(exponent) * ei + weighted_val = prob.pow(exponent) * ei_val + expected_val = ei._sample_reduction(ei._q_reduction(weighted_val)) + self.assertTrue(torch.equal(val, expected_val)) - # test set_X_pending - X_pending = torch.rand(2, 3, dtype=dtype, device=self.device) - af.X_pending = X_pending - self.assertTrue(torch.equal(X_pending, af.acq_func.X_pending)) - self.assertTrue(torch.equal(X_pending, af.X_pending)) - # test exception when base AF does not support X_pending - ei = ExpectedImprovement(model, best_f=0.0) - af = PriorGuidedAcquisitionFunction( - acq_function=ei, - prior_module=prior, - ) - msg = ( - "Base acquisition function ExpectedImprovement " - "does not have an `X_pending` attribute." 
- ) - with self.assertRaisesRegex(ValueError, msg): - af.X_pending From bc90b2ce44bd6b1be7bf6b3985045d4894bca9ac Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 10 Jul 2023 21:01:58 -0700 Subject: [PATCH 06/18] Bump semver from 5.7.1 to 5.7.2 in /website (#1929) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Summary: Bumps [semver](https://github.com/npm/node-semver) from 5.7.1 to 5.7.2.
Release notes (sourced from semver's releases): v5.7.2, i.e. 5.7.2 (2023-07-10), contains bug fixes.
Changelog (sourced from semver's changelog):
  • 5.7.2 (2023-07-10): Bug Fixes
  • 5.7: Add minVersion method
  • 5.6: Move boolean loose param to an options object, with backwards-compatibility protection; add ability to opt out of special prerelease version handling with the includePrerelease option flag
  • 5.5: Add version coercion capabilities
  • 5.4: Add intersection checking
  • 5.3: Add minSatisfying method
  • 5.2: Add prerelease(v) that returns prerelease components
  • 5.1: Add Backus-Naur for ranges; remove excessively cute inspection methods
  • 5.0: Remove AMD/Browserified build artifacts; fix ltr and gtr when using the * range; fix for range * with a prerelease identifier
Maintainer changes: this version was pushed to npm by lukekarrys, a new releaser for semver since your current version.
Pull Request resolved: https://github.com/pytorch/botorch/pull/1929 Reviewed By: dme65 Differential Revision: D47356226 Pulled By: Balandat fbshipit-source-id: bcc87c814ff2be516b739614e5a93d6f5b0b5516 --- website/yarn.lock | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/website/yarn.lock b/website/yarn.lock index e5cbdaaaa6..5da19ae13f 100644 --- a/website/yarn.lock +++ b/website/yarn.lock @@ -6489,14 +6489,14 @@ semver-truncate@^1.1.2: semver "^5.3.0" "semver@2 || 3 || 4 || 5", semver@^5.3.0, semver@^5.5.0, semver@^5.6.0, semver@^5.7.0, semver@^5.7.1: - version "5.7.1" - resolved "https://registry.yarnpkg.com/semver/-/semver-5.7.1.tgz#a954f931aeba508d307bbf069eff0c01c96116f7" - integrity sha512-sauaDf/PZdVgrLTNYHRtpXa1iRiKcaebiKQ1BJdpQlWH2lCvexQdX55snPFyK7QzpudqbCI0qXFfOasHdyNDGQ== + version "5.7.2" + resolved "https://registry.yarnpkg.com/semver/-/semver-5.7.2.tgz#48d55db737c3287cd4835e17fa13feace1c41ef8" + integrity sha512-cBznnQ9KjJqU67B52RMC65CMarK2600WFnbkcaiwWq3xy/5haFJlshgnpjovMVJ+Hff49d8GEn0b87C5pDQ10g== semver@^6.1.1, semver@^6.1.2, semver@^6.3.0: - version "6.3.0" - resolved "https://registry.yarnpkg.com/semver/-/semver-6.3.0.tgz#ee0a64c8af5e8ceea67687b133761e1becbd1d3d" - integrity sha512-b39TBaTSfV6yBrapU89p5fKekE2m/NwnDocOVruQFS1/veMgdzuPcnOM34M6CwxW8jH/lxEa5rBoDeUwu5HHTw== + version "6.3.1" + resolved "https://registry.yarnpkg.com/semver/-/semver-6.3.1.tgz#556d2ef8689146e46dcea4bfdd095f3434dffcb4" + integrity sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA== send@0.18.0: version "0.18.0" From 4b7933198a28c94b48904ab7547d076cbdc5fd27 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 11 Jul 2023 14:48:49 -0700 Subject: [PATCH 07/18] Bump fast-xml-parser from 4.2.4 to 4.2.5 in /website (#1930) Summary: Bumps [fast-xml-parser](https://github.com/NaturalIntelligence/fast-xml-parser) from 4.2.4 to 4.2.5.
Changelog (sourced from fast-xml-parser's changelog). Note: if information about a particular minor version is missing, that version was changed without any functional change in this library.
  • 4.2.5 / 2023-06-22: change code implementation
  • 4.2.4 / 2023-06-06: fix security bug
  • 4.2.3 / 2023-06-05: fix security bug
  • 4.2.2 / 2023-04-18
  • 4.2.1 / 2023-04-18: fix: jpath after unpaired tags
  • 4.2.0 / 2023-04-09: support updateTag parser property
  • 4.1.4 / 2023-04-08
  • 4.1.3 / 2023-02-26
  • 4.1.2 / 2023-02-12: Security Fix
  • 4.1.1 / 2023-02-03
  • 4.1.0 / 2023-02-02
  • 4.0.15 / 2023-01-25: make "eNotation" optional
  • 4.0.14 / 2023-01-22: fixed: add missed typing "eNotation" to parse values
  • 4.0.13 / 2023-01-07
  • 4.0.12 / 2022-11-19
  • ... (truncated)
Pull Request resolved: https://github.com/pytorch/botorch/pull/1930 Reviewed By: saitcakmak Differential Revision: D47380150 Pulled By: Balandat fbshipit-source-id: 2c19069c7a98c3937440014c58bc5855572ede2f --- website/yarn.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/website/yarn.lock b/website/yarn.lock index 5da19ae13f..dde5b2098a 100644 --- a/website/yarn.lock +++ b/website/yarn.lock @@ -3173,9 +3173,9 @@ fast-json-stable-stringify@^2.0.0: integrity sha512-lhd/wF+Lk98HZoTCtlVraHtfh5XYijIjalXck7saUtuanSDyLMxnHhSXEDJqHxD7msR8D0uCmqlkwjCV8xvwHw== fast-xml-parser@^4.1.3: - version "4.2.4" - resolved "https://registry.yarnpkg.com/fast-xml-parser/-/fast-xml-parser-4.2.4.tgz#6e846ede1e56ad9e5ef07d8720809edf0ed07e9b" - integrity sha512-fbfMDvgBNIdDJLdLOwacjFAPYt67tr31H9ZhWSm45CDAxvd0I6WTlSOUo7K2P/K5sA5JgMKG64PI3DMcaFdWpQ== + version "4.2.5" + resolved "https://registry.yarnpkg.com/fast-xml-parser/-/fast-xml-parser-4.2.5.tgz#a6747a09296a6cb34f2ae634019bf1738f3b421f" + integrity sha512-B9/wizE4WngqQftFPmdaMYlXoJlJOYxGQOanC77fq9k8+Z0v5dDSVh+3glErdIROP//s/jgb7ZuxKfB8nVyo0g== dependencies: strnum "^1.0.5" From 36c8c028da6ea248a05af210626a97f922716c24 Mon Sep 17 00:00:00 2001 From: Qing Feng Date: Wed, 12 Jul 2023 08:55:13 -0700 Subject: [PATCH 08/18] add ER/IR-L0 Acqusition function (#1916) Summary: Pull Request resolved: https://github.com/pytorch/botorch/pull/1916 Implement internal regularization with L0 norm (IR-L0) and external regularization with L0 norm (ER-L0) in MBM. Reviewed By: lena-kashtelyan Differential Revision: D46059263 fbshipit-source-id: 1ccc6e1c5754f1ccdfc2437d21f15bf888755b9e --- botorch/acquisition/penalized.py | 59 +++++++++++++++++++ test/acquisition/test_penalized.py | 95 ++++++++++++++++++++++++++++++ 2 files changed, 154 insertions(+) diff --git a/botorch/acquisition/penalized.py b/botorch/acquisition/penalized.py index 6b929a8c96..304c3c2f8a 100644 --- a/botorch/acquisition/penalized.py +++ b/botorch/acquisition/penalized.py @@ -176,6 +176,29 @@ def __call__(self, X: Tensor) -> Tensor: return nnz_approx(X=X, target_point=self.target_point, a=self.a) +class L0PenaltyApprox(L0Approximation): + r"""Differentiable relaxation of the L0 norm to be added to any arbitrary + acquisition function to construct a PenalizedAcquisitionFunction.""" + + def __init__(self, target_point: Tensor, a: float = 1.0, **tkwargs: Any) -> None: + r"""Initializing L0 penalty with differentiable relaxation. + + Args: + target_point: A tensor corresponding to the target point. + a: A hyperparameter that controls the differentiable relaxation. + """ + super().__init__(target_point=target_point, a=a, **tkwargs) + + def __call__(self, X: Tensor) -> Tensor: + r""" + Args: + X: A "batch_shape x q x dim" representing the points to be evaluated. + Returns: + A tensor of size "batch_shape" representing the acqfn for each q-batch. + """ + return super().__call__(X=X).squeeze(dim=-1).min(dim=-1).values + + class PenalizedAcquisitionFunction(AcquisitionFunction): r"""Single-outcome acquisition function regularized by the given penalty. @@ -297,6 +320,7 @@ def __init__( objective: Callable[[Tensor, Optional[Tensor]], Tensor], penalty_objective: torch.nn.Module, regularization_parameter: float, + expand_dim: Optional[int] = None, ) -> None: r"""Penalized MC objective. @@ -309,10 +333,13 @@ def __init__( `batch-shape x q x d`-dim Tensor `X` and outputs a `1 x batch-shape x q`-dim Tensor of penalty objective values. 
regularization_parameter: weight of the penalty (regularization) term + expand_dim: dim to expand penalty_objective to match with objective when + fully bayesian model is used. If None, no expansion is performed. """ super().__init__(objective=objective) self.penalty_objective = penalty_objective self.regularization_parameter = regularization_parameter + self.expand_dim = expand_dim def forward(self, samples: Tensor, X: Optional[Tensor] = None) -> Tensor: r"""Evaluate the penalized objective on the samples. @@ -329,4 +356,36 @@ def forward(self, samples: Tensor, X: Optional[Tensor] = None) -> Tensor: """ obj = super().forward(samples=samples, X=X) penalty_obj = self.penalty_objective(X) + # when fully bayesian model is used, we pass unmarginalize_dim to match the + # shape between obj `sample_shape x batch-shape x mcmc_samples x q` and + # penalty_obj `1 x batch-shape x q` + if self.expand_dim is not None: + # reshape penalty_obj to match the dim + penalty_obj = penalty_obj.unsqueeze(self.expand_dim) return obj - self.regularization_parameter * penalty_obj + + +class L0PenaltyApproxObjective(L0Approximation): + r"""Differentiable relaxation of the L0 norm penalty objective class. + An instance of this class can be added to any arbitrary objective to + construct a PenalizedMCObjective. + """ + + def __init__(self, target_point: Tensor, a: float = 1.0, **tkwargs: Any) -> None: + r"""Initializing L0 penalty with differentiable relaxation. + + Args: + target_point: A tensor corresponding to the target point. + a: A hyperparameter that controls the differentiable relaxation. + """ + super().__init__(target_point=target_point, a=a, **tkwargs) + + def __call__(self, X: Tensor) -> Tensor: + r""" + Args: + X: A "batch_shape x q x dim" representing the points to be evaluated. + Returns: + A "1 x batch_shape x q" tensor representing the penalty for each point. + The first dimension corresponds to the dimension of MC samples. 
+ """ + return super().__call__(X=X).squeeze(dim=-1).unsqueeze(dim=0) diff --git a/test/acquisition/test_penalized.py b/test/acquisition/test_penalized.py index 67578ef975..ae451645fc 100644 --- a/test/acquisition/test_penalized.py +++ b/test/acquisition/test_penalized.py @@ -12,6 +12,8 @@ group_lasso_regularizer, GroupLassoPenalty, L0Approximation, + L0PenaltyApprox, + L0PenaltyApproxObjective, L1Penalty, L1PenaltyObjective, L2Penalty, @@ -151,6 +153,58 @@ def test_L0Approximation(self): rtol=1e-04, ) + def test_L0PenaltyApproxObjective(self): + for dtype in (torch.float, torch.double): + tkwargs = {"device": self.device, "dtype": dtype} + target_point = torch.zeros(2, **tkwargs) + + # test init + l0_obj = L0PenaltyApproxObjective(target_point=target_point, **tkwargs) + self.assertTrue(torch.equal(l0_obj.target_point, target_point)) + self.assertAllClose(l0_obj.a.data, torch.tensor(1.0, **tkwargs)) + + # check two-dim input tensors X + self.assertTrue( + torch.equal( + l0_obj(torch.zeros(3, 2, **tkwargs)).data, + torch.zeros(1, 3, **tkwargs), + ) + ) + # check "batch_shape x q x dim" input tensors X + batch_shape = 16 + self.assertTrue( + torch.equal( + l0_obj(torch.zeros(batch_shape, 3, 2, **tkwargs)).data, + torch.zeros(1, batch_shape, 3, **tkwargs), + ) + ) + + def test_L0PenaltyApprox(self): + for dtype in (torch.float, torch.double): + tkwargs = {"device": self.device, "dtype": dtype} + target_point = torch.zeros(2, **tkwargs) + + # test init + l0_acqf = L0PenaltyApprox(target_point=target_point, **tkwargs) + self.assertTrue(torch.equal(l0_acqf.target_point, target_point)) + self.assertAllClose(l0_acqf.a.data, torch.tensor(1.0, **tkwargs)) + + # check two-dim input tensors X + self.assertTrue( + torch.equal( + l0_acqf(torch.zeros(3, 2, **tkwargs)).data, + torch.tensor(0, **tkwargs), + ) + ) + # check "batch_shape x q x dim" input tensors X + batch_shape = 16 + self.assertTrue( + torch.equal( + l0_acqf(torch.zeros(batch_shape, 3, 2, **tkwargs)).data, + torch.zeros(batch_shape, **tkwargs), + ) + ) + class TestPenalizedAcquisitionFunction(BotorchTestCase): def test_penalized_acquisition_function(self): @@ -231,6 +285,8 @@ def test_penalized_mc_objective(self): penalty_objective=l1_penalty_obj, regularization_parameter=0.1, ) + # test self.expand_dim + self.assertIsNone(obj.expand_dim) # test 'd' Tensor X samples = torch.randn(4, 3, device=self.device, dtype=dtype) X = torch.randn(4, 5, device=self.device, dtype=dtype) @@ -246,3 +302,42 @@ def test_penalized_mc_objective(self): X = torch.randn(3, 2, 5, device=self.device, dtype=dtype) penalized_obj = generic_obj(samples) - 0.1 * l1_penalty_obj(X) self.assertTrue(torch.equal(obj(samples, X), penalized_obj)) + + # test passing expand_dim + expand_dim = -2 + obj2 = PenalizedMCObjective( + objective=generic_obj, + penalty_objective=l1_penalty_obj, + regularization_parameter=0.1, + expand_dim=expand_dim, + ) + self.assertEqual(obj2.expand_dim, -2) + # test 'd' Tensor X + mcmc_samples = 8 + # MCMC_dim = -3 + samples = torch.randn(mcmc_samples, 4, 3, device=self.device, dtype=dtype) + X = torch.randn(4, 5, device=self.device, dtype=dtype) + penalized_obj = generic_obj(samples) - 0.1 * l1_penalty_obj(X).unsqueeze( + expand_dim + ) + self.assertTrue(torch.equal(obj2(samples, X), penalized_obj)) + # test 'q x d' Tensor X + # MCMC_dim = -3 + samples = torch.randn( + 4, mcmc_samples, 2, 3, device=self.device, dtype=dtype + ) + X = torch.randn(2, 5, device=self.device, dtype=dtype) + penalized_obj = generic_obj(samples) - 0.1 * 
l1_penalty_obj(X).unsqueeze( + expand_dim + ) + self.assertTrue(torch.equal(obj2(samples, X), penalized_obj)) + # test 'batch-shape x q x d' Tensor X + # MCMC_dim = -3 + samples = torch.randn( + 4, 3, mcmc_samples, 2, 3, device=self.device, dtype=dtype + ) + X = torch.randn(3, 2, 5, device=self.device, dtype=dtype) + penalized_obj = generic_obj(samples) - 0.1 * l1_penalty_obj(X).unsqueeze( + expand_dim + ) + self.assertTrue(torch.equal(obj2(samples, X), penalized_obj)) From a1b38fc1bb5d6389cedb84254ce0b95a44ce7864 Mon Sep 17 00:00:00 2001 From: "James T. Wilson" Date: Wed, 12 Jul 2023 13:17:48 -0700 Subject: [PATCH 09/18] Minor patch to MVNXPB (#1933) Summary: ### Overview This PR introduces a minor fix to the MVNXPB algorithm, so that bounds are properly whitened when computing initial plug-in estimators (which can impact the pivot order). Pull Request resolved: https://github.com/pytorch/botorch/pull/1933 Test Plan: I have run the existing unit tests locally. Reviewed By: esantorella Differential Revision: D47368553 Pulled By: Balandat fbshipit-source-id: ac2dfe336a5d3ecf1dad14f11752b8fe8ce69d13 --- botorch/utils/probability/mvnxpb.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/botorch/utils/probability/mvnxpb.py b/botorch/utils/probability/mvnxpb.py index ee479fa40f..4d6edb3539 100644 --- a/botorch/utils/probability/mvnxpb.py +++ b/botorch/utils/probability/mvnxpb.py @@ -176,18 +176,20 @@ def solve(self, num_steps: Optional[int] = None, eps: float = 1e-10) -> Tensor: if pivot is not None and torch.any(pivot > i): self.pivot_(pivot=pivot) - # Initialize `i`-th plug-in value as univariate conditional expectation + # Compute whitened bounds conditional on preceding plug-ins Lii = L[..., i, i].clone() if should_update_chol: - Lii = Lii.clip(min=0).sqrt() + Lii = Lii.clip(min=0).sqrt() # conditional stddev inv_Lii = Lii.reciprocal() - if i == 0: - lb, ub = bounds[..., i, :].clone().unbind(dim=-1) - else: - db = (L[..., i, :i].clone() * y[..., :i].clone()).sum(-1, keepdim=True) - lb, ub = (bounds[..., i, :].clone() - db).unbind(dim=-1) + bounds_i = bounds[..., i, :].clone() + if i != 0: + bounds_i = bounds_i - torch.sum( + L[..., i, :i].clone() * y[..., :i].clone(), dim=-1, keepdim=True + ) + lb, ub = (inv_Lii.unsqueeze(-1) * bounds_i).unbind(dim=-1) - Phi_i = Phi(inv_Lii * ub) - Phi(inv_Lii * lb) + # Initialize `i`-th plug-in value as univariate conditional expectation + Phi_i = Phi(ub) - Phi(lb) small = Phi_i <= i * eps y[..., i] = case_dispatcher( # used to select next pivot out=(phi(lb) - phi(ub)) / Phi_i, @@ -224,7 +226,7 @@ def solve(self, num_steps: Optional[int] = None, eps: float = 1e-10) -> Tensor: # Replace 1D expectations with 2D ones `L[blk, blk]^{-1} y[..., blk]` mask = blk_prob > zero y[..., h] = torch.where(mask, zh, zero) - y[..., i] = torch.where(mask, (std_i * zi - Lih * zh) / Lii, zero) + y[..., i] = torch.where(mask, inv_Lii * (std_i * zi - Lih * zh), zero) # Update running approximation to log probability self.log_prob = self.log_prob + safe_log(blk_prob) From bdb6b7d13994ae7c2c0edb932534954f94d03f6d Mon Sep 17 00:00:00 2001 From: Sebastian Ament Date: Thu, 13 Jul 2023 11:58:55 -0700 Subject: [PATCH 10/18] `compute_best_feasible_f` utility (#1931) Summary: Pull Request resolved: https://github.com/pytorch/botorch/pull/1931 This commit separates out the `compute_best_feasible_f` function from `qNEI` as a utility function in order to use it in the `input_constructors` and `get_acquisition_function` (follow-up). 
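Editorial aside (not part of this commit): a minimal sketch of the idea behind the new utility. Infeasible MC samples are masked with `-inf` before taking the max over the q-dimension, with a fallback when nothing is feasible; `best_feasible_sketch` is an illustrative stand-in, not the `compute_best_feasible_objective` helper added below in `botorch/acquisition/utils.py`.

```python
# Sketch only: mask infeasible samples, then take the q-dim max.
import torch
from torch import Tensor


def best_feasible_sketch(obj: Tensor, samples: Tensor, constraints) -> Tensor:
    # obj: `sample_shape x batch_shape x q`; samples: `sample_shape x batch_shape x q x m`;
    # constraints: callables mapping samples to values that are negative when feasible.
    is_feasible = torch.ones(samples.shape[:-1], dtype=torch.bool, device=samples.device)
    for constraint in constraints:
        is_feasible = is_feasible & (constraint(samples) < 0)
    if is_feasible.any():
        return torch.where(is_feasible, obj, -torch.inf).amax(dim=-1, keepdim=True)
    # The real utility instead infers an infeasible value from the model and X_baseline
    # at this point; a constant lower bound stands in for that step in this sketch.
    return torch.full((*obj.shape[:-1], 1), -torch.inf, dtype=obj.dtype, device=obj.device)


# Example shapes: 64 MC samples, a single t-batch, q=3 candidates, m=2 outcomes.
obj = torch.randn(64, 1, 3)
samples = torch.randn(64, 1, 3, 2)
best_f = best_feasible_sketch(obj, samples, [lambda s: s[..., 1]])  # shape: 64 x 1 x 1
```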
Reviewed By: Balandat Differential Revision: D47365085 fbshipit-source-id: 2d8203544bd646638d9ea6c20789d0700f600c0b --- botorch/acquisition/monte_carlo.py | 50 ++++--------- botorch/acquisition/utils.py | 108 +++++++++++++++++++++++++++++ botorch/utils/objective.py | 29 +++++++- test/acquisition/test_utils.py | 79 ++++++++++++++++++++- test/utils/test_objective.py | 51 +++++++++++++- 5 files changed, 275 insertions(+), 42 deletions(-) diff --git a/botorch/acquisition/monte_carlo.py b/botorch/acquisition/monte_carlo.py index f0bd2be915..c366fdcb0d 100644 --- a/botorch/acquisition/monte_carlo.py +++ b/botorch/acquisition/monte_carlo.py @@ -27,7 +27,6 @@ from typing import Any, Callable, List, Optional, Protocol, Tuple, Union import torch -from botorch import acquisition from botorch.acquisition.acquisition import AcquisitionFunction, MCSamplerMixin from botorch.acquisition.cached_cholesky import CachedCholeskyMCAcquisitionFunction from botorch.acquisition.objective import ( @@ -36,7 +35,10 @@ MCAcquisitionObjective, PosteriorTransform, ) -from botorch.acquisition.utils import prune_inferior_points +from botorch.acquisition.utils import ( + compute_best_feasible_objective, + prune_inferior_points, +) from botorch.exceptions.errors import UnsupportedError from botorch.models.model import Model from botorch.sampling.base import MCSampler @@ -591,7 +593,8 @@ def _get_samples_and_objectives(self, X: Tensor) -> Tuple[Tensor, Tensor]: return samples, obj def _compute_best_feasible_objective(self, samples: Tensor, obj: Tensor) -> Tensor: - """ + r"""Computes best feasible objective value from samples. + Args: samples: `sample_shape x batch_shape x q x m`-dim posterior samples. obj: A `sample_shape x batch_shape x q`-dim Tensor of MC objective values. @@ -599,38 +602,15 @@ def _compute_best_feasible_objective(self, samples: Tensor, obj: Tensor) -> Tens Returns: A `sample_shape x batch_shape x 1`-dim Tensor of best feasible objectives. """ - if self._constraints is not None: - # is_feasible is sample_shape x batch_shape x q - is_feasible = compute_smoothed_constraint_indicator( - constraints=self._constraints, samples=samples, eta=self._eta - ) - is_feasible = is_feasible > 0.5 # due to smooth approximation - if is_feasible.any(): - obj = torch.where(is_feasible, obj, -torch.inf) - else: # if there are no feasible observations, estimate a lower - # bound on the objective by sampling convex combinations of X_baseline. 
- convex_weights = torch.rand( - 32, - self.X_baseline.shape[-2], - dtype=self.X_baseline.dtype, - device=self.X_baseline.device, - ) - weights_sum = convex_weights.sum(dim=0, keepdim=True) - convex_weights = convex_weights / weights_sum - # infeasible cost M is such that -M < min_x f(x), thus - # 0 < min_x f(x) - (-M), so we should take -M as a lower - # bound on the best feasible objective - return -acquisition.utils.get_infeasible_cost( - X=convex_weights @ self.X_baseline, - model=self.model, - objective=self.objective, - posterior_transform=self.posterior_transform, - ).expand(*obj.shape[:-1], 1) - - # we don't need to differentiate through X_baseline for now, so taking - # the regular max over the n points to get best_f is fine - with torch.no_grad(): - return obj.amax(dim=-1, keepdim=True) + return compute_best_feasible_objective( + samples=samples, + obj=obj, + constraints=self._constraints, + model=self.model, + objective=self.objective, + posterior_transform=self.posterior_transform, + X_baseline=self.X_baseline, + ) class qProbabilityOfImprovement(SampleReducingMCAcquisitionFunction): diff --git a/botorch/acquisition/utils.py b/botorch/acquisition/utils.py index 595bd6750c..9a1f4eab8b 100644 --- a/botorch/acquisition/utils.py +++ b/botorch/acquisition/utils.py @@ -32,6 +32,7 @@ FastNondominatedPartitioning, NondominatedPartitioning, ) +from botorch.utils.objective import compute_feasibility_indicator from botorch.utils.sampling import optimize_posterior_samples from botorch.utils.transforms import is_fully_bayesian from torch import Tensor @@ -213,6 +214,113 @@ def get_acquisition_function( ) +def compute_best_feasible_objective( + samples: Tensor, + obj: Tensor, + constraints: Optional[List[Callable[[Tensor], Tensor]]], + model: Optional[Model] = None, + objective: Optional[MCAcquisitionObjective] = None, + posterior_transform: Optional[PosteriorTransform] = None, + X_baseline: Optional[Tensor] = None, + infeasible_obj: Optional[Tensor] = None, +) -> Tensor: + """Computes the largest `obj` value that is feasible under the `constraints`. If + `constraints` is None, returns the best unconstrained objective value. + + When no feasible observations exist and `infeasible_obj` is not `None`, returns + `infeasible_obj` (potentially reshaped). When no feasible observations exist and + `infeasible_obj` is `None`, uses `model`, `objective`, `posterior_transform`, and + `X_baseline` to infer and return an `infeasible_obj` `M` s.t. `M < min_x f(x)`. + + Args: + samples: `(sample_shape) x batch_shape x q x m`-dim posterior samples. + obj: A `(sample_shape) x batch_shape x q`-dim Tensor of MC objective values. + constraints: A list of constraint callables which map posterior samples to + a scalar. The associated constraint is considered satisfied if this + scalar is less than zero. + model: A Model, only required when there are no feasible observations. + objective: An MCAcquisitionObjective, only optionally used when there are no + feasible observations. + posterior_transform: A PosteriorTransform, only optionally used when there are + no feasible observations. + X_baseline: A `batch_shape x d`-dim Tensor of baseline points, only required + when there are no feasible observations. + infeasible_obj: A Tensor to be returned when no feasible points exist. + + Returns: + A `(sample_shape) x batch_shape x 1`-dim Tensor of best feasible objectives. 
+ """ + if constraints is None: # unconstrained case + # we don't need to differentiate through X_baseline for now, so taking + # the regular max over the n points to get best_f is fine + with torch.no_grad(): + return obj.amax(dim=-1, keepdim=True) + + is_feasible = compute_feasibility_indicator( + constraints=constraints, samples=samples + ) # sample_shape x batch_shape x q + if is_feasible.any(): + obj = torch.where(is_feasible, obj, -torch.inf) + with torch.no_grad(): + return obj.amax(dim=-1, keepdim=True) + + elif infeasible_obj is not None: + return infeasible_obj.expand(*obj.shape[:-1], 1) + + else: + if model is None: + raise ValueError( + "Must specify `model` when no feasible observation exists." + ) + if X_baseline is None: + raise ValueError( + "Must specify `X_baseline` when no feasible observation exists." + ) + return _estimate_objective_lower_bound( + model=model, + objective=objective, + posterior_transform=posterior_transform, + X=X_baseline, + ).expand(*obj.shape[:-1], 1) + + +def _estimate_objective_lower_bound( + model: Model, + objective: Optional[MCAcquisitionObjective], + posterior_transform: Optional[PosteriorTransform], + X: Tensor, +) -> Tensor: + """Estimates a lower bound on the objective values by evaluating the model at convex + combinations of `X`, returning the 6-sigma lower bound of the computed statistics. + + Args: + model: A fitted model. + objective: An MCAcquisitionObjective with `m` outputs. + posterior_transform: A PosteriorTransform. + X: A `n x d`-dim Tensor of design points from which to draw convex combinations. + + Returns: + A `m`-dimensional Tensor of lower bounds of the objectives. + """ + convex_weights = torch.rand( + 32, + X.shape[-2], + dtype=X.dtype, + device=X.device, + ) + weights_sum = convex_weights.sum(dim=0, keepdim=True) + convex_weights = convex_weights / weights_sum + # infeasible cost M is such that -M < min_x f(x), thus + # 0 < min_x f(x) - (-M), so we should take -M as a lower + # bound on the best feasible objective + return -get_infeasible_cost( + X=convex_weights @ X, + model=model, + objective=objective, + posterior_transform=posterior_transform, + ) + + def get_infeasible_cost( X: Tensor, model: Model, diff --git a/botorch/utils/objective.py b/botorch/utils/objective.py index 0738812287..5e8db010e5 100644 --- a/botorch/utils/objective.py +++ b/botorch/utils/objective.py @@ -95,14 +95,37 @@ def apply_constraints_nonnegative_soft( return obj.clamp_min(0).mul(w) # Enforce non-negativity of obj, apply constraints. +def compute_feasibility_indicator( + constraints: Optional[List[Callable[[Tensor], Tensor]]], + samples: Tensor, +) -> Tensor: + r"""Computes the feasibility of a list of constraints given posterior samples. + + Args: + constraints: A list of callables, each mapping a batch_shape x q x m`-dim Tensor + to a `batch_shape x q`-dim Tensor, where negative values imply feasibility. + samples: A batch_shape x q x m`-dim Tensor of posterior samples. + + Returns: + A `batch_shape x q`-dim tensor of Boolean feasibility values. 
+ """ + ind = torch.ones(samples.shape[:-1], dtype=torch.bool, device=samples.device) + if constraints is not None: + for constraint in constraints: + ind = ind.logical_and(constraint(samples) < 0) + return ind + + def compute_smoothed_constraint_indicator( constraints: List[Callable[[Tensor], Tensor]], samples: Tensor, eta: Union[Tensor, float], ) -> Tensor: - r"""Computes the feasibility indicator of a list of constraints given posterior - samples, using a sigmoid to smoothly approximate the feasibility indicator - of each individual constraint to ensure differentiability and high gradient signal. + r"""Computes the smoothed feasibility indicator of a list of constraints. + + Given posterior samples, using a sigmoid to smoothly approximate the feasibility + indicator of each individual constraint to ensure differentiability and high + gradient signal. Args: constraints: A list of callables, each mapping a Tensor of size `b x q x m` diff --git a/test/acquisition/test_utils.py b/test/acquisition/test_utils.py index 41b4384e85..d0b5410d7e 100644 --- a/test/acquisition/test_utils.py +++ b/test/acquisition/test_utils.py @@ -19,6 +19,7 @@ ScalarizedPosteriorTransform, ) from botorch.acquisition.utils import ( + compute_best_feasible_objective, expand_trace_observations, get_acquisition_function, get_infeasible_cost, @@ -575,7 +576,83 @@ def test_GetUnknownAcquisitionFunction(self): ) -class TestGetInfeasibleCost(BotorchTestCase): +class TestConstraintUtils(BotorchTestCase): + def test_compute_best_feasible_objective(self): + for dtype in (torch.float, torch.double): + with self.subTest(dtype=dtype): + tkwargs = {"dtype": dtype, "device": self.device} + n = 5 + X = torch.arange(n, **tkwargs).view(-1, 1) + means = torch.arange(n, **tkwargs).view(-1, 1) + samples = means + variances = torch.tensor( + [0.09, 0.25, 0.36, 0.25, 0.09], **tkwargs + ).view(-1, 1) + mm = MockModel( + MockPosterior(mean=means, variance=variances, samples=samples) + ) + + # testing all feasible points + obj = means.squeeze(-1) + constraints = [lambda samples: -torch.ones_like(samples[..., 0])] + best_f = compute_best_feasible_objective( + samples=means, obj=obj, constraints=constraints + ) + self.assertAllClose(best_f, obj.amax(dim=-1, keepdim=True)) + + # testing with some infeasible points + con_cutoff = 3.0 + best_f = compute_best_feasible_objective( + samples=means, + obj=obj, + constraints=[ + lambda samples: samples[..., 0] - (con_cutoff + 1 / 2) + ], + ) + # only first three points are feasible + self.assertAllClose(best_f, torch.tensor([con_cutoff], **tkwargs)) + + # testing with no feasible points and infeasible obj + infeasible_obj = torch.tensor(torch.pi, **tkwargs) + best_f = compute_best_feasible_objective( + samples=means, + obj=obj, + constraints=[lambda X: torch.ones_like(X[..., 0])], + infeasible_obj=infeasible_obj, + ) + self.assertAllClose(best_f, infeasible_obj.unsqueeze(0)) + + # testing with no feasible points and not infeasible obj + def objective(Y, X): + return Y.squeeze(-1) - 5.0 + + best_f = compute_best_feasible_objective( + samples=means, + obj=obj, + constraints=[lambda X: torch.ones_like(X[..., 0])], + model=mm, + X_baseline=X, + objective=objective, + ) + self.assertAllClose( + best_f, -get_infeasible_cost(X=X, model=mm, objective=objective) + ) + + with self.assertRaisesRegex(ValueError, "Must specify `model`"): + best_f = compute_best_feasible_objective( + samples=means, + obj=obj, + constraints=[lambda X: torch.ones_like(X[..., 0])], + X_baseline=X, + ) + with 
self.assertRaisesRegex(ValueError, "Must specify `X_baseline`"): + best_f = compute_best_feasible_objective( + samples=means, + obj=obj, + constraints=[lambda X: torch.ones_like(X[..., 0])], + model=mm, + ) + def test_get_infeasible_cost(self): for dtype in (torch.float, torch.double): tkwargs = {"dtype": dtype, "device": self.device} diff --git a/test/utils/test_objective.py b/test/utils/test_objective.py index 3975d6682c..ee35bf08e5 100644 --- a/test/utils/test_objective.py +++ b/test/utils/test_objective.py @@ -7,6 +7,10 @@ import torch from botorch.utils import apply_constraints, get_objective_weights_transform +from botorch.utils.objective import ( + compute_feasibility_indicator, + compute_smoothed_constraint_indicator, +) from botorch.utils.testing import BotorchTestCase from torch import Tensor @@ -153,7 +157,7 @@ def test_apply_constraints_multi_output(self): self.assertAllClose(obj, samples.clamp_min(-1.0) * 0.5 - 1.0) # nonnegative objective, one constraint, eta = 0 obj = samples - with self.assertRaises(ValueError): + with self.assertRaisesRegex(ValueError, "eta must be positive"): apply_constraints( obj=obj, constraints=[zeros_f], @@ -168,7 +172,7 @@ def test_apply_constraints_wrong_eta_dim(self): tkwargs["dtype"] = dtype samples = torch.rand(3, 2, **tkwargs) obj = samples.clone() - with self.assertRaises(ValueError): + with self.assertRaisesRegex(ValueError, "Number of provided constraints"): obj = apply_constraints( obj=obj, constraints=[zeros_f, zeros_f], @@ -176,7 +180,7 @@ def test_apply_constraints_wrong_eta_dim(self): infeasible_cost=0.0, eta=torch.tensor([0.1]).to(**tkwargs), ) - with self.assertRaises(ValueError): + with self.assertRaisesRegex(ValueError, "Number of provided constraints"): obj = apply_constraints( obj=obj, constraints=[zeros_f, zeros_f], @@ -185,6 +189,47 @@ def test_apply_constraints_wrong_eta_dim(self): eta=torch.tensor([0.1, 0.1, 0.3]).to(**tkwargs), ) + def test_constraint_indicators(self): + # nonnegative objective, one constraint + samples = torch.randn(1) + ind = compute_feasibility_indicator(constraints=[zeros_f], samples=samples) + self.assertAllClose(ind, torch.zeros_like(ind)) + self.assertEqual(ind.dtype, torch.bool) + + smoothed_ind = compute_smoothed_constraint_indicator( + constraints=[zeros_f], samples=samples, eta=1e-3 + ) + self.assertAllClose(smoothed_ind, ones_f(samples) / 2) + + # two constraints + samples = torch.randn(1) + smoothed_ind = compute_smoothed_constraint_indicator( + constraints=[zeros_f, zeros_f], + samples=samples, + eta=1e-3, + ) + self.assertAllClose(smoothed_ind, ones_f(samples) * 0.5 * 0.5) + + # feasible + samples = torch.randn(1) + ind = compute_feasibility_indicator( + constraints=[minus_one_f], + samples=samples, + ) + self.assertAllClose(ind, torch.ones_like(ind)) + + smoothed_ind = compute_smoothed_constraint_indicator( + constraints=[minus_one_f], samples=samples, eta=1e-3 + ) + self.assertTrue((smoothed_ind > 3 / 4).all()) + + with self.assertRaisesRegex(ValueError, "Number of provided constraints"): + compute_smoothed_constraint_indicator( + constraints=[zeros_f, zeros_f], + samples=samples, + eta=torch.tensor([0.1], device=self.device), + ) + class TestGetObjectiveWeightsTransform(BotorchTestCase): def test_NoWeights(self): From ef5a939cf7fe33b8a657ad7ed33a31890421f327 Mon Sep 17 00:00:00 2001 From: Sebastian Ament Date: Thu, 13 Jul 2023 11:58:55 -0700 Subject: [PATCH 11/18] Enabling `SampleReducingMCAcquisitionFunctions` with constraints in `input_constructors` (#1932) Summary: Pull Request resolved: 
https://github.com/pytorch/botorch/pull/1932 This commit enables the prinicpled constraint treatment via `SampleReducingMCAcquisitionFunctions` through `input_constructors` and `get_acquisition_function`. Reviewed By: esantorella Differential Revision: D47365084 fbshipit-source-id: b2ec27a7f185e4a2793e7842342ea23b32b5d544 --- botorch/acquisition/input_constructors.py | 92 +++++++++++--- botorch/acquisition/utils.py | 21 +++- test/acquisition/test_input_constructors.py | 23 +++- test/acquisition/test_utils.py | 131 +++++++++++++++++++- 4 files changed, 245 insertions(+), 22 deletions(-) diff --git a/botorch/acquisition/input_constructors.py b/botorch/acquisition/input_constructors.py index 6e6f7dcd62..f4ac50fd9c 100644 --- a/botorch/acquisition/input_constructors.py +++ b/botorch/acquisition/input_constructors.py @@ -78,6 +78,7 @@ from botorch.acquisition.preference import AnalyticExpectedUtilityOfBestOption from botorch.acquisition.risk_measures import RiskMeasureMCObjective from botorch.acquisition.utils import ( + compute_best_feasible_objective, expand_trace_observations, get_optimal_samples, project_to_target_fidelity, @@ -457,7 +458,9 @@ def construct_inputs_qEI( X_pending: Optional[Tensor] = None, sampler: Optional[MCSampler] = None, best_f: Optional[Union[float, Tensor]] = None, - **kwargs: Any, + constraints: Optional[List[Callable[[Tensor], Tensor]]] = None, + eta: Union[Tensor, float] = 1e-3, + **ignored: Any, ) -> Dict[str, Any]: r"""Construct kwargs for the `qExpectedImprovement` constructor. @@ -473,7 +476,15 @@ def construct_inputs_qEI( sampler: The sampler used to draw base samples. If omitted, uses the acquisition functions's default sampler. best_f: Threshold above (or below) which improvement is defined. - kwargs: Not used. + constraints: A list of constraint callables which map a Tensor of posterior + samples of dimension `sample_shape x batch-shape x q x m`-dim to a + `sample_shape x batch-shape x q`-dim Tensor. The associated constraints + are considered satisfied if the output is less than zero. + eta: Temperature parameter(s) governing the smoothness of the sigmoid + approximation to the constraint indicators. For more details, on this + parameter, see the docs of `compute_smoothed_constraint_indicator`. + ignored: Not used. + Returns: A dict mapping kwarg names of the constructor to values. """ @@ -489,9 +500,11 @@ def construct_inputs_qEI( training_data=training_data, objective=objective, posterior_transform=posterior_transform, + constraints=constraints, + model=model, ) - return {**base_inputs, "best_f": best_f} + return {**base_inputs, "best_f": best_f, "constraints": constraints, "eta": eta} @acqf_input_constructor(qNoisyExpectedImprovement) @@ -505,7 +518,9 @@ def construct_inputs_qNEI( X_baseline: Optional[Tensor] = None, prune_baseline: Optional[bool] = True, cache_root: Optional[bool] = True, - **kwargs: Any, + constraints: Optional[List[Callable[[Tensor], Tensor]]] = None, + eta: Union[Tensor, float] = 1e-3, + **ignored: Any, ) -> Dict[str, Any]: r"""Construct kwargs for the `qNoisyExpectedImprovement` constructor. @@ -527,7 +542,14 @@ def construct_inputs_qNEI( prune_baseline: If True, remove points in `X_baseline` that are highly unlikely to be the best point. This can significantly improve performance and is generally recommended. - kwargs: Not used. + constraints: A list of constraint callables which map a Tensor of posterior + samples of dimension `sample_shape x batch-shape x q x m`-dim to a + `sample_shape x batch-shape x q`-dim Tensor. 
The associated constraints + are considered satisfied if the output is less than zero. + eta: Temperature parameter(s) governing the smoothness of the sigmoid + approximation to the constraint indicators. For more details, on this + parameter, see the docs of `compute_smoothed_constraint_indicator`. + ignored: Not used. Returns: A dict mapping kwarg names of the constructor to values. @@ -553,6 +575,8 @@ def construct_inputs_qNEI( "X_baseline": X_baseline, "prune_baseline": prune_baseline, "cache_root": cache_root, + "constraints": constraints, + "eta": eta, } @@ -566,7 +590,9 @@ def construct_inputs_qPI( sampler: Optional[MCSampler] = None, tau: float = 1e-3, best_f: Optional[Union[float, Tensor]] = None, - **kwargs: Any, + constraints: Optional[List[Callable[[Tensor], Tensor]]] = None, + eta: Union[Tensor, float] = 1e-3, + **ignored: Any, ) -> Dict[str, Any]: r"""Construct kwargs for the `qProbabilityOfImprovement` constructor. @@ -588,13 +614,26 @@ def construct_inputs_qPI( best_f: The best objective value observed so far (assumed noiseless). Can be a `batch_shape`-shaped tensor, which in case of a batched model specifies potentially different values for each element of the batch. - kwargs: Not used. + constraints: A list of constraint callables which map a Tensor of posterior + samples of dimension `sample_shape x batch-shape x q x m`-dim to a + `sample_shape x batch-shape x q`-dim Tensor. The associated constraints + are considered satisfied if the output is less than zero. + eta: Temperature parameter(s) governing the smoothness of the sigmoid + approximation to the constraint indicators. For more details, on this + parameter, see the docs of `compute_smoothed_constraint_indicator`. + ignored: Not used. + Returns: A dict mapping kwarg names of the constructor to values. """ if best_f is None: - best_f = get_best_f_mc(training_data=training_data, objective=objective) - + best_f = get_best_f_mc( + training_data=training_data, + objective=objective, + posterior_transform=posterior_transform, + constraints=constraints, + model=model, + ) base_inputs = _construct_inputs_mc_base( model=model, objective=objective, @@ -603,7 +642,13 @@ def construct_inputs_qPI( X_pending=X_pending, ) - return {**base_inputs, "tau": tau, "best_f": best_f} + return { + **base_inputs, + "tau": tau, + "best_f": best_f, + "constraints": constraints, + "eta": eta, + } @acqf_input_constructor(qUpperConfidenceBound) @@ -615,7 +660,7 @@ def construct_inputs_qUCB( X_pending: Optional[Tensor] = None, sampler: Optional[MCSampler] = None, beta: float = 0.2, - **kwargs: Any, + **ignored: Any, ) -> Dict[str, Any]: r"""Construct kwargs for the `qUpperConfidenceBound` constructor. @@ -631,7 +676,7 @@ def construct_inputs_qUCB( sampler: The sampler used to draw base samples. If omitted, uses the acquisition functions's default sampler. beta: Controls tradeoff between mean and standard deviation in UCB. - kwargs: Not used. + ignored: Not used. Returns: A dict mapping kwarg names of the constructor to values. 
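For illustration (not part of this diff; the data, objective weights, and the 0.5 threshold below are made-up placeholders), the new `constraints` and `eta` constructor arguments could be exercised roughly as follows:

    import torch
    from botorch.acquisition import qExpectedImprovement
    from botorch.acquisition.input_constructors import get_acqf_input_constructor
    from botorch.acquisition.objective import LinearMCObjective
    from botorch.models import SingleTaskGP
    from botorch.utils.datasets import SupervisedDataset

    # Made-up training data: outcome 0 is the objective, outcome 1 a constraint.
    train_X = torch.rand(8, 2, dtype=torch.double)
    train_Y = torch.rand(8, 2, dtype=torch.double)
    model = SingleTaskGP(train_X, train_Y)
    dataset = SupervisedDataset(X=train_X, Y=train_Y)

    constructor = get_acqf_input_constructor(qExpectedImprovement)
    inputs = constructor(
        model=model,
        training_data=dataset,
        objective=LinearMCObjective(weights=torch.tensor([1.0, 0.0], dtype=torch.double)),
        # Feasible iff outcome 1 is below 0.5 (negative constraint values mean feasible).
        constraints=[lambda samples: samples[..., 1] - 0.5],
        eta=1e-3,
    )
    acqf = qExpectedImprovement(**inputs)

With `constraints` provided, the constructor computes `best_f` as the best feasible observed value via `compute_best_feasible_objective` rather than the plain maximum of the objective.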
@@ -1083,18 +1128,28 @@ def get_best_f_mc( training_data: MaybeDict[SupervisedDataset], objective: Optional[MCAcquisitionObjective] = None, posterior_transform: Optional[PosteriorTransform] = None, + constraints: Optional[List[Callable[[Tensor], Tensor]]] = None, + model: Optional[Model] = None, ) -> Tensor: if isinstance(training_data, dict) and not _field_is_shared( training_data, fieldname="X" ): raise NotImplementedError("Currently only block designs are supported.") + X_baseline = _get_dataset_field( + training_data, + fieldname="X", + transform=lambda field: field(), + assert_shared=True, + first_only=True, + ) + Y = _get_dataset_field( training_data, fieldname="Y", transform=lambda field: field(), join_rule=lambda field_tensors: torch.cat(field_tensors, dim=-1), - ) + ) # batch_shape x n x d if posterior_transform is not None: # retain the original tensor dimension since objective expects explicit @@ -1111,7 +1166,16 @@ def get_best_f_mc( "acquisition functions)." ) objective = IdentityMCObjective() - return objective(Y).max(-1).values + obj = objective(Y, X=X_baseline) # batch_shape x n + return compute_best_feasible_objective( + samples=Y, + obj=obj, + constraints=constraints, + model=model, + objective=objective, + posterior_transform=posterior_transform, + X_baseline=X_baseline, + ) def optimize_objective( diff --git a/botorch/acquisition/utils.py b/botorch/acquisition/utils.py index 9a1f4eab8b..a683896840 100644 --- a/botorch/acquisition/utils.py +++ b/botorch/acquisition/utils.py @@ -101,10 +101,19 @@ def get_acquisition_function( ) # instantiate and return the requested acquisition function if acquisition_function_name in ("qEI", "qPI"): - obj = objective( - model.posterior(X_observed, posterior_transform=posterior_transform).mean + # Since these are the non-noisy variants, use the posterior mean at the observed + # inputs directly to compute the best feasible value without sampling. 
+ Y = model.posterior(X_observed, posterior_transform=posterior_transform).mean + obj = objective(samples=Y, X=X_observed) + best_f = compute_best_feasible_objective( + samples=Y, + obj=obj, + constraints=constraints, + model=model, + objective=objective, + posterior_transform=posterior_transform, + X_baseline=X_observed, ) - best_f = obj.max(dim=-1).values if acquisition_function_name == "qEI": return monte_carlo.qExpectedImprovement( model=model, @@ -113,6 +122,8 @@ def get_acquisition_function( objective=objective, posterior_transform=posterior_transform, X_pending=X_pending, + constraints=constraints, + eta=eta, ) elif acquisition_function_name == "qPI": return monte_carlo.qProbabilityOfImprovement( @@ -123,6 +134,8 @@ def get_acquisition_function( posterior_transform=posterior_transform, X_pending=X_pending, tau=kwargs.get("tau", 1e-3), + constraints=constraints, + eta=eta, ) elif acquisition_function_name == "qNEI": return monte_carlo.qNoisyExpectedImprovement( @@ -135,6 +148,8 @@ def get_acquisition_function( prune_baseline=kwargs.get("prune_baseline", True), marginalize_dim=kwargs.get("marginalize_dim"), cache_root=kwargs.get("cache_root", True), + constraints=constraints, + eta=eta, ) elif acquisition_function_name == "qSR": return monte_carlo.qSimpleRegret( diff --git a/test/acquisition/test_input_constructors.py b/test/acquisition/test_input_constructors.py index dce2e5e5e7..e2a59d34ad 100644 --- a/test/acquisition/test_input_constructors.py +++ b/test/acquisition/test_input_constructors.py @@ -139,13 +139,13 @@ def test_get_best_f_mc(self): best_f = get_best_f_mc(training_data=self.blockX_multiY, objective=obj) multi_Y = torch.cat([d.Y() for d in self.blockX_multiY.values()], dim=-1) - best_f_expected = (multi_Y @ obj.weights).max() + best_f_expected = (multi_Y @ obj.weights).amax(dim=-1, keepdim=True) self.assertEqual(best_f, best_f_expected) post_tf = ScalarizedPosteriorTransform(weights=torch.ones(2)) best_f = get_best_f_mc( training_data=self.blockX_multiY, posterior_transform=post_tf ) - best_f_expected = (multi_Y.sum(dim=-1)).max() + best_f_expected = (multi_Y.sum(dim=-1)).amax(dim=-1, keepdim=True) self.assertEqual(best_f, best_f_expected) @mock.patch("botorch.acquisition.input_constructors.optimize_acqf") @@ -350,6 +350,9 @@ def test_construct_inputs_qEI(self): self.assertIsNone(kwargs["objective"]) self.assertIsNone(kwargs["X_pending"]) self.assertIsNone(kwargs["sampler"]) + self.assertIsNone(kwargs["constraints"]) + self.assertIsInstance(kwargs["eta"], float) + self.assertTrue(kwargs["eta"] < 1) X_pending = torch.rand(2, 2) objective = LinearMCObjective(torch.rand(2)) kwargs = c( @@ -362,6 +365,9 @@ def test_construct_inputs_qEI(self): self.assertTrue(torch.equal(kwargs["objective"].weights, objective.weights)) self.assertTrue(torch.equal(kwargs["X_pending"], X_pending)) self.assertIsNone(kwargs["sampler"]) + self.assertIsNone(kwargs["constraints"]) + self.assertIsInstance(kwargs["eta"], float) + self.assertTrue(kwargs["eta"] < 1) multi_Y = torch.cat([d.Y() for d in self.blockX_multiY.values()], dim=-1) best_f_expected = objective(multi_Y).max() self.assertEqual(kwargs["best_f"], best_f_expected) @@ -386,6 +392,10 @@ def test_construct_inputs_qNEI(self): self.assertIsNone(kwargs["sampler"]) self.assertTrue(kwargs["prune_baseline"]) self.assertTrue(torch.equal(kwargs["X_baseline"], self.blockX_blockY[0].X())) + self.assertIsNone(kwargs["constraints"]) + self.assertIsInstance(kwargs["eta"], float) + self.assertTrue(kwargs["eta"] < 1) + with 
self.assertRaisesRegex(ValueError, "Field `X` must be shared"): c(model=mock_model, training_data=self.multiX_multiY) X_baseline = torch.rand(2, 2) @@ -401,6 +411,9 @@ def test_construct_inputs_qNEI(self): self.assertIsNone(kwargs["sampler"]) self.assertFalse(kwargs["prune_baseline"]) self.assertTrue(torch.equal(kwargs["X_baseline"], X_baseline)) + self.assertIsNone(kwargs["constraints"]) + self.assertIsInstance(kwargs["eta"], float) + self.assertTrue(kwargs["eta"] < 1) def test_construct_inputs_qPI(self): c = get_acqf_input_constructor(qProbabilityOfImprovement) @@ -411,6 +424,9 @@ def test_construct_inputs_qPI(self): self.assertIsNone(kwargs["X_pending"]) self.assertIsNone(kwargs["sampler"]) self.assertEqual(kwargs["tau"], 1e-3) + self.assertIsNone(kwargs["constraints"]) + self.assertIsInstance(kwargs["eta"], float) + self.assertTrue(kwargs["eta"] < 1) X_pending = torch.rand(2, 2) objective = LinearMCObjective(torch.rand(2)) kwargs = c( @@ -425,6 +441,9 @@ def test_construct_inputs_qPI(self): self.assertTrue(torch.equal(kwargs["X_pending"], X_pending)) self.assertIsNone(kwargs["sampler"]) self.assertEqual(kwargs["tau"], 1e-2) + self.assertIsNone(kwargs["constraints"]) + self.assertIsInstance(kwargs["eta"], float) + self.assertTrue(kwargs["eta"] < 1) multi_Y = torch.cat([d.Y() for d in self.blockX_multiY.values()], dim=-1) best_f_expected = objective(multi_Y).max() self.assertEqual(kwargs["best_f"], best_f_expected) diff --git a/test/acquisition/test_utils.py b/test/acquisition/test_utils.py index d0b5410d7e..4fdcf33487 100644 --- a/test/acquisition/test_utils.py +++ b/test/acquisition/test_utils.py @@ -5,6 +5,7 @@ # LICENSE file in the root directory of this source tree. import itertools +import math from unittest import mock import torch @@ -60,12 +61,15 @@ def setUp(self): self.qmc = True self.ref_point = [0.0, 0.0] self.mo_objective = DummyMCMultiOutputObjective() - self.Y = torch.tensor([[1.0, 2.0]]) + self.Y = torch.tensor([[1.0, 2.0]]) # (2 x 1)-dim multi-objective outcomes self.seed = 1 @mock.patch(f"{monte_carlo.__name__}.qExpectedImprovement") def test_GetQEI(self, mock_acqf): - self.model = MockModel(MockPosterior(mean=torch.zeros(1, 2))) + n = len(self.X_observed) + mean = torch.arange(n, dtype=torch.double).view(-1, 1) + var = torch.ones_like(mean) + self.model = MockModel(MockPosterior(mean=mean, variance=var)) acqf = get_acquisition_function( acquisition_function_name="qEI", model=self.model, @@ -85,6 +89,8 @@ def test_GetQEI(self, mock_acqf): objective=self.objective, posterior_transform=None, X_pending=self.X_pending, + constraints=None, + eta=1e-3, ) # test batched model self.model = MockModel(MockPosterior(mean=torch.zeros(1, 2, 1))) @@ -125,10 +131,50 @@ def test_GetQEI(self, mock_acqf): ) self.assertEqual(mock_acqf.call_args[-1]["best_f"].item(), -1.0) + # with constraints + upper_bound = self.Y[0, 0] + 1 / 2 # = 1.5 + constraints = [lambda samples: samples[..., 0] - upper_bound] + eta = math.pi * 1e-2 # testing non-standard eta + + acqf = get_acquisition_function( + acquisition_function_name="qEI", + model=self.model, + objective=self.objective, + X_observed=self.X_observed, + X_pending=self.X_pending, + mc_samples=self.mc_samples, + seed=self.seed, + marginalize_dim=0, + constraints=constraints, + eta=eta, + ) + self.assertEqual(acqf, mock_acqf.return_value) + best_feasible_f = compute_best_feasible_objective( + samples=mean, + obj=self.objective(mean), + constraints=constraints, + model=self.model, + objective=self.objective, + X_baseline=self.X_observed, + ) + 
mock_acqf.assert_called_with( + model=self.model, + best_f=best_feasible_f, + sampler=mock.ANY, + objective=self.objective, + posterior_transform=None, + X_pending=self.X_pending, + constraints=constraints, + eta=eta, + ) + @mock.patch(f"{monte_carlo.__name__}.qProbabilityOfImprovement") def test_GetQPI(self, mock_acqf): # basic test - self.model = MockModel(MockPosterior(mean=torch.zeros(1, 2))) + n = len(self.X_observed) + mean = torch.arange(n, dtype=torch.double).view(-1, 1) + var = torch.ones_like(mean) + self.model = MockModel(MockPosterior(mean=mean, variance=var)) acqf = get_acquisition_function( acquisition_function_name="qPI", model=self.model, @@ -148,6 +194,8 @@ def test_GetQPI(self, mock_acqf): posterior_transform=None, X_pending=self.X_pending, tau=1e-3, + constraints=None, + eta=1e-3, ) args, kwargs = mock_acqf.call_args self.assertEqual(args, ()) @@ -197,9 +245,54 @@ def test_GetQPI(self, mock_acqf): ) self.assertTrue(acqf == mock_acqf.return_value) + # with constraints + n = len(self.X_observed) + mean = torch.arange(n, dtype=torch.double).view(-1, 1) + var = torch.ones_like(mean) + self.model = MockModel(MockPosterior(mean=mean, variance=var)) + upper_bound = self.Y[0, 0] + 1 / 2 # = 1.5 + constraints = [lambda samples: samples[..., 0] - upper_bound] + eta = math.pi * 1e-2 # testing non-standard eta + acqf = get_acquisition_function( + acquisition_function_name="qPI", + model=self.model, + objective=self.objective, + X_observed=self.X_observed, + X_pending=self.X_pending, + mc_samples=self.mc_samples, + seed=self.seed, + marginalize_dim=0, + constraints=constraints, + eta=eta, + ) + self.assertEqual(acqf, mock_acqf.return_value) + best_feasible_f = compute_best_feasible_objective( + samples=mean, + obj=self.objective(mean), + constraints=constraints, + model=self.model, + objective=self.objective, + X_baseline=self.X_observed, + ) + mock_acqf.assert_called_with( + model=self.model, + best_f=best_feasible_f, + sampler=mock.ANY, + objective=self.objective, + posterior_transform=None, + X_pending=self.X_pending, + tau=1e-3, + constraints=constraints, + eta=eta, + ) + @mock.patch(f"{monte_carlo.__name__}.qNoisyExpectedImprovement") def test_GetQNEI(self, mock_acqf): # basic test + n = len(self.X_observed) + mean = torch.arange(n, dtype=torch.double).view(-1, 1) + var = torch.ones_like(mean) + self.model = MockModel(MockPosterior(mean=mean, variance=var)) acqf = get_acquisition_function( acquisition_function_name="qNEI", model=self.model, @@ -257,6 +350,38 @@ def test_GetQNEI(self, mock_acqf): self.assertEqual(sampler.seed, 2) self.assertTrue(torch.equal(kwargs["X_baseline"], self.X_observed)) + # with constraints + upper_bound = self.Y[0, 0] + 1 / 2 # = 1.5 + constraints = [lambda samples: samples[..., 0] - upper_bound] + eta = math.pi * 1e-2 # testing non-standard eta + + acqf = get_acquisition_function( + acquisition_function_name="qNEI", + model=self.model, + objective=self.objective, + X_observed=self.X_observed, + X_pending=self.X_pending, + mc_samples=self.mc_samples, + seed=self.seed, + marginalize_dim=0, + constraints=constraints, + eta=eta, + ) + self.assertEqual(acqf, mock_acqf.return_value) + mock_acqf.assert_called_with( + model=self.model, + X_baseline=self.X_observed, + sampler=mock.ANY, + objective=self.objective, + posterior_transform=None, + X_pending=self.X_pending, + prune_baseline=True, + marginalize_dim=0, + cache_root=True, + constraints=constraints, + eta=eta, + ) + @mock.patch(f"{monte_carlo.__name__}.qSimpleRegret") def test_GetQSR(self, mock_acqf): # 
basic test From d33316324c3bc1605cf5bcda5c56011d7e362e69 Mon Sep 17 00:00:00 2001 From: Sebastian Ament Date: Thu, 13 Jul 2023 11:58:55 -0700 Subject: [PATCH 12/18] `compute_smoothed_constraint_indicator` -> `compute_smoothed_feasibility_indicator` (#1935) Summary: Pull Request resolved: https://github.com/pytorch/botorch/pull/1935 D47365085 introduced the aptly named `compute_feasibility_indicator`. This commit brings the smoothed counterpart in alignment with the naming convention. Reviewed By: Balandat Differential Revision: D47436246 fbshipit-source-id: 00be7f9fad99f1ce7e317e7b385629010e066d09 --- botorch/acquisition/input_constructors.py | 6 +++--- botorch/acquisition/monte_carlo.py | 12 ++++++------ botorch/acquisition/multi_objective/monte_carlo.py | 6 +++--- botorch/utils/objective.py | 4 ++-- test/utils/test_objective.py | 10 +++++----- 5 files changed, 19 insertions(+), 19 deletions(-) diff --git a/botorch/acquisition/input_constructors.py b/botorch/acquisition/input_constructors.py index f4ac50fd9c..49deb81512 100644 --- a/botorch/acquisition/input_constructors.py +++ b/botorch/acquisition/input_constructors.py @@ -482,7 +482,7 @@ def construct_inputs_qEI( are considered satisfied if the output is less than zero. eta: Temperature parameter(s) governing the smoothness of the sigmoid approximation to the constraint indicators. For more details, on this - parameter, see the docs of `compute_smoothed_constraint_indicator`. + parameter, see the docs of `compute_smoothed_feasibility_indicator`. ignored: Not used. Returns: @@ -548,7 +548,7 @@ def construct_inputs_qNEI( are considered satisfied if the output is less than zero. eta: Temperature parameter(s) governing the smoothness of the sigmoid approximation to the constraint indicators. For more details, on this - parameter, see the docs of `compute_smoothed_constraint_indicator`. + parameter, see the docs of `compute_smoothed_feasibility_indicator`. ignored: Not used. Returns: @@ -620,7 +620,7 @@ def construct_inputs_qPI( are considered satisfied if the output is less than zero. eta: Temperature parameter(s) governing the smoothness of the sigmoid approximation to the constraint indicators. For more details, on this - parameter, see the docs of `compute_smoothed_constraint_indicator`. + parameter, see the docs of `compute_smoothed_feasibility_indicator`. ignored: Not used. Returns: diff --git a/botorch/acquisition/monte_carlo.py b/botorch/acquisition/monte_carlo.py index c366fdcb0d..0b07c4d852 100644 --- a/botorch/acquisition/monte_carlo.py +++ b/botorch/acquisition/monte_carlo.py @@ -42,7 +42,7 @@ from botorch.exceptions.errors import UnsupportedError from botorch.models.model import Model from botorch.sampling.base import MCSampler -from botorch.utils.objective import compute_smoothed_constraint_indicator +from botorch.utils.objective import compute_smoothed_feasibility_indicator from botorch.utils.transforms import ( concatenate_pending_points, match_batch_shape, @@ -215,7 +215,7 @@ def __init__( acquistion utilities, e.g. all improvement-based acquisition functions. eta: Temperature parameter(s) governing the smoothness of the sigmoid approximation to the constraint indicators. For more details, on this - parameter, see the docs of `compute_smoothed_constraint_indicator`. + parameter, see the docs of `compute_smoothed_feasibility_indicator`. 
""" if constraints is not None and isinstance(objective, ConstrainedMCObjective): raise ValueError( @@ -305,7 +305,7 @@ def _apply_constraints(self, acqval: Tensor, samples: Tensor) -> Tensor: "Constraint-weighting requires unconstrained " "acquisition values to be non-negative." ) - acqval = acqval * compute_smoothed_constraint_indicator( + acqval = acqval * compute_smoothed_feasibility_indicator( constraints=self._constraints, samples=samples, eta=self._eta ) return acqval @@ -366,7 +366,7 @@ def __init__( are considered satisfied if the output is less than zero. eta: Temperature parameter(s) governing the smoothness of the sigmoid approximation to the constraint indicators. For more details, on this - parameter, see the docs of `compute_smoothed_constraint_indicator`. + parameter, see the docs of `compute_smoothed_feasibility_indicator`. """ super().__init__( model=model, @@ -457,7 +457,7 @@ def __init__( are considered satisfied if the output is less than zero. eta: Temperature parameter(s) governing the smoothness of the sigmoid approximation to the constraint indicators. For more details, on this - parameter, see the docs of `compute_smoothed_constraint_indicator`. + parameter, see the docs of `compute_smoothed_feasibility_indicator`. TODO: similar to qNEHVI, when we are using sequential greedy candidate selection, we could incorporate pending points X_baseline and compute @@ -671,7 +671,7 @@ def __init__( scalar is less than zero. eta: Temperature parameter(s) governing the smoothness of the sigmoid approximation to the constraint indicators. For more details, on this - parameter, see the docs of `compute_smoothed_constraint_indicator`. + parameter, see the docs of `compute_smoothed_feasibility_indicator`. """ super().__init__( model=model, diff --git a/botorch/acquisition/multi_objective/monte_carlo.py b/botorch/acquisition/multi_objective/monte_carlo.py index 8c4e857c1b..36e119fcae 100644 --- a/botorch/acquisition/multi_objective/monte_carlo.py +++ b/botorch/acquisition/multi_objective/monte_carlo.py @@ -57,7 +57,7 @@ from botorch.utils.multi_objective.box_decompositions.utils import ( _pad_batch_pareto_frontier, ) -from botorch.utils.objective import compute_smoothed_constraint_indicator +from botorch.utils.objective import compute_smoothed_feasibility_indicator from botorch.utils.torch import BufferDict from botorch.utils.transforms import ( concatenate_pending_points, @@ -279,7 +279,7 @@ def _compute_qehvi(self, samples: Tensor, X: Optional[Tensor] = None) -> Tensor: obj = self.objective(samples, X=X) q = obj.shape[-2] if self.constraints is not None: - feas_weights = compute_smoothed_constraint_indicator( + feas_weights = compute_smoothed_feasibility_indicator( constraints=self.constraints, samples=samples, eta=self.eta ) # `sample_shape x batch-shape x q` self._cache_q_subset_indices(q_out=q) @@ -414,7 +414,7 @@ def __init__( tensor the length of the tensor must match the number of provided constraints. The i-th constraint is then estimated with the i-th eta value. For more details, on this parameter, see the docs of - `compute_smoothed_constraint_indicator`. + `compute_smoothed_feasibility_indicator`. prune_baseline: If True, remove points in `X_baseline` that are highly unlikely to be the pareto optimal and better than the reference point. 
This can significantly improve computation time and diff --git a/botorch/utils/objective.py b/botorch/utils/objective.py index 5e8db010e5..d8034d49eb 100644 --- a/botorch/utils/objective.py +++ b/botorch/utils/objective.py @@ -87,7 +87,7 @@ def apply_constraints_nonnegative_soft( Returns: A `n_samples x b x q (x m')`-dim tensor of feasibility-weighted objectives. """ - w = compute_smoothed_constraint_indicator( + w = compute_smoothed_feasibility_indicator( constraints=constraints, samples=samples, eta=eta ) if obj.dim() == samples.dim(): @@ -116,7 +116,7 @@ def compute_feasibility_indicator( return ind -def compute_smoothed_constraint_indicator( +def compute_smoothed_feasibility_indicator( constraints: List[Callable[[Tensor], Tensor]], samples: Tensor, eta: Union[Tensor, float], diff --git a/test/utils/test_objective.py b/test/utils/test_objective.py index ee35bf08e5..c7000b712f 100644 --- a/test/utils/test_objective.py +++ b/test/utils/test_objective.py @@ -9,7 +9,7 @@ from botorch.utils import apply_constraints, get_objective_weights_transform from botorch.utils.objective import ( compute_feasibility_indicator, - compute_smoothed_constraint_indicator, + compute_smoothed_feasibility_indicator, ) from botorch.utils.testing import BotorchTestCase from torch import Tensor @@ -196,14 +196,14 @@ def test_constraint_indicators(self): self.assertAllClose(ind, torch.zeros_like(ind)) self.assertEqual(ind.dtype, torch.bool) - smoothed_ind = compute_smoothed_constraint_indicator( + smoothed_ind = compute_smoothed_feasibility_indicator( constraints=[zeros_f], samples=samples, eta=1e-3 ) self.assertAllClose(smoothed_ind, ones_f(samples) / 2) # two constraints samples = torch.randn(1) - smoothed_ind = compute_smoothed_constraint_indicator( + smoothed_ind = compute_smoothed_feasibility_indicator( constraints=[zeros_f, zeros_f], samples=samples, eta=1e-3, @@ -218,13 +218,13 @@ def test_constraint_indicators(self): ) self.assertAllClose(ind, torch.ones_like(ind)) - smoothed_ind = compute_smoothed_constraint_indicator( + smoothed_ind = compute_smoothed_feasibility_indicator( constraints=[minus_one_f], samples=samples, eta=1e-3 ) self.assertTrue((smoothed_ind > 3 / 4).all()) with self.assertRaisesRegex(ValueError, "Number of provided constraints"): - compute_smoothed_constraint_indicator( + compute_smoothed_feasibility_indicator( constraints=[zeros_f, zeros_f], samples=samples, eta=torch.tensor([0.1], device=self.device), From 50bcf9512106dc7ccff9ef7bc01484abaaddc728 Mon Sep 17 00:00:00 2001 From: Sebastian Ament Date: Tue, 18 Jul 2023 06:57:49 -0700 Subject: [PATCH 13/18] qLogEI (#1936) Summary: Pull Request resolved: https://github.com/pytorch/botorch/pull/1936 This commit introduces `qLogExpectedImprovement` (`qLogEI`), which computes the logarithm of a smooth approximation to the regular EI utility. As EI is known to suffer from vanishing gradients, especially for challenging, constrained, or high-dimensional problems, using `qLogEI` can lead to significant optimization improvements. 
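A rough usage sketch (toy data made up for illustration; hyperparameter fitting omitted): `qLogEI` is a drop-in replacement for `qEI` that returns values on a log scale, so maximizing it corresponds to maximizing a smoothed version of qEI and it can be passed to `optimize_acqf` directly:

    import torch
    from botorch.acquisition import qLogExpectedImprovement
    from botorch.models import SingleTaskGP
    from botorch.optim import optimize_acqf

    # Toy single-output data; a fitted model would normally be used here.
    train_X = torch.rand(10, 3, dtype=torch.double)
    train_Y = train_X.sum(dim=-1, keepdim=True).sin()
    model = SingleTaskGP(train_X, train_Y)

    # Acquisition values are logarithms of (smoothed) expected improvement.
    acqf = qLogExpectedImprovement(model=model, best_f=train_Y.max())
    candidates, acq_value = optimize_acqf(
        acq_function=acqf,
        bounds=torch.tensor([[0.0] * 3, [1.0] * 3], dtype=torch.double),
        q=2,
        num_restarts=4,
        raw_samples=32,
    )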
Reviewed By: Balandat Differential Revision: D47439148 fbshipit-source-id: 3d43fd359f678b1a6ce1674ca565890adab338ea --- botorch/acquisition/__init__.py | 12 + botorch/acquisition/input_constructors.py | 3 +- botorch/acquisition/logei.py | 261 +++++++++++++++++++ botorch/acquisition/monte_carlo.py | 17 +- botorch/utils/objective.py | 22 +- botorch/utils/safe_math.py | 156 +++++++++++- sphinx/source/acquisition.rst | 3 + test/acquisition/test_logei.py | 296 ++++++++++++++++++++++ test/utils/test_objective.py | 12 +- test/utils/test_safe_math.py | 166 +++++++++++- 10 files changed, 934 insertions(+), 14 deletions(-) create mode 100644 botorch/acquisition/logei.py create mode 100644 test/acquisition/test_logei.py diff --git a/botorch/acquisition/__init__.py b/botorch/acquisition/__init__.py index 7ff09b30c5..5bd208cd81 100644 --- a/botorch/acquisition/__init__.py +++ b/botorch/acquisition/__init__.py @@ -16,6 +16,8 @@ AnalyticAcquisitionFunction, ConstrainedExpectedImprovement, ExpectedImprovement, + LogExpectedImprovement, + LogNoisyExpectedImprovement, NoisyExpectedImprovement, PosteriorMean, ProbabilityOfImprovement, @@ -32,6 +34,10 @@ qKnowledgeGradient, qMultiFidelityKnowledgeGradient, ) +from botorch.acquisition.logei import ( + LogImprovementMCAcquisitionFunction, + qLogExpectedImprovement, +) from botorch.acquisition.max_value_entropy_search import ( MaxValueBase, qLowerBoundMaxValueEntropy, @@ -46,6 +52,7 @@ qProbabilityOfImprovement, qSimpleRegret, qUpperConfidenceBound, + SampleReducingMCAcquisitionFunction, ) from botorch.acquisition.multi_step_lookahead import qMultiStepLookahead from botorch.acquisition.objective import ( @@ -71,6 +78,8 @@ "AnalyticExpectedUtilityOfBestOption", "ConstrainedExpectedImprovement", "ExpectedImprovement", + "LogExpectedImprovement", + "LogNoisyExpectedImprovement", "FixedFeatureAcquisitionFunction", "GenericCostAwareUtility", "InverseCostWeightedUtility", @@ -85,6 +94,8 @@ "UpperConfidenceBound", "qAnalyticProbabilityOfImprovement", "qExpectedImprovement", + "LogImprovementMCAcquisitionFunction", + "qLogExpectedImprovement", "qKnowledgeGradient", "MaxValueBase", "qMultiFidelityKnowledgeGradient", @@ -104,6 +115,7 @@ "LearnedObjective", "LinearMCObjective", "MCAcquisitionFunction", + "SampleReducingMCAcquisitionFunction", "MCAcquisitionObjective", "ScalarizedPosteriorTransform", "get_acquisition_function", diff --git a/botorch/acquisition/input_constructors.py b/botorch/acquisition/input_constructors.py index 49deb81512..cff41d46e1 100644 --- a/botorch/acquisition/input_constructors.py +++ b/botorch/acquisition/input_constructors.py @@ -47,6 +47,7 @@ qKnowledgeGradient, qMultiFidelityKnowledgeGradient, ) +from botorch.acquisition.logei import qLogExpectedImprovement from botorch.acquisition.max_value_entropy_search import ( qMaxValueEntropy, qMultiFidelityMaxValueEntropy, @@ -449,7 +450,7 @@ def construct_inputs_qSimpleRegret( ) -@acqf_input_constructor(qExpectedImprovement) +@acqf_input_constructor(qExpectedImprovement, qLogExpectedImprovement) def construct_inputs_qEI( model: Model, training_data: MaybeDict[SupervisedDataset], diff --git a/botorch/acquisition/logei.py b/botorch/acquisition/logei.py new file mode 100644 index 0000000000..95affe1c28 --- /dev/null +++ b/botorch/acquisition/logei.py @@ -0,0 +1,261 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +r""" +Batch implementations of the LogEI family of improvement-based acquisition functions. +""" + + +from __future__ import annotations + +from functools import partial + +from typing import Callable, List, Optional, TypeVar, Union + +import torch +from botorch.acquisition.monte_carlo import SampleReducingMCAcquisitionFunction +from botorch.acquisition.objective import ( + ConstrainedMCObjective, + MCAcquisitionObjective, + PosteriorTransform, +) +from botorch.exceptions.errors import BotorchError +from botorch.models.model import Model +from botorch.sampling.base import MCSampler +from botorch.utils.safe_math import ( + fatmax, + log_fatplus, + log_softplus, + logmeanexp, + smooth_amax, +) +from torch import Tensor + +""" +NOTE: On the default temperature parameters: + +tau_relu: It is generally important to set `tau_relu` to be very small, in particular, +smaller than the expected improvement value. Otherwise, the optimization can stagnate. +By setting `tau_relu=1e-6` by default, stagnation is exceedingly unlikely to occur due +to the smooth ReLU approximation for practical applications of BO. +IDEA: We could consider shrinking `tau_relu` with the progression of the optimization. + +tau_max: This is only relevant for the batch (`q > 1`) case, and `tau_max=1e-2` is +sufficient to get a good approximation to the maximum improvement in the batch of +candidates. If `fat=False`, the smooth approximation to the maximum can saturate +numerically. It is therefore recommended to use `fat=True` when optimizing batches +of `q > 1` points. +""" +TAU_RELU = 1e-6 +TAU_MAX = 1e-2 +FloatOrTensor = TypeVar("FloatOrTensor", float, Tensor) + + +class LogImprovementMCAcquisitionFunction(SampleReducingMCAcquisitionFunction): + r""" + Abstract base class for Monte-Carlo-based batch LogEI acquisition functions. + + :meta private: + """ + + _log: bool = True + + def __init__( + self, + model: Model, + sampler: Optional[MCSampler] = None, + objective: Optional[MCAcquisitionObjective] = None, + posterior_transform: Optional[PosteriorTransform] = None, + X_pending: Optional[Tensor] = None, + constraints: Optional[List[Callable[[Tensor], Tensor]]] = None, + eta: Union[Tensor, float] = 1e-3, + fat: bool = True, + tau_max: float = TAU_MAX, + ) -> None: + r""" + Args: + model: A fitted model. + sampler: The sampler used to draw base samples. If not given, + a sampler is generated using `get_sampler`. + NOTE: For posteriors that do not support base samples, + a sampler compatible with the intended use case must be provided. + See `ForkedRNGSampler` and `StochasticSampler` as examples. + objective: The MCAcquisitionObjective under which the samples are + evaluated. Defaults to `IdentityMCObjective()`. + posterior_transform: A PosteriorTransform (optional). + X_pending: A `batch_shape x m x d`-dim Tensor of `m` design points + that have been submitted for function evaluation + but have not yet been evaluated. + constraints: A list of constraint callables which map a Tensor of posterior + samples of dimension `sample_shape x batch-shape x q x m`-dim to a + `sample_shape x batch-shape x q`-dim Tensor. The associated constraints + are satisfied if `constraint(samples) < 0`. + eta: Temperature parameter(s) governing the smoothness of the sigmoid + approximation to the constraint indicators. See the docs of + `compute_(log_)constraint_indicator` for more details on this parameter. + fat: Toggles the logarithmic / linear asymptotic behavior of the smooth + approximation to the ReLU.
+ tau_max: Temperature parameter controlling the sharpness of the + approximation to the `max` operator over the `q` candidate points. + """ + if isinstance(objective, ConstrainedMCObjective): + raise BotorchError( + "Log-Improvement should not be used with `ConstrainedMCObjective`." + "Please pass the `constraints` directly to the constructor of the " + "acquisition function." + ) + q_reduction = partial(fatmax if fat else smooth_amax, tau=tau_max) + super().__init__( + model=model, + sampler=sampler, + objective=objective, + posterior_transform=posterior_transform, + X_pending=X_pending, + sample_reduction=logmeanexp, + q_reduction=q_reduction, + constraints=constraints, + eta=eta, + fat=fat, + ) + self.tau_max = tau_max + + +class qLogExpectedImprovement(LogImprovementMCAcquisitionFunction): + r"""MC-based batch Log Expected Improvement. + + This computes qLogEI by + (1) sampling the joint posterior over q points, + (2) evaluating the smoothed log improvement over the current best for each sample, + (3) smoothly maximizing over q, and + (4) averaging over the samples in log space. + + `qLogEI(X) ~ log(qEI(X)) = log(E(max(max Y - best_f, 0)))`, + + where `Y ~ f(X)`, and `X = (x_1,...,x_q)`. + + Example: + >>> model = SingleTaskGP(train_X, train_Y) + >>> best_f = train_Y.max()[0] + >>> sampler = SobolQMCNormalSampler(1024) + >>> qLogEI = qLogExpectedImprovement(model, best_f, sampler) + >>> qei = qLogEI(test_X) + """ + + def __init__( + self, + model: Model, + best_f: Union[float, Tensor], + sampler: Optional[MCSampler] = None, + objective: Optional[MCAcquisitionObjective] = None, + posterior_transform: Optional[PosteriorTransform] = None, + X_pending: Optional[Tensor] = None, + constraints: Optional[List[Callable[[Tensor], Tensor]]] = None, + eta: Union[Tensor, float] = 1e-3, + fat: bool = True, + tau_max: float = TAU_MAX, + tau_relu: float = TAU_RELU, + ) -> None: + r"""q-Log Expected Improvement. + + Args: + model: A fitted model. + best_f: The best objective value observed so far (assumed noiseless). Can be + a `batch_shape`-shaped tensor, which in case of a batched model + specifies potentially different values for each element of the batch. + sampler: The sampler used to draw base samples. See `MCAcquisitionFunction` + more details. + objective: The MCAcquisitionObjective under which the samples are evaluated. + Defaults to `IdentityMCObjective()`. + posterior_transform: A PosteriorTransform (optional). + X_pending: A `m x d`-dim Tensor of `m` design points that have been + submitted for function evaluation but have not yet been evaluated. + Concatenated into `X` upon forward call. Copied and set to have no + gradient. + constraints: A list of constraint callables which map a Tensor of posterior + samples of dimension `sample_shape x batch-shape x q x m`-dim to a + `sample_shape x batch-shape x q`-dim Tensor. The associated constraints + are satisfied if `constraint(samples) < 0`. + eta: Temperature parameter(s) governing the smoothness of the sigmoid + approximation to the constraint indicators. See the docs of + `compute_(log_)smoothed_constraint_indicator` for details. + fat: Toggles the logarithmic / linear asymptotic behavior of the smooth + approximation to the ReLU. + tau_max: Temperature parameter controlling the sharpness of the smooth + approximations to max. + tau_relu: Temperature parameter controlling the sharpness of the smooth + approximations to ReLU. 
+ """ + super().__init__( + model=model, + sampler=sampler, + objective=objective, + posterior_transform=posterior_transform, + X_pending=X_pending, + constraints=constraints, + eta=eta, + tau_max=check_tau(tau_max, name="tau_max"), + fat=fat, + ) + self.register_buffer("best_f", torch.as_tensor(best_f)) + self.tau_relu = check_tau(tau_relu, name="tau_relu") + + def _sample_forward(self, obj: Tensor) -> Tensor: + r"""Evaluate qLogExpectedImprovement on the candidate set `X`. + + Args: + obj: `mc_shape x batch_shape x q`-dim Tensor of MC objective values. + + Returns: + A `mc_shape x batch_shape x q`-dim Tensor of expected improvement values. + """ + li = _log_improvement( + Y=obj, + best_f=self.best_f, + tau=self.tau_relu, + fat=self._fat, + ) + return li + + +""" +###################################### utils ########################################## +""" + + +def _log_improvement( + Y: Tensor, + best_f: Tensor, + tau: Union[float, Tensor], + fat: bool, +) -> Tensor: + """Computes the logarithm of the softplus-smoothed improvement, i.e. + `log_softplus(Y - best_f, beta=(1 / tau))`. + Note that softplus is an approximation to the regular ReLU objective whose maximum + pointwise approximation error is linear with respect to tau as tau goes to zero. + + Args: + obj: `mc_samples x batch_shape x q`-dim Tensor of output samples. + best_f: Best previously observed objective value(s), broadcastable with `obj`. + tau: Temperature parameter for smooth approximation of ReLU. + as `tau -> 0`, maximum pointwise approximation error is linear w.r.t. `tau`. + fat: Toggles the logarithmic / linear asymptotic behavior of the + smooth approximation to ReLU. + + Returns: + A `mc_samples x batch_shape x q`-dim Tensor of improvement values. + """ + log_soft_clamp = log_fatplus if fat else log_softplus + Z = Y - best_f.to(Y) + return log_soft_clamp(Z, tau=tau) # ~ ((Y - best_f) / Y_std).clamp(0) + + +def check_tau(tau: FloatOrTensor, name: str) -> FloatOrTensor: + """Checks the validity of the tau arguments of the functions below, and returns + `tau` if it is valid.""" + if isinstance(tau, Tensor) and tau.numel() != 1: + raise ValueError(name + f" is not a scalar: {tau.numel() = }.") + if not (tau > 0): + raise ValueError(name + f" is non-positive: {tau = }.") + return tau diff --git a/botorch/acquisition/monte_carlo.py b/botorch/acquisition/monte_carlo.py index 0b07c4d852..5d58f8d332 100644 --- a/botorch/acquisition/monte_carlo.py +++ b/botorch/acquisition/monte_carlo.py @@ -170,6 +170,8 @@ class SampleReducingMCAcquisitionFunction(MCAcquisitionFunction): forward pass. These problems are circumvented by the design of this class. """ + _log: bool = False # whether the acquisition utilities are in log-space + def __init__( self, model: Model, @@ -181,6 +183,7 @@ def __init__( q_reduction: SampleReductionProtocol = torch.amax, constraints: Optional[List[Callable[[Tensor], Tensor]]] = None, eta: Union[Tensor, float] = 1e-3, + fat: bool = False, ): r"""Constructor of SampleReducingMCAcquisitionFunction. @@ -216,6 +219,8 @@ def __init__( eta: Temperature parameter(s) governing the smoothness of the sigmoid approximation to the constraint indicators. For more details, on this parameter, see the docs of `compute_smoothed_feasibility_indicator`. + fat: Wether to apply a fat-tailed smooth approximation to the feasibility + indicator or the canonical sigmoid approximation. 
""" if constraints is not None and isinstance(objective, ConstrainedMCObjective): raise ValueError( @@ -236,6 +241,7 @@ def __init__( self._q_reduction = partial(q_reduction, dim=-1) self._constraints = constraints self._eta = eta + self._fat = fat @concatenate_pending_points @t_batch_mode_transform() @@ -300,14 +306,19 @@ def _apply_constraints(self, acqval: Tensor, samples: Tensor) -> Tensor: multiplied by a smoothed constraint indicator per sample. """ if self._constraints is not None: - if (acqval < 0).any(): + if not self._log and (acqval < 0).any(): raise ValueError( "Constraint-weighting requires unconstrained " "acquisition values to be non-negative." ) - acqval = acqval * compute_smoothed_feasibility_indicator( - constraints=self._constraints, samples=samples, eta=self._eta + ind = compute_smoothed_feasibility_indicator( + constraints=self._constraints, + samples=samples, + eta=self._eta, + log=self._log, + fat=self._fat, ) + acqval = acqval.add(ind) if self._log else acqval.mul(ind) return acqval diff --git a/botorch/utils/objective.py b/botorch/utils/objective.py index d8034d49eb..e30adece1d 100644 --- a/botorch/utils/objective.py +++ b/botorch/utils/objective.py @@ -13,6 +13,7 @@ from typing import Callable, List, Optional, Union import torch +from botorch.utils.safe_math import log_fatmoid, logexpit from torch import Tensor @@ -120,12 +121,17 @@ def compute_smoothed_feasibility_indicator( constraints: List[Callable[[Tensor], Tensor]], samples: Tensor, eta: Union[Tensor, float], + log: bool = False, + fat: bool = False, ) -> Tensor: r"""Computes the smoothed feasibility indicator of a list of constraints. Given posterior samples, using a sigmoid to smoothly approximate the feasibility indicator of each individual constraint to ensure differentiability and high - gradient signal. + gradient signal. The `fat` and `log` options improve the numerical behavior of + the smooth approximation. + + NOTE: *Negative* constraint values are associated with feasibility. Args: constraints: A list of callables, each mapping a Tensor of size `b x q x m` @@ -138,6 +144,8 @@ def compute_smoothed_feasibility_indicator( constraint in constraints. In case of a tensor the length of the tensor must match the number of provided constraints. The i-th constraint is then estimated with the i-th eta value. + log: Toggles the computation of the log-feasibility indicator. + fat: Toggles the computation of the fat-tailed feasibility indicator. Returns: A `n_samples x b x q`-dim tensor of feasibility indicator values. @@ -148,12 +156,14 @@ def compute_smoothed_feasibility_indicator( raise ValueError( "Number of provided constraints and number of provided etas do not match." ) - is_feasible = torch.ones_like(samples[..., 0]) + if not (eta > 0).all(): + raise ValueError("eta must be positive.") + is_feasible = torch.zeros_like(samples[..., 0]) + log_sigmoid = log_fatmoid if fat else logexpit for constraint, e in zip(constraints, eta): - w = soft_eval_constraint(constraint(samples), eta=e) - is_feasible = is_feasible.mul(w) # TODO: add log version. + is_feasible = is_feasible + log_sigmoid(-constraint(samples) / e) - return is_feasible + return is_feasible if log else is_feasible.exp() def soft_eval_constraint(lhs: Tensor, eta: float = 1e-3) -> Tensor: @@ -172,7 +182,7 @@ def soft_eval_constraint(lhs: Tensor, eta: float = 1e-3) -> Tensor: `value(x) -> 1` as x becomes negative. 
""" if eta <= 0: - raise ValueError("eta must be positive") + raise ValueError("eta must be positive.") return torch.sigmoid(-lhs / eta) diff --git a/botorch/utils/safe_math.py b/botorch/utils/safe_math.py index 83a2155a68..a0c79020cc 100644 --- a/botorch/utils/safe_math.py +++ b/botorch/utils/safe_math.py @@ -20,10 +20,13 @@ from typing import Tuple, Union import torch +from botorch.exceptions import UnsupportedError from botorch.utils.constants import get_constants_like from torch import finfo, Tensor +from torch.nn.functional import softplus _log2 = math.log(2) +_inv_sqrt_3 = math.sqrt(1 / 3) # Unary ops @@ -76,6 +79,23 @@ def log1mexp(x: Tensor) -> Tensor: ) +def log1pexp(x: Tensor) -> Tensor: + """Numerically accurate evaluation of log(1 + exp(x)). + See [Maechler2012accurate]_ for details. + """ + mask = x <= 18 + return torch.where( + mask, + (lambda z: z.exp().log1p())(x.masked_fill(~mask, 0)), + (lambda z: z + (-z).exp())(x.masked_fill(mask, 0)), + ) + + +def logexpit(X: Tensor) -> Tensor: + """Computes the logarithm of the expit (a.k.a. sigmoid) function.""" + return -log1pexp(-X) + + def logdiffexp(log_a: Tensor, log_b: Tensor) -> Tensor: """Computes log(b - a) accurately given log(a) and log(b). Assumes, log_b > log_a, i.e. b > a > 0. @@ -93,7 +113,7 @@ def logdiffexp(log_a: Tensor, log_b: Tensor) -> Tensor: def logmeanexp( X: Tensor, dim: Union[int, Tuple[int, ...]], keepdim: bool = False ) -> Tensor: - """Computes log(mean(exp(X), dim=dim, keepdim=keepdim)). + """Computes `log(mean(exp(X), dim=dim, keepdim=keepdim))`. Args: X: Values of which to compute the logmeanexp. @@ -101,7 +121,139 @@ def logmeanexp( keepdim: If True, keeps the reduced dimensions. Returns: - A Tensor of values corresponding to log(mean(exp(X), dim=dim)). + A Tensor of values corresponding to `log(mean(exp(X), dim=dim))`. """ n = X.shape[dim] if isinstance(dim, int) else math.prod(X.shape[i] for i in dim) return torch.logsumexp(X, dim=dim, keepdim=keepdim) - math.log(n) + + +def log_softplus(x: Tensor, tau: Union[float, Tensor] = 1.0) -> Tensor: + """Computes the logarithm of the softplus function with high numerical accuracy. + + Args: + x: Input tensor, should have single or double precision floats. + tau: Decreasing tau increases the tightness of the + approximation to ReLU. Non-negative and defaults to 1.0. + + Returns: + Tensor corresponding to `log(softplus(x))`. + """ + check_dtype_float32_or_float64(x) + tau = torch.as_tensor(tau, dtype=x.dtype, device=x.device) + # cutoff chosen to achieve accuracy to machine epsilon + upper = 16 if x.dtype == torch.float32 else 32 + lower = -15 if x.dtype == torch.float32 else -35 + mask = x / tau > lower + return torch.where( + mask, + softplus(x.masked_fill(~mask, lower), beta=(1 / tau), threshold=upper).log(), + x / tau + tau.log(), + ) + + +def smooth_amax(X: Tensor, tau: Union[float, Tensor] = 1e-3, dim: int = -1) -> Tensor: + """Computes a smooth approximation to `max(X, dim=dim)`, i.e the maximum value of + `X` over dimension `dim`, using the logarithm of the `l_(1/tau)` norm of `exp(X)`. + Note that when `X = log(U)` is the *logarithm* of an acquisition utility `U`, + + `logsumexp(log(U) / tau) * tau = log(sum(U^(1/tau))^tau) = log(norm(U, ord=(1/tau))` + + Args: + X: A Tensor from which to compute the smoothed amax. + tau: Temperature parameter controlling the smooth approximation + to max operator, becomes tighter as tau goes to 0. Needs to be positive. + + Returns: + A Tensor of smooth approximations to `max(X, dim=dim)`. 
+ """ + # consider normalizing by log_n = math.log(X.shape[dim]) to reduce error + return torch.logsumexp(X / tau, dim=dim) * tau # ~ X.amax(dim=dim) + + +def check_dtype_float32_or_float64(X: Tensor) -> None: + if X.dtype != torch.float32 and X.dtype != torch.float64: + raise UnsupportedError( + f"Only dtypes float32 and float64 are supported, but received {X.dtype}." + ) + + +def log_fatplus(x: Tensor, tau: Union[float, Tensor] = 1.0) -> Tensor: + """Computes the logarithm of the fat-tailed softplus. + + NOTE: Separated out in case the complexity of the `log` implementation increases + in the future. + """ + return fatplus(x, tau=tau).log() + + +def fatplus(x: Tensor, tau: Union[float, Tensor] = 1.0) -> Tensor: + """Computes a fat-tailed approximation to `ReLU(x) = max(x, 0)` by linearly + combining a regular softplus function and the density function of a Cauchy + distribution. The coefficient `alpha` of the Cauchy density is chosen to guarantee + monotonicity and convexity. + + Args: + x: A Tensor on whose values to compute the smoothed function. + + Returns: + A Tensor of values of the fat-tailed softplus. + """ + + def _fatplus(x: Tensor) -> Tensor: + alpha = 1e-1 # guarantees monotonicity and convexity (TODO: ref + Lemma 4) + return softplus(x) + alpha * cauchy(x) + + return tau * _fatplus(x / tau) + + +def fatmax(X: Tensor, dim: int, tau: Union[float, Tensor] = 1.0) -> Tensor: + """Computes a smooth approximation to amax(X, dim=dim) with a fat tail. + + Args: + X: A Tensor from which to compute the smoothed amax. + tau: Temperature parameter controlling the smooth approximation + to max operator, becomes tighter as tau goes to 0. Needs to be positive. + standardize: Toggles the temperature standardization of the smoothed function. + + Returns: + A Tensor of smooth approximations to `max(X, dim=dim)` with a fat tail. + """ + if X.shape[dim] == 1: + return X.squeeze(dim) + + M = X.amax(dim=dim, keepdim=True) + Y = (X - M) / tau # NOTE: this would cause NaNs when X has Infs. + M = M.squeeze(dim) + return M + tau * cauchy(Y).sum(dim=dim).log() # could change to mean + + +def log_fatmoid(X: Tensor, tau: Union[float, Tensor] = 1.0) -> Tensor: + """Computes the logarithm of the fatmoid. Separated out in case the implementation + of the logarithm becomes more complex in the future to ensure numerical stability. + """ + return fatmoid(X, tau=tau).log() + + +def fatmoid(X: Tensor, tau: Union[float, Tensor] = 1.0) -> Tensor: + """Computes a twice continuously differentiable approximation to the Heaviside + step function with a fat tail, i.e. `O(1 / x^2)` as `x` goes to -inf. + + Args: + X: A Tensor from which to compute the smoothed step function. + tau: Temperature parameter controlling the smoothness of the approximation. + + Returns: + A tensor of fat-tailed approximations to the Heaviside step function. + """ + X = X / tau + m = _inv_sqrt_3 # this defines the inflection point + return torch.where( + X < 0, + 2 / 3 * cauchy(X - m), + 1 - 2 / 3 * cauchy(X + m), + ) + + +def cauchy(x: Tensor) -> Tensor: + """Computes a Lorentzian, i.e. an un-normalized Cauchy density function.""" + return 1 / (1 + x.square()) diff --git a/sphinx/source/acquisition.rst b/sphinx/source/acquisition.rst index 3aae7e277b..c46c906a89 100644 --- a/sphinx/source/acquisition.rst +++ b/sphinx/source/acquisition.rst @@ -60,6 +60,9 @@ Monte-Carlo Acquisition Functions :members: :exclude-members: MCAcquisitionFunction +.. 
automodule:: botorch.acquisition.logei + :members: + Multi-Objective Analytic Acquisition Functions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. automodule:: botorch.acquisition.multi_objective.analytic diff --git a/test/acquisition/test_logei.py b/test/acquisition/test_logei.py new file mode 100644 index 0000000000..6d84ad671e --- /dev/null +++ b/test/acquisition/test_logei.py @@ -0,0 +1,296 @@ +#!/usr/bin/env python3 +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import warnings +from unittest import mock + +import torch +from botorch import settings +from botorch.acquisition import ( + LogImprovementMCAcquisitionFunction, + qLogExpectedImprovement, +) +from botorch.acquisition.input_constructors import ACQF_INPUT_CONSTRUCTOR_REGISTRY +from botorch.acquisition.monte_carlo import qExpectedImprovement +from botorch.acquisition.objective import ( + ConstrainedMCObjective, + IdentityMCObjective, + PosteriorTransform, +) +from botorch.exceptions import BotorchWarning, UnsupportedError +from botorch.exceptions.errors import BotorchError +from botorch.sampling.normal import IIDNormalSampler, SobolQMCNormalSampler +from botorch.utils.testing import BotorchTestCase, MockModel, MockPosterior +from torch import Tensor + + +def infeasible_con(samples: Tensor) -> Tensor: + return torch.ones_like(samples[..., 0]) + + +def feasible_con(samples: Tensor) -> Tensor: + return -torch.ones_like(samples[..., 0]) + + +class DummyLogImprovementAcquisitionFunction(LogImprovementMCAcquisitionFunction): + def _sample_forward(self, X): + pass + + +class DummyNonScalarizingPosteriorTransform(PosteriorTransform): + scalarize = False + + def evaluate(self, Y): + pass # pragma: no cover + + def forward(self, posterior): + pass # pragma: no cover + + +class TestLogImprovementAcquisitionFunction(BotorchTestCase): + def test_abstract_raises(self): + with self.assertRaises(TypeError): + LogImprovementMCAcquisitionFunction() + # raise if model is multi-output, but no outcome transform or objective + # are given + no = "botorch.utils.testing.MockModel.num_outputs" + with mock.patch(no, new_callable=mock.PropertyMock) as mock_num_outputs: + mock_num_outputs.return_value = 2 + mm = MockModel(MockPosterior()) + with self.assertRaises(UnsupportedError): + DummyLogImprovementAcquisitionFunction(model=mm) + # raise if model is multi-output, but outcome transform does not + # scalarize and no objetive is given + with mock.patch(no, new_callable=mock.PropertyMock) as mock_num_outputs: + mock_num_outputs.return_value = 2 + mm = MockModel(MockPosterior()) + ptf = DummyNonScalarizingPosteriorTransform() + with self.assertRaises(UnsupportedError): + DummyLogImprovementAcquisitionFunction( + model=mm, posterior_transform=ptf + ) + + mm = MockModel(MockPosterior()) + objective = ConstrainedMCObjective( + IdentityMCObjective(), + constraints=[lambda samples: torch.zeros_like(samples[..., 0])], + ) + with self.assertRaisesRegex( + BotorchError, + "Log-Improvement should not be used with `ConstrainedMCObjective`.", + ): + DummyLogImprovementAcquisitionFunction(model=mm, objective=objective) + + +class TestQLogExpectedImprovement(BotorchTestCase): + def test_q_log_expected_improvement(self): + self.assertIn(qLogExpectedImprovement, ACQF_INPUT_CONSTRUCTOR_REGISTRY.keys()) + for dtype in (torch.float, torch.double): + tkwargs = {"device": self.device, "dtype": dtype} + # the event shape is `b x q x 
t` = 1 x 1 x 1 + samples = torch.zeros(1, 1, 1, **tkwargs) + mm = MockModel(MockPosterior(samples=samples)) + # X is `q x d` = 1 x 1. X is a dummy and unused b/c of mocking + X = torch.zeros(1, 1, **tkwargs) + + # basic test + sampler = IIDNormalSampler(sample_shape=torch.Size([2])) + acqf = qExpectedImprovement(model=mm, best_f=0, sampler=sampler) + log_acqf = qLogExpectedImprovement(model=mm, best_f=0, sampler=sampler) + self.assertFalse(acqf._fat) # different default behavior + self.assertTrue(log_acqf._fat) + # test initialization + for k in ["objective", "sampler"]: + self.assertIn(k, acqf._modules) + self.assertIn(k, log_acqf._modules) + + res = acqf(X).item() + self.assertEqual(res, 0.0) + exp_log_res = log_acqf(X).exp().item() + # Due to the smooth approximation, the value at zero should be close to, but + # not exactly zero, and upper-bounded by the tau hyperparameter. + self.assertTrue(0 < exp_log_res) + self.assertTrue(exp_log_res <= log_acqf.tau_relu) + + # test shifting best_f value downward to see non-zero improvement + best_f = -1 + acqf = qExpectedImprovement(model=mm, best_f=best_f, sampler=sampler) + log_acqf = qLogExpectedImprovement(model=mm, best_f=best_f, sampler=sampler) + res, exp_log_res = acqf(X), log_acqf(X).exp() + expected_val = -best_f + + self.assertEqual(res.dtype, dtype) + self.assertEqual(res.device.type, self.device.type) + self.assertEqual(res.item(), expected_val) + # Further away from zero, the value is numerically indistinguishable with + # single precision arithmetic. + self.assertTrue(expected_val <= exp_log_res.item()) + self.assertTrue(exp_log_res.item() <= expected_val + log_acqf.tau_relu) + + # test shifting best_f value upward to see advantage of LogEI + best_f = 1 + acqf = qExpectedImprovement(model=mm, best_f=best_f, sampler=sampler) + log_acqf = qLogExpectedImprovement(model=mm, best_f=best_f, sampler=sampler) + res, log_res = acqf(X), log_acqf(X) + exp_log_res = log_res.exp() + expected_val = 0 + self.assertEqual(res.item(), expected_val) + self.assertTrue(expected_val <= exp_log_res.item()) + self.assertTrue(exp_log_res.item() <= expected_val + log_acqf.tau_relu) + # However, the log value is large and negative with non-vanishing gradients + self.assertGreater(-1, log_res.item()) + self.assertGreater(log_res.item(), -100) + + # NOTE: The following tests are adapted from the qEI tests. 
+ # basic test, no resample + sampler = IIDNormalSampler(sample_shape=torch.Size([2]), seed=12345) + acqf = qLogExpectedImprovement(model=mm, best_f=0, sampler=sampler) + res = acqf(X) + self.assertTrue(0 < res.exp().item()) + self.assertTrue(res.exp().item() < acqf.tau_relu) + self.assertEqual(acqf.sampler.base_samples.shape, torch.Size([2, 1, 1, 1])) + bs = acqf.sampler.base_samples.clone() + res = acqf(X) + self.assertTrue(torch.equal(acqf.sampler.base_samples, bs)) + + # basic test, qmc + sampler = SobolQMCNormalSampler(sample_shape=torch.Size([2])) + acqf = qLogExpectedImprovement(model=mm, best_f=0, sampler=sampler) + res = acqf(X) + self.assertTrue(0 < res.exp().item()) + self.assertTrue(res.exp().item() < acqf.tau_relu) + self.assertEqual(acqf.sampler.base_samples.shape, torch.Size([2, 1, 1, 1])) + bs = acqf.sampler.base_samples.clone() + acqf(X) + self.assertTrue(torch.equal(acqf.sampler.base_samples, bs)) + + # basic test for X_pending and warning + acqf.set_X_pending() + self.assertIsNone(acqf.X_pending) + acqf.set_X_pending(None) + self.assertIsNone(acqf.X_pending) + acqf.set_X_pending(X) + self.assertEqual(acqf.X_pending, X) + mm._posterior._samples = torch.zeros(1, 2, 1, **tkwargs) + res = acqf(X) + X2 = torch.zeros(1, 1, 1, **tkwargs, requires_grad=True) + with warnings.catch_warnings(record=True) as ws, settings.debug(True): + acqf.set_X_pending(X2) + self.assertEqual(acqf.X_pending, X2) + self.assertEqual( + sum(issubclass(w.category, BotorchWarning) for w in ws), 1 + ) + + # testing with illegal taus + with self.assertRaisesRegex(ValueError, "tau_max is not a scalar:"): + qLogExpectedImprovement( + model=mm, best_f=0, tau_max=torch.tensor([1, 2]) + ) + with self.assertRaisesRegex(ValueError, "tau_relu is non-positive:"): + qLogExpectedImprovement(model=mm, best_f=0, tau_relu=-2) + + def test_q_log_expected_improvement_batch(self): + for dtype in (torch.float, torch.double): + # the event shape is `b x q x t` = 2 x 2 x 1 + samples = torch.zeros(2, 2, 1, device=self.device, dtype=dtype) + samples[0, 0, 0] = 1.0 + mm = MockModel(MockPosterior(samples=samples)) + + # X is a dummy and unused b/c of mocking + X = torch.zeros(2, 2, 1, device=self.device, dtype=dtype) + + # test batch mode + sampler = IIDNormalSampler(sample_shape=torch.Size([2])) + acqf = qLogExpectedImprovement(model=mm, best_f=0, sampler=sampler) + exp_log_res = acqf(X).exp() + # with no approximations (qEI): self.assertEqual(res[0].item(), 1.0) + # in the batch case, the values get adjusted toward + self.assertEqual(exp_log_res.dtype, dtype) + self.assertEqual(exp_log_res.device.type, self.device.type) + self.assertTrue(1.0 <= exp_log_res[0].item()) + self.assertTrue(exp_log_res[0].item() <= 1.0 + acqf.tau_relu) + # self.assertAllClose(exp_log_res[0], torch.ones_like(exp_log_res[0]), ) + + # with no approximations (qEI): self.assertEqual(res[1].item(), 0.0) + self.assertTrue(0 < exp_log_res[1].item()) + self.assertTrue(exp_log_res[1].item() <= acqf.tau_relu) + + # test batch model, batched best_f values + sampler = IIDNormalSampler(sample_shape=torch.Size([3])) + acqf = qLogExpectedImprovement( + model=mm, best_f=torch.Tensor([0, 0]), sampler=sampler + ) + exp_log_res = acqf(X).exp() + # with no approximations (qEI): self.assertEqual(res[0].item(), 1.0) + self.assertTrue(1.0 <= exp_log_res[0].item()) + self.assertTrue(exp_log_res[0].item() <= 1.0 + acqf.tau_relu) + # with no approximations (qEI): self.assertEqual(res[1].item(), 0.0) + self.assertTrue(0 < exp_log_res[1].item()) + 
self.assertTrue(exp_log_res[1].item() <= acqf.tau_relu) + + # test shifting best_f value + acqf = qLogExpectedImprovement(model=mm, best_f=-1, sampler=sampler) + exp_log_res = acqf(X).exp() + # with no approximations (qEI): self.assertEqual(res[0].item(), 2.0) + # TODO: figure out numerically stable tests and principled tolerances + # With q > 1, maximum value can get moved down due to L_q-norm approximation + # of the maximum over the q-batch. + safe_upper_lower_bound = 1.999 + self.assertTrue(safe_upper_lower_bound <= exp_log_res[0].item()) + self.assertTrue(exp_log_res[0].item() <= 2.0 + acqf.tau_relu + acqf.tau_max) + # with no approximations (qEI): self.assertEqual(res[1].item(), 1.0) + self.assertTrue(1.0 <= exp_log_res[1].item()) + # an error of ~tau_max can occur when all candidates in a q-batch have the same + # acquisition value + self.assertTrue(exp_log_res[1].item() <= 1.0 + acqf.tau_relu + acqf.tau_max) + + # test batch mode + sampler = IIDNormalSampler(sample_shape=torch.Size([2]), seed=12345) + acqf = qLogExpectedImprovement(model=mm, best_f=0, sampler=sampler) + # res = acqf(X) # 1-dim batch + exp_log_res = acqf(X).exp() # 1-dim batch + # with no approximations (qEI): self.assertEqual(res[0].item(), 1.0) + safe_upper_lower_bound = 0.999 + self.assertTrue(safe_upper_lower_bound <= exp_log_res[0].item()) + self.assertTrue(exp_log_res[0].item() <= 1.0 + acqf.tau_relu) + # with no approximations (qEI): self.assertEqual(res[1].item(), 0.0) + self.assertTrue(0.0 <= exp_log_res[1].item()) + self.assertTrue(exp_log_res[1].item() <= 0.0 + acqf.tau_relu) + self.assertEqual(acqf.sampler.base_samples.shape, torch.Size([2, 1, 2, 1])) + bs = acqf.sampler.base_samples.clone() + acqf(X) + self.assertTrue(torch.equal(acqf.sampler.base_samples, bs)) + exp_log_res = acqf(X.expand(2, 2, 1)).exp() # 2-dim batch + # self.assertEqual(res[0].item(), 1.0) + safe_upper_lower_bound = 0.999 + self.assertTrue(safe_upper_lower_bound <= exp_log_res[0].item()) + self.assertTrue(exp_log_res[0].item() <= 1.0 + acqf.tau_relu) + # self.assertEqual(res[1].item(), 0.0) + self.assertTrue(0.0 <= exp_log_res[1].item()) + self.assertTrue(exp_log_res[1].item() <= 0.0 + acqf.tau_relu) + # the base samples should have the batch dim collapsed + self.assertEqual(acqf.sampler.base_samples.shape, torch.Size([2, 1, 2, 1])) + bs = acqf.sampler.base_samples.clone() + acqf(X.expand(2, 2, 1)) + self.assertTrue(torch.equal(acqf.sampler.base_samples, bs)) + + # test batch mode, qmc + sampler = SobolQMCNormalSampler(sample_shape=torch.Size([2])) + acqf = qLogExpectedImprovement(model=mm, best_f=0, sampler=sampler) + exp_log_res = acqf(X).exp() + # self.assertEqual(res[0].item(), 1.0) + safe_upper_lower_bound = 0.999 + self.assertTrue(safe_upper_lower_bound <= exp_log_res[0].item()) + self.assertTrue(exp_log_res[0].item() <= 1.0 + acqf.tau_relu) + # self.assertEqual(res[1].item(), 0.0) + self.assertTrue(0.0 <= exp_log_res[1].item()) + self.assertTrue(exp_log_res[1].item() <= 0.0 + acqf.tau_relu) + self.assertEqual(acqf.sampler.base_samples.shape, torch.Size([2, 1, 2, 1])) + bs = acqf.sampler.base_samples.clone() + acqf(X) + self.assertTrue(torch.equal(acqf.sampler.base_samples, bs)) + + # # TODO: Test different objectives (incl.
constraints) diff --git a/test/utils/test_objective.py b/test/utils/test_objective.py index c7000b712f..7dabe28260 100644 --- a/test/utils/test_objective.py +++ b/test/utils/test_objective.py @@ -10,6 +10,7 @@ from botorch.utils.objective import ( compute_feasibility_indicator, compute_smoothed_feasibility_indicator, + soft_eval_constraint, ) from botorch.utils.testing import BotorchTestCase from torch import Tensor @@ -65,7 +66,7 @@ def test_apply_constraints(self): # nonnegative objective, one constraint, eta = 0 samples = torch.randn(1) obj = ones_f(samples) - with self.assertRaises(ValueError): + with self.assertRaisesRegex(ValueError, "eta must be positive."): apply_constraints( obj=obj, constraints=[zeros_f], @@ -74,6 +75,15 @@ def test_apply_constraints(self): eta=0.0, ) + # soft_eval_constraint is not in the path of apply_constraints, adding this test + # for coverage. + with self.assertRaisesRegex(ValueError, "eta must be positive."): + soft_eval_constraint(lhs=obj, eta=0.0) + ind = soft_eval_constraint(lhs=ones_f(samples), eta=1e-6) + self.assertAllClose(ind, torch.zeros_like(ind)) + ind = soft_eval_constraint(lhs=-ones_f(samples), eta=1e-6) + self.assertAllClose(ind, torch.ones_like(ind)) + def test_apply_constraints_multi_output(self): # nonnegative objective, one constraint tkwargs = {"device": self.device} diff --git a/test/utils/test_safe_math.py b/test/utils/test_safe_math.py index 3167928f71..27d19437fb 100644 --- a/test/utils/test_safe_math.py +++ b/test/utils/test_safe_math.py @@ -12,15 +12,40 @@ from typing import Callable import torch +from botorch.exceptions import UnsupportedError from botorch.utils import safe_math from botorch.utils.constants import get_constants_like -from botorch.utils.safe_math import logmeanexp +from botorch.utils.objective import compute_smoothed_feasibility_indicator +from botorch.utils.safe_math import ( + cauchy, + fatmax, + fatmoid, + fatplus, + log_fatmoid, + log_fatplus, + log_softplus, + logmeanexp, + smooth_amax, +) from botorch.utils.testing import BotorchTestCase from torch import finfo, Tensor +from torch.nn.functional import softplus INF = float("inf") +def sum_constraint(samples: Tensor) -> Tensor: + """Represents the constraint `samples.sum(dim=-1) > 0`. + + Args: + samples: A `b x q x m`-dim Tensor. + + Returns: + A `b x q`-dim Tensor representing constraint feasibility. 
+ """ + return -samples.sum(dim=-1) + + class UnaryOpTestMixin: op: Callable[[Tensor], Tensor] safe_op: Callable[[Tensor], Tensor] @@ -233,3 +258,142 @@ def test_log_mean_exp(self): logmeanexp(X.log(), dim=(0, -1), keepdim=True).exp(), X.mean(dim=(0, -1), keepdim=True), ) + + +class TestSmoothNonLinearities(BotorchTestCase): + def test_smooth_non_linearities(self): + for dtype in (torch.float, torch.double): + tkwargs = {"dtype": dtype, "device": self.device} + n = 17 + X = torch.randn(n, **tkwargs) + self.assertAllClose(cauchy(X), 1 / (X.square() + 1)) + + # testing softplus and fatplus + tau = 1e-2 + fatplus_X = fatplus(X, tau=tau) + self.assertAllClose(fatplus_X, X.clamp(0), atol=tau) + self.assertTrue((fatplus_X > 0).all()) + self.assertAllClose(fatplus_X.log(), log_fatplus(X, tau=tau)) + self.assertAllClose( + softplus(X, beta=1 / tau), log_softplus(X, tau=tau).exp() + ) + + # testing fatplus differentiability + X = torch.randn(n, **tkwargs) + X.requires_grad = True + log_fatplus(X, tau=tau).sum().backward() + self.assertFalse(X.grad.isinf().any()) + self.assertFalse(X.grad.isnan().any()) + # always increasing, could also test convexity (mathematically guaranteed) + self.assertTrue((X.grad > 0).all()) + + X_soft = X.detach().clone() + X_soft.requires_grad = True + log_softplus(X_soft, tau=tau).sum().backward() + + # for positive values away from zero, log_softplus and log_fatplus are close + is_positive = X > 100 * tau # i.e. 1 for tau = 1e-2 + self.assertAllClose(X.grad[is_positive], 1 / X[is_positive], atol=tau) + self.assertAllClose(X_soft.grad[is_positive], 1 / X[is_positive], atol=tau) + + is_negative = X < -100 * tau # i.e. -1 + # the softplus has very large gradients, which can saturate the smooth + # approximation to the maximum over the q-batch. + asym_val = torch.full_like(X_soft.grad[is_negative], 1 / tau) + self.assertAllClose(X_soft.grad[is_negative], asym_val, atol=tau, rtol=tau) + # the fatplus on the other hand has smaller, though non-vanishing gradients. 
+ self.assertTrue((X_soft.grad[is_negative] > X.grad[is_negative]).all()) + + # testing smoothmax and fatmax + d = 3 + X = torch.randn(n, d, **tkwargs) + fatmax_X = fatmax(X, dim=-1, tau=tau) + true_max = X.amax(dim=-1) + self.assertAllClose(fatmax_X, true_max, atol=tau) + self.assertAllClose(smooth_amax(X, dim=-1, tau=tau), true_max, atol=tau) + + # special case for d = 1 + d = 1 + X = torch.randn(n, d, **tkwargs) + fatmax_X = fatmax(X, dim=-1, tau=tau) + self.assertAllClose(fatmax_X, X[..., 0]) + + # testing fatmax differentiability + X = torch.randn(n, **tkwargs) + X.requires_grad = True + fatmax(X, dim=-1, tau=tau).sum().backward() + + self.assertFalse(X.grad.isinf().any()) + self.assertFalse(X.grad.isnan().any()) + self.assertTrue(X.grad.min() > 0) + + # testing fatmoid + X = torch.randn(n, **tkwargs) + fatmoid_X = fatmoid(X, tau=tau) + # output is in [0, 1] + self.assertTrue((fatmoid_X > 0).all()) + self.assertTrue((fatmoid_X < 1).all()) + # skew symmetry + atol = 1e-6 if dtype == torch.float32 else 1e-12 + self.assertAllClose(1 - fatmoid_X, fatmoid(-X, tau=tau), atol=atol) + zero = torch.tensor(0.0, **tkwargs) + half = torch.tensor(0.5, **tkwargs) + self.assertAllClose(fatmoid(zero), half, atol=atol) + self.assertAllClose(fatmoid_X.log(), log_fatmoid(X, tau=tau)) + + is_center = X.abs() < 100 * tau + self.assertAllClose( + fatmoid_X[~is_center], (X[~is_center] > 0).to(fatmoid_X), atol=1e-3 + ) + + # testing differentiability + X.requires_grad = True + log_fatmoid(X, tau=tau).sum().backward() + self.assertFalse(X.grad.isinf().any()) + self.assertFalse(X.grad.isnan().any()) + self.assertTrue((X.grad > 0).all()) + + # testing constraint indicator + constraints = [sum_constraint] + b = 3 + q = 4 + m = 5 + samples = torch.randn(b, q, m, **tkwargs) + eta = 1e-3 + fat = True + log_feas_vals = compute_smoothed_feasibility_indicator( + constraints=constraints, + samples=samples, + eta=eta, + log=True, + fat=fat, + ) + self.assertTrue(log_feas_vals.shape == torch.Size([b, q])) + expected_feas_vals = sum_constraint(samples) < 0 + hard_feas_vals = log_feas_vals.exp() > 1 / 2 + self.assertAllClose(hard_feas_vals, expected_feas_vals) + + # with deterministic inputs: + samples = torch.ones(1, 1, m, **tkwargs) # sum is greater than 0 + log_feas_vals = compute_smoothed_feasibility_indicator( + constraints=constraints, + samples=samples, + eta=eta, + log=True, + fat=fat, + ) + self.assertTrue((log_feas_vals.exp() > 1 / 2).item()) + + # with deterministic inputs: + samples = -torch.ones(1, 1, m, **tkwargs) # sum is smaller than 0 + log_feas_vals = compute_smoothed_feasibility_indicator( + constraints=constraints, + samples=samples, + eta=eta, + log=True, + fat=fat, + ) + self.assertFalse((log_feas_vals.exp() > 1 / 2).item()) + + with self.assertRaisesRegex(UnsupportedError, "Only dtypes"): + log_softplus(torch.randn(2, dtype=torch.float16)) From dfd2a9b4008d62141c1e66de602181b4376ea7b1 Mon Sep 17 00:00:00 2001 From: Sebastian Ament Date: Tue, 18 Jul 2023 06:57:49 -0700 Subject: [PATCH 14/18] `utils.sigmoid` with `log` and `fat` options (#1938) Summary: Pull Request resolved: https://github.com/pytorch/botorch/pull/1938 This commit introduces `utils.sigmoid` with `log` and `fat` options, thereby enabling easy access and discovery of the `logexpit` and `(log)_fatmoid` functions that exhibit better numerical behavior - when applicable - than the canonical sigmoid. 
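A minimal usage sketch of the new utility (the import path `botorch.utils.safe_math.sigmoid` matches the test imports added in this commit; the input values below are illustrative only and not part of the change):

```python
import torch

from botorch.utils.safe_math import sigmoid

X = torch.linspace(-40.0, 40.0, 9, dtype=torch.double)

# default: the canonical sigmoid (same values as torch.sigmoid(X))
probs = sigmoid(X)

# log-space evaluation, useful when the result feeds into further log computations
log_probs = sigmoid(X, log=True)

# fat-tailed variant whose tails decay polynomially rather than exponentially,
# keeping values and gradients from vanishing for large negative inputs
fat_probs = sigmoid(X, fat=True)
```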
Reviewed By: Balandat Differential Revision: D47519695 fbshipit-source-id: 5389026ac9f6d95c4d70d69c61c61e8aa00cb87e --- botorch/utils/objective.py | 8 ++++++++ botorch/utils/safe_math.py | 17 +++++++++++++++++ test/utils/test_safe_math.py | 12 ++++++++++++ 3 files changed, 37 insertions(+) diff --git a/botorch/utils/objective.py b/botorch/utils/objective.py index e30adece1d..c7ce67794d 100644 --- a/botorch/utils/objective.py +++ b/botorch/utils/objective.py @@ -10,6 +10,8 @@ from __future__ import annotations +import warnings + from typing import Callable, List, Optional, Union import torch @@ -166,6 +168,7 @@ def compute_smoothed_feasibility_indicator( return is_feasible if log else is_feasible.exp() +# TODO: deprecate this function def soft_eval_constraint(lhs: Tensor, eta: float = 1e-3) -> Tensor: r"""Element-wise evaluation of a constraint in a 'soft' fashion @@ -181,6 +184,11 @@ def soft_eval_constraint(lhs: Tensor, eta: float = 1e-3) -> Tensor: For each element `x`, `value(x) -> 0` as `x` becomes positive, and `value(x) -> 1` as x becomes negative. """ + warnings.warn( + "`soft_eval_constraint` is deprecated. Please consider `botorch.utils.sigmoid` " + + "with its `fat` and `log` options to compute feasibility indicators.", + DeprecationWarning, + ) if eta <= 0: raise ValueError("eta must be positive.") return torch.sigmoid(-lhs / eta) diff --git a/botorch/utils/safe_math.py b/botorch/utils/safe_math.py index a0c79020cc..72b2951773 100644 --- a/botorch/utils/safe_math.py +++ b/botorch/utils/safe_math.py @@ -257,3 +257,20 @@ def fatmoid(X: Tensor, tau: Union[float, Tensor] = 1.0) -> Tensor: def cauchy(x: Tensor) -> Tensor: """Computes a Lorentzian, i.e. an un-normalized Cauchy density function.""" return 1 / (1 + x.square()) + + +def sigmoid(X: Tensor, log: bool = False, fat: bool = False) -> Tensor: + """A sigmoid function with an optional fat tail and evaluation in log space for + better numerical behavior. Notably, the fat-tailed sigmoid can be used to remedy + numerical underflow problems in the value and gradient of the canonical sigmoid. + + Args: + X: The Tensor on which to evaluate the sigmoid. + log: Toggles the evaluation of the log sigmoid. + fat: Toggles the evaluation of the fat-tailed sigmoid. + + Returns: + A Tensor of (log-)sigmoid values.
+ """ + Y = log_fatmoid(X) if fat else logexpit(X) + return Y if log else Y.exp() diff --git a/test/utils/test_safe_math.py b/test/utils/test_safe_math.py index 27d19437fb..5000d0f27c 100644 --- a/test/utils/test_safe_math.py +++ b/test/utils/test_safe_math.py @@ -24,7 +24,9 @@ log_fatmoid, log_fatplus, log_softplus, + logexpit, logmeanexp, + sigmoid, smooth_amax, ) from botorch.utils.testing import BotorchTestCase @@ -395,5 +397,15 @@ def test_smooth_non_linearities(self): ) self.assertFalse((log_feas_vals.exp() > 1 / 2).item()) + # testing sigmoid wrapper function + X = torch.randn(3, 4, 5, **tkwargs) + sigmoid_X = torch.sigmoid(X) + self.assertAllClose(sigmoid(X), sigmoid_X) + self.assertAllClose(sigmoid(X, log=True), logexpit(X)) + self.assertAllClose(sigmoid(X, log=True).exp(), sigmoid_X) + fatmoid_X = fatmoid(X) + self.assertAllClose(sigmoid(X, fat=True), fatmoid_X) + self.assertAllClose(sigmoid(X, log=True, fat=True).exp(), fatmoid_X) + with self.assertRaisesRegex(UnsupportedError, "Only dtypes"): log_softplus(torch.randn(2, dtype=torch.float16)) From 5850ba6c716c55841cdb7e282aa3b86130b542e9 Mon Sep 17 00:00:00 2001 From: Sebastian Ament Date: Tue, 18 Jul 2023 12:54:16 -0700 Subject: [PATCH 15/18] qLogNEI (#1937) Summary: Pull Request resolved: https://github.com/pytorch/botorch/pull/1937 This commit introduces `qLogNoisyExpectedImprovement` (`qLogNEI`) a cousin of `qLogEI`. Similar to `qLogEI` and in contrast to `q(N)EI`, it generally exhibits strong and smooth gradients, leading to better acquisition function optimization and Bayesian optimization as a result. Reviewed By: Balandat Differential Revision: D47439161 fbshipit-source-id: 79c0930659ae344561c315211253680c9e43ffb8 --- botorch/acquisition/__init__.py | 2 + botorch/acquisition/input_constructors.py | 152 +++++++- botorch/acquisition/logei.py | 264 ++++++++++++- test/acquisition/test_input_constructors.py | 39 ++ test/acquisition/test_logei.py | 400 +++++++++++++++++++- test/models/test_fully_bayesian.py | 21 +- 6 files changed, 871 insertions(+), 7 deletions(-) diff --git a/botorch/acquisition/__init__.py b/botorch/acquisition/__init__.py index 5bd208cd81..276cbd3aa0 100644 --- a/botorch/acquisition/__init__.py +++ b/botorch/acquisition/__init__.py @@ -37,6 +37,7 @@ from botorch.acquisition.logei import ( LogImprovementMCAcquisitionFunction, qLogExpectedImprovement, + qLogNoisyExpectedImprovement, ) from botorch.acquisition.max_value_entropy_search import ( MaxValueBase, @@ -96,6 +97,7 @@ "qExpectedImprovement", "LogImprovementMCAcquisitionFunction", "qLogExpectedImprovement", + "qLogNoisyExpectedImprovement", "qKnowledgeGradient", "MaxValueBase", "qMultiFidelityKnowledgeGradient", diff --git a/botorch/acquisition/input_constructors.py b/botorch/acquisition/input_constructors.py index cff41d46e1..c1b3fa02f2 100644 --- a/botorch/acquisition/input_constructors.py +++ b/botorch/acquisition/input_constructors.py @@ -47,7 +47,12 @@ qKnowledgeGradient, qMultiFidelityKnowledgeGradient, ) -from botorch.acquisition.logei import qLogExpectedImprovement +from botorch.acquisition.logei import ( + qLogExpectedImprovement, + qLogNoisyExpectedImprovement, + TAU_MAX, + TAU_RELU, +) from botorch.acquisition.max_value_entropy_search import ( qMaxValueEntropy, qMultiFidelityMaxValueEntropy, @@ -450,7 +455,7 @@ def construct_inputs_qSimpleRegret( ) -@acqf_input_constructor(qExpectedImprovement, qLogExpectedImprovement) +@acqf_input_constructor(qExpectedImprovement) def construct_inputs_qEI( model: Model, training_data: 
MaybeDict[SupervisedDataset], @@ -508,6 +513,72 @@ def construct_inputs_qEI( return {**base_inputs, "best_f": best_f, "constraints": constraints, "eta": eta} +@acqf_input_constructor(qLogExpectedImprovement) +def construct_inputs_qLogEI( + model: Model, + training_data: MaybeDict[SupervisedDataset], + objective: Optional[MCAcquisitionObjective] = None, + posterior_transform: Optional[PosteriorTransform] = None, + X_pending: Optional[Tensor] = None, + sampler: Optional[MCSampler] = None, + best_f: Optional[Union[float, Tensor]] = None, + constraints: Optional[List[Callable[[Tensor], Tensor]]] = None, + eta: Union[Tensor, float] = 1e-3, + fat: bool = True, + tau_max: float = TAU_MAX, + tau_relu: float = TAU_RELU, + **ignored: Any, +) -> Dict[str, Any]: + r"""Construct kwargs for the `qLogExpectedImprovement` constructor. + + Args: + model: The model to be used in the acquisition function. + training_data: Dataset(s) used to train the model. + objective: The objective to be used in the acquisition function. + posterior_transform: The posterior transform to be used in the + acquisition function. + X_pending: A `m x d`-dim Tensor of `m` design points that have been + submitted for function evaluation but have not yet been evaluated. + Concatenated into X upon forward call. + sampler: The sampler used to draw base samples. If omitted, uses + the acquisition function's default sampler. + best_f: Threshold above (or below) which improvement is defined. + constraints: A list of constraint callables which map a Tensor of posterior + samples of dimension `sample_shape x batch-shape x q x m`-dim to a + `sample_shape x batch-shape x q`-dim Tensor. The associated constraints + are considered satisfied if the output is less than zero. + eta: Temperature parameter(s) governing the smoothness of the sigmoid + approximation to the constraint indicators. For more details on this + parameter, see the docs of `compute_smoothed_feasibility_indicator`. + fat: Toggles the logarithmic / linear asymptotic behavior of the smooth + approximation to the ReLU. + tau_max: Temperature parameter controlling the sharpness of the smooth + approximations to max. + tau_relu: Temperature parameter controlling the sharpness of the smooth + approximations to ReLU. + ignored: Not used. + + Returns: + A dict mapping kwarg names of the constructor to values.
+ """ + return { + **construct_inputs_qEI( + model=model, + training_data=training_data, + objective=objective, + posterior_transform=posterior_transform, + X_pending=X_pending, + sampler=sampler, + best_f=best_f, + constraints=constraints, + eta=eta, + ), + "fat": fat, + "tau_max": tau_max, + "tau_relu": tau_relu, + } + + @acqf_input_constructor(qNoisyExpectedImprovement) def construct_inputs_qNEI( model: Model, @@ -570,7 +641,6 @@ def construct_inputs_qNEI( assert_shared=True, first_only=True, ) - return { **base_inputs, "X_baseline": X_baseline, @@ -581,6 +651,82 @@ def construct_inputs_qNEI( } +@acqf_input_constructor(qLogNoisyExpectedImprovement) +def construct_inputs_qLogNEI( + model: Model, + training_data: MaybeDict[SupervisedDataset], + objective: Optional[MCAcquisitionObjective] = None, + posterior_transform: Optional[PosteriorTransform] = None, + X_pending: Optional[Tensor] = None, + sampler: Optional[MCSampler] = None, + X_baseline: Optional[Tensor] = None, + prune_baseline: Optional[bool] = True, + cache_root: Optional[bool] = True, + constraints: Optional[List[Callable[[Tensor], Tensor]]] = None, + eta: Union[Tensor, float] = 1e-3, + fat: bool = True, + tau_max: float = TAU_MAX, + tau_relu: float = TAU_RELU, + **ignored: Any, +): + r"""Construct kwargs for the `qLogNoisyExpectedImprovement` constructor. + + Args: + model: The model to be used in the acquisition function. + training_data: Dataset(s) used to train the model. + objective: The objective to be used in the acquisition function. + posterior_transform: The posterior transform to be used in the + acquisition function. + X_pending: A `m x d`-dim Tensor of `m` design points that have been + submitted for function evaluation but have not yet been evaluated. + Concatenated into X upon forward call. + sampler: The sampler used to draw base samples. If omitted, uses + the acquisition function's default sampler. + X_baseline: A `batch_shape x r x d`-dim Tensor of `r` design points + that have already been observed. These points are considered as + the potential best design point. If omitted, checks that all + training_data have the same input features and takes the first `X`. + prune_baseline: If True, remove points in `X_baseline` that are + highly unlikely to be the best point. This can significantly + improve performance and is generally recommended. + constraints: A list of constraint callables which map a Tensor of posterior + samples of dimension `sample_shape x batch-shape x q x m`-dim to a + `sample_shape x batch-shape x q`-dim Tensor. The associated constraints + are considered satisfied if the output is less than zero. + eta: Temperature parameter(s) governing the smoothness of the sigmoid + approximation to the constraint indicators. For more details on this + parameter, see the docs of `compute_smoothed_feasibility_indicator`. + fat: Toggles the logarithmic / linear asymptotic behavior of the smooth + approximation to the ReLU. + tau_max: Temperature parameter controlling the sharpness of the smooth + approximations to max. + tau_relu: Temperature parameter controlling the sharpness of the smooth + approximations to ReLU. + ignored: Not used. + + Returns: + A dict mapping kwarg names of the constructor to values.
+ """ + return { + **construct_inputs_qNEI( + model=model, + training_data=training_data, + objective=objective, + posterior_transform=posterior_transform, + X_pending=X_pending, + sampler=sampler, + X_baseline=X_baseline, + prune_baseline=prune_baseline, + cache_root=cache_root, + constraints=constraints, + eta=eta, + ), + "fat": fat, + "tau_max": tau_max, + "tau_relu": tau_relu, + } + + @acqf_input_constructor(qProbabilityOfImprovement) def construct_inputs_qPI( model: Model, diff --git a/botorch/acquisition/logei.py b/botorch/acquisition/logei.py index 95affe1c28..e12228c1a5 100644 --- a/botorch/acquisition/logei.py +++ b/botorch/acquisition/logei.py @@ -7,20 +7,26 @@ Batch implementations of the LogEI family of improvements-based acquisition functions. """ - from __future__ import annotations +from copy import deepcopy + from functools import partial -from typing import Callable, List, Optional, TypeVar, Union +from typing import Any, Callable, List, Optional, Tuple, TypeVar, Union import torch +from botorch.acquisition.cached_cholesky import CachedCholeskyMCAcquisitionFunction from botorch.acquisition.monte_carlo import SampleReducingMCAcquisitionFunction from botorch.acquisition.objective import ( ConstrainedMCObjective, MCAcquisitionObjective, PosteriorTransform, ) +from botorch.acquisition.utils import ( + compute_best_feasible_objective, + prune_inferior_points, +) from botorch.exceptions.errors import BotorchError from botorch.models.model import Model from botorch.sampling.base import MCSampler @@ -31,6 +37,7 @@ logmeanexp, smooth_amax, ) +from botorch.utils.transforms import match_batch_shape from torch import Tensor """ @@ -219,6 +226,259 @@ def _sample_forward(self, obj: Tensor) -> Tensor: return li +class qLogNoisyExpectedImprovement( + LogImprovementMCAcquisitionFunction, CachedCholeskyMCAcquisitionFunction +): + r"""MC-based batch Log Noisy Expected Improvement. + + This function does not assume a `best_f` is known (which would require + noiseless observations). Instead, it uses samples from the joint posterior + over the `q` test points and previously observed points. A smooth approximation + to the canonical improvement over previously observed points is computed + for each sample and the logarithm of the average is returned. + + `qLogNEI(X) ~ log(qNEI(X)) = Log E(max(max Y - max Y_baseline, 0))`, where + `(Y, Y_baseline) ~ f((X, X_baseline)), X = (x_1,...,x_q)` + + Example: + >>> model = SingleTaskGP(train_X, train_Y) + >>> sampler = SobolQMCNormalSampler(1024) + >>> qLogNEI = qLogNoisyExpectedImprovement(model, train_X, sampler) + >>> acqval = qLogNEI(test_X) + """ + + def __init__( + self, + model: Model, + X_baseline: Tensor, + sampler: Optional[MCSampler] = None, + objective: Optional[MCAcquisitionObjective] = None, + posterior_transform: Optional[PosteriorTransform] = None, + X_pending: Optional[Tensor] = None, + constraints: Optional[List[Callable[[Tensor], Tensor]]] = None, + eta: Union[Tensor, float] = 1e-3, + fat: bool = True, + prune_baseline: bool = False, + cache_root: bool = True, + tau_max: float = TAU_MAX, + tau_relu: float = TAU_RELU, + **kwargs: Any, + ) -> None: + r"""q-Log Noisy Expected Improvement. + + Args: + model: A fitted model. + X_baseline: A `batch_shape x r x d`-dim Tensor of `r` design points + that have already been observed. These points are considered as + the potential best design point. + sampler: The sampler used to draw base samples. See `MCAcquisitionFunction` + for more details.
+ objective: The MCAcquisitionObjective under which the samples are + evaluated. Defaults to `IdentityMCObjective()`. + posterior_transform: A PosteriorTransform (optional). + X_pending: A `batch_shape x m x d`-dim Tensor of `m` design points + that have been submitted for function evaluation + but have not yet been evaluated. Concatenated into `X` upon + forward call. Copied and set to have no gradient. + constraints: A list of constraint callables which map a Tensor of posterior + samples of dimension `sample_shape x batch-shape x q x m`-dim to a + `sample_shape x batch-shape x q`-dim Tensor. The associated constraints + are satisfied if `constraint(samples) < 0`. + eta: Temperature parameter(s) governing the smoothness of the sigmoid + approximation to the constraint indicators. See the docs of + `compute_(log_)smoothed_constraint_indicator` for details. + fat: Toggles the logarithmic / linear asymptotic behavior of the smooth + approximation to the ReLU. + prune_baseline: If True, remove points in `X_baseline` that are + highly unlikely to be the best point. This can significantly + improve performance and is generally recommended. In order to + customize pruning parameters, instead manually call + `botorch.acquisition.utils.prune_inferior_points` on `X_baseline` + before instantiating the acquisition function. + cache_root: A boolean indicating whether to cache the root + decomposition over `X_baseline` and use low-rank updates. + tau_max: Temperature parameter controlling the sharpness of the smooth + approximations to max. + tau_relu: Temperature parameter controlling the sharpness of the smooth + approximations to ReLU. + kwargs: Included for compatibility with qNEI. + + TODO: similar to qNEHVI, when we are using sequential greedy candidate + selection, we could incorporate pending points into X_baseline and compute + the incremental q(Log)NEI from the new point. This would greatly increase + efficiency for large batches. + """ + # TODO: separate out baseline variables initialization and other functions + # in qNEI to avoid duplication of both code and work at runtime. + super().__init__( + model=model, + sampler=sampler, + objective=objective, + posterior_transform=posterior_transform, + X_pending=X_pending, + constraints=constraints, + eta=eta, + fat=fat, + tau_max=tau_max, + ) + self.tau_relu = tau_relu + self._init_baseline( + model=model, + X_baseline=X_baseline, + sampler=sampler, + objective=objective, + posterior_transform=posterior_transform, + prune_baseline=prune_baseline, + cache_root=cache_root, + **kwargs, + ) + + def _sample_forward(self, obj: Tensor) -> Tensor: + r"""Evaluate qLogNoisyExpectedImprovement per sample on the candidate set `X`. + + Args: + obj: `mc_shape x batch_shape x q`-dim Tensor of MC objective values. + + Returns: + A `sample_shape x batch_shape x q`-dim Tensor of log noisy expected smoothed + improvement values.
+ """ + return _log_improvement( + Y=obj, + best_f=self.compute_best_f(obj), + tau=self.tau_relu, + fat=self._fat, + ) + + def _init_baseline( + self, + model: Model, + X_baseline: Tensor, + sampler: Optional[MCSampler] = None, + objective: Optional[MCAcquisitionObjective] = None, + posterior_transform: Optional[PosteriorTransform] = None, + prune_baseline: bool = False, + cache_root: bool = True, + **kwargs: Any, + ) -> None: + # setup of CachedCholeskyMCAcquisitionFunction + self._setup(model=model, cache_root=cache_root) + if prune_baseline: + X_baseline = prune_inferior_points( + model=model, + X=X_baseline, + objective=objective, + posterior_transform=posterior_transform, + marginalize_dim=kwargs.get("marginalize_dim"), + ) + self.register_buffer("X_baseline", X_baseline) + # registering buffers for _get_samples_and_objectives in the next `if` block + self.register_buffer("baseline_samples", None) + self.register_buffer("baseline_obj", None) + if self._cache_root: + self.q_in = -1 + # set baseline samples + with torch.no_grad(): # this is _get_samples_and_objectives(X_baseline) + posterior = self.model.posterior( + X_baseline, posterior_transform=self.posterior_transform + ) + # Note: The root decomposition is cached in two different places. It + # may be confusing to have two different caches, but this is not + # trivial to change since each is needed for a different reason: + # - LinearOperator caching to `posterior.mvn` allows for reuse within + # this function, which may be helpful if the same root decomposition + # is produced by the calls to `self.base_sampler` and + # `self._cache_root_decomposition`. + # - self._baseline_L allows a root decomposition to be persisted outside + # this method. + self.baseline_samples = self.get_posterior_samples(posterior) + self.baseline_obj = self.objective(self.baseline_samples, X=X_baseline) + + # We make a copy here because we will write an attribute `base_samples` + # to `self.base_sampler.base_samples`, and we don't want to mutate + # `self.sampler`. + self.base_sampler = deepcopy(self.sampler) + self.register_buffer( + "_baseline_best_f", + self._compute_best_feasible_objective( + samples=self.baseline_samples, obj=self.baseline_obj + ), + ) + self._baseline_L = self._compute_root_decomposition(posterior=posterior) + + def compute_best_f(self, obj: Tensor) -> Tensor: + """Computes the best (feasible) noisy objective value. + + Args: + obj: `sample_shape x batch_shape x q`-dim Tensor of objectives in forward. + + Returns: + A `sample_shape x batch_shape x 1`-dim Tensor of best feasible objectives. + """ + if self._cache_root: + val = self._baseline_best_f + else: + val = self._compute_best_feasible_objective( + samples=self.baseline_samples, obj=self.baseline_obj + ) + # ensuring shape, dtype, device compatibility with obj + n_sample_dims = len(self.sample_shape) + view_shape = torch.Size( + [ + *val.shape[:n_sample_dims], # sample dimensions + *(1,) * (obj.ndim - val.ndim), # pad to match obj + *val.shape[n_sample_dims:], # the rest + ] + ) + return val.view(view_shape).to(obj) + + def _get_samples_and_objectives(self, X: Tensor) -> Tuple[Tensor, Tensor]: + r"""Compute samples at new points, using the cached root decomposition. + + Args: + X: A `batch_shape x q x d`-dim tensor of inputs. + + Returns: + A two-tuple `(samples, obj)`, where `samples` is a tensor of posterior + samples with shape `sample_shape x batch_shape x q x m`, and `obj` is a + tensor of MC objective values with shape `sample_shape x batch_shape x q`. 
+ """ + n_baseline, q = self.X_baseline.shape[-2], X.shape[-2] + X_full = torch.cat([match_batch_shape(self.X_baseline, X), X], dim=-2) + # TODO: Implement more efficient way to compute posterior over both training and + # test points in GPyTorch (https://github.com/cornellius-gp/gpytorch/issues/567) + posterior = self.model.posterior( + X_full, posterior_transform=self.posterior_transform + ) + if not self._cache_root: + samples_full = super().get_posterior_samples(posterior) + obj_full = self.objective(samples_full, X=X_full) + # assigning baseline buffers so `best_f` can be computed in _sample_forward + self.baseline_samples, samples = samples_full.split([n_baseline, q], dim=-2) + self.baseline_obj, obj = obj_full.split([n_baseline, q], dim=-1) + return samples, obj + + # handle one-to-many input transforms + n_plus_q = X_full.shape[-2] + n_w = posterior._extended_shape()[-2] // n_plus_q + q_in = q * n_w + self._set_sampler(q_in=q_in, posterior=posterior) + samples = self._get_f_X_samples(posterior=posterior, q_in=q_in) + obj = self.objective(samples, X=X_full[..., -q:, :]) + return samples, obj + + def _compute_best_feasible_objective(self, samples: Tensor, obj: Tensor) -> Tensor: + return compute_best_feasible_objective( + samples=samples, + obj=obj, + constraints=self._constraints, + model=self.model, + objective=self.objective, + posterior_transform=self.posterior_transform, + X_baseline=self.X_baseline, + ) + + """ ###################################### utils ########################################## """ diff --git a/test/acquisition/test_input_constructors.py b/test/acquisition/test_input_constructors.py index e2a59d34ad..ea8f95b81e 100644 --- a/test/acquisition/test_input_constructors.py +++ b/test/acquisition/test_input_constructors.py @@ -31,6 +31,12 @@ qKnowledgeGradient, qMultiFidelityKnowledgeGradient, ) +from botorch.acquisition.logei import ( + qLogExpectedImprovement, + qLogNoisyExpectedImprovement, + TAU_MAX, + TAU_RELU, +) from botorch.acquisition.max_value_entropy_search import ( qMaxValueEntropy, qMultiFidelityMaxValueEntropy, @@ -382,6 +388,23 @@ def test_construct_inputs_qEI(self): ) self.assertEqual(kwargs["best_f"], best_f_expected) + # testing qLogEI input constructor + log_constructor = get_acqf_input_constructor(qLogExpectedImprovement) + log_kwargs = log_constructor( + model=mock_model, + training_data=self.blockX_blockY, + objective=objective, + X_pending=X_pending, + best_f=best_f_expected, + ) + # includes strict superset of kwargs tested above + self.assertTrue(kwargs.items() <= log_kwargs.items()) + self.assertTrue("fat" in log_kwargs) + self.assertTrue("tau_max" in log_kwargs) + self.assertEqual(log_kwargs["tau_max"], TAU_MAX) + self.assertTrue("tau_relu" in log_kwargs) + self.assertEqual(log_kwargs["tau_relu"], TAU_RELU) + def test_construct_inputs_qNEI(self): c = get_acqf_input_constructor(qNoisyExpectedImprovement) mock_model = mock.Mock() @@ -415,6 +438,22 @@ def test_construct_inputs_qNEI(self): self.assertIsInstance(kwargs["eta"], float) self.assertTrue(kwargs["eta"] < 1) + # testing qLogNEI input constructor + log_constructor = get_acqf_input_constructor(qLogNoisyExpectedImprovement) + log_kwargs = log_constructor( + model=mock_model, + training_data=self.blockX_blockY, + X_baseline=X_baseline, + prune_baseline=False, + ) + # includes strict superset of kwargs tested above + self.assertTrue(kwargs.items() <= log_kwargs.items()) + self.assertTrue("fat" in log_kwargs) + self.assertTrue("tau_max" in log_kwargs) + 
self.assertEqual(log_kwargs["tau_max"], TAU_MAX) + self.assertTrue("tau_relu" in log_kwargs) + self.assertEqual(log_kwargs["tau_relu"], TAU_RELU) + def test_construct_inputs_qPI(self): c = get_acqf_input_constructor(qProbabilityOfImprovement) mock_model = mock.Mock() diff --git a/test/acquisition/test_logei.py b/test/acquisition/test_logei.py index 6d84ad671e..83b66b8bf0 100644 --- a/test/acquisition/test_logei.py +++ b/test/acquisition/test_logei.py @@ -5,6 +5,9 @@ # LICENSE file in the root directory of this source tree. import warnings +from copy import deepcopy +from itertools import product +from math import pi from unittest import mock import torch @@ -12,18 +15,29 @@ from botorch.acquisition import ( LogImprovementMCAcquisitionFunction, qLogExpectedImprovement, + qLogNoisyExpectedImprovement, ) from botorch.acquisition.input_constructors import ACQF_INPUT_CONSTRUCTOR_REGISTRY -from botorch.acquisition.monte_carlo import qExpectedImprovement +from botorch.acquisition.monte_carlo import ( + qExpectedImprovement, + qNoisyExpectedImprovement, +) + from botorch.acquisition.objective import ( ConstrainedMCObjective, + GenericMCObjective, IdentityMCObjective, PosteriorTransform, + ScalarizedPosteriorTransform, ) from botorch.exceptions import BotorchWarning, UnsupportedError from botorch.exceptions.errors import BotorchError +from botorch.models import SingleTaskGP from botorch.sampling.normal import IIDNormalSampler, SobolQMCNormalSampler +from botorch.utils.low_rank import sample_cached_cholesky from botorch.utils.testing import BotorchTestCase, MockModel, MockPosterior + +from botorch.utils.transforms import standardize from torch import Tensor @@ -127,6 +141,8 @@ def test_q_log_expected_improvement(self): self.assertEqual(res.item(), expected_val) # Further away from zero, the value is numerically indistinguishable with # single precision arithmetic. + self.assertEqual(exp_log_res.dtype, dtype) + self.assertEqual(exp_log_res.device.type, self.device.type) self.assertTrue(expected_val <= exp_log_res.item()) self.assertTrue(exp_log_res.item() <= expected_val + log_acqf.tau_relu) @@ -294,3 +310,385 @@ def test_q_log_expected_improvement_batch(self): self.assertTrue(torch.equal(acqf.sampler.base_samples, bs)) # # TODO: Test different objectives (incl. 
constraints) + + +class TestQLogNoisyExpectedImprovement(BotorchTestCase): + def test_q_log_noisy_expected_improvement(self): + self.assertIn( + qLogNoisyExpectedImprovement, ACQF_INPUT_CONSTRUCTOR_REGISTRY.keys() + ) + for dtype in (torch.float, torch.double): + # the event shape is `b x q x t` = 1 x 2 x 1 + samples_noisy = torch.tensor([0.0, 1.0], device=self.device, dtype=dtype) + samples_noisy = samples_noisy.view(1, 2, 1) + # X_baseline is `q' x d` = 1 x 1 + X_baseline = torch.zeros(1, 1, device=self.device, dtype=dtype) + mm_noisy = MockModel(MockPosterior(samples=samples_noisy)) + # X is `q x d` = 1 x 1 + X = torch.zeros(1, 1, device=self.device, dtype=dtype) + + # basic test + sampler = IIDNormalSampler(sample_shape=torch.Size([2])) + kwargs = { + "model": mm_noisy, + "X_baseline": X_baseline, + "sampler": sampler, + "prune_baseline": False, + "cache_root": False, + } + acqf = qNoisyExpectedImprovement(**kwargs) + log_acqf = qLogNoisyExpectedImprovement(**kwargs) + + res = acqf(X) + self.assertEqual(res.item(), 1.0) + log_res = log_acqf(X) + self.assertEqual(log_res.dtype, dtype) + self.assertEqual(log_res.device.type, self.device.type) + self.assertAllClose(log_res.exp().item(), 1.0) + + # basic test + sampler = IIDNormalSampler(sample_shape=torch.Size([2]), seed=12345) + kwargs = { + "model": mm_noisy, + "X_baseline": X_baseline, + "sampler": sampler, + "prune_baseline": False, + "cache_root": False, + } + log_acqf = qLogNoisyExpectedImprovement(**kwargs) + log_res = log_acqf(X) + self.assertEqual(log_res.exp().item(), 1.0) + self.assertEqual( + log_acqf.sampler.base_samples.shape, torch.Size([2, 1, 2, 1]) + ) + bs = log_acqf.sampler.base_samples.clone() + log_acqf(X) + self.assertTrue(torch.equal(log_acqf.sampler.base_samples, bs)) + + # basic test, qmc + sampler = SobolQMCNormalSampler(sample_shape=torch.Size([2])) + kwargs = { + "model": mm_noisy, + "X_baseline": X_baseline, + "sampler": sampler, + "prune_baseline": False, + "cache_root": False, + } + log_acqf = qLogNoisyExpectedImprovement(**kwargs) + log_res = log_acqf(X) + self.assertEqual(log_res.exp().item(), 1.0) + self.assertEqual( + log_acqf.sampler.base_samples.shape, torch.Size([2, 1, 2, 1]) + ) + bs = log_acqf.sampler.base_samples.clone() + log_acqf(X) + self.assertTrue(torch.equal(log_acqf.sampler.base_samples, bs)) + + # basic test for X_pending and warning + sampler = SobolQMCNormalSampler(sample_shape=torch.Size([2])) + samples_noisy_pending = torch.tensor( + [1.0, 0.0, 0.0], device=self.device, dtype=dtype + ) + samples_noisy_pending = samples_noisy_pending.view(1, 3, 1) + mm_noisy_pending = MockModel(MockPosterior(samples=samples_noisy_pending)) + kwargs = { + "model": mm_noisy_pending, + "X_baseline": X_baseline, + "sampler": sampler, + "prune_baseline": False, + "cache_root": False, + } + # copy for log version + log_acqf = qLogNoisyExpectedImprovement(**kwargs) + log_acqf.set_X_pending() + self.assertIsNone(log_acqf.X_pending) + log_acqf.set_X_pending(None) + self.assertIsNone(log_acqf.X_pending) + log_acqf.set_X_pending(X) + self.assertEqual(log_acqf.X_pending, X) + log_acqf(X) + X2 = torch.zeros( + 1, 1, 1, device=self.device, dtype=dtype, requires_grad=True + ) + with warnings.catch_warnings(record=True) as ws, settings.debug(True): + log_acqf.set_X_pending(X2) + self.assertEqual(log_acqf.X_pending, X2) + self.assertEqual( + sum(issubclass(w.category, BotorchWarning) for w in ws), 1 + ) + + def test_q_noisy_expected_improvement_batch(self): + for dtype in (torch.float, torch.double): + # the event shape 
 is `b x q x t` = 2 x 3 x 1 + samples_noisy = torch.zeros(2, 3, 1, device=self.device, dtype=dtype) + samples_noisy[0, -1, 0] = 1.0 + mm_noisy = MockModel(MockPosterior(samples=samples_noisy)) + # X is `b x q x d` = 2 x 2 x 1 + X = torch.zeros(2, 2, 1, device=self.device, dtype=dtype) + X_baseline = torch.zeros(1, 1, device=self.device, dtype=dtype) + + # test batch mode + sampler = IIDNormalSampler(sample_shape=torch.Size([2])) + kwargs = { + "model": mm_noisy, + "X_baseline": X_baseline, + "sampler": sampler, + "prune_baseline": False, + "cache_root": False, + } + acqf = qLogNoisyExpectedImprovement(**kwargs) + res = acqf(X).exp() + expected_res = torch.tensor([1.0, 0.0], dtype=dtype, device=self.device) + self.assertAllClose(res, expected_res, atol=acqf.tau_relu) + self.assertGreater(res[1].item(), 0.0) + self.assertGreater(acqf.tau_relu, res[1].item()) + + # test batch mode + sampler = IIDNormalSampler(sample_shape=torch.Size([2]), seed=12345) + acqf = qLogNoisyExpectedImprovement( + model=mm_noisy, + X_baseline=X_baseline, + sampler=sampler, + prune_baseline=False, + cache_root=False, + ) + res = acqf(X).exp() # 1-dim batch + expected_res = torch.tensor([1.0, 0.0], dtype=dtype, device=self.device) + self.assertAllClose(res, expected_res, atol=acqf.tau_relu) + self.assertGreater(res[1].item(), 0.0) + self.assertGreater(acqf.tau_relu, res[1].item()) + self.assertEqual(acqf.sampler.base_samples.shape, torch.Size([2, 1, 3, 1])) + bs = acqf.sampler.base_samples.clone() + acqf(X) + self.assertTrue(torch.equal(acqf.sampler.base_samples, bs)) + res = acqf(X.expand(2, 2, 1)).exp() # 2-dim batch + expected_res = torch.tensor([1.0, 0.0], dtype=dtype, device=self.device) + self.assertAllClose(res, expected_res, atol=acqf.tau_relu) + self.assertGreater(res[1].item(), 0.0) + self.assertGreater(acqf.tau_relu, res[1].item()) + # the base samples should have the batch dim collapsed + self.assertEqual(acqf.sampler.base_samples.shape, torch.Size([2, 1, 3, 1])) + bs = acqf.sampler.base_samples.clone() + acqf(X.expand(2, 2, 1)) + self.assertTrue(torch.equal(acqf.sampler.base_samples, bs)) + + # test batch mode, qmc + sampler = SobolQMCNormalSampler(sample_shape=torch.Size([2])) + acqf = qLogNoisyExpectedImprovement( + model=mm_noisy, + X_baseline=X_baseline, + sampler=sampler, + prune_baseline=False, + cache_root=False, + ) + res = acqf(X).exp() + expected_res = torch.tensor([1.0, 0.0], dtype=dtype, device=self.device) + self.assertAllClose(res, expected_res, atol=acqf.tau_relu) + self.assertGreater(res[1].item(), 0.0) + self.assertGreater(acqf.tau_relu, res[1].item()) + self.assertEqual(acqf.sampler.base_samples.shape, torch.Size([2, 1, 3, 1])) + bs = acqf.sampler.base_samples.clone() + acqf(X) + self.assertTrue(torch.equal(acqf.sampler.base_samples, bs)) + + def test_prune_baseline(self): + no = "botorch.utils.testing.MockModel.num_outputs" + prune = "botorch.acquisition.logei.prune_inferior_points" + for dtype in (torch.float, torch.double): + X_baseline = torch.zeros(1, 1, device=self.device, dtype=dtype) + X_pruned = torch.rand(1, 1, device=self.device, dtype=dtype) + with mock.patch(no, new_callable=mock.PropertyMock) as mock_num_outputs: + mock_num_outputs.return_value = 1 + mm = MockModel(MockPosterior(samples=X_baseline)) + with mock.patch(prune, return_value=X_pruned) as mock_prune: + acqf = qLogNoisyExpectedImprovement( + model=mm, + X_baseline=X_baseline, + prune_baseline=True, + cache_root=False, + ) + mock_prune.assert_called_once() + self.assertTrue(torch.equal(acqf.X_baseline, X_pruned)) + with
mock.patch(prune, return_value=X_pruned) as mock_prune: + acqf = qLogNoisyExpectedImprovement( + model=mm, + X_baseline=X_baseline, + prune_baseline=True, + marginalize_dim=-3, + cache_root=False, + ) + _, kwargs = mock_prune.call_args + self.assertEqual(kwargs["marginalize_dim"], -3) + + def test_cache_root(self): + sample_cached_path = ( + "botorch.acquisition.cached_cholesky.sample_cached_cholesky" + ) + raw_state_dict = { + "likelihood.noise_covar.raw_noise": torch.tensor( + [[0.0895], [0.2594]], dtype=torch.float64 + ), + "mean_module.raw_constant": torch.tensor( + [-0.4545, -0.1285], dtype=torch.float64 + ), + "covar_module.raw_outputscale": torch.tensor( + [1.4876, 1.4897], dtype=torch.float64 + ), + "covar_module.base_kernel.raw_lengthscale": torch.tensor( + [[[-0.7202, -0.2868]], [[-0.8794, -1.2877]]], dtype=torch.float64 + ), + } + # test batched models (e.g. for MCMC) + for train_batch_shape, m, dtype in product( + (torch.Size([]), torch.Size([3])), (1, 2), (torch.float, torch.double) + ): + state_dict = deepcopy(raw_state_dict) + for k, v in state_dict.items(): + if m == 1: + v = v[0] + if len(train_batch_shape) > 0: + v = v.unsqueeze(0).expand(*train_batch_shape, *v.shape) + state_dict[k] = v + tkwargs = {"device": self.device, "dtype": dtype} + if m == 2: + objective = GenericMCObjective(lambda Y, X: Y.sum(dim=-1)) + else: + objective = None + for k, v in state_dict.items(): + state_dict[k] = v.to(**tkwargs) + all_close_kwargs = ( + { + "atol": 1e-1, + "rtol": 0.0, + } + if dtype == torch.float + else {"atol": 1e-4, "rtol": 0.0} + ) + torch.manual_seed(1234) + train_X = torch.rand(*train_batch_shape, 3, 2, **tkwargs) + train_Y = ( + torch.sin(train_X * 2 * pi) + + torch.randn(*train_batch_shape, 3, 2, **tkwargs) + )[..., :m] + train_Y = standardize(train_Y) + model = SingleTaskGP( + train_X, + train_Y, + ) + if len(train_batch_shape) > 0: + X_baseline = train_X[0] + else: + X_baseline = train_X + model.load_state_dict(state_dict, strict=False) + sampler = IIDNormalSampler(sample_shape=torch.Size([5]), seed=0) + torch.manual_seed(0) + acqf = qLogNoisyExpectedImprovement( + model=model, + X_baseline=X_baseline, + sampler=sampler, + objective=objective, + prune_baseline=False, + cache_root=True, + ) + + orig_base_samples = acqf.base_sampler.base_samples.detach().clone() + sampler2 = IIDNormalSampler(sample_shape=torch.Size([5]), seed=0) + sampler2.base_samples = orig_base_samples + torch.manual_seed(0) + acqf_no_cache = qLogNoisyExpectedImprovement( + model=model, + X_baseline=X_baseline, + sampler=sampler2, + objective=objective, + prune_baseline=False, + cache_root=False, + ) + for q, batch_shape in product( + (1, 3), (torch.Size([]), torch.Size([3]), torch.Size([4, 3])) + ): + acqf.q_in = -1 + acqf_no_cache.q_in = -1 + test_X = ( + 0.3 + 0.05 * torch.randn(*batch_shape, q, 2, **tkwargs) + ).requires_grad_(True) + with mock.patch( + sample_cached_path, wraps=sample_cached_cholesky + ) as mock_sample_cached: + torch.manual_seed(0) + val = acqf(test_X).exp() + mock_sample_cached.assert_called_once() + val.sum().backward() + base_samples = acqf.sampler.base_samples.detach().clone() + X_grad = test_X.grad.clone() + test_X2 = test_X.detach().clone().requires_grad_(True) + acqf_no_cache.sampler.base_samples = base_samples + with mock.patch( + sample_cached_path, wraps=sample_cached_cholesky + ) as mock_sample_cached: + torch.manual_seed(0) + val2 = acqf_no_cache(test_X2).exp() + mock_sample_cached.assert_not_called() + self.assertAllClose(val, val2, **all_close_kwargs) + 
val2.sum().backward() + self.assertAllClose(X_grad, test_X2.grad, **all_close_kwargs) + # test we fall back to standard sampling for + # ill-conditioned covariances + acqf._baseline_L = torch.zeros_like(acqf._baseline_L) + with warnings.catch_warnings(record=True) as ws, settings.debug(True): + with torch.no_grad(): + acqf(test_X) + self.assertEqual(sum(issubclass(w.category, BotorchWarning) for w in ws), 1) + + # test w/ posterior transform + X_baseline = torch.rand(2, 1) + model = SingleTaskGP(X_baseline, torch.randn(2, 1)) + pt = ScalarizedPosteriorTransform(weights=torch.tensor([-1])) + with mock.patch.object( + qLogNoisyExpectedImprovement, + "_compute_root_decomposition", + ) as mock_cache_root: + acqf = qLogNoisyExpectedImprovement( + model=model, + X_baseline=X_baseline, + sampler=IIDNormalSampler(sample_shape=torch.Size([1])), + posterior_transform=pt, + prune_baseline=False, + cache_root=True, + ) + tf_post = model.posterior(X_baseline, posterior_transform=pt) + self.assertTrue( + torch.allclose( + tf_post.mean, mock_cache_root.call_args[-1]["posterior"].mean + ) + ) + + # testing constraints + n, d, m = 8, 1, 3 + X_baseline = torch.rand(n, d) + model = SingleTaskGP(X_baseline, torch.randn(n, m)) # batched model + nei_args = { + "model": model, + "X_baseline": X_baseline, + "prune_baseline": False, + "cache_root": True, + "posterior_transform": ScalarizedPosteriorTransform(weights=torch.ones(m)), + "sampler": SobolQMCNormalSampler(torch.Size([5])), + } + acqf = qLogNoisyExpectedImprovement(**nei_args) + X = torch.randn_like(X_baseline) + for con in [feasible_con, infeasible_con]: + with self.subTest(con=con): + target = "botorch.acquisition.utils.get_infeasible_cost" + infcost = torch.tensor([3], device=self.device, dtype=dtype) + with mock.patch(target, return_value=infcost): + cacqf = qLogNoisyExpectedImprovement(**nei_args, constraints=[con]) + + _, obj = cacqf._get_samples_and_objectives(X) + best_feas_f = cacqf.compute_best_f(obj) + if con is feasible_con: + self.assertAllClose(best_feas_f, acqf.compute_best_f(obj)) + else: + self.assertAllClose( + best_feas_f, torch.full_like(obj[..., [0]], -infcost.item()) + ) + # TODO: Test different objectives (incl. 
constraints) diff --git a/test/models/test_fully_bayesian.py b/test/models/test_fully_bayesian.py index 5994558026..52897adb54 100644 --- a/test/models/test_fully_bayesian.py +++ b/test/models/test_fully_bayesian.py @@ -18,6 +18,10 @@ ProbabilityOfImprovement, UpperConfidenceBound, ) +from botorch.acquisition.logei import ( + qLogExpectedImprovement, + qLogNoisyExpectedImprovement, +) from botorch.acquisition.monte_carlo import ( qExpectedImprovement, qNoisyExpectedImprovement, @@ -411,6 +415,7 @@ def test_acquisition_functions(self): model, warmup_steps=8, num_samples=5, thinning=2, disable_progbar=True ) deterministic = GenericDeterministicModel(f=lambda x: x[..., :1]) + # due to ModelList type, setting cache_root=False for all noisy EI variants list_gp = ModelListGP(model, model) mixed_list = ModelList(deterministic, model) simple_sampler = get_sampler( @@ -427,11 +432,23 @@ def test_acquisition_functions(self): ProbabilityOfImprovement(model=model, best_f=train_Y.max()), PosteriorMean(model=model), UpperConfidenceBound(model=model, beta=4), + qLogExpectedImprovement( + model=model, best_f=train_Y.max(), sampler=simple_sampler + ), qExpectedImprovement( model=model, best_f=train_Y.max(), sampler=simple_sampler ), + qLogNoisyExpectedImprovement( + model=model, + X_baseline=train_X, + sampler=simple_sampler, + cache_root=False, + ), qNoisyExpectedImprovement( - model=model, X_baseline=train_X, sampler=simple_sampler + model=model, + X_baseline=train_X, + sampler=simple_sampler, + cache_root=False, ), qProbabilityOfImprovement( model=model, best_f=train_Y.max(), sampler=simple_sampler @@ -443,6 +460,7 @@ def test_acquisition_functions(self): X_baseline=train_X, ref_point=torch.zeros(2, **tkwargs), sampler=list_gp_sampler, + cache_root=False, ), qExpectedHypervolumeImprovement( model=list_gp, @@ -458,6 +476,7 @@ def test_acquisition_functions(self): X_baseline=train_X, ref_point=torch.zeros(2, **tkwargs), sampler=mixed_list_sampler, + cache_root=False, ), qExpectedHypervolumeImprovement( model=mixed_list, From 4acbdec518057135435b57c0d235599349dc7aae Mon Sep 17 00:00:00 2001 From: Sam Daulton Date: Tue, 18 Jul 2023 19:11:03 -0700 Subject: [PATCH 16/18] Decoupled Fantasization (#1853) Summary: Pull Request resolved: https://github.com/pytorch/botorch/pull/1853 Support fantasizing at a subset of outcomes for each design Reviewed By: esantorella Differential Revision: D46344822 fbshipit-source-id: 066795f2f3bce1b6628d2ab1d7305b331f1b1ffb --- botorch/models/model.py | 83 ++++++++- test/models/test_model_list_gp_regression.py | 177 ++++++++++++++++--- 2 files changed, 233 insertions(+), 27 deletions(-) diff --git a/botorch/models/model.py b/botorch/models/model.py index 0b5112ef0b..67f10fc9dc 100644 --- a/botorch/models/model.py +++ b/botorch/models/model.py @@ -32,10 +32,11 @@ import numpy as np import torch from botorch import settings -from botorch.exceptions.errors import InputDataError +from botorch.exceptions.errors import BotorchTensorDimensionError, InputDataError from botorch.models.utils.assorted import fantasize as fantasize_flag from botorch.posteriors import Posterior, PosteriorList from botorch.sampling.base import MCSampler +from botorch.sampling.list_sampler import ListSampler from botorch.utils.datasets import BotorchDataset from botorch.utils.transforms import is_fully_bayesian from torch import Tensor @@ -327,6 +328,19 @@ def fantasize( Returns: The constructed fantasy model. 
""" + # if the inputs are empty, expand the inputs + if X.shape[-2] == 0: + output_shape = ( + sampler.sample_shape + + X.shape[:-2] + + self.batch_shape + + torch.Size([0, self.num_outputs]) + ) + return self.condition_on_observations( + X=self.transform_inputs(X), + Y=torch.empty(output_shape, dtype=X.dtype, device=X.device), + **kwargs, + ) propagate_grads = kwargs.pop("propagate_grads", False) with fantasize_flag(): with settings.propagate_grads(propagate_grads): @@ -529,6 +543,73 @@ def load_state_dict( m.load_state_dict(filtered_dict) super().load_state_dict(state_dict=state_dict, strict=strict) + def fantasize( + self, + X: Tensor, + sampler: MCSampler, + observation_noise: bool = True, + evaluation_mask: Optional[Tensor] = None, + **kwargs: Any, + ) -> Model: + r"""Construct a fantasy model. + + Constructs a fantasy model in the following fashion: + (1) compute the model posterior at `X` (including observation noise if + `observation_noise=True`). + (2) sample from this posterior (using `sampler`) to generate "fake" + observations. + (3) condition the model on the new fake observations. + + Args: + X: A `batch_shape x n' x d`-dim Tensor, where `d` is the dimension of + the feature space, `n'` is the number of points per batch, and + `batch_shape` is the batch shape (must be compatible with the + batch shape of the model). + sampler: The sampler used for sampling from the posterior at `X`. If + evaluation_mask is not None, this must be a `ListSampler`. + observation_noise: If True, include observation noise. + evaluation_mask: A `n' x m`-dim tensor of booleans indicating which + outputs should be fantasized for a given design. This uses the same + evaluation mask for all batches. + + Returns: + The constructed fantasy model. + """ + if evaluation_mask is not None: + if ( + evaluation_mask.ndim != 2 + and evaluation_mask.shape[0] != X.shape[-2] + and evaluation_mask.shape[1] != self.num_outputs + ): + raise BotorchTensorDimensionError( + f"Expected evaluation_mask of shape {X.shape[0]} " + f"x {self.num_outputs}, but got {evaluation_mask.shape}." + ) + if not isinstance(sampler, ListSampler): + raise ValueError("Decoupled fantasization requires a list of samplers.") + + fant_models = [] + X_i = X + for i in range(self.num_outputs): + # get the inputs to fantasize at for output i + if evaluation_mask is not None: + mask_i = evaluation_mask[:, i] + X_i = X[..., mask_i, :] + # TODO (T158701749): implement a QMC DecoupledSampler that draws all + # samples from a single Sobol sequence or consider requiring that the + # sampling is IID to ensure good coverage. 
+ sampler_i = sampler.samplers[i] + else: + sampler_i = sampler + fant_model = self.models[i].fantasize( + X=X_i, + sampler=sampler_i, + observation_noise=observation_noise, + **kwargs, + ) + fant_models.append(fant_model) + return self.__class__(*fant_models) + class ModelDict(ModuleDict): r"""A lightweight container mapping model names to models.""" diff --git a/test/models/test_model_list_gp_regression.py b/test/models/test_model_list_gp_regression.py index a16c013a22..1547f2a212 100644 --- a/test/models/test_model_list_gp_regression.py +++ b/test/models/test_model_list_gp_regression.py @@ -6,6 +6,7 @@ import itertools import warnings +from typing import Optional import torch from botorch.acquisition.objective import ScalarizedPosteriorTransform @@ -18,6 +19,8 @@ from botorch.models.transforms.input import Normalize from botorch.models.transforms.outcome import ChainedOutcomeTransform, Log, Standardize from botorch.posteriors import GPyTorchPosterior, PosteriorList, TransformedPosterior +from botorch.sampling.base import MCSampler +from botorch.sampling.list_sampler import ListSampler from botorch.sampling.normal import IIDNormalSampler from botorch.utils.testing import _get_random_data, BotorchTestCase from gpytorch.distributions import MultitaskMultivariateNormal, MultivariateNormal @@ -27,6 +30,7 @@ from gpytorch.mlls import SumMarginalLogLikelihood from gpytorch.mlls.exact_marginal_log_likelihood import ExactMarginalLogLikelihood from gpytorch.priors import GammaPrior +from torch import Tensor def _get_model( @@ -386,6 +390,26 @@ def test_fantasize(self): self.assertEqual(fm_i.train_inputs[0].shape, torch.Size([2, 8, 2])) self.assertEqual(fm_i.train_targets.shape, torch.Size([2, 8])) + # test decoupled + sampler1 = IIDNormalSampler(2) + sampler2 = IIDNormalSampler(2) + eval_mask = torch.tensor( + [[1, 0], [0, 1], [1, 0]], + dtype=torch.bool, + ) + fm = modellist.fantasize( + torch.rand(3, 2), + sampler=ListSampler(sampler1, sampler2), + evaluation_mask=eval_mask, + ) + self.assertIsInstance(fm, ModelListGP) + for i in range(2): + fm_i = fm.models[i] + self.assertIsInstance(fm_i, SingleTaskGP) + num_points = 7 - i + self.assertEqual(fm_i.train_inputs[0].shape, torch.Size([2, num_points, 2])) + self.assertEqual(fm_i.train_targets.shape, torch.Size([2, num_points])) + def test_fantasize_with_outcome_transform(self) -> None: """ Check that fantasized posteriors from a `ModelListGP` with transforms @@ -403,42 +427,108 @@ def test_fantasize_with_outcome_transform(self) -> None: with self.subTest(dtype=dtype): tkwargs = {"device": self.device, "dtype": dtype} X = torch.linspace(0, 1, 20, **tkwargs)[:, None] - Y = 10 * torch.linspace(0, 1, 20, **tkwargs)[:, None] + Y1 = 10 * torch.linspace(0, 1, 20, **tkwargs)[:, None] + Y2 = 2 * Y1 + Y = torch.cat([Y1, Y2], dim=-1) target_x = torch.tensor([[0.5]], **tkwargs) model_with_transform = ModelListGP( - SingleTaskGP(X, Y, outcome_transform=Standardize(m=1)) + SingleTaskGP(X, Y1, outcome_transform=Standardize(m=1)), + SingleTaskGP(X, Y2, outcome_transform=Standardize(m=1)), ) - y_standardized, _ = Standardize(m=1).forward(Y) + outcome_transform = Standardize(m=2) + y_standardized, _ = outcome_transform(Y) + outcome_transform.eval() model_manually_transformed = ModelListGP( - SingleTaskGP(X, y_standardized) + SingleTaskGP(X, y_standardized[:, :1]), + SingleTaskGP(X, y_standardized[:, 1:]), ) - def _get_fant_mean(model: ModelListGP) -> float: + def _get_fant_mean( + model: ModelListGP, + sampler: MCSampler, + eval_mask: Optional[Tensor] = None, + ) 
-> float: fant = model.fantasize( - target_x, sampler=IIDNormalSampler(10, seed=0) + target_x, + sampler=sampler, + evaluation_mask=eval_mask, ) - return fant.posterior(target_x).mean.mean().item() + return fant.posterior(target_x).mean.mean(dim=(-2, -3)) - outcome_transform = model_with_transform.models[0].outcome_transform # ~0 + sampler = IIDNormalSampler(10, seed=0) fant_mean_with_manual_transform = _get_fant_mean( - model_manually_transformed + model_manually_transformed, sampler=sampler ) # Inexact since this is an MC test and we don't want it flaky - self.assertAlmostEqual(fant_mean_with_manual_transform, 0.0, delta=0.1) - manually_rescaled_mean, _ = outcome_transform.untransform( + self.assertLessEqual( + (fant_mean_with_manual_transform - 0.0).abs().max().item(), 0.1 + ) + manually_rescaled_mean = outcome_transform.untransform( fant_mean_with_manual_transform + )[0].view(-1) + fant_mean_with_native_transform = _get_fant_mean( + model_with_transform, sampler=sampler ) - fant_mean_with_native_transform = _get_fant_mean(model_with_transform) # Inexact since this is an MC test and we don't want it flaky - self.assertAlmostEqual(fant_mean_with_native_transform, 5.0, delta=0.5) + self.assertLessEqual( + ( + fant_mean_with_native_transform + - torch.tensor([5.0, 10.0], **tkwargs) + ) + .abs() + .max() + .item(), + 0.5, + ) # tighter tolerance here since the models should use the same samples - self.assertAlmostEqual( - manually_rescaled_mean.item(), + self.assertAllClose( + manually_rescaled_mean, + fant_mean_with_native_transform, + ) + # test decoupled + sampler = ListSampler( + IIDNormalSampler(10, seed=0), + IIDNormalSampler(10, seed=0), + ) + fant_mean_with_manual_transform = _get_fant_mean( + model_manually_transformed, + sampler=sampler, + eval_mask=torch.tensor( + [[0, 1]], dtype=torch.bool, device=tkwargs["device"] + ), + ) + # Inexact since this is an MC test and we don't want it flaky + self.assertLessEqual( + (fant_mean_with_manual_transform - 0.0).abs().max().item(), 0.1 + ) + manually_rescaled_mean = outcome_transform.untransform( + fant_mean_with_manual_transform + )[0].view(-1) + fant_mean_with_native_transform = _get_fant_mean( + model_with_transform, + sampler=sampler, + eval_mask=torch.tensor( + [[0, 1]], dtype=torch.bool, device=tkwargs["device"] + ), + ) + # Inexact since this is an MC test and we don't want it flaky + self.assertLessEqual( + ( + fant_mean_with_native_transform + - torch.tensor([5.0, 10.0], **tkwargs) + ) + .abs() + .max() + .item(), + 0.5, + ) + # tighter tolerance here since the models should use the same samples + self.assertAllClose( + manually_rescaled_mean, fant_mean_with_native_transform, - delta=1e-6, ) def test_fantasize_with_outcome_transform_fixed_noise(self) -> None: @@ -458,18 +548,53 @@ def test_fantasize_with_outcome_transform_fixed_noise(self) -> None: tkwargs = {"device": self.device, "dtype": dtype} X = torch.tensor([[0.0], [1.0]], **tkwargs) Y = torch.tensor([[y_at_low_x], [y_at_high_x]], **tkwargs) + Y2 = 2 * Y yvar = torch.full_like(Y, 1e-4) + yvar2 = 2 * yvar model = ModelListGP( - FixedNoiseGP(X, Y, yvar, outcome_transform=Standardize(m=1)) + FixedNoiseGP(X, Y, yvar, outcome_transform=Standardize(m=1)), + FixedNoiseGP(X, Y2, yvar2, outcome_transform=Standardize(m=1)), ) model.posterior(torch.zeros((1, 1), **tkwargs)) - - fant = model.fantasize( - X, sampler=IIDNormalSampler(n_fants, seed=0), noise=yvar - ) - - fant_mean = fant.posterior(X).mean.mean(0).flatten().tolist() - self.assertAlmostEqual(fant_mean[0], y_at_low_x, 
delta=1) - # delta=1 is a 1% error (since y_at_low_x = 100) - self.assertAlmostEqual(fant_mean[1], y_at_high_x, delta=1) + for decoupled in (False, True): + if decoupled: + kwargs = { + "sampler": ListSampler( + IIDNormalSampler(n_fants, seed=0), + IIDNormalSampler(n_fants, seed=0), + ), + "evaluation_mask": torch.tensor( + [[0, 1], [1, 0]], + dtype=torch.bool, + device=tkwargs["device"], + ), + } + else: + kwargs = { + "sampler": IIDNormalSampler(n_fants, seed=0), + } + fant = model.fantasize(X, **kwargs) + + fant_mean = fant.posterior(X).mean.mean(0) + self.assertAlmostEqual(fant_mean[0, 0].item(), y_at_low_x, delta=1) + self.assertAlmostEqual( + fant_mean[0, 1].item(), 2 * y_at_low_x, delta=1 + ) + # delta=1 is a 1% error (since y_at_low_x = 100) + self.assertAlmostEqual(fant_mean[1, 0].item(), y_at_high_x, delta=1) + self.assertAlmostEqual( + fant_mean[1, 1].item(), 2 * y_at_high_x, delta=1 + ) + for i, fm_i in enumerate(fant.models): + n_points = 3 if decoupled else 4 + self.assertEqual( + fm_i.train_inputs[0].shape, torch.Size([20, n_points, 1]) + ) + self.assertEqual( + fm_i.train_targets.shape, torch.Size([20, n_points]) + ) + if decoupled: + self.assertTrue( + torch.equal(fm_i.train_inputs[0][0][-1], X[1 - i]) + ) From 81f7c98c3702e84caa7170755119b06a6186d6fb Mon Sep 17 00:00:00 2001 From: Sam Daulton Date: Wed, 19 Jul 2023 13:29:46 -0700 Subject: [PATCH 17/18] bump test coverage (#1940) Summary: Pull Request resolved: https://github.com/pytorch/botorch/pull/1940 see title. This also fixes the shape check on `evaluation_mask`. Reviewed By: Balandat Differential Revision: D47595261 fbshipit-source-id: bb271855451a33deed48296daf328e3e2ecac142 --- botorch/models/model.py | 11 ++++----- test/models/test_model_list_gp_regression.py | 25 ++++++++++++++++++++ 2 files changed, 30 insertions(+), 6 deletions(-) diff --git a/botorch/models/model.py b/botorch/models/model.py index 67f10fc9dc..8b20ff0edd 100644 --- a/botorch/models/model.py +++ b/botorch/models/model.py @@ -576,14 +576,13 @@ def fantasize( The constructed fantasy model. """ if evaluation_mask is not None: - if ( - evaluation_mask.ndim != 2 - and evaluation_mask.shape[0] != X.shape[-2] - and evaluation_mask.shape[1] != self.num_outputs + if evaluation_mask.ndim != 2 or evaluation_mask.shape != torch.Size( + [X.shape[-2], self.num_outputs] ): raise BotorchTensorDimensionError( - f"Expected evaluation_mask of shape {X.shape[0]} " - f"x {self.num_outputs}, but got {evaluation_mask.shape}." + f"Expected evaluation_mask of shape `{X.shape[0]} " + f"x {self.num_outputs}`, but got `" + f"{' x '.join(str(i) for i in evaluation_mask.shape)}`." ) if not isinstance(sampler, ListSampler): raise ValueError("Decoupled fantasization requires a list of samplers.") diff --git a/test/models/test_model_list_gp_regression.py b/test/models/test_model_list_gp_regression.py index 1547f2a212..09b058a521 100644 --- a/test/models/test_model_list_gp_regression.py +++ b/test/models/test_model_list_gp_regression.py @@ -555,7 +555,32 @@ def test_fantasize_with_outcome_transform_fixed_noise(self) -> None: FixedNoiseGP(X, Y, yvar, outcome_transform=Standardize(m=1)), FixedNoiseGP(X, Y2, yvar2, outcome_transform=Standardize(m=1)), ) + # test exceptions + eval_mask = torch.zeros( + 3, 2, 2, dtype=torch.bool, device=tkwargs["device"] + ) + msg = ( + f"Expected evaluation_mask of shape `{X.shape[0]} x " + f"{model.num_outputs}`, but got `" + f"{' x '.join(str(i) for i in eval_mask.shape)}`." 
+            )
+            with self.assertRaisesRegex(BotorchTensorDimensionError, msg):
+                model.fantasize(
+                    X,
+                    evaluation_mask=eval_mask,
+                    sampler=ListSampler(
+                        IIDNormalSampler(n_fants, seed=0),
+                        IIDNormalSampler(n_fants, seed=0),
+                    ),
+                )
+            msg = "Decoupled fantasization requires a list of samplers."
+            with self.assertRaisesRegex(ValueError, msg):
+                model.fantasize(
+                    X,
+                    evaluation_mask=eval_mask[0],
+                    sampler=IIDNormalSampler(n_fants, seed=0),
+                )
             model.posterior(torch.zeros((1, 1), **tkwargs))
             for decoupled in (False, True):
                 if decoupled:

From 71fd34e1cda6d883566a24bfc754442222df906c Mon Sep 17 00:00:00 2001
From: Elizabeth Santorella
Date: Mon, 24 Jul 2023 11:53:24 -0700
Subject: [PATCH 18/18] Bug fix: FixedFeatureAcquisitionFunction should not convert floats to float32 tensors (#1944)

Summary:
## Motivation

Addresses https://github.com/pytorch/botorch/issues/1943

* Dtype fix: floats should be double-precision
* Cleanup for clarity
* Units

Pull Request resolved: https://github.com/pytorch/botorch/pull/1944

Test Plan: Added unit tests

Reviewed By: saitcakmak

Differential Revision: D47723525

Pulled By: esantorella

fbshipit-source-id: aa5c8633b54ef31de559466baf6d320c8f2ca71c
---
 botorch/acquisition/fixed_feature.py   | 49 ++++++++++++++----
 test/acquisition/test_fixed_feature.py | 70 ++++++++++++++++++++++++--
 test/optim/test_optimize.py            | 20 ++++++++
 3 files changed, 125 insertions(+), 14 deletions(-)

diff --git a/botorch/acquisition/fixed_feature.py b/botorch/acquisition/fixed_feature.py
index 0f3b85faa7..31a746b954 100644
--- a/botorch/acquisition/fixed_feature.py
+++ b/botorch/acquisition/fixed_feature.py
@@ -20,6 +20,35 @@
 from torch.nn import Module
 
 
+def get_dtype_of_sequence(values: Sequence[Union[Tensor, float]]) -> torch.dtype:
+    """
+    Return torch.float32 if everything is single-precision and torch.float64
+    otherwise.
+
+    Numbers (non-tensors) are double-precision.
+    """
+
+    def _is_single(value: Union[Tensor, float]) -> bool:
+        return isinstance(value, Tensor) and value.dtype == torch.float32
+
+    all_single_precision = all(_is_single(value) for value in values)
+    return torch.float32 if all_single_precision else torch.float64
+
+
+def get_device_of_sequence(values: Sequence[Union[Tensor, float]]) -> torch.device:
+    """
+    CPU if everything is on the CPU; Cuda otherwise.
+
+    Numbers (non-tensors) are considered to be on the CPU.
+    """
+
+    def _is_cuda(value: Union[Tensor, float]) -> bool:
+        return hasattr(value, "device") and value.device == torch.device("cuda")
+
+    any_cuda = any(_is_cuda(value) for value in values)
+    return torch.device("cuda") if any_cuda else torch.device("cpu")
+
+
 class FixedFeatureAcquisitionFunction(AcquisitionFunction):
     """A wrapper around AquisitionFunctions to fix a subset of features.
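As a quick illustration of the promotion rules implemented by the two helper functions added above (the inputs below are only illustrative examples, not part of this patch):

    import torch
    from botorch.acquisition.fixed_feature import (
        get_device_of_sequence,
        get_dtype_of_sequence,
    )

    single = torch.tensor([0.25], dtype=torch.float32)
    double = torch.tensor([0.75], dtype=torch.float64)

    get_dtype_of_sequence([single, single])  # torch.float32: all single-precision
    get_dtype_of_sequence([single, double])  # torch.float64: any double promotes
    get_dtype_of_sequence([single, 0.5])     # torch.float64: plain floats count as double
    get_device_of_sequence([single, 0.5])    # cpu: CUDA only if some tensor is on CUDA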
@@ -58,27 +87,25 @@ def __init__(
         """
         Module.__init__(self)
         self.acq_func = acq_function
-        dtype = torch.float
-        device = torch.device("cpu")
         self.d = d
+
         if isinstance(values, Tensor):
             new_values = values.detach().clone()
         else:
+
+            dtype = get_dtype_of_sequence(values)
+            device = get_device_of_sequence(values)
+
             new_values = []
             for value in values:
                 if isinstance(value, Number):
-                    new_values.append(torch.tensor([float(value)]))
+                    value = torch.tensor([value], dtype=dtype)
                 else:
-                    # if any value uses double, use double for all values
-                    # likewise if any value uses cuda, use cuda for all values
-                    dtype = value.dtype if value.dtype == torch.double else dtype
-                    device = value.device if value.device.type == "cuda" else device
                     if value.ndim == 0:
                         # since we can't broadcast with zero-d tensors
                         value = value.unsqueeze(0)
-                    new_values.append(value.detach().clone())
-        # move all values to same device
-        for i, val in enumerate(new_values):
-            new_values[i] = val.to(dtype=dtype, device=device)
+                    value = value.detach().clone()
+
+                new_values.append(value.to(dtype=dtype, device=device))
 
         # There are 3 cases for when `values` is a `Sequence`.
         # 1) `values` == list of floats as earlier.
diff --git a/test/acquisition/test_fixed_feature.py b/test/acquisition/test_fixed_feature.py
index 8dcc02f1df..1f5218d228 100644
--- a/test/acquisition/test_fixed_feature.py
+++ b/test/acquisition/test_fixed_feature.py
@@ -6,14 +6,18 @@
 
 import torch
 from botorch.acquisition.analytic import ExpectedImprovement
-from botorch.acquisition.fixed_feature import FixedFeatureAcquisitionFunction
+from botorch.acquisition.fixed_feature import (
+    FixedFeatureAcquisitionFunction,
+    get_device_of_sequence,
+    get_dtype_of_sequence,
+)
 from botorch.acquisition.monte_carlo import qExpectedImprovement
 from botorch.models import SingleTaskGP
-from botorch.utils.testing import BotorchTestCase
+from botorch.utils.testing import BotorchTestCase, MockAcquisitionFunction
 
 
 class TestFixedFeatureAcquisitionFunction(BotorchTestCase):
-    def test_fixed_features(self):
+    def test_fixed_features(self) -> None:
         train_X = torch.rand(5, 3, device=self.device)
         train_Y = train_X.norm(dim=-1, keepdim=True)
         model = SingleTaskGP(train_X, train_Y).to(device=self.device).eval()
@@ -132,3 +136,63 @@
             )
             with self.assertRaises(ValueError):
                 EI_ff.X_pending
+
+    def test_values_dtypes(self) -> None:
+        acqf = MockAcquisitionFunction()
+
+        for input, d, expected_dtype in [
+            (torch.tensor([0.0], dtype=torch.float32), 1, torch.float32),
+            (torch.tensor([0.0], dtype=torch.float64), 1, torch.float64),
+            (
+                [
+                    torch.tensor([0.0], dtype=torch.float32),
+                    torch.tensor([0.0], dtype=torch.float64),
+                ],
+                2,
+                torch.float64,
+            ),
+            ([0.0], 1, torch.float64),
+            ([torch.tensor(0.0, dtype=torch.float32), 0.0], 2, torch.float64),
+        ]:
+            with self.subTest(input=input, d=d, expected_dtype=expected_dtype):
+                self.assertEqual(get_dtype_of_sequence(input), expected_dtype)
+                ff = FixedFeatureAcquisitionFunction(
+                    acqf, d=d, columns=[2], values=input
+                )
+                self.assertEqual(ff.values.dtype, expected_dtype)
+
+    def test_values_devices(self) -> None:
+
+        acqf = MockAcquisitionFunction()
+        cpu = torch.device("cpu")
+        cuda = torch.device("cuda")
+
+        test_cases = [
+            (torch.tensor([0.0], device=cpu), 1, cpu),
+            ([0.0], 1, cpu),
+            ([0.0, torch.tensor([0.0], device=cpu)], 2, cpu),
+        ]
+
+        # Can only properly test this when running CUDA tests
+        if self.device.type == "cuda":
+            test_cases = test_cases + [
+                (torch.tensor([0.0], device=cuda), 1, cuda),
+                (
+                    [
+                        torch.tensor([0.0], device=cpu),
+                        torch.tensor([0.0], device=cuda),
+                    ],
+                    2,
+                    cuda,
+                ),
+                ([0.0], 1, cpu),
+                ([torch.tensor(0.0, device=cuda), 0.0], 2, cuda),
+            ]
+
+        for input, d, expected_device in test_cases:
+            with self.subTest(input=input, d=d, expected_device=expected_device):
+                self.assertEqual(get_device_of_sequence(input), expected_device)
+                ff = FixedFeatureAcquisitionFunction(
+                    acqf, d=d, columns=[2], values=input
+                )
+                self.assertEqual(ff.values.device, expected_device)
diff --git a/test/optim/test_optimize.py b/test/optim/test_optimize.py
index d29b63e05c..a17a28c60a 100644
--- a/test/optim/test_optimize.py
+++ b/test/optim/test_optimize.py
@@ -1763,3 +1763,23 @@ def test_optimize_acqf_discrete_local_search(self):
         )
         self.assertEqual(len(X), 20)
         self.assertAllClose(torch.unique(X, dim=0), X)
+
+    def test_no_precision_loss_with_fixed_features(self) -> None:
+
+        acqf = SquaredAcquisitionFunction()
+
+        val = 1e-1
+        fixed_features_list = [{0: val}]
+
+        bounds = torch.stack(
+            [torch.zeros(2, dtype=torch.float64), torch.ones(2, dtype=torch.float64)]
+        )
+        candidate, _ = optimize_acqf_mixed(
+            acqf,
+            bounds=bounds,
+            q=1,
+            num_restarts=1,
+            raw_samples=1,
+            fixed_features_list=fixed_features_list,
+        )
+        self.assertEqual(candidate[0, 0].item(), val)
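For reference, a minimal usage sketch of the decoupled fantasization API added in [PATCH 16/18], mirroring the new ModelListGP unit tests; the toy models, training data, and evaluation mask below are illustrative assumptions, not content from the patches:

    import torch
    from botorch.models import ModelListGP, SingleTaskGP
    from botorch.sampling.list_sampler import ListSampler
    from botorch.sampling.normal import IIDNormalSampler

    train_X = torch.rand(5, 2, dtype=torch.float64)
    model = ModelListGP(
        SingleTaskGP(train_X, train_X.sum(dim=-1, keepdim=True)),
        SingleTaskGP(train_X, train_X.prod(dim=-1, keepdim=True)),
    )

    X_new = torch.rand(3, 2, dtype=torch.float64)
    # Row i of the mask picks which outputs are fantasized at X_new[i]:
    # output 0 at points 0 and 2, output 1 only at point 1.
    eval_mask = torch.tensor([[1, 0], [0, 1], [1, 0]], dtype=torch.bool)

    fantasy_model = model.fantasize(
        X_new,
        sampler=ListSampler(
            IIDNormalSampler(sample_shape=torch.Size([2])),
            IIDNormalSampler(sample_shape=torch.Size([2])),
        ),
        evaluation_mask=eval_mask,
    )
    # Each sub-model is conditioned only on its own masked subset of X_new.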