diff --git a/ax/benchmark/benchmark.py b/ax/benchmark/benchmark.py
index 2412f98d19f..33df2c1ec5c 100644
--- a/ax/benchmark/benchmark.py
+++ b/ax/benchmark/benchmark.py
@@ -22,7 +22,7 @@
 from collections.abc import Iterable
 from itertools import product
 from logging import Logger
-from time import time
+from time import monotonic, time
 
 import numpy as np
 
@@ -30,6 +30,7 @@
 from ax.benchmark.benchmark_problem import BenchmarkProblem
 from ax.benchmark.benchmark_result import AggregatedBenchmarkResult, BenchmarkResult
 from ax.core.experiment import Experiment
+from ax.core.types import TParameterization
 from ax.core.utils import get_model_times
 from ax.service.scheduler import Scheduler
 from ax.service.utils.best_point_mixin import BestPointMixin
@@ -93,12 +94,23 @@ def benchmark_replication(
     method: BenchmarkMethod,
     seed: int,
 ) -> BenchmarkResult:
-    """Runs one benchmarking replication (equivalent to one optimization loop).
+    """
+    Run one benchmarking replication (equivalent to one optimization loop).
+
+    After each trial, the `method` gets the best parameter(s) found so far, as
+    evaluated based on empirical data. After all trials are run, the `problem`
+    gets the oracle values of each "best" parameter; this yields the
+    ``inference_trace``. The cumulative maximum of the oracle value of each
+    parameterization tested is the ``oracle_trace``.
+
     Args:
         problem: The BenchmarkProblem to test against (can be synthetic or real)
         method: The BenchmarkMethod to test
         seed: The seed to use for this replication.
+
+    Returns:
+        A ``BenchmarkResult`` object.
     """
 
     experiment = Experiment(
@@ -113,19 +125,67 @@ def benchmark_replication(
         generation_strategy=method.generation_strategy.clone_reset(),
         options=method.scheduler_options,
     )
+    timeout_hours = scheduler.options.timeout_hours
+
+    # Best parameters recorded after each trial (one list per trial).
+    best_params_by_trial: list[list[TParameterization]] = []
+
+    is_mf_or_mt = len(problem.runner.target_fidelity_and_task) > 0
 
+    # Run the optimization loop.
     with with_rng_seed(seed=seed):
-        scheduler.run_n_trials(max_trials=problem.num_trials)
+        for _ in range(problem.num_trials):
+            start = monotonic()
+            next(
+                scheduler.run_trials_and_yield_results(
+                    max_trials=1, timeout_hours=timeout_hours
+                )
+            )
+            if timeout_hours is not None:
+                elapsed_hours = (monotonic() - start) / 3600
+                timeout_hours = timeout_hours - elapsed_hours
+                if timeout_hours <= 0:
+                    break
+
+            if problem.is_moo or is_mf_or_mt:
+                # Inference trace is not supported for MOO.
+                # It's also not supported for multi-fidelity or multi-task
+                # problems, because Ax's best-point functionality doesn't know
+                # to predict at the target task or fidelity.
+                continue
+
+            best_params = method.get_best_parameters(
+                experiment=experiment,
+                optimization_config=problem.optimization_config,
+                n_points=problem.n_best_points,
+            )
+            best_params_by_trial.append(best_params)
+
+    # Construct the inference trace from the best parameters.
+    inference_trace = np.full(problem.num_trials, np.nan)
+    for trial_index, best_params in enumerate(best_params_by_trial):
+        # Construct an experiment with one BatchTrial.
+        best_params_oracle_experiment = problem.get_oracle_experiment_from_params(
+            {0: {str(i): p for i, p in enumerate(best_params)}}
+        )
+        # Get the optimization trace. It will have only one point.
+        inference_trace[trial_index] = BestPointMixin._get_trace(
+            experiment=best_params_oracle_experiment,
+            optimization_config=problem.optimization_config,
+        )[0]
 
-    oracle_experiment = problem.get_oracle_experiment_from_experiment(
+    actual_params_oracle_experiment = problem.get_oracle_experiment_from_experiment(
         experiment=experiment
     )
-    optimization_trace = np.array(
+    oracle_trace = np.array(
         BestPointMixin._get_trace(
-            experiment=oracle_experiment,
+            experiment=actual_params_oracle_experiment,
             optimization_config=problem.optimization_config,
         )
     )
+    optimization_trace = (
+        inference_trace if problem.report_inference_value_as_trace else oracle_trace
+    )
 
     try:
         # Catch any errors that may occur during score computation, such as errors
@@ -155,6 +215,8 @@
         name=scheduler.experiment.name,
         seed=seed,
         experiment=scheduler.experiment,
+        oracle_trace=oracle_trace,
+        inference_trace=inference_trace,
         optimization_trace=optimization_trace,
         score_trace=score_trace,
         fit_time=fit_time,
diff --git a/ax/benchmark/benchmark_method.py b/ax/benchmark/benchmark_method.py
index 95983d569d8..01f962af4de 100644
--- a/ax/benchmark/benchmark_method.py
+++ b/ax/benchmark/benchmark_method.py
@@ -5,16 +5,20 @@
 
 # pyre-strict
 
-import logging
-from dataclasses import dataclass
+from dataclasses import dataclass, field
+
+from ax.core.experiment import Experiment
+from ax.core.optimization_config import (
+    MultiObjectiveOptimizationConfig,
+    OptimizationConfig,
+)
+from ax.core.types import TParameterization
 from ax.modelbridge.generation_strategy import GenerationStrategy
+from ax.service.utils.best_point_mixin import BestPointMixin
 from ax.service.utils.scheduler_options import SchedulerOptions, TrialType
 from ax.utils.common.base import Base
-from ax.utils.common.logger import get_logger
-
-
-logger: logging.Logger = get_logger("BenchmarkMethod")
+from pyre_extensions import none_throws
 
 
 @dataclass(frozen=True)
@@ -36,12 +40,74 @@ class BenchmarkMethod(Base):
             `get_benchmark_scheduler_options`.
         distribute_replications: Indicates whether the replications should be
             run in a distributed manner. Ax itself does not use this attribute.
+        best_point_kwargs: Arguments passed to `get_pareto_optimal_parameters`
+            (if multi-objective) or `BestPointMixin._get_best_trial` (if
+            single-objective). Currently, the only supported argument is
+            `use_model_predictions`. Note, however, that best-point selection
+            is not currently supported for multi-objective problems, and
+            `get_best_parameters` will raise a `NotImplementedError`.
     """
 
     name: str
     generation_strategy: GenerationStrategy
     scheduler_options: SchedulerOptions
     distribute_replications: bool = False
+    best_point_kwargs: dict[str, bool] = field(
+        default_factory=lambda: {"use_model_predictions": False}
+    )
+
+    def get_best_parameters(
+        self,
+        experiment: Experiment,
+        optimization_config: OptimizationConfig,
+        n_points: int,
+    ) -> list[TParameterization]:
+        """
+        Get ``n_points`` promising points. NOTE: Only SOO with ``n_points=1``
+        is currently supported.
+
+        The expected use case is that these points will be evaluated against
+        an oracle for hypervolume (if multi-objective) or for the value of the
+        best parameter (if single-objective).
+
+        For multi-objective cases, ``n_points > 1`` is needed. For SOO,
+        ``n_points > 1`` reflects setups where we can choose several points,
+        evaluate them noiselessly or at high fidelity, and then use the best
+        one.
+
+        Args:
+            experiment: The experiment to get the data from.
+                This should contain values that would be observed in a
+                realistic setting and not contain oracle values.
+            optimization_config: The ``optimization_config`` for the
+                corresponding ``BenchmarkProblem``.
+            n_points: The number of points to return.
+        """
+        if isinstance(optimization_config, MultiObjectiveOptimizationConfig):
+            raise NotImplementedError(
+                "BenchmarkMethod.get_pareto_optimal_parameters is not currently "
+                "supported for multi-objective problems."
+            )
+
+        if n_points != 1:
+            raise NotImplementedError(
+                f"Currently only n_points=1 is supported. Got {n_points=}."
+            )
+
+        # SOO, n=1 case.
+        # Note: This has the same effect as Scheduler.get_best_parameters.
+        result = BestPointMixin._get_best_trial(
+            experiment=experiment,
+            generation_strategy=self.generation_strategy,
+            optimization_config=optimization_config,
+            # pyre-fixme: Incompatible parameter type [6]: In call
+            # `get_pareto_optimal_parameters`, for 4th positional argument,
+            # expected `Optional[Iterable[int]]` but got `bool`.
+            **self.best_point_kwargs,
+        )
+
+        i, params, prediction = none_throws(result)
+        return [params]
 
 
 def get_benchmark_scheduler_options(
diff --git a/ax/benchmark/benchmark_problem.py b/ax/benchmark/benchmark_problem.py
index ea8e21313bb..a39d90c8406 100644
--- a/ax/benchmark/benchmark_problem.py
+++ b/ax/benchmark/benchmark_problem.py
@@ -74,6 +74,13 @@ class BenchmarkProblem(Base):
         search_space: The search space.
         runner: The Runner that will be used to generate data for the problem,
             including any ground-truth data stored as tracking metrics.
+        report_inference_value_as_trace: Whether the ``optimization_trace`` on
+            a ``BenchmarkResult`` should use the ``oracle_trace`` (if False,
+            default) or the ``inference_trace``. See ``BenchmarkResult`` for
+            more information. Currently, this is only supported for
+            single-objective problems.
+        n_best_points: Number of points for a best-point selector to recommend.
+            Currently, only ``n_best_points=1`` is supported.
     """
 
     name: str
@@ -84,6 +91,17 @@ class BenchmarkProblem(Base):
     search_space: SearchSpace = field(repr=False)
     runner: BenchmarkRunner = field(repr=False)
+    report_inference_value_as_trace: bool = False
+    n_best_points: int = 1
+
+    def __post_init__(self) -> None:
+        if self.n_best_points != 1:
+            raise NotImplementedError("Only `n_best_points=1` is currently supported.")
+        if self.report_inference_value_as_trace and self.is_moo:
+            raise NotImplementedError(
+                "Inference trace is not supported for MOO. Please set "
+                "`report_inference_value_as_trace` to False."
+            )
 
     def get_oracle_experiment_from_params(
         self,
@@ -285,6 +303,7 @@ def create_problem_from_botorch(
     lower_is_better: bool = True,
     observe_noise_sd: bool = False,
     search_space: SearchSpace | None = None,
+    report_inference_value_as_trace: bool = False,
 ) -> BenchmarkProblem:
     """
     Create a `BenchmarkProblem` from a BoTorch `BaseTestProblem`.
@@ -308,6 +327,10 @@ def create_problem_from_botorch(
         search_space: If provided, the `search_space` of the `BenchmarkProblem`.
             Otherwise, a `SearchSpace` with all `RangeParameter`s is created
            from the bounds of the test problem.
+        report_inference_value_as_trace: If True, indicates that the
+            ``optimization_trace`` on a ``BenchmarkResult`` ought to be the
+            ``inference_trace``; otherwise, it will be the ``oracle_trace``.
+            See ``BenchmarkResult`` for more information.
""" # pyre-fixme [45]: Invalid class instantiation test_problem = test_problem_class(**test_problem_kwargs) @@ -364,4 +387,5 @@ def create_problem_from_botorch( num_trials=num_trials, observe_noise_stds=observe_noise_sd, optimal_value=optimal_value, + report_inference_value_as_trace=report_inference_value_as_trace, ) diff --git a/ax/benchmark/benchmark_result.py b/ax/benchmark/benchmark_result.py index afa74e77ee4..52bcf06f994 100644 --- a/ax/benchmark/benchmark_result.py +++ b/ax/benchmark/benchmark_result.py @@ -33,15 +33,38 @@ class BenchmarkResult(Base): name: Name of the benchmark. Should make it possible to determine the problem and the method. seed: Seed used for determinism. - optimization_trace: For single-objective problems, element i of the - optimization trace is the oracle value of the "best" point, computed - after the first i trials have been run. For multi-objective - problems, element i of the optimization trace is the hypervolume of - oracle values at a set of points, also computed after the first i - trials (even if these were ``BatchTrials``). Oracle values are - typically ground-truth (rather than noisy) and evaluated at the - target task and fidelity. - + oracle_trace: For single-objective problems, element i of the + optimization trace is the best oracle value of the arms evaluated + after the first i trials. For multi-objective problems, element i + of the optimization trace is the hypervolume of the oracle values of + the arms in the first i trials (which may be ``BatchTrial``s). + Oracle values are typically ground-truth (rather than noisy) and + evaluated at the target task and fidelity. + inference_trace: Inference trace comes from choosing a "best" point + based only on data that would be observable in realistic settings + and then evaluating the oracle value of that point. For + multi-objective problems, we find a Pareto set and evaluate its + hypervolume. + + There are several ways of specifying the "best" point: One could + pick the point with the best observed value, or the point with the + best model prediction, and could consider the whole search space, + the set of trials completed so far, etc. How the inference trace is + computed is specified by a best-point selector, which is an + attribute of the `BenchmarkMethod`. + + Note: This is not "inference regret", which is a lower-is-better value + that is relative to the best possible value. The inference value + trace is higher-is-better if the problem is a maximization problem + or if the problem is multi-objective (in which case hypervolume is + used). Hence, it is signed the same as ``oracle_trace`` and + ``optimization_trace``. ``score_trace`` is higher-is-better and + relative to the optimum. + optimization_trace: Either the ``oracle_trace`` or the + ``inference_trace``, depending on whether the ``BenchmarkProblem`` + specifies ``report_inference_value``. Having ``optimization_trace`` + specified separately is useful when we need just one value to + evaluate how well the benchmark went. score_trace: The scores associated with the problem, typically either the optimization_trace or inference_value_trace normalized to a 0-100 scale for comparability between problems. 
@@ -56,6 +79,8 @@ class BenchmarkResult(Base):
 
     name: str
     seed: int
+    oracle_trace: ndarray
+    inference_trace: ndarray
     optimization_trace: ndarray
     score_trace: ndarray
 
diff --git a/ax/benchmark/methods/modular_botorch.py b/ax/benchmark/methods/modular_botorch.py
index 7df32f7f2d1..a55aeb1c598 100644
--- a/ax/benchmark/methods/modular_botorch.py
+++ b/ax/benchmark/methods/modular_botorch.py
@@ -48,6 +48,7 @@ def get_sobol_botorch_modular_acquisition(
     name: Optional[str] = None,
     num_sobol_trials: int = 5,
     model_gen_kwargs: Optional[dict[str, Any]] = None,
+    best_point_kwargs: dict[str, bool] | None = None,
 ) -> BenchmarkMethod:
     """Get a `BenchmarkMethod` that uses Sobol followed by MBM.
 
@@ -64,6 +65,7 @@ def get_sobol_botorch_modular_acquisition(
             `BatchTrial`s.
         model_gen_kwargs: Passed to the BoTorch `GenerationStep` and ultimately
             to the BoTorch `Model`.
+        best_point_kwargs: Passed to the created `BenchmarkMethod`.
 
     Example:
         >>> # A simple example
@@ -138,4 +140,5 @@ def get_sobol_botorch_modular_acquisition(
         generation_strategy=generation_strategy,
         scheduler_options=scheduler_options or get_benchmark_scheduler_options(),
         distribute_replications=distribute_replications,
+        best_point_kwargs={} if best_point_kwargs is None else best_point_kwargs,
     )
diff --git a/ax/benchmark/tests/methods/test_methods.py b/ax/benchmark/tests/methods/test_methods.py
index 740213f8af7..27ae23844f6 100644
--- a/ax/benchmark/tests/methods/test_methods.py
+++ b/ax/benchmark/tests/methods/test_methods.py
@@ -6,14 +6,24 @@
 
 # pyre-strict
 
+from itertools import product
+from unittest.mock import patch
+
 import numpy as np
 from ax.benchmark.benchmark import benchmark_replication
 from ax.benchmark.benchmark_method import get_benchmark_scheduler_options
 from ax.benchmark.methods.modular_botorch import get_sobol_botorch_modular_acquisition
 from ax.benchmark.methods.sobol import get_sobol_benchmark_method
 from ax.benchmark.problems.registry import get_problem
+from ax.core.experiment import Experiment
 from ax.modelbridge.registry import Models
+from ax.service.scheduler import Scheduler
+from ax.service.utils.best_point import (
+    get_best_by_raw_objective_with_trial_index,
+    get_best_parameters_from_model_predictions_with_trial_index,
+)
 from ax.service.utils.scheduler_options import SchedulerOptions
+from ax.utils.common.random import with_rng_seed
 from ax.utils.common.testutils import TestCase
 from ax.utils.testing.mock import fast_botorch_optimize
 from botorch.acquisition.acquisition import AcquisitionFunction
@@ -124,3 +134,68 @@ def test_sobol(self) -> None:
         problem = get_problem(problem_name="ackley4", num_trials=3)
         result = benchmark_replication(problem=problem, method=method, seed=0)
         self.assertTrue(np.isfinite(result.score_trace).all())
+
+    def _test_get_best_parameters(
+        self, use_model_predictions: bool, as_batch: bool
+    ) -> None:
+        problem = get_problem(
+            problem_name="ackley4", num_trials=2, test_problem_kwargs={"noise_std": 1.0}
+        )
+
+        method = get_sobol_botorch_modular_acquisition(
+            model_cls=SingleTaskGP,
+            acquisition_cls=qLogExpectedImprovement,
+            distribute_replications=False,
+            best_point_kwargs={"use_model_predictions": use_model_predictions},
+            num_sobol_trials=1,
+        )
+
+        experiment = Experiment(
+            name="test",
+            search_space=problem.search_space,
+            optimization_config=problem.optimization_config,
+            runner=problem.runner,
+        )
+
+        scheduler = Scheduler(
+            experiment=experiment,
+            generation_strategy=method.generation_strategy.clone_reset(),
+            options=method.scheduler_options,
+        )
+
+        with with_rng_seed(seed=0):
+            scheduler.run_n_trials(max_trials=problem.num_trials)
+
+        # Because the second trial is a BoTorch trial, the model should be used.
+        best_point_mixin_path = "ax.service.utils.best_point_mixin.best_point_utils."
+        with patch(
+            best_point_mixin_path
+            + "get_best_parameters_from_model_predictions_with_trial_index",
+            wraps=get_best_parameters_from_model_predictions_with_trial_index,
+        ) as mock_get_best_parameters_from_predictions, patch(
+            best_point_mixin_path + "get_best_by_raw_objective_with_trial_index",
+            wraps=get_best_by_raw_objective_with_trial_index,
+        ) as mock_get_best_by_raw_objective_with_trial_index:
+            best_params = method.get_best_parameters(
+                experiment=experiment,
+                optimization_config=problem.optimization_config,
+                n_points=1,
+            )
+        if use_model_predictions:
+            mock_get_best_parameters_from_predictions.assert_called_once()
+            # get_best_by_raw_objective_with_trial_index might be used as a
+            # fallback
+        else:
+            mock_get_best_parameters_from_predictions.assert_not_called()
+            mock_get_best_by_raw_objective_with_trial_index.assert_called_once()
+        self.assertEqual(len(best_params), 1)
+
+    def test_get_best_parameters(self) -> None:
+        for use_model_predictions, as_batch in product(
+            [False, True],
+            [False, True],
+        ):
+            with self.subTest(f"{use_model_predictions=}, {as_batch=}"):
+                self._test_get_best_parameters(
+                    use_model_predictions=use_model_predictions, as_batch=as_batch
+                )
diff --git a/ax/benchmark/tests/test_benchmark.py b/ax/benchmark/tests/test_benchmark.py
index 5e2447b9236..3c3294114f7 100644
--- a/ax/benchmark/tests/test_benchmark.py
+++ b/ax/benchmark/tests/test_benchmark.py
@@ -6,6 +6,7 @@
 # pyre-strict
 
 import tempfile
+from itertools import product
 from unittest.mock import patch
 
 import numpy as np
@@ -119,6 +120,8 @@ def test_benchmark_result_invalid_inputs(self) -> None:
             BenchmarkResult(
                 name="name",
                 seed=0,
+                inference_trace=np.array([]),
+                oracle_trace=np.array([]),
                 optimization_trace=np.array([]),
                 score_trace=np.array([]),
                 fit_time=0.0,
@@ -133,6 +136,8 @@ def test_benchmark_result_invalid_inputs(self) -> None:
             BenchmarkResult(
                 name="name",
                 seed=0,
+                inference_trace=np.array([]),
+                oracle_trace=np.array([]),
                 optimization_trace=np.array([]),
                 score_trace=np.array([]),
                 fit_time=0.0,
@@ -220,6 +225,73 @@ def test_replication_sobol_surrogate(self) -> None:
         self.assertTrue(np.isfinite(res.score_trace).all())
         self.assertTrue(np.all(res.score_trace <= 100))
 
+    @fast_botorch_optimize
+    def _test_replication_with_inference_value(
+        self,
+        batch_size: int,
+        use_model_predictions: bool,
+        report_inference_value_as_trace: bool,
+    ) -> None:
+        seed = 1
+        method = get_sobol_botorch_modular_acquisition(
+            model_cls=SingleTaskGP,
+            acquisition_cls=qLogNoisyExpectedImprovement,
+            distribute_replications=False,
+            best_point_kwargs={"use_model_predictions": use_model_predictions},
+            num_sobol_trials=3,
+        )
+
+        test_problem_kwargs = {"noise_std": 100.0}
+        num_trials = 4
+        problem = get_single_objective_benchmark_problem(
+            test_problem_kwargs=test_problem_kwargs,
+            num_trials=num_trials,
+            report_inference_value_as_trace=report_inference_value_as_trace,
+        )
+        res = benchmark_replication(problem=problem, method=method, seed=seed)
+        # The inference trace could coincide with the oracle trace, but that
+        # does not happen in this example, given the high noise and this seed.
+        self.assertEqual(
+            np.equal(res.inference_trace, res.optimization_trace).all(),
+            report_inference_value_as_trace,
+        )
+        self.assertEqual(
+            np.equal(res.oracle_trace, res.optimization_trace).all(),
+            not report_inference_value_as_trace,
+        )
+
+        self.assertEqual(res.optimization_trace.shape, (problem.num_trials,))
+        self.assertTrue((res.inference_trace >= res.oracle_trace).all())
+        self.assertTrue((res.score_trace >= 0).all())
+        self.assertTrue((res.score_trace <= 100).all())
+
+    def test_replication_with_inference_value(self) -> None:
+        for (
+            use_model_predictions,
+            batch_size,
+            report_inference_value_as_trace,
+        ) in product(
+            [False, True],
+            [1, 2],
+            [False, True],
+        ):
+            with self.subTest(
+                batch_size=batch_size,
+                use_model_predictions=use_model_predictions,
+                report_inference_value_as_trace=report_inference_value_as_trace,
+            ):
+                self._test_replication_with_inference_value(
+                    batch_size=batch_size,
+                    use_model_predictions=use_model_predictions,
+                    report_inference_value_as_trace=report_inference_value_as_trace,
+                )
+
+        with self.assertRaisesRegex(
+            NotImplementedError,
+            "Inference trace is not supported for MOO",
+        ):
+            get_multi_objective_benchmark_problem(report_inference_value_as_trace=True)
+
     @fast_botorch_optimize
     def test_replication_mbm(self) -> None:
         with patch.dict(
diff --git a/ax/benchmark/tests/test_benchmark_problem.py b/ax/benchmark/tests/test_benchmark_problem.py
index 9e1218a8070..887b49fc9d0 100644
--- a/ax/benchmark/tests/test_benchmark_problem.py
+++ b/ax/benchmark/tests/test_benchmark_problem.py
@@ -14,11 +14,14 @@
 
 from ax.benchmark.benchmark_metric import BenchmarkMetric
 
-from ax.benchmark.benchmark_problem import create_problem_from_botorch
+from ax.benchmark.benchmark_problem import BenchmarkProblem, create_problem_from_botorch
 from ax.benchmark.runners.botorch_test import BotorchTestProblemRunner
 from ax.core.arm import Arm
-from ax.core.objective import MultiObjective
-from ax.core.optimization_config import MultiObjectiveOptimizationConfig
+from ax.core.objective import MultiObjective, Objective
+from ax.core.optimization_config import (
+    MultiObjectiveOptimizationConfig,
+    OptimizationConfig,
+)
 from ax.core.parameter import ChoiceParameter, ParameterType, RangeParameter
 from ax.core.search_space import SearchSpace
 from ax.core.types import ComparisonOp
@@ -44,6 +47,46 @@ def setUp(self) -> None:
         self.maxDiff = None
         super().setUp()
 
+    def test_inference_value_not_implemented(self) -> None:
+        objectives = [
+            Objective(metric=BenchmarkMetric(name, lower_is_better=True))
+            for name in ["Branin", "Currin"]
+        ]
+        optimization_config = OptimizationConfig(objective=objectives[0])
+        runner = BotorchTestProblemRunner(
+            test_problem_class=Branin,
+            outcome_names=["foo"],
+            test_problem_kwargs={},
+        )
+        with self.assertRaisesRegex(NotImplementedError, "Only `n_best_points=1`"):
+            BenchmarkProblem(
+                name="foo",
+                optimization_config=optimization_config,
+                num_trials=1,
+                observe_noise_stds=False,
+                optimal_value=0.0,
+                search_space=SearchSpace(parameters=[]),
+                runner=runner,
+                n_best_points=2,
+            )
+
+        with self.assertRaisesRegex(
+            NotImplementedError, "Inference trace is not supported for MOO"
+        ):
+            BenchmarkProblem(
+                name="foo",
+                optimization_config=MultiObjectiveOptimizationConfig(
+                    objective=MultiObjective(objectives)
+                ),
+                num_trials=1,
+                observe_noise_stds=False,
+                optimal_value=0.0,
+                search_space=SearchSpace(parameters=[]),
+                runner=runner,
+                n_best_points=1,
+                report_inference_value_as_trace=True,
+            )
+
     def _test_multi_fidelity_or_multi_task(self, fidelity_or_task: str) -> None:
         """
         Args:
diff --git a/ax/service/scheduler.py b/ax/service/scheduler.py
index 47aa7f3f624..9494f825861 100644
--- a/ax/service/scheduler.py
+++ b/ax/service/scheduler.py
@@ -850,7 +850,7 @@ def run_trials_and_yield_results(
         self,
         max_trials: int,
         ignore_global_stopping_strategy: bool = False,
-        timeout_hours: Optional[int] = None,
+        timeout_hours: int | float | None = None,
         idle_callback: Optional[Callable[[Scheduler], None]] = None,
     ) -> Generator[dict[str, Any], None, None]:
         """Make continuous calls to `run` and `process_results` to run up to
diff --git a/ax/utils/testing/benchmark_stubs.py b/ax/utils/testing/benchmark_stubs.py
index a87e1f9e171..7f3bfd7b524 100644
--- a/ax/utils/testing/benchmark_stubs.py
+++ b/ax/utils/testing/benchmark_stubs.py
@@ -46,12 +46,14 @@ def get_single_objective_benchmark_problem(
     observe_noise_sd: bool = False,
     num_trials: int = 4,
     test_problem_kwargs: Optional[dict[str, Any]] = None,
+    report_inference_value_as_trace: bool = False,
 ) -> BenchmarkProblem:
     return create_problem_from_botorch(
         test_problem_class=Branin,
         test_problem_kwargs=test_problem_kwargs or {},
         num_trials=num_trials,
         observe_noise_sd=observe_noise_sd,
+        report_inference_value_as_trace=report_inference_value_as_trace,
     )
 
 
@@ -59,12 +61,14 @@ def get_multi_objective_benchmark_problem(
     observe_noise_sd: bool = False,
     num_trials: int = 4,
     test_problem_class: type[BraninCurrin] = BraninCurrin,
+    report_inference_value_as_trace: bool = False,
 ) -> BenchmarkProblem:
     return create_problem_from_botorch(
         test_problem_class=test_problem_class,
         test_problem_kwargs={},
         num_trials=num_trials,
         observe_noise_sd=observe_noise_sd,
+        report_inference_value_as_trace=report_inference_value_as_trace,
     )
 
 
@@ -206,6 +210,8 @@ def get_benchmark_result() -> BenchmarkResult:
             runner=problem.runner,
             is_test=True,
         ),
+        inference_trace=np.ones(4),
+        oracle_trace=np.zeros(4),
        optimization_trace=np.array([3, 2, 1, 0.1]),
        score_trace=np.array([3, 2, 1, 0.1]),
        fit_time=0.1,
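
A minimal usage sketch of the options introduced above, using only the call signatures that appear in this patch's test files; the BoTorch import paths for `Branin`, `SingleTaskGP`, and `qLogNoisyExpectedImprovement` are assumed, and the specific argument values are illustrative rather than recommended defaults.

# Sketch: exercise report_inference_value_as_trace and best_point_kwargs
# together, assuming the APIs added in this patch.
from botorch.acquisition.logei import qLogNoisyExpectedImprovement
from botorch.models import SingleTaskGP
from botorch.test_functions import Branin

from ax.benchmark.benchmark import benchmark_replication
from ax.benchmark.benchmark_problem import create_problem_from_botorch
from ax.benchmark.methods.modular_botorch import get_sobol_botorch_modular_acquisition

# Report the inference trace (best point chosen from observed data, then
# scored by the oracle) as the optimization trace.
problem = create_problem_from_botorch(
    test_problem_class=Branin,
    test_problem_kwargs={"noise_std": 1.0},
    num_trials=6,
    report_inference_value_as_trace=True,
)

# Select the best point from raw observations rather than model predictions.
method = get_sobol_botorch_modular_acquisition(
    model_cls=SingleTaskGP,
    acquisition_cls=qLogNoisyExpectedImprovement,
    distribute_replications=False,
    num_sobol_trials=3,
    best_point_kwargs={"use_model_predictions": False},
)

result = benchmark_replication(problem=problem, method=method, seed=0)
print(result.oracle_trace)        # cumulative best oracle value per trial
print(result.inference_trace)     # oracle value of the point selected after each trial
print(result.optimization_trace)  # equals inference_trace for this problem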