diff --git a/CHANGELOG.md b/CHANGELOG.md index f9f5a0b2d..8540b6206 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Bandit optimization example - `qThompsonSampling` acquisition function - `BetaPrior` class +- `recommend` now accepts the `pending_experiments` argument, informing the algorithm + about points that were already selected for evaluation +- Pure recommenders now have the `allow_recommending_pending_experiments` flag, + controlling whether pending experiments are excluded from candidates in purely + discrete search spaces ### Changed - The transition from experimental to computational representation no longer happens diff --git a/README.md b/README.md index 5f8793fa7..723f0b0db 100644 --- a/README.md +++ b/README.md @@ -33,10 +33,12 @@ Besides functionality to perform a typical recommend-measure loop, BayBE's highl - ✨ Custom parameter encodings: Improve your campaign with domain knowledge - 🧪 Built-in chemical encodings: Improve your campaign with chemical knowledge - 🎯 Single and multiple targets with min, max and match objectives -- ⚙️ Custom surrogate models: For specialized problems or active learning - 🎭 Hybrid (mixed continuous and discrete) spaces - 🚀 Transfer learning: Mix data from multiple campaigns and accelerate optimization - 🎰 Bandit models: Efficiently find the best among many options in noisy environments (e.g. 
A/B Testing) +- 🌎 Distributed workflows: Run campaigns asynchronously with pending experiments +- 🎓 Active learning: Perform smart data acquisition campaigns +- ⚙️ Custom surrogate models: Enhance your predictions through mechanistic understanding - 📈 Comprehensive backtest, simulation and imputation utilities: Benchmark and find your best settings - 📝 Fully typed and hypothesis-tested: Robust code base - 🔄 All objects are fully de-/serializable: Useful for storing results in databases or use in wrappers like APIs diff --git a/baybe/acquisition/base.py b/baybe/acquisition/base.py index 3aaa5c0b3..9e5aeef5a 100644 --- a/baybe/acquisition/base.py +++ b/baybe/acquisition/base.py @@ -10,7 +10,10 @@ import pandas as pd from attrs import define -from baybe.exceptions import UnidentifiedSubclassError +from baybe.exceptions import ( + IncompatibleAcquisitionFunctionError, + UnidentifiedSubclassError, +) from baybe.objectives.base import Objective from baybe.objectives.desirability import DesirabilityObjective from baybe.objectives.single import SingleTargetObjective @@ -55,6 +58,7 @@ def to_botorch( searchspace: SearchSpace, objective: Objective, measurements: pd.DataFrame, + pending_experiments: pd.DataFrame | None = None, ): """Create the botorch-ready representation of the function. @@ -89,6 +93,15 @@ def to_botorch( additional_params["mc_points"] = to_tensor( self.get_integration_points(searchspace) # type: ignore[attr-defined] ) + if pending_experiments is not None: + if self.is_mc: + pending_x = searchspace.transform(pending_experiments, allow_extra=True) + additional_params["X_pending"] = to_tensor(pending_x) + else: + raise IncompatibleAcquisitionFunctionError( + f"Pending experiments were provided but the chosen acquisition " + f"function '{self.__class__.__name__}' does not support this." 
+ ) # Add acquisition objective / best observed value match objective: diff --git a/baybe/campaign.py b/baybe/campaign.py index 233d62312..00d08a1d0 100644 --- a/baybe/campaign.py +++ b/baybe/campaign.py @@ -214,11 +214,16 @@ def add_measurements( def recommend( self, batch_size: int, + pending_experiments: pd.DataFrame | None = None, + batch_quantity: int = None, # type: ignore[assignment] ) -> pd.DataFrame: """Provide the recommendations for the next batch of experiments. Args: batch_size: Number of requested recommendations. + pending_experiments: Parameter configurations specifying experiments + that are currently pending. + batch_quantity: Deprecated! Use ``batch_size`` instead. Returns: Dataframe containing the recommendations in experimental representation. @@ -232,6 +237,10 @@ def recommend( f"{batch_size=}." ) + # Invalidate cached recommendation if pending experiments are provided + if (pending_experiments is not None) and (len(pending_experiments) > 0): + self._cached_recommendation = pd.DataFrame() + # If there are cached recommendations and the batch size of those is equal to # the previously requested one, we just return those if len(self._cached_recommendation) == batch_size: @@ -248,6 +257,7 @@ def recommend( self.searchspace, self.objective, self._measurements_exp, + pending_experiments, ) # Cache the recommendations diff --git a/baybe/exceptions.py b/baybe/exceptions.py index c9196f6cb..661f61a97 100644 --- a/baybe/exceptions.py +++ b/baybe/exceptions.py @@ -23,6 +23,10 @@ class IncompatibleSearchSpaceError(IncompatibilityError): """ +class IncompatibleAcquisitionFunctionError(IncompatibilityError): + """An incompatible acquisition function was selected.""" + + class NotEnoughPointsLeftError(Exception): """ More recommendations are requested than there are viable parameter configurations diff --git a/baybe/recommenders/base.py b/baybe/recommenders/base.py index cf43a72df..d4de83b85 100644 --- a/baybe/recommenders/base.py +++ 
b/baybe/recommenders/base.py @@ -19,8 +19,9 @@ def recommend( self, batch_size: int, searchspace: SearchSpace, - objective: Objective | None, - measurements: pd.DataFrame | None, + objective: Objective | None = None, + measurements: pd.DataFrame | None = None, + pending_experiments: pd.DataFrame | None = None, ) -> pd.DataFrame: """Recommend a batch of points from the given search space. @@ -36,6 +37,8 @@ def recommend( Each row corresponds to one conducted experiment, where the parameter columns define the experimental setting and the target columns report the measured outcomes. + pending_experiments: Parameter configurations in "experimental + representation" specifying experiments that are currently pending. Returns: A dataframe containing the recommendations in experimental representation diff --git a/baybe/recommenders/meta/base.py b/baybe/recommenders/meta/base.py index 7460a9b69..aff74bbc1 100644 --- a/baybe/recommenders/meta/base.py +++ b/baybe/recommenders/meta/base.py @@ -27,6 +27,7 @@ def select_recommender( searchspace: SearchSpace, objective: Objective | None = None, measurements: pd.DataFrame | None = None, + pending_experiments: pd.DataFrame | None = None, ) -> PureRecommender: """Select a pure recommender for the given experimentation context. @@ -39,6 +40,8 @@ def select_recommender( See :func:`baybe.recommenders.meta.base.MetaRecommender.recommend`. measurements: See :func:`baybe.recommenders.meta.base.MetaRecommender.recommend`. + pending_experiments: + See :func:`baybe.recommenders.meta.base.MetaRecommender.recommend`. Returns: The selected recommender. 
@@ -50,6 +53,7 @@ def recommend( searchspace: SearchSpace, objective: Objective | None = None, measurements: pd.DataFrame | None = None, + pending_experiments: pd.DataFrame | None = None, ) -> pd.DataFrame: """See :func:`baybe.recommenders.base.RecommenderProtocol.recommend`.""" recommender = self.select_recommender( @@ -57,6 +61,7 @@ def recommend( searchspace=searchspace, objective=objective, measurements=measurements, + pending_experiments=pending_experiments, ) # Non-predictive recommenders should not be called with an objective or @@ -72,7 +77,10 @@ def recommend( ) return recommender.recommend( - batch_size=batch_size, searchspace=searchspace, **optional_args + batch_size=batch_size, + searchspace=searchspace, + pending_experiments=pending_experiments, + **optional_args, ) diff --git a/baybe/recommenders/meta/sequential.py b/baybe/recommenders/meta/sequential.py index 6c38596ff..c4e140042 100644 --- a/baybe/recommenders/meta/sequential.py +++ b/baybe/recommenders/meta/sequential.py @@ -56,6 +56,7 @@ def select_recommender( # noqa: D102 searchspace: SearchSpace | None = None, objective: Objective | None = None, measurements: pd.DataFrame | None = None, + pending_experiments: pd.DataFrame | None = None, ) -> PureRecommender: # See base class. @@ -135,6 +136,7 @@ def select_recommender( # noqa: D102 searchspace: SearchSpace | None = None, objective: Objective | None = None, measurements: pd.DataFrame | None = None, + pending_experiments: pd.DataFrame | None = None, ) -> PureRecommender: # See base class. @@ -224,6 +226,7 @@ def select_recommender( # noqa: D102 searchspace: SearchSpace | None = None, objective: Objective | None = None, measurements: pd.DataFrame | None = None, + pending_experiments: pd.DataFrame | None = None, ) -> PureRecommender: # See base class. 
diff --git a/baybe/recommenders/naive.py b/baybe/recommenders/naive.py index f7191fe20..c2817815a 100644 --- a/baybe/recommenders/naive.py +++ b/baybe/recommenders/naive.py @@ -82,6 +82,7 @@ def recommend( # noqa: D102 searchspace: SearchSpace, objective: Objective | None = None, measurements: pd.DataFrame | None = None, + pending_experiments: pd.DataFrame | None = None, ) -> pd.DataFrame: # See base class. @@ -108,6 +109,7 @@ def recommend( # noqa: D102 searchspace=searchspace, objective=objective, measurements=measurements, + pending_experiments=pending_experiments, ) # We are in a hybrid setting now @@ -121,7 +123,6 @@ def recommend( # noqa: D102 # Get discrete candidates. The metadata flags are ignored since the search space # is hybrid - # TODO Slight BOILERPLATE CODE, see recommender.py, ll. 47+ candidates_exp, _ = searchspace.discrete.get_candidates( allow_repeated_recommendations=True, allow_recommending_already_measured=True, @@ -131,7 +132,7 @@ def recommend( # noqa: D102 if isinstance(self.disc_recommender, BayesianRecommender): # Get access to the recommenders acquisition function self.disc_recommender._setup_botorch_acqf( - searchspace, objective, measurements + searchspace, objective, measurements, pending_experiments ) # Construct the partial acquisition function that attaches cont_part @@ -157,7 +158,9 @@ def recommend( # noqa: D102 disc_part_tensor = to_tensor(disc_part).unsqueeze(-2) # Setup a fresh acquisition function for the continuous recommender - self.cont_recommender._setup_botorch_acqf(searchspace, objective, measurements) + self.cont_recommender._setup_botorch_acqf( + searchspace, objective, measurements, pending_experiments + ) # Construct the continuous space as a standalone space cont_acqf_part = PartialAcquisitionFunction( diff --git a/baybe/recommenders/pure/base.py b/baybe/recommenders/pure/base.py index 8984d1161..10ec66fd3 100644 --- a/baybe/recommenders/pure/base.py +++ b/baybe/recommenders/pure/base.py @@ -32,12 +32,18 @@ class 
PureRecommender(ABC, RecommenderProtocol): """Allow to make recommendations that were measured previously. This only has an influence in discrete search spaces.""" + allow_recommending_pending_experiments: bool = field(default=False, kw_only=True) + """Allow `pending_experiments` to be part of the recommendations. If set to `False`, + the corresponding points will be removed from the candidates. This only has an + influence in discrete search spaces.""" + def recommend( # noqa: D102 self, batch_size: int, searchspace: SearchSpace, objective: Objective | None = None, measurements: pd.DataFrame | None = None, + pending_experiments: pd.DataFrame | None = None, ) -> pd.DataFrame: # See base class if searchspace.type is SearchSpaceType.CONTINUOUS: @@ -45,7 +51,11 @@ def recommend( # noqa: D102 subspace_continuous=searchspace.continuous, batch_size=batch_size ) else: - return self._recommend_with_discrete_parts(searchspace, batch_size) + return self._recommend_with_discrete_parts( + searchspace, + batch_size, + pending_experiments=pending_experiments, + ) def _recommend_discrete( self, @@ -154,6 +164,7 @@ def _recommend_with_discrete_parts( self, searchspace: SearchSpace, batch_size: int, + pending_experiments: pd.DataFrame | None, ) -> pd.DataFrame: """Obtain recommendations in search spaces with a discrete part. @@ -163,6 +174,7 @@ def _recommend_with_discrete_parts( Args: searchspace: The search space from which to generate recommendations. batch_size: The size of the recommendation batch. + pending_experiments: Pending experiments in experimental representation. Returns: A dataframe containing the recommendations as individual rows. @@ -175,11 +187,17 @@ def _recommend_with_discrete_parts( # Get discrete candidates # Repeated recommendations are always allowed for hybrid spaces + # Pending experiments are excluded for discrete spaces unless configured + # differently. 
+ dont_exclude_pending = ( + is_hybrid_space or self.allow_recommending_pending_experiments + ) candidates_exp, _ = searchspace.discrete.get_candidates( allow_repeated_recommendations=is_hybrid_space or self.allow_repeated_recommendations, allow_recommending_already_measured=is_hybrid_space or self.allow_recommending_already_measured, + exclude=None if dont_exclude_pending else pending_experiments, ) # TODO: Introduce new flag to recommend batches larger than the search space diff --git a/baybe/recommenders/pure/bayesian/base.py b/baybe/recommenders/pure/bayesian/base.py index 98ff1ff27..48d4c2d63 100644 --- a/baybe/recommenders/pure/bayesian/base.py +++ b/baybe/recommenders/pure/bayesian/base.py @@ -48,11 +48,16 @@ def _setup_botorch_acqf( searchspace: SearchSpace, objective: Objective, measurements: pd.DataFrame, + pending_experiments: pd.DataFrame | None = None, ) -> None: """Create the acquisition function for the current training data.""" # noqa: E501 self.surrogate_model.fit(searchspace, objective, measurements) self._botorch_acqf = self.acquisition_function.to_botorch( - self.surrogate_model, searchspace, objective, measurements + self.surrogate_model, + searchspace, + objective, + measurements, + pending_experiments, ) def recommend( # noqa: D102 @@ -61,6 +66,7 @@ def recommend( # noqa: D102 searchspace: SearchSpace, objective: Objective | None = None, measurements: pd.DataFrame | None = None, + pending_experiments: pd.DataFrame | None = None, ) -> pd.DataFrame: # See base class. 
@@ -89,11 +95,14 @@ def recommend( # noqa: D102 if isinstance(self.surrogate_model, CustomONNXSurrogate): CustomONNXSurrogate.validate_compatibility(searchspace) - self._setup_botorch_acqf(searchspace, objective, measurements) + self._setup_botorch_acqf( + searchspace, objective, measurements, pending_experiments + ) return super().recommend( batch_size=batch_size, searchspace=searchspace, objective=objective, measurements=measurements, + pending_experiments=pending_experiments, ) diff --git a/baybe/recommenders/pure/bayesian/botorch.py b/baybe/recommenders/pure/bayesian/botorch.py index 2ec317dce..460f3af6d 100644 --- a/baybe/recommenders/pure/bayesian/botorch.py +++ b/baybe/recommenders/pure/bayesian/botorch.py @@ -8,7 +8,10 @@ from attrs import define, field from baybe.acquisition.acqfs import qThompsonSampling -from baybe.exceptions import IncompatibilityError, NoMCAcquisitionFunctionError +from baybe.exceptions import ( + IncompatibilityError, + IncompatibleAcquisitionFunctionError, +) from baybe.recommenders.pure.bayesian.base import BayesianRecommender from baybe.searchspace import ( SearchSpace, @@ -90,15 +93,15 @@ def _recommend_discrete( batch_size: The size of the recommendation batch. Raises: - NoMCAcquisitionFunctionError: If a non-Monte Carlo acquisition function is - used with a batch size > 1. + IncompatibleAcquisitionFunctionError: If a non-Monte Carlo acquisition + function is used with a batch size > 1. Returns: The dataframe indices of the recommended points in the provided experimental representation. """ if batch_size > 1 and not self.acquisition_function.is_mc: - raise NoMCAcquisitionFunctionError( + raise IncompatibleAcquisitionFunctionError( f"The '{self.__class__.__name__}' only works with Monte Carlo " f"acquisition functions for batch sizes > 1." ) @@ -144,15 +147,15 @@ def _recommend_continuous( batch_size: The size of the recommendation batch. 
Raises: - NoMCAcquisitionFunctionError: If a non-Monte Carlo acquisition function is - used with a batch size > 1. + IncompatibleAcquisitionFunctionError: If a non-Monte Carlo acquisition + function is used with a batch size > 1. Returns: A dataframe containing the recommendations as individual rows. """ # For batch size > 1, this optimizer needs a MC acquisition function if batch_size > 1 and not self.acquisition_function.is_mc: - raise NoMCAcquisitionFunctionError( + raise IncompatibleAcquisitionFunctionError( f"The '{self.__class__.__name__}' only works with Monte Carlo " f"acquisition functions for batch sizes > 1." ) @@ -180,7 +183,7 @@ def _recommend_continuous( ) # Return optimized points as dataframe - rec = pd.DataFrame(points, columns=subspace_continuous.param_names) + rec = pd.DataFrame(points, columns=subspace_continuous.parameter_names) return rec def _recommend_hybrid( @@ -205,15 +208,15 @@ def _recommend_hybrid( batch_size: The size of the calculated batch. Raises: - NoMCAcquisitionFunctionError: If a non-Monte Carlo acquisition function is - used with a batch size > 1. + IncompatibleAcquisitionFunctionError: If a non-Monte Carlo acquisition + function is used with a batch size > 1. Returns: The recommended points. """ # For batch size > 1, this optimizer needs a MC acquisition function if batch_size > 1 and not self.acquisition_function.is_mc: - raise NoMCAcquisitionFunctionError( + raise IncompatibleAcquisitionFunctionError( f"The '{self.__class__.__name__}' only works with Monte Carlo " f"acquisition functions for batch sizes > 1." 
) @@ -288,7 +291,7 @@ def _recommend_hybrid( # Get experimental representation of discrete and continuous parts rec_disc_exp = searchspace.discrete.exp_rep.loc[idxs] rec_cont_exp = pd.DataFrame( - cont_points, columns=searchspace.continuous.param_names + cont_points, columns=searchspace.continuous.parameter_names ) # Adjust the index of the continuous part and create overall recommendations diff --git a/baybe/recommenders/pure/nonpredictive/base.py b/baybe/recommenders/pure/nonpredictive/base.py index c3c4e9b87..74707fcf3 100644 --- a/baybe/recommenders/pure/nonpredictive/base.py +++ b/baybe/recommenders/pure/nonpredictive/base.py @@ -4,12 +4,13 @@ from abc import ABC import pandas as pd +from attr import fields from attrs import define from baybe.exceptions import UnusedObjectWarning from baybe.objectives.base import Objective from baybe.recommenders.pure.base import PureRecommender -from baybe.searchspace.core import SearchSpace +from baybe.searchspace.core import SearchSpace, SearchSpaceType @define @@ -22,6 +23,7 @@ def recommend( # noqa: D102 searchspace: SearchSpace, objective: Objective | None = None, measurements: pd.DataFrame | None = None, + pending_experiments: pd.DataFrame | None = None, ) -> pd.DataFrame: # See base class. 
@@ -39,9 +41,22 @@ def recommend( # noqa: D102 f"consider any objectives, meaning that the argument is ignored.", UnusedObjectWarning, ) + if (pending_experiments is not None) and ( + self.allow_recommending_pending_experiments + or searchspace.type is not SearchSpaceType.DISCRETE + ): + warnings.warn( + f"Pending experiments were provided but the selected recommender " + f"'{self.__class__.__name__}' only utilizes this information for " + f"purely discrete spaces and " + f"{fields(self.__class__).allow_recommending_pending_experiments.name}" + f"=False.", + UnusedObjectWarning, + ) return super().recommend( batch_size=batch_size, searchspace=searchspace, objective=objective, measurements=measurements, + pending_experiments=pending_experiments, ) diff --git a/baybe/recommenders/pure/nonpredictive/clustering.py b/baybe/recommenders/pure/nonpredictive/clustering.py index 70d228496..8a273a98b 100644 --- a/baybe/recommenders/pure/nonpredictive/clustering.py +++ b/baybe/recommenders/pure/nonpredictive/clustering.py @@ -107,6 +107,7 @@ def _recommend_discrete( scaler = StandardScaler() scaler.fit(subspace_discrete.comp_rep) + # Scale candidates candidates_comp = subspace_discrete.transform(candidates_exp) candidates_scaled = np.ascontiguousarray(scaler.transform(candidates_comp)) diff --git a/baybe/recommenders/pure/nonpredictive/sampling.py b/baybe/recommenders/pure/nonpredictive/sampling.py index 3811e65f1..bb7b4a8e4 100644 --- a/baybe/recommenders/pure/nonpredictive/sampling.py +++ b/baybe/recommenders/pure/nonpredictive/sampling.py @@ -71,6 +71,7 @@ def _recommend_discrete( scaler = StandardScaler() scaler.fit(subspace_discrete.comp_rep) + # Scale and sample candidates_comp = subspace_discrete.transform(candidates_exp) candidates_scaled = np.ascontiguousarray(scaler.transform(candidates_comp)) ilocs = farthest_point_sampling(candidates_scaled, batch_size) diff --git a/baybe/searchspace/continuous.py b/baybe/searchspace/continuous.py index 5e4438397..d61a4b981 100644 
--- a/baybe/searchspace/continuous.py +++ b/baybe/searchspace/continuous.py @@ -238,8 +238,8 @@ def is_empty(self) -> bool: return len(self.parameters) == 0 @property - def param_names(self) -> tuple[str, ...]: - """Return list of parameter names.""" + def parameter_names(self) -> tuple[str, ...]: + """Return tuple of parameter names.""" return tuple(p.name for p in self.parameters) @property @@ -375,7 +375,7 @@ def _sample_from_bounds(self, batch_size: int, bounds: np.ndarray) -> pd.DataFra low=bounds[0, :], high=bounds[1, :], size=(batch_size, len(self.parameters)) ) - return pd.DataFrame(points, columns=self.param_names) + return pd.DataFrame(points, columns=self.parameter_names) def _sample_from_polytope( self, batch_size: int, bounds: np.ndarray @@ -394,7 +394,7 @@ def _sample_from_polytope( c.to_botorch(self.parameters) for c in self.constraints_lin_ineq ], ) - return pd.DataFrame(points, columns=self.param_names) + return pd.DataFrame(points, columns=self.parameter_names) def _sample_from_polytope_with_cardinality_constraints( self, batch_size: int @@ -491,7 +491,7 @@ def sample_from_full_factorial(self, batch_size: int = 1) -> pd.DataFrame: def full_factorial(self) -> pd.DataFrame: """Get the full factorial of the continuous space.""" index = pd.MultiIndex.from_product( - self.comp_rep_bounds.values.T.tolist(), names=self.param_names + self.comp_rep_bounds.values.T.tolist(), names=self.parameter_names ) return pd.DataFrame(index=index).reset_index() diff --git a/baybe/searchspace/core.py b/baybe/searchspace/core.py index 55c0b6f4c..b43c4861f 100644 --- a/baybe/searchspace/core.py +++ b/baybe/searchspace/core.py @@ -248,6 +248,11 @@ def comp_rep_bounds(self) -> pd.DataFrame: [self.discrete.comp_rep_bounds, self.continuous.comp_rep_bounds], axis=1 ) + @property + def parameter_names(self) -> tuple[str, ...]: + """Return tuple of parameter names.""" + return self.discrete.parameter_names + self.continuous.parameter_names + @property def task_idx(self) -> int | 
None: """The column index of the task parameter in computational representation.""" diff --git a/baybe/searchspace/discrete.py b/baybe/searchspace/discrete.py index 27bf049a5..8e24da1f8 100644 --- a/baybe/searchspace/discrete.py +++ b/baybe/searchspace/discrete.py @@ -588,6 +588,11 @@ def is_empty(self) -> bool: """Return whether this subspace is empty.""" return len(self.parameters) == 0 + @property + def parameter_names(self) -> tuple[str, ...]: + """Return tuple of parameter names.""" + return tuple(p.name for p in self.parameters) + @property def comp_rep_columns(self) -> tuple[str, ...]: """The columns spanning the computational representation.""" @@ -669,6 +674,7 @@ def get_candidates( self, allow_repeated_recommendations: bool = False, allow_recommending_already_measured: bool = False, + exclude: pd.DataFrame | None = None, ) -> tuple[pd.DataFrame, pd.DataFrame]: """Return the set of candidate parameter settings that can be tested. @@ -681,6 +687,8 @@ def get_candidates( allow_recommending_already_measured: If ``True``, parameters settings for which there are already target values available are still considered as valid candidates. + exclude: Points in experimental representation that should be excluded as + candidates. 
Returns: The candidate parameter settings both in experimental and computational @@ -693,6 +701,12 @@ def get_candidates( if not allow_recommending_already_measured: mask_todrop |= self.metadata["was_measured"] + # Remove additional excludes + if exclude is not None: + mask_todrop |= pd.merge(self.exp_rep, exclude, indicator=True, how="left")[ + "_merge" + ].eq("both") + return self.exp_rep.loc[~mask_todrop], self.comp_rep.loc[~mask_todrop] def transform( diff --git a/docs/userguide/async.md b/docs/userguide/async.md new file mode 100644 index 000000000..177d4d772 --- /dev/null +++ b/docs/userguide/async.md @@ -0,0 +1,85 @@ +# Asynchronous Workflows + +Asynchronous workflows describe situations where the loop between measurement and +recommendation is more complex and needs to incorporate various other aspects. These +could for instance be: +- **Distributed workflows**: When recommendations are distributed across several + operators, e.g. at different locations or in several reactors, some experiments might + have been started, but are not ready when the next batch of recommendations is requested. + Without further consideration, the algorithm would be likely to recommend the pending + experiments again (since they were and still are considered most promising), as it is + unaware they were already started. +- **Partial targets**: When dealing with multiple targets that require very different + amounts of time to measure, the targets of previously recommended points might only be + partially available when requesting the next batch of recommendations. Still, these + partial experiments should ideally be considered when generating the recommendations. + +With *pending experiments* we mean experiments whose measurement process has +been started, but not yet completed by the time of triggering the next set of +recommendations – this is typically the case when at least one of the configured +targets has not yet been measured. 
+ +There are two levels of dealing with such situations: +1) **Marking experiments as pending**: If an experiment is not completed (meaning at least one target is not yet measured), its + data cannot be added as a regular measurement. However, it can be marked as pending via + `pending_experiments` in `recommend`. +1) **Adding partial results**: If an experiment is partially completed (meaning at least one target has been + measured), we can already update the model with the available information + by adding a *partial* measurement. + +(PENDING_EXPERIMENTS)= +## Marking Experiments as Pending + +To avoid repeated recommendations in the above scenario, BayBE provides the +`pending_experiments` keyword. It is available wherever recommendations can be +requested, i.e. [`Campaign.recommend`](baybe.campaign.Campaign.recommend) or +[`RecommenderProtocol.recommend`](baybe.recommenders.base.RecommenderProtocol.recommend). + +```{admonition} Supported Acquisition Functions +:class: important +`pending_experiments` is only supported by Monte Carlo (MC) acquisition functions, i.e. the +ones that start with a `q` in their name. Attempting to use a non-MC acquisition +function with `pending_experiments` will result in an +[`IncompatibleAcquisitionFunctionError`](baybe.exceptions.IncompatibleAcquisitionFunctionError). +``` + +```{admonition} Supported Recommenders +:class: important +For technical reasons, not every recommender is able to utilize `pending_experiments` in +the same way. For instance, +[`BotorchRecommender`](baybe.recommenders.pure.bayesian.botorch.BotorchRecommender) +takes all pending experiments into account, even if they do not match exactly with any +point in the search space. 
+[Non-predictive recommenders](baybe.recommenders.pure.nonpredictive.base.NonPredictiveRecommender) like +[`SKLearnClusteringRecommender`](baybe.recommenders.pure.nonpredictive.clustering.SKLearnClusteringRecommender)s, +[`RandomRecommender`](baybe.recommenders.pure.nonpredictive.sampling.RandomRecommender) or +[`FPSRecommender`](baybe.recommenders.pure.nonpredictive.sampling.FPSRecommender) +only take pending points into consideration if the recommender flag +[allow_recommending_pending_experiments](baybe.recommenders.pure.nonpredictive.base.NonPredictiveRecommender.allow_recommending_pending_experiments) +is set to `False`. In that case, the candidate space is stripped of pending experiments +that are exact matches with the search space, i.e. they will not even be considered. +``` + +Akin to `measurements` or `recommendations`, `pending_experiments` is a dataframe in +[experimental representation](DATA_REPRESENTATION). +In the following example, we get a set of recommendations, add results for half of them, +and start the next recommendation, marking the other half pending: +```python +# Get a set of 10 recommendations +rec = campaign.recommend(batch_size=10) + +# Split recommendations into two parts +rec_finished = rec.iloc[:5] +rec_pending = rec.iloc[5:] + +# Add target measurements to the finished part. Here we add an arbitrary placeholder value +rec_finished["Target_max"] = 1337 +campaign.add_measurements(rec_finished) + +# Get the next set of recommendations, incorporating the still unfinished experiments. +# These will not include the experiments marked as pending again. +rec_next = campaign.recommend(10, pending_experiments=rec_pending) +``` + +## Adding Partial Results +This functionality is under development as part of multi-target models. 
\ No newline at end of file diff --git a/docs/userguide/recommenders.md b/docs/userguide/recommenders.md index 11d77277f..e2d6b9c8e 100644 --- a/docs/userguide/recommenders.md +++ b/docs/userguide/recommenders.md @@ -16,11 +16,16 @@ compatibility is indicated via the corresponding `compatibility` class variable. ```{admonition} Additional Options for Discrete Search Spaces :class: note -For discrete search spaces, BayBE provides additional control over pure recommenders. -You can explicitly define whether a recommender is allowed to recommend previous -recommendations again via `allow_repeated_recommendations` and whether it can output -recommendations that have already been measured via -`allow_recommending_already_measured`. +For discrete search spaces, BayBE provides additional controls for pure recommenders: +- `allow_repeated_recommendations`: Controls whether a recommender is allowed to + recommend previous recommendations again. +- `allow_recommending_already_measured`: Controls whether a recommender is allowed to + recommend points that have already been measured. This only considers exact matches + to the search space. +- `allow_recommending_pending_experiments`: Controls whether a recommender is allowed + to recommend points that have been marked as `pending_experiments` + (see [asynchronous workflows](PENDING_EXPERIMENTS)). This only considers exact matches to the + search space. ``` ### Bayesian Recommenders diff --git a/docs/userguide/searchspace.md b/docs/userguide/searchspace.md index 89ecfdfd9..e4006834e 100644 --- a/docs/userguide/searchspace.md +++ b/docs/userguide/searchspace.md @@ -110,6 +110,7 @@ subspace = SubspaceDiscrete.from_simplex(max_sum=1.0, simplex_parameters=paramet Note that it is also possible to provide additional parameters that then enter in the form of a Cartesian product. These can be provided via the keyword `product_parameters`. 
+(DATA_REPRESENTATION)= ### Representation of Data within Discrete Subspaces Internally, discrete subspaces are represented by two dataframes, the *experimental* and the *computational* representation. diff --git a/docs/userguide/userguide.md b/docs/userguide/userguide.md index eb978b25e..07e275be5 100644 --- a/docs/userguide/userguide.md +++ b/docs/userguide/userguide.md @@ -1,8 +1,9 @@ -# User guide +# User Guide ```{toctree} Campaigns Active Learning +Asynchronous Workflows Constraints Environment Vars Objectives diff --git a/tests/conftest.py b/tests/conftest.py index 3b5a06aa3..f4a343467 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -637,14 +637,22 @@ def allow_recommending_already_measured(): return True +@pytest.fixture(name="allow_recommending_pending_experiments") +def fixture_allow_recommending_pending_experiments(): + return False + + @pytest.fixture(name="initial_recommender") def fixture_initial_recommender( - allow_recommending_already_measured, allow_repeated_recommendations + allow_recommending_already_measured, + allow_repeated_recommendations, + allow_recommending_pending_experiments, ): """The default initial recommender to be used if not specified differently.""" return RandomRecommender( allow_repeated_recommendations=allow_repeated_recommendations, allow_recommending_already_measured=allow_recommending_already_measured, + allow_recommending_pending_experiments=allow_recommending_pending_experiments, ) @@ -655,6 +663,7 @@ def fixture_recommender( acqf, allow_repeated_recommendations, allow_recommending_already_measured, + allow_recommending_pending_experiments, ): """The default recommender to be used if not specified differently.""" return TwoPhaseMetaRecommender( @@ -664,6 +673,7 @@ def fixture_recommender( acquisition_function=acqf, allow_repeated_recommendations=allow_repeated_recommendations, allow_recommending_already_measured=allow_recommending_already_measured, + 
allow_recommending_pending_experiments=allow_recommending_pending_experiments, ), ) diff --git a/tests/docs/test_docs.py b/tests/docs/test_docs.py index fc51225c4..e7286d118 100644 --- a/tests/docs/test_docs.py +++ b/tests/docs/test_docs.py @@ -22,10 +22,11 @@ not CHEM_INSTALLED, reason="Optional chem dependency not installed." ) @pytest.mark.parametrize("file", doc_files, ids=doc_files) -def test_code_executability(file: Path): +def test_code_executability(file: Path, campaign): """The code blocks in the file become a valid python script when concatenated. - Blocks surrounded with "triple-tilde" are ignored. + Blocks surrounded with "triple-tilde" are ignored. Fixtures made available to this + test will be available in the executed code too. """ userguide_code = "\n".join(extract_code_blocks(file, include_tilde=False)) exec(userguide_code) diff --git a/tests/test_pending_experiments.py b/tests/test_pending_experiments.py new file mode 100644 index 000000000..dc20cbfa5 --- /dev/null +++ b/tests/test_pending_experiments.py @@ -0,0 +1,177 @@ +"""Tests pending experiments mechanism.""" + +import warnings + +import pandas as pd +import pytest +from pytest import param + +from baybe.acquisition.base import AcquisitionFunction +from baybe.exceptions import IncompatibleAcquisitionFunctionError, UnusedObjectWarning +from baybe.recommenders import ( + BotorchRecommender, + FPSRecommender, + GaussianMixtureClusteringRecommender, + KMeansClusteringRecommender, + NaiveHybridSpaceRecommender, + PAMClusteringRecommender, + TwoPhaseMetaRecommender, +) +from baybe.utils.basic import get_subclasses +from baybe.utils.dataframe import add_fake_results, add_parameter_noise +from baybe.utils.random import temporary_seed + +_discrete_params = ["Categorical_1", "Switch_1", "Num_disc_1"] +_continuous_params = ["Conti_finite1", "Conti_finite2", "Conti_finite3"] +_hybrid_params = ["Categorical_1", "Num_disc_1", "Conti_finite1", "Conti_finite2"] + +# Repeated recommendations explicitly need 
to be allowed or the potential overlap will +# be avoided trivially +_flags = dict( + allow_repeated_recommendations=True, + allow_recommending_already_measured=True, +) + + +@pytest.mark.parametrize( + "parameter_names, recommender", + [ + param( + _discrete_params, + FPSRecommender(**_flags), + id="fps_discrete", + ), + param(_discrete_params, PAMClusteringRecommender(**_flags), id="pam_discrete"), + param( + _discrete_params, + KMeansClusteringRecommender(**_flags), + id="kmeans_discrete", + ), + param( + _discrete_params, + GaussianMixtureClusteringRecommender(**_flags), + id="gm_discrete", + ), + param( + _discrete_params, + TwoPhaseMetaRecommender(recommender=BotorchRecommender(**_flags)), + id="botorch_discrete", + ), + param( + _continuous_params, + TwoPhaseMetaRecommender(recommender=BotorchRecommender(**_flags)), + id="botorch_continuous", + ), + param( + _hybrid_params, + TwoPhaseMetaRecommender(recommender=BotorchRecommender(**_flags)), + id="botorch_hybrid", + ), + param( + _discrete_params, + TwoPhaseMetaRecommender( + recommender=BotorchRecommender( + **_flags, allow_recommending_pending_experiments=True + ) + ), + id="botorch_discrete_allow", + ), + param( + _continuous_params, + TwoPhaseMetaRecommender( + recommender=BotorchRecommender( + **_flags, allow_recommending_pending_experiments=True + ) + ), + id="botorch_continuous_allow", + ), + param( + _hybrid_params, + TwoPhaseMetaRecommender( + recommender=BotorchRecommender( + **_flags, allow_recommending_pending_experiments=True + ) + ), + id="botorch_hybrid_allow", + ), + param( + _discrete_params, + NaiveHybridSpaceRecommender( + disc_recommender=FPSRecommender(**_flags), **_flags + ), + id="naive1_discrete", + ), + param( + _discrete_params, + NaiveHybridSpaceRecommender( + disc_recommender=KMeansClusteringRecommender(**_flags), **_flags + ), + id="naive2_discrete", + ), + ], +) +@pytest.mark.parametrize("n_grid_points", [8], ids=["grid8"]) +def test_pending_points(campaign, batch_size): + 
"""Test there is no recommendation overlap if pending experiments are specified.""" + warnings.filterwarnings("ignore", category=UnusedObjectWarning) + + # Perform a fake first iteration + rec = campaign.recommend(batch_size) + add_fake_results(rec, campaign.targets) + campaign.add_measurements(rec) + + # Get recommendations and set them as pending experiments while getting another set + # Fix the random seed for each recommend call to limit influence of randomness in + # some recommenders which could also trivially avoid overlap + with temporary_seed(1337): + rec1 = campaign.recommend(batch_size) + campaign._cached_recommendation = pd.DataFrame() # ensure no recommendation cache + with temporary_seed(1337): + rec2 = campaign.recommend(batch_size=batch_size, pending_experiments=rec1) + + # Assert they have no overlap, round to avoid numerical fluctuation + overlap = pd.merge(rec1.round(3), rec2.round(3), how="inner") + assert len(overlap) == 0, ( + f"Recommendations are overlapping!\n\nRecommendations 1:\n{rec1}\n\n" + f"Recommendations 2:\n{rec2}\n\nOverlap:\n{overlap}" + ) + + +_non_mc_acqfs = [a() for a in get_subclasses(AcquisitionFunction) if not a.is_mc] + + +@pytest.mark.parametrize( + "acqf", _non_mc_acqfs, ids=[a.abbreviation for a in _non_mc_acqfs] +) +@pytest.mark.parametrize( + "parameter_names", + [ + param(_discrete_params, id="discrete"), + param(_continuous_params, id="continuous"), + param(_hybrid_params, id="hybrid"), + ], +) +@pytest.mark.parametrize("n_grid_points", [5], ids=["g5"]) +@pytest.mark.parametrize("batch_size", [3], ids=["b3"]) +def test_invalid_acqf(searchspace, recommender, objective, batch_size, acqf): + """Test exception raised for acqfs that don't support pending experiments.""" + recommender = TwoPhaseMetaRecommender( + recommender=BotorchRecommender(acquisition_function=acqf) + ) + + # Get a recommendation and add fake results + rec1 = recommender.recommend(batch_size, searchspace, objective) + add_fake_results(rec1, 
objective.targets) + + # Create fake pending experiments + rec2 = rec1.copy() + add_parameter_noise(rec2, searchspace.parameters) + + with pytest.raises(IncompatibleAcquisitionFunctionError): + recommender.recommend( + batch_size, + searchspace, + objective, + measurements=rec1, + pending_experiments=rec2, + )