Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Comp Rep Transition Point #278

Merged
merged 8 commits into from
Jul 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
AdrianSosic marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- `Parameter.is_numeric` has been replaced with `Parameter.is_numerical`
- `DiscreteParameter.transform_rep_exp2comp` has been replaced with
`DiscreteParameter.transform`
- `Surrogate` models now operate on dataframes in experimental representation instead of
tensors in computational representation

### Added
- `Surrogate` base class now exposes a `to_botorch` method
Expand Down
12 changes: 9 additions & 3 deletions baybe/acquisition/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,15 +39,21 @@ def to_botorch(
self,
surrogate: Surrogate,
searchspace: SearchSpace,
train_x: pd.DataFrame,
train_y: pd.DataFrame,
measurements: pd.DataFrame,
AdrianSosic marked this conversation as resolved.
Show resolved Hide resolved
):
"""Create the botorch-ready representation of the function."""
"""Create the botorch-ready representation of the function.

The required structure of `measurements` is specified in
:meth:`baybe.recommenders.base.RecommenderProtocol.recommend`.
"""
import botorch.acquisition as botorch_acqf_module

acqf_cls = getattr(botorch_acqf_module, self.__class__.__name__)
params_dict = filter_attributes(object=self, callable_=acqf_cls.__init__)

train_x = surrogate.transform_inputs(measurements)
train_y = surrogate.transform_targets(measurements)

AdrianSosic marked this conversation as resolved.
Show resolved Hide resolved
signature_params = signature(acqf_cls).parameters
additional_params = {}
if "model" in signature_params:
Expand Down
4 changes: 4 additions & 0 deletions baybe/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,7 @@ class DeprecationError(Exception):

class UnidentifiedSubclassError(Exception):
"""A specified subclass cannot be found in the given class hierarchy."""


class ModelNotTrainedError(Exception):
"""A prediction/transformation is attempted before the model has been trained."""
AdrianSosic marked this conversation as resolved.
Show resolved Hide resolved
4 changes: 2 additions & 2 deletions baybe/recommenders/naive.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ def recommend( # noqa: D102
# Get discrete candidates. The metadata flags are ignored since the search space
# is hybrid
# TODO Slight BOILERPLATE CODE, see recommender.py, ll. 47+
_, candidates_comp = searchspace.discrete.get_candidates(
candidates_exp, _ = searchspace.discrete.get_candidates(
allow_repeated_recommendations=True,
allow_recommending_already_measured=True,
)
Expand All @@ -147,7 +147,7 @@ def recommend( # noqa: D102
# Call the private function of the discrete recommender and get the indices
disc_rec_idx = self.disc_recommender._recommend_discrete(
subspace_discrete=searchspace.discrete,
candidates_comp=candidates_comp,
candidates_exp=candidates_exp,
batch_size=batch_size,
)

Expand Down
22 changes: 11 additions & 11 deletions baybe/recommenders/pure/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,15 +50,15 @@ def recommend( # noqa: D102
def _recommend_discrete(
self,
subspace_discrete: SubspaceDiscrete,
candidates_comp: pd.DataFrame,
candidates_exp: pd.DataFrame,
batch_size: int,
) -> pd.Index:
"""Generate recommendations from a discrete search space.

Args:
subspace_discrete: The discrete subspace from which to generate
recommendations.
candidates_comp: The computational representation of all discrete candidate
candidates_exp: The experimental representation of all discrete candidate
points to be considered.
batch_size: The size of the recommendation batch.

Expand All @@ -67,14 +67,14 @@ def _recommend_discrete(

Returns:
The dataframe indices of the recommended points in the provided
computational representation.
experimental representation.
"""
# If this method is not implemented by a child class, try to resort to hybrid
# recommendation (with an empty subspace) instead.
try:
return self._recommend_hybrid(
searchspace=SearchSpace(discrete=subspace_discrete),
candidates_comp=candidates_comp,
candidates_exp=candidates_exp,
batch_size=batch_size,
).index
except NotImplementedError as exc:
Expand Down Expand Up @@ -110,7 +110,7 @@ def _recommend_continuous(
try:
return self._recommend_hybrid(
searchspace=SearchSpace(continuous=subspace_continuous),
candidates_comp=pd.DataFrame(),
candidates_exp=pd.DataFrame(),
batch_size=batch_size,
)
except NotImplementedError as exc:
Expand All @@ -126,7 +126,7 @@ def _recommend_continuous(
def _recommend_hybrid(
self,
searchspace: SearchSpace,
candidates_comp: pd.DataFrame,
candidates_exp: pd.DataFrame,
batch_size: int,
) -> pd.DataFrame:
"""Generate recommendations from a hybrid search space.
Expand All @@ -138,7 +138,7 @@ def _recommend_hybrid(
Args:
searchspace: The hybrid search space from which to generate
recommendations.
candidates_comp: The computational representation of all discrete candidate
candidates_exp: The experimental representation of all discrete candidate
points to be considered.
batch_size: The size of the recommendation batch.

Expand Down Expand Up @@ -175,7 +175,7 @@ def _recommend_with_discrete_parts(

# Get discrete candidates
# Repeated recommendations are always allowed for hybrid spaces
_, candidates_comp = searchspace.discrete.get_candidates(
candidates_exp, _ = searchspace.discrete.get_candidates(
allow_repeated_recommendations=is_hybrid_space
or self.allow_repeated_recommendations,
allow_recommending_already_measured=is_hybrid_space
Expand All @@ -184,7 +184,7 @@ def _recommend_with_discrete_parts(

# Check if enough candidates are left
# TODO [15917]: This check is not perfectly correct.
if (not is_hybrid_space) and (len(candidates_comp) < batch_size):
if (not is_hybrid_space) and (len(candidates_exp) < batch_size):
raise NotEnoughPointsLeftError(
f"Using the current settings, there are fewer than {batch_size} "
"possible data points left to recommend. This can be "
Expand All @@ -196,11 +196,11 @@ def _recommend_with_discrete_parts(

# Get recommendations
if is_hybrid_space:
rec = self._recommend_hybrid(searchspace, candidates_comp, batch_size)
rec = self._recommend_hybrid(searchspace, candidates_exp, batch_size)
idxs = rec.index
else:
idxs = self._recommend_discrete(
searchspace.discrete, candidates_comp, batch_size
searchspace.discrete, candidates_exp, batch_size
)
rec = searchspace.discrete.exp_rep.loc[idxs, :]

Expand Down
10 changes: 2 additions & 8 deletions baybe/recommenders/pure/bayesian/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
from baybe.searchspace import SearchSpace
from baybe.surrogates import CustomONNXSurrogate, GaussianProcessSurrogate
from baybe.surrogates.base import Surrogate
from baybe.utils.dataframe import to_tensor


@define
Expand Down Expand Up @@ -51,14 +50,9 @@ def _setup_botorch_acqf(
measurements: pd.DataFrame,
) -> None:
"""Create the acquisition function for the current training data.""" # noqa: E501
# TODO: Transition point from dataframe to tensor needs to be refactored.
# Currently, surrogate models operate with tensors, while acquisition
# functions with dataframes.
train_x = searchspace.transform(measurements)
train_y = objective.transform(measurements)
self.surrogate_model._fit(searchspace, *to_tensor(train_x, train_y))
self.surrogate_model.fit(searchspace, objective, measurements)
self._botorch_acqf = self.acquisition_function.to_botorch(
self.surrogate_model, searchspace, train_x, train_y
self.surrogate_model, searchspace, measurements
)

def recommend( # noqa: D102
Expand Down
18 changes: 11 additions & 7 deletions baybe/recommenders/pure/bayesian/botorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,15 +74,15 @@ def _validate_percentage( # noqa: DOC101, DOC103
def _recommend_discrete(
self,
subspace_discrete: SubspaceDiscrete,
candidates_comp: pd.DataFrame,
candidates_exp: pd.DataFrame,
batch_size: int,
) -> pd.Index:
"""Generate recommendations from a discrete search space.

Args:
subspace_discrete: The discrete subspace from which to generate
recommendations.
candidates_comp: The computational representation of all discrete candidate
candidates_exp: The experimental representation of all discrete candidate
points to be considered.
batch_size: The size of the recommendation batch.

Expand All @@ -92,7 +92,7 @@ def _recommend_discrete(

Returns:
The dataframe indices of the recommended points in the provided
computational representation.
experimental representation.
"""
# For batch size > 1, this optimizer needs a MC acquisition function
if batch_size > 1 and not self.acquisition_function.is_mc:
Expand All @@ -104,16 +104,17 @@ def _recommend_discrete(
from botorch.optim import optimize_acqf_discrete

# determine the next set of points to be tested
candidates_tensor = to_tensor(candidates_comp)
candidates_comp = self.surrogate_model.transform_inputs(candidates_exp)
points, _ = optimize_acqf_discrete(
self._botorch_acqf, batch_size, candidates_tensor
self._botorch_acqf, batch_size, to_tensor(candidates_comp)
)

# retrieve the index of the points from the input dataframe
# IMPROVE: The merging procedure is conceptually similar to what
# `SearchSpace._match_measurement_with_searchspace_indices` does, though using
# a simpler matching logic. When refactoring the SearchSpace class to
# handle continuous parameters, a corresponding utility could be extracted.
# IMPROVE: Maintain order of recommendations (currently lost during merge)
idxs = pd.Index(
pd.merge(
candidates_comp.reset_index(),
Expand Down Expand Up @@ -179,7 +180,7 @@ def _recommend_continuous(
def _recommend_hybrid(
self,
searchspace: SearchSpace,
candidates_comp: pd.DataFrame,
candidates_exp: pd.DataFrame,
batch_size: int,
) -> pd.DataFrame:
"""Recommend points using the ``optimize_acqf_mixed`` function of BoTorch.
Expand All @@ -193,7 +194,7 @@ def _recommend_hybrid(

Args:
searchspace: The search space in which the recommendations should be made.
candidates_comp: The computational representation of the candidates
candidates_exp: The experimental representation of the candidates
of the discrete subspace.
batch_size: The size of the calculated batch.

Expand All @@ -214,6 +215,9 @@ def _recommend_hybrid(
import torch
from botorch.optim import optimize_acqf_mixed

# Transform discrete candidates
candidates_comp = self.surrogate_model.transform_inputs(candidates_exp)

if len(candidates_comp) > 0:
# Calculate the number of samples from the given percentage
n_candidates = math.ceil(
Expand Down
3 changes: 2 additions & 1 deletion baybe/recommenders/pure/nonpredictive/clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ def _make_selection_custom(
def _recommend_discrete(
self,
subspace_discrete: SubspaceDiscrete,
candidates_comp: pd.DataFrame,
candidates_exp: pd.DataFrame,
batch_size: int,
) -> pd.Index:
# See base class.
Expand All @@ -106,6 +106,7 @@ def _recommend_discrete(
scaler = StandardScaler()
scaler.fit(subspace_discrete.comp_rep)

candidates_comp = subspace_discrete.transform(candidates_exp)
candidates_scaled = np.ascontiguousarray(scaler.transform(candidates_comp))

# Set model parameters and perform fit
Expand Down
8 changes: 5 additions & 3 deletions baybe/recommenders/pure/nonpredictive/sampling.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,13 @@ class RandomRecommender(NonPredictiveRecommender):
def _recommend_hybrid(
self,
searchspace: SearchSpace,
candidates_comp: pd.DataFrame,
candidates_exp: pd.DataFrame,
batch_size: int,
) -> pd.DataFrame:
# See base class.

if searchspace.type == SearchSpaceType.DISCRETE:
return candidates_comp.sample(batch_size)
return candidates_exp.sample(batch_size)

cont_random = searchspace.continuous.sample_uniform(batch_size=batch_size)
if searchspace.type == SearchSpaceType.CONTINUOUS:
Expand Down Expand Up @@ -56,7 +56,7 @@ class FPSRecommender(NonPredictiveRecommender):
def _recommend_discrete(
self,
subspace_discrete: SubspaceDiscrete,
candidates_comp: pd.DataFrame,
candidates_exp: pd.DataFrame,
batch_size: int,
) -> pd.Index:
# See base class.
Expand All @@ -65,6 +65,8 @@ def _recommend_discrete(
# TODO [Scaling]: scaling should be handled by search space object
scaler = StandardScaler()
scaler.fit(subspace_discrete.comp_rep)

candidates_comp = subspace_discrete.transform(candidates_exp)
candidates_scaled = np.ascontiguousarray(scaler.transform(candidates_comp))
ilocs = farthest_point_sampling(candidates_scaled, batch_size)
return candidates_comp.index[ilocs]
Loading
Loading