Skip to content

Commit

Permalink
Refactor recommender signature (#220)
Browse files Browse the repository at this point in the history
This PR implements a breaking change by refactoring the recommender
signature such that:
* it accepts an optional `Objective`
* it expects training data as a single dataframe in experimental
representation

Apart from fixing certain responsibilities (e.g. `Campaign` now only
acts as a meta-data handler, just like it is supposed to), this
enables/prepares several new features:
* Users can directly use `Recommender`s as an entry point instead of being
forced to go via `Campaign`
* Because recommenders now operate on experimental representations, they
can offer the same interface back to users, e.g. when exposing
model-internal things such as acquisition functions and surrogate models
* Additional pre-processing steps that require access to the
experimental representation (such as data augmentation) can now happen
inside recommenders
  • Loading branch information
AdrianSosic authored Jun 4, 2024
2 parents 3a08893 + c75ad17 commit f064b7f
Show file tree
Hide file tree
Showing 16 changed files with 256 additions and 176 deletions.
13 changes: 13 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,13 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]
### Changed
- Passing an `Objective` to `Campaign` is now optional

### Breaking Changes
- Providing an explicit `batch_size` is now mandatory when asking for recommendations

## [0.9.1] - 2024-06-04
### Changed
- Discrete searchspace memory estimate is now natively represented in bytes
Expand Down Expand Up @@ -52,6 +59,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Environment variables `BAYBE_NUMPY_USE_SINGLE_PRECISION` and
`BAYBE_TORCH_USE_SINGLE_PRECISION` to enforce single point precision usage

### Breaking Changes
- `RecommenderProtocol.recommend` now accepts an optional `Objective`
- `RecommenderProtocol.recommend` now expects training data to be provided as a single
dataframe in experimental representation instead of two separate dataframes in
computational representation

### Removed
- `model_params` attribute from `Surrogate` base class, `GaussianProcessSurrogate` and
`CustomONNXSurrogate`
Expand Down
30 changes: 10 additions & 20 deletions baybe/campaign.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,13 @@
from __future__ import annotations

import json
from typing import Optional

import cattrs
import numpy as np
import pandas as pd
from attrs import define, field
from attrs.converters import optional

from baybe.exceptions import DeprecationError
from baybe.objectives.base import Objective, to_objective
Expand Down Expand Up @@ -47,7 +49,9 @@ class Campaign(SerialMixin):
searchspace: SearchSpace = field()
"""The search space in which the experiments are conducted."""

objective: Objective = field(converter=to_objective)
objective: Optional[Objective] = field(
default=None, converter=optional(to_objective)
)
"""The optimization objective.
When passing a single :class:`baybe.targets.base.Target`, it gets automatically
wrapped into a :class:`baybe.objectives.single.SingleTargetObjective`."""
Expand Down Expand Up @@ -127,21 +131,7 @@ def parameters(self) -> tuple[Parameter, ...]:
@property
def targets(self) -> tuple[Target, ...]:
"""The targets of the underlying objective."""
return self.objective.targets

@property
def _measurements_parameters_comp(self) -> pd.DataFrame:
"""The computational representation of the measured parameters."""
if len(self._measurements_exp) < 1:
return pd.DataFrame()
return self.searchspace.transform(self._measurements_exp)

@property
def _measurements_targets_comp(self) -> pd.DataFrame:
"""The computational representation of the measured targets."""
if len(self._measurements_exp) < 1:
return pd.DataFrame()
return self.objective.transform(self._measurements_exp)
return self.objective.targets if self.objective is not None else ()

@classmethod
def from_config(cls, config_json: str) -> Campaign:
Expand Down Expand Up @@ -258,7 +248,7 @@ def add_measurements(

def recommend(
self,
batch_size: int = 5,
batch_size: int,
batch_quantity: int = None, # type: ignore[assignment]
) -> pd.DataFrame:
"""Provide the recommendations for the next batch of experiments.
Expand Down Expand Up @@ -298,10 +288,10 @@ def recommend(

# Get the recommended search space entries
rec = self.recommender.recommend(
self.searchspace,
batch_size,
self._measurements_parameters_comp,
self._measurements_targets_comp,
self.searchspace,
self.objective,
self._measurements_exp,
)

# Cache the recommendations
Expand Down
23 changes: 16 additions & 7 deletions baybe/recommenders/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import cattrs
import pandas as pd

from baybe.objectives.base import Objective
from baybe.recommenders.deprecation import structure_recommender_protocol
from baybe.searchspace import SearchSpace
from baybe.serialization import converter, unstructure_base
Expand All @@ -15,21 +16,29 @@ class RecommenderProtocol(Protocol):

def recommend(
self,
searchspace: SearchSpace,
batch_size: int,
train_x: Optional[pd.DataFrame],
train_y: Optional[pd.DataFrame],
searchspace: SearchSpace,
objective: Optional[Objective],
measurements: Optional[pd.DataFrame],
) -> pd.DataFrame:
"""Recommend a batch of points from the given search space.
Args:
searchspace: The search space from which to recommend the points.
batch_size: The number of points to be recommended.
train_x: Optional training inputs for training a model.
train_y: Optional training labels for training a model.
searchspace: The search space from which to recommend the points.
objective: An optional objective to be optimized.
measurements: Optional experimentation data that can be used for model
training. The data is to be provided in "experimental representation":
It needs to contain one column for each parameter spanning the search
space (column name matching the parameter name) and one column for each
target tracked by the objective (column name matching the target name).
Each row corresponds to one conducted experiment, where the parameter
columns define the experimental setting and the target columns report
the measured outcomes.
Returns:
A dataframe containing the recommendations as individual rows.
A dataframe containing the recommendations in experimental representation
as individual rows.
"""
...

Expand Down
35 changes: 24 additions & 11 deletions baybe/recommenders/meta/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from attrs import define, field

from baybe.exceptions import DeprecationError
from baybe.objectives.base import Objective
from baybe.recommenders.base import RecommenderProtocol
from baybe.recommenders.deprecation import structure_recommender_protocol
from baybe.recommenders.pure.base import PureRecommender
Expand Down Expand Up @@ -50,35 +51,47 @@ def _validate_allow_recommending_already_measured(self, _, value):
@abstractmethod
def select_recommender(
self,
batch_size: int,
searchspace: SearchSpace,
batch_size: int = 1,
train_x: Optional[pd.DataFrame] = None,
train_y: Optional[pd.DataFrame] = None,
objective: Optional[Objective] = None,
measurements: Optional[pd.DataFrame] = None,
) -> PureRecommender:
"""Select a pure recommender for the given experimentation context.
Args:
batch_size:
See :func:`baybe.recommenders.meta.base.MetaRecommender.recommend`.
searchspace:
See :func:`baybe.recommenders.meta.base.MetaRecommender.recommend`.
batch_size:
objective:
See :func:`baybe.recommenders.meta.base.MetaRecommender.recommend`.
measurements:
See :func:`baybe.recommenders.meta.base.MetaRecommender.recommend`.
train_x: See :func:`baybe.recommenders.meta.base.MetaRecommender.recommend`.
train_y: See :func:`baybe.recommenders.meta.base.MetaRecommender.recommend`.
Returns:
The selected recommender.
"""

def recommend(
self,
batch_size: int,
searchspace: SearchSpace,
batch_size: int = 1,
train_x: Optional[pd.DataFrame] = None,
train_y: Optional[pd.DataFrame] = None,
objective: Optional[Objective] = None,
measurements: Optional[pd.DataFrame] = None,
) -> pd.DataFrame:
"""See :func:`baybe.recommenders.base.RecommenderProtocol.recommend`."""
recommender = self.select_recommender(searchspace, batch_size, train_x, train_y)
return recommender.recommend(searchspace, batch_size, train_x, train_y)
recommender = self.select_recommender(
batch_size=batch_size,
searchspace=searchspace,
objective=objective,
measurements=measurements,
)
return recommender.recommend(
batch_size=batch_size,
searchspace=searchspace,
objective=objective,
measurements=measurements,
)


# Register (un-)structure hooks
Expand Down
75 changes: 29 additions & 46 deletions baybe/recommenders/meta/sequential.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,12 @@
from attrs.validators import deep_iterable, in_, instance_of

from baybe.exceptions import NoRecommendersLeftError
from baybe.objectives.base import Objective
from baybe.recommenders.meta.base import MetaRecommender
from baybe.recommenders.pure.base import PureRecommender
from baybe.recommenders.pure.bayesian.sequential_greedy import (
SequentialGreedyRecommender,
)
from baybe.recommenders.pure.nonpredictive.base import NonPredictiveRecommender
from baybe.recommenders.pure.nonpredictive.sampling import RandomRecommender
from baybe.searchspace import SearchSpace
from baybe.serialization import (
Expand All @@ -25,12 +25,6 @@
converter,
)

# TODO: Make bayesian recommenders handle empty training data
_unsupported_recommender_error = ValueError(
f"For cases where no training is available, the selected recommender "
f"must be a subclass of '{NonPredictiveRecommender.__name__}'."
)


@define
class TwoPhaseMetaRecommender(MetaRecommender):
Expand Down Expand Up @@ -59,22 +53,16 @@ class TwoPhaseMetaRecommender(MetaRecommender):

def select_recommender( # noqa: D102
self,
searchspace: SearchSpace,
batch_size: int = 1,
train_x: Optional[pd.DataFrame] = None,
train_y: Optional[pd.DataFrame] = None,
batch_size: int,
searchspace: Optional[SearchSpace] = None,
objective: Optional[Objective] = None,
measurements: Optional[pd.DataFrame] = None,
) -> PureRecommender:
# See base class.

# TODO: enable bayesian recommenders for empty training data
if (train_x is None or len(train_x) == 0) and not isinstance(
self.initial_recommender, NonPredictiveRecommender
):
raise _unsupported_recommender_error

return (
self.recommender
if len(train_x) >= self.switch_after
if (measurements is not None) and (len(measurements) >= self.switch_after)
else self.initial_recommender
)

Expand All @@ -95,6 +83,8 @@ class SequentialMetaRecommender(MetaRecommender):
instead.
Raises:
RuntimeError: If the training dataset size decreased compared to the previous
call.
NoRecommendersLeftError: If more recommenders are requested than there are
recommenders available and ``mode="raise"``.
"""
Expand Down Expand Up @@ -134,21 +124,24 @@ class SequentialMetaRecommender(MetaRecommender):

def select_recommender( # noqa: D102
self,
searchspace: SearchSpace,
batch_size: int = 1,
train_x: Optional[pd.DataFrame] = None,
train_y: Optional[pd.DataFrame] = None,
batch_size: int,
searchspace: Optional[SearchSpace] = None,
objective: Optional[Objective] = None,
measurements: Optional[pd.DataFrame] = None,
) -> PureRecommender:
# See base class.

n_data = len(measurements) if measurements is not None else 0

# If the training dataset size has increased, move to the next recommender
if len(train_x) > self._n_last_measurements:
if n_data > self._n_last_measurements:
self._step += 1

# If the training dataset size has decreased, something went wrong
elif len(train_x) < self._n_last_measurements:
elif n_data < self._n_last_measurements:
raise RuntimeError(
f"The training dataset size decreased from {self._n_last_measurements} "
f"to {len(train_x)} since the last function call, which indicates that "
f"to {n_data} since the last function call, which indicates that "
f"'{self.__class__.__name__}' was not used as intended."
)

Expand All @@ -169,13 +162,7 @@ def select_recommender( # noqa: D102
) from ex

# Remember the training dataset size for the next call
self._n_last_measurements = len(train_x)

# TODO: enable bayesian recommenders for empty training data
if (train_x is None or len(train_x) == 0) and not isinstance(
recommender, NonPredictiveRecommender
):
raise _unsupported_recommender_error
self._n_last_measurements = n_data

return recommender

Expand Down Expand Up @@ -219,24 +206,26 @@ def default_iterator(self):

def select_recommender( # noqa: D102
self,
searchspace: SearchSpace,
batch_size: int = 1,
train_x: Optional[pd.DataFrame] = None,
train_y: Optional[pd.DataFrame] = None,
batch_size: int,
searchspace: Optional[SearchSpace] = None,
objective: Optional[Objective] = None,
measurements: Optional[pd.DataFrame] = None,
) -> PureRecommender:
# See base class.

use_last = True
n_data = len(measurements) if measurements is not None else 0

# If the training dataset size has increased, move to the next recommender
if len(train_x) > self._n_last_measurements:
if n_data > self._n_last_measurements:
self._step += 1
use_last = False

# If the training dataset size has decreased, something went wrong
elif len(train_x) < self._n_last_measurements:
elif n_data < self._n_last_measurements:
raise RuntimeError(
f"The training dataset size decreased from {self._n_last_measurements} "
f"to {len(train_x)} since the last function call, which indicates that "
f"to {n_data} since the last function call, which indicates that "
f"'{self.__class__.__name__}' was not used as intended."
)

Expand All @@ -251,13 +240,7 @@ def select_recommender( # noqa: D102
) from ex

# Remember the training dataset size for the next call
self._n_last_measurements = len(train_x)

# TODO: enable bayesian recommenders for empty training data
if (train_x is None or len(train_x) == 0) and not isinstance(
self._last_recommender, NonPredictiveRecommender
):
raise _unsupported_recommender_error
self._n_last_measurements = n_data

return self._last_recommender # type: ignore[return-value]

Expand Down
Loading

0 comments on commit f064b7f

Please sign in to comment.