Migrate tensor to ndarray in searchspace #135

Merged · 3 commits · Mar 7, 2024
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [WIP] `torch` is loaded lazily
- Full lookup backtesting example now tests different substance encodings
- Replaced unmaintained `mordred` dependency by `mordredcommunity`
- `SearchSpace`s now use `ndarray` instead of `Tensor`

## [0.8.0] - 2024-02-29
### Changed
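The new changelog entry summarizes the pattern used throughout this PR: bounds stay as NumPy arrays inside the search space, and the conversion to a tensor happens only at the call sites that actually hand data to BoTorch. Below is a minimal sketch of that boundary, with `get_bounds` and `recommend` as hypothetical stand-ins for the property and call sites shown in the file diffs that follow:

```python
import numpy as np


def get_bounds() -> np.ndarray:
    """Search-space side: plain NumPy, no torch import required."""
    return np.array([[0.0, 10.0], [1.0, 5.0]]).T  # shape (2, n_params)


def recommend(bounds: np.ndarray):
    """Optimizer side: torch is imported lazily and the array is converted here."""
    import torch  # deferred so that importing this module stays torch-free

    tensor_bounds = torch.from_numpy(bounds)  # zero-copy view, dtype preserved
    # ... `tensor_bounds` would be passed on to optimize_acqf / optimize_acqf_mixed ...
    return tensor_bounds


print(recommend(get_bounds()).dtype)  # torch.float64
```

Because `torch.from_numpy` shares memory and keeps the float64 dtype, the conversion adds no copy and BoTorch still sees double precision.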
7 changes: 5 additions & 2 deletions baybe/recommenders/pure/bayesian/sequential_greedy.py
@@ -102,11 +102,12 @@ def _recommend_continuous(
batch_size: int,
) -> pd.DataFrame:
# See base class.
import torch

try:
points, _ = optimize_acqf(
acq_function=self._acquisition_function,
bounds=subspace_continuous.param_bounds_comp,
bounds=torch.from_numpy(subspace_continuous.param_bounds_comp),
q=batch_size,
num_restarts=5, # TODO make choice for num_restarts
raw_samples=10, # TODO make choice for raw_samples
@@ -159,6 +160,8 @@ def _recommend_hybrid(
NoMCAcquisitionFunctionError: If a non Monte Carlo acquisition function
is chosen.
"""
import torch

if len(candidates_comp) > 0:
# Calculate the number of samples from the given percentage
n_candidates = int(self.sampling_percentage * len(candidates_comp.index))
@@ -185,7 +188,7 @@
try:
points, _ = optimize_acqf_mixed(
acq_function=self._acquisition_function,
bounds=searchspace.param_bounds_comp,
bounds=torch.from_numpy(searchspace.param_bounds_comp),
q=batch_size,
num_restarts=5, # TODO make choice for num_restarts
raw_samples=10, # TODO make choice for raw_samples
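Besides wrapping the bounds in `torch.from_numpy`, both recommender methods move `import torch` from module level into the function body, so importing the module no longer pulls in torch by itself. A small sketch of the effect, assuming nothing else in the process has imported torch yet (`recommend` is an illustrative stand-in, not the actual method):

```python
import sys

import numpy as np


def recommend(bounds_np: np.ndarray):
    """Sketch of the deferred import used in _recommend_continuous / _recommend_hybrid."""
    import torch  # first call pays the import cost; later calls resolve via sys.modules

    return torch.from_numpy(bounds_np)


print("torch" in sys.modules)  # False: nothing has imported torch yet
recommend(np.zeros((2, 3)))
print("torch" in sys.modules)  # True: torch was loaded on first use only
```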
20 changes: 12 additions & 8 deletions baybe/searchspace/continuous.py
@@ -6,7 +6,6 @@

import numpy as np
import pandas as pd
import torch
from attr import define, field

from baybe.constraints import (
@@ -18,7 +17,7 @@
from baybe.searchspace.validation import validate_parameter_names
from baybe.serialization import SerialMixin, converter, select_constructor_hook
from baybe.utils.dataframe import pretty_print_df
from baybe.utils.numerical import DTypeFloatTorch
from baybe.utils.numerical import DTypeFloatNumpy


@define
@@ -145,11 +144,11 @@ def param_names(self) -> List[str]:
return [p.name for p in self.parameters]

@property
def param_bounds_comp(self) -> torch.Tensor:
"""Return bounds as tensor."""
def param_bounds_comp(self) -> np.ndarray:
"""Return bounds as numpy array."""
if not self.parameters:
return torch.empty(2, 0, dtype=DTypeFloatTorch)
return torch.stack([p.bounds.to_tensor() for p in self.parameters]).T
return np.empty((2, 0), dtype=DTypeFloatNumpy)
return np.stack([p.bounds.to_ndarray() for p in self.parameters]).T

def transform(
self,
@@ -180,12 +179,17 @@ def samples_random(self, n_points: int = 1) -> pd.DataFrame:
"""
if not self.parameters:
return pd.DataFrame()

import torch
from botorch.utils.sampling import get_polytope_samples

# TODO Revisit: torch and botorch here are actually only necessary if there
# are constraints. If there are none and the lists are empty we can just sample
# without the get_polytope_samples, which means torch and botorch
# wouldn't be needed.

points = get_polytope_samples(
n=n_points,
bounds=self.param_bounds_comp,
bounds=torch.from_numpy(self.param_bounds_comp),
equality_constraints=[
c.to_botorch(self.parameters) for c in self.constraints_lin_eq
],
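The central change in this file is the `param_bounds_comp` property: per-parameter bounds are stacked into a `(2, n_params)` NumPy array with lower bounds in the first row and upper bounds in the second. Here is a simplified sketch of that construction; `_Bounds` and `_Param` are illustrative stand-ins, and `DTypeFloatNumpy` is assumed to resolve to `np.float64`:

```python
import numpy as np


class _Bounds:
    """Stand-in for a parameter's bounds object exposing to_ndarray()."""

    def __init__(self, lower: float, upper: float):
        self._pair = np.array([lower, upper], dtype=np.float64)

    def to_ndarray(self) -> np.ndarray:
        return self._pair


class _Param:
    def __init__(self, lower: float, upper: float):
        self.bounds = _Bounds(lower, upper)


def param_bounds_comp(parameters) -> np.ndarray:
    """New contract: a (2, n_params) float ndarray instead of a torch.Tensor."""
    if not parameters:
        return np.empty((2, 0), dtype=np.float64)
    return np.stack([p.bounds.to_ndarray() for p in parameters]).T


print(param_bounds_comp([_Param(1.0, 3.0), _Param(7.0, 9.0)]))
# [[1. 7.]   <- lower bounds
#  [3. 9.]]  <- upper bounds
```

In `samples_random`, torch and botorch are likewise imported inside the method and, as the new TODO notes, are only strictly needed when linear constraints force the `get_polytope_samples` path.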
6 changes: 3 additions & 3 deletions baybe/searchspace/core.py
@@ -5,8 +5,8 @@
from enum import Enum
from typing import List, Optional, cast

import numpy as np
import pandas as pd
import torch
from attr import define, field

from baybe.constraints import (
@@ -231,9 +231,9 @@ def contains_rdkit(self) -> bool:
)

@property
def param_bounds_comp(self) -> torch.Tensor:
def param_bounds_comp(self) -> np.ndarray:
"""Return bounds as tensor."""
return torch.hstack(
return np.hstack(
[self.discrete.param_bounds_comp, self.continuous.param_bounds_comp]
)

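At the `SearchSpace` level, the combined bounds are just the column-wise concatenation of the discrete and continuous `(2, n)` blocks, now via `np.hstack` instead of `torch.hstack`. A sketch using the same numbers as `test_bounds_order` further down:

```python
import numpy as np

discrete_bounds = np.array([[1.0, 7.0], [3.0, 9.0]])      # (2, 2) block from the discrete subspace
continuous_bounds = np.array([[4.0, 10.0], [6.0, 12.0]])  # (2, 2) block from the continuous subspace

combined = np.hstack([discrete_bounds, continuous_bounds])
print(combined)
# [[ 1.  7.  4. 10.]
#  [ 3.  9.  6. 12.]]
```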
7 changes: 3 additions & 4 deletions baybe/searchspace/discrete.py
@@ -7,7 +7,6 @@

import numpy as np
import pandas as pd
import torch
from attr import define, field
from cattrs import IterableValidationError

@@ -481,14 +480,14 @@ def is_empty(self) -> bool:
return len(self.parameters) == 0

@property
def param_bounds_comp(self) -> torch.Tensor:
def param_bounds_comp(self) -> np.ndarray:
"""Return bounds as tensor.

Take bounds from the parameter definitions, but discards bounds belonging to
columns that were filtered out during the creation of the space.
"""
if not self.parameters:
return torch.empty(2, 0)
return np.empty((2, 0))
bounds = np.hstack(
[
np.vstack([p.comp_df[col].min(), p.comp_df[col].max()])
@@ -497,7 +496,7 @@ def param_bounds_comp(self) -> torch.Tensor:
if col in self.comp_rep.columns
]
)
return torch.from_numpy(bounds)
return bounds

def mark_as_measured(
self,
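For the discrete subspace, the bounds come from the computational representation: for every column that survived filtering, the column's min and max are stacked vertically, and the resulting `(2, 1)` pieces are concatenated horizontally. A simplified sketch with a single DataFrame standing in for the per-parameter `comp_df` objects (the column names are made up for illustration):

```python
import numpy as np
import pandas as pd

comp_df = pd.DataFrame({"A_comp": [1.0, 2.0, 3.0], "B_comp": [7.0, 8.0, 9.0]})
retained_columns = ["A_comp", "B_comp"]  # columns still present in the computational rep

bounds = np.hstack(
    [np.vstack([comp_df[col].min(), comp_df[col].max()]) for col in retained_columns]
)
print(bounds)
# [[1. 7.]
#  [3. 9.]]
```

The assembled array is already a NumPy `ndarray`, so the previous trailing `torch.from_numpy(bounds)` simply disappears.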
2 changes: 1 addition & 1 deletion baybe/surrogates/gaussian_process.py
@@ -56,7 +56,7 @@ def _fit(self, searchspace: SearchSpace, train_x: Tensor, train_y: Tensor) -> No
numeric_idxs = [i for i in range(train_x.shape[1]) if i != task_idx]

# get the input bounds from the search space in BoTorch Format
bounds = searchspace.param_bounds_comp
bounds = torch.from_numpy(searchspace.param_bounds_comp)
# TODO: use target value bounds when explicitly provided

# define the input and outcome transforms
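The surrogate itself keeps working on tensors; the only change is converting the NumPy bounds at the point of use. `torch.from_numpy` is a zero-copy conversion that preserves the float64 dtype, so the Gaussian process still receives double-precision bounds:

```python
import numpy as np
import torch

bounds_np = np.array([[0.0, 1.0], [2.0, 3.0]])       # float64 by default
bounds_t = torch.from_numpy(bounds_np)               # wraps the same buffer

print(bounds_t.dtype)                                # torch.float64
print(bounds_t.data_ptr() == bounds_np.ctypes.data)  # True: no copy was made
```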
3 changes: 2 additions & 1 deletion baybe/utils/basic.py
@@ -5,7 +5,6 @@
from typing import Callable, Dict, Iterable, List, TypeVar

import numpy as np
import torch

_C = TypeVar("_C", bound=type)
_T = TypeVar("_T")
@@ -57,6 +56,8 @@ def set_random_seed(seed: int):
Args:
seed: The chosen global random seed.
"""
import torch

torch.manual_seed(seed)
random.seed(seed)
np.random.seed(seed)
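`set_random_seed` keeps its behavior but defers the torch import to the first call, so importing `baybe.utils.basic` no longer loads torch on its own. A mirror of the helper with a quick reproducibility check as a usage example:

```python
import random

import numpy as np


def set_random_seed(seed: int) -> None:
    """Mirror of the updated helper: torch is imported only when seeding happens."""
    import torch

    torch.manual_seed(seed)
    random.seed(seed)
    np.random.seed(seed)


# Seeding twice with the same value reproduces draws from all three RNGs.
set_random_seed(1337)
import torch  # imported here only for the check below

first = (random.random(), float(np.random.rand()), float(torch.rand(1)))
set_random_seed(1337)
second = (random.random(), float(np.random.rand()), float(torch.rand(1)))
assert first == second
```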
4 changes: 2 additions & 2 deletions tests/test_continuous.py
@@ -1,6 +1,6 @@
"""Test for continuous parameters."""
import numpy as np
import pytest
import torch


@pytest.mark.parametrize(
@@ -16,6 +16,6 @@ def test_valid_configs(campaign):
print(campaign.searchspace.continuous.param_bounds_comp.flatten())

assert all(
torch.is_floating_point(itm)
np.issubdtype(type(itm), np.floating)
for itm in campaign.searchspace.continuous.param_bounds_comp.flatten()
)
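With `param_bounds_comp` now returning a NumPy array, the dtype check switches from `torch.is_floating_point` to `np.issubdtype` applied to the type of each flattened element. A minimal illustration of the replacement check:

```python
import numpy as np

values = np.array([[1.0, 7.0], [3.0, 9.0]]).flatten()

print(all(np.issubdtype(type(v), np.floating) for v in values))  # True
print(np.issubdtype(type(np.int64(3)), np.floating))             # False: integers fail the check
```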
12 changes: 6 additions & 6 deletions tests/test_searchspace.py
@@ -1,7 +1,7 @@
"""Tests for the searchspace module."""
import numpy as np
import pandas as pd
import pytest
import torch

from baybe.constraints import (
ContinuousLinearEqualityConstraint,
@@ -41,8 +41,8 @@ def test_bounds_order():
NumericalContinuousParameter(name="B_cont", bounds=(10.0, 12.0)),
]
searchspace = SearchSpace.from_product(parameters=parameters)
expected = torch.tensor([[1.0, 7.0, 4.0, 10.0], [3.0, 9.0, 6.0, 12.0]]).double()
assert torch.equal(
expected = np.array([[1.0, 7.0, 4.0, 10.0], [3.0, 9.0, 6.0, 12.0]])
assert np.array_equal(
searchspace.param_bounds_comp,
expected,
)
@@ -56,9 +56,9 @@ def test_empty_parameter_bounds():
parameters = []
searchspace_discrete = SubspaceDiscrete.from_product(parameters=parameters)
searchspace_continuous = SubspaceContinuous(parameters=parameters)
expected = torch.empty(2, 0)
assert torch.equal(searchspace_discrete.param_bounds_comp, expected)
assert torch.equal(searchspace_continuous.param_bounds_comp, expected)
expected = np.empty((2, 0))
assert np.array_equal(searchspace_discrete.param_bounds_comp, expected)
assert np.array_equal(searchspace_continuous.param_bounds_comp, expected)


def test_discrete_searchspace_creation_from_dataframe():
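The expectations change in type but not in precision: `np.array` defaults to float64, matching the old `torch.tensor(...).double()`, and `np.array_equal` performs the same exact shape-and-value comparison, including for the empty `(2, 0)` case:

```python
import numpy as np

expected = np.array([[1.0, 7.0, 4.0, 10.0], [3.0, 9.0, 6.0, 12.0]])
print(expected.dtype)  # float64, same precision as the former .double() tensor

# The empty-bounds comparison also works: equal shapes, zero elements to compare.
print(np.array_equal(np.empty((2, 0)), np.empty((2, 0))))  # True
```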