Merge: Migrate tensor to ndarray in searchspace (#135)

emdgroup · Mar 7, 2024 · de41e92 · de41e92
2 parents 2a03ed1 + 3401a4e
commit de41e92
Show file tree

Hide file tree

Showing 9 changed files with 35 additions and 27 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - [WIP] `torch` is loaded lazily
 - Full lookup backtesting example now tests different substance encodings
 - Replaced unmaintained `mordred` dependency by `mordredcommunity`
+- `SearchSpace`s now use `ndarray` instead of `Tensor`
 
 ## [0.8.0] - 2024-02-29
 ### Changed

diff --git a/baybe/recommenders/pure/bayesian/sequential_greedy.py b/baybe/recommenders/pure/bayesian/sequential_greedy.py
@@ -102,11 +102,12 @@ def _recommend_continuous(
         batch_size: int,
     ) -> pd.DataFrame:
         # See base class.
+        import torch
 
         try:
             points, _ = optimize_acqf(
                 acq_function=self._acquisition_function,
-                bounds=subspace_continuous.param_bounds_comp,
+                bounds=torch.from_numpy(subspace_continuous.param_bounds_comp),
                 q=batch_size,
                 num_restarts=5,  # TODO make choice for num_restarts
                 raw_samples=10,  # TODO make choice for raw_samples
@@ -159,6 +160,8 @@ def _recommend_hybrid(
             NoMCAcquisitionFunctionError: If a non Monte Carlo acquisition function
                 is chosen.
         """
+        import torch
+
         if len(candidates_comp) > 0:
             # Calculate the number of samples from the given percentage
             n_candidates = int(self.sampling_percentage * len(candidates_comp.index))
@@ -185,7 +188,7 @@ def _recommend_hybrid(
         try:
             points, _ = optimize_acqf_mixed(
                 acq_function=self._acquisition_function,
-                bounds=searchspace.param_bounds_comp,
+                bounds=torch.from_numpy(searchspace.param_bounds_comp),
                 q=batch_size,
                 num_restarts=5,  # TODO make choice for num_restarts
                 raw_samples=10,  # TODO make choice for raw_samples

diff --git a/baybe/searchspace/continuous.py b/baybe/searchspace/continuous.py
@@ -6,7 +6,6 @@
 
 import numpy as np
 import pandas as pd
-import torch
 from attr import define, field
 
 from baybe.constraints import (
@@ -18,7 +17,7 @@
 from baybe.searchspace.validation import validate_parameter_names
 from baybe.serialization import SerialMixin, converter, select_constructor_hook
 from baybe.utils.dataframe import pretty_print_df
-from baybe.utils.numerical import DTypeFloatTorch
+from baybe.utils.numerical import DTypeFloatNumpy
 
 
 @define
@@ -145,11 +144,11 @@ def param_names(self) -> List[str]:
         return [p.name for p in self.parameters]
 
     @property
-    def param_bounds_comp(self) -> torch.Tensor:
-        """Return bounds as tensor."""
+    def param_bounds_comp(self) -> np.ndarray:
+        """Return bounds as numpy array."""
         if not self.parameters:
-            return torch.empty(2, 0, dtype=DTypeFloatTorch)
-        return torch.stack([p.bounds.to_tensor() for p in self.parameters]).T
+            return np.empty((2, 0), dtype=DTypeFloatNumpy)
+        return np.stack([p.bounds.to_ndarray() for p in self.parameters]).T
 
     def transform(
         self,
@@ -180,12 +179,17 @@ def samples_random(self, n_points: int = 1) -> pd.DataFrame:
         """
         if not self.parameters:
             return pd.DataFrame()
-
+        import torch
         from botorch.utils.sampling import get_polytope_samples
 
+        # TODO Revisit: torch and botorch are only needed here when constraints are
+        # present. If the constraint lists are empty, we can sample directly without
+        # get_polytope_samples, in which case neither torch nor botorch would need to
+        # be imported.
+
         points = get_polytope_samples(
             n=n_points,
-            bounds=self.param_bounds_comp,
+            bounds=torch.from_numpy(self.param_bounds_comp),
             equality_constraints=[
                 c.to_botorch(self.parameters) for c in self.constraints_lin_eq
             ],

diff --git a/baybe/searchspace/core.py b/baybe/searchspace/core.py
@@ -5,8 +5,8 @@
 from enum import Enum
 from typing import List, Optional, cast
 
+import numpy as np
 import pandas as pd
-import torch
 from attr import define, field
 
 from baybe.constraints import (
@@ -231,9 +231,9 @@ def contains_rdkit(self) -> bool:
         )
 
     @property
-    def param_bounds_comp(self) -> torch.Tensor:
+    def param_bounds_comp(self) -> np.ndarray:
         """Return bounds as tensor."""
-        return torch.hstack(
+        return np.hstack(
             [self.discrete.param_bounds_comp, self.continuous.param_bounds_comp]
         )
 

diff --git a/baybe/searchspace/discrete.py b/baybe/searchspace/discrete.py
@@ -7,7 +7,6 @@
 
 import numpy as np
 import pandas as pd
-import torch
 from attr import define, field
 from cattrs import IterableValidationError
 
@@ -481,14 +480,14 @@ def is_empty(self) -> bool:
         return len(self.parameters) == 0
 
     @property
-    def param_bounds_comp(self) -> torch.Tensor:
+    def param_bounds_comp(self) -> np.ndarray:
         """Return bounds as tensor.
 
         Take bounds from the parameter definitions, but discards bounds belonging to
         columns that were filtered out during the creation of the space.
         """
         if not self.parameters:
-            return torch.empty(2, 0)
+            return np.empty((2, 0))
         bounds = np.hstack(
             [
                 np.vstack([p.comp_df[col].min(), p.comp_df[col].max()])
@@ -497,7 +496,7 @@ def param_bounds_comp(self) -> torch.Tensor:
                 if col in self.comp_rep.columns
             ]
         )
-        return torch.from_numpy(bounds)
+        return bounds
 
     def mark_as_measured(
         self,

diff --git a/baybe/surrogates/gaussian_process.py b/baybe/surrogates/gaussian_process.py
@@ -56,7 +56,7 @@ def _fit(self, searchspace: SearchSpace, train_x: Tensor, train_y: Tensor) -> No
         numeric_idxs = [i for i in range(train_x.shape[1]) if i != task_idx]
 
         # get the input bounds from the search space in BoTorch Format
-        bounds = searchspace.param_bounds_comp
+        bounds = torch.from_numpy(searchspace.param_bounds_comp)
         # TODO: use target value bounds when explicitly provided
 
         # define the input and outcome transforms

diff --git a/baybe/utils/basic.py b/baybe/utils/basic.py
@@ -5,7 +5,6 @@
 from typing import Callable, Dict, Iterable, List, TypeVar
 
 import numpy as np
-import torch
 
 _C = TypeVar("_C", bound=type)
 _T = TypeVar("_T")
@@ -57,6 +56,8 @@ def set_random_seed(seed: int):
     Args:
         seed: The chosen global random seed.
     """
+    import torch
+
     torch.manual_seed(seed)
     random.seed(seed)
     np.random.seed(seed)

diff --git a/tests/test_continuous.py b/tests/test_continuous.py
@@ -1,6 +1,6 @@
 """Test for continuous parameters."""
+import numpy as np
 import pytest
-import torch
 
 
 @pytest.mark.parametrize(
@@ -16,6 +16,6 @@ def test_valid_configs(campaign):
     print(campaign.searchspace.continuous.param_bounds_comp.flatten())
 
     assert all(
-        torch.is_floating_point(itm)
+        np.issubdtype(type(itm), np.floating)
         for itm in campaign.searchspace.continuous.param_bounds_comp.flatten()
     )
diff --git a/tests/test_searchspace.py b/tests/test_searchspace.py
@@ -1,7 +1,7 @@
 """Tests for the searchspace module."""
+import numpy as np
 import pandas as pd
 import pytest
-import torch
 
 from baybe.constraints import (
     ContinuousLinearEqualityConstraint,
@@ -41,8 +41,8 @@ def test_bounds_order():
         NumericalContinuousParameter(name="B_cont", bounds=(10.0, 12.0)),
     ]
     searchspace = SearchSpace.from_product(parameters=parameters)
-    expected = torch.tensor([[1.0, 7.0, 4.0, 10.0], [3.0, 9.0, 6.0, 12.0]]).double()
-    assert torch.equal(
+    expected = np.array([[1.0, 7.0, 4.0, 10.0], [3.0, 9.0, 6.0, 12.0]])
+    assert np.array_equal(
         searchspace.param_bounds_comp,
         expected,
     )
@@ -56,9 +56,9 @@ def test_empty_parameter_bounds():
     parameters = []
     searchspace_discrete = SubspaceDiscrete.from_product(parameters=parameters)
     searchspace_continuous = SubspaceContinuous(parameters=parameters)
-    expected = torch.empty(2, 0)
-    assert torch.equal(searchspace_discrete.param_bounds_comp, expected)
-    assert torch.equal(searchspace_continuous.param_bounds_comp, expected)
+    expected = np.empty((2, 0))
+    assert np.array_equal(searchspace_discrete.param_bounds_comp, expected)
+    assert np.array_equal(searchspace_continuous.param_bounds_comp, expected)
 
 
 def test_discrete_searchspace_creation_from_dataframe():