From 0947f533d927a1f1063ff463c3dea88d38816354 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20P=2E=20D=C3=BCrholt?= Date: Fri, 21 Apr 2023 11:52:52 +0200 Subject: [PATCH] add include_labcodes (#168) * add include_labcodes * update black --- bofire/surrogates/trainable.py | 11 +++++++++++ tests/bofire/surrogates/test_cross_validate.py | 13 ++++++++++--- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/bofire/surrogates/trainable.py b/bofire/surrogates/trainable.py index 8d87be4f9..5e3c5c6d8 100644 --- a/bofire/surrogates/trainable.py +++ b/bofire/surrogates/trainable.py @@ -51,6 +51,7 @@ def cross_validate( experiments: pd.DataFrame, folds: int = -1, include_X: bool = False, + include_labcodes: bool = False, hooks: Dict[ str, Callable[ @@ -83,6 +84,8 @@ def cross_validate( Tuple[CvResults, CvResults, Dict[str, List[Any]]]: First CvResults object reflects the training data, second CvResults object the test data, dictionary object holds the return values of the applied hooks. 
""" + if include_labcodes and "labcode" not in experiments.columns: + raise ValueError("No labcodes available for the provided experiments.") if len(self.output_features) > 1: # type: ignore raise NotImplementedError( @@ -114,6 +117,12 @@ def cross_validate( X_test = experiments.iloc[test_index][self.input_features.get_keys()] # type: ignore y_train = experiments.iloc[train_index][self.output_features.get_keys()] # type: ignore y_test = experiments.iloc[test_index][self.output_features.get_keys()] # type: ignore + train_labcodes = ( + experiments.iloc[train_index]["labcode"] if include_labcodes else None + ) + test_labcodes = ( + experiments.iloc[test_index]["labcode"] if include_labcodes else None + ) # now fit the model self._fit(X_train, y_train) # now do the scoring @@ -127,6 +136,7 @@ def cross_validate( predicted=y_train_pred[key + "_pred"], standard_deviation=y_train_pred[key + "_sd"], X=X_train if include_X else None, + labcodes=train_labcodes, ) ) test_results.append( @@ -136,6 +146,7 @@ def cross_validate( predicted=y_test_pred[key + "_pred"], standard_deviation=y_test_pred[key + "_sd"], X=X_test if include_X else None, + labcodes=test_labcodes, ) ) # now call the hooks if available diff --git a/tests/bofire/surrogates/test_cross_validate.py b/tests/bofire/surrogates/test_cross_validate.py index 88183b974..92ce50058 100644 --- a/tests/bofire/surrogates/test_cross_validate.py +++ b/tests/bofire/surrogates/test_cross_validate.py @@ -80,8 +80,8 @@ def test_model_cross_validate_descriptor(): assert len(test_cv.results) == efolds -@pytest.mark.parametrize("include_X", [True, False]) -def test_model_cross_validate_include_X(include_X): +@pytest.mark.parametrize("include_X, include_labcodes", [[True, False], [False, True]]) +def test_model_cross_validate_include_X(include_X, include_labcodes): input_features = Inputs( features=[ ContinuousInput( @@ -93,6 +93,7 @@ def test_model_cross_validate_include_X(include_X): ) output_features = 
Outputs(features=[ContinuousOutput(key="y")]) experiments = input_features.sample(n=10) + experiments["labcode"] = [str(i) for i in range(10)] experiments.eval("y=((x_1**2 + x_2 - 11)**2+(x_1 + x_2**2 -7)**2)", inplace=True) experiments["valid_y"] = 1 model = SingleTaskGPSurrogate( @@ -101,7 +102,7 @@ ) model = surrogates.map(model) train_cv, test_cv, _ = model.cross_validate( - experiments, folds=5, include_X=include_X + experiments, folds=5, include_X=include_X, include_labcodes=include_labcodes ) if include_X: assert train_cv.results[0].X.shape == (8, 2) @@ -109,6 +110,12 @@ if include_X is False: assert train_cv.results[0].X is None assert test_cv.results[0].X is None + if include_labcodes: + assert train_cv.results[0].labcodes.shape == (8,) + assert test_cv.results[0].labcodes.shape == (2,) + else: + assert train_cv.results[0].labcodes is None + assert test_cv.results[0].labcodes is None def test_model_cross_validate_hooks():