diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index aa5fbf90..ea6d52cb 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -1,6 +1,7 @@
 Changelog
 =========
 
+- Update surrogate model initialisation to use all initial evidence
 - Use kernel copy to avoid pickle issue and allow BOLFI parallelisation with non-default kernel
 - Restrict matplotlib version < 3.9 for compatibility with GPy
 - Add option to use additive or multiplicative adjustment in any acquisition method
diff --git a/elfi/methods/inference/bolfi.py b/elfi/methods/inference/bolfi.py
index 07f878f9..aa36c13c 100644
--- a/elfi/methods/inference/bolfi.py
+++ b/elfi/methods/inference/bolfi.py
@@ -95,7 +95,7 @@ def __init__(self,
         if precomputed is not None:
             params = batch_to_arr2d(precomputed, self.target_model.parameter_names)
             n_precomputed = len(params)
-            self.target_model.update(params, precomputed[target_name])
+            self.target_model.update(params, precomputed[target_name], optimize=True)
 
         self.batches_per_acquisition = batches_per_acquisition or self.max_parallel_batches
 
@@ -115,6 +115,10 @@ def __init__(self,
         self.state['last_GP_update'] = self.n_initial_evidence
         self.state['acquisition'] = []
 
+        if self.target_model.n_evidence < 1 and self.n_initial_evidence > 0:
+            self.init_x = np.zeros((self.n_initial_evidence, self.target_model.input_dim))
+            self.init_y = np.zeros((self.n_initial_evidence, 1))
+
     def _resolve_initial_evidence(self, initial_evidence):
         # Some sensibility limit for starting GP regression
         precomputed = None
@@ -215,10 +219,20 @@ def update(self, batch, batch_index):
         params = batch_to_arr2d(batch, self.target_model.parameter_names)
         self._report_batch(batch_index, params, batch[self.target_name])
 
-        optimize = self._should_optimize()
-        self.target_model.update(params, batch[self.target_name], optimize)
-        if optimize:
-            self.state['last_GP_update'] = self.target_model.n_evidence
+        if self.target_model.n_evidence < 1 and self.n_initial_evidence > 0:
+            # accumulate initialisation data
+            n = self.state['n_evidence']
+            self.init_x[n - self.batch_size:n] = params
+            self.init_y[n - self.batch_size:n] = batch[self.target_name].reshape(-1, 1)
+            if self.state['n_evidence'] >= self.n_initial_evidence:
+                # initialise model
+                self.target_model.update(self.init_x, self.init_y, optimize=True)
+        else:
+            # update model
+            optimize = self._should_optimize()
+            self.target_model.update(params, batch[self.target_name], optimize)
+            if optimize:
+                self.state['last_GP_update'] = self.state['n_evidence']
 
     def prepare_new_batch(self, batch_index):
         """Prepare values for a new batch.
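Note (not part of the patch): the `update()` change above replaces per-batch GP updates during the initial-evidence phase with an accumulate-then-initialise pattern. Below is a minimal standalone sketch of that pattern; `MockSurrogate` and all names in it are illustrative stand-ins, not ELFI's actual `GPyRegression` API.

```python
import numpy as np


class MockSurrogate:
    """Hypothetical stand-in for the GP target model; only records update calls."""

    def __init__(self, input_dim):
        self.input_dim = input_dim
        self.n_evidence = 0

    def update(self, x, y, optimize=False):
        # A real surrogate would refit its hyperparameters here.
        self.n_evidence += len(x)
        print(f"surrogate updated with {len(x)} points, optimize={optimize}")


n_initial_evidence, batch_size = 6, 2
model = MockSurrogate(input_dim=1)
init_x = np.zeros((n_initial_evidence, model.input_dim))
init_y = np.zeros((n_initial_evidence, 1))
n_evidence = 0

rng = np.random.default_rng(0)
for _ in range(n_initial_evidence // batch_size):
    params = rng.random((batch_size, model.input_dim))
    values = rng.random(batch_size)
    n_evidence += batch_size
    if model.n_evidence < 1 and n_initial_evidence > 0:
        # accumulate initialisation data instead of updating the GP per batch
        init_x[n_evidence - batch_size:n_evidence] = params
        init_y[n_evidence - batch_size:n_evidence] = values.reshape(-1, 1)
        if n_evidence >= n_initial_evidence:
            # initialise the surrogate once, with all initial evidence
            model.update(init_x, init_y, optimize=True)
```

The surrogate thus sees a single `update(..., optimize=True)` call with all six points rather than three separate two-point updates, which is the behaviour described by the changelog entry.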
diff --git a/elfi/methods/inference/bolfire.py b/elfi/methods/inference/bolfire.py
index 4ab51528..01522a6c 100644
--- a/elfi/methods/inference/bolfire.py
+++ b/elfi/methods/inference/bolfire.py
@@ -95,6 +95,8 @@ def __init__(self,
         # Initialize BO
         self.n_initial_evidence = self._resolve_n_initial_evidence(n_initial_evidence)
         self.acquisition_method = self._resolve_acquisition_method(acquisition_method)
+        self.init_x = np.zeros((self.n_initial_evidence, self.target_model.input_dim))
+        self.init_y = np.zeros((self.n_initial_evidence, 1))
 
         # Initialize state dictionary
         self.state['n_evidence'] = 0
@@ -390,10 +392,19 @@ def _process_simulated(self):
         # BO part
         self.state['n_evidence'] += 1
         parameter_values = self.current_params
-        optimize = self._should_optimize()
-        self.target_model.update(parameter_values, negative_log_ratio_value, optimize)
-        if optimize:
-            self.state['last_GP_update'] = self.target_model.n_evidence
+        if self.target_model.n_evidence < 1 and self.n_initial_evidence > 0:
+            # accumulate initialisation data
+            self.init_x[self.state['n_evidence'] - 1] = parameter_values
+            self.init_y[self.state['n_evidence'] - 1] = negative_log_ratio_value
+            if self.state['n_evidence'] >= self.n_initial_evidence:
+                # initialise model
+                self.target_model.update(self.init_x, self.init_y, optimize=True)
+        else:
+            # update model
+            optimize = self._should_optimize()
+            self.target_model.update(parameter_values, negative_log_ratio_value, optimize)
+            if optimize:
+                self.state['last_GP_update'] = self.target_model.n_evidence
 
     def _generate_training_data(self, likelihood, marginal):
         """Generate training data."""
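For context (a hedged usage sketch, not part of the patch): BOLFIRE applies the same accumulate-then-initialise pattern one sample at a time (single-row assignment) rather than per batch. A run that exercises the new initialisation path in BOLFI could look like the following, assuming the bundled MA2 example model and its node names (`d`, `t1`, `t2`) as used in the ELFI tutorial.

```python
import numpy as np
import elfi
from elfi.examples import ma2

# Build the bundled MA2 demo model; its discrepancy node is named 'd'.
model = ma2.get_model(seed_obs=1)
log_d = elfi.Operation(np.log, model['d'])

# With initial_evidence=20, the first 20 evaluations are accumulated and the
# GP surrogate is initialised once with all of them, instead of point by point.
bolfi = elfi.BOLFI(log_d, batch_size=1, initial_evidence=20, update_interval=10,
                   bounds={'t1': (-2, 2), 't2': (-1, 1)}, acq_noise_var=0.1, seed=1)
post = bolfi.fit(n_evidence=100)
```

Initialising the surrogate from the full initial-evidence set gives the first hyperparameter optimisation a better-conditioned dataset than incremental one-point updates would.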