diff --git a/ema_workbench/em_framework/callbacks.py b/ema_workbench/em_framework/callbacks.py index f7968d6d3..dbebbc282 100644 --- a/ema_workbench/em_framework/callbacks.py +++ b/ema_workbench/em_framework/callbacks.py @@ -195,41 +195,32 @@ def __init__( reporting_frequency, log_progress, ) - - self.cases = None + self.cases = np.empty((nr_experiments, len(uncertainties) + len(levers)), dtype=object) + self.uncertainty_and_lever_labels = [(entry.name, "") for entry in uncertainties + levers] + self.uncertainties = [u.name for u in uncertainties] + self.levers = [l.name for l in levers] self.results = {} - # determine data types of parameters - columns = [] dtypes = [] - for parameter in self.parameters: - name = parameter.name dtype = "float" - if isinstance(parameter, BooleanParameter): dtype = "bool" elif isinstance(parameter, CategoricalParameter): dtype = "object" elif isinstance(parameter, IntegerParameter): dtype = "int" - columns.append(name) - dtypes.append(dtype) - - for name in ["scenario", "policy", "model"]: - columns.append(name) - dtypes.append("object") - - self.columns = columns + dtypes.append((parameter.name, dtype)) + + dtypes.extend( + [ + ("scenario", "object"), + ("policy", "object"), + ("model", "object"), + ] + ) self.dtypes = dtypes - - index = np.arange(nr_experiments) - column_dict = { - name: pd.Series(dtype=dtype, index=index) for name, dtype in zip(columns, dtypes) - } - df = pd.concat(column_dict, axis=1).copy() - - self.cases = df + self.cases = np.empty(nr_experiments, dtype=dtypes) for outcome in self.outcomes: shape = outcome.shape @@ -242,34 +233,27 @@ def _store_case(self, experiment): policy = experiment.policy index = experiment.experiment_id - self.cases.at[index, "scenario"] = scenario.name - self.cases.at[index, "policy"] = policy.name - self.cases.at[index, "model"] = experiment.model_name - - for k, v in scenario.items(): - self.cases.at[index, k] = v - - for k, v in policy.items(): - self.cases.at[index, k] = v + self.cases[index] = ( + tuple([scenario[u] for u in self.uncertainties]) + + tuple([policy[l] for l in self.levers]) + + (scenario.name, policy.name, experiment.model_name) + ) def _store_outcomes(self, case_id, outcomes): for outcome in self.outcomes: - outcome = outcome.name - _logger.debug(f"storing {outcome}") + outcome_name = outcome.name try: - outcome_res = outcomes[outcome] + outcome_res = outcomes[outcome_name] except KeyError: - message = f"{outcome} not specified as outcome in " f"model(s)" + message = f"{outcome_name} not specified as outcome in model(s)" _logger.debug(message) else: try: - self.results[outcome][case_id,] = outcome_res + self.results[outcome_name][case_id,] = outcome_res except KeyError: data = np.asarray(outcome_res) - shape = data.shape - if len(shape) > 2: message = self.shape_error_msg.format(len(shape)) raise ema_exceptions.EMAError(message) @@ -277,8 +261,8 @@ def _store_outcomes(self, case_id, outcomes): shape = list(shape) shape.insert(0, self.nr_experiments) - self.results[outcome] = self._setup_outcomes_array(shape, data.dtype) - self.results[outcome][case_id,] = outcome_res + self.results[outcome_name] = self._setup_outcomes_array(shape, data.dtype) + self.results[outcome_name][case_id,] = outcome_res def __call__(self, experiment, outcomes): """ @@ -293,11 +277,7 @@ def __call__(self, experiment, outcomes): """ super().__call__(experiment, outcomes) - - # store the case self._store_case(experiment) - - # store outcomes self._store_outcomes(experiment.experiment_id, outcomes) def get_results(self): @@ -309,18 +289,20 @@ def get_results(self): _logger.warning("some experiments have failed, returning masked result arrays") results[k] = v + cases = pd.DataFrame.from_records(self.cases) + # we want to ensure the dtypes for the columns in the experiments dataframe match # the type of uncertainty. The exception is needed in case their are missing values (i.e. nans). # nans can only ever be a float. - for name, dtype in zip(self.columns, self.dtypes): + for name, dtype in self.dtypes: try: if dtype == "object": dtype = "category" - self.cases[name] = self.cases[name].astype(dtype) + cases[name] = cases[name].astype(dtype) except Exception: pass - return self.cases, results + return cases, results def _setup_outcomes_array(self, shape, dtype): array = np.ma.empty(shape, dtype=dtype) diff --git a/test/test_em_framework/test_callback.py b/test/test_em_framework/test_callback.py index b135bfcb2..3e1124625 100644 --- a/test/test_em_framework/test_callback.py +++ b/test/test_em_framework/test_callback.py @@ -73,9 +73,13 @@ def test_store_results(mocker): # case 5 assert raises KeyError callback = DefaultCallback(uncs, [], outcomes, nr_experiments=nr_experiments) model_outcomes = {"some_other_name": np.random.rand(2, 2, 2)} - mock = mocker.patch("ema_workbench.em_framework.callbacks._logger.debug") + mock = mocker.patch( + "ema_workbench.em_framework.callbacks._logger.debug", + autospec=True, + side_effect=lambda *args, **kwargs: print(args, kwargs), + ) callback._store_outcomes(1, model_outcomes) - assert mock.call_count == 2 + assert mock.call_count == 1 def test_init(): @@ -94,9 +98,9 @@ def test_init(): assert callback.reporting_interval == 100 # self.assertEqual(callback.outcomes, outcomes) - names = callback.cases.columns.values.tolist() + names = [name for name, _ in callback.uncertainty_and_lever_labels] names = set(names) - assert names == {"a", "b", "policy", "model", "scenario"} + assert names == {"a", "b"} assert "scalar" not in callback.results assert "timeseries" not in callback.results @@ -121,9 +125,9 @@ def test_init(): assert callback.reporting_interval == 250 # self.assertEqual(callback.outcomes, [o.name for o in outcomes]) - names = callback.cases.columns.values.tolist() + names = [name for name, _ in callback.uncertainty_and_lever_labels] names = set(names) - assert names == {"a", "b", "c", "policy", "model", "scenario"} + assert names == {"a", "b", "c"} assert "scalar" not in callback.results assert "timeseries" not in callback.results @@ -209,7 +213,7 @@ def test_store_cases(): names = experiments.columns.values.tolist() for name in names: - assert experiments[name][0] == design[name] + assert experiments[name][0] == design.get(name), f"failed for name {name}" def test_get_results(mocker):