diff --git a/autosklearn/data/target_validator.py b/autosklearn/data/target_validator.py
index 8a09f22e8f..5e9e1e0c86 100644
--- a/autosklearn/data/target_validator.py
+++ b/autosklearn/data/target_validator.py
@@ -166,6 +166,7 @@ def _fit(
             # The label encoder makes sure data is, and remains
             # 1 dimensional
             self.encoder = preprocessing.OrdinalEncoder(handle_unknown='use_encoded_value',
+                                                        dtype=np.int32,
                                                         unknown_value=-1)
         else:
             # We should not reach this if statement as we check for type of targets before
diff --git a/autosklearn/ensemble_builder.py b/autosklearn/ensemble_builder.py
index ca92830c98..780e264e29 100644
--- a/autosklearn/ensemble_builder.py
+++ b/autosklearn/ensemble_builder.py
@@ -1260,8 +1260,7 @@ def fit_ensemble(self, selected_keys: list):
 
         try:
             self.logger.debug(
-                "Fitting the ensemble on %d models.",
-                len(predictions_train),
+                f"Fitting the ensemble on {len(predictions_train)} models: {include_num_runs}"
             )
             start_time = time.time()
             ensemble.fit(predictions_train, self.y_true_ensemble,
diff --git a/autosklearn/evaluation/__init__.py b/autosklearn/evaluation/__init__.py
index 6df39da521..d108bb4da7 100644
--- a/autosklearn/evaluation/__init__.py
+++ b/autosklearn/evaluation/__init__.py
@@ -130,6 +130,7 @@ def __init__(
         init_params: Optional[Dict[str, Any]] = None,
         budget_type: Optional[str] = None,
         ta: Optional[Callable] = None,
+        compute_train_loss: bool = False,
         **resampling_strategy_args: Any,
     ):
 
@@ -190,6 +191,7 @@ def __init__(
         self.disable_file_output = disable_file_output
         self.init_params = init_params
         self.budget_type = budget_type
+        self.compute_train_loss = compute_train_loss
 
         if memory_limit is not None:
             memory_limit = int(math.ceil(memory_limit))
@@ -204,6 +206,7 @@ def __init__(
             self._get_test_loss = True
         else:
             self._get_test_loss = False
+        del dm
 
         self.port = port
         self.pynisher_context = pynisher_context
@@ -461,9 +464,11 @@ def run(
                 additional_run_info['learning_curve'] = learning_curve
                 additional_run_info['learning_curve_runtime'] = learning_curve_runtime
 
-            train_learning_curve = autosklearn.evaluation.util.extract_learning_curve(
-                info, 'train_loss'
-            )
+            train_learning_curve = []
+            if self.compute_train_loss:
+                train_learning_curve = autosklearn.evaluation.util.extract_learning_curve(
+                    info, 'train_loss'
+                )
             if len(train_learning_curve) > 1:
                 additional_run_info['train_learning_curve'] = train_learning_curve
                 additional_run_info['learning_curve_runtime'] = learning_curve_runtime
@@ -498,5 +503,11 @@ def run(
         runtime = float(obj.wall_clock_time)
 
         autosklearn.evaluation.util.empty_queue(queue)
-        self.logger.info("Finished evaluating configuration %d" % config_id)
+        self.logger.info(
+            "Finished evaluating configuration c:{}/i:{} with status {}".format(
+                config_id,
+                instance,
+                status
+            )
+        )
         return status, cost, runtime, additional_run_info
diff --git a/autosklearn/evaluation/train_evaluator.py b/autosklearn/evaluation/train_evaluator.py
index b253516085..3468d2f8e7 100644
--- a/autosklearn/evaluation/train_evaluator.py
+++ b/autosklearn/evaluation/train_evaluator.py
@@ -31,6 +31,7 @@
 from autosklearn.metrics import Scorer
 from autosklearn.util.backend import Backend
 from autosklearn.util.logging_ import PicklableClientLogger
+import gc
 
 
 __all__ = ['TrainEvaluator', 'eval_holdout', 'eval_iterative_holdout',
@@ -190,6 +191,7 @@ def __init__(
         exclude: Optional[List[str]] = None,
         disable_file_output: bool = False,
         init_params: Optional[Dict[str, Any]] = None,
+        compute_train_loss: bool = False,
     ):
 
         super().__init__(
@@ -222,8 +224,7 @@ def __init__(
         self.X_train = self.datamanager.data['X_train']
         self.Y_train = self.datamanager.data['Y_train']
         self.Y_optimization: Optional[Union[List, np.ndarray]] = None
-        self.Y_targets = [None] * self.num_cv_folds
-        self.Y_train_targets = np.ones(self.Y_train.shape) * np.NaN
+        self.Y_optimization_pred: Optional[Union[List, np.ndarray]] = None
         self.models = [None] * self.num_cv_folds
         self.indices: List[Optional[Tuple[List[int], List[int]]]] = [None] * self.num_cv_folds
 
@@ -233,6 +234,11 @@ def __init__(
         # opposite.
         self.partial = True
         self.keep_models = keep_models
+        # Train performance is not computed by default.
+        # It is only computed when the caller passes compute_train_loss=True.
+        self.compute_train_loss = compute_train_loss
+        if self.compute_train_loss:
+            self.Y_train_targets = np.ones(self.Y_train.shape) * np.NaN
 
     def fit_predict_and_loss(self, iterative: bool = False) -> None:
         """Fit, predict and compute the loss for cross-validation and
@@ -267,7 +273,6 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None:
                 converged = [False] * self.num_cv_folds
 
                 Y_train_pred = [None] * self.num_cv_folds
-                Y_optimization_pred = [None] * self.num_cv_folds
                 Y_valid_pred = [None] * self.num_cv_folds
                 Y_test_pred = [None] * self.num_cv_folds
                 train_splits = [None] * self.num_cv_folds
@@ -321,9 +326,10 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None:
                         model = self.models[i]
 
                         if iterations[i] == 1:
-                            self.Y_train_targets[train_indices] = \
-                                self.Y_train[train_indices]
-                            self.Y_targets[i] = self.Y_train[test_indices]
+                            if self.compute_train_loss:
+                                self.Y_train_targets[train_indices] = self.Y_train[
+                                    train_indices
+                                ]
 
                             Xt, fit_params = model.fit_transformer(
                                 self.X_train[train_indices],
@@ -346,28 +352,40 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None:
                             train_indices=train_indices,
                             test_indices=test_indices,
                         )
+                        if self.num_cv_folds == 1:
+                            self.Y_optimization_pred = opt_pred
+                        else:
+                            if self.Y_optimization_pred is None:
+                                y_shape = opt_pred.shape
+                                self.Y_optimization_pred = np.zeros(
+                                    (self.X_train.shape[0], 1 if len(y_shape) == 1 else y_shape[1]),
+                                    dtype=np.float32,
+                                )
+                            self.Y_optimization_pred[test_indices] = opt_pred
+                        del opt_pred
 
                         Y_train_pred[i] = train_pred
-                        Y_optimization_pred[i] = opt_pred
                         Y_valid_pred[i] = valid_pred
                         Y_test_pred[i] = test_pred
-                        train_splits[i] = train_indices
 
                         # Compute train loss of this fold and store it. train_loss could
                         # either be a scalar or a dict of scalars with metrics as keys.
-                        train_loss = self._loss(
-                            self.Y_train_targets[train_indices],
-                            train_pred,
-                        )
-                        train_losses[i] = train_loss
+                        if self.compute_train_loss:
+                            train_splits[i] = train_indices
+                            train_loss: Optional[Union[float, Dict[str, float]]] = self._loss(
+                                self.Y_train_targets[train_indices],
+                                train_pred,
+                            )
+                            train_losses[i] = train_loss
+
                         # number of training data points for this fold. Used for weighting
                         # the average.
                         train_fold_weights[i] = len(train_indices)
 
                         # Compute validation loss of this fold and store it.
                         optimization_loss = self._loss(
-                            self.Y_targets[i],
-                            opt_pred,
+                            self.Y_train[test_indices],
+                            self.Y_optimization_pred[test_indices],
                         )
                         opt_losses[i] = optimization_loss
                         # number of optimization data points for this fold.
@@ -384,6 +402,8 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None:
 
                         iterations[i] = iterations[i] + 1
 
+                        gc.collect()
+
                     # Compute weights of each fold based on the number of samples in each
                     # fold.
                     train_fold_weights_percentage = [
@@ -396,13 +416,17 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None:
                     # train_losses is a list of either scalars or dicts. If it contains
                     # dicts, then train_loss is computed using the target metric
                     # (self.metric).
-                    if all(isinstance(elem, dict) for elem in train_losses):
-                        train_loss = np.average([train_losses[i][str(self.metric)]
-                                                 for i in range(self.num_cv_folds)],
-                                                weights=train_fold_weights_percentage,
-                                                )
+                    if self.compute_train_loss:
+                        if all(isinstance(elem, dict) for elem in train_losses):
+                            train_loss = np.average([train_losses[i][str(self.metric)]
+                                                     for i in range(self.num_cv_folds)],
+                                                    weights=train_fold_weights_percentage,
+                                                    )
+                        else:
+                            train_loss = np.average(train_losses,
+                                                    weights=train_fold_weights_percentage)
                     else:
-                        train_loss = np.average(train_losses, weights=train_fold_weights_percentage)
+                        train_loss = None
 
                     # if all_scoring_function is true, return a dict of opt_loss.
                     # Otherwise, return a scalar.
@@ -419,16 +443,9 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None:
                     else:
                         opt_loss = np.average(opt_losses, weights=opt_fold_weights_percentage)
 
-                    Y_targets = self.Y_targets
-                    Y_train_targets = self.Y_train_targets
-
-                    Y_optimization_preds = np.concatenate(
-                        [Y_optimization_pred[i] for i in range(self.num_cv_folds)
-                         if Y_optimization_pred[i] is not None])
-                    Y_targets = np.concatenate([
-                        Y_targets[i] for i in range(self.num_cv_folds)
-                        if Y_targets[i] is not None
-                    ])
+                    # No need to generate the targets; they are the same as y_train
+                    if self.Y_optimization is None:
+                        self.Y_optimization = self.Y_train
 
                     if self.X_valid is not None:
                         Y_valid_preds = np.array([Y_valid_pred[i]
@@ -450,8 +467,8 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None:
                     else:
                         Y_test_preds = None
 
-                    self.Y_optimization = Y_targets
-                    self.Y_actual_train = Y_train_targets
+                    if self.compute_train_loss:
+                        self.Y_actual_train = self.Y_train_targets
 
                     self.model = self._get_model()
                     status = StatusType.DONOTADVANCE
@@ -461,7 +478,7 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None:
                     self.finish_up(
                         loss=opt_loss,
                         train_loss=train_loss,
-                        opt_pred=Y_optimization_preds,
+                        opt_pred=self.Y_optimization_pred,
                         valid_pred=Y_valid_preds,
                         test_pred=Y_test_preds,
                         additional_run_info=additional_run_info,
@@ -475,7 +492,6 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None:
             self.partial = False
 
             Y_train_pred = [None] * self.num_cv_folds
-            Y_optimization_pred = [None] * self.num_cv_folds
             Y_valid_pred = [None] * self.num_cv_folds
             Y_test_pred = [None] * self.num_cv_folds
             train_splits = [None] * self.num_cv_folds
@@ -537,27 +553,39 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None:
                         (additional_run_info, i)
                     )
 
+                if self.num_cv_folds == 1:
+                    self.Y_optimization_pred = opt_pred
+                else:
+                    if self.Y_optimization_pred is None:
+                        y_shape = opt_pred.shape
+                        self.Y_optimization_pred = np.zeros(
+                            (self.X_train.shape[0], 1 if len(y_shape) == 1 else y_shape[1]),
+                            dtype=np.float32,
+                        )
+                    self.Y_optimization_pred[test_split] = opt_pred
+
                 Y_train_pred[i] = train_pred
-                Y_optimization_pred[i] = opt_pred
                 Y_valid_pred[i] = valid_pred
                 Y_test_pred[i] = test_pred
-                train_splits[i] = train_split
 
                 # Compute train loss of this fold and store it. train_loss could
                 # either be a scalar or a dict of scalars with metrics as keys.
-                train_loss = self._loss(
-                    self.Y_train_targets[train_split],
-                    train_pred,
-                )
-                train_losses.append(train_loss)
+                if self.compute_train_loss:
+                    train_splits[i] = train_split
+                    train_loss = self._loss(
+                        self.Y_train_targets[train_split],
+                        train_pred,
+                    )
+                    train_losses.append(train_loss)
                 # number of training data points for this fold. Used for weighting
                 # the average.
                 train_fold_weights.append(len(train_split))
 
                 # Compute validation loss of this fold and store it.
                 optimization_loss = self._loss(
-                    self.Y_targets[i],
-                    opt_pred,
+                    self.Y_train[test_split],
+                    self.Y_optimization_pred if self.num_cv_folds == 1
+                    else self.Y_optimization_pred[test_split],
                 )
                 opt_losses.append(optimization_loss)
                 # number of optimization data points for this fold. Used for weighting
@@ -571,13 +599,16 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None:
 
             # train_losses is a list of either scalars or dicts. If it contains dicts,
             # then train_loss is computed using the target metric (self.metric).
-            if all(isinstance(elem, dict) for elem in train_losses):
-                train_loss = np.average([train_losses[i][str(self.metric)]
-                                         for i in range(self.num_cv_folds)],
-                                        weights=train_fold_weights,
-                                        )
+            if self.compute_train_loss:
+                if all(isinstance(elem, dict) for elem in train_losses):
+                    train_loss = np.average([train_losses[i][str(self.metric)]
+                                             for i in range(self.num_cv_folds)],
+                                            weights=train_fold_weights,
+                                            )
+                else:
+                    train_loss = np.average(train_losses, weights=train_fold_weights)
             else:
-                train_loss = np.average(train_losses, weights=train_fold_weights)
+                train_loss = None
 
             # if all_scoring_function is true, return a dict of opt_loss. Otherwise,
             # return a scalar.
@@ -591,14 +622,8 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None:
             else:
                 opt_loss = np.average(opt_losses, weights=opt_fold_weights)
 
-            Y_targets = self.Y_targets
-            Y_train_targets = self.Y_train_targets
-
-            Y_optimization_pred = np.concatenate(
-                [Y_optimization_pred[i] for i in range(self.num_cv_folds)
-                 if Y_optimization_pred[i] is not None])
-            Y_targets = np.concatenate([Y_targets[i] for i in range(self.num_cv_folds)
-                                        if Y_targets[i] is not None])
+            if self.Y_optimization is None:
+                self.Y_optimization = self.Y_train
 
             if self.X_valid is not None:
                 Y_valid_pred = np.array([Y_valid_pred[i]
@@ -616,8 +641,8 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None:
                 if len(np.shape(Y_test_pred)) == 3:
                     Y_test_pred = np.nanmean(Y_test_pred, axis=0)
 
-            self.Y_optimization = Y_targets
-            self.Y_actual_train = Y_train_targets
+            if self.compute_train_loss:
+                self.Y_actual_train = self.Y_train_targets
 
             if self.num_cv_folds > 1:
                 self.model = self._get_model()
@@ -653,7 +678,7 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None:
             self.finish_up(
                 loss=opt_loss,
                 train_loss=train_loss,
-                opt_pred=Y_optimization_pred,
+                opt_pred=self.Y_optimization_pred,
                 valid_pred=Y_valid_pred if self.X_valid is not None else None,
                 test_pred=Y_test_pred if self.X_test is not None else None,
                 additional_run_info=additional_run_info,
@@ -700,8 +725,12 @@ def partial_fit_predict_and_loss(self, fold: int, iterative: bool = False) -> No
                     add_model_to_self=True,
                 )
             )
-            train_loss = self._loss(self.Y_actual_train, train_pred)
-            loss = self._loss(self.Y_targets[fold], opt_pred)
+            if self.compute_train_loss:
+                train_loss: Optional[Union[float, Dict[str, float]]] = self._loss(
+                    self.Y_actual_train, train_pred)
+            else:
+                train_loss = None
+            loss = self._loss(self.Y_train[test_split], opt_pred)
 
             if self.model.estimator_supports_iterative_fit():
                 model_max_iter = self.model.get_max_iter()
@@ -741,7 +770,8 @@ def _partial_fit_and_predict_iterative(self, fold: int, train_indices: List[int]
             Xt, fit_params = model.fit_transformer(self.X_train[train_indices],
                                                    self.Y_train[train_indices])
 
-            self.Y_train_targets[train_indices] = self.Y_train[train_indices]
+            if self.compute_train_loss:
+                self.Y_train_targets[train_indices] = self.Y_train[train_indices]
 
             iteration = 1
             total_n_iteration = 0
@@ -775,7 +805,11 @@ def _partial_fit_and_predict_iterative(self, fold: int, train_indices: List[int]
                 if add_model_to_self:
                     self.model = model
 
-                train_loss = self._loss(self.Y_train[train_indices], Y_train_pred)
+                if self.compute_train_loss:
+                    train_loss: Optional[Union[float, Dict[str, float]]] = self._loss(
+                        self.Y_train[train_indices], Y_train_pred)
+                else:
+                    train_loss = None
                 loss = self._loss(self.Y_train[test_indices], Y_optimization_pred)
                 additional_run_info = model.get_additional_run_info()
 
@@ -814,7 +848,10 @@ def _partial_fit_and_predict_iterative(self, fold: int, train_indices: List[int]
                 additional_run_info
             ) = self._partial_fit_and_predict_standard(fold, train_indices, test_indices,
                                                        add_model_to_self)
-            train_loss = self._loss(self.Y_train[train_indices], Y_train_pred)
+            if self.compute_train_loss:
+                train_loss = self._loss(self.Y_train[train_indices], Y_train_pred)
+            else:
+                train_loss = None
             loss = self._loss(self.Y_train[test_indices], Y_optimization_pred)
             if self.model.estimator_supports_iterative_fit():
                 model_max_iter = self.model.get_max_iter()
@@ -861,8 +898,8 @@ def _partial_fit_and_predict_standard(
         else:
             self.models[fold] = model
 
-        self.Y_targets[fold] = self.Y_train[test_indices]
-        self.Y_train_targets[train_indices] = self.Y_train[train_indices]
+        if self.compute_train_loss:
+            self.Y_train_targets[train_indices] = self.Y_train[train_indices]
 
         train_pred, opt_pred, valid_pred, test_pred = self._predict(
             model=model,
@@ -892,8 +929,8 @@ def _partial_fit_and_predict_budget(
 
         model = self._get_model()
         self.indices[fold] = ((train_indices, test_indices))
-        self.Y_targets[fold] = self.Y_train[test_indices]
-        self.Y_train_targets[train_indices] = self.Y_train[train_indices]
+        if self.compute_train_loss:
+            self.Y_train_targets[train_indices] = self.Y_train[train_indices]
 
         _fit_with_budget(
             X_train=self.X_train,
@@ -929,9 +966,11 @@ def _partial_fit_and_predict_budget(
     def _predict(self, model: BaseEstimator, test_indices: List[int],
                  train_indices: List[int]) -> Tuple[np.ndarray, np.ndarray,
                                                     np.ndarray, np.ndarray]:
-        train_pred = self.predict_function(self.X_train[train_indices],
-                                           model, self.task_type,
-                                           self.Y_train[train_indices])
+        train_pred = None
+        if self.compute_train_loss:
+            train_pred = self.predict_function(self.X_train[train_indices],
+                                               model, self.task_type,
+                                               self.Y_train[train_indices])
 
         opt_pred = self.predict_function(self.X_train[test_indices],
                                          model, self.task_type,
diff --git a/autosklearn/metalearning/files/accuracy_binary.classification_dense/algorithm_runs.arff b/autosklearn/metalearning/files/accuracy_binary.classification_dense/algorithm_runs.arff
index c09d9871bb..d9e5f11d06 100644
--- a/autosklearn/metalearning/files/accuracy_binary.classification_dense/algorithm_runs.arff
+++ b/autosklearn/metalearning/files/accuracy_binary.classification_dense/algorithm_runs.arff
@@ -11,8 +11,8 @@ twonorm,1.0,1,0.015561015561015523,ok
 autouniv-au1-1000,1.0,7,0.18787878787878787,ok
 hill-valley,1.0,9,0.0,ok
 eye_movements,1.0,14,0.17655210643015518,ok
-abalone,1.0,17,0.7670537010159652,ok
-tamilnadu-electricity,1.0,19,0.9385715231349705,ok
+abalone,1.0,709,0.05224963715529751,ok
+tamilnadu-electricity,1.0,709,0.05924405904547558,ok
 xd6,1.0,22,0.0,ok
 colleges_usnews,1.0,25,0.2517482517482518,ok
 pc4,1.0,28,0.079002079002079,ok
@@ -34,7 +34,7 @@ cardiotocography,1.0,83,0.0,ok
 kick,1.0,86,0.09483474505896028,ok
 microaggregation2,1.0,88,0.3684848484848485,ok
 monks-problems-2,1.0,91,0.0,ok
-autouniv-au6-750,1.0,97,0.6720647773279352,ok
+autouniv-au6-750,1.0,709,0.1578947368421053,ok
 meta_stream_intervals.arff,1.0,99,0.004898013955984992,ok
 diabetes130us,1.0,102,0.3863676969805253,ok
 steel-plates-fault,1.0,105,0.21562499999999996,ok
@@ -46,7 +46,7 @@ isolet,1.0,121,0.03342401865526623,ok
 led24,1.0,124,0.2547348484848485,ok
 satimage,1.0,129,0.07873644507307875,ok
 pbcseq,1.0,131,0.14352574102964122,ok
-bachchoralharmony,1.0,134,0.8801498127340824,ok
+bachchoralharmony,1.0,709,0.7019796682718031,ok
 compas-two-years,1.0,138,0.3256748994830557,ok
 volcanoes-b6,1.0,142,0.030520646319569078,ok
 philippine,1.0,145,0.16943866943866948,ok
@@ -54,10 +54,10 @@ irish,1.0,147,0.0,ok
 monks-problems-1,1.0,149,0.0,ok
 porto-seguro,1.0,150,0.03627449482992995,ok
 arsenic-female-lung,1.0,154,0.005434782608695676,ok
-autouniv-au7-1100,1.0,160,0.5371900826446281,ok
+autouniv-au7-1100,1.0,709,0.26997245179063367,ok
 steel-plates-fault,1.0,162,0.0,ok
 speeddating,1.0,165,0.12590448625180894,ok
-devnagari-script,1.0,171,0.03764822134387347,ok
+devnagari-script,1.0,709,0.02154150197628457,ok
 arsenic-male-lung,1.0,172,0.0,ok
 fars,1.0,173,0.19925567994237525,ok
 wdbc,1.0,176,0.010695187165775444,ok
@@ -66,7 +66,7 @@ rmftsa_ladata,1.0,184,0.09580838323353291,ok
 banknote-authentication,1.0,188,0.0,ok
 visualizing_soil,1.0,191,0.00035075412136087447,ok
 wine-quality-white,1.0,193,0.31311881188118806,ok
-analcatdata_dmft,1.0,197,0.7832699619771863,ok
+analcatdata_dmft,1.0,709,0.2433460076045627,ok
 ova_colon,1.0,199,0.03339882121807469,ok
 volcanoes-e1,1.0,204,0.07435897435897432,ok
 cpu_small,1.0,206,0.06992230854605996,ok
@@ -100,7 +100,7 @@ mc1,1.0,302,0.004803073967339144,ok
 ova_prostate,1.0,304,0.0,ok
 volcanoes-b2,1.0,310,0.02840909090909094,ok
 hypothyroid,1.0,313,0.002411575562700996,ok
-autouniv-au7-700,1.0,315,0.48051948051948057,ok
+autouniv-au7-700,1.0,709,0.37662337662337664,ok
 ova_lung,1.0,317,0.013752455795677854,ok
 pokerhand,1.0,322,0.0009647853352628966,ok
 quake,1.0,327,0.42618384401114207,ok
@@ -150,7 +150,7 @@ micro-mass,1.0,480,0.07446808510638303,ok
 indian_pines,1.0,485,0.0553529996685449,ok
 miceprotein,1.0,486,0.0,ok
 diabetes,1.0,492,0.23320158102766797,ok
-collins,1.0,494,0.696969696969697,ok
+collins,1.0,709,0.2212121212121212,ok
 internet-advertisements,1.0,499,0.020332717190388205,ok
 ova_endometrium,1.0,503,0.037328094302554016,ok
 phishingwebsites,1.0,506,0.023300438596491224,ok
@@ -164,7 +164,7 @@ gtsrb-hog01,1.0,528,0.005962820063135754,ok
 puma32h,1.0,532,0.0828708842027377,ok
 bioresponse,1.0,536,0.20856911883589324,ok
 cjs,1.0,538,0.0032537960954447387,ok
-spoken-arabic-digit,1.0,543,0.7427538734258812,ok
+spoken-arabic-digit,1.0,709,0.12207334760687893,ok
 fri_c4_500_100,1.0,546,0.08484848484848484,ok
 ova_kidney,1.0,548,0.00982318271119842,ok
 ldpa,1.0,553,0.005643071154164292,ok
diff --git a/autosklearn/metalearning/files/accuracy_binary.classification_dense/configurations.csv b/autosklearn/metalearning/files/accuracy_binary.classification_dense/configurations.csv
index 86c9bdf47c..0aba914e64 100644
--- a/autosklearn/metalearning/files/accuracy_binary.classification_dense/configurations.csv
+++ b/autosklearn/metalearning/files/accuracy_binary.classification_dense/configurations.csv
@@ -204,3 +204,4 @@ idx,balancing:strategy,classifier:__choice__,classifier:adaboost:algorithm,class
 701,none,liblinear_svc,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,17045.7732372673,False,True,1,squared_hinge,ovr,l2,0.0008192814934567822,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.008405913574040906,mean,robust_scaler,,,0.9598706457974426,0.1882826974837794,select_percentile_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,63.57218134514263,mutual_info,,,
 702,weighting,gradient_boosting,,,,,,,,,,,,,,,,,,,,,,,,train,3.387912939529945e-10,0.30755227194768237,auto,255,None,60,39,18,loss,1e-07,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.010000000000000004,most_frequent,none,,,,,select_percentile_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,93.39844669585806,f_classif,,,
 707,weighting,lda,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,auto,,0.00018030860519654287,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.041538950281903686,mean,robust_scaler,,,0.8898452660666816,0.1704082739702074,kitchen_sinks,,,,,,,,,,,,,,,,,,,,,,,,0.0005845623820571637,8606,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
+709,weighting,mlp,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,relu,0.0001,auto,0.9,0.999,train,1E-08,2,0.0003,32,128,True,adam,0.0001,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.004071801722749603,median,quantile_transformer,1000,normal,,,no_preprocessing,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
diff --git a/autosklearn/metalearning/files/accuracy_multiclass.classification_dense/algorithm_runs.arff b/autosklearn/metalearning/files/accuracy_multiclass.classification_dense/algorithm_runs.arff
index c09d9871bb..d9e5f11d06 100644
--- a/autosklearn/metalearning/files/accuracy_multiclass.classification_dense/algorithm_runs.arff
+++ b/autosklearn/metalearning/files/accuracy_multiclass.classification_dense/algorithm_runs.arff
@@ -11,8 +11,8 @@ twonorm,1.0,1,0.015561015561015523,ok
 autouniv-au1-1000,1.0,7,0.18787878787878787,ok
 hill-valley,1.0,9,0.0,ok
 eye_movements,1.0,14,0.17655210643015518,ok
-abalone,1.0,17,0.7670537010159652,ok
-tamilnadu-electricity,1.0,19,0.9385715231349705,ok
+abalone,1.0,709,0.05224963715529751,ok
+tamilnadu-electricity,1.0,709,0.05924405904547558,ok
 xd6,1.0,22,0.0,ok
 colleges_usnews,1.0,25,0.2517482517482518,ok
 pc4,1.0,28,0.079002079002079,ok
@@ -34,7 +34,7 @@ cardiotocography,1.0,83,0.0,ok
 kick,1.0,86,0.09483474505896028,ok
 microaggregation2,1.0,88,0.3684848484848485,ok
 monks-problems-2,1.0,91,0.0,ok
-autouniv-au6-750,1.0,97,0.6720647773279352,ok
+autouniv-au6-750,1.0,709,0.1578947368421053,ok
 meta_stream_intervals.arff,1.0,99,0.004898013955984992,ok
 diabetes130us,1.0,102,0.3863676969805253,ok
 steel-plates-fault,1.0,105,0.21562499999999996,ok
@@ -46,7 +46,7 @@ isolet,1.0,121,0.03342401865526623,ok
 led24,1.0,124,0.2547348484848485,ok
 satimage,1.0,129,0.07873644507307875,ok
 pbcseq,1.0,131,0.14352574102964122,ok
-bachchoralharmony,1.0,134,0.8801498127340824,ok
+bachchoralharmony,1.0,709,0.7019796682718031,ok
 compas-two-years,1.0,138,0.3256748994830557,ok
 volcanoes-b6,1.0,142,0.030520646319569078,ok
 philippine,1.0,145,0.16943866943866948,ok
@@ -54,10 +54,10 @@ irish,1.0,147,0.0,ok
 monks-problems-1,1.0,149,0.0,ok
 porto-seguro,1.0,150,0.03627449482992995,ok
 arsenic-female-lung,1.0,154,0.005434782608695676,ok
-autouniv-au7-1100,1.0,160,0.5371900826446281,ok
+autouniv-au7-1100,1.0,709,0.26997245179063367,ok
 steel-plates-fault,1.0,162,0.0,ok
 speeddating,1.0,165,0.12590448625180894,ok
-devnagari-script,1.0,171,0.03764822134387347,ok
+devnagari-script,1.0,709,0.02154150197628457,ok
 arsenic-male-lung,1.0,172,0.0,ok
 fars,1.0,173,0.19925567994237525,ok
 wdbc,1.0,176,0.010695187165775444,ok
@@ -66,7 +66,7 @@ rmftsa_ladata,1.0,184,0.09580838323353291,ok
 banknote-authentication,1.0,188,0.0,ok
 visualizing_soil,1.0,191,0.00035075412136087447,ok
 wine-quality-white,1.0,193,0.31311881188118806,ok
-analcatdata_dmft,1.0,197,0.7832699619771863,ok
+analcatdata_dmft,1.0,709,0.2433460076045627,ok
 ova_colon,1.0,199,0.03339882121807469,ok
 volcanoes-e1,1.0,204,0.07435897435897432,ok
 cpu_small,1.0,206,0.06992230854605996,ok
@@ -100,7 +100,7 @@ mc1,1.0,302,0.004803073967339144,ok
 ova_prostate,1.0,304,0.0,ok
 volcanoes-b2,1.0,310,0.02840909090909094,ok
 hypothyroid,1.0,313,0.002411575562700996,ok
-autouniv-au7-700,1.0,315,0.48051948051948057,ok
+autouniv-au7-700,1.0,709,0.37662337662337664,ok
 ova_lung,1.0,317,0.013752455795677854,ok
 pokerhand,1.0,322,0.0009647853352628966,ok
 quake,1.0,327,0.42618384401114207,ok
@@ -150,7 +150,7 @@ micro-mass,1.0,480,0.07446808510638303,ok
 indian_pines,1.0,485,0.0553529996685449,ok
 miceprotein,1.0,486,0.0,ok
 diabetes,1.0,492,0.23320158102766797,ok
-collins,1.0,494,0.696969696969697,ok
+collins,1.0,709,0.2212121212121212,ok
 internet-advertisements,1.0,499,0.020332717190388205,ok
 ova_endometrium,1.0,503,0.037328094302554016,ok
 phishingwebsites,1.0,506,0.023300438596491224,ok
@@ -164,7 +164,7 @@ gtsrb-hog01,1.0,528,0.005962820063135754,ok
 puma32h,1.0,532,0.0828708842027377,ok
 bioresponse,1.0,536,0.20856911883589324,ok
 cjs,1.0,538,0.0032537960954447387,ok
-spoken-arabic-digit,1.0,543,0.7427538734258812,ok
+spoken-arabic-digit,1.0,709,0.12207334760687893,ok
 fri_c4_500_100,1.0,546,0.08484848484848484,ok
 ova_kidney,1.0,548,0.00982318271119842,ok
 ldpa,1.0,553,0.005643071154164292,ok
diff --git a/autosklearn/metalearning/files/accuracy_multiclass.classification_dense/configurations.csv b/autosklearn/metalearning/files/accuracy_multiclass.classification_dense/configurations.csv
index 86c9bdf47c..0aba914e64 100644
--- a/autosklearn/metalearning/files/accuracy_multiclass.classification_dense/configurations.csv
+++ b/autosklearn/metalearning/files/accuracy_multiclass.classification_dense/configurations.csv
@@ -204,3 +204,4 @@ idx,balancing:strategy,classifier:__choice__,classifier:adaboost:algorithm,class
 701,none,liblinear_svc,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,17045.7732372673,False,True,1,squared_hinge,ovr,l2,0.0008192814934567822,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.008405913574040906,mean,robust_scaler,,,0.9598706457974426,0.1882826974837794,select_percentile_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,63.57218134514263,mutual_info,,,
 702,weighting,gradient_boosting,,,,,,,,,,,,,,,,,,,,,,,,train,3.387912939529945e-10,0.30755227194768237,auto,255,None,60,39,18,loss,1e-07,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.010000000000000004,most_frequent,none,,,,,select_percentile_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,93.39844669585806,f_classif,,,
 707,weighting,lda,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,auto,,0.00018030860519654287,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.041538950281903686,mean,robust_scaler,,,0.8898452660666816,0.1704082739702074,kitchen_sinks,,,,,,,,,,,,,,,,,,,,,,,,0.0005845623820571637,8606,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
+709,weighting,mlp,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,relu,0.0001,auto,0.9,0.999,train,1E-08,2,0.0003,32,128,True,adam,0.0001,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.004071801722749603,median,quantile_transformer,1000,normal,,,no_preprocessing,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
diff --git a/autosklearn/metalearning/files/balanced_accuracy_binary.classification_dense/algorithm_runs.arff b/autosklearn/metalearning/files/balanced_accuracy_binary.classification_dense/algorithm_runs.arff
index 642b8e1219..44d4baf63e 100644
--- a/autosklearn/metalearning/files/balanced_accuracy_binary.classification_dense/algorithm_runs.arff
+++ b/autosklearn/metalearning/files/balanced_accuracy_binary.classification_dense/algorithm_runs.arff
@@ -11,8 +11,8 @@ twonorm,1.0,1,0.01565922561525568,ok
 autouniv-au1-1000,1.0,5,0.25950000000000006,ok
 hill-valley,1.0,9,0.0,ok
 eye_movements,1.0,14,0.17681209540342113,ok
-abalone,1.0,17,0.8769085717046411,ok
-tamilnadu-electricity,1.0,19,0.947756348266465,ok
+abalone,1.0,709,0.11341504710773154,ok
+tamilnadu-electricity,1.0,709,0.04755653538107785,ok
 xd6,1.0,22,0.0,ok
 colleges_usnews,1.0,25,0.25167376749847836,ok
 pc4,1.0,29,0.09702797202797209,ok
@@ -20,23 +20,23 @@ fri_c3_500_50,1.0,32,0.1011018463371054,ok
 run_or_walk_information,1.0,37,0.008410684831804849,ok
 satellite,1.0,42,0.03017286537454167,ok
 2dplanes,1.0,43,0.06880799167253038,ok
-wine-quality-red,1.0,47,0.6161336684542298,ok
+wine-quality-red,1.0,709,0.30593389790637504,ok
 arsenic-female-bladder,1.0,52,0.1940880503144654,ok
 kuzushiji-mnist,1.0,56,0.021482498414330586,ok
 anneal,1.0,59,0.0009216589861751334,ok
 fri_c3_1000_25,1.0,61,0.07383792677910317,ok
 pollen,1.0,67,0.48644136848276476,ok
-volcanoes-b5,1.0,68,0.5699847967928948,ok
+volcanoes-b5,1.0,709,0.26897722411117975,ok
 titanic,1.0,72,0.29320987654320985,ok
 strikes,1.0,75,0.0,ok
 madeline,1.0,81,0.0821005669048338,ok
 cardiotocography,1.0,83,0.0,ok
 kick,1.0,84,0.3006417025492927,ok
-microaggregation2,1.0,89,0.4916187730470363,ok
+microaggregation2,1.0,709,0.3777507888408984,ok
 monks-problems-2,1.0,91,0.0,ok
-autouniv-au6-750,1.0,95,0.7612754085799343,ok
+autouniv-au6-750,1.0,709,0.1241263679082587,ok
 meta_stream_intervals.arff,1.0,98,0.006967878468411404,ok
-diabetes130us,1.0,101,0.5067441114849294,ok
+diabetes130us,1.0,709,0.4118415380351441,ok
 steel-plates-fault,1.0,104,0.19263434860176276,ok
 bank8fm,1.0,108,0.054070141112634085,ok
 semeion,1.0,113,0.031490541211592005,ok
@@ -46,18 +46,18 @@ isolet,1.0,121,0.033152523763478325,ok
 led24,1.0,124,0.25890692593605036,ok
 satimage,1.0,129,0.10112177411403478,ok
 pbcseq,1.0,131,0.1434266632917307,ok
-bachchoralharmony,1.0,134,0.9175415800871299,ok
+bachchoralharmony,1.0,709,0.3875832875858466,ok
 compas-two-years,1.0,138,0.3265118829110152,ok
-volcanoes-b6,1.0,143,0.5809143287967867,ok
+volcanoes-b6,1.0,709,0.2390641802397251,ok
 philippine,1.0,145,0.16948482215796368,ok
 irish,1.0,147,0.0,ok
 monks-problems-1,1.0,149,0.0,ok
 porto-seguro,1.0,150,0.3988950321221292,ok
 arsenic-female-lung,1.0,154,0.05555555555555558,ok
-autouniv-au7-1100,1.0,159,0.5064077630375687,ok
+autouniv-au7-1100,1.0,709,0.2838726245505907,ok
 steel-plates-fault,1.0,162,0.0,ok
 speeddating,1.0,168,0.22671228902772056,ok
-devnagari-script,1.0,171,0.03767826945318897,ok
+devnagari-script,1.0,709,0.021739130434782594,ok
 arsenic-male-lung,1.0,172,0.0,ok
 fars,1.0,174,0.39134793637762566,ok
 wdbc,1.0,176,0.01449275362318847,ok
@@ -65,10 +65,10 @@ hiva_agnostic,1.0,181,0.28794275953202053,ok
 rmftsa_ladata,1.0,185,0.09256715506715507,ok
 banknote-authentication,1.0,188,0.0,ok
 visualizing_soil,1.0,191,0.0003103662321539691,ok
-wine-quality-white,1.0,194,0.5594259465253884,ok
-analcatdata_dmft,1.0,197,0.7737781849718646,ok
+wine-quality-white,1.0,709,0.28548558778040434,ok
+analcatdata_dmft,1.0,709,0.2372536438556463,ok
 ova_colon,1.0,199,0.06985388361958478,ok
-volcanoes-e1,1.0,203,0.7058646616541353,ok
+volcanoes-e1,1.0,709,0.2258013454689355,ok
 cpu_small,1.0,206,0.08179300997192374,ok
 gametes_heterogeneity_20atts_1600_het_0.4_0.2_75_edm-2_001,1.0,211,0.26064295662411874,ok
 dresses-sales,1.0,214,0.37422037422037424,ok
@@ -77,10 +77,10 @@ parity5_plus_5,1.0,222,0.0,ok
 delta_ailerons,1.0,227,0.05943422519509478,ok
 gametes_heterogeneity_20atts_1600_het_0.4_0.2_50_edm-2_001,1.0,230,0.28459230117593004,ok
 mammography,1.0,233,0.0645452024403772,ok
-first-order-theorem-proving,1.0,236,0.4963289871477006,ok
+first-order-theorem-proving,1.0,709,0.4063548645225059,ok
 pol,1.0,238,0.010955207771181863,ok
 one-hundred-plants-margin,1.0,243,0.14592241658568195,ok
-volcanoes-a2,1.0,246,0.5618923065238854,ok
+volcanoes-a2,1.0,709,0.27818850155692254,ok
 kdd_el_nino-small,1.0,251,0.0555267254800208,ok
 kc2,1.0,255,0.21854575163398693,ok
 fri_c3_1000_10,1.0,257,0.07222776116096163,ok
@@ -98,16 +98,16 @@ arsenic-male-bladder,1.0,294,0.13372093023255816,ok
 letter,1.0,298,0.0302953166025568,ok
 mc1,1.0,301,0.11897921491600949,ok
 ova_prostate,1.0,304,0.0,ok
-volcanoes-b2,1.0,308,0.6049296764663846,ok
+volcanoes-b2,1.0,709,0.2505278585090791,ok
 hypothyroid,1.0,313,0.25043630017452,ok
-autouniv-au7-700,1.0,315,0.48565916634574835,ok
+autouniv-au7-700,1.0,709,0.3746500155874639,ok
 ova_lung,1.0,318,0.0742391077756932,ok
 pokerhand,1.0,322,0.15738714306963697,ok
 quake,1.0,325,0.4566588050314466,ok
 stock,1.0,329,0.029126808928133352,ok
 fri_c2_1000_25,1.0,332,0.05855211119062864,ok
 fried,1.0,338,0.05970753367223658,ok
-volcanoes-a4,1.0,342,0.6090137328339575,ok
+volcanoes-a4,1.0,709,0.32727840199750313,ok
 no2,1.0,345,0.32140758154569493,ok
 space_ga,1.0,347,0.1251789549033644,ok
 led-display-domain-7digit,1.0,352,0.22896929824561407,ok
@@ -117,7 +117,7 @@ ova_breast,1.0,364,0.02820688083845979,ok
 mozilla4,1.0,367,0.057752752285568865,ok
 churn,1.0,370,0.09994486131421865,ok
 rl,1.0,375,0.17550484692926682,ok
-volcanoes-a3,1.0,377,0.5921428571428572,ok
+volcanoes-a3,1.0,709,0.3635897435897436,ok
 profb,1.0,379,0.2641435306443164,ok
 bank32nh,1.0,384,0.19108836369110338,ok
 cylinder-bands,1.0,388,0.16560846560846554,ok
@@ -133,7 +133,7 @@ wilt,1.0,421,0.04880593092794183,ok
 magictelescope,1.0,425,0.12872306147306456,ok
 pc3,1.0,428,0.18599435122113306,ok
 japanesevowels,1.0,434,0.007347987818080326,ok
-volcanoes-b1,1.0,435,0.6702470997649754,ok
+volcanoes-b1,1.0,709,0.2750187216422184,ok
 fri_c1_1000_50,1.0,440,0.06399204244031831,ok
 splice,1.0,444,0.03519156774858112,ok
 optdigits,1.0,445,0.006898860431585718,ok
@@ -150,7 +150,7 @@ micro-mass,1.0,480,0.0688694638694638,ok
 indian_pines,1.0,485,0.06809369628097439,ok
 miceprotein,1.0,486,0.0,ok
 diabetes,1.0,489,0.23524844720496896,ok
-collins,1.0,494,0.7292812799100832,ok
+collins,1.0,709,0.19234712380249908,ok
 internet-advertisements,1.0,499,0.05529848302957552,ok
 ova_endometrium,1.0,500,0.2015848670756646,ok
 phishingwebsites,1.0,506,0.024655738306104125,ok
@@ -164,7 +164,7 @@ gtsrb-hog01,1.0,529,0.006539945570489758,ok
 puma32h,1.0,532,0.08290275761973875,ok
 bioresponse,1.0,536,0.21245605620589147,ok
 cjs,1.0,538,0.0021739130434782483,ok
-spoken-arabic-digit,1.0,543,0.7427910860239428,ok
+spoken-arabic-digit,1.0,709,0.12078554185046508,ok
 fri_c4_500_100,1.0,546,0.08922235363690123,ok
 ova_kidney,1.0,549,0.01312332209973155,ok
 ldpa,1.0,553,0.012296136894902499,ok
@@ -177,7 +177,7 @@ kdd_internet_usage,1.0,573,0.12425897115198847,ok
 kin8nm,1.0,578,0.08507835912444572,ok
 dna,1.0,579,0.03591082203249496,ok
 gametes_epistasis_3-way_20atts_0.2h_edm-1_1,1.0,585,0.3280686892472038,ok
-volcanoes-d4,1.0,586,0.6129977423609093,ok
+volcanoes-d4,1.0,709,0.21706864564007422,ok
 fri_c0_1000_5,1.0,590,0.06341911764705888,ok
 gesturephasesegmentationprocessed,1.0,594,0.3420059330064098,ok
 ova_ovary,1.0,598,0.06628418849678863,ok
@@ -196,7 +196,7 @@ eating,1.0,640,0.2829086031298169,ok
 eeg-eye-state,1.0,643,0.019845559156376535,ok
 artificial-characters,1.0,647,0.09627924935929022,ok
 climate-model-simulation-crashes,1.0,650,0.0457317073170731,ok
-volcanoes-d1,1.0,652,0.6733972083836718,ok
+volcanoes-d1,1.0,709,0.19926199261992616,ok
 cpu_act,1.0,657,0.07139897376319104,ok
 wind,1.0,659,0.12518590637984695,ok
 fri_c3_500_10,1.0,666,0.09081542537533127,ok
diff --git a/autosklearn/metalearning/files/balanced_accuracy_binary.classification_dense/configurations.csv b/autosklearn/metalearning/files/balanced_accuracy_binary.classification_dense/configurations.csv
index 3712b358d0..cf5c462684 100644
--- a/autosklearn/metalearning/files/balanced_accuracy_binary.classification_dense/configurations.csv
+++ b/autosklearn/metalearning/files/balanced_accuracy_binary.classification_dense/configurations.csv
@@ -204,3 +204,4 @@ idx,balancing:strategy,classifier:__choice__,classifier:adaboost:algorithm,class
 701,none,liblinear_svc,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,17045.7732372673,False,True,1,squared_hinge,ovr,l2,0.0008192814934567822,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.008405913574040906,mean,robust_scaler,,,0.9598706457974426,0.1882826974837794,select_percentile_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,63.57218134514263,mutual_info,,,
 702,weighting,gradient_boosting,,,,,,,,,,,,,,,,,,,,,,,,train,3.387912939529945e-10,0.30755227194768237,auto,255,None,60,39,18,loss,1e-07,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.010000000000000004,most_frequent,none,,,,,select_percentile_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,93.39844669585806,f_classif,,,
 707,weighting,lda,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,auto,,0.00018030860519654287,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.041538950281903686,mean,robust_scaler,,,0.8898452660666816,0.1704082739702074,kitchen_sinks,,,,,,,,,,,,,,,,,,,,,,,,0.0005845623820571637,8606,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
+709,weighting,mlp,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,relu,0.0001,auto,0.9,0.999,train,1E-08,2,0.0003,32,128,True,adam,0.0001,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.004071801722749603,median,quantile_transformer,1000,normal,,,no_preprocessing,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
diff --git a/autosklearn/metalearning/files/balanced_accuracy_multiclass.classification_dense/algorithm_runs.arff b/autosklearn/metalearning/files/balanced_accuracy_multiclass.classification_dense/algorithm_runs.arff
index 642b8e1219..44d4baf63e 100644
--- a/autosklearn/metalearning/files/balanced_accuracy_multiclass.classification_dense/algorithm_runs.arff
+++ b/autosklearn/metalearning/files/balanced_accuracy_multiclass.classification_dense/algorithm_runs.arff
@@ -11,8 +11,8 @@ twonorm,1.0,1,0.01565922561525568,ok
 autouniv-au1-1000,1.0,5,0.25950000000000006,ok
 hill-valley,1.0,9,0.0,ok
 eye_movements,1.0,14,0.17681209540342113,ok
-abalone,1.0,17,0.8769085717046411,ok
-tamilnadu-electricity,1.0,19,0.947756348266465,ok
+abalone,1.0,709,0.11341504710773154,ok
+tamilnadu-electricity,1.0,709,0.04755653538107785,ok
 xd6,1.0,22,0.0,ok
 colleges_usnews,1.0,25,0.25167376749847836,ok
 pc4,1.0,29,0.09702797202797209,ok
@@ -20,23 +20,23 @@ fri_c3_500_50,1.0,32,0.1011018463371054,ok
 run_or_walk_information,1.0,37,0.008410684831804849,ok
 satellite,1.0,42,0.03017286537454167,ok
 2dplanes,1.0,43,0.06880799167253038,ok
-wine-quality-red,1.0,47,0.6161336684542298,ok
+wine-quality-red,1.0,709,0.30593389790637504,ok
 arsenic-female-bladder,1.0,52,0.1940880503144654,ok
 kuzushiji-mnist,1.0,56,0.021482498414330586,ok
 anneal,1.0,59,0.0009216589861751334,ok
 fri_c3_1000_25,1.0,61,0.07383792677910317,ok
 pollen,1.0,67,0.48644136848276476,ok
-volcanoes-b5,1.0,68,0.5699847967928948,ok
+volcanoes-b5,1.0,709,0.26897722411117975,ok
 titanic,1.0,72,0.29320987654320985,ok
 strikes,1.0,75,0.0,ok
 madeline,1.0,81,0.0821005669048338,ok
 cardiotocography,1.0,83,0.0,ok
 kick,1.0,84,0.3006417025492927,ok
-microaggregation2,1.0,89,0.4916187730470363,ok
+microaggregation2,1.0,709,0.3777507888408984,ok
 monks-problems-2,1.0,91,0.0,ok
-autouniv-au6-750,1.0,95,0.7612754085799343,ok
+autouniv-au6-750,1.0,709,0.1241263679082587,ok
 meta_stream_intervals.arff,1.0,98,0.006967878468411404,ok
-diabetes130us,1.0,101,0.5067441114849294,ok
+diabetes130us,1.0,709,0.4118415380351441,ok
 steel-plates-fault,1.0,104,0.19263434860176276,ok
 bank8fm,1.0,108,0.054070141112634085,ok
 semeion,1.0,113,0.031490541211592005,ok
@@ -46,18 +46,18 @@ isolet,1.0,121,0.033152523763478325,ok
 led24,1.0,124,0.25890692593605036,ok
 satimage,1.0,129,0.10112177411403478,ok
 pbcseq,1.0,131,0.1434266632917307,ok
-bachchoralharmony,1.0,134,0.9175415800871299,ok
+bachchoralharmony,1.0,709,0.3875832875858466,ok
 compas-two-years,1.0,138,0.3265118829110152,ok
-volcanoes-b6,1.0,143,0.5809143287967867,ok
+volcanoes-b6,1.0,709,0.2390641802397251,ok
 philippine,1.0,145,0.16948482215796368,ok
 irish,1.0,147,0.0,ok
 monks-problems-1,1.0,149,0.0,ok
 porto-seguro,1.0,150,0.3988950321221292,ok
 arsenic-female-lung,1.0,154,0.05555555555555558,ok
-autouniv-au7-1100,1.0,159,0.5064077630375687,ok
+autouniv-au7-1100,1.0,709,0.2838726245505907,ok
 steel-plates-fault,1.0,162,0.0,ok
 speeddating,1.0,168,0.22671228902772056,ok
-devnagari-script,1.0,171,0.03767826945318897,ok
+devnagari-script,1.0,709,0.021739130434782594,ok
 arsenic-male-lung,1.0,172,0.0,ok
 fars,1.0,174,0.39134793637762566,ok
 wdbc,1.0,176,0.01449275362318847,ok
@@ -65,10 +65,10 @@ hiva_agnostic,1.0,181,0.28794275953202053,ok
 rmftsa_ladata,1.0,185,0.09256715506715507,ok
 banknote-authentication,1.0,188,0.0,ok
 visualizing_soil,1.0,191,0.0003103662321539691,ok
-wine-quality-white,1.0,194,0.5594259465253884,ok
-analcatdata_dmft,1.0,197,0.7737781849718646,ok
+wine-quality-white,1.0,709,0.28548558778040434,ok
+analcatdata_dmft,1.0,709,0.2372536438556463,ok
 ova_colon,1.0,199,0.06985388361958478,ok
-volcanoes-e1,1.0,203,0.7058646616541353,ok
+volcanoes-e1,1.0,709,0.2258013454689355,ok
 cpu_small,1.0,206,0.08179300997192374,ok
 gametes_heterogeneity_20atts_1600_het_0.4_0.2_75_edm-2_001,1.0,211,0.26064295662411874,ok
 dresses-sales,1.0,214,0.37422037422037424,ok
@@ -77,10 +77,10 @@ parity5_plus_5,1.0,222,0.0,ok
 delta_ailerons,1.0,227,0.05943422519509478,ok
 gametes_heterogeneity_20atts_1600_het_0.4_0.2_50_edm-2_001,1.0,230,0.28459230117593004,ok
 mammography,1.0,233,0.0645452024403772,ok
-first-order-theorem-proving,1.0,236,0.4963289871477006,ok
+first-order-theorem-proving,1.0,709,0.4063548645225059,ok
 pol,1.0,238,0.010955207771181863,ok
 one-hundred-plants-margin,1.0,243,0.14592241658568195,ok
-volcanoes-a2,1.0,246,0.5618923065238854,ok
+volcanoes-a2,1.0,709,0.27818850155692254,ok
 kdd_el_nino-small,1.0,251,0.0555267254800208,ok
 kc2,1.0,255,0.21854575163398693,ok
 fri_c3_1000_10,1.0,257,0.07222776116096163,ok
@@ -98,16 +98,16 @@ arsenic-male-bladder,1.0,294,0.13372093023255816,ok
 letter,1.0,298,0.0302953166025568,ok
 mc1,1.0,301,0.11897921491600949,ok
 ova_prostate,1.0,304,0.0,ok
-volcanoes-b2,1.0,308,0.6049296764663846,ok
+volcanoes-b2,1.0,709,0.2505278585090791,ok
 hypothyroid,1.0,313,0.25043630017452,ok
-autouniv-au7-700,1.0,315,0.48565916634574835,ok
+autouniv-au7-700,1.0,709,0.3746500155874639,ok
 ova_lung,1.0,318,0.0742391077756932,ok
 pokerhand,1.0,322,0.15738714306963697,ok
 quake,1.0,325,0.4566588050314466,ok
 stock,1.0,329,0.029126808928133352,ok
 fri_c2_1000_25,1.0,332,0.05855211119062864,ok
 fried,1.0,338,0.05970753367223658,ok
-volcanoes-a4,1.0,342,0.6090137328339575,ok
+volcanoes-a4,1.0,709,0.32727840199750313,ok
 no2,1.0,345,0.32140758154569493,ok
 space_ga,1.0,347,0.1251789549033644,ok
 led-display-domain-7digit,1.0,352,0.22896929824561407,ok
@@ -117,7 +117,7 @@ ova_breast,1.0,364,0.02820688083845979,ok
 mozilla4,1.0,367,0.057752752285568865,ok
 churn,1.0,370,0.09994486131421865,ok
 rl,1.0,375,0.17550484692926682,ok
-volcanoes-a3,1.0,377,0.5921428571428572,ok
+volcanoes-a3,1.0,709,0.3635897435897436,ok
 profb,1.0,379,0.2641435306443164,ok
 bank32nh,1.0,384,0.19108836369110338,ok
 cylinder-bands,1.0,388,0.16560846560846554,ok
@@ -133,7 +133,7 @@ wilt,1.0,421,0.04880593092794183,ok
 magictelescope,1.0,425,0.12872306147306456,ok
 pc3,1.0,428,0.18599435122113306,ok
 japanesevowels,1.0,434,0.007347987818080326,ok
-volcanoes-b1,1.0,435,0.6702470997649754,ok
+volcanoes-b1,1.0,709,0.2750187216422184,ok
 fri_c1_1000_50,1.0,440,0.06399204244031831,ok
 splice,1.0,444,0.03519156774858112,ok
 optdigits,1.0,445,0.006898860431585718,ok
@@ -150,7 +150,7 @@ micro-mass,1.0,480,0.0688694638694638,ok
 indian_pines,1.0,485,0.06809369628097439,ok
 miceprotein,1.0,486,0.0,ok
 diabetes,1.0,489,0.23524844720496896,ok
-collins,1.0,494,0.7292812799100832,ok
+collins,1.0,709,0.19234712380249908,ok
 internet-advertisements,1.0,499,0.05529848302957552,ok
 ova_endometrium,1.0,500,0.2015848670756646,ok
 phishingwebsites,1.0,506,0.024655738306104125,ok
@@ -164,7 +164,7 @@ gtsrb-hog01,1.0,529,0.006539945570489758,ok
 puma32h,1.0,532,0.08290275761973875,ok
 bioresponse,1.0,536,0.21245605620589147,ok
 cjs,1.0,538,0.0021739130434782483,ok
-spoken-arabic-digit,1.0,543,0.7427910860239428,ok
+spoken-arabic-digit,1.0,709,0.12078554185046508,ok
 fri_c4_500_100,1.0,546,0.08922235363690123,ok
 ova_kidney,1.0,549,0.01312332209973155,ok
 ldpa,1.0,553,0.012296136894902499,ok
@@ -177,7 +177,7 @@ kdd_internet_usage,1.0,573,0.12425897115198847,ok
 kin8nm,1.0,578,0.08507835912444572,ok
 dna,1.0,579,0.03591082203249496,ok
 gametes_epistasis_3-way_20atts_0.2h_edm-1_1,1.0,585,0.3280686892472038,ok
-volcanoes-d4,1.0,586,0.6129977423609093,ok
+volcanoes-d4,1.0,709,0.21706864564007422,ok
 fri_c0_1000_5,1.0,590,0.06341911764705888,ok
 gesturephasesegmentationprocessed,1.0,594,0.3420059330064098,ok
 ova_ovary,1.0,598,0.06628418849678863,ok
@@ -196,7 +196,7 @@ eating,1.0,640,0.2829086031298169,ok
 eeg-eye-state,1.0,643,0.019845559156376535,ok
 artificial-characters,1.0,647,0.09627924935929022,ok
 climate-model-simulation-crashes,1.0,650,0.0457317073170731,ok
-volcanoes-d1,1.0,652,0.6733972083836718,ok
+volcanoes-d1,1.0,709,0.19926199261992616,ok
 cpu_act,1.0,657,0.07139897376319104,ok
 wind,1.0,659,0.12518590637984695,ok
 fri_c3_500_10,1.0,666,0.09081542537533127,ok
diff --git a/autosklearn/metalearning/files/balanced_accuracy_multiclass.classification_dense/configurations.csv b/autosklearn/metalearning/files/balanced_accuracy_multiclass.classification_dense/configurations.csv
index 3712b358d0..cf5c462684 100644
--- a/autosklearn/metalearning/files/balanced_accuracy_multiclass.classification_dense/configurations.csv
+++ b/autosklearn/metalearning/files/balanced_accuracy_multiclass.classification_dense/configurations.csv
@@ -204,3 +204,4 @@ idx,balancing:strategy,classifier:__choice__,classifier:adaboost:algorithm,class
 701,none,liblinear_svc,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,17045.7732372673,False,True,1,squared_hinge,ovr,l2,0.0008192814934567822,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.008405913574040906,mean,robust_scaler,,,0.9598706457974426,0.1882826974837794,select_percentile_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,63.57218134514263,mutual_info,,,
 702,weighting,gradient_boosting,,,,,,,,,,,,,,,,,,,,,,,,train,3.387912939529945e-10,0.30755227194768237,auto,255,None,60,39,18,loss,1e-07,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.010000000000000004,most_frequent,none,,,,,select_percentile_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,93.39844669585806,f_classif,,,
 707,weighting,lda,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,auto,,0.00018030860519654287,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.041538950281903686,mean,robust_scaler,,,0.8898452660666816,0.1704082739702074,kitchen_sinks,,,,,,,,,,,,,,,,,,,,,,,,0.0005845623820571637,8606,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
+709,weighting,mlp,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,relu,0.0001,auto,0.9,0.999,train,1E-08,2,0.0003,32,128,True,adam,0.0001,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.004071801722749603,median,quantile_transformer,1000,normal,,,no_preprocessing,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
diff --git a/autosklearn/metalearning/files/log_loss_binary.classification_dense/algorithm_runs.arff b/autosklearn/metalearning/files/log_loss_binary.classification_dense/algorithm_runs.arff
index 889cc292b3..1cec3c541b 100644
--- a/autosklearn/metalearning/files/log_loss_binary.classification_dense/algorithm_runs.arff
+++ b/autosklearn/metalearning/files/log_loss_binary.classification_dense/algorithm_runs.arff
@@ -46,7 +46,7 @@ isolet,1.0,123,0.16356916174675815,ok
 led24,1.0,124,0.7814469293719439,ok
 satimage,1.0,129,0.23632698635562363,ok
 pbcseq,1.0,133,0.3965121399605423,ok
-bachchoralharmony,1.0,134,3.5570787530713313,ok
+bachchoralharmony,1.0,709,2.5870167462116758,ok
 compas-two-years,1.0,139,0.6145546685461338,ok
 volcanoes-b6,1.0,142,0.13378592547234638,ok
 philippine,1.0,145,0.37128927711646215,ok
@@ -94,7 +94,7 @@ cmc,1.0,281,0.8842480351945765,ok
 delta_elevators,1.0,286,0.28829474115557374,ok
 kropt,1.0,289,0.32042209965538,ok
 mnist_784,1.0,290,0.1849885060869448,ok
-arsenic-male-bladder,1.0,293,0.10375917546132216,ok
+arsenic-male-bladder,1.0,709,0.09755482598717143,ok
 letter,1.0,299,0.13082042502993893,ok
 mc1,1.0,300,0.021251873902710102,ok
 ova_prostate,1.0,307,0.0,ok
diff --git a/autosklearn/metalearning/files/log_loss_binary.classification_dense/configurations.csv b/autosklearn/metalearning/files/log_loss_binary.classification_dense/configurations.csv
index 49e6d86159..10d93fb58f 100644
--- a/autosklearn/metalearning/files/log_loss_binary.classification_dense/configurations.csv
+++ b/autosklearn/metalearning/files/log_loss_binary.classification_dense/configurations.csv
@@ -204,3 +204,4 @@ idx,balancing:strategy,classifier:__choice__,classifier:adaboost:algorithm,class
 700,none,libsvm_svc,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,11.971574533613923,-0.39588785696879913,2,0.5649333295846743,poly,-1,False,0.0005016673735160964,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.006581276380447836,mean,none,,,,,select_percentile_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,58.914624158252536,mutual_info,,,
 702,none,gradient_boosting,,,,,,,,,,,,,,,,,,,,,,,,train,2.459103572787973e-05,0.7160373349401171,auto,255,None,3,11,12,loss,1e-07,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,no_coalescense,,most_frequent,minmax,,,,,polynomial,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3,True,False,,,,,,,,,,,,
 708,none,mlp,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,tanh,0.0014187815366827666,auto,0.9,0.999,train,1e-08,1,0.0008372397492611581,32,54,True,adam,0.0001,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.002841707358825979,most_frequent,robust_scaler,,,0.8646269699284821,0.022276271975525615,fast_ica,,,,,,,,,,,deflation,cube,,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
+709,weighting,mlp,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,relu,0.0001,auto,0.9,0.999,train,1E-08,2,0.0003,32,128,True,adam,0.0001,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.004071801722749603,median,quantile_transformer,1000,normal,,,no_preprocessing,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
diff --git a/autosklearn/metalearning/files/log_loss_multiclass.classification_dense/algorithm_runs.arff b/autosklearn/metalearning/files/log_loss_multiclass.classification_dense/algorithm_runs.arff
index 889cc292b3..1cec3c541b 100644
--- a/autosklearn/metalearning/files/log_loss_multiclass.classification_dense/algorithm_runs.arff
+++ b/autosklearn/metalearning/files/log_loss_multiclass.classification_dense/algorithm_runs.arff
@@ -46,7 +46,7 @@ isolet,1.0,123,0.16356916174675815,ok
 led24,1.0,124,0.7814469293719439,ok
 satimage,1.0,129,0.23632698635562363,ok
 pbcseq,1.0,133,0.3965121399605423,ok
-bachchoralharmony,1.0,134,3.5570787530713313,ok
+bachchoralharmony,1.0,709,2.5870167462116758,ok
 compas-two-years,1.0,139,0.6145546685461338,ok
 volcanoes-b6,1.0,142,0.13378592547234638,ok
 philippine,1.0,145,0.37128927711646215,ok
@@ -94,7 +94,7 @@ cmc,1.0,281,0.8842480351945765,ok
 delta_elevators,1.0,286,0.28829474115557374,ok
 kropt,1.0,289,0.32042209965538,ok
 mnist_784,1.0,290,0.1849885060869448,ok
-arsenic-male-bladder,1.0,293,0.10375917546132216,ok
+arsenic-male-bladder,1.0,709,0.09755482598717143,ok
 letter,1.0,299,0.13082042502993893,ok
 mc1,1.0,300,0.021251873902710102,ok
 ova_prostate,1.0,307,0.0,ok
diff --git a/autosklearn/metalearning/files/log_loss_multiclass.classification_dense/configurations.csv b/autosklearn/metalearning/files/log_loss_multiclass.classification_dense/configurations.csv
index 49e6d86159..10d93fb58f 100644
--- a/autosklearn/metalearning/files/log_loss_multiclass.classification_dense/configurations.csv
+++ b/autosklearn/metalearning/files/log_loss_multiclass.classification_dense/configurations.csv
@@ -204,3 +204,4 @@ idx,balancing:strategy,classifier:__choice__,classifier:adaboost:algorithm,class
 700,none,libsvm_svc,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,11.971574533613923,-0.39588785696879913,2,0.5649333295846743,poly,-1,False,0.0005016673735160964,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.006581276380447836,mean,none,,,,,select_percentile_classification,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,58.914624158252536,mutual_info,,,
 702,none,gradient_boosting,,,,,,,,,,,,,,,,,,,,,,,,train,2.459103572787973e-05,0.7160373349401171,auto,255,None,3,11,12,loss,1e-07,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,no_coalescense,,most_frequent,minmax,,,,,polynomial,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3,True,False,,,,,,,,,,,,
 708,none,mlp,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,tanh,0.0014187815366827666,auto,0.9,0.999,train,1e-08,1,0.0008372397492611581,32,54,True,adam,0.0001,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,no_encoding,minority_coalescer,0.002841707358825979,most_frequent,robust_scaler,,,0.8646269699284821,0.022276271975525615,fast_ica,,,,,,,,,,,deflation,cube,,False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
+709,weighting,mlp,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,relu,0.0001,auto,0.9,0.999,train,1E-08,2,0.0003,32,128,True,adam,0.0001,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,one_hot_encoding,minority_coalescer,0.004071801722749603,median,quantile_transformer,1000,normal,,,no_preprocessing,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
diff --git a/autosklearn/smbo.py b/autosklearn/smbo.py
index 2be6eaacab..81d96cea93 100644
--- a/autosklearn/smbo.py
+++ b/autosklearn/smbo.py
@@ -482,6 +482,7 @@ def run_smbo(self):
                     )
             scenario_dict.update(self.smac_scenario_args)
 
+        del self.datamanager
         smac_args = {
             'scenario_dict': scenario_dict,
             'seed': seed,
diff --git a/autosklearn/util/common.py b/autosklearn/util/common.py
index 4905d0eaa8..20c477ac60 100644
--- a/autosklearn/util/common.py
+++ b/autosklearn/util/common.py
@@ -1,16 +1,48 @@
 # -*- encoding: utf-8 -*-
 
 import os
+import resource
 import warnings
 
 import numpy as np
 
+import psutil
+
 __all__ = [
     'check_pid',
     'warn_if_not_float'
 ]
 
 
+def print_memory(tag: str = '', extra: bool = True, include_all: bool = False) -> str:
+    """Return the ``Vm*`` lines of ``/proc/<pid>/status``, tabs replaced by spaces.
+
+    ``tag`` labels entries; ``include_all`` adds parent/child pids; ``extra`` appends getrusage.
+    """
+    memory = []
+    processes = [(str(os.getpid()), f"{tag}-current")]
+
+    if include_all:
+        processes.append((str(os.getppid()), f"{tag}-parent"))
+        for child in psutil.Process(os.getpid()).children(recursive=True):
+            if child.pid:
+                processes.append((str(child.pid), f"{tag}-children"))
+
+    for pid, name in processes:
+        filename = '/proc/' + str(pid) + '/status'
+        if pid and os.path.exists(filename):
+            with open(filename, 'r') as fin:
+                for line in fin:
+                    if 'Vm' in line:
+                        memory.append(f"{name}-{pid}-" + line.strip().replace('\t', ' '))
+        memory.append("\n")
+
+    if extra:
+        memory.append(f"rsuage={resource.getrusage(resource.RUSAGE_SELF)}")
+
+    return "\n".join(memory)
+
+
 def warn_if_not_float(X: np.ndarray, estimator: str = 'This algorithm') -> bool:
     """Warning utility function to check that data type is floating point.
     Returns True if a warning was raised (i.e. the input is not float) and
diff --git a/requirements.txt b/requirements.txt
index a29774a201..57ddeaba90 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,7 @@
 setuptools
 
 numpy>=1.9.0
-scipy>=0.14.1
+scipy>=0.14.1,<1.7.0
 
 joblib
 scikit-learn>=0.24.0,<0.25.0