From 9aff42610999536c54317093400805637c25f248 Mon Sep 17 00:00:00 2001
From: Alan Akbik <alan.akbik@gmail.com>
Date: Mon, 15 Nov 2021 16:35:31 +0100
Subject: [PATCH] Removes hyperparameter features

---
 flair/hyperparameter/__init__.py              |  11 -
 flair/hyperparameter/param_selection.py       | 277 ------------------
 flair/hyperparameter/parameter.py             |  66 -----
 .../docs/TUTORIAL_8_MODEL_OPTIMIZATION.md     | 166 -----------
 tests/test_hyperparameter.py                  |  92 ------
 5 files changed, 612 deletions(-)
 delete mode 100644 flair/hyperparameter/__init__.py
 delete mode 100644 flair/hyperparameter/param_selection.py
 delete mode 100644 flair/hyperparameter/parameter.py
 delete mode 100644 resources/docs/TUTORIAL_8_MODEL_OPTIMIZATION.md
 delete mode 100644 tests/test_hyperparameter.py

diff --git a/flair/hyperparameter/__init__.py b/flair/hyperparameter/__init__.py
deleted file mode 100644
index 89ff46aaca..0000000000
--- a/flair/hyperparameter/__init__.py
+++ /dev/null
@@ -1,11 +0,0 @@
-from .parameter import (
-    Parameter,
-    SEQUENCE_TAGGER_PARAMETERS,
-    TRAINING_PARAMETERS,
-    DOCUMENT_EMBEDDING_PARAMETERS,
-)
-from .param_selection import (
-    SequenceTaggerParamSelector,
-    TextClassifierParamSelector,
-    SearchSpace,
-)
diff --git a/flair/hyperparameter/param_selection.py b/flair/hyperparameter/param_selection.py
deleted file mode 100644
index 2f84738135..0000000000
--- a/flair/hyperparameter/param_selection.py
+++ /dev/null
@@ -1,277 +0,0 @@
-import logging
-from abc import abstractmethod
-from enum import Enum
-from pathlib import Path
-from typing import Tuple, Union
-import numpy as np
-
-from hyperopt import hp, fmin, tpe
-
-import flair.nn
-from flair.data import Corpus
-from flair.embeddings import DocumentPoolEmbeddings, DocumentRNNEmbeddings
-from flair.hyperparameter import Parameter
-from flair.hyperparameter.parameter import (
-    SEQUENCE_TAGGER_PARAMETERS,
-    TRAINING_PARAMETERS,
-    DOCUMENT_EMBEDDING_PARAMETERS,
-    MODEL_TRAINER_PARAMETERS,
-)
-from flair.models import SequenceTagger, TextClassifier
-from flair.trainers import ModelTrainer
-from flair.training_utils import (
-    EvaluationMetric,
-    log_line,
-    init_output_file,
-    add_file_handler,
-)
-
-log = logging.getLogger("flair")
-
-
-class OptimizationValue(Enum):
-    DEV_LOSS = "loss"
-    DEV_SCORE = "score"
-
-
-class SearchSpace(object):
-    def __init__(self):
-        self.search_space = {}
-
-    def add(self, parameter: Parameter, func, **kwargs):
-        self.search_space[parameter.value] = func(parameter.value, **kwargs)
-
-    def get_search_space(self):
-        return hp.choice("parameters", [self.search_space])
-
-
-class ParamSelector(object):
-    def __init__(
-        self,
-        corpus: Corpus,
-        base_path: Union[str, Path],
-        max_epochs: int,
-        evaluation_metric: EvaluationMetric,
-        training_runs: int,
-        optimization_value: OptimizationValue,
-    ):
-        if type(base_path) is str:
-            base_path = Path(base_path)
-
-        self.corpus = corpus
-        self.max_epochs = max_epochs
-        self.base_path = base_path
-        self.evaluation_metric = evaluation_metric
-        self.run = 1
-        self.training_runs = training_runs
-        self.optimization_value = optimization_value
-
-        self.param_selection_file = init_output_file(base_path, "param_selection.txt")
-
-    @abstractmethod
-    def _set_up_model(self, params: dict) -> flair.nn.Model:
-        pass
-
-    def _objective(self, params: dict):
-        log_line(log)
-        log.info(f"Evaluation run: {self.run}")
-        log.info(f"Evaluating parameter combination:")
-        for k, v in params.items():
-            if isinstance(v, Tuple):
-                v = ",".join([str(x) for x in v])
-            log.info(f"\t{k}: {str(v)}")
-        log_line(log)
-
-        for sent in self.corpus.get_all_sentences():
-            sent.clear_embeddings()
-
-        scores = []
-        vars = []
-
-        for i in range(0, self.training_runs):
-            log_line(log)
-            log.info(f"Training run: {i + 1}")
-
-            model = self._set_up_model(params)
-
-            training_params = {
-                key: params[key] for key in params if key in TRAINING_PARAMETERS
-            }
-            model_trainer_parameters = {
-                key: params[key] for key in params if key in MODEL_TRAINER_PARAMETERS
-            }
-
-            trainer: ModelTrainer = ModelTrainer(
-                model, self.corpus, **model_trainer_parameters
-            )
-
-            result = trainer.train(
-                self.base_path,
-                max_epochs=self.max_epochs,
-                param_selection_mode=True,
-                **training_params,
-            )
-
-            # take the average over the last three scores of training
-            if self.optimization_value == OptimizationValue.DEV_LOSS:
-                curr_scores = result["dev_loss_history"][-3:]
-            else:
-                curr_scores = list(
-                    map(lambda s: 1 - s, result["dev_score_history"][-3:])
-                )
-
-            score = sum(curr_scores) / float(len(curr_scores))
-            var = np.var(curr_scores)
-            scores.append(score)
-            vars.append(var)
-
-        # take average over the scores from the different training runs
-        final_score = sum(scores) / float(len(scores))
-        final_var = sum(vars) / float(len(vars))
-
-        test_score = result["test_score"]
-        log_line(log)
-        log.info(f"Done evaluating parameter combination:")
-        for k, v in params.items():
-            if isinstance(v, Tuple):
-                v = ",".join([str(x) for x in v])
-            log.info(f"\t{k}: {v}")
-        log.info(f"{self.optimization_value.value}: {final_score}")
-        log.info(f"variance: {final_var}")
-        log.info(f"test_score: {test_score}\n")
-        log_line(log)
-
-        with open(self.param_selection_file, "a") as f:
-            f.write(f"evaluation run {self.run}\n")
-            for k, v in params.items():
-                if isinstance(v, Tuple):
-                    v = ",".join([str(x) for x in v])
-                f.write(f"\t{k}: {str(v)}\n")
-            f.write(f"{self.optimization_value.value}: {final_score}\n")
-            f.write(f"variance: {final_var}\n")
-            f.write(f"test_score: {test_score}\n")
-            f.write("-" * 100 + "\n")
-
-        self.run += 1
-
-        return {"status": "ok", "loss": final_score, "loss_variance": final_var}
-
-    def optimize(self, space: SearchSpace, max_evals=100):
-        search_space = space.search_space
-        best = fmin(
-            self._objective, search_space, algo=tpe.suggest, max_evals=max_evals
-        )
-
-        log_line(log)
-        log.info("Optimizing parameter configuration done.")
-        log.info("Best parameter configuration found:")
-        for k, v in best.items():
-            log.info(f"\t{k}: {v}")
-        log_line(log)
-
-        with open(self.param_selection_file, "a") as f:
-            f.write("best parameter combination\n")
-            for k, v in best.items():
-                if isinstance(v, Tuple):
-                    v = ",".join([str(x) for x in v])
-                f.write(f"\t{k}: {str(v)}\n")
-
-
-class SequenceTaggerParamSelector(ParamSelector):
-    def __init__(
-        self,
-        corpus: Corpus,
-        tag_type: str,
-        base_path: Union[str, Path],
-        max_epochs: int = 50,
-        evaluation_metric: EvaluationMetric = EvaluationMetric.MICRO_F1_SCORE,
-        training_runs: int = 1,
-        optimization_value: OptimizationValue = OptimizationValue.DEV_LOSS,
-    ):
-        """
-        :param corpus: the corpus
-        :param tag_type: tag type to use
-        :param base_path: the path to the result folder (results will be written to that folder)
-        :param max_epochs: number of epochs to perform on every evaluation run
-        :param evaluation_metric: evaluation metric used during training
-        :param training_runs: number of training runs per evaluation run
-        :param optimization_value: value to optimize
-        """
-        super().__init__(
-            corpus,
-            base_path,
-            max_epochs,
-            evaluation_metric,
-            training_runs,
-            optimization_value,
-        )
-
-        self.tag_type = tag_type
-        self.tag_dictionary = self.corpus.make_label_dictionary(self.tag_type)
-
-    def _set_up_model(self, params: dict):
-        sequence_tagger_params = {
-            key: params[key] for key in params if key in SEQUENCE_TAGGER_PARAMETERS
-        }
-
-        tagger: SequenceTagger = SequenceTagger(
-            tag_dictionary=self.tag_dictionary,
-            tag_type=self.tag_type,
-            **sequence_tagger_params,
-        )
-        return tagger
-
-
-class TextClassifierParamSelector(ParamSelector):
-    def __init__(
-        self,
-        corpus: Corpus,
-        multi_label: bool,
-        base_path: Union[str, Path],
-        document_embedding_type: str,
-        max_epochs: int = 50,
-        evaluation_metric: EvaluationMetric = EvaluationMetric.MICRO_F1_SCORE,
-        training_runs: int = 1,
-        optimization_value: OptimizationValue = OptimizationValue.DEV_LOSS,
-    ):
-        """
-        :param corpus: the corpus
-        :param multi_label: true, if the dataset is multi label, false otherwise
-        :param base_path: the path to the result folder (results will be written to that folder)
-        :param document_embedding_type: either 'lstm', 'mean', 'min', or 'max'
-        :param max_epochs: number of epochs to perform on every evaluation run
-        :param evaluation_metric: evaluation metric used during training
-        :param training_runs: number of training runs per evaluation run
-        :param optimization_value: value to optimize
-        """
-        super().__init__(
-            corpus,
-            base_path,
-            max_epochs,
-            evaluation_metric,
-            training_runs,
-            optimization_value,
-        )
-
-        self.multi_label = multi_label
-        self.document_embedding_type = document_embedding_type
-
-        self.label_dictionary = self.corpus.make_label_dictionary()
-
-    def _set_up_model(self, params: dict):
-        embdding_params = {
-            key: params[key] for key in params if key in DOCUMENT_EMBEDDING_PARAMETERS
-        }
-
-        if self.document_embedding_type == "lstm":
-            document_embedding = DocumentRNNEmbeddings(**embdding_params)
-        else:
-            document_embedding = DocumentPoolEmbeddings(**embdding_params)
-
-        text_classifier: TextClassifier = TextClassifier(
-            label_dictionary=self.label_dictionary,
-            multi_label=self.multi_label,
-            document_embeddings=document_embedding,
-        )
-
-        return text_classifier
diff --git a/flair/hyperparameter/parameter.py b/flair/hyperparameter/parameter.py
deleted file mode 100644
index 0e47aa791b..0000000000
--- a/flair/hyperparameter/parameter.py
+++ /dev/null
@@ -1,66 +0,0 @@
-from enum import Enum
-
-
-class Parameter(Enum):
-    EMBEDDINGS = "embeddings"
-    HIDDEN_SIZE = "hidden_size"
-    USE_CRF = "use_crf"
-    USE_RNN = "use_rnn"
-    RNN_LAYERS = "rnn_layers"
-    DROPOUT = "dropout"
-    WORD_DROPOUT = "word_dropout"
-    LOCKED_DROPOUT = "locked_dropout"
-    LEARNING_RATE = "learning_rate"
-    MINI_BATCH_SIZE = "mini_batch_size"
-    ANNEAL_FACTOR = "anneal_factor"
-    ANNEAL_WITH_RESTARTS = "anneal_with_restarts"
-    PATIENCE = "patience"
-    REPROJECT_WORDS = "reproject_words"
-    REPROJECT_WORD_DIMENSION = "reproject_words_dimension"
-    BIDIRECTIONAL = "bidirectional"
-    OPTIMIZER = "optimizer"
-    MOMENTUM = "momentum"
-    DAMPENING = "dampening"
-    WEIGHT_DECAY = "weight_decay"
-    NESTEROV = "nesterov"
-    AMSGRAD = "amsgrad"
-    BETAS = "betas"
-    EPS = "eps"
-
-
-TRAINING_PARAMETERS = [
-    Parameter.LEARNING_RATE.value,
-    Parameter.MINI_BATCH_SIZE.value,
-    Parameter.ANNEAL_FACTOR.value,
-    Parameter.PATIENCE.value,
-    Parameter.ANNEAL_WITH_RESTARTS.value,
-    Parameter.MOMENTUM.value,
-    Parameter.DAMPENING.value,
-    Parameter.WEIGHT_DECAY.value,
-    Parameter.NESTEROV.value,
-    Parameter.AMSGRAD.value,
-    Parameter.BETAS.value,
-    Parameter.EPS.value,
-]
-SEQUENCE_TAGGER_PARAMETERS = [
-    Parameter.EMBEDDINGS.value,
-    Parameter.HIDDEN_SIZE.value,
-    Parameter.RNN_LAYERS.value,
-    Parameter.USE_CRF.value,
-    Parameter.USE_RNN.value,
-    Parameter.DROPOUT.value,
-    Parameter.LOCKED_DROPOUT.value,
-    Parameter.WORD_DROPOUT.value,
-]
-MODEL_TRAINER_PARAMETERS = [Parameter.OPTIMIZER.value]
-DOCUMENT_EMBEDDING_PARAMETERS = [
-    Parameter.EMBEDDINGS.value,
-    Parameter.HIDDEN_SIZE.value,
-    Parameter.RNN_LAYERS.value,
-    Parameter.REPROJECT_WORDS.value,
-    Parameter.REPROJECT_WORD_DIMENSION.value,
-    Parameter.BIDIRECTIONAL.value,
-    Parameter.DROPOUT.value,
-    Parameter.LOCKED_DROPOUT.value,
-    Parameter.WORD_DROPOUT.value,
-]
diff --git a/resources/docs/TUTORIAL_8_MODEL_OPTIMIZATION.md b/resources/docs/TUTORIAL_8_MODEL_OPTIMIZATION.md
deleted file mode 100644
index 04843f956f..0000000000
--- a/resources/docs/TUTORIAL_8_MODEL_OPTIMIZATION.md
+++ /dev/null
@@ -1,166 +0,0 @@
-# Tutorial 8: Model Tuning
-
-This is part 8 of the tutorial, in which we look into how we can improve the quality of our model by selecting
-the right set of model and hyper parameters.
-
-## Selecting Hyper Parameters
-
-Flair includes a wrapper for the well-known hyper parameter selection tool
-[hyperopt](https://github.com/hyperopt/hyperopt).
-
-First you need to load your corpus. If you want to load the [AGNews corpus](https://www.di.unipi.it/~gulli/AG_corpus_of_news_articles.html)
-used in the following example, you first need to download it and convert it into the correct format. Please
-check [tutorial 6](/resources/docs/TUTORIAL_6_CORPUS.md) for more details.
-```python
-from flair.datasets import TREC_6
-
-# load your corpus
-corpus = TREC_6()
-```
-
-Second you need to define the search space of parameters.
-Therefore, you can use all
-[parameter expressions](https://github.com/hyperopt/hyperopt/wiki/FMin#21-parameter-expressions) defined by hyperopt.
-
-```python
-from hyperopt import hp
-from flair.hyperparameter.param_selection import SearchSpace, Parameter
-
-# define your search space
-search_space = SearchSpace()
-search_space.add(Parameter.EMBEDDINGS, hp.choice, options=[
-    [ WordEmbeddings('en') ], 
-    [ FlairEmbeddings('news-forward'), FlairEmbeddings('news-backward') ]
-])
-search_space.add(Parameter.HIDDEN_SIZE, hp.choice, options=[32, 64, 128])
-search_space.add(Parameter.RNN_LAYERS, hp.choice, options=[1, 2])
-search_space.add(Parameter.DROPOUT, hp.uniform, low=0.0, high=0.5)
-search_space.add(Parameter.LEARNING_RATE, hp.choice, options=[0.05, 0.1, 0.15, 0.2])
-search_space.add(Parameter.MINI_BATCH_SIZE, hp.choice, options=[8, 16, 32])
-```
-
-Attention: You should always add your embeddings to the search space (as shown above). If you don't want to test
-different kind of embeddings, simply pass just one embedding option to the search space, which will then be used in
-every test run. Here is an example:
-```python
-search_space.add(Parameter.EMBEDDINGS, hp.choice, options=[
-    [ FlairEmbeddings('news-forward'), FlairEmbeddings('news-backward') ]
-])
-```
-
-In the last step you have to create the actual parameter selector. 
-Depending on the task you need either to define a `TextClassifierParamSelector` or a `SequenceTaggerParamSelector` and 
-start the optimization.
-You can define the maximum number of evaluation runs hyperopt should perform (`max_evals`).
-A evaluation run performs the specified number of epochs (`max_epochs`). 
-To overcome the issue of noisy evaluation scores, we take the average over the last three evaluation scores (either
-`dev_score` or `dev_loss`) from the evaluation run, which represents the final score and will be passed to hyperopt.
-Additionally, you can specify the number of runs per evaluation run (`training_runs`). 
-If you specify more than one training run, one evaluation run will be executed the specified number of times.
-The final evaluation score will be the average over all those runs.
-
-```python
-from flair.hyperparameter.param_selection import TextClassifierParamSelector, OptimizationValue
-
-# create the parameter selector
-param_selector = TextClassifierParamSelector(
-    corpus, 
-    False, 
-    'resources/results', 
-    'lstm',
-    max_epochs=50, 
-    training_runs=3,
-    optimization_value=OptimizationValue.DEV_SCORE
-)
-
-# start the optimization
-param_selector.optimize(search_space, max_evals=100)
-```
-
-The parameter settings and the evaluation scores will be written to `param_selection.txt` in the result directory.
-While selecting the best parameter combination we do not store any model to disk. We also do not perform a test run
-during training, we just evaluate the model once after training on the test set for logging purpose.
-
-## Finding the best Learning Rate
-
-The learning rate is one of the most important hyper parameter and it fundamentally depends on the topology of the loss
-landscape via the architecture of your model and the training data it consumes. An optimal learning will improve your
-training speed and hopefully give more performant models. A simple technique described by Leslie Smith's
-[Cyclical Learning Rates for Training](https://arxiv.org/abs/1506.01186) paper is to train your model starting with a
-very low learning rate and increases the learning rate exponentially at every batch update of SGD. By plotting the loss
-with respect to the learning rate we will typically observe three distinct phases: for low learning rates the loss does
-not improve, an optimal learning rate range where the loss drops the steepest and the final phase where the loss
-explodes as the learning rate becomes too big. With such a plot, the optimal learning rate selection is as easy as
-picking the highest one from the optimal phase.
-
-In order to run such an experiment start with your initialized `ModelTrainer` and call `find_learning_rate()` with the
-`base_path` and the file name in which to records the learning rates and losses. Then plot the generated results via the
-`Plotter`'s `plot_learning_rate()` function and have a look at the `learning_rate.png` image to select the optimal
-learning rate:
-
-```python
-from flair.datasets import WNUT_17
-from flair.embeddings import TokenEmbeddings, WordEmbeddings, StackedEmbeddings
-from flair.trainers import ModelTrainer
-from typing import List
-
-# 1. get the corpus
-corpus = WNUT_17().downsample(0.1)
-print(corpus)
-
-# 2. what tag do we want to predict?
-tag_type = 'ner'
-
-# 3. make the tag dictionary from the corpus
-tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)
-print(tag_dictionary.idx2item)
-
-# 4. initialize embeddings
-embedding_types: List[TokenEmbeddings] = [
-    WordEmbeddings('glove'),
-]
-
-embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embedding_types)
-
-# 5. initialize sequence tagger
-from flair.models import SequenceTagger
-
-tagger: SequenceTagger = SequenceTagger(hidden_size=256,
-                                        embeddings=embeddings,
-                                        tag_dictionary=tag_dictionary,
-                                        tag_type=tag_type,
-                                        use_crf=True)
-
-# 6. initialize trainer
-trainer: ModelTrainer = ModelTrainer(tagger, corpus)
-
-# 7. find learning rate
-learning_rate_tsv = trainer.find_learning_rate('resources/taggers/example-ner',
-                                                    'learning_rate.tsv')
-
-# 8. plot the learning rate finder curve
-from flair.visual.training_curves import Plotter
-plotter = Plotter()
-plotter.plot_learning_rate(learning_rate_tsv)
-```
-
-## Custom Optimizers
-
-You can now use any of PyTorch's optimizers for training when initializing a `ModelTrainer`. To give the optimizer any
-extra options just specify it as shown with the `weight_decay` example:
-
-```python
-from torch.optim.adam import Adam
-
-trainer = ModelTrainer(tagger, corpus,
-                       optimizer=Adam)
-                                     
-trainer.train(
-    "resources/taggers/example",
-    weight_decay=1e-4
-)
-```
-
-## Next
-
-The last tutorial is about [training your own embeddings](/resources/docs/TUTORIAL_9_TRAINING_LM_EMBEDDINGS.md).
diff --git a/tests/test_hyperparameter.py b/tests/test_hyperparameter.py
deleted file mode 100644
index 48321bc338..0000000000
--- a/tests/test_hyperparameter.py
+++ /dev/null
@@ -1,92 +0,0 @@
-import shutil
-
-import pytest
-from hyperopt import hp
-from torch.optim import SGD
-
-from flair.embeddings import WordEmbeddings, StackedEmbeddings, FlairEmbeddings
-from flair.hyperparameter import (
-    SearchSpace,
-    Parameter,
-    SequenceTaggerParamSelector,
-    TextClassifierParamSelector,
-)
-import flair.datasets
-
-glove_embedding: WordEmbeddings = WordEmbeddings("glove")
-
-
-@pytest.mark.skip
-def test_sequence_tagger_param_selector(results_base_path, tasks_base_path):
-    corpus = flair.datasets.ColumnCorpus(
-        data_folder=tasks_base_path / "fashion", column_format={0: "text", 3: "ner"}
-    )
-
-    # define search space
-    search_space = SearchSpace()
-
-    # sequence tagger parameter
-    search_space.add(
-        Parameter.EMBEDDINGS,
-        hp.choice,
-        options=[StackedEmbeddings([glove_embedding])],
-    )
-    search_space.add(Parameter.USE_CRF, hp.choice, options=[True, False])
-    search_space.add(Parameter.DROPOUT, hp.uniform, low=0.25, high=0.75)
-    search_space.add(Parameter.WORD_DROPOUT, hp.uniform, low=0.0, high=0.25)
-    search_space.add(Parameter.LOCKED_DROPOUT, hp.uniform, low=0.0, high=0.5)
-    search_space.add(Parameter.HIDDEN_SIZE, hp.choice, options=[64, 128])
-    search_space.add(Parameter.RNN_LAYERS, hp.choice, options=[1, 2])
-
-    # model trainer parameter
-    search_space.add(Parameter.OPTIMIZER, hp.choice, options=[SGD])
-
-    # training parameter
-    search_space.add(Parameter.MINI_BATCH_SIZE, hp.choice, options=[4, 8, 32])
-    search_space.add(Parameter.LEARNING_RATE, hp.uniform, low=0.01, high=1)
-    search_space.add(Parameter.ANNEAL_FACTOR, hp.uniform, low=0.3, high=0.75)
-    search_space.add(Parameter.PATIENCE, hp.choice, options=[3, 5])
-    search_space.add(Parameter.WEIGHT_DECAY, hp.uniform, low=0.01, high=1)
-
-    # find best parameter settings
-    optimizer = SequenceTaggerParamSelector(
-        corpus, "ner", results_base_path, max_epochs=2
-    )
-    optimizer.optimize(search_space, max_evals=2)
-
-    # clean up results directory
-    shutil.rmtree(results_base_path)
-    del optimizer, search_space
-
-
-@pytest.mark.skip
-def test_text_classifier_param_selector(results_base_path, tasks_base_path):
-    corpus = flair.datasets.ClassificationCorpus(tasks_base_path / "imdb")
-
-    search_space = SearchSpace()
-
-    # document embeddings parameter
-    search_space.add(Parameter.EMBEDDINGS, hp.choice, options=[[glove_embedding]])
-    search_space.add(Parameter.HIDDEN_SIZE, hp.choice, options=[64, 128, 256, 512])
-    search_space.add(Parameter.RNN_LAYERS, hp.choice, options=[1, 2])
-    search_space.add(Parameter.REPROJECT_WORDS, hp.choice, options=[True, False])
-    search_space.add(Parameter.REPROJECT_WORD_DIMENSION, hp.choice, options=[64, 128])
-    search_space.add(Parameter.BIDIRECTIONAL, hp.choice, options=[True, False])
-    search_space.add(Parameter.DROPOUT, hp.uniform, low=0.25, high=0.75)
-    search_space.add(Parameter.WORD_DROPOUT, hp.uniform, low=0.25, high=0.75)
-    search_space.add(Parameter.LOCKED_DROPOUT, hp.uniform, low=0.25, high=0.75)
-
-    # training parameter
-    search_space.add(Parameter.LEARNING_RATE, hp.uniform, low=0, high=1)
-    search_space.add(Parameter.MINI_BATCH_SIZE, hp.choice, options=[4, 8, 16, 32])
-    search_space.add(Parameter.ANNEAL_FACTOR, hp.uniform, low=0, high=0.75)
-    search_space.add(Parameter.PATIENCE, hp.choice, options=[3, 5])
-
-    param_selector = TextClassifierParamSelector(
-        corpus, False, results_base_path, document_embedding_type="lstm", max_epochs=2
-    )
-    param_selector.optimize(search_space, max_evals=2)
-
-    # clean up results directory
-    shutil.rmtree(results_base_path)
-    del param_selector, search_space