From a57a126b9f620d04044dd881971a9b7d548fa213 Mon Sep 17 00:00:00 2001
From: Benjamin Rombaut <benjamin.rombaut@gmail.com>
Date: Tue, 29 Oct 2024 17:17:09 +0100
Subject: [PATCH] begin on support for Choice in Optuna parsing

---
 src/amltk/optimization/optimizers/optuna.py  |  31 ++-
 src/amltk/pipeline/parsers/optuna.py         |  16 +-
 tests/optimizers/test_optimizers.py          |  31 +++
 tests/pipeline/parsing/test_optuna_parser.py | 196 +++++++++++++++++++
 4 files changed, 267 insertions(+), 7 deletions(-)

diff --git a/src/amltk/optimization/optimizers/optuna.py b/src/amltk/optimization/optimizers/optuna.py
index 3f8233fd..1e328d5d 100644
--- a/src/amltk/optimization/optimizers/optuna.py
+++ b/src/amltk/optimization/optimizers/optuna.py
@@ -292,7 +292,36 @@ def ask(
         if n is not None:
             return (self.ask(n=None) for _ in range(n))
 
-        optuna_trial: optuna.Trial = self.study.ask(self.space)
+        if any("__choice__" in k for k in self.space):
+            optuna_trial: optuna.Trial = self.study.ask()
+            # do all __choice__ suggestions with suggest_categorical
+            workspace = self.space.copy()
+            delete_other_options = []
+            for name, distribution in workspace.items():
+                if "__choice__" in name:
+                    possible_choices = distribution.choices
+                    choice_made = optuna_trial.suggest_categorical(name, choices=possible_choices)
+                    for c in possible_choices:
+                        if c != choice_made:
+                            delete_other_options.append(f"{name}:{c}:")
+            # filter all parameters given the made choices
+            filtered_workspace = {k: v for k, v in workspace.items() if (
+                ("__choice__" not in k) and
+                (not any(c in k for c in delete_other_options))
+            )}
+            # do all remaining suggestions with the correct suggest function
+            for name, distribution in filtered_workspace.items():
+                match distribution:
+                    case optuna.distributions.CategoricalDistribution(choices=choices):
+                        optuna_trial.suggest_categorical(name, choices=choices)
+                    case optuna.distributions.IntDistribution(low=low, high=high, log=log):
+                        optuna_trial.suggest_int(name, low=low, high=high, log=log)
+                    case optuna.distributions.FloatDistribution(low=low, high=high):
+                        optuna_trial.suggest_float(name, low=low, high=high)
+                    case _:
+                        raise ValueError(f"Unknown distribution: {distribution}")
+        else:
+            optuna_trial: optuna.Trial = self.study.ask(self.space)
         config = optuna_trial.params
         trial_number = optuna_trial.number
         unique_name = f"{trial_number=}"
diff --git a/src/amltk/pipeline/parsers/optuna.py b/src/amltk/pipeline/parsers/optuna.py
index f6387b55..976ce0df 100644
--- a/src/amltk/pipeline/parsers/optuna.py
+++ b/src/amltk/pipeline/parsers/optuna.py
@@ -103,16 +103,17 @@
 )
 
 from amltk._functional import prefix_keys
+from amltk.pipeline.components import Choice
 
 if TYPE_CHECKING:
     from typing import TypeAlias
 
     from amltk.pipeline import Node
 
-    OptunaSearchSpace: TypeAlias = dict[str, BaseDistribution]
-
 PAIR = 2
 
+OptunaSearchSpace: TypeAlias = dict[str, BaseDistribution]
+
 
 def _convert_hp_to_optuna_distribution(
     name: str,
@@ -196,12 +197,15 @@ def parser(
 
         delim: The delimiter to use for the names of the hyperparameters.
     """
-    if conditionals:
-        raise NotImplementedError("Conditionals are not yet supported with Optuna.")
-
     space = prefix_keys(_parse_space(node), prefix=f"{node.name}{delim}")
 
-    for child in node.nodes:
+    children = node.nodes
+
+    if isinstance(node, Choice) and any(children):
+        name = f"{node.name}{delim}__choice__"
+        space[name] = CategoricalDistribution([child.name for child in children])
+
+    for child in children:
         subspace = parser(child, flat=flat, conditionals=conditionals, delim=delim)
         if not flat:
             subspace = prefix_keys(subspace, prefix=f"{node.name}{delim}")
diff --git a/tests/optimizers/test_optimizers.py b/tests/optimizers/test_optimizers.py
index cf900379..79b2d51e 100644
--- a/tests/optimizers/test_optimizers.py
+++ b/tests/optimizers/test_optimizers.py
@@ -10,6 +10,7 @@
 
 from amltk.optimization import Metric, Optimizer, Trial
 from amltk.pipeline import Component
+from amltk.pipeline.components import Choice
 from amltk.profiling import Timer
 
 if TYPE_CHECKING:
@@ -24,6 +25,10 @@ class _A:
     pass
 
 
+class _B:
+    pass
+
+
 metrics = [
     Metric("score_bounded", minimize=False, bounds=(0, 1)),
     Metric("score_unbounded", minimize=False),
@@ -87,6 +92,25 @@ def opt_optuna(metric: Metric, tmp_path: Path) -> OptunaOptimizer:
     )
 
 
+@case
+@parametrize("metric", [*metrics, metrics])  # Single obj and multi
+def opt_optuna_choice(metric: Metric, tmp_path: Path) -> OptunaOptimizer:
+    try:
+        from amltk.optimization.optimizers.optuna import OptunaOptimizer
+    except ImportError:
+        pytest.skip("Optuna is not installed")
+
+    c1 = Component(_A, name="hi1", space={"a": [1, 2, 3]})
+    c2 = Component(_B, name="hi2", space={"b": [4, 5, 6]})
+    pipeline = Choice([c1, c2], name="hi")
+    return OptunaOptimizer.create(
+        space=pipeline,
+        metrics=metric,
+        seed=42,
+        bucket=tmp_path,
+    )
+
+
 @case
 @parametrize("metric", [*metrics])  # Single obj
 def opt_neps(metric: Metric, tmp_path: Path) -> NEPSOptimizer:
@@ -142,3 +166,10 @@ def test_batched_ask_generates_unique_configs(optimizer: Optimizer):
     batch = list(optimizer.ask(10))
     assert len(batch) == 10
     assert all_unique(batch)
+
+
+@parametrize_with_cases("optimizer", cases=".", prefix="opt_optuna_choice")
+def test_optuna_choice_output(optimizer: Optimizer):
+    trial = optimizer.ask()
+    keys = list(trial.config.keys())
+    assert any("__choice__" in k for k in keys), trial.config
diff --git a/tests/pipeline/parsing/test_optuna_parser.py b/tests/pipeline/parsing/test_optuna_parser.py
index ba098ff7..1558cd3c 100644
--- a/tests/pipeline/parsing/test_optuna_parser.py
+++ b/tests/pipeline/parsing/test_optuna_parser.py
@@ -1 +1,197 @@
 # TODO: Fill this in
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+import pytest
+from pytest_cases import case, parametrize_with_cases
+
+from amltk.pipeline import Component, Fixed, Node
+from amltk.pipeline.components import Choice
+
+try:
+    from optuna.distributions import CategoricalDistribution, IntDistribution
+
+    from amltk.pipeline.parsers.optuna import OptunaSearchSpace
+except ImportError:
+    pytest.skip("Optuna not installed", allow_module_level=True)
+
+
+FLAT = True
+NOT_FLAT = False
+CONDITIONED = True
+NOT_CONDITIONED = False
+
+
+@dataclass
+class Params:
+    """A test case for parsing a Node into a ConfigurationSpace."""
+
+    root: Node
+    expected: dict[tuple[bool, bool], OptunaSearchSpace]
+
+
+@case
+def case_single_frozen() -> Params:
+    item = Fixed(object(), name="a")
+    space = OptunaSearchSpace()
+    expected = {
+        (NOT_FLAT, CONDITIONED): space,
+        (NOT_FLAT, NOT_CONDITIONED): space,
+        (FLAT, CONDITIONED): space,
+        (FLAT, NOT_CONDITIONED): space,
+    }
+    return Params(item, expected)  # type: ignore
+
+
+@case
+def case_single_component() -> Params:
+    item = Component(object, name="a", space={"hp": [1, 2, 3]})
+    space = OptunaSearchSpace({"a:hp": CategoricalDistribution([1, 2, 3])})
+    expected = {
+        (NOT_FLAT, CONDITIONED): space,
+        (NOT_FLAT, NOT_CONDITIONED): space,
+        (FLAT, CONDITIONED): space,
+        (FLAT, NOT_CONDITIONED): space,
+    }
+    return Params(item, expected)  # type: ignore
+
+
+@case
+def case_single_step_two_hp() -> Params:
+    item = Component(object, name="a", space={"hp": [1, 2, 3], "hp2": [1, 2, 3]})
+    space = OptunaSearchSpace(
+        {
+            "a:hp": CategoricalDistribution([1, 2, 3]),
+            "a:hp2": CategoricalDistribution([1, 2, 3]),
+        },
+    )
+
+    expected = {
+        (NOT_FLAT, CONDITIONED): space,
+        (NOT_FLAT, NOT_CONDITIONED): space,
+        (FLAT, CONDITIONED): space,
+        (FLAT, NOT_CONDITIONED): space,
+    }
+    return Params(item, expected)  # type: ignore
+
+
+@case
+def case_single_step_two_hp_different_types() -> Params:
+    item = Component(object, name="a", space={"hp": [1, 2, 3], "hp2": (1, 10)})
+    space = OptunaSearchSpace(
+        {"a:hp": CategoricalDistribution([1, 2, 3]), "a:hp2": IntDistribution(1, 10)},
+    )
+    expected = {
+        (NOT_FLAT, CONDITIONED): space,
+        (NOT_FLAT, NOT_CONDITIONED): space,
+        (FLAT, CONDITIONED): space,
+        (FLAT, NOT_CONDITIONED): space,
+    }
+    return Params(item, expected)  # type: ignore
+
+
+# TODO: Testing for with and without conditions does not really make sense here
+@case
+def case_choice() -> Params:
+    item = Choice(
+        Component(object, name="a", space={"hp": [1, 2, 3]}),
+        Component(object, name="b", space={"hp2": (1, 10)}),
+        name="choice1",
+        space={"hp3": (1, 10)},
+    )
+
+    expected = {}
+
+    # Not flat and with conditions
+    space = OptunaSearchSpace(
+        {
+            "choice1:a:hp": CategoricalDistribution([1, 2, 3]),
+            "choice1:b:hp2": IntDistribution(1, 10),
+            "choice1:hp3": IntDistribution(1, 10),
+            "choice1:__choice__": CategoricalDistribution(["a", "b"]),
+        },
+    )
+    expected[(NOT_FLAT, CONDITIONED)] = space
+
+    # Flat and with conditions
+    space = OptunaSearchSpace(
+        {
+            "a:hp": CategoricalDistribution([1, 2, 3]),
+            "b:hp2": IntDistribution(1, 10),
+            "choice1:hp3": IntDistribution(1, 10),
+            "choice1:__choice__": CategoricalDistribution(["a", "b"]),
+        },
+    )
+    expected[(FLAT, CONDITIONED)] = space
+
+    # Not Flat and without conditions
+    space = OptunaSearchSpace(
+        {
+            "choice1:a:hp": CategoricalDistribution([1, 2, 3]),
+            "choice1:b:hp2": IntDistribution(1, 10),
+            "choice1:hp3": IntDistribution(1, 10),
+            "choice1:__choice__": CategoricalDistribution(["a", "b"]),
+        },
+    )
+    expected[(NOT_FLAT, NOT_CONDITIONED)] = space
+
+    # Flat and without conditions
+    space = OptunaSearchSpace(
+        {
+            "a:hp": CategoricalDistribution([1, 2, 3]),
+            "b:hp2": IntDistribution(1, 10),
+            "choice1:hp3": IntDistribution(1, 10),
+            "choice1:__choice__": CategoricalDistribution(["a", "b"]),
+        },
+    )
+    expected[(FLAT, NOT_CONDITIONED)] = space
+    return Params(item, expected)  # type: ignore
+
+
+@parametrize_with_cases("test_case", cases=".")
+def test_parsing_pipeline(test_case: Params) -> None:
+    pipeline = test_case.root
+
+    for (flat, conditioned), expected in test_case.expected.items():
+        parsed_space = pipeline.search_space(
+            "optuna",
+            flat=flat,
+            conditionals=conditioned,
+        )
+        assert (
+            parsed_space == expected
+        ), f"Failed for {flat=}, {conditioned=}.\n{parsed_space}\n{expected}"
+
+
+@parametrize_with_cases("test_case", cases=".")
+def test_parsing_does_not_mutate_space_of_nodes(test_case: Params) -> None:
+    pipeline = test_case.root
+    spaces_before = {tuple(path): step.space for path, step in pipeline.walk()}
+
+    for (flat, conditioned), _ in test_case.expected.items():
+        pipeline.search_space(
+            "optuna",
+            flat=flat,
+            conditionals=conditioned,
+        )
+        spaces_after = {tuple(path): step.space for path, step in pipeline.walk()}
+        assert spaces_before == spaces_after
+
+
+@parametrize_with_cases("test_case", cases=".")
+def test_parsing_twice_produces_same_space(test_case: Params) -> None:
+    pipeline = test_case.root
+
+    for (flat, conditioned), _ in test_case.expected.items():
+        parsed_space = pipeline.search_space(
+            "optuna",
+            flat=flat,
+            conditionals=conditioned,
+        )
+        parsed_space2 = pipeline.search_space(
+            "optuna",
+            flat=flat,
+            conditionals=conditioned,
+        )
+        assert parsed_space == parsed_space2