From 72863643f7df8354d4a56a428311cafc47b46381 Mon Sep 17 00:00:00 2001 From: Pinchuk Maya <57305249+maypink@users.noreply.github.com> Date: Fri, 9 Jun 2023 14:14:02 +0300 Subject: [PATCH] NeuralMAB (#102) * fix * fix#2 * minor * initial neural mab * add context agents enum * add experiments * launch * minor * add contextual mab to pull arms * put NN in a separate class & add docstrings * adjust settings * change places of mutations * fixes after review * minors * minors * add contextual bandits * add experimenter & multiple fitness lines visualizer * experiments * probabilities for contexts * minors & docstrings * fix pep8 * fix requirements * minor * minor * add average visualizatio * fixes after review * minors * update requirements * Update unit-build.yml * Update unit-build.yml * fix pep8 * Update unit-build.yml --- .github/workflows/unit-build.yml | 1 + ...ontext_mab_experiment_different_targets.py | 20 ++ .../adaptive_optimizer/experiment_setup.py | 52 ++- ...py => mab_experiment_different_targets.py} | 63 ++-- ...neural_mab_experiment_different_targets.py | 15 + examples/adaptive_optimizer/utils.py | 14 +- .../synthetic_graph_evolution/graph_search.py | 22 +- experiments/__init__.py | 0 experiments/mab/__init__.py | 0 .../mab/mab_synthetic_experiment_helper.py | 208 ++++++++++++ golem/core/adapter/nx_adapter.py | 25 ++ .../optimisers/adaptive/context_agents.py | 44 +++ .../adaptive/mab_agents/__init__.py | 0 .../mab_agents/contextual_mab_agent.py | 84 +++++ .../adaptive/{ => mab_agents}/mab_agent.py | 0 .../mab_agents/neural_contextual_mab_agent.py | 20 ++ golem/core/optimisers/adaptive/neural_mab.py | 306 ++++++++++++++++++ .../optimisers/adaptive/operator_agent.py | 1 + golem/core/optimisers/genetic/gp_params.py | 5 + .../optimisers/genetic/operators/mutation.py | 15 +- .../graph_sa/graph_structural_analysis.py | 4 +- .../visualisation/opt_history/fitness_line.py | 83 ++++- requirements.txt | 11 +- requirements_adaptive.txt | 2 + 24 files changed, 946 insertions(+), 49 deletions(-) create mode 100644 examples/adaptive_optimizer/context_mab_experiment_different_targets.py rename examples/adaptive_optimizer/{experiment_different_targets.py => mab_experiment_different_targets.py} (68%) create mode 100644 examples/adaptive_optimizer/neural_mab_experiment_different_targets.py create mode 100644 experiments/__init__.py create mode 100644 experiments/mab/__init__.py create mode 100644 experiments/mab/mab_synthetic_experiment_helper.py create mode 100644 golem/core/optimisers/adaptive/context_agents.py create mode 100644 golem/core/optimisers/adaptive/mab_agents/__init__.py create mode 100644 golem/core/optimisers/adaptive/mab_agents/contextual_mab_agent.py rename golem/core/optimisers/adaptive/{ => mab_agents}/mab_agent.py (100%) create mode 100644 golem/core/optimisers/adaptive/mab_agents/neural_contextual_mab_agent.py create mode 100644 golem/core/optimisers/adaptive/neural_mab.py create mode 100644 requirements_adaptive.txt diff --git a/.github/workflows/unit-build.yml b/.github/workflows/unit-build.yml index d35ba7c5..e5b40289 100644 --- a/.github/workflows/unit-build.yml +++ b/.github/workflows/unit-build.yml @@ -31,6 +31,7 @@ jobs: pip install .[docs] pip install .[profilers] pip install pytest-cov + pip install -r requirements_adaptive.txt - name: Test with pytest run: | pytest --cov=golem test/unit diff --git a/examples/adaptive_optimizer/context_mab_experiment_different_targets.py b/examples/adaptive_optimizer/context_mab_experiment_different_targets.py new file mode 100644 
index 00000000..ef2b3786 --- /dev/null +++ b/examples/adaptive_optimizer/context_mab_experiment_different_targets.py @@ -0,0 +1,20 @@ +from examples.adaptive_optimizer.experiment_setup import run_adaptive_mutations_with_context +from examples.adaptive_optimizer.mab_experiment_different_targets import run_experiment_node_num, \ + run_experiment_edge_num, run_experiment_graphs_ratio_edges_nodes, run_experiment_trees +from golem.core.optimisers.adaptive.operator_agent import MutationAgentTypeEnum + + +if __name__ == '__main__': + """Run adaptive optimizer on different targets to see how neural multi-armed bandit agent converges + to different probabilities of actions (i.e. mutations) for different targets.""" + adaptive_mutation_type = MutationAgentTypeEnum.contextual_bandit + + run_experiment_node_num(trial_timeout=2, adaptive_mutation_type=adaptive_mutation_type, + run_func=run_adaptive_mutations_with_context) + run_experiment_edge_num(trial_timeout=2, adaptive_mutation_type=adaptive_mutation_type, + run_func=run_adaptive_mutations_with_context) + run_experiment_trees(trial_timeout=10, trial_iterations=2000, adaptive_mutation_type=adaptive_mutation_type, + run_func=run_adaptive_mutations_with_context) + run_experiment_graphs_ratio_edges_nodes(trial_timeout=10, trial_iterations=2000, + adaptive_mutation_type=adaptive_mutation_type, + run_func=run_adaptive_mutations_with_context) diff --git a/examples/adaptive_optimizer/experiment_setup.py b/examples/adaptive_optimizer/experiment_setup.py index b176d168..be8c0613 100644 --- a/examples/adaptive_optimizer/experiment_setup.py +++ b/examples/adaptive_optimizer/experiment_setup.py @@ -1,8 +1,10 @@ from pprint import pprint -from typing import List, Sequence, Optional +from typing import List, Sequence, Optional, Dict import networkx as nx +import numpy as np from matplotlib import pyplot as plt +from sklearn.cluster import KMeans from examples.synthetic_graph_evolution.utils import draw_graphs_subplots from examples.adaptive_optimizer.utils import plot_action_values @@ -23,7 +25,7 @@ def run_adaptive_mutations( stats_action_value_log: List[List[float]] = [] def log_action_values(next_pop: PopulationT, optimizer: EvoGraphOptimizer): - values = optimizer.mutation.agent.get_action_values(obs=None) + values = optimizer.mutation.agent.get_action_values(obs=next_pop[0]) stats_action_value_log.append(list(values)) # Setup the logger and run the optimizer @@ -47,3 +49,49 @@ def log_action_values(next_pop: PopulationT, optimizer: EvoGraphOptimizer): plot_action_values(stats_action_value_log, action_tags=agent.actions) plt.show() return stats_action_value_log + + +def run_adaptive_mutations_with_context( + optimizer: EvoGraphOptimizer, + objective: Objective, + target: Optional[nx.DiGraph] = None, + visualize: bool = True, + n_clusters: int = 2 +): + """This experiment setup outputs graphic of relative action probabilities + for given target/objective and given optimizer setup.""" + stats_action_value_log: Dict[int, List[List[float]]] = dict() + cluster = KMeans(n_clusters=n_clusters) + + def log_action_values_with_clusters(next_pop: PopulationT, optimizer: EvoGraphOptimizer): + obs_contexts = optimizer.mutation.agent.get_context(next_pop) + cluster.fit(np.array(obs_contexts).reshape(-1, 1)) + centers = cluster.cluster_centers_ + for i, center in enumerate(centers): + values = optimizer.mutation.agent.get_action_values(obs=center) + if i not in stats_action_value_log.keys(): + stats_action_value_log[i] = [] + stats_action_value_log[i].append(list(values)) + 
+ # Setup the logger and run the optimizer + optimizer.set_iteration_callback(log_action_values_with_clusters) + found_graphs = optimizer.optimise(objective) + found_graph = found_graphs[0] if isinstance(found_graphs, Sequence) else found_graphs + history = optimizer.history + agent = optimizer.mutation.agent + + print('History of action probabilities:') + pprint(stats_action_value_log) + if visualize: + found_nx_graph = BaseNetworkxAdapter().restore(found_graph) + final_metrics = objective(found_nx_graph).value + if target is not None: + draw_graphs_subplots(target, found_nx_graph, + titles=['Target Graph', f'Found Graph (fitness={final_metrics})']) + else: + draw_graphs_subplots(found_nx_graph, titles=[f'Found Graph (fitness={final_metrics})']) + history.show.fitness_line() + for i in range(n_clusters): + plot_action_values(stats_action_value_log[i], action_tags=agent.actions) + plt.show() + return stats_action_value_log diff --git a/examples/adaptive_optimizer/experiment_different_targets.py b/examples/adaptive_optimizer/mab_experiment_different_targets.py similarity index 68% rename from examples/adaptive_optimizer/experiment_different_targets.py rename to examples/adaptive_optimizer/mab_experiment_different_targets.py index 070cfa31..591a4374 100644 --- a/examples/adaptive_optimizer/experiment_different_targets.py +++ b/examples/adaptive_optimizer/mab_experiment_different_targets.py @@ -1,6 +1,6 @@ from datetime import timedelta from functools import partial -from typing import Optional, Sequence +from typing import Optional, Sequence, Callable import networkx as nx from examples.adaptive_optimizer.experiment_setup import run_adaptive_mutations @@ -37,23 +37,25 @@ def generate_trees(graph_sizes: Sequence[int], node_types: Sequence[str] = ('x', return trees -def get_graph_gp_params(objective: Objective): +def get_graph_gp_params(objective: Objective, adaptive_mutation_type: MutationAgentTypeEnum, pop_size: int = None): return GPAlgorithmParameters( - adaptive_mutation_type=MutationAgentTypeEnum.bandit, - pop_size=21, + adaptive_mutation_type=adaptive_mutation_type, + pop_size=pop_size or 21, multi_objective=objective.is_multi_objective, genetic_scheme_type=GeneticSchemeTypesEnum.generational, mutation_types=[ - MutationTypesEnum.single_add, - MutationTypesEnum.single_edge, MutationTypesEnum.single_drop, + MutationTypesEnum.single_edge, + MutationTypesEnum.single_add ], crossover_types=[CrossoverTypesEnum.none] ) -def run_experiment_node_num(target_sizes: Sequence[int] = (100, 400), - trial_timeout: int = 15): +def run_experiment_node_num(adaptive_mutation_type: MutationAgentTypeEnum, + target_sizes: Sequence[int] = (50, 400), + trial_timeout: int = 15, + run_func: Callable = run_adaptive_mutations): for target_size in target_sizes: # Setup simple objective that searches for required graph size (number of nodes) objective = Objective({'graph_size': lambda graph: abs(target_size - @@ -63,15 +65,18 @@ def run_experiment_node_num(target_sizes: Sequence[int] = (100, 400), optimizer, _ = graph_search_setup( objective=objective, optimizer_cls=EvoGraphOptimizer, - algorithm_parameters=get_graph_gp_params(objective), + algorithm_parameters=get_graph_gp_params(objective=objective, + adaptive_mutation_type=adaptive_mutation_type), timeout=timedelta(minutes=trial_timeout), num_iterations=target_size * 3, ) - run_adaptive_mutations(optimizer, objective, visualize=True) + run_func(optimizer, objective, visualize=True) -def run_experiment_edge_num(target_sizes: Sequence[int] = (100, 400), - 
trial_timeout: int = 15): +def run_experiment_edge_num(adaptive_mutation_type: MutationAgentTypeEnum, + target_sizes: Sequence[int] = (100, 400), + trial_timeout: int = 15, + run_func: Callable = run_adaptive_mutations): for target_size in target_sizes: # Setup simple objective that searches for required graph size (number of nodes) objective = Objective({'graph_size': lambda graph: abs(target_size - @@ -81,14 +86,18 @@ def run_experiment_edge_num(target_sizes: Sequence[int] = (100, 400), optimizer, _ = graph_search_setup( objective=objective, optimizer_cls=EvoGraphOptimizer, - algorithm_parameters=get_graph_gp_params(objective), + algorithm_parameters=get_graph_gp_params(objective=objective, + adaptive_mutation_type=adaptive_mutation_type), timeout=timedelta(minutes=trial_timeout), num_iterations=target_size * 3, ) - run_adaptive_mutations(optimizer, objective, visualize=True) + run_func(optimizer, objective, visualize=True) -def run_experiment_graphs_ratio_edges_nodes(trial_timeout: int = 15, trial_iterations: Optional[int] = 500): +def run_experiment_graphs_ratio_edges_nodes(adaptive_mutation_type: MutationAgentTypeEnum, + trial_timeout: int = 15, + trial_iterations: Optional[int] = 500, + run_func: Callable = run_adaptive_mutations): """In this experiment setup we generate different graphs with different ratios of #Edges/#Nodes. Respectively, probabilities of adding edges and adding nodes must be different for different targets.""" @@ -111,16 +120,20 @@ def run_experiment_graphs_ratio_edges_nodes(trial_timeout: int = 15, trial_itera optimizer, _ = graph_search_setup( objective=objective, optimizer_cls=EvoGraphOptimizer, - algorithm_parameters=get_graph_gp_params(objective), + algorithm_parameters=get_graph_gp_params(objective=objective, + adaptive_mutation_type=adaptive_mutation_type), node_types=node_types, timeout=timedelta(minutes=trial_timeout), num_iterations=trial_iterations, ) - run_adaptive_mutations(optimizer, objective, target, visualize=True) + run_func(optimizer, objective, target, visualize=True) -def run_experiment_trees(trial_timeout: int = 15, trial_iterations: Optional[int] = 500): +def run_experiment_trees(adaptive_mutation_type: MutationAgentTypeEnum, + trial_timeout: int = 15, + trial_iterations: Optional[int] = 500, + run_func: Callable = run_adaptive_mutations): node_types = ['x'] for target in generate_trees(graph_sizes=[20, 30, 50], node_types=node_types): # Setup objective that measures some graph-theoretic similarity measure @@ -137,7 +150,7 @@ def run_experiment_trees(trial_timeout: int = 15, trial_iterations: Optional[int MutationTypesEnum.single_drop, ], crossover_types=[CrossoverTypesEnum.none], - adaptive_mutation_type=MutationAgentTypeEnum.bandit, + adaptive_mutation_type=adaptive_mutation_type, ) # Build the optimizer @@ -150,14 +163,16 @@ def run_experiment_trees(trial_timeout: int = 15, trial_iterations: Optional[int num_iterations=trial_iterations, ) - run_adaptive_mutations(optimizer, objective, target, visualize=True) + run_func(optimizer, objective, target, visualize=True) if __name__ == '__main__': """Run adaptive optimizer on different targets to see how adaptive agent converges to different probabilities of actions (i.e. 
mutations) for different targets.""" + adaptive_mutation_type = MutationAgentTypeEnum.bandit - run_experiment_node_num(trial_timeout=2) - run_experiment_edge_num(trial_timeout=2) - run_experiment_trees(trial_timeout=10, trial_iterations=2000) - run_experiment_graphs_ratio_edges_nodes(trial_timeout=10, trial_iterations=2000) + run_experiment_node_num(trial_timeout=2, adaptive_mutation_type=adaptive_mutation_type) + run_experiment_edge_num(trial_timeout=2, adaptive_mutation_type=adaptive_mutation_type) + run_experiment_trees(trial_timeout=10, trial_iterations=2000, adaptive_mutation_type=adaptive_mutation_type) + run_experiment_graphs_ratio_edges_nodes(trial_timeout=10, trial_iterations=2000, + adaptive_mutation_type=adaptive_mutation_type) diff --git a/examples/adaptive_optimizer/neural_mab_experiment_different_targets.py b/examples/adaptive_optimizer/neural_mab_experiment_different_targets.py new file mode 100644 index 00000000..ddb9dbf3 --- /dev/null +++ b/examples/adaptive_optimizer/neural_mab_experiment_different_targets.py @@ -0,0 +1,15 @@ +from examples.adaptive_optimizer.mab_experiment_different_targets import run_experiment_node_num, \ + run_experiment_edge_num, run_experiment_graphs_ratio_edges_nodes, run_experiment_trees +from golem.core.optimisers.adaptive.operator_agent import MutationAgentTypeEnum + + +if __name__ == '__main__': + """Run adaptive optimizer on different targets to see how neural multi-armed bandit agent converges + to different probabilities of actions (i.e. mutations) for different targets.""" + adaptive_mutation_type = MutationAgentTypeEnum.neural_bandit + + run_experiment_node_num(trial_timeout=2, adaptive_mutation_type=adaptive_mutation_type) + run_experiment_edge_num(trial_timeout=2, adaptive_mutation_type=adaptive_mutation_type) + run_experiment_trees(trial_timeout=10, trial_iterations=2000, adaptive_mutation_type=adaptive_mutation_type) + run_experiment_graphs_ratio_edges_nodes(trial_timeout=10, trial_iterations=2000, + adaptive_mutation_type=adaptive_mutation_type) diff --git a/examples/adaptive_optimizer/utils.py b/examples/adaptive_optimizer/utils.py index e182ee64..d519df5d 100644 --- a/examples/adaptive_optimizer/utils.py +++ b/examples/adaptive_optimizer/utils.py @@ -1,4 +1,4 @@ -from typing import Sequence, Optional, Any +from typing import Sequence, Optional, Any, List import numpy as np from matplotlib import pyplot as plt @@ -7,7 +7,8 @@ def plot_action_values(stats: Sequence[Sequence[float]], action_tags: Optional[Sequence[Any]] = None, - size: float = 5.): + size: float = 5., + titles: List[str] = None): # Plot stackplot of how action expectations and probabilities changed x = np.arange(len(stats)) y = np.array(stats).T @@ -22,10 +23,15 @@ def plot_action_values(stats: Sequence[Sequence[float]], ax0.grid() ax1.stackplot(x, y_prob, labels=labels) - ax0.set_title('Action Expectation Values') + if not titles: + expectation_values_title = 'Action Expectation Values' + probabilities_title = 'Action Probabilities' + else: + expectation_values_title, probabilities_title = titles + ax0.set_title(expectation_values_title, size=10) ax0.set_xlabel('Generation') ax0.set_ylabel('Reward Expectation') - ax1.set_title('Action Probabilities') + ax1.set_title(probabilities_title, size=10) ax1.set_xlabel('Generation') ax1.set_ylabel('Probability') ax1.set(ylim=(0, 1.0), yticks=np.linspace(0., 1., 21)) diff --git a/examples/synthetic_graph_evolution/graph_search.py b/examples/synthetic_graph_evolution/graph_search.py index 3d071709..b009012a 100644 --- 
a/examples/synthetic_graph_evolution/graph_search.py +++ b/examples/synthetic_graph_evolution/graph_search.py @@ -1,12 +1,12 @@ from datetime import timedelta from functools import partial -from typing import Type, Optional, Sequence +from typing import Type, Optional, Sequence, List import networkx as nx from examples.synthetic_graph_evolution.experiment_setup import run_experiments -from examples.synthetic_graph_evolution.generators import generate_labeled_graph from golem.core.adapter.nx_adapter import BaseNetworkxAdapter -from golem.core.dag.verification_rules import has_no_self_cycled_nodes +from golem.core.dag.graph import Graph +from golem.core.dag.verification_rules import DEFAULT_DAG_RULES from golem.core.optimisers.adaptive.operator_agent import MutationAgentTypeEnum from golem.core.optimisers.genetic.gp_optimizer import EvoGraphOptimizer from golem.core.optimisers.genetic.gp_params import GPAlgorithmParameters @@ -25,7 +25,10 @@ def graph_search_setup(target_graph: Optional[nx.DiGraph] = None, algorithm_parameters: Optional[AlgorithmParameters] = None, node_types: Sequence[str] = ('x',), timeout: Optional[timedelta] = None, - num_iterations: Optional[int] = None): + num_iterations: Optional[int] = None, + initial_graph_sizes: Optional[List[int]] = None, + initial_graphs: List[Graph] = None, + pop_size: int = None): if target_graph is not None and objective is not None: raise ValueError('Please provide either target or objective, not both') elif target_graph is not None: @@ -61,7 +64,7 @@ def graph_search_setup(target_graph: Optional[nx.DiGraph] = None, ) default_gp_params = GPAlgorithmParameters( adaptive_mutation_type=MutationAgentTypeEnum.random, - pop_size=21, + pop_size=pop_size or 21, multi_objective=objective.is_multi_objective, genetic_scheme_type=GeneticSchemeTypesEnum.generational, mutation_types=[ @@ -74,13 +77,16 @@ def graph_search_setup(target_graph: Optional[nx.DiGraph] = None, gp_params = algorithm_parameters or default_gp_params graph_gen_params = GraphGenerationParams( adapter=BaseNetworkxAdapter(), - rules_for_constraint=[has_no_self_cycled_nodes], + rules_for_constraint=DEFAULT_DAG_RULES, available_node_types=node_types, ) # Generate simple initial population with line graphs - initial_graphs = [generate_labeled_graph('gnp', 7, node_types) - for _ in range(gp_params.pop_size)] + if not initial_graphs: + if not initial_graph_sizes: + initial_graph_sizes = [7] * gp_params.pop_size + initial_graphs = [nx.random_tree(initial_graph_sizes[i], create_using=nx.DiGraph) + for i in range(gp_params.pop_size)] # Build the optimizer optimiser = optimizer_cls(objective, initial_graphs, requirements, graph_gen_params, gp_params) return optimiser, objective diff --git a/experiments/__init__.py b/experiments/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/experiments/mab/__init__.py b/experiments/mab/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/experiments/mab/mab_synthetic_experiment_helper.py b/experiments/mab/mab_synthetic_experiment_helper.py new file mode 100644 index 00000000..47afac74 --- /dev/null +++ b/experiments/mab/mab_synthetic_experiment_helper.py @@ -0,0 +1,208 @@ +import os.path +import random +from datetime import timedelta +from functools import partial +from pprint import pprint + +import numpy as np +import pandas as pd +import seaborn as sns + +from typing import List, Callable, Sequence, Optional, Dict + +import networkx as nx +from matplotlib import pyplot as plt +from sklearn.cluster import 
MiniBatchKMeans + +from examples.adaptive_optimizer.mab_experiment_different_targets import get_graph_gp_params +from examples.adaptive_optimizer.utils import plot_action_values +from examples.synthetic_graph_evolution.graph_search import graph_search_setup +from examples.synthetic_graph_evolution.utils import draw_graphs_subplots +from golem.core.adapter.nx_adapter import BaseNetworkxAdapter +from golem.core.dag.graph import Graph +from golem.core.optimisers.adaptive.operator_agent import MutationAgentTypeEnum + +from golem.core.optimisers.genetic.gp_optimizer import EvoGraphOptimizer +from golem.core.optimisers.genetic.operators.operator import PopulationT +from golem.core.optimisers.objective import Objective +from golem.core.optimisers.optimizer import GraphOptimizer +from golem.core.paths import project_root +from golem.visualisation.opt_history.fitness_line import MultipleFitnessLines + + +class MABSyntheticExperimentHelper: + """ Class to provide synthetic experiments without data to compare MABs. """ + + def __init__(self, launch_num: int, timeout: float, bandits_to_compare: List[MutationAgentTypeEnum], + path_to_save: str = None, is_visualize: bool = False, n_clusters: Optional[int] = None): + self.launch_num = launch_num + self.timeout = timeout + self.bandits_to_compare = bandits_to_compare + self.bandit_metrics = dict.fromkeys(bandit.name for bandit in self.bandits_to_compare) + self.path_to_save = path_to_save or os.path.join(project_root(), 'mab') + self.is_visualize = is_visualize + self.histories = dict.fromkeys([bandit.name for bandit in self.bandits_to_compare]) + self.cluster = MiniBatchKMeans(n_clusters=n_clusters) + + def compare_bandits(self, setup_parameters: Callable, initial_population_func: Callable = None): + results = dict() + for i in range(self.launch_num): + initial_graphs = initial_population_func() + for bandit in self.bandits_to_compare: + optimizer, objective = setup_parameters(initial_graphs=initial_graphs, bandit_type=bandit) + agent = optimizer.mutation.agent + result = self.launch_bandit(bandit_type=bandit, optimizer=optimizer, objective=objective) + if bandit.name not in results.keys(): + results[bandit.name] = [] + results[bandit.name].append(result) + if self.is_visualize: + self.show_average_action_probabilities(show_action_probabilities=results, actions=agent.actions) + + def launch_bandit(self, bandit_type: MutationAgentTypeEnum, optimizer: GraphOptimizer, objective: Callable): + + stats_action_value_log: Dict[int, List[List[float]]] = dict() + + def log_action_values(next_pop: PopulationT, optimizer: EvoGraphOptimizer): + values = optimizer.mutation.agent.get_action_values(obs=next_pop[0]) + if 0 not in stats_action_value_log.keys(): + stats_action_value_log[0] = [] + stats_action_value_log[0].append(list(values)) + + def log_action_values_with_clusters(next_pop: PopulationT, optimizer: EvoGraphOptimizer): + obs_contexts = optimizer.mutation.agent.get_context(next_pop) + self.cluster.partial_fit(np.array(obs_contexts).reshape(-1, 1)) + centers = self.cluster.cluster_centers_ + for i, center in enumerate(sorted(centers)): + values = optimizer.mutation.agent.get_action_values(obs=[center]) + if i not in stats_action_value_log.keys(): + stats_action_value_log[i] = [] + stats_action_value_log[i].append(list(values)) + + # set iteration callback + if bandit_type == MutationAgentTypeEnum.bandit: + optimizer.set_iteration_callback(log_action_values) + elif bandit_type in (MutationAgentTypeEnum.contextual_bandit, MutationAgentTypeEnum.neural_bandit): + 
optimizer.set_iteration_callback(log_action_values_with_clusters) + else: + raise ValueError("No callback function was specified for that bandit type.") + + found_graphs = optimizer.optimise(objective) + found_graph = found_graphs[0] if isinstance(found_graphs, Sequence) else found_graphs + history = optimizer.history + if not self.histories[bandit_type.name]: + self.histories[bandit_type.name] = [] + self.histories[bandit_type.name].append(history) + agent = optimizer.mutation.agent + found_nx_graph = BaseNetworkxAdapter().restore(found_graph) + final_metrics = objective(found_nx_graph).value + if not self.bandit_metrics[bandit_type.name]: + self.bandit_metrics[bandit_type.name] = [] + self.bandit_metrics[bandit_type.name].append(final_metrics) + + print('History of action probabilities:') + pprint(stats_action_value_log) + if self.is_visualize: + self.show_fitness_line(found_nx_graph=found_nx_graph, final_metrics=final_metrics, + history=history) + self.show_action_probabilities(bandit_type=bandit_type, stats_action_value_log=stats_action_value_log, + actions=agent.actions) + + return stats_action_value_log + + @staticmethod + def show_fitness_line(found_nx_graph, final_metrics, history): + draw_graphs_subplots(found_nx_graph, titles=[f'Found Graph (fitness={final_metrics})']) + history.show.fitness_line() + + def show_action_probabilities(self, bandit_type: MutationAgentTypeEnum, stats_action_value_log, + actions, is_average: bool = False): + if is_average: + titles = ['Average action Expectation Values', 'Average action Probabilities'] + else: + titles = ['Action Expectation Values', 'Action Probabilities'] + if bandit_type == MutationAgentTypeEnum.bandit: + plot_action_values(stats=stats_action_value_log[0], action_tags=actions, titles=titles) + plt.show() + else: + centers = sorted(self.cluster.cluster_centers_) + for i in range(self.cluster.n_clusters): + titles = [title + f' for cluster with center {int(centers[i])}' for title in titles] + plot_action_values(stats=stats_action_value_log[i], action_tags=actions, + titles=titles) + plt.show() + + def show_average_action_probabilities(self, show_action_probabilities: dict, actions): + """ Shows action probabilities across several launches. 
""" + for bandit in list(show_action_probabilities.keys()): + total_sum = None + for launch in show_action_probabilities[bandit]: + if not total_sum: + total_sum = launch + continue + for cluster in launch.keys(): + for i in range(len(total_sum[cluster])): + for j in range(len(total_sum[cluster][i])): + total_sum[cluster][i][j] += launch[cluster][i][j] + for cluster in total_sum.keys(): + for i in range(len(total_sum[cluster])): + for j in range(len(total_sum[cluster][i])): + total_sum[cluster][i][j] /= len(show_action_probabilities[bandit]) + self.show_action_probabilities(bandit_type=MutationAgentTypeEnum(bandit), + stats_action_value_log=total_sum, + actions=actions, + is_average=True) + + def show_boxplots(self): + sns.boxplot(data=pd.DataFrame(self.bandit_metrics)) + plt.title('Metrics', fontsize=15) + plt.show() + + def show_fitness_lines(self): + multiple_fitness_lines = MultipleFitnessLines(histories_to_compare=self.histories) + multiple_fitness_lines.visualize() + + +def setup_parameters(initial_graphs: List[Graph], bandit_type: MutationAgentTypeEnum, + target_size: int, trial_timeout: float): + objective = Objective({'graph_size': lambda graph: abs(target_size - + graph.number_of_nodes())}) + + # Build the optimizer + optimizer, _ = graph_search_setup( + objective=objective, + optimizer_cls=EvoGraphOptimizer, + algorithm_parameters=get_graph_gp_params(objective=objective, + adaptive_mutation_type=bandit_type), + timeout=timedelta(minutes=trial_timeout), + num_iterations=target_size * 3, + initial_graphs=initial_graphs + ) + return optimizer, objective + + +def initial_population_func(graph_size: List[int] = None, pop_size: int = None, initial_graphs: List[Graph] = None): + if initial_graphs: + return initial_graphs + initial_graphs = [nx.random_tree(graph_size[i], create_using=nx.DiGraph) + for i in range(pop_size)] + return initial_graphs + + +if __name__ == '__main__': + timeout = 0.3 + launch_num = 1 + target_size = 50 + + bandits_to_compare = [MutationAgentTypeEnum.contextual_bandit] + setup_parameters_func = partial(setup_parameters, target_size=target_size, trial_timeout=timeout) + initial_population_func = partial(initial_population_func, + graph_size=[random.randint(5, 10) for _ in range(10)] + + [random.randint(90, 95) for _ in range(10)], + pop_size=20) + + helper = MABSyntheticExperimentHelper(timeout=timeout, launch_num=launch_num, bandits_to_compare=bandits_to_compare, + n_clusters=2, is_visualize=True) + helper.compare_bandits(initial_population_func=initial_population_func, + setup_parameters=setup_parameters_func) + # helper.show_boxplots() + # helper.show_fitness_lines() diff --git a/golem/core/adapter/nx_adapter.py b/golem/core/adapter/nx_adapter.py index cb020f82..9bfe1116 100644 --- a/golem/core/adapter/nx_adapter.py +++ b/golem/core/adapter/nx_adapter.py @@ -85,3 +85,28 @@ def _node_restore(self, node: GraphNode) -> Dict: def _node_adapt(self, data: Dict) -> OptNode: return data[_NX_NODE_KEY] + + +class BanditNetworkxAdapter(BaseNetworkxAdapter): + """ Classic networkx adapter with nodes indexes in names instead of uids. + It is needed since some frameworks (e.g. karateclub) have asserts in which node + names should consist only of its indexes. 
+ """ + def _restore(self, opt_graph: OptGraph, metadata: Optional[Dict[str, Any]] = None) -> nx.DiGraph: + nx_graph = nx.DiGraph() + nx_node_data = {} + + # add nodes + for node in opt_graph.nodes: + nx_node_data[node.uid] = self._node_restore(node) + nx_graph.add_node(opt_graph.nodes.index(node)) + + # add edges + for node in opt_graph.nodes: + for parent in node.nodes_from: + nx_graph.add_edge(opt_graph.nodes.index(parent), opt_graph.nodes.index(node)) + + # add nodes ad labels + nx.set_node_attributes(nx_graph, nx_node_data) + + return nx_graph diff --git a/golem/core/optimisers/adaptive/context_agents.py b/golem/core/optimisers/adaptive/context_agents.py new file mode 100644 index 00000000..dfb8830f --- /dev/null +++ b/golem/core/optimisers/adaptive/context_agents.py @@ -0,0 +1,44 @@ +from enum import Enum + +from typing import List, Callable, Any + +from karateclub import FeatherGraph + +from golem.core.adapter.nx_adapter import BanditNetworkxAdapter +from golem.core.optimisers.opt_history_objects.individual import Individual + + +def feather_graph(obs: Any) -> List[float]: + """ Returns embedding based on an implementation of `"FEATHER-G" `_. + The procedure uses characteristic functions of node features with random walk weights to describe + node neighborhoods. These node level features are pooled by mean pooling to + create graph level statistics. """ + descriptor = FeatherGraph() + nx_graph = BanditNetworkxAdapter().restore(obs) + descriptor.fit([nx_graph]) + return descriptor.get_embedding()[:20] + + +def nodes_num(obs: Any) -> int: + """ Returns number of nodes in graph. """ + if isinstance(obs, Individual): + return len(obs.graph.nodes) + else: + return len(obs.nodes) + + +class ContextAgentTypeEnum(Enum): + feather_graph = 'feather_graph' + nodes_num = 'nodes_num' + + +class ContextAgentsRepository: + """ Repository of functions to encode observations. """ + _agents_implementations = { + ContextAgentTypeEnum.feather_graph: feather_graph, + ContextAgentTypeEnum.nodes_num: nodes_num + } + + @staticmethod + def agent_class_by_id(agent_id: ContextAgentTypeEnum) -> Callable: + return ContextAgentsRepository._agents_implementations[agent_id] diff --git a/golem/core/optimisers/adaptive/mab_agents/__init__.py b/golem/core/optimisers/adaptive/mab_agents/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/golem/core/optimisers/adaptive/mab_agents/contextual_mab_agent.py b/golem/core/optimisers/adaptive/mab_agents/contextual_mab_agent.py new file mode 100644 index 00000000..2ef5dbcc --- /dev/null +++ b/golem/core/optimisers/adaptive/mab_agents/contextual_mab_agent.py @@ -0,0 +1,84 @@ +import random +from typing import Union, Sequence, Optional, List + +import numpy as np +from mabwiser.mab import MAB, LearningPolicy, NeighborhoodPolicy +from scipy.special import softmax + +from golem.core.dag.graph import Graph +from golem.core.dag.graph_node import GraphNode +from golem.core.optimisers.adaptive.context_agents import ContextAgentsRepository, ContextAgentTypeEnum +from golem.core.optimisers.adaptive.operator_agent import ActType, ObsType, ExperienceBuffer, OperatorAgent + + +class ContextualMultiArmedBanditAgent(OperatorAgent): + """ Contextual Multi-Armed bandit. Observations can be encoded with simple context agent without + using NN to guarantee convergence. 
""" + + def __init__(self, actions: Sequence[ActType], n_jobs: int = 1, + context_agent_type: ContextAgentTypeEnum = ContextAgentTypeEnum.nodes_num, + enable_logging: bool = True): + super().__init__(enable_logging) + self.actions = list(actions) + self._indices = list(range(len(actions))) + self._arm_by_action = dict(zip(actions, self._indices)) + self._agent = MAB(arms=self._indices, + learning_policy=LearningPolicy.UCB1(alpha=1.25), + neighborhood_policy=NeighborhoodPolicy.Clusters(), + n_jobs=n_jobs) + self._context_agent = ContextAgentsRepository.agent_class_by_id(context_agent_type) + self._is_fitted = False + + def _initial_fit(self, obs: ObsType): + """ Initial fit for Contextual Multi-Armed Bandit. + At this step, all hands are assigned the same weights with the very first context + that is fed to the bandit. """ + # initial fit for mab + n = len(self._indices) + uniform_rewards = [1. / n] * n + contexts = self.get_context(obs=obs) + self._agent.fit(decisions=self._indices, rewards=uniform_rewards, contexts=contexts * n) + self._is_fitted = True + + def choose_action(self, obs: ObsType) -> ActType: + if not self._is_fitted: + self._initial_fit(obs=obs) + contexts = self.get_context(obs=obs) + arm = self._agent.predict(contexts=contexts) + action = self.actions[arm] + return action + + def get_action_values(self, obs: Optional[ObsType] = None) -> Sequence[float]: + if not self._is_fitted: + self._initial_fit(obs=obs) + contexts = self.get_context(obs) + prob_dict = self._agent.predict_expectations(contexts=contexts) + prob_list = [prob_dict[i] for i in range(len(prob_dict))] + return prob_list + + def get_action_probs(self, obs: Optional[ObsType] = None) -> Sequence[float]: + return softmax(self.get_action_values(obs=obs)) + + def choose_nodes(self, graph: Graph, num_nodes: int = 1) -> Union[GraphNode, Sequence[GraphNode]]: + subject_nodes = random.sample(graph.nodes, k=num_nodes) + return subject_nodes[0] if num_nodes == 1 else subject_nodes + + def partial_fit(self, experience: ExperienceBuffer): + """Continues learning of underlying agent with new experience.""" + obs, actions, rewards = experience.retrieve_experience() + self._dbg_log(obs, actions, rewards) + arms = [self._arm_by_action[action] for action in actions] + contexts = self.get_context(obs=obs) + self._agent.partial_fit(decisions=arms, rewards=rewards, contexts=contexts) + + def get_context(self, obs: Union[List[ObsType], ObsType]) -> List[List[float]]: + """ Returns contexts based on specified context agent. 
""" + contexts = [] + if not isinstance(obs, list): + obs = [obs] + for ob in obs: + if isinstance(ob, list) or isinstance(ob, np.ndarray): + contexts.append(ob) + else: + contexts.append([self._context_agent(ob)]) + return contexts diff --git a/golem/core/optimisers/adaptive/mab_agent.py b/golem/core/optimisers/adaptive/mab_agents/mab_agent.py similarity index 100% rename from golem/core/optimisers/adaptive/mab_agent.py rename to golem/core/optimisers/adaptive/mab_agents/mab_agent.py diff --git a/golem/core/optimisers/adaptive/mab_agents/neural_contextual_mab_agent.py b/golem/core/optimisers/adaptive/mab_agents/neural_contextual_mab_agent.py new file mode 100644 index 00000000..b665c13b --- /dev/null +++ b/golem/core/optimisers/adaptive/mab_agents/neural_contextual_mab_agent.py @@ -0,0 +1,20 @@ +from typing import Sequence + +from golem.core.optimisers.adaptive.mab_agents.contextual_mab_agent import ContextualMultiArmedBanditAgent +from golem.core.optimisers.adaptive.neural_mab import NeuralMAB +from golem.core.optimisers.adaptive.context_agents import ContextAgentTypeEnum +from golem.core.optimisers.adaptive.operator_agent import ActType + + +class NeuralContextualMultiArmedBanditAgent(ContextualMultiArmedBanditAgent): + """ Neural Contextual Multi-Armed bandit. Observations can be encoded with the use of Neural Networks, + but still there are some restrictions to guarantee convergence. """ + def __init__(self, + actions: Sequence[ActType], + n_jobs: int = 1, + context_agent_type: ContextAgentTypeEnum = ContextAgentTypeEnum.nodes_num, + enable_logging: bool = True): + super().__init__(actions=actions, n_jobs=n_jobs, + enable_logging=enable_logging, context_agent_type=context_agent_type) + self._agent = NeuralMAB(arms=self._indices, + n_jobs=n_jobs) diff --git a/golem/core/optimisers/adaptive/neural_mab.py b/golem/core/optimisers/adaptive/neural_mab.py new file mode 100644 index 00000000..9e8a2be4 --- /dev/null +++ b/golem/core/optimisers/adaptive/neural_mab.py @@ -0,0 +1,306 @@ +import copy +import math +from typing import List, Any, Union, Dict + +import torch +import numpy as np +from mabwiser.mab import MAB, LearningPolicy, NeighborhoodPolicy +from mabwiser.utils import Arm, Constants, Num + +from golem.core.log import default_log + +import warnings + +warnings.filterwarnings("ignore") + + +class NeuralMAB(MAB): + """ + Neural Multi-Armed Bandit. + The main concept is explained in the article: https://arxiv.org/abs/2012.01780. + Deep representation is formed with NN and Contextual Multi-Armed Bandit is integrated to choose arm. + """ + + def __init__(self, arms: List[Arm], + learning_policy: Any = LearningPolicy.UCB1(alpha=1.25), + neighborhood_policy: Any = NeighborhoodPolicy.Clusters(), + seed: int = Constants.default_seed, + n_jobs: int = 1): + + super().__init__(arms, learning_policy, neighborhood_policy, seed, n_jobs) + self.nn_with_se = NNWithShallowExploration(context_size=1, arms_count=len(arms)) + self.arms = arms + self.seed = seed + self.n_jobs = n_jobs + self.log = default_log('NeuralMAB') + # to track when GNN needs to be updated + self.iter = 0 + self._indices = list(range(len(arms))) + self._mab = MAB(arms=self._indices, + learning_policy=learning_policy, + neighborhood_policy=neighborhood_policy, + n_jobs=n_jobs) + self.is_fitted = False + + def _initial_fit_mab(self, context: Any): + """ Initial fit for Contextual Multi-Armed Bandit. + At this step, all hands are assigned the same weights with the very first context + that is fed to the bandit. 
""" + # initial fit for mab + n = len(self.arms) + uniform_rewards = [1. / n] * n + deep_context = self._get_deep_context(context=context) + self._mab.fit(decisions=self._indices, rewards=uniform_rewards, contexts=n * [deep_context]) + self.is_fitted = True + + def partial_fit(self, decisions: List[Any], rewards: List[float], contexts: List[Any] = None): + + # get deep contexts, calculate regret and update weights for NN (once in _H_q iters) + deep_contexts = self.nn_with_se.partial_fit(iter=self.iter, decisions=decisions, + rewards=rewards, contexts=contexts) + self.iter += 1 + + # update contextual mab with deep contexts + self._mab.partial_fit(decisions=decisions, contexts=deep_contexts, rewards=rewards) + + def predict(self, contexts: Any = None) -> Union[Arm, List[Arm]]: + """ Predicts which arm to pull to get maximum reward. """ + if not self.is_fitted: + self._initial_fit_mab(context=contexts) + deep_context = self._get_deep_context(context=contexts) + a_choose = self._mab.predict(contexts=[deep_context]) + return a_choose + + def predict_expectations(self, contexts: Any = None) -> Union[Dict[Arm, Num], List[Dict[Arm, Num]]]: + """ Returns expected reward for each arm. """ + if not self.is_fitted: + self._initial_fit_mab(context=contexts) + deep_context = self._get_deep_context(context=contexts) + return self._mab.predict_expectations(contexts=[deep_context]) + + def _get_deep_context(self, context: Any) -> List[Any]: + """ Returns deep representation of context. """ + temp = self.nn_with_se.transfer(context, 0, len(self.arms)) + feat = self.nn_with_se.feature_extractor(temp, self.nn_with_se.W).numpy().squeeze() + return list(feat) + + +class NNWithShallowExploration: + """ Neural Network with shallow exploration which means that weights are updated every _H_q iterations. """ + + def __init__(self, context_size: int, arms_count: int): + """ + Initial fit for NN. 
+ beta -- parameter for UCB exploration + H_q -- how many time steps to update NN + interT -- internal steps for GD + """ + self._beta = 0.02 + self._lambd = 1 + self._lr = 0.001 + self._H_q = 5 + self._interT = 1000 + self._hidden_dim = [1000, 1000] + hid_dim_lst = self._hidden_dim + dim_second_last = self._hidden_dim[-1] * 2 + + dim_for_init = [context_size + arms_count] + hid_dim_lst + [1] + self.arms_count = arms_count + self.W0, total_dim = self._initialization(dim_for_init) + self.LAMBDA = self._lambd * torch.eye(dim_second_last, dtype=torch.double) + self.bb = torch.zeros(self.LAMBDA.size()[0], dtype=torch.double).view(-1, 1) + + theta = np.random.randn(dim_second_last, 1) / np.sqrt(dim_second_last) + self.theta = torch.from_numpy(theta) + + self.THETA_action = torch.tensor([]) + self.CONTEXT_action = torch.tensor([]) + self.REWARD_action = torch.tensor([]) + self.result_neuralucb = [] + self.W = copy.deepcopy(self.W0) + self.summ = 0 + self.log = default_log('NNWithShallowExploration') + + def partial_fit(self, iter: int, + decisions: List[Any], rewards: List[float], contexts: List[Any] = None): + deep_contexts = [] + + # update NN and calculate reward + for decision, context, reward in zip(decisions, contexts, rewards): + + # calculate reward + temp = self.transfer(context, decision, self.arms_count) + feat = self.feature_extractor(temp, self.W) + deep_contexts.append(list(feat.numpy().squeeze())) + expected_reward = torch.mm(self.theta.view(1, -1), feat) + self._beta * self.UCB(self.LAMBDA, feat) + + self.summ += (expected_reward - reward) + self.result_neuralucb.append(self.summ) + + # gather dataset for next NN training (context_action and reward_action) + if np.mod(iter, self._H_q) == 0: + context_action = temp + reward_action = torch.tensor([reward], dtype=torch.double) + else: + context_action = torch.cat((self.CONTEXT_action, temp), 1) + reward_action = torch.cat((self.REWARD_action, torch.tensor([reward], dtype=torch.double)), 0) + + # update LAMBDA and bb + self.LAMBDA += torch.mm(self.feature_extractor(temp, self.W), + self.feature_extractor(temp, self.W).t()) + self.bb += reward * self.feature_extractor(temp, self.W) + theta, _ = torch.solve(self.bb, self.LAMBDA) + + if np.mod(iter, self._H_q) == 0: + theta_action = theta.view(-1, 1) + else: + theta_action = torch.cat((self.THETA_action, theta.view(-1, 1)), 1) + + # update weight of NN + if np.mod(iter, self._H_q) == self._H_q - 1: + self.log.info(f'Current regret: {self.summ}') + self.W = self.train_with_shallow_exploration(context_action, reward_action, self.W0, + self._interT, self._lr, theta_action, self._H_q) + return deep_contexts + + @staticmethod + def UCB(A, phi): + """ Ucb term. """ + try: + tmp, _ = torch.solve(phi, A) + except Exception: + tmp = torch.Tensor(np.linalg.solve(A, phi)) + + return torch.sqrt(torch.mm(torch.transpose(phi, 0, 1).double(), tmp.double())) + + @staticmethod + def transfer(c, a, arm_size): + """ + Transfer an array context + action to new context with dimension 2*(__context__ + __armsize__). + """ + action = np.zeros(arm_size) + action[a] = 1 + c_final = np.append(c, action) + c_final = torch.from_numpy(c_final) + c_final = c_final.view((len(c_final), 1)) + c_final = c_final.repeat(2, 1) + return c_final + + def train_with_shallow_exploration(self, X, Y, W_start, T, et, THETA, H): + """ Gd-based model training with shallow exploration + Dataset X, label Y. 
""" + W = copy.deepcopy(W_start) + X = X[:, -H:] + Y = Y[-H:] + THETA = THETA[:, -H:] + + prev_loss = 1000000 + prev_loss_1k = 1000000 + for i in range(0, T): + grad = self._gradient_loss(X, Y, W, THETA) + for j in range(0, len(W) - 1): + W[j] = W[j] - et * grad[j] + + curr_loss = self._loss(X, Y, W, THETA) + if i % 100 == 0: + print('------', curr_loss) + if curr_loss > prev_loss_1k: + et = et * 0.1 + print('lr/10 to', et) + + prev_loss_1k = curr_loss + + # early stopping + if abs(curr_loss - prev_loss) < 1e-6: + break + prev_loss = curr_loss + return W + + @staticmethod + def _initialization(dim): + """ Initialization. + dim consists of (d1, d2,...), where dl = 1 (placeholder, deprecated). """ + w = [] + total_dim = 0 + for i in range(0, len(dim) - 1): + if i < len(dim) - 2: + temp = np.random.randn(dim[i + 1], dim[i]) / np.sqrt(dim[i + 1]) + temp = np.kron(np.eye(2, dtype=int), temp) + temp = torch.from_numpy(temp) + w.append(temp) + total_dim += dim[i + 1] * dim[i] * 4 + else: + temp = np.random.randn(dim[i + 1], dim[i]) / np.sqrt(dim[i]) + temp = np.kron([[1, -1]], temp) + temp = torch.from_numpy(temp) + w.append(temp) + total_dim += dim[i + 1] * dim[i] * 2 + + return w, total_dim + + @staticmethod + def feature_extractor(x, W): + """ Functions feature extractor. + x is the input, dimension is d; W is the list of parameter matrices. """ + depth = len(W) + output = x + for i in range(0, depth - 1): + output = torch.mm(W[i], output) + output = output.clamp(min=0) + + output = output * math.sqrt(W[depth - 1].size()[1]) + return output + + def _gradient_loss(self, X, Y, W, THETA): + """ Return a list of grad, satisfying that W[i] = W[i] - grad[i] for single context x. """ + depth = len(W) + num_sample = Y.shape[0] + loss = [] + grad = [] + relu = [] + output = X + loss.append(output) + for i in range(0, depth - 1): + output = torch.mm(W[i], output) + relu.append(output) + output = output.clamp(min=0) + loss.append(output) + + THETA_t = torch.transpose(THETA, 0, 1).view(num_sample, 1, -1) + output_t = torch.transpose(output, 0, 1).view(num_sample, -1, 1) + output = torch.bmm(THETA_t, output_t).squeeze().view(1, -1) + + loss.append(output) + + feat = self.feature_extractor(X, W) + feat_t = torch.transpose(feat, 0, 1).view(num_sample, -1, 1) + output_t = torch.bmm(THETA_t, feat_t).squeeze().view(1, -1) + + # backward gradient propagation + back = output_t - Y + back = back.double() + grad_t = torch.mm(back, loss[depth - 1].t()) + grad.append(grad_t) + + for i in range(1, depth): + back = torch.mm(W[depth - i].t(), back) + back[relu[depth - i - 1] < 0] = 0 + grad_t = torch.mm(back, loss[depth - i - 1].t()) + grad.append(grad_t) + + grad1 = [] + for i in range(0, depth): + grad1.append(grad[depth - 1 - i] * math.sqrt(W[depth - 1].size()[1]) / len(X[0, :])) + + return grad1 + + def _loss(self, X, Y, W, THETA): + # total loss + num_sample = len(X[0, :]) + output = self.feature_extractor(X, W) + THETA_t = torch.transpose(THETA, 0, 1).view(num_sample, 1, -1) + output_t = torch.transpose(output, 0, 1).view(num_sample, -1, 1) + output_y = torch.bmm(THETA_t, output_t).squeeze().view(1, -1) + + summ = (Y - output_y).pow(2).sum() / num_sample + return summ diff --git a/golem/core/optimisers/adaptive/operator_agent.py b/golem/core/optimisers/adaptive/operator_agent.py index c088f0d8..10e0175e 100644 --- a/golem/core/optimisers/adaptive/operator_agent.py +++ b/golem/core/optimisers/adaptive/operator_agent.py @@ -19,6 +19,7 @@ class MutationAgentTypeEnum(Enum): random = 'random' bandit = 'bandit' 
contextual_bandit = 'contextual_bandit' + neural_bandit = 'neural_bandit' class ExperienceBuffer: diff --git a/golem/core/optimisers/genetic/gp_params.py b/golem/core/optimisers/genetic/gp_params.py index 33820a7b..a908682c 100644 --- a/golem/core/optimisers/genetic/gp_params.py +++ b/golem/core/optimisers/genetic/gp_params.py @@ -2,6 +2,7 @@ from typing import Sequence, Union, Any from golem.core.optimisers.adaptive.operator_agent import MutationAgentTypeEnum +from golem.core.optimisers.adaptive.mab_agents.neural_contextual_mab_agent import ContextAgentTypeEnum from golem.core.optimisers.genetic.operators.base_mutations import MutationStrengthEnum, MutationTypesEnum, \ simple_mutation_set from golem.core.optimisers.optimizer import AlgorithmParameters @@ -34,6 +35,9 @@ class GPAlgorithmParameters(AlgorithmParameters): MutationAgentTypeEnum.bandit uses Multi-Armed Bandit (MAB) learning algorithm. MutationAgentTypeEnum.contextual bandit uses contextual MAB learning algorithm. + :param context_agent_type: specifies how the context of graph/node will be obtained. + Should be specified with ContextAgentTypeEnum. + :param selection_types: Sequence of selection operators types :param crossover_types: Sequence of crossover operators types :param mutation_types: Sequence of mutation operators types @@ -68,6 +72,7 @@ class GPAlgorithmParameters(AlgorithmParameters): min_pop_size_with_elitism: int = 5 adaptive_mutation_type: MutationAgentTypeEnum = MutationAgentTypeEnum.default + context_agent_type: ContextAgentTypeEnum = ContextAgentTypeEnum.nodes_num selection_types: Sequence[SelectionTypesEnum] = \ (SelectionTypesEnum.tournament,) diff --git a/golem/core/optimisers/genetic/operators/mutation.py b/golem/core/optimisers/genetic/operators/mutation.py index 900d894c..80a697c4 100644 --- a/golem/core/optimisers/genetic/operators/mutation.py +++ b/golem/core/optimisers/genetic/operators/mutation.py @@ -5,7 +5,9 @@ import numpy as np from golem.core.dag.graph import Graph -from golem.core.optimisers.adaptive.mab_agent import MultiArmedBanditAgent +from golem.core.optimisers.adaptive.mab_agents.contextual_mab_agent import ContextualMultiArmedBanditAgent +from golem.core.optimisers.adaptive.mab_agents.neural_contextual_mab_agent import NeuralContextualMultiArmedBanditAgent +from golem.core.optimisers.adaptive.mab_agents.mab_agent import MultiArmedBanditAgent from golem.core.optimisers.adaptive.operator_agent import \ OperatorAgent, RandomAgent, ExperienceBuffer, MutationAgentTypeEnum from golem.core.optimisers.genetic.operators.base_mutations import \ @@ -47,9 +49,16 @@ def _init_operator_agent(parameters: 'GPAlgorithmParameters', if kind == MutationAgentTypeEnum.default or kind == MutationAgentTypeEnum.random: agent = RandomAgent(actions=parameters.mutation_types) elif kind == MutationAgentTypeEnum.bandit: - agent = MultiArmedBanditAgent(parameters.mutation_types, n_jobs=requirements.n_jobs) + agent = MultiArmedBanditAgent(actions=parameters.mutation_types, + n_jobs=requirements.n_jobs) elif kind == MutationAgentTypeEnum.contextual_bandit: - raise NotImplementedError() + agent = ContextualMultiArmedBanditAgent(actions=parameters.mutation_types, + context_agent_type=parameters.context_agent_type, + n_jobs=requirements.n_jobs) + elif kind == MutationAgentTypeEnum.neural_bandit: + agent = NeuralContextualMultiArmedBanditAgent(actions=parameters.mutation_types, + context_agent_type=parameters.context_agent_type, + n_jobs=requirements.n_jobs) else: raise TypeError(f'Unknown parameter {kind}') return agent 
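
For readers skimming the diff, a minimal configuration sketch of how the new agent kinds are selected (illustrative only, not part of the patch; import paths and field names are taken from the gp_params.py, operator_agent.py and context_agents.py hunks above):

from golem.core.optimisers.adaptive.context_agents import ContextAgentTypeEnum
from golem.core.optimisers.adaptive.operator_agent import MutationAgentTypeEnum
from golem.core.optimisers.genetic.gp_params import GPAlgorithmParameters

# Any of the bandit kinds can be requested; the contextual and neural bandits additionally
# read context_agent_type to decide how a graph is encoded into a context vector
# (number of nodes by default, or a FEATHER-G embedding via ContextAgentTypeEnum.feather_graph).
params = GPAlgorithmParameters(
    adaptive_mutation_type=MutationAgentTypeEnum.neural_bandit,
    context_agent_type=ContextAgentTypeEnum.nodes_num,
)

Given such parameters, Mutation._init_operator_agent in the hunk above instantiates MultiArmedBanditAgent, ContextualMultiArmedBanditAgent or NeuralContextualMultiArmedBanditAgent accordingly.
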
diff --git a/golem/structural_analysis/graph_sa/graph_structural_analysis.py b/golem/structural_analysis/graph_sa/graph_structural_analysis.py index 2287add8..726f2ca1 100644 --- a/golem/structural_analysis/graph_sa/graph_structural_analysis.py +++ b/golem/structural_analysis/graph_sa/graph_structural_analysis.py @@ -1,6 +1,6 @@ import os from copy import deepcopy -from typing import List, Optional, Tuple +from typing import List, Optional, Tuple, Dict import multiprocessing from golem.core.log import default_log @@ -226,7 +226,7 @@ def visualize_on_graph(graph: Graph, analysis_result: SAAnalysisResults, """ def get_nodes_and_edges_labels(analysis_result: SAAnalysisResults, iter: int) \ - -> tuple[dict[int, str], dict[int, str]]: + -> Tuple[Dict[int, str], Dict[int, str]]: """ Get nodes and edges labels in dictionary form. """ def get_str_labels(result: ObjectSAResult) -> str: diff --git a/golem/visualisation/opt_history/fitness_line.py b/golem/visualisation/opt_history/fitness_line.py index 32b30c7f..73bc4401 100644 --- a/golem/visualisation/opt_history/fitness_line.py +++ b/golem/visualisation/opt_history/fitness_line.py @@ -2,7 +2,8 @@ import os from datetime import datetime from pathlib import Path -from typing import Any, Dict, List, Optional, Union +from statistics import mean +from typing import Any, Dict, List, Optional, Union, TYPE_CHECKING import matplotlib as mpl import numpy as np @@ -13,9 +14,13 @@ from golem.core.optimisers.fitness import null_fitness from golem.core.optimisers.opt_history_objects.individual import Individual from golem.core.paths import default_data_dir +from golem.visualisation.opt_history.arg_constraint_wrapper import ArgConstraintWrapper from golem.visualisation.opt_history.history_visualization import HistoryVisualization from golem.visualisation.opt_history.utils import show_or_save_figure +if TYPE_CHECKING: + from golem.core.optimisers.opt_history_objects.opt_history import OptHistory + def with_alternate_matplotlib_backend(func): @functools.wraps(func) @@ -44,7 +49,7 @@ def setup_fitness_plot(axis: plt.Axes, xlabel: str, title: Optional[str] = None, axis.grid(axis='y') -def plot_fitness_line_per_time(axis: plt.Axes, generations: List[List[Individual]], label: Optional[str] = None, +def plot_fitness_line_per_time(axis: plt.Axes, generations, label: Optional[str] = None, with_generation_limits: bool = True) \ -> Dict[int, Individual]: best_fitness = null_fitness() @@ -247,3 +252,77 @@ def prev(self, event): b_prev.on_clicked(callback.prev) show_or_save_figure(fig, save_path, dpi) + + +class MultipleFitnessLines(metaclass=ArgConstraintWrapper): + """ Class to compare fitness changes during optimization process. + :param histories_to_compare: dictionary with labels to display as keys and histories as values. """ + + def __init__(self, histories_to_compare: Dict[str, List['OptHistory']], visuals_params: Dict[str, Any] = None): + self.histories_to_compare = histories_to_compare + self.visuals_params = visuals_params or {} + self.log = default_log(self) + + def visualize(self, save_path: Optional[Union[os.PathLike, str]] = None, dpi: Optional[int] = None): + """ Visualizes the best fitness values during the evolution in the form of line. + :param save_path: path to save the visualization. If set, then the image will be saved, + and if not, it will be displayed. + :param dpi: DPI of the output figure. 
+ """ + save_path = save_path or self.get_predefined_value('save_path') + dpi = dpi or self.get_predefined_value('dpi') + + fig, ax = plt.subplots(figsize=(6.4, 4.8), facecolor='w') + xlabel = 'Generation' + self.plot_multiple_fitness_lines(ax=ax) + setup_fitness_plot(ax, xlabel) + plt.legend() + show_or_save_figure(fig, save_path, dpi) + + def plot_multiple_fitness_lines(self, ax: plt.axis): + for histories, label in zip(list(self.histories_to_compare.values()), list(self.histories_to_compare.keys())): + plot_average_fitness_line_per_generations(axis=ax, histories=histories, label=label) + + def get_predefined_value(self, param: str): + return self.visuals_params.get(param) + + +def plot_average_fitness_line_per_generations(axis: plt.Axes, histories, label: Optional[str] = None): + """ Plots average fitness line. """ + best_fitness = null_fitness() + best_individuals = {} + + fitness_value_per_generation = [] + for history in histories: + generations = history.individuals + for gen_num, gen in enumerate(generations): + for ind in gen: + if ind.native_generation != gen_num: + continue + if ind.fitness > best_fitness: + best_individuals[gen_num] = ind + best_fitness = ind.fitness + + best_generations, best_fitnesses = np.transpose( + [(gen_num, abs(individual.fitness.value)) + for gen_num, individual in best_individuals.items()]) + + best_generations = list(best_generations) + best_fitnesses = list(best_fitnesses) + + if best_generations[-1] != len(generations) - 1: + best_fitnesses.append(abs(best_fitness.value)) + best_generations.append(len(generations) - 1) + + fitness_value_per_generation.append(best_fitnesses) + + # get average fitness value + average_fitness_per_gen = [] + max_len = max(len(i) for i in fitness_value_per_generation) + for i in range(max_len): + all_fitness_gen = [] + for fitnesses in fitness_value_per_generation: + if i < len(fitnesses): + all_fitness_gen.append(fitnesses[i]) + average_fitness_per_gen.append(mean(all_fitness_gen)) + axis.plot(range(len(average_fitness_per_gen)), average_fitness_per_gen, label=label) diff --git a/requirements.txt b/requirements.txt index 0ca6a54b..52e460a6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,9 +4,8 @@ pandas>=1.3.0; python_version >='3.8' # Models and frameworks networkx>=2.4, !=2.7.*, !=2.8.1, !=2.8.2, !=2.8.3 -scipy -zss -mabwiser # for adaptive learning +scipy~=1.7.3 +zss~=1.2.0 # Plotting matplotlib>=3.3.1; python_version >= '3.8' @@ -19,7 +18,7 @@ Pillow>=9.5.0 func_timeout==4.3.5 joblib>=0.17.0 requests>=2.0 -tqdm +tqdm~=4.65.0 typing>=3.7.0 psutil>=5.9.2 @@ -30,3 +29,7 @@ iOpt==0.1.6 # Tests pytest>=6.2.0 testfixtures>=6.18.0 + +# Bandits +mabwiser~=2.7.0 +karateclub==1.3.3 \ No newline at end of file diff --git a/requirements_adaptive.txt b/requirements_adaptive.txt new file mode 100644 index 00000000..8e6ab9c0 --- /dev/null +++ b/requirements_adaptive.txt @@ -0,0 +1,2 @@ +# heavy requirements needed for bandits and adaptive learning +torch~=2.0.0 \ No newline at end of file