From 28ec651478d5b27b0003d971620d623c3e7f56b1 Mon Sep 17 00:00:00 2001 From: Nunkyl Date: Wed, 23 Aug 2023 10:37:07 +0300 Subject: [PATCH 1/6] implement restart feature --- examples/graph_model_optimization.py | 14 ++- golem/core/optimisers/genetic/gp_optimizer.py | 61 ++++++----- .../opt_history_objects/individual.py | 3 +- golem/core/optimisers/optimizer.py | 44 +++++++- .../core/optimisers/populational_optimizer.py | 101 +++++++++++++++--- test/integration/test_saved_state.py | 86 +++++++++++++++ 6 files changed, 258 insertions(+), 51 deletions(-) create mode 100644 test/integration/test_saved_state.py diff --git a/examples/graph_model_optimization.py b/examples/graph_model_optimization.py index ef4b9d5f9..52b2e41d1 100644 --- a/examples/graph_model_optimization.py +++ b/examples/graph_model_optimization.py @@ -102,11 +102,13 @@ def run_custom_example(optimizer_cls: Type[GraphOptimizer] = EvoGraphOptimizer, objective = Objective({'custom': custom_metric}) optimizer = optimizer_cls( - objective=objective, - initial_graphs=initial, - requirements=requirements, - graph_generation_params=graph_generation_params, - graph_optimizer_params=optimizer_parameters) + objective=objective, + initial_graphs=initial, + requirements=requirements, + graph_generation_params=graph_generation_params, + graph_optimizer_params=optimizer_parameters, + use_saved_state=True + ) objective_eval = ObjectiveEvaluate(objective, data=data, visualisation=visualisation) optimized_graphs = optimizer.optimise(objective_eval) @@ -119,5 +121,7 @@ def run_custom_example(optimizer_cls: Type[GraphOptimizer] = EvoGraphOptimizer, visualisation = False timeout = datetime.timedelta(minutes=1) optimizers = [EvoGraphOptimizer, RandomSearchOptimizer, RandomMutationOptimizer] + optimizers = [EvoGraphOptimizer, RandomSearchOptimizer, RandomMutationSearchOptimizer] # SurrogateEachNgenOptimizer + optimizers = [EvoGraphOptimizer] for optimizer_cls in optimizers: run_custom_example(optimizer_cls, timeout, visualisation) diff --git a/golem/core/optimisers/genetic/gp_optimizer.py b/golem/core/optimisers/genetic/gp_optimizer.py index 36bc1db00..dfbb5e084 100644 --- a/golem/core/optimisers/genetic/gp_optimizer.py +++ b/golem/core/optimisers/genetic/gp_optimizer.py @@ -34,35 +34,42 @@ def __init__(self, requirements: GraphRequirements, graph_generation_params: GraphGenerationParams, graph_optimizer_params: GPAlgorithmParameters, + use_saved_state: bool = False, + saved_state_path: str = 'saved_optimisation_state/main/evo_graph_optimiser', + saved_state_file: str = None, **custom_optimizer_params ): - super().__init__(objective, initial_graphs, requirements, - graph_generation_params, graph_optimizer_params, **custom_optimizer_params) - # Define genetic operators - self.regularization = Regularization(graph_optimizer_params, graph_generation_params) - self.selection = Selection(graph_optimizer_params) - self.crossover = Crossover(graph_optimizer_params, requirements, graph_generation_params) - self.mutation = Mutation(graph_optimizer_params, requirements, graph_generation_params) - self.inheritance = Inheritance(graph_optimizer_params, self.selection) - self.elitism = Elitism(graph_optimizer_params) - self.operators = [self.regularization, self.selection, self.crossover, - self.mutation, self.inheritance, self.elitism] - self.reproducer = ReproductionController(graph_optimizer_params, self.selection, self.mutation, self.crossover) - - # Define adaptive parameters - self._pop_size: PopulationSize = init_adaptive_pop_size(graph_optimizer_params, self.generations) - self._operators_prob = init_adaptive_operators_prob(graph_optimizer_params) - self._graph_depth = AdaptiveGraphDepth(self.generations, - start_depth=requirements.start_depth, - max_depth=requirements.max_depth, - max_stagnation_gens=graph_optimizer_params.adaptive_depth_max_stagnation, - adaptive=graph_optimizer_params.adaptive_depth) - - # Define initial parameters - self.requirements.max_depth = self._graph_depth.initial - self.graph_optimizer_params.pop_size = self._pop_size.initial - self.initial_individuals = [Individual(graph, metadata=requirements.static_individual_metadata) - for graph in self.initial_graphs] + super().__init__(objective, initial_graphs, requirements, graph_generation_params, + graph_optimizer_params, use_saved_state, saved_state_path, saved_state_file, + **custom_optimizer_params) + + if not use_saved_state: + # Define genetic operators + self.regularization = Regularization(graph_optimizer_params, graph_generation_params) + self.selection = Selection(graph_optimizer_params) + self.crossover = Crossover(graph_optimizer_params, requirements, graph_generation_params) + self.mutation = Mutation(graph_optimizer_params, requirements, graph_generation_params) + self.inheritance = Inheritance(graph_optimizer_params, self.selection) + self.elitism = Elitism(graph_optimizer_params) + self.operators = [self.regularization, self.selection, self.crossover, + self.mutation, self.inheritance, self.elitism] + self.reproducer = ReproductionController(graph_optimizer_params, self.selection, self.mutation, self.crossover) + + # Define adaptive parameters + self._pop_size: PopulationSize = init_adaptive_pop_size(graph_optimizer_params, self.generations) + self._operators_prob = init_adaptive_operators_prob(graph_optimizer_params) + self._graph_depth = AdaptiveGraphDepth(self.generations, + start_depth=requirements.start_depth, + max_depth=requirements.max_depth, + max_stagnation_gens=graph_optimizer_params.adaptive_depth_max_stagnation, + adaptive=graph_optimizer_params.adaptive_depth) + + # Define initial parameters + self.requirements.max_depth = self._graph_depth.initial + self.graph_optimizer_params.pop_size = self._pop_size.initial + self.initial_individuals = [Individual(graph, metadata=requirements.static_individual_metadata) + for graph in self.initial_graphs] + def _initial_population(self, evaluator: EvaluationOperator): """ Initializes the initial population """ diff --git a/golem/core/optimisers/opt_history_objects/individual.py b/golem/core/optimisers/opt_history_objects/individual.py index f43ae66c1..c42406290 100644 --- a/golem/core/optimisers/opt_history_objects/individual.py +++ b/golem/core/optimisers/opt_history_objects/individual.py @@ -112,7 +112,8 @@ def __repr__(self): f'| graph: {self.graph}>') def __eq__(self, other: Individual): - return self.uid == other.uid + return self.uid == other.uid and self.fitness.value == other.fitness.value and \ + self.native_generation == other.native_generation and self.graph == other.graph def __copy__(self): default_log(self).log_or_raise('warning', INDIVIDUAL_COPY_RESTRICTION_MESSAGE) diff --git a/golem/core/optimisers/optimizer.py b/golem/core/optimisers/optimizer.py index 86adf1a66..476d8d3be 100644 --- a/golem/core/optimisers/optimizer.py +++ b/golem/core/optimisers/optimizer.py @@ -1,3 +1,9 @@ +import glob +import os +import uuid +import dill as pickle +# from deepdiff import DeepDiff + from abc import abstractmethod from dataclasses import dataclass from typing import Any, Callable, Optional, Sequence, Union @@ -113,6 +119,7 @@ def __init__(self, requirements: Optional[OptimizationParameters] = None, graph_generation_params: Optional[GraphGenerationParams] = None, graph_optimizer_params: Optional[AlgorithmParameters] = None, + saved_state_path='saved_optimisation_state/main', **custom_optimizer_params): self.log = default_log(self) self._objective = objective @@ -128,6 +135,8 @@ def __init__(self, # Log random state for reproducibility of runs RandomStateHandler.log_random_state() + self._saved_state_path = saved_state_path + self._run_id = str(uuid.uuid1()) @property def objective(self) -> Objective: """Returns Objective of this optimizer with information about metrics used.""" @@ -161,7 +170,11 @@ def set_evaluation_callback(self, callback: Optional[GraphFunction]): @property def _progressbar(self): if self.requirements.show_progress: - bar = tqdm(total=self.requirements.num_of_generations, desc='Generations', unit='gen', initial=0) + if self.use_saved_state: + bar = tqdm(total=self.requirements.num_of_generations, desc='Generations', unit='gen', + initial=self.current_generation_num - 2) + else: + bar = tqdm(total=self.requirements.num_of_generations, desc='Generations', unit='gen', initial=0) else: # disable call to tqdm.__init__ to avoid stdout/stderr access inside it # part of a workaround for https://github.com/nccr-itmo/FEDOT/issues/765 @@ -169,6 +182,35 @@ def _progressbar(self): return bar + def save(self, saved_state_path): + """ + Method for serializing and saving a class object to a file using the dill library + :param str saved_state_path: full path to the saved state file (including filename) + """ + folder_path = os.path.dirname(os.path.abspath(saved_state_path)) + if not os.path.isdir(folder_path): + os.makedirs(folder_path) + self.log.info(f'Created directory for saving optimization state: {folder_path}') + with open(saved_state_path, 'wb') as f: + pickle.dump(self.__dict__, f, 2) + + def load(self, saved_state_path): + """ + Method for loading a serialized class object from file using the dill library + :param str saved_state_path: full path to the saved state file + """ + with open(saved_state_path, 'rb') as f: + self.__dict__.update(pickle.load(f)) + + + def _find_latest_dir(self, directory: str) -> str: + return max([os.path.join(directory, d) for d in os.listdir(directory) if os.path.isdir( + os.path.join(directory, d))], key=os.path.getmtime) + + def _find_latest_file_in_dir(self, directory: str) -> str: + return max(glob.glob(os.path.join(directory, '*')), key=os.path.getmtime) + + IterationCallback = Callable[[PopulationT, GraphOptimizer], Any] diff --git a/golem/core/optimisers/populational_optimizer.py b/golem/core/optimisers/populational_optimizer.py index 8a8704d9d..91faa0e6b 100644 --- a/golem/core/optimisers/populational_optimizer.py +++ b/golem/core/optimisers/populational_optimizer.py @@ -1,4 +1,7 @@ +import os +import time from abc import abstractmethod +from datetime import timedelta, datetime from random import choice from typing import Any, Optional, Sequence, Dict @@ -13,6 +16,7 @@ from golem.core.optimisers.optimization_parameters import GraphRequirements from golem.core.optimisers.optimizer import GraphGenerationParams, GraphOptimizer, AlgorithmParameters from golem.core.optimisers.timer import OptimisationTimer +from golem.core.paths import default_data_dir from golem.utilities.grouped_condition import GroupedCondition @@ -40,25 +44,76 @@ def __init__(self, requirements: GraphRequirements, graph_generation_params: GraphGenerationParams, graph_optimizer_params: Optional['AlgorithmParameters'] = None, + use_saved_state: bool = False, + saved_state_path: str = 'saved_optimisation_state/main/populational_optimiser', + saved_state_file: str = None, **custom_optimizer_params ): - super().__init__(objective, initial_graphs, requirements, - graph_generation_params, graph_optimizer_params, **custom_optimizer_params) - self.population = None - self.generations = GenerationKeeper(self.objective, keep_n_best=requirements.keep_n_best) - self.timer = OptimisationTimer(timeout=self.requirements.timeout) - dispatcher_type = MultiprocessingDispatcher if self.requirements.parallelization_mode == 'populational' else \ - SequentialDispatcher - - self.eval_dispatcher = dispatcher_type(adapter=graph_generation_params.adapter, - n_jobs=requirements.n_jobs, - graph_cleanup_fn=_try_unfit_graph, - delegate_evaluator=graph_generation_params.remote_evaluator) + super().__init__(objective, initial_graphs, requirements, graph_generation_params, graph_optimizer_params, + saved_state_path, **custom_optimizer_params) + + # Restore state from previous run + if use_saved_state: + self.log.info('USING SAVED STATE') + if saved_state_file: + if os.path.isfile(saved_state_file): + current_saved_state_path = saved_state_file + else: + raise SystemExit('ERROR: Could not restore saved optimisation state: ' + f'given file with saved state {saved_state_file} not found.') + else: + try: + full_state_path = os.path.join(default_data_dir(), self._saved_state_path) + current_saved_state_path = self._find_latest_file_in_dir(self._find_latest_dir(full_state_path)) + except (ValueError, FileNotFoundError): + raise SystemExit('ERROR: Could not restore saved optimisation state: ' + f'path with saved state {full_state_path} not found.') + try: + self.load(current_saved_state_path) + except Exception as e: + raise SystemExit('ERROR: Could not restore saved optimisation state from {full_state_path}.' + f'If saved state file is broken remove it manually from the saved state dir or' + f'pass a valid saved state filepath.' + f'Full error message: {e}') + + # Override optimisation params from the saved state file with new values + self.requirements.num_of_generations = requirements.num_of_generations + self.requirements.timeout = requirements.timeout + + # Update all time parameters + saved_state_timestamp = datetime.fromtimestamp(os.path.getmtime(current_saved_state_path)) + elapsed_time: timedelta = saved_state_timestamp - self.timer.start_time + + timeout = self.requirements.timeout - elapsed_time + self.timer = OptimisationTimer(timeout=timeout) + self.requirements.timeout = self.requirements.timeout - timedelta(seconds=elapsed_time.total_seconds()) + self.eval_dispatcher.timer = self.requirements.timeout + + stag_time_delta = saved_state_timestamp - self.generations._stagnation_start_time + self.generations._stagnation_start_time = datetime.now() - stag_time_delta + else: + self.population = None + self.generations = GenerationKeeper(self.objective, keep_n_best=requirements.keep_n_best) + self.timer = OptimisationTimer(timeout=self.requirements.timeout) + + dispatcher_type = MultiprocessingDispatcher if self.requirements.parallelization_mode == 'populational' else \ + SequentialDispatcher + + self.eval_dispatcher = dispatcher_type(adapter=graph_generation_params.adapter, + n_jobs=requirements.n_jobs, + graph_cleanup_fn=_try_unfit_graph, + delegate_evaluator=graph_generation_params.remote_evaluator) + + # in how many generations structural diversity check should be performed + self.gen_structural_diversity_check = self.graph_optimizer_params.structural_diversity_frequency_check + + self.use_saved_state = use_saved_state # early_stopping_iterations and early_stopping_timeout may be None, so use some obvious max number max_stagnation_length = requirements.early_stopping_iterations or requirements.num_of_generations max_stagnation_time = requirements.early_stopping_timeout or self.timer.timeout + self.stop_optimization = \ GroupedCondition(results_as_message=True).add_condition( lambda: self.timer.is_time_limit_reached(self.current_generation_num - 1), @@ -70,10 +125,10 @@ def __init__(self, ).add_condition( lambda: (max_stagnation_length is not None and self.generations.stagnation_iter_count >= max_stagnation_length), - 'Optimisation finished: Early stopping iterations criteria was satisfied' + 'Optimisation finished: Early stopping iterations criteria was satisfied (stagnation_iter_count)' ).add_condition( lambda: self.generations.stagnation_time_duration >= max_stagnation_time, - 'Optimisation finished: Early stopping timeout criteria was satisfied' + 'Optimisation finished: Early stopping timeout criteria was satisfied (stagnation_time_duration)' ) # in how many generations structural diversity check should be performed self.gen_structural_diversity_check = self.graph_optimizer_params.structural_diversity_frequency_check @@ -86,14 +141,16 @@ def set_evaluation_callback(self, callback: Optional[GraphFunction]): # Redirect callback to evaluation dispatcher self.eval_dispatcher.set_graph_evaluation_callback(callback) - def optimise(self, objective: ObjectiveFunction) -> Sequence[Graph]: + def optimise(self, objective: ObjectiveFunction, save_state_delta: int = 60) -> Sequence[Graph]: - # eval_dispatcher defines how to evaluate objective on the whole population + saved_state_path = os.path.join(default_data_dir(), self._saved_state_path, self._run_id) evaluator = self.eval_dispatcher.dispatch(objective, self.timer) + last_write_time = datetime.now() with self.timer, self._progressbar as pbar: - self._initial_population(evaluator) + if not self.use_saved_state: + self._initial_population(evaluator) while not self.stop_optimization(): try: @@ -108,7 +165,17 @@ def optimise(self, objective: ObjectiveFunction) -> Sequence[Graph]: break # Adding of new population to history self._update_population(new_population) + delta = datetime.now() - last_write_time + # Create new file with saved state every {save_state_delta} seconds + if delta.seconds >= save_state_delta: + save_path = os.path.join(saved_state_path, f'{str(round(time.time()))}.pkl') + self.save(save_path) + self.log.info(f'State saved to {save_path}') + last_write_time = datetime.now() pbar.close() + save_path = os.path.join(saved_state_path, f'{str(round(time.time()))}.pkl') + self.save(save_path) + self.log.info(save_path) self._update_population(self.best_individuals, 'final_choices') return [ind.graph for ind in self.best_individuals] diff --git a/test/integration/test_saved_state.py b/test/integration/test_saved_state.py new file mode 100644 index 000000000..6325b04fd --- /dev/null +++ b/test/integration/test_saved_state.py @@ -0,0 +1,86 @@ +import os +import time +import glob + +from datetime import timedelta +from examples.synthetic_graph_evolution.generators import generate_labeled_graph +from golem.core.adapter.nx_adapter import BaseNetworkxAdapter +from golem.core.optimisers.genetic.gp_params import GPAlgorithmParameters +from golem.core.optimisers.genetic.gp_optimizer import EvoGraphOptimizer +from golem.core.optimisers.objective import Objective +from golem.core.optimisers.optimization_parameters import GraphRequirements +from golem.core.optimisers.optimizer import GraphGenerationParams +from golem.core.paths import default_data_dir +from golem.metrics.edit_distance import tree_edit_dist +from functools import partial + + +def find_latest_file_in_dir(directory: str) -> str: + return max(glob.glob(os.path.join(directory, '*')), key=os.path.getmtime) + +def test_saved_state(): + # Set params + size = 16 + num_of_generations_run_1 = 40 # 40 100 10 + num_of_generations_run_2 = 45 # 45 120 12 + timeout = 10 + saved_state_path = 'saved_optimisation_state/test' + + # Generate target graph sought by optimizer using edit distance objective + node_types = ('a', 'b') # Available node types that can appear in graphs + target_graph = generate_labeled_graph('tree', size, node_types) + objective = Objective(partial(tree_edit_dist, target_graph)) + initial_population = [generate_labeled_graph('tree', 5, node_types) for _ in range(10)] + + # Setup optimization parameters + requirements_run_1 = GraphRequirements(timeout=timedelta(minutes=timeout), + num_of_generations=num_of_generations_run_1) + requirements_run_2 = GraphRequirements(timeout=timedelta(minutes=timeout), + num_of_generations=num_of_generations_run_2) + + gen_params = GraphGenerationParams(adapter=BaseNetworkxAdapter(), available_node_types=node_types) + algo_params = GPAlgorithmParameters(pop_size=30) + + # Build and run the optimizer to create a saved state file + optimiser1 = EvoGraphOptimizer(objective, initial_population, requirements_run_1, gen_params, algo_params, + saved_state_path=saved_state_path) + st = time.time() + optimiser1.optimise(objective, save_state_delta=1) + et = time.time() + time1 = int(et - st) / 60 + + # Check that the file with saved state was created + saved_state_full_path = os.path.join(default_data_dir(), saved_state_path, optimiser1._run_id) + saved_state_file = find_latest_file_in_dir(saved_state_full_path) + assert os.path.isfile(saved_state_file) is True, 'ERROR: Saved state file was not created!' + + # Create the optimizer to check that the saved state was used + optimiser2 = EvoGraphOptimizer(objective, initial_population, requirements_run_2, gen_params, algo_params, + use_saved_state=True, saved_state_path=saved_state_path) + + # Check that the restored object has the same main parameters as the original or at least the params are not empty + assert optimiser1.current_generation_num == optimiser2.current_generation_num + 1, \ + f'ERROR: Restored object field \'current_generation_num\' has wrong value: {optimiser2.current_generation_num}' + assert optimiser1.generations.stagnation_iter_count == optimiser2.generations.stagnation_iter_count + 1, \ + f'ERROR: Restored object field \'generations.stagnation_iter_count\' has wrong value: ' \ + f'{optimiser2.generations.stagnation_iter_count}' + assert optimiser1.best_individuals != [], f'ERROR: Restored object field \'best_individuals\' is empty' + assert optimiser1.population is not None, f'ERROR: Restored object field \'population\' is empty' + assert optimiser1.timer.timeout > optimiser2.timer.timeout, 'ERROR: timeout was not adjusted correctly' + + st = time.time() + optimiser2.optimise(objective) + et = time.time() + time2 = int(et - st) / 60 + + # Make sure the second run made it to the end + assert optimiser2.current_generation_num == num_of_generations_run_2 + 2 + + print(time1) + print(time2) + # Check that the run with the saved state takes less time than it would without it + assert time1 > 2 + assert time2 < 1 + + # Check that the result of the second optimisation is the same as or better than the first + assert optimiser2.best_individuals[0].fitness.value <= optimiser1.best_individuals[0].fitness.value From fac36681a0ead14129804ad1c915687195d815b1 Mon Sep 17 00:00:00 2001 From: Nunkyl Date: Thu, 21 Mar 2024 00:56:10 +0300 Subject: [PATCH 2/6] Minor fix --- golem/core/optimisers/optimizer.py | 1 - test/integration/test_saved_state.py | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/golem/core/optimisers/optimizer.py b/golem/core/optimisers/optimizer.py index 476d8d3be..1bd762199 100644 --- a/golem/core/optimisers/optimizer.py +++ b/golem/core/optimisers/optimizer.py @@ -2,7 +2,6 @@ import os import uuid import dill as pickle -# from deepdiff import DeepDiff from abc import abstractmethod from dataclasses import dataclass diff --git a/test/integration/test_saved_state.py b/test/integration/test_saved_state.py index 6325b04fd..60e781e95 100644 --- a/test/integration/test_saved_state.py +++ b/test/integration/test_saved_state.py @@ -21,8 +21,8 @@ def find_latest_file_in_dir(directory: str) -> str: def test_saved_state(): # Set params size = 16 - num_of_generations_run_1 = 40 # 40 100 10 - num_of_generations_run_2 = 45 # 45 120 12 + num_of_generations_run_1 = 40 + num_of_generations_run_2 = 45 timeout = 10 saved_state_path = 'saved_optimisation_state/test' From e063932e7dfc507bcc12c964ea13741ed79a1eec Mon Sep 17 00:00:00 2001 From: Nunkyl Date: Thu, 21 Mar 2024 11:46:29 +0300 Subject: [PATCH 3/6] Fix comments --- examples/graph_model_optimization.py | 14 +++++--------- .../optimisers/opt_history_objects/individual.py | 3 +-- test/integration/test_saved_state.py | 2 -- 3 files changed, 6 insertions(+), 13 deletions(-) diff --git a/examples/graph_model_optimization.py b/examples/graph_model_optimization.py index 52b2e41d1..ef4b9d5f9 100644 --- a/examples/graph_model_optimization.py +++ b/examples/graph_model_optimization.py @@ -102,13 +102,11 @@ def run_custom_example(optimizer_cls: Type[GraphOptimizer] = EvoGraphOptimizer, objective = Objective({'custom': custom_metric}) optimizer = optimizer_cls( - objective=objective, - initial_graphs=initial, - requirements=requirements, - graph_generation_params=graph_generation_params, - graph_optimizer_params=optimizer_parameters, - use_saved_state=True - ) + objective=objective, + initial_graphs=initial, + requirements=requirements, + graph_generation_params=graph_generation_params, + graph_optimizer_params=optimizer_parameters) objective_eval = ObjectiveEvaluate(objective, data=data, visualisation=visualisation) optimized_graphs = optimizer.optimise(objective_eval) @@ -121,7 +119,5 @@ def run_custom_example(optimizer_cls: Type[GraphOptimizer] = EvoGraphOptimizer, visualisation = False timeout = datetime.timedelta(minutes=1) optimizers = [EvoGraphOptimizer, RandomSearchOptimizer, RandomMutationOptimizer] - optimizers = [EvoGraphOptimizer, RandomSearchOptimizer, RandomMutationSearchOptimizer] # SurrogateEachNgenOptimizer - optimizers = [EvoGraphOptimizer] for optimizer_cls in optimizers: run_custom_example(optimizer_cls, timeout, visualisation) diff --git a/golem/core/optimisers/opt_history_objects/individual.py b/golem/core/optimisers/opt_history_objects/individual.py index c42406290..f43ae66c1 100644 --- a/golem/core/optimisers/opt_history_objects/individual.py +++ b/golem/core/optimisers/opt_history_objects/individual.py @@ -112,8 +112,7 @@ def __repr__(self): f'| graph: {self.graph}>') def __eq__(self, other: Individual): - return self.uid == other.uid and self.fitness.value == other.fitness.value and \ - self.native_generation == other.native_generation and self.graph == other.graph + return self.uid == other.uid def __copy__(self): default_log(self).log_or_raise('warning', INDIVIDUAL_COPY_RESTRICTION_MESSAGE) diff --git a/test/integration/test_saved_state.py b/test/integration/test_saved_state.py index 60e781e95..318e31437 100644 --- a/test/integration/test_saved_state.py +++ b/test/integration/test_saved_state.py @@ -76,8 +76,6 @@ def test_saved_state(): # Make sure the second run made it to the end assert optimiser2.current_generation_num == num_of_generations_run_2 + 2 - print(time1) - print(time2) # Check that the run with the saved state takes less time than it would without it assert time1 > 2 assert time2 < 1 From 62149feddc809b4fb4dee3d8d8757c51905f62f1 Mon Sep 17 00:00:00 2001 From: Nunkyl Date: Thu, 21 Mar 2024 12:24:49 +0300 Subject: [PATCH 4/6] Fix some pep8 issues --- golem/core/optimisers/genetic/gp_optimizer.py | 4 ++-- golem/core/optimisers/optimizer.py | 3 +-- golem/core/optimisers/populational_optimizer.py | 4 ++-- test/integration/test_saved_state.py | 11 ++++++----- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/golem/core/optimisers/genetic/gp_optimizer.py b/golem/core/optimisers/genetic/gp_optimizer.py index dfbb5e084..6b28c5c28 100644 --- a/golem/core/optimisers/genetic/gp_optimizer.py +++ b/golem/core/optimisers/genetic/gp_optimizer.py @@ -53,7 +53,8 @@ def __init__(self, self.elitism = Elitism(graph_optimizer_params) self.operators = [self.regularization, self.selection, self.crossover, self.mutation, self.inheritance, self.elitism] - self.reproducer = ReproductionController(graph_optimizer_params, self.selection, self.mutation, self.crossover) + self.reproducer = ReproductionController(graph_optimizer_params, self.selection, self.mutation, + self.crossover) # Define adaptive parameters self._pop_size: PopulationSize = init_adaptive_pop_size(graph_optimizer_params, self.generations) @@ -70,7 +71,6 @@ def __init__(self, self.initial_individuals = [Individual(graph, metadata=requirements.static_individual_metadata) for graph in self.initial_graphs] - def _initial_population(self, evaluator: EvaluationOperator): """ Initializes the initial population """ # Adding of initial assumptions to history as zero generation diff --git a/golem/core/optimisers/optimizer.py b/golem/core/optimisers/optimizer.py index 1bd762199..b698a6253 100644 --- a/golem/core/optimisers/optimizer.py +++ b/golem/core/optimisers/optimizer.py @@ -136,6 +136,7 @@ def __init__(self, self._saved_state_path = saved_state_path self._run_id = str(uuid.uuid1()) + @property def objective(self) -> Objective: """Returns Objective of this optimizer with information about metrics used.""" @@ -180,7 +181,6 @@ def _progressbar(self): bar = EmptyProgressBar() return bar - def save(self, saved_state_path): """ Method for serializing and saving a class object to a file using the dill library @@ -201,7 +201,6 @@ def load(self, saved_state_path): with open(saved_state_path, 'rb') as f: self.__dict__.update(pickle.load(f)) - def _find_latest_dir(self, directory: str) -> str: return max([os.path.join(directory, d) for d in os.listdir(directory) if os.path.isdir( os.path.join(directory, d))], key=os.path.getmtime) diff --git a/golem/core/optimisers/populational_optimizer.py b/golem/core/optimisers/populational_optimizer.py index 91faa0e6b..5fb98a19c 100644 --- a/golem/core/optimisers/populational_optimizer.py +++ b/golem/core/optimisers/populational_optimizer.py @@ -97,8 +97,8 @@ def __init__(self, self.generations = GenerationKeeper(self.objective, keep_n_best=requirements.keep_n_best) self.timer = OptimisationTimer(timeout=self.requirements.timeout) - dispatcher_type = MultiprocessingDispatcher if self.requirements.parallelization_mode == 'populational' else \ - SequentialDispatcher + dispatcher_type = MultiprocessingDispatcher if self.requirements.parallelization_mode == 'populational' \ + else SequentialDispatcher self.eval_dispatcher = dispatcher_type(adapter=graph_generation_params.adapter, n_jobs=requirements.n_jobs, diff --git a/test/integration/test_saved_state.py b/test/integration/test_saved_state.py index 318e31437..c6f4b29dd 100644 --- a/test/integration/test_saved_state.py +++ b/test/integration/test_saved_state.py @@ -18,6 +18,7 @@ def find_latest_file_in_dir(directory: str) -> str: return max(glob.glob(os.path.join(directory, '*')), key=os.path.getmtime) + def test_saved_state(): # Set params size = 16 @@ -34,7 +35,7 @@ def test_saved_state(): # Setup optimization parameters requirements_run_1 = GraphRequirements(timeout=timedelta(minutes=timeout), - num_of_generations=num_of_generations_run_1) + num_of_generations=num_of_generations_run_1) requirements_run_2 = GraphRequirements(timeout=timedelta(minutes=timeout), num_of_generations=num_of_generations_run_2) @@ -43,7 +44,7 @@ def test_saved_state(): # Build and run the optimizer to create a saved state file optimiser1 = EvoGraphOptimizer(objective, initial_population, requirements_run_1, gen_params, algo_params, - saved_state_path=saved_state_path) + saved_state_path=saved_state_path) st = time.time() optimiser1.optimise(objective, save_state_delta=1) et = time.time() @@ -56,7 +57,7 @@ def test_saved_state(): # Create the optimizer to check that the saved state was used optimiser2 = EvoGraphOptimizer(objective, initial_population, requirements_run_2, gen_params, algo_params, - use_saved_state=True, saved_state_path=saved_state_path) + use_saved_state=True, saved_state_path=saved_state_path) # Check that the restored object has the same main parameters as the original or at least the params are not empty assert optimiser1.current_generation_num == optimiser2.current_generation_num + 1, \ @@ -64,8 +65,8 @@ def test_saved_state(): assert optimiser1.generations.stagnation_iter_count == optimiser2.generations.stagnation_iter_count + 1, \ f'ERROR: Restored object field \'generations.stagnation_iter_count\' has wrong value: ' \ f'{optimiser2.generations.stagnation_iter_count}' - assert optimiser1.best_individuals != [], f'ERROR: Restored object field \'best_individuals\' is empty' - assert optimiser1.population is not None, f'ERROR: Restored object field \'population\' is empty' + assert optimiser1.best_individuals != [], 'ERROR: Restored object field \'best_individuals\' is empty' + assert optimiser1.population is not None, 'ERROR: Restored object field \'population\' is empty' assert optimiser1.timer.timeout > optimiser2.timer.timeout, 'ERROR: timeout was not adjusted correctly' st = time.time() From 12c358d9cd5c5bb004578a4daec269fb9e1b9425 Mon Sep 17 00:00:00 2001 From: Nunkyl Date: Fri, 22 Mar 2024 14:31:14 +0300 Subject: [PATCH 5/6] Delete extra saved state files --- golem/core/optimisers/populational_optimizer.py | 13 ++++++++++--- test/integration/test_saved_state.py | 5 ++--- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/golem/core/optimisers/populational_optimizer.py b/golem/core/optimisers/populational_optimizer.py index 5fb98a19c..6b770bdf7 100644 --- a/golem/core/optimisers/populational_optimizer.py +++ b/golem/core/optimisers/populational_optimizer.py @@ -172,13 +172,20 @@ def optimise(self, objective: ObjectiveFunction, save_state_delta: int = 60) -> self.save(save_path) self.log.info(f'State saved to {save_path}') last_write_time = datetime.now() + self._clean_up_old_saved_state_files(save_path) pbar.close() - save_path = os.path.join(saved_state_path, f'{str(round(time.time()))}.pkl') - self.save(save_path) - self.log.info(save_path) self._update_population(self.best_individuals, 'final_choices') return [ind.graph for ind in self.best_individuals] + @staticmethod + def _clean_up_old_saved_state_files(last_saved_state_path: str): + folder_path = os.path.dirname(os.path.abspath(last_saved_state_path)) + filename = os.path.basename(os.path.abspath(last_saved_state_path)) + for file in os.listdir(folder_path): + if file != filename: + os.remove(os.path.join(folder_path, file)) + + @property def best_individuals(self): return self.generations.best_individuals diff --git a/test/integration/test_saved_state.py b/test/integration/test_saved_state.py index c6f4b29dd..aad3bf725 100644 --- a/test/integration/test_saved_state.py +++ b/test/integration/test_saved_state.py @@ -52,8 +52,7 @@ def test_saved_state(): # Check that the file with saved state was created saved_state_full_path = os.path.join(default_data_dir(), saved_state_path, optimiser1._run_id) - saved_state_file = find_latest_file_in_dir(saved_state_full_path) - assert os.path.isfile(saved_state_file) is True, 'ERROR: Saved state file was not created!' + assert len(os.listdir(saved_state_full_path)) == 1, 'ERROR: Wrong number of saved state files!' # Create the optimizer to check that the saved state was used optimiser2 = EvoGraphOptimizer(objective, initial_population, requirements_run_2, gen_params, algo_params, @@ -70,7 +69,7 @@ def test_saved_state(): assert optimiser1.timer.timeout > optimiser2.timer.timeout, 'ERROR: timeout was not adjusted correctly' st = time.time() - optimiser2.optimise(objective) + optimiser2.optimise(objective, save_state_delta=0) et = time.time() time2 = int(et - st) / 60 From bc97b6212448a755e6d8a7cd4c422578ba1b6888 Mon Sep 17 00:00:00 2001 From: Nunkyl Date: Fri, 22 Mar 2024 14:32:51 +0300 Subject: [PATCH 6/6] Fix pep8 issue --- golem/core/optimisers/populational_optimizer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/golem/core/optimisers/populational_optimizer.py b/golem/core/optimisers/populational_optimizer.py index 6b770bdf7..21b14330b 100644 --- a/golem/core/optimisers/populational_optimizer.py +++ b/golem/core/optimisers/populational_optimizer.py @@ -185,7 +185,6 @@ def _clean_up_old_saved_state_files(last_saved_state_path: str): if file != filename: os.remove(os.path.join(folder_path, file)) - @property def best_individuals(self): return self.generations.best_individuals