From 83b86ca39f56d12ab76e112ab7d648a33c84aa10 Mon Sep 17 00:00:00 2001 From: LisIva Date: Fri, 15 Mar 2024 16:40:39 +0300 Subject: [PATCH] Change cross-over for symnet implementation --- epde/globals.py | 10 +-- epde/interface/interface.py | 18 ++--- epde/interface/token_family.py | 76 +++++----------------- epde/operators/multiobjective/variation.py | 56 ++++++---------- epde/structure/main_structures.py | 5 +- exscripts/experiment_burgers_sindy.py | 6 +- 6 files changed, 54 insertions(+), 117 deletions(-) diff --git a/epde/globals.py b/epde/globals.py index 414e192..dc94298 100644 --- a/epde/globals.py +++ b/epde/globals.py @@ -10,7 +10,7 @@ import warnings from epde.cache.cache import Cache -# from symnet import pool_terms +from symnet import pool_terms def init_caches(set_grids: bool = False): @@ -98,7 +98,7 @@ def init_verbose(plot_DE_solutions : bool = False, show_iter_idx : bool = True, show_iter_stats, show_warnings) -# def init_symnet_pool(max_factors_in_term, families, u, derivs, shape, names, grids, max_deriv_order): -# global sympool -# sympool = pool_terms.PoolTerms(max_factors_in_term, families) -# sympool.set_initial_distr(u, derivs, shape, names, families=families, grids=grids, max_deriv_order=max_deriv_order) +def init_symnet_pool(max_factors_in_term, families, u, derivs, shape, names, grids, max_deriv_order): + global sympool + sympool = pool_terms.PoolTerms(max_factors_in_term, families) + sympool.set_initial_distr(u, derivs, shape, names, families=families, grids=grids, max_deriv_order=max_deriv_order) diff --git a/epde/interface/interface.py b/epde/interface/interface.py index 68b3725..31e83bd 100644 --- a/epde/interface/interface.py +++ b/epde/interface/interface.py @@ -503,9 +503,7 @@ def set_preprocessor(self, preprocessor_pipeline: PreprocessingPipe = None, def create_pool(self, data: Union[np.ndarray, list, tuple], variable_names=['u',], derivs=None, max_deriv_order=1, additional_tokens=[], - data_fun_pow: int = 1, deriv_fun_pow: int = 1, max_factors_in_term=1, - custom_cross_prob: dict = {}): - + data_fun_pow: int = 1, max_factors_in_term=1): self.pool_params = {'variable_names' : variable_names, 'max_deriv_order' : max_deriv_order, 'additional_tokens' : [family.token_family.ftype for family in additional_tokens]} assert (isinstance(derivs, list) and isinstance(derivs[0], np.ndarray)) or derivs is None @@ -553,13 +551,12 @@ def create_pool(self, data: Union[np.ndarray, list, tuple], variable_names=['u', print(isinstance(additional_tokens, PreparedTokens)) raise TypeError(f'Incorrect type of additional tokens: expected list or TokenFamily/Prepared_tokens - obj, instead got {type(additional_tokens)}') self.pool = TFPool(data_tokens + [tf if isinstance(tf, TokenFamily) else tf.token_family - for tf in additional_tokens],custom_cross_prob=custom_cross_prob, - max_factors_in_term=max_factors_in_term) + for tf in additional_tokens]) - # grids = [self.cache[0].memory_default.get('0'), self.cache[0].memory_default.get('1')] - # global_var.init_symnet_pool(max_factors_in_term, self.pool.families, - # entry.data_tensor, entry.derivatives, entry.data_tensor.shape, entry.names, - # grids=grids, max_deriv_order=max_deriv_order) + grids = [self.cache[0].memory_default.get('0'), self.cache[0].memory_default.get('1')] + global_var.init_symnet_pool(max_factors_in_term, self.pool.families, + entry.data_tensor, entry.derivatives, entry.data_tensor.shape, entry.names, + grids=grids, max_deriv_order=max_deriv_order) print(f'The cardinality of defined token pool is {self.pool.families_cardinality()}') print(f'Among them, the pool contains {self.pool.families_cardinality(meaningful_only=True)}') @@ -580,7 +577,7 @@ def saved_derivaties(self): def fit(self, data: Union[np.ndarray, list, tuple], equation_terms_max_number=6, equation_factors_max_number=1, variable_names=['u',], eq_sparsity_interval=(1e-4, 2.5), - derivs=None, max_deriv_order=1, additional_tokens=[], data_fun_pow: int = 1, custom_cross_prob : dict = {}): + derivs=None, max_deriv_order=1, additional_tokens=[], data_fun_pow: int = 1): """ Fit epde search algorithm to obtain differential equations, describing passed data. @@ -637,7 +634,6 @@ def fit(self, data: Union[np.ndarray, list, tuple], equation_terms_max_number=6, derivs=derivs, max_deriv_order=max_deriv_order, additional_tokens=additional_tokens, data_fun_pow=data_fun_pow, - custom_cross_prob=custom_cross_prob, max_factors_in_term=equation_factors_max_number) self.optimizer_init_params['population_instruct'] = {"pool": self.pool, "terms_number": equation_terms_max_number, diff --git a/epde/interface/token_family.py b/epde/interface/token_family.py index 4183590..7dd5145 100644 --- a/epde/interface/token_family.py +++ b/epde/interface/token_family.py @@ -19,6 +19,7 @@ import seaborn as sns import matplotlib.pyplot as plt +from symnet.initcoefficients import get_csym_tsym from sympy import Symbol, Mul def constancy_hard_equality(tensor, epsilon=1e-7): @@ -430,54 +431,6 @@ def evaluate_all(self): raise KeyError('Generated token somehow was not stored in cache.') -class CustomProbInfo: - def __init__(self, pool): - self.pool = pool - - token_ls = [] - for family in self.pool.families: - token_ls += family.tokens - term_ls = [] - for i in range(1, self.pool.max_factors_in_term + 1): - term_ls += list(itertools.combinations(token_ls, i)) - self.pool_terms = self.cast_to_symbols(term_ls) - self.cross_distr = self.get_cross_distr(self.pool.custom_cross_prob) - - @staticmethod - def get_cross_distr(custom_cross_prob, min_pr=0.2, max_pr=0.7): # 0.15 0.75 for burg sindy 9 0.2-0.8 bo - mmf = 2.4 - values = list(custom_cross_prob.values()) - - csym_arr = np.fabs(np.array(values)) - - min_max_coeff = np.max(csym_arr) - mmf * np.min(csym_arr) - smoothing_factor = min_max_coeff / (min_max_coeff + (mmf - 1) * np.average(csym_arr)) - - uniform_csym = np.array([np.sum(csym_arr) / len(csym_arr)] * len(csym_arr)) - smoothed_array = (1 - smoothing_factor) * csym_arr + smoothing_factor * uniform_csym - - # Dominator's distribution - # final_probabilities = (np.max(smoothed_array) - smoothed_array) / \ - # (np.max(smoothed_array) - np.min(smoothed_array)) * \ - # (max_pr-min_pr) + min_pr - - final_probabilities = (smoothed_array - np.min(smoothed_array)) / \ - (np.max(smoothed_array) - np.min(smoothed_array)) * \ - (max_pr-min_pr) + min_pr - cross_dict = dict(zip(custom_cross_prob.keys(), final_probabilities)) - return cross_dict - - - @staticmethod - def cast_to_symbols(pool_names: list[tuple[str]]): - - pool_ls = [] - for name in pool_names: - term_symbolic = list(map(lambda u: Symbol(u), name)) - pool_ls.append(Mul(*term_symbolic)) - return pool_ls - - class TFPool(object): """ Class stored pool for token families @@ -487,20 +440,25 @@ class TFPool(object): """ distribution_ls = [] - def __init__(self, families: list, stored_pool=None, custom_cross_prob : dict = {}, max_factors_in_term: int = 1): + def __init__(self, families: list, stored_pool=None): if stored_pool is not None: self = pickle.load(stored_pool) self.families = families - self.custom_cross_prob = custom_cross_prob - self.max_factors_in_term = max_factors_in_term - self.cross_prob_distr = CustomProbInfo(self).cross_distr - @staticmethod - def compile_term(term): - complete_term = [] - for factor in term.structure: - complete_term.append(factor.label) - return tuple(complete_term) + # def show_distrib(self, plot_distrib=False): + # for item in range(len(self.prob_info.term_set_hashed)): + # indexes = np.where(np.array(self.distribution_ls) == item)[0] + # print(f'{self.prob_info.distribution_dict_idx[item]}: {len(indexes)}') + # + # if plot_distrib: + # sublist = self.distribution_ls + # fig, ax1 = plt.subplots() + # sns.kdeplot(sublist, ax=ax1) + # ax1.set_xlim(min(sublist), max(sublist)) + # ax2 = ax1.twinx() + # sns.histplot(sublist, ax=ax2, bins=len(self.prob_info.term_set_hashed)) # discrete=True) + # plt.grid() + # plt.show() # def update_distribution(self, term): # @@ -631,7 +589,7 @@ def create_from_family(self, family_label: str, token_status=None, **kwargs): return family.create(label=None, token_status=token_status, **kwargs) def __add__(self, other): - return TFPool(families=self.families + other.families, custom_cross_prob=self.custom_cross_prob, max_factors_in_term=self.max_factors_in_term) + return TFPool(families=self.families + other.families) def __len__(self): return len(self.families) diff --git a/epde/operators/multiobjective/variation.py b/epde/operators/multiobjective/variation.py index 2ec23ab..5bbd100 100644 --- a/epde/operators/multiobjective/variation.py +++ b/epde/operators/multiobjective/variation.py @@ -23,6 +23,8 @@ from epde.operators.multiobjective.moeadd_specific import get_basic_populator_updater from epde.operators.multiobjective.mutations import get_basic_mutation from sympy import Mul, Symbol +import epde.globals as global_var +from symnet.preproc_output import get_cross_distr, to_symbolic class ParetoLevelsCrossover(CompoundOperator): @@ -109,8 +111,6 @@ def apply(self, objective : tuple, arguments : dict): temp_eq_1, temp_eq_2 = self.suboperators['equation_crossover'].apply(objective = (objective[0].vals[eq_key], objective[1].vals[eq_key]), arguments = subop_args['equation_crossover']) - # except TypeError: - # pass objective[0].vals.replace_gene(gene_key = eq_key, value = temp_eq_1) offspring_2.vals.replace_gene(gene_key = eq_key, value = temp_eq_2) @@ -184,54 +184,36 @@ def apply(self, objective : tuple, arguments : dict): eq2_distr = self.get_equation_cross_distr(objective[1], start_idx) - # for i in range(same_num + similar_num, len(objective[0].structure)): - # if check_uniqueness(objective[0].structure[i], objective[1].structure) and check_uniqueness(objective[1].structure[i], objective[0].structure): - # objective[0].structure[i], objective[1].structure[i] = self.suboperators['term_crossover'].apply(objective = (objective[0].structure[i], - # objective[1].structure[i]), - # arguments = subop_args['term_crossover']) + for i in range(same_num + similar_num, len(objective[0].structure)): + if check_uniqueness(objective[0].structure[i], objective[1].structure) and check_uniqueness(objective[1].structure[i], objective[0].structure): + objective[0].structure[i], objective[1].structure[i] = self.suboperators['term_crossover'].apply(objective = (objective[0].structure[i], + objective[1].structure[i]), + arguments = subop_args['term_crossover']) + # term1 = objective[1].structure[0] + # term2 = objective[0].structure[i] + # else: + # print("Uniqueness in equation 0:") + # print(check_uniqueness(objective[0].structure[i], objective[1].structure)) + # + # print("Term is not unique in equation:") + return objective[0], objective[1] def use_default_tags(self): self._tags = {'crossover', 'gene level', 'contains suboperators', 'standard'} - @staticmethod - def to_symbolic(term): - if type(term.cache_label[0]) == tuple: - labels = [] - for label in term.cache_label: - labels.append(str(label[0])) - symlabels = list(map(lambda token: Symbol(token), labels)) - return Mul(*symlabels) - else: - return Symbol(str(term.cache_label[0])) def get_equation_cross_distr(self, equation, start_idx): importance_coeffs = {} for i in range(start_idx, len(equation.structure)): - sym_term = self.to_symbolic(equation.structure[i]) - importance_coeffs[sym_term] = equation.pool.custom_cross_prob.get(sym_term) - cross_distr = self.get_cross_distr(importance_coeffs, start_idx, len(equation.structure)) + sym_term = to_symbolic(equation.structure[i]) + importance_coeffs[sym_term] = global_var.sympool.pool_dict.get(sym_term) + # importance_coeffs[sym_term] = global_var.sympool.pool_sym_dict.get(sym_term) + cross_distr = get_cross_distr(importance_coeffs, start_idx, len(equation.structure)) return cross_distr - @staticmethod - def get_cross_distr(custom_cross_prob, start_idx, end_idx_exclude): - mmf = 2.4 - values = list(custom_cross_prob.values()) - csym_arr = np.fabs(np.array(values)) - - if np.max(csym_arr) / np.min(csym_arr) > 2.6: - min_max_coeff = mmf * np.min(csym_arr) - np.max(csym_arr) - smoothing_factor = min_max_coeff / (min_max_coeff - (mmf - 1) * np.average(csym_arr)) - uniform_csym = np.array([np.sum(csym_arr) / len(csym_arr)] * len(csym_arr)) - - smoothed_array = (1 - smoothing_factor) * csym_arr + smoothing_factor * uniform_csym - inv = 1 / smoothed_array - else: - inv = 1 / csym_arr - inv_norm = inv / np.sum(inv) - return dict(zip([i for i in range(start_idx, end_idx_exclude)], inv_norm.tolist())) class EquationExchangeCrossover(CompoundOperator): key = 'EquationExchangeCrossover' diff --git a/epde/structure/main_structures.py b/epde/structure/main_structures.py index cc5de9b..b4aa3f7 100644 --- a/epde/structure/main_structures.py +++ b/epde/structure/main_structures.py @@ -30,6 +30,7 @@ import seaborn as sns import matplotlib.pyplot as plt +from symnet.pool_terms import to_symbolic class Term(ComplexStructure): @@ -792,8 +793,8 @@ def __init__(self, pool: TFPool, metaparameters: dict): check_metaparameters(metaparameters) self.metaparameters = metaparameters - self.tokens_for_eq = TFPool(pool.families_demand_equation, custom_cross_prob=pool.custom_cross_prob, max_factors_in_term=pool.max_factors_in_term) - self.tokens_supp = TFPool(pool.families_equationless, custom_cross_prob=pool.custom_cross_prob, max_factors_in_term=pool.max_factors_in_term) + self.tokens_for_eq = TFPool(pool.families_demand_equation) + self.tokens_supp = TFPool(pool.families_equationless) self.moeadd_set = False self.vars_to_describe = [token_family.ftype for token_family in self.tokens_for_eq.families] # Made list from set diff --git a/exscripts/experiment_burgers_sindy.py b/exscripts/experiment_burgers_sindy.py index a287705..5ad168e 100644 --- a/exscripts/experiment_burgers_sindy.py +++ b/exscripts/experiment_burgers_sindy.py @@ -97,9 +97,9 @@ def hash_term(term): grids = np.meshgrid(t, x, indexing='ij') ''' Parameters of the experiment ''' - write_csv = True + write_csv = False print_results = True - max_iter_number = 2000 + max_iter_number = 10 title = f'dfs0_{max_iter_number}_simstart2' terms = [('u',), ('du/dx1',), ('du/dx2',), ('d^2u/dx2^2',), ('u', 'du/dx1'), ('u', 'du/dx2'), ('u', 'd^2u/dx2^2'), @@ -135,7 +135,7 @@ def hash_term(term): try: epde_search_obj.fit(data=u, max_deriv_order=(1, 2), equation_terms_max_number=3, equation_factors_max_number=2, - eq_sparsity_interval=(1e-08, 1e-1), custom_cross_prob=cross_distr) + eq_sparsity_interval=(1e-08, 1e-1)) except Exception as e: logging.error(traceback.format_exc()) population_error += 1