From 83b86ca39f56d12ab76e112ab7d648a33c84aa10 Mon Sep 17 00:00:00 2001
From: LisIva <lizavvveta99@gmail.com>
Date: Fri, 15 Mar 2024 16:40:39 +0300
Subject: [PATCH] Change cross-over for symnet implementation

---
 epde/globals.py                            | 10 +--
 epde/interface/interface.py                | 18 ++---
 epde/interface/token_family.py             | 76 +++++-----------------
 epde/operators/multiobjective/variation.py | 56 ++++++----------
 epde/structure/main_structures.py          |  5 +-
 exscripts/experiment_burgers_sindy.py      |  6 +-
 6 files changed, 54 insertions(+), 117 deletions(-)

diff --git a/epde/globals.py b/epde/globals.py
index 414e192..dc94298 100644
--- a/epde/globals.py
+++ b/epde/globals.py
@@ -10,7 +10,7 @@
 import warnings
 
 from epde.cache.cache import Cache
-# from symnet import pool_terms
+from symnet import pool_terms
 
 
 def init_caches(set_grids: bool = False):
@@ -98,7 +98,7 @@ def init_verbose(plot_DE_solutions : bool = False, show_iter_idx : bool = True,
                              show_iter_stats, show_warnings)
 
 
-# def init_symnet_pool(max_factors_in_term, families, u, derivs, shape, names, grids, max_deriv_order):
-#     global sympool
-#     sympool = pool_terms.PoolTerms(max_factors_in_term, families)
-#     sympool.set_initial_distr(u, derivs, shape, names, families=families, grids=grids, max_deriv_order=max_deriv_order)
+def init_symnet_pool(max_factors_in_term, families, u, derivs, shape, names, grids, max_deriv_order):  # Create the module-level symnet term pool; returns nothing.
+    global sympool  # rebinds the module attribute `sympool` on every call
+    sympool = pool_terms.PoolTerms(max_factors_in_term, families)  # pool_terms is imported from the external symnet package
+    sympool.set_initial_distr(u, derivs, shape, names, families=families, grids=grids, max_deriv_order=max_deriv_order)  # forwards data, derivatives and grids to the pool
diff --git a/epde/interface/interface.py b/epde/interface/interface.py
index 68b3725..31e83bd 100644
--- a/epde/interface/interface.py
+++ b/epde/interface/interface.py
@@ -503,9 +503,7 @@ def set_preprocessor(self, preprocessor_pipeline: PreprocessingPipe = None,
 
     def create_pool(self, data: Union[np.ndarray, list, tuple], variable_names=['u',],
                     derivs=None, max_deriv_order=1, additional_tokens=[],
-                    data_fun_pow: int = 1, deriv_fun_pow: int = 1, max_factors_in_term=1,
-                    custom_cross_prob: dict = {}):
-
+                    data_fun_pow: int = 1, max_factors_in_term=1):
         self.pool_params = {'variable_names' : variable_names, 'max_deriv_order' : max_deriv_order,
                             'additional_tokens' : [family.token_family.ftype for family in additional_tokens]}
         assert (isinstance(derivs, list) and isinstance(derivs[0], np.ndarray)) or derivs is None
@@ -553,13 +551,12 @@ def create_pool(self, data: Union[np.ndarray, list, tuple], variable_names=['u',
             print(isinstance(additional_tokens, PreparedTokens))
             raise TypeError(f'Incorrect type of additional tokens: expected list or TokenFamily/Prepared_tokens - obj, instead got {type(additional_tokens)}')
         self.pool = TFPool(data_tokens + [tf if isinstance(tf, TokenFamily) else tf.token_family
-                                          for tf in additional_tokens],custom_cross_prob=custom_cross_prob,
-                                                                    max_factors_in_term=max_factors_in_term)
+                                      for tf in additional_tokens])
 
-        # grids = [self.cache[0].memory_default.get('0'), self.cache[0].memory_default.get('1')]
-        # global_var.init_symnet_pool(max_factors_in_term, self.pool.families,
-        #                             entry.data_tensor, entry.derivatives, entry.data_tensor.shape, entry.names,
-        #                             grids=grids, max_deriv_order=max_deriv_order)
+        grids = [self.cache[0].memory_default.get('0'), self.cache[0].memory_default.get('1')]
+        global_var.init_symnet_pool(max_factors_in_term, self.pool.families,
+                                    entry.data_tensor, entry.derivatives, entry.data_tensor.shape, entry.names,
+                                    grids=grids, max_deriv_order=max_deriv_order)
         print(f'The cardinality of defined token pool is {self.pool.families_cardinality()}')
         print(f'Among them, the pool contains {self.pool.families_cardinality(meaningful_only=True)}')
         
@@ -580,7 +577,7 @@ def saved_derivaties(self):
 
     def fit(self, data: Union[np.ndarray, list, tuple], equation_terms_max_number=6,
             equation_factors_max_number=1, variable_names=['u',], eq_sparsity_interval=(1e-4, 2.5),
-            derivs=None, max_deriv_order=1, additional_tokens=[], data_fun_pow: int = 1, custom_cross_prob : dict = {}):
+            derivs=None, max_deriv_order=1, additional_tokens=[], data_fun_pow: int = 1):
         """
         Fit epde search algorithm to obtain differential equations, describing passed data.
 
@@ -637,7 +634,6 @@ def fit(self, data: Union[np.ndarray, list, tuple], equation_terms_max_number=6,
                                  derivs=derivs, max_deriv_order=max_deriv_order,
                                  additional_tokens=additional_tokens,
                                  data_fun_pow=data_fun_pow,
-                                 custom_cross_prob=custom_cross_prob,
                                  max_factors_in_term=equation_factors_max_number)
 
         self.optimizer_init_params['population_instruct'] = {"pool": self.pool, "terms_number": equation_terms_max_number,
diff --git a/epde/interface/token_family.py b/epde/interface/token_family.py
index 4183590..7dd5145 100644
--- a/epde/interface/token_family.py
+++ b/epde/interface/token_family.py
@@ -19,6 +19,7 @@
 import seaborn as sns
 import matplotlib.pyplot as plt
 
+from symnet.initcoefficients import get_csym_tsym
 from sympy import Symbol, Mul
 
 def constancy_hard_equality(tensor, epsilon=1e-7):
@@ -430,54 +431,6 @@ def evaluate_all(self):
                     raise KeyError('Generated token somehow was not stored in cache.')
 
 
-class CustomProbInfo:
-    def __init__(self, pool):
-        self.pool = pool
-
-        token_ls = []
-        for family in self.pool.families:
-            token_ls += family.tokens
-        term_ls = []
-        for i in range(1, self.pool.max_factors_in_term + 1):
-            term_ls += list(itertools.combinations(token_ls, i))
-        self.pool_terms = self.cast_to_symbols(term_ls)
-        self.cross_distr = self.get_cross_distr(self.pool.custom_cross_prob)
-
-    @staticmethod
-    def get_cross_distr(custom_cross_prob, min_pr=0.2, max_pr=0.7): # 0.15 0.75 for burg sindy 9 0.2-0.8 bo
-        mmf = 2.4
-        values = list(custom_cross_prob.values())
-
-        csym_arr = np.fabs(np.array(values))
-
-        min_max_coeff = np.max(csym_arr) - mmf * np.min(csym_arr)
-        smoothing_factor = min_max_coeff / (min_max_coeff + (mmf - 1) * np.average(csym_arr))
-
-        uniform_csym = np.array([np.sum(csym_arr) / len(csym_arr)] * len(csym_arr))
-        smoothed_array = (1 - smoothing_factor) * csym_arr + smoothing_factor * uniform_csym
-
-        # Dominator's distribution
-        # final_probabilities = (np.max(smoothed_array) - smoothed_array) / \
-        #                       (np.max(smoothed_array) - np.min(smoothed_array)) * \
-        #                       (max_pr-min_pr) + min_pr
-
-        final_probabilities = (smoothed_array - np.min(smoothed_array)) / \
-                              (np.max(smoothed_array) - np.min(smoothed_array)) * \
-                              (max_pr-min_pr) + min_pr
-        cross_dict = dict(zip(custom_cross_prob.keys(), final_probabilities))
-        return cross_dict
-
-
-    @staticmethod
-    def cast_to_symbols(pool_names: list[tuple[str]]):
-
-        pool_ls = []
-        for name in pool_names:
-            term_symbolic = list(map(lambda u: Symbol(u), name))
-            pool_ls.append(Mul(*term_symbolic))
-        return pool_ls
-
-
 class TFPool(object):
     """
      Class stored pool for token families
@@ -487,20 +440,25 @@ class TFPool(object):
     """
     distribution_ls = []
 
-    def __init__(self, families: list, stored_pool=None, custom_cross_prob : dict = {}, max_factors_in_term: int = 1):
+    def __init__(self, families: list, stored_pool=None):  # NOTE(review): `self = pickle.load(...)` below only rebinds the local name; the unpickled pool is discarded and this instance gets no `families`
         if stored_pool is not None:
             self = pickle.load(stored_pool)
         self.families = families
-        self.custom_cross_prob = custom_cross_prob
-        self.max_factors_in_term = max_factors_in_term
-        self.cross_prob_distr = CustomProbInfo(self).cross_distr
 
-    @staticmethod
-    def compile_term(term):
-        complete_term = []
-        for factor in term.structure:
-            complete_term.append(factor.label)
-        return tuple(complete_term)
+    # def show_distrib(self, plot_distrib=False):
+    #     for item in range(len(self.prob_info.term_set_hashed)):
+    #         indexes = np.where(np.array(self.distribution_ls) == item)[0]
+    #         print(f'{self.prob_info.distribution_dict_idx[item]}: {len(indexes)}')
+    #
+    #     if plot_distrib:
+    #         sublist = self.distribution_ls
+    #         fig, ax1 = plt.subplots()
+    #         sns.kdeplot(sublist, ax=ax1)
+    #         ax1.set_xlim(min(sublist), max(sublist))
+    #         ax2 = ax1.twinx()
+    #         sns.histplot(sublist, ax=ax2, bins=len(self.prob_info.term_set_hashed))  # discrete=True)
+    #         plt.grid()
+    #         plt.show()
 
     # def update_distribution(self, term):
     #
@@ -631,7 +589,7 @@ def create_from_family(self, family_label: str, token_status=None, **kwargs):
         return family.create(label=None, token_status=token_status, **kwargs)
 
     def __add__(self, other):
-        return TFPool(families=self.families + other.families, custom_cross_prob=self.custom_cross_prob, max_factors_in_term=self.max_factors_in_term)
+        return TFPool(families=self.families + other.families)  # new pool over the concatenated family lists of both operands
 
     def __len__(self):
         return len(self.families)
diff --git a/epde/operators/multiobjective/variation.py b/epde/operators/multiobjective/variation.py
index 2ec23ab..5bbd100 100644
--- a/epde/operators/multiobjective/variation.py
+++ b/epde/operators/multiobjective/variation.py
@@ -23,6 +23,8 @@
 from epde.operators.multiobjective.moeadd_specific import get_basic_populator_updater
 from epde.operators.multiobjective.mutations import get_basic_mutation
 from sympy import Mul, Symbol
+import epde.globals as global_var
+from symnet.preproc_output import get_cross_distr, to_symbolic
 
 
 class ParetoLevelsCrossover(CompoundOperator):
@@ -109,8 +111,6 @@ def apply(self, objective : tuple, arguments : dict):
             temp_eq_1, temp_eq_2 = self.suboperators['equation_crossover'].apply(objective = (objective[0].vals[eq_key],
                                                                                               objective[1].vals[eq_key]),
                                                                                  arguments = subop_args['equation_crossover'])
-            # except TypeError:
-            #     pass
             objective[0].vals.replace_gene(gene_key = eq_key, value = temp_eq_1)
             offspring_2.vals.replace_gene(gene_key = eq_key, value = temp_eq_2)
             
@@ -184,54 +184,36 @@ def apply(self, objective : tuple, arguments : dict):
                         eq2_distr = self.get_equation_cross_distr(objective[1], start_idx)
 
 
-        # for i in range(same_num + similar_num, len(objective[0].structure)):
-        #     if check_uniqueness(objective[0].structure[i], objective[1].structure) and check_uniqueness(objective[1].structure[i], objective[0].structure):
-        #         objective[0].structure[i], objective[1].structure[i] = self.suboperators['term_crossover'].apply(objective = (objective[0].structure[i],
-        #                                                                                                                       objective[1].structure[i]),
-        #                                                                                                          arguments = subop_args['term_crossover'])
+        for i in range(same_num + similar_num, len(objective[0].structure)):
+            if check_uniqueness(objective[0].structure[i], objective[1].structure) and check_uniqueness(objective[1].structure[i], objective[0].structure):
+                objective[0].structure[i], objective[1].structure[i] = self.suboperators['term_crossover'].apply(objective = (objective[0].structure[i],
+                                                                                                                              objective[1].structure[i]),
+                                                                                                                 arguments = subop_args['term_crossover'])
+                # term1 = objective[1].structure[0]
+                # term2 = objective[0].structure[i]
+            # else:
+            #     print("Uniqueness in equation 0:")
+            #     print(check_uniqueness(objective[0].structure[i], objective[1].structure))
+            #
+            #     print("Term is not unique in equation:")
+
 
         return objective[0], objective[1]
 
     def use_default_tags(self):
         self._tags = {'crossover', 'gene level', 'contains suboperators', 'standard'}
 
-    @staticmethod
-    def to_symbolic(term):
-        if type(term.cache_label[0]) == tuple:
-            labels = []
-            for label in term.cache_label:
-                labels.append(str(label[0]))
-            symlabels = list(map(lambda token: Symbol(token), labels))
-            return Mul(*symlabels)
-        else:
-            return Symbol(str(term.cache_label[0]))
 
     def get_equation_cross_distr(self, equation, start_idx):
         importance_coeffs = {}
         for i in range(start_idx, len(equation.structure)):
-            sym_term = self.to_symbolic(equation.structure[i])
-            importance_coeffs[sym_term] = equation.pool.custom_cross_prob.get(sym_term)
-        cross_distr = self.get_cross_distr(importance_coeffs, start_idx, len(equation.structure))
+            sym_term = to_symbolic(equation.structure[i])  # key for the term; to_symbolic is imported from symnet.preproc_output in this module
+            importance_coeffs[sym_term] = global_var.sympool.pool_dict.get(sym_term)  # needs global_var.init_symnet_pool() to have run; NOTE(review): presumably None for terms absent from pool_dict — confirm get_cross_distr tolerates that
+            # importance_coeffs[sym_term] = global_var.sympool.pool_sym_dict.get(sym_term)
+        cross_distr = get_cross_distr(importance_coeffs, start_idx, len(equation.structure))  # symnet helper; receives the index range [start_idx, len(equation.structure))
         return cross_distr
 
-    @staticmethod
-    def get_cross_distr(custom_cross_prob, start_idx, end_idx_exclude):
-        mmf = 2.4
-        values = list(custom_cross_prob.values())
-        csym_arr = np.fabs(np.array(values))
-
-        if np.max(csym_arr) / np.min(csym_arr) > 2.6:
-            min_max_coeff = mmf * np.min(csym_arr) - np.max(csym_arr)
-            smoothing_factor = min_max_coeff / (min_max_coeff - (mmf - 1) * np.average(csym_arr))
-            uniform_csym = np.array([np.sum(csym_arr) / len(csym_arr)] * len(csym_arr))
-
-            smoothed_array = (1 - smoothing_factor) * csym_arr + smoothing_factor * uniform_csym
-            inv = 1 / smoothed_array
-        else:
-            inv = 1 / csym_arr
-        inv_norm = inv / np.sum(inv)
 
-        return dict(zip([i for i in range(start_idx, end_idx_exclude)], inv_norm.tolist()))
 
 class EquationExchangeCrossover(CompoundOperator):
     key = 'EquationExchangeCrossover'
diff --git a/epde/structure/main_structures.py b/epde/structure/main_structures.py
index cc5de9b..b4aa3f7 100644
--- a/epde/structure/main_structures.py
+++ b/epde/structure/main_structures.py
@@ -30,6 +30,7 @@
 
 import seaborn as sns
 import matplotlib.pyplot as plt
+from symnet.pool_terms import to_symbolic
 
 
 class Term(ComplexStructure):
@@ -792,8 +793,8 @@ def __init__(self, pool: TFPool, metaparameters: dict):
         check_metaparameters(metaparameters)
 
         self.metaparameters = metaparameters
-        self.tokens_for_eq = TFPool(pool.families_demand_equation, custom_cross_prob=pool.custom_cross_prob, max_factors_in_term=pool.max_factors_in_term)
-        self.tokens_supp = TFPool(pool.families_equationless, custom_cross_prob=pool.custom_cross_prob, max_factors_in_term=pool.max_factors_in_term)
+        self.tokens_for_eq = TFPool(pool.families_demand_equation)
+        self.tokens_supp = TFPool(pool.families_equationless)
         self.moeadd_set = False
         self.vars_to_describe = [token_family.ftype for token_family in self.tokens_for_eq.families] # Made list from set
 
diff --git a/exscripts/experiment_burgers_sindy.py b/exscripts/experiment_burgers_sindy.py
index a287705..5ad168e 100644
--- a/exscripts/experiment_burgers_sindy.py
+++ b/exscripts/experiment_burgers_sindy.py
@@ -97,9 +97,9 @@ def hash_term(term):
     grids = np.meshgrid(t, x, indexing='ij')
 
     ''' Parameters of the experiment '''
-    write_csv = True
+    write_csv = False
     print_results = True
-    max_iter_number = 2000
+    max_iter_number = 10
     title = f'dfs0_{max_iter_number}_simstart2'
 
     terms = [('u',), ('du/dx1',), ('du/dx2',), ('d^2u/dx2^2',), ('u', 'du/dx1'), ('u', 'du/dx2'), ('u', 'd^2u/dx2^2'),
@@ -135,7 +135,7 @@ def hash_term(term):
         try:
             epde_search_obj.fit(data=u, max_deriv_order=(1, 2),
                                 equation_terms_max_number=3, equation_factors_max_number=2,
-                                eq_sparsity_interval=(1e-08, 1e-1), custom_cross_prob=cross_distr)
+                                eq_sparsity_interval=(1e-08, 1e-1))
         except Exception as e:
             logging.error(traceback.format_exc())
             population_error += 1