completed save methods for all error methods

quanted · Dec 18, 2023 · e70cb7a · e70cb7a
1 parent 8295427
commit e70cb7a
Show file tree

Hide file tree

Showing 5 changed files with 5,302 additions and 901 deletions.
diff --git a/notebooks/.ipynb_checkpoints/epa_nmf-py_workflow_01-checkpoint.ipynb b/notebooks/.ipynb_checkpoints/epa_nmf-py_workflow_01-checkpoint.ipynb
diff --git a/notebooks/epa_nmf-py_workflow_01.ipynb b/notebooks/epa_nmf-py_workflow_01.ipynb
diff --git a/src/error/bootstrap.py b/src/error/bootstrap.py
@@ -87,7 +87,13 @@ def __init__(self,
         self.factor_tables = {}
         self.bs_profiles = {}
         self.bs_factor_contributions = {}
-        self.metadata = {}
+        self.metadata = {
+            "model_selected": self.model_selected,
+            "bootstrap_n": self.bootstrap_n,
+            "block_size": self.block_size,
+            "threshold": self.threshold,
+            "bs_seed": self.bs_seed
+        }
 
     def _block_resample(self,
                         data: np.ndarray,
@@ -349,7 +355,7 @@ def run(self,
         """
         self.metadata["keep_H"] = keep_H
         self.metadata["reuse_seed"] = reuse_seed
-        self.metadata["block"] = block
+        self.metadata["use_block"] = block
         self.metadata["overlapping"] = overlapping
 
         self._train(keep_H=keep_H, reuse_seed=reuse_seed, block=block, overlapping=overlapping)
@@ -683,12 +689,36 @@ def save(self, bs_name: str,
                     pickle.dump(self, save_file)
                     logger.info(f"BS NMF output saved to pickle file: {file_path}")
             else:
-                # meta_file = os.path.join(output_directory, f"{bs_name}-metadata.json")
-                # with open(meta_file, "w") as mfile:
-                #     json.dump(self.metadata, mfile, default=np_encoder)
-                #     logger.info(f"BS NMF model metadata saved to file: {meta_file}")
-                logger.error("Not yet implemented.")
-            return file_path
+                file_path = output_directory
+                meta_file = os.path.join(output_directory, f"{bs_name}-metadata.json")
+                with open(meta_file, "w") as mfile:
+                    json.dump(self.metadata, mfile, default=np_encoder)
+                    logger.info(f"BS NMF model metadata saved to file: {meta_file}")
+                results_file = os.path.join(output_directory, f"{bs_name}-results.json")
+                with open(results_file, "w") as resfile:
+                    json.dump(self.bs_results, resfile, default=np_encoder)
+                    logger.info(f"BS NMF results saved to file: {results_file}")
+                mapping_file = os.path.join(output_directory, f"{bs_name}-mapping.csv")
+                with open(mapping_file, "w") as mapfile:
+                    self.mapping_df.to_csv(mapfile)
+                    logger.info(f"BS NMF model mapping saved to file: {mapping_file}")
+                qtable_file = os.path.join(output_directory, f"{bs_name}-qtable.csv")
+                with open(qtable_file, "w") as qfile:
+                    self.q_results.to_csv(qfile)
+                    logger.info(f"BS NMF q table saved to file: {qtable_file}")
+                ftables_file = os.path.join(output_directory, f"{bs_name}-ftables.json")
+                with open(ftables_file, "w") as f_file:
+                    json.dump(self.factor_tables, f_file, default=np_encoder)
+                    logger.info(f"BS NMF factor tables saved to file: {ftables_file}")
+                profiles_file = os.path.join(output_directory, f"{bs_name}-profiles.json")
+                with open(profiles_file, "w") as p_file:
+                    json.dump(self.bs_profiles, p_file, default=np_encoder)
+                    logger.info(f"BS NMF profiles saved to file: {profiles_file}")
+                contr_file = os.path.join(output_directory, f"{bs_name}-contributions.json")
+                with open(contr_file, "w") as c_file:
+                    json.dump(self.bs_factor_contributions, c_file, default=np_encoder)
+                    logger.info(f"BS NMF contributions saved to file: {contr_file}")
+                return file_path
         else:
             logger.error(f"Output directory does not exist. Specified directory: {output_directory}")
             return None

diff --git a/src/error/bs_disp.py b/src/error/bs_disp.py
@@ -93,9 +93,18 @@ def __init__(self,
         self.n_drops = 0
         self.disp_swap = 0
         self.fit_swaps = -1
+        self.metadata = {
+            "model_selected": self.model_selected,
+            "bs-block_size": self.block_size,
+            "bs-threshold": self.threshold,
+            "disp-max_search": self.max_search,
+            "disp-threshold_dQ": self.threshold_dQ,
+            "features": self.features,
+            "seed": self.seed
+        }
 
     def run(self,
-            parallel: bool = True,
+            parallel: bool = False,
             keep_H: bool = True,
             reuse_seed: bool = True,
             block: bool = True,
@@ -117,10 +126,13 @@ def run(self,
         overlapping : bool
            Allow resampled blocks to overlap. Default = False
 
-        Returns
-        -------
-
         """
+        self.metadata["parallel"] = parallel
+        self.metadata["keep_H"] = keep_H
+        self.metadata["reuse_seed"] = reuse_seed
+        self.metadata["block"] = block
+        self.metadata["overlapping"] = overlapping
+
         if self.bootstrap is None:
             logger.info(f"Running new Bootstrap instance with {self.bootstrap_n} runs and block size {self.block_size}")
             # Run BS
@@ -137,24 +149,24 @@ def run(self,
             cpus = cpus - 1 if cpus > 1 else 1
             with mp.Pool(processes=cpus) as pool:
                 p_args = []
-                for i in range(len(self.bootstrap.bs_results.keys())):
-                    i_model = self.bootstrap.bs_results[bs_keys[i]]["model"]
-                    i_args = (bs_keys[i], i_model, self.feature_labels, self.model_selected, self.threshold_dQ,
+                for i, bs_key in enumerate(bs_keys):
+                    i_model = self.bootstrap.bs_results[bs_key]["model"]
+                    i_args = (bs_key, i_model, self.feature_labels, self.model_selected, self.threshold_dQ,
                               self.max_search, self.features, self.dQmax)
                     p_args.append(i_args)
 
                 for result in pool.starmap(BSDISP._parallel_disp, p_args, chunksize=10):
                     i, i_disp = result
                     self.disp_results[i] = i_disp
         else:
-            for i in tqdm(range(len(self.bootstrap.bs_results.keys())), desc="BS-DISP - Displacement Stage", position=0, leave=True):
-                bs_result = self.bootstrap.bs_results[bs_keys[i]]
+            for bs_key in tqdm(bs_keys, desc="BS-DISP - Displacement Stage", position=0, leave=True):
+                bs_result = self.bootstrap.bs_results[bs_key]
                 bs_model = bs_result["model"]
                 i_disp = Displacement(nmf=bs_model, feature_labels=self.feature_labels, model_selected=self.model_selected,
                                       threshold_dQ=self.threshold_dQ, max_search=self.max_search, features=self.features)
                 i_disp.dQmax = self.dQmax
-                i_disp.run(batch=i)
-                self.disp_results[i] = i_disp
+                i_disp.run(batch=bs_key)
+                self.disp_results[bs_key] = i_disp
         t1 = time.time()
         logger.info(f"Completed all BS-DISP calculations, BS runs: {self.bootstrap_n}, "
                     f"Features: {len(self.feature_labels)}, Factors: {self.factors}, "
@@ -177,8 +189,8 @@ def __compile_results(self):
         """
         Calculate the merging statistics and metrics for the bs-disp results.
         """
-        disp_result = self.disp_results[0].compiled_results
-
+        key0 = list(self.disp_results.keys())[0]
+        disp_result = self.disp_results[key0].compiled_results
         profiles = disp_result["profile"]
         profiles_max = disp_result["profile_max"]
         profiles_min = disp_result["profile_min"]
@@ -198,17 +210,15 @@ def __compile_results(self):
             conc_min_i = disp_result_i["conc_min"]
             dQ_drop_i = disp_result_i["dQ_drop"]
             disp_profiles.append(profile_i)
-            # profiles = np.mean((profiles, profile_i))
             profiles_max = np.max([profiles_max, profile_max_i], axis=0)
             profiles_min = np.min([profiles_min, profile_min_i], axis=0)
-            # conc = np.mean((conc, conc_i))
             disp_conc.append(conc_i)
             conc_max = np.max([conc_max, conc_max_i], axis=0)
             conc_min = np.min([conc_min, conc_min_i], axis=0)
             dQ_drop = np.min([dQ_drop, dQ_drop_i.values], axis=0)
             if any(dQ_drop_i < 0.0):
                 self.n_drops += 1
-        self.compiled_results = copy.copy(self.disp_results[0].compiled_results)
+        self.compiled_results = copy.copy(self.disp_results[key0].compiled_results)
         self.compiled_results["profiles"] = np.mean(disp_profiles, axis=0)
         self.compiled_results["profile_max"] = profiles_max
         self.compiled_results["profile_min"] = profiles_min
@@ -217,7 +227,7 @@ def __compile_results(self):
         self.compiled_results["conc_min"] = conc_min
         self.compiled_results["dQ_drop"] = dQ_drop
 
-        for result_i in range(0, len(self.disp_results.keys())):
+        for result_i in self.disp_results.keys():
             self.swap_table = self.swap_table + self.disp_results[result_i].swap_table
             self.count_table = self.count_table + self.disp_results[result_i].count_table
             if np.count_nonzero(self.disp_results[result_i].swap_table) > 0:
@@ -395,6 +405,7 @@ def save(self, bsdisp_name: str,
                     pickle.dump(self, save_file)
                     logger.info(f"BS-DISP NMF output saved to pickle file: {file_path}")
             else:
+
                 logger.error("Not yet implemented.")
             return file_path
         else:

diff --git a/src/error/displacement.py b/src/error/displacement.py
@@ -2,12 +2,13 @@
 import copy
 import pickle
 import os
+import json
 import numpy as np
 import pandas as pd
 from pathlib import Path
 import plotly.graph_objects as go
 from tqdm import tqdm
-from src.utils import q_loss, compare_all_factors, EPSILON
+from src.utils import q_loss, compare_all_factors, EPSILON, np_encoder
 from src.model.nmf import NMF
 
 
@@ -80,6 +81,13 @@ def __init__(self,
         self.swap_table = np.zeros(shape=(len(self.dQmax), self.factors))
         self.count_table = np.zeros(shape=(len(self.dQmax), self.factors))
         self.compiled_results = None
+        self.metadata = {
+            "selected_model": self.selected_model,
+            "features": self.features,
+            "excluded_features": self.excluded_features,
+            "max_search": self.max_search,
+            "threshold_dQ": self.threshold_dQ
+        }
 
     def run(self, batch: int = -1):
         """
@@ -236,7 +244,6 @@ def _increase_disp(self, batch: int = -1):
            Batch number identifier, used for labeling DISP during parallel runs with BS-DISP.
 
         """
-        # logger.info("DISP - Testing increasing value changes to H")
         for factor_i in tqdm(range(self.H.shape[0]), desc="Increasing value for factors", position=0, leave=True):
             factor_results = {}
             for feature_j in tqdm(self.features, desc=f"+ : Batch {batch}, Factor {factor_i+1} - Features", position=0, leave=True):
@@ -317,7 +324,6 @@ def _decrease_disp(self, batch: int = -1):
         batch : int
            Batch number identifier, used for labeling DISP during parallel runs with BS-DISP.
         """
-        # logger.info("DISP - Testing decreasing value changes to H")
         for factor_i in tqdm(range(self.H.shape[0]), desc="Decreasing value for factors", position=0, leave=True):
             factor_results = {}
             for feature_j in tqdm(self.features, desc=f"- : Batch {batch}, Factor {factor_i+1} - Features", position=0, leave=True):
@@ -470,7 +476,35 @@ def save(self, disp_name: str,
                     pickle.dump(self, save_file)
                     logger.info(f"DISP NMF output saved to pickle file: {file_path}")
             else:
-                logger.error("Not yet implemented.")
+                file_path = output_directory
+                meta_file = os.path.join(output_directory, f"{disp_name}-metadata.json")
+                with open(meta_file, "w") as mfile:
+                    json.dump(self.metadata, mfile, default=np_encoder)
+                    logger.info(f"DISP NMF model metadata saved to file: {meta_file}")
+                increase_file = os.path.join(output_directory, f"{disp_name}-increase-disp.json")
+                with open(increase_file, "w") as incfile:
+                    json.dump(self.increase_results, incfile, default=np_encoder)
+                    logger.info(f"DISP NMF model increasing results saved to file: {increase_file}")
+                decrease_file = os.path.join(output_directory, f"{disp_name}-decrease-disp.json")
+                # Open decrease_file here: reopening increase_file would overwrite the increase results.
+                with open(decrease_file, "w") as decfile:
+                    json.dump(self.decrease_results, decfile, default=np_encoder)
+                    logger.info(f"DISP NMF model decreasing results saved to file: {decrease_file}")
+                swap_file = os.path.join(output_directory, f"{disp_name}-swaptable.csv")
+                with open(swap_file, 'w') as stfile:
+                    # np.savetxt requires the header as one string, so join the column labels.
+                    table_labels = ["dQ Max"] + [f"Factor {i + 1}" for i in range(self.factors)]
+                    table_header = ",".join(table_labels)
+                    table_data = np.round(100 * (self.swap_table / self.count_table), 2)
+                    dq_list = np.reshape(list(reversed(self.dQmax)), (len(self.dQmax), 1))
+                    table_data = np.hstack((dq_list, table_data))
+                    swap_comment = f"Swap % Table\nMetadata File: {meta_file}\n\n"
+                    np.savetxt(stfile, table_data, delimiter=',', header=table_header, comments=swap_comment)
+                    logger.info(f"DISP NMF swap table saved to file: {swap_file}")
+                compiled_file = os.path.join(output_directory, f"{disp_name}-results.csv")
+                with open(compiled_file, 'w') as cfile:
+                    self.compiled_results.to_csv(cfile)
+                    logger.info(f"DISP NMF compiled results saved to file: {compiled_file}")
             return file_path
         else:
             logger.error(f"Output directory does not exist. Specified directory: {output_directory}")