Skip to content

Commit

Permalink
completed save methods for all error methods
Browse files (browse the repository at this point in the history)
  • Loading branch information
deronsmith committed Dec 18, 2023
1 parent 8295427 commit e70cb7a
Show file tree
Hide file tree
Showing 5 changed files with 5,302 additions and 901 deletions.
871 changes: 300 additions & 571 deletions notebooks/.ipynb_checkpoints/epa_nmf-py_workflow_01-checkpoint.ipynb

Large diffs are not rendered by default.

5,199 changes: 4,898 additions & 301 deletions notebooks/epa_nmf-py_workflow_01.ipynb

Large diffs are not rendered by default.

46 changes: 38 additions & 8 deletions src/error/bootstrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,13 @@ def __init__(self,
self.factor_tables = {}
self.bs_profiles = {}
self.bs_factor_contributions = {}
self.metadata = {}
self.metadata = {
"model_selected": self.model_selected,
"bootstrap_n": self.bootstrap_n,
"block_size": self.block_size,
"threshold": self.threshold,
"bs_seed": self.bs_seed
}

def _block_resample(self,
data: np.ndarray,
Expand Down Expand Up @@ -349,7 +355,7 @@ def run(self,
"""
self.metadata["keep_H"] = keep_H
self.metadata["reuse_seed"] = reuse_seed
self.metadata["block"] = block
self.metadata["use_block"] = block
self.metadata["overlapping"] = overlapping

self._train(keep_H=keep_H, reuse_seed=reuse_seed, block=block, overlapping=overlapping)
Expand Down Expand Up @@ -683,12 +689,36 @@ def save(self, bs_name: str,
pickle.dump(self, save_file)
logger.info(f"BS NMF output saved to pickle file: {file_path}")
else:
# meta_file = os.path.join(output_directory, f"{bs_name}-metadata.json")
# with open(meta_file, "w") as mfile:
# json.dump(self.metadata, mfile, default=np_encoder)
# logger.info(f"BS NMF model metadata saved to file: {meta_file}")
logger.error("Not yet implemented.")
return file_path
file_path = output_directory
meta_file = os.path.join(output_directory, f"{bs_name}-metadata.json")
with open(meta_file, "w") as mfile:
json.dump(self.metadata, mfile, default=np_encoder)
logger.info(f"BS NMF model metadata saved to file: {meta_file}")
results_file = os.path.join(output_directory, f"{bs_name}-results.json")
with open(results_file, "w") as resfile:
json.dump(self.bs_results, resfile, default=np_encoder)
logger.info(f"BS NMF results saved to file: {results_file}")
mapping_file = os.path.join(output_directory, f"{bs_name}-mapping.csv")
with open(mapping_file, "w") as mapfile:
self.mapping_df.to_csv(mapfile)
logger.info(f"BS NMF model mapping saved to file: {mapping_file}")
qtable_file = os.path.join(output_directory, f"{bs_name}-qtable.csv")
with open(qtable_file, "w") as qfile:
self.q_results.to_csv(qfile)
logger.info(f"BS NMF q table saved to file: {qtable_file}")
ftables_file = os.path.join(output_directory, f"{bs_name}-ftables.json")
with open(ftables_file, "w") as f_file:
json.dump(self.factor_tables, f_file, default=np_encoder)
logger.info(f"BS NMF factor tables saved to file: {ftables_file}")
profiles_file = os.path.join(output_directory, f"{bs_name}-profiles.json")
with open(profiles_file, "w") as p_file:
json.dump(self.bs_profiles, p_file, default=np_encoder)
logger.info(f"BS NMF profiles saved to file: {profiles_file}")
contr_file = os.path.join(output_directory, f"{bs_name}-contributions.json")
with open(contr_file, "w") as c_file:
json.dump(self.bs_factor_contributions, c_file, default=np_encoder)
logger.info(f"BS NMF contributions saved to file: {contr_file}")
return file_path
else:
logger.error(f"Output directory does not exist. Specified directory: {output_directory}")
return None
Expand Down
45 changes: 28 additions & 17 deletions src/error/bs_disp.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,18 @@ def __init__(self,
self.n_drops = 0
self.disp_swap = 0
self.fit_swaps = -1
self.metadata = {
"model_selected": self.model_selected,
"bs-block_size": self.block_size,
"bs-threshold": self.threshold,
"disp-max_search": self.max_search,
"disp-threshold_dQ": self.threshold_dQ,
"features": self.features,
"seed": self.seed
}

def run(self,
parallel: bool = True,
parallel: bool = False,
keep_H: bool = True,
reuse_seed: bool = True,
block: bool = True,
Expand All @@ -117,10 +126,13 @@ def run(self,
overlapping : bool
Allow resampled blocks to overlap. Default = False
Returns
-------
"""
self.metadata["parallel"] = parallel
self.metadata["keep_H"] = keep_H
self.metadata["reuse_seed"] = reuse_seed
self.metadata["block"] = block
self.metadata["overlapping"] = overlapping

if self.bootstrap is None:
logger.info(f"Running new Bootstrap instance with {self.bootstrap_n} runs and block size {self.block_size}")
# Run BS
Expand All @@ -137,24 +149,24 @@ def run(self,
cpus = cpus - 1 if cpus > 1 else 1
with mp.Pool(processes=cpus) as pool:
p_args = []
for i in range(len(self.bootstrap.bs_results.keys())):
i_model = self.bootstrap.bs_results[bs_keys[i]]["model"]
i_args = (bs_keys[i], i_model, self.feature_labels, self.model_selected, self.threshold_dQ,
for i, bs_key in enumerate(bs_keys):
i_model = self.bootstrap.bs_results[bs_key]["model"]
i_args = (bs_key, i_model, self.feature_labels, self.model_selected, self.threshold_dQ,
self.max_search, self.features, self.dQmax)
p_args.append(i_args)

for result in pool.starmap(BSDISP._parallel_disp, p_args, chunksize=10):
i, i_disp = result
self.disp_results[i] = i_disp
else:
for i in tqdm(range(len(self.bootstrap.bs_results.keys())), desc="BS-DISP - Displacement Stage", position=0, leave=True):
bs_result = self.bootstrap.bs_results[bs_keys[i]]
for bs_key in tqdm(bs_keys, desc="BS-DISP - Displacement Stage", position=0, leave=True):
bs_result = self.bootstrap.bs_results[bs_key]
bs_model = bs_result["model"]
i_disp = Displacement(nmf=bs_model, feature_labels=self.feature_labels, model_selected=self.model_selected,
threshold_dQ=self.threshold_dQ, max_search=self.max_search, features=self.features)
i_disp.dQmax = self.dQmax
i_disp.run(batch=i)
self.disp_results[i] = i_disp
i_disp.run(batch=bs_key)
self.disp_results[bs_key] = i_disp
t1 = time.time()
logger.info(f"Completed all BS-DISP calculations, BS runs: {self.bootstrap_n}, "
f"Features: {len(self.feature_labels)}, Factors: {self.factors}, "
Expand All @@ -177,8 +189,8 @@ def __compile_results(self):
"""
Calculate the merging statistics and metrics for the bs-disp results.
"""
disp_result = self.disp_results[0].compiled_results

key0 = list(self.disp_results.keys())[0]
disp_result = self.disp_results[key0].compiled_results
profiles = disp_result["profile"]
profiles_max = disp_result["profile_max"]
profiles_min = disp_result["profile_min"]
Expand All @@ -198,17 +210,15 @@ def __compile_results(self):
conc_min_i = disp_result_i["conc_min"]
dQ_drop_i = disp_result_i["dQ_drop"]
disp_profiles.append(profile_i)
# profiles = np.mean((profiles, profile_i))
profiles_max = np.max([profiles_max, profile_max_i], axis=0)
profiles_min = np.min([profiles_min, profile_min_i], axis=0)
# conc = np.mean((conc, conc_i))
disp_conc.append(conc_i)
conc_max = np.max([conc_max, conc_max_i], axis=0)
conc_min = np.min([conc_min, conc_min_i], axis=0)
dQ_drop = np.min([dQ_drop, dQ_drop_i.values], axis=0)
if any(dQ_drop_i < 0.0):
self.n_drops += 1
self.compiled_results = copy.copy(self.disp_results[0].compiled_results)
self.compiled_results = copy.copy(self.disp_results[key0].compiled_results)
self.compiled_results["profiles"] = np.mean(disp_profiles, axis=0)
self.compiled_results["profile_max"] = profiles_max
self.compiled_results["profile_min"] = profiles_min
Expand All @@ -217,7 +227,7 @@ def __compile_results(self):
self.compiled_results["conc_min"] = conc_min
self.compiled_results["dQ_drop"] = dQ_drop

for result_i in range(0, len(self.disp_results.keys())):
for result_i in self.disp_results.keys():
self.swap_table = self.swap_table + self.disp_results[result_i].swap_table
self.count_table = self.count_table + self.disp_results[result_i].count_table
if np.count_nonzero(self.disp_results[result_i].swap_table) > 0:
Expand Down Expand Up @@ -395,6 +405,7 @@ def save(self, bsdisp_name: str,
pickle.dump(self, save_file)
logger.info(f"BS-DISP NMF output saved to pickle file: {file_path}")
else:

logger.error("Not yet implemented.")
return file_path
else:
Expand Down
42 changes: 38 additions & 4 deletions src/error/displacement.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,13 @@
import copy
import pickle
import os
import json
import numpy as np
import pandas as pd
from pathlib import Path
import plotly.graph_objects as go
from tqdm import tqdm
from src.utils import q_loss, compare_all_factors, EPSILON
from src.utils import q_loss, compare_all_factors, EPSILON, np_encoder
from src.model.nmf import NMF


Expand Down Expand Up @@ -80,6 +81,13 @@ def __init__(self,
self.swap_table = np.zeros(shape=(len(self.dQmax), self.factors))
self.count_table = np.zeros(shape=(len(self.dQmax), self.factors))
self.compiled_results = None
self.metadata = {
"selected_model": self.selected_model,
"features": self.features,
"excluded_features": self.excluded_features,
"max_search": self.max_search,
"threshold_dQ": self.threshold_dQ
}

def run(self, batch: int = -1):
"""
Expand Down Expand Up @@ -236,7 +244,6 @@ def _increase_disp(self, batch: int = -1):
Batch number identifier, used for labeling DISP during parallel runs with BS-DISP.
"""
# logger.info("DISP - Testing increasing value changes to H")
for factor_i in tqdm(range(self.H.shape[0]), desc="Increasing value for factors", position=0, leave=True):
factor_results = {}
for feature_j in tqdm(self.features, desc=f"+ : Batch {batch}, Factor {factor_i+1} - Features", position=0, leave=True):
Expand Down Expand Up @@ -317,7 +324,6 @@ def _decrease_disp(self, batch: int = -1):
batch : int
Batch number identifier, used for labeling DISP during parallel runs with BS-DISP.
"""
# logger.info("DISP - Testing decreasing value changes to H")
for factor_i in tqdm(range(self.H.shape[0]), desc="Decreasing value for factors", position=0, leave=True):
factor_results = {}
for feature_j in tqdm(self.features, desc=f"- : Batch {batch}, Factor {factor_i+1} - Features", position=0, leave=True):
Expand Down Expand Up @@ -470,7 +476,35 @@ def save(self, disp_name: str,
pickle.dump(self, save_file)
logger.info(f"DISP NMF output saved to pickle file: {file_path}")
else:
logger.error("Not yet implemented.")
file_path = output_directory
meta_file = os.path.join(output_directory, f"{disp_name}-metadata.json")
with open(meta_file, "w") as mfile:
json.dump(self.metadata, mfile, default=np_encoder)
logger.info(f"DISP NMF model metadata saved to file: {meta_file}")
increase_file = os.path.join(output_directory, f"{disp_name}-increase-disp.json")
with open(increase_file, "w") as incfile:
json.dump(self.increase_results, incfile, default=np_encoder)
logger.info(f"DISP NMF model increasing results saved to file: {increase_file}")
decrease_file = os.path.join(output_directory, f"{disp_name}-decrease-disp.json")
with open(increase_file, "w") as decfile:
json.dump(self.decrease_results, decfile, default=np_encoder)
logger.info(f"DISP NMF model decreasing results saved to file: {decrease_file}")
swap_file = os.path.join(output_directory, f"{disp_name}-swaptable.csv")
with open(swap_file, 'w') as stfile:
table_labels = ["dQ Max"]
for i in range(self.factors):
table_labels.append(f"Factor {i + 1}")
table_data = np.round(100 * (self.swap_table / self.count_table), 2)
dq_list = list(reversed(self.dQmax))
dq_list = np.reshape(dq_list, newshape=(len(dq_list), 1))
table_data = np.hstack((dq_list, table_data))
swap_comment = f"Swap % Table\nMetadata File: {meta_file}\n\n"
np.savetxt(stfile, table_data, delimiter=',', header=table_labels, comments=swap_comment)
logger.info(f"DISP NMF swap table saved to file: {swap_file}")
compiled_file = os.path.join(output_directory, f"{disp_name}-results.csv")
with open(compiled_file, 'w') as cfile:
self.compiled_results.to_csv(cfile)
logger.info(f"DISP NMF compiled results saved to file: {compiled_file}")
return file_path
else:
logger.error(f"Output directory does not exist. Specified directory: {output_directory}")
Expand Down

0 comments on commit e70cb7a

Please sign in to comment.