-
Notifications
You must be signed in to change notification settings - Fork 8
/
experiment_analyzer.py
82 lines (72 loc) · 4.15 KB
/
experiment_analyzer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import os
import tarfile
import matplotlib.pyplot as plt
from scipy.stats import mannwhitneyu, kruskal, ttest_ind
from experiments.experiment_analyzer import ExperimentAnalyzer
from golem.core.paths import project_root
if __name__ == '__main__':
    # Example driver for ExperimentAnalyzer: optionally unpacks an archived
    # dataset, then builds a table of mean metric values per setup/dataset.
    #
    # The result of analysis can be seen without running the script in
    # '~/GOLEM/examples/experiment_analyzer/result_analysis.tar.gz'.

    # NOTE(review): hard-coded, machine-specific location of the experiment
    # data; `project_root` is imported above but not used here -- confirm
    # whether the path should be derived from it instead.
    path_to_root = os.path.join('Z://', 'Pinchuk')

    # extract data if there is an archive
    if 'data.tar.gz' in os.listdir(path_to_root):
        # The context manager guarantees the archive handle is closed even
        # when extraction fails part-way through.
        with tarfile.open(os.path.join(path_to_root, 'data.tar.gz'), "r:gz") as tar:
            # NOTE(review): with no `path` argument the archive is unpacked
            # into the current working directory, not into `path_to_root` --
            # confirm this is intended. Only run this on trusted archives.
            tar.extractall()

    path_to_experiment_data = path_to_root
    path_to_save = os.path.join(path_to_root, 'result_analysis')

    # Entries listed in `folders_to_ignore` are passed to the analyzer so that
    # the output folder and the Windows thumbnail cache are skipped.
    analyzer = ExperimentAnalyzer(path_to_root=path_to_experiment_data,
                                  folders_to_ignore=['result_analysis', 'Thumbs.db'])

    # --- Optional stage (disabled): convergence analysis ---------------------
    #
    # # to get convergence table with mean values
    # path_to_save_convergence = os.path.join(path_to_save, 'convergence')
    #
    # convergence_mean = analyzer.analyze_convergence(history_folder='histories', is_raise=False,
    #                                                 path_to_save=path_to_save_convergence,
    #                                                 is_mean=True)
    #
    # # to get convergence boxplots
    # convergence = analyzer.analyze_convergence(history_folder='histories', is_raise=False)
    # path_to_save_convergence_boxplots = os.path.join(path_to_save_convergence, 'convergence_boxplots')
    #
    # metrics = list(convergence.keys())
    # setups = list(convergence[metrics[0]].keys())
    # datasets = list(convergence[metrics[0]][setups[0]].keys())
    # for dataset in datasets:
    #     for metric_name in convergence.keys():
    #         to_compare = dict()
    #         for setup in convergence[metric_name].keys():
    #             to_compare[setup] = [i for i in convergence[metric_name][setup][dataset]]
    #         plt.boxplot(list(to_compare.values()), labels=list(to_compare.keys()))
    #         plt.title(f'Convergence on {dataset}')
    #         os.makedirs(path_to_save_convergence_boxplots, exist_ok=True)
    #         plt.savefig(os.path.join(path_to_save_convergence_boxplots, f'convergence_{dataset}.png'))
    #         plt.close()

    # to get metrics table with mean values
    path_to_save_metrics = os.path.join(path_to_save, 'metrics_with_UCB')
    metric_names = ['roc_auc', 'f1', 'logloss']
    metrics_dict_mean = analyzer.analyze_metrics(metric_names=metric_names, file_name='evaluation_results.csv',
                                                 is_raise=False, path_to_save=path_to_save_metrics,
                                                 is_mean=True)

    # --- Optional stage (disabled): metric boxplots and statistical tests ----
    #
    # # to get metrics boxplots
    # metrics_dict = analyzer.analyze_metrics(metric_names=metric_names, file_name='evaluation_results.csv',
    #                                         is_raise=False)
    # path_to_save_metrics_boxplots = os.path.join(path_to_save_metrics, 'metrics_boxplot')
    #
    # for metric in metric_names:
    #     for dataset in metrics_dict[metric][list(metrics_dict[metric].keys())[0]].keys():
    #         to_compare = dict()
    #         for setup in metrics_dict[metric].keys():
    #             to_compare[setup] = [-1 * i for i in metrics_dict[metric][setup][dataset]]
    #         plt.boxplot(list(to_compare.values()), labels=list(to_compare.keys()))
    #         plt.title(f'{metric} on {dataset}')
    #         cur_path_to_save = os.path.join(path_to_save_metrics_boxplots, metric)
    #         os.makedirs(cur_path_to_save, exist_ok=True)
    #         plt.savefig(os.path.join(cur_path_to_save, f'{metric}_{dataset}.png'))
    #         plt.close()
    #
    # # to get stat test results table
    # path_to_save_stat = os.path.join(path_to_save, 'statistic')
    # stat_dict = analyzer.analyze_statistical_significance(data_to_analyze=metrics_dict['roc_auc'],
    #                                                       stat_tests=[mannwhitneyu, kruskal, ttest_ind],
    #                                                       path_to_save=path_to_save_stat)