# log_results_from_sacred.py
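"""Log the results of a Sacred experiment to disk.

Given the ID of an experiment stored in the Sacred MongoDB database, this
script saves the experiment configuration, metric plots, model checkpoints,
captured output, and (optionally) evaluation results on selected validation
datasets to a local folder.
"""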
import argparse
import bisect
import incense
from incense import ExperimentLoader
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import pvectorc
import pyrsistent
from tensorflow import keras
import yaml as yml
import zipfile
from bfseg.utils.evaluation import evaluate_model_multiple_epochs_and_datasets
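

# NOTE: `MONGO_URI` is used by `ExperimentLoader` below but is not defined in
# this file. A minimal sketch, assuming the Sacred MongoDB URI is provided via
# an environment variable (the default shown here is only a placeholder):
MONGO_URI = os.environ.get("MONGO_URI", "mongodb://localhost:27017")
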
class LogExperiment:
def __init__(self, experiment_id, save_folder, save_output=False):
loader = ExperimentLoader(mongo_uri=MONGO_URI, db_name='bfseg')
# Load the experiment.
self._experiment_id = experiment_id
self._experiment = loader.find_by_id(self._experiment_id)
assert (self._experiment.to_dict()['status']
not in ["FAILED"]), f"Experiment {self._experiment_id} failed."
# Set up folders.
save_folder = os.path.abspath(save_folder)
self._save_folder_models = os.path.join(save_folder, 'models')
self._save_folder_plots = os.path.join(save_folder,
f'plots/{self._experiment_id}')
self._save_folder_logs = os.path.join(save_folder,
f'logs/{self._experiment_id}')
self._save_folder_evaluate = os.path.join(
save_folder, f'evaluate/{self._experiment_id}')
folders = [
self._save_folder_models, self._save_folder_plots,
self._save_folder_logs, self._save_folder_evaluate
]
for folder in folders:
if (not os.path.isdir(folder)):
os.makedirs(folder)
    # Set the epochs for which to log the results (if reached).
    self._epochs_to_save = [100, "final"]
    # Set the test datasets on which to optionally evaluate the pretrained
    # models.
self._datasets_names_to_evaluate = [
"BfsegValidationLabeled", "OfficeRumlangValidationLabeled"
]
self._datasets_scenes_to_evaluate = ["CLA", "RUMLANG"]
self._find_splits_to_log()
# Save the experiment configuration to file.
self.save_config_file()
# Save the plots containing the metrics.
self.save_plots()
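
  # The following two helpers recursively convert pyrsistent containers
  # (`PMap` / `PVector`) into plain dicts and lists, so that the experiment
  # configuration can be serialized to YAML.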
@staticmethod
def _recursive_transform_pmap(pmap):
pmap = dict(pmap)
for key, value in pmap.items():
if (isinstance(value, pvectorc.PVector)):
pmap[key] = LogExperiment._recursive_transform_pvector(value)
elif (isinstance(value, pyrsistent._pmap.PMap)):
pmap[key] = LogExperiment._recursive_transform_pmap(value)
return pmap
@staticmethod
def _recursive_transform_pvector(pvector):
pvector = list(pvector)
for idx, value in enumerate(pvector):
if (isinstance(value, pvectorc.PVector)):
pvector[idx] = LogExperiment._recursive_transform_pvector(value)
elif (isinstance(value, pyrsistent._pmap.PMap)):
pvector[idx] = LogExperiment._recursive_transform_pmap(value)
return pvector
def save_config_file(self):
with open(os.path.join(self._save_folder_plots, "config.txt"), "w") as f:
f.write("Experiment config:\n")
for key in sorted(self._experiment.config.keys()):
if (key[-7:] != "_params"):
continue
f.write(f"- {key}\n")
for subkey in sorted(self._experiment.config[key].keys()):
value = self._experiment.config[key][subkey]
# If possible, sort also the value (e.g., if it has keys).
if (hasattr(value, "items")):
value = dict(sorted(value.items()))
f.write(f" - {subkey}: {value}\n")
    # Also save the config as a YAML file.
with open(os.path.join(self._save_folder_plots, "config.yml"), "w") as f:
valid_keys = [
key for key in self._experiment.config.keys() if key[-7:] == "_params"
]
valid_params = {}
for key in valid_keys:
value = self._experiment.config[key]
if (isinstance(value, pvectorc.PVector)):
value = LogExperiment._recursive_transform_pvector(value)
elif (isinstance(value, pyrsistent._pmap.PMap)):
value = LogExperiment._recursive_transform_pmap(value)
valid_params[key] = value
yml.dump(valid_params, f)
def save_model(self, epoch_to_save):
assert (isinstance(epoch_to_save, int) or epoch_to_save.isnumeric() or
epoch_to_save == "final")
artifact_name = f'model_epoch_{epoch_to_save}.zip'
try:
complete_model_path = os.path.join(
self._save_folder_models, f"{self._experiment_id}_{artifact_name}")
      if (os.path.isfile(complete_model_path)):
        print(f"Skipping saving of model '{complete_model_path}' because it "
              "already exists.")
else:
self._experiment.artifacts[artifact_name].save(
to_dir=self._save_folder_models)
print(f"Saved model '{complete_model_path}'.")
except KeyError:
print(f"Experiment {self._experiment_id} does not have artifact "
f"{artifact_name} (yet).")
complete_model_path = None
return complete_model_path, artifact_name
def save_output(self):
with open(os.path.join(self._save_folder_logs, "output_to_screen.txt"),
"w") as f:
f.write(self._experiment.to_dict()['captured_out'])
def evaluate(self, epochs_to_evaluate, datasets_names_to_evaluate,
datasets_scenes_to_evaluate):
r"""Evaluates the model with the checkpoint(s) from the given epoch(s) on
the given test dataset(s).
Args:
epochs_to_evaluate (int/str or list of int/str): Epoch(s) of which the
corresponding saved model should be used to perform evaluation.
datasets_names_to_evaluate (str or list of str): Names of the dataset(s)
on which the model(s) should be evaluated.
datasets_scenes_to_evaluate (str or list of str): Scenes of the dataset(s)
on which the model(s) should be evaluated.
Returns:
accuracies (dict): Accuracies, indexed by the concatenation of the dataset
name and scene and by the epoch number.
mean_ious (dict): Mean IoUs, indexed by the concatenation of the dataset
name and scene and by the epoch number.
"""
actual_epochs_to_evaluate = []
model_paths_epochs_to_evaluate = []
# Prepare all the required models.
for test_dataset_name, test_dataset_scene in zip(
datasets_names_to_evaluate, datasets_scenes_to_evaluate):
curr_dataset_and_scene = f"{test_dataset_name}_{test_dataset_scene}"
# Skip re-evaluating if evaluation was already performed.
all_output_evaluation_filenames = [
os.path.join(
self._save_folder_evaluate,
f"{test_dataset_name}_{test_dataset_scene}_epoch_{epoch}.yml")
for epoch in epochs_to_evaluate
]
for epoch, output_evaluation_filename in zip(
epochs_to_evaluate, all_output_evaluation_filenames):
# Retrieve the required model.
model_path, artifact_name = self.save_model(epoch_to_save=epoch)
if (model_path is None):
if (epoch == "final"):
# If final model not available, take last available one.
model_path, artifact_name = self.save_model(
epoch_to_save=self._num_epochs - 1)
if (model_path is None):
print(f"Cannot evaluate current experiment at epoch {epoch}, because "
"the corresponding model does not exist.")
continue
# Extract the pretrained model from the archive, if necessary.
weights_file_name = artifact_name.split('.zip')[0] + ".h5"
output_filename = f"{self._experiment_id}_{weights_file_name}"
extracted_model_path = os.path.join(self._save_folder_models,
output_filename)
        if (not os.path.isfile(extracted_model_path)):
          assert (model_path[-4:] == ".zip")
          with zipfile.ZipFile(model_path, 'r') as zip_helper:
            assert (zip_helper.namelist() == [weights_file_name])
            # Extract the pretrained model under a new name that includes the
            # experiment ID.
            with zip_helper.open(weights_file_name) as zipped_file:
              file_content = zipped_file.read()
            with open(extracted_model_path, 'wb') as out_file:
              out_file.write(file_content)
actual_epochs_to_evaluate.append(epoch)
model_paths_epochs_to_evaluate.append(extracted_model_path)
accuracies, mean_ious = evaluate_model_multiple_epochs_and_datasets(
pretrained_dirs=model_paths_epochs_to_evaluate,
epochs_to_evaluate=actual_epochs_to_evaluate,
datasets_names_to_evaluate=datasets_names_to_evaluate,
datasets_scenes_to_evaluate=datasets_scenes_to_evaluate,
save_folder=self._save_folder_evaluate,
use_fov_mask=True)
return accuracies, mean_ious
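
  # Determines which dataset splits have all the metrics of interest logged in
  # the experiment; only those splits are included in the results and plots.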
def _find_splits_to_log(self):
self._splits_to_log = []
self._metrics_to_log = ["accuracy", "mean_iou"] #, "loss"]:
self._special_split_names = [
"train_no_replay", "NyuDepthV2Labeled_None",
"BfsegCLAMeshdistLabels_None", "MeshdistPseudolabels_None",
"MeshdistPseudolabels_garage_full", "MeshdistPseudolabels_office_full",
"MeshdistPseudolabels_rumlang_full", "MeshdistPseudolabels_garage1",
"MeshdistPseudolabels_garage2", "MeshdistPseudolabels_garage3",
"MeshdistPseudolabels_office4", "MeshdistPseudolabels_office5",
"MeshdistPseudolabels_office4_2302",
"MeshdistPseudolabels_office4_2402",
"MeshdistPseudolabels_office6_2502", "MeshdistPseudolabels_rumlang2",
"MeshdistPseudolabels_rumlang3", "BfsegValidationLabeled_None",
"BfsegValidationLabeled_ARCHE", "BfsegValidationLabeled_CLA",
"OfficeRumlangValidationLabeled_None",
"OfficeRumlangValidationLabeled_OFFICE",
"OfficeRumlangValidationLabeled_RUMLANG"
]
for split in ["train", "val", "test"] + self._special_split_names:
all_metrics_found_for_split = True
for metric in self._metrics_to_log:
if (f'{split}_{metric}' not in self._experiment.metrics):
all_metrics_found_for_split = False
break
if (all_metrics_found_for_split):
self._splits_to_log.append(split)
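
  # Writes a summary of the logged metrics at the selected epochs to
  # `results.txt`, and the same values as a spreadsheet (`excel_results.ods`)
  # with one column per metric/split combination.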
def save_results(self, evaluate_on_validation):
def write_result(split, metric, epoch, out_file, value=None):
try:
if (epoch == "final"):
epoch_number = self._num_epochs - 1
epoch_text = [f"final epoch", f" (ep. {epoch_number})"]
else:
epoch_number = epoch
epoch_text = [f"epoch {epoch_number}", ""]
if (value is None):
value = self._experiment.metrics[f'{split}_{metric}'].values[
epoch_number]
value_text = "{:.4f}".format(value) + f"{epoch_text[1]}"
out_file.write(f"- {split} {metric} @ {epoch_text[0]}: " + value_text +
"\n")
return value_text
except IndexError:
return None
if (evaluate_on_validation):
val_accuracies, val_mean_ious = self.evaluate(
epochs_to_evaluate=self._epochs_to_save,
datasets_names_to_evaluate=self._datasets_names_to_evaluate,
datasets_scenes_to_evaluate=self._datasets_scenes_to_evaluate)
def get_updated_cell_text(cell_text, split, metric, epoch, f, value=None):
value_text = write_result(split=split,
metric=metric,
epoch=epoch,
out_file=f,
value=value)
if (value_text is not None):
if (len(cell_text) == 0):
cell_text = value_text
else:
cell_text += f"\n{value_text}"
return cell_text
with open(os.path.join(self._save_folder_plots, "results.txt"), "w") as f:
full_text = [[]]
column_headers = []
for metric in self._metrics_to_log:
for split in self._splits_to_log:
cell_text = ""
column_headers.append(f"{metric} {split}")
for epoch in self._epochs_to_save:
cell_text = get_updated_cell_text(cell_text=cell_text,
split=split,
metric=metric,
epoch=epoch,
f=f)
full_text[0].append(cell_text)
# Log also the metrics from the optional evaluation.
if ((metric in ["accuracy", "mean_iou"]) and evaluate_on_validation):
if (metric == "accuracy"):
for split in val_accuracies.keys():
cell_text = ""
column_headers.append(f"{metric} {split}")
for epoch in self._epochs_to_save:
try:
value = val_accuracies[split][epoch]
cell_text = get_updated_cell_text(cell_text=cell_text,
split=split,
metric=metric,
epoch=epoch,
f=f,
value=value)
except KeyError:
continue
full_text[0].append(cell_text)
else:
for split in val_mean_ious.keys():
cell_text = ""
column_headers.append(f"{metric} {split}")
for epoch in self._epochs_to_save:
try:
value = val_mean_ious[split][epoch]
cell_text = get_updated_cell_text(cell_text=cell_text,
split=split,
metric=metric,
epoch=epoch,
f=f,
value=value)
except KeyError:
continue
full_text[0].append(cell_text)
# Also write the results as an excel file, for easier logging to
# Spreadsheet.
df = pd.DataFrame(full_text,
index=[f"{self._experiment_id}"],
columns=column_headers)
df.to_excel(os.path.join(self._save_folder_plots, "excel_results.ods"))
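
  # Plots each metric over the training epochs for all logged splits (plus the
  # learning rate, if present) and saves one figure per metric group as a PNG
  # file.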
def save_plots(self):
split_with_metric = {split: [] for split in self._splits_to_log}
for full_metric_name in self._experiment.metrics.keys():
if (full_metric_name == "lr"):
# Learning rate is handled separately.
continue
# Try first with the special cases.
prefix = None
for special_split_name in self._special_split_names:
split_metric_name = full_metric_name.split(special_split_name + "_",
maxsplit=1)
if (len(split_metric_name) != 1):
prefix = special_split_name
metric_name = split_metric_name[-1]
break
if (prefix is None):
        # Case `train`, `test`, or `val`.
prefix, metric_name = full_metric_name.split("_", maxsplit=1)
if (not prefix in split_with_metric.keys()):
raise KeyError(f"Metric {full_metric_name} was not recognized.")
if (prefix in split_with_metric.keys()):
bisect.insort(split_with_metric[prefix], metric_name)
len_first_element = len(split_with_metric[list(
split_with_metric.keys())[0]])
# Check that all split types (e.g., 'train') have the same metrics.
split_types = list(split_with_metric.keys())
metric_names_per_type = list(split_with_metric.values())
assert (metric_names_per_type.count(
metric_names_per_type[0]) == len(metric_names_per_type))
metric_names = metric_names_per_type[0]
metrics_to_log = [[
f"{split_type}_{metric_name}" for split_type in split_types
] for metric_name in metric_names]
self._num_epochs = len(self._experiment.metrics[list(
self._experiment.metrics.keys())[0]])
# If present, log learning rate.
if ("lr" in self._experiment.metrics.keys()):
# Learning rate is handled separately.
metrics_to_log.append(["lr"])
for group_idx, metric_group in enumerate(metrics_to_log):
fig = plt.figure(num=f'Run {self._experiment_id}, fig {group_idx}',
figsize=(12, 10))
ax = fig.add_subplot(1, 1, 1)
for metric in metric_group:
self._experiment.metrics[metric].index += 1
self._experiment.metrics[metric].plot(ax=ax)
if (self._num_epochs + 1 >= 5):
major_ticks = np.linspace(1, self._num_epochs, 5, dtype=int)
else:
major_ticks = self._experiment.metrics[metric].index
minor_ticks = np.arange(1, self._num_epochs + 1, 1)
ax.set_xticks(minor_ticks, minor=True)
ax.grid(which='minor', alpha=0.2)
ax.set_xticks(major_ticks)
ax.grid(which='major', alpha=0.5)
ax.legend()
# Save the plot to file.
plt.savefig(os.path.join(self._save_folder_plots,
f"plot_{group_idx}.png"))
if (__name__ == "__main__"):
parser = argparse.ArgumentParser()
parser.add_argument("--id",
type=int,
help="ID of the experiment to log.",
required=True)
parser.add_argument("--save_folder",
type=str,
help="Path where the logs should be saved.",
required=True)
parser.add_argument(
"--model_to_save",
help=
"If specified, epoch of the model to save (a valid epoch number or the "
"string 'final', for the final model.",
default=None)
parser.add_argument(
"--save_output",
action="store_true",
help="Whether or not to save the output to screen of the experiment.")
parser.add_argument(
"--save_results",
action="store_true",
help="Whether or not to save a summary of the results of the experiment.")
parser.add_argument(
"--evaluate",
action="store_true",
help="Whether or not to evaluate selected pretrained models on selected "
"validation sets.")
args = parser.parse_args()
experiment_id = args.id
save_folder = args.save_folder
save_output = args.save_output
save_results = args.save_results
evaluate = args.evaluate
model_to_save = args.model_to_save
experiment_logger = LogExperiment(experiment_id=experiment_id,
save_folder=save_folder)
# Optionally save a model to file.
if (model_to_save is not None):
experiment_logger.save_model(epoch_to_save=model_to_save)
# Optionally save the output to file.
if (save_output):
experiment_logger.save_output()
# Optionally save a summary of the results to file.
if (save_results):
experiment_logger.save_results(evaluate_on_validation=evaluate)
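
# Example usage (hypothetical experiment ID and output folder):
#   python log_results_from_sacred.py --id 123 --save_folder /tmp/sacred_logs \
#     --save_results --evaluate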