forked from INM-6/beNNch
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathanalysis_helper.py
97 lines (76 loc) · 3.59 KB
/
analysis_helper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
"""
beNNch - Unified execution, collection, analysis and
comparison of neural network simulation benchmarks.
Copyright (C) 2021 Forschungszentrum Juelich GmbH, INM-6
This program is free software: you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation, either version 3 of the License, or (at your option) any later
version.
This program is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program. If not, see <https://www.gnu.org/licenses/>.
SPDX-License-Identifier: GPL-3.0-or-later
"""
import os
import json
import tarfile
import glob
import numpy as np
import pandas as pd
def shell(command):
    """Run *command* through the system shell and return its exit status."""
    status = os.system(command)
    return status
def shell_without_print(command):
    """Run *command* through the system shell, discarding stdout and stderr.

    Returns the exit status as reported by ``os.system``.
    """
    # The leading space before the redirection is essential: without it a
    # command ending in a digit N (e.g. "echo 2") would be joined into
    # "...N>/dev/null", which the shell parses as "redirect file
    # descriptor N" -- leaving stdout unsilenced.
    return os.system(command + ' >/dev/null 2>&1')
def shell_return(command):
    """Execute *command* in a subshell and return its stdout, stripped of
    leading/trailing whitespace."""
    with os.popen(command) as stream:
        output = stream.read()
    return output.strip()
def load(filepath):
    """Deserialize the JSON document at *filepath* and return the result."""
    with open(filepath) as handle:
        return json.load(handle)
def git_annex(cpu_info, job_info, uuidgen_hash, base_path):
    """Bundle benchmark metadata and register the results with git-annex.

    Collects the per-run metadata archives found under *base_path* into a
    single ``<uuidgen_hash>.tgz``, appends each run's metadata uuid as a
    new column to the results csv, writes the csv into the current
    directory, and attaches cpu/job/machine/user metadata to it via
    ``git annex metadata``.

    NOTE(review): assumes the current working directory is inside a
    git-annex repository and that ``git``/``git-annex`` are on PATH --
    confirm before reuse.

    Parameters
    ----------
    cpu_info : dict
        key/value description of the CPU; values must be strings
        (``.replace`` is called on them).
    job_info : dict
        key/value description of the benchmark job; same string
        expectation as *cpu_info*.
    uuidgen_hash : str
        unique identifier of this benchmark session; used as basename of
        the csv and the tgz bundle.
    base_path : str
        directory holding the temporary results csv
        (``<uuidgen_hash>.csv``) and the per-run ``*.tgz`` metadata
        archives (searched recursively).
    """
    tmp_result_file_path = os.path.join(base_path, uuidgen_hash + '.csv')
    result_file_path = os.path.join('./', uuidgen_hash + '.csv')
    archive_path = f'{uuidgen_hash}.tgz'
    ## tar gzip metadata archives, append uuids to results csv file
    metadata_archives = glob.glob(f"{base_path}/**/*.tgz", recursive=True)
    # sort metadata in same way as they are sorted in the csv
    metadata_archives.sort()
    # get metadata uuids from absolute paths
    # (uuid = archive filename up to its first '.')
    metadata_uuids = [os.path.split(archive)[-1].split('.')[0]
                      for archive in metadata_archives]
    # tar metadata archives for each run
    # NOTE(review): mode "w" writes an *uncompressed* tar despite the
    # .tgz suffix -- use "w:gz" if gzip compression is actually intended;
    # confirm against downstream consumers before changing.
    with tarfile.open(archive_path, "w") as tar_obj:
        for archive in metadata_archives:
            tar_obj.add(archive,
                        arcname=os.path.join(
                            uuidgen_hash,
                            os.path.split(archive)[-1]))
    # add metadata uuid to corresponding csv entry
    # (relies on the csv rows being ordered the same way as the sorted
    # archive list above -- TODO confirm this invariant holds)
    csv = pd.read_csv(tmp_result_file_path)
    csv['metadata_uuid'] = metadata_uuids
    csv.to_csv(result_file_path)
    # works for machines with the naming scheme XXX.name (used for JSC
    # clusters, might need adjustment for other machines)
    machine = os.popen('echo $HOSTNAME').read().strip().split('.')[-1]
    user = os.popen('echo $USER').read().strip()
    # put the annotated csv and the metadata bundle under annex control
    shell(f'git annex add {result_file_path}')
    shell(f'git annex add {archive_path}')
    shell_without_print(
        f'git annex metadata {result_file_path} --set key={uuidgen_hash}')
    # attach every job/cpu property as git-annex metadata on the csv;
    # spaces in values are replaced by semicolons before quoting
    for info_dict in [job_info, cpu_info]:
        for key, value in info_dict.items():
            value_without_spaces = value.replace(' ', ';')
            shell_without_print(f'git annex metadata {result_file_path} '
                                + f'--set {key}="{value_without_spaces}" '
                                + '--force')
    # additionally add machine and user name
    shell_without_print(f'git annex metadata {result_file_path} '
                        + f'--set machine="{machine}" --force')
    shell_without_print(f'git annex metadata {result_file_path} '
                        + f'--set user="{user}" --force')
    # number of distinct rng seeds the results were averaged over
    averaged_over = len(
        np.unique(pd.read_csv(result_file_path)['rng_seed'].values))
    shell_without_print(f'git annex metadata {result_file_path} '
                        + f'--set averaged_over="{averaged_over}" --force')