#!/usr/bin/env python3
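"""Bundle data into a traveler-integrated database.

Accepts phylanx stdout, individual tree / performance / graph files, OTF2
traces, and/or source code files, and stores everything as labeled datasets
under --db_dir. For example (paths are illustrative), a globbed invocation

    ./bundle.py --input data/*/phylanxLog.txt --otf2 data/*/OTF2_archive/APEX.otf2 --label 'data/([^/]*)'

merges each run's files by their common directory name and uses that
directory name as the dataset label.
"""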
import re
import argparse
import subprocess
import asyncio
from data_store import DataStore, logToConsole
parser = argparse.ArgumentParser(
    description=('Bundle data directly from phylanx stdout, individual tree / '
                 'performance / graph files, OTF2 traces, and/or source code files'))
parser.add_argument('-l', '--label', dest='label', type=str, default='Untitled dataset',
                    help=('Label for the bundled dataset (default: "Untitled dataset"). Providing a '
                          'label that already exists in the database will bundle with / overwrite '
                          'any previous data. If globbing multiple inputs, this should be a '
                          'regular expression, where the first capturing group indicates which '
                          'files go together. For example: \n'
                          '--input data/*/phylanxLog.txt \n'
                          '--otf2 data/*/OTF2_archive/APEX.otf2 \n'
                          '--label data/([^/]*) \n'
                          'would merge datasets based on their common directory name, and use '
                          'that directory name as the label.'))
parser.add_argument('-d', '--db_dir', dest='dbDir', default='/tmp/traveler-integrated',
                    help='Directory to store the bundled data (default: /tmp/traveler-integrated)')
parser.add_argument('-i', '--input', dest='input', type=str, metavar='path', nargs='*', default=[],
                    help=('STDOUT from a phylanx run, as a file or pipe (should contain the '
                          'tree, graph, and performance CSV)'))
parser.add_argument('-t', '--tree', dest='tree', type=str, metavar='path', nargs='*', default=[],
                    help='Input newick tree as its own file')
parser.add_argument('-p', '--performance', dest='performance', type=str, metavar='path', nargs='*', default=[],
                    help='Input performance CSV as its own file')
parser.add_argument('-g', '--graph', dest='graph', type=str, metavar='path', nargs='*', default=[],
                    help='Input DOT-formatted links as its own file')
parser.add_argument('-o', '--otf2', dest='otf2', type=str, metavar='path', nargs='*', default=[],
                    help='Input OTF2 trace (e.g. OTF2_archive/APEX.otf2)')
parser.add_argument('-y', '--physl', dest='physl', type=str, metavar='path', nargs='*', default=[],
                    help='Input PhySL source code file')
parser.add_argument('-n', '--python', dest='python', type=str, metavar='path', nargs='*', default=[],
                    help='Input Python source code file')
parser.add_argument('-c', '--cpp', dest='cpp', type=str, metavar='path', nargs='*', default=[],
                    help='Input C++ source code file')
parser.add_argument('-s', '--debug', dest='debug', action='store_true',
                    help='Store additional information for debugging source files, etc.')
parser.add_argument('-a', '--tags', dest='tags', type=str,
                    help=('Tags to attach to the dataset (when bundling multiple '
                          'datasets, the same tags are attached to all datasets bundled '
                          'at the same time). Separate tags with commas.'))
parser.add_argument('-z', '--color', dest='color', type=str, default='#e6ab02',
                    help=('Color to attach to the dataset. A different color is '
                          'stored per dataset.'))
parser.add_argument('-f', '--folder', dest='folder', type=str,
                    help=('Folder or path name that will be prefixed to the label of all '
                          'data bundled by this command; this is usually a good idea when '
                          'bundling lots of files, to reduce clutter in the interface'))
class FakeFile: #pylint: disable=R0903
    def __init__(self, name):
        self.name = name

    async def __aiter__(self):
        # Spawn otf2-print on the trace and yield its stdout one decoded line at a time
        otfPipe = subprocess.Popen(['otf2-print', self.name], stdout=subprocess.PIPE)
        for bytesChunk in otfPipe.stdout:
            yield bytesChunk.decode()
        otfPipe.stdout.flush()
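# Inputs are gathered per label in one of two modes, depending on --label:
# normal mode (no capture group in the pattern), where each argument supplies
# at most one path, and globbing mode (one capture group), where many paths
# per argument are grouped by whatever the capture group matches; e.g. the
# pattern data/([^/]*) would file data/runA/phylanxLog.txt under the
# (hypothetical) label 'runA'.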
async def main():
    args = vars(parser.parse_args())
    if 'folder' in args and args['folder'] is not None:
        args['folder'] = args['folder'].strip('/ ')
    db = DataStore(args['dbDir'], args['debug'])
    await db.load()

    inputs = {}
    labelRegex = re.compile(args['label'])
    if labelRegex.groups == 0:
        # We're in normal mode; one path per argument
        inputs[args['label']] = {}
        for arg in ['input', 'tree', 'performance', 'graph', 'otf2', 'physl', 'python', 'cpp']:
            if len(args[arg]) == 1:
                inputs[args['label']][arg] = args[arg][0]
            elif len(args[arg]) > 1:
                raise Exception('To use glob patterns, please provide a regular expression with one capture group as a --label argument')
        if not inputs[args['label']]:
            raise Exception('At least one of: --input, --tree, --performance, --graph, --otf2, --physl, --python, and/or --cpp is required')
    elif labelRegex.groups == 1:
        # We're in globbing mode; we can expect many files per argument, and
        # --label should be a regular expression that matches input files to
        # their label. The only (possible) exceptions are code files: if only
        # one is provided, use it for all labels (otherwise, expect it to match
        # the regular expression as well).
        singlePhysl = args['physl'][0] if len(args['physl']) == 1 else None
        singlePython = args['python'][0] if len(args['python']) == 1 else None
        singleCpp = args['cpp'][0] if len(args['cpp']) == 1 else None
        for arg in ['input', 'tree', 'performance', 'graph', 'otf2', 'physl', 'python', 'cpp']:
            if arg == 'physl' and singlePhysl is not None:
                continue
            if arg == 'python' and singlePython is not None:
                continue
            if arg == 'cpp' and singleCpp is not None:
                continue
            for path in args[arg]:
                pathMatch = labelRegex.match(path)
                if pathMatch is None:
                    raise Exception('--label pattern could not identify a label for file: %s' % path)
                label = pathMatch[1].replace('/', '')
                inputs[label] = inputs.get(label, {})
                if arg in inputs[label]:
                    raise Exception('--label pattern found duplicate matches for --%s:\n%s\n%s' % (arg, inputs[label][arg], path))
                inputs[label][arg] = path
        for label in inputs:
            if singlePhysl is not None:
                inputs[label]['physl'] = singlePhysl
            if singlePython is not None:
                inputs[label]['python'] = singlePython
            if singleCpp is not None:
                inputs[label]['cpp'] = singleCpp
    else:
        raise Exception('Too many capturing groups in the --label argument')
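    # `inputs` now maps each label to the set of paths that should be bundled
    # under it; process and save one dataset per label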
    for label, paths in inputs.items():
        if 'input' in paths and ('tree' in paths or 'performance' in paths or 'graph' in paths):
            raise Exception('Don\'t use --input with --tree, --performance, or --graph for the same --label: %s' % label)

        try:
            # Initialize the dataset
            datasetId = db.createDataset()['info']['datasetId']
            # Prefix the label with the folder if one was specified
            if 'folder' in args and args['folder'] is not None:
                label = args['folder'] + '/' + label
            await logToConsole('#################' + '#' * len(label))
            await logToConsole('Adding data for: %s (%s)' % (datasetId, label))
            # Assign its name
            db.rename(datasetId, label)
            # Assign its color
            if args['color'] is not None:
                db.recolor(datasetId, args['color'])
            # Assign any tags
            if args['tags'] is not None:
                tags = {t: True for t in args['tags'].split(',')}
                db.addTags(datasetId, tags)
            # Handle performance files
            if 'performance' in paths:
                with open(paths['performance'], 'r') as file:
                    await db.processCsvFile(datasetId, file)
            # Handle tree files
            if 'tree' in paths:
                with open(paths['tree'], 'r') as file:
                    await db.processNewickFile(datasetId, file)
            # Handle graph files
            if 'graph' in paths:
                with open(paths['graph'], 'r') as file:
                    await db.processDotFile(datasetId, file)
            # Handle stdout from phylanx
            if 'input' in paths:
                with open(paths['input'], 'r') as file:
                    await db.processPhylanxLogFile(datasetId, file)
            # Handle code files
            for codeType in ['physl', 'python', 'cpp']:
                if codeType in paths:
                    with open(paths[codeType], 'r') as file:
                        await db.processCodeFile(datasetId, file, codeType)
            # Handle otf2 traces
            if 'otf2' in paths:
                db.addSourceFile(datasetId, paths['otf2'], 'otf2')
                await db.processOtf2(datasetId, FakeFile(paths['otf2']))
            # Save all the data
            await db.save(datasetId)
        except: #pylint: disable=W0702
            await logToConsole('Error encountered; purging corrupted data for: %s' % datasetId)
            del db[datasetId]
            raise
if __name__ == '__main__':
    asyncio.run(main())