#! /usr/bin/env python
__doc__ = """csvs: Frontend for CSVSee

Usage::

    csvs [command] [options]

Command may be::

    filter
    graph
    grep
    grinder
    info

Run ``csvs [command]`` with no further arguments to get help.
"""

usage = __doc__

"""
Ideas
-----
Manipulation of .csv files, especially large ones
- Display column names / column count / row count
- Split into manageable pieces based on column name or position
High-level analysis
- Display "interesting" columns (ones with large or frequent variation)
- Display "boring" columns (ones that are always the same or with little variation)
"""
import sys
import csv
from csvsee import utils
from csvsee.graph import Graph
from csvsee import grinder
class UsageError (Exception):
    pass

def graph_command(args):
    """
    Generate a graph from a .csv data file.

    Usage::

        csvs graph filename.csv [-options] ["Column 1"] ["Column 2"] ...

    Where filename.csv contains comma-separated values, with column names in the
    first row, and all subsequent arguments are regular expressions that may match
    one or more column names.

    Options:

        -x "<column name>"
            An expression matching the column you want to use for your X-axis.
            If this is omitted, the first column of the .csv file will be used
            as the X-axis coordinate.

        -dateformat "<format string>"
            Interpret the timestamp as a date in the given format. Examples:

                %m/%d/%y %I:%M:%S %p: 12/10/09 3:45:56 PM (Grinder logs)
                %m/%d/%Y %H:%M:%S.%f: 12/10/2009 15:45:56.789 (Perfmon)

            See http://docs.python.org/library/datetime.html for valid formats.
            By default, the date format will be guessed based on the first row of
            the .csv file. If the X-column is NOT a date, use -dateformat "".

        -title "Title"
            Set the title label for the graph. By default, the .csv filename
            is used as the graph title.

        -save "filename.(png|svg|pdf)"
            Save the graph to a file. The default is to show the graph in a viewer.

        -linestyle "<format string>"
            Define the style of lines plotted on the graph. Examples:

                "-"   Solid line (default)
                "."   Point marker
                "o"   Circle marker
                "o-"  Circles connected by solid lines

            See the Matplotlib Axes.plot documentation for available styles:
            http://matplotlib.sourceforge.net/api/axes_api.html#matplotlib.axes.Axes.plot

        -xlabel "Label string"
            Use the given string as the label of the X axis. If omitted, the
            name of the X-column is used.

        -ylabel "Label string" | prefix
            Use the given string as the label of the Y axis. By default, the
            Y axis has no label. If 'prefix' is given, the prefix common to all
            the matched column names is used.

        -ymax <number>
            Set the maximum Y-value beyond which the graph is cropped. By default,
            the maximum Y-value is determined by the maximum value present in
            the data.

        -truncate <number>
            Truncate the column labels to <number> characters. By default,
            no truncation is done.

        -top <number>
            Graph only the top <number> columns, ranked by the average of
            all values in each matching column.

        -peak <number>
            Graph only the top <number> columns, ranked by the highest peak
            value in each matching column.

        -drop <number>
            When used in conjunction with -top or -peak, omit the top <number>
            columns. For example, -top 10 -drop 5 skips the 5 highest-ranked
            columns and graphs the next 10.

        -gmtoffset [+/-]<hours>
            Adjust timestamps if they are not in GMT. For example, if the
            timestamps are GMT-6, use -gmtoffset +6 to make the graph display
            them as GMT times.

        -zerotime
            Adjust all timestamps so the graph starts at 00:00.

    If no column names are given, then all columns are graphed. To graph only
    specific columns, provide one or more column expressions after the .csv
    filename and any options. Column names are given as regular expressions,
    allowing you to match multiple columns.

    Examples:

        csvs graph data.csv
            Graph all columns found in data.csv, using the first column
            as the X-axis.

        csvs graph data.csv -top 5
            Graph the 5 columns with the highest average value.

        csvs graph data.csv "^Response.*"
            Graph all columns beginning with the word "Response".

        csvs graph data.csv A B C
            Graph columns "A", "B", and "C". Note that these are regular
            expressions, and will actually match all columns containing "A", all
            columns containing "B", and all columns containing "C".

    If the first column is a date field, the X axis will be displayed in HH:MM
    format. Otherwise, all columns must be numeric (integer or floating-point).
    """
    # CSV file is always the first argument
    csv_file = args.pop(0)
    if not csv_file.lower().endswith('.csv'):
        raise UsageError("First argument must be a filename with .csv extension.")

    # Create Graph for this csv file
    graph = Graph(csv_file)
    save_file = ''

    # Get any -options that follow
    while args and args[0].startswith('-'):
        opt = args.pop(0).lstrip('-')
        if opt in graph.strings:
            graph[opt] = args.pop(0)
        elif opt in graph.ints:
            graph[opt] = int(args.pop(0))
        elif opt in graph.floats:
            graph[opt] = float(args.pop(0))
        elif opt in graph.bools:
            graph[opt] = True
        elif opt == 'save':
            save_file = args.pop(0)
        else:
            raise UsageError("Unknown option: %s" % opt)

    # Get column expressions (all remaining arguments, if any)
    if args:
        graph['y'] = args

    # Generate the graph
    graph.generate()
    if save_file:
        graph.save(save_file)
    else:
        graph.show()

def grep_command(args):
    """
    Create a .csv file by counting the number of occurrences of
    text strings in one or more timestamped text files.

    Usage::

        csvs grep <file1> <file2> -match <expr1> <expr2> -out <report.csv> [-options]

    Options::

        -seconds <number>
            Report match frequency with a granularity of <number> seconds. The
            default is 60 seconds (1 minute); that is, each line of the .csv
            output will include the count of all matches during each minute.

        -dateformat "<format string>"
            Interpret date/time using the given format. If omitted, the format
            is inferred by guessing.
            See http://docs.python.org/library/datetime.html for valid formats.
    """
    # Need at least five arguments
    if len(args) < 5:
        raise UsageError()

    infiles = []
    matches = []
    csvfile = ''
    dateformat = ''
    seconds = 60

    # Get input filenames until an -option is reached
    while args and not args[0].startswith('-'):
        infiles.append(args.pop(0))

    while args:
        opt = args.pop(0)
        if opt == '-match':
            while args and not args[0].startswith('-'):
                matches.append(args.pop(0))
        elif opt == '-out':
            csvfile = args.pop(0)
        elif opt == '-dateformat':
            dateformat = args.pop(0)
        elif opt == '-seconds':
            seconds = int(args.pop(0))
        else:
            raise UsageError("Unknown option: '%s'" % opt)

    # Search all the given files for matching text, and write the results to
    # csvfile, with the first column being the timestamp, and remaining columns
    # being the number of times each match was found.
    outfile = open(csvfile, 'w')
    heading = '"Timestamp","%s"' % '","'.join(matches)
    outfile.write(heading + '\n')
    for (timestamp, counts) in utils.grep_files(infiles, matches, dateformat, seconds):
        line = '%s' % timestamp
        for match in matches:
            line += ',%s' % counts[match]
        outfile.write(line + '\n')
    outfile.close()
    print("Wrote '%s'" % csvfile)

def grinder_command(args):
    """
    Generate a .csv report of data from Grinder log files.

    Usage::

        csvs grinder [-options] <out_file> <data_files ...> <csv_prefix>

    Options::

        -seconds <number>
            Summarize statistics over an interval of <number> seconds.
            Default is 60-second intervals.

    This will generate one .csv file for each of several important statistics.
    """
    # Defaults
    granularity = 60

    # Get any -options
    while args and args[0].startswith('-'):
        opt = args.pop(0)
        if opt == '-seconds':
            granularity = int(args.pop(0))
        else:
            raise UsageError("Unknown option: '%s'" % opt)

    # Need at least three positional arguments
    if len(args) < 3:
        raise UsageError()

    # Get positional arguments
    out_file = args[0]
    data_files = args[1:-1]
    csv_prefix = args[-1]

    # Generate the report
    report = grinder.Report(granularity, out_file, *data_files)
    report.write_all_csvs(csv_prefix)

# TODO: Refactor some of this into a submodule
def info_command(args):
    """
    Display statistics and high-level analysis of a .csv file.

    Usage::

        csvs info <filename.csv> [-options]

    Options::

        -columns
            Display all column names
    """
    # Need a .csv filename at least
    if len(args) < 1:
        raise UsageError()

    csvfile = args.pop(0)
    show_columns = False
    while args and args[0].startswith('-'):
        opt = args.pop(0)
        if opt == '-columns':
            show_columns = True
        else:
            raise UsageError("Unknown option: '%s'" % opt)

    # Read the header row to count and (optionally) list the columns
    with open(csvfile) as infile:
        reader = csv.DictReader(infile)
        num_columns = len(reader.fieldnames)
        print(csvfile)
        print("%d columns" % num_columns)
        if show_columns:
            print("Column names:")
            print("-------------")
            for column in reader.fieldnames:
                print(column)

def filter_command(args):
    """
    Filter a .csv file, keeping only matching columns.

    Usage::

        csvs filter <in_file.csv> -match <expr1> <expr2> ... -out <out_file.csv>
    """
    # Need at least five arguments
    if len(args) < 5:
        raise UsageError()

    infile = args.pop(0)
    matches = []
    outfile = ''
    while args:
        opt = args.pop(0)
        if opt == '-match':
            while args and not args[0].startswith('-'):
                matches.append(args.pop(0))
        elif opt == '-out':
            outfile = args.pop(0)
        else:
            raise UsageError("Unknown option: '%s'" % opt)

    if not matches:
        raise UsageError("Please provide one or more match expressions with -match")
    if not outfile:
        raise UsageError("Please provide an output file with -out")

    utils.filter_csv(infile, outfile, matches)

# Commands and the functions that handle them
command_functions = {
    'graph': graph_command,
    'grep': grep_command,
    'grinder': grinder_command,
    'info': info_command,
    'filter': filter_command,
}

def exit_msg(usage, text=''):
    """Print usage notes along with a message, then exit the application.
    """
    print(usage)
    if text:
        print(text)
    sys.exit(1)

# Main program
if __name__ == '__main__':
    if len(sys.argv) < 2:
        exit_msg(usage)

    args = sys.argv[1:]
    command = args.pop(0)

    # If the command is not known, print usage and exit
    if command not in command_functions:
        exit_msg(usage, "Unknown command: '%s'" % command)

    # Get the appropriate function
    function = command_functions[command]

    # If there are no further arguments, display help for the command and exit
    if not args:
        print(function.__doc__)
        sys.exit(0)

    # Run the command and catch errors
    try:
        function(args)
    except UsageError as message:
        exit_msg(function.__doc__, message)
    except KeyboardInterrupt:
        print("Aborted!")
        sys.exit(0)