forked from lebinh/ngxtop
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathngxtop.py
executable file
·442 lines (363 loc) · 15.4 KB
/
ngxtop.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
#!/usr/bin/env python
"""ngxtop - ad-hoc query for nginx access log.
Usage:
ngxtop [options]
ngxtop [options] (print|top|avg|sum) <var> ...
ngxtop info
ngxtop [options] query <query> ...
Options:
-l <file>, --access-log <file> access log file to parse.
-f <format>, --log-format <format> log format as specify in log_format directive.
--no-follow ngxtop default behavior is to ignore current lines in log
and only watch for new lines as they are written to the access log.
Use this flag to tell ngxtop to process the current content of the access log instead.
-t <seconds>, --interval <seconds> report interval when running in follow mode [default: 2.0]
-g <var>, --group-by <var> group by variable [default: request_path]
-w <var>, --having <expr> having clause [default: 1]
-o <var>, --order-by <var> order of output for default query [default: count]
-n <number>, --limit <number> limit the number of records included in report for top command [default: 10]
-a <exp> ..., --a <exp> ... add exp (must be aggregation exp: sum, avg, min, max, etc.) into output
-v, --verbose more verbose output
-d, --debug print every line and parsed record
-h, --help print this help message.
--version print version information.
Advanced / experimental options:
-c <file>, --config <file> allow ngxtop to parse nginx config file for log format and location.
-i <filter-expression>, --filter <filter-expression> filter in, records satisfied given expression are processed.
-p <filter-expression>, --pre-filter <filter-expression> in-filter expression to check in pre-parsing phase.
Examples:
All examples read nginx config file for access log location and format.
If you want to specify the access log file and / or log format, use the -l and -f options.
"top" like view of nginx requests
$ ngxtop
Top 10 requested path with status 404:
$ ngxtop top request_path --filter 'status == 404'
Top 10 requests with highest total bytes sent
$ ngxtop --order-by 'avg(bytes_sent) * count'
Top 10 remote address, e.g., who's hitting you the most
$ ngxtop --group-by remote_addr
Print requests with 4xx or 5xx status, together with status and http referer
$ ngxtop -i 'status >= 400' print request status http_referer
Average body bytes sent of 200 responses of requested path begin with 'foo':
$ ngxtop avg bytes_sent --filter 'status == 200 and request_path.startswith("foo")'
"""
from contextlib import closing
import logging
import os
import re
import sqlite3
import subprocess
import threading
import urlparse
import time
import sys
from docopt import docopt
import tabulate
# Characters with a special meaning in regular expressions; build_pattern()
# backslash-escapes them when turning a log format into a regexp.
REGEX_SPECIAL_CHARS = r'([\.\*\+\?\|\(\)\{\}\[\]])'
# Matches an nginx variable reference such as $remote_addr and captures its name.
REGEX_LOG_FORMAT_VARIABLE = r'\$([a-z0-9\_]+)'
# Format string used by build_pattern() when the log format is the name 'combined'.
LOG_FORMAT_COMBINED = '$remote_addr - $remote_user [$time_local] ' \
'"$request" $status $body_bytes_sent ' \
'"$http_referer" "$http_user_agent"'
# (label, SQL template) pairs used when no sub-command is selected.
# The %(--option)s placeholders are filled from the command-line arguments
# dictionary in build_processor().
DEFAULT_QUERIES = [
('Summary:',
'''SELECT
count(1) AS count,
avg(bytes_sent) AS avg_bytes_sent,
count(CASE WHEN status_type = 2 THEN 1 END) AS '2xx',
count(CASE WHEN status_type = 3 THEN 1 END) AS '3xx',
count(CASE WHEN status_type = 4 THEN 1 END) AS '4xx',
count(CASE WHEN status_type = 5 THEN 1 END) AS '5xx'
FROM log
ORDER BY %(--order-by)s DESC
LIMIT %(--limit)s'''),
('Detailed:',
'''SELECT
%(--group-by)s,
count(1) AS count,
avg(bytes_sent) AS avg_bytes_sent,
count(CASE WHEN status_type = 2 THEN 1 END) AS '2xx',
count(CASE WHEN status_type = 3 THEN 1 END) AS '3xx',
count(CASE WHEN status_type = 4 THEN 1 END) AS '4xx',
count(CASE WHEN status_type = 5 THEN 1 END) AS '5xx'
FROM log
GROUP BY %(--group-by)s
HAVING %(--having)s
ORDER BY %(--order-by)s DESC
LIMIT %(--limit)s''')
]
# Columns the default queries always read; build_processor() adds the
# --group-by variable to this set.
DEFAULT_FIELDS = set(['status_type', 'bytes_sent'])
# ====================
# Nginx utilities
# ====================
def get_nginx_conf_path():
    """
    Work out the nginx configuration file path from `nginx -V`.

    The build options are read from the standard error of `nginx -V`.
    An explicit --conf-path is returned as is; otherwise the path is built
    from --prefix by appending '/conf/nginx.conf'. When neither option is
    present '/etc/nginx/nginx.conf' is returned.
    """
    nginx = subprocess.Popen(['nginx', '-V'], stderr=subprocess.PIPE)
    _, build_info = nginx.communicate()

    # (option regexp, suffix appended to the captured value), in priority order
    candidates = (
        (r'--conf-path=(\S*)', ''),
        (r'--prefix=(\S*)', '/conf/nginx.conf'),
    )
    for option_regex, suffix in candidates:
        found = re.search(option_regex, build_info)
        if found is not None:
            return found.group(1) + suffix
    return '/etc/nginx/nginx.conf'
def extract_nginx_conf(path, log_file=None, log_format=None):
    """
    *experimental* read nginx conf file to extract access log file location and format.

    :param path: path of the nginx configuration file to read.
    :param log_file: access log path already chosen by the caller; when given it
        is returned unchanged instead of the one found in the configuration.
    :param log_format: log format already chosen by the caller; when given it is
        returned unchanged instead of the one found in the configuration.
    :return: a (log_file, log_format) tuple. Without a log_format directive the
        format is 'combined'; without a matching access_log directive the log
        file is 'logs/access.log'.

    TODO: rewrite this method to:
    - match all access_log directive to get all possible log files
    - for each log file search the correct log_format
    - if more than one log file, offer user to choose which one
    """
    with open(path) as conf_file:
        conf = conf_file.read()

    # Only the first log_format directive of the file is considered.
    log_format_directive = re.search(r'log_format\s+(\S+)\s+(.*?);', conf, flags=re.DOTALL)
    if log_format_directive:
        log_format_name = log_format_directive.group(1)
        conf_log_format = log_format_directive.group(2)
    else:
        log_format_name = conf_log_format = 'combined'

    # take care of log format in multiple line
    # only most common case, which encapsulate log format in single quote is handled
    if '\n' in conf_log_format:
        conf_log_format = ''.join(line.strip() for line in conf_log_format.split('\n'))
    if conf_log_format.startswith("'"):
        conf_log_format = conf_log_format.replace("'", "")

    # The format name is literal text, so escape it before embedding it in the regexp.
    access_log_directive = re.search(
        r'access_log\s+(\S+)\s+%s' % re.escape(log_format_name), conf)

    # Values supplied by the caller win over the configuration file; previously
    # a supplied log_format was always overwritten.
    if not log_file:
        log_file = access_log_directive.group(1) if access_log_directive else 'logs/access.log'
    if not log_format:
        log_format = conf_log_format
    return log_file, log_format
def build_pattern(log_format):
    """
    Compile the regular expression that parses access log lines written with
    the given nginx log format.

    The special name 'combined' stands for LOG_FORMAT_COMBINED. Regexp special
    characters in the format are escaped and every $variable becomes a named
    group of the same name.
    """
    if log_format == 'combined':
        log_format = LOG_FORMAT_COMBINED
    escaped = re.sub(REGEX_SPECIAL_CHARS, r'\\\1', log_format)
    with_groups = re.sub(REGEX_LOG_FORMAT_VARIABLE, '(?P<\\1>.*)', escaped)
    return re.compile(with_groups)
def extract_variables(log_format):
    """
    Yield the name of every $variable referenced in the given log format.

    The special name 'combined' is resolved to LOG_FORMAT_COMBINED first, the
    same way build_pattern() does, so the default format reports its variables
    instead of none.
    """
    if log_format == 'combined':
        log_format = LOG_FORMAT_COMBINED
    for match in re.findall(REGEX_LOG_FORMAT_VARIABLE, log_format):
        yield match
# ======================
# generator utilities
# ======================
def follow(the_file):
    """
    Tail the given file: skip what it currently contains and yield each line
    appended afterwards, like `tail -f`. This generator never terminates.
    """
    with open(the_file) as stream:
        stream.seek(0, os.SEEK_END)  # start from the current end of file
        while True:
            line = stream.readline()
            if line:
                yield line
            else:
                # nothing new yet: wait briefly before polling again
                time.sleep(0.1)
def map_field(field, func, dict_sequence):
    """
    Lazily replace the value stored under `field` in each dictionary of the
    sequence with `func(value)`; `func` receives None when the key is absent.
    The dictionaries are modified in place and yielded one by one.
    """
    for record in dict_sequence:
        current = record.get(field)
        record[field] = func(current)
        yield record
def add_field(field, func, dict_sequence):
    """
    Lazily fill in `field` for each record of the sequence with `func(record)`.
    Records that already have the field are passed through untouched and
    `func` is not called for them.
    """
    for record in dict_sequence:
        if field in record:
            yield record
            continue
        record[field] = func(record)
        yield record
def trace(sequence, phase=''):
    """
    Pass every item of the sequence through unchanged, logging each one at
    debug level together with the given phase label.
    """
    for element in sequence:
        logging.debug('%s:\n%s', phase, element)
        yield element
# ======================
# Access log parsing
# ======================
def parse_request_path(record):
    """
    Return the path component of the requested URI, or None when it cannot be
    determined.

    The URI is taken from 'request_uri' when the record has that key; otherwise
    it is the middle part of the 'request' line, i.e. everything between the
    first and the last space-separated token.
    """
    if 'request_uri' in record:
        uri = record['request_uri']
    elif 'request' in record:
        tokens = record['request'].split(' ')
        uri = ' '.join(tokens[1:-1])
    else:
        return None

    if not uri:
        return None
    return urlparse.urlparse(uri).path
def parse_status_type(record):
    """
    Return the class of the record's HTTP status (2 for 2xx, 4 for 4xx, ...),
    or None when the record has no 'status' key.

    Floor division keeps the result an integer under true division as well,
    so it still compares equal to the integer classes used in the SQL queries.
    """
    if 'status' not in record:
        return None
    return record['status'] // 100
def to_int(value):
    """Convert a log value to int; empty values and the '-' placeholder give 0."""
    if not value or value == '-':
        return 0
    return int(value)
def to_float(value):
    """Convert a log value to float; empty values and the '-' placeholder give 0.0."""
    if not value or value == '-':
        return 0.0
    return float(value)
def parse_log(lines, pattern):
    """
    Lazily turn raw access log lines into record dictionaries.

    :param lines: iterable of log lines.
    :param pattern: compiled regexp whose named groups become the record keys.
    :return: generator of records. Lines the pattern does not match are dropped.
        Each record gets an integer 'status', a 'status_type', an integer
        'bytes_sent', a float 'request_time' and a 'request_path'.
    """
    matches = (pattern.match(line) for line in lines)
    records = (m.groupdict() for m in matches if m is not None)
    records = map_field('status', to_int, records)
    records = add_field('status_type', parse_status_type, records)
    # Fall back to body_bytes_sent when the format has no bytes_sent. Use .get()
    # so a format with neither variable yields 0 instead of a KeyError.
    records = add_field('bytes_sent', lambda r: r.get('body_bytes_sent'), records)
    records = map_field('bytes_sent', to_int, records)
    records = map_field('request_time', to_float, records)
    records = add_field('request_path', parse_request_path, records)
    return records
# =================================
# Records and statistic processor
# =================================
class SQLProcessor(object):
    """
    Store records in an in-memory SQLite table named `log` and render report
    queries against it.
    """

    def __init__(self, report_queries, fields, index_fields=None):
        """
        :param report_queries: SQL strings or (label, SQL) tuples run by report().
        :param fields: names of the record keys to store; they become the
            columns of the `log` table.
        :param index_fields: optional column names to create an index on.
        """
        # False until process() starts, then the start timestamp.
        self.begin = False
        self.report_queries = report_queries
        self.index_fields = index_fields if index_fields is not None else []
        # Materialise once so that a one-shot iterable can feed both lists.
        fields = list(fields)
        self.column_list = ','.join(fields)
        self.holder_list = ','.join(':%s' % var for var in fields)
        # check_same_thread=False allows the connection to be used from a
        # thread other than the one that created it.
        self.conn = sqlite3.connect(':memory:', check_same_thread=False)
        self.init_db()

    def process(self, records):
        """
        Insert every record into the `log` table and return the row count.

        :param records: iterable of dictionaries containing at least the
            configured fields as keys.
        """
        self.begin = time.time()
        insert = 'insert into log (%s) values (%s)' % (self.column_list, self.holder_list)
        with closing(self.conn.cursor()) as cursor:
            for r in records:
                cursor.execute(insert, r)
        return self.count()

    def report(self):
        """
        Run all report queries and return their tables as one string, preceded
        by a status line. Returns '' when process() has not started yet.
        """
        if not self.begin:
            return ''
        count = self.count()
        duration = time.time() - self.begin
        # A coarse or adjusted clock can report no elapsed time at all; report
        # a zero rate then instead of raising ZeroDivisionError.
        rate = count / duration if duration > 0 else 0.0
        status = 'running for %.0f seconds, %d records processed: %.2f req/sec'
        output = [status % (duration, count, rate)]
        with closing(self.conn.cursor()) as cursor:
            for query in self.report_queries:
                # A query is either a bare SQL string or a (label, SQL) tuple.
                if isinstance(query, tuple):
                    label, query = query
                else:
                    label = ''
                cursor.execute(query)
                columns = (d[0] for d in cursor.description)
                result = tabulate.tabulate(cursor.fetchall(), headers=columns, tablefmt='orgtbl', floatfmt='.3f')
                output.append('%s\n%s' % (label, result))
        return '\n\n'.join(output)

    def init_db(self):
        """Create the `log` table and one index per entry of index_fields."""
        create_table = 'create table log (%s)' % self.column_list
        with closing(self.conn.cursor()) as cursor:
            logging.info('sqlite init: %s', create_table)
            cursor.execute(create_table)
            for idx, field in enumerate(self.index_fields):
                sql = 'create index log_idx%d on log (%s)' % (idx, field)
                logging.info('sqlite init: %s', sql)
                cursor.execute(sql)

    def count(self):
        """Return the number of rows currently stored in the `log` table."""
        with closing(self.conn.cursor()) as cursor:
            cursor.execute('select count(1) from log')
            return cursor.fetchone()[0]
# ===============
# Log processing
# ===============
def process_log(lines, pattern, processor, arguments):
    """
    Filter and parse the log lines, feed the records to the processor, print
    its report and return the total the processor reports.

    NOTE: the --pre-filter and --filter expressions are evaluated with eval();
    they come straight from the command line and must be trusted.
    """
    pre_filter_exp = arguments['--pre-filter']
    if pre_filter_exp:
        # the raw line is exposed to the expression under the name `line`
        def keep_line(line):
            return eval(pre_filter_exp, {}, dict(line=line))
        lines = (line for line in lines if keep_line(line))

    records = parse_log(lines, pattern)

    filter_exp = arguments['--filter']
    if filter_exp:
        # the record's keys are exposed to the expression as local variables
        def keep_record(record):
            return eval(filter_exp, {}, record)
        records = (record for record in records if keep_record(record))

    total = processor.process(records)
    print(processor.report())
    return total
def build_processor(arguments):
    """
    Build the SQLProcessor matching the sub-command selected on the command line.

    :param arguments: the parsed command-line arguments dictionary.
    :return: an SQLProcessor configured with the report queries and the fields
        those queries need.
    """
    fields = arguments['<var>']
    if arguments['print']:
        label = ', '.join(fields) + ':'
        selections = ', '.join(fields)
        query = 'select %s from log group by %s' % (selections, selections)
        report_queries = [(label, query)]
    elif arguments['top']:
        limit = int(arguments['--limit'])
        report_queries = []
        for var in fields:
            label = 'top %s' % var
            query = 'select %s, count(1) as count from log group by %s order by count desc limit %d' % (var, var, limit)
            report_queries.append((label, query))
    elif arguments['avg']:
        # join the names so the label does not show a list repr
        label = 'average %s' % ', '.join(fields)
        selections = ', '.join('avg(%s)' % var for var in fields)
        query = 'select %s from log' % selections
        report_queries = [(label, query)]
    elif arguments['sum']:
        label = 'sum %s' % ', '.join(fields)
        selections = ', '.join('sum(%s)' % var for var in fields)
        query = 'select %s from log' % selections
        report_queries = [(label, query)]
    elif arguments['query']:
        report_queries = arguments['<query>']
        # NOTE(review): the usage text does not declare a <fields> argument, so
        # this lookup presumably fails for the `query` sub-command - confirm
        # and extend the usage pattern.
        fields = arguments['<fields>']
    else:
        report_queries = [(name, query % arguments) for name, query in DEFAULT_QUERIES]
        fields = DEFAULT_FIELDS.union(set([arguments['--group-by']]))

    # Queries may be bare SQL strings (the `query` sub-command) or
    # (label, SQL) tuples; accept both when logging them.
    for entry in report_queries:
        label, query = entry if isinstance(entry, tuple) else ('', entry)
        logging.info('query for "%s":\n %s', label, query)

    processor = SQLProcessor(report_queries, fields)
    return processor
def build_source(access_log, arguments):
    """
    Return the iterable of log lines to process: the opened access log itself
    when --no-follow is set, otherwise a follow() generator over it.
    """
    if not arguments['--no-follow']:
        return follow(access_log)
    return open(access_log)
def build_reporter(processor, arguments):
    """
    Create, without starting it, the daemon thread that periodically clears the
    screen and prints the processor's report. Returns None when --no-follow is
    set.
    """
    if arguments['--no-follow']:
        return None

    interval = float(arguments['--interval'])
    clear_command = 'cls' if os.name == 'nt' else 'clear'

    def report():
        os.system(clear_command)
        while True:
            time.sleep(interval)
            output = processor.report()
            os.system(clear_command)
            print(output)

    reporter = threading.Thread(target=report)
    reporter.daemon = True
    return reporter
def main(arguments):
    """
    Run ngxtop with the parsed command-line arguments.

    The access log and log format come from --access-log / --log-format; any
    of the two that is missing is read from the nginx configuration file
    (--config, or the one detected from the nginx binary). The `info` command
    prints the resolved settings and returns; every other command processes
    the log and reports.
    """
    access_log = arguments['--access-log']
    log_format = arguments['--log-format']
    if access_log is None or log_format is None:
        config = arguments['--config']
        if config is None:
            config = get_nginx_conf_path()
        access_log, conf_log_format = extract_nginx_conf(config, access_log)
        # Keep a format given with --log-format; previously it was replaced by
        # the configuration's format whenever --access-log was missing.
        if log_format is None:
            log_format = conf_log_format
    else:
        config = None

    logging.info('access_log: %s', access_log)
    logging.info('log_format: %s', log_format)

    if arguments['info']:
        # title and value are separated by a single space
        def show(title, value):
            print('%s %s' % (title, value))
        show('configuration file:\n ', config)
        show('access log file:\n ', access_log)
        show('access log format:\n ', log_format)
        show('available variables:\n ', ', '.join(sorted(extract_variables(log_format))))
        return

    begin = time.time()
    source = build_source(access_log, arguments)
    pattern = build_pattern(log_format)
    processor = build_processor(arguments)
    reporter = build_reporter(processor, arguments)
    if reporter is not None:
        reporter.start()
    total = process_log(source, pattern, processor, arguments)
    duration = time.time() - begin
    # Guard against a clock that reports no elapsed time.
    rate = total / duration if duration > 0 else 0.0
    logging.info('Processed %d lines in %.3f seconds, %.2f lines/sec.', total, duration, rate)
if __name__ == '__main__':
    args = docopt(__doc__, version='xstat 0.1')

    # --debug takes precedence over --verbose; WARNING is the default level.
    if args['--debug']:
        log_level = logging.DEBUG
    elif args['--verbose']:
        log_level = logging.INFO
    else:
        log_level = logging.WARNING
    logging.basicConfig(level=log_level, format='%(levelname)s: %(message)s')
    logging.debug('arguments:\n%s', args)

    # Ctrl-C is the normal way to leave follow mode, so exit quietly.
    try:
        main(args)
    except KeyboardInterrupt:
        sys.exit(0)