-
Notifications
You must be signed in to change notification settings - Fork 0
/
dstocc.py
executable file
·314 lines (213 loc) · 9.13 KB
/
dstocc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
#!/usr/bin/env python
# vim: tabstop=8 expandtab shiftwidth=4 softtabstop=4
'''dstocc - Loads dirsearch JSON output and captures result with CutyCapt'''
# Script metadata - these module level names are read by parse_args() when
# the commandline help is built, so they must keep their current names.

# Authors of the script, joined with ", " into the argparse epilog
developers = ['Joel Rangsmo <joel@rangsmo.se>']

# The module docstring doubles as the argparse description text
description = __doc__

# Printed by the "-v/--version" commandline option
version = '0.1'

# License name, shown next to the developers in the argparse epilog
license = 'GPLv2'
try:
import json
import time
import string
import argparse
import subprocess
import logging as log
from Queue import Queue
from random import shuffle
from threading import Thread
# PyInstaller requires explicit import of exit
from sys import exit
except ImportError as missing:
print(
'UNKNOWN - Could not import all required modules: "%s".\n' % missing +
'The script requires Python 2.7 or 2.6 with the "argparse" module\n'
'Installation with PIP: "pip install argparse"')
exit(3)
# -----------------------------------------------------------------------------
def split_status_codes(status_code_string):
    '''Split a comma separated string with status codes into a list

    Used as "type" converter for the include/exclude commandline options.

    status_code_string: String with comma separated codes, like "200,500".

    Returns a list of integers. A real list is built (instead of relying on
    map(), which is a one-shot iterator on Python 3) since the result is
    tested for emptiness and membership several times by the caller.

    Raises argparse.ArgumentTypeError if any item is not an integer.
    '''

    try:
        return [int(code) for code in status_code_string.split(',')]

    # Only conversion failures are translated - a bare "except" would also
    # swallow KeyboardInterrupt and SystemExit
    except ValueError:
        raise argparse.ArgumentTypeError('Status codes must to be integers')
def parse_args():
    '''Defines the commandline interface and returns the parsed arguments'''

    epilog_text = 'Developed by %s - licensed under %s!' % (
        ', '.join(developers), license)

    default_command = (
        'CutyCapt --url=%URL% --out=%FILENAME% '
        '--min-width=1024 --min-height=768')

    # Every entry is a pair of option flags and add_argument() settings.
    # The options are registered in the listed order, which is also the
    # order they show up in the generated help text
    option_table = (
        (('-f', '--file'), dict(
            dest='results_file',
            required=True,
            type=argparse.FileType('r'),
            metavar='/path/to/results.json',
            help='Path to dirsearch JSON output file')),

        (('-i', '--include'), dict(
            dest='included_codes',
            default=[],
            type=split_status_codes,
            metavar='"200,500"',
            help='HTTP status codes to include in CutyCapt (default: All)')),

        (('-e', '--exclude'), dict(
            dest='excluded_codes',
            default=[],
            type=split_status_codes,
            metavar='"403,404"',
            help='HTTP status codes to exclude in CutyCapt (default: None)')),

        (('-c', '--command-template'), dict(
            default=default_command,
            type=str,
            metavar='CMD',
            help='Template for CutyCapt shell command (default: %(default)s)')),

        (('-t', '--timeout'), dict(
            default=20,
            type=int,
            metavar='SECONDS',
            help='Timeout in seconds for CutyCapt execution (default: 20)')),

        (('-T', '--threads'), dict(
            dest='worker_threads',
            default=4,
            type=int,
            metavar='INT',
            help='Number of threads for CutyCapt workers (default: 4)')),

        (('-V', '--verbose'), dict(
            dest='log_verbose',
            default=False,
            action='store_true',
            help='Enable verbose application logging')),

        (('-v', '--version'), dict(
            action='version',
            version=version,
            help='Display script version')))

    cli = argparse.ArgumentParser(description=description, epilog=epilog_text)

    for flags, settings in option_table:
        cli.add_argument(*flags, **settings)

    return cli.parse_args()
# -----------------------------------------------------------------------------
def load_target_urls(results_file, included_codes, excluded_codes):
    '''Loads JSON results file and filters out URLs that should be captured

    results_file: Readable file object with dirsearch JSON output. The data
    is expected to be an object that maps each base URL to a list of
    entries with a "status" and a "path" key. Anything before the first "{"
    (such as leading NULL characters) is ignored.

    included_codes: Status codes to capture - an empty list means "all".

    excluded_codes: Status codes that are never captured. Exclusion wins if
    a code is listed as both included and excluded.

    Returns a list of URLs (base URL + path) in random order.

    Raises ValueError if the file has no JSON object or the JSON is invalid
    and KeyError if an entry lacks the "status" or "path" key.
    '''

    log.debug('Loading resuls from JSON file')

    # Skips everything in front of the JSON object before decoding it
    results_string = results_file.read()
    json_start = results_string.find('{')

    if json_start == -1:
        # Gives the caller something more useful than "substring not found"
        raise ValueError('No JSON object found in results file')

    results = json.loads(results_string[json_start:])

    log.debug('dirsearch result data: "%s"' % str(results))

    # Filters the sub-paths that should be captured
    target_urls = []

    # Plain dictionary iteration works on both Python 2 and Python 3,
    # unlike the Python 2 only "iterkeys" method
    for url in results:
        log.debug('Checking sub-paths for URL "%s"' % url)

        for sub_path in results[url]:
            log.debug('Checking sub-path "%s" for URL "%s"' % (sub_path, url))

            status_code = sub_path['status']
            path = sub_path['path']

            if status_code in excluded_codes:
                log.debug('Status code %s should be excluded' % status_code)
                continue

            # An empty include list means that every status code is wanted
            if included_codes and status_code not in included_codes:
                continue

            log.debug('Including status code %s' % status_code)
            target_urls.append(url + path)

    # Shuffles the target list to spread out the capturing load
    shuffle(target_urls)

    log.debug('Target URLs for capturing: "%s"' % target_urls)

    return target_urls
# -----------------------------------------------------------------------------
def _stop_process(process, grace_period=2):
    '''Terminates a process and waits for it, killing it if it lingers'''

    try:
        process.terminate()

    except OSError:
        # The process exited on its own after the last status poll
        pass

    # Gives the process a moment to shut down before it is killed
    attempts = grace_period * 10

    while process.poll() is None and attempts > 0:
        attempts -= 1
        time.sleep(0.1)

    if process.poll() is None:
        try:
            process.kill()

        except OSError:
            pass

        # Collects the exit status so no zombie process is left behind
        process.wait()


def cutycapt_exec(url, command_template, timeout):
    '''Executes the CutyCapt application with subprocess to target URL

    url: URL that should be captured.

    command_template: Space separated command line in which "%URL%" and
    "%FILENAME%" are replaced with the URL and the generated image name.

    timeout: Maximum number of seconds the command is allowed to run.

    Returns None in every case - the outcome is reported through logging.
    Failure to start the command is logged instead of being raised.
    '''

    # Imported locally since this is the only user of the module
    import tempfile

    log.info('Capturing URL "%s"...' % url)

    # Creates a "safe" filename for output image
    safe_chars = string.ascii_letters + string.digits + '-_.='

    file_name = url.replace('/', '_').replace(':', '-')
    file_name = ''.join(char for char in file_name if char in safe_chars)
    file_name += '.png'

    log.debug('Generated filename for URL "%s": "%s"' % (url, file_name))

    # -------------------------------------------------------------------------
    log.debug('Building command string from template "%s"' % command_template)

    # The template is split before the placeholders are replaced, which
    # keeps a URL containing spaces from turning into several arguments
    arguments = [
        part.replace('%URL%', url).replace('%FILENAME%', file_name)
        for part in command_template.split(' ')]

    log.debug('Executing shell command "%s"' % ' '.join(arguments))

    # Output is collected in temporary files instead of pipes: nothing reads
    # from a pipe while the process is polled, so a talkative command would
    # fill the pipe buffer, block and be reported as timed out
    stdout_file = tempfile.TemporaryFile()
    stderr_file = tempfile.TemporaryFile()

    try:
        try:
            shell_exec = subprocess.Popen(
                arguments, stdout=stdout_file, stderr=stderr_file)

        except OSError as error_msg:
            # Typically raised when the executable could not be found
            log.error(
                'Failed to execute CutyCapt for URL "%s": "%s"'
                % (url, error_msg))

            return

        # Poll the execution status of command every 100 milliseconds
        attempts = timeout * 10

        while shell_exec.poll() is None and attempts > 0:
            attempts -= 1
            time.sleep(0.1)

        # The process state decides if this is a timeout, not the counter -
        # the command may have finished during the very last sleep
        if shell_exec.poll() is None:
            log.error(
                'Failed to capture "%s": Execution timed out after %i seconds'
                % (url, timeout))

            log.debug('Terminating process "%i"' % shell_exec.pid)
            _stop_process(shell_exec)

            return

        collected = []

        for output_file in (stdout_file, stderr_file):
            output_file.seek(0)
            collected.append(output_file.read())

        # Same (stdout, stderr) layout as returned by Popen.communicate()
        output = tuple(collected)
        exit_code = shell_exec.returncode

    finally:
        stdout_file.close()
        stderr_file.close()

    log.debug(
        'Execution status for URL "%s" - output: "%s", exit code "%i"'
        % (url, output, exit_code))

    if exit_code != 0:
        log.error(
            'Failed to execute CutyCapt for URL "%s": "%s"' % (url, output))

        return

    # -------------------------------------------------------------------------
    log.info('Saving capture of URL "%s" to "%s"' % (url, file_name))

    return
def cutycapt_worker(worker_id, queue, command_template, timeout):
    '''Loads URLs from queue and runs the CutyCapt execution function

    worker_id: Number identifying the worker in log messages.

    queue: Queue with URLs to capture. Every fetched item is acknowledged
    with "task_done", even if the capture fails.

    command_template, timeout: Passed on untouched to cutycapt_exec.

    The function never returns - main() runs it in a daemon thread.
    '''

    log.debug('Starting CutyCap worker %i' % worker_id)

    while True:
        log.debug('Loading new URL from queue in worker %i' % worker_id)
        url = queue.get()

        # Captures the URL with CutyCapt
        try:
            cutycapt_exec(url, command_template, timeout)

        except Exception as error_msg:
            # One failing URL must not take down the whole worker
            log.error(
                'Worker %i failed to capture URL "%s": "%s"'
                % (worker_id, url, error_msg))

        finally:
            # Without this acknowledgement a failed capture would leave
            # the "queue.join()" call in main() waiting forever
            queue.task_done()
# -----------------------------------------------------------------------------
def main():
    '''Main application function

    Parses the commandline arguments, loads the target URLs from the
    results file and captures them with a pool of worker threads.

    Always ends by raising SystemExit: status 0 when all URLs have been
    handled and status 1 if the arguments or the results file are unusable.
    '''

    # Parses commandline arguments
    args = parse_args()

    # Sets up application logging
    log_level = log.DEBUG if args.log_verbose else log.INFO
    log.basicConfig(level=log_level, format='%(levelname)s: %(message)s')

    log.debug('Script has been started with arguments: "%s"' % str(args))

    # Without any workers the queue would never drain and the status loop
    # further down would spin forever
    if args.worker_threads < 1:
        log.error('Number of worker threads must be at least 1')
        exit(1)

    # Loads results file and extracts a list of URL to CutyCapt
    try:
        try:
            urls = load_target_urls(
                args.results_file, args.included_codes, args.excluded_codes)

        finally:
            # The file was opened by argparse and is not needed after this
            args.results_file.close()

    except Exception as error_msg:
        log.error('Failed to load result file: "%s"' % error_msg)
        exit(1)

    if urls:
        log.info('Capturing %i URLs with CutyCapt' % len(urls))

    else:
        log.error('No URLs in the result file matched filtering requirements')
        exit(1)

    # -------------------------------------------------------------------------
    log.debug('Populating queue with URLs')
    queue = Queue()

    for url in urls:
        queue.put(url)

    log.debug('Starting %i CutyCapt worker threads' % args.worker_threads)

    for worker_id in range(args.worker_threads):
        worker = Thread(
            target=cutycapt_worker,
            args=(worker_id, queue, args.command_template, args.timeout))

        # Daemon threads do not keep the script alive once main() is done
        worker.daemon = True
        worker.start()

    # -------------------------------------------------------------------------
    # Reports progress until the workers have picked up every URL
    while not queue.empty():
        log.info('URLs still in queue for capturing: %i' % queue.qsize())
        time.sleep(3)

    # An empty queue only means that all URLs have been fetched - join()
    # blocks until the captures in progress have been acknowledged as well
    log.info('Waiting for all URLs to be handled by capturing workers')
    queue.join()

    log.info('Finished capturing %i URLs!' % len(urls))
    exit(0)
if __name__ == '__main__':
    # Protects the script output from unhandled exceptions
    try:
        main()

    except SystemExit:
        # Passed on untouched, which preserves the exit status chosen by
        # main() or argparse. Converting the exception text to an integer
        # would fail for an exit without a numeric status code
        raise

    except KeyboardInterrupt:
        print('\ndstocc was interrupted by keyboard - exiting!')
        exit(3)

    except Exception as error_msg:
        print('Script generated unhandled exception: "%s"' % error_msg)
        exit(1)