-
Notifications
You must be signed in to change notification settings - Fork 65
/
urlattr.py
354 lines (290 loc) · 12.5 KB
/
urlattr.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
#!/usr/bin/python
'''
This is a helper class within jsunpack-n
There is no command line usage for this class
'''
from hashlib import sha1
import re
import os
def convert(hex):
    '''
    Turn a string of hexadecimal digits (for example '41') into the
    single character that has that character code
    '''
    literal = '0x' + hex
    code = int(literal, 0)
    return chr(code)
def cleanChars(str, replaceWith=''):
    '''
    Input string is stripped of binary characters
    \\x0a is preserved as newline
    \\x20 space and ascii chars are preserved

    Every character in \\x00-\\x09, \\x0b-\\x1f and \\x7f-\\xff is replaced
    with replaceWith (removed by default); the modified string is returned
    '''
    # The upper bound of the second range is \x1f (the last control character
    # before space); stopping at \x19 let \x1a-\x1f, including ESC, through.
    return re.sub(r'[\x00-\x09\x0b-\x1f\x7f-\xff]', replaceWith, str)
def canonicalize(input):
    '''
    input is a URL (or the path of an existing local file)
    output is a standardized version of the URL:
      - a path that exists on disk is returned unchanged
      - a leading http:// or https:// is removed
      - a trailing '/' is added when the result contains no '/' at all
      - %XX escapes are decoded into the characters they represent
    '''
    if os.path.exists(input):
        return input

    # Match the literal scheme only. The previous pattern used the character
    # class [https]+, which also stripped prefixes such as 'ssh://'.
    # The colon stays optional, as it was before.
    output = re.sub(r'^https?:?//', '', input)
    if output.find('/') == -1:
        output += '/'

    # The pattern guarantees exactly two hex digits, so int(..., 16) is safe
    output = re.sub('%([a-fA-F0-9]{2})', lambda mo: chr(int(mo.group(1), 16)), output)
    return output
class urlattr:
    '''
    One node in the tree of URLs seen during an analysis

    All nodes share a single rooturl dictionary that maps a url to its
    urlattr (setChild and setTcpMethod canonicalize urls before using them
    as keys). Links between nodes are stored in children as [type, childurl]
    pairs, so the tree is walked by looking child urls up in rooturl.
    '''
    DONT_ANALYZE = -2
    NOT_ANALYZED = -1
    ANALYZED = 0
    verbose = False

    def __init__(self, inrooturl, url, tcpaddr=None):
        '''
        inrooturl is the shared dictionary of url -> urlattr
        url is the key this node is (or will be) stored under
        tcpaddr is [[srcip, srcport], [dstip, dstport]]; when omitted a
        fresh all-zero address pair is used
        '''
        if tcpaddr is None:
            # built per instance so that nodes never share one mutable default
            tcpaddr = [['0.0.0.0', 0], ['0.0.0.0', 0]]

        if self.verbose and url in inrooturl:
            print('Warning: resetting urlattr %s without checking it (may cause loss of data)' % (url))

        self.url = url
        self.tcpaddr = tcpaddr
        self.children = []
        self.method = '' #examples: GET, POST, etc
        self.type = '' #examples: iframe, shellcode, etc.
        self.filetype = '' #example: PDF, MZ
        self.hasParent = False #when set to true, it destroys default link
        self.rooturl = inrooturl
        self.files = [] #decodings, streams, other types of files
        self.status = ''
        self.msg = []
        self.malicious = urlattr.NOT_ANALYZED
        self.cumulative_malicious = urlattr.NOT_ANALYZED
        self.seen = {}
        self.showall = False
        self.graphall = False
        #if you add a new field, verify mergeEntries accounts for it
        self.mergeEntries()

    def getIP(self):
        '''returns the ip address of the server, or 0.0.0.0 if tcpaddr is malformed'''
        try:
            (srcip, srcport), (dstip, dstport) = self.tcpaddr
        except (TypeError, ValueError):
            # tcpaddr is not a pair of (ip, port) pairs
            return '0.0.0.0'
        return dstip

    def setTcpMethod(self, url, tcpaddr, method):
        '''
        record the request method and tcp address for url (creating its
        entry in rooturl if needed) and link it to self as a 'default' child
        '''
        url = canonicalize(url)
        if url not in self.rooturl:
            self.rooturl[url] = urlattr(self.rooturl, url)
        self.rooturl[url].method = method
        self.rooturl[url].tcpaddr = tcpaddr
        self.setChild(url, 'default')

    def mergeEntries(self):
        '''
        use the entry already stored in self.rooturl under self.url (if any)
        to populate method, type, hasParent, children, malicious and msg
        '''
        if self.url not in self.rooturl:
            return
        existing = self.rooturl[self.url]

        if not self.method:
            self.method = existing.method

        if not self.type:
            self.type = existing.type
        elif self.type == 'default' and existing.type:
            #a stored specific type wins over 'default'
            self.type = existing.type

        if existing.hasParent:
            self.hasParent = True

        for link in existing.children:
            if link not in self.children:
                self.children.append(link)

        if existing.malicious > self.malicious:
            self.malicious = existing.malicious

        for m in existing.msg:
            if m not in self.msg:
                self.msg.append(m)

    def log(self, printable, severity, msg):
        '''append [printable, severity, msg] to self.msg unless it is already there'''
        entry = [printable, severity, msg]
        if entry not in self.msg:
            self.msg.append(entry)

    def setMalicious(self, new):
        '''raise (never lower) the malicious score of this node and of its rooturl entry'''
        self.malicious = max(self.malicious, new)
        if self.url in self.rooturl:
            self.rooturl[self.url].malicious = max(self.rooturl[self.url].malicious, new)

    def getChildUrls(self, start, returls=None):
        '''
        returns a list containing start followed by every url reachable from
        it through children links

        recursive! results are appended to returls, which also serves as the
        visited list; a new list is created when the caller does not pass one
        '''
        if returls is None:
            # a shared default list would carry results over between calls
            returls = []
        returls.append(start)
        if start in self.rooturl:
            for t, u in self.rooturl[start].children:
                if u not in returls:
                    returls = self.getChildUrls(u, returls)
        return returls

    def setChild(self, childurl, type):
        '''
        add childurl as a child of self.url
        if childurl already has a type (default, shellcode, jsvar, redirect)
        and it already exists as a child, we'll keep the value previously set

        self.url must already be a key of self.rooturl, because the link is
        stored in self.rooturl[self.url].children
        '''
        if len(childurl) <= 4:
            #make sure length is > 4 (ie, a valid URL)
            return

        childurl = canonicalize(childurl)
        if self.url == childurl:
            return # linking to itself is stupid

        if childurl not in self.rooturl:
            self.rooturl[childurl] = urlattr(self.rooturl, childurl)
            self.rooturl[childurl].type = type
        child = self.rooturl[childurl]

        #preserve method,hasParent,type
        if not type == 'default':
            if child.type == 'refer': #prefer other types over refer
                child.type = type
            elif type == 'refer' and child.type != 'default': #prefer other types over refer
                type = child.type

            #this logic determines whether childurl can safely be removed from the root
            #setting the hasParent flag to True will disconnect it
            if len(child.children) <= 0:
                #require that the node has no existing children
                #to prevent it from being disconnected from the tree
                child.hasParent = True
                child.type = type
            elif self.url not in self.getChildUrls(childurl):
                #looks through child.children, if you find self.url don't destroy the childurl type
                #doing so is bad because it would disconnect the tree
                child.hasParent = True
                child.type = type

        if not self.child_exists(childurl):
            self.rooturl[self.url].children.append([type, childurl])

    def child_exists(self, lookup):
        '''lookup is a url; True if it is already a child of self.url in rooturl'''
        return any(u == lookup for t, u in self.rooturl[self.url].children)

    def file_exists(self, lookup):
        '''lookup is a sha1hash; True if a file with that hash is in self.files'''
        return any(lookup == hash for type, hash, data in self.files)

    def create_sha1file(self, outdir, data, type='sha1'):
        '''
        outdir is the directory prefix
        data is written to <outdir>/<type>_<sha1 of data> when outdir is set,
        and recorded in self.files (once per hash) in every case
        returns the file name, or '' when data is empty
        '''
        if len(data) <= 0:
            return ''

        shash = sha1(data).hexdigest()
        sha1file = '%s/%s_%s' % (outdir, type, shash)

        if outdir: #no output directory means don't output anything
            if not os.path.isdir(outdir):
                os.mkdir(outdir)
            if os.path.isdir(outdir):
                with open(sha1file, 'wb') as ffile:
                    ffile.write(data)

        if not self.file_exists(shash):
            self.files.append([type, shash, data])
        return sha1file

    def tostring(self, prefix='', recursive=True, parentMalicious=0, path=None):
        '''
        returns (text, cumulative_malicious) for this node

        text is the printable report of this node followed by its children,
        each line starting with prefix (children get one more tab)
        cumulative_malicious is the highest malicious score of this node and
        the children that were visited
        parentMalicious is the highest score among the ancestors
        path lists the urls already visited in this traversal, so that a url
        is reported only once; a new list is created when it is omitted
        '''
        if path is None:
            # a shared default list would remember urls from earlier calls
            path = []

        cumulative_malicious = self.malicious
        childtxt = ''
        if recursive:
            inherited = max(self.malicious, parentMalicious)
            for type, child in self.children:
                if self.rooturl[child].hasParent and type == 'default':
                    #the default link was replaced by a more specific one
                    continue
                if child in path:
                    #referencing itself can't be good!
                    continue
                path.append(child)
                tmptxt, tmpmal = self.rooturl[child].tostring('\t' + prefix, recursive, inherited, path)
                childtxt += tmptxt
                cumulative_malicious = max(cumulative_malicious, tmpmal)

        if max(cumulative_malicious, self.malicious, parentMalicious) <= 0 and urlattr.verbose == False:
            return '', cumulative_malicious

        if self.type in ('img', 'input', 'link'):
            #don't expect these to be interesting
            return '', cumulative_malicious

        intro = ''
        if self.filetype:
            intro += '[' + self.filetype + '] '
        if self.method:
            intro += self.method + ' '
        if self.type:
            intro += '(' + self.type + ') '

        ip = self.getIP()
        if ip == '0.0.0.0':
            ip = ''
        else:
            ip = '(ipaddr:%s) ' % (ip)

        if self.malicious > 5:
            intro = '[malicious:%d] %s' % (self.malicious, ip) + intro
        elif self.malicious > 0:
            intro = '[suspicious:%d] %s' % (self.malicious, ip) + intro
        else:
            extra = ''
            if cumulative_malicious > self.malicious:
                self.cumulative_malicious = cumulative_malicious
                if cumulative_malicious > 5:
                    extra = ';children=malicious:%d' % (cumulative_malicious)
                elif cumulative_malicious > 0:
                    extra = ';children=suspicious:%d' % (cumulative_malicious)
            if self.malicious == 0:
                intro = '[nothing detected%s] ' % (extra) + intro
            else:
                intro = '[not analyzed%s] ' % (extra) + intro
        intro += self.url

        txt = prefix + '%s\n' % (intro)
        prefix = '\t' + prefix

        if self.status:
            txt += prefix + 'status: %s\n' % (re.sub('[\t\n]', '', self.status))

        for printable, impact, msg in self.msg:
            if not printable:
                continue
            #keep continuation lines of a message aligned with its first line
            msg = msg.replace('\n', '\n' + prefix)
            type = ''
            if impact > 5:
                type = 'malicious'
            elif impact > 0:
                type = 'suspicious'
            elif impact == 0:
                type = 'info'
            elif impact < 0:
                type = 'error'
            txt += prefix + '%s: %s\n' % (type, msg)

        for type, hash, data in self.files:
            txt += prefix + 'file: %s_%s: %d bytes\n' % (type, hash, len(data))

        txt += childtxt
        self.seen[self.url] = [txt, cumulative_malicious]
        return txt, cumulative_malicious

    def graph(self, outfile):
        '''
        render the urls in rooturl as a directed graph into outfile

        requires the yapgvb library; when it is missing a message is printed
        and nothing is drawn. Only urls with a malicious score above 0 (their
        own or cumulative) are drawn unless self.graphall is set, and nothing
        is rendered once 60 or more urls qualify
        '''
        #never leave a graph from an earlier run behind
        if os.path.exists(outfile):
            os.remove(outfile)

        try:
            import yapgvb
        except ImportError:
            print('Unable to import yapgvb, please install python library')
            return

        remaining = 60
        g = yapgvb.Digraph('Analysis of ' + self.url)

        for url in self.rooturl:
            entry = self.rooturl[url]
            urlstr = url
            if entry.malicious > 5:
                color = yapgvb.colors.red
                urlstr += '\nmalicious'
            elif entry.malicious > 0:
                color = yapgvb.colors.orange
                urlstr += '\nsuspicious'
            else:
                color = 'white'

            if not (max(entry.malicious, entry.cumulative_malicious) > 0 or self.graphall):
                continue

            remaining -= 1
            node = g.add_node(url)
            node.label = urlstr
            node.color = color
            node.shape = yapgvb.shapes.box

            #NOTE(review): edges are drawn only for urls that got a node above,
            #since they need that node as their source - confirm against upstream
            for type, child in entry.children:
                linked = self.rooturl[child]
                if linked.hasParent and type == 'default':
                    continue
                if max(entry.malicious, linked.cumulative_malicious, linked.malicious) > 0 or self.graphall:
                    cnode = g.add_node(child)
                    cnode.shape = yapgvb.shapes.box
                    cnode.label = child

                    edge = g.add_edge(node, cnode)
                    if not type == 'default':
                        edge.label = type

        if remaining > 0:
            g.layout(yapgvb.engines.dot)
            g.render(outfile)
        else:
            print('Not graphing "%s" because rooturl used (%d) more nodes than the maximum (60)' % (outfile, -remaining))