-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy paththread_wooyun.py
87 lines (79 loc) · 2.87 KB
/
thread_wooyun.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import requests
from bs4 import BeautifulSoup
import lxml
import re
from time import sleep,ctime
import threading
class Wooyun:
    """Multi-threaded scraper for the WooYun public bug list.

    Fetches pages of http://wooyun.org/bugs/new_public/ and extracts
    (bug_id, bug_title, bug_url, bug_author) tuples from each row of the
    bug table, one worker thread per list page.
    """

    def __init__(self):
        # Landing page of the public list; page N lives at .../page/N.
        self.url = 'http://wooyun.org/bugs/new_public/'
        # Browser-like headers so the site serves the normal HTML page.
        self.headers = {
            "Host": "wooyun.org",
            "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:47.0) Gecko/20100101 Firefox/47.0",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            "Accept-Language": "en-US,en;q=0.5",
            "Accept-Encoding": "gzip, deflate",
            "Referer": "http://wooyun.org/",
            "Connection": "keep-alive",
        }
        self.page = 1

    def GetPageNum(self):
        """Return the total number of list pages, parsed from the pager text.

        The first <p> on the landing page holds pager text containing two
        numbers; the second one is the page count (index [1]).
        """
        response = requests.get(self.url, headers=self.headers)
        soup = BeautifulSoup(response.text, "lxml")
        # py3 fix: .strings is a generator — use next(), not .next();
        # raw string for the regex pattern.
        pagenum = re.compile(r"(\d+)").findall(next(soup.p.strings))
        return int(pagenum[1])

    def bug_info(self, tr):
        """Extract (bug_id, bug_title, bug_url, bug_author) from one <tr>.

        Returns None when the row lacks the expected structure: the
        AttributeError is reported and swallowed, keeping the scrape
        best-effort as in the original.
        """
        try:
            link = tr.find("td").a
            href = link.get("href")
            # py3: responses are already str — no .encode("UTF-8") needed.
            bug_title = link.string
            bug_url = 'http://wooyun.org' + href
            bug_author = tr.find_all_next("th")[2].a.get("title")
            bug_id = re.sub(r"/bugs/", "", href)
        except AttributeError as e:
            print("have an error : %s" % e)
        else:
            return bug_id, bug_title, bug_url, bug_author

    def buildheader(self, page):
        """Build the (url, headers) pair for *page*.

        Page 1 uses the landing URL; later pages use .../page/N and set the
        Referer to the previous page so requests look like navigation.
        """
        # bugfix: copy instead of aliasing self.headers — the original
        # mutated the shared dict, leaking the Referer across pages/threads.
        headers = dict(self.headers)
        if page == 1:
            return self.url, headers
        url = self.url + "page/" + str(page)
        if page == 2:
            headers["Referer"] = "http://wooyun.org/bugs/new_public"
        else:
            headers["Referer"] = "http://wooyun.org/bugs/new_public/page/" + str(page - 1)
        return url, headers

    def GetBugList(self, url, headers):
        """Fetch one list page and return its bug tuples as a list.

        Returns a (possibly empty) list of bug_info() results; the original
        built the list and dropped it — it is now returned.
        """
        print("get page bug")
        sleep(2)  # throttle so we do not hammer the site
        # bugfix: headers was passed positionally, landing in requests.get's
        # `params` slot — the custom headers were never sent.
        html = requests.get(url, headers=headers)
        soup = BeautifulSoup(html.text, "lxml")
        result = []
        try:
            for tr in soup.tbody.find_all('tr'):
                result.append(self.bug_info(tr))
        except AttributeError:
            # No <tbody> — dump the HTML so the failure can be inspected.
            print(html.text)
        print("get end")
        return result

    def main(self):
        """Spawn one worker thread per page (1..4) and wait for all of them."""
        print('starting at:', ctime())
        threads = []
        pagelist = range(1, 5)
        print(self.GetPageNum())
        for page in pagelist:
            print(self.buildheader(page))
            t = threading.Thread(target=self.GetBugList, args=self.buildheader(page))
            threads.append(t)
        # bugfix: the original looped range(len(pagelist) - 1), so the last
        # thread was created but never started and never joined.
        for i, t in enumerate(threads):
            print(i)
            t.start()
        for i, t in enumerate(threads):
            print(i)
            t.join()
        print('end at:', ctime())
        print(len(pagelist))
if __name__ == "__main__":
    # Script entry point: build the scraper and run the threaded crawl.
    Wooyun().main()