-
Notifications
You must be signed in to change notification settings - Fork 3
/
person.py
31 lines (27 loc) · 865 Bytes
/
person.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
#coding=utf-8
import re, urllib2, socket
from BeautifulSoup import BeautifulSoup
def parse(id_, max_tries=5, timeout=3):
    """Download one delegate page and return the text of its table cells.

    The page for ``id_`` is fetched, retrying when ``urllib2.URLError`` or
    ``socket.timeout`` is raised. The ``table`` with class ``table_text`` is
    then located and the ``.string`` of every ``td`` with class ``bg2`` is
    collected in row order. Finally the stripped text of the ``bg2_1`` cell
    in the last row is appended, or ``None`` when that cell is missing,
    has no single text child, or is blank.

    Args:
        id_: Value substituted for ``dbid`` in the request URL.
        max_tries: Number of download attempts before giving up.
        timeout: Timeout in seconds passed to ``urllib2.urlopen``.

    Returns:
        A list of cell values (an entry is ``None`` when the cell's
        ``.string`` is ``None``), or ``['error']`` when every download
        attempt failed or the page lacks the expected table rows.
    """
    url = 'http://www.npc.gov.cn/delegate/viewDelegate.action?dbid=%s' % id_

    html = None
    for _ in range(max_tries):
        try:
            response = urllib2.urlopen(url, timeout=timeout)
            try:
                html = response.read()
            finally:
                # Release the connection whether or not the read succeeded.
                response.close()
            break
        except (urllib2.URLError, socket.timeout):
            # Transient network failure: fall through to the next attempt.
            continue
    if html is None:
        return ['error']

    soup = BeautifulSoup(html)
    table = soup.find('table', attrs={'class': 'table_text'})
    if table is None:
        # Unexpected page layout: report it with the same sentinel as a
        # failed download instead of raising AttributeError.
        return ['error']
    rows = table.findAll('tr')
    if not rows:
        return ['error']

    result = []
    for row in rows:
        result.extend(
            td.string for td in row.findAll('td', attrs={'class': 'bg2'})
        )

    # The last row carries one extra cell with its own CSS class. `find`
    # returns None when it is absent and `.string` is None when the cell is
    # empty or holds nested markup, so guard both before stripping.
    last_cell = rows[-1].find('td', attrs={'class': 'bg2_1'})
    text = last_cell.string if last_cell is not None else None
    zw = text.strip() if text else None
    result.append(zw or None)
    return result