-
Notifications
You must be signed in to change notification settings - Fork 0
/
wiki.py
28 lines (22 loc) · 840 Bytes
/
wiki.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import logging
import time
import urllib2

from processor import Processor, DbgProcessor
# Template for an English Wikipedia article URL; the article title replaces %s.
base = 'http://en.wikipedia.org/wiki/%s'

# URLs of the year pages 1950 through 1999 (range() stops before 2000).
urls = [
    base % (year,)
    for year in range(1950, 2000)
]
class Checker(object):
    """Fetch a page and report whether it contains a given piece of text.

    Each instance owns a urllib2 opener whose requests carry a
    ``User-agent: Mozilla/5.0`` header.
    """

    def __init__(self):
        """Build the opener that :meth:`check` uses for every request."""
        self.opener = urllib2.build_opener()
        # Replace the opener's header list so requests present a browser
        # user agent instead of the library default.
        self.opener.addheaders = [('User-agent', 'Mozilla/5.0')]

    def check(self, url, text):
        """Download *url* and look for *text* in the body, ignoring case.

        Args:
            url: Address of the page to fetch.
            text: String to search for.

        Returns:
            A ``(name, found)`` tuple: *name* is the part of *url* after
            its last ``/``; *found* is True when the lower-cased *text*
            occurs in the lower-cased page data.

        Raises:
            Any error raised by ``self.opener.open`` or by reading the
            response propagates to the caller unchanged.
        """
        infile = self.opener.open(url)
        try:
            data = infile.read()
        finally:
            # Always release the connection, even when the read fails.
            infile.close()
        return url.split("/")[-1], text.lower() in data.lower()
# Third positional argument to Processor
# (presumably the number of parallel workers -- TODO confirm in processor.py).
N = 5

# Drive Checker.check through a Processor, logging at DEBUG level to /tmp/wiki.log.
p = Processor(Checker(), 'check', N, logfile='/tmp/wiki.log', loglevel=logging.DEBUG)

search_string = 'marilyn monroe'

# Register one (url, search_string) argument pair per year page.
for url in urls:
    p.add(url, search_string)

p.start()

# Poll once per second until the Processor reports it is done.
# `time` must be imported at file level; the original script used it
# without importing it, which raised NameError here.
while not p.done():
    time.sleep(1)

# Only the first element of get_results() is used; it is treated as a list
# of (page_name, found) tuples as returned by Checker.check
# (NOTE(review): confirm the result layout against processor.py).
results = p.get_results()[0]
results.sort()

# Keep the years whose page contained the search string.
matching_years = [int(r[0]) for r in results if r[1]]

# Single parenthesised argument: prints the same text under Python 2's
# print statement and also parses as a Python 3 print() call.
print("Years in which the string '%s' appears: %s" % (search_string, matching_years))