-
Notifications
You must be signed in to change notification settings - Fork 8
/
updater.py
94 lines (78 loc) · 2.76 KB
/
updater.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
#!/usr/bin/python
"""
craigsuck, a Craigslist RSS poller.
Copyright (c) 2011. Jake Brukhman <jbrukh@gmail.com>. See LICENSE.
"""
import optparse
import sys
import time
from collections import deque
from string import Template

import craigslist
class LookupQueue(object):
    """
    A bounded FIFO queue backed by a set for fast membership lookup,
    and which does not accept duplicate elements.

    Once the queue holds ``size`` items, pushing a new item evicts the
    oldest one. ``size`` is expected to be a positive integer: with a
    size of 0 the first push tries to evict from an empty queue and
    raises IndexError.
    """

    def __init__(self, size):
        self.s = set()    # current members, used for fast "in" checks
        self.q = deque()  # the same members in insertion order, oldest first
        self.size = size

    def push(self, *items):
        """
        Add every item that is not already present, evicting the oldest
        entry each time the queue is full.

        Returns the first item that was actually added, or None when all
        of the items were duplicates, so a single-item call doubles as an
        "is this new?" test. All items are processed; the loop does not
        stop after the first addition.
        """
        first_added = None
        added_any = False
        for item in items:
            if item in self.s:
                continue
            if len(self.q) == self.size:
                # Full: make room by dropping the oldest entry.
                self.pop()
            self.s.add(item)
            self.q.append(item)
            if not added_any:
                first_added = item
                added_any = True
        return first_added

    def pop(self):
        """
        Remove and return the oldest item.

        Raises IndexError when the queue is empty.
        """
        item = self.q.popleft()
        self.s.remove(item)
        return item

    def __contains__(self, item):
        return item in self.s

    def __len__(self):
        return len(self.q)

    def __str__(self):
        # Render as a plain list so the output does not expose the
        # underlying deque type.
        return str(list(self.q))

    def __repr__(self):
        return str(list(self.q))
def main(query, opts):
    """
    Indefinitely polls the query provided to the program and prints
    every listing that has not been seen before.

    query -- passed unchanged to craigslist.fetch_with_pages_back
    opts  -- parsed options; this function reads opts.memory (size of
             the seen-links history), opts.pages, opts.format (a
             string.Template pattern) and opts.sleep (seconds to wait
             between polls)

    Never returns normally; the loop only ends when an exception
    (e.g. KeyboardInterrupt) propagates out.
    """
    queue = LookupQueue(opts.memory)
    # The output format does not change between polls, so build the
    # template once instead of once per printed listing.
    template = Template(opts.format)
    while True:
        listings = craigslist.fetch_with_pages_back(query, pages=opts.pages)
        # push() returns the link only when it was not already remembered,
        # so this both filters for unseen listings and records them.
        new_listings = [entry for entry in listings if queue.push(entry['link'])]
        for listing in new_listings:
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(template.safe_substitute(listing))
        process_new(new_listings)
        time.sleep(opts.sleep)
def process_new(listings):
    """
    Hook for acting on a batch of listings.

    Currently does nothing: the argument is ignored and None is returned.
    """
    return None
if __name__ == '__main__':
    # Command-line entry point: parse options, validate them, then poll
    # until interrupted.
    USAGE = '%prog [options] <url>'
    parser = optparse.OptionParser(usage=USAGE)
    parser.add_option('-m', '--memory', dest='memory', type='int', default=1000,
                      help='number of historical items against which to test for uniqueness (set high)')
    parser.add_option('-s', '--sleep', dest='sleep', type='int', default=30,
                      help='polling period, in seconds')
    parser.add_option('-f', '--format', dest='format', default='${date}\t${title}', type='string',
                      help="output format, using Python formatting; available fields "
                           "are ['date', 'title', 'link'] and "
                           "the default format is '${date}\\t${title}'")
    parser.add_option('-p', '--pages', dest='pages', default=1, type='int',
                      help="the number of pages back from this url, if possible, up to 10")
    opts, args = parser.parse_args()

    # Exactly one positional argument (the url) is required. Checking
    # "!= 1" also rejects the no-argument case, which would otherwise
    # fail later with an IndexError on args[0].
    if len(args) != 1:
        print("Please provide exactly one url.")
        sys.exit(1)

    if opts.pages < 0 or opts.pages > 10:
        print("Ten pages back maximum.")
        sys.exit(1)

    try:
        main(args[0], opts)
    except KeyboardInterrupt:
        # Ctrl-C is the expected way to stop the poller; exit quietly.
        print("Goodbye!")