Skip to content

Commit

Permalink
added new hooks
Browse files Browse the repository at this point in the history
  • Loading branch information
volkan committed Jan 1, 2016
1 parent 47f2ea5 commit 6304379
Show file tree
Hide file tree
Showing 8 changed files with 50 additions and 7 deletions.
Binary file added lib/urlwatch/__init__.pyc
Binary file not shown.
10 changes: 4 additions & 6 deletions lib/urlwatch/handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def get_guid(self):
return sha.new(self.location).hexdigest()

def retrieve(self, timestamp=None, filter_func=None, headers=None,
log=None):
request_hook=None, log=None):
raise Exception('Not implemented')

class ShellError(Exception):
Expand Down Expand Up @@ -121,7 +121,7 @@ class UrlJob(JobBase):
CHARSET_RE = re.compile('text/(html|plain); charset=([^;]*)')

def retrieve(self, timestamp=None, filter_func=None, headers=None,
log=None):
request_hook=None, log=None):
headers = dict(headers)
if timestamp is not None:
timestamp = email.utils.formatdate(timestamp)
Expand All @@ -141,10 +141,8 @@ def retrieve(self, timestamp=None, filter_func=None, headers=None,
auth_token = urllib2.unquote(':'.join((parts.username, parts.password)))
headers['Authorization'] = 'Basic %s' % (auth_token.encode('base64').strip())

request = urllib2.Request(self.location, post_data, headers)
response = urllib2.urlopen(request)
headers = response.info()
content = response.read()
log.info('request_hook')
content = request_hook(self.location, post_data, headers)
encoding = 'utf-8'

# Handle HTTP compression
Expand Down
Binary file added lib/urlwatch/handler.pyc
Binary file not shown.
Binary file added lib/urlwatch/html2txt.pyc
Binary file not shown.
Binary file added lib/urlwatch/ical2txt.pyc
Binary file not shown.
Binary file added lib/urlwatch/mailer.pyc
Binary file not shown.
26 changes: 26 additions & 0 deletions share/urlwatch/examples/hooks.py.example
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,33 @@ import re
# Additional modules installed with urlwatch
from urlwatch import ical2txt
from urlwatch import html2txt
import urllib, urllib2, cookielib, json

def diff_after_hook(url):
    """Send an HTTP ``PURGE`` request for *url*.

    Example hook: urlwatch calls ``diff_after_hook`` with the job location
    when a non-empty diff was produced for it.

    Args:
        url: Location that changed; the PURGE request is sent to it.

    Raises:
        urllib2.URLError: If the request cannot be completed.
    """
    opener = urllib2.build_opener(urllib2.HTTPHandler)
    request = urllib2.Request(url, data='')
    # urllib2 picks the verb from the presence of data, so override it
    # explicitly to issue a PURGE.
    request.get_method = lambda: 'PURGE'
    response = opener.open(request)
    # The response body is not needed; close it so the connection is
    # released instead of lingering until garbage collection.
    response.close()

def request_hook(url, data, headers):
    """Fetch *url* through a cookie-enabled session that logs in first.

    Example hook: it requests the login page, reads a CSRF token from the
    JSON reply, posts the credentials, and then retrieves *url* with the
    same opener so that any cookies set during login are sent along.

    Args:
        url: Location to retrieve after logging in.
        data: Request body for the final request, or None.
        headers: Mapping of header names to values for the final request;
            None is treated as "no extra headers".

    Returns:
        The raw response body of the final request.

    Raises:
        urllib2.URLError: If any of the three requests fails.
        KeyError: If the login page reply carries no ``csrfToken`` entry.
    """
    cookie_jar = cookielib.CookieJar()
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie_jar))
    opener.addheaders = [
        ('X-Requested-With', 'XMLHttpRequest'),
        ('Content-Type', 'application/json'),
    ]

    # Step 1: the login page reply is JSON that carries the CSRF token.
    resp = opener.open('http://website/user/login')
    try:
        login_info = json.loads(resp.read())
    finally:
        resp.close()
    csrf_token = login_info['csrfToken']

    # Step 2: submit the credentials (placeholders - replace with real ones).
    username = 'username'
    password = 'password'
    login_data = urllib.urlencode({
        '_csrf_token': csrf_token,
        'remember_me': 'on',
        '_username': username,
        '_password': password,
    })
    resp = opener.open('http://website/user/login_check', login_data)
    try:
        # The reply body is not used; it is still read before closing.
        resp.read()
    finally:
        resp.close()

    # Step 3: swap the login headers for the caller's headers and fetch.
    opener.addheaders = list((headers or {}).items())
    resp = opener.open(url, data)
    try:
        return resp.read()
    finally:
        resp.close()

def filter(url, data):
if url == 'http://www.inso.tuwien.ac.at/lectures/usability/':
Expand Down
21 changes: 20 additions & 1 deletion urlwatch
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,13 @@ if __name__ == '__main__':
count = 0

filter_func = lambda x, y: y
def request_hook(url, data, header):
    """Default request hook: fetch *url* with urllib2 and return the body.

    This definition is used unless hooks.py provides its own
    ``request_hook``. Only the body is returned; the response headers are
    not passed back to the caller.

    Args:
        url: Location to retrieve.
        data: Request body, or None.
        header: Mapping of request header names to values.

    Returns:
        The raw response body.

    Raises:
        urllib2.URLError: If the request cannot be completed.
    """
    request = urllib2.Request(url, data, header)
    response = urllib2.urlopen(request)
    try:
        return response.read()
    finally:
        # Release the connection even if reading the body fails.
        response.close()
def diff_after_hook(location):
    """Default no-op hook; does nothing with *location* and returns None."""
    return None

if os.path.exists(hooks_py):
log.info('using hooks.py from %s' % hooks_py)
Expand All @@ -274,6 +281,16 @@ if __name__ == '__main__':
filter_func = hooks.filter
else:
log.warning('hooks.py has no filter function - ignoring')
if hasattr(hooks, 'request_hook'):
log.info('found and enabled request_hook function from hooks.py')
request_hook = hooks.request_hook
else:
log.warning('hooks.py has no request_hook function - ignoring')
if hasattr(hooks, 'diff_after_hook'):
log.info('found and enabled diff_after_hook function from hooks.py')
diff_after_hook = hooks.diff_after_hook
else:
log.warning('hooks.py has no diff_after_hook function - ignoring')
else:
log.info('not using hooks.py (file not found)')

Expand All @@ -285,7 +302,7 @@ if __name__ == '__main__':
if os.path.exists(filename):
timestamp = os.stat(filename)[stat.ST_MTIME]

data = job.retrieve(timestamp, filter_func, headers, log)
data = job.retrieve(timestamp, filter_func, headers, request_hook, log)
return filename, timestamp, data

jobs = handler.parse_urls_txt(urls_txt)
Expand Down Expand Up @@ -327,6 +344,8 @@ if __name__ == '__main__':
timestamp_old, \
timestamp_new))
if len(diff) > 0:
log.info('diff_after_hook')
diff_after_hook(job.location)
log.info('%s has changed - adding diff' % job)
details += foutput('changed', job, diff, summary)
else:
Expand Down

0 comments on commit 6304379

Please sign in to comment.