-
Notifications
You must be signed in to change notification settings - Fork 0
/
update.py
74 lines (56 loc) · 2.6 KB
/
update.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import logging
import re
from google.appengine.ext import webapp
from google.appengine.api import urlfetch
from vendor.BeautifulSoup import BeautifulSoup
from models.post import Post
# URL of the page that UpdateHandler fetches and scrapes for the burger text.
TASTY_URL = 'http://www.tasty-babelsberg.de'
class UpdateHandler(webapp.RequestHandler):
    """Handler that scrapes TASTY_URL and stores the burger text as a Post."""

    # Text nodes containing 'burger' preceded by a non-letter character.
    _BURGER_PATTERN = re.compile('[^A-Za-z]burger', re.IGNORECASE)
    # Text nodes matching this pattern are skipped as burger candidates.
    _EXCLUDED_PATTERN = re.compile(r'veggie|neu', re.IGNORECASE)

    def get(self):
        """Fetch the burger string and persist it as a new Post.

        When fetch_burger_string() yields an empty string no Post is
        created and an error is logged instead.
        """
        burger_string = self.fetch_burger_string()
        if burger_string == '':
            # The two literals need the separating space, otherwise the
            # message reads "fetch_burger_string()returned ...".
            logging.error('UpdateHandler::get() - fetch_burger_string() '
                          'returned an empty string, no post created')
            return

        post = Post(content=burger_string)
        post.put()
        logging.info("UpdateHandler::get() - Created new post with id: %s",
                     post.key().id())

    def fetch_burger_string(self):
        """Fetch the html from the TASTY_URL and scrape it to extract the
        available burger for the current friday.

        Returns:
            The scraped text with newlines replaced by spaces and the
            surrounding whitespace stripped, or an empty string when the
            response status is not 200, no burger element is found, or
            anything raises while fetching/parsing.
        """
        burger_string = ''
        try:
            response = urlfetch.fetch(TASTY_URL)
            if response.status_code == 200:
                burger_string = self._extract_burger_string(response.content)
            else:
                logging.error('UpdateHandler::fetch_burger_string() - '
                              'unexpected status code: %s',
                              response.status_code)
        except Exception as e:
            # Lazy %-formatting avoids building the message with str(e)
            # inside the handler; exception() also records the traceback.
            logging.exception('UpdateHandler::fetch_burger_string() - %s', e)
        except:
            # Catch-all kept on purpose so that raisables which do not
            # derive from Exception are logged and never escape this method.
            logging.error('UpdateHandler::fetch_burger_string() - ' +
                          'failed with an unknown exception')
        return burger_string.replace('\n', ' ').strip()

    def _extract_burger_string(self, html):
        """Return the raw burger text scraped from the given html document.

        Args:
            html: markup handed to BeautifulSoup for parsing.

        Returns:
            The concatenated text of the enclosing 'p' element of the chosen
            burger text node, or the text node itself when it has no 'p'
            parent.

        Raises:
            Exception: if no text node matches the burger pattern without
                also matching the excluded pattern.
        """
        soup = BeautifulSoup(html)
        # find all elements with the text 'burger' in it
        candidates = soup.findAll(text=self._BURGER_PATTERN)

        # Pick a candidate that matches neither 'veggie' nor 'neu'. There is
        # no early exit, so the last such candidate in the document wins.
        burger_element = None
        for item in candidates:
            if self._EXCLUDED_PATTERN.search(item) is None:
                burger_element = item

        # stop here if there is no valid element
        if burger_element is None:
            raise Exception('Could not find a burger element')

        # TODO: look at the next p element if there is a second half
        parent_p = burger_element.findParent(name='p')
        if parent_p:
            # all text of the paragraph without the html tags
            return ''.join(parent_p.findAll(text=True))
        return burger_element