-
Notifications
You must be signed in to change notification settings - Fork 0
/
menu_scrapper.py
38 lines (30 loc) · 1.22 KB
/
menu_scrapper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
from bs4 import BeautifulSoup
from urllib.request import urlopen
# Weekday names, paired by position with the scraped daily menus; each name
# is also used as the stem of the text file that day's menu is written to.
weekdays = [
    "Monday",
    "Tuesday",
    "Wednesday",
    "Thursday",
    "Friday",
]

# Page the weekly menu is scraped from.
ist_menu_url = "http://screens.app.ist.ac.at/menu_weekly"
""" Remove a line 'line' from a string 'string'. """
def RemoveGivenLine(string, line):
    """Return ``string`` with the line at index ``line`` removed.

    The text is split with ``str.splitlines()`` and re-joined with ``"\\n"``,
    so every line ending is normalised to ``"\\n"`` and a trailing newline is
    not preserved.

    Args:
        string: The text to edit.
        line: Zero-based index of the line to drop. Negative indices count
            from the end, as with ordinary list indexing.

    Returns:
        The remaining lines joined with ``"\\n"``.

    Raises:
        IndexError: If ``line`` is out of range for the number of lines.
    """
    lines = string.splitlines()
    # `del` drops the element without producing an unused return value.
    del lines[line]
    return "\n".join(lines)
def GetMenu(menu_url):
    """Download the weekly menu page and return one text block per day.

    The first ``<table>`` on the page is read row by row. The first row is
    treated as the table header and skipped; every remaining row is treated
    as one day.

    Args:
        menu_url: URL of the page containing the menu table.

    Returns:
        A list of strings, one per non-header table row. Each string holds
        the selected food items separated by blank lines.

    Raises:
        ValueError: If the page contains no ``<table>`` element.
        IndexError: If a row has fewer text lines than the hard-coded
            offsets below expect.
    """
    # Close the HTTP response as soon as parsing is done instead of leaving
    # the connection open until garbage collection.
    with urlopen(menu_url) as page:
        soup = BeautifulSoup(page, 'html.parser')

    table = soup.find("table")
    if table is None:
        # Fail with a clear message rather than an AttributeError on None.
        raise ValueError("no <table> element found at " + repr(menu_url))

    # Skip the first row (table header). Slicing also copes with a table
    # that has no rows at all.
    rows = table.find_all("tr")[1:]
    texts = [row.get_text().splitlines() for row in rows]

    # Lines corresponding to food items in each day.
    # NOTE(review): these offsets presumably mirror the current page layout;
    # re-check them if the markup of the menu page changes.
    meaningful_lines = [2, 6, 10, 16, 20, 24, 30]
    days = [
        [text[i].replace("\t", "") for i in meaningful_lines]
        for text in texts
    ]
    day_strings = ["\n\n".join(day) for day in days]

    # After joining with blank lines, index 3 is the blank line between the
    # second and third items (the soups, per the original note); drop it for
    # a nicer look.
    return [RemoveGivenLine(day_string, 3) for day_string in day_strings]
# Fetch the weekly menu and write each day's text to "<Weekday>.txt",
# relative to the current working directory.
day_menus = GetMenu(ist_menu_url)

# zip() pairs each weekday with its menu and stops at the shorter sequence,
# so a page with fewer rows than weekdays writes only the days available.
for weekday, day_menu in zip(weekdays, day_menus):
    # Explicit UTF-8 keeps non-ASCII dish names from depending on (or failing
    # under) the platform's default encoding. Plain "w" is enough because the
    # file is only written, never read back.
    with open(weekday + ".txt", "w", encoding="utf-8") as f:
        f.write(day_menu)