-
Notifications
You must be signed in to change notification settings - Fork 0
/
uk.py
58 lines (51 loc) · 1.89 KB
/
uk.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import datetime
import io
import re

import pandas as pd
import yaml
# Accumulates one dict per parsed event; appended to by the row loop below.
events = []


def timeparse(x):
    """Parse a timestamp such as ``"2018 Thursday 13 December 09:00"``.

    The expected layout is year, full weekday name, day of month, full month
    name and a 24-hour ``HH:MM`` time, separated by whitespace.

    Uses the standard-library ``datetime`` directly: the ``pd.datetime``
    alias was deprecated in pandas 1.0 and removed in pandas 2.0.

    Raises:
        ValueError: if ``x`` does not match the expected layout.
    """
    return datetime.datetime.strptime(x, "%Y %A %d %B %H:%M")
# Load the cached page. Every <br> variant is swapped for a "####" marker
# before parsing (presumably so line breaks inside a cell survive the
# HTML-to-table conversion); the marker is split on and turned back into
# "<br>" when the events are built further down.
with open("cache/uk.html", 'r') as f:
    html = f.read().replace("<br>", '####').replace("<br />", "####")

# pandas >= 2.1 deprecates passing literal HTML to read_html; wrapping the
# text in a file-like object works on old and new versions alike.
table = pd.read_html(io.StringIO(html))[0]
# Row 0 is used as the header row: promote it to column labels, then drop it
# so only data rows remain.
table.columns = table.loc[0]
table = table.drop(0)
def remove_topic(time):
    """Strip the trailing topic from a day heading.

    A heading such as 'Thursday 13th December – Energy Transitions' is cut
    back to 'Thursday 13th December'. Any value that does not mention
    "December" is handed back unchanged.
    """
    if "December" not in time:
        return time
    # Everything before the first " – " separator is the date part.
    heading, _separator, _topic = time.partition(" – ")
    return heading
# Reduce day-heading rows to the bare date by cutting off the "– <topic>" part.
table.Time = table.Time.apply(remove_topic)

# English ordinal suffix directly after a day number ("1st", "2nd", "3rd",
# "13th"). strptime's %d only accepts the bare number, so it must be removed.
_ORDINAL_SUFFIX = re.compile(r"(?<=\d)(?:st|nd|rd|th)\b")


def _parse_event_time(day, clock):
    """Combine a day heading and an ``HH:MM`` clock time into a datetime.

    ``day`` is a heading such as "Thursday 13th December" and ``clock`` a time
    such as "09:00". Non-breaking spaces are turned into plain spaces and the
    ordinal suffix is dropped before the text is handed to ``timeparse``.
    The year is fixed to 2018.
    """
    text = f"2018 {day} {clock}".replace("\xa0", " ")
    return timeparse(_ORDINAL_SUFFIX.sub("", text))


# The table interleaves day-heading rows (containing "December") with event
# rows whose Time cell is a "start - end" range; each event belongs to the
# most recent heading seen.
day = None
for _, row in table.iterrows():
    slot = row.Time
    if "December" in slot:
        day = slot.strip()
    if ":" not in slot:
        # Not a time range, so there is no event on this row.
        continue
    if day is None:
        raise ValueError(f"time slot {slot!r} appears before any day heading")

    (start, end) = slot.split("-")
    start = start.strip()
    end = end.strip()
    print(slot)
    start = _parse_event_time(day, start)
    # An end of "close" (any casing) is stored as None instead of a time.
    end = None if end.lower() == "close" else _parse_event_time(day, end)

    # The first "####" (a former <br>) separates the title from the
    # description; remaining markers are restored to <br>.
    title, marker, description = row.Event.partition("####")
    if marker:
        title = title.strip()
        description = description.strip().replace("####", "<br>")
    else:
        title = row.Event
        description = None

    if pd.isnull(row.Organisation):
        organiser = None
    else:
        organiser = row.Organisation.replace("####", '<br>')

    events.append({
        "start": start,
        "end": end,
        "title": title,
        "description": description,
        "organiser": organiser,
        "location": "UK Pavilion",
        "source": "https://www.events.great.gov.uk/ehome/index.php?eventid=200184147",
    })
    print(events[-1])
# Serialise the collected events in block style. The context manager makes
# sure the output file is flushed and closed, which the bare open() call
# previously left to the garbage collector.
with open("uk.yaml", "w") as out:
    yaml.dump(events, out, default_flow_style=False)