-
Notifications
You must be signed in to change notification settings - Fork 0
/
gen_data.py
124 lines (112 loc) · 4.04 KB
/
gen_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import collections
import os
import functools
import requests
import requests_cache
from lxml import html, etree
import yaml
# Optional HTTP response cache, currently disabled; uncomment to enable it
# through requests_cache for the listed status codes.
#requests_cache.install_cache(allowable_codes=(200,301,404))
# Shared lxml HTML parser that decodes page bytes as UTF-8; main() passes it
# to html.fromstring() for every page it parses.
_parser = html.HTMLParser(encoding="utf-8")
def parseTime(time):
    """Convert an availability string into active hours and a display label.

    ``time`` holds one or more 12-hour ranges such as ``"4 PM - 9 AM"``,
    joined by ``&`` when there are several windows
    (``"4 AM - 8 AM & 4 PM - 7 PM"``). The AM/PM suffix may be separated
    from the number by a space (``"4 PM"``) or attached to it (``"4pm"``).

    Returns a ``(hours, label)`` tuple. ``hours`` lists every hour of the
    day (0-23) covered by the ranges, in first-seen order and without
    duplicates; each range includes its start hour, excludes its end hour,
    and wraps past midnight when the end is earlier than the start.
    ``label`` is the normalized text, e.g. ``"4pm to 9am"``, with multiple
    ranges joined by ``" & "``.

    Raises ``ValueError`` when an hour number cannot be parsed (for example
    when a range lacks the ``" - "`` separator).
    """
    def _to_hour(text):
        # Split "4 PM" on the space; fall back to slicing for "4pm".
        text = text.strip()
        number, _, suffix = text.partition(" ")
        if not suffix:
            number, suffix = text[:-2], text[-2:]
        hour = int(number)
        # On a 12-hour clock, 12 AM is hour 0 and 12 PM is hour 12, so fold
        # 12 down to 0 before applying the PM offset.
        if hour == 12:
            hour = 0
        if suffix.strip().lower() == "pm":
            hour += 12
        return hour

    def _label(hour):
        # Render a 24-hour value as "12am", "1am", ..., "12pm", ..., "11pm".
        hour %= 24
        return "{:d}{}".format(hour % 12 or 12, "am" if hour < 12 else "pm")

    def _parse_range(text):
        start_text, _, end_text = text.partition(" - ")
        start, end = _to_hour(start_text), _to_hour(end_text)
        if end < start:
            # The range crosses midnight; extend it into the next day.
            end += 24
        hours = [h % 24 for h in range(start, end)]
        return hours, "{} to {}".format(_label(start), _label(end))

    parsed = [_parse_range(part.strip()) for part in time.split("&")]
    # dict.fromkeys de-duplicates overlapping ranges while keeping a
    # deterministic first-seen order.
    hours = list(dict.fromkeys(
        h for part_hours, _ in parsed for h in part_hours
    ))
    label = " & ".join(text for _, text in parsed)
    return hours, label
def dlimg(fname, url):
    """Ensure the image at ``url`` is stored locally as ``fname``.

    An existing file is reused without any network access. Otherwise the
    image is downloaded when ``url`` is non-empty and the server answers
    with HTTP 200; the parent directory of ``fname`` is created if needed.

    Returns ``fname`` when the file is available, otherwise ``False``
    (empty URL, non-200 response, or a failed request).
    """
    if os.path.exists(fname):
        return fname
    if not url:
        return False
    try:
        # A timeout keeps one unresponsive host from hanging the whole run.
        response = requests.get(url, timeout=30)
    except requests.RequestException as err:
        # Failure is already signalled to callers via False; report it
        # instead of aborting the entire generation run.
        print("Failed to download {}: {}".format(url, err))
        return False
    if response.status_code != 200:
        return False
    parent = os.path.dirname(fname)
    if parent:
        # open() does not create missing directories.
        os.makedirs(parent, exist_ok=True)
    with open(fname, 'wb') as f:
        f.write(response.content)
    return fname
def main():
    """Scrape the fish and bug tables, add shells, and write ``_data/items.yml``.

    Fetches the New Horizons fish and bug wiki pages, extracts one record
    per item from the Northern and Southern Hemisphere tables, appends a
    fixed list of shells, and dumps all records sorted by name as YAML.
    If a page does not answer with HTTP 200 the response is printed and the
    function returns without writing the output file.
    """
    def makeData():
        # Fresh record skeleton; the per-hemisphere dicts are filled in by
        # add(), which also replaces "time" with the display string.
        return {
            "time": {},
            "price": {},
            "months": {},
        }

    # Records keyed by item id (lower-cased name without spaces).
    data = collections.defaultdict(makeData)

    def add(typ, hemi, elems):
        """Merge the table rows ``elems`` into ``data`` for hemisphere ``hemi``.

        ``typ`` is "fish" or "bug". Fish rows carry one extra column (size)
        before the time column, hence the offset ``o``. The first row is
        skipped (presumably the table header).
        """
        o = 1 if typ == "fish" else 0
        for el in elems[1:]:
            name = el[0][0].text.strip()
            imgs = el[1].xpath('.//a/@href')
            imageURL = imgs[0] if len(imgs) > 0 else ""
            price = el[2].text.strip().replace(",","")
            location = el[3].text.strip()
            time = el[4+o][0].text.strip() if len(el[4+o]) > 0 else el[3+o][0][0].text.strip() # Diving beetle hack
            # Remaining cells are the month columns; a check mark means the
            # item is available. An empty cell has text None in lxml, so
            # guard against it instead of raising TypeError.
            months = [
                i + 1
                for (i, mEl) in enumerate(el[5+o:])
                if "✓" in (mEl.text or "")
            ]
            # Unknown prices ("?" or "-") are stored as 0.
            price = 0 if price == "?" or price == "-" else int(price)
            times, time = (list(range(24)), "All Day") if time.lower() == "all day" or time == "?" else parseTime(time)
            _id = name.lower().replace(" ", "")
            data[_id]["type"] = typ
            data[_id]["name"] = name
            data[_id]["location"] = location
            data[_id]["time"] = time.replace("-", "to")
            data[_id]["times"] = times
            data[_id]["price"][hemi] = price
            data[_id]["months"][hemi] = months
            data[_id]["image"] = dlimg('images/icons/{}.png'.format(_id), imageURL)
            if typ == "fish":
                data[_id]["size"] = el[4].text.strip()

    def addShell(name, price, url=""):
        """Register a shell: available all day, every month, same price in both hemispheres."""
        _id = name.lower().replace(" ", "")
        data[_id] = {
            "type": "shell",
            "name": name,
            "time":"All Day",
            "times": list(range(24)),
            # Build a separate list per hemisphere so the YAML dump does not
            # emit anchors/aliases for a shared object.
            "months": {
                "north": list(range(1, 13)),
                "south": list(range(1, 13)),
            },
            "price": {
                "north": price,
                "south": price,
            },
            "image": dlimg('images/icons/{}.png'.format(_id), url)
        }

    def fetchDoc(url):
        """Download ``url`` and return the parsed HTML tree, or None on a non-200 reply."""
        # A timeout keeps an unresponsive server from hanging the script.
        r = requests.get(url, timeout=30)
        if r.status_code != 200:
            print(r)
            return None
        return html.fromstring(r.content, parser=_parser)

    doc = fetchDoc("https://animalcrossing.fandom.com/wiki/Fish_(New_Horizons)")
    if doc is None:
        return
    add("fish", "north", doc.xpath('//*[@title="Northern Hemisphere"]//table[@class="roundy sortable"]//tr'))
    add("fish", "south", doc.xpath('//*[@title="Southern Hemisphere"]//table[@class="roundy sortable"]//tr'))

    doc = fetchDoc("https://animalcrossing.fandom.com/wiki/Bugs_(New_Horizons)")
    if doc is None:
        return
    add("bug", "north", doc.xpath('//*[@title="Northern Hemisphere"]//table[@class="sortable"]//tr'))
    add("bug", "south", doc.xpath('//*[@title="Southern Hemisphere"]//table[@class="sortable"]//tr'))

    # Manually add shells
    addShell("Conch", 700)
    addShell("Coral", 250)
    addShell("Cowries", 60)
    addShell("Giant clam", 450)
    addShell("Oyster shell", 450)
    addShell("Pearl oyster", 1200)
    addShell("Porceletta", 30)
    addShell("Sand dollar", 120)
    addShell("Sea snail", 180)
    addShell("Scallop shell", 600)
    addShell("Venus comb", 150)
    addShell("White scallop", 450)

    with open('_data/items.yml', 'w') as f:
        yaml.dump(sorted(data.values(), key=lambda i: i["name"]), f, default_flow_style=None)
# Run the generator. NOTE: this call is unguarded, so importing this module
# also triggers the full scrape and file write.
main()