# fetch_noslang.py
from __future__ import print_function
import string
import sys
import requests
from lxml import html
from marisa_trie import BytesTrie
# Censored spellings that appear in the scraped definitions, mapped to the
# words they stand for.
_CENSORED_WORDS = {
    "a**": "ass",
    "b*****d": "bastard",
    "b***h": "bitch",
    "c**k": "cock",
    "c**t": "cunt",
    "c**": "cum",
    "d**k": "dick",
    "d**n": "damn",
    "d***o": "dildo",
    "d****e": "douche",
    "f**": "fag",
    "f**k": "fuck",
    "f*ck": "fuck",
    "h**e": "hole",
    "n****r": "nigger",
    "n***a": "nigga",
    "p***y": "pussy",
    "s**t": "shit",
    "w***e": "whore",
}

# Replacement order: longest pattern first, so a short pattern such as "f**"
# can never eat the prefix of a longer one such as "f**k" (which would yield
# "fagk"). Ties are broken alphabetically to keep the order deterministic.
_UNCENSOR_ORDER = sorted(
    _CENSORED_WORDS.items(),
    key=lambda pair: (-len(pair[0]), pair[0]),
)


def _uncensor(definition):
    """Return *definition* with every known censored spelling written out."""
    for stars, replacement in _UNCENSOR_ORDER:
        definition = definition.replace(stars, replacement)
    return definition


def iter_noslang():
    """Scrape noslang.com and yield ``(abbreviation, definition)`` pairs.

    Walks the "dictionary" and "rejects" sections, one page per character in
    ``"1" + a..z``.  For every ``<abbr>`` element on a page, the lower-cased
    ``title`` attribute is taken as the definition and the ``name`` attribute
    of the element's previous sibling as the abbreviation.  Censored words in
    the definition are spelled out via ``_uncensor``.

    Pages whose HTTP request does not succeed are reported on stderr and
    skipped.

    Yields:
        Tuples of (text key, UTF-8 encoded bytes value), the shape
        ``BytesTrie`` is fed with by the script entry point.
    """
    for resource in ["dictionary", "rejects"]:
        # ascii_lowercase is locale-independent and exists on both Python 2
        # and 3 (string.lowercase is neither).  Per the original note, the
        # "1" page is the one used for '#'.
        for ch in "1" + string.ascii_lowercase:
            url = "http://www.noslang.com/{}/{}".format(resource, ch)
            print("Processing " + url)
            r = requests.get(url)
            if not r.ok:
                print("Skipping {} (status code {})".format(ch, r.status_code),
                      file=sys.stderr)
                # Actually skip: do not parse an error page as if it were
                # a dictionary page.
                continue
            page = html.fromstring(r.text)
            for abbr in page.cssselect("abbr"):
                anchor = abbr.getprevious()
                definition = _uncensor(abbr.attrib["title"].lower())
                name = anchor.attrib["name"]
                # Only byte strings need decoding; values that are already
                # text must not be decoded again.
                if isinstance(name, bytes):
                    name = name.decode("utf-8")
                # Trie values are stored as bytes, so encode text values.
                if not isinstance(definition, bytes):
                    definition = definition.encode("utf-8")
                yield name, definition
if __name__ == "__main__":
    # The script takes exactly one positional argument: the path the
    # resulting trie is saved to.  Anything else prints usage and exits 1.
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        print("Usage: [prog] path/to/trie", file=sys.stderr)
        sys.exit(1)
    trie_path = cli_args[0]

    # Build the trie straight from the scraper's generator and persist it.
    trie = BytesTrie(iter_noslang())
    trie.save(trie_path)