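#!/usr/bin/env python3
"""pyfeed: a small command-line tool that watches websites for updates.

Each watched site is stored as a snapshot file under .sitedata/. Running
the script with no arguments reports which sites have changed since the
last check; -a adds a new site to watch.
"""
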
import os # for storing site data
import argparse # for parsing arguments
import requests # for making HTTP requests
from bs4 import BeautifulSoup # parsing html
import validators # for validating urls
from slugify import slugify # processing urls


def build_parser():
    """
    Builds the command-line argument parser and ensures the .sitedata
    directory used for site snapshots exists.

    Args:
        None

    Returns:
        argparse.ArgumentParser
    """
    parser = argparse.ArgumentParser(
        description="Displays sites that have been updated")
    parser.add_argument("-a", metavar="url", help="Add new site to pyfeed")
    # Make sure the snapshot directory exists before any site data is written
    os.makedirs(".sitedata", exist_ok=True)
    return parser


def add_new_site(url):
    """
    Adds a new site to watch for updates.

    Args:
        url (str): the website to be added

    Returns:
        None
    """
    if validators.url(url):
        filename = ".sitedata/" + slugify(url) + ".txt"
        if os.path.exists(filename):
            print("Site already exists!")
        else:
            # Use a context manager so the file is closed even on error
            with open(filename, "w") as f:
                f.write(url)
            print("Site added!")
    else:
        print("Please enter a valid URL")


def extract_url(filename):
    """
    Extracts the URL from the first line of a snapshot file.

    Args:
        filename (str): the file to get the URL from

    Returns:
        url (str): the corresponding URL
    """
    with open(filename) as f:
        url = f.readline().strip()
    return url


def load_site_data(filename):
    """
    Loads the stored HTML snapshot from a file.

    Args:
        filename (str): the file to be referenced

    Returns:
        html (str): the text of the website as of the last check
    """
    with open(filename) as f:
        f.readline()  # Skip the first line, which holds the URL
        html = f.read()
    return html


def fetch_site_data(filename):
    """
    Fetches the current HTML for the URL stored in a snapshot file.

    Args:
        filename (str): the snapshot file whose URL should be fetched

    Returns:
        html (str): the prettified HTML of the live website
    """
    headers = {
        # Present a browser User-Agent so basic bot filtering is less
        # likely to reject the request
        "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0"
    }
    req = requests.get(extract_url(filename), headers=headers, timeout=30)
    soup = BeautifulSoup(req.text, "html.parser")
    # prettify() re-indents the markup consistently, so later string
    # comparisons are not thrown off by incidental formatting differences
    return soup.prettify()


def update_site_data(filename, new_data):
    """
    Overwrites a snapshot file with freshly fetched HTML.

    Args:
        filename (str): the file to be updated
        new_data (str): the updated HTML data

    Returns:
        None
    """
    url = extract_url(filename)
    with open(filename, "w") as f:
        f.write(url + "\n")
        f.write(new_data)


def compare_site_data(filename):
    """
    Compares the stored snapshot against the live site and saves the
    fresh copy for the next run.

    Args:
        filename (str): the file to check against updates

    Returns:
        did_update (bool): whether the site has changed since the last check
    """
    existing_data = load_site_data(filename)
    new_data = fetch_site_data(filename)
    update_site_data(filename, new_data)
    return existing_data != new_data


def print_updated_sites():
    """
    Prints the URL of every watched site that has changed since the
    last check.

    Args:
        None

    Returns:
        None
    """
    updated_sites = set()
    # Check every snapshot file in .sitedata
    for filename in os.listdir(".sitedata"):
        if filename.endswith(".txt"):
            path = os.path.join(".sitedata", filename)
            if compare_site_data(path):
                updated_sites.add(extract_url(path))
    # Output relevant info
    if updated_sites:
        for site in updated_sites:
            print(site)
    else:
        print("No sites have been updated.")


def main():
    """
    Builds an ArgumentParser object by calling build_parser(),
    adds a new site if one was given with -a,
    and otherwise prints the updated sites using print_updated_sites().
    """
    parser = build_parser()
    # Parse command-line arguments
    new_site = parser.parse_args().a
    if new_site:
        add_new_site(new_site)
    else:
        print_updated_sites()


if __name__ == '__main__':
    main()
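
# Example session (the URL is illustrative, not from the original project):
#   $ python pyfeed.py -a https://example.com/blog
#   Site added!
#   $ python pyfeed.py          # on a later run; prints the URL only if
#   https://example.com/blog    # the page changed since the last check
#   $ python pyfeed.py
#   No sites have been updated.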