-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrssr-pull
executable file
·44 lines (34 loc) · 1.84 KB
/
rssr-pull
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
#! /usr/bin/python3
import requests
import rssr
import feedparser
import argparse
from os import chdir, environ, listdir
from time import strftime
from sys import exc_info
# rssr-pull: read a list of feed URLs, and for every feed entry that has not
# been saved yet, download the linked article and store its content as an
# HTML file in the target directory.

# Command-line interface: the feeds file, the output directory, and a switch
# that silences progress/error output on stdout.
parser = argparse.ArgumentParser()
parser.add_argument("-f", "--feeds", dest="feeds", metavar="FEEDS", help="file with individual urls", default=environ["HOME"] + "/.rssr/feeds")
parser.add_argument("-q", "--quiet", dest="verbose", help="run in quiet mode", action="store_false", default=True)
parser.add_argument("directory", nargs="?", metavar="DIR", help="directory to wich articles will be downloaded", default=environ["HOME"] + "/.rssr/articles/")
args = parser.parse_args()

# Seconds to wait on an unresponsive server before giving up on one article,
# so a single stalled host cannot hang the whole run.
REQUEST_TIMEOUT = 30

# All article files are created relative to the output directory. Note that a
# relative --feeds path is therefore resolved against this directory too.
chdir(args.directory)

# Names of the articles already on disk. Listed once up front and kept in sync
# as new files are written, instead of re-reading the directory per entry.
existing = set(listdir())

# One feed URL per line; strip the line terminator so it is neither printed
# nor handed to the feed parser as part of the URL.
with open(args.feeds, "r") as feeds_file:
    urls = [line.strip() for line in feeds_file]

for url in urls:
    if not url:
        # Blank line in the feeds file: nothing to fetch.
        continue
    if args.verbose:
        print(url)
    feed = feedparser.parse(url)
    for entry in feed["entries"]:
        # Prefer the publication time; fall back to the last-update time for
        # entries that carry no "published_parsed" key.
        # NOTE(review): "%F" relies on the platform's strftime supporting it.
        try:
            published = strftime("%F %H:%M", entry["published_parsed"])
        except KeyError:
            published = strftime("%F %H:%M", entry["updated_parsed"])
        # "/" and "?" are removed so the title can be used as a file name.
        filename = "{} {}: {}".format(published, feed["feed"]["title"], entry["title"]).replace("/", "").replace("?", "")
        if filename in existing:
            continue

        # Only the network call is guarded: any requests failure (refused
        # connection, DNS error, timeout, ...) skips this article and the run
        # carries on with the next one.
        try:
            artr = requests.get(entry["link"], timeout=REQUEST_TIMEOUT)
        except requests.RequestException:
            if args.verbose:
                print("ERROR", entry["link"])
            continue

        if not artr.ok:
            if args.verbose:
                print("ERROR", artr.status_code, artr.url)
            continue

        if args.verbose:
            print(filename)
        charset = artr.encoding or "utf-8"
        # html2content receives the raw response bytes and its result is
        # decoded here; presumably it reduces the page to its main content --
        # confirm against the rssr module.
        html = rssr.html2content(artr.content).decode(charset)
        # Start the saved document at the first <h1> when there is one,
        # otherwise keep everything.
        beg = html.find("<h1")
        if beg < 0:
            beg = 0
        # Encode the file with the same charset the <meta> tag declares, so
        # the saved page is read back with the right encoding.
        with open(filename, "w", encoding=charset) as article:
            article.write("<html>\n<meta charset={}>\n{}\n</html>".format(charset, html[beg:]))
        existing.add(filename)