-
Notifications
You must be signed in to change notification settings - Fork 32
Borodin Ilya #43
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Borodin Ilya #43
Changes from all commits
762b321
0222fce
a432523
00b6bf7
772c22b
53c76d2
7df5749
3971278
39fddde
2f3b674
40fb834
69fadd6
b6867ec
94d057d
5969654
33a66cc
a6ee847
5f126bb
ce0c8e1
deb03ba
0cdc00d
ac95113
4929234
0b13159
31d600f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
import argparse | ||
import sys | ||
import App | ||
import logging | ||
from App.Errors import FatalError | ||
from App.Colors import Colors | ||
|
||
|
||
def parsing_args(argv=None):
    """Build the command-line parser and parse the arguments.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) lets
            argparse read the arguments from ``sys.argv``.

    Returns:
        argparse.Namespace with the attributes ``source``, ``version``,
        ``json``, ``verbose``, ``limit``, ``date``, ``to_html``, ``to_pdf``
        and ``colorize``.
    """
    parser = argparse.ArgumentParser(description='Pure Python command-line RSS reader.')
    parser.add_argument('source', type=str, help='RSS URL')
    parser.add_argument('--version', action="store_true", help='Print version info')
    parser.add_argument('--json', action="store_true", help='Print result as JSON in stdout')
    parser.add_argument('--verbose', action="store_true", help='Outputs verbose status messages')
    parser.add_argument('--limit', type=int, help='Limit news topics if this parameter provided')
    # argparse %-formats help strings, so literal percent signs must be doubled;
    # a raw "%y" makes --help fail. The accepted value has eight digits,
    # i.e. a four-digit year (%Y), not %y.
    parser.add_argument('--date', type=str,
                        help='Date for which you want to display news (format %%Y%%m%%d)')
    parser.add_argument('--to_html', type=str, help='Convert data to html to your path')
    parser.add_argument('--to_pdf', type=str, help='Convert data to pdf to your path')
    parser.add_argument('--colorize', action="store_true", help='colorful output')
    return parser.parse_args(argv)
|
||
|
||
def start_settings(args):
    """Validate the parsed arguments and apply the settings they request.

    Prints the version banner when ``args.version`` is set, configures logging
    (DEBUG to stdout when ``args.verbose`` is set, CRITICAL otherwise) and,
    unless ``args.colorize`` is set, overwrites every value in ``Colors``
    with ``"white"``.

    Args:
        args: Namespace with the attributes ``version``, ``verbose``,
            ``limit``, ``date`` and ``colorize``.

    Raises:
        FatalError: If ``limit`` is negative, or ``date`` is not an
            eight-digit string that names a real calendar date (YYYYMMDD).
    """
    # Local import keeps this function self-contained.
    from datetime import datetime

    if args.version:
        print("*" * 50 + "\n" + "Version: " + App.__version__ + "\n" + "*" * 50 + "\n" * 2)
    if args.verbose:
        logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.CRITICAL)
    if args.limit is not None and args.limit < 0:
        raise FatalError("Limit cannot be less than 0")
    if args.date is not None:
        if len(args.date) != 8 or not args.date.isdigit():
            raise FatalError("Invalid date format")
        try:
            # Eight digits are not enough: "20191399" must be rejected too.
            datetime.strptime(args.date, "%Y%m%d")
        except ValueError:
            raise FatalError("Invalid date format") from None
    if not args.colorize:
        # Replacing values (not keys) while iterating a dict is safe.
        for color in Colors:
            Colors[color] = "white"
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
"""Dictionary stores output colors"""
# Maps an output category to a colour name. The values are passed as the
# colour argument of termcolor's colored() by the code that prints output.
Colors = {
    "error": "red",
    "article": "blue",
    "text": "yellow",
    "other": "green",
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
class FatalError(Exception):
    """An error in which we forcefully terminate the program"""

    def __init__(self, text):
        """Create the error.

        Args:
            text: Human-readable description of the problem.
        """
        # Initialise the base class so args, str() and repr() carry the message.
        super().__init__(text)
        self.text = text
        # NOTE(review): the original stored the message in an instance
        # attribute named __str__. It is kept because callers outside this
        # block may read ``err.__str__`` as a plain string; prefer str(err)
        # or err.text in new code.
        self.__str__ = text
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
import logging | ||
from App.Errors import FatalError | ||
|
||
|
||
class News:
    """The class is used to store and process information related to a separate news item."""

    def __init__(self, entry, channel_name):
        """Copy the fields of one feed entry and clean up its summary.

        Args:
            entry: Object exposing ``published_parsed``, ``title``,
                ``published``, ``summary`` and ``link`` attributes.
            channel_name: Name of the channel the entry belongs to.

        Raises:
            FatalError: If any of the required attributes cannot be read.
        """
        logging.info("Creating object News")
        try:
            self.parsed_date = self.pars_date(entry.published_parsed)
            self.title = entry.title
            self.date = entry.published
            self.summary = entry.summary
            self.link = entry.link
            self.channel_name = channel_name
        except Exception as error:
            # "except Exception" (not a bare except) lets KeyboardInterrupt
            # and SystemExit propagate; the cause is kept for debugging.
            raise FatalError("Problems with article processing") from error
        self.images = []
        self.links = []
        self.clear_text()

    def pars_date(self, struct):
        """Parse date to string

        Args:
            struct: Object with ``tm_year``, ``tm_mon`` and ``tm_mday``
                integer attributes.

        Returns:
            The year followed by the zero-padded month and day, e.g. "20190105".
        """
        return f"{struct.tm_year}{struct.tm_mon:02d}{struct.tm_mday:02d}"

    def del_tags(self, ind1, ind2, ind3, delta=0, items=None):
        """Depending on the input parameters, the method may remove unnecessary tags or save links to images

        Every fragment of ``self.summary`` that starts at ``ind1`` and ends
        with the first following ``ind3`` is cut out.

        Args:
            ind1: Marker that opens the fragment.
            ind2: Marker that terminates the value collected into ``items``.
            ind3: Marker that closes the fragment (removed together with it).
            delta: Offset from the start of ``ind1`` to the start of the value.
            items: Optional list that receives the extracted values.

        Raises:
            ValueError: If ``ind2`` or ``ind3`` does not occur after ``ind1``.
        """
        logging.info("Tag processing")
        while ind1 in self.summary:
            start = self.summary.index(ind1)
            value_start = start + delta
            value_end = self.summary.index(ind2, value_start)
            # Use the real terminator length instead of a hard-coded 2.
            end = self.summary.index(ind3, start) + len(ind3)
            if items is not None:
                items.append(self.summary[value_start:value_end])
            self.summary = self.summary[:start] + self.summary[end:]

    def clear_text(self):
        """Method running del_tags () in a different configuration.

        This is required because on some portals in summary, some of the information is unnecessary.
        Image sources go to ``self.images`` and link targets to ``self.links``;
        links equal to the article's own link are dropped. Parsing problems
        are logged as warnings and do not propagate.
        """
        logging.info("Improvement summary and and search for pictures and links")
        try:
            self.del_tags("<img src=", "\"", "/>", 10, self.images)
            self.del_tags("<a href=", "\"", "a>", 9, self.links)
            self.del_tags("<br", "<br", "/>")
            self.del_tags("</p>", "</p>", "p>")
            self.del_tags("<p>", "<p>", "p>")
            # Rebuild the list rather than calling remove() while iterating:
            # removing during iteration skips the element after each removal.
            self.links[:] = [link for link in self.links if link != self.link]
        except Exception as e:
            logging.warning("Problems with tag parsing:\n" + str(e))

    def __str__(self):
        """Return the multi-line text representation of the article."""
        # NOTE(review): a reviewer found this logic too heavy for __str__.
        # The output is kept byte-identical because the cache code
        # compares str() of news items to detect duplicates.
        parts = [
            "Channel name: {0}\n"
            "Title: {1}\n"
            "Date: {2}\n"
            "Link: {3}\n\n"
            "Summary: {4}".format(self.channel_name, self.title, self.date, self.link, self.summary)
        ]
        if self.images:
            parts.append("\n\nImages in the article:")
            parts.extend("\n" + img for img in self.images)
        if self.links:
            parts.append("\n\nLinks in the article:")
            parts.extend("\n" + link for link in self.links)
        return "".join(parts)
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
import logging | ||
import feedparser | ||
import json | ||
from App.Errors import FatalError | ||
from App.News import News | ||
from App.ToHtml import ToHtml | ||
from App.ToPDF import ToPDF | ||
from termcolor import colored | ||
from App.Colors import Colors | ||
|
||
|
||
class Portal:
    """The class is used to store and process information associated with one news portal"""

    def __init__(self, url, limit):
        """Download the feed at ``url`` and build its list of articles.

        Args:
            url: Address of the RSS feed.
            limit: Maximum number of articles to keep, or ``None`` for all.

        Raises:
            FatalError: If the feed cannot be fetched or processed.
        """
        logging.info("Creating object Portal")
        self.url = url
        rss = self.get_rss()
        try:
            self.title = rss.feed.title
            self.link = rss.feed.link
            self.updated = None
            self.news = []
            self.limit = limit
            self.links = []
            # Hand the already downloaded feed over so update() does not
            # fetch it a second time.
            self.update(rss.entries[::-1], rss)
        except Exception as error:
            raise FatalError("Problems with rss processing") from error

    def get_rss(self):
        """Get rss file

        Returns:
            The object returned by ``feedparser.parse`` for ``self.url``.

        Raises:
            FatalError: If ``feedparser.parse`` raises.
        """
        logging.info("Getting rss file")
        try:
            return feedparser.parse(self.url)
        except Exception as error:
            raise FatalError("Problems getting rss file") from error

    def update(self, entries, rss=None):
        """The method is used to obtain articles

        Args:
            entries: Sequence of feed entries to turn into ``News`` objects.
            rss: Optional already parsed feed; when ``None`` the feed is
                downloaded again with ``get_rss``.

        Raises:
            FatalError: If the feed or one of its entries cannot be processed.
        """
        logging.info("Start processing article")
        # NOTE(review): __init__ passes the entries reversed, so this slice
        # keeps the first ``limit`` items of the reversed list - confirm that
        # this is the intended subset.
        if self.limit is None:
            limit = len(entries)
        else:
            limit = min(self.limit, len(entries))
        try:
            if rss is None:
                rss = self.get_rss()
            # Without an "updated" value there is nothing to compare, so the
            # feed is treated as changed instead of failing the whole run.
            updated = getattr(rss.feed, "updated", None)
            if updated is None or updated != self.updated:
                self.updated = updated
                for entry in entries[:limit]:
                    self.news.insert(0, News(entry, self.title))
        except FatalError:
            raise
        except Exception as error:
            raise FatalError("Problems with article processing") from error

    def load_new_news(self, news):
        """Replace the stored articles with ``news``, truncated to ``self.limit``."""
        if self.limit is None or self.limit > len(news):
            self.news = news
        else:
            self.news = news[:self.limit]

    def print(self, json_flag):
        """The method displays information about the portal and articles

        Args:
            json_flag: When true the portal is printed as JSON, otherwise as
                coloured text.

        Raises:
            FatalError: If anything goes wrong while printing.
        """
        try:
            if json_flag:
                logging.info("Saving to json")
                json_news = [
                    {"Title": news.title, "Date": news.date, "Link": news.link,
                     "Summary": news.summary, "Images": news.images, "Links": news.links}
                    for news in self.news
                ]
                main_dict = {"Title": self.title, "Url": self.url, "News": json_news}
                print(json.dumps(main_dict, ensure_ascii=False, indent=4))
            else:
                logging.info("Saving to text")
                print(colored("\n\nRSS-chanel", Colors["other"]))
                for news in self.news:
                    print(colored("\n" + "*" * 20 + "New article" + "*" * 20 + "\n", Colors["article"]))
                    print(colored(news, Colors["text"]))
        except Exception as e:
            logging.error(str(e))
            raise FatalError("Problems with printing") from e

    def convert_to_html(self, html_path):
        """Convert news to html

        Failures are reported on stdout and logged; they do not propagate.
        """
        logging.info("Start converting news to html")
        try:
            to_html = ToHtml(self.news, html_path)
            to_html.make_file()
        except Exception as e:
            print(colored("Error with converting to html", Colors["error"]))
            # Log at error level: a failed conversion is not informational.
            logging.error(str(e))

    def convert_to_pdf(self, pdf_path):
        """Convert news to pdf

        Failures are reported on stdout and logged; they do not propagate.
        """
        logging.info("Start converting news to pdf")
        try:
            to_html = ToHtml(self.news)
            to_pdf = ToPDF(to_html.html, pdf_path)
            to_pdf.make_file()
        except Exception as e:
            print(colored("Error with converting to pdf", Colors["error"]))
            logging.error(str(e))
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
import logging | ||
from App.Portal import Portal | ||
from App.Errors import FatalError | ||
from App.Saver import Saver | ||
from termcolor import colored | ||
from App.Colors import Colors | ||
|
||
|
||
class RSSListener:
    """Class listener"""

    def __init__(self, limit, json_flag, date, html_path, pdf_path):
        """Store the run options.

        Args:
            limit: Maximum number of articles, or ``None``.
            json_flag: Print as JSON when true.
            date: Date string selecting cached news, or ``None``.
            html_path: Target path for the HTML export, or ``None``.
            pdf_path: Target path for the PDF export, or ``None``.
        """
        logging.info("Creating object RSSListener")
        self.limit = limit
        self.date = date
        self.portal = None
        self.json_flag = json_flag
        self.html_path = html_path
        self.pdf_path = pdf_path

    def start(self, url):
        """Class listener. Handles new rss links and saved news

        Builds a ``Portal`` for ``url``, passes its news to ``Saver``, prints
        either the fresh news or (when ``self.date`` is set) the news loaded
        for that date, then runs the requested HTML/PDF conversions.

        Raises:
            FatalError: Re-raised unchanged, or raised in place of any other
                exception.
        """
        logging.info("We begin to process the url")
        try:
            self.portal = Portal(url, self.limit)
            saver = Saver()
            saver.start_saving(self.portal.news)
            if self.date is not None:
                old_news = saver.load(self.date)
                if old_news is not None:
                    self.portal.load_new_news(old_news)
                    self.printing()
                else:
                    print(colored("Error: news haven't been founded", Colors["error"]))
            else:
                self.printing()
            if self.html_path is not None:
                self.portal.convert_to_html(self.html_path)
            if self.pdf_path is not None:
                self.portal.convert_to_pdf(self.pdf_path)
        except FatalError:
            raise
        except Exception as error:
            # Keep the real reason visible in the log and in the traceback.
            logging.error(str(error))
            raise FatalError("Something go wrong") from error

    def printing(self):
        """Print the current portal using the configured output format."""
        self.portal.print(self.json_flag)
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
import pickle | ||
import os | ||
import logging | ||
|
||
|
||
class Saver:
    """The class is responsible for saving and unloading data."""

    # Directory holding one pickle file per date. Built with os.path.join
    # rather than a hard-coded separator, as requested in review.
    CACHE_DIR = os.path.join(".", "Cache")

    def _cache_path(self, date):
        """Return the path of the cache file for ``date``."""
        return os.path.join(self.CACHE_DIR, date)

    def sort(self, news_list):
        """The method sorts news from the link by date

        Args:
            news_list: Iterable of objects with a ``parsed_date`` attribute.

        Returns:
            dict mapping each ``parsed_date`` to the list of its news items,
            in their original order.
        """
        logging.info("Sorting news in saver")
        date_handler = {}
        for news in news_list:
            date_handler.setdefault(news.parsed_date, []).append(news)
        return date_handler

    def save(self, date_handler):
        """Save data

        For every date the items are appended to that date's cache file,
        skipping items whose ``str()`` equals the ``str()`` of an item
        already stored there. ``date_handler`` itself is not modified.

        Args:
            date_handler: dict mapping a date string to a list of news items.
        """
        logging.info("Saving data")
        # Without this the first save fails because the directory is missing.
        os.makedirs(self.CACHE_DIR, exist_ok=True)
        for date, fresh in date_handler.items():
            path = self._cache_path(date)
            cached = []
            if os.path.exists(path):
                # SECURITY: pickle.load can execute arbitrary code from a
                # crafted file; the cache directory must only ever contain
                # files written by this program.
                with open(path, 'rb') as f:
                    cached = pickle.load(f)
            known = {str(item) for item in cached}
            merged = list(cached) + [item for item in fresh if str(item) not in known]
            with open(path, 'wb') as f:
                pickle.dump(merged, f)

    def start_saving(self, news_list):
        """Group ``news_list`` by date and save it; errors are logged, not raised."""
        try:
            handler = self.sort(news_list)
            self.save(handler)
        except Exception as e:
            logging.error("Saving error")
            logging.error(str(e))

    def load(self, date):
        """Load data from files

        Args:
            date: Date string naming the cache file.

        Returns:
            The unpickled content of the cache file, or ``None`` when no file
            exists for ``date``.
        """
        logging.info("Loading data from files")
        path = self._cache_path(date)
        if not os.path.exists(path):
            return None
        # SECURITY: see save() - only load cache files this program wrote.
        with open(path, 'rb') as f:
            return pickle.load(f)
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
import logging | ||
from termcolor import colored | ||
from App.Colors import Colors | ||
|
||
|
||
class ToHtml:
    """Class responsible for converting data to html"""

    def __init__(self, news, path="./news.html"):
        """Render ``news`` to HTML immediately and remember the target path.

        Args:
            news: Iterable of objects with ``title``, ``channel_name``,
                ``date``, ``link``, ``images``, ``summary`` and ``links``.
            path: File path used by ``make_file``.
        """
        self.news = news
        self.path = path
        self.html = self.make_html()

    @staticmethod
    def _escape(value):
        """Return ``value`` as text that is safe inside HTML and quoted attributes."""
        # Local import: the visible top-of-file imports do not include html.
        from html import escape
        return escape(str(value), quote=True)

    def make_html(self):
        """Create html

        Returns:
            The complete HTML document as a string.
        """
        logging.info("Creating html")
        esc = self._escape
        parts = [
            "<!DOCTYPE HTML>\n"
            "<html>\n"
            "<head>\n"
            "<meta charset=\"utf-8\">\n"
            "<title>News</title>\n"
            "</head>\n"
            "<body>\n"
        ]
        for entry in self.news:
            # Feed content is untrusted: escape text, and quote and escape
            # URLs so they cannot break out of the attribute.
            parts.append(f"<h3><p align=\"center\">{esc(entry.title)}</p></h3>")
            parts.append(f"<p>Channel name: {esc(entry.channel_name)}</p>")
            parts.append(f"<p>Date: {esc(entry.date)}</p>")
            parts.append(f"<p><a href=\"{esc(entry.link)}\">Link</a></p>")
            for img in entry.images:
                parts.append(f"<p><img src=\"{esc(img)}\" width=\"700\" height=\"500\"></p>")
            # NOTE(review): the summary is inserted unescaped, as before - it
            # may still carry markup from the feed; confirm whether it should
            # be escaped as well.
            parts.append(f"<p>{entry.summary}</p>")
            if entry.links:
                parts.append("<p>Links in the article:</p>")
                for counter, link in enumerate(entry.links, start=1):
                    parts.append(f"<p><a href=\"{esc(link)}\">Link №{counter}</a></p>")
        parts.append("</body></html>")
        return "".join(parts)

    def make_file(self):
        """Create html file

        Writes ``self.html`` to ``self.path``. Path problems are reported on
        stdout and logged; they do not propagate.
        """
        logging.info("Creating html file")
        try:
            # Explicit UTF-8 so non-ASCII news text does not depend on the
            # platform's default encoding.
            with open(self.path, 'w', encoding='utf-8') as f:
                f.write(self.html)
        except (OSError, ValueError) as error:
            logging.error(str(error))
            print(colored("Saving file error. Problems with path", Colors["error"]))
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
А чем `datetime` или аналогичные библиотеки не подходят для этой задачи? Или я что-то не до конца понимаю?