Skip to content

Borodin Ilya #43

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 25 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
762b321
Делаем комит с пустым файлом
ilyaborodin Nov 10, 2019
0222fce
Add 1st iter
ilyaborodin Nov 17, 2019
a432523
Изменение пути на интепретатор
ilyaborodin Nov 17, 2019
00b6bf7
Выполнил 2-ую итерацию
ilyaborodin Nov 17, 2019
772c22b
Переменовал команду в rss-reader
ilyaborodin Nov 17, 2019
53c76d2
Убрал лишние точки выхода и поправил систему ошибок
ilyaborodin Nov 29, 2019
7df5749
Добавил назввание портала к каждой новости
ilyaborodin Nov 29, 2019
3971278
Добавил класс saver
ilyaborodin Nov 29, 2019
39fddde
Убрал limit с print и добавил его в метод update
ilyaborodin Nov 29, 2019
2f3b674
Выполнена 3-ая итерация
ilyaborodin Nov 29, 2019
40fb834
создал Readme
ilyaborodin Nov 29, 2019
69fadd6
Update README.md
ilyaborodin Nov 29, 2019
b6867ec
Создал папку для хранения данных
ilyaborodin Nov 29, 2019
94d057d
Выполнена 4-ая итерация
ilyaborodin Nov 30, 2019
5969654
Update README.md
ilyaborodin Nov 30, 2019
33a66cc
Убрал лишний импорт
ilyaborodin Nov 30, 2019
a6ee847
Merge remote-tracking branch 'origin/Final_Task' into Final_Task
ilyaborodin Nov 30, 2019
5f126bb
Перевел документацию на английский
ilyaborodin Nov 30, 2019
ce0c8e1
Выполнил 5-ую итерацию
ilyaborodin Nov 30, 2019
deb03ba
Update README.md
ilyaborodin Nov 30, 2019
0cdc00d
Update README.md
ilyaborodin Nov 30, 2019
ac95113
Добавил \n к выводу новостей
ilyaborodin Nov 30, 2019
4929234
Merge remote-tracking branch 'origin/Final_Task' into Final_Task
ilyaborodin Nov 30, 2019
0b13159
Выполнил частично 6-ую итерацию
ilyaborodin Dec 1, 2019
31d600f
Update README.md
ilyaborodin Dec 1, 2019
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions App/Args_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import argparse
import sys
import App
import logging
from App.Errors import FatalError
from App.Colors import Colors


def parsing_args():
    """Build the command-line parser and parse the program arguments.

    Returns:
        argparse.Namespace with the attributes ``source``, ``version``,
        ``json``, ``verbose``, ``limit``, ``date``, ``to_html``, ``to_pdf``
        and ``colorize``.
    """
    parser = argparse.ArgumentParser(description='Pure Python command-line RSS reader.')
    parser.add_argument('source', type=str, help='RSS URL')
    parser.add_argument('--version', action="store_true", help='Print version info')
    parser.add_argument('--json', action="store_true", help='Print result as JSON in stdout')
    parser.add_argument('--verbose', action="store_true", help='Outputs verbose status messages')
    parser.add_argument('--limit', type=int, help='Limit news topics if this parameter provided')
    # argparse applies %-formatting to help strings, so literal percent signs
    # must be doubled; an unescaped "%y" makes the help output fail.
    # The year is written as %Y because the date is validated as 8 digits.
    parser.add_argument('--date', type=str,
                        help='Date for which you want to display news (format %%Y%%m%%d)')
    parser.add_argument('--to_html', type=str, help='Convert data to html to your path')
    parser.add_argument('--to_pdf', type=str, help='Convert data to pdf to your path')
    parser.add_argument('--colorize', action="store_true", help='colorful output')
    return parser.parse_args()


def start_settings(args):
    """Validate the parsed arguments and apply the global settings they imply.

    Prints the version banner when requested, configures logging, rejects a
    negative limit or a date that is not exactly eight digits, and turns every
    output colour to white unless ``--colorize`` was given.

    Raises:
        FatalError: if ``limit`` is negative or ``date`` is malformed.
    """
    if args.version:
        banner = "*" * 50
        print(banner + "\n" + "Version: " + App.__version__ + "\n" + banner + "\n" * 2)

    # Verbose mode sends debug output to stdout; otherwise only critical
    # messages get through.
    if args.verbose:
        log_options = {"stream": sys.stdout, "level": logging.DEBUG}
    else:
        log_options = {"level": logging.CRITICAL}
    logging.basicConfig(**log_options)

    if args.limit is not None and args.limit < 0:
        raise FatalError("Limit cannot be less than 0")

    if args.date is not None:
        well_formed = len(args.date) == 8 and args.date.isdigit()
        if not well_formed:
            raise FatalError("Invalid date format")

    if not args.colorize:
        # Overwrite the value of every existing key in the shared colour table.
        Colors.update(dict.fromkeys(Colors, "white"))
7 changes: 7 additions & 0 deletions App/Colors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
"""Dictionary stores output colors"""
# Maps an output category to a colour name. The values are passed to
# termcolor's ``colored`` when messages are printed.
# The dictionary is mutable on purpose: start_settings() replaces every value
# with "white" when the --colorize flag is not given.
Colors = {
"error": "red",  # error messages
"article": "blue",  # separator printed before each article
"text": "yellow",  # article body
"other": "green"  # remaining output, e.g. the channel header
}
4 changes: 4 additions & 0 deletions App/Errors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
class FatalError(Exception):
    """An error after which the program is forcefully terminated.

    Args:
        text: human-readable description of the failure.

    Attributes:
        text: the message passed to the constructor.
    """

    def __init__(self, text):
        # Initialise the base class explicitly so that ``str(error)`` and
        # ``error.args`` carry the message by design rather than by accident.
        super().__init__(text)
        self.text = text
        # NOTE(review): this instance attribute shadows the ``__str__`` method
        # name with a plain string. It does not affect ``str(error)``, which
        # looks the method up on the type. It is kept only in case callers
        # outside this view read ``error.__str__`` directly; prefer ``text``.
        self.__str__ = text
75 changes: 75 additions & 0 deletions App/News.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
import logging
from App.Errors import FatalError


class News:
    """Store and process the information related to a single news item.

    Attributes:
        parsed_date: publication date as a ``YYYYMMDD`` string.
        title, date, summary, link: values copied from the feed entry.
        channel_name: name of the portal the item belongs to.
        images: image URLs extracted from the summary.
        links: link URLs extracted from the summary (the item's own link
            is excluded).
    """

    def __init__(self, entry, channel_name):
        """Create a news item from a feed entry.

        Args:
            entry: object exposing ``published_parsed``, ``title``,
                ``published``, ``summary`` and ``link`` attributes.
            channel_name: name of the portal the entry comes from.

        Raises:
            FatalError: if a required attribute cannot be read from ``entry``.
        """
        logging.info("Creating object News")
        try:
            self.parsed_date = self.pars_date(entry.published_parsed)
            self.title = entry.title
            self.date = entry.published
            self.summary = entry.summary
            self.link = entry.link
            self.channel_name = channel_name
        except Exception as err:
            # Keep the original failure attached as the cause for debugging.
            raise FatalError("Problems with article processing") from err
        self.images = []
        self.links = []
        self.clear_text()

    def pars_date(self, struct):
        """Return the date held in ``struct`` as a ``YYYYMMDD`` string.

        Args:
            struct: object with integer ``tm_year``, ``tm_mon`` and
                ``tm_mday`` attributes (e.g. ``time.struct_time``).
        """
        # NOTE(review): a PR reviewer asked why datetime or a similar library
        # is not used for this task. Format specs give the same zero padding
        # without the manual length checks.
        return f"{struct.tm_year}{struct.tm_mon:02d}{struct.tm_mday:02d}"

    def del_tags(self, ind1, ind2, ind3, delta=0, items=None):
        """Cut every fragment starting with ``ind1`` out of the summary.

        Args:
            ind1: text marking the start of a fragment to remove.
            ind2: text marking the end of the value to collect; it is
                searched for from ``delta`` characters after the fragment start.
            ind3: text marking the end of the fragment to remove.
            delta: offset from the fragment start to the start of the value.
            items: optional list that receives the collected values; when
                ``None`` nothing is collected.

        Raises:
            ValueError: if ``ind2`` or ``ind3`` is not found after ``ind1``.
        """
        logging.info("Tag processing")
        while ind1 in self.summary:
            start = self.summary.index(ind1)
            value_start = start + delta
            value_end = self.summary.index(ind2, value_start)
            fragment_end = self.summary.index(ind3, start)
            if items is not None:
                items.append(self.summary[value_start:value_end])
            # 2 is the length of the closing markers passed by clear_text()
            # ("/>", "a>", "p>").
            self.summary = self.summary[:start] + self.summary[fragment_end + 2:]

    def clear_text(self):
        """Strip markup from the summary and collect image and link URLs.

        Runs del_tags() in several configurations because on some portals
        part of the summary is unnecessary markup. Parsing problems are
        logged as warnings and do not propagate.
        """
        logging.info("Improvement summary and and search for pictures and links")
        try:
            self.del_tags("<img src=", "\"", "/>", 10, self.images)
            self.del_tags("<a href=", "\"", "a>", 9, self.links)
            self.del_tags("<br", "<br", "/>")
            self.del_tags("</p>", "</p>", "p>")
            self.del_tags("<p>", "<p>", "p>")
            # Drop references to the article itself. Build a filtered list
            # instead of removing while iterating, which skips elements.
            self.links[:] = [link for link in self.links if link != self.link]
        except Exception as e:
            logging.warning("Problems with tag parsing:\n" + str(e))

    def __str__(self):
        """Return the text representation of the news item."""
        # NOTE(review): a PR reviewer considered this logic too heavy for
        # __str__ and suggested a dedicated method. Saver compares cached
        # items through str(), so the output format is kept unchanged.
        parts = ["Channel name: {0}\n"
                 "Title: {1}\n"
                 "Date: {2}\n"
                 "Link: {3}\n\n"
                 "Summary: {4}".format(self.channel_name, self.title, self.date,
                                       self.link, self.summary)]
        if self.images:
            parts.append("\n\nImages in the article:")
            parts.extend("\n" + img for img in self.images)
        if self.links:
            parts.append("\n\nLinks in the article:")
            parts.extend("\n" + link for link in self.links)
        return "".join(parts)
103 changes: 103 additions & 0 deletions App/Portal.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
import logging
import feedparser
import json
from App.Errors import FatalError
from App.News import News
from App.ToHtml import ToHtml
from App.ToPDF import ToPDF
from termcolor import colored
from App.Colors import Colors


class Portal:
    """Store and process the information associated with one news portal.

    Attributes:
        url: address of the RSS feed.
        title, link: values read from the feed header.
        updated: value of ``feed.updated`` seen at the last update.
        news: list of News objects, most recently inserted first.
        limit: maximum number of entries to take per update, or ``None``.
    """

    def __init__(self, url, limit):
        """Download the feed at ``url`` and load its articles.

        Raises:
            FatalError: if the feed cannot be fetched or processed.
        """
        logging.info("Creating object Portal")
        self.url = url
        rss = self.get_rss()
        try:
            self.title = rss.feed.title
            self.link = rss.feed.link
            self.updated = None
            self.news = []
            self.limit = limit
            self.links = []
            # Reuse the feed that was just parsed instead of downloading it again.
            self.update(rss.entries[::-1], rss)
        except Exception as e:
            raise FatalError("Problems with rss processing") from e

    def get_rss(self):
        """Fetch and parse the feed at ``self.url``.

        Raises:
            FatalError: if the parser raises.
        """
        logging.info("Getting rss file")
        try:
            return feedparser.parse(self.url)
        except Exception as e:
            raise FatalError("Problems getting rss file") from e

    def update(self, entries, rss=None):
        """Add articles built from ``entries`` to the front of ``self.news``.

        Nothing is added when the feed's ``updated`` value equals the one
        stored by the previous update.

        Args:
            entries: sequence of feed entries; at most ``self.limit`` of them,
                counted from the start, are used.
            rss: already parsed feed to take ``feed.updated`` from; when
                omitted the feed is downloaded again.

        Raises:
            FatalError: if an entry or the feed cannot be processed.
        """
        logging.info("Start processing article")
        try:
            if rss is None:
                rss = self.get_rss()
            if self.updated != rss.feed.updated:
                self.updated = rss.feed.updated
                # NOTE(review): __init__ passes the entries reversed, so this
                # slice keeps the first ``limit`` items of the reversed list.
                # If the feed lists newest items first, these are the oldest
                # ones; confirm this is intended.
                # Slicing with None or an oversized limit keeps everything.
                fresh = [News(entry, self.title) for entry in entries[:self.limit]]
                # Same final order as inserting each item at position 0.
                self.news[:0] = fresh[::-1]
        except FatalError:
            raise
        except Exception as e:
            raise FatalError("Problems with article processing") from e

    def load_new_news(self, news):
        """Replace the stored news with ``news``, honouring ``self.limit``."""
        if self.limit is None or self.limit > len(news):
            self.news = news
        else:
            self.news = news[:self.limit]

    def print(self, json_flag):
        """Print the portal and its articles to stdout.

        Args:
            json_flag: when true, print one JSON document; otherwise print
                coloured text.

        Raises:
            FatalError: if anything fails while printing.
        """
        try:
            if json_flag:
                logging.info("Saving to json")
                json_news = [
                    {"Title": news.title, "Date": news.date, "Link": news.link,
                     "Summary": news.summary, "Images": news.images, "Links": news.links}
                    for news in self.news
                ]
                main_dict = {"Title": self.title, "Url": self.url, "News": json_news}

                print(json.dumps(main_dict, ensure_ascii=False, indent=4))
            else:
                logging.info("Saving to text")
                print(colored("\n\nRSS-chanel", Colors["other"]))
                for news in self.news:
                    print(colored("\n" + "*" * 20 + "New article" + "*" * 20 + "\n", Colors["article"]))
                    print(colored(news, Colors["text"]))
        except Exception as e:
            logging.error(str(e))
            raise FatalError("Problems with printing") from e

    def convert_to_html(self, html_path):
        """Write the stored news to an HTML file at ``html_path``.

        Failures are reported on stdout and logged; they do not propagate.
        """
        logging.info("Start converting news to html")
        try:
            to_html = ToHtml(self.news, html_path)
            to_html.make_file()
        except Exception as e:
            print(colored("Error with converting to html", Colors["error"]))
            # Logged as an error, consistent with print().
            logging.error(str(e))

    def convert_to_pdf(self, pdf_path):
        """Write the stored news to a PDF file at ``pdf_path``.

        The PDF is produced from the HTML rendering of the news. Failures
        are reported on stdout and logged; they do not propagate.
        """
        logging.info("Start converting news to pdf")
        try:
            to_html = ToHtml(self.news)
            to_pdf = ToPDF(to_html.html, pdf_path)
            to_pdf.make_file()
        except Exception as e:
            print(colored("Error with converting to pdf", Colors["error"]))
            logging.error(str(e))
48 changes: 48 additions & 0 deletions App/RSSListener.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import logging
from App.Portal import Portal
from App.Errors import FatalError
from App.Saver import Saver
from termcolor import colored
from App.Colors import Colors


class RSSListener:
"""Drive one run of the reader: load a portal, cache its news, print it and optionally convert it."""

def __init__(self, limit, json_flag, date, html_path, pdf_path):
# Only stores the run options; the Portal is created later by start().
logging.info("Creating object RSSListener")
self.limit = limit
self.date = date
self.portal = None
self.json_flag = json_flag
self.html_path = html_path
self.pdf_path = pdf_path

def start(self, url):
"""Process the feed at ``url``: cache its news, print fresh or cached news, run the requested conversions.

Raises FatalError; any other exception is replaced by a generic FatalError.
"""
logging.info("We begin to process the url")
try:
# Building the Portal fetches the feed and loads its articles.
self.portal = Portal(url, self.limit)
saver = Saver()
# The freshly loaded news is always written to the cache first.
saver.start_saving(self.portal.news)
if self.date is not None:
# A date was requested: show the cached news for that date instead.
old_news = saver.load(self.date)
# load() returns None when there is no cache file for the date.
if old_news is not None:
self.portal.load_new_news(old_news)
self.printing()
else:
print(colored("Error: news haven't been founded", Colors["error"]))

else:
self.printing()
# NOTE(review): indentation is not visible in this view, so it cannot be
# told from here whether the two conversions below belong to the ``else``
# branch above or run for every request; confirm against the repository.
if self.html_path is not None:
self.portal.convert_to_html(self.html_path)
if self.pdf_path is not None:
self.portal.convert_to_pdf(self.pdf_path)
except FatalError:
raise
# The original exception ``e`` is not used; only the generic message survives.
except Exception as e:
raise FatalError("Something go wrong")

def printing(self):
# Delegates to Portal.print, which emits JSON when json_flag is set and text otherwise.
self.portal.print(self.json_flag)
54 changes: 54 additions & 0 deletions App/Saver.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import pickle
import os
import logging


class Saver:
    """Save news to, and load it from, one cache file per publication date."""

    # Cache directory, relative to the current working directory. Built with
    # os.path.join because path separators differ between operating systems
    # (raised by a reviewer on the pull request).
    CACHE_DIR = os.path.join(".", "Cache")

    def _cache_path(self, date):
        """Return the path of the cache file that belongs to ``date``."""
        return os.path.join(self.CACHE_DIR, date)

    def sort(self, news_list):
        """Group news by date.

        Args:
            news_list: iterable of objects with a ``parsed_date`` attribute.

        Returns:
            dict mapping each ``parsed_date`` to the list of its news, in
            the order they appear in ``news_list``.
        """
        logging.info("Sorting news in saver")
        date_handler = {}
        for news in news_list:
            date_handler.setdefault(news.parsed_date, []).append(news)
        return date_handler

    def save(self, date_handler):
        """Append the news in ``date_handler`` to the per-date cache files.

        News whose ``str()`` equals that of an already cached item is
        skipped, and is also removed from the lists inside ``date_handler``.

        Args:
            date_handler: dict mapping a date string to a list of news.
        """
        logging.info("Saving data")
        # Without the directory every open() below would fail.
        os.makedirs(self.CACHE_DIR, exist_ok=True)
        for date, fresh_news in date_handler.items():
            path = self._cache_path(date)
            if os.path.exists(path):
                with open(path, 'rb') as f:
                    # NOTE: pickle can execute arbitrary code from a crafted
                    # file; this is only safe while the cache is trusted.
                    old_news = pickle.load(f)
                # A set lookup filters each new item exactly once. The former
                # nested loop could queue the same item for removal twice and
                # then fail on the second remove().
                known = {str(item) for item in old_news}
                fresh_news[:] = [item for item in fresh_news if str(item) not in known]
                to_store = old_news + fresh_news
            else:
                to_store = fresh_news
            with open(path, 'wb') as f:
                pickle.dump(to_store, f)

    def start_saving(self, news_list):
        """Group ``news_list`` by date and save it; errors are logged, not raised."""
        try:
            handler = self.sort(news_list)
            self.save(handler)
        except Exception as e:
            logging.error("Saving error")
            logging.error(str(e))

    def load(self, date):
        """Load the cached news for ``date``.

        Returns:
            The cached list, or ``None`` when no cache file exists for ``date``.
        """
        logging.info("Loading data from files")
        path = self._cache_path(date)
        if not os.path.exists(path):
            return None
        with open(path, 'rb') as f:
            # NOTE: see save(); only load cache files from a trusted location.
            old_news = pickle.load(f)
        return old_news
48 changes: 48 additions & 0 deletions App/ToHtml.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import logging
from termcolor import colored
from App.Colors import Colors


class ToHtml:
    """Convert a list of news items to an HTML document.

    Attributes:
        news: the news items to render.
        path: where make_file() writes the document.
        html: the rendered document, built on construction.
    """

    def __init__(self, news, path="./news.html"):
        self.news = news
        self.path = path
        self.html = self.make_html()

    def make_html(self):
        """Render ``self.news`` and return the document as a string.

        Every item must expose ``title``, ``channel_name``, ``date``,
        ``link``, ``images``, ``summary`` and ``links``.
        """
        # Imported locally (under another name) so the block brings its own
        # dependency without touching the module imports.
        from html import escape

        logging.info("Creating html")
        # The charset is declared because make_file() writes UTF-8.
        parts = ["""
<!DOCTYPE HTML>
<html>
<head>
<meta charset="utf-8">
<title>News</title>
</head>
<body>
"""]
        for entry in self.news:
            # NOTE(review): title, channel name, date and summary are inserted
            # as-is; presumably they may already contain markup from the feed.
            parts.append(f"<p align=\"center\"><h3>{entry.title}</h3></p>".replace(
                "<p align=\"center\"><h3>", "<h3><p align=\"center\">").replace("</h3></p>", "</p></h3>"))
            parts.append(f"<p>Channel name: {entry.channel_name}</p>")
            parts.append(f"<p>Date: {entry.date}</p>")
            # Attribute values are quoted so URLs containing spaces or ">"
            # cannot break the tag; the article link is escaped as well.
            parts.append(f"<p><a href=\"{escape(str(entry.link), quote=True)}\">Link</a></p>")
            for img in entry.images:
                parts.append(f"<p><img src=\"{img}\" width=\"700\" height=\"500\"></p>")
            parts.append(f"<p>{entry.summary}</p>")
            if entry.links:
                parts.append("<p>Links in the article:</p>")
                for counter, link in enumerate(entry.links, start=1):
                    parts.append(f"<p><a href=\"{link}\">Link №{counter}</a></p>")
        parts.append("</body></html>")
        return "".join(parts)

    def make_file(self):
        """Write the rendered document to ``self.path`` as UTF-8.

        Failures are reported on stdout and logged; they do not propagate.
        """
        logging.info("Creating html file")
        try:
            # An explicit encoding keeps non-ASCII text (e.g. "№") writable
            # regardless of the platform default.
            with open(self.path, 'w', encoding="utf-8") as f:
                f.write(self.html)
        except (OSError, ValueError, TypeError) as err:
            logging.error(str(err))
            print(colored("Saving file error. Problems with path", Colors["error"]))
Loading