introduction-to-python-bsuir-2019 · ilyaborodin · Nov 10, 2019 · Nov 17, 2019 · Nov 17, 2019 · Nov 17, 2019
diff --git a/App/Args_parser.py b/App/Args_parser.py
@@ -0,0 +1,38 @@
+import argparse
+import sys
+import App
+import logging
+from App.Errors import FatalError
+from App.Colors import Colors
+
+
+def parsing_args():
+    """Build the command-line parser and parse the process arguments.
+
+    Returns:
+        argparse.Namespace: parsed options with the attributes ``source``,
+        ``version``, ``json``, ``verbose``, ``limit``, ``date``, ``to_html``,
+        ``to_pdf`` and ``colorize``.
+    """
+    parser = argparse.ArgumentParser(description='Pure Python command-line RSS reader.')
+    parser.add_argument('source', type=str, help='RSS URL')
+    parser.add_argument('--version', action="store_true", help='Print version info')
+    parser.add_argument('--json', action="store_true", help='Print result as JSON in stdout')
+    parser.add_argument('--verbose', action="store_true", help='Outputs verbose status messages')
+    parser.add_argument('--limit', type=int, help='Limit news topics if this parameter provided')
+    # argparse expands help strings with the ``%`` operator, so a literal percent
+    # sign has to be doubled; the bare ``%y`` used before made ``--help`` fail
+    # with "unsupported format character". The year is written as ``%Y`` because
+    # start_settings() only accepts an eight-digit date.
+    parser.add_argument('--date', type=str,
+                        help='Date for which you want to display news (format %%Y%%m%%d)')
+    parser.add_argument('--to_html', type=str, help='Convert data to html to your path')
+    parser.add_argument('--to_pdf', type=str, help='Convert data to pdf to your path')
+    parser.add_argument('--colorize', action="store_true", help='colorful output')
+    return parser.parse_args()
+
+
+def start_settings(args):
+    """Apply the parsed command-line options before the reader starts.
+
+    Prints the version banner when requested, configures logging (DEBUG to
+    stdout in verbose mode, CRITICAL otherwise), validates ``limit`` and
+    ``date`` and, unless colorized output was requested, resets every entry
+    of ``Colors`` to "white".
+
+    Raises:
+        FatalError: if ``limit`` is negative or ``date`` is not exactly eight
+            digits.
+    """
+    if args.version:
+        frame = "*" * 50
+        print("\n".join((frame, "Version: " + App.__version__, frame)) + "\n" * 2)
+
+    if args.verbose:
+        log_options = {"stream": sys.stdout, "level": logging.DEBUG}
+    else:
+        log_options = {"level": logging.CRITICAL}
+    logging.basicConfig(**log_options)
+
+    limit = args.limit
+    if limit is not None and limit < 0:
+        raise FatalError("Limit cannot be less than 0")
+
+    date = args.date
+    if date is not None and not (len(date) == 8 and date.isdigit()):
+        raise FatalError("Invalid date format")
+
+    if args.colorize:
+        return
+    # Plain output: every role falls back to the same colour.
+    Colors.update(dict.fromkeys(Colors, "white"))
diff --git a/App/Colors.py b/App/Colors.py
@@ -0,0 +1,7 @@
+"""Colour table: maps each kind of console output to the colour it is printed in."""
+Colors = dict(
+    error="red",
+    article="blue",
+    text="yellow",
+    other="green",
+)
diff --git a/App/Errors.py b/App/Errors.py
@@ -0,0 +1,4 @@
+class FatalError(Exception):
+    """An error after which the program is forcefully terminated.
+
+    Args:
+        text: human-readable description of the failure.
+    """
+
+    def __init__(self, text):
+        # Hand the message to Exception so that str(error), error.args and
+        # tracebacks carry it; before, str(error) was always an empty string.
+        super().__init__(text)
+        # NOTE(review): retained only for backward compatibility with callers
+        # that may read ``error.__str__`` as a plain attribute. str() looks the
+        # method up on the type, so this instance attribute does not affect it.
+        self.__str__ = text
diff --git a/App/News.py b/App/News.py
@@ -0,0 +1,75 @@
+import logging
+from App.Errors import FatalError
+
+
+class News:
+    """Store and post-process the information of a single news item.
+
+    Attributes:
+        parsed_date: publication date as a ``YYYYMMDD`` string.
+        title, date, summary, link: values copied from the feed entry; the
+            summary is stripped of the tags handled by :meth:`clear_text`.
+        channel_name: name of the channel the item belongs to.
+        images: URLs cut out of ``<img src=...>`` tags of the summary.
+        links: URLs cut out of ``<a href=...>`` tags of the summary, without
+            the item's own ``link``.
+    """
+
+    def __init__(self, entry, channel_name):
+        """Copy the fields of ``entry`` and clean up its summary.
+
+        Args:
+            entry: object exposing ``published_parsed``, ``title``,
+                ``published``, ``summary`` and ``link`` attributes.
+            channel_name: name of the channel, stored as is.
+
+        Raises:
+            FatalError: if reading any of the entry attributes fails.
+        """
+        logging.info("Creating object News")
+        try:
+            self.parsed_date = self.pars_date(entry.published_parsed)
+            self.title = entry.title
+            self.date = entry.published
+            self.summary = entry.summary
+            self.link = entry.link
+            self.channel_name = channel_name
+        except Exception as err:
+            # ``except Exception`` (not a bare except) lets KeyboardInterrupt and
+            # SystemExit through; the original cause stays attached.
+            raise FatalError("Problems with article processing") from err
+        self.images = []
+        self.links = []
+        self.clear_text()
+
+    def pars_date(self, struct):
+        """Return the date of ``struct`` as a ``YYYYMMDD`` string.
+
+        Args:
+            struct: object with ``tm_year``, ``tm_mon`` and ``tm_mday``
+                attributes; month and day are zero-padded to two characters.
+        """
+        return str(struct.tm_year) + str(struct.tm_mon).zfill(2) + str(struct.tm_mday).zfill(2)
+
+    def del_tags(self, ind1, ind2, ind3, delta=0, items=None):
+        """Cut every fragment that starts with ``ind1`` out of the summary.
+
+        Args:
+            ind1: marker that opens a fragment to remove.
+            ind2: marker that ends the value to collect; searched from
+                ``delta`` characters after the start of ``ind1``.
+            ind3: marker that closes the fragment; the fragment is removed up
+                to and including it.
+            delta: offset from the start of ``ind1`` to the start of the value.
+            items: optional list that receives the text found between the
+                value start and ``ind2`` for every removed fragment.
+
+        Raises:
+            ValueError: if ``ind2`` or ``ind3`` does not follow an ``ind1``.
+        """
+        logging.info("Tag processing")
+        while ind1 in self.summary:
+            start = self.summary.index(ind1)
+            value_start = start + delta
+            value_end = self.summary.index(ind2, value_start)
+            # Use the real marker length instead of assuming two characters.
+            cut_end = self.summary.index(ind3, start) + len(ind3)
+            if items is not None:
+                items.append(self.summary[value_start:value_end])
+            self.summary = self.summary[:start] + self.summary[cut_end:]
+
+    def clear_text(self):
+        """Run del_tags() in several configurations to clean the summary.
+
+        Image and anchor tags are removed and their URLs collected in
+        ``images`` and ``links``; ``<br``, ``</p>`` and ``<p>`` tags are
+        dropped. Finally the item's own link is removed from ``links``.
+        Any failure is logged as a warning and stops the clean-up where it is.
+        """
+        logging.info("Improvement summary and and search for pictures and links")
+        try:
+            self.del_tags("<img src=", "\"", "/>", 10, self.images)
+            self.del_tags("<a href=", "\"", "a>", 9, self.links)
+            self.del_tags("<br", "<br", "/>")
+            self.del_tags("</p>", "</p>", "p>")
+            self.del_tags("<p>", "<p>", "p>")
+            # Build the filtered list in one pass: removing from a list while
+            # iterating over it skips the element after each removal.
+            self.links[:] = [url for url in self.links if url != self.link]
+        except Exception as e:
+            logging.warning("Problems with tag parsing:\n" + str(e))
+
+    def __str__(self):
+        """Return the multi-line text representation of the item."""
+        parts = ["Channel name: {0}\n"
+                 "Title: {1}\n"
+                 "Date: {2}\n"
+                 "Link: {3}\n\n"
+                 "Summary: {4}".format(self.channel_name, self.title, self.date, self.link, self.summary)]
+        if self.images:
+            parts.append("\n\nImages in the article:")
+            parts.extend("\n" + img for img in self.images)
+        if self.links:
+            parts.append("\n\nLinks in the article:")
+            parts.extend("\n" + link for link in self.links)
+        return "".join(parts)
diff --git a/App/Portal.py b/App/Portal.py
@@ -0,0 +1,103 @@
+import logging
+import feedparser
+import json
+from App.Errors import FatalError
+from App.News import News
+from App.ToHtml import ToHtml
+from App.ToPDF import ToPDF
+from termcolor import colored
+from App.Colors import Colors
+
+
+class Portal:
+    """Store and process the information associated with one news portal.
+
+    Attributes:
+        url: address the feed is downloaded from.
+        title, link: taken from the ``feed`` part of the parsed document.
+        updated: ``feed.updated`` value seen by the last update(), or None.
+        news: list of News objects.
+        limit: maximum number of articles, or None for no limit.
+        links: initialised to an empty list; not used by the code shown here.
+    """
+
+    def __init__(self, url, limit):
+        """Download the feed at ``url`` and build the list of articles.
+
+        Raises:
+            FatalError: if the feed cannot be fetched or processed.
+        """
+        logging.info("Creating object Portal")
+        self.url = url
+        rss = self.get_rss()
+        try:
+            self.title = rss.feed.title
+            self.link = rss.feed.link
+            self.updated = None
+            self.news = []
+            self.limit = limit
+            self.links = []
+            self.update(rss.entries[::-1])
+        except FatalError:
+            # update() already raised a specific error; do not re-label it.
+            raise
+        except Exception as err:
+            raise FatalError("Problems with rss processing") from err
+
+    def get_rss(self):
+        """Get rss file.
+
+        Returns:
+            The object returned by ``feedparser.parse`` for ``self.url``.
+
+        Raises:
+            FatalError: if ``feedparser.parse`` raises.
+        """
+        logging.info("Getting rss file")
+        try:
+            return feedparser.parse(self.url)
+        except Exception as err:
+            raise FatalError("Problems getting rss file") from err
+
+    def update(self, entries):
+        """Turn ``entries`` into News objects when the feed has changed.
+
+        The feed is fetched again and its ``feed.updated`` value compared with
+        the stored one; only when they differ are the first ``limit`` items of
+        ``entries`` converted, each one inserted at the front of ``news``.
+
+        Raises:
+            FatalError: if fetching the feed or building an article fails.
+        """
+        logging.info("Start processing article")
+        if self.limit is None or self.limit > len(entries):
+            limit = len(entries)
+        else:
+            limit = self.limit
+        try:
+            # NOTE(review): this downloads the feed a second time right after
+            # __init__ did, only to read ``feed.updated``.
+            rss = self.get_rss()
+            if self.updated != rss.feed.updated:
+                self.updated = rss.feed.updated
+                # NOTE(review): __init__ passes the entries reversed, so with a
+                # limit the slice keeps the items that were LAST in the feed --
+                # presumably the oldest ones; confirm this is intended.
+                for entry in entries[:limit]:
+                    self.news.insert(0, News(entry, self.title))
+        except FatalError:
+            raise
+        except Exception as err:
+            raise FatalError("Problems with article processing") from err
+
+    def load_new_news(self, news):
+        """Replace the stored articles with ``news``, cut to ``limit`` items."""
+        if self.limit is None or self.limit > len(news):
+            self.news = news
+        else:
+            self.news = news[:self.limit]
+
+    def print(self, json_flag):
+        """Display the portal and its articles on stdout.
+
+        Args:
+            json_flag: when true, print one JSON document; otherwise print
+                colored plain text.
+
+        Raises:
+            FatalError: if building or printing the output fails.
+        """
+        try:
+            if json_flag:
+                logging.info("Saving to json")
+                json_news = [
+                    {"Title": news.title, "Date": news.date, "Link": news.link,
+                     "Summary": news.summary, "Images": news.images, "Links": news.links}
+                    for news in self.news
+                ]
+                main_dict = {"Title": self.title, "Url": self.url, "News": json_news}
+
+                print(json.dumps(main_dict, ensure_ascii=False, indent=4))
+            else:
+                logging.info("Saving to text")
+                print(colored("\n\nRSS-chanel", Colors["other"]))
+                for news in self.news:
+                    print(colored("\n" + "*" * 20 + "New article" + "*" * 20 + "\n", Colors["article"]))
+                    print(colored(news, Colors["text"]))
+        except Exception as err:
+            logging.error(str(err))
+            raise FatalError("Problems with printing") from err
+
+    def convert_to_html(self, html_path):
+        """Convert news to html and write it to ``html_path``.
+
+        Failures are reported on stdout and logged; they are not raised.
+        """
+        logging.info("Start converting news to html")
+        try:
+            to_html = ToHtml(self.news, html_path)
+            to_html.make_file()
+        except Exception as err:
+            print(colored("Error with converting to html", Colors["error"]))
+            # A failure is an error, not an informational message.
+            logging.error(str(err))
+
+    def convert_to_pdf(self, pdf_path):
+        """Convert news to pdf and write it to ``pdf_path``.
+
+        The articles are rendered to HTML first and that markup is handed to
+        ToPDF. Failures are reported on stdout and logged; they are not raised.
+        """
+        logging.info("Start converting news to pdf")
+        try:
+            to_html = ToHtml(self.news)
+            to_pdf = ToPDF(to_html.html, pdf_path)
+            to_pdf.make_file()
+        except Exception as err:
+            print(colored("Error with converting to pdf", Colors["error"]))
+            logging.error(str(err))
diff --git a/App/RSSListener.py b/App/RSSListener.py
@@ -0,0 +1,48 @@
+import logging
+from App.Portal import Portal
+from App.Errors import FatalError
+from App.Saver import Saver
+from termcolor import colored
+from App.Colors import Colors
+
+
+class RSSListener:
+    """Drive one run of the reader: fetch, cache, print and convert the news."""
+
+    def __init__(self, limit, json_flag, date, html_path, pdf_path):
+        """Remember the run options; the portal is created later by start()."""
+        logging.info("Creating object RSSListener")
+        self.portal = None
+        self.limit, self.date = limit, date
+        self.json_flag = json_flag
+        self.html_path, self.pdf_path = html_path, pdf_path
+
+    def start(self, url):
+        """Process ``url`` according to the stored options.
+
+        The feed is loaded and its articles are handed to the cache. Without a
+        date the fresh articles are printed; with a date the cached articles
+        of that day replace them and are printed, or a "not found" message is
+        shown. HTML and PDF files are written afterwards when paths were given.
+
+        Raises:
+            FatalError: re-raised as is, or raised in place of any other
+                exception.
+        """
+        logging.info("We begin to process the url")
+        try:
+            self.portal = Portal(url, self.limit)
+            cache = Saver()
+            cache.start_saving(self.portal.news)
+
+            if self.date is None:
+                self.printing()
+            else:
+                cached_news = cache.load(self.date)
+                if cached_news is None:
+                    print(colored("Error: news haven't been founded", Colors["error"]))
+                else:
+                    self.portal.load_new_news(cached_news)
+                    self.printing()
+
+            if self.html_path is not None:
+                self.portal.convert_to_html(self.html_path)
+            if self.pdf_path is not None:
+                self.portal.convert_to_pdf(self.pdf_path)
+        except FatalError:
+            raise
+        except Exception:
+            raise FatalError("Something go wrong")
+
+    def printing(self):
+        """Print the portal's articles in the configured output format."""
+        self.portal.print(self.json_flag)
diff --git a/App/Saver.py b/App/Saver.py
@@ -0,0 +1,54 @@
+import pickle
+import os
+import logging
+
+
+class Saver:
+    """Save news to, and load news from, per-date pickle files.
+
+    Every file lives in ``./Cache/`` (relative to the working directory), is
+    named after a ``parsed_date`` value and holds a pickled list of news.
+    """
+
+    # Directory that holds one pickle file per publication date.
+    _cache_dir = "./Cache/"
+
+    def sort(self, news_list):
+        """Group news by date.
+
+        Returns:
+            dict mapping each ``parsed_date`` to the list of news carrying
+            it, in their original order.
+        """
+        logging.info("Sorting news in saver")
+        date_handler = {}
+        for news in news_list:
+            date_handler.setdefault(news.parsed_date, []).append(news)
+        return date_handler
+
+    def save(self, date_handler):
+        """Append the news of every date to that date's cache file.
+
+        News whose ``str()`` equals that of an already stored item are
+        skipped; they are also removed from the lists of ``date_handler``.
+
+        Args:
+            date_handler: dict as returned by sort().
+        """
+        logging.info("Saving data")
+        # The cache directory does not exist on a first run; without it every
+        # write below fails and nothing is ever cached.
+        os.makedirs(self._cache_dir, exist_ok=True)
+        for date, fresh in date_handler.items():
+            path = os.path.join(self._cache_dir, date)
+            stored = []
+            if os.path.exists(path):
+                # NOTE(security): pickle.load executes whatever the file
+                # describes; the cache directory must only hold trusted files.
+                with open(path, 'rb') as f:
+                    stored = pickle.load(f)
+                known = {str(item) for item in stored}
+                # Filter in one pass (and in place, as callers may hold the
+                # list): repeated list.remove() could raise on a second match.
+                fresh[:] = [item for item in fresh if str(item) not in known]
+            with open(path, 'wb') as f:
+                pickle.dump(stored + fresh, f)
+
+    def start_saving(self, news_list):
+        """Group ``news_list`` by date and save it; failures are only logged."""
+        try:
+            handler = self.sort(news_list)
+            self.save(handler)
+        except Exception as e:
+            logging.error("Saving error")
+            logging.error(str(e))
+
+    def load(self, date):
+        """Load data from files.
+
+        Args:
+            date: name of the cache file, i.e. a ``parsed_date`` value.
+
+        Returns:
+            The unpickled content of the file, or None when no file exists
+            for ``date``.
+        """
+        logging.info("Loading data from files")
+        path = os.path.join(self._cache_dir, date)
+        if not os.path.exists(path):
+            return None
+        # NOTE(security): see save() -- only trusted cache files may be loaded.
+        with open(path, 'rb') as f:
+            return pickle.load(f)
diff --git a/App/ToHtml.py b/App/ToHtml.py
@@ -0,0 +1,48 @@
+import logging
+from termcolor import colored
+from App.Colors import Colors
+
+
+class ToHtml:
+    """Class responsible for converting data to html.
+
+    Attributes:
+        news: iterable of news objects to render.
+        path: file the page is written to by make_file().
+        html: the rendered page, built once in the constructor.
+    """
+
+    def __init__(self, news, path="./news.html"):
+        self.news = news
+        self.path = path
+        self.html = self.make_html()
+
+    def make_html(self):
+        """Create html.
+
+        Returns:
+            str: one page with, for every news item, its title, channel name,
+            date, link, images, summary and the links found in the article.
+        """
+        logging.info("Creating html")
+        # The page is written as UTF-8 by make_file(), so say so in the head.
+        parts = ["""
+            <!DOCTYPE HTML>
+            <html>
+             <head>
+              <meta charset="utf-8">
+              <title>News</title>
+             </head>
+             <body>
+        """]
+        # NOTE(review): values are inserted without HTML escaping; the summary
+        # is presumably meant to keep its markup -- confirm before escaping.
+        for entry in self.news:
+            parts.append(f"<h3><p align=\"center\">{entry.title}</p></h3>")
+            parts.append(f"<p>Channel name: {entry.channel_name}</p>")
+            parts.append(f"<p>Date: {entry.date}</p>")
+            # Attribute values are quoted so URLs with spaces stay intact.
+            parts.append(f"<p><a href=\"{entry.link}\">Link</a></p>")
+            for img in entry.images:
+                parts.append(f"<p><img src=\'{img}\' width=\"700\" height=\"500\"></p>")
+            parts.append(f"<p>{entry.summary}</p>")
+            if entry.links:
+                parts.append("<p>Links in the article:</p>")
+                for counter, link in enumerate(entry.links, start=1):
+                    parts.append(f"<p><a href=\"{link}\">Link №{counter}</a></p>")
+        parts.append("</body></html>")
+        return "".join(parts)
+
+    def make_file(self):
+        """Create html file at ``self.path``.
+
+        A failure to open or write the file is reported on stdout and logged;
+        it is not raised.
+        """
+        logging.info("Creating html file")
+        try:
+            # Explicit UTF-8: the page contains non-ASCII text ("№", news in
+            # any language) that the platform default encoding may reject.
+            with open(self.path, 'w', encoding="utf-8") as f:
+                f.write(self.html)
+        except OSError as err:
+            logging.error(str(err))
+            print(colored("Saving file error. Problems with path", Colors["error"]))