Skip to content

Kotkovets Gennadiy #49

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 17 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# PythonHomework
[Introduction to Python] Homework Repository
# How to use
* pip install .
* python rss-reader https://news.tut.by/rss/economics.rss --limit 2 --json
# Parameters
* --help (Show this help message and exit)
* source (RSS URL)
* --limit LIMIT (Limit news topics if this parameter provided)
* --json (Prints result as JSON in stdout)
* --verbose (Outputs verbose status messages)
* --version (Print version info)
* --date DATE (Show cached news published on or after DATE, format YYYYMMDD)
* --to-pdf FILE (Convert news to a PDF file)
* --to-html FILE (Convert news to an HTML file)
* --colorize (Print news with colored output)
# JSON structure
news = {"Title": "title", "Date":"date", "Alt image":"alt", "Discription":"discription", "Links":{"News":"link", "Image":"src"} }
6 changes: 6 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
bs4 # for xml and html
feedparser # rss parsing
requests # http requests
python-dateutil
fpdf
colorama
213 changes: 213 additions & 0 deletions rss_app/RSS.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,213 @@
"""
A file with the RssAggregator class that parses the URL
and performs various actions with the received data
"""

import feedparser
from bs4 import BeautifulSoup
import json
from dateutil.parser import parse
import urllib
import httplib2
import os
from colorama import init
from colorama import Fore


class RssAggregator():

    """RSS feed aggregator.

    Fetches a feed, prints entries (optionally colorized), caches them to a
    per-feed JSON file together with their images, and reads the cache back
    for date filtering and for the PDF/HTML converters.

    Instance attributes (set in __init__):
        source   -- RSS feed URL.
        limit    -- max number of entries to show/convert (used as a slice
                    bound, so None means "all").
        date     -- 'YYYYMMDD' string; cached entries published on or after
                    it are returned by get_from_json_file().
        log      -- logger used for verbose status messages.
        colorize -- when truthy, output is wrapped in colorama colors.
    """

    # NOTE(review): this class attribute is never used anywhere in the class;
    # kept only for backward compatibility with possible external references.
    feedurl = ""

    def __init__(self, source, limit, date, log, colorize):
        self.source = source
        self.limit = limit
        self.date = date
        self.log = log
        self.colorize = colorize
        init()  # initialise colorama (needed for ANSI colors on Windows)

    @staticmethod
    def _parse_entry(thefeedentry):
        """Return the dict representation of one feed entry.

        Parses the HTML description exactly once (the original code parsed it
        up to three times per entry). Raises TypeError when the description
        contains no <img> tag (``None['alt']``); callers catch it and skip
        the entry, mirroring the original behavior.
        """
        soup = BeautifulSoup(thefeedentry.description, "html.parser")
        img = soup.find('img')  # None -> TypeError below, caught by callers
        return {
            "Title": thefeedentry.title,
            "Date": thefeedentry.published,
            "Alt image": img['alt'],
            "Discription": soup.text,
            "Links": {
                "News": thefeedentry.link,
                "Image": img['src']
            }
        }

    def get_news(self):

        """Fetch the feed, cache ALL entries to the JSON file, and return
        the first ``self.limit`` entries."""

        self.log.info("Getting rss feed")
        thefeed = feedparser.parse(self.source)
        self.save_to_json_file(thefeed.entries)
        return thefeed.entries[:self.limit]

    def print_news(self, entries):

        """Print the given feed entries to stdout.

        Entries whose description has no <img> tag are skipped (logged).
        """

        self.log.info("Printing news")
        for thefeedentry in entries:
            try:
                news = self._parse_entry(thefeedentry)
            except TypeError:
                self.log.info("TypeError: 'NoneType'")
                continue
            if self.colorize:
                # BUG FIX: the original parsed ``description + Fore.RESET``,
                # feeding an ANSI escape sequence into BeautifulSoup; the
                # reset code now wraps the printed value instead.
                print("--------------------------------------------------")
                print(f"{Fore.RED}Title:{Fore.RESET} ", Fore.RED + news["Title"] + Fore.RESET)
                print(f"{Fore.BLUE}Date:{Fore.RESET} ", Fore.BLUE + news["Date"] + Fore.RESET, end="\n\n")
                print(f"{Fore.YELLOW}Alt image:{Fore.RESET} ", Fore.YELLOW + news["Alt image"] + Fore.RESET)
                print(Fore.GREEN + news["Discription"] + Fore.RESET, end="\n\n")
                print("Links:")
                print(f"{Fore.YELLOW}News:{Fore.RESET} ", Fore.YELLOW + news["Links"]["News"] + Fore.RESET)
                print(f"{Fore.YELLOW}Image:{Fore.RESET} ", Fore.YELLOW + news["Links"]["Image"] + Fore.RESET)
            else:
                print("Title: ", news["Title"])
                print("Date: ", news["Date"], end="\n\n")
                print("Alt image: ", news["Alt image"])
                print(news["Discription"], end="\n\n")
                print("Links:")
                print("News: ", news["Links"]["News"])
                print("Image: ", news["Links"]["Image"])

    def print_json(self, entries):

        """Print each entry as an indented JSON object to stdout."""

        self.log.info("RSS news to json")
        for thefeedentry in entries:
            try:
                news = self._parse_entry(thefeedentry)
            except TypeError:
                self.log.info("TypeError: 'NoneType'")
                continue
            print(json.dumps(news, indent=3))

    def save_to_json_file(self, entries):

        """Cache the entries (and their images) into the per-feed JSON file.

        Entries without an <img> tag are skipped, as in print_news.
        """

        self.log.info("Save news to json file")
        news_list = list()
        file_name = self.get_file_name()
        with open(file_name, "w", encoding="utf-8") as write_file:
            for thefeedentry in entries:
                try:
                    news = self._parse_entry(thefeedentry)
                    self.save_image(thefeedentry, file_name)
                    news_list.append(news)
                except TypeError:
                    self.log.info("TypeError: 'NoneType'")
            json.dump(news_list, write_file, indent=3)

    def get_file_name(self):

        """Derive the cache file name from the feed URL.

        E.g. 'https://a.by/rss/x.rss' -> 'a.byrssx.rss.json'.
        NOTE(review): raises IndexError if the URL has no '//' — presumably
        sources are always full URLs; confirm against the CLI parser.
        """

        self.log.info("Getting file name")
        file_name_list = self.source.split("//")
        file_name = file_name_list[1].replace("/", "")
        file_name += ".json"
        return file_name

    def save_image(self, thefeedentry, file_name):

        """Download the entry's image into the per-feed image directory.

        ``file_name`` is unused; kept for call-site compatibility.
        """

        file_path = self.get_path_image(thefeedentry)
        h = httplib2.Http('.cache')  # httplib2 keeps an on-disk HTTP cache
        img_url = BeautifulSoup(thefeedentry.description, "html.parser").find('img')['src']
        response, content = h.request(img_url)
        try:
            # 'with' guarantees the handle is closed even if write() fails
            # (the original leaked the handle on error).
            with open(file_path, "wb") as out:
                out.write(content)
        except FileNotFoundError:
            self.log.info("Error: image not found")
        except OSError:
            self.log.info("[Errno 22] Invalid argument {}".format(file_path))

    def get_path_image(self, thefeedentry):

        """Return the absolute local path for the entry's image, creating
        the per-feed image directory if needed."""

        file_name_list = self.source.split("//")
        file_name = file_name_list[1].replace("/", "")
        folder_path = "image_" + file_name + os.path.sep
        if not os.path.exists(folder_path):
            self.log.info('Creating directory images')
            os.mkdir(folder_path)
        img = BeautifulSoup(thefeedentry.description, "html.parser").find('img')['src']
        image = img.split("/")
        file_path = os.path.abspath('') + os.path.sep + folder_path + image[-1]
        # BUG FIX: the original ``if ".jpg" or ".gif" or ".png" in file_path``
        # was always true, so the ".jpg" fallback was dead code.
        if not file_path.endswith((".jpg", ".gif", ".png")):
            file_path += ".jpg"
        return file_path

    def get_from_json_file(self):

        """Return cached entries published on or after ``self.date``.

        Returns None (implicitly) when the cache file does not exist.
        """

        self.log.info("Getting news by date")
        file_name = self.get_file_name()
        news_by_date = list()
        try:
            with open(file_name, "r") as read_file:
                news = json.load(read_file)
            for thefeedentry in news:
                # 'YYYYMMDD' strings compare correctly as plain strings
                published = parse(thefeedentry['Date']).strftime('%Y%m%d')
                if published >= self.date:
                    news_by_date.append(thefeedentry)
            return news_by_date
        except FileNotFoundError:
            self.log.info("File not found error")

    def get_news_for_converter(self):

        """Return all cached entries for the PDF/HTML converter, or None
        (implicitly) when the cache file does not exist."""

        self.log.info("Getting news for converter")
        file_name = self.get_file_name()
        try:
            with open(file_name, "r") as read_file:
                return json.load(read_file)
        except FileNotFoundError:
            self.log.info("File not found error")

    def print_news_from_file(self, entries):

        """Print up to ``self.limit`` cached entries (dicts, not feedparser
        objects), optionally colorized."""

        self.log.info("Printing news by date")
        for thefeedentry in entries[:self.limit]:
            if self.colorize:
                print("--------------------------------------------------")
                print(f"{Fore.RED}Title:{Fore.RESET} ", Fore.RED + thefeedentry['Title'] + Fore.RESET)
                print(f"{Fore.BLUE}Date:{Fore.RESET} ", Fore.BLUE + thefeedentry['Date'] + Fore.RESET, end="\n\n")
                print(f"{Fore.YELLOW}Alt image:{Fore.RESET} ", Fore.YELLOW + thefeedentry['Alt image'] + Fore.RESET)
                print(Fore.GREEN + thefeedentry['Discription'] + Fore.RESET, end="\n\n")
                print("Links: ")
                print(f"{Fore.YELLOW}News:{Fore.RESET} ", Fore.YELLOW + thefeedentry['Links']['News'] + Fore.RESET)
                print(f"{Fore.YELLOW}Image:{Fore.RESET} ", Fore.YELLOW + thefeedentry['Links']['Image'] + Fore.RESET)
            else:
                print("--------------------------------------------------")
                print("Title: ", thefeedentry['Title'])
                print("Date: ", thefeedentry['Date'], end="\n\n")
                print("Alt image: ", thefeedentry['Alt image'])
                print(thefeedentry['Discription'], end="\n\n")
                print("Links: ")
                print("News: ", thefeedentry['Links']['News'])
                print("Image: ", thefeedentry['Links']['Image'])
Empty file added rss_app/__init__.py
Empty file.
85 changes: 85 additions & 0 deletions rss_app/converter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
"""
File with a class Converter designed to convert data to pdf and html formats
"""


import fpdf
from bs4 import BeautifulSoup
import os


class Converter:

    """Convert cached news entries (dicts from the JSON cache) to PDF or HTML.

    Instance attributes:
        source  -- RSS feed URL (used to derive the image directory name).
        limit   -- max number of entries to convert (slice bound).
        to_pdf  -- output path for the PDF file.
        to_html -- output path for the HTML file.
        log     -- logger for verbose status messages.
    """

    # Register the bundled TTF directory so FPDF can load the Unicode font.
    fpdf.set_global("SYSTEM_TTFONTS", os.path.join(os.path.dirname(__file__), 'fonts', 'ttf'))

    def __init__(self, source, limit, to_pdf, to_html, log):
        self.source = source
        self.limit = limit
        self.to_pdf = to_pdf
        self.to_html = to_html
        self.log = log

    def pdf_converter(self, entries):

        """Write up to ``self.limit`` entries into the PDF file ``self.to_pdf``."""

        self.log.info("Converter in pdf format")
        pdf = fpdf.FPDF()
        pdf.add_page()
        # uni=True: DejaVu is a Unicode TTF (needed for non-latin feeds)
        pdf.add_font('DejaVu', '', 'DejaVuSansCondensed.ttf', uni=True)
        pdf.set_font('DejaVu', size=14)
        for thefeedentry in entries[:self.limit]:
            path = self.get_path_image(thefeedentry)
            pdf.multi_cell(0, 10, txt="{}".format(thefeedentry['Title']))
            pdf.multi_cell(0, 10, txt="{}".format(thefeedentry['Links']['News']))
            try:
                pdf.image(path)
            except RuntimeError:
                # image missing or unsupported format -- keep the text parts
                self.log.info("Error add image")
            pdf.multi_cell(0, 10, txt="{}".format(thefeedentry['Alt image']))
            pdf.multi_cell(0, 10, txt="{}".format(thefeedentry['Discription']))
            pdf.multi_cell(0, 10, txt="{}".format(thefeedentry['Date']))
            pdf.ln(10)
        pdf.output(self.to_pdf)
        print(self.to_pdf)

    def html_converter(self, entries):

        """Write up to ``self.limit`` entries into the HTML file ``self.to_html``."""

        self.log.info("Converter in html format")
        with open(self.to_html, "w", encoding="utf-8") as file_text:
            file_text.write("<html>")
            file_text.write("<body>")
            file_text.write("<p>")
            for thefeedentry in entries[:self.limit]:
                file_text.write("{}<br />".format(thefeedentry['Title']))
                # BUG FIX: the original '"<a href = "">{}</a>"' was string
                # concatenation producing an EMPTY href; put the URL in the
                # attribute as well as the link text.
                file_text.write('<a href="{0}">{0}</a><br />'.format(thefeedentry['Links']['News']))
                # quote the src attribute so the markup stays valid
                file_text.write('<img src="{}"> <br />'.format(thefeedentry['Links']['Image']))
                file_text.write("{} <br />".format(thefeedentry['Discription']))
                file_text.write("{} <br /><br />".format(thefeedentry['Date']))
            file_text.write("</p>")
            file_text.write("</body>")
            file_text.write("</html>")

    def get_path_image(self, thefeedentry):

        """Return the local path of the cached image for an entry, creating
        the per-feed image directory if needed (mirrors RssAggregator)."""

        self.log.info("Getting path image")
        file_name_list = self.source.split("//")
        file_name = file_name_list[1].replace("/", "")
        folder_path = "image_" + file_name + os.path.sep
        if not os.path.exists(folder_path):
            self.log.info('Creating directory images')
            os.mkdir(folder_path)
        img = thefeedentry['Links']['Image']
        image = img.split("/")
        file_path = os.path.abspath('') + os.path.sep + folder_path + image[-1]
        # BUG FIX: ``if ".jpg" or ".gif" or ".png" in file_path`` was always
        # true (and also printed debug output); append ".jpg" only when the
        # path really lacks a known image extension.
        if not file_path.endswith((".jpg", ".gif", ".png")):
            file_path += ".jpg"
        return file_path
26 changes: 26 additions & 0 deletions rss_app/fonts/fontconfig/20-unhint-small-dejavu-sans-mono.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE fontconfig SYSTEM "../fonts.dtd">
<fontconfig>
<!-- /etc/fonts/conf.d/20-unhint-small-dejavu-sans-mono.conf

Disable hinting manually at smaller sizes (< 8ppem)
This is a copy of the Bitstream Vera fonts rule, as DejaVu is
derived from Vera.

The Bitstream Vera fonts have GASP entries suggesting that hinting be
disabled below 8 ppem, but FreeType ignores those, preferring to use
the data found in the instructed hints. The initial Vera release
didn't include the right instructions in the 'prep' table.
-->
<match target="font">
<test name="family">
<string>DejaVu Sans Mono</string>
</test>
<test compare="less" name="pixelsize">
<double>7.5</double>
</test>
<edit name="hinting">
<bool>false</bool>
</edit>
</match>
</fontconfig>
26 changes: 26 additions & 0 deletions rss_app/fonts/fontconfig/20-unhint-small-dejavu-sans.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE fontconfig SYSTEM "../fonts.dtd">
<fontconfig>
<!-- /etc/fonts/conf.d/20-unhint-small-dejavu-sans.conf

Disable hinting manually at smaller sizes (< 8ppem)
This is a copy of the Bitstream Vera fonts rule, as DejaVu is
derived from Vera.

The Bitstream Vera fonts have GASP entries suggesting that hinting be
disabled below 8 ppem, but FreeType ignores those, preferring to use
the data found in the instructed hints. The initial Vera release
didn't include the right instructions in the 'prep' table.
-->
<match target="font">
<test name="family">
<string>DejaVu Sans</string>
</test>
<test compare="less" name="pixelsize">
<double>7.5</double>
</test>
<edit name="hinting">
<bool>false</bool>
</edit>
</match>
</fontconfig>
Loading