introduction-to-python-bsuir-2019 · yanaShcherbich · Nov 10, 2019 · Nov 13, 2019 · Nov 13, 2019 · Nov 17, 2019
diff --git a/README.md b/README.md
@@ -0,0 +1,57 @@
+# RSS reader
+
+RSS reader is a command-line utility which receives RSS URL and prints results in human-readable format.
+
+## Specification
+<pre>
+usage: rss_reader.py [-h] [--version] [--json] [--verbose] [--limit LIMIT]
+                     [--date DATE] [--to-html TO_HTML] [--to-fb2 TO_FB2]
+                     source
+
+Pure Python command-line RSS reader.
+
+positional arguments:
+  source         RSS URL
+
+optional arguments:
+  -h, --help         show this help message and exit
+  --version          Print version info
+  --json             Print result as JSON in stdout
+  --verbose          Outputs verbose status messages
+  --limit LIMIT      Limit news topics if this parameter provided
+  --date DATE        Take a date in %Y%m%d format. The news from the specified
+                     day will be printed out.
+  --to-html TO_HTML  Convert news into html and print in stdout. Argument
+                     receives the path where new file will be saved.
+  --to-fb2 TO_FB2    Convert news into fb2 and print in stdout. Argument
+                     receives the path where new file will be saved.
+
+</pre>
+
+## News caching
+RSS news items are stored in local storage as they are read. The local storage is implemented with shelve. Each cached record is a dict holding the news items themselves and their raw (HTML) descriptions, and it is stored under a key that consists of the date and the RSS URL. Cached news can be read with the optional argument --date. The utility creates a binary db file 'cache.db' in the current directory. If you change the current directory, the db file from the previous directory will not be copied to the new one.
+
+## JSON structure
+<pre>
+{
+    "news": {
+        "feed": "TUT.BY: Новости ТУТ - Главные новости",
+        "items": [
+            {
+                "title": "Охрана, неприкосновенность, пенсия. Канопацкая предлагает закон о гарантиях для экс-президента Беларуси",
+                "link": "https://news.tut.by/economics/662957.html?utm_campaign=news-feed&utm_medium=rss&utm_source=rss-news",
+                "date": "Wed, 27 Nov 2019 15:41:00 +0300",
+                "description": {
+                    "text": "Депутат Анна Канопацкая разработала законопроект «О гарантиях президенту Республики Беларусь, прекратившему исполнение своих полномочий, и членам его семьи» и в ближайшее время внесет его на рассмотрение в Палату представителей.",
+                    "images": [
+                        {
+                            "src": "https://img.tyt.by/thumbnails/n/politika/04/4/c5109116a72e8f8029fecf5ca544c9d4.jpg",
+                            "alt": "Фото: sb.by"
+                        }
+                    ],
+                    "links": null
+                }
+            }
+        ]
+    }
+}
+</pre>
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,5 @@
+feedparser==2.2.1
+bs4==0.0.1
+dateparser==0.7.2
+requests==2.22.0
+lxml==4.4.2
diff --git a/rss/__init__.py b/rss/__init__.py
diff --git a/rss/cache.py b/rss/cache.py
@@ -0,0 +1,156 @@
+"""This module provides work with cached news."""
+
+import logging
+import shelve
+import datetime
+import sys
+import json
+
+import dateparser
+
+from rss.converter_to_fb2 import Fb2Converter
+from rss.converter_to_html import HTMLConverter
+
+
+class Cache:
+    """Store parsed RSS news in a local shelve database and read them back.
+
+    Every record lives under a key built from the publication date
+    (``%Y%m%d``) followed by the feed URL. A record is a dict with two
+    parallel lists: ``list_of_news`` (news dicts) and
+    ``list_of_row_descriptions`` (the description saved alongside each news
+    item, which is later handed to the HTML converter).
+    """
+
+    def __init__(self):
+        logging.info("Cache initialization")
+        self.db_file_name = 'cache.db'
+
+    def _create_key(self, date: str, url: str) -> str:
+        """Return the db key: the date immediately followed by the URL."""
+
+        logging.info('Create key')
+        return date + url
+
+    def _convert_date(self, date: str) -> str:
+        """Convert a date string to the ``%Y%m%d`` format.
+
+        ``dateparser`` is tried first. When it cannot parse the value, the
+        layout ``"%a, %d %b %Y %X %z"`` (for example
+        ``Wed, 27 Nov 2019 15:41:00 +0300``) is tried with ``strptime``.
+
+        Raises:
+            Exception: if neither parser understands the date.
+        """
+
+        logging.info('Convert date')
+
+        converted_date = dateparser.parse(date)
+        if not converted_date:
+            logging.info("Date isn't clear. Try to parse again")
+            try:
+                converted_date = datetime.datetime.strptime(date, "%a, %d %b %Y %X %z")
+            except (TypeError, ValueError) as error:
+                # Same exception type as before, but the real cause stays visible
+                raise Exception('Something wrong with date') from error
+        return converted_date.strftime('%Y%m%d')
+
+    def insert_news(self, news, row_description, url: str):
+        """Insert news into the cache file.
+
+        The cache file is created if it doesn't exist. A news item that is
+        already stored under the same key is not added twice.
+
+        Args:
+            news: news dict; its ``'date'`` value is used to build the key.
+            row_description: description stored in parallel with ``news``.
+            url: RSS URL the news came from.
+        """
+
+        date = news['date']
+        key = self._create_key(self._convert_date(date), url)
+        logging.info("Open db or create if it doesn't exist for inserting news")
+        with shelve.open(self.db_file_name) as db:
+            record = db.get(key)
+            if record:
+                logging.info("Update record")
+                if news not in record['list_of_news']:
+                    record['list_of_news'].append(news)
+                    record['list_of_row_descriptions'].append(row_description)
+                    # Without writeback, shelve persists only on assignment
+                    db[key] = record
+            else:
+                logging.info("Create new record")
+                db[key] = {
+                    'list_of_news': [news],
+                    'list_of_row_descriptions': [row_description],
+                }
+
+    def _check_entered_date(self, key: str):
+        """Check that the entered string is a real date in ``%Y%m%d`` format.
+
+        Raises:
+            ValueError: if the string is not 8 digits or is not a valid
+                calendar date (e.g. month 13).
+        """
+
+        logging.info('Check entered date')
+        if len(key) != 8 or not key.isdigit():
+            raise ValueError('Invalid entered date')
+        try:
+            datetime.datetime.strptime(key, '%Y%m%d')
+        except ValueError as error:
+            raise ValueError('Invalid entered date') from error
+
+    def _get_news(self, key: str) -> dict:
+        """Return the record stored in the db under ``key``.
+
+        Raises:
+            Exception: if there is no record for the key.
+        """
+
+        logging.info("Open db or create if it doesn't exist for getting news")
+        with shelve.open(self.db_file_name) as db:
+            try:
+                return db[key]
+            except KeyError:
+                raise Exception("Can't find the news") from None
+
+    def set_printing_news(self, url: str, date: str,
+                          limit: int, json_mode: bool,
+                          fb2_path: str, html_path: str):
+        """Print cached news and optionally convert them to fb2 and/or HTML.
+
+        Args:
+            url: RSS URL the news were cached for.
+            date: date in ``%Y%m%d`` format.
+            limit: maximum number of news to output; ``None`` means all.
+            json_mode: print the news as JSON instead of plain text.
+            fb2_path: if truthy, path where the fb2 file is saved.
+            html_path: if truthy, path where the HTML file is saved.
+
+        Raises:
+            ValueError: if the date or the limit is invalid.
+            Exception: if nothing is cached for this date and URL.
+        """
+
+        logging.info("Set print format")
+
+        self._check_entered_date(date)
+        self._check_limit(limit)
+
+        key = self._create_key(date, url)
+        record = self._get_news(key)
+
+        if json_mode:
+            print(json.dumps(record['list_of_news'][:limit], indent=4, ensure_ascii=False))
+        else:
+            self.print_news(record['list_of_news'], limit)
+
+        if fb2_path:
+            conv = Fb2Converter(fb2_path)
+            conv.convert_to_fb2(record['list_of_news'][:limit])
+            conv.save_fb2()
+        if html_path:
+            conv = HTMLConverter(html_path)
+            conv.save_html(conv.convert_to_html(record['list_of_news'][:limit],
+                                                record['list_of_row_descriptions'][:limit]))
+
+    def _check_limit(self, limit):
+        """Check that the limit, when given, is greater than 0.
+
+        Raises:
+            ValueError: if ``limit`` is not ``None`` and ``limit <= 0``.
+        """
+
+        logging.info('Check limit')
+        if limit is not None and limit <= 0:
+            raise ValueError('Invalid limit: limit <= 0')
+
+    def print_news(self, list_of_news, limit):
+        """Print at most ``limit`` news; a single news dict is also accepted."""
+
+        logging.info('Start printing cached news')
+        # A lone news item may be passed as a dict instead of a list
+        if isinstance(list_of_news, dict):
+            print('№', 1)
+            self._print_entries(list_of_news)
+            return
+
+        for news_number, news in enumerate(list_of_news[:limit], start=1):
+            print('№', news_number)
+            self._print_entries(news)
+
+    def _print_entries(self, news: dict):
+        """Print one news: title, date, link, text, image sources and links."""
+
+        logging.info('Print one news')
+        print('Title:', news['title'])
+        print('Date:', news['date'])
+        print('Link:', news['link'], '\n')
+
+        description = news['description']
+
+        # The text 'Nothing' is treated as "no description text" and skipped
+        if description['text'] != 'Nothing':
+            print(description['text'], '\n')
+
+        if description['images']:
+            print('Images:')
+            for item in description['images']:
+                print(item['src'])
+
+        if description['links']:
+            print('Links:')
+            for item in description['links']:
+                print(item)
+
+        print('-' * 50)
diff --git a/rss/converter_to_fb2.py b/rss/converter_to_fb2.py
@@ -0,0 +1,124 @@
+"""This module converts news to fb2 format and saves."""
+
+import os
+import logging
+from base64 import b64encode
+import xml.etree.ElementTree as tree
+from xml.etree.ElementTree import Element
+import xml.dom.minidom as minidom
+
+import requests
+
+
+class Fb2Converter:
+    """Build an fb2 (FictionBook) document from news and save it to a file.
+
+    The document tree is created in ``__init__``: a ``FictionBook`` root with
+    a ``description`` and a ``body`` child. News sections are appended to the
+    body and image data is appended to the root as ``binary`` elements.
+    """
+
+    # Seconds to wait for an image download before giving up
+    IMAGE_TIMEOUT = 10
+
+    def __init__(self, path='rss-news.fb2'):
+        logging.info('Fb2Converter initialization')
+        self.path = path
+        self.root = tree.Element('FictionBook')
+        self.root.set('xmlns:l', "http://www.w3.org/1999/xlink")
+        self.description = tree.SubElement(self.root, 'description')
+        self.body = tree.SubElement(self.root, 'body')
+
+    def insert_file_description(self):
+        """Insert the file description (title-info with the book title)."""
+
+        logging.info('Insert description')
+        title_info = tree.SubElement(self.description, 'title-info')
+        tree.SubElement(title_info, 'book-title').text = 'RSS news'
+
+    def insert_body(self, list_of_news, limit):
+        """Insert one section per news, for at most ``limit`` news."""
+
+        logging.info("Insert body")
+        for news in list_of_news[:limit]:
+            self.insert_section(news)
+
+    def insert_section(self, news):
+        """Insert a section describing a single news.
+
+        The section holds the bold title, link, date, images, description
+        text and, when present, the list of links. Image problems are
+        reported and skipped so that they never abort the conversion.
+        """
+
+        logging.info('Insert describing single news section')
+        section = tree.SubElement(self.body, 'section')
+
+        self.insert_tag_p(section, news['title'], True)
+        self.insert_tag_empty_line(section)
+        self.insert_tag_p(section, 'Link: ' + news['link'])
+        self.insert_tag_p(section, 'Date: ' + news['date'])
+        self.insert_tag_empty_line(section)
+
+        for img in news['description']['images'] or []:
+            # Handle each image separately so that one broken image does not
+            # drop the remaining images of this news
+            try:
+                self.insert_image(section, img['src'], img['alt'])
+            except Exception as e:
+                print("Errors with images: ", e)
+
+        self.insert_tag_empty_line(section)
+        self.insert_tag_p(section, news['description']['text'])
+
+        if news['description']['links']:
+            self.insert_tag_empty_line(section)
+            self.insert_tag_p(section, 'Links:')
+            for link in news['description']['links']:
+                self.insert_tag_p(section, link)
+
+        self.insert_tag_empty_line(section)
+        self.insert_tag_p(section, '-'*50)
+
+    def insert_tag_empty_line(self, parent):
+        """Insert an empty-line tag into ``parent``."""
+
+        logging.info('Insert empty line')
+        tree.SubElement(parent, 'empty-line')
+
+    def insert_tag_p(self, parent, text, strong_mode=None):
+        """
+        Insert tag p with text.
+        If strong_mode then text will be bold.
+        """
+
+        if strong_mode:
+            logging.info('Insert tag p with strong')
+            tag_p = tree.SubElement(parent, 'p')
+            tree.SubElement(tag_p, 'strong').text = text
+        else:
+            logging.info('Insert tag p')
+            tree.SubElement(parent, 'p').text = text
+
+    def convert_to_fb2(self, news, limit=None):
+        """Fill the document tree with the description and the news.
+
+        Nothing is returned; the result is kept in ``self.root`` and written
+        out by ``save_fb2``.
+        """
+
+        logging.info('Start conversion to fb2')
+        self.insert_file_description()
+        self.insert_body(news, limit)
+
+    def save_fb2(self):
+        """Save the fb2 document, pretty-printed, on the received path."""
+
+        logging.info('Save fb2 converted news')
+        # Serialize and pretty-print in memory, then write the bytes once.
+        # Writing UTF-8 bytes with a matching XML declaration keeps non-ASCII
+        # text intact regardless of the platform's default encoding.
+        raw_xml = tree.tostring(self.root, encoding='utf-8')
+        pretty_xml = minidom.parseString(raw_xml).toprettyxml(encoding='utf-8')
+
+        with open(self.path, 'wb') as file:
+            file.write(pretty_xml)
+
+    def insert_image(self, parent, img_url, img_name):
+        """Insert image tag in format: <image l:href="#{img_name}"/>.
+
+        The image itself is downloaded and stored base64-encoded in a
+        ``binary`` element of the root whose id is ``img_name``.
+        """
+
+        import mimetypes
+
+        logging.info('Insert image')
+        # Do everything that can fail first, so that a failure leaves no
+        # half-built image/binary elements in the tree
+        href = '#' + img_name
+        encoded_image = self.get_binary_img(img_url)
+
+        content_type = mimetypes.guess_type(img_url)[0]
+        if not content_type or not content_type.startswith('image/'):
+            content_type = 'image/png'  # previous hard-coded value as fallback
+
+        # NOTE(review): img_name comes from the image alt text, so ids may
+        # contain spaces or repeat between images - confirm readers cope
+        image = tree.SubElement(parent, 'image')
+        image.set('l:href', href)
+        binary = tree.SubElement(self.root, 'binary')
+        binary.set('id', img_name)
+        binary.set('content-type', content_type)
+        binary.text = encoded_image
+
+    def get_binary_img(self, src):
+        """Return the image downloaded from ``src`` as a base64 string.
+
+        Raises:
+            requests.RequestException: on network errors, timeouts or an
+                HTTP error status.
+        """
+
+        logging.info('Get binary img')
+        resource = requests.get(src, timeout=self.IMAGE_TIMEOUT)
+        # Do not embed an HTTP error page as if it were image data
+        resource.raise_for_status()
+        return b64encode(resource.content).decode('UTF-8')
diff --git a/rss/converter_to_html.py b/rss/converter_to_html.py
@@ -0,0 +1,52 @@
+"""This module converts news to HTML and saves the result."""
+
+import os
+import logging
+
+from bs4 import BeautifulSoup
+from lxml import html
+from lxml import etree
+from lxml.builder import E
+
+
+class HTMLConverter:
+    """Convert news to an HTML page and save it to a file."""
+
+    def __init__(self, path='rss-news.html'):
+        logging.info('HTMLConverter initialization')
+        self.path = path
+
+    def convert_to_html(self, list_of_news, list_of_row_descriptions):
+        """Return news converted into HTML.
+
+        Args:
+            list_of_news: news dicts with ``'title'``, ``'link'``, ``'date'``.
+            list_of_row_descriptions: HTML description strings, parallel to
+                ``list_of_news``.
+
+        Returns:
+            The root ``html`` element of the built page.
+        """
+
+        logging.info('Start conversion to HTML')
+        body = E.body()
+        page = E.html(
+            E.head(
+                # The file is saved as UTF-8, so declare it for browsers
+                E.meta(charset='utf-8'),
+                E.title("RSS news"),
+            ),
+            body,
+        )
+
+        for single_news, single_description in \
+                zip(list_of_news, list_of_row_descriptions):
+            logging.info('Convert one news')
+            body.append(E.P(
+                E.center(E.h2(single_news['title'])),
+                E.h2(E.a(single_news['link'], href=single_news['link'])),
+                E.h4(single_news['date']),
+            ))
+            # html.fromstring raises on an empty document, so skip blanks
+            if single_description and single_description.strip():
+                body.append(html.fromstring(single_description))
+            body.append(E.BR())
+            body.append(E.BR())
+            body.append(E.HR())
+        return page
+
+    def save_html(self, html_news):
+        """Save HTML converted news on the received path."""
+
+        logging.info('Save HTML converted news')
+        # Explicit UTF-8: the platform default may be unable to encode the text
+        with open(self.path, 'w', encoding='utf-8') as file:
+            file.write(html.tostring(html_news,
+                                     pretty_print=True,
+                                     encoding='unicode',
+                                     method='html',
+                                     doctype='<!DOCTYPE html>'))