introduction-to-python-bsuir-2019 · prague15031939 · Nov 10, 2019 · Nov 17, 2019 · Nov 17, 2019 · Nov 17, 2019
diff --git a/final_task/LICENSE b/final_task/LICENSE
@@ -0,0 +1,19 @@
+Copyright (c) 2019 The Python Packaging Authority
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/final_task/MANIFEST.in b/final_task/MANIFEST.in
@@ -0,0 +1 @@
+include requirements.txt
diff --git a/final_task/README.md b/final_task/README.md
@@ -0,0 +1,44 @@
+##### JSON structure
+
+```
+{
+  "news": {
+    "feed": "Yahoo News - Latest News & Headlines",
+    "publications": [
+      {
+        "title": "Stefanik embraces spotlight at impeachment hearings",
+        "pub_date": "Fri, 15 Nov 2019 17:55:51 -0500",
+        "link": "https://news.yahoo.com/stefanik-embraces-spotlight-at-impeachment-hearings-225551297.html",
+        "description": "[image 2: Stefanik embraces spotlight at impeachment hearings] [2]\nThe second day of the impeachment inquiry\u2019s public hearings, on Friday, began the same way\nas the first: with an attempt by Rep. Elise Stefanik, a New York Republican, to interrupt proceedings\nwith a procedural objection.",
+        "hrefs": [
+          [
+            "https://news.yahoo.com/stefanik-embraces-spotlight-at-impeachment-hearings-225551297.html",
+            "link"
+          ],
+          [
+            "http://l.yimg.com/uu/api/res/1.2/NRuDo56c6EiwjZH4WOqEZg--/YXBwaWQ9eXRhY2h5b247aD04Njt3PTEzMDs-/https://media-mbst-pub-ue1.s3.amazonaws.com/creatr-uploaded-images/2019-11/7a1d0760-07d6-11ea-bef7-f17150574bb2",
+            "image",
+            "Stefanik embraces spotlight at impeachment hearings"
+          ]
+        ]
+      }
+    ]
+  }
+}
+```
+
+##### Cache description
+
+News received from a feed is cached in a database that is created locally.
+
+The database consists of a single file named "cache.db". It has the following structure:
+
+|     |  id  | feed | title | pub_date | pub_parsed | link | description | hrefs |
+|-----|------|------|-------|----------|------------|------|-------------|-------|
+|post |  ..  | ...  |  ...  |   ...    |    ...     | ...  |     ...     |   ... |   
+
+All fields except "id" have the text type. The "id" field serves as the primary key of a post.
+
+The hrefs field is composed of all post links, including image links and image descriptions.
+The section of usual references and the section of image links are separated by the --|-- sequence.
+Items within one section are separated by the -+- sequence, and -|- divides a link, its type and the image description.
diff --git a/final_task/requirements.txt b/final_task/requirements.txt
@@ -0,0 +1,4 @@
+feedparser
+bs4
+fpdf
+requests
diff --git a/final_task/rss_reader/__init__.py b/final_task/rss_reader/__init__.py
diff --git a/final_task/rss_reader/cacher.py b/final_task/rss_reader/cacher.py
@@ -0,0 +1,93 @@
+"""
+this module provides tools for caching news
+
+it includes functions for work with database and support ones
+"""
+
+import sqlite3
+from re import match
+
+def init_database(db_path='cache.db'):
+    """
+    this function creates and initializes the database used for caching news
+
+    db_path is the location of the SQLite file; it defaults to 'cache.db' in
+    the current working directory, ':memory:' gives a throwaway database
+
+    the 'cache' table is created only if it does not exist yet, so calling
+    the function on an existing database keeps the stored posts
+
+    returns a (connection, cursor) tuple
+    """
+    connection_obj = sqlite3.connect(db_path)
+    cursor_obj = connection_obj.cursor()
+    cursor_obj.execute(
+        '''CREATE TABLE IF NOT EXISTS cache (id integer primary key, feed text, title text, pub_date text, pub_parsed text, link text, description text, hrefs text)'''
+    )
+    connection_obj.commit()
+
+    return connection_obj, cursor_obj
+
+def cache_news(connection_obj, cursor_obj, news):
+    """
+    this function stores parsed posts in the cache table
+
+    a post is inserted only when no row with exactly the same field values
+    exists yet; the changes are committed once, after the whole list is processed
+    """
+    lookup_query = '''SELECT id FROM cache WHERE feed=? AND title=? AND pub_date=? AND pub_parsed=? AND link=? AND description=? AND hrefs=?'''
+    insert_query = '''INSERT INTO cache (feed, title, pub_date, pub_parsed, link, description, hrefs) VALUES (?, ?, ?, ?, ?, ?, ?)'''
+    plain_fields = ('feed', 'title', 'pub_date', 'pub_parsed', 'link', 'description')
+
+    for entry in news:
+        # links are kept in a single text column, so they are serialized first
+        values = tuple(entry[name] for name in plain_fields) + (hrefs_to_text(entry['hrefs']), )
+        cursor_obj.execute(lookup_query, values)
+        already_cached = cursor_obj.fetchone() is not None
+        if already_cached:
+            continue
+        cursor_obj.execute(insert_query, values)
+
+    connection_obj.commit()
+
+def get_cached_news(cursor_obj, date):
+    """
+    this function reads the posts cached for the given date and returns them as a list of dicts
+
+    date is compared with the pub_parsed column as is; every returned dict has the
+    feed, title, pub_date, pub_parsed, link, description and hrefs keys
+    """
+    column_names = ('feed', 'title', 'pub_date', 'pub_parsed', 'link', 'description')
+    cursor_obj.execute('''SELECT * FROM cache WHERE pub_parsed=?''', (date, ))
+
+    cached_posts = []
+    for record in cursor_obj.fetchall():
+        # record[0] is the id column, the post fields start from index 1
+        post = dict(zip(column_names, record[1:7]))
+
+        # "--|--" separates usual links from image links, only the first two sections are read
+        links = []
+        for section in record[7].split("--|--")[:2]:
+            for chunk in section.split("-+-"):
+                if chunk != '':
+                    links.append(tuple(chunk.split("-|-")))
+        post['hrefs'] = links
+        cached_posts.append(post)
+
+    return cached_posts
+
+def hrefs_to_text(link_list):
+    """
+    this function converts the list of links connected to a post to its text form
+
+    link_list is a list of tuples (url, type) or (url, 'image', description)
+
+    links placed before the first image are written as "-+-url-|-type"; then, if there
+    is an image, "--|--" follows and each remaining entry is written as its fields
+    (three at most) joined with "-|-" and closed with "-+-"
+
+    entries after the first image may have no description: they are stored with
+    the fields they have instead of raising IndexError
+    """
+    # position of the first image link, everything before it belongs to the usual section
+    first_image = next(
+        (pos for pos, tpl in enumerate(link_list) if tpl[1] == 'image'),
+        len(link_list)
+    )
+
+    parts = [f"-+-{tpl[0]}-|-{tpl[1]}" for tpl in link_list[:first_image]]
+    if first_image < len(link_list):
+        parts.append('--|--')
+        parts.extend(
+            "-|-".join(str(field) for field in tpl[:3]) + "-+-"
+            for tpl in link_list[first_image:]
+        )
+
+    return ''.join(parts)
+
+def is_valid_date(line):
+    """
+    this function checks whether a date parameter is a real date in YYYYMMDD format
+
+    returns the match object when line consists of eight digits naming an existing
+    calendar date (years 1000-2999) and None otherwise, so the result can be used
+    as a truth value
+    """
+    from datetime import datetime
+
+    date = r"^[1-2][0-9]{3}[0-1][0-9][0-3][0-9]$"
+    found = match(date, line)
+    if found is None:
+        return None
+
+    # the pattern alone lets through values like 20191399 or 20190230
+    try:
+        datetime.strptime(line, "%Y%m%d")
+    except ValueError:
+        return None
+
+    return found
diff --git a/final_task/rss_reader/format_converter.py b/final_task/rss_reader/format_converter.py
@@ -0,0 +1,169 @@
+"""
+this module provides tools for converting news to html and pdf formats
+"""
+
+import os
+import shutil
+import requests
+from fpdf import FPDF
+
+def break_lines(text):
+    """
+    this function replaces every line feed character in text with a <br> tag
+
+    returns the new string, text itself is not changed
+    """
+    # str.replace makes one pass instead of re-slicing the string for every line feed
+    return text.replace('\n', "<br>")
+
+def to_html(news, filepath):
+    """
+    this function prints news in html format to file
+
+    news is a list of post dicts with feed, title, pub_date, link, description and
+    hrefs keys; hrefs holds (url, type) tuples and (url, 'image', description) ones
+
+    filepath is the output file, it is written in utf-8 and overwritten if it exists
+
+    all post values are html-escaped before they are put into the page, so markup
+    characters in feed data can not break the layout or inject tags
+    """
+    from html import escape
+
+    def esc(value):
+        # escape() also replaces quotes, so the result is safe inside attributes
+        return escape(str(value))
+
+    with open(filepath, "w", encoding='utf-8') as f:
+        # the charset is declared because the file is written in utf-8
+        f.write('''
+<html lang="en" dir="ltr">
+  <head>
+    <meta charset="utf-8">
+    <title>rss_reader</title>
+        <link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/css/bootstrap.min.css" integrity="sha384-ggOyR0iXCbMQv3Xipma34MD+dH/1fQ784/j6cY/iJTQUOhcWr7x9JvoRxT2MZw1T" crossorigin="anonymous">
+    	<style>
+    		ul>li{
+    			list-style: none;
+    			border: 1px solid;
+    			margin-top: 20px;
+    			padding: 10px;
+    		}
+    		ul>li>p:nth-child(1){
+    			font-size: 35px;
+    		}
+
+    		ul>li{
+    			border-radius: 10px;
+    			box-shadow: 1px 1px 10px black;
+    		}
+    	</style>
+  </head>
+  <body>
+    <div class="container">
+    <h1 style="text-align: center">Actual News</h1>
+    <ul>''')
+        for post in news:
+            link = esc(post['link'])
+            # line feeds become <br> after escaping, otherwise the tags would be escaped too
+            description = esc(post['description']).replace('\n', "<br>")
+            f.write(f'''
+      <li>
+        <p>Feed: {esc(post['feed'])}</p>
+        <p>Title: {esc(post['title'])}</p>
+        <p>Publication date: {esc(post['pub_date'])}</p>
+        <p>Link: <a href = "{link}">{link}</a></p>
+        <p>{description}</p>
+        <p>Links:</p>
+        <ol>''')
+            for tpl in post['hrefs']:
+                url = esc(tpl[0])
+                if not tpl[1] == 'image':
+                    f.write(f'''
+          <li>
+            <p><a href = "{url}">{url}</a></p>
+          </li>''')
+                else:
+                    # image entries carry their description as the third field
+                    f.write(f'''
+          <li>
+            <p>{esc(tpl[2])}<br><a href = "{url}"><img src = "{url}"></a></p>
+          </li>''')
+            f.write('''
+        </ol>
+      </li>''')
+        f.write('''
+    </ul>
+    </div>
+    <script src="https://code.jquery.com/jquery-3.3.1.slim.min.js" integrity="sha384-q8i/X+965DzO0rT7abK41JStQIAqVgRVzpbzo5smXKp4YfRvH+8abtTE1Pi6jizo" crossorigin="anonymous"></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/popper.js/1.14.7/umd/popper.min.js" integrity="sha384-UO2eT0CpHqdSJQ6hJty5KVphtPhzWj9WO1clHTMGa3JDZwrnQq4sF86dIHNDz0W1" crossorigin="anonymous"></script>
+    <script src="https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/js/bootstrap.min.js" integrity="sha384-JjSmVgyd0p3pXB1rRibZUAYoIIy6OrQ6VrjIEaFf/nJGzIxFDsf4x0xIM+B07jRM" crossorigin="anonymous"></script>
+  </body>
+</html>''')
+
+class user_FPDF(FPDF):
+    """
+    FPDF subclass which prints the current page number in the page footer
+    """
+    def footer(self):
+        """
+        move 15 units up from the page bottom and print the page number aligned to the right
+        """
+        page_label = str(self.page_no())
+        self.set_y(-15)
+        self.cell(0, 10, txt=page_label, align='R')
+
+def download_image(url, dest_filepath, timeout=30):
+    """
+    this function downloads an image from url and saves it in file
+
+    url is the image address, dest_filepath is the file to write (it is overwritten)
+
+    timeout is the number of seconds to wait for the server, so that a stalled
+    host can not block the caller forever
+
+    the request is made before the file is opened, so no empty file is left behind
+    when the connection fails; exceptions of requests are passed to the caller
+    """
+    # the response is used as a context manager to release the connection in any case
+    with requests.get(url, stream=True, timeout=timeout) as response:
+        with open(dest_filepath, 'wb') as f:
+            for block in response.iter_content(1024):
+                if not block:
+                    break
+                f.write(block)
+
+def to_pdf(news, filepath):
+    """
+    this function prints news in pdf format to file
+
+    news is a list of post dicts with feed, title, pub_date, link, description and
+    hrefs keys; hrefs holds (url, type) tuples and (url, 'image', description) ones
+
+    filepath is the pdf file to create; every post starts on a new page
+
+    the DejaVuSansCondensed font and the post images are downloaded to a private
+    temporary directory which is removed when the function ends, also after an error;
+    an existing "tmp_files" directory of the user is never touched
+    """
+    import tempfile
+
+    with tempfile.TemporaryDirectory() as final_directory:
+        pdf_obj = user_FPDF()
+        font_dir = os.path.join(final_directory, 'DejaVuSansCondensed.ttf')
+        with open(font_dir, "wb") as f:
+            f.write(requests.get("https://raw.github.com/prague15031939/font_storage/master/DejaVuSansCondensed.ttf").content)
+        pdf_obj.add_font('DejaVu', '', font_dir, uni=True)
+        image_id = 0
+
+        for ind, post in enumerate(news):
+            pdf_obj.add_page()
+            if ind == 0:
+                # the document heading is printed on the first page only
+                pdf_obj.set_font('Arial', style='B', size=16)
+                pdf_obj.cell(200, 15, txt='ACTUAL NEWS', align='C', ln=1)
+            pdf_obj.set_font('DejaVu', '', 12)
+            pdf_obj.cell(5, 5, txt="#")
+            pdf_obj.cell(180, 5, txt=f"Feed: {(post['feed'])}", ln=1)
+            pdf_obj.cell(200, 5, ln=1)
+            pdf_obj.cell(5, 5)
+            pdf_obj.multi_cell(180, 5, txt=f"Title: {(post['title'])}")
+            pdf_obj.cell(5, 5)
+            pdf_obj.cell(200, 5, txt=f"Publication date: {post['pub_date']}", ln=1)
+            pdf_obj.cell(5, 5)
+            pdf_obj.cell(10, 5, txt='Link: ')
+            pdf_obj.set_font('Arial', style='I', size=12)
+            pdf_obj.multi_cell(180, 5, txt=f"{post['link']}")
+            pdf_obj.set_font('DejaVu', '', 12)
+            pdf_obj.cell(200, 5, ln=1)
+            pdf_obj.cell(5, 5)
+            pdf_obj.multi_cell(200, 5, txt=f"{post['description']}")
+            pdf_obj.cell(200, 5, ln=1)
+            pdf_obj.cell(5, 5)
+            pdf_obj.cell(200, 5, txt="Links:", ln=1)
+
+            for index, tpl in enumerate(post['hrefs']):
+                pdf_obj.cell(10, 5)
+                if not tpl[1] == 'image':
+                    pdf_obj.set_font('DejaVu', '', 12)
+                    pdf_obj.cell(7, 5, txt=f"[{index + 1}] ")
+                    pdf_obj.set_font('Arial', style='I', size=12)
+                    pdf_obj.multi_cell(170, 5, txt=f"{tpl[0]}")
+                else:
+                    pdf_obj.set_font('DejaVu', '', 12)
+                    pdf_obj.multi_cell(170, 5, txt=f"[{index + 1}] {tpl[2]}")
+                    try:
+                        img_dir = os.path.join(final_directory, f"{image_id}.jpeg")
+                        download_image(tpl[0], img_dir)
+                        pdf_obj.image(img_dir, x=22, y=pdf_obj.get_y()+5, link=tpl[0])
+                        image_id += 1
+                    except RuntimeError:
+                        # best effort: the picture is skipped, its description stays in the document
+                        pass
+
+        # the file is written while the font and the images still exist on disk
+        pdf_obj.output(filepath)