Shymansky Pavel #23

Open · wants to merge 14 commits into master
3 changes: 3 additions & 0 deletions .gitignore
Expand Up @@ -102,3 +102,6 @@ venv.bak/

# mypy
.mypy_cache/

# IntelliJ IDEA files
.idea
30 changes: 30 additions & 0 deletions README.md
@@ -0,0 +1,30 @@
# RSS reader
RSS (originally RDF Site Summary; later, two competing approaches emerged, which used the backronyms Rich Site Summary and Really Simple Syndication respectively) is a type of web feed which allows users and applications to access updates to websites in a standardized, computer-readable format. These feeds can, for example, allow a user to keep track of many different websites in a single news aggregator.
RSS reader is a command-line utility that receives an RSS URL and prints the results.


## Specification
The utility provides the following interface:
* positional (required) arguments:
  * source -- RSS URL
* optional arguments:
  * -h, --help -- Show help message and exit.
  * --version -- Print version info.
  * --json -- Print result as JSON in stdout.
  * --verbose -- Output verbose status messages.
  * --limit -- Limit news topics if this parameter is provided.
  * --date -- Return cached news from the specified day. Format is YYYYMMDD.
  * --to_pdf -- Convert news into PDF format and save the file to the specified path.


### Notes
* When the `--json` argument is used, the utility converts the news into JSON format.
* The `--limit` argument also affects the JSON output.


## Caching
News is stored in a local SQLite database named `mydatabase.db`.
News that has already been saved is not cached again. `--date` prints cached news for the given date (YYYYMMDD format).

## Format converter
A function that converts news into PDF is also provided, but it is still rough around the edges.
Binary file added SakBunderan.ttf
Binary file not shown.
14 changes: 14 additions & 0 deletions arg.py
@@ -0,0 +1,14 @@
import argparse

def args():
    '''If the main() function in rss_reader.py is the heart of the project,
    this should be the hands. Returns the parsed command-line arguments'''
    parser = argparse.ArgumentParser(description='Pure Python command-line RSS reader')
    parser.add_argument("source", help='RSS URL', nargs="?", type=str)
    parser.add_argument("--version", action="store_true", help='Print version info')
    parser.add_argument("--json", action='store_true', help='Print result as JSON in stdout')
    parser.add_argument("--verbose", action='store_true', help='Output verbose status messages')
    parser.add_argument("--limit", help='Limit news topics', type=int)
    parser.add_argument("--date", help='Show cached news for the given day (YYYYMMDD)', type=int)
    parser.add_argument("--to_pdf", action='store_true', help='Convert news into PDF format')
Collaborator commented: Is conversion implemented for only one format?
    return parser.parse_args()
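The parser above can be exercised without touching the real console arguments; a minimal standalone sketch (re-declaring a subset of the arguments from `arg.py`) shows how a typical invocation is parsed:

```python
import argparse

# Re-declare a subset of the arguments from arg.py for a standalone demo
parser = argparse.ArgumentParser(description='Pure Python command-line RSS reader')
parser.add_argument("source", help='RSS URL', nargs="?", type=str)
parser.add_argument("--json", action='store_true', help='Print result as JSON in stdout')
parser.add_argument("--limit", help='Limit news topics', type=int)
parser.add_argument("--date", help='Show cached news for the given day (YYYYMMDD)', type=int)

# parse_args accepts an explicit argv list, which makes the parser testable
ns = parser.parse_args(['https://news.yahoo.com/rss/', '--json', '--limit', '3'])
print(ns.source)  # https://news.yahoo.com/rss/
print(ns.json)    # True
print(ns.limit)   # 3
print(ns.date)    # None
```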
26 changes: 26 additions & 0 deletions cache.py
@@ -0,0 +1,26 @@
import sqlite3
from sqlite3 import Error

from arg import args
from date_converter import convert_date


def sql_connection():
    '''Connect to the database'''
    try:
        con = sqlite3.connect('mydatabase.db')
        return con
    except Error as error:
        print(error)


def sql_fetch(con):
    '''Extract cached news from the database'''
    console_args = args()
    cursorObj = con.cursor()
    cursorObj.execute('SELECT * FROM news')
    while True:
        row = cursorObj.fetchone()
        if row is None:
            break
        if convert_date(row[1]) == console_args.date:
            print(' Title:', row[0], '\n', 'Date:', row[1], '\n',
                  'Link:', row[2], '\n', 'Description:', row[3], '\n')
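The dedup behaviour the README promises hinges on `INSERT OR IGNORE` having a uniqueness constraint to conflict with; a minimal sketch using an in-memory database and hypothetical rows (note the `UNIQUE` link column, which the shipped schema would need for this to hold):

```python
import sqlite3

con = sqlite3.connect(':memory:')
cur = con.cursor()
# Without UNIQUE, INSERT OR IGNORE has nothing to conflict with and duplicates pile up
cur.execute('CREATE TABLE news(title, published, link UNIQUE, description)')

row = ('Some title', 'Mon, 25 Nov 2019 10:00:00 GMT', 'https://example.com/a', 'text')
for _ in range(2):  # the second insert conflicts on link and is silently ignored
    cur.execute('INSERT OR IGNORE INTO news VALUES(?, ?, ?, ?)', row)
con.commit()

count = cur.execute('SELECT COUNT(*) FROM news').fetchone()[0]
print(count)  # 1
```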
19 changes: 19 additions & 0 deletions date_converter.py
@@ -0,0 +1,19 @@
def convert_date(date):
    '''Convert an RSS date string such as "Mon, 25 Nov 2019 10:00:00 GMT"
    into an int in YYYYMMDD form, to search our local storage'''
    month = {'Jan': '01',
             'Feb': '02',
             'Mar': '03',
             'Apr': '04',
             'May': '05',
             'Jun': '06',
             'Jul': '07',
             'Aug': '08',
             'Sep': '09',
             'Oct': '10',
             'Nov': '11',
             'Dec': '12'}
    day = date[5:7]
    month_int = month[date[8:11]]
    year = date[12:16]

    # Zero-padded months keep the result a valid YYYYMMDD number
    return int(year + month_int + day)
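The slicing above assumes the fixed-width RFC 822 date layout that RSS feeds typically use; an alternative sketch (a hypothetical helper, not part of the project) does the same conversion with the standard library's `datetime`, which also validates the input:

```python
from datetime import datetime

def convert_date_strptime(date):
    # 'Mon, 25 Nov 2019 10:00:00 GMT' -> 20191125
    parsed = datetime.strptime(date[5:16], '%d %b %Y')
    return int(parsed.strftime('%Y%m%d'))

print(convert_date_strptime('Mon, 25 Nov 2019 10:00:00 GMT'))  # 20191125
```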
14 changes: 14 additions & 0 deletions loggerfile.py
@@ -0,0 +1,14 @@
import logging


def log():
    '''Initialise logging for the application and return an
    adapter that tags every record with the application name'''
    extra = {'app_name': 'rss-reader'}
    logger = logging.getLogger(__name__)
    syslog = logging.StreamHandler()
    formatter = logging.Formatter('%(asctime)s %(app_name)s : %(message)s')
    syslog.setFormatter(formatter)
    logger.setLevel(logging.INFO)
    logger.addHandler(syslog)
    logging.basicConfig(filename="loggs.log", level=logging.DEBUG)
    return logging.LoggerAdapter(logger, extra)
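The `LoggerAdapter` returned above injects `app_name` into every record the formatter sees; a self-contained sketch that captures the output in a string instead of stderr:

```python
import io
import logging

stream = io.StringIO()
logger = logging.getLogger('adapter-demo')
handler = logging.StreamHandler(stream)
handler.setFormatter(logging.Formatter('%(app_name)s : %(message)s'))
logger.addHandler(handler)
logger.setLevel(logging.INFO)

# The adapter merges its extra dict into every record it forwards
adapter = logging.LoggerAdapter(logger, {'app_name': 'rss-reader'})
adapter.info('Program started')
print(stream.getvalue().strip())  # rss-reader : Program started
```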
2 changes: 2 additions & 0 deletions requirements.txt
@@ -0,0 +1,2 @@
feedparser==5.2.1
beautifulsoup4
reportlab==3.5.32
178 changes: 178 additions & 0 deletions rss_reader.py
@@ -0,0 +1,178 @@
import feedparser
import json
import html
from bs4 import BeautifulSoup
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
from reportlab.pdfgen import canvas
from arg import args
from loggerfile import log
from cache import sql_connection, sql_fetch
from version import version_info


def parse():
    '''Parses the feed at the URL given on the command line'''
    parse_arg = args()
    return feedparser.parse(parse_arg.source)


def get_source(parsed):
    '''Gets link, title and subtitle of the feed itself'''
    feed = parsed['feed']
    try:
        return {
            'link': feed['link'],
            'title': feed['title'],
            'subtitle': feed['subtitle']
        }
    except KeyError:
        print('')


def get_articles(parsed):
    '''Gets information from each entry and returns a list of articles'''
    articles = []
    for entry in parsed['entries']:
        summary = BeautifulSoup(entry.summary, features='html.parser')
        article = {
            'ID': entry['id'],
            'Link': entry['link'],
            'Title': html.unescape(entry['title']),
            'Description': summary.text,
            'Published': entry['published'],
        }
        # Not every entry carries an image, so look it up defensively
        img = summary.find('img')
        if img is not None and img.get('src'):
            article['article IMG'] = img['src']
        articles.append(article)
    return articles
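Feed titles often arrive HTML-escaped, which is why `get_articles` runs them through `html.unescape`; for example, with a hypothetical escaped title:

```python
import html

# Entities such as &amp; and &#39; come straight from the feed XML
escaped = 'Markets rise &amp; it&#39;s not over'
print(html.unescape(escaped))  # Markets rise & it's not over
```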


def print_articles(parsed, args_ord, logs_art):
    '''Outputs news in plain-text format'''
    articles = get_articles(parsed)
    feed = get_source(parsed)
    if args_ord.limit == 0:
        print('Error, argument should be more than zero')
    else:
        try:
            print('----------' + feed['title'] + '----------\n')
            print('URL ADDRESS: ' + feed['link'] + '\n')
            print(feed['subtitle'])
            for article in articles[0:args_ord.limit]:
                print("\nTitle: ", article['Title'])
                print("Date: ", article['Published'])
                print("Link: ", article['Link'])
                print("\nDescription: ", article['Description'])
                try:
                    print("\nImage: ", article['article IMG'])
                except KeyError:
                    print('\nNo images given')
                print('\n')
        except (KeyError, TypeError):
            print('')
    if args_ord.verbose:
        logs_art.info('Program started with source: ' + feed['link'])
        logs_art.info('Limit is {}'.format(args_ord.limit))
        logs_art.info('News in ordinary format parsed successfully')


def print_articles_json(parsed, args_json, logs_json):
    '''Outputs news in JSON format'''
    feed = get_source(parsed)
    if args_json.limit == 0:
        print('Error, argument should be more than zero')
    else:
        print('----------' + feed['title'] + '----------')
        print('URL ADDRESS: ' + feed['link'])
        print(feed['subtitle'])
        # Reuse the already-parsed feed instead of fetching the URL again
        print(json.dumps(get_articles(parsed)[0:args_json.limit], indent=3, ensure_ascii=False))
    if args_json.verbose:
        logs_json.info('Program started with source: ' + feed['link'])
        logs_json.info('Limit is {}'.format(args_json.limit))
        logs_json.info('News in json format parsed successfully')
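The JSON path leans on `json.dumps` with `ensure_ascii=False`, so non-Latin titles come out readable instead of as `\uXXXX` escapes; a small sketch with a hypothetical article:

```python
import json

articles = [{'Title': 'Новости дня', 'Link': 'https://example.com/news'}]
# ensure_ascii=False keeps Cyrillic (and any other non-ASCII) characters intact
out = json.dumps(articles, indent=3, ensure_ascii=False)
print(out)
```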


def sql_table(con, parsed, sql_args, sql_logs):
    '''Initialise the database if it is not created yet and
    insert news into it'''
    articles = get_articles(parsed)
    cursorObj = con.cursor()
    # A UNIQUE link column is what lets INSERT OR IGNORE skip already-cached news
    cursorObj.execute("CREATE TABLE IF NOT EXISTS news(title, published, link UNIQUE, description)")
    if sql_args.source:
        for article in articles:
            entities = (article['Title'], article['Published'],
                        article['Link'], article['Description'])
            cursorObj.execute('INSERT OR IGNORE INTO news(title,'
                              'published, link, description) VALUES(?, ?, ?, ?)', entities)
        if sql_args.verbose:
            sql_logs.info('Caching news')
            sql_logs.info('News cached successfully')
        con.commit()


def cache_main(info):
    '''Calls the functions connected with the cache'''
    con = sql_connection()
    sql_fetch(con)


def to_pdf(articles, args_conv):
    '''Converts news into PDF format (still a work in progress)'''
    pdf = canvas.Canvas('Test.pdf')
    pdf.setTitle('Converter to PDF')
    pdfmetrics.registerFont(TTFont('abc', 'SakBunderan.ttf'))
    for article in articles[0:args_conv.limit]:
        pdf.setFont('abc', 12)
        pdf.drawCentredString(300, 770, article['Title'])
        textobject = pdf.beginText(40, 680)
        textobject.setFont("Helvetica-Oblique", 14)
        # textLines handles the whole description; iterating the string
        # character by character would print one letter at a time
        textobject.textLines(article['Description'])
        pdf.drawText(textobject)
        pdf.showPage()  # one page per article
    pdf.save()


def start():
    '''Called when the --to_pdf argument is entered'''
    to_pdf(get_articles(parse()), args())


def main():
    '''Heart of the project'''
    console_args = args()
    logs = log()
    con = sql_connection()
    parsed = parse()  # fetch and parse the feed once, then reuse it
    if console_args.json:
        print_articles_json(parsed, console_args, logs)
    else:
        print_articles(parsed, console_args, logs)
    sql_table(con, parsed, console_args, logs)
    if console_args.version:
        version_info(parsed)
    if console_args.date:
        cache_main(parsed)
    if console_args.to_pdf:
        start()


if __name__ == "__main__":
    main()
15 changes: 15 additions & 0 deletions setup.py
@@ -0,0 +1,15 @@
from setuptools import setup

setup(
    name='RSS reader',
    version='0.2',
    description='CLI utility to process RSS',
    author='Pavel Shymansky',
    # SakBunderan.ttf is a font, not a module; ship it as package data instead
    py_modules=['rss_reader', 'arg', 'loggerfile', 'version', 'cache', 'date_converter'],
    install_requires=['feedparser', 'beautifulsoup4', 'reportlab'],
    python_requires='>=3.7',
    entry_points='''
        [console_scripts]
        rss-reader=rss_reader:main
    ''',
)
13 changes: 13 additions & 0 deletions unitTests.py
@@ -0,0 +1,13 @@
import unittest

from date_converter import convert_date


class FunctionTestMethods(unittest.TestCase):
    '''Lonely samurai. Here we test functions'''
    def test_convert_date(self):
        # A typical RSS published string should map to a YYYYMMDD int
        self.assertEqual(convert_date('Mon, 25 Nov 2019 10:00:00 GMT'), 20191125)


if __name__ == '__main__':
    unittest.main()
3 changes: 3 additions & 0 deletions version.py
@@ -0,0 +1,3 @@
def version_info(smth):
    '''Shows version'''
    print("----------------Third version----------------")