Skip to content

Mlynarchik Artyom #40

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 36 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
e8be149
Initial commit
Nov 10, 2019
b060fb3
Main functionality made
Nov 17, 2019
8b8d8a1
Added display of article hrefs
Nov 17, 2019
bebd6cf
Create README.md
Archeex Nov 17, 2019
bfa6e10
Update README.md
Archeex Nov 17, 2019
4df505d
Fix imports for project distribution
Nov 17, 2019
56dd8eb
Update README.md
Nov 17, 2019
94a0554
Update README.md
Archeex Nov 17, 2019
29766e9
Update README.md
Archeex Nov 17, 2019
1b5b06d
Update utility name in setup.py
Nov 17, 2019
01ce0c7
Add opportunity to print json to console, enumerate replaced with slices
Nov 26, 2019
1953b34
News caching done
Nov 30, 2019
22ac84d
Update README.md
Archeex Nov 30, 2019
db491e0
Argument '--date' implemented [Iteration 3 completed]
Nov 30, 2019
2bd4b78
Argument '--date' implemented [Iteration 3 completed]
Nov 30, 2019
3ca84ef
Code refactor
Nov 30, 2019
719b0cf
Code refactor
Nov 30, 2019
4111eaa
Update json_schema.json
Archeex Nov 30, 2019
abe312f
Update README.md
Archeex Nov 30, 2019
9ada778
Code refactor and update setup.py
Nov 30, 2019
466ffd7
Code refactor and update setup.py
Nov 30, 2019
28bc86d
PDFConverter implemented, some code refactor
Dec 1, 2019
93d4e04
Update README.md
Archeex Dec 1, 2019
f9b6fa9
Update README.md
Archeex Dec 1, 2019
8e7209b
Update README.md
Archeex Dec 1, 2019
4cb5555
Update README.md
Archeex Dec 1, 2019
208e667
HTMLConverter implemented
Dec 1, 2019
23ff39e
HTMLConverter implemented
Dec 1, 2019
c677187
Fix launch problems
Dec 1, 2019
f5767c1
Console output colorized. Iteration 5 completed
Dec 1, 2019
87a1e92
Fix launch problems
Dec 1, 2019
92bd0d1
Add requirements.txt
Dec 1, 2019
6e786eb
Code refactoring
Dec 1, 2019
3785b40
Code refactor and adding tests
Dec 1, 2019
cf99556
Fix launch problems
Dec 1, 2019
fc70aa5
Delete cached_news.json
Archeex Dec 1, 2019
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Pure Python RSS Reader [PythonHomework]

Python version: v3.8

Current version: v0.2

Code checking: the code conforms to PEP 8
#### Usage:
```shell
usage: __main__.py [-h] [--version] [--json] [--verbose] [--limit LIMIT] source

Pure Python command-line RSS reader.

positional arguments:
source RSS URL

optional arguments:
-h, --help show this help message and exit
--version Print version info
--json Print result as JSON in stdout
--verbose Outputs verbose status messages
--limit LIMIT Limit news topics if this parameter provided
```
The JSON schema is described in `json_schema.json`.
31 changes: 31 additions & 0 deletions json_schema.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
{
"$schema": "http://json-schema.org/schema#",
"title": "feed",
"type": "object",
"required": ["title", "date", "text", "link", "hrefs"],
"properties": {
"title": {
"type": "string",
"description": "Article title"
},
"date": {
"type": "date",
"description": "Article published date"
},
"text": {
"type": "string",
"description": "Article text"
},
"link": {
"type": "string",
"description": "Article static link"
},
"hrefs": {
"type": "array",
"items": {
"type": "string",
"description": "Article href"
}
}
}
}
Empty file added rss_reader/__init__.py
Empty file.
34 changes: 34 additions & 0 deletions rss_reader/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import argparse
import logging

from rss_reader import Reader


def main():
    """Run the RSS reader command-line interface.

    Parses the command-line arguments, configures logging, downloads and
    parses the feed, and prints the result to stdout.
    """
    args = parse_args()

    # Without --verbose no level is passed, so logging keeps its default
    # threshold and the INFO status messages stay hidden.
    log_config = {'format': "%(levelname)s: %(message)s"}
    if args.verbose:
        log_config['level'] = logging.INFO
    logging.basicConfig(**log_config)

    reader = Reader(args.source, args.limit, args.json)
    reader.parse_url()

    # In JSON mode the reader prints the JSON document while parsing the
    # feed; printing the plain-text articles as well would make stdout
    # impossible to consume as JSON.
    if not args.json:
        reader.print_articles()


def parse_args():
    """Parse the command-line arguments of the RSS reader.

    Returns:
        argparse.Namespace with the attributes ``source``, ``json``,
        ``verbose`` and ``limit`` (``limit`` is ``None`` when not given).
    """
    cli = argparse.ArgumentParser(description='Pure Python command-line RSS reader.')

    # (name, options) pairs, registered in the order they appear in --help.
    argument_specs = (
        ('source', {'help': 'RSS URL'}),
        ('--version', {'help': 'Print version info', 'action': 'version', 'version': '%(prog)s 0.2'}),
        ('--json', {'help': 'Print result as JSON in stdout', 'action': 'store_true'}),
        ('--verbose', {'help': 'Outputs verbose status messages', 'action': 'store_true'}),
        ('--limit', {'help': 'Limit news topics if this parameter provided', 'type': int}),
    )
    for flag, options in argument_specs:
        cli.add_argument(flag, **options)

    return cli.parse_args()


# Start the command-line interface only when this file is executed as a
# script, not when it is imported.
if __name__ == '__main__':
    main()
26 changes: 26 additions & 0 deletions rss_reader/article.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
"""Article module"""

import re
import time
import logging


class Article:
    """A single feed article.

    Attributes:
        title: Article title.
        date: Publication date string as passed in by the caller.
        text: Article text with HTML tags removed.
        link: Article link with everything from the first ``?`` dropped.
        hrefs: Collection of additional links belonging to the article.
    """

    logger = logging.getLogger('__main__.py')

    # Non-greedy match of anything between angle brackets; compiled once
    # instead of on every call.
    _TAG_PATTERN = re.compile(r'<.*?>')

    def __init__(self, title, date, text, link, hrefs):
        self.title = title
        self.date = date
        self.text = self.strip_html_string(text)
        self.link = link.split('?')[0]
        self.hrefs = hrefs

    def convert_time_to_unix(self):
        """Convert the article date to Unix time.

        The date must look like ``Sun, 01 Dec 2019 12:00:00 +0000``.

        Returns:
            The number of whole seconds since the Unix epoch as an int.

        Raises:
            ValueError: If the date does not match the expected format.
        """
        from datetime import datetime

        # An aware datetime honours the UTC offset parsed by %z, whereas
        # time.mktime() treats the parsed fields as local time and ignores it.
        parsed = datetime.strptime(self.date, '%a, %d %b %Y %H:%M:%S %z')
        return int(parsed.timestamp())

    def strip_html_string(self, string):
        """Return ``string`` with HTML tags removed."""
        return self._TAG_PATTERN.sub('', string)
41 changes: 41 additions & 0 deletions rss_reader/json_format.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
"""Module with tools for working with Json"""

import json
import codecs
import logging


class Json:
    """Hold feed data and render or store it as JSON.

    Attributes:
        data: Dict that is serialised; empty until ``format`` is called.
    """

    logger = logging.getLogger('__main__.py')

    def __init__(self):
        self.data = {}

    def __str__(self):
        """Return the stored data as an indented JSON string.

        Non-ASCII characters are written as-is rather than escaped.
        """
        self.logger.info('Print JSON-data to console')

        return json.dumps(self.data, ensure_ascii=False, indent=4)

    def write_to_file(self):
        """Write the stored data as UTF-8 JSON to ``data.json``."""
        self.logger.info('Write JSON-data to file')

        with codecs.open('data.json', 'w', encoding='utf-8') as outfile:
            json.dump(self.data, outfile, ensure_ascii=False, indent=4)

    def format(self, data):
        """Store the elements of ``data`` as a list under the ``feed`` key.

        Any previously stored data is replaced.

        Args:
            data: Iterable of JSON-serialisable elements.
        """
        self.logger.info('Format data to JSON appereance')

        self.data = {'feed': list(data)}
85 changes: 85 additions & 0 deletions rss_reader/rss_reader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
"""RSS-reader module"""

import feedparser
import logging

from article import Article
from json_format import Json


class Reader:
    """Download an RSS feed and present its entries as text or JSON.

    Attributes:
        link: Feed URL.
        limit: Maximum number of entries to read, or ``None`` for all.
        articles: ``Article`` objects collected by ``parse_xml``.
        json: When truthy, ``parse_xml`` prints the articles as JSON.
        hrefs: Empty list created in ``__init__``.
        feed: Result of the last ``feedparser.parse`` call, ``None`` before
            ``parse_url`` has run.
    """

    logger = logging.getLogger('__main__.py')

    # Placeholder stored in an article's hrefs when an entry has no media.
    _NO_CONTENT = 'No content!'

    def __init__(self, link, limit, json):
        self.link = link
        self.limit = limit
        self.articles = []
        self.json = json
        self.hrefs = []
        self.feed = None

    def parse_url(self):
        """Download the feed from ``self.link`` and parse its entries.

        At most ``self.limit`` entries are passed on to ``parse_xml``.
        """
        self.logger.info('Get RSS XML-file from url')

        self.feed = feedparser.parse(self.link)
        self.parse_xml(self.feed.entries[:self.limit])

    def parse_xml(self, source):
        """Convert feed entries to articles and optionally print them as JSON.

        Args:
            source: Iterable of feed entries exposing their fields as
                attributes.
        """
        self.logger.info('Parse XML-file to articles')

        for item in source:
            # A missing field falls back to an empty string so that one
            # incomplete entry does not abort the whole run.
            self.articles.append(Article(
                getattr(item, 'title', ''),
                getattr(item, 'published', ''),
                getattr(item, 'description', ''),
                getattr(item, 'link', ''),
                self._collect_media_urls(item),
            ))

        if self.json:
            json_object = Json()
            json_object.format(self.articles_to_array())
            print(json_object)

    @staticmethod
    def _collect_media_urls(item):
        """Return the media URLs of an entry, or a placeholder list."""
        # Full media content takes precedence; thumbnails are the fallback.
        for field in ('media_content', 'media_thumbnail'):
            media = getattr(item, field, None)
            if media is not None:
                return [element['url'] for element in media]
        return [Reader._NO_CONTENT]

    def articles_to_array(self):
        """Return the collected articles as a list of plain dicts."""
        self.logger.info('Convert articles to array of dicts')

        return [
            {
                'title': article.title,
                'date': article.date,
                'text': article.text,
                'link': article.link,
                'hrefs': article.hrefs,
            }
            for article in self.articles
        ]

    def print_articles(self):
        """Print every collected article, each followed by a separator."""
        self.logger.info('Print articles to console')

        for article in self.articles:
            self.print_article(article)
            print('\n-------------------------\n')

    def print_article(self, article):
        """Print a single article to the console."""
        print(f'Title: {article.title}')
        print(f'Date: {article.date}')
        print(f'Link: {article.link}')
        print('\nArticle text:')
        print(article.text)
        print('\nHrefs:')
        for href in article.hrefs:
            print(href)
19 changes: 19 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import setuptools

# Package metadata and installation settings for the RSS reader.
setuptools.setup(
    name="rss-reader",
    version="0.2",
    author="Archeex",
    author_email="qsanich@gmail.com",
    description="Pure Python command-line RSS reader",
    packages=setuptools.find_packages(),
    # The reader imports feedparser at runtime, so it has to be installed
    # together with the package.
    install_requires=['feedparser'],
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
    # Installs an `rss_reader` command that calls main() in rss_reader/__main__.py.
    entry_points={
        'console_scripts': ['rss_reader = rss_reader.__main__:main']
    },
    python_requires='>=3.8'
)