Skip to content

Kotkovets Gennadiy #49

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 17 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# PythonHomework
[Introduction to Python] Homework Repository
# How to use
* pip install .
* python rss-reader https://news.tut.by/rss/economics.rss --limit 2 --json
# Parameters
* --help (Show this help message and exit)
* source (RSS URL)
* --limit LIMIT (Limit news topics if this parameter provided)
* --json (Prints result as JSON in stdout)
* --verbose (Outputs verbose status messages)
* --version (Print version info)
* --date DATE (Show cached news published on or after DATE, format YYYYMMDD)
* --to-pdf FILE (Convert news to a PDF file)
* --to-html FILE (Convert news to an HTML file)
* --colorize (Print news with colored output)
# JSON structure
news = {"Title": "title", "Date":"date", "Alt image":"alt", "Discription":"discription", "Links":{"News":"link", "Image":"src"} }
6 changes: 6 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
bs4 # for xml and html
feedparser # rss parsing
requests # http requests
python-dateutil
fpdf
colorama
213 changes: 213 additions & 0 deletions rss_app/RSS.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,213 @@
"""
A file with the RssAggregator class that parses the URL
and performs various actions with the received data
"""

import feedparser
from bs4 import BeautifulSoup
import json
from dateutil.parser import parse
import urllib
import httplib2
import os
from colorama import init
from colorama import Fore


class RssAggregator():

    """RSS feed aggregator.

    Fetches a feed, prints entries (optionally colorized), caches them to a
    per-feed JSON file together with their images, and reads the cache back
    for date filtering and for the PDF/HTML converters.

    Instance attributes (set in __init__):
        source   -- RSS feed URL.
        limit    -- max number of entries to show/convert (used as a slice
                    bound, so None means "all").
        date     -- 'YYYYMMDD' string; cached entries published on or after
                    it are returned by get_from_json_file().
        log      -- logger used for verbose status messages.
        colorize -- when truthy, output is wrapped in colorama colors.
    """

    # NOTE(review): this class attribute is never used anywhere in the class;
    # kept only for backward compatibility with possible external references.
    feedurl = ""

    def __init__(self, source, limit, date, log, colorize):
        self.source = source
        self.limit = limit
        self.date = date
        self.log = log
        self.colorize = colorize
        init()  # initialise colorama (needed for ANSI colors on Windows)

    @staticmethod
    def _parse_entry(thefeedentry):
        """Return the dict representation of one feed entry.

        Parses the HTML description exactly once (the original code parsed it
        up to three times per entry). Raises TypeError when the description
        contains no <img> tag (``None['alt']``); callers catch it and skip
        the entry, mirroring the original behavior.
        """
        soup = BeautifulSoup(thefeedentry.description, "html.parser")
        img = soup.find('img')  # None -> TypeError below, caught by callers
        return {
            "Title": thefeedentry.title,
            "Date": thefeedentry.published,
            "Alt image": img['alt'],
            "Discription": soup.text,
            "Links": {
                "News": thefeedentry.link,
                "Image": img['src']
            }
        }

    def get_news(self):

        """Fetch the feed, cache ALL entries to the JSON file, and return
        the first ``self.limit`` entries."""

        self.log.info("Getting rss feed")
        thefeed = feedparser.parse(self.source)
        self.save_to_json_file(thefeed.entries)
        return thefeed.entries[:self.limit]

    def print_news(self, entries):

        """Print the given feed entries to stdout.

        Entries whose description has no <img> tag are skipped (logged).
        """

        self.log.info("Printing news")
        for thefeedentry in entries:
            try:
                news = self._parse_entry(thefeedentry)
            except TypeError:
                self.log.info("TypeError: 'NoneType'")
                continue
            if self.colorize:
                # BUG FIX: the original parsed ``description + Fore.RESET``,
                # feeding an ANSI escape sequence into BeautifulSoup; the
                # reset code now wraps the printed value instead.
                print("--------------------------------------------------")
                print(f"{Fore.RED}Title:{Fore.RESET} ", Fore.RED + news["Title"] + Fore.RESET)
                print(f"{Fore.BLUE}Date:{Fore.RESET} ", Fore.BLUE + news["Date"] + Fore.RESET, end="\n\n")
                print(f"{Fore.YELLOW}Alt image:{Fore.RESET} ", Fore.YELLOW + news["Alt image"] + Fore.RESET)
                print(Fore.GREEN + news["Discription"] + Fore.RESET, end="\n\n")
                print("Links:")
                print(f"{Fore.YELLOW}News:{Fore.RESET} ", Fore.YELLOW + news["Links"]["News"] + Fore.RESET)
                print(f"{Fore.YELLOW}Image:{Fore.RESET} ", Fore.YELLOW + news["Links"]["Image"] + Fore.RESET)
            else:
                print("Title: ", news["Title"])
                print("Date: ", news["Date"], end="\n\n")
                print("Alt image: ", news["Alt image"])
                print(news["Discription"], end="\n\n")
                print("Links:")
                print("News: ", news["Links"]["News"])
                print("Image: ", news["Links"]["Image"])

    def print_json(self, entries):

        """Print each entry as an indented JSON object to stdout."""

        self.log.info("RSS news to json")
        for thefeedentry in entries:
            try:
                news = self._parse_entry(thefeedentry)
            except TypeError:
                self.log.info("TypeError: 'NoneType'")
                continue
            print(json.dumps(news, indent=3))

    def save_to_json_file(self, entries):

        """Cache the entries (and their images) into the per-feed JSON file.

        Entries without an <img> tag are skipped, as in print_news.
        """

        self.log.info("Save news to json file")
        news_list = list()
        file_name = self.get_file_name()
        with open(file_name, "w", encoding="utf-8") as write_file:
            for thefeedentry in entries:
                try:
                    news = self._parse_entry(thefeedentry)
                    self.save_image(thefeedentry, file_name)
                    news_list.append(news)
                except TypeError:
                    self.log.info("TypeError: 'NoneType'")
            json.dump(news_list, write_file, indent=3)

    def get_file_name(self):

        """Derive the cache file name from the feed URL.

        E.g. 'https://a.by/rss/x.rss' -> 'a.byrssx.rss.json'.
        NOTE(review): raises IndexError if the URL has no '//' — presumably
        sources are always full URLs; confirm against the CLI parser.
        """

        self.log.info("Getting file name")
        file_name_list = self.source.split("//")
        file_name = file_name_list[1].replace("/", "")
        file_name += ".json"
        return file_name

    def save_image(self, thefeedentry, file_name):

        """Download the entry's image into the per-feed image directory.

        ``file_name`` is unused; kept for call-site compatibility.
        """

        file_path = self.get_path_image(thefeedentry)
        h = httplib2.Http('.cache')  # httplib2 keeps an on-disk HTTP cache
        img_url = BeautifulSoup(thefeedentry.description, "html.parser").find('img')['src']
        response, content = h.request(img_url)
        try:
            # 'with' guarantees the handle is closed even if write() fails
            # (the original leaked the handle on error).
            with open(file_path, "wb") as out:
                out.write(content)
        except FileNotFoundError:
            self.log.info("Error: image not found")
        except OSError:
            self.log.info("[Errno 22] Invalid argument {}".format(file_path))

    def get_path_image(self, thefeedentry):

        """Return the absolute local path for the entry's image, creating
        the per-feed image directory if needed."""

        file_name_list = self.source.split("//")
        file_name = file_name_list[1].replace("/", "")
        folder_path = "image_" + file_name + os.path.sep
        if not os.path.exists(folder_path):
            self.log.info('Creating directory images')
            os.mkdir(folder_path)
        img = BeautifulSoup(thefeedentry.description, "html.parser").find('img')['src']
        image = img.split("/")
        file_path = os.path.abspath('') + os.path.sep + folder_path + image[-1]
        # BUG FIX: the original ``if ".jpg" or ".gif" or ".png" in file_path``
        # was always true, so the ".jpg" fallback was dead code.
        if not file_path.endswith((".jpg", ".gif", ".png")):
            file_path += ".jpg"
        return file_path

    def get_from_json_file(self):

        """Return cached entries published on or after ``self.date``.

        Returns None (implicitly) when the cache file does not exist.
        """

        self.log.info("Getting news by date")
        file_name = self.get_file_name()
        news_by_date = list()
        try:
            with open(file_name, "r") as read_file:
                news = json.load(read_file)
            for thefeedentry in news:
                # 'YYYYMMDD' strings compare correctly as plain strings
                published = parse(thefeedentry['Date']).strftime('%Y%m%d')
                if published >= self.date:
                    news_by_date.append(thefeedentry)
            return news_by_date
        except FileNotFoundError:
            self.log.info("File not found error")

    def get_news_for_converter(self):

        """Return all cached entries for the PDF/HTML converter, or None
        (implicitly) when the cache file does not exist."""

        self.log.info("Getting news for converter")
        file_name = self.get_file_name()
        try:
            with open(file_name, "r") as read_file:
                return json.load(read_file)
        except FileNotFoundError:
            self.log.info("File not found error")

    def print_news_from_file(self, entries):

        """Print up to ``self.limit`` cached entries (dicts, not feedparser
        objects), optionally colorized."""

        self.log.info("Printing news by date")
        for thefeedentry in entries[:self.limit]:
            if self.colorize:
                print("--------------------------------------------------")
                print(f"{Fore.RED}Title:{Fore.RESET} ", Fore.RED + thefeedentry['Title'] + Fore.RESET)
                print(f"{Fore.BLUE}Date:{Fore.RESET} ", Fore.BLUE + thefeedentry['Date'] + Fore.RESET, end="\n\n")
                print(f"{Fore.YELLOW}Alt image:{Fore.RESET} ", Fore.YELLOW + thefeedentry['Alt image'] + Fore.RESET)
                print(Fore.GREEN + thefeedentry['Discription'] + Fore.RESET, end="\n\n")
                print("Links: ")
                print(f"{Fore.YELLOW}News:{Fore.RESET} ", Fore.YELLOW + thefeedentry['Links']['News'] + Fore.RESET)
                print(f"{Fore.YELLOW}Image:{Fore.RESET} ", Fore.YELLOW + thefeedentry['Links']['Image'] + Fore.RESET)
            else:
                print("--------------------------------------------------")
                print("Title: ", thefeedentry['Title'])
                print("Date: ", thefeedentry['Date'], end="\n\n")
                print("Alt image: ", thefeedentry['Alt image'])
                print(thefeedentry['Discription'], end="\n\n")
                print("Links: ")
                print("News: ", thefeedentry['Links']['News'])
                print("Image: ", thefeedentry['Links']['Image'])
Empty file added rss_app/__init__.py
Empty file.
85 changes: 85 additions & 0 deletions rss_app/converter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
"""
File with a class Converter designed to convert data to pdf and html formats
"""


import fpdf
from bs4 import BeautifulSoup
import os


class Converter:

    """Convert cached news entries (dicts from the JSON cache) to PDF or HTML.

    Instance attributes:
        source  -- RSS feed URL (used to derive the image directory name).
        limit   -- max number of entries to convert (slice bound).
        to_pdf  -- output path for the PDF file.
        to_html -- output path for the HTML file.
        log     -- logger for verbose status messages.
    """

    # Register the bundled TTF directory so FPDF can load the Unicode font.
    fpdf.set_global("SYSTEM_TTFONTS", os.path.join(os.path.dirname(__file__), 'fonts', 'ttf'))

    def __init__(self, source, limit, to_pdf, to_html, log):
        self.source = source
        self.limit = limit
        self.to_pdf = to_pdf
        self.to_html = to_html
        self.log = log

    def pdf_converter(self, entries):

        """Write up to ``self.limit`` entries into the PDF file ``self.to_pdf``."""

        self.log.info("Converter in pdf format")
        pdf = fpdf.FPDF()
        pdf.add_page()
        # uni=True: DejaVu is a Unicode TTF (needed for non-latin feeds)
        pdf.add_font('DejaVu', '', 'DejaVuSansCondensed.ttf', uni=True)
        pdf.set_font('DejaVu', size=14)
        for thefeedentry in entries[:self.limit]:
            path = self.get_path_image(thefeedentry)
            pdf.multi_cell(0, 10, txt="{}".format(thefeedentry['Title']))
            pdf.multi_cell(0, 10, txt="{}".format(thefeedentry['Links']['News']))
            try:
                pdf.image(path)
            except RuntimeError:
                # image missing or unsupported format -- keep the text parts
                self.log.info("Error add image")
            pdf.multi_cell(0, 10, txt="{}".format(thefeedentry['Alt image']))
            pdf.multi_cell(0, 10, txt="{}".format(thefeedentry['Discription']))
            pdf.multi_cell(0, 10, txt="{}".format(thefeedentry['Date']))
            pdf.ln(10)
        pdf.output(self.to_pdf)
        print(self.to_pdf)

    def html_converter(self, entries):

        """Write up to ``self.limit`` entries into the HTML file ``self.to_html``."""

        self.log.info("Converter in html format")
        with open(self.to_html, "w", encoding="utf-8") as file_text:
            file_text.write("<html>")
            file_text.write("<body>")
            file_text.write("<p>")
            for thefeedentry in entries[:self.limit]:
                file_text.write("{}<br />".format(thefeedentry['Title']))
                # BUG FIX: the original '"<a href = "">{}</a>"' was string
                # concatenation producing an EMPTY href; put the URL in the
                # attribute as well as the link text.
                file_text.write('<a href="{0}">{0}</a><br />'.format(thefeedentry['Links']['News']))
                # quote the src attribute so the markup stays valid
                file_text.write('<img src="{}"> <br />'.format(thefeedentry['Links']['Image']))
                file_text.write("{} <br />".format(thefeedentry['Discription']))
                file_text.write("{} <br /><br />".format(thefeedentry['Date']))
            file_text.write("</p>")
            file_text.write("</body>")
            file_text.write("</html>")

    def get_path_image(self, thefeedentry):

        """Return the local path of the cached image for an entry, creating
        the per-feed image directory if needed (mirrors RssAggregator)."""

        self.log.info("Getting path image")
        file_name_list = self.source.split("//")
        file_name = file_name_list[1].replace("/", "")
        folder_path = "image_" + file_name + os.path.sep
        if not os.path.exists(folder_path):
            self.log.info('Creating directory images')
            os.mkdir(folder_path)
        img = thefeedentry['Links']['Image']
        image = img.split("/")
        file_path = os.path.abspath('') + os.path.sep + folder_path + image[-1]
        # BUG FIX: ``if ".jpg" or ".gif" or ".png" in file_path`` was always
        # true (and also printed debug output); append ".jpg" only when the
        # path really lacks a known image extension.
        if not file_path.endswith((".jpg", ".gif", ".png")):
            file_path += ".jpg"
        return file_path
26 changes: 26 additions & 0 deletions rss_app/fonts/fontconfig/20-unhint-small-dejavu-sans-mono.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE fontconfig SYSTEM "../fonts.dtd">
<fontconfig>
<!-- /etc/fonts/conf.d/20-unhint-small-dejavu-sans-mono.conf

Disable hinting manually at smaller sizes (< 8ppem)
This is a copy of the Bitstream Vera fonts rule, as DejaVu is
derived from Vera.

The Bitstream Vera fonts have GASP entries suggesting that hinting be
disabled below 8 ppem, but FreeType ignores those, preferring to use
the data found in the instructed hints. The initial Vera release
didn't include the right instructions in the 'prep' table.
-->
<match target="font">
<test name="family">
<string>DejaVu Sans Mono</string>
</test>
<test compare="less" name="pixelsize">
<double>7.5</double>
</test>
<edit name="hinting">
<bool>false</bool>
</edit>
</match>
</fontconfig>
26 changes: 26 additions & 0 deletions rss_app/fonts/fontconfig/20-unhint-small-dejavu-sans.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE fontconfig SYSTEM "../fonts.dtd">
<fontconfig>
<!-- /etc/fonts/conf.d/20-unhint-small-dejavu-sans.conf

Disable hinting manually at smaller sizes (< 8ppem)
This is a copy of the Bitstream Vera fonts rule, as DejaVu is
derived from Vera.

The Bitstream Vera fonts have GASP entries suggesting that hinting be
disabled below 8 ppem, but FreeType ignores those, preferring to use
the data found in the instructed hints. The initial Vera release
didn't include the right instructions in the 'prep' table.
-->
<match target="font">
<test name="family">
<string>DejaVu Sans</string>
</test>
<test compare="less" name="pixelsize">
<double>7.5</double>
</test>
<edit name="hinting">
<bool>false</bool>
</edit>
</match>
</fontconfig>
Loading