diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..a5655cd --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1 @@ +include rss_reader/times-new-roman.ttf diff --git a/README.md b/README.md new file mode 100644 index 0000000..9252ec4 --- /dev/null +++ b/README.md @@ -0,0 +1,6 @@ +# Command-line RSS reader + +Final task for the Introduction to Python EPAM courses. + +* [Code coverage](Test_coverage.png) -- Code coverage percentage with tests +* [Json schema](json_schema.json) -- Schema of the JSON output structure diff --git a/Test_coverage.png b/Test_coverage.png new file mode 100644 index 0000000..da9443a Binary files /dev/null and b/Test_coverage.png differ diff --git a/json_schema.json b/json_schema.json new file mode 100644 index 0000000..7405ba3 --- /dev/null +++ b/json_schema.json @@ -0,0 +1,73 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "type": "object", + "properties": { + "News": { + "type": "array", + "items": [ + { + "type": "object", + "properties": { + "Feed#": { + "type": "object", + "title": "Feed", + "properties": { + "Feed source": { + "type": "string", + "title": "Source ", + "description": "Source where the news came from" + }, + "Title": { + "type": "string", + "title": "Title", + "description": "Title of the feed" + }, + "Date": { + "type": "string", + "title": "Publication date", + "description": "Date when the news was published" + }, + "Link": { + "type": "string", + "title": "Link", + "description": "News link" + }, + "Description": { + "type": "string", + "title": "Description", + "description": "A description of the feed" + }, + "Media_content": { + "type": "array", + "title": "Image links", + "description": "Array of image links", + "items": [ + { + "type": "string", + "title": "URL", + "description": "URL of image" + } + ] + } + }, + "required": [ + "Feed source", + "Title", + "Date", + "Link", + "Description", + "Media_content" + ] + } + }, + "required": [ + "Feed0" + ] + } + ] + } + }, + "required": [ + "News" + 
] +} diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..fdb66e0 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,7 @@ +beautifulsoup4>=4.8.0 +validator_collection>=1.3.7 +sqlalchemy>=1.2.7 +feedparser>=5.2.1 +fpdf>=1.7.2 +httplib2>=0.14.0 +Pillow>=6.2.1 \ No newline at end of file diff --git a/rss_reader/__init__.py b/rss_reader/__init__.py new file mode 100644 index 0000000..e34b0ed --- /dev/null +++ b/rss_reader/__init__.py @@ -0,0 +1,2 @@ +'''Package that implements a command-line RSS reader with some advanced features''' +__version__ = '1.4' diff --git a/rss_reader/__main__.py b/rss_reader/__main__.py new file mode 100644 index 0000000..187cbfb --- /dev/null +++ b/rss_reader/__main__.py @@ -0,0 +1,3 @@ +'''Module that contains the entry point of the package''' +from .cli import main +main() diff --git a/rss_reader/cli.py b/rss_reader/cli.py new file mode 100644 index 0000000..490924e --- /dev/null +++ b/rss_reader/cli.py @@ -0,0 +1,98 @@ +'''Module that implements the command-line interface of the application''' +import logging +from argparse import ArgumentParser +from datetime import datetime +from os import path, makedirs +from validator_collection.checkers import is_url +from .rssreader import RSSReader +from collections import defaultdict + + +def main(): + '''Entry point: parse and validate the arguments, then run the RSS reader''' + parser = adding_arguments() + args = parser.parse_args() + init_logging(args.verbose) + source, date = validate_arguments(args) + configuration_for_conversion = mk_config_for_conversion(args.to_pdf, args.to_html) + rss = RSSReader(source, args.limit, date, args.json, configuration_for_conversion, args.all) + rss.exec() + + +def adding_arguments(): + '''Build and return the ArgumentParser with all options of the RSS reader''' + parser = ArgumentParser(description='Pure Python command-line RSS reader') + parser.add_argument('source', metavar='source', type=str, help='RSS URL') + parser.add_argument('--version', action='version', version='ver 1.4', help='Print version info') + parser.add_argument('--limit', 
metavar='LIMIT', type=int, help='Amount of news output') + parser.add_argument('--verbose', action='store_true', help='Print all logs in stdout') + parser.add_argument('--json', action='store_true', help='Print news in json format') + parser.add_argument('--date', type=str, help='Print news published on a given day') + parser.add_argument('--to-pdf', type=str, help='Conversion news to the PDF format') + parser.add_argument('--to-html', type=str, help='Conversion news to the HTML format') + parser.add_argument('--all', action='store_true', help='Getting all cached news.\ + Compatible with the following arguments: --verbose, --json, --to-pdf, --to-html') + return parser + + +def init_logging(verbose): + '''Enable INFO-level logging when verbose output was requested''' + if verbose: + logging.basicConfig(format='%(module)s %(asctime)s %(message)s', + datefmt='%I:%M:%S', level=logging.INFO) + + +def validate_arguments(args): + '''Validate all received arguments and return the checked (source, date) pair''' + source = validate_url(args.source) + date = None + if args.date: + date = validate_date(args.date) + if args.to_html: + validate_path(args.to_html) + if args.to_pdf: + validate_path(args.to_pdf) + return source, date + + +def validate_url(source): + '''Return the source URL unchanged; raise ValueError if it is not a valid URL''' + logging.info('URL validation') + if not is_url(source): + raise ValueError('Invalid url') + return source + + +def validate_date(date): + '''Parse a YYYYMMDD string into a date; raise ValueError (like validate_url) if it is malformed''' + logging.info('Date validation') + try: + checked_date = datetime.strptime(date, '%Y%m%d').date() + return checked_date + except ValueError as e: + raise ValueError('Wrong date') from e + + +def validate_path(dir_for_save): + '''Ensure the export directory exists (creating it if needed) and return its path''' + if path.exists(dir_for_save): + logging.info(f'Directory {dir_for_save} already exists') + else: + makedirs(dir_for_save) + logging.info(f'Create directory {dir_for_save} for saving file') + return dir_for_save + + +def mk_config_for_conversion(pdf, html): + '''Function creates 
a dictionary that records which file types to export and where the news will be saved''' + logging.info('Making dict with configuration of conversion') + dict_with_directories = defaultdict(str) + if pdf: + dict_with_directories['pdf'] = pdf + if html: + dict_with_directories['html'] = html + return dict_with_directories + + +if __name__ == "__main__": + main() diff --git a/rss_reader/converter.py b/rss_reader/converter.py new file mode 100644 index 0000000..28f2aa5 --- /dev/null +++ b/rss_reader/converter.py @@ -0,0 +1,211 @@ +'''Module that contains the classes converting news into different file formats''' +import logging +import httplib2 +import io +from PIL import Image +from os import path, mkdir +from abc import ABC, abstractmethod +from fpdf import FPDF + + +class ConverterBase(ABC): + '''Abstract base of the converters; prepares the image directory and downloads the images''' + def __init__(self, news, dir_for_save): + self.news = news + self.dir_for_save = dir_for_save + self.dir_for_images = self.init_dir_for_images_from_news(path.join(dir_for_save, '.images_from_news')) + self.get_images(news) + + @staticmethod + def init_dir_for_images_from_news(dir_for_images): + '''Create the directory where images from the news will be saved, if missing, and return it''' + if path.exists(dir_for_images): + logging.info('Directory %s already exists' % dir_for_images) + else: + mkdir(dir_for_images) + logging.info('Create directory %s for saving images from news' % dir_for_images) + return dir_for_images + + @abstractmethod + def convert(self, news): + return news + + def save_file(self, data): + '''Write the converted text to a uniquely named file in the target directory''' + logging.info('Saving file with news') + with open(self.generate_filename(self.dir_for_save, self.filename), 'w') as f: + f.write(data) + + @staticmethod + def generate_filename(dir_for_save, filename): + '''Return a path in dir_for_save that does not exist yet, prefixing a counter to filename if needed''' + new_filename = path.join(dir_for_save, filename) + number_of_files = 1 + while number_of_files: + if path.exists(new_filename): + new_filename = path.join(dir_for_save, 
str(number_of_files) + filename) + number_of_files += 1 + else: + return new_filename + + def get_images(self, news): + '''Download every image referenced by the news and save it as PNG in the image directory''' + h = httplib2.Http('.cache') + logging.info('Getting images from news') + for feed in news: + images = feed.media_content + for number_of_image, image in enumerate(images): + if not image or image.endswith('.mp4'): + continue + image = self.check_image_link(image) + response, content = h.request(image) + image = Image.open(io.BytesIO(content)) + image_file_name = path.join(self.dir_for_images, f'{feed.id}{number_of_image}.png') + image.save(image_file_name, 'PNG') + + @staticmethod + def check_image_link(image_link): + ''' + Return the innermost URL when the image link embeds another link. + + For example, on news.yahoo.com the image link wraps a nested link with the original address; + rfind gives -1 when 'http' is absent and 0 when there is no nested link, so only > 0 is sliced. + ''' + logging.info('Checking for nested links') + where_sub_link = image_link.rfind('http') + if where_sub_link > 0: + return image_link[where_sub_link:] + return image_link + + +class HtmlConverter(ConverterBase): + '''Class implements conversion into HTML format''' + def __init__(self, news, dir_for_save): + logging.info('Initialization of HtmlConverter') + super().__init__(news, dir_for_save) + self.filename = 'news.html' + self.html_template = ''' + + + + + +

News you were looking for

+

{news}

+ +''' + + self.feed_template = ''' +

+

{title}

+

Link to that feed

+ + + + +
+
{description}
+
+

+ {img} +

+

+

+''' + + self.image_template = '
' + + def convert(self): + '''Render all news into the HTML template and save the file, skipping empty links and .mp4 media''' + logging.info('Converting news to HTML format') + news_str = '' + for feed in self.news: + images_from_the_feed = '' + for number_of_image, image_link in enumerate(feed.media_content): + images_from_the_feed += '' if not image_link or image_link.endswith('.mp4') else self.image_template.format(path=path.join(self.dir_for_images, f'{feed.id}{number_of_image}.png')) + news_str += self.feed_template.format(title=feed.title, url=feed.link, description=feed.description, img=images_from_the_feed) + converted_data = self.html_template.format(news=news_str) + self.save_file(converted_data) + + +class PdfConverter(ConverterBase): + '''Class that implements conversion into PDF format''' + def __init__(self, news, dir_for_save): + logging.info('Initialization of PdfConverter') + super().__init__(news, dir_for_save) + self.filename = 'news.pdf' + + class PDF(FPDF): + '''A4 portrait PDF document with a page-number footer, filled feed by feed''' + def __init__(self, dir_with_images): + logging.info('Initialization of PDF document') + self.dir_with_images = dir_with_images + super().__init__(orientation='P', unit='mm', format='A4') + self.add_font('TimesNewRoman', fname=path.join(path.dirname(path.abspath(__file__)), 'times-new-roman.ttf'), uni=True) + self.set_margins(left=30, top=20, right=10) + self.set_auto_page_break(True, margin=20) + self.add_page() + self.set_font('TimesNewRoman', size=22) + self.cell(0, 20, 'News you were looking for', ln=1, align='C') + + def footer(self): + '''Print the page number, right-aligned, at the bottom of the page''' + self.set_y(-20) + self.set_font('TimesNewRoman', size=12) + self.cell(0, 20, '%s' % self.page_no(), 0, 0, 'R') + + def add_feed(self, feed): + '''Append title, link, description and images of one feed to the document''' + self.add_title(feed.title) + self.add_link(feed.link) + self.add_description(feed.description) + self.add_images_from_the_feed(feed.id, feed.media_content) + self.ln(20) + + def add_link(self, link): + '''Add an underlined blue clickable line that points to the given link''' + self.set_font('TimesNewRoman', 'U', 12) + self.set_text_color(r=0, g=0, b=255) + self.cell(210, 15, 'Link to that 
news', ln=1, align='L', link=link) + + def add_title(self, title): + '''Add the centered title of the feed to the document''' + self.set_font('TimesNewRoman', size=18) + self.multi_cell(0, 10, title, align='C') + + def add_description(self, description): + '''Add the description of the feed in black text to the document''' + self.set_text_color(0, 0, 0) + self.set_font('TimesNewRoman', size=14) + self.write(6, description) + self.ln(10) + + def add_images_from_the_feed(self, id, media_content): + '''Add the saved images of a feed to the document, skipping empty links and .mp4 media''' + for number_of_image, image_link in enumerate(media_content): + if not image_link or image_link.endswith('.mp4'): + continue + self.set_x(50) + self.image(path.join(self.dir_with_images, f'{id}{number_of_image}.png'), w=120, h=80) + self.ln(10) + + def convert(self): + '''Build the PDF document from the news and save it under a unique filename''' + logging.info('Converting news to PDF format') + pdf = self.PDF(self.dir_for_images) + for feed in self.news: + pdf.add_feed(feed) + logging.info('Saving news in PDF format') + pdf.output(self.generate_filename(self.dir_for_save, self.filename)) + + +class EpubConverte(ConverterBase): + pass + + +class MobiConverter(ConverterBase): + pass + + +class Fb2Converter(ConverterBase): + pass diff --git a/rss_reader/json_formatter.py b/rss_reader/json_formatter.py new file mode 100644 index 0000000..9dc52f3 --- /dev/null +++ b/rss_reader/json_formatter.py @@ -0,0 +1,31 @@ +'''Module that contains the implementation of the JSON converter''' +import json +import logging +from .news import News + +class FeedEncoder(json.JSONEncoder): + """JSONEncoder subclass that knows how to serialize a Json wrapper of news""" + def default(self, obj: object): + '''Return a serializable dict for a Json object; defer to the base encoder for anything else''' + names_of_sections = ('Feed source', 'Title', 'Date', 'Link', 'Description', 'Media_content') + if isinstance(obj, Json): + logging.info('Encoding news into JSON format') + return {'News': + [{'Feed'+str(number): + {section_name: feed_section for section_name, 
feed_section + in zip(names_of_sections, feed())}\ + for number, feed + in enumerate(obj.news_to_convert)}]} + return json.JSONEncoder.default(self, obj) + + +class Json(object): + '''Wrapper around a list of news whose string form is the news in JSON format''' + def __init__(self, news): + logging.info('Initialization of JSON formatter') + self.news_to_convert = news + + def __str__(self): + '''Return the wrapped news serialized to JSON with FeedEncoder''' + logging.info('Receiving news in JSON fromat') + return json.dumps(self, cls=FeedEncoder, indent=4, ensure_ascii=False) diff --git a/rss_reader/news.py b/rss_reader/news.py new file mode 100644 index 0000000..ceba0f0 --- /dev/null +++ b/rss_reader/news.py @@ -0,0 +1,40 @@ +'''Module that contains the class describing a news item''' +from sqlalchemy import Column, String, DateTime, PickleType, Integer +from sqlalchemy.ext.declarative import declarative_base + + +Base = declarative_base() + + +class News(Base): + '''SQLAlchemy model of a single news item stored in the "news" table''' + __tablename__ = 'news' + id = Column(Integer, primary_key=True, autoincrement=True) + feed = Column(String) + title = Column(String) + date = Column(DateTime) + link = Column(String) + description = Column(String) + media_content = Column(PickleType) + date_of_addition = Column(DateTime) + + def __init__(self, feed, title, date, link, description, media_content, date_of_addition): + self.feed = feed + self.title = title + self.date = date + self.link = link + self.description = description + self.media_content = media_content + self.date_of_addition = date_of_addition + + def __str__(self, json=None): + '''Return the full text of the feed; image links are listed only when media_content is non-empty''' + str_to_print = 'Feed: %s\nTitle: %s\nDate: %s\nLink: %s\n\n%s\n\nLinks:\n[1] %s --feed\n' % \ + (self.feed, self.title, self.date, self.link, self.description, self.link) + if self.media_content and self.media_content[0]: + for i, media_link in enumerate(self.media_content, start=2): + str_to_print += '[%d] %s --image\n' % (i, media_link) + return str_to_print + + def __call__(self): + return self.feed, self.title, str(self.date), 
self.link, self.description, self.media_content diff --git a/rss_reader/rssreader.py b/rss_reader/rssreader.py new file mode 100644 index 0000000..3275e7c --- /dev/null +++ b/rss_reader/rssreader.py @@ -0,0 +1,150 @@ +import logging +import json +import feedparser +from datetime import datetime, date +from time import mktime +from bs4 import BeautifulSoup +from sqlalchemy.orm import sessionmaker +from sqlalchemy import func, desc, Date, create_engine +from .news import News, Base +from .json_formatter import Json +from contextlib import contextmanager +from .converter import HtmlConverter, PdfConverter + + +@contextmanager +def create_session(session): + "Context manager that opens a database session from the given factory and always closes it on exit" + s = session() + try: + yield s + finally: + s.close() + + +class RSSReader(object): + '''RSS reader that fetches, caches, prints and optionally exports news''' + def __init__(self, source, limit, date, json, configuration_for_conversion, all, name_of_database='news'): + logging.info('Initialization of RSS Reader') + self.source = source + self.limit = limit + self.date = date + self.json = json + self.configuration_for_conversion = configuration_for_conversion + self.all = all + self.news_to_print = [] + self.init_database(name_of_database) + + def init_database(self, name_of_database): + '''Create the SQLite engine, make sure the tables exist and store a session factory''' + engine = create_engine(f'sqlite:///{name_of_database}') + Base.metadata.create_all(engine) + self.session = sessionmaker(bind=engine, autocommit=True) + + @staticmethod + def get_news(source): + '''Fetch and parse the feed; raise if feedparser flags it as malformed (bozo)''' + logging.info('Getting news') + news = feedparser.parse(source) + logging.info('Parsing news') + if news.bozo: + if news.bozo_exception.args[0].endswith('name\n'): + raise Exception('Entered URL is not a RSS source') + else: + raise news.bozo_exception + return news + + def parse_and_save_news(self, news): + '''Parse the feed entries and cache them; entries whose link is already cached only get date_of_addition refreshed''' + with create_session(self.session) as s: + list_of_news = 
news.entries[:self.limit+1] if self.limit else news.entries + for feed in list_of_news: + text_of_the_feed = self.parse_html(feed.summary_detail.value) + title = self.parse_html(feed.title) + feed_object = News(news.feed.title, + title, + datetime.fromtimestamp(mktime(feed.published_parsed)), + feed.link, + text_of_the_feed, + [image.get('url') for image in feed.media_content], + datetime.today()) + current_feed_in_table = s.query(News).filter(News.link == feed.link).first() + if not current_feed_in_table: + s.add(feed_object) + else: + s.query(News)\ + .filter(News.link == feed.link)\ + .update({'date_of_addition': datetime.today()}) + logging.info('All news are cached') + + @staticmethod + def parse_html(html): + '''Strip the HTML markup and return the plain text of the fragment''' + logging.info('Parsing content of the news in HTML format') + parser = BeautifulSoup(html, 'html.parser') + return parser.getText() + + def get_cached_news(self): + '''Load cached news published on self.date (at most self.limit items if set); raise if there are none''' + logging.info('Getting news from cache by date') + with create_session(self.session) as s: + if self.limit: + self.news_to_print.extend(s.query(News).filter(func.date(News.date) == self.date).limit(self.limit).all()) + else: + self.news_to_print.extend(s.query(News).filter(func.date(News.date) == self.date).all()) + if not self.news_to_print: + raise Exception('No cached news on this date') + + def get_all_news(self): + '''Load every cached news item from the database; raise if the cache is empty''' + logging.info('Getting all news from cache') + with create_session(self.session) as s: + self.news_to_print = s.query(News).all() + if not self.news_to_print: + raise Exception('No cached news') + + def get_news_to_print(self): + '''Select the news cached today, latest additions first, limited to self.limit if set''' + logging.info('Receiving news that will be displayed') + with create_session(self.session) as s: + if self.limit: + self.news_to_print = s.query(News)\ + .filter(func.Date(News.date_of_addition) == datetime.today().date())\ + .order_by(News.date_of_addition.desc())\ + 
.limit(self.limit)\ + .all() + else: + self.news_to_print = s.query(News)\ + .filter(func.Date(News.date_of_addition) == datetime.today().date())\ + .order_by(News.date_of_addition.desc())\ + .all() + + def print_news(self): + '''Print the selected news to stdout: as JSON when requested, otherwise as separated text blocks''' + logging.info('Print news') + if self.json: + print(Json(self.news_to_print)) + return + for feed in self.news_to_print: + print(feed) + print('='*77) + + def exec(self): + '''Run the reader: select news (all cached, cached by date, or freshly fetched), print and optionally export them; any error is printed''' + logging.info('Starting work of the RSS Reader') + try: + if self.all: + self.get_all_news() + elif self.date: + self.get_cached_news() + else: + news = self.get_news(self.source) + self.parse_and_save_news(news) + self.get_news_to_print() + self.print_news() + if 'pdf' in self.configuration_for_conversion: + PdfConverter(self.news_to_print, self.configuration_for_conversion.get('pdf')).convert() + if 'html' in self.configuration_for_conversion: + HtmlConverter(self.news_to_print, self.configuration_for_conversion.get('html')).convert() + except Exception as e: + print(e) diff --git a/rss_reader/times-new-roman.ttf b/rss_reader/times-new-roman.ttf new file mode 100644 index 0000000..eaf5e11 Binary files /dev/null and b/rss_reader/times-new-roman.ttf differ diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..87b60e2 --- /dev/null +++ b/setup.py @@ -0,0 +1,23 @@ +from setuptools import setup +from rss_reader import __version__ as version + + +with open('requirements.txt') as f: + requirements = f.read() + + +setup( + name='rss-reader', + version=version, + packages=['rss_reader'], + url='', + license='', + author='Zavxoz', + author_email='artem.klimec8@gmail.com', + description='simple command-line rss reader', + install_requires=requirements, + include_package_data=True, + entry_points={ + 'console_scripts': ['rss-reader = rss_reader.cli:main'] + } +) diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git 
a/tests/data_for_testing/all_news(date also 01-12-2019).txt b/tests/data_for_testing/all_news(date also 01-12-2019).txt new file mode 100644 index 0000000..78d470a --- /dev/null +++ b/tests/data_for_testing/all_news(date also 01-12-2019).txt @@ -0,0 +1,20 @@ +Feed: Yahoo News - Latest News & Headlines +Title: Toll at least 21 after Mexico cartel attack near US border +Date: 2019-12-01 15:28:11 +Link: https://news.yahoo.com/toll-least-21-mexico-cartel-152811534.html + +Mexican security forces on Sunday killed seven more members of a presumed cartel assault force that rolled into a town near the Texas border and staged an hour-long attack, officials said, bringing the death toll to at least 21. The Coahuila state government said in a statement that lawmen were still chasing remnants of the force that arrived in a convoy of trucks and attacked the city hall of Villa Union on Saturday. Gov. Miguel Angel Riquelme said at least 14 people had died by that afternoon, four of them police officers. + +Links: +[1] https://news.yahoo.com/toll-least-21-mexico-cartel-152811534.html --feed +[2] http://l2.yimg.com/uu/api/res/1.2/eaVOxQfOmhj3xx9cS9ntTw--/YXBwaWQ9eXRhY2h5b247aD04Njt3PTEzMDs-/https://media.zenfs.com/en/ap.org/9150f9f4c340580bbd66527e978ee26d --image +Feed: Yahoo News - Latest News & Headlines +Title: Newsweek 'fires' journalist who reported Trump was golfing for Thanksgiving before he secretly travelled to Afghanistan +Date: 2019-12-01 14:24:00 +Link: https://news.yahoo.com/newsweek-fires-journalist-reported-trump-142400157.html + +A Newsweek reporter who wrote that Donald Trump would spend Thanksgiving ‘tweeting, golfing, and more’ - hours before he touched down in a surprise visit to US troops in Afghanistan - has reportedly been fired.The visit was kept highly secret for the president’s safety, and his public schedule said he would be at his Mar-a-Lago hotel in Florida on the holiday, where he would make calls to selected military members. 
+ +Links: +[1] https://news.yahoo.com/newsweek-fires-journalist-reported-trump-142400157.html --feed +[2] http://l1.yimg.com/uu/api/res/1.2/zcYzP.Ed7Z2ViZZL6RjfJw--/YXBwaWQ9eXRhY2h5b247aD04Njt3PTEzMDs-/https://media.zenfs.com/en/the_independent_635/218f25cad42049a1432f1d7fa1a031c8 --image diff --git a/tests/data_for_testing/example_of_rss.xml b/tests/data_for_testing/example_of_rss.xml new file mode 100644 index 0000000..da989ee --- /dev/null +++ b/tests/data_for_testing/example_of_rss.xml @@ -0,0 +1,2 @@ +Yahoo News - Latest News & Headlineshttps://www.yahoo.com/newsThe latest news and headlines from Yahoo! News. Get breaking news stories and in-depth coverage with videos and photos.en-USCopyright (c) 2019 Yahoo! Inc. All rights reservedSun, 01 Dec 2019 15:04:03 -05005Yahoo News - Latest News & Headlineshttps://www.yahoo.com/newshttp://l.yimg.com/rz/d/yahoo_news_en-US_s_f_p_168x21_news.pngLondon Attack by Convicted Terrorist Disrupts U.K. Campaign<p><a href="https://news.yahoo.com/terrorist-kills-two-london-putting-205829309.html"><img src="http://l.yimg.com/uu/api/res/1.2/Rv4DOpfMFWNaXKQRLUHotg--/YXBwaWQ9eXRhY2h5b247aD04Njt3PTEzMDs-/https://media.zenfs.com/en/bloomberg_politics_602/dc1103d0b79d8002ce450951062ebec9" width="130" height="86" alt="London Attack by Convicted Terrorist Disrupts U.K. Campaign" align="left" title="London Attack by Convicted Terrorist Disrupts U.K. Campaign" border="0" ></a>(Bloomberg) -- The man suspected of stabbing two people to death near London Bridge had been released early from jail after a terrorism conviction, allowing an attack in the heart of the city that is disrupting the U.K.’s general election campaign two weeks before the vote.Officers shot and killed the 28-year-old attacker, who was wearing a fake suicide vest after members of the public wrestled him to the ground on London Bridge, on the edge of the city’s financial district. He was tackled by passersby moments after carrying out the attack at about 2 p.m. 
on Friday.Boris Johnson broke away from campaigning on Friday for the Dec. 12 election to rush back to Downing Street for a security briefing on the attack. Speaking afterward, he praised the civilians who tried to stop the suspected terrorist before police arrived, and declared that “Britain will not be cowed” by the incident.On Saturday, Johnson met with police at the site of the attack and used the opportunity to criticize the U.K.’s criminal justice system, which routinely allows for jail sentences, even for criminals committing violent crimes or acts of terrorism, to be reduced.“The practice of automatic early release, when you cut a sentence in half and let serious and violent offenders out, is not working,” he told the BBC after his meeting with police.Click Here for the Day’s Events as They HappenedThe suspect, identified by police as Usman Khan, was released from prison on parole in December 2018, the police said in a statement. Khan was one of nine people convicted in 2012 for offenses ranging from a plot to bomb the London Stock Exchange to planning a terrorist training camp. Khan originally received an indeterminate sentence, which was changed on appeal in 2013 to 16 years, the BBC reported.Johnson also praised the men who fought the attacker and pinned him to the ground on London Bridge until the police arrived. Khan began the attack while attending a conference on prisoner rehabilitation at a building called Fishmongers’ Hall next to the bridge.A Polish chef grabbed an ornamental narwhal tusk off a wall and used it to confront the attacker, while another chased Khan with a fire extinguisher, Sky News reported. 
A third man who aided the victims and tried to fend Khan off was a convicted murderer who was close to completing his sentence, the Telegraph reported, while another man stopped his car and helped the others force Khan to release the two knives he was carrying.“I want to pay tribute to the sheer bravery of the members of the public who went to deal with and put their own lives at risk,“ Johnson said.The first victim of the attack was identified as Jack Merritt, 25, a University of Cambridge graduate who was a coordinator of the conference that Khan attended, the BBC reported.With voters set to go to the polls on Dec. 12, the impact of such a potentially disruptive event is unclear. But the revelation that the attacker was a former convicted terrorist is likely to put pressure on the ruling Conservatives -- who traditionally view crime prevention as one of their stronger cards -- to explain why the person was allowed out of jail.Johnson also told the BBC that his government would review sentencing policies in the wake of the attack.Campaigning in the U.K.’s last election in 2017 was thrown off course by two terrorist attacks, including one in the same area of London just five days before the vote. In that incident, eight people were killed and 48 injured.In the aftermath of the 2017 attack, U.S. President Donald Trump triggered a diplomatic row when he criticized London Mayor Sadiq Khan over his response, and their spat has continued ever since. The U.S. president arrives in the U.K. next week for a NATO summit, which Johnson hopes will be a low-key visit.Trump spoke to Johnson on Saturday and expressed his condolences following the attack, White House spokesman Judd Deere said in a statement. On Friday, Johnson and Labour leader Jeremy Corbyn spoke by phone and each suspended their election campaigns in the capital for the rest of the day. 
Johnson’s team said he would also cancel his events on Saturday so he can focus on the security response.But speaking to television reporters just before a meeting of the government’s ‘Cobra’ crisis committee on Friday evening, Johnson highlighted his election pledge to hire extra police officers.‘Hunted Down’“Anybody involved in this crime and these attacks will be hunted down and will be brought to justice,” he said. “This country will never be cowed or divided or intimidated by this sort of attack and our British values will prevail.”After the alarm was raised on Friday lunchtime, armed police cleared cafes and shops in the London Bridge area. Officers burst into restaurants in the popular Borough Market area on the other side of the river, urging diners to leave immediately. They shouted “Out, out, out,” to people at the Black and Blue bar, and ordered customers to walk away with their hands on their heads. Nearby, police shouted to pedestrians to “run.”The police asked people to avoid the area. Mayor Sadiq Khan said Saturday on BBC’s Radio 4 that while there will be “more high visibility police officers present in London” through the weekend “there’s no reason to believe there is an increased threat” from terrorism. 
The bridge will remain closed for some time, he said from the site on Saturday afternoon.(Updates with Trump-Johnson phone call from 15th paragraph.)&#92;--With assistance from Tim Ross.To contact the reporters on this story: Jessica Shankleman in London at jshankleman@bloomberg.net;Greg Ritchie in London at gritchie10@bloomberg.net;Kitty Donaldson in London at kdonaldson1@bloomberg.netTo contact the editors responsible for this story: Tim Ross at tross54@bloomberg.net, James Amott, Andrew DavisFor more articles like this, please visit us at bloomberg.com©2019 Bloomberg L.P.<p><br clear="all">https://news.yahoo.com/terrorist-kills-two-london-putting-205829309.htmlSat, 30 Nov 2019 18:17:14 -0500Bloombergterrorist-kills-two-london-putting-205829309.html<p><a href="https://news.yahoo.com/terrorist-kills-two-london-putting-205829309.html"><img src="http://l.yimg.com/uu/api/res/1.2/Rv4DOpfMFWNaXKQRLUHotg--/YXBwaWQ9eXRhY2h5b247aD04Njt3PTEzMDs-/https://media.zenfs.com/en/bloomberg_politics_602/dc1103d0b79d8002ce450951062ebec9" width="130" height="86" alt="London Attack by Convicted Terrorist Disrupts U.K. Campaign" align="left" title="London Attack by Convicted Terrorist Disrupts U.K. Campaign" border="0" ></a>(Bloomberg) -- The man suspected of stabbing two people to death near London Bridge had been released early from jail after a terrorism conviction, allowing an attack in the heart of the city that is disrupting the U.K.’s general election campaign two weeks before the vote.Officers shot and killed the 28-year-old attacker, who was wearing a fake suicide vest after members of the public wrestled him to the ground on London Bridge, on the edge of the city’s financial district. He was tackled by passersby moments after carrying out the attack at about 2 p.m. on Friday.Boris Johnson broke away from campaigning on Friday for the Dec. 12 election to rush back to Downing Street for a security briefing on the attack. 
Speaking afterward, he praised the civilians who tried to stop the suspected terrorist before police arrived, and declared that “Britain will not be cowed” by the incident.On Saturday, Johnson met with police at the site of the attack and used the opportunity to criticize the U.K.’s criminal justice system, which routinely allows for jail sentences, even for criminals committing violent crimes or acts of terrorism, to be reduced.“The practice of automatic early release, when you cut a sentence in half and let serious and violent offenders out, is not working,” he told the BBC after his meeting with police.Click Here for the Day’s Events as They HappenedThe suspect, identified by police as Usman Khan, was released from prison on parole in December 2018, the police said in a statement. Khan was one of nine people convicted in 2012 for offenses ranging from a plot to bomb the London Stock Exchange to planning a terrorist training camp. Khan originally received an indeterminate sentence, which was changed on appeal in 2013 to 16 +years, the BBC reported.Johnson also praised the men who fought the attacker and pinned him to the ground on London Bridge until the police arrived. Khan began the attack while attending a conference on prisoner rehabilitation at a building called Fishmongers’ Hall next to the bridge.A Polish chef grabbed an ornamental narwhal tusk off a wall and used it to confront the attacker, while another chased Khan with a fire extinguisher, Sky News reported. 
A third man who aided the victims and tried to fend Khan off was a convicted murderer who was close to completing his sentence, the Telegraph reported, while another man stopped his car and helped the others force Khan to release the two knives he was carrying.“I want to pay tribute to the sheer bravery of the members of the public who went to deal with and put their own lives at risk,“ Johnson said.The first victim of the attack was identified as Jack Merritt, 25, a University of Cambridge graduate who was a coordinator of the conference that Khan attended, the BBC reported.With voters set to go to the polls on Dec. 12, the impact of such a potentially disruptive event is unclear. But the revelation that the attacker was a former convicted terrorist is likely to put pressure on the ruling Conservatives -- who traditionally view crime prevention as one of their stronger cards -- to explain why the person was allowed out of jail.Johnson also told the BBC that his government would review sentencing policies in the wake of the attack.Campaigning in the U.K.’s last election in 2017 was thrown off course by two terrorist attacks, including one in the same area of London just five days before the vote. In that incident, eight people were killed and 48 injured.In the aftermath of the 2017 attack, U.S. President Donald Trump triggered a diplomatic row when he criticized London Mayor Sadiq Khan over his response, and their spat has continued ever since. The U.S. president arrives in the U.K. next week for a NATO summit, which Johnson hopes will be a low-key visit.Trump spoke to Johnson on Saturday and expressed his condolences following the attack, White House spokesman Judd Deere said in a statement. On Friday, Johnson and Labour leader Jeremy Corbyn spoke by phone and each suspended their election campaigns in the capital for the rest of the day. 
Johnson’s team said he would also cancel his events on Saturday so he can focus on the security response.But speaking to television reporters just before a meeting of the government’s ‘Cobra’ crisis committee on Friday evening, Johnson highlighted his election pledge to hire extra police officers.‘Hunted Down’“Anybody involved in this crime and these attacks will be hunted down and will be brought to justice,” he said. “This country will never be cowed or divided or intimidated by this sort of attack and our British values will prevail.”After the alarm was raised on Friday lunchtime, armed police cleared cafes and shops in the London Bridge area. Officers burst into restaurants in the popular Borough Market area on the other side of the river, urging diners to leave immediately. They shouted “Out, out, out,” to people at the Black and Blue bar, and ordered customers to walk away with their hands on their heads. Nearby, police shouted to pedestrians to “run.”The police asked people to avoid the area. Mayor Sadiq Khan said Saturday on BBC’s Radio 4 that while there will be “more high visibility police officers present in London” through the weekend “there’s no reason to believe there is an increased threat” from terrorism. 
The bridge will remain closed for some time, he said from the site on Saturday afternoon.(Updates with Trump-Johnson phone call from 15th paragraph.)&#92;--With assistance from Tim Ross.To contact the reporters on this story: Jessica Shankleman in London at jshankleman@bloomberg.net;Greg Ritchie in London at gritchie10@bloomberg.net;Kitty Donaldson in London at kdonaldson1@bloomberg.netTo contact the editors responsible for this story: Tim Ross at tross54@bloomberg.net, James Amott, Andrew DavisFor more articles like this, please visit us at bloomberg.com©2019 Bloomberg L.P.<p><br clear="all"> diff --git a/tests/data_for_testing/news b/tests/data_for_testing/news new file mode 100644 index 0000000..5964625 Binary files /dev/null and b/tests/data_for_testing/news differ diff --git a/tests/data_for_testing/news.html b/tests/data_for_testing/news.html new file mode 100644 index 0000000..e69de29 diff --git a/tests/data_for_testing/news.pdf b/tests/data_for_testing/news.pdf new file mode 100644 index 0000000..e69de29 diff --git a/tests/data_for_testing/one_news_from20191201.txt b/tests/data_for_testing/one_news_from20191201.txt new file mode 100644 index 0000000..7f3ba11 --- /dev/null +++ b/tests/data_for_testing/one_news_from20191201.txt @@ -0,0 +1,10 @@ +Feed: Yahoo News - Latest News & Headlines +Title: Toll at least 21 after Mexico cartel attack near US border +Date: 2019-12-01 15:28:11 +Link: https://news.yahoo.com/toll-least-21-mexico-cartel-152811534.html + +Mexican security forces on Sunday killed seven more members of a presumed cartel assault force that rolled into a town near the Texas border and staged an hour-long attack, officials said, bringing the death toll to at least 21. The Coahuila state government said in a statement that lawmen were still chasing remnants of the force that arrived in a convoy of trucks and attacked the city hall of Villa Union on Saturday. Gov. 
Miguel Angel Riquelme said at least 14 people had died by that afternoon, four of them police officers. + +Links: +[1] https://news.yahoo.com/toll-least-21-mexico-cartel-152811534.html --feed +[2] http://l2.yimg.com/uu/api/res/1.2/eaVOxQfOmhj3xx9cS9ntTw--/YXBwaWQ9eXRhY2h5b247aD04Njt3PTEzMDs-/https://media.zenfs.com/en/ap.org/9150f9f4c340580bbd66527e978ee26d --image diff --git a/tests/test_cli.py b/tests/test_cli.py new file mode 100644 index 0000000..7e654f0 --- /dev/null +++ b/tests/test_cli.py @@ -0,0 +1,30 @@ +import unittest +from argparse import ArgumentParser +import rss_reader.cli as cli + + +class TestCli(unittest.TestCase): + def test_directory(self): + self.assertEqual(cli.validate_path('tests'), 'tests') + + def test_url(self): + self.assertEqual(cli.validate_url('https://news.yahoo.com'), 'https://news.yahoo.com') + with self.assertRaises(ValueError): + cli.validate_url('http:/news.yahoo') + + def test_date(self): + import datetime + self.assertEqual(cli.validate_date('20191123'), datetime.date(2019, 11, 23)) + self.assertIsNone(cli.validate_date('20193212')) + + def test_make_config_dict(self): + self.assertDictEqual(cli.mk_config_for_conversion('path', None), {'pdf': 'path'}) + self.assertDictEqual(cli.mk_config_for_conversion(None, 'path'), {'html': 'path'}) + self.assertDictEqual(cli.mk_config_for_conversion('path', 'path'), {'pdf': 'path', 'html': 'path'}) + + def test_adding_arguments(self): + self.assertIsInstance(cli.adding_arguments(), ArgumentParser) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_converter.py b/tests/test_converter.py new file mode 100644 index 0000000..a995546 --- /dev/null +++ b/tests/test_converter.py @@ -0,0 +1,23 @@ +import unittest +import os +from rss_reader.converter import ConverterBase, PdfConverter, HtmlConverter + + +class TestNews(unittest.TestCase): + def test_init_dir_for_images(self): + self.assertEqual(ConverterBase.init_dir_for_images_from_news('tests'), 'tests') + + def 
test_check_image_link(self): + self.assertEqual(ConverterBase.check_image_link('htttps://linkhttp:/nestedlink'), 'http:/nestedlink') + + def test_generate_filename_pdf(self): + path_to_file = os.path.join(os.getcwd(),'tests/data_for_testing/') + self.assertEqual(PdfConverter.generate_filename(path_to_file, 'news.pdf'), f'{path_to_file}1news.pdf') + + def test_generate_filename_html(self): + path_to_file = os.path.join(os.getcwd(),'tests/data_for_testing/') + self.assertEqual(PdfConverter.generate_filename(path_to_file, 'news.html'), f'{path_to_file}1news.html') + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_json.py b/tests/test_json.py new file mode 100644 index 0000000..7bfb9c9 --- /dev/null +++ b/tests/test_json.py @@ -0,0 +1,29 @@ +import unittest +from datetime import datetime +from rss_reader.json_formatter import Json +from rss_reader.news import News + + +class TestNews(unittest.TestCase): + def test_json(self): + news = [News('feed', 'title', datetime(2019, 11, 12), 'link', 'description', ['link_to_image'], datetime.now())] + json_example = '''\ +{ + "News": [ + { + "Feed0": { + "Feed source": "feed", + "Title": "title", + "Date": "2019-11-12 00:00:00", + "Link": "link", + "Description": "description", + "Media_content": "['link_to_image']" + } + } + ] +}''' + self.assertEqual(Json(news).__str__(), json_example) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_news.py b/tests/test_news.py new file mode 100644 index 0000000..656a9c3 --- /dev/null +++ b/tests/test_news.py @@ -0,0 +1,15 @@ +import unittest +from datetime import datetime +from rss_reader.news import News + + +class TestNews(unittest.TestCase): + def test_str_and_call(self): + feed = News('feed', 'title', datetime(2019, 11, 12), 'link', 'description', ['link_to_image'], datetime.now()) + feed_str_example = f'Feed: feed\nTitle: title\nDate: {datetime(2019, 11, 12)}\nLink: link\n\ndescription\n\nLinks:\n[1] link --feed\n[2] link_to_image 
--image\n' + self.assertEqual(feed.__str__(), feed_str_example) + self.assertEqual(feed(), ('feed', 'title', datetime(2019, 11, 12), 'link', 'description', ['link_to_image'])) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_rssreader.py b/tests/test_rssreader.py new file mode 100644 index 0000000..22fb879 --- /dev/null +++ b/tests/test_rssreader.py @@ -0,0 +1,61 @@ +import unittest +import glob +from rss_reader.rssreader import RSSReader +from datetime import datetime + + +class TestNews(unittest.TestCase): + + maxDiff = None + + def test_parse_html(self): + rss_example = RSSReader('source', 1, datetime(2019, 12, 1), True, {}, False) + html_example = '''

Ilhan Omar GOP challenger banned from Twitter after sayingshe should be "tried for treason and hanged”\ +Danielle Stella campaign account also tweeted a picture of a stick figure being hanged with a link to a blog post about her comments.


\ +''' + html_result = '''Danielle Stella campaign account also tweeted a picture of a stick figure being hanged with a link to a blog post about her comments.''' + self.assertEqual(rss_example.parse_html(html_example), html_result) + + def test_get_cached_news(self): + rss_example1 = RSSReader('source', None, datetime(2019, 12, 1).date(), True, {}, False, 'tests/data_for_testing/news') + rss_example2 = RSSReader('source', 1, datetime(2019, 12, 1).date(), False, {}, False, 'tests/data_for_testing/news') + with open('tests/data_for_testing/all_news(date also 01-12-2019).txt', 'r') as f: + all_news = f.read() + rss_example1.get_cached_news() + cached_news = '' + for feed in rss_example1.news_to_print: + cached_news += feed.__str__() + self.assertEqual(all_news, cached_news) + with open('tests/data_for_testing/one_news_from20191201.txt', 'r') as f: + one_news = f.read() + rss_example2.get_cached_news() + cached_news = '' + for feed in rss_example2.news_to_print: + cached_news += feed.__str__() + self.assertEqual(one_news, cached_news) + + def test_get_all_news(self): + rss_example = RSSReader('source', None, datetime(2019, 12, 1).date(), True, {}, True, 'tests/data_for_testing/news') + with open('tests/data_for_testing/all_news(date also 01-12-2019).txt', 'r') as f: + all_news = f.read() + rss_example.get_all_news() + cached_news = '' + for feed in rss_example.news_to_print: + cached_news += feed.__str__() + self.assertEqual(all_news, cached_news) + + def test_get_news(self): + news = RSSReader.get_news(r'tests/data_for_testing/example_of_rss.xml') + self.assertEqual(news.feed.title, 'Yahoo News - Latest News & Headlines') + self.assertEqual(news.feed.link, 'https://www.yahoo.com/news') + self.assertEqual(news.feed.subtitle, 'The latest news and headlines from Yahoo! News. Get breaking news stories and in-depth coverage with videos and photos.') + self.assertEqual(news.feed.rights, 'Copyright (c) 2019 Yahoo! Inc. 
All rights reserved') + + +if __name__ == '__main__': + unittest.main()