-
Couldn't load subscription status.
- Fork 32
Pashkevich Anton #32
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Pashkevich Anton #32
Changes from all commits
4e932f6
a49f200
182eef5
0455a5d
c9c9aff
2885f95
3ca7f18
57b4e74
747618a
984f28f
0388792
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,19 @@ | ||
| Copyright (c) 2019 The Python Packaging Authority | ||
|
|
||
| Permission is hereby granted, free of charge, to any person obtaining a copy | ||
| of this software and associated documentation files (the "Software"), to deal | ||
| in the Software without restriction, including without limitation the rights | ||
| to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
| copies of the Software, and to permit persons to whom the Software is | ||
| furnished to do so, subject to the following conditions: | ||
|
|
||
| The above copyright notice and this permission notice shall be included in all | ||
| copies or substantial portions of the Software. | ||
|
|
||
| THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
| IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
| FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
| AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
| LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
| OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
| SOFTWARE. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| include requirements.txt |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,44 @@ | ||
| ##### JSON structure | ||
|
|
||
| ``` | ||
| { | ||
| "news": { | ||
| "feed": "Yahoo News - Latest News & Headlines", | ||
| "publications": [ | ||
| { | ||
| "title": "Stefanik embraces spotlight at impeachment hearings", | ||
| "pub_date": "Fri, 15 Nov 2019 17:55:51 -0500", | ||
| "link": "https://news.yahoo.com/stefanik-embraces-spotlight-at-impeachment-hearings-225551297.html", | ||
| "description": "[image 2: Stefanik embraces spotlight at impeachment hearings] [2]\nThe second day of the impeachment inquiry\u2019s public hearings, on Friday, began the same way\nas the first: with an attempt by Rep. Elise Stefanik, a New York Republican, to interrupt proceedings\nwith a procedural objection.", | ||
| "hrefs": [ | ||
| [ | ||
| "https://news.yahoo.com/stefanik-embraces-spotlight-at-impeachment-hearings-225551297.html", | ||
| "link" | ||
| ], | ||
| [ | ||
| "http://l.yimg.com/uu/api/res/1.2/NRuDo56c6EiwjZH4WOqEZg--/YXBwaWQ9eXRhY2h5b247aD04Njt3PTEzMDs-/https://media-mbst-pub-ue1.s3.amazonaws.com/creatr-uploaded-images/2019-11/7a1d0760-07d6-11ea-bef7-f17150574bb2", | ||
| "image", | ||
| "Stefanik embraces spotlight at impeachment hearings" | ||
| ] | ||
| ] | ||
| } | ||
| ] | ||
| } | ||
| } | ||
| ``` | ||
|
|
||
| ##### Cache description | ||
|
|
||
| News received from a feed is cached in a database that is created locally. | ||
|
|
||
| The database consists of a single file named "cache.db". It has the following structure: | ||
|
|
||
| | | id | feed | title | pub_date | pub_parsed | link | description | hrefs | | ||
| |-----|------|------|-------|----------|------------|------|-------------|-------| | ||
| |post | .. | ... | ... | ... | ... | ... | ... | ... | | ||
|
|
||
| All fields except "id" have the text type. The "id" field serves as the post's primary key. | ||
|
|
||
| The hrefs field is composed of all links of a post, including image links and image descriptions. | ||
| The section of ordinary references and the section of image links are separated by the --|-- sequence. | ||
| Items within one section are separated by the -+- sequence, and -|- divides a link, its type and the image description. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,4 @@ | ||
| feedparser | ||
| bs4 | ||
| fpdf | ||
| requests |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,93 @@ | ||
| """ | ||
| this module provides tools for caching news | ||
|
|
||
| it includes functions for work with database and support ones | ||
| """ | ||
|
|
||
| import sqlite3 | ||
| from re import match | ||
|
|
||
def init_database():
    """
    open the local cache database and make sure the cache table exists

    returns a (connection, cursor) pair bound to the 'cache.db' file
    """
    create_table_sql = '''CREATE TABLE IF NOT EXISTS cache (id integer primary key, feed text, title text, pub_date text, pub_parsed text, link text, description text, hrefs text)'''

    db_connection = sqlite3.connect('cache.db')
    db_cursor = db_connection.cursor()
    db_cursor.execute(create_table_sql)
    db_connection.commit()
    return db_connection, db_cursor
|
|
||
def cache_news(connection_obj, cursor_obj, news):
    """
    this function adds parsed news to the database, skipping posts already cached

    connection_obj -- open sqlite3 connection, used to commit the inserts
    cursor_obj -- cursor created from that connection
    news -- iterable of post dicts with the keys 'feed', 'title', 'pub_date',
            'pub_parsed', 'link', 'description' and 'hrefs'
    """
    for post in news:
        # build the row once: the same values drive both the duplicate lookup
        # and the insert, and serialising hrefs twice was wasted work
        row = (
            post['feed'],
            post['title'],
            post['pub_date'],
            post['pub_parsed'],
            post['link'],
            post['description'],
            hrefs_to_text(post['hrefs']),
        )
        cursor_obj.execute(
            '''SELECT id FROM cache WHERE feed=? AND title=? AND pub_date=? AND pub_parsed=? AND link=? AND description=? AND hrefs=?''',
            row
        )
        if cursor_obj.fetchone() is None:
            cursor_obj.execute(
                '''INSERT INTO cache (feed, title, pub_date, pub_parsed, link, description, hrefs) VALUES (?, ?, ?, ?, ?, ?, ?)''',
                row
            )
            connection_obj.commit()
|
|
||
def get_cached_news(cursor_obj, date):
    """
    read the posts cached for the given pub_parsed value and return them as a list of dicts

    every dict carries the text columns of the row plus 'hrefs': a list of tuples
    rebuilt from the stored text (sections split on --|--, items on -+-, fields on -|-)
    """
    cursor_obj.execute('''SELECT * FROM cache WHERE pub_parsed=?''', (date, ))

    text_columns = ('feed', 'title', 'pub_date', 'pub_parsed', 'link', 'description')
    posts = []
    for record in cursor_obj.fetchall():
        # column 0 is the numeric id, columns 1..6 are the plain text fields
        post = dict(zip(text_columns, record[1:7]))

        links = []
        # only the first two sections (plain links, image links) are read
        for section in record[7].split("--|--")[:2]:
            for chunk in section.split("-+-"):
                if chunk != '':
                    links.append(tuple(chunk.split("-|-")))
        post['hrefs'] = links

        posts.append(post)

    return posts
|
|
||
def hrefs_to_text(link_list):
    """
    this function converts the list of links connected to a post to text form

    link_list -- sequence of (url, type) entries, image entries being
                 (url, 'image', description)

    returns the plain-link section (each item prefixed with -+-), then, if the
    list contains an image, the --|-- separator followed by every entry from the
    first image onwards (each item suffixed with -+-); fields are joined with -|-
    """
    # position of the first image entry; everything before it is the plain-link section
    first_image = next(
        (pos for pos, entry in enumerate(link_list) if entry[1] == 'image'),
        None,
    )

    # slicing with None keeps the whole list when there is no image at all
    parts = [f"-+-{entry[0]}-|-{entry[1]}" for entry in link_list[:first_image]]
    if first_image is None:
        return ''.join(parts)

    parts.append('--|--')
    for entry in link_list[first_image:]:
        # join whatever fields the entry has (at most three) so that a plain
        # two-field link listed after an image no longer raises IndexError
        parts.append("-|-".join(str(field) for field in entry[:3]) + "-+-")

    return ''.join(parts)
|
|
||
| def is_valid_date(line): | ||
| """ | ||
| this function checks a date parameter for suiting date format | ||
| """ | ||
| date = r"^[1-2][0-9]{3}[0-1][0-9][0-3][0-9]$" | ||
| return match(date, line) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,169 @@ | ||
| """ | ||
| this module provides tools for converting news to html and pdf formats | ||
| """ | ||
|
|
||
| import os | ||
| import shutil | ||
| import requests | ||
| from fpdf import FPDF | ||
|
|
||
def break_lines(text):
    """
    this function replaces every '\\n' in the text with a <br> tag

    text -- string to convert
    returns a new string; the input is left untouched
    """
    # str.replace does in one pass what the manual index loop did by
    # re-slicing the string on every newline
    return text.replace('\n', '<br>')
|
|
||
def to_html(news, filepath):
    """
    this function prints news in html format to file

    news -- iterable of post dicts with the keys 'feed', 'title', 'pub_date',
            'link', 'description' and 'hrefs' (entries are (url, type) or
            (url, 'image', description))
    filepath -- destination file, written as UTF-8 (overwritten if it exists)

    every value taken from a post is HTML-escaped before it is written, so feed
    content cannot inject markup or break out of attribute values
    """
    # local import keeps this block self-contained; html is part of the standard library
    from html import escape

    def esc(value):
        # str() mirrors what the f-string interpolation did for non-string values
        return escape(str(value))

    with open(filepath, "w", encoding='utf-8') as f:
        f.write('''
<html lang="en" dir="ltr">
    <head>
        <meta charset="utf-8">
        <title>rss_reader</title>
        <link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/css/bootstrap.min.css" integrity="sha384-ggOyR0iXCbMQv3Xipma34MD+dH/1fQ784/j6cY/iJTQUOhcWr7x9JvoRxT2MZw1T" crossorigin="anonymous">
        <style>
            ul>li{
                list-style: none;
                border: 1px solid;
                margin-top: 20px;
                padding: 10px;
            }
            ul>li>p:nth-child(1){
                font-size: 35px;
            }

            ul>li{
                border-radius: 10px;
                box-shadow: 1px 1px 10px black;
            }
        </style>
    </head>
    <body>
        <div class="container">
            <h1 style="text-align: center">Actual News</h1>
            <ul>''')
        for post in news:
            feed = esc(post['feed'])
            title = esc(post['title'])
            pub_date = esc(post['pub_date'])
            link = esc(post['link'])
            # escape first, then turn newlines into <br>, so the tags we add
            # ourselves are the only markup left in the description
            description = esc(post['description']).replace('\n', '<br>')
            f.write(f'''
                <li>
                    <p>Feed: {feed}</p>
                    <p>Title: {title}</p>
                    <p>Publication date: {pub_date}</p>
                    <p>Link: <a href = "{link}">{link}</a></p>
                    <p>{description}</p>
                    <p>Links:</p>
                    <ol>''')
            for tpl in post['hrefs']:
                url = esc(tpl[0])
                if not tpl[1] == 'image':
                    f.write(f'''
                        <li>
                            <p><a href = "{url}">{url}</a></p>
                        </li>''')
                else:
                    caption = esc(tpl[2])
                    f.write(f'''
                        <li>
                            <p>{caption}<br><a href = "{url}"><img src = "{url}"></a></p>
                        </li>''')
            f.write('''
                    </ol>
                </li>''')
        f.write('''
            </ul>
        </div>
        <script src="https://code.jquery.com/jquery-3.3.1.slim.min.js" integrity="sha384-q8i/X+965DzO0rT7abK41JStQIAqVgRVzpbzo5smXKp4YfRvH+8abtTE1Pi6jizo" crossorigin="anonymous"></script>
        <script src="https://cdnjs.cloudflare.com/ajax/libs/popper.js/1.14.7/umd/popper.min.js" integrity="sha384-UO2eT0CpHqdSJQ6hJty5KVphtPhzWj9WO1clHTMGa3JDZwrnQq4sF86dIHNDz0W1" crossorigin="anonymous"></script>
        <script src="https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/js/bootstrap.min.js" integrity="sha384-JjSmVgyd0p3pXB1rRibZUAYoIIy6OrQ6VrjIEaFf/nJGzIxFDsf4x0xIM+B07jRM" crossorigin="anonymous"></script>
    </body>
</html>''')
|
|
||
class user_FPDF(FPDF):
    """
    FPDF subclass whose footer prints the current page number
    """
    def footer(self):
        # place the cursor with set_y(-15), then write the page number right-aligned
        self.set_y(-15)
        page_label = f"{self.page_no()}"
        self.cell(0, 10, txt=page_label, align='R')
|
|
||
def download_image(url, dest_filepath):
    """
    this function downloads an image from url and saves it in file

    url -- address of the image
    dest_filepath -- file the received bytes are written to (created or overwritten)

    the body is streamed in 1024-byte blocks; the HTTP status is not checked,
    so whatever the server returns ends up in the file
    """
    with open(dest_filepath, 'wb') as f:
        # a streamed response keeps its connection checked out until it is
        # closed, so scope it with `with` instead of leaving it to the GC
        with requests.get(url, stream=True) as response:
            for block in response.iter_content(1024):
                if not block:
                    break
                f.write(block)
|
|
||
def _pdf_write_post(pdf_obj, post, with_title):
    """
    start a new page and lay out the header, description and 'Links:' caption of one post
    """
    pdf_obj.add_page()
    if with_title:
        # document title, printed on the first page only
        pdf_obj.set_font('Arial', style='B', size=16)
        pdf_obj.cell(200, 15, txt='ACTUAL NEWS', align='C', ln=1)
    pdf_obj.set_font('DejaVu', '', 12)
    pdf_obj.cell(5, 5, txt="#")
    pdf_obj.cell(180, 5, txt=f"Feed: {(post['feed'])}", ln=1)
    pdf_obj.cell(200, 5, ln=1)
    pdf_obj.cell(5, 5)
    pdf_obj.multi_cell(180, 5, txt=f"Title: {(post['title'])}")
    pdf_obj.cell(5, 5)
    pdf_obj.cell(200, 5, txt=f"Publication date: {post['pub_date']}", ln=1)
    pdf_obj.cell(5, 5)
    pdf_obj.cell(10, 5, txt='Link: ')
    pdf_obj.set_font('Arial', style='I', size=12)
    pdf_obj.multi_cell(180, 5, txt=f"{post['link']}")
    pdf_obj.set_font('DejaVu', '', 12)
    pdf_obj.cell(200, 5, ln=1)
    pdf_obj.cell(5, 5)
    pdf_obj.multi_cell(200, 5, txt=f"{post['description']}")
    pdf_obj.cell(200, 5, ln=1)
    pdf_obj.cell(5, 5)
    pdf_obj.cell(200, 5, txt="Links:", ln=1)


def _pdf_write_links(pdf_obj, hrefs, images_directory, image_id):
    """
    print the numbered link list of one post, embedding images; return the next free image id
    """
    for index, tpl in enumerate(hrefs):
        pdf_obj.cell(10, 5)
        if not tpl[1] == 'image':
            pdf_obj.set_font('DejaVu', '', 12)
            pdf_obj.cell(7, 5, txt=f"[{index + 1}] ")
            pdf_obj.set_font('Arial', style='I', size=12)
            pdf_obj.multi_cell(170, 5, txt=f"{tpl[0]}")
        else:
            pdf_obj.set_font('DejaVu', '', 12)
            pdf_obj.multi_cell(170, 5, txt=f"[{index + 1}] {tpl[2]}")
            try:
                img_dir = os.path.join(images_directory, f"{image_id}.jpeg")
                download_image(tpl[0], img_dir)
                pdf_obj.image(img_dir, x=22, y=pdf_obj.get_y()+5, link=tpl[0])
                # the id only advances after a successful embed, so a failed
                # file name is reused (and overwritten) by the next image
                image_id += 1
            except RuntimeError:
                # best effort: an image that cannot be embedded is skipped,
                # its caption stays in the document
                pass
    return image_id


def to_pdf(news, filepath):
    """
    this function prints news in pdf format to file

    news -- iterable of post dicts with the keys 'feed', 'title', 'pub_date',
            'link', 'description' and 'hrefs'
    filepath -- destination of the resulting pdf document

    a 'tmp_files' directory is created in the current working directory for the
    downloaded font and images; it is removed again when the function finishes,
    whether it succeeds or raises
    """
    final_directory = os.path.join(os.getcwd(), "tmp_files")
    os.makedirs(final_directory, exist_ok=True)

    try:
        pdf_obj = user_FPDF()
        font_dir = os.path.join(final_directory, 'DejaVuSansCondensed.ttf')
        with open(font_dir, "wb") as f:
            f.write(requests.get("https://raw.github.com/prague15031939/font_storage/master/DejaVuSansCondensed.ttf").content)
        pdf_obj.add_font('DejaVu', '', font_dir, uni=True)
        image_id = 0

        for ind, post in enumerate(news):
            _pdf_write_post(pdf_obj, post, with_title=(ind == 0))
            image_id = _pdf_write_links(pdf_obj, post['hrefs'], final_directory, image_id)

        pdf_obj.output(filepath)
    finally:
        # previously the scratch directory was left behind whenever a download
        # or a pdf call raised before the last line was reached
        shutil.rmtree(final_directory)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Возможно я ошибаюсь, но может ли в данном случае подойти метод
replaceу строки?