Skip to content

Commit

Permalink
Fix sqlite_releases (add _parser.py support for it, and a test).
Browse files Browse the repository at this point in the history
For #205.
  • Loading branch information
lemon24 committed Jan 27, 2021
1 parent ea3caad commit c222c30
Show file tree
Hide file tree
Showing 4 changed files with 109 additions and 26 deletions.
12 changes: 9 additions & 3 deletions src/reader/_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,7 @@ class Parser:
def __init__(self) -> None:
    """Initialize the parser registries.

    A Parser resolves a feed URL in stages: a retriever fetches the
    resource, then a parser turns it into feed data.  Parsers can be
    selected either by the response's MIME type or by exact URL.
    """
    # URL prefix -> retriever; an OrderedDict, presumably so prefixes
    # are tried in mount order — TODO confirm against mount_retriever().
    self.retrievers: 'OrderedDict[str, RetrieverType]' = OrderedDict()
    # MIME type -> list of (quality, parser) pairs; mount_parser_by_mime_type()
    # inserts by quality so higher-quality parsers are preferred.
    self.parsers_by_mime_type: Dict[str, List[Tuple[float, ParserType]]] = {}
    # Exact URL -> parser; consulted first in __call__, before
    # MIME-type-based selection (see get_parser_by_url()).
    self.parsers_by_url: Dict[str, ParserType] = {}
    # Hooks applied to the requests session used for retrieval.
    self.session_hooks = SessionHooks()

def mount_retriever(self, prefix: str, retriever: RetrieverType) -> None:
Expand Down Expand Up @@ -267,15 +268,20 @@ def mount_parser_by_mime_type(
index = existing_qualities[0][1] if existing_qualities else 0
parsers.insert(index, (quality, parser))

def get_parser_by_url(self, url: str) -> Optional[ParserType]:
    """Return the parser mounted for exactly this URL, or None.

    Lookup is exact-match only; smarter matching could be added
    later, but isn't needed yet (YAGNI).
    """
    try:
        return self.parsers_by_url[url]
    except KeyError:
        return None

def mount_parser_by_url(self, url: str, parser: ParserType) -> None:
    """Register *parser* to handle exactly *url*.

    Mounting again for the same URL replaces the previous parser.
    """
    self.parsers_by_url[url] = parser

def __call__(
self,
url: str,
http_etag: Optional[str] = None,
http_last_modified: Optional[str] = None,
) -> ParsedFeed:

# FIXME: URL parser selection goes here
parser: Optional[ParserType] = None
parser = self.get_parser_by_url(url)

http_accept: Optional[str]
if not parser:
Expand Down
32 changes: 9 additions & 23 deletions src/reader/_plugins/sqlite_releases.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,9 @@

import bs4

from reader._parser import caching_get
from reader._parser import wrap_exceptions
from reader._types import EntryData
from reader._types import FeedData
from reader._types import ParsedFeed

warnings.filterwarnings(
'ignore',
Expand Down Expand Up @@ -93,28 +91,16 @@ def make_feed(feed_url, url, soup):
return FeedData(url=feed_url, title=soup.title and soup.title.text, link=url)


def make_parser(make_session):
def parse(url, http_etag=None, http_last_modified=None):
with make_session() as session:
with wrap_exceptions(url, "while getting feed"):
response, http_etag, http_last_modified = caching_get(
session, FULL_URL, http_etag, http_last_modified, stream=True,
)

with wrap_exceptions(url, "while reading feed"), response:
soup = bs4.BeautifulSoup(response.raw)

with wrap_exceptions(url, "while parsing page"):
feed = make_feed(url, FULL_URL, soup)
entries = list(make_entries(url, FULL_URL, soup))
feed = feed._replace(updated=max(e.updated for e in entries))

return ParsedFeed(feed, entries, http_etag, http_last_modified)

return parse
def parse(url, file, headers):
    """Parse the SQLite "Release History" HTML page into feed data.

    Args:
        url: The feed URL; used for error context and passed on as the
            feed/entry id base.
        file: The page contents (a file-like object or markup accepted
            by BeautifulSoup).
        headers: HTTP response headers; unused here, but presumably
            part of the parser call signature — confirm against
            Parser.__call__.

    Returns:
        A ``(feed, entries)`` tuple of FeedData and a list of EntryData.
    """
    with wrap_exceptions(url, "while reading feed"):
        soup = bs4.BeautifulSoup(file)

    with wrap_exceptions(url, "while parsing page"):
        feed = make_feed(url, FULL_URL, soup)
        entries = list(make_entries(url, FULL_URL, soup))
        # Guard the empty case: max() on an empty sequence raises
        # ValueError; a page with no releases should not blow up here,
        # and the feed simply keeps whatever `updated` make_feed() set.
        if entries:
            feed = feed._replace(updated=max(e.updated for e in entries))

    return feed, entries


def init(reader):
    """Plugin entry point.

    Mount :func:`parse` for each known SQLite changes-page URL, so the
    reader uses it instead of MIME-type-based parser selection.

    Note: the diff here interleaved the removed ``make_parser``/
    ``mount_parser`` lines; only the by-URL mounting remains.
    """
    for url in URLS:
        reader._parser.mount_parser_by_url(url, parse)
28 changes: 28 additions & 0 deletions tests/data/sqlite_releases.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
<!DOCTYPE html>
<html><head>
<meta http-equiv="content-type" content="text/html; charset=UTF-8">
<title>Release History Of SQLite</title>
<!-- path= -->
</head>
<body>

<h1 align=center>Release History</h1>

<p>
This page provides a high-level summary of changes to SQLite.
</p>

<a name="version_3_34_1"></a>
<h3>2021-01-20 (3.34.1)</h3><p><ol class='lessindent'>
<li>Fix a potential use-after-free bug.
</ol></p>
<a name="version_3_34_0"></a>
<h3>2020-12-01 (3.34.0)</h3><p>
Added the <a href="c3ref/txn_state.html">sqlite3_txn_state()</a> interface.
</p>
<h3>2000-05-30</h3>
Added the <b>LIKE</b> operator.
<h3>2000-05-29</h3>
Initial Public Release of Alpha code

</dl>
63 changes: 63 additions & 0 deletions tests/test_plugins_sqlite_releases.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
from datetime import datetime

from reader._plugins.sqlite_releases import FULL_URL
from reader._plugins.sqlite_releases import init


def test_sqlite_releases(reader, requests_mock, data_dir):
    """End-to-end test of the sqlite_releases plugin.

    Mocks the SQLite changes page with a local fixture, updates the
    feed, and checks the resulting feed metadata, caching headers, and
    parsed entries.  `reader`, `requests_mock`, and `data_dir` are
    pytest fixtures — presumably defined in conftest.py; verify there.
    """
    # Register the plugin's by-URL parser on this reader.
    init(reader)

    # Serve the local fixture for FULL_URL, with caching headers so we
    # can check that ETag/Last-Modified are stored.
    requests_mock.get(
        FULL_URL,
        content=data_dir.join('sqlite_releases.html').read_binary(),
        headers={
            "Last-Modified": "Thu, 21 Jan 2021 01:23:58 +0000",
            "ETag": "m6008d7aes58501",
            "Content-type": "text/html; charset=utf-8",
        },
    )

    reader.add_feed(FULL_URL)
    reader.update_feeds()

    # Feed-level data: `updated` comes from the newest release heading.
    (feed,) = reader.get_feeds()
    assert feed.updated == datetime(2021, 1, 20, 0, 0)
    assert feed.title == 'Release History Of SQLite'
    assert feed.link == 'https://www.sqlite.org/changes.html'

    # The caching headers from the mocked response must be persisted.
    (feed_for_update,) = reader._storage.get_feeds_for_update(url=FULL_URL)
    assert feed_for_update.http_etag == 'm6008d7aes58501'
    assert feed_for_update.http_last_modified == 'Thu, 21 Jan 2021 01:23:58 +0000'

    # Entry-level data, newest first; expected values are tied
    # byte-for-byte to tests/data/sqlite_releases.html.
    entries = list(reader.get_entries())
    entry_data = [(e.id, e.updated, e.title, e.link, e.summary) for e in entries]
    assert entry_data == [
        (
            '2021-01-20 (3.34.1)',
            datetime(2021, 1, 20, 0, 0),
            '2021-01-20 (3.34.1)',
            # Headings with an <a name=...> anchor get a fragment link.
            'https://www.sqlite.org/changes.html#version_3_34_1',
            '<p></p><ol class="lessindent">\n<li>Fix a potential use-after-free bug.\n</li></ol>\n',
        ),
        (
            '2020-12-01 (3.34.0)',
            datetime(2020, 12, 1, 0, 0),
            '2020-12-01 (3.34.0)',
            'https://www.sqlite.org/changes.html#version_3_34_0',
            '<p>\nAdded the <a href="c3ref/txn_state.html">sqlite3_txn_state()</a> interface.\n</p>\n',
        ),
        (
            '2000-05-30',
            datetime(2000, 5, 30, 0, 0),
            '2000-05-30',
            # No anchor in the fixture for these, so the link is bare.
            'https://www.sqlite.org/changes.html',
            '\nAdded the <b>LIKE</b> operator.\n',
        ),
        (
            '2000-05-29',
            datetime(2000, 5, 29, 0, 0),
            '2000-05-29',
            'https://www.sqlite.org/changes.html',
            '\nInitial Public Release of Alpha code\n\n\n',
        ),
    ]

0 comments on commit c222c30

Please sign in to comment.