Skip to content

Commit

Permalink
Refactor _parser.py so we can get rid of the werkzeug dependency.
Browse files Browse the repository at this point in the history
For #205.
  • Loading branch information
lemon24 committed Jan 26, 2021
1 parent 9622c29 commit c381ade
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 15 deletions.
44 changes: 29 additions & 15 deletions src/reader/_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from typing import BinaryIO
from typing import Callable
from typing import ContextManager
from typing import Dict
from typing import Iterable
from typing import Iterator
from typing import List
Expand Down Expand Up @@ -214,6 +215,10 @@ def accept_header(self) -> str: # pragma: no cover
pass


def unparse_accept_header(values: Iterable[Tuple[str, float]]) -> str:
    """Render (MIME type, quality) pairs as an Accept header string.

    The pairs are wrapped in a MIMEAccept, which does the serialization
    through its to_header() method.

    """
    accept = MIMEAccept(values)
    return accept.to_header()


class Parser:

user_agent = (
Expand All @@ -222,7 +227,7 @@ class Parser:

def __init__(self) -> None:
    """Create a parser with empty retriever and parser registries."""
    # Retrievers keyed by the string they were mounted under, in mount order.
    self.retrievers: 'OrderedDict[str, RetriverType]' = OrderedDict()
    # MIME type -> [(quality, parser), ...]; lookups return the last
    # entry of a list, so the preferred parser must come last.
    self.parsers_by_mime_type: Dict[str, List[Tuple[float, ParserType]]] = {}
    self.session_hooks = SessionHooks()

def mount_retriever(self, prefix: str, retriever: RetriverType) -> None:
Expand All @@ -240,23 +245,32 @@ def get_retriever(self, url: str) -> RetriverType:
def get_parser_by_mime_type(
    self, mime_type: str
) -> Optional[ParserType]:  # pragma: no cover
    """Return the preferred parser mounted for a MIME type, if any.

    The exact MIME type is looked up first; if nothing is mounted for
    it, the '*/*' wildcard entry is used instead.

    Args:
        mime_type: The MIME type to find a parser for.

    Returns:
        The parser stored last in the matching list, or None if neither
        the exact type nor '*/*' has any parsers mounted.

    """
    # parsers_by_mime_type is a dict keyed by MIME type; each value is a
    # list of (quality, parser) pairs.
    candidates = self.parsers_by_mime_type.get(mime_type, ())
    if not candidates:
        candidates = self.parsers_by_mime_type.get('*/*', ())
    if candidates:
        # The last pair in the list is the one that wins.
        return candidates[-1][1]
    return None

def mount_parser_by_mime_type(
    self, parser: ParserType, accept_header: Optional[str] = None
) -> None:  # pragma: no cover
    """Register a parser for every MIME type listed in an Accept header.

    Each per-MIME-type list is kept in ascending quality order, so the
    preferred parser is always the last element. Among parsers with the
    same quality, the one mounted later is placed later.

    Args:
        parser: The parser to mount.
        accept_header: Accept header value naming the MIME types (and
            optional q= qualities) the parser handles. If empty or None,
            parser.accept_header is used instead.

    Raises:
        TypeError: If no accept_header is given and parser is not an
            AwareParserType.

    """
    if not accept_header:
        if not isinstance(parser, AwareParserType):
            raise TypeError("unaware parser type with no accept_header given")
        accept_header = parser.accept_header

    for mime_type, quality in parse_accept_header(accept_header):
        # A zero quality means "not acceptable"; do not mount for it.
        if not quality:
            continue

        parsers = self.parsers_by_mime_type.setdefault(mime_type, [])

        # Insert before the first entry with a strictly higher quality.
        # When there is none, the new parser is the best so far and must
        # go at the END; inserting at index 0 would make it the least
        # preferred, since lookups take the last element.
        index = next(
            (i for i, (q, _) in enumerate(parsers) if q > quality),
            len(parsers),
        )
        parsers.insert(index, (quality, parser))

def __call__(
self,
Expand All @@ -270,11 +284,11 @@ def __call__(

http_accept: Optional[str]
if not parser:
http_accept = MIMEAccept(
mime_type
for accept, _ in self.parsers_by_mime_type
for mime_type in accept
).to_header()
http_accept = unparse_accept_header(
(mime_type, quality)
for mime_type, parsers in self.parsers_by_mime_type.items()
for quality, _ in parsers
)
else:
# URL parsers get the default session / requests Accept (*/*);
# later, we may use parser.accept_header, if it exists, but YAGNI
Expand Down
22 changes: 22 additions & 0 deletions tests/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from reader._parser import default_parser
from reader._parser import feedparser_parse
from reader._parser import FileRetriever
from reader._parser import Parser
from reader._parser import RetrieveResult
from reader._parser import SessionWrapper
from reader.exceptions import _NotModified
Expand Down Expand Up @@ -766,4 +767,25 @@ def test_normalize_url_errors(monkeypatch, reload_module, os_name, url, reason):
assert reason in str(excinfo.value)


def test_parser_mount_order():
    """Mounted parsers are stored per MIME type in ascending quality order.

    A q=0 mount is dropped entirely, and parsers sharing a quality keep
    the order they were mounted in.

    """
    mounts = [
        ('P0', 'one/two;q=0.0'),
        ('P1', 'one/two'),
        ('P2', 'one/two;q=0.1'),
        ('P3', 'one/two;q=0.1'),
        ('P4', 'one/two;q=0.4'),
        ('P5', 'one/two;q=0.5'),
        ('P6', 'one/two;q=0.3'),
    ]

    parser = Parser()
    for name, accept_header in mounts:
        parser.mount_parser_by_mime_type(name, accept_header)

    expected_order = [
        (0.1, 'P2'),
        (0.1, 'P3'),
        (0.3, 'P6'),
        (0.4, 'P4'),
        (0.5, 'P5'),
        (1, 'P1'),
    ]
    assert parser.parsers_by_mime_type == {'one/two': expected_order}


# FIXME: test no mimetype (#205)

0 comments on commit c381ade

Please sign in to comment.