Skip to content

Commit 1f83dd4

Browse files
committed
make hyperlink handle + like an HTML form post by default
1 parent e5cd7e2 commit 1f83dd4

File tree

3 files changed

+64
-13
lines changed

3 files changed

+64
-13
lines changed

src/hyperlink/_url.py

+42-12
Original file line numberDiff line numberDiff line change
@@ -467,9 +467,11 @@ def _encode_userinfo_part(text, maximal=True):
467467
)
468468
# As of Mar 11, 2017, there were 44 netloc schemes, and 13 non-netloc
469469

470+
NO_QUERY_PLUS_SCHEMES = set()
470471

471-
def register_scheme(text, uses_netloc=True, default_port=None):
472-
# type: (Text, bool, Optional[int]) -> None
472+
473+
def register_scheme(text, uses_netloc=True, default_port=None, query_plus_is_space=True):
474+
# type: (Text, bool, Optional[int], bool) -> None
473475
"""Registers new scheme information, resulting in correct port and
474476
slash behavior from the URL object. There are dozens of standard
475477
schemes preregistered, so this function is mostly meant for
@@ -485,6 +487,8 @@ def register_scheme(text, uses_netloc=True, default_port=None):
485487
not. Defaults to True.
486488
default_port: The default port, if any, for
487489
netloc-using schemes.
490+
query_plus_is_space: If true, a "+" in the query string should be
491+
decoded as a space by DecodedURL.
488492
489493
.. _file an issue: https://github.com/mahmoud/hyperlink/issues
490494
"""
@@ -510,6 +514,9 @@ def register_scheme(text, uses_netloc=True, default_port=None):
510514
else:
511515
raise ValueError("uses_netloc expected bool, not: %r" % uses_netloc)
512516

517+
if not query_plus_is_space:
518+
NO_QUERY_PLUS_SCHEMES.add(text)
519+
513520
return
514521

515522

@@ -1998,6 +2005,9 @@ class DecodedURL(object):
19982005
lazy: Set to True to avoid pre-decoding all parts of the URL to check for
19992006
validity.
20002007
Defaults to False.
2008+
query_plus_is_space: Whether + characters in the query string should be treated
2009+
as spaces when decoding. If unspecified, the default is taken from
2010+
the scheme.
20012011
20022012
.. note::
20032013
@@ -2012,17 +2022,20 @@ class DecodedURL(object):
20122022
.. versionadded:: 18.0.0
20132023
"""
20142024

2015-
def __init__(self, url=_EMPTY_URL, lazy=False):
2016-
# type: (URL, bool) -> None
2025+
def __init__(self, url=_EMPTY_URL, lazy=False, query_plus_is_space=None):
    # type: (URL, bool, Optional[bool]) -> None
    self._url = url
    # With no explicit choice, "+" means space unless the URL's scheme
    # was registered in NO_QUERY_PLUS_SCHEMES.
    self._query_plus_is_space = (
        url.scheme not in NO_QUERY_PLUS_SCHEMES
        if query_plus_is_space is None
        else query_plus_is_space
    )
    if lazy:
        return
    # Touch every decoded property once: this caches the decoded values
    # and surfaces any decoding problem at construction time.
    self.host, self.userinfo, self.path, self.query, self.fragment
20232036

20242037
@classmethod
2025-
def from_text(cls, text, lazy=False):
2038+
def from_text(cls, text, lazy=False, query_plus_is_space=None):
20262039
# type: (Text, bool, Optional[bool]) -> DecodedURL
20272040
"""\
20282041
Make a `DecodedURL` instance from any text string containing a URL.
@@ -2034,7 +2047,7 @@ def from_text(cls, text, lazy=False):
20342047
Defaults to True.
20352048
"""
20362049
_url = URL.from_text(text)
2037-
return cls(_url, lazy=lazy)
2050+
return cls(_url, lazy=lazy, query_plus_is_space=query_plus_is_space)
20382051

20392052
@property
20402053
def encoded_url(self):
@@ -2059,22 +2072,34 @@ def to_iri(self):
20592072
"Passthrough to :meth:`~hyperlink.URL.to_iri()`"
20602073
return self._url.to_iri()
20612074

2075+
def _clone(self, url):
    # type: (URL) -> DecodedURL
    """Wrap *url* in a new instance of this object's class, carrying over
    this instance's ``query_plus_is_space`` setting.
    """
    # TODO: propagate laziness?
    cls = self.__class__
    return cls(url, query_plus_is_space=self._query_plus_is_space)
2082+
20622083
def click(self, href=u""):
20632084
# type: (Union[Text, URL, DecodedURL]) -> DecodedURL
20642085
"""Return a new DecodedURL wrapping the result of
20652086
:meth:`~hyperlink.URL.click()`
20662087
"""
20672088
if isinstance(href, DecodedURL):
20682089
href = href._url
2069-
return self.__class__(self._url.click(href=href))
2090+
return self._clone(
2091+
self._url.click(href=href),
2092+
)
20702093

20712094
def sibling(self, segment):
20722095
# type: (Text) -> DecodedURL
20732096
"""Automatically encode any reserved characters in *segment* and
20742097
return a new `DecodedURL` wrapping the result of
20752098
:meth:`~hyperlink.URL.sibling()`
20762099
"""
2077-
return self.__class__(self._url.sibling(_encode_reserved(segment)))
2100+
return self._clone(
2101+
self._url.sibling(_encode_reserved(segment)),
2102+
)
20782103

20792104
def child(self, *segments):
20802105
# type: (Text) -> DecodedURL
@@ -2085,7 +2110,7 @@ def child(self, *segments):
20852110
if not segments:
20862111
return self
20872112
new_segs = [_encode_reserved(s) for s in segments]
2088-
return self.__class__(self._url.child(*new_segs))
2113+
return self._clone(self._url.child(*new_segs))
20892114

20902115
def normalize(
20912116
self,
@@ -2101,7 +2126,7 @@ def normalize(
21012126
"""Return a new `DecodedURL` wrapping the result of
21022127
:meth:`~hyperlink.URL.normalize()`
21032128
"""
2104-
return self.__class__(
2129+
return self._clone(
21052130
self._url.normalize(
21062131
scheme, host, path, query, fragment, userinfo, percents
21072132
)
@@ -2148,11 +2173,16 @@ def path(self):
21482173
def query(self):
21492174
# type: () -> QueryPairs
21502175
if not hasattr(self, "_query"):
2176+
if self._query_plus_is_space:
2177+
predecode = lambda x: x.replace("+", "%20")
2178+
else:
2179+
predecode = lambda x: x
2180+
21512181
self._query = cast(
21522182
QueryPairs,
21532183
tuple(
21542184
tuple(
2155-
_percent_decode(x, raise_subencoding_exc=True)
2185+
_percent_decode(predecode(x), raise_subencoding_exc=True)
21562186
if x is not None
21572187
else None
21582188
for x in (k, v)
@@ -2248,7 +2278,7 @@ def replace(
22482278
userinfo=userinfo_text,
22492279
uses_netloc=uses_netloc,
22502280
)
2251-
return self.__class__(url=new_url)
2281+
return self._clone(url=new_url)
22522282

22532283
def get(self, name):
22542284
# type: (Text) -> List[Optional[Text]]

src/hyperlink/test/test_decoded_url.py

+14
Original file line numberDiff line numberDiff line change
@@ -210,3 +210,17 @@ def test_click_decoded_url(self):
210210
assert clicked.host == durl.host
211211
assert clicked.path == durl_dest.path
212212
assert clicked.path == ("tëst",)
213+
214+
def test_decode_plus(self):
    # type: () -> None
    """By default a literal ``+`` in the query decodes to a space and
    ``%2B`` decodes to ``+``; a ``+`` in the path is left as-is.
    """
    decoded = DecodedURL.from_text("/x+y%2B?a=b+c%2B")
    expected_value = "b c+"
    assert decoded.path == ("x+y+",)
    assert decoded.get("a") == [expected_value]
    assert decoded.query == (("a", expected_value),)
220+
221+
def test_decode_nonplussed(self):
    # type: () -> None
    """With ``query_plus_is_space=False`` a literal ``+`` in the query is
    kept, while ``%2B`` still decodes to ``+``.
    """
    text = "/x+y%2B?a=b+c%2B"
    decoded = DecodedURL.from_text(text, query_plus_is_space=False)
    expected_value = "b+c+"
    assert decoded.path == ("x+y+",)
    assert decoded.get("a") == [expected_value]
    assert decoded.query == (("a", expected_value),)

src/hyperlink/test/test_scheme_registration.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
from .. import _url
77
from .common import HyperlinkTestCase
8-
from .._url import register_scheme, URL
8+
from .._url import register_scheme, URL, DecodedURL
99

1010

1111
class TestSchemeRegistration(HyperlinkTestCase):
@@ -70,3 +70,10 @@ def test_register_invalid_port(self):
7070
# type: () -> None
7171
with self.assertRaises(ValueError):
7272
register_scheme("nope", default_port=cast(bool, object()))
73+
74+
def test_register_no_quote_plus_scheme(self):
    # type: () -> None
    """A scheme registered with ``query_plus_is_space=False`` keeps a
    literal ``+`` in decoded query values; other schemes still decode it
    as a space unless the caller overrides that explicitly.
    """
    register_scheme("keepplus", query_plus_is_space=False)

    # Default taken from the scheme registration: "+" stays "+".
    plus_is_not_space = DecodedURL.from_text("keepplus://heyoo/?q=a+b")
    assert plus_is_not_space.get("q") == ["a+b"]

    # A scheme not registered that way keeps the form-style default.
    plus_is_space = DecodedURL.from_text("https://abc.xyz/x+y%2B?a=b+c%2B")
    assert plus_is_space.get("a") == ["b c+"]

    # An explicit argument wins over the scheme's default.
    explicit_no_plus = DecodedURL.from_text(
        "https://abc.xyz/x+y%2B?a=b+c%2B", query_plus_is_space=False
    )
    assert explicit_no_plus.get("a") == ["b+c+"]
79+

0 commit comments

Comments
 (0)