Skip to content

Commit

Permalink
Add host_subcomponent property (#1159)
Browse files Browse the repository at this point in the history
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
bdraco and pre-commit-ci[bot] authored Sep 26, 2024
1 parent ecb100d commit 7d013d2
Show file tree
Hide file tree
Showing 5 changed files with 66 additions and 7 deletions.
3 changes: 3 additions & 0 deletions CHANGES/1159.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Added :attr:`~yarl.URL.host_subcomponent` which returns the :rfc:`3986#section-3.2.2` host subcomponent -- by :user:`bdraco`.

The only current practical difference between :attr:`~yarl.URL.raw_host` and :attr:`~yarl.URL.host_subcomponent` is that :attr:`~yarl.URL.host_subcomponent` returns IPv6 addresses enclosed in brackets.
15 changes: 15 additions & 0 deletions docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,22 @@ There are two kinds of properties: *decoded* and *encoded* (with

>>> URL('http://хост.домен').raw_host
'xn--n1agdj.xn--d1acufc'
>>> URL('http://[::1]').raw_host
'::1'

.. attribute:: URL.host_subcomponent

:rfc:`3986#section-3.2.2` host subcomponent part of URL, ``None`` for relative URLs
(:ref:`yarl-api-relative-urls`).

.. doctest::

>>> URL('http://хост.домен').host_subcomponent
'xn--n1agdj.xn--d1acufc'
>>> URL('http://[::1]').host_subcomponent
'[::1]'

.. versionadded:: 1.13

.. attribute:: URL.port

Expand Down
1 change: 1 addition & 0 deletions docs/spelling_wordlist.txt
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ runtimes
sdist
subclass
subclasses
subcomponent
svetlov
uncompiled
v1
Expand Down
18 changes: 18 additions & 0 deletions tests/test_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,24 @@ def test_raw_host():
assert url.raw_host == url._val.hostname


@pytest.mark.parametrize(
    "host",
    ["example.com", "[::1]", "xn--gnter-4ya.com"],
)
def test_host_subcomponent(host: str):
    """The host subcomponent round-trips exactly as written in the URL.

    Covers a plain registered name, a bracketed IPv6 literal and an
    already IDNA-encoded name.
    """
    parsed = URL(f"http://{host}")
    actual = parsed.host_subcomponent
    assert actual == host


def test_host_subcomponent_return_idna_encoded_host():
    """A non-ASCII host is reported in its IDNA-encoded form."""
    expected = "xn----8sb1bdhvc.xn--j1amh"
    assert URL("http://оун-упа.укр").host_subcomponent == expected


def test_raw_host_non_ascii():
url = URL("http://оун-упа.укр")
assert "xn----8sb1bdhvc.xn--j1amh" == url.raw_host
Expand Down
36 changes: 29 additions & 7 deletions yarl/_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -647,6 +647,8 @@ def raw_host(self) -> Union[str, None]:
None for relative URLs.
When working with IPv6 addresses, use the `host_subcomponent` property instead
as it will return the host subcomponent with brackets.
"""
# Use host instead of hostname for sake of shortness
# May add .hostname prop later
Expand All @@ -660,16 +662,35 @@ def host(self) -> Union[str, None]:
None for relative URLs.
"""
raw = self.raw_host
if raw is None:
if (raw := self.raw_host) is None:
return None
if "%" in raw:
# Hack for scoped IPv6 addresses like
# fe80::2%Перевірка
# presence of '%' sign means only IPv6 address, so idna is useless.
if raw and raw[-1].isdigit() or ":" in raw:
# IP addresses are never IDNA encoded
return raw
return _idna_decode(raw)

@cached_property
def host_subcomponent(self) -> Union[str, None]:
    """Return the host subcomponent part of URL.

    None for relative URLs.

    The value follows the host rule of RFC 3986:
    https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.2

    `IP-literal = "[" ( IPv6address / IPvFuture ) "]"`

    Unlike `raw_host`, a host that contains a colon is returned
    wrapped in square brackets; any other host is returned unchanged.

    Examples:
    - `http://example.com:8080` -> `example.com`
    - `http://example.com:80` -> `example.com`
    - `https://127.0.0.1:8443` -> `127.0.0.1`
    - `https://[::1]:8443` -> `[::1]`
    - `http://[::1]` -> `[::1]`

    """
    unbracketed = self.raw_host
    if unbracketed is None:
        return None
    if ":" in unbracketed:
        # raw_host carries the address without brackets; put them back.
        return f"[{unbracketed}]"
    return unbracketed

@cached_property
def port(self) -> Union[int, None]:
"""Port part of URL, with scheme-based fallback.
Expand Down Expand Up @@ -953,7 +974,8 @@ def _encode_host(cls, host: str, human: bool = False) -> str:
# - 127.0.0.1 (last character is a digit)
# - 2001:db8::ff00:42:8329 (contains a colon)
# - 2001:db8::ff00:42:8329%eth0 (contains a colon)
# - [2001:db8::ff00:42:8329] (contains a colon)
# - [2001:db8::ff00:42:8329] (contains a colon -- brackets should
# have been removed before it gets here)
# Rare IP Address formats are not supported per:
# https://datatracker.ietf.org/doc/html/rfc3986#section-7.4
#
Expand Down

0 comments on commit 7d013d2

Please sign in to comment.