diff --git a/README.md b/README.md index 68ab89a9..1fd3ab1a 100644 --- a/README.md +++ b/README.md @@ -31,20 +31,6 @@ ExtractResult(subdomain='forums', domain='bbc', suffix='co.uk', is_private=False ExtractResult(subdomain='www', domain='worldbank', suffix='org.kg', is_private=False) ``` -`ExtractResult` is a namedtuple, so it's simple to access the parts you want. - -```python ->>> ext = tldextract.extract('http://forums.bbc.co.uk') ->>> (ext.subdomain, ext.domain, ext.suffix) -('forums', 'bbc', 'co.uk') ->>> # rejoin subdomain and domain ->>> '.'.join(ext[:2]) -'forums.bbc' ->>> # a common alias ->>> ext.registered_domain -'bbc.co.uk' -``` - Note subdomain and suffix are _optional_. Not all URL-like inputs have a subdomain or a valid suffix. @@ -59,17 +45,14 @@ ExtractResult(subdomain='google', domain='notavalidsuffix', suffix='', is_privat ExtractResult(subdomain='', domain='127.0.0.1', suffix='', is_private=False) ``` -If you want to rejoin the whole namedtuple, regardless of whether a subdomain -or suffix were found: +To rejoin the original hostname, if it was indeed a valid, registered hostname: ```python ->>> ext = tldextract.extract('http://127.0.0.1:8080/deployed/') ->>> # this has unwanted dots ->>> '.'.join(ext[:3]) -'.127.0.0.1.' ->>> # join each part only if it's truthy ->>> '.'.join(part for part in ext[:3] if part) -'127.0.0.1' +>>> ext = tldextract.extract('http://forums.bbc.co.uk') +>>> ext.registered_domain +'bbc.co.uk' +>>> ext.fqdn +'forums.bbc.co.uk' ``` By default, this package supports the public ICANN TLDs and their exceptions. diff --git a/tests/custom_suffix_test.py b/tests/custom_suffix_test.py index c041aedc..1621d0e1 100644 --- a/tests/custom_suffix_test.py +++ b/tests/custom_suffix_test.py @@ -4,6 +4,7 @@ import tempfile import tldextract +from tldextract.tldextract import ExtractResult FAKE_SUFFIX_LIST_URL = "file://" + os.path.join( os.path.dirname(os.path.abspath(__file__)), "fixtures/fake_suffix_list_fixture.dat" @@ -27,8 +28,8 @@ def test_private_extraction() -> None: """Test this library's uncached, offline, private domain extraction.""" tld = tldextract.TLDExtract(cache_dir=tempfile.mkdtemp(), suffix_list_urls=[]) - assert tld("foo.blogspot.com") == ("foo", "blogspot", "com", False) - assert tld("foo.blogspot.com", include_psl_private_domains=True) == ( + assert tld("foo.blogspot.com") == ExtractResult("foo", "blogspot", "com", False) + assert tld("foo.blogspot.com", include_psl_private_domains=True) == ExtractResult( "", "foo", "blogspot.com", diff --git a/tests/main_test.py b/tests/main_test.py index fffe3f7e..bf6f7a79 100644 --- a/tests/main_test.py +++ b/tests/main_test.py @@ -412,20 +412,6 @@ def test_ipv4_lookalike() -> None: ) -def test_result_as_dict() -> None: - """Test that the result is a namedtuple.""" - result = extract( - "http://admin:password1@www.google.com:666/secret/admin/interface?param1=42" - ) - expected_dict = { - "subdomain": "www", - "domain": "google", - "suffix": "com", - "is_private": False, - } - assert result._asdict() == expected_dict - - def test_cache_permission( mocker: pytest_mock.MockerFixture, monkeypatch: pytest.MonkeyPatch, tmp_path: Path ) -> None: diff --git a/tldextract/cli.py b/tldextract/cli.py index 09495658..fa58cb59 100644 --- a/tldextract/cli.py +++ b/tldextract/cli.py @@ -88,5 +88,5 @@ def main() -> None: sys.exit(1) for i in args.input: - subdomain, domain, suffix, _ = tld_extract(i) - print(f"{subdomain} {domain} {suffix}") + ext = tld_extract(i) + print(f"{ext.subdomain} {ext.domain} {ext.suffix}") diff --git a/tldextract/tldextract.py b/tldextract/tldextract.py index 9431bfc0..d57a9c4b 100644 --- a/tldextract/tldextract.py +++ b/tldextract/tldextract.py @@ -13,18 +13,6 @@ >>> tldextract.extract('http://www.worldbank.org.kg/') # Kyrgyzstan ExtractResult(subdomain='www', domain='worldbank', suffix='org.kg', is_private=False) -`ExtractResult` is a namedtuple, so it's simple to access the parts you want. - - >>> ext = tldextract.extract('http://forums.bbc.co.uk') - >>> (ext.subdomain, ext.domain, ext.suffix) - ('forums', 'bbc', 'co.uk') - >>> # rejoin subdomain and domain - >>> '.'.join(ext[:2]) - 'forums.bbc' - >>> # a common alias - >>> ext.registered_domain - 'bbc.co.uk' - Note subdomain and suffix are _optional_. Not all URL-like inputs have a subdomain or a valid suffix. @@ -37,16 +25,13 @@ >>> tldextract.extract('http://127.0.0.1:8080/deployed/') ExtractResult(subdomain='', domain='127.0.0.1', suffix='', is_private=False) -If you want to rejoin the whole namedtuple, regardless of whether a subdomain -or suffix were found: +To rejoin the original hostname, if it was indeed a valid, registered hostname: - >>> ext = tldextract.extract('http://127.0.0.1:8080/deployed/') - >>> # this has unwanted dots - >>> '.'.join(part for part in ext[:3]) - '.127.0.0.1.' - >>> # join part only if truthy - >>> '.'.join(part for part in ext[:3] if part) - '127.0.0.1' + >>> ext = tldextract.extract('http://forums.bbc.co.uk') + >>> ext.registered_domain + 'bbc.co.uk' + >>> ext.fqdn + 'forums.bbc.co.uk' """ from __future__ import annotations @@ -55,10 +40,8 @@ import os import urllib.parse from collections.abc import Collection, Sequence +from dataclasses import dataclass from functools import wraps -from typing import ( - NamedTuple, -) import idna @@ -77,13 +60,17 @@ ) -class ExtractResult(NamedTuple): - """namedtuple of a URL's subdomain, domain, suffix, and flag that indicates if URL has private suffix.""" +@dataclass(order=True) +class ExtractResult: + """A URL's extracted subdomain, domain, and suffix. + + Also contains metadata, like a flag that indicates if the URL has a private suffix. + """ subdomain: str domain: str suffix: str - is_private: bool = False + is_private: bool @property def registered_domain(self) -> str: @@ -110,7 +97,7 @@ def fqdn(self) -> str: '' """ if self.suffix and (self.domain or self.is_private): - return ".".join(i for i in self[:3] if i) + return ".".join(i for i in (self.subdomain, self.domain, self.suffix) if i) return "" @property @@ -291,7 +278,7 @@ def _extract_netloc( and netloc_with_ascii_dots[-1] == "]" ): if looks_like_ipv6(netloc_with_ascii_dots[1:-1]): - return ExtractResult("", netloc_with_ascii_dots, "") + return ExtractResult("", netloc_with_ascii_dots, "", is_private=False) labels = netloc_with_ascii_dots.split(".")