Skip to content

Commit

Permalink
More robust suffix extraction
Browse files Browse the repository at this point in the history
Fix #193
  • Loading branch information
Yomguithereal committed Oct 18, 2023
1 parent 8f499b6 commit 254b3a9
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 0 deletions.
5 changes: 5 additions & 0 deletions test/tld_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
("http://facebook.com/whatever", "facebook.com"),
("facebook.com/whatever", "facebook.com"),
("notatld.blablablou", None),
("", None),
]

IS_VALID_TLD_TESTS = [
Expand All @@ -25,18 +26,21 @@
(".megalol", False),
(".xn--fiqs8s", True),
("中国", True),
("", False),
]

# Table of (input string, boolean) cases. Judging by the name, the boolean is
# presumably the expected result of a "has valid TLD" check -- confirm against
# the test that consumes this table.
# Inputs cover a full URL with a scheme, a bare host, an unknown ending, and
# the empty string (paired with False).
HAS_VALID_TLD_TESTS = [
    ("http://facebook.com/whatever", True),
    ("lemonde.showtime", True),
    ("https://test.idontexistlol", False),
    ("", False),
]

# Table of (input string, boolean) cases. Judging by the name, the boolean is
# presumably the expected result of a "has valid suffix" check -- confirm
# against the test that consumes this table.
# Inputs cover a URL ending in the multi-label "co.uk", a bare host, an
# unknown ending, and the empty string (paired with False).
HAS_VALID_SUFFIX_TESTS = [
    ("http://www.google.co.uk", True),
    ("lemonde.showtime", True),
    ("notatld.blablablou", False),
    ("", False),
]

SPLIT_SUFFIX_TESTS = [
Expand All @@ -47,6 +51,7 @@
("http://192.169.1.1", None),
("http://localhost:8080", None),
("http://localhost", None),
("", None),
("http://google.co.uk", ("google", "co.uk")),
("http://www.v2.google.co.uk", ("www.v2.google", "co.uk")),
("http://хром.гугл.рф", ("хром.гугл", "рф")),
Expand Down
3 changes: 3 additions & 0 deletions ural/classes/suffix_trie.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,9 @@ def add(self, suffix, private=False):
def __walk(self, url):
parsed = safe_urlsplit(url)

if not parsed or parsed.hostname is None:
return None

if is_special_host(parsed.hostname):
return None

Expand Down

0 comments on commit 254b3a9

Please sign in to comment.