From 181f6dcdbcd0e25d0d5bf059744f765192a77f7b Mon Sep 17 00:00:00 2001 From: yozachar <38415384+yozachar@users.noreply.github.com> Date: Thu, 18 Apr 2024 05:30:40 +0530 Subject: [PATCH] fix: rfc cases in the `domain` validator --- src/validators/domain.py | 14 +++++++++----- tests/test_domain.py | 4 ++++ 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/src/validators/domain.py b/src/validators/domain.py index 0e8c0c48..43ed981f 100644 --- a/src/validators/domain.py +++ b/src/validators/domain.py @@ -59,17 +59,21 @@ def domain( return False try: - return not re.search(r"\s", value) and re.match( + + service_record = r"_" if rfc_2782 else "" + trailing_dot = r"\.?$" if rfc_1034 else r"$" + + return not re.search(r"\s|__+", value) and re.match( # First character of the domain - rf"^(?:[a-z0-9{r'_?'if rfc_2782 else ''}]" + rf"^(?:[a-z0-9{service_record}]" # Sub-domain - + rf"(?:[a-z0-9-{r'_?'if rfc_2782 else ''}]{{0,61}}" + + rf"(?:[a-z0-9-{service_record}]{{0,61}}" # Hostname - + rf"[a-z0-9{r'_?'if rfc_2782 else ''}])?\.)" + + rf"[a-z0-9{service_record}])?\.)" # First 61 characters of the gTLD + r"+[a-z0-9][a-z0-9-_]{0,61}" # Last character of the gTLD - + rf"[a-z]{r'.?$' if rfc_1034 else r'$'}", + + rf"[a-z]{trailing_dot}", value.encode("idna").decode("utf-8"), re.IGNORECASE, ) diff --git a/tests/test_domain.py b/tests/test_domain.py index 21aadf90..6d8e8675 100644 --- a/tests/test_domain.py +++ b/tests/test_domain.py @@ -20,6 +20,7 @@ ("3.cn.", True, False), ("_example.com", False, True), ("example_.com", False, True), + ("_exa_mple_.com", False, True), ("a.cn", False, False), ("sub1.sub2.sample.co.uk", False, False), ("somerandomexample.xn--fiqs8s", False, False), @@ -67,6 +68,9 @@ def test_returns_true_on_valid_top_level_domain( ("_example._com", False, False), ("example_.com", False, False), ("example", False, False), + ("example.com!", True, False), + ("example?.com", True, False), + ("__exa__mple__.com", False, True), ("a......b.com", False, False), ("a.123", False, False), ("123.123", False, False),