diff --git a/htmldate/core.py b/htmldate/core.py
index f6fe7bb4..a0546a52 100644
--- a/htmldate/core.py
+++ b/htmldate/core.py
@@ -218,19 +218,17 @@ def examine_date_elements(
"analyzing (HTML): %s",
" ".join(logstring(elem).split())[:100],
)
- attempt = try_date_expr(
+ if attempt := try_date_expr(
text, outputformat, extensive_search, min_date, max_date
- )
- if attempt:
+ ):
return attempt
# try link title (Blogspot)
title_attr = trim_text(elem.get("title", ""))
if len(title_attr) > MIN_SEGMENT_LEN:
title_attr = NON_DIGITS_REGEX.sub("", title_attr[:MAX_SEGMENT_LEN])
- attempt = try_date_expr(
+ if attempt := try_date_expr(
title_attr, outputformat, extensive_search, min_date, max_date
- )
- if attempt:
+ ):
return attempt
return None
@@ -404,8 +402,7 @@ def select_candidate(
years = [""] * len(bestones)
validation = [False] * len(bestones)
for i, pattern in enumerate(patterns):
- year_match = yearpat.search(pattern)
- if year_match:
+ if year_match := yearpat.search(pattern):
years[i] = year_match[1]
dateobject = datetime(int(year_match[1]), 1, 1)
validation[i] = date_validator(
@@ -642,7 +639,7 @@ def normalize_match(match: Optional[Match[str]]) -> str:
and optionally expand the year from two to four digits."""
day, month, year = (g.zfill(2) for g in match.groups() if g) # type: ignore[union-attr]
if len(year) == 2:
- year = "19" + year if year[0] == "9" else "20" + year
+ year = f"19{year}" if year[0] == "9" else f"20{year}"
return f"{year}-{month}-{day}"
@@ -870,7 +867,7 @@ def search_page(
match = TWO_COMP_REGEX.match(item)
month = match[1] # type: ignore[index]
if len(month) == 1:
- month = "0" + month
+ month = f"0{month}"
candidate = "-".join([match[2], month, "01"]) # type: ignore[index]
replacement[candidate] = candidates[item]
candidates = Counter(replacement)
@@ -923,9 +920,11 @@ def search_page(
if bestmatch is not None:
dateobject = datetime(int(bestmatch[0]), 1, 1)
if (
- date_validator(dateobject, "%Y-%m-%d", earliest=min_date, latest=max_date)
+ date_validator(
+ dateobject, "%Y-%m-%d", earliest=min_date, latest=max_date
+ )
is True
- and int(dateobject.year) >= copyear
+ and dateobject.year >= copyear
):
LOGGER.debug(
'date found for pattern "%s": %s', SIMPLE_PATTERN, bestmatch[0]
diff --git a/htmldate/extractors.py b/htmldate/extractors.py
index 02ec01fd..5a7a1531 100644
--- a/htmldate/extractors.py
+++ b/htmldate/extractors.py
@@ -3,6 +3,7 @@
Custom parsers and XPath expressions for date extraction
"""
+
## This file is available from https://github.com/adbar/htmldate
## under GNU GPL v3 license
@@ -85,7 +86,7 @@
# or contains(@class, 'article')
# or contains(@id, 'lastmod') or contains(@class, 'updated')
-FREE_TEXT_EXPRESSIONS = FAST_PREPEND + "/text()"
+FREE_TEXT_EXPRESSIONS = f"{FAST_PREPEND}/text()"
MIN_SEGMENT_LEN = 6
MAX_SEGMENT_LEN = 52
@@ -230,8 +231,7 @@ def extract_url_date(
testurl: str, outputformat: str, min_date: datetime, max_date: datetime
) -> Optional[str]:
"""Extract the date out of an URL string complying with the Y-M-D format"""
- match = COMPLETE_URL.search(testurl)
- if match:
+ if match := COMPLETE_URL.search(testurl):
LOGGER.debug("found date in URL: %s", match[0])
try:
dateobject = datetime(int(match[1]), int(match[2]), int(match[3]))
@@ -326,9 +326,7 @@ def custom_parse(
LOGGER.debug("parsing result: %s", candidate)
return candidate.strftime(outputformat)
- # 2. Try YYYYMMDD, use regex
- match = YMD_NO_SEP_PATTERN.search(string)
- if match:
+ if match := YMD_NO_SEP_PATTERN.search(string):
try:
year, month, day = int(match[1][:4]), int(match[1][4:6]), int(match[1][6:8])
candidate = datetime(year, month, day)
@@ -344,9 +342,7 @@ def custom_parse(
LOGGER.debug("YYYYMMDD match: %s", candidate)
return candidate.strftime(outputformat)
- # 3. Try the very common YMD, Y-M-D, and D-M-Y patterns
- match = YMD_PATTERN.search(string)
- if match:
+ if match := YMD_PATTERN.search(string):
try:
if match.lastgroup == "day":
year, month, day = (
@@ -373,9 +369,7 @@ def custom_parse(
LOGGER.debug("regex match: %s", candidate)
return candidate.strftime(outputformat)
- # 4. Try the Y-M and M-Y patterns
- match = YM_PATTERN.search(string)
- if match:
+ if match := YM_PATTERN.search(string):
try:
if match.lastgroup == "month":
candidate = datetime(
@@ -524,14 +518,13 @@ def idiosyncrasies_search(
htmlstring: str, outputformat: str, min_date: datetime, max_date: datetime
) -> Optional[str]:
"""Look for author-written dates throughout the web page"""
- match = TEXT_PATTERNS.search(htmlstring) # EN+DE+TR
- if match:
+ if match := TEXT_PATTERNS.search(htmlstring):
parts = list(filter(None, match.groups()))
if len(parts) == 3:
candidate = None
if len(parts[0]) == 4:
candidate = datetime(int(parts[0]), int(parts[1]), int(parts[2]))
- elif len(parts[2]) in (2, 4):
+ elif len(parts[2]) in {2, 4}:
# DD/MM/YY
day, month = try_swap_values(int(parts[0]), int(parts[1]))
year = correct_year(int(parts[2]))