Skip to content

Commit

Permalink
adjust mypy
Browse files: browse the repository at this point in the history
  • Loading branch information
adbar committed Feb 20, 2024
1 parent 957e973 commit 91c6959
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 19 deletions.
8 changes: 5 additions & 3 deletions htmldate/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@
import sys

from platform import python_version
from typing import Any, Optional
from typing import Any, Optional, Union

from lxml.html import HtmlElement

from . import __version__
from .core import find_date
Expand All @@ -19,7 +21,7 @@


def examine(
htmlstring: Optional[str],
htmlstring: Union[str, HtmlElement],
extensive_bool: bool = True,
original_date: bool = False,
verbose_flag: bool = False,
Expand Down Expand Up @@ -116,7 +118,7 @@ def process_args(args: Any) -> None:
for line in inputfile:
htmltext = fetch_url(line.strip())
result = examine(
htmltext,
htmltext, # type: ignore[arg-type]
extensive_bool=args.fast,
original_date=args.original,
verbose_flag=args.verbose,
Expand Down
26 changes: 13 additions & 13 deletions htmldate/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from functools import lru_cache, partial
from typing import Match, Optional, Pattern, Union, Counter as Counter_Type

from lxml.html import HtmlElement, tostring # type: ignore
from lxml.html import HtmlElement, tostring

# own
from .extractors import (
Expand Down Expand Up @@ -78,7 +78,7 @@

def logstring(element: HtmlElement) -> str:
"""Format the element to be logged to a string."""
return tostring(element, pretty_print=False, encoding="unicode").strip() # type: ignore
return tostring(element, pretty_print=False, encoding="unicode").strip()


DATE_ATTRIBUTES = {
Expand Down Expand Up @@ -271,7 +271,7 @@ def examine_header(
continue
# name attribute, most frequent
if "name" in elem.attrib:
attribute = elem.get("name").lower()
attribute = elem.get("name", "").lower()
# url
if attribute == "og:url":
reserve = extract_url_date(elem.get("content"), options)
Expand All @@ -288,7 +288,7 @@ def examine_header(
reserve = tryfunc(elem.get("content"))
# property attribute
elif "property" in elem.attrib:
attribute = elem.get("property").lower()
attribute = elem.get("property", "").lower()
if attribute in DATE_ATTRIBUTES or attribute in PROPERTY_MODIFIED:
LOGGER.debug("examining meta property: %s", logstring(elem))
attempt = tryfunc(elem.get("content"))
Expand All @@ -302,7 +302,7 @@ def examine_header(
reserve = attempt
# itemprop
elif "itemprop" in elem.attrib:
attribute = elem.get("itemprop").lower()
attribute = elem.get("itemprop", "").lower()
# original: store / updated: override date
if attribute in ITEMPROP_ATTRS:
LOGGER.debug("examining meta itemprop: %s", logstring(elem))
Expand All @@ -320,19 +320,19 @@ def examine_header(
elif attribute == "copyrightyear":
LOGGER.debug("examining meta itemprop: %s", logstring(elem))
if "content" in elem.attrib:
attempt = "-".join([elem.get("content"), "01", "01"])
attempt = "-".join([elem.get("content", ""), "01", "01"])
if is_valid_date(
attempt, "%Y-%m-%d", earliest=options.min, latest=options.max
):
reserve = attempt
# pubdate, relatively rare
elif "pubdate" in elem.attrib:
if elem.get("pubdate").lower() == "pubdate":
if elem.get("pubdate", "").lower() == "pubdate":
LOGGER.debug("examining meta pubdate: %s", logstring(elem))
headerdate = tryfunc(elem.get("content"))
# http-equiv, rare
elif "http-equiv" in elem.attrib:
attribute = elem.get("http-equiv").lower()
attribute = elem.get("http-equiv", "").lower()
if attribute == "date":
LOGGER.debug("examining meta http-equiv: %s", logstring(elem))
if options.original:
Expand Down Expand Up @@ -456,7 +456,7 @@ def examine_abbr_elements(
# data-utime (mostly Facebook)
if "data-utime" in elem.attrib:
try:
candidate = int(elem.get("data-utime"))
candidate = int(elem.get("data-utime", ""))
except ValueError:
continue
LOGGER.debug("data-utime found: %s", candidate)
Expand Down Expand Up @@ -515,7 +515,7 @@ def examine_time_elements(
for elem in elements:
shortcut_flag = False
# go for datetime
if "datetime" in elem.attrib and len(elem.get("datetime")) > 6:
if len(elem.get("datetime", "")) > 6:
# shortcut: time pubdate
if (
"pubdate" in elem.attrib
Expand All @@ -529,8 +529,8 @@ def examine_time_elements(
# shortcuts: class attribute
elif "class" in elem.attrib:
if options.original and (
elem.get("class").startswith("entry-date")
or elem.get("class").startswith("entry-time")
elem.get("class", "").startswith("entry-date")
or elem.get("class", "").startswith("entry-time")
):
shortcut_flag = True
LOGGER.debug(
Expand Down Expand Up @@ -815,7 +815,7 @@ def search_page(htmlstring: str, options: Extractor) -> Optional[str]:


def find_date(
htmlobject: HtmlElement,
htmlobject: Union[bytes, str, HtmlElement],
extensive_search: bool = True,
original_date: bool = False,
outputformat: str = "%Y-%m-%d",
Expand Down
2 changes: 1 addition & 1 deletion htmldate/extractors.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from dateutil.parser import parse as dateutil_parse

from lxml.etree import XPath
from lxml.html import HtmlElement # type: ignore
from lxml.html import HtmlElement

# own
from .settings import CACHE_SIZE
Expand Down
4 changes: 2 additions & 2 deletions htmldate/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
cchardet_detect = None
from charset_normalizer import from_bytes

from lxml.html import HtmlElement, HTMLParser, fromstring # type: ignore
from lxml.html import HtmlElement, HTMLParser, fromstring

from .settings import MAX_FILE_SIZE, MIN_FILE_SIZE

Expand Down Expand Up @@ -248,7 +248,7 @@ def load_html(htmlobject: Union[bytes, str, HtmlElement]) -> Optional[HtmlElemen

def clean_html(tree: HtmlElement, elemlist: List[str]) -> HtmlElement:
"Delete selected elements."
for element in tree.iter(elemlist):
for element in tree.iter(elemlist): # type: ignore[call-overload]
try:
element.drop_tree()
except AttributeError: # pragma: no cover
Expand Down

0 comments on commit 91c6959

Please sign in to comment.