adjust mypy

adbar · Feb 20, 2024 · 91c6959
1 parent 957e973
commit 91c6959
Show file tree

Hide file tree

Showing 4 changed files with 21 additions and 19 deletions.
diff --git a/htmldate/cli.py b/htmldate/cli.py
@@ -10,7 +10,9 @@
 import sys
 
 from platform import python_version
-from typing import Any, Optional
+from typing import Any, Optional, Union
+
+from lxml.html import HtmlElement
 
 from . import __version__
 from .core import find_date
@@ -19,7 +21,7 @@
 
 
 def examine(
-    htmlstring: Optional[str],
+    htmlstring: Union[str, HtmlElement],
     extensive_bool: bool = True,
     original_date: bool = False,
     verbose_flag: bool = False,
@@ -116,7 +118,7 @@ def process_args(args: Any) -> None:
             for line in inputfile:
                 htmltext = fetch_url(line.strip())
                 result = examine(
-                    htmltext,
+                    htmltext,  # type: ignore[arg-type]
                     extensive_bool=args.fast,
                     original_date=args.original,
                     verbose_flag=args.verbose,

diff --git a/htmldate/core.py b/htmldate/core.py
@@ -16,7 +16,7 @@
 from functools import lru_cache, partial
 from typing import Match, Optional, Pattern, Union, Counter as Counter_Type
 
-from lxml.html import HtmlElement, tostring  # type: ignore
+from lxml.html import HtmlElement, tostring
 
 # own
 from .extractors import (
@@ -78,7 +78,7 @@
 
 def logstring(element: HtmlElement) -> str:
     """Format the element to be logged to a string."""
-    return tostring(element, pretty_print=False, encoding="unicode").strip()  # type: ignore
+    return tostring(element, pretty_print=False, encoding="unicode").strip()
 
 
 DATE_ATTRIBUTES = {
@@ -271,7 +271,7 @@ def examine_header(
             continue
         # name attribute, most frequent
         if "name" in elem.attrib:
-            attribute = elem.get("name").lower()
+            attribute = elem.get("name", "").lower()
             # url
             if attribute == "og:url":
                 reserve = extract_url_date(elem.get("content"), options)
@@ -288,7 +288,7 @@ def examine_header(
                     reserve = tryfunc(elem.get("content"))
         # property attribute
         elif "property" in elem.attrib:
-            attribute = elem.get("property").lower()
+            attribute = elem.get("property", "").lower()
             if attribute in DATE_ATTRIBUTES or attribute in PROPERTY_MODIFIED:
                 LOGGER.debug("examining meta property: %s", logstring(elem))
                 attempt = tryfunc(elem.get("content"))
@@ -302,7 +302,7 @@ def examine_header(
                         reserve = attempt
         # itemprop
         elif "itemprop" in elem.attrib:
-            attribute = elem.get("itemprop").lower()
+            attribute = elem.get("itemprop", "").lower()
             # original: store / updated: override date
             if attribute in ITEMPROP_ATTRS:
                 LOGGER.debug("examining meta itemprop: %s", logstring(elem))
@@ -320,19 +320,19 @@ def examine_header(
             elif attribute == "copyrightyear":
                 LOGGER.debug("examining meta itemprop: %s", logstring(elem))
                 if "content" in elem.attrib:
-                    attempt = "-".join([elem.get("content"), "01", "01"])
+                    attempt = "-".join([elem.get("content", ""), "01", "01"])
                     if is_valid_date(
                         attempt, "%Y-%m-%d", earliest=options.min, latest=options.max
                     ):
                         reserve = attempt
         # pubdate, relatively rare
         elif "pubdate" in elem.attrib:
-            if elem.get("pubdate").lower() == "pubdate":
+            if elem.get("pubdate", "").lower() == "pubdate":
                 LOGGER.debug("examining meta pubdate: %s", logstring(elem))
                 headerdate = tryfunc(elem.get("content"))
         # http-equiv, rare
         elif "http-equiv" in elem.attrib:
-            attribute = elem.get("http-equiv").lower()
+            attribute = elem.get("http-equiv", "").lower()
             if attribute == "date":
                 LOGGER.debug("examining meta http-equiv: %s", logstring(elem))
                 if options.original:
@@ -456,7 +456,7 @@ def examine_abbr_elements(
             # data-utime (mostly Facebook)
             if "data-utime" in elem.attrib:
                 try:
-                    candidate = int(elem.get("data-utime"))
+                    candidate = int(elem.get("data-utime", ""))
                 except ValueError:
                     continue
                 LOGGER.debug("data-utime found: %s", candidate)
@@ -515,7 +515,7 @@ def examine_time_elements(
         for elem in elements:
             shortcut_flag = False
             # go for datetime
-            if "datetime" in elem.attrib and len(elem.get("datetime")) > 6:
+            if len(elem.get("datetime", "")) > 6:
                 # shortcut: time pubdate
                 if (
                     "pubdate" in elem.attrib
@@ -529,8 +529,8 @@ def examine_time_elements(
                 # shortcuts: class attribute
                 elif "class" in elem.attrib:
                     if options.original and (
-                        elem.get("class").startswith("entry-date")
-                        or elem.get("class").startswith("entry-time")
+                        elem.get("class", "").startswith("entry-date")
+                        or elem.get("class", "").startswith("entry-time")
                     ):
                         shortcut_flag = True
                         LOGGER.debug(
@@ -815,7 +815,7 @@ def search_page(htmlstring: str, options: Extractor) -> Optional[str]:
 
 
 def find_date(
-    htmlobject: HtmlElement,
+    htmlobject: Union[bytes, str, HtmlElement],
     extensive_search: bool = True,
     original_date: bool = False,
     outputformat: str = "%Y-%m-%d",

diff --git a/htmldate/extractors.py b/htmldate/extractors.py
@@ -21,7 +21,7 @@
 from dateutil.parser import parse as dateutil_parse
 
 from lxml.etree import XPath
-from lxml.html import HtmlElement  # type: ignore
+from lxml.html import HtmlElement
 
 # own
 from .settings import CACHE_SIZE

diff --git a/htmldate/utils.py b/htmldate/utils.py
@@ -23,7 +23,7 @@
     cchardet_detect = None
 from charset_normalizer import from_bytes
 
-from lxml.html import HtmlElement, HTMLParser, fromstring  # type: ignore
+from lxml.html import HtmlElement, HTMLParser, fromstring
 
 from .settings import MAX_FILE_SIZE, MIN_FILE_SIZE
 
@@ -248,7 +248,7 @@ def load_html(htmlobject: Union[bytes, str, HtmlElement]) -> Optional[HtmlElemen
 
 def clean_html(tree: HtmlElement, elemlist: List[str]) -> HtmlElement:
     "Delete selected elements."
-    for element in tree.iter(elemlist):
+    for element in tree.iter(elemlist):  # type: ignore[call-overload]
         try:
             element.drop_tree()
         except AttributeError:  # pragma: no cover