Skip to content

Commit

Permalink
feat(test): Uses hypothesis for argument / property testing
Browse files Browse the repository at this point in the history
_Attrib covered for now, more TBD. Closes #68
  • Loading branch information
abelcheung committed Nov 12, 2024
1 parent ccd8e90 commit ec203cf
Show file tree
Hide file tree
Showing 4 changed files with 483 additions and 127 deletions.
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ rt-test = [
"pytest == 8.3.3",
"beautifulsoup4 == 4.12.3",
"html5lib == 1.1",
"hypothesis[zoneinfo] == 6.116.0",
{include-group = "mypy"},
{include-group = "pyright"},
]
Expand All @@ -81,6 +82,7 @@ dev = [
'pytest-mypy-plugins >= 2.0',
'lxml >= 4.9',
'beautifulsoup4 ~= 4.8',
'hypothesis[zoneinfo] ~= 6.0',
]

[tool.pdm.version]
Expand Down
181 changes: 181 additions & 0 deletions tests/runtime/_testutils/strategy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
from __future__ import annotations

import operator
from typing import Any, Literal

import hypothesis.strategies as st
from hypothesis import note
from lxml.etree import QName


def all_instances_except_of_type(*excluded: type[Any]) -> st.SearchStrategy[Any]:
    """Return a strategy for arbitrary objects that are not instances of ``excluded``.

    A type is drawn first via ``st.from_type(type)``; an instance of that
    type is then drawn via ``st.from_type``.

    Args:
        *excluded: Types whose instances (as judged by ``isinstance``) must
            not be generated. With no arguments the strategy is unfiltered.

    Raises:
        ValueError: If ``object`` is among ``excluded``, since every value
            is an instance of ``object``.
    """
    if object in excluded:
        raise ValueError("Cannot exclude everything")

    def _is_allowed(value: Any) -> bool:
        return not isinstance(value, excluded)

    any_instance: st.SearchStrategy[Any] = st.from_type(type).flatmap(st.from_type)
    return any_instance.filter(_is_allowed) if excluded else any_instance


def all_types_except(*excluded: type, exact: bool = False) -> st.SearchStrategy[type]:
    """Return a strategy that generates types other than the excluded ones.

    Args:
        *excluded: Types to keep out of the generated values. With no
            arguments the strategy is unfiltered.
        exact: When true, only the listed types themselves are rejected.
            Otherwise (the default) any subclass of a listed type is
            rejected as well.

    Raises:
        ValueError: If ``Any`` is excluded, or if ``object`` is excluded in
            subclass mode. Every class is a subclass of ``object``, so that
            filter could never be satisfied.
    """
    if Any in excluded:  # type: ignore[comparison-overlap]
        raise ValueError("Cannot exclude everything")
    # In exact mode excluding ``object`` only removes ``object`` itself,
    # which is fine; in subclass mode it would reject every generated type.
    if object in excluded and not exact:
        raise ValueError("Cannot exclude everything")

    strategy: st.SearchStrategy[type] = st.from_type(type)
    if not excluded:
        return strategy
    if exact:
        return strategy.filter(lambda x: x not in excluded)
    return strategy.filter(lambda x: not issubclass(x, excluded))


# Although stringified XML names use colon (:) character,
# lxml uses Clark notation ({namespace}name) for accessing
# namespaced tags and attributes. Therefore we don't generate
# names with colon.
# https://www.w3.org/TR/xml-names/#Conformance
# https://www.w3.org/TR/xml/#NT-NameStartChar
# https://www.w3.org/TR/xml/#NT-NameChar
def _xml_name_unicode_char(*, start: bool) -> st.SearchStrategy[str]:
    """Return a strategy for one character usable in a colon-free XML name.

    Args:
        start: When true, only characters allowed as the first character of
            a name are produced. When false, the extra characters allowed
            in later positions (digits, ``-``, ``.``, U+00B7, combining
            marks from U+0300, U+203F and U+2040) are produced as well.

    The codepoint space is split over four sub-strategies so that the
    lower ranges come first in ``st.one_of``.
    """
    return st.one_of(  # prefers earlier strategies
        # Letters from "A" up to U+02FF, plus the permitted punctuation.
        # U+00AA, U+00B5 and U+00BA are letters but are explicitly removed.
        st.characters(
            min_codepoint=ord("A"),
            max_codepoint=0x2FF,
            categories={"L"},
            include_characters="_" if start else "_-.0123456789\xb7",
            exclude_characters="\xaa\xb5\xba",
        ),
        # U+0370 (or U+0300 for non-start positions) to U+1FFF,
        # without U+037E (Greek question mark).
        st.characters(
            min_codepoint=0x370 if start else 0x300,
            max_codepoint=0x1FFF,
            exclude_characters="\u037e",
        ),
        # U+2070 to U+218F, plus the zero-width joiners (and U+203F/U+2040
        # for non-start positions).
        st.characters(
            min_codepoint=0x2070,
            max_codepoint=0x218F,
            include_characters="\u200c\u200d" if start else "\u200c\u200d\u203f\u2040",
        ),
        # U+2C00 to U+EFFFF without surrogates, private-use and unassigned
        # codepoints, and without U+2FF0 to U+3000.
        st.characters(
            min_codepoint=0x2C00,
            max_codepoint=0xEFFFF,
            exclude_categories={"Cs", "Co", "Cn"},
            exclude_characters="".join(chr(c) for c in range(0x2FF0, 0x3001)),
        ),
    )


# Lxml expects 7-bit ASCII for names if supplied as bytes
def _xml_name_ascii_char(*, start: bool) -> st.SearchStrategy[str]:
    """Return a strategy for one ASCII character usable in an XML name.

    Args:
        start: When true, only upper/lower case letters and ``_`` are
            produced. When false, ``-``, ``.`` and decimal digits are
            allowed too.
    """
    if start:
        extra_chars = "_"
    else:
        extra_chars = "_-.0123456789"
    return st.characters(
        codec="ascii",
        categories={"Lu", "Ll"},
        min_codepoint=ord("A"),
        max_codepoint=ord("z"),
        include_characters=extra_chars,
    )


def xml_name_nons(
    variant: Literal["unicode", "ascii"] = "unicode",
) -> st.SearchStrategy[str]:
    """Return a strategy for XML names that carry no namespace.

    A generated name is one start character followed by at most four
    further name characters.

    Args:
        variant: ``"unicode"`` selects the full Unicode character set;
            any other value selects the ASCII-only character set.
    """
    if variant == "unicode":
        make_char = _xml_name_unicode_char
    else:
        make_char = _xml_name_ascii_char
    head = make_char(start=True)
    tail = st.text(alphabet=make_char(start=False), max_size=4)
    return st.builds(operator.add, head, tail)


def xml_name(
    variant: Literal["unicode", "ascii"] = "unicode",
) -> st.SearchStrategy[str]:
    """Return a strategy for XML names, optionally namespaced in Clark notation.

    Three shapes are generated: ``{namespace}name`` with a namespace of up
    to five characters, a bare ``name``, and ``{}name`` with an empty
    namespace.

    Args:
        variant: Character set passed on to ``xml_name_nons`` and
            ``xml_legal_char``.
    """

    def _to_clark(ns: str, n: str) -> str:
        # Record both parts so a failing example shows how it was composed.
        note(f"ns: {ns}, n: {n}")
        return "{" + ns + "}" + n

    local_name = xml_name_nons(variant)
    # Exclude characters used in Clark notation
    ns_alphabet = xml_legal_char(variant).filter(lambda c: c not in "{}")
    namespace = st.text(max_size=5, alphabet=ns_alphabet)
    return st.one_of(
        st.builds(_to_clark, namespace, local_name),
        local_name,
        local_name.map(lambda x: "{}" + x),
    )


# FIXME: For future tests: QName("", "foo") raises an exception
def xml_name_arg() -> st.SearchStrategy[str | bytes | bytearray | QName]:
    """Return a strategy for XML names in every accepted argument form.

    Yields the name as ``str``, as a ``QName`` built from that ``str``,
    or as ASCII-only ``bytes`` / ``bytearray``.
    """
    text_names = xml_name()
    byte_names = xml_name("ascii").map(operator.methodcaller("encode", "ascii"))
    return st.one_of(
        text_names,
        byte_names,
        byte_names.map(bytearray),
        text_names.map(QName),
    )


# https://www.w3.org/TR/xml/#NT-Char
def xml_legal_char(
    variant: Literal["unicode", "ascii"] = "unicode",
) -> st.SearchStrategy[str]:
    """Return a strategy for a single character that is legal in XML.

    Characters start at the space character, with tab, line feed and
    carriage return added explicitly. Control, surrogate, private-use
    and unassigned codepoints are excluded.

    Args:
        variant: ``"unicode"`` restricts to characters encodable as UTF-8;
            any other value restricts to ASCII.
    """
    if variant == "unicode":
        codec = "utf-8"
    else:
        codec = "ascii"
    # TODO consider excluding following ranges too
    # [#x7F-#x84], [#x86-#x9F], [#xFDD0-#xFDEF]
    # NOTE(review): these ranges look like category Cc / Cn, which are
    # already excluded below -- confirm, then drop the TODO.
    return st.characters(
        codec=codec,
        min_codepoint=ord(" "),
        exclude_categories={"Cc", "Cs", "Co", "Cn"},
        include_characters="\t\n\r",
    )


# https://www.w3.org/TR/xml/#NT-CharRef
def xml_char_decimal_ref(
    char_strategy: st.SearchStrategy[str] = xml_legal_char(),
) -> st.SearchStrategy[str]:
    """Return a strategy for decimal character references such as ``&#65;``.

    Args:
        char_strategy: Source of the single characters to be referenced.
            Defaults to every legal XML character.
    """

    def _as_decimal_ref(i: str) -> str:
        return "&#{};".format(ord(i))

    return char_strategy.map(_as_decimal_ref)


def xml_char_hex_ref(
    char_strategy: st.SearchStrategy[str] = xml_legal_char(),
) -> st.SearchStrategy[str]:
    """Return a strategy for hexadecimal character references such as ``&#x41;``.

    Hex digits are emitted in lower case.

    Args:
        char_strategy: Source of the single characters to be referenced.
            Defaults to every legal XML character.
    """

    def _as_hex_ref(i: str) -> str:
        return "&#x{:x};".format(ord(i))

    return char_strategy.map(_as_hex_ref)


# https://www.w3.org/TR/xml/#NT-EntityRef
def xml_entity_ref(
    variant: Literal["unicode", "ascii"] = "unicode",
) -> st.SearchStrategy[str]:
    """Return a strategy for entity references of the form ``&name;``.

    Args:
        variant: Character set used for the entity name, passed on to
            ``xml_name_nons``.
    """

    def _as_entity_ref(x: str) -> str:
        return f"&{x};"

    entity_names = xml_name_nons(variant)
    return entity_names.map(_as_entity_ref)


# https://www.w3.org/TR/xml/#NT-AttValue
def xml_attr_value(
    variant: Literal["unicode", "ascii"] = "unicode",
) -> st.SearchStrategy[str]:
    """Return a strategy for XML attribute values.

    A value is the concatenation of up to five fragments. Each fragment is
    a literal character other than ``&`` and ``<``, a decimal or hex
    character reference to such a character, or an entity reference.

    Args:
        variant: Character set used for literal characters and entity names.
    """
    # "&" and "<" may not appear literally inside an attribute value.
    plain_char = xml_legal_char(variant).filter(lambda x: x not in "&<")
    fragment = st.one_of(
        plain_char,
        xml_char_decimal_ref(plain_char),
        xml_char_hex_ref(plain_char),
        xml_entity_ref(variant),
    )
    return st.lists(fragment, max_size=5).map("".join)


def xml_attr_value_arg() -> st.SearchStrategy[str | bytes | bytearray | QName]:
    """Return a strategy for attribute values in every accepted argument form.

    Yields the value as ``str``, as ASCII-only ``bytes`` / ``bytearray``,
    or as a ``QName`` built from a generated XML name.
    """
    text_values = xml_attr_value()
    byte_values = xml_attr_value("ascii").map(
        operator.methodcaller("encode", "ascii")
    )
    # Note that QName only accepts characters usable in an XML name.
    # In practice QName is not used for attribute values, but we are
    # only concerned with the annotation here.
    qnames = xml_name().map(QName)
    return st.one_of(
        text_values,
        byte_values,
        byte_values.map(bytearray),
        qnames,
    )
13 changes: 12 additions & 1 deletion tests/runtime/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

from ._testutils import mypy_adapter, pyright_adapter

pytest_plugins = ["typeguard"]
pytest_plugins = ["typeguard", "hypothesis"]
typeguard.config.forward_ref_policy = typeguard.ForwardRefPolicy.ERROR


Expand Down Expand Up @@ -157,3 +157,14 @@ def list_log() -> _e._ListErrorLog:
raise RuntimeError("Unknown error when creating error_log fixture")

return err


# For hypothesis tests, parsing a valid document takes too much time
# and triggers a HealthCheck warning, so use a simple element instead
@pytest.fixture(scope="class")
def disposable_element() -> _e._Element:
    """Provide a small hand-built ``order`` element with two attributes."""
    return _e.Element(
        "order",
        date="1900-01-01",
        id="123",
    )

@pytest.fixture(scope="class")
def disposable_attrib(disposable_element: _e._Element) -> _e._Attrib:
    """Provide the ``attrib`` object of the ``disposable_element`` fixture."""
    attrib = disposable_element.attrib
    return attrib
Loading

0 comments on commit ec203cf

Please sign in to comment.