From 3c193bed2e94f510d4d87157083ec04193edf9cb Mon Sep 17 00:00:00 2001 From: Chris T Date: Sat, 6 Mar 2021 18:40:49 +0200 Subject: [PATCH] Split requirements to extras cli, soap and lxml Closes #419 --- .github/workflows/tests.yml | 18 ++++ README.rst | 7 ++ docs/codegen.rst | 8 ++ docs/installation.rst | 19 +++- docs/wsdl.rst | 6 ++ docs/xml.rst | 24 +++-- setup.cfg | 39 ++++---- tests/integration/benchmarks/conftest.py | 63 ++++++++----- tests/integration/benchmarks/test_handlers.py | 3 + tox.ini | 4 +- xsdata/__main__.py | 15 ++++ .../dataclass/parsers/handlers/__init__.py | 20 ++++- .../dataclass/parsers/handlers/lxml.py | 90 +------------------ .../dataclass/parsers/handlers/native.py | 4 +- xsdata/formats/dataclass/parsers/mixins.py | 87 ++++++++++++++++++ xsdata/formats/dataclass/parsers/xml.py | 4 +- .../dataclass/serializers/writers/__init__.py | 19 +++- xsdata/formats/dataclass/serializers/xml.py | 4 +- 18 files changed, 287 insertions(+), 147 deletions(-) create mode 100644 xsdata/__main__.py diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 8aac29440..5af827f55 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -57,3 +57,21 @@ jobs: - name: Benchmark run: | tox -e benchmarks + minimum: + name: Minimum Installation + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-python@v2 + with: + python-version: 3.7 + - name: Install pip install . + run: | + pip install -e . + - name: Verify + run: | + python tests/integration/benchmarks/conftest.py -c XmlEventWriter + python tests/integration/benchmarks/conftest.py -c XmlEventHandler + python tests/integration/benchmarks/conftest.py -c JsonParser + python tests/integration/benchmarks/conftest.py -c JsonSerializer + xsdata | xargs -0 python -c "import sys; assert 'Install cli' in sys.argv[1]" diff --git a/README.rst b/README.rst index 09c8f3d50..cf106da29 100644 --- a/README.rst +++ b/README.rst @@ -41,6 +41,13 @@ unknown properties and to process xinclude statements. xsData is constantly tested against the `W3C XML Schema 1.1 test suite `_. +Getting started +--------------- + +.. code-block:: bash + + $ pip install xsdata[cli,lxml,soap] + .. image:: https://github.com/tefra/xsdata/raw/master/docs/_static/demo.svg Check the documentation `demos `_ or diff --git a/docs/codegen.rst b/docs/codegen.rst index 5834fa6e0..57a0311e9 100644 --- a/docs/codegen.rst +++ b/docs/codegen.rst @@ -2,6 +2,14 @@ Command Line ============ + +Make sure the cli requirements are installed. + +.. code-block:: bash + + $ pip install xsdata[cli] + + .. command-output:: xsdata --help diff --git a/docs/installation.rst b/docs/installation.rst index 448470201..196456a65 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -4,11 +4,26 @@ Getting started Install using pip ----------------- -The recommended method is to use a virtual environment +The recommended method is to use a virtual environment. .. code-block:: bash - $ pip install xsdata + $ pip install xsdata[cli,lxml,soap] + +.. hint:: + + - Install the cli requirements for the code generator + - Install the soap requirements for the builtin wsdl client + - Install lxml if you want to use one of the lxml handlers/writers instead of + the builtin python xml implementations. + +xsdata has a monthly release cycle, in order to use the latest updates you can also +install directly from the git repo. + +.. code-block:: bash + + $ pip install git+https://github.com/tefra/xsdata@master#egg=xsdata[cli,lxml] + Install using conda ------------------- diff --git a/docs/wsdl.rst b/docs/wsdl.rst index 348503e0c..2a5d58598 100644 --- a/docs/wsdl.rst +++ b/docs/wsdl.rst @@ -9,6 +9,12 @@ The code generator in addition to models derived from xml schemas will also gene dataclasses for messages and simple classes to describe the unique operations. +Make sure you install both cli and soap requirements. + +.. code-block:: console + + $ pip install xsdata[cli,soap] + .. code-block:: console $ xsdata --wsdl --package calculator http://www.dneonline.com/calculator.asmx?WSDL diff --git a/docs/xml.rst b/docs/xml.rst index d94691f7d..0689deb14 100644 --- a/docs/xml.rst +++ b/docs/xml.rst @@ -124,12 +124,18 @@ XmlHandlers read the xml source and push build events to create the target class xsData ships with multiple handlers based on lxml and native python that vary in performance and features. - >>> from xsdata.formats.dataclass.parsers.handlers import XmlEventHandler - ... - >>> parser = XmlParser(handler=XmlEventHandler) - >>> order = parser.from_path(xml_path) - >>> order.bill_to.street - '8 Oak Avenue' +.. hint:: + + If you installed xsdata with lxml the default handler is set to + :class:`~xsdata.formats.dataclass.parsers.handlers.LxmlEventHandler` otherwise + :class:`~xsdata.formats.dataclass.parsers.handlers.XmlEventHandler` will be used. + +>>> from xsdata.formats.dataclass.parsers.handlers import XmlEventHandler +... +>>> parser = XmlParser(handler=XmlEventHandler) +>>> order = parser.from_path(xml_path) +>>> order.bill_to.street +'8 Oak Avenue' .. hint:: @@ -292,6 +298,12 @@ xsData ships with multiple writers based on lxml and native python that may vary in performance in some cases. The output of all them is consistent with a few exceptions when handling mixed content with ``pretty_print=True``. +.. hint:: + + If you installed xsdata with lxml the default writer is set to + :class:`~xsdata.formats.dataclass.serializers.writers.LxmlEventWriter` otherwise + :class:`~xsdata.formats.dataclass.serializers.writers.XmlEventWriter` will be used. + .. doctest:: >>> from xsdata.formats.dataclass.serializers.writers import XmlEventWriter diff --git a/setup.cfg b/setup.cfg index 7a5c18b4a..89769cfb7 100644 --- a/setup.cfg +++ b/setup.cfg @@ -33,30 +33,22 @@ project_urls = [options] packages = xsdata install_requires = - click - click-default-group - click_log - docformatter - jinja2 - lxml - requests - toposort dataclasses;python_version<"3.7" python_requires = >=3.6 include_package_data = True [options.entry_points] console_scripts = - xsdata=xsdata.cli:cli + xsdata=xsdata.__main__:main [options.extras_require] -dev = - codecov - pre-commit - pytest - pytest-benchmark - pytest-cov - tox +cli = + click + click-default-group + click_log + docformatter + jinja2 + toposort docs = sphinx sphinx-autobuild @@ -65,6 +57,17 @@ docs = sphinx-inline-tabs sphinx-material sphinxcontrib-programoutput +lxml = + lxml +soap = + requests +test = + codecov + pre-commit + pytest + pytest-benchmark + pytest-cov + tox [flake8] exclude = tests/* @@ -76,3 +79,7 @@ max-line-length = 88 [tool:pytest] addopts = --color=yes --benchmark-skip --benchmark-columns=min,max,mean,median + +[coverage:run] +omit = + xsdata/__main__.py diff --git a/tests/integration/benchmarks/conftest.py b/tests/integration/benchmarks/conftest.py index c4deff7a7..9dbb68367 100644 --- a/tests/integration/benchmarks/conftest.py +++ b/tests/integration/benchmarks/conftest.py @@ -1,3 +1,5 @@ +import logging + from tests import xsdata_temp_dir from tests.fixtures.books import BookForm from tests.fixtures.books import Books @@ -6,6 +8,7 @@ from xsdata.formats.dataclass.parsers import XmlParser from xsdata.formats.dataclass.serializers import JsonSerializer from xsdata.formats.dataclass.serializers import XmlSerializer +from xsdata.logger import logger from xsdata.models.datatype import XmlDate xsdata_temp_dir.mkdir(parents=True, exist_ok=True) @@ -43,23 +46,23 @@ def make_books(how_many: int): ) -def parse(source, handler, *args): +def parse(source, handler): parser = XmlParser(context=context, handler=handler) parser.from_bytes(source, Books) -def parse_json(source, *args): +def parse_json(source): parser = JsonParser(context=context) parser.from_bytes(source, Books) -def write(size, obj, writer, *args): +def write(size, obj, writer): with xsdata_temp_dir.joinpath(f"benchmark_{size}.xml").open("w") as f: serializer = XmlSerializer(writer=writer, context=context) serializer.write(f, obj) -def write_json(size, obj, *args): +def write_json(size, obj): with xsdata_temp_dir.joinpath(f"benchmark_{size}.json").open("w") as f: serializer = JsonSerializer(context=context) serializer.write(f, obj) @@ -72,33 +75,47 @@ def write_json(size, obj, *args): from xsdata.formats.dataclass.parsers import handlers from timeit import Timer + components = [ + "LxmlEventHandler", + "LxmlSaxHandler", + "XmlEventHandler", + "XmlSaxHandler", + "LxmlEventWriter", + "XmlEventWriter", + "JsonParser", + "JsonSerializer", + ] + parser = argparse.ArgumentParser() - parser.add_argument( - "--component", default="parser", choices=["serializer", "parser"] - ) - parser.add_argument("--format", default="xml", choices=["xml", "json"]) - parser.add_argument("--handler", default="XmlEventWriter") - parser.add_argument("--number", default=1000, type=int) - parser.add_argument("--repeat", default=10, type=int) + parser.add_argument("-c", "--component", choices=components, required=True) + parser.add_argument("-n", "--number", default=1000, type=int) + parser.add_argument("-r", "--repeat", default=10, type=int) args = parser.parse_args() - if args.component == "serializer": - func = write if args.format == "xml" else write_json - writer = getattr(writers, args.handler) + if args.component in writers.__all__: + component = getattr(writers, args.component) books = make_books(args.number) - t = Timer(lambda: func(args.number, books, writer)) - - elif args.component == "parser": - func = parse if args.format == "xml" else parse_json - handler = getattr(handlers, args.handler) - fixture = xsdata_temp_dir.joinpath(f"benchmark_{args.number}.{args.format}") + t = Timer(lambda: write(args.number, books, component)) + elif args.component in handlers.__all__: + fixture = xsdata_temp_dir.joinpath(f"benchmark_{args.number}.xml") + if not fixture.exists(): + write(args.number, make_books(args.number), writers.XmlEventWriter) + component = getattr(handlers, args.component) + t = Timer(lambda: parse(fixture.read_bytes(), component)) + elif args.component == "JsonParser": + component = JsonParser + fixture = xsdata_temp_dir.joinpath(f"benchmark_{args.number}.json") if not fixture.exists(): - write_func = write if args.format == "xml" else write_json - write_func(args.number, make_books(args.number), writers.LxmlEventWriter) + write_json(args.number, make_books(args.number)) - t = Timer(lambda: func(fixture.read_bytes(), handler)) + t = Timer(lambda: parse_json(fixture.read_bytes())) + elif args.component == "JsonSerializer": + component = JsonSerializer + books = make_books(args.number) + t = Timer(lambda: write_json(args.number, books)) + print(f"Benchmark {component.__name__} - n{args.number}/r{args.repeat}") result = t.repeat(repeat=args.repeat, number=1) print("avg {}".format(statistics.mean(result))) print("med {}".format(statistics.median(result))) diff --git a/tests/integration/benchmarks/test_handlers.py b/tests/integration/benchmarks/test_handlers.py index 1cdb4e515..0bbe051f0 100644 --- a/tests/integration/benchmarks/test_handlers.py +++ b/tests/integration/benchmarks/test_handlers.py @@ -14,6 +14,9 @@ readers_list = list(readers.__all__) writers_list = list(writers.__all__) +readers_list.remove("default_handler") +writers_list.remove("default_writer") + random.shuffle(readers_list) random.shuffle(writers_list) diff --git a/tox.ini b/tox.ini index 6d5ecac83..f9c1a88b3 100644 --- a/tox.ini +++ b/tox.ini @@ -3,7 +3,7 @@ envlist = py36,py37,py38,py39,pypy3 skip_missing_interpreters = true [testenv] -extras = dev +extras = test,cli,soap,lxml commands = pytest --cov=./xsdata --cov-branch --doctest-glob="docs/*.rst" [testenv:benchmarks] @@ -11,7 +11,7 @@ commands = pytest --benchmark-only tests/integration/benchmarks [testenv:docs] basepython = python3.7 -extras = docs +extras = docs,cli changedir = docs commands = xsdata init-config examples/config.sample.xml diff --git a/xsdata/__main__.py b/xsdata/__main__.py new file mode 100644 index 000000000..a4fb12120 --- /dev/null +++ b/xsdata/__main__.py @@ -0,0 +1,15 @@ +import sys + + +def main(): + try: + from xsdata.cli import cli + + cli() + except ImportError: + print('Install cli requirements "pip install xsdata[cli]"') + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/xsdata/formats/dataclass/parsers/handlers/__init__.py b/xsdata/formats/dataclass/parsers/handlers/__init__.py index 55be5bf85..2b93221a4 100644 --- a/xsdata/formats/dataclass/parsers/handlers/__init__.py +++ b/xsdata/formats/dataclass/parsers/handlers/__init__.py @@ -1,11 +1,27 @@ -from xsdata.formats.dataclass.parsers.handlers.lxml import LxmlEventHandler -from xsdata.formats.dataclass.parsers.handlers.lxml import LxmlSaxHandler +from typing import Type + from xsdata.formats.dataclass.parsers.handlers.native import XmlEventHandler from xsdata.formats.dataclass.parsers.handlers.native import XmlSaxHandler +from xsdata.formats.dataclass.parsers.mixins import XmlHandler + +try: + from xsdata.formats.dataclass.parsers.handlers.lxml import LxmlEventHandler + from xsdata.formats.dataclass.parsers.handlers.lxml import LxmlSaxHandler + + def default_handler() -> Type[XmlHandler]: + return LxmlEventHandler + + +except ImportError: # pragma: no cover + + def default_handler() -> Type[XmlHandler]: + return XmlEventHandler + __all__ = [ "LxmlEventHandler", "LxmlSaxHandler", "XmlEventHandler", "XmlSaxHandler", + "default_handler", ] diff --git a/xsdata/formats/dataclass/parsers/handlers/lxml.py b/xsdata/formats/dataclass/parsers/handlers/lxml.py index 5a8ee50a5..0c5bea44f 100644 --- a/xsdata/formats/dataclass/parsers/handlers/lxml.py +++ b/xsdata/formats/dataclass/parsers/handlers/lxml.py @@ -1,14 +1,11 @@ from dataclasses import dataclass -from dataclasses import field from typing import Any -from typing import Dict from typing import Iterable -from typing import List -from typing import Optional from lxml import etree from xsdata.exceptions import XmlHandlerError +from xsdata.formats.dataclass.parsers.mixins import SaxHandler from xsdata.formats.dataclass.parsers.mixins import XmlHandler from xsdata.models.enums import EventType @@ -77,7 +74,7 @@ def process_context(self, context: Iterable) -> Any: @dataclass -class LxmlSaxHandler(XmlHandler): +class LxmlSaxHandler(SaxHandler): """ Sax content handler based on :class:`lxml.etree.XMLParser` api. @@ -89,10 +86,6 @@ class LxmlSaxHandler(XmlHandler): eg [(qname, object)] """ - # Scope vars - data_frames: List = field(init=False, default_factory=list) - flush_next: Optional[str] = field(init=False, default=None) - def parse(self, source: Any) -> Any: """ Parse an XML document from a system identifier or an InputSource. @@ -116,82 +109,3 @@ def parse(self, source: Any) -> Any: ) return etree.parse(source, parser=parser) # nosec - - def start(self, qname: str, attrs: Dict, ns_map: Dict): - """ - Start element notification receiver. - - The receiver will flush any previous active element, append a - new data frame to collect data content for the next active - element and notify the main parser to prepare for next binding - instruction. - - :param qname: Qualified name - :param attrs: Attribute key-value map - :param ns_map: Namespace prefix-URI map - """ - self.flush() - self.data_frames.append(([], [])) - self.parser.start( - self.clazz, - self.queue, - self.objects, - qname, - attrs, - self.start_ns_bulk(ns_map), - ) - - def end(self, qname: str): - """ - End element notification receiver. - - The receiver will flush any previous active element and set the - next element to be flushed. - - :param qname: Qualified name - """ - self.flush() - self.flush_next = qname - - def close(self) -> Any: - """ - Close document notification receiver. - - The receiver will flush any previous active element and return - the first item in the objects stack. - """ - try: - self.flush() - return self.objects[0][1] - except IndexError: - return None - - def flush(self): - """ - Flush element notification receiver. - - The receiver will check if there is an active element present, - collect and join the data frames for text/tail content and - notify the main parser to finish the binding process for the - element. - """ - if self.flush_next: - data = self.data_frames.pop() - text = "".join(data[0]) if data[0] else None - tail = "".join(data[1]) if data[1] else None - - self.parser.end(self.queue, self.objects, self.flush_next, text, tail) - self.flush_next = None - - def data(self, data: str): - """ - Data notification receiver. - - The receiver will append the given data content in the current - data frame either in the text position 0 or in the tail position - 1 whether the element has ended or not. - - :param data: Text or tail content - """ - index = 0 if self.flush_next is None else 1 - self.data_frames[-1][index].append(data) diff --git a/xsdata/formats/dataclass/parsers/handlers/native.py b/xsdata/formats/dataclass/parsers/handlers/native.py index c10df7910..04a23d63c 100644 --- a/xsdata/formats/dataclass/parsers/handlers/native.py +++ b/xsdata/formats/dataclass/parsers/handlers/native.py @@ -9,7 +9,7 @@ from xml.etree.ElementTree import iterparse from xsdata.exceptions import XmlHandlerError -from xsdata.formats.dataclass.parsers.handlers import LxmlSaxHandler +from xsdata.formats.dataclass.parsers.mixins import SaxHandler from xsdata.formats.dataclass.parsers.mixins import XmlHandler from xsdata.models.enums import EventType from xsdata.utils.namespaces import build_qname @@ -80,7 +80,7 @@ def process_context(self, context: Iterable) -> Any: @dataclass -class XmlSaxHandler(LxmlSaxHandler, sax.handler.ContentHandler): +class XmlSaxHandler(SaxHandler, sax.handler.ContentHandler): """Sax content handler based on native python.""" # Scope vars diff --git a/xsdata/formats/dataclass/parsers/mixins.py b/xsdata/formats/dataclass/parsers/mixins.py index 866a9f821..80e1787a3 100644 --- a/xsdata/formats/dataclass/parsers/mixins.py +++ b/xsdata/formats/dataclass/parsers/mixins.py @@ -144,6 +144,93 @@ def start_ns_bulk(self, ns_map: Dict) -> Dict: return result +@dataclass +class SaxHandler(XmlHandler): + + # Scope vars + data_frames: List = field(init=False, default_factory=list) + flush_next: Optional[str] = field(init=False, default=None) + + def start(self, qname: str, attrs: Dict, ns_map: Dict): + """ + Start element notification receiver. + + The receiver will flush any previous active element, append a + new data frame to collect data content for the next active + element and notify the main parser to prepare for next binding + instruction. + + :param qname: Qualified name + :param attrs: Attribute key-value map + :param ns_map: Namespace prefix-URI map + """ + self.flush() + self.data_frames.append(([], [])) + self.parser.start( + self.clazz, + self.queue, + self.objects, + qname, + attrs, + self.start_ns_bulk(ns_map), + ) + + def end(self, qname: str): + """ + End element notification receiver. + + The receiver will flush any previous active element and set the + next element to be flushed. + + :param qname: Qualified name + """ + self.flush() + self.flush_next = qname + + def close(self) -> Any: + """ + Close document notification receiver. + + The receiver will flush any previous active element and return + the first item in the objects stack. + """ + try: + self.flush() + return self.objects[0][1] + except IndexError: + return None + + def flush(self): + """ + Flush element notification receiver. + + The receiver will check if there is an active element present, + collect and join the data frames for text/tail content and + notify the main parser to finish the binding process for the + element. + """ + if self.flush_next: + data = self.data_frames.pop() + text = "".join(data[0]) if data[0] else None + tail = "".join(data[1]) if data[1] else None + + self.parser.end(self.queue, self.objects, self.flush_next, text, tail) + self.flush_next = None + + def data(self, data: str): + """ + Data notification receiver. + + The receiver will append the given data content in the current + data frame either in the text position 0 or in the tail position + 1 whether the element has ended or not. + + :param data: Text or tail content + """ + index = 0 if self.flush_next is None else 1 + self.data_frames[-1][index].append(data) + + @dataclass class EventsHandler(XmlHandler): """ diff --git a/xsdata/formats/dataclass/parsers/xml.py b/xsdata/formats/dataclass/parsers/xml.py index 41428f02b..e65cedd42 100644 --- a/xsdata/formats/dataclass/parsers/xml.py +++ b/xsdata/formats/dataclass/parsers/xml.py @@ -6,7 +6,7 @@ from typing import Optional from typing import Type -from xsdata.formats.dataclass.parsers.handlers import LxmlEventHandler +from xsdata.formats.dataclass.parsers.handlers import default_handler from xsdata.formats.dataclass.parsers.mixins import XmlHandler from xsdata.formats.dataclass.parsers.mixins import XmlNode from xsdata.formats.dataclass.parsers.nodes import NodeParser @@ -28,7 +28,7 @@ class XmlParser(NodeParser): :ivar emit_cache: Qname to event name cache """ - handler: Type[XmlHandler] = field(default=LxmlEventHandler) + handler: Type[XmlHandler] = field(default=default_handler()) emit_cache: Dict = field(init=False, default_factory=dict) def start( diff --git a/xsdata/formats/dataclass/serializers/writers/__init__.py b/xsdata/formats/dataclass/serializers/writers/__init__.py index b62bb8eb8..b9204977e 100644 --- a/xsdata/formats/dataclass/serializers/writers/__init__.py +++ b/xsdata/formats/dataclass/serializers/writers/__init__.py @@ -1,4 +1,19 @@ -from xsdata.formats.dataclass.serializers.writers.lxml import LxmlEventWriter +from typing import Type + +from xsdata.formats.dataclass.serializers.mixins import XmlWriter from xsdata.formats.dataclass.serializers.writers.native import XmlEventWriter -__all__ = ["LxmlEventWriter", "XmlEventWriter"] +try: + from xsdata.formats.dataclass.serializers.writers.lxml import LxmlEventWriter + + def default_writer() -> Type[XmlWriter]: + return LxmlEventWriter + + +except ImportError: # pragma: no cover + + def default_writer() -> Type[XmlWriter]: + return XmlEventWriter + + +__all__ = ["LxmlEventWriter", "XmlEventWriter", "default_writer"] diff --git a/xsdata/formats/dataclass/serializers/xml.py b/xsdata/formats/dataclass/serializers/xml.py index 4f4a09a2c..95db4acd2 100644 --- a/xsdata/formats/dataclass/serializers/xml.py +++ b/xsdata/formats/dataclass/serializers/xml.py @@ -23,7 +23,7 @@ from xsdata.formats.dataclass.serializers.config import SerializerConfig from xsdata.formats.dataclass.serializers.mixins import XmlWriter from xsdata.formats.dataclass.serializers.mixins import XmlWriterEvent -from xsdata.formats.dataclass.serializers.writers import LxmlEventWriter +from xsdata.formats.dataclass.serializers.writers import default_writer from xsdata.models.enums import DataType from xsdata.models.enums import QNames from xsdata.utils import namespaces @@ -45,7 +45,7 @@ class XmlSerializer(AbstractSerializer): config: SerializerConfig = field(default_factory=SerializerConfig) context: XmlContext = field(default_factory=XmlContext) - writer: Type[XmlWriter] = field(default=LxmlEventWriter) + writer: Type[XmlWriter] = field(default=default_writer()) def render(self, obj: Any, ns_map: Optional[Dict] = None) -> str: """