From a18229b9b1f1db6530dec0bd7c6e48f26a0a5f9f Mon Sep 17 00:00:00 2001 From: Mateusz Jakub Fila <37295697+m-fila@users.noreply.github.com> Date: Tue, 14 May 2024 21:55:02 +0200 Subject: [PATCH] Add pythonizations for collection subscript (#570) * added pythonizer base class * added collection subscript pythonization * Update python/podio/pythonizations/__init__.py Co-authored-by: Juan Miguel Carceller <22276694+jmcarcell@users.noreply.github.com> * collection `__getitem__` uses `at` wrapped to throw python exception * fix exception stacktrace readability * Update python/podio/pythonizations/collection_subscript.py Co-authored-by: Thomas Madlener * split pythonization callback to predicate and modifcation * added documentation * Applied suggestions in docs Co-authored-by: Thomas Madlener --------- Co-authored-by: Juan Miguel Carceller <22276694+jmcarcell@users.noreply.github.com> Co-authored-by: Thomas Madlener --- README.md | 1 + doc/index.rst | 1 + doc/python.md | 56 ++++++++++++++++++ python/podio/pythonizations/__init__.py | 15 +++++ .../pythonizations/collection_subscript.py | 26 +++++++++ python/podio/pythonizations/utils/__init__.py | 0 .../podio/pythonizations/utils/pythonizer.py | 57 +++++++++++++++++++ python/podio/test_CodeGen.py | 18 ++++++ 8 files changed, 174 insertions(+) create mode 100644 doc/python.md create mode 100644 python/podio/pythonizations/__init__.py create mode 100644 python/podio/pythonizations/collection_subscript.py create mode 100644 python/podio/pythonizations/utils/__init__.py create mode 100644 python/podio/pythonizations/utils/pythonizer.py diff --git a/README.md b/README.md index 445924871..75c89fdec 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,7 @@ - [Data Model Syntax](./doc/datamodel_syntax.md) - [Examples](./doc/examples.md) - [Advanced Topics](./doc/advanced_topics.md) + - [Python Interface](./doc/python.md) - [Contributing](./doc/contributing.md) diff --git a/doc/index.rst b/doc/index.rst index 9c56def77..706e039d0 
100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -17,5 +17,6 @@ Welcome to PODIO's documentation! userdata.md advanced_topics.md templates.md + python.md cpp_api/api py_api/modules diff --git a/doc/python.md b/doc/python.md new file mode 100644 index 000000000..2ae04ea02 --- /dev/null +++ b/doc/python.md @@ -0,0 +1,56 @@ +# Python interface for data models + +Podio provides support for a Python interface for the generated data models. The [design choice](design.md) to create a Python interface resembling the C++ interface is achieved by generating Python bindings from the C++ interface using +[cppyy](https://cppyy.readthedocs.io/en/latest/index.html). To make pyROOT aware of the bindings, the cppyy functionality bundled with ROOT can be used. + +It's important to note that cppyy loads the bindings and presents them lazily at runtime to the Python interpreter, rather than writing Python interface files. Consequently, the Python bindings have runtime dependencies on ROOT, cppyy and the data model's C++ interface. + +To load the Python bindings from a generated C++ model dictionary, first make sure the model's library and headers can be found in `LD_LIBRARY_PATH` and `ROOT_INCLUDE_PATH` respectively, then: + +```python +import ROOT + +res = ROOT.gSystem.Load('libGeneratedModelDict.so') +if res < 0: + raise RuntimeError('Failed to load libGeneratedModelDict.so') +``` + +For reference usage, see the [Python module of EDM4hep](https://github.com/key4hep/EDM4hep/blob/main/python/edm4hep/__init__.py). + +## Pythonizations + +Python as a language uses different constructions and conventions than C++, so perfectly fine C++ code translated one-to-one to Python can be clunky by Python's standards. cppyy offers a mechanism called [pythonizations](https://cppyy.readthedocs.io/en/latest/pythonizations.html) to make the resulting bindings more pythonic. 
Some basic pythonizations are included automatically (for instance `operator[]` is translated to `__getitem__`) but others can be specified by a user. + +Podio comes with its own set of pythonizations useful for the data models generated with it. To apply all the provided pythonizations to a `model_namespace` namespace: + +```python +from podio.pythonizations import load_pythonizations + +load_pythonizations("model_namespace") +``` + +If only specific pythonizations should be applied: + +```python +from podio.pythonizations import collection_subscript # specific pythonization + +collection_subscript.CollectionSubscriptPythonizer.register("model_namespace") +``` + +### Developing new pythonizations + +To be discovered by `load_pythonizations`, any new pythonization should be placed in `podio.pythonizations` and be derived from the abstract class `podio.pythonizations.utils.pythonizer.Pythonizer`. + +A pythonization class should implement the following three class methods: + +- `priority`: The `load_pythonizations` function applies the pythonizations in increasing order of their `priority`. +- `filter`: A predicate selecting the classes to which a given pythonization should be applied. See the [cppyy documentation](https://cppyy.readthedocs.io/en/latest/pythonizations.html#python-callbacks). +- `modify`: Applying the modifications to the pythonized classes. + +### Considerations + +The cppyy pythonizations come with some considerations: + +- The general cppyy idea to lazily load only things that are needed applies only partially to the pythonizations. For instance, a pythonization modifying the `collection[]` will be applied the first time a class of `collection` is used, regardless of whether `collection[]` is actually used. +- Each pythonization is applied to all the entities in a namespace and relies on a conditional mechanism (`filter` method) inside the pythonizations to select entities they modify. 
def load_pythonizations(namespace):
    """Register all available pythonizations for a given namespace.

    Every module of this package whose own name does not start with ``test_``
    is imported so that the pythonizer classes it defines exist. Each concrete
    ``Pythonizer`` subclass (direct or indirect) is then registered for
    ``namespace`` in increasing order of its ``priority()``.

    Args:
        namespace (str): Name of the namespace to be pythonized.
    """
    # Walk with a fully qualified prefix. Without it pkgutil.walk_packages
    # imports subpackages by their bare name (e.g. ``utils``) to discover their
    # path, which either fails silently or pulls in an unrelated top-level
    # module that happens to share the name.
    for _, module_name, _ in walk_packages(__path__, prefix=__name__ + "."):
        # Only the leaf name decides whether a module is a test module
        if module_name.rpartition(".")[2].startswith("test_"):
            continue
        import_module(module_name)

    def concrete_subclasses(base):
        """Yield every non-abstract direct or indirect subclass of base"""
        for subclass in base.__subclasses__():
            # Abstract intermediates have no usable priority(); skip them but
            # still descend into their children
            if not getattr(subclass, "__abstractmethods__", None):
                yield subclass
            yield from concrete_subclasses(subclass)

    # dict.fromkeys drops duplicates (diamond inheritance) while keeping the
    # discovery order, so pythonizers of equal priority stay stably ordered
    discovered = dict.fromkeys(concrete_subclasses(Pythonizer))
    for pythonizer in sorted(discovered, key=lambda x: x.priority()):
        pythonizer.register(namespace)
class Pythonizer(metaclass=ABCMeta):
    """
    Common interface of the cppyy pythonizations defined for podio

    A concrete pythonizer provides ``priority``, ``filter`` and ``modify``;
    ``register`` hooks ``filter`` and ``modify`` into cppyy for a namespace.
    """

    @classmethod
    @abstractmethod
    def priority(cls):
        """Position of this pythonization in the order of application

        Returns:
            int: Priority
        """

    @classmethod
    @abstractmethod
    def filter(cls, class_, name):
        """
        Abstract classmethod deciding whether a class should be pythonized

        Args:
            class_ (type): Class object.
            name (str): Name of the class.

        Returns:
            bool: True if class should be pythonized.
        """

    @classmethod
    @abstractmethod
    def modify(cls, class_, name):
        """Abstract classmethod applying the modifications to a selected class

        Args:
            class_ (type): Class object.
            name (str): Name of the class.
        """

    @classmethod
    def register(cls, namespace):
        """Helper method to apply the pythonization to the given namespace

        Args:
            namespace (str): Namespace to be pythonized
        """

        def apply_when_selected(class_, name):
            # Leave classes rejected by the predicate untouched
            if not cls.filter(class_, name):
                return
            cls.modify(class_, name)

        cppyy.py.add_pythonization(apply_when_selected, namespace)