diff --git a/pyperformance/data-files/benchmarks/MANIFEST b/pyperformance/data-files/benchmarks/MANIFEST index 3fb05160..fd2fedef 100644 --- a/pyperformance/data-files/benchmarks/MANIFEST +++ b/pyperformance/data-files/benchmarks/MANIFEST @@ -75,6 +75,7 @@ richards richards_super scimark spectral_norm +sphinx sqlalchemy_declarative sqlalchemy_imperative sqlglot diff --git a/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/about.rst b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/about.rst new file mode 100644 index 00000000..5e6160ff --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/about.rst @@ -0,0 +1,38 @@ +===================== +About these documents +===================== + + +These documents are generated from `reStructuredText`_ sources by `Sphinx`_, a +document processor specifically written for the Python documentation. + +.. _reStructuredText: https://docutils.sourceforge.io/rst.html +.. _Sphinx: https://www.sphinx-doc.org/ + +.. In the online version of these documents, you can submit comments and suggest + changes directly on the documentation pages. + +Development of the documentation and its toolchain is an entirely volunteer +effort, just like Python itself. If you want to contribute, please take a +look at the :ref:`reporting-bugs` page for information on how to do so. New +volunteers are always welcome! + +Many thanks go to: + +* Fred L. Drake, Jr., the creator of the original Python documentation toolset + and writer of much of the content; +* the `Docutils <https://docutils.sourceforge.io/>`_ project for creating + reStructuredText and the Docutils suite; +* Fredrik Lundh for his Alternative Python Reference project from which Sphinx + got many good ideas. + + +Contributors to the Python Documentation +---------------------------------------- + +Many people have contributed to the Python language, the Python standard +library, and the Python documentation.
See :source:`Misc/ACKS` in the Python +source distribution for a partial list of contributors. + +It is only with the input and contributions of the Python community +that Python has such wonderful documentation -- Thank You! diff --git a/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/bugs.rst b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/bugs.rst new file mode 100644 index 00000000..9aff2f0f --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/bugs.rst @@ -0,0 +1,112 @@ +.. _reporting-bugs: + +***************** +Dealing with Bugs +***************** + +Python is a mature programming language which has established a reputation for +stability. In order to maintain this reputation, the developers would like to +know of any deficiencies you find in Python. + +It can be sometimes faster to fix bugs yourself and contribute patches to +Python as it streamlines the process and involves less people. Learn how to +:ref:`contribute <contributing-to-python>`. + +Documentation bugs +================== + +If you find a bug in this documentation or would like to propose an improvement, +please submit a bug report on the :ref:`tracker <using-the-tracker>`. If you +have a suggestion on how to fix it, include that as well. + +You can also open a discussion item on our +`Documentation Discourse forum <https://discuss.python.org/c/documentation/26>`_. + +If you find a bug in the theme (HTML / CSS / JavaScript) of the +documentation, please submit a bug report on the `python-doc-theme bug +tracker <https://github.com/python/python-docs-theme>`_. + +If you're short on time, you can also email documentation bug reports to +docs@python.org (behavioral bugs can be sent to python-list@python.org). +'docs@' is a mailing list run by volunteers; your request will be noticed, +though it may take a while to be processed. + +.. seealso:: + + `Documentation bugs`_ + A list of documentation bugs that have been submitted to the Python issue tracker. + + `Issue Tracking <https://devguide.python.org/tracker/>`_ + Overview of the process involved in reporting an improvement on the tracker.
+ + `Helping with Documentation <https://devguide.python.org/docquality/#helping-with-documentation>`_ + Comprehensive guide for individuals that are interested in contributing to Python documentation. + + `Documentation Translations <https://devguide.python.org/documentation/translating/>`_ + A list of GitHub pages for documentation translation and their primary contacts. + + +.. _using-the-tracker: + +Using the Python issue tracker +============================== + +Issue reports for Python itself should be submitted via the GitHub issues +tracker (https://github.com/python/cpython/issues). +The GitHub issues tracker offers a web form which allows pertinent information +to be entered and submitted to the developers. + +The first step in filing a report is to determine whether the problem has +already been reported. The advantage in doing so, aside from saving the +developers' time, is that you learn what has been done to fix it; it may be that +the problem has already been fixed for the next release, or additional +information is needed (in which case you are welcome to provide it if you can!). +To do this, search the tracker using the search box at the top of the page. + +If the problem you're reporting is not already in the list, log in to GitHub. +If you don't already have a GitHub account, create a new account using the +"Sign up" link. +It is not possible to submit a bug report anonymously. + +Being now logged in, you can submit an issue. +Click on the "New issue" button in the top bar to report a new issue. + +The submission form has two fields, "Title" and "Comment". + +For the "Title" field, enter a *very* short description of the problem; +fewer than ten words is good. + +In the "Comment" field, describe the problem in detail, including what you +expected to happen and what did happen. Be sure to include whether any +extension modules were involved, and what hardware and software platform you +were using (including version information as appropriate). + +Each issue report will be reviewed by a developer who will determine what needs to +be done to correct the problem.
You will receive an update each time an action is +taken on the issue. + + +.. seealso:: + + `How to Report Bugs Effectively <https://www.chiark.greenend.org.uk/~sgtatham/bugs.html>`_ + Article which goes into some detail about how to create a useful bug report. + This describes what kind of information is useful and why it is useful. + + `Bug Writing Guidelines <https://bugzilla.mozilla.org/page.cgi?id=bug-writing.html>`_ + Information about writing a good bug report. Some of this is specific to the + Mozilla project, but describes general good practices. + +.. _contributing-to-python: + +Getting started contributing to Python yourself +=============================================== + +Beyond just reporting bugs that you find, you are also welcome to submit +patches to fix them. You can find more information on how to get started +patching Python in the `Python Developer's Guide`_. If you have questions, +the `core-mentorship mailing list`_ is a friendly place to get answers to +any and all questions pertaining to the process of fixing issues in Python. + +.. _Documentation bugs: https://github.com/python/cpython/issues?q=is%3Aissue+is%3Aopen+label%3Adocs +.. _Python Developer's Guide: https://devguide.python.org/ +.. _core-mentorship mailing list: https://mail.python.org/mailman3/lists/core-mentorship.python.org/ diff --git a/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/conf.py b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/conf.py new file mode 100644 index 00000000..8e505836 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/conf.py @@ -0,0 +1,56 @@ +import os +import sys + +sys.path.append(os.path.abspath('tools/extensions')) + +extensions = [ + 'pyspecific', + 'sphinx.ext.extlinks', +] + +manpages_url = 'https://manpages.debian.org/{path}' + +# General substitutions. +project = 'Python' +copyright = f"2001, Python Software Foundation" + +version = release = sys.version.split(" ", 1)[0] + +rst_epilog = f""" +.. |python_version_literal| replace:: ``Python {version}`` +.. |python_x_dot_y_literal| replace:: ``python{version}`` +..
|usr_local_bin_python_x_dot_y_literal| replace:: ``/usr/local/bin/python{version}`` +""" + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +today = '' +# Else, today_fmt is used as the format for a strftime call. +today_fmt = '%B %d, %Y' + +# By default, highlight as Python 3. +highlight_language = 'python3' + +# Minimum version of sphinx required +needs_sphinx = '6.2.1' + +# Create table of contents entries for domain objects (e.g. functions, classes, +# attributes, etc.). Default is True. +toc_object_entries = False + +# Disable Docutils smartquotes for several translations +smartquotes_excludes = { + 'languages': ['ja', 'fr', 'zh_TW', 'zh_CN'], + 'builders': ['man', 'text'], +} + +# Avoid a warning with Sphinx >= 4.0 +root_doc = 'contents' + +extlinks = { + "cve": ("https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-%s", "CVE-%s"), + "cwe": ("https://cwe.mitre.org/data/definitions/%s.html", "CWE-%s"), + "pypi": ("https://pypi.org/project/%s/", "%s"), + "source": ('https://github.com/python/cpython/tree/3.13/%s', "%s"), +} +extlinks_detect_hardcoded_links = True diff --git a/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/constraints.txt b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/constraints.txt new file mode 100644 index 00000000..26ac1862 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/constraints.txt @@ -0,0 +1,26 @@ +# We have upper bounds on our transitive dependencies here +# To avoid new releases unexpectedly breaking our build. +# This file can be updated on an ad-hoc basis, +# though it will probably have to be updated +# whenever Doc/requirements.txt is updated. 
+ +# Direct dependencies of Sphinx +babel<3 +colorama<0.5 +imagesize<2 +Jinja2<4 +packaging<25 +Pygments<3 +requests<3 +snowballstemmer<3 +# keep lower-bounds until Sphinx 8.1 is released +# https://github.com/sphinx-doc/sphinx/pull/12756 +sphinxcontrib-applehelp>=1.0.7,<3 +sphinxcontrib-devhelp>=1.0.6,<3 +sphinxcontrib-htmlhelp>=2.0.6,<3 +sphinxcontrib-jsmath>=1.0.1,<2 +sphinxcontrib-qthelp>=1.0.6,<3 +sphinxcontrib-serializinghtml>=1.1.9,<3 + +# Direct dependencies of Jinja2 (Jinja is a dependency of Sphinx, see above) +MarkupSafe<3 diff --git a/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/contents.rst b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/contents.rst new file mode 100644 index 00000000..b57f4b09 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/contents.rst @@ -0,0 +1,23 @@ +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + Python Documentation contents +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +.. toctree:: + + whatsnew/index.rst + tutorial/index.rst + using/index.rst + reference/index.rst + library/index.rst + extending/index.rst + c-api/index.rst + installing/index.rst + howto/index.rst + faq/index.rst + deprecations/index.rst + glossary.rst + + about.rst + bugs.rst + copyright.rst + license.rst diff --git a/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/copyright.rst b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/copyright.rst new file mode 100644 index 00000000..8629ed1f --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/copyright.rst @@ -0,0 +1,19 @@ +********* +Copyright +********* + +Python and this documentation is: + +Copyright © 2001-2024 Python Software Foundation. All rights reserved. + +Copyright © 2000 BeOpen.com. All rights reserved. + +Copyright © 1995-2000 Corporation for National Research Initiatives. All rights +reserved. + +Copyright © 1991-1995 Stichting Mathematisch Centrum. All rights reserved. 
+ +------- + +See :ref:`history-and-license` for complete license and permissions information. + diff --git a/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/glossary.rst b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/glossary.rst new file mode 100644 index 00000000..97cee075 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/glossary.rst @@ -0,0 +1,1313 @@ +.. _glossary: + +******** +Glossary +******** + +.. if you add new entries, keep the alphabetical sorting! + +.. glossary:: + + ``>>>`` + The default Python prompt of the :term:`interactive` shell. Often + seen for code examples which can be executed interactively in the + interpreter. + + ``...`` + Can refer to: + + * The default Python prompt of the :term:`interactive` shell when entering the + code for an indented code block, when within a pair of matching left and + right delimiters (parentheses, square brackets, curly braces or triple + quotes), or after specifying a decorator. + + * The :const:`Ellipsis` built-in constant. + + abstract base class + Abstract base classes complement :term:`duck-typing` by + providing a way to define interfaces when other techniques like + :func:`hasattr` would be clumsy or subtly wrong (for example with + :ref:`magic methods <special-lookup>`). ABCs introduce virtual + subclasses, which are classes that don't inherit from a class but are + still recognized by :func:`isinstance` and :func:`issubclass`; see the + :mod:`abc` module documentation. Python comes with many built-in ABCs for + data structures (in the :mod:`collections.abc` module), numbers (in the + :mod:`numbers` module), streams (in the :mod:`io` module), import finders + and loaders (in the :mod:`importlib.abc` module). You can create your own + ABCs with the :mod:`abc` module. + + annotation + A label associated with a variable, a class + attribute or a function parameter or return value, + used by convention as a :term:`type hint`.
+ + Annotations of local variables cannot be accessed at runtime, but + annotations of global variables, class attributes, and functions + are stored in the :attr:`__annotations__` + special attribute of modules, classes, and functions, + respectively. + + See :term:`variable annotation`, :term:`function annotation`, :pep:`484` + and :pep:`526`, which describe this functionality. + Also see :ref:`annotations-howto` + for best practices on working with annotations. + + argument + A value passed to a :term:`function` (or :term:`method`) when calling the + function. There are two kinds of argument: + + * :dfn:`keyword argument`: an argument preceded by an identifier (e.g. + ``name=``) in a function call or passed as a value in a dictionary + preceded by ``**``. For example, ``3`` and ``5`` are both keyword + arguments in the following calls to :func:`complex`:: + + complex(real=3, imag=5) + complex(**{'real': 3, 'imag': 5}) + + * :dfn:`positional argument`: an argument that is not a keyword argument. + Positional arguments can appear at the beginning of an argument list + and/or be passed as elements of an :term:`iterable` preceded by ``*``. + For example, ``3`` and ``5`` are both positional arguments in the + following calls:: + + complex(3, 5) + complex(*(3, 5)) + + Arguments are assigned to the named local variables in a function body. + See the :ref:`calls` section for the rules governing this assignment. + Syntactically, any expression can be used to represent an argument; the + evaluated value is assigned to the local variable. + + See also the :term:`parameter` glossary entry, the FAQ question on + :ref:`the difference between arguments and parameters + <faq-argument-vs-parameter>`, and :pep:`362`. + + asynchronous context manager + An object which controls the environment seen in an + :keyword:`async with` statement by defining :meth:`~object.__aenter__` and + :meth:`~object.__aexit__` methods. Introduced by :pep:`492`.
+ + asynchronous generator + A function which returns an :term:`asynchronous generator iterator`. It + looks like a coroutine function defined with :keyword:`async def` except + that it contains :keyword:`yield` expressions for producing a series of + values usable in an :keyword:`async for` loop. + + Usually refers to an asynchronous generator function, but may refer to an + *asynchronous generator iterator* in some contexts. In cases where the + intended meaning isn't clear, using the full terms avoids ambiguity. + + An asynchronous generator function may contain :keyword:`await` + expressions as well as :keyword:`async for`, and :keyword:`async with` + statements. + + asynchronous generator iterator + An object created by a :term:`asynchronous generator` function. + + This is an :term:`asynchronous iterator` which when called using the + :meth:`~object.__anext__` method returns an awaitable object which will execute + the body of the asynchronous generator function until the next + :keyword:`yield` expression. + + Each :keyword:`yield` temporarily suspends processing, remembering the + location execution state (including local variables and pending + try-statements). When the *asynchronous generator iterator* effectively + resumes with another awaitable returned by :meth:`~object.__anext__`, it + picks up where it left off. See :pep:`492` and :pep:`525`. + + asynchronous iterable + An object, that can be used in an :keyword:`async for` statement. + Must return an :term:`asynchronous iterator` from its + :meth:`~object.__aiter__` method. Introduced by :pep:`492`. + + asynchronous iterator + An object that implements the :meth:`~object.__aiter__` and :meth:`~object.__anext__` + methods. :meth:`~object.__anext__` must return an :term:`awaitable` object. + :keyword:`async for` resolves the awaitables returned by an asynchronous + iterator's :meth:`~object.__anext__` method until it raises a + :exc:`StopAsyncIteration` exception. Introduced by :pep:`492`. 
+ + attribute + A value associated with an object which is usually referenced by name + using dotted expressions. + For example, if an object *o* has an attribute + *a* it would be referenced as *o.a*. + + It is possible to give an object an attribute whose name is not an + identifier as defined by :ref:`identifiers`, for example using + :func:`setattr`, if the object allows it. + Such an attribute will not be accessible using a dotted expression, + and would instead need to be retrieved with :func:`getattr`. + + awaitable + An object that can be used in an :keyword:`await` expression. Can be + a :term:`coroutine` or an object with an :meth:`~object.__await__` method. + See also :pep:`492`. + + BDFL + Benevolent Dictator For Life, a.k.a. `Guido van Rossum + <https://gvanrossum.github.io/>`_, Python's creator. + + binary file + A :term:`file object` able to read and write + :term:`bytes-like objects <bytes-like object>`. + Examples of binary files are files opened in binary mode (``'rb'``, + ``'wb'`` or ``'rb+'``), :data:`sys.stdin.buffer <sys.stdin>`, + :data:`sys.stdout.buffer <sys.stdout>`, and instances of + :class:`io.BytesIO` and :class:`gzip.GzipFile`. + + See also :term:`text file` for a file object able to read and write + :class:`str` objects. + + borrowed reference + In Python's C API, a borrowed reference is a reference to an object, + where the code using the object does not own the reference. + It becomes a dangling + pointer if the object is destroyed. For example, a garbage collection can + remove the last :term:`strong reference` to the object and so destroy it. + + Calling :c:func:`Py_INCREF` on the :term:`borrowed reference` is + recommended to convert it to a :term:`strong reference` in-place, except + when the object cannot be destroyed before the last usage of the borrowed + reference. The :c:func:`Py_NewRef` function can be used to create a new + :term:`strong reference`. + + bytes-like object + An object that supports the :ref:`bufferobjects` and can + export a C-:term:`contiguous` buffer.
This includes all :class:`bytes`, + :class:`bytearray`, and :class:`array.array` objects, as well as many + common :class:`memoryview` objects. Bytes-like objects can + be used for various operations that work with binary data; these include + compression, saving to a binary file, and sending over a socket. + + Some operations need the binary data to be mutable. The documentation + often refers to these as "read-write bytes-like objects". Example + mutable buffer objects include :class:`bytearray` and a + :class:`memoryview` of a :class:`bytearray`. + Other operations require the binary data to be stored in + immutable objects ("read-only bytes-like objects"); examples + of these include :class:`bytes` and a :class:`memoryview` + of a :class:`bytes` object. + + bytecode + Python source code is compiled into bytecode, the internal representation + of a Python program in the CPython interpreter. The bytecode is also + cached in ``.pyc`` files so that executing the same file is + faster the second time (recompilation from source to bytecode can be + avoided). This "intermediate language" is said to run on a + :term:`virtual machine` that executes the machine code corresponding to + each bytecode. Do note that bytecodes are not expected to work between + different Python virtual machines, nor to be stable between Python + releases. + + A list of bytecode instructions can be found in the documentation for + :ref:`the dis module <bytecodes>`. + + callable + A callable is an object that can be called, possibly with a set + of arguments (see :term:`argument`), with the following syntax:: + + callable(argument1, argument2, argumentN) + + A :term:`function`, and by extension a :term:`method`, is a callable. + An instance of a class that implements the :meth:`~object.__call__` + method is also a callable. + + callback + A subroutine function which is passed as an argument to be executed at + some point in the future. + + class + A template for creating user-defined objects.
Class definitions + normally contain method definitions which operate on instances of the + class. + + class variable + A variable defined in a class and intended to be modified only at + class level (i.e., not in an instance of the class). + + complex number + An extension of the familiar real number system in which all numbers are + expressed as a sum of a real part and an imaginary part. Imaginary + numbers are real multiples of the imaginary unit (the square root of + ``-1``), often written ``i`` in mathematics or ``j`` in + engineering. Python has built-in support for complex numbers, which are + written with this latter notation; the imaginary part is written with a + ``j`` suffix, e.g., ``3+1j``. To get access to complex equivalents of the + :mod:`math` module, use :mod:`cmath`. Use of complex numbers is a fairly + advanced mathematical feature. If you're not aware of a need for them, + it's almost certain you can safely ignore them. + + context manager + An object which controls the environment seen in a :keyword:`with` + statement by defining :meth:`~object.__enter__` and :meth:`~object.__exit__` methods. + See :pep:`343`. + + context variable + A variable which can have different values depending on its context. + This is similar to Thread-Local Storage in which each execution + thread may have a different value for a variable. However, with context + variables, there may be several contexts in one execution thread and the + main usage for context variables is to keep track of variables in + concurrent asynchronous tasks. + See :mod:`contextvars`. + + contiguous + .. index:: C-contiguous, Fortran contiguous + + A buffer is considered contiguous exactly if it is either + *C-contiguous* or *Fortran contiguous*. Zero-dimensional buffers are + C and Fortran contiguous. In one-dimensional arrays, the items + must be laid out in memory next to each other, in order of + increasing indexes starting from zero. 
In multidimensional + C-contiguous arrays, the last index varies the fastest when + visiting items in order of memory address. However, in + Fortran contiguous arrays, the first index varies the fastest. + + coroutine + Coroutines are a more generalized form of subroutines. Subroutines are + entered at one point and exited at another point. Coroutines can be + entered, exited, and resumed at many different points. They can be + implemented with the :keyword:`async def` statement. See also + :pep:`492`. + + coroutine function + A function which returns a :term:`coroutine` object. A coroutine + function may be defined with the :keyword:`async def` statement, + and may contain :keyword:`await`, :keyword:`async for`, and + :keyword:`async with` keywords. These were introduced + by :pep:`492`. + + CPython + The canonical implementation of the Python programming language, as + distributed on `python.org <https://www.python.org>`_. The term "CPython" + is used when necessary to distinguish this implementation from others + such as Jython or IronPython. + + decorator + A function returning another function, usually applied as a function + transformation using the ``@wrapper`` syntax. Common examples for + decorators are :func:`classmethod` and :func:`staticmethod`. + + The decorator syntax is merely syntactic sugar, the following two + function definitions are semantically equivalent:: + + def f(arg): + ... + f = staticmethod(f) + + @staticmethod + def f(arg): + ... + + The same concept exists for classes, but is less commonly used there. See + the documentation for :ref:`function definitions <function>` and + :ref:`class definitions <class>` for more about decorators. + + descriptor + Any object which defines the methods :meth:`~object.__get__`, + :meth:`~object.__set__`, or :meth:`~object.__delete__`. + When a class attribute is a descriptor, its special + binding behavior is triggered upon attribute lookup.
Normally, using + *a.b* to get, set or delete an attribute looks up the object named *b* in + the class dictionary for *a*, but if *b* is a descriptor, the respective + descriptor method gets called. Understanding descriptors is a key to a + deep understanding of Python because they are the basis for many features + including functions, methods, properties, class methods, static methods, + and reference to super classes. + + For more information about descriptors' methods, see :ref:`descriptors` + or the :ref:`Descriptor How To Guide <descriptorhowto>`. + + dictionary + An associative array, where arbitrary keys are mapped to values. The + keys can be any object with :meth:`~object.__hash__` and + :meth:`~object.__eq__` methods. + Called a hash in Perl. + + dictionary comprehension + A compact way to process all or part of the elements in an iterable and + return a dictionary with the results. ``results = {n: n ** 2 for n in + range(10)}`` generates a dictionary containing key ``n`` mapped to + value ``n ** 2``. See :ref:`comprehensions`. + + dictionary view + The objects returned from :meth:`dict.keys`, :meth:`dict.values`, and + :meth:`dict.items` are called dictionary views. They provide a dynamic + view on the dictionary’s entries, which means that when the dictionary + changes, the view reflects these changes. To force the + dictionary view to become a full list use ``list(dictview)``. See + :ref:`dict-views`. + + docstring + A string literal which appears as the first expression in a class, + function or module. While ignored when the suite is executed, it is + recognized by the compiler and put into the :attr:`~definition.__doc__` attribute + of the enclosing class, function or module. Since it is available via + introspection, it is the canonical place for documentation of the + object.
+ + duck-typing + A programming style which does not look at an object's type to determine + if it has the right interface; instead, the method or attribute is simply + called or used ("If it looks like a duck and quacks like a duck, it + must be a duck.") By emphasizing interfaces rather than specific types, + well-designed code improves its flexibility by allowing polymorphic + substitution. Duck-typing avoids tests using :func:`type` or + :func:`isinstance`. (Note, however, that duck-typing can be complemented + with :term:`abstract base classes <abstract base class>`.) Instead, it + typically employs :func:`hasattr` tests or :term:`EAFP` programming. + + EAFP + Easier to ask for forgiveness than permission. This common Python coding + style assumes the existence of valid keys or attributes and catches + exceptions if the assumption proves false. This clean and fast style is + characterized by the presence of many :keyword:`try` and :keyword:`except` + statements. The technique contrasts with the :term:`LBYL` style + common to many other languages such as C. + + expression + A piece of syntax which can be evaluated to some value. In other words, + an expression is an accumulation of expression elements like literals, + names, attribute access, operators or function calls which all return a + value. In contrast to many other languages, not all language constructs + are expressions. There are also :term:`statement`\s which cannot be used + as expressions, such as :keyword:`while`. Assignments are also statements, + not expressions. + + extension module + A module written in C or C++, using Python's C API to interact with the + core and with user code. + + f-string + String literals prefixed with ``'f'`` or ``'F'`` are commonly called + "f-strings" which is short for + :ref:`formatted string literals <f-strings>`. See also :pep:`498`. + + file object + An object exposing a file-oriented API (with methods such as + :meth:`!read` or :meth:`!write`) to an underlying resource.
Depending + on the way it was created, a file object can mediate access to a real + on-disk file or to another type of storage or communication device + (for example standard input/output, in-memory buffers, sockets, pipes, + etc.). File objects are also called :dfn:`file-like objects` or + :dfn:`streams`. + + There are actually three categories of file objects: raw + :term:`binary files <binary file>`, buffered + :term:`binary files <binary file>` and :term:`text files <text file>`. + Their interfaces are defined in the :mod:`io` module. The canonical + way to create a file object is by using the :func:`open` function. + + file-like object + A synonym for :term:`file object`. + + filesystem encoding and error handler + Encoding and error handler used by Python to decode bytes from the + operating system and encode Unicode to the operating system. + + The filesystem encoding must guarantee to successfully decode all bytes + below 128. If the file system encoding fails to provide this guarantee, + API functions can raise :exc:`UnicodeError`. + + The :func:`sys.getfilesystemencoding` and + :func:`sys.getfilesystemencodeerrors` functions can be used to get the + filesystem encoding and error handler. + + The :term:`filesystem encoding and error handler` are configured at + Python startup by the :c:func:`PyConfig_Read` function: see + :c:member:`~PyConfig.filesystem_encoding` and + :c:member:`~PyConfig.filesystem_errors` members of :c:type:`PyConfig`. + + See also the :term:`locale encoding`. + + finder + An object that tries to find the :term:`loader` for a module that is + being imported. + + There are two types of finder: :term:`meta path finders + <meta path finder>` for use with :data:`sys.meta_path`, and :term:`path + entry finders <path entry finder>` for use with :data:`sys.path_hooks`. + + See :ref:`importsystem` and :mod:`importlib` for much more detail. + + floor division + Mathematical division that rounds down to nearest integer. The floor + division operator is ``//``.
For example, the expression ``11 // 4`` + evaluates to ``2`` in contrast to the ``2.75`` returned by float true + division. Note that ``(-11) // 4`` is ``-3`` because that is ``-2.75`` + rounded *downward*. See :pep:`238`. + + free threading + A threading model where multiple threads can run Python bytecode + simultaneously within the same interpreter. This is in contrast to + the :term:`global interpreter lock` which allows only one thread to + execute Python bytecode at a time. See :pep:`703`. + + function + A series of statements which returns some value to a caller. It can also + be passed zero or more :term:`arguments <argument>` which may be used in + the execution of the body. See also :term:`parameter`, :term:`method`, + and the :ref:`function` section. + + function annotation + An :term:`annotation` of a function parameter or return value. + + Function annotations are usually used for + :term:`type hints <type hint>`: for example, this function is expected to take two + :class:`int` arguments and is also expected to have an :class:`int` + return value:: + + def sum_two_numbers(a: int, b: int) -> int: + return a + b + + Function annotation syntax is explained in section :ref:`function`. + + See :term:`variable annotation` and :pep:`484`, + which describe this functionality. + Also see :ref:`annotations-howto` + for best practices on working with annotations. + + __future__ + A :ref:`future statement <future>`, ``from __future__ import <feature>``, + directs the compiler to compile the current module using syntax or + semantics that will become standard in a future release of Python. + The :mod:`__future__` module documents the possible values of + *feature*.
By importing this module and evaluating its variables, + you can see when a new feature was first added to the language and + when it will (or did) become the default:: + + >>> import __future__ + >>> __future__.division + _Feature((2, 2, 0, 'alpha', 2), (3, 0, 0, 'alpha', 0), 8192) + + garbage collection + The process of freeing memory when it is not used anymore. Python + performs garbage collection via reference counting and a cyclic garbage + collector that is able to detect and break reference cycles. The + garbage collector can be controlled using the :mod:`gc` module. + + .. index:: single: generator + + generator + A function which returns a :term:`generator iterator`. It looks like a + normal function except that it contains :keyword:`yield` expressions + for producing a series of values usable in a for-loop or that can be + retrieved one at a time with the :func:`next` function. + + Usually refers to a generator function, but may refer to a + *generator iterator* in some contexts. In cases where the intended + meaning isn't clear, using the full terms avoids ambiguity. + + generator iterator + An object created by a :term:`generator` function. + + Each :keyword:`yield` temporarily suspends processing, remembering the + execution state (including local variables and pending + try-statements). When the *generator iterator* resumes, it picks up where + it left off (in contrast to functions which start fresh on every + invocation). + + .. index:: single: generator expression + + generator expression + An :term:`expression` that returns an :term:`iterator`. It looks like a normal expression + followed by a :keyword:`!for` clause defining a loop variable, range, + and an optional :keyword:`!if` clause. The combined expression + generates values for an enclosing function:: + + >>> sum(i*i for i in range(10)) # sum of squares 0, 1, 4, ...
81 + 285 + + generic function + A function composed of multiple functions implementing the same operation + for different types. Which implementation should be used during a call is + determined by the dispatch algorithm. + + See also the :term:`single dispatch` glossary entry, the + :func:`functools.singledispatch` decorator, and :pep:`443`. + + generic type + A :term:`type` that can be parameterized; typically a + :ref:`container class<sequence-types>` such as :class:`list` or + :class:`dict`. Used for :term:`type hints <type hint>` and + :term:`annotations <annotation>`. + + For more details, see :ref:`generic alias types<types-genericalias>`, + :pep:`483`, :pep:`484`, :pep:`585`, and the :mod:`typing` module. + + GIL + See :term:`global interpreter lock`. + + global interpreter lock + The mechanism used by the :term:`CPython` interpreter to assure that + only one thread executes Python :term:`bytecode` at a time. + This simplifies the CPython implementation by making the object model + (including critical built-in types such as :class:`dict`) implicitly + safe against concurrent access. Locking the entire interpreter + makes it easier for the interpreter to be multi-threaded, at the + expense of much of the parallelism afforded by multi-processor + machines. + + However, some extension modules, either standard or third-party, + are designed so as to release the GIL when doing computationally intensive + tasks such as compression or hashing. Also, the GIL is always released + when doing I/O. + + As of Python 3.13, the GIL can be disabled using the :option:`--disable-gil` + build configuration. After building Python with this option, code must be + run with :option:`-X gil 0 <-X>` or after setting the :envvar:`PYTHON_GIL=0 <PYTHON_GIL>` + environment variable. This feature enables improved performance for + multi-threaded applications and makes it easier to use multi-core CPUs + efficiently. For more details, see :pep:`703`.
+ + hash-based pyc + A bytecode cache file that uses the hash rather than the last-modified + time of the corresponding source file to determine its validity. See + :ref:`pyc-invalidation`. + + hashable + An object is *hashable* if it has a hash value which never changes during + its lifetime (it needs a :meth:`~object.__hash__` method), and can be + compared to other objects (it needs an :meth:`~object.__eq__` method). + Hashable objects which + compare equal must have the same hash value. + + Hashability makes an object usable as a dictionary key and a set member, + because these data structures use the hash value internally. + + Most of Python's immutable built-in objects are hashable; mutable + containers (such as lists or dictionaries) are not; immutable + containers (such as tuples and frozensets) are only hashable if + their elements are hashable. Objects which are + instances of user-defined classes are hashable by default. They all + compare unequal (except with themselves), and their hash value is derived + from their :func:`id`. + + IDLE + An Integrated Development and Learning Environment for Python. + :ref:`idle` is a basic editor and interpreter environment + which ships with the standard distribution of Python. + + immortal + *Immortal objects* are a CPython implementation detail introduced + in :pep:`683`. + + If an object is immortal, its :term:`reference count` is never modified, + and therefore it is never deallocated while the interpreter is running. + For example, :const:`True` and :const:`None` are immortal in CPython. + + immutable + An object with a fixed value. Immutable objects include numbers, strings and + tuples. Such an object cannot be altered. A new object has to + be created if a different value has to be stored. They play an important + role in places where a constant hash value is needed, for example as a key + in a dictionary. 
+ + import path + A list of locations (or :term:`path entries <path entry>`) that are + searched by the :term:`path based finder` for modules to import. During + import, this list of locations usually comes from :data:`sys.path`, but + for subpackages it may also come from the parent package's ``__path__`` + attribute. + + importing + The process by which Python code in one module is made available to + Python code in another module. + + importer + An object that both finds and loads a module; both a + :term:`finder` and :term:`loader` object. + + interactive + Python has an interactive interpreter which means you can enter + statements and expressions at the interpreter prompt, immediately + execute them and see their results. Just launch ``python`` with no + arguments (possibly by selecting it from your computer's main + menu). It is a very powerful way to test out new ideas or inspect + modules and packages (remember ``help(x)``). For more on interactive + mode, see :ref:`tut-interac`. + + interpreted + Python is an interpreted language, as opposed to a compiled one, + though the distinction can be blurry because of the presence of the + bytecode compiler. This means that source files can be run directly + without explicitly creating an executable which is then run. + Interpreted languages typically have a shorter development/debug cycle + than compiled ones, though their programs generally also run more + slowly. See also :term:`interactive`. + + interpreter shutdown + When asked to shut down, the Python interpreter enters a special phase + where it gradually releases all allocated resources, such as modules + and various critical internal structures. It also makes several calls + to the :term:`garbage collector <garbage collection>`. This can trigger + the execution of code in user-defined destructors or weakref callbacks.
+ Code executed during the shutdown phase can encounter various + exceptions as the resources it relies on may not function anymore + (common examples are library modules or the warnings machinery). + + The main reason for interpreter shutdown is that the ``__main__`` module + or the script being run has finished executing. + + iterable + An object capable of returning its members one at a time. Examples of + iterables include all sequence types (such as :class:`list`, :class:`str`, + and :class:`tuple`) and some non-sequence types like :class:`dict`, + :term:`file objects <file object>`, and objects of any classes you define + with an :meth:`~iterator.__iter__` method or with a + :meth:`~object.__getitem__` method + that implements :term:`sequence` semantics. + + Iterables can be + used in a :keyword:`for` loop and in many other places where a sequence is + needed (:func:`zip`, :func:`map`, ...). When an iterable object is passed + as an argument to the built-in function :func:`iter`, it returns an + iterator for the object. This iterator is good for one pass over the set + of values. When using iterables, it is usually not necessary to call + :func:`iter` or deal with iterator objects yourself. The :keyword:`for` + statement does that automatically for you, creating a temporary unnamed + variable to hold the iterator for the duration of the loop. See also + :term:`iterator`, :term:`sequence`, and :term:`generator`. + + iterator + An object representing a stream of data. Repeated calls to the iterator's + :meth:`~iterator.__next__` method (or passing it to the built-in function + :func:`next`) return successive items in the stream. When no more data + are available a :exc:`StopIteration` exception is raised instead. At this + point, the iterator object is exhausted and any further calls to its + :meth:`!__next__` method just raise :exc:`StopIteration` again.
Iterators + are required to have an :meth:`~iterator.__iter__` method that returns the iterator + object itself so every iterator is also iterable and may be used in most + places where other iterables are accepted. One notable exception is code + which attempts multiple iteration passes. A container object (such as a + :class:`list`) produces a fresh new iterator each time you pass it to the + :func:`iter` function or use it in a :keyword:`for` loop. Attempting this + with an iterator will just return the same exhausted iterator object used + in the previous iteration pass, making it appear like an empty container. + + More information can be found in :ref:`typeiter`. + + .. impl-detail:: + + CPython does not consistently apply the requirement that an iterator + define :meth:`~iterator.__iter__`. + Also note that the free-threading build of CPython does not guarantee + the thread-safety of iterator operations. + + + key function + A key function or collation function is a callable that returns a value + used for sorting or ordering. For example, :func:`locale.strxfrm` is + used to produce a sort key that is aware of locale specific sort + conventions. + + A number of tools in Python accept key functions to control how elements + are ordered or grouped. They include :func:`min`, :func:`max`, + :func:`sorted`, :meth:`list.sort`, :func:`heapq.merge`, + :func:`heapq.nsmallest`, :func:`heapq.nlargest`, and + :func:`itertools.groupby`. + + There are several ways to create a key function. For example, the + :meth:`str.lower` method can serve as a key function for case insensitive + sorts. Alternatively, a key function can be built from a + :keyword:`lambda` expression such as ``lambda r: (r[0], r[2])``. Also, + :func:`operator.attrgetter`, :func:`operator.itemgetter`, and + :func:`operator.methodcaller` are three key function constructors. See the :ref:`Sorting HOW TO + <sortinghowto>` for examples of how to create and use key functions. + + keyword argument + See :term:`argument`.
+ + lambda + An anonymous inline function consisting of a single :term:`expression` + which is evaluated when the function is called. The syntax to create + a lambda function is ``lambda [parameters]: expression``. + + LBYL + Look before you leap. This coding style explicitly tests for + pre-conditions before making calls or lookups. This style contrasts with + the :term:`EAFP` approach and is characterized by the presence of many + :keyword:`if` statements. + + In a multi-threaded environment, the LBYL approach can risk introducing a + race condition between "the looking" and "the leaping". For example, the + code ``if key in mapping: return mapping[key]`` can fail if another + thread removes *key* from *mapping* after the test, but before the lookup. + This issue can be solved with locks or by using the EAFP approach. + + list + A built-in Python :term:`sequence`. Despite its name it is more akin + to an array in other languages than to a linked list since access to + elements is *O*\ (1). + + list comprehension + A compact way to process all or part of the elements in a sequence and + return a list with the results. ``result = ['{:#04x}'.format(x) for x in + range(256) if x % 2 == 0]`` generates a list of strings containing + even hex numbers (0x..) in the range from 0 to 255. The :keyword:`if` + clause is optional. If omitted, all elements in ``range(256)`` are + processed. + + loader + An object that loads a module. It must define a method named + :meth:`load_module`. A loader is typically returned by a + :term:`finder`. See :pep:`302` for details and + :class:`importlib.abc.Loader` for an :term:`abstract base class`. + + locale encoding + On Unix, it is the encoding of the LC_CTYPE locale. It can be set with + :func:`locale.setlocale(locale.LC_CTYPE, new_locale) <locale.setlocale>`. + + On Windows, it is the ANSI code page (ex: ``"cp1252"``). + + On Android and VxWorks, Python uses ``"utf-8"`` as the locale encoding.
+ + :func:`locale.getencoding` can be used to get the locale encoding. + + See also the :term:`filesystem encoding and error handler`. + + magic method + .. index:: pair: magic; method + + An informal synonym for :term:`special method`. + + mapping + A container object that supports arbitrary key lookups and implements the + methods specified in the :class:`collections.abc.Mapping` or + :class:`collections.abc.MutableMapping` + :ref:`abstract base classes <collections-abstract-base-classes>`. Examples + include :class:`dict`, :class:`collections.defaultdict`, + :class:`collections.OrderedDict` and :class:`collections.Counter`. + + meta path finder + A :term:`finder` returned by a search of :data:`sys.meta_path`. Meta path + finders are related to, but different from, :term:`path entry finders + <path entry finder>`. + + See :class:`importlib.abc.MetaPathFinder` for the methods that meta path + finders implement. + + metaclass + The class of a class. Class definitions create a class name, a class + dictionary, and a list of base classes. The metaclass is responsible for + taking those three arguments and creating the class. Most object oriented + programming languages provide a default implementation. What makes Python + special is that it is possible to create custom metaclasses. Most users + never need this tool, but when the need arises, metaclasses can provide + powerful, elegant solutions. They have been used for logging attribute + access, adding thread-safety, tracking object creation, implementing + singletons, and many other tasks. + + More information can be found in :ref:`metaclasses`. + + method + A function which is defined inside a class body. If called as an attribute + of an instance of that class, the method will get the instance object as + its first :term:`argument` (which is usually called ``self``). + See :term:`function` and :term:`nested scope`. + + method resolution order + Method Resolution Order is the order in which base classes are searched + for a member during lookup.
See :ref:`python_2.3_mro` for details of the + algorithm used by the Python interpreter since the 2.3 release. + + module + An object that serves as an organizational unit of Python code. Modules + have a namespace containing arbitrary Python objects. Modules are loaded + into Python by the process of :term:`importing`. + + See also :term:`package`. + + module spec + A namespace containing the import-related information used to load a + module. An instance of :class:`importlib.machinery.ModuleSpec`. + + MRO + See :term:`method resolution order`. + + mutable + Mutable objects can change their value but keep their :func:`id`. See + also :term:`immutable`. + + named tuple + The term "named tuple" applies to any type or class that inherits from + tuple and whose indexable elements are also accessible using named + attributes. The type or class may have other features as well. + + Several built-in types are named tuples, including the values returned + by :func:`time.localtime` and :func:`os.stat`. Another example is + :data:`sys.float_info`:: + + >>> sys.float_info[1] # indexed access + 1024 + >>> sys.float_info.max_exp # named field access + 1024 + >>> isinstance(sys.float_info, tuple) # kind of tuple + True + + Some named tuples are built-in types (such as the above examples). + Alternatively, a named tuple can be created from a regular class + definition that inherits from :class:`tuple` and that defines named + fields. Such a class can be written by hand, or it can be created by + inheriting :class:`typing.NamedTuple`, or with the factory function + :func:`collections.namedtuple`. The latter techniques also add some + extra methods that may not be found in hand-written or built-in named + tuples. + + namespace + The place where a variable is stored. Namespaces are implemented as + dictionaries. There are the local, global and built-in namespaces as well + as nested namespaces in objects (in methods). Namespaces support + modularity by preventing naming conflicts. 
For instance, the functions + :func:`builtins.open <.open>` and :func:`os.open` are distinguished by + their namespaces. Namespaces also aid readability and maintainability by + making it clear which module implements a function. For instance, writing + :func:`random.seed` or :func:`itertools.islice` makes it clear that those + functions are implemented by the :mod:`random` and :mod:`itertools` + modules, respectively. + + namespace package + A :pep:`420` :term:`package` which serves only as a container for + subpackages. Namespace packages may have no physical representation, + and specifically are not like a :term:`regular package` because they + have no ``__init__.py`` file. + + See also :term:`module`. + + nested scope + The ability to refer to a variable in an enclosing definition. For + instance, a function defined inside another function can refer to + variables in the outer function. Note that nested scopes by default work + only for reference and not for assignment. Local variables both read and + write in the innermost scope. Likewise, global variables read and write + to the global namespace. The :keyword:`nonlocal` statement allows writing to outer + scopes. + + new-style class + Old name for the flavor of classes now used for all class objects. In + earlier Python versions, only new-style classes could use Python's newer, + versatile features like :attr:`~object.__slots__`, descriptors, + properties, :meth:`~object.__getattribute__`, class methods, and static + methods. + + object + Any data with state (attributes or value) and defined behavior + (methods). Also the ultimate base class of any :term:`new-style + class`. + + optimized scope + A scope where target local variable names are reliably known to the + compiler when the code is compiled, allowing optimization of read and + write access to these names. The local namespaces for functions, + generators, coroutines, comprehensions, and generator expressions are + optimized in this fashion.
Note: most interpreter optimizations are + applied to all scopes; only those relying on a known set of local + and nonlocal variable names are restricted to optimized scopes. + + package + A Python :term:`module` which can contain submodules or, recursively, + subpackages. Technically, a package is a Python module with a + ``__path__`` attribute. + + See also :term:`regular package` and :term:`namespace package`. + + parameter + A named entity in a :term:`function` (or method) definition that + specifies an :term:`argument` (or in some cases, arguments) that the + function can accept. There are five kinds of parameter: + + * :dfn:`positional-or-keyword`: specifies an argument that can be passed + either :term:`positionally <argument>` or as a :term:`keyword argument + <argument>`. This is the default kind of parameter, for example *foo* + and *bar* in the following:: + + def func(foo, bar=None): ... + + .. _positional-only_parameter: + + * :dfn:`positional-only`: specifies an argument that can be supplied only + by position. Positional-only parameters can be defined by including a + ``/`` character in the parameter list of the function definition after + them, for example *posonly1* and *posonly2* in the following:: + + def func(posonly1, posonly2, /, positional_or_keyword): ... + + .. _keyword-only_parameter: + + * :dfn:`keyword-only`: specifies an argument that can be supplied only + by keyword. Keyword-only parameters can be defined by including a + single var-positional parameter or bare ``*`` in the parameter list + of the function definition before them, for example *kw_only1* and + *kw_only2* in the following:: + + def func(arg, *, kw_only1, kw_only2): ... + + * :dfn:`var-positional`: specifies that an arbitrary sequence of + positional arguments can be provided (in addition to any positional + arguments already accepted by other parameters).
Such a parameter can + be defined by prepending the parameter name with ``*``, for example + *args* in the following:: + + def func(*args, **kwargs): ... + + * :dfn:`var-keyword`: specifies that arbitrarily many keyword arguments + can be provided (in addition to any keyword arguments already accepted + by other parameters). Such a parameter can be defined by prepending + the parameter name with ``**``, for example *kwargs* in the example + above. + + Parameters can specify both optional and required arguments, as well as + default values for some optional arguments. + + See also the :term:`argument` glossary entry, the FAQ question on + :ref:`the difference between arguments and parameters + <faq-argument-vs-parameter>`, the :class:`inspect.Parameter` class, the + :ref:`function` section, and :pep:`362`. + + path entry + A single location on the :term:`import path` which the :term:`path + based finder` consults to find modules for importing. + + path entry finder + A :term:`finder` returned by a callable on :data:`sys.path_hooks` + (i.e. a :term:`path entry hook`) which knows how to locate modules given + a :term:`path entry`. + + See :class:`importlib.abc.PathEntryFinder` for the methods that path entry + finders implement. + + path entry hook + A callable on the :data:`sys.path_hooks` list which returns a :term:`path + entry finder` if it knows how to find modules on a specific :term:`path + entry`. + + path based finder + One of the default :term:`meta path finders <meta path finder>` which + searches an :term:`import path` for modules. + + path-like object + An object representing a file system path. A path-like object is either + a :class:`str` or :class:`bytes` object representing a path, or an object + implementing the :class:`os.PathLike` protocol.
An object that supports + the :class:`os.PathLike` protocol can be converted to a :class:`str` or + :class:`bytes` file system path by calling the :func:`os.fspath` function; + :func:`os.fsdecode` and :func:`os.fsencode` can be used to guarantee a + :class:`str` or :class:`bytes` result instead, respectively. Introduced + by :pep:`519`. + + PEP + Python Enhancement Proposal. A PEP is a design document + providing information to the Python community, or describing a new + feature for Python or its processes or environment. PEPs should + provide a concise technical specification and a rationale for proposed + features. + + PEPs are intended to be the primary mechanisms for proposing major new + features, for collecting community input on an issue, and for documenting + the design decisions that have gone into Python. The PEP author is + responsible for building consensus within the community and documenting + dissenting opinions. + + See :pep:`1`. + + portion + A set of files in a single directory (possibly stored in a zip file) + that contribute to a namespace package, as defined in :pep:`420`. + + positional argument + See :term:`argument`. + + provisional API + A provisional API is one which has been deliberately excluded from + the standard library's backwards compatibility guarantees. While major + changes to such interfaces are not expected, as long as they are marked + provisional, backwards incompatible changes (up to and including removal + of the interface) may occur if deemed necessary by core developers. Such + changes will not be made gratuitously -- they will occur only if serious + fundamental flaws are uncovered that were missed prior to the inclusion + of the API. + + Even for provisional APIs, backwards incompatible changes are seen as + a "solution of last resort" - every attempt will still be made to find + a backwards compatible resolution to any identified problems. 
+ + This process allows the standard library to continue to evolve over + time, without locking in problematic design errors for extended periods + of time. See :pep:`411` for more details. + + provisional package + See :term:`provisional API`. + + Python 3000 + Nickname for the Python 3.x release line (coined long ago when the + release of version 3 was something in the distant future.) This is also + abbreviated "Py3k". + + Pythonic + An idea or piece of code which closely follows the most common idioms + of the Python language, rather than implementing code using concepts + common to other languages. For example, a common idiom in Python is + to loop over all elements of an iterable using a :keyword:`for` + statement. Many other languages don't have this type of construct, so + people unfamiliar with Python sometimes use a numerical counter instead:: + + for i in range(len(food)): + print(food[i]) + + As opposed to the cleaner, Pythonic method:: + + for piece in food: + print(piece) + + qualified name + A dotted name showing the "path" from a module's global scope to a + class, function or method defined in that module, as defined in + :pep:`3155`. For top-level functions and classes, the qualified name + is the same as the object's name:: + + >>> class C: + ... class D: + ... def meth(self): + ... pass + ... + >>> C.__qualname__ + 'C' + >>> C.D.__qualname__ + 'C.D' + >>> C.D.meth.__qualname__ + 'C.D.meth' + + When used to refer to modules, the *fully qualified name* means the + entire dotted path to the module, including any parent packages, + e.g. ``email.mime.text``:: + + >>> import email.mime.text + >>> email.mime.text.__name__ + 'email.mime.text' + + reference count + The number of references to an object. When the reference count of an + object drops to zero, it is deallocated. Some objects are + :term:`immortal` and have reference counts that are never modified, and + therefore the objects are never deallocated. 
Reference counting is + generally not visible to Python code, but it is a key element of the + :term:`CPython` implementation. Programmers can call the + :func:`sys.getrefcount` function to return the + reference count for a particular object. + + regular package + A traditional :term:`package`, such as a directory containing an + ``__init__.py`` file. + + See also :term:`namespace package`. + + REPL + An acronym for the "read–eval–print loop", another name for the + :term:`interactive` interpreter shell. + + __slots__ + A declaration inside a class that saves memory by pre-declaring space for + instance attributes and eliminating instance dictionaries. Though + popular, the technique is somewhat tricky to get right and is best + reserved for rare cases where there are large numbers of instances in a + memory-critical application. + + sequence + An :term:`iterable` which supports efficient element access using integer + indices via the :meth:`~object.__getitem__` special method and defines a + :meth:`~object.__len__` method that returns the length of the sequence. + Some built-in sequence types are :class:`list`, :class:`str`, + :class:`tuple`, and :class:`bytes`. Note that :class:`dict` also + supports :meth:`~object.__getitem__` and :meth:`!__len__`, but is considered a + mapping rather than a sequence because the lookups use arbitrary + :term:`hashable` keys rather than integers. + + The :class:`collections.abc.Sequence` abstract base class + defines a much richer interface that goes beyond just + :meth:`~object.__getitem__` and :meth:`~object.__len__`, adding + :meth:`!count`, :meth:`!index`, :meth:`~object.__contains__`, and + :meth:`~object.__reversed__`. Types that implement this expanded + interface can be registered explicitly using + :func:`~abc.ABCMeta.register`. For more documentation on sequence + methods generally, see + :ref:`Common Sequence Operations <typesseq-common>`.
+ + set comprehension + A compact way to process all or part of the elements in an iterable and + return a set with the results. ``results = {c for c in 'abracadabra' if + c not in 'abc'}`` generates the set of strings ``{'r', 'd'}``. See + :ref:`comprehensions`. + + single dispatch + A form of :term:`generic function` dispatch where the implementation is + chosen based on the type of a single argument. + + slice + An object usually containing a portion of a :term:`sequence`. A slice is + created using the subscript notation, ``[]`` with colons between numbers + when several are given, such as in ``variable_name[1:3:5]``. The bracket + (subscript) notation uses :class:`slice` objects internally. + + soft deprecated + A soft deprecation can be applied to an API which should no longer + be used to write new code, but which remains safe to continue using in + existing code. The API remains documented and tested, but will not be + developed further (no enhancement). + + The main difference between a "soft" and a (regular) "hard" deprecation + is that the soft deprecation does not imply scheduling the removal of the + deprecated API. + + Another difference is that a soft deprecation does not issue a warning. + + See `PEP 387: Soft Deprecation + <https://peps.python.org/pep-0387/#soft-deprecation>`_. + + special method + .. index:: pair: special; method + + A method that is called implicitly by Python to execute a certain + operation on a type, such as addition. Such methods have names starting + and ending with double underscores. Special methods are documented in + :ref:`specialnames`. + + statement + A statement is part of a suite (a "block" of code). A statement is either + an :term:`expression` or one of several constructs with a keyword, such + as :keyword:`if`, :keyword:`while` or :keyword:`for`. + + static type checker + An external tool that reads Python code and analyzes it, looking for + issues such as incorrect types. See also :term:`type hints <type hint>` + and the :mod:`typing` module.
+ + strong reference + In Python's C API, a strong reference is a reference to an object + which is owned by the code holding the reference. The strong + reference is taken by calling :c:func:`Py_INCREF` when the + reference is created and released with :c:func:`Py_DECREF` + when the reference is deleted. + + The :c:func:`Py_NewRef` function can be used to create a strong reference + to an object. Usually, the :c:func:`Py_DECREF` function must be called on + the strong reference before exiting the scope of the strong reference, to + avoid leaking one reference. + + See also :term:`borrowed reference`. + + text encoding + A string in Python is a sequence of Unicode code points (in range + ``U+0000``--``U+10FFFF``). To store or transfer a string, it needs to be + serialized as a sequence of bytes. + + Serializing a string into a sequence of bytes is known as "encoding", and + recreating the string from the sequence of bytes is known as "decoding". + + There are a variety of different text serialization + :ref:`codecs <standard-encodings>`, which are collectively referred to as + "text encodings". + + text file + A :term:`file object` able to read and write :class:`str` objects. + Often, a text file actually accesses a byte-oriented datastream + and handles the :term:`text encoding` automatically. + Examples of text files are files opened in text mode (``'r'`` or ``'w'``), + :data:`sys.stdin`, :data:`sys.stdout`, and instances of + :class:`io.StringIO`. + + See also :term:`binary file` for a file object able to read and write + :term:`bytes-like objects <bytes-like object>`. + + triple-quoted string + A string which is bound by three instances of either a quotation mark + (") or an apostrophe ('). While they don't provide any functionality + not available with single-quoted strings, they are useful for a number + of reasons.
They allow you to include unescaped single and double + quotes within a string and they can span multiple lines without the + use of the continuation character, making them especially useful when + writing docstrings. + + type + The type of a Python object determines what kind of object it is; every + object has a type. An object's type is accessible as its + :attr:`~object.__class__` attribute or can be retrieved with + ``type(obj)``. + + type alias + A synonym for a type, created by assigning the type to an identifier. + + Type aliases are useful for simplifying :term:`type hints <type hint>`. + For example:: + + def remove_gray_shades( + colors: list[tuple[int, int, int]]) -> list[tuple[int, int, int]]: + pass + + could be made more readable like this:: + + Color = tuple[int, int, int] + + def remove_gray_shades(colors: list[Color]) -> list[Color]: + pass + + See :mod:`typing` and :pep:`484`, which describe this functionality. + + type hint + An :term:`annotation` that specifies the expected type for a variable, a class + attribute, or a function parameter or return value. + + Type hints are optional and are not enforced by Python but + they are useful to :term:`static type checkers <static type checker>`. + They can also aid IDEs with code completion and refactoring. + + Type hints of global variables, class attributes, and functions, + but not local variables, can be accessed using + :func:`typing.get_type_hints`. + + See :mod:`typing` and :pep:`484`, which describe this functionality. + + universal newlines + A manner of interpreting text streams in which all of the following are + recognized as ending a line: the Unix end-of-line convention ``'\n'``, + the Windows convention ``'\r\n'``, and the old Macintosh convention + ``'\r'``. See :pep:`278` and :pep:`3116`, as well as + :func:`bytes.splitlines` for an additional use. + + variable annotation + An :term:`annotation` of a variable or a class attribute.
+ + When annotating a variable or a class attribute, assignment is optional:: + + class C: + field: 'annotation' + + Variable annotations are usually used for + :term:`type hints <type hint>`: for example this variable is expected to take + :class:`int` values:: + + count: int = 0 + + Variable annotation syntax is explained in section :ref:`annassign`. + + See :term:`function annotation`, :pep:`484` + and :pep:`526`, which describe this functionality. + Also see :ref:`annotations-howto` + for best practices on working with annotations. + + virtual environment + A cooperatively isolated runtime environment that allows Python users + and applications to install and upgrade Python distribution packages + without interfering with the behaviour of other Python applications + running on the same system. + + See also :mod:`venv`. + + virtual machine + A computer defined entirely in software. Python's virtual machine + executes the :term:`bytecode` emitted by the bytecode compiler. + + Zen of Python + Listing of Python design principles and philosophies that are helpful in + understanding and using the language. The listing can be found by typing + "``import this``" at the interactive prompt. diff --git a/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/annotations.rst b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/annotations.rst new file mode 100644 index 00000000..174078b8 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/annotations.rst @@ -0,0 +1,233 @@ +.. _annotations-howto: + +************************** +Annotations Best Practices +************************** + +:author: Larry Hastings + +.. topic:: Abstract + + This document is designed to encapsulate the best practices + for working with annotations dicts. If you write Python code + that examines ``__annotations__`` on Python objects, we + encourage you to follow the guidelines described below.
+ + The document is organized into four sections: + best practices for accessing the annotations of an object + in Python versions 3.10 and newer, + best practices for accessing the annotations of an object + in Python versions 3.9 and older, + other best practices + for ``__annotations__`` that apply to any Python version, + and + quirks of ``__annotations__``. + + Note that this document is specifically about working with + ``__annotations__``, not uses *for* annotations. + If you're looking for information on how to use "type hints" + in your code, please see the :mod:`typing` module. + + +Accessing The Annotations Dict Of An Object In Python 3.10 And Newer +==================================================================== + +Python 3.10 adds a new function to the standard library: +:func:`inspect.get_annotations`. In Python versions 3.10 +and newer, calling this function is the best practice for +accessing the annotations dict of any object that supports +annotations. This function can also "un-stringize" +stringized annotations for you. + +If for some reason :func:`inspect.get_annotations` isn't +viable for your use case, you may access the +``__annotations__`` data member manually. Best practice +for this changed in Python 3.10 as well: as of Python 3.10, +``o.__annotations__`` is guaranteed to *always* work +on Python functions, classes, and modules. If you're +certain the object you're examining is one of these three +*specific* objects, you may simply use ``o.__annotations__`` +to get at the object's annotations dict. + +However, other types of callables--for example, +callables created by :func:`functools.partial`--may +not have an ``__annotations__`` attribute defined. When +accessing the ``__annotations__`` of a possibly unknown +object, best practice in Python versions 3.10 and +newer is to call :func:`getattr` with three arguments, +for example ``getattr(o, '__annotations__', None)``. 
+ +Before Python 3.10, accessing ``__annotations__`` on a class that +defines no annotations but that has a parent class with +annotations would return the parent's ``__annotations__``. +In Python 3.10 and newer, the child class's annotations +will be an empty dict instead. + + +Accessing The Annotations Dict Of An Object In Python 3.9 And Older +=================================================================== + +In Python 3.9 and older, accessing the annotations dict +of an object is much more complicated than in newer versions. +The problem is a design flaw in these older versions of Python, +specifically to do with class annotations. + +Best practice for accessing the annotations dict of other +objects--functions, other callables, and modules--is the same +as best practice for 3.10, assuming you aren't calling +:func:`inspect.get_annotations`: you should use three-argument +:func:`getattr` to access the object's ``__annotations__`` +attribute. + +Unfortunately, this isn't best practice for classes. The problem +is that, since ``__annotations__`` is optional on classes, and +because classes can inherit attributes from their base classes, +accessing the ``__annotations__`` attribute of a class may +inadvertently return the annotations dict of a *base class.* +As an example:: + + class Base: + a: int = 3 + b: str = 'abc' + + class Derived(Base): + pass + + print(Derived.__annotations__) + +This will print the annotations dict from ``Base``, not +``Derived``. + +Your code will have to have a separate code path if the object +you're examining is a class (``isinstance(o, type)``). +In that case, best practice relies on an implementation detail +of Python 3.9 and before: if a class has annotations defined, +they are stored in the class's :attr:`~type.__dict__` dictionary. Since +the class may or may not have annotations defined, best practice +is to call the :meth:`~dict.get` method on the class dict. 
+ +To put it all together, here is some sample code that safely +accesses the ``__annotations__`` attribute on an arbitrary +object in Python 3.9 and before:: + + if isinstance(o, type): + ann = o.__dict__.get('__annotations__', None) + else: + ann = getattr(o, '__annotations__', None) + +After running this code, ``ann`` should be either a +dictionary or ``None``. You're encouraged to double-check +the type of ``ann`` using :func:`isinstance` before further +examination. + +Note that some exotic or malformed type objects may not have +a :attr:`~type.__dict__` attribute, so for extra safety you may also wish +to use :func:`getattr` to access :attr:`!__dict__`. + + +Manually Un-Stringizing Stringized Annotations +============================================== + +In situations where some annotations may be "stringized", +and you wish to evaluate those strings to produce the +Python values they represent, it really is best to +call :func:`inspect.get_annotations` to do this work +for you. + +If you're using Python 3.9 or older, or if for some reason +you can't use :func:`inspect.get_annotations`, you'll need +to duplicate its logic. You're encouraged to examine the +implementation of :func:`inspect.get_annotations` in the +current Python version and follow a similar approach. + +In a nutshell, if you wish to evaluate a stringized annotation +on an arbitrary object ``o``: + +* If ``o`` is a module, use ``o.__dict__`` as the + ``globals`` when calling :func:`eval`. +* If ``o`` is a class, use ``sys.modules[o.__module__].__dict__`` + as the ``globals``, and ``dict(vars(o))`` as the ``locals``, + when calling :func:`eval`. +* If ``o`` is a wrapped callable using :func:`functools.update_wrapper`, + :func:`functools.wraps`, or :func:`functools.partial`, iteratively + unwrap it by accessing either ``o.__wrapped__`` or ``o.func`` as + appropriate, until you have found the root unwrapped function. 
+* If ``o`` is a callable (but not a class), use + :attr:`o.__globals__ <function.__globals__>` as the globals when calling + :func:`eval`. + +However, not all string values used as annotations can +be successfully turned into Python values by :func:`eval`. +String values could theoretically contain any valid string, +and in practice there are valid use cases for type hints that +require annotating with string values that specifically +*can't* be evaluated. For example: + +* :pep:`604` union types using ``|``, before support for this + was added to Python 3.10. +* Definitions that aren't needed at runtime, only imported + when :const:`typing.TYPE_CHECKING` is true. + +If :func:`eval` attempts to evaluate such values, it will +fail and raise an exception. So, when designing a library +API that works with annotations, it's recommended to only +attempt to evaluate string values when explicitly requested +to by the caller. + + +Best Practices For ``__annotations__`` In Any Python Version +============================================================ + +* You should avoid assigning to the ``__annotations__`` member + of objects directly. Let Python manage setting ``__annotations__``. + +* If you do assign directly to the ``__annotations__`` member + of an object, you should always set it to a ``dict`` object. + +* If you directly access the ``__annotations__`` member + of an object, you should ensure that it's a + dictionary before attempting to examine its contents. + +* You should avoid modifying ``__annotations__`` dicts. + +* You should avoid deleting the ``__annotations__`` attribute + of an object. + + +``__annotations__`` Quirks +========================== + +In all versions of Python 3, function +objects lazy-create an annotations dict if no annotations +are defined on that object.
You can delete the ``__annotations__`` +attribute using ``del fn.__annotations__``, but if you then +access ``fn.__annotations__`` the object will create a new empty dict +that it will store and return as its annotations. Deleting the +annotations on a function before it has lazily created its annotations +dict will throw an ``AttributeError``; using ``del fn.__annotations__`` +twice in a row is guaranteed to always throw an ``AttributeError``. + +Everything in the above paragraph also applies to class and module +objects in Python 3.10 and newer. + +In all versions of Python 3, you can set ``__annotations__`` +on a function object to ``None``. However, subsequently +accessing the annotations on that object using ``fn.__annotations__`` +will lazy-create an empty dictionary as per the first paragraph of +this section. This is *not* true of modules and classes, in any Python +version; those objects permit setting ``__annotations__`` to any +Python value, and will retain whatever value is set. + +If Python stringizes your annotations for you +(using ``from __future__ import annotations``), and you +specify a string as an annotation, the string will +itself be quoted. In effect the annotation is quoted +*twice.* For example:: + + from __future__ import annotations + def foo(a: "str"): pass + + print(foo.__annotations__) + +This prints ``{'a': "'str'"}``. This shouldn't really be considered +a "quirk"; it's mentioned here simply because it might be surprising. diff --git a/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/argparse.rst b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/argparse.rst new file mode 100644 index 00000000..30d9ac70 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/argparse.rst @@ -0,0 +1,850 @@ +.. _argparse-tutorial: + +***************** +Argparse Tutorial +***************** + +:author: Tshepang Mbambo + +.. 
currentmodule:: argparse + +This tutorial is intended to be a gentle introduction to :mod:`argparse`, the +recommended command-line parsing module in the Python standard library. + +.. note:: + + There are two other modules that fulfill the same task, namely + :mod:`getopt` (an equivalent for ``getopt()`` from the C + language) and the deprecated :mod:`optparse`. + Note also that :mod:`argparse` is based on :mod:`optparse`, + and therefore very similar in terms of usage. + + +Concepts +======== + +Let's show the sort of functionality that we are going to explore in this +introductory tutorial by making use of the :command:`ls` command: + +.. code-block:: shell-session + + $ ls + cpython devguide prog.py pypy rm-unused-function.patch + $ ls pypy + ctypes_configure demo dotviewer include lib_pypy lib-python ... + $ ls -l + total 20 + drwxr-xr-x 19 wena wena 4096 Feb 18 18:51 cpython + drwxr-xr-x 4 wena wena 4096 Feb 8 12:04 devguide + -rwxr-xr-x 1 wena wena 535 Feb 19 00:05 prog.py + drwxr-xr-x 14 wena wena 4096 Feb 7 00:59 pypy + -rw-r--r-- 1 wena wena 741 Feb 18 01:01 rm-unused-function.patch + $ ls --help + Usage: ls [OPTION]... [FILE]... + List information about the FILEs (the current directory by default). + Sort entries alphabetically if none of -cftuvSUX nor --sort is specified. + ... + +A few concepts we can learn from the four commands: + +* The :command:`ls` command is useful when run without any options at all. It defaults + to displaying the contents of the current directory. + +* If we want beyond what it provides by default, we tell it a bit more. In + this case, we want it to display a different directory, ``pypy``. + What we did is specify what is known as a positional argument. It's named so + because the program should know what to do with the value, solely based on + where it appears on the command line. This concept is more relevant + to a command like :command:`cp`, whose most basic usage is ``cp SRC DEST``. 
+ The first position is *what you want copied,* and the second + position is *where you want it copied to*. + +* Now, say we want to change behaviour of the program. In our example, + we display more info for each file instead of just showing the file names. + The ``-l`` in that case is known as an optional argument. + +* That's a snippet of the help text. It's very useful in that you can + come across a program you have never used before, and can figure out + how it works simply by reading its help text. + + +The basics +========== + +Let us start with a very simple example which does (almost) nothing:: + + import argparse + parser = argparse.ArgumentParser() + parser.parse_args() + +Following is a result of running the code: + +.. code-block:: shell-session + + $ python prog.py + $ python prog.py --help + usage: prog.py [-h] + + options: + -h, --help show this help message and exit + $ python prog.py --verbose + usage: prog.py [-h] + prog.py: error: unrecognized arguments: --verbose + $ python prog.py foo + usage: prog.py [-h] + prog.py: error: unrecognized arguments: foo + +Here is what is happening: + +* Running the script without any options results in nothing displayed to + stdout. Not so useful. + +* The second one starts to display the usefulness of the :mod:`argparse` + module. We have done almost nothing, but already we get a nice help message. + +* The ``--help`` option, which can also be shortened to ``-h``, is the only + option we get for free (i.e. no need to specify it). Specifying anything + else results in an error. But even then, we do get a useful usage message, + also for free. + + +Introducing Positional arguments +================================ + +An example:: + + import argparse + parser = argparse.ArgumentParser() + parser.add_argument("echo") + args = parser.parse_args() + print(args.echo) + +And running the code: + +.. 
code-block:: shell-session + + $ python prog.py + usage: prog.py [-h] echo + prog.py: error: the following arguments are required: echo + $ python prog.py --help + usage: prog.py [-h] echo + + positional arguments: + echo + + options: + -h, --help show this help message and exit + $ python prog.py foo + foo + +Here is what's happening: + +* We've added the :meth:`~ArgumentParser.add_argument` method, which is what we use to specify + which command-line options the program is willing to accept. In this case, + I've named it ``echo`` so that it's in line with its function. + +* Calling our program now requires us to specify an option. + +* The :meth:`~ArgumentParser.parse_args` method actually returns some data from the + options specified, in this case, ``echo``. + +* The variable is some form of 'magic' that :mod:`argparse` performs for free + (i.e. no need to specify which variable that value is stored in). + You will also notice that its name matches the string argument given + to the method, ``echo``. + +Note however that, although the help display looks nice and all, it currently +is not as helpful as it can be. For example we see that we got ``echo`` as a +positional argument, but we don't know what it does, other than by guessing or +by reading the source code. So, let's make it a bit more useful:: + + import argparse + parser = argparse.ArgumentParser() + parser.add_argument("echo", help="echo the string you use here") + args = parser.parse_args() + print(args.echo) + +And we get: + +.. 
code-block:: shell-session + + $ python prog.py -h + usage: prog.py [-h] echo + + positional arguments: + echo echo the string you use here + + options: + -h, --help show this help message and exit + +Now, how about doing something even more useful:: + + import argparse + parser = argparse.ArgumentParser() + parser.add_argument("square", help="display a square of a given number") + args = parser.parse_args() + print(args.square**2) + +Following is a result of running the code: + +.. code-block:: shell-session + + $ python prog.py 4 + Traceback (most recent call last): + File "prog.py", line 5, in <module> + print(args.square**2) + TypeError: unsupported operand type(s) for ** or pow(): 'str' and 'int' + +That didn't go so well. That's because :mod:`argparse` treats the options we +give it as strings, unless we tell it otherwise. So, let's tell +:mod:`argparse` to treat that input as an integer:: + + import argparse + parser = argparse.ArgumentParser() + parser.add_argument("square", help="display a square of a given number", + type=int) + args = parser.parse_args() + print(args.square**2) + +Following is a result of running the code: + +.. code-block:: shell-session + + $ python prog.py 4 + 16 + $ python prog.py four + usage: prog.py [-h] square + prog.py: error: argument square: invalid int value: 'four' + +That went well. The program now even helpfully quits on illegal input +before proceeding. + + +Introducing Optional arguments +============================== + +So far we have been playing with positional arguments. Let us +have a look at how to add optional ones:: + + import argparse + parser = argparse.ArgumentParser() + parser.add_argument("--verbosity", help="increase output verbosity") + args = parser.parse_args() + if args.verbosity: + print("verbosity turned on") + +And the output: + +..
code-block:: shell-session + + $ python prog.py --verbosity 1 + verbosity turned on + $ python prog.py + $ python prog.py --help + usage: prog.py [-h] [--verbosity VERBOSITY] + + options: + -h, --help show this help message and exit + --verbosity VERBOSITY + increase output verbosity + $ python prog.py --verbosity + usage: prog.py [-h] [--verbosity VERBOSITY] + prog.py: error: argument --verbosity: expected one argument + +Here is what is happening: + +* The program is written so as to display something when ``--verbosity`` is + specified and display nothing when not. + +* To show that the option is actually optional, there is no error when running + the program without it. Note that by default, if an optional argument isn't + used, the relevant variable, in this case ``args.verbosity``, is + given ``None`` as a value, which is the reason it fails the truth + test of the :keyword:`if` statement. + +* The help message is a bit different. + +* When using the ``--verbosity`` option, one must also specify some value, + any value. + +The above example accepts arbitrary integer values for ``--verbosity``, but for +our simple program, only two values are actually useful, ``True`` or ``False``. +Let's modify the code accordingly:: + + import argparse + parser = argparse.ArgumentParser() + parser.add_argument("--verbose", help="increase output verbosity", + action="store_true") + args = parser.parse_args() + if args.verbose: + print("verbosity turned on") + +And the output: + +.. code-block:: shell-session + + $ python prog.py --verbose + verbosity turned on + $ python prog.py --verbose 1 + usage: prog.py [-h] [--verbose] + prog.py: error: unrecognized arguments: 1 + $ python prog.py --help + usage: prog.py [-h] [--verbose] + + options: + -h, --help show this help message and exit + --verbose increase output verbosity + +Here is what is happening: + +* The option is now more of a flag than something that requires a value. 
+ We even changed the name of the option to match that idea. + Note that we now specify a new keyword, ``action``, and give it the value + ``"store_true"``. This means that, if the option is specified, + assign the value ``True`` to ``args.verbose``. + Not specifying it implies ``False``. + +* It complains when you specify a value, in true spirit of what flags + actually are. + +* Notice the different help text. + + +Short options +------------- + +If you are familiar with command line usage, +you will notice that I haven't yet touched on the topic of short +versions of the options. It's quite simple:: + + import argparse + parser = argparse.ArgumentParser() + parser.add_argument("-v", "--verbose", help="increase output verbosity", + action="store_true") + args = parser.parse_args() + if args.verbose: + print("verbosity turned on") + +And here goes: + +.. code-block:: shell-session + + $ python prog.py -v + verbosity turned on + $ python prog.py --help + usage: prog.py [-h] [-v] + + options: + -h, --help show this help message and exit + -v, --verbose increase output verbosity + +Note that the new ability is also reflected in the help text. + + +Combining Positional and Optional arguments +=========================================== + +Our program keeps growing in complexity:: + + import argparse + parser = argparse.ArgumentParser() + parser.add_argument("square", type=int, + help="display a square of a given number") + parser.add_argument("-v", "--verbose", action="store_true", + help="increase output verbosity") + args = parser.parse_args() + answer = args.square**2 + if args.verbose: + print(f"the square of {args.square} equals {answer}") + else: + print(answer) + +And now the output: + +.. 
code-block:: shell-session + + $ python prog.py + usage: prog.py [-h] [-v] square + prog.py: error: the following arguments are required: square + $ python prog.py 4 + 16 + $ python prog.py 4 --verbose + the square of 4 equals 16 + $ python prog.py --verbose 4 + the square of 4 equals 16 + +* We've brought back a positional argument, hence the complaint. + +* Note that the order does not matter. + +How about we give this program of ours back the ability to have +multiple verbosity values, and actually get to use them:: + + import argparse + parser = argparse.ArgumentParser() + parser.add_argument("square", type=int, + help="display a square of a given number") + parser.add_argument("-v", "--verbosity", type=int, + help="increase output verbosity") + args = parser.parse_args() + answer = args.square**2 + if args.verbosity == 2: + print(f"the square of {args.square} equals {answer}") + elif args.verbosity == 1: + print(f"{args.square}^2 == {answer}") + else: + print(answer) + +And the output: + +.. code-block:: shell-session + + $ python prog.py 4 + 16 + $ python prog.py 4 -v + usage: prog.py [-h] [-v VERBOSITY] square + prog.py: error: argument -v/--verbosity: expected one argument + $ python prog.py 4 -v 1 + 4^2 == 16 + $ python prog.py 4 -v 2 + the square of 4 equals 16 + $ python prog.py 4 -v 3 + 16 + +These all look good except the last one, which exposes a bug in our program. +Let's fix it by restricting the values the ``--verbosity`` option can accept:: + + import argparse + parser = argparse.ArgumentParser() + parser.add_argument("square", type=int, + help="display a square of a given number") + parser.add_argument("-v", "--verbosity", type=int, choices=[0, 1, 2], + help="increase output verbosity") + args = parser.parse_args() + answer = args.square**2 + if args.verbosity == 2: + print(f"the square of {args.square} equals {answer}") + elif args.verbosity == 1: + print(f"{args.square}^2 == {answer}") + else: + print(answer) + +And the output: + +.. 
code-block:: shell-session + + $ python prog.py 4 -v 3 + usage: prog.py [-h] [-v {0,1,2}] square + prog.py: error: argument -v/--verbosity: invalid choice: 3 (choose from 0, 1, 2) + $ python prog.py 4 -h + usage: prog.py [-h] [-v {0,1,2}] square + + positional arguments: + square display a square of a given number + + options: + -h, --help show this help message and exit + -v, --verbosity {0,1,2} + increase output verbosity + +Note that the change also reflects both in the error message as well as the +help string. + +Now, let's use a different approach of playing with verbosity, which is pretty +common. It also matches the way the CPython executable handles its own +verbosity argument (check the output of ``python --help``):: + + import argparse + parser = argparse.ArgumentParser() + parser.add_argument("square", type=int, + help="display the square of a given number") + parser.add_argument("-v", "--verbosity", action="count", + help="increase output verbosity") + args = parser.parse_args() + answer = args.square**2 + if args.verbosity == 2: + print(f"the square of {args.square} equals {answer}") + elif args.verbosity == 1: + print(f"{args.square}^2 == {answer}") + else: + print(answer) + +We have introduced another action, "count", +to count the number of occurrences of specific options. + + +.. 
code-block:: shell-session + + $ python prog.py 4 + 16 + $ python prog.py 4 -v + 4^2 == 16 + $ python prog.py 4 -vv + the square of 4 equals 16 + $ python prog.py 4 --verbosity --verbosity + the square of 4 equals 16 + $ python prog.py 4 -v 1 + usage: prog.py [-h] [-v] square + prog.py: error: unrecognized arguments: 1 + $ python prog.py 4 -h + usage: prog.py [-h] [-v] square + + positional arguments: + square display a square of a given number + + options: + -h, --help show this help message and exit + -v, --verbosity increase output verbosity + $ python prog.py 4 -vvv + 16 + +* Yes, it's now more of a flag (similar to ``action="store_true"``) in the + previous version of our script. That should explain the complaint. + +* It also behaves similar to "store_true" action. + +* Now here's a demonstration of what the "count" action gives. You've probably + seen this sort of usage before. + +* And if you don't specify the ``-v`` flag, that flag is considered to have + ``None`` value. + +* As should be expected, specifying the long form of the flag, we should get + the same output. + +* Sadly, our help output isn't very informative on the new ability our script + has acquired, but that can always be fixed by improving the documentation for + our script (e.g. via the ``help`` keyword argument). + +* That last output exposes a bug in our program. + + +Let's fix:: + + import argparse + parser = argparse.ArgumentParser() + parser.add_argument("square", type=int, + help="display a square of a given number") + parser.add_argument("-v", "--verbosity", action="count", + help="increase output verbosity") + args = parser.parse_args() + answer = args.square**2 + + # bugfix: replace == with >= + if args.verbosity >= 2: + print(f"the square of {args.square} equals {answer}") + elif args.verbosity >= 1: + print(f"{args.square}^2 == {answer}") + else: + print(answer) + +And this is what it gives: + +.. 
code-block:: shell-session + + $ python prog.py 4 -vvv + the square of 4 equals 16 + $ python prog.py 4 -vvvv + the square of 4 equals 16 + $ python prog.py 4 + Traceback (most recent call last): + File "prog.py", line 11, in <module> + if args.verbosity >= 2: + TypeError: '>=' not supported between instances of 'NoneType' and 'int' + + +* First output went well, and fixes the bug we had before. + That is, we want any value >= 2 to be as verbose as possible. + +* Third output not so good. + +Let's fix that bug:: + + import argparse + parser = argparse.ArgumentParser() + parser.add_argument("square", type=int, + help="display a square of a given number") + parser.add_argument("-v", "--verbosity", action="count", default=0, + help="increase output verbosity") + args = parser.parse_args() + answer = args.square**2 + if args.verbosity >= 2: + print(f"the square of {args.square} equals {answer}") + elif args.verbosity >= 1: + print(f"{args.square}^2 == {answer}") + else: + print(answer) + +We've just introduced yet another keyword, ``default``. +We've set it to ``0`` in order to make it comparable to the other int values. +Remember that by default, +if an optional argument isn't specified, +it gets the ``None`` value, and that cannot be compared to an int value +(hence the :exc:`TypeError` exception). + +And: + +.. code-block:: shell-session + + $ python prog.py 4 + 16 + +You can go quite far just with what we've learned so far, +and we have only scratched the surface. +The :mod:`argparse` module is very powerful, +and we'll explore a bit more of it before we end this tutorial.
+ + +Getting a little more advanced +============================== + +What if we wanted to expand our tiny program to perform other powers, +not just squares:: + + import argparse + parser = argparse.ArgumentParser() + parser.add_argument("x", type=int, help="the base") + parser.add_argument("y", type=int, help="the exponent") + parser.add_argument("-v", "--verbosity", action="count", default=0) + args = parser.parse_args() + answer = args.x**args.y + if args.verbosity >= 2: + print(f"{args.x} to the power {args.y} equals {answer}") + elif args.verbosity >= 1: + print(f"{args.x}^{args.y} == {answer}") + else: + print(answer) + +Output: + +.. code-block:: shell-session + + $ python prog.py + usage: prog.py [-h] [-v] x y + prog.py: error: the following arguments are required: x, y + $ python prog.py -h + usage: prog.py [-h] [-v] x y + + positional arguments: + x the base + y the exponent + + options: + -h, --help show this help message and exit + -v, --verbosity + $ python prog.py 4 2 -v + 4^2 == 16 + + +Notice that so far we've been using verbosity level to *change* the text +that gets displayed. The following example instead uses verbosity level +to display *more* text instead:: + + import argparse + parser = argparse.ArgumentParser() + parser.add_argument("x", type=int, help="the base") + parser.add_argument("y", type=int, help="the exponent") + parser.add_argument("-v", "--verbosity", action="count", default=0) + args = parser.parse_args() + answer = args.x**args.y + if args.verbosity >= 2: + print(f"Running '{__file__}'") + if args.verbosity >= 1: + print(f"{args.x}^{args.y} == ", end="") + print(answer) + +Output: + +.. code-block:: shell-session + + $ python prog.py 4 2 + 16 + $ python prog.py 4 2 -v + 4^2 == 16 + $ python prog.py 4 2 -vv + Running 'prog.py' + 4^2 == 16 + + +.. 
_specifying-ambiguous-arguments: + +Specifying ambiguous arguments +------------------------------ + +When there is ambiguity in deciding whether an argument is positional or for an +option, ``--`` can be used to tell :meth:`~ArgumentParser.parse_args` that +everything after that is a positional argument:: + + >>> parser = argparse.ArgumentParser(prog='PROG') + >>> parser.add_argument('-n', nargs='+') + >>> parser.add_argument('args', nargs='*') + + >>> # ambiguous, so parse_args assumes it's an option + >>> parser.parse_args(['-f']) + usage: PROG [-h] [-n N [N ...]] [args ...] + PROG: error: unrecognized arguments: -f + + >>> parser.parse_args(['--', '-f']) + Namespace(args=['-f'], n=None) + + >>> # ambiguous, so the -n option greedily accepts arguments + >>> parser.parse_args(['-n', '1', '2', '3']) + Namespace(args=[], n=['1', '2', '3']) + + >>> parser.parse_args(['-n', '1', '--', '2', '3']) + Namespace(args=['2', '3'], n=['1']) + + +Conflicting options +------------------- + +So far, we have been working with two methods of an +:class:`argparse.ArgumentParser` instance. Let's introduce a third one, +:meth:`~ArgumentParser.add_mutually_exclusive_group`. It allows us to specify options that +conflict with each other.
Let's also change the rest of the program so that +the new functionality makes more sense: +we'll introduce the ``--quiet`` option, +which will be the opposite of the ``--verbose`` one:: + + import argparse + + parser = argparse.ArgumentParser() + group = parser.add_mutually_exclusive_group() + group.add_argument("-v", "--verbose", action="store_true") + group.add_argument("-q", "--quiet", action="store_true") + parser.add_argument("x", type=int, help="the base") + parser.add_argument("y", type=int, help="the exponent") + args = parser.parse_args() + answer = args.x**args.y + + if args.quiet: + print(answer) + elif args.verbose: + print(f"{args.x} to the power {args.y} equals {answer}") + else: + print(f"{args.x}^{args.y} == {answer}") + +Our program is now simpler, and we've lost some functionality for the sake of +demonstration. Anyways, here's the output: + +.. code-block:: shell-session + + $ python prog.py 4 2 + 4^2 == 16 + $ python prog.py 4 2 -q + 16 + $ python prog.py 4 2 -v + 4 to the power 2 equals 16 + $ python prog.py 4 2 -vq + usage: prog.py [-h] [-v | -q] x y + prog.py: error: argument -q/--quiet: not allowed with argument -v/--verbose + $ python prog.py 4 2 -v --quiet + usage: prog.py [-h] [-v | -q] x y + prog.py: error: argument -q/--quiet: not allowed with argument -v/--verbose + +That should be easy to follow. I've added that last output so you can see the +sort of flexibility you get, i.e. mixing long form options with short form +ones. 
+ +Before we conclude, you probably want to tell your users the main purpose of +your program, just in case they don't know:: + + import argparse + + parser = argparse.ArgumentParser(description="calculate X to the power of Y") + group = parser.add_mutually_exclusive_group() + group.add_argument("-v", "--verbose", action="store_true") + group.add_argument("-q", "--quiet", action="store_true") + parser.add_argument("x", type=int, help="the base") + parser.add_argument("y", type=int, help="the exponent") + args = parser.parse_args() + answer = args.x**args.y + + if args.quiet: + print(answer) + elif args.verbose: + print(f"{args.x} to the power {args.y} equals {answer}") + else: + print(f"{args.x}^{args.y} == {answer}") + +Note the slight difference in the usage text. Note the ``[-v | -q]``, +which tells us that we can either use ``-v`` or ``-q``, +but not both at the same time: + +.. code-block:: shell-session + + $ python prog.py --help + usage: prog.py [-h] [-v | -q] x y + + calculate X to the power of Y + + positional arguments: + x the base + y the exponent + + options: + -h, --help show this help message and exit + -v, --verbose + -q, --quiet + + +How to translate the argparse output +==================================== + +The output of the :mod:`argparse` module, such as its help text and error +messages, is all made translatable using the :mod:`gettext` module. This +allows applications to easily localize messages produced by +:mod:`argparse`. See also :ref:`i18n-howto`. + +For instance, in this :mod:`argparse` output: + +.. code-block:: shell-session + + $ python prog.py --help + usage: prog.py [-h] [-v | -q] x y + + calculate X to the power of Y + + positional arguments: + x the base + y the exponent + + options: + -h, --help show this help message and exit + -v, --verbose + -q, --quiet + +The strings ``usage:``, ``positional arguments:``, ``options:`` and +``show this help message and exit`` are all translatable.
+ +In order to translate these strings, they must first be extracted +into a ``.po`` file. For example, using `Babel `__, +run this command: + +.. code-block:: shell-session + + $ pybabel extract -o messages.po /usr/lib/python3.12/argparse.py + +This command will extract all translatable strings from the :mod:`argparse` +module and output them into a file named ``messages.po``. This command assumes +that your Python installation is in ``/usr/lib``. + +You can find out the location of the :mod:`argparse` module on your system +using this script:: + + import argparse + print(argparse.__file__) + +Once the messages in the ``.po`` file are translated and the translations are +installed using :mod:`gettext`, :mod:`argparse` will be able to display the +translated messages. + +To translate your own strings in the :mod:`argparse` output, use :mod:`gettext`. + +Conclusion +========== + +The :mod:`argparse` module offers a lot more than shown here. +Its docs are quite detailed and thorough, and full of examples. +Having gone through this tutorial, you should easily digest them +without feeling overwhelmed. diff --git a/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/clinic.rst b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/clinic.rst new file mode 100644 index 00000000..06097724 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/clinic.rst @@ -0,0 +1,14 @@ +:orphan: + +.. This page is retained solely for existing links to /howto/clinic.html. + Direct readers to the devguide. + +********************** +Argument Clinic How-To +********************** + + +.. note:: + + The Argument Clinic How-TO has been moved to the `Python Developer's Guide + `__. 
diff --git a/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/cporting.rst b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/cporting.rst new file mode 100644 index 00000000..7773620b --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/cporting.rst @@ -0,0 +1,26 @@ +.. highlight:: c + +.. _cporting-howto: + +************************************* +Porting Extension Modules to Python 3 +************************************* + +We recommend the following resources for porting extension modules to Python 3: + +* The `Migrating C extensions`_ chapter from + *Supporting Python 3: An in-depth guide*, a book on moving from Python 2 + to Python 3 in general, guides the reader through porting an extension + module. +* The `Porting guide`_ from the *py3c* project provides opinionated + suggestions with supporting code. +* The `Cython`_ and `CFFI`_ libraries offer abstractions over + Python's C API. + Extensions generally need to be re-written to use one of them, + but the library then handles differences between various Python + versions and implementations. + +.. _Migrating C extensions: http://python3porting.com/cextensions.html +.. _Porting guide: https://py3c.readthedocs.io/en/latest/guide.html +.. _Cython: https://cython.org/ +.. _CFFI: https://cffi.readthedocs.io/en/latest/ diff --git a/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/curses.rst b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/curses.rst new file mode 100644 index 00000000..f9ad81e3 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/curses.rst @@ -0,0 +1,547 @@ +.. _curses-howto: + +********************************** + Curses Programming with Python +********************************** + +.. currentmodule:: curses + +:Author: A.M. Kuchling, Eric S. Raymond +:Release: 2.04 + + +.. 
topic:: Abstract + + This document describes how to use the :mod:`curses` extension + module to control text-mode displays. + + +What is curses? +=============== + +The curses library supplies a terminal-independent screen-painting and +keyboard-handling facility for text-based terminals; such terminals +include VT100s, the Linux console, and the simulated terminal provided +by various programs. Display terminals support various control codes +to perform common operations such as moving the cursor, scrolling the +screen, and erasing areas. Different terminals use widely differing +codes, and often have their own minor quirks. + +In a world of graphical displays, one might ask "why bother"? It's +true that character-cell display terminals are an obsolete technology, +but there are niches in which being able to do fancy things with them +is still valuable. One niche is on small-footprint or embedded +Unixes that don't run an X server. Another is tools such as OS +installers and kernel configurators that may have to run before any +graphical support is available. + +The curses library provides fairly basic functionality, providing the +programmer with an abstraction of a display containing multiple +non-overlapping windows of text. The contents of a window can be +changed in various ways---adding text, erasing it, changing its +appearance---and the curses library will figure out what control codes +need to be sent to the terminal to produce the right output. curses +doesn't provide many user-interface concepts such as buttons, checkboxes, +or dialogs; if you need such features, consider a user interface library such as +:pypi:`Urwid`. + +The curses library was originally written for BSD Unix; the later System V +versions of Unix from AT&T added many enhancements and new functions. BSD curses +is no longer maintained, having been replaced by ncurses, which is an +open-source implementation of the AT&T interface.
If you're using an +open-source Unix such as Linux or FreeBSD, your system almost certainly uses +ncurses. Since most current commercial Unix versions are based on System V +code, all the functions described here will probably be available. The older +versions of curses carried by some proprietary Unixes may not support +everything, though. + +The Windows version of Python doesn't include the :mod:`curses` +module. A ported version called :pypi:`UniCurses` is available. + + +The Python curses module +------------------------ + +The Python module is a fairly simple wrapper over the C functions provided by +curses; if you're already familiar with curses programming in C, it's really +easy to transfer that knowledge to Python. The biggest difference is that the +Python interface makes things simpler by merging different C functions such as +:c:func:`!addstr`, :c:func:`!mvaddstr`, and :c:func:`!mvwaddstr` into a single +:meth:`~curses.window.addstr` method. You'll see this covered in more +detail later. + +This HOWTO is an introduction to writing text-mode programs with curses +and Python. It doesn't attempt to be a complete guide to the curses API; for +that, see the Python library guide's section on ncurses, and the C manual pages +for ncurses. It will, however, give you the basic ideas. + + +Starting and ending a curses application +======================================== + +Before doing anything, curses must be initialized. This is done by +calling the :func:`~curses.initscr` function, which will determine the +terminal type, send any required setup codes to the terminal, and +create various internal data structures. If successful, +:func:`!initscr` returns a window object representing the entire +screen; this is usually called ``stdscr`` after the name of the +corresponding C variable. 
:: + + import curses + stdscr = curses.initscr() + +Usually curses applications turn off automatic echoing of keys to the +screen, in order to be able to read keys and only display them under +certain circumstances. This requires calling the +:func:`~curses.noecho` function. :: + + curses.noecho() + +Applications will also commonly need to react to keys instantly, +without requiring the Enter key to be pressed; this is called cbreak +mode, as opposed to the usual buffered input mode. :: + + curses.cbreak() + +Terminals usually return special keys, such as the cursor keys or navigation +keys such as Page Up and Home, as a multibyte escape sequence. While you could +write your application to expect such sequences and process them accordingly, +curses can do it for you, returning a special value such as +:const:`curses.KEY_LEFT`. To get curses to do the job, you'll have to enable +keypad mode. :: + + stdscr.keypad(True) + +Terminating a curses application is much easier than starting one. You'll need +to call:: + + curses.nocbreak() + stdscr.keypad(False) + curses.echo() + +to reverse the curses-friendly terminal settings. Then call the +:func:`~curses.endwin` function to restore the terminal to its original +operating mode. :: + + curses.endwin() + +A common problem when debugging a curses application is to get your terminal +messed up when the application dies without restoring the terminal to its +previous state. In Python this commonly happens when your code is buggy and +raises an uncaught exception. Keys are no longer echoed to the screen when +you type them, for example, which makes using the shell difficult. + +In Python you can avoid these complications and make debugging much easier by +importing the :func:`curses.wrapper` function and using it like this:: + + from curses import wrapper + + def main(stdscr): + # Clear screen + stdscr.clear() + + # This raises ZeroDivisionError when i == 10. 
+ for i in range(0, 11): + v = i-10 + stdscr.addstr(i, 0, '10 divided by {} is {}'.format(v, 10/v)) + + stdscr.refresh() + stdscr.getkey() + + wrapper(main) + +The :func:`~curses.wrapper` function takes a callable object and does the +initializations described above, also initializing colors if color +support is present. :func:`!wrapper` then runs your provided callable. +Once the callable returns, :func:`!wrapper` will restore the original +state of the terminal. The callable is called inside a +:keyword:`try`...\ :keyword:`except` that catches exceptions, restores +the state of the terminal, and then re-raises the exception. Therefore +your terminal won't be left in a funny state on exception and you'll be +able to read the exception's message and traceback. + + +Windows and Pads +================ + +Windows are the basic abstraction in curses. A window object represents a +rectangular area of the screen, and supports methods to display text, +erase it, allow the user to input strings, and so forth. + +The ``stdscr`` object returned by the :func:`~curses.initscr` function is a +window object that covers the entire screen. Many programs may need +only this single window, but you might wish to divide the screen into +smaller windows, in order to redraw or clear them separately. The +:func:`~curses.newwin` function creates a new window of a given size, +returning the new window object. :: + + begin_x = 20; begin_y = 7 + height = 5; width = 40 + win = curses.newwin(height, width, begin_y, begin_x) + +Note that the coordinate system used in curses is unusual. +Coordinates are always passed in the order *y,x*, and the top-left +corner of a window is coordinate (0,0). This breaks the normal +convention for handling coordinates where the *x* coordinate comes +first. This is an unfortunate difference from most other computer +applications, but it's been part of curses since it was first written, +and it's too late to change things now. 
+ +Your application can determine the size of the screen by using the +:data:`curses.LINES` and :data:`curses.COLS` variables to obtain the *y* and +*x* sizes. Legal coordinates will then extend from ``(0,0)`` to +``(curses.LINES - 1, curses.COLS - 1)``. + +When you call a method to display or erase text, the effect doesn't +immediately show up on the display. Instead you must call the +:meth:`~curses.window.refresh` method of window objects to update the +screen. + +This is because curses was originally written with slow 300-baud +terminal connections in mind; with these terminals, minimizing the +time required to redraw the screen was very important. Instead curses +accumulates changes to the screen and displays them in the most +efficient manner when you call :meth:`!refresh`. For example, if your +program displays some text in a window and then clears the window, +there's no need to send the original text because it's never +visible. + +In practice, explicitly telling curses to redraw a window doesn't +really complicate programming with curses much. Most programs go into a flurry +of activity, and then pause waiting for a keypress or some other action on the +part of the user. All you have to do is to be sure that the screen has been +redrawn before pausing to wait for user input, by first calling +:meth:`!stdscr.refresh` or the :meth:`!refresh` method of some other relevant +window. + +A pad is a special case of a window; it can be larger than the actual display +screen, and only a portion of the pad is displayed at a time. Creating a pad +requires the pad's height and width, while refreshing a pad requires giving the +coordinates of the on-screen area where a subsection of the pad will be +displayed.
:: + + pad = curses.newpad(100, 100) + # These loops fill the pad with letters; addch() is + # explained in the next section + for y in range(0, 99): + for x in range(0, 99): + pad.addch(y,x, ord('a') + (x*x+y*y) % 26) + + # Displays a section of the pad in the middle of the screen. + # (0,0) : coordinate of upper-left corner of pad area to display. + # (5,5) : coordinate of upper-left corner of window area to be filled + # with pad content. + # (20, 75) : coordinate of lower-right corner of window area to be + # : filled with pad content. + pad.refresh( 0,0, 5,5, 20,75) + +The :meth:`!refresh` call displays a section of the pad in the rectangle +extending from coordinate (5,5) to coordinate (20,75) on the screen; the upper +left corner of the displayed section is coordinate (0,0) on the pad. Beyond +that difference, pads are exactly like ordinary windows and support the same +methods. + +If you have multiple windows and pads on screen there is a more +efficient way to update the screen and prevent annoying screen flicker +as each part of the screen gets updated. :meth:`!refresh` actually +does two things: + +1) Calls the :meth:`~curses.window.noutrefresh` method of each window + to update an underlying data structure representing the desired + state of the screen. +2) Calls the :func:`~curses.doupdate` function to change the + physical screen to match the desired state recorded in the data structure. + +Instead you can call :meth:`!noutrefresh` on a number of windows to +update the data structure, and then call :func:`!doupdate` to update +the screen. + + +Displaying Text +=============== + +From a C programmer's point of view, curses may sometimes look like a +twisty maze of functions, all subtly different. For example, +:c:func:`!addstr` displays a string at the current cursor location in +the ``stdscr`` window, while :c:func:`!mvaddstr` moves to a given y,x +coordinate first before displaying the string.
:c:func:`!waddstr` is just +like :c:func:`!addstr`, but allows specifying a window to use instead of +using ``stdscr`` by default. :c:func:`!mvwaddstr` allows specifying both +a window and a coordinate. + +Fortunately the Python interface hides all these details. ``stdscr`` +is a window object like any other, and methods such as +:meth:`~curses.window.addstr` accept multiple argument forms. Usually there +are four different forms. + ++---------------------------------+-----------------------------------------------+ +| Form | Description | ++=================================+===============================================+ +| *str* or *ch* | Display the string *str* or character *ch* at | +| | the current position | ++---------------------------------+-----------------------------------------------+ +| *str* or *ch*, *attr* | Display the string *str* or character *ch*, | +| | using attribute *attr* at the current | +| | position | ++---------------------------------+-----------------------------------------------+ +| *y*, *x*, *str* or *ch* | Move to position *y,x* within the window, and | +| | display *str* or *ch* | ++---------------------------------+-----------------------------------------------+ +| *y*, *x*, *str* or *ch*, *attr* | Move to position *y,x* within the window, and | +| | display *str* or *ch*, using attribute *attr* | ++---------------------------------+-----------------------------------------------+ + +Attributes allow displaying text in highlighted forms such as boldface, +underline, reverse code, or in color. They'll be explained in more detail in +the next subsection. + + +The :meth:`~curses.window.addstr` method takes a Python string or +bytestring as the value to be displayed. The contents of bytestrings +are sent to the terminal as-is. Strings are encoded to bytes using +the value of the window's :attr:`~window.encoding` attribute; this defaults to +the default system encoding as returned by :func:`locale.getencoding`. 
+ +The :meth:`~curses.window.addch` methods take a character, which can be +either a string of length 1, a bytestring of length 1, or an integer. + +Constants are provided for extension characters; these constants are +integers greater than 255. For example, :const:`ACS_PLMINUS` is a +/- +symbol, and :const:`ACS_ULCORNER` is the upper left corner of a box +(handy for drawing borders). You can also use the appropriate Unicode +character. + +Windows remember where the cursor was left after the last operation, so if you +leave out the *y,x* coordinates, the string or character will be displayed +wherever the last operation left off. You can also move the cursor with the +``move(y,x)`` method. Because some terminals always display a flashing cursor, +you may want to ensure that the cursor is positioned in some location where it +won't be distracting; it can be confusing to have the cursor blinking at some +apparently random location. + +If your application doesn't need a blinking cursor at all, you can +call ``curs_set(False)`` to make it invisible. For compatibility +with older curses versions, there's a ``leaveok(bool)`` function +that's a synonym for :func:`~curses.curs_set`. When *bool* is true, the +curses library will attempt to suppress the flashing cursor, and you +won't need to worry about leaving it in odd locations. + + +Attributes and Color +-------------------- + +Characters can be displayed in different ways. Status lines in a text-based +application are commonly shown in reverse video, or a text viewer may need to +highlight certain words. curses supports this by allowing you to specify an +attribute for each cell on the screen. + +An attribute is an integer, each bit representing a different +attribute. You can try to display text with multiple attribute bits +set, but curses doesn't guarantee that all the possible combinations +are available, or that they're all visually distinct. 
That depends on +the ability of the terminal being used, so it's safest to stick to the +most commonly available attributes, listed here. + ++----------------------+--------------------------------------+ +| Attribute | Description | ++======================+======================================+ +| :const:`A_BLINK` | Blinking text | ++----------------------+--------------------------------------+ +| :const:`A_BOLD` | Extra bright or bold text | ++----------------------+--------------------------------------+ +| :const:`A_DIM` | Half bright text | ++----------------------+--------------------------------------+ +| :const:`A_REVERSE` | Reverse-video text | ++----------------------+--------------------------------------+ +| :const:`A_STANDOUT` | The best highlighting mode available | ++----------------------+--------------------------------------+ +| :const:`A_UNDERLINE` | Underlined text | ++----------------------+--------------------------------------+ + +So, to display a reverse-video status line on the top line of the screen, you +could code:: + + stdscr.addstr(0, 0, "Current mode: Typing mode", + curses.A_REVERSE) + stdscr.refresh() + +The curses library also supports color on those terminals that provide it. The +most common such terminal is probably the Linux console, followed by color +xterms. + +To use color, you must call the :func:`~curses.start_color` function soon +after calling :func:`~curses.initscr`, to initialize the default color set +(the :func:`curses.wrapper` function does this automatically). Once that's +done, the :func:`~curses.has_colors` function returns TRUE if the terminal +in use can +actually display color. (Note: curses uses the American spelling 'color', +instead of the Canadian/British spelling 'colour'. If you're used to the +British spelling, you'll have to resign yourself to misspelling it for the sake +of these functions.) 
+ +The curses library maintains a finite number of color pairs, containing a +foreground (or text) color and a background color. You can get the attribute +value corresponding to a color pair with the :func:`~curses.color_pair` +function; this can be bitwise-OR'ed with other attributes such as +:const:`A_REVERSE`, but again, such combinations are not guaranteed to work +on all terminals. + +An example, which displays a line of text using color pair 1:: + + stdscr.addstr("Pretty text", curses.color_pair(1)) + stdscr.refresh() + +As I said before, a color pair consists of a foreground and background color. +The ``init_pair(n, f, b)`` function changes the definition of color pair *n*, to +foreground color f and background color b. Color pair 0 is hard-wired to white +on black, and cannot be changed. + +Colors are numbered, and :func:`start_color` initializes 8 basic +colors when it activates color mode. They are: 0:black, 1:red, +2:green, 3:yellow, 4:blue, 5:magenta, 6:cyan, and 7:white. The :mod:`curses` +module defines named constants for each of these colors: +:const:`curses.COLOR_BLACK`, :const:`curses.COLOR_RED`, and so forth. + +Let's put all this together. To change color pair 1 to red text on a white +background, you would call:: + + curses.init_pair(1, curses.COLOR_RED, curses.COLOR_WHITE) + +When you change a color pair, any text already displayed using that color pair +will change to the new colors. You can also display new text in this color +with:: + + stdscr.addstr(0,0, "RED ALERT!", curses.color_pair(1)) + +Very fancy terminals can change the definitions of the actual colors to a given +RGB value. This lets you change color 1, which is usually red, to purple or +blue or any other color you like. Unfortunately, the Linux console doesn't +support this, so I'm unable to try it out, and can't provide any examples. You +can check if your terminal can do this by calling +:func:`~curses.can_change_color`, which returns ``True`` if the capability is +there.
If you're lucky enough to have such a talented terminal, consult your +system's man pages for more information. + + +User Input +========== + +The C curses library offers only very simple input mechanisms. Python's +:mod:`curses` module adds a basic text-input widget. (Other libraries +such as :pypi:`Urwid` have more extensive collections of widgets.) + +There are two methods for getting input from a window: + +* :meth:`~curses.window.getch` refreshes the screen and then waits for + the user to hit a key, displaying the key if :func:`~curses.echo` has been + called earlier. You can optionally specify a coordinate to which + the cursor should be moved before pausing. + +* :meth:`~curses.window.getkey` does the same thing but converts the + integer to a string. Individual characters are returned as + 1-character strings, and special keys such as function keys return + longer strings containing a key name such as ``KEY_UP`` or ``^G``. + +It's possible to not wait for the user using the +:meth:`~curses.window.nodelay` window method. After ``nodelay(True)``, +:meth:`!getch` and :meth:`!getkey` for the window become +non-blocking. To signal that no input is ready, :meth:`!getch` returns +``curses.ERR`` (a value of -1) and :meth:`!getkey` raises an exception. +There's also a :func:`~curses.halfdelay` function, which can be used to (in +effect) set a timer on each :meth:`!getch`; if no input becomes +available within a specified delay (measured in tenths of a second), +curses raises an exception. + +The :meth:`!getch` method returns an integer; if it's between 0 and 255, it +represents the ASCII code of the key pressed. Values greater than 255 are +special keys such as Page Up, Home, or the cursor keys. You can compare the +value returned to constants such as :const:`curses.KEY_PPAGE`, +:const:`curses.KEY_HOME`, or :const:`curses.KEY_LEFT`. 
The main loop of +your program may look something like this:: + + while True: + c = stdscr.getch() + if c == ord('p'): + PrintDocument() + elif c == ord('q'): + break # Exit the while loop + elif c == curses.KEY_HOME: + x = y = 0 + +The :mod:`curses.ascii` module supplies ASCII class membership functions that +take either integer or 1-character string arguments; these may be useful in +writing more readable tests for such loops. It also supplies +conversion functions that take either integer or 1-character-string arguments +and return the same type. For example, :func:`curses.ascii.ctrl` returns the +control character corresponding to its argument. + +There's also a method to retrieve an entire string, +:meth:`~curses.window.getstr`. It isn't used very often, because its +functionality is quite limited; the only editing keys available are +the backspace key and the Enter key, which terminates the string. It +can optionally be limited to a fixed number of characters. :: + + curses.echo() # Enable echoing of characters + + # Get a 15-character string, with the cursor on the top line + s = stdscr.getstr(0,0, 15) + +The :mod:`curses.textpad` module supplies a text box that supports an +Emacs-like set of keybindings. Various methods of the +:class:`~curses.textpad.Textbox` class support editing with input +validation and gathering the edit results either with or without +trailing spaces. Here's an example:: + + import curses + from curses.textpad import Textbox, rectangle + + def main(stdscr): + stdscr.addstr(0, 0, "Enter IM message: (hit Ctrl-G to send)") + + editwin = curses.newwin(5,30, 2,1) + rectangle(stdscr, 1,0, 1+5+1, 1+30+1) + stdscr.refresh() + + box = Textbox(editwin) + + # Let the user edit until Ctrl-G is struck. + box.edit() + + # Get resulting contents + message = box.gather() + +See the library documentation on :mod:`curses.textpad` for more details. 
+ + +For More Information +==================== + +This HOWTO doesn't cover some advanced topics, such as reading the +contents of the screen or capturing mouse events from an xterm +instance, but the Python library page for the :mod:`curses` module is now +reasonably complete. You should browse it next. + +If you're in doubt about the detailed behavior of the curses +functions, consult the manual pages for your curses implementation, +whether it's ncurses or a proprietary Unix vendor's. The manual pages +will document any quirks, and provide complete lists of all the +functions, attributes, and :ref:`ACS_\* ` characters available to +you. + +Because the curses API is so large, some functions aren't supported in +the Python interface. Often this isn't because they're difficult to +implement, but because no one has needed them yet. Also, Python +doesn't yet support the menu library associated with ncurses. +Patches adding support for these would be welcome; see +`the Python Developer's Guide `_ to +learn more about submitting patches to Python. + +* `Writing Programs with NCURSES `_: + a lengthy tutorial for C programmers. +* `The ncurses man page `_ +* `The ncurses FAQ `_ +* `"Use curses... don't swear" `_: + video of a PyCon 2013 talk on controlling terminals using curses or Urwid. +* `"Console Applications with Urwid" `_: + video of a PyCon CA 2012 talk demonstrating some applications written using + Urwid. diff --git a/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/descriptor.rst b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/descriptor.rst new file mode 100644 index 00000000..c60cd638 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/descriptor.rst @@ -0,0 +1,1794 @@ +.. _descriptorhowto: + +================ +Descriptor Guide +================ + +:Author: Raymond Hettinger +:Contact: + +.. Contents:: + + +:term:`Descriptors ` let objects customize attribute lookup, +storage, and deletion. 
+ +This guide has four major sections: + +1) The "primer" gives a basic overview, moving gently from simple examples, + adding one feature at a time. Start here if you're new to descriptors. + +2) The second section shows a complete, practical descriptor example. If you + already know the basics, start there. + +3) The third section provides a more technical tutorial that goes into the + detailed mechanics of how descriptors work. Most people don't need this + level of detail. + +4) The last section has pure Python equivalents for built-in descriptors that + are written in C. Read this if you're curious about how functions turn + into bound methods or about the implementation of common tools like + :func:`classmethod`, :func:`staticmethod`, :func:`property`, and + :term:`__slots__`. + + +Primer +^^^^^^ + +In this primer, we start with the most basic possible example and then we'll +add new capabilities one by one. + + +Simple example: A descriptor that returns a constant +---------------------------------------------------- + +The :class:`Ten` class is a descriptor whose :meth:`__get__` method always +returns the constant ``10``: + +.. testcode:: + + class Ten: + def __get__(self, obj, objtype=None): + return 10 + +To use the descriptor, it must be stored as a class variable in another class: + +.. testcode:: + + class A: + x = 5 # Regular class attribute + y = Ten() # Descriptor instance + +An interactive session shows the difference between normal attribute lookup +and descriptor lookup: + +.. doctest:: + + >>> a = A() # Make an instance of class A + >>> a.x # Normal attribute lookup + 5 + >>> a.y # Descriptor lookup + 10 + +In the ``a.x`` attribute lookup, the dot operator finds ``'x': 5`` +in the class dictionary. In the ``a.y`` lookup, the dot operator +finds a descriptor instance, recognized by its ``__get__`` method. +Calling that method returns ``10``. + +Note that the value ``10`` is not stored in either the class dictionary or the +instance dictionary. 
Instead, the value ``10`` is computed on demand. + +This example shows how a simple descriptor works, but it isn't very useful. +For retrieving constants, normal attribute lookup would be better. + +In the next section, we'll create something more useful, a dynamic lookup. + + +Dynamic lookups +--------------- + +Interesting descriptors typically run computations instead of returning +constants: + +.. testcode:: + + import os + + class DirectorySize: + + def __get__(self, obj, objtype=None): + return len(os.listdir(obj.dirname)) + + class Directory: + + size = DirectorySize() # Descriptor instance + + def __init__(self, dirname): + self.dirname = dirname # Regular instance attribute + +An interactive session shows that the lookup is dynamic — it computes +different, updated answers each time:: + + >>> s = Directory('songs') + >>> g = Directory('games') + >>> s.size # The songs directory has twenty files + 20 + >>> g.size # The games directory has three files + 3 + >>> os.remove('games/chess') # Delete a game + >>> g.size # File count is automatically updated + 2 + +Besides showing how descriptors can run computations, this example also +reveals the purpose of the parameters to :meth:`__get__`. The *self* +parameter is *size*, an instance of *DirectorySize*. The *obj* parameter is +either *g* or *s*, an instance of *Directory*. It is the *obj* parameter that +lets the :meth:`__get__` method learn the target directory. The *objtype* +parameter is the class *Directory*. + + +Managed attributes +------------------ + +A popular use for descriptors is managing access to instance data. The +descriptor is assigned to a public attribute in the class dictionary while the +actual data is stored as a private attribute in the instance dictionary. The +descriptor's :meth:`__get__` and :meth:`__set__` methods are triggered when +the public attribute is accessed. + +In the following example, *age* is the public attribute and *_age* is the +private attribute. 
When the public attribute is accessed, the descriptor logs +the lookup or update: + +.. testcode:: + + import logging + + logging.basicConfig(level=logging.INFO) + + class LoggedAgeAccess: + + def __get__(self, obj, objtype=None): + value = obj._age + logging.info('Accessing %r giving %r', 'age', value) + return value + + def __set__(self, obj, value): + logging.info('Updating %r to %r', 'age', value) + obj._age = value + + class Person: + + age = LoggedAgeAccess() # Descriptor instance + + def __init__(self, name, age): + self.name = name # Regular instance attribute + self.age = age # Calls __set__() + + def birthday(self): + self.age += 1 # Calls both __get__() and __set__() + + +An interactive session shows that all access to the managed attribute *age* is +logged, but that the regular attribute *name* is not logged: + +.. testcode:: + :hide: + + import logging, sys + logging.basicConfig(level=logging.INFO, stream=sys.stdout, force=True) + +.. doctest:: + + >>> mary = Person('Mary M', 30) # The initial age update is logged + INFO:root:Updating 'age' to 30 + >>> dave = Person('David D', 40) + INFO:root:Updating 'age' to 40 + + >>> vars(mary) # The actual data is in a private attribute + {'name': 'Mary M', '_age': 30} + >>> vars(dave) + {'name': 'David D', '_age': 40} + + >>> mary.age # Access the data and log the lookup + INFO:root:Accessing 'age' giving 30 + 30 + >>> mary.birthday() # Updates are logged as well + INFO:root:Accessing 'age' giving 30 + INFO:root:Updating 'age' to 31 + + >>> dave.name # Regular attribute lookup isn't logged + 'David D' + >>> dave.age # Only the managed attribute is logged + INFO:root:Accessing 'age' giving 40 + 40 + +One major issue with this example is that the private name *_age* is hardwired in +the *LoggedAgeAccess* class. That means that each instance can only have one +logged attribute and that its name is unchangeable. In the next example, +we'll fix that problem. 
+ + +Customized names +---------------- + +When a class uses descriptors, it can inform each descriptor about which +variable name was used. + +In this example, the :class:`Person` class has two descriptor instances, +*name* and *age*. When the :class:`Person` class is defined, it makes a +callback to :meth:`__set_name__` in *LoggedAccess* so that the field names can +be recorded, giving each descriptor its own *public_name* and *private_name*: + +.. testcode:: + + import logging + + logging.basicConfig(level=logging.INFO) + + class LoggedAccess: + + def __set_name__(self, owner, name): + self.public_name = name + self.private_name = '_' + name + + def __get__(self, obj, objtype=None): + value = getattr(obj, self.private_name) + logging.info('Accessing %r giving %r', self.public_name, value) + return value + + def __set__(self, obj, value): + logging.info('Updating %r to %r', self.public_name, value) + setattr(obj, self.private_name, value) + + class Person: + + name = LoggedAccess() # First descriptor instance + age = LoggedAccess() # Second descriptor instance + + def __init__(self, name, age): + self.name = name # Calls the first descriptor + self.age = age # Calls the second descriptor + + def birthday(self): + self.age += 1 + +An interactive session shows that the :class:`Person` class has called +:meth:`__set_name__` so that the field names would be recorded. Here +we call :func:`vars` to look up the descriptor without triggering it: + +.. doctest:: + + >>> vars(vars(Person)['name']) + {'public_name': 'name', 'private_name': '_name'} + >>> vars(vars(Person)['age']) + {'public_name': 'age', 'private_name': '_age'} + +The new class now logs access to both *name* and *age*: + +.. testcode:: + :hide: + + import logging, sys + logging.basicConfig(level=logging.INFO, stream=sys.stdout, force=True) + +.. 
doctest:: + + >>> pete = Person('Peter P', 10) + INFO:root:Updating 'name' to 'Peter P' + INFO:root:Updating 'age' to 10 + >>> kate = Person('Catherine C', 20) + INFO:root:Updating 'name' to 'Catherine C' + INFO:root:Updating 'age' to 20 + +The two *Person* instances contain only the private names: + +.. doctest:: + + >>> vars(pete) + {'_name': 'Peter P', '_age': 10} + >>> vars(kate) + {'_name': 'Catherine C', '_age': 20} + + +Closing thoughts +---------------- + +A :term:`descriptor` is what we call any object that defines :meth:`__get__`, +:meth:`__set__`, or :meth:`__delete__`. + +Optionally, descriptors can have a :meth:`__set_name__` method. This is only +used in cases where a descriptor needs to know either the class where it was +created or the name of the class variable it was assigned to. (This method, if +present, is called even if the class is not a descriptor.) + +Descriptors get invoked by the dot operator during attribute lookup. If a +descriptor is accessed indirectly with ``vars(some_class)[descriptor_name]``, +the descriptor instance is returned without invoking it. + +Descriptors only work when used as class variables. When put in instances, +they have no effect. + +The main motivation for descriptors is to provide a hook allowing objects +stored in class variables to control what happens during attribute lookup. + +Traditionally, the calling class controls what happens during lookup. +Descriptors invert that relationship and allow the data being looked-up to +have a say in the matter. + +Descriptors are used throughout the language. It is how functions turn into +bound methods. Common tools like :func:`classmethod`, :func:`staticmethod`, +:func:`property`, and :func:`functools.cached_property` are all implemented as +descriptors. + + +Complete Practical Example +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In this example, we create a practical and powerful tool for locating +notoriously hard-to-find data corruption bugs.
+ + +Validator class +--------------- + +A validator is a descriptor for managed attribute access. Prior to storing +any data, it verifies that the new value meets various type and range +restrictions. If those restrictions aren't met, it raises an exception to +prevent data corruption at its source. + +This :class:`Validator` class is both an :term:`abstract base class` and a +managed attribute descriptor: + +.. testcode:: + + from abc import ABC, abstractmethod + + class Validator(ABC): + + def __set_name__(self, owner, name): + self.private_name = '_' + name + + def __get__(self, obj, objtype=None): + return getattr(obj, self.private_name) + + def __set__(self, obj, value): + self.validate(value) + setattr(obj, self.private_name, value) + + @abstractmethod + def validate(self, value): + pass + +Custom validators need to inherit from :class:`Validator` and must supply a +:meth:`validate` method to test various restrictions as needed. + + +Custom validators +----------------- + +Here are three practical data validation utilities: + +1) :class:`OneOf` verifies that a value is one of a restricted set of options. + +2) :class:`Number` verifies that a value is either an :class:`int` or + :class:`float`. Optionally, it verifies that a value is between a given + minimum or maximum. + +3) :class:`String` verifies that a value is a :class:`str`. Optionally, it + validates a given minimum or maximum length. It can validate a + user-defined `predicate + `_ as well. + +.. 
testcode:: + + class OneOf(Validator): + + def __init__(self, *options): + self.options = set(options) + + def validate(self, value): + if value not in self.options: + raise ValueError( + f'Expected {value!r} to be one of {self.options!r}' + ) + + class Number(Validator): + + def __init__(self, minvalue=None, maxvalue=None): + self.minvalue = minvalue + self.maxvalue = maxvalue + + def validate(self, value): + if not isinstance(value, (int, float)): + raise TypeError(f'Expected {value!r} to be an int or float') + if self.minvalue is not None and value < self.minvalue: + raise ValueError( + f'Expected {value!r} to be at least {self.minvalue!r}' + ) + if self.maxvalue is not None and value > self.maxvalue: + raise ValueError( + f'Expected {value!r} to be no more than {self.maxvalue!r}' + ) + + class String(Validator): + + def __init__(self, minsize=None, maxsize=None, predicate=None): + self.minsize = minsize + self.maxsize = maxsize + self.predicate = predicate + + def validate(self, value): + if not isinstance(value, str): + raise TypeError(f'Expected {value!r} to be an str') + if self.minsize is not None and len(value) < self.minsize: + raise ValueError( + f'Expected {value!r} to be no smaller than {self.minsize!r}' + ) + if self.maxsize is not None and len(value) > self.maxsize: + raise ValueError( + f'Expected {value!r} to be no bigger than {self.maxsize!r}' + ) + if self.predicate is not None and not self.predicate(value): + raise ValueError( + f'Expected {self.predicate} to be true for {value!r}' + ) + + +Practical application +--------------------- + +Here's how the data validators can be used in a real class: + +.. 
testcode:: + + class Component: + + name = String(minsize=3, maxsize=10, predicate=str.isupper) + kind = OneOf('wood', 'metal', 'plastic') + quantity = Number(minvalue=0) + + def __init__(self, name, kind, quantity): + self.name = name + self.kind = kind + self.quantity = quantity + +The descriptors prevent invalid instances from being created: + +.. doctest:: + + >>> Component('Widget', 'metal', 5) # Blocked: 'Widget' is not all uppercase + Traceback (most recent call last): + ... + ValueError: Expected <method 'isupper' of 'str' objects> to be true for 'Widget' + + >>> Component('WIDGET', 'metle', 5) # Blocked: 'metle' is misspelled + Traceback (most recent call last): + ... + ValueError: Expected 'metle' to be one of {'metal', 'plastic', 'wood'} + + >>> Component('WIDGET', 'metal', -5) # Blocked: -5 is negative + Traceback (most recent call last): + ... + ValueError: Expected -5 to be at least 0 + + >>> Component('WIDGET', 'metal', 'V') # Blocked: 'V' isn't a number + Traceback (most recent call last): + ... + TypeError: Expected 'V' to be an int or float + + >>> c = Component('WIDGET', 'metal', 5) # Allowed: The inputs are valid + + +Technical Tutorial +^^^^^^^^^^^^^^^^^^ + +What follows is a more technical tutorial for the mechanics and details of how +descriptors work. + + +Abstract +-------- + +Defines descriptors, summarizes the protocol, and shows how descriptors are +called. Provides an example showing how object relational mappings work. + +Learning about descriptors not only provides access to a larger toolset, it +creates a deeper understanding of how Python works. + + +Definition and introduction +--------------------------- + +In general, a descriptor is an attribute value that has one of the methods in +the descriptor protocol. Those methods are :meth:`__get__`, :meth:`__set__`, +and :meth:`__delete__`. If any of those methods are defined for an +attribute, it is said to be a :term:`descriptor`.
+ +The default behavior for attribute access is to get, set, or delete the +attribute from an object's dictionary. For instance, ``a.x`` has a lookup chain +starting with ``a.__dict__['x']``, then ``type(a).__dict__['x']``, and +continuing through the method resolution order of ``type(a)``. If the +looked-up value is an object defining one of the descriptor methods, then Python +may override the default behavior and invoke the descriptor method instead. +Where this occurs in the precedence chain depends on which descriptor methods +were defined. + +Descriptors are a powerful, general purpose protocol. They are the mechanism +behind properties, methods, static methods, class methods, and +:func:`super`. They are used throughout Python itself. Descriptors +simplify the underlying C code and offer a flexible set of new tools for +everyday Python programs. + + +Descriptor protocol +------------------- + +``descr.__get__(self, obj, type=None)`` + +``descr.__set__(self, obj, value)`` + +``descr.__delete__(self, obj)`` + +That is all there is to it. Define any of these methods and an object is +considered a descriptor and can override default behavior upon being looked up +as an attribute. + +If an object defines :meth:`__set__` or :meth:`__delete__`, it is considered +a data descriptor. Descriptors that only define :meth:`__get__` are called +non-data descriptors (they are often used for methods but other uses are +possible). + +Data and non-data descriptors differ in how overrides are calculated with +respect to entries in an instance's dictionary. If an instance's dictionary +has an entry with the same name as a data descriptor, the data descriptor +takes precedence. If an instance's dictionary has an entry with the same +name as a non-data descriptor, the dictionary entry takes precedence. + +To make a read-only data descriptor, define both :meth:`__get__` and +:meth:`__set__` with the :meth:`__set__` raising an :exc:`AttributeError` when +called. 
Defining the :meth:`__set__` method with an exception raising +placeholder is enough to make it a data descriptor. + + +Overview of descriptor invocation +--------------------------------- + +A descriptor can be called directly with ``desc.__get__(obj)`` or +``desc.__get__(None, cls)``. + +But it is more common for a descriptor to be invoked automatically from +attribute access. + +The expression ``obj.x`` looks up the attribute ``x`` in the chain of +namespaces for ``obj``. If the search finds a descriptor outside of the +instance :attr:`~object.__dict__`, its :meth:`~object.__get__` method is +invoked according to the precedence rules listed below. + +The details of invocation depend on whether ``obj`` is an object, class, or +instance of super. + + +Invocation from an instance +--------------------------- + +Instance lookup scans through a chain of namespaces giving data descriptors +the highest priority, followed by instance variables, then non-data +descriptors, then class variables, and lastly :meth:`__getattr__` if it is +provided. + +If a descriptor is found for ``a.x``, then it is invoked with: +``desc.__get__(a, type(a))``. + +The logic for a dotted lookup is in :meth:`object.__getattribute__`. Here is +a pure Python equivalent: + +.. 
testcode:: + + def find_name_in_mro(cls, name, default): + "Emulate _PyType_Lookup() in Objects/typeobject.c" + for base in cls.__mro__: + if name in vars(base): + return vars(base)[name] + return default + + def object_getattribute(obj, name): + "Emulate PyObject_GenericGetAttr() in Objects/object.c" + null = object() + objtype = type(obj) + cls_var = find_name_in_mro(objtype, name, null) + descr_get = getattr(type(cls_var), '__get__', null) + if descr_get is not null: + if (hasattr(type(cls_var), '__set__') + or hasattr(type(cls_var), '__delete__')): + return descr_get(cls_var, obj, objtype) # data descriptor + if hasattr(obj, '__dict__') and name in vars(obj): + return vars(obj)[name] # instance variable + if descr_get is not null: + return descr_get(cls_var, obj, objtype) # non-data descriptor + if cls_var is not null: + return cls_var # class variable + raise AttributeError(name) + + +.. testcode:: + :hide: + + # Test the fidelity of object_getattribute() by comparing it with the + # normal object.__getattribute__(). The former will be accessed by + # square brackets and the latter by the dot operator. 
+ + class Object: + + def __getitem__(obj, name): + try: + return object_getattribute(obj, name) + except AttributeError: + if not hasattr(type(obj), '__getattr__'): + raise + return type(obj).__getattr__(obj, name) # __getattr__ + + class DualOperator(Object): + + x = 10 + + def __init__(self, z): + self.z = z + + @property + def p2(self): + return 2 * self.x + + @property + def p3(self): + return 3 * self.x + + def m5(self, y): + return 5 * y + + def m7(self, y): + return 7 * y + + def __getattr__(self, name): + return ('getattr_hook', self, name) + + class DualOperatorWithSlots: + + __getitem__ = Object.__getitem__ + + __slots__ = ['z'] + + x = 15 + + def __init__(self, z): + self.z = z + + @property + def p2(self): + return 2 * self.x + + def m5(self, y): + return 5 * y + + def __getattr__(self, name): + return ('getattr_hook', self, name) + + class D1: + def __get__(self, obj, objtype=None): + return type(self), obj, objtype + + class U1: + x = D1() + + class U2(U1): + pass + +.. doctest:: + :hide: + + >>> a = DualOperator(11) + >>> vars(a).update(p3 = '_p3', m7 = '_m7') + >>> a.x == a['x'] == 10 + True + >>> a.z == a['z'] == 11 + True + >>> a.p2 == a['p2'] == 20 + True + >>> a.p3 == a['p3'] == 30 + True + >>> a.m5(100) == a.m5(100) == 500 + True + >>> a.m7 == a['m7'] == '_m7' + True + >>> a.g == a['g'] == ('getattr_hook', a, 'g') + True + + >>> b = DualOperatorWithSlots(22) + >>> b.x == b['x'] == 15 + True + >>> b.z == b['z'] == 22 + True + >>> b.p2 == b['p2'] == 30 + True + >>> b.m5(200) == b['m5'](200) == 1000 + True + >>> b.g == b['g'] == ('getattr_hook', b, 'g') + True + + >>> u2 = U2() + >>> object_getattribute(u2, 'x') == u2.x == (D1, u2, U2) + True + +Note, there is no :meth:`__getattr__` hook in the :meth:`__getattribute__` +code. That is why calling :meth:`__getattribute__` directly or with +``super().__getattribute__`` will bypass :meth:`__getattr__` entirely. 
+ +Instead, it is the dot operator and the :func:`getattr` function that are +responsible for invoking :meth:`__getattr__` whenever :meth:`__getattribute__` +raises an :exc:`AttributeError`. Their logic is encapsulated in a helper +function: + +.. testcode:: + + def getattr_hook(obj, name): + "Emulate slot_tp_getattr_hook() in Objects/typeobject.c" + try: + return obj.__getattribute__(name) + except AttributeError: + if not hasattr(type(obj), '__getattr__'): + raise + return type(obj).__getattr__(obj, name) # __getattr__ + +.. doctest:: + :hide: + + + >>> class ClassWithGetAttr: + ... x = 123 + ... def __getattr__(self, attr): + ... return attr.upper() + ... + >>> cw = ClassWithGetAttr() + >>> cw.y = 456 + >>> getattr_hook(cw, 'x') + 123 + >>> getattr_hook(cw, 'y') + 456 + >>> getattr_hook(cw, 'z') + 'Z' + + >>> class ClassWithoutGetAttr: + ... x = 123 + ... + >>> cwo = ClassWithoutGetAttr() + >>> cwo.y = 456 + >>> getattr_hook(cwo, 'x') + 123 + >>> getattr_hook(cwo, 'y') + 456 + >>> getattr_hook(cwo, 'z') + Traceback (most recent call last): + ... + AttributeError: 'ClassWithoutGetAttr' object has no attribute 'z' + + +Invocation from a class +----------------------- + +The logic for a dotted lookup such as ``A.x`` is in +:meth:`type.__getattribute__`. The steps are similar to those for +:meth:`object.__getattribute__` but the instance dictionary lookup is replaced +by a search through the class's :term:`method resolution order`. + +If a descriptor is found, it is invoked with ``desc.__get__(None, A)``. + +The full C implementation can be found in :c:func:`!type_getattro` and +:c:func:`!_PyType_Lookup` in :source:`Objects/typeobject.c`. + + +Invocation from super +--------------------- + +The logic for super's dotted lookup is in the :meth:`__getattribute__` method for +the object returned by :func:`super`.
+ +A dotted lookup such as ``super(A, obj).m`` searches ``obj.__class__.__mro__`` +for the base class ``B`` immediately following ``A`` and then returns +``B.__dict__['m'].__get__(obj, A)``. If not a descriptor, ``m`` is returned +unchanged. + +The full C implementation can be found in :c:func:`!super_getattro` in +:source:`Objects/typeobject.c`. A pure Python equivalent can be found in +`Guido's Tutorial +<https://www.python.org/download/releases/2.2.3/descrintro/#cooperation>`_. + + +Summary of invocation logic +--------------------------- + +The mechanism for descriptors is embedded in the :meth:`__getattribute__` +methods for :class:`object`, :class:`type`, and :func:`super`. + +The important points to remember are: + +* Descriptors are invoked by the :meth:`__getattribute__` method. + +* Classes inherit this machinery from :class:`object`, :class:`type`, or + :func:`super`. + +* Overriding :meth:`__getattribute__` prevents automatic descriptor calls + because all the descriptor logic is in that method. + +* :meth:`object.__getattribute__` and :meth:`type.__getattribute__` make + different calls to :meth:`__get__`. The first includes the instance and may + include the class. The second puts in ``None`` for the instance and always + includes the class. + +* Data descriptors always override instance dictionaries. + +* Non-data descriptors may be overridden by instance dictionaries. + + +Automatic name notification +--------------------------- + +Sometimes it is desirable for a descriptor to know what class variable name it +was assigned to. When a new class is created, the :class:`type` metaclass +scans the dictionary of the new class. If any of the entries are descriptors +and if they define :meth:`__set_name__`, that method is called with two +arguments. The *owner* is the class where the descriptor is used, and the +*name* is the class variable the descriptor was assigned to. + +The implementation details are in :c:func:`!type_new` and +:c:func:`!set_names` in :source:`Objects/typeobject.c`.
+ +Since the update logic is in :meth:`type.__new__`, notifications only take +place at the time of class creation. If descriptors are added to the class +afterwards, :meth:`__set_name__` will need to be called manually. + + +ORM example +----------- + +The following code is a simplified skeleton showing how data descriptors could +be used to implement an `object relational mapping +`_. + +The essential idea is that the data is stored in an external database. The +Python instances only hold keys to the database's tables. Descriptors take +care of lookups or updates: + +.. testcode:: + + class Field: + + def __set_name__(self, owner, name): + self.fetch = f'SELECT {name} FROM {owner.table} WHERE {owner.key}=?;' + self.store = f'UPDATE {owner.table} SET {name}=? WHERE {owner.key}=?;' + + def __get__(self, obj, objtype=None): + return conn.execute(self.fetch, [obj.key]).fetchone()[0] + + def __set__(self, obj, value): + conn.execute(self.store, [value, obj.key]) + conn.commit() + +We can use the :class:`Field` class to define `models +`_ that describe the schema for +each table in a database: + +.. testcode:: + + class Movie: + table = 'Movies' # Table name + key = 'title' # Primary key + director = Field() + year = Field() + + def __init__(self, key): + self.key = key + + class Song: + table = 'Music' + key = 'title' + artist = Field() + year = Field() + genre = Field() + + def __init__(self, key): + self.key = key + +To use the models, first connect to the database:: + + >>> import sqlite3 + >>> conn = sqlite3.connect('entertainment.db') + +An interactive session shows how data is retrieved from the database and how +it can be updated: + +.. 
testsetup:: + + song_data = [ + ('Country Roads', 'John Denver', 1972), + ('Me and Bobby McGee', 'Janice Joplin', 1971), + ('Coal Miners Daughter', 'Loretta Lynn', 1970), + ] + + movie_data = [ + ('Star Wars', 'George Lucas', 1977), + ('Jaws', 'Steven Spielberg', 1975), + ('Aliens', 'James Cameron', 1986), + ] + + import sqlite3 + + conn = sqlite3.connect(':memory:') + conn.execute('CREATE TABLE Music (title text, artist text, year integer);') + conn.execute('CREATE INDEX MusicNdx ON Music (title);') + conn.executemany('INSERT INTO Music VALUES (?, ?, ?);', song_data) + conn.execute('CREATE TABLE Movies (title text, director text, year integer);') + conn.execute('CREATE INDEX MovieNdx ON Music (title);') + conn.executemany('INSERT INTO Movies VALUES (?, ?, ?);', movie_data) + conn.commit() + +.. doctest:: + + >>> Movie('Star Wars').director + 'George Lucas' + >>> jaws = Movie('Jaws') + >>> f'Released in {jaws.year} by {jaws.director}' + 'Released in 1975 by Steven Spielberg' + + >>> Song('Country Roads').artist + 'John Denver' + + >>> Movie('Star Wars').director = 'J.J. Abrams' + >>> Movie('Star Wars').director + 'J.J. Abrams' + +.. testcleanup:: + + conn.close() + + +Pure Python Equivalents +^^^^^^^^^^^^^^^^^^^^^^^ + +The descriptor protocol is simple and offers exciting possibilities. Several +use cases are so common that they have been prepackaged into built-in tools. +Properties, bound methods, static methods, class methods, and \_\_slots\_\_ are +all based on the descriptor protocol. + + +Properties +---------- + +Calling :func:`property` is a succinct way of building a data descriptor that +triggers a function call upon access to an attribute. Its signature is:: + + property(fget=None, fset=None, fdel=None, doc=None) -> property + +The documentation shows a typical use to define a managed attribute ``x``: + +.. 
testcode:: + + class C: + def getx(self): return self.__x + def setx(self, value): self.__x = value + def delx(self): del self.__x + x = property(getx, setx, delx, "I'm the 'x' property.") + +.. doctest:: + :hide: + + >>> C.x.__doc__ + "I'm the 'x' property." + >>> c.x = 2.71828 + >>> c.x + 2.71828 + >>> del c.x + >>> c.x + Traceback (most recent call last): + ... + AttributeError: 'C' object has no attribute '_C__x' + +To see how :func:`property` is implemented in terms of the descriptor protocol, +here is a pure Python equivalent that implements most of the core functionality: + +.. testcode:: + + class Property: + "Emulate PyProperty_Type() in Objects/descrobject.c" + + def __init__(self, fget=None, fset=None, fdel=None, doc=None): + self.fget = fget + self.fset = fset + self.fdel = fdel + if doc is None and fget is not None: + doc = fget.__doc__ + self.__doc__ = doc + + def __set_name__(self, owner, name): + self.__name__ = name + + def __get__(self, obj, objtype=None): + if obj is None: + return self + if self.fget is None: + raise AttributeError + return self.fget(obj) + + def __set__(self, obj, value): + if self.fset is None: + raise AttributeError + self.fset(obj, value) + + def __delete__(self, obj): + if self.fdel is None: + raise AttributeError + self.fdel(obj) + + def getter(self, fget): + return type(self)(fget, self.fset, self.fdel, self.__doc__) + + def setter(self, fset): + return type(self)(self.fget, fset, self.fdel, self.__doc__) + + def deleter(self, fdel): + return type(self)(self.fget, self.fset, fdel, self.__doc__) + +.. 
testcode:: + :hide: + + # Verify the Property() emulation + + class CC: + def getx(self): + return self.__x + def setx(self, value): + self.__x = value + def delx(self): + del self.__x + x = Property(getx, setx, delx, "I'm the 'x' property.") + no_getter = Property(None, setx, delx, "I'm the 'x' property.") + no_setter = Property(getx, None, delx, "I'm the 'x' property.") + no_deleter = Property(getx, setx, None, "I'm the 'x' property.") + no_doc = Property(getx, setx, delx, None) + + + # Now do it again but use the decorator style + + class CCC: + @Property + def x(self): + return self.__x + @x.setter + def x(self, value): + self.__x = value + @x.deleter + def x(self): + del self.__x + + +.. doctest:: + :hide: + + >>> cc = CC() + >>> hasattr(cc, 'x') + False + >>> cc.x = 33 + >>> cc.x + 33 + >>> del cc.x + >>> hasattr(cc, 'x') + False + + >>> ccc = CCC() + >>> hasattr(ccc, 'x') + False + >>> ccc.x = 333 + >>> ccc.x == 333 + True + >>> del ccc.x + >>> hasattr(ccc, 'x') + False + + >>> cc = CC() + >>> cc.x = 33 + >>> try: + ... cc.no_getter + ... except AttributeError as e: + ... type(e).__name__ + ... + 'AttributeError' + + >>> try: + ... cc.no_setter = 33 + ... except AttributeError as e: + ... type(e).__name__ + ... + 'AttributeError' + + >>> try: + ... del cc.no_deleter + ... except AttributeError as e: + ... type(e).__name__ + ... + 'AttributeError' + + >>> CC.no_doc.__doc__ is None + True + +The :func:`property` builtin helps whenever a user interface has granted +attribute access and then subsequent changes require the intervention of a +method. + +For instance, a spreadsheet class may grant access to a cell value through +``Cell('b10').value``. Subsequent improvements to the program require the cell +to be recalculated on every access; however, the programmer does not want to +affect existing client code accessing the attribute directly. The solution is +to wrap access to the value attribute in a property data descriptor: + +.. 
testcode:: + + class Cell: + ... + + @property + def value(self): + "Recalculate the cell before returning value" + self.recalc() + return self._value + +Either the built-in :func:`property` or our :func:`Property` equivalent would +work in this example. + + +Functions and methods +--------------------- + +Python's object oriented features are built upon a function based environment. +Using non-data descriptors, the two are merged seamlessly. + +Functions stored in class dictionaries get turned into methods when invoked. +Methods only differ from regular functions in that the object instance is +prepended to the other arguments. By convention, the instance is called +*self* but could be called *this* or any other variable name. + +Methods can be created manually with :class:`types.MethodType` which is +roughly equivalent to: + +.. testcode:: + + class MethodType: + "Emulate PyMethod_Type in Objects/classobject.c" + + def __init__(self, func, obj): + self.__func__ = func + self.__self__ = obj + + def __call__(self, *args, **kwargs): + func = self.__func__ + obj = self.__self__ + return func(obj, *args, **kwargs) + + def __getattribute__(self, name): + "Emulate method_getset() in Objects/classobject.c" + if name == '__doc__': + return self.__func__.__doc__ + return object.__getattribute__(self, name) + + def __getattr__(self, name): + "Emulate method_getattro() in Objects/classobject.c" + return getattr(self.__func__, name) + + def __get__(self, obj, objtype=None): + "Emulate method_descr_get() in Objects/classobject.c" + return self + +To support automatic creation of methods, functions include the +:meth:`__get__` method for binding methods during attribute access. This +means that functions are non-data descriptors that return bound methods +during dotted lookup from an instance. Here's how it works: + +.. testcode:: + + class Function: + ... 
+ + def __get__(self, obj, objtype=None): + "Simulate func_descr_get() in Objects/funcobject.c" + if obj is None: + return self + return MethodType(self, obj) + +Running the following class in the interpreter shows how the function +descriptor works in practice: + +.. testcode:: + + class D: + def f(self): + return self + + class D2: + pass + +.. doctest:: + :hide: + + >>> d = D() + >>> d2 = D2() + >>> d2.f = d.f.__get__(d2, D2) + >>> d2.f() is d + True + +The function has a :term:`qualified name` attribute to support introspection: + +.. doctest:: + + >>> D.f.__qualname__ + 'D.f' + +Accessing the function through the class dictionary does not invoke +:meth:`__get__`. Instead, it just returns the underlying function object:: + + >>> D.__dict__['f'] + + +Dotted access from a class calls :meth:`__get__` which just returns the +underlying function unchanged:: + + >>> D.f + + +The interesting behavior occurs during dotted access from an instance. The +dotted lookup calls :meth:`__get__` which returns a bound method object:: + + >>> d = D() + >>> d.f + > + +Internally, the bound method stores the underlying function and the bound +instance:: + + >>> d.f.__func__ + + + >>> d.f.__self__ + <__main__.D object at 0x00B18C90> + +If you have ever wondered where *self* comes from in regular methods or where +*cls* comes from in class methods, this is it! + + +Kinds of methods +---------------- + +Non-data descriptors provide a simple mechanism for variations on the usual +patterns of binding functions into methods. + +To recap, functions have a :meth:`__get__` method so that they can be converted +to a method when accessed as attributes. The non-data descriptor transforms an +``obj.f(*args)`` call into ``f(obj, *args)``. Calling ``cls.f(*args)`` +becomes ``f(*args)``. 
+ +This chart summarizes the binding and its two most useful variants: + + +-----------------+----------------------+------------------+ + | Transformation | Called from an | Called from a | + | | object | class | + +=================+======================+==================+ + | function | f(obj, \*args) | f(\*args) | + +-----------------+----------------------+------------------+ + | staticmethod | f(\*args) | f(\*args) | + +-----------------+----------------------+------------------+ + | classmethod | f(type(obj), \*args) | f(cls, \*args) | + +-----------------+----------------------+------------------+ + + +Static methods +-------------- + +Static methods return the underlying function without changes. Calling either +``c.f`` or ``C.f`` is the equivalent of a direct lookup into +``object.__getattribute__(c, "f")`` or ``object.__getattribute__(C, "f")``. As a +result, the function becomes identically accessible from either an object or a +class. + +Good candidates for static methods are methods that do not reference the +``self`` variable. + +For instance, a statistics package may include a container class for +experimental data. The class provides normal methods for computing the average, +mean, median, and other descriptive statistics that depend on the data. However, +there may be useful functions which are conceptually related but do not depend +on the data. For instance, ``erf(x)`` is a handy conversion routine that comes up +in statistical work but does not directly depend on a particular dataset. +It can be called either from an object or the class: ``s.erf(1.5) --> 0.9661`` +or ``Sample.erf(1.5) --> 0.9661``. + +Since static methods return the underlying function with no changes, the +example calls are unexciting: + +.. testcode:: + + class E: + @staticmethod + def f(x): + return x * 10 + +.. 
doctest:: + + >>> E.f(3) + 30 + >>> E().f(3) + 30 + +Using the non-data descriptor protocol, a pure Python version of +:func:`staticmethod` would look like this: + +.. testcode:: + + import functools + + class StaticMethod: + "Emulate PyStaticMethod_Type() in Objects/funcobject.c" + + def __init__(self, f): + self.f = f + functools.update_wrapper(self, f) + + def __get__(self, obj, objtype=None): + return self.f + + def __call__(self, *args, **kwds): + return self.f(*args, **kwds) + +The :func:`functools.update_wrapper` call adds a ``__wrapped__`` attribute +that refers to the underlying function. Also it carries forward +the attributes necessary to make the wrapper look like the wrapped +function: :attr:`~function.__name__`, :attr:`~function.__qualname__`, +:attr:`~function.__doc__`, and :attr:`~function.__annotations__`. + +.. testcode:: + :hide: + + class E_sim: + @StaticMethod + def f(x: int) -> str: + "Simple function example" + return "!" * x + + wrapped_ord = StaticMethod(ord) + +.. doctest:: + :hide: + + >>> E_sim.f(3) + '!!!' + >>> E_sim().f(3) + '!!!' + + >>> sm = vars(E_sim)['f'] + >>> type(sm).__name__ + 'StaticMethod' + >>> f = E_sim.f + >>> type(f).__name__ + 'function' + >>> sm.__name__ + 'f' + >>> f.__name__ + 'f' + >>> sm.__qualname__ + 'E_sim.f' + >>> f.__qualname__ + 'E_sim.f' + >>> sm.__doc__ + 'Simple function example' + >>> f.__doc__ + 'Simple function example' + >>> sm.__annotations__ + {'x': , 'return': } + >>> f.__annotations__ + {'x': , 'return': } + >>> sm.__module__ == f.__module__ + True + >>> sm(3) + '!!!' + >>> f(3) + '!!!' 
+ + >>> wrapped_ord('A') + 65 + >>> wrapped_ord.__module__ == ord.__module__ + True + >>> wrapped_ord.__wrapped__ == ord + True + >>> wrapped_ord.__name__ == ord.__name__ + True + >>> wrapped_ord.__qualname__ == ord.__qualname__ + True + >>> wrapped_ord.__doc__ == ord.__doc__ + True + + +Class methods +------------- + +Unlike static methods, class methods prepend the class reference to the +argument list before calling the function. This format is the same +for whether the caller is an object or a class: + +.. testcode:: + + class F: + @classmethod + def f(cls, x): + return cls.__name__, x + +.. doctest:: + + >>> F.f(3) + ('F', 3) + >>> F().f(3) + ('F', 3) + +This behavior is useful whenever the method only needs to have a class +reference and does not rely on data stored in a specific instance. One use for +class methods is to create alternate class constructors. For example, the +classmethod :func:`dict.fromkeys` creates a new dictionary from a list of +keys. The pure Python equivalent is: + +.. testcode:: + + class Dict(dict): + @classmethod + def fromkeys(cls, iterable, value=None): + "Emulate dict_fromkeys() in Objects/dictobject.c" + d = cls() + for key in iterable: + d[key] = value + return d + +Now a new dictionary of unique keys can be constructed like this: + +.. doctest:: + + >>> d = Dict.fromkeys('abracadabra') + >>> type(d) is Dict + True + >>> d + {'a': None, 'b': None, 'r': None, 'c': None, 'd': None} + +Using the non-data descriptor protocol, a pure Python version of +:func:`classmethod` would look like this: + +.. testcode:: + + import functools + + class ClassMethod: + "Emulate PyClassMethod_Type() in Objects/funcobject.c" + + def __init__(self, f): + self.f = f + functools.update_wrapper(self, f) + + def __get__(self, obj, cls=None): + if cls is None: + cls = type(obj) + return MethodType(self.f, cls) + +.. 
testcode:: + :hide: + + # Verify the emulation works + class T: + @ClassMethod + def cm(cls, x: int, y: str) -> tuple[str, int, str]: + "Class method that returns a tuple" + return (cls.__name__, x, y) + + +.. doctest:: + :hide: + + >>> T.cm(11, 22) + ('T', 11, 22) + + # Also call it from an instance + >>> t = T() + >>> t.cm(11, 22) + ('T', 11, 22) + + # Verify that T uses our emulation + >>> type(vars(T)['cm']).__name__ + 'ClassMethod' + + # Verify that update_wrapper() correctly copied attributes + >>> T.cm.__name__ + 'cm' + >>> T.cm.__qualname__ + 'T.cm' + >>> T.cm.__doc__ + 'Class method that returns a tuple' + >>> T.cm.__annotations__ + {'x': , 'y': , 'return': tuple[str, int, str]} + + # Verify that __wrapped__ was added and works correctly + >>> f = vars(T)['cm'].__wrapped__ + >>> type(f).__name__ + 'function' + >>> f.__name__ + 'cm' + >>> f(T, 11, 22) + ('T', 11, 22) + + +The :func:`functools.update_wrapper` call in ``ClassMethod`` adds a +``__wrapped__`` attribute that refers to the underlying function. Also +it carries forward the attributes necessary to make the wrapper look +like the wrapped function: :attr:`~function.__name__`, +:attr:`~function.__qualname__`, :attr:`~function.__doc__`, +and :attr:`~function.__annotations__`. + + +Member objects and __slots__ +---------------------------- + +When a class defines ``__slots__``, it replaces instance dictionaries with a +fixed-length array of slot values. From a user point of view that has +several effects: + +1. Provides immediate detection of bugs due to misspelled attribute +assignments. Only attribute names specified in ``__slots__`` are allowed: + +.. testcode:: + + class Vehicle: + __slots__ = ('id_number', 'make', 'model') + +.. doctest:: + + >>> auto = Vehicle() + >>> auto.id_nubmer = 'VYE483814LQEX' + Traceback (most recent call last): + ... + AttributeError: 'Vehicle' object has no attribute 'id_nubmer' + +2. 
Helps create immutable objects where descriptors manage access to private +attributes stored in ``__slots__``: + +.. testcode:: + + class Immutable: + + __slots__ = ('_dept', '_name') # Replace the instance dictionary + + def __init__(self, dept, name): + self._dept = dept # Store to private attribute + self._name = name # Store to private attribute + + @property # Read-only descriptor + def dept(self): + return self._dept + + @property + def name(self): # Read-only descriptor + return self._name + +.. doctest:: + + >>> mark = Immutable('Botany', 'Mark Watney') + >>> mark.dept + 'Botany' + >>> mark.dept = 'Space Pirate' + Traceback (most recent call last): + ... + AttributeError: property 'dept' of 'Immutable' object has no setter + >>> mark.location = 'Mars' + Traceback (most recent call last): + ... + AttributeError: 'Immutable' object has no attribute 'location' + +3. Saves memory. On a 64-bit Linux build, an instance with two attributes +takes 48 bytes with ``__slots__`` and 152 bytes without. This `flyweight +design pattern `_ likely only +matters when a large number of instances are going to be created. + +4. Improves speed. Reading instance variables is 35% faster with +``__slots__`` (as measured with Python 3.10 on an Apple M1 processor). + +5. Blocks tools like :func:`functools.cached_property` which require an +instance dictionary to function correctly: + +.. testcode:: + + from functools import cached_property + + class CP: + __slots__ = () # Eliminates the instance dict + + @cached_property # Requires an instance dict + def pi(self): + return 4 * sum((-1.0)**n / (2.0*n + 1.0) + for n in reversed(range(100_000))) + +.. doctest:: + + >>> CP().pi + Traceback (most recent call last): + ... + TypeError: No '__dict__' attribute on 'CP' instance to cache 'pi' property. + +It is not possible to create an exact drop-in pure Python version of +``__slots__`` because it requires direct access to C structures and control +over object memory allocation. 
However, we can build a mostly faithful +simulation where the actual C structure for slots is emulated by a private +``_slotvalues`` list. Reads and writes to that private structure are managed +by member descriptors: + +.. testcode:: + + null = object() + + class Member: + + def __init__(self, name, clsname, offset): + 'Emulate PyMemberDef in Include/structmember.h' + # Also see descr_new() in Objects/descrobject.c + self.name = name + self.clsname = clsname + self.offset = offset + + def __get__(self, obj, objtype=None): + 'Emulate member_get() in Objects/descrobject.c' + # Also see PyMember_GetOne() in Python/structmember.c + if obj is None: + return self + value = obj._slotvalues[self.offset] + if value is null: + raise AttributeError(self.name) + return value + + def __set__(self, obj, value): + 'Emulate member_set() in Objects/descrobject.c' + obj._slotvalues[self.offset] = value + + def __delete__(self, obj): + 'Emulate member_delete() in Objects/descrobject.c' + value = obj._slotvalues[self.offset] + if value is null: + raise AttributeError(self.name) + obj._slotvalues[self.offset] = null + + def __repr__(self): + 'Emulate member_repr() in Objects/descrobject.c' + return f'<Member {self.name!r} of {self.clsname!r}>' + +The :meth:`type.__new__` method takes care of adding member objects to class +variables: + +.. testcode:: + + class Type(type): + 'Simulate how the type metaclass adds member objects for slots' + + def __new__(mcls, clsname, bases, mapping, **kwargs): + 'Emulate type_new() in Objects/typeobject.c' + # type_new() calls PyType_Ready() which calls add_methods() + slot_names = mapping.get('slot_names', []) + for offset, name in enumerate(slot_names): + mapping[name] = Member(name, clsname, offset) + return type.__new__(mcls, clsname, bases, mapping, **kwargs) + +The :meth:`object.__new__` method takes care of creating instances that have +slots instead of an instance dictionary. Here is a rough simulation in pure +Python: + +.. 
testcode:: + + class Object: + 'Simulate how object.__new__() allocates memory for __slots__' + + def __new__(cls, *args, **kwargs): + 'Emulate object_new() in Objects/typeobject.c' + inst = super().__new__(cls) + if hasattr(cls, 'slot_names'): + empty_slots = [null] * len(cls.slot_names) + object.__setattr__(inst, '_slotvalues', empty_slots) + return inst + + def __setattr__(self, name, value): + 'Emulate _PyObject_GenericSetAttrWithDict() Objects/object.c' + cls = type(self) + if hasattr(cls, 'slot_names') and name not in cls.slot_names: + raise AttributeError( + f'{cls.__name__!r} object has no attribute {name!r}' + ) + super().__setattr__(name, value) + + def __delattr__(self, name): + 'Emulate _PyObject_GenericSetAttrWithDict() Objects/object.c' + cls = type(self) + if hasattr(cls, 'slot_names') and name not in cls.slot_names: + raise AttributeError( + f'{cls.__name__!r} object has no attribute {name!r}' + ) + super().__delattr__(name) + +To use the simulation in a real class, just inherit from :class:`Object` and +set the :term:`metaclass` to :class:`Type`: + +.. testcode:: + + class H(Object, metaclass=Type): + 'Instance variables stored in slots' + + slot_names = ['x', 'y'] + + def __init__(self, x, y): + self.x = x + self.y = y + +At this point, the metaclass has loaded member objects for *x* and *y*:: + + >>> from pprint import pp + >>> pp(dict(vars(H))) + {'__module__': '__main__', + '__doc__': 'Instance variables stored in slots', + 'slot_names': ['x', 'y'], + '__init__': , + 'x': , + 'y': } + +.. doctest:: + :hide: + + # We test this separately because the preceding section is not + # doctestable due to the hex memory address for the __init__ function + >>> isinstance(vars(H)['x'], Member) + True + >>> isinstance(vars(H)['y'], Member) + True + +When instances are created, they have a ``_slotvalues`` list where the +attributes are stored: + +.. 
doctest:: + + >>> h = H(10, 20) + >>> vars(h) + {'_slotvalues': [10, 20]} + >>> h.x = 55 + >>> vars(h) + {'_slotvalues': [55, 20]} + +Misspelled or unassigned attributes will raise an exception: + +.. doctest:: + + >>> h.xz + Traceback (most recent call last): + ... + AttributeError: 'H' object has no attribute 'xz' + +.. doctest:: + :hide: + + # Examples for deleted attributes are not shown because this section + # is already a bit lengthy. We still test that code here. + >>> del h.x + >>> hasattr(h, 'x') + False + + # Also test the code for uninitialized slots + >>> class HU(Object, metaclass=Type): + ... slot_names = ['x', 'y'] + ... + >>> hu = HU() + >>> hasattr(hu, 'x') + False + >>> hasattr(hu, 'y') + False diff --git a/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/enum.rst b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/enum.rst new file mode 100644 index 00000000..66929b41 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/enum.rst @@ -0,0 +1,1561 @@ +.. _enum-howto: + +========== +Enum HOWTO +========== + +.. _enum-basic-tutorial: + +.. currentmodule:: enum + +An :class:`Enum` is a set of symbolic names bound to unique values. They are +similar to global variables, but they offer a more useful :func:`repr`, +grouping, type-safety, and a few other features. + +They are most useful when you have a variable that can take one of a limited +selection of values. For example, the days of the week:: + + >>> from enum import Enum + >>> class Weekday(Enum): + ... MONDAY = 1 + ... TUESDAY = 2 + ... WEDNESDAY = 3 + ... THURSDAY = 4 + ... FRIDAY = 5 + ... SATURDAY = 6 + ... SUNDAY = 7 + +Or perhaps the RGB primary colors:: + + >>> from enum import Enum + >>> class Color(Enum): + ... RED = 1 + ... GREEN = 2 + ... BLUE = 3 + +As you can see, creating an :class:`Enum` is as simple as writing a class that +inherits from :class:`Enum` itself. + +.. 
note:: Case of Enum Members + + Because Enums are used to represent constants, and to help avoid issues + with name clashes between mixin-class methods/attributes and enum names, + we strongly recommend using UPPER_CASE names for members, and will be using + that style in our examples. + +Depending on the nature of the enum a member's value may or may not be +important, but either way that value can be used to get the corresponding +member:: + + >>> Weekday(3) + + +As you can see, the ``repr()`` of a member shows the enum name, the member name, +and the value. The ``str()`` of a member shows only the enum name and member +name:: + + >>> print(Weekday.THURSDAY) + Weekday.THURSDAY + +The *type* of an enumeration member is the enum it belongs to:: + + >>> type(Weekday.MONDAY) + + >>> isinstance(Weekday.FRIDAY, Weekday) + True + +Enum members have an attribute that contains just their :attr:`name`:: + + >>> print(Weekday.TUESDAY.name) + TUESDAY + +Likewise, they have an attribute for their :attr:`value`:: + + + >>> Weekday.WEDNESDAY.value + 3 + +Unlike many languages that treat enumerations solely as name/value pairs, +Python Enums can have behavior added. For example, :class:`datetime.date` +has two methods for returning the weekday: :meth:`weekday` and :meth:`isoweekday`. +The difference is that one of them counts from 0-6 and the other from 1-7. +Rather than keep track of that ourselves we can add a method to the :class:`Weekday` +enum to extract the day from the :class:`date` instance and return the matching +enum member:: + + @classmethod + def from_date(cls, date): + return cls(date.isoweekday()) + +The complete :class:`Weekday` enum now looks like this:: + + >>> class Weekday(Enum): + ... MONDAY = 1 + ... TUESDAY = 2 + ... WEDNESDAY = 3 + ... THURSDAY = 4 + ... FRIDAY = 5 + ... SATURDAY = 6 + ... SUNDAY = 7 + ... # + ... @classmethod + ... def from_date(cls, date): + ... return cls(date.isoweekday()) + +Now we can find out what today is! 
Observe:: + + >>> from datetime import date + >>> Weekday.from_date(date.today()) # doctest: +SKIP + <Weekday.TUESDAY: 2> + +Of course, if you're reading this on some other day, you'll see that day instead. + +This :class:`Weekday` enum is great if our variable only needs one day, but +what if we need several? Maybe we're writing a function to plot chores during +a week, and don't want to use a :class:`list` -- we could use a different type +of :class:`Enum`:: + + >>> from enum import Flag + >>> class Weekday(Flag): + ... MONDAY = 1 + ... TUESDAY = 2 + ... WEDNESDAY = 4 + ... THURSDAY = 8 + ... FRIDAY = 16 + ... SATURDAY = 32 + ... SUNDAY = 64 + +We've changed two things: we've inherited from :class:`Flag`, and the values are +all powers of 2. + +Just like the original :class:`Weekday` enum above, we can have a single selection:: + + >>> first_week_day = Weekday.MONDAY + >>> first_week_day + <Weekday.MONDAY: 1> + +But :class:`Flag` also allows us to combine several members into a single +variable:: + + >>> weekend = Weekday.SATURDAY | Weekday.SUNDAY + >>> weekend + <Weekday.SATURDAY|SUNDAY: 96> + +You can even iterate over a :class:`Flag` variable:: + + >>> for day in weekend: + ... print(day) + Weekday.SATURDAY + Weekday.SUNDAY + +Okay, let's get some chores set up:: + + >>> chores_for_ethan = { + ... 'feed the cat': Weekday.MONDAY | Weekday.WEDNESDAY | Weekday.FRIDAY, + ... 'do the dishes': Weekday.TUESDAY | Weekday.THURSDAY, + ... 'answer SO questions': Weekday.SATURDAY, + ... } + +And a function to display the chores for a given day:: + + >>> def show_chores(chores, day): + ... for chore, days in chores.items(): + ... if day in days: + ... print(chore) + ... + >>> show_chores(chores_for_ethan, Weekday.SATURDAY) + answer SO questions + +In cases where the actual values of the members do not matter, you can save +yourself some work and use :func:`auto` for the values:: + + >>> from enum import auto + >>> class Weekday(Flag): + ... MONDAY = auto() + ... TUESDAY = auto() + ... WEDNESDAY = auto() + ... THURSDAY = auto() + ... 
FRIDAY = auto() + ... SATURDAY = auto() + ... SUNDAY = auto() + ... WEEKEND = SATURDAY | SUNDAY + + +.. _enum-advanced-tutorial: + + +Programmatic access to enumeration members and their attributes +--------------------------------------------------------------- + +Sometimes it's useful to access members in enumerations programmatically (i.e. +situations where ``Color.RED`` won't do because the exact color is not known +at program-writing time). ``Enum`` allows such access:: + + >>> Color(1) + + >>> Color(3) + + +If you want to access enum members by *name*, use item access:: + + >>> Color['RED'] + + >>> Color['GREEN'] + + +If you have an enum member and need its :attr:`name` or :attr:`value`:: + + >>> member = Color.RED + >>> member.name + 'RED' + >>> member.value + 1 + + +Duplicating enum members and values +----------------------------------- + +Having two enum members with the same name is invalid:: + + >>> class Shape(Enum): + ... SQUARE = 2 + ... SQUARE = 3 + ... + Traceback (most recent call last): + ... + TypeError: 'SQUARE' already defined as 2 + +However, an enum member can have other names associated with it. Given two +entries ``A`` and ``B`` with the same value (and ``A`` defined first), ``B`` +is an alias for the member ``A``. By-value lookup of the value of ``A`` will +return the member ``A``. By-name lookup of ``A`` will return the member ``A``. +By-name lookup of ``B`` will also return the member ``A``:: + + >>> class Shape(Enum): + ... SQUARE = 2 + ... DIAMOND = 1 + ... CIRCLE = 3 + ... ALIAS_FOR_SQUARE = 2 + ... + >>> Shape.SQUARE + + >>> Shape.ALIAS_FOR_SQUARE + + >>> Shape(2) + + +.. note:: + + Attempting to create a member with the same name as an already + defined attribute (another member, a method, etc.) or attempting to create + an attribute with the same name as a member is not allowed. 
+ + +Ensuring unique enumeration values +---------------------------------- + +By default, enumerations allow multiple names as aliases for the same value. +When this behavior isn't desired, you can use the :func:`unique` decorator:: + + >>> from enum import Enum, unique + >>> @unique + ... class Mistake(Enum): + ... ONE = 1 + ... TWO = 2 + ... THREE = 3 + ... FOUR = 3 + ... + Traceback (most recent call last): + ... + ValueError: duplicate values found in : FOUR -> THREE + + +Using automatic values +---------------------- + +If the exact value is unimportant you can use :class:`auto`:: + + >>> from enum import Enum, auto + >>> class Color(Enum): + ... RED = auto() + ... BLUE = auto() + ... GREEN = auto() + ... + >>> [member.value for member in Color] + [1, 2, 3] + +The values are chosen by :func:`_generate_next_value_`, which can be +overridden:: + + >>> class AutoName(Enum): + ... @staticmethod + ... def _generate_next_value_(name, start, count, last_values): + ... return name + ... + >>> class Ordinal(AutoName): + ... NORTH = auto() + ... SOUTH = auto() + ... EAST = auto() + ... WEST = auto() + ... + >>> [member.value for member in Ordinal] + ['NORTH', 'SOUTH', 'EAST', 'WEST'] + +.. note:: + + The :meth:`_generate_next_value_` method must be defined before any members. + +Iteration +--------- + +Iterating over the members of an enum does not provide the aliases:: + + >>> list(Shape) + [, , ] + >>> list(Weekday) + [, , , , , , ] + +Note that the aliases ``Shape.ALIAS_FOR_SQUARE`` and ``Weekday.WEEKEND`` aren't shown. + +The special attribute ``__members__`` is a read-only ordered mapping of names +to members. It includes all names defined in the enumeration, including the +aliases:: + + >>> for name, member in Shape.__members__.items(): + ... name, member + ... + ('SQUARE', ) + ('DIAMOND', ) + ('CIRCLE', ) + ('ALIAS_FOR_SQUARE', ) + +The ``__members__`` attribute can be used for detailed programmatic access to +the enumeration members. 
For example, finding all the aliases:: + + >>> [name for name, member in Shape.__members__.items() if member.name != name] + ['ALIAS_FOR_SQUARE'] + +.. note:: + + Aliases for flags include values with multiple flags set, such as ``3``, + and no flags set, i.e. ``0``. + + +Comparisons +----------- + +Enumeration members are compared by identity:: + + >>> Color.RED is Color.RED + True + >>> Color.RED is Color.BLUE + False + >>> Color.RED is not Color.BLUE + True + +Ordered comparisons between enumeration values are *not* supported. Enum +members are not integers (but see `IntEnum`_ below):: + + >>> Color.RED < Color.BLUE + Traceback (most recent call last): + File "", line 1, in + TypeError: '<' not supported between instances of 'Color' and 'Color' + +Equality comparisons are defined though:: + + >>> Color.BLUE == Color.RED + False + >>> Color.BLUE != Color.RED + True + >>> Color.BLUE == Color.BLUE + True + +Comparisons against non-enumeration values will always compare not equal +(again, :class:`IntEnum` was explicitly designed to behave differently, see +below):: + + >>> Color.BLUE == 2 + False + +.. warning:: + + It is possible to reload modules -- if a reloaded module contains + enums, they will be recreated, and the new members may not + compare identical/equal to the original members. + +Allowed members and attributes of enumerations +---------------------------------------------- + +Most of the examples above use integers for enumeration values. Using integers +is short and handy (and provided by default by the `Functional API`_), but not +strictly enforced. In the vast majority of use-cases, one doesn't care what +the actual value of an enumeration is. But if the value *is* important, +enumerations can have arbitrary values. + +Enumerations are Python classes, and can have methods and special methods as +usual. If we have this enumeration:: + + >>> class Mood(Enum): + ... FUNKY = 1 + ... HAPPY = 3 + ... + ... def describe(self): + ... 
# self is the member here + ... return self.name, self.value + ... + ... def __str__(self): + ... return 'my custom str! {0}'.format(self.value) + ... + ... @classmethod + ... def favorite_mood(cls): + ... # cls here is the enumeration + ... return cls.HAPPY + ... + +Then:: + + >>> Mood.favorite_mood() + + >>> Mood.HAPPY.describe() + ('HAPPY', 3) + >>> str(Mood.FUNKY) + 'my custom str! 1' + +The rules for what is allowed are as follows: names that start and end with +a single underscore are reserved by enum and cannot be used; all other +attributes defined within an enumeration will become members of this +enumeration, with the exception of special methods (:meth:`__str__`, +:meth:`__add__`, etc.), descriptors (methods are also descriptors), and +variable names listed in :attr:`_ignore_`. + +Note: if your enumeration defines :meth:`__new__` and/or :meth:`__init__`, +any value(s) given to the enum member will be passed into those methods. +See `Planet`_ for an example. + +.. note:: + + The :meth:`__new__` method, if defined, is used during creation of the Enum + members; it is then replaced by Enum's :meth:`__new__` which is used after + class creation for lookup of existing members. See :ref:`new-vs-init` for + more details. + + +Restricted Enum subclassing +--------------------------- + +A new :class:`Enum` class must have one base enum class, up to one concrete +data type, and as many :class:`object`-based mixin classes as needed. The +order of these base classes is:: + + class EnumName([mix-in, ...,] [data-type,] base-enum): + pass + +Also, subclassing an enumeration is allowed only if the enumeration does not define +any members. So this is forbidden:: + + >>> class MoreColor(Color): + ... PINK = 17 + ... + Traceback (most recent call last): + ... + TypeError: cannot extend + +But this is allowed:: + + >>> class Foo(Enum): + ... def some_behavior(self): + ... pass + ... + >>> class Bar(Foo): + ... HAPPY = 1 + ... SAD = 2 + ... 
+ +Allowing subclassing of enums that define members would lead to a violation of +some important invariants of types and instances. On the other hand, it makes +sense to allow sharing some common behavior between a group of enumerations. +(See `OrderedEnum`_ for an example.) + + +.. _enum-dataclass-support: + +Dataclass support +----------------- + +When inheriting from a :class:`~dataclasses.dataclass`, +the :meth:`~Enum.__repr__` omits the inherited class' name. For example:: + + >>> from dataclasses import dataclass, field + >>> @dataclass + ... class CreatureDataMixin: + ... size: str + ... legs: int + ... tail: bool = field(repr=False, default=True) + ... + >>> class Creature(CreatureDataMixin, Enum): + ... BEETLE = 'small', 6 + ... DOG = 'medium', 4 + ... + >>> Creature.DOG + + +Use the :func:`~dataclasses.dataclass` argument ``repr=False`` +to use the standard :func:`repr`. + +.. versionchanged:: 3.12 + Only the dataclass fields are shown in the value area, not the dataclass' + name. + +.. note:: + + Adding :func:`~dataclasses.dataclass` decorator to :class:`Enum` + and its subclasses is not supported. It will not raise any errors, + but it will produce very strange results at runtime, such as members + being equal to each other:: + + >>> @dataclass # don't do this: it does not make any sense + ... class Color(Enum): + ... RED = 1 + ... BLUE = 2 + ... + >>> Color.RED is Color.BLUE + False + >>> Color.RED == Color.BLUE # problem is here: they should not be equal + True + + +Pickling +-------- + +Enumerations can be pickled and unpickled:: + + >>> from test.test_enum import Fruit + >>> from pickle import dumps, loads + >>> Fruit.TOMATO is loads(dumps(Fruit.TOMATO)) + True + +The usual restrictions for pickling apply: picklable enums must be defined in +the top level of a module, since unpickling requires them to be importable +from that module. + +.. note:: + + With pickle protocol version 4 it is possible to easily pickle enums + nested in other classes. 
+ +It is possible to modify how enum members are pickled/unpickled by defining +:meth:`__reduce_ex__` in the enumeration class. The default method is by-value, +but enums with complicated values may want to use by-name:: + + >>> import enum + >>> class MyEnum(enum.Enum): + ... __reduce_ex__ = enum.pickle_by_enum_name + +.. note:: + + Using by-name for flags is not recommended, as unnamed aliases will + not unpickle. + + +Functional API +-------------- + +The :class:`Enum` class is callable, providing the following functional API:: + + >>> Animal = Enum('Animal', 'ANT BEE CAT DOG') + >>> Animal + + >>> Animal.ANT + + >>> list(Animal) + [, , , ] + +The semantics of this API resemble :class:`~collections.namedtuple`. The first +argument of the call to :class:`Enum` is the name of the enumeration. + +The second argument is the *source* of enumeration member names. It can be a +whitespace-separated string of names, a sequence of names, a sequence of +2-tuples with key/value pairs, or a mapping (e.g. dictionary) of names to +values. The last two options enable assigning arbitrary values to +enumerations; the others auto-assign increasing integers starting with 1 (use +the ``start`` parameter to specify a different starting value). A +new class derived from :class:`Enum` is returned. In other words, the above +assignment to :class:`Animal` is equivalent to:: + + >>> class Animal(Enum): + ... ANT = 1 + ... BEE = 2 + ... CAT = 3 + ... DOG = 4 + ... + +The reason for defaulting to ``1`` as the starting number and not ``0`` is +that ``0`` is ``False`` in a boolean sense, but by default enum members all +evaluate to ``True``. + +Pickling enums created with the functional API can be tricky as frame stack +implementation details are used to try and figure out which module the +enumeration is being created in (e.g. it will fail if you use a utility +function in a separate module, and also may not work on IronPython or Jython). 
+The solution is to specify the module name explicitly as follows:: + + >>> Animal = Enum('Animal', 'ANT BEE CAT DOG', module=__name__) + +.. warning:: + + If ``module`` is not supplied, and Enum cannot determine what it is, + it will not be possible to unpickle the new Enum members; to keep errors + closer to the source, pickling will be disabled. + +The new pickle protocol 4 also, in some circumstances, relies on +:attr:`~type.__qualname__` being set to the location where pickle will be able +to find the class. For example, if the class was made available in class +SomeData in the global scope:: + + >>> Animal = Enum('Animal', 'ANT BEE CAT DOG', qualname='SomeData.Animal') + +The complete signature is:: + + Enum( + value='NewEnumName', + names=<...>, + *, + module='...', + qualname='...', + type=<mixed-in class>, + start=1, + ) + +* *value*: What the new enum class will record as its name. + +* *names*: The enum members. This can be a whitespace- or comma-separated string + (values will start at 1 unless otherwise specified):: + + 'RED GREEN BLUE' | 'RED,GREEN,BLUE' | 'RED, GREEN, BLUE' + + or an iterable of names:: + + ['RED', 'GREEN', 'BLUE'] + + or an iterable of (name, value) pairs:: + + [('CYAN', 4), ('MAGENTA', 5), ('YELLOW', 6)] + + or a mapping:: + + {'CHARTREUSE': 7, 'SEA_GREEN': 11, 'ROSEMARY': 42} + +* *module*: name of module where new enum class can be found. + +* *qualname*: where in module new enum class can be found. + +* *type*: type to mix in to new enum class. + +* *start*: number to start counting at if only names are passed in. + +.. versionchanged:: 3.5 + The *start* parameter was added. + + +Derived Enumerations +-------------------- + +IntEnum +^^^^^^^ + +The first variation of :class:`Enum` that is provided is also a subclass of +:class:`int`. Members of an :class:`IntEnum` can be compared to integers; +by extension, integer enumerations of different types can also be compared +to each other:: + + >>> from enum import IntEnum + >>> class Shape(IntEnum): + ... 
CIRCLE = 1 + ... SQUARE = 2 + ... + >>> class Request(IntEnum): + ... POST = 1 + ... GET = 2 + ... + >>> Shape == 1 + False + >>> Shape.CIRCLE == 1 + True + >>> Shape.CIRCLE == Request.POST + True + +However, they still can't be compared to standard :class:`Enum` enumerations:: + + >>> class Shape(IntEnum): + ... CIRCLE = 1 + ... SQUARE = 2 + ... + >>> class Color(Enum): + ... RED = 1 + ... GREEN = 2 + ... + >>> Shape.CIRCLE == Color.RED + False + +:class:`IntEnum` values behave like integers in other ways you'd expect:: + + >>> int(Shape.CIRCLE) + 1 + >>> ['a', 'b', 'c'][Shape.CIRCLE] + 'b' + >>> [i for i in range(Shape.SQUARE)] + [0, 1] + + +StrEnum +^^^^^^^ + +The second variation of :class:`Enum` that is provided is also a subclass of +:class:`str`. Members of a :class:`StrEnum` can be compared to strings; +by extension, string enumerations of different types can also be compared +to each other. + +.. versionadded:: 3.11 + + +IntFlag +^^^^^^^ + +The next variation of :class:`Enum` provided, :class:`IntFlag`, is also based +on :class:`int`. The difference is that :class:`IntFlag` members can be combined +using the bitwise operators (&, \|, ^, ~) and the result is still an +:class:`IntFlag` member, if possible. Like :class:`IntEnum`, :class:`IntFlag` +members are also integers and can be used wherever an :class:`int` is used. + +.. note:: + + Any operation on an :class:`IntFlag` member besides the bit-wise operations will + lose the :class:`IntFlag` membership. + + Bit-wise operations that result in invalid :class:`IntFlag` values will lose the + :class:`IntFlag` membership. See :class:`FlagBoundary` for + details. + +.. versionadded:: 3.6 +.. versionchanged:: 3.11 + +Sample :class:`IntFlag` class:: + + >>> from enum import IntFlag + >>> class Perm(IntFlag): + ... R = 4 + ... W = 2 + ... X = 1 + ... 
+ >>> Perm.R | Perm.W + <Perm.R|W: 6> + >>> Perm.R + Perm.W + 6 + >>> RW = Perm.R | Perm.W + >>> Perm.R in RW + True + +It is also possible to name the combinations:: + + >>> class Perm(IntFlag): + ... R = 4 + ... W = 2 + ... X = 1 + ... RWX = 7 + ... + >>> Perm.RWX + <Perm.RWX: 7> + >>> ~Perm.RWX + <Perm: 0> + >>> Perm(7) + <Perm.RWX: 7> + +.. note:: + + Named combinations are considered aliases. Aliases do not show up during + iteration, but can be returned from by-value lookups. + +.. versionchanged:: 3.11 + +Another important difference between :class:`IntFlag` and :class:`Enum` is that +if no flags are set (the value is 0), its boolean evaluation is :data:`False`:: + + >>> Perm.R & Perm.X + <Perm: 0> + >>> bool(Perm.R & Perm.X) + False + +Because :class:`IntFlag` members are also subclasses of :class:`int` they can +be combined with them (but may lose :class:`IntFlag` membership):: + + >>> Perm.X | 4 + <Perm.R|X: 5> + + >>> Perm.X + 8 + 9 + +.. note:: + + The negation operator, ``~``, always returns an :class:`IntFlag` member with a + positive value:: + + >>> (~Perm.X).value == (Perm.R|Perm.W).value == 6 + True + +:class:`IntFlag` members can also be iterated over:: + + >>> list(RW) + [<Perm.R: 4>, <Perm.W: 2>] + +.. versionadded:: 3.11 + + +Flag +^^^^ + +The last variation is :class:`Flag`. Like :class:`IntFlag`, :class:`Flag` +members can be combined using the bitwise operators (&, \|, ^, ~). Unlike +:class:`IntFlag`, they cannot be combined with, nor compared against, any +other :class:`Flag` enumeration, nor :class:`int`. While it is possible to +specify the values directly it is recommended to use :class:`auto` as the +value and let :class:`Flag` select an appropriate value. + +.. versionadded:: 3.6 + +Like :class:`IntFlag`, if a combination of :class:`Flag` members results in no +flags being set, the boolean evaluation is :data:`False`:: + + >>> from enum import Flag, auto + >>> class Color(Flag): + ... RED = auto() + ... BLUE = auto() + ... GREEN = auto() + ... 
+ >>> Color.RED & Color.GREEN + + >>> bool(Color.RED & Color.GREEN) + False + +Individual flags should have values that are powers of two (1, 2, 4, 8, ...), +while combinations of flags will not:: + + >>> class Color(Flag): + ... RED = auto() + ... BLUE = auto() + ... GREEN = auto() + ... WHITE = RED | BLUE | GREEN + ... + >>> Color.WHITE + + +Giving a name to the "no flags set" condition does not change its boolean +value:: + + >>> class Color(Flag): + ... BLACK = 0 + ... RED = auto() + ... BLUE = auto() + ... GREEN = auto() + ... + >>> Color.BLACK + + >>> bool(Color.BLACK) + False + +:class:`Flag` members can also be iterated over:: + + >>> purple = Color.RED | Color.BLUE + >>> list(purple) + [, ] + +.. versionadded:: 3.11 + +.. note:: + + For the majority of new code, :class:`Enum` and :class:`Flag` are strongly + recommended, since :class:`IntEnum` and :class:`IntFlag` break some + semantic promises of an enumeration (by being comparable to integers, and + thus by transitivity to other unrelated enumerations). :class:`IntEnum` + and :class:`IntFlag` should be used only in cases where :class:`Enum` and + :class:`Flag` will not do; for example, when integer constants are replaced + with enumerations, or for interoperability with other systems. + + +Others +^^^^^^ + +While :class:`IntEnum` is part of the :mod:`enum` module, it would be very +simple to implement independently:: + + class IntEnum(int, ReprEnum): # or Enum instead of ReprEnum + pass + +This demonstrates how similar derived enumerations can be defined; for example +a :class:`FloatEnum` that mixes in :class:`float` instead of :class:`int`. + +Some rules: + +1. When subclassing :class:`Enum`, mix-in types must appear before the + :class:`Enum` class itself in the sequence of bases, as in the :class:`IntEnum` + example above. +2. Mix-in types must be subclassable. 
For example, :class:`bool` and + :class:`range` are not subclassable and will raise an error during Enum + creation if used as the mix-in type. +3. While :class:`Enum` can have members of any type, once you mix in an + additional type, all the members must have values of that type, e.g. + :class:`int` above. This restriction does not apply to mix-ins which only + add methods and don't specify another type. +4. When another data type is mixed in, the :attr:`value` attribute is *not the + same* as the enum member itself, although it is equivalent and will compare + equal. +5. A ``data type`` is a mixin that defines :meth:`__new__`, or a + :class:`~dataclasses.dataclass`. +6. %-style formatting: ``%s`` and ``%r`` call the :class:`Enum` class's + :meth:`__str__` and :meth:`__repr__` respectively; other codes (such as + ``%i`` or ``%x`` for IntEnum) treat the enum member as its mixed-in type. +7. :ref:`Formatted string literals <f-strings>`, :meth:`str.format`, + and :func:`format` will use the enum's :meth:`__str__` method. + +.. note:: + + Because :class:`IntEnum`, :class:`IntFlag`, and :class:`StrEnum` are + designed to be drop-in replacements for existing constants, their + :meth:`__str__` method has been reset to their data types' + :meth:`__str__` method. + +.. _new-vs-init: + +When to use :meth:`__new__` vs. :meth:`__init__` +------------------------------------------------ + +:meth:`__new__` must be used whenever you want to customize the actual value of +the :class:`Enum` member. Any other modifications may go in either +:meth:`__new__` or :meth:`__init__`, with :meth:`__init__` being preferred. + +For example, if you want to pass several items to the constructor, but only +want one of them to be the value:: + + >>> class Coordinate(bytes, Enum): + ... """ + ... Coordinate with binary codes that can be indexed by the int code. + ... """ + ... def __new__(cls, value, label, unit): + ... obj = bytes.__new__(cls, [value]) + ... obj._value_ = value + ... 
obj.label = label + ... obj.unit = unit + ... return obj + ... PX = (0, 'P.X', 'km') + ... PY = (1, 'P.Y', 'km') + ... VX = (2, 'V.X', 'km/s') + ... VY = (3, 'V.Y', 'km/s') + ... + + >>> print(Coordinate['PY']) + Coordinate.PY + + >>> print(Coordinate(3)) + Coordinate.VY + +.. warning:: + + *Do not* call ``super().__new__()``, as the lookup-only ``__new__`` is the one + that is found; instead, use the data type directly. + + +Finer Points +^^^^^^^^^^^^ + +Supported ``__dunder__`` names +"""""""""""""""""""""""""""""" + +:attr:`__members__` is a read-only ordered mapping of ``member_name``:``member`` +items. It is only available on the class. + +:meth:`__new__`, if specified, must create and return the enum members; it is +also a very good idea to set the member's :attr:`_value_` appropriately. Once +all the members are created it is no longer used. + + +Supported ``_sunder_`` names +"""""""""""""""""""""""""""" + +- :attr:`~Enum._name_` -- name of the member +- :attr:`~Enum._value_` -- value of the member; can be set in ``__new__`` +- :meth:`~Enum._missing_` -- a lookup function used when a value is not found; + may be overridden +- :attr:`~Enum._ignore_` -- a list of names, either as a :class:`list` or a + :class:`str`, that will not be transformed into members, and will be removed + from the final class +- :meth:`~Enum._generate_next_value_` -- used to get an appropriate value for + an enum member; may be overridden +- :meth:`~Enum._add_alias_` -- adds a new name as an alias to an existing + member. +- :meth:`~Enum._add_value_alias_` -- adds a new value as an alias to an + existing member. See `MultiValueEnum`_ for an example. + + .. note:: + + For standard :class:`Enum` classes the next value chosen is the highest + value seen incremented by one. + + For :class:`Flag` classes the next value chosen will be the next highest + power-of-two. + + .. versionchanged:: 3.13 + Prior versions would use the last seen value instead of the highest value. + +.. 
versionadded:: 3.6 ``_missing_``, ``_order_``, ``_generate_next_value_`` +.. versionadded:: 3.7 ``_ignore_`` +.. versionadded:: 3.13 ``_add_alias_``, ``_add_value_alias_`` + +To help keep Python 2 / Python 3 code in sync an :attr:`_order_` attribute can +be provided. It will be checked against the actual order of the enumeration +and raise an error if the two do not match:: + + >>> class Color(Enum): + ... _order_ = 'RED GREEN BLUE' + ... RED = 1 + ... BLUE = 3 + ... GREEN = 2 + ... + Traceback (most recent call last): + ... + TypeError: member order does not match _order_: + ['RED', 'BLUE', 'GREEN'] + ['RED', 'GREEN', 'BLUE'] + +.. note:: + + In Python 2 code the :attr:`_order_` attribute is necessary as definition + order is lost before it can be recorded. + + +_Private__names +""""""""""""""" + +:ref:`Private names ` are not converted to enum members, +but remain normal attributes. + +.. versionchanged:: 3.11 + + +``Enum`` member type +"""""""""""""""""""" + +Enum members are instances of their enum class, and are normally accessed as +``EnumClass.member``. In certain situations, such as writing custom enum +behavior, being able to access one member directly from another is useful, +and is supported; however, in order to avoid name clashes between member names +and attributes/methods from mixed-in classes, upper-case names are strongly +recommended. + +.. versionchanged:: 3.5 + + +Creating members that are mixed with other data types +""""""""""""""""""""""""""""""""""""""""""""""""""""" + +When subclassing other data types, such as :class:`int` or :class:`str`, with +an :class:`Enum`, all values after the ``=`` are passed to that data type's +constructor. For example:: + + >>> class MyEnum(IntEnum): # help(int) -> int(x, base=10) -> integer + ... example = '11', 16 # so x='11' and base=16 + ... + >>> MyEnum.example.value # and hex(11) is... 
+ 17 + + +Boolean value of ``Enum`` classes and members +""""""""""""""""""""""""""""""""""""""""""""" + +Enum classes that are mixed with non-:class:`Enum` types (such as +:class:`int`, :class:`str`, etc.) are evaluated according to the mixed-in +type's rules; otherwise, all members evaluate as :data:`True`. To make your +own enum's boolean evaluation depend on the member's value add the following to +your class:: + + def __bool__(self): + return bool(self.value) + +Plain :class:`Enum` classes always evaluate as :data:`True`. + + +``Enum`` classes with methods +""""""""""""""""""""""""""""" + +If you give your enum subclass extra methods, like the `Planet`_ +class below, those methods will show up in a :func:`dir` of the member, +but not of the class:: + + >>> dir(Planet) # doctest: +SKIP + ['EARTH', 'JUPITER', 'MARS', 'MERCURY', 'NEPTUNE', 'SATURN', 'URANUS', 'VENUS', '__class__', '__doc__', '__members__', '__module__'] + >>> dir(Planet.EARTH) # doctest: +SKIP + ['__class__', '__doc__', '__module__', 'mass', 'name', 'radius', 'surface_gravity', 'value'] + + +Combining members of ``Flag`` +""""""""""""""""""""""""""""" + +Iterating over a combination of :class:`Flag` members will only return the members that +are comprised of a single bit:: + + >>> class Color(Flag): + ... RED = auto() + ... GREEN = auto() + ... BLUE = auto() + ... MAGENTA = RED | BLUE + ... YELLOW = RED | GREEN + ... CYAN = GREEN | BLUE + ... + >>> Color(3) # named combination + + >>> Color(7) # not named combination + + + +``Flag`` and ``IntFlag`` minutia +"""""""""""""""""""""""""""""""" + +Using the following snippet for our examples:: + + >>> class Color(IntFlag): + ... BLACK = 0 + ... RED = 1 + ... GREEN = 2 + ... BLUE = 4 + ... PURPLE = RED | BLUE + ... WHITE = RED | GREEN | BLUE + ... 
+ +the following are true: + +- single-bit flags are canonical +- multi-bit and zero-bit flags are aliases +- only canonical flags are returned during iteration:: + + >>> list(Color.WHITE) + [<Color.RED: 1>, <Color.GREEN: 2>, <Color.BLUE: 4>] + +- negating a flag or flag set returns a new flag/flag set with the + corresponding positive integer value:: + + >>> Color.BLUE + <Color.BLUE: 4> + + >>> ~Color.BLUE + <Color.RED|GREEN: 3> + +- names of pseudo-flags are constructed from their members' names:: + + >>> (Color.RED | Color.GREEN).name + 'RED|GREEN' + + >>> class Perm(IntFlag): + ... R = 4 + ... W = 2 + ... X = 1 + ... + >>> (Perm.R & Perm.W).name is None # effectively Perm(0) + True + +- multi-bit flags, aka aliases, can be returned from operations:: + + >>> Color.RED | Color.BLUE + <Color.PURPLE: 5> + + >>> Color(7) # or Color(-1) + <Color.WHITE: 7> + + >>> Color(0) + <Color.BLACK: 0> + +- membership / containment checking: zero-valued flags are always considered + to be contained:: + + >>> Color.BLACK in Color.WHITE + True + + otherwise, only if all bits of one flag are in the other flag will True + be returned:: + + >>> Color.PURPLE in Color.WHITE + True + + >>> Color.GREEN in Color.PURPLE + False + +There is a new boundary mechanism that controls how out-of-range / invalid +bits are handled: ``STRICT``, ``CONFORM``, ``EJECT``, and ``KEEP``: + +* STRICT --> raises an exception when presented with invalid values +* CONFORM --> discards any invalid bits +* EJECT --> loses Flag status and becomes a normal int with the given value +* KEEP --> keeps the extra bits + + - keeps Flag status and extra bits + - extra bits do not show up in iteration + - extra bits do show up in repr() and str() + +The default for ``Flag`` is ``STRICT``, the default for ``IntFlag`` is ``EJECT``, +and the default for ``_convert_`` is ``KEEP`` (see ``ssl.Options`` for an +example of when ``KEEP`` is needed). + + +.. _enum-class-differences: + +How are Enums and Flags different? 
+---------------------------------- + +Enums have a custom metaclass that affects many aspects of both derived :class:`Enum` +classes and their instances (members). + + +Enum Classes +^^^^^^^^^^^^ + +The :class:`EnumType` metaclass is responsible for providing the +:meth:`__contains__`, :meth:`__dir__`, :meth:`__iter__` and other methods that +allow one to do things with an :class:`Enum` class that fail on a typical +class, such as ``list(Color)`` or ``some_enum_var in Color``. :class:`EnumType` is +responsible for ensuring that various other methods on the final :class:`Enum` +class are correct (such as :meth:`__new__`, :meth:`__getnewargs__`, +:meth:`__str__` and :meth:`__repr__`). + +Flag Classes +^^^^^^^^^^^^ + +Flags have an expanded view of aliasing: to be canonical, the value of a flag +needs to be a power-of-two value, and not a duplicate name. So, in addition to the +:class:`Enum` definition of alias, a flag with no value (a.k.a. ``0``) or with more than one +power-of-two value (e.g. ``3``) is considered an alias. + +Enum Members (aka instances) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The most interesting thing about enum members is that they are singletons. +:class:`EnumType` creates them all while it is creating the enum class itself, +and then puts a custom :meth:`__new__` in place to ensure that no new ones are +ever instantiated by returning only the existing member instances. + +Flag Members +^^^^^^^^^^^^ + +Flag members can be iterated over just like the :class:`Flag` class, and only the +canonical members will be returned. For example:: + + >>> list(Color) + [, , ] + +(Note that ``BLACK``, ``PURPLE``, and ``WHITE`` do not show up.) + +Inverting a flag member returns the corresponding positive value, +rather than a negative value --- for example:: + + >>> ~Color.RED + + +Flag members have a length corresponding to the number of power-of-two values +they contain. For example:: + + >>> len(Color.PURPLE) + 2 + + +.. 
_enum-cookbook: + +Enum Cookbook +------------- + + +While :class:`Enum`, :class:`IntEnum`, :class:`StrEnum`, :class:`Flag`, and +:class:`IntFlag` are expected to cover the majority of use-cases, they cannot +cover them all. Here are recipes for some different types of enumerations +that can be used directly, or as examples for creating one's own. + + +Omitting values +^^^^^^^^^^^^^^^ + +In many use-cases, one doesn't care what the actual value of an enumeration +is. There are several ways to define this type of simple enumeration: + +- use instances of :class:`auto` for the value +- use instances of :class:`object` as the value +- use a descriptive string as the value +- use a tuple as the value and a custom :meth:`__new__` to replace the + tuple with an :class:`int` value + +Using any of these methods signifies to the user that these values are not +important, and also enables one to add, remove, or reorder members without +having to renumber the remaining members. + + +Using :class:`auto` +""""""""""""""""""" + +Using :class:`auto` would look like:: + + >>> class Color(Enum): + ... RED = auto() + ... BLUE = auto() + ... GREEN = auto() + ... + >>> Color.GREEN + + + +Using :class:`object` +""""""""""""""""""""" + +Using :class:`object` would look like:: + + >>> class Color(Enum): + ... RED = object() + ... GREEN = object() + ... BLUE = object() + ... + >>> Color.GREEN # doctest: +SKIP + > + +This is also a good example of why you might want to write your own +:meth:`__repr__`:: + + >>> class Color(Enum): + ... RED = object() + ... GREEN = object() + ... BLUE = object() + ... def __repr__(self): + ... return "<%s.%s>" % (self.__class__.__name__, self._name_) + ... + >>> Color.GREEN + + + + +Using a descriptive string +"""""""""""""""""""""""""" + +Using a string as the value would look like:: + + >>> class Color(Enum): + ... RED = 'stop' + ... GREEN = 'go' + ... BLUE = 'too fast!' + ... 
+ >>> Color.GREEN + + + +Using a custom :meth:`__new__` +"""""""""""""""""""""""""""""" + +Using an auto-numbering :meth:`__new__` would look like:: + + >>> class AutoNumber(Enum): + ... def __new__(cls): + ... value = len(cls.__members__) + 1 + ... obj = object.__new__(cls) + ... obj._value_ = value + ... return obj + ... + >>> class Color(AutoNumber): + ... RED = () + ... GREEN = () + ... BLUE = () + ... + >>> Color.GREEN + + +To make a more general purpose ``AutoNumber``, add ``*args`` to the signature:: + + >>> class AutoNumber(Enum): + ... def __new__(cls, *args): # this is the only change from above + ... value = len(cls.__members__) + 1 + ... obj = object.__new__(cls) + ... obj._value_ = value + ... return obj + ... + +Then when you inherit from ``AutoNumber`` you can write your own ``__init__`` +to handle any extra arguments:: + + >>> class Swatch(AutoNumber): + ... def __init__(self, pantone='unknown'): + ... self.pantone = pantone + ... AUBURN = '3497' + ... SEA_GREEN = '1246' + ... BLEACHED_CORAL = () # New color, no Pantone code yet! + ... + >>> Swatch.SEA_GREEN + + >>> Swatch.SEA_GREEN.pantone + '1246' + >>> Swatch.BLEACHED_CORAL.pantone + 'unknown' + +.. note:: + + The :meth:`__new__` method, if defined, is used during creation of the Enum + members; it is then replaced by Enum's :meth:`__new__` which is used after + class creation for lookup of existing members. + +.. warning:: + + *Do not* call ``super().__new__()``, as the lookup-only ``__new__`` is the one + that is found; instead, use the data type directly -- e.g.:: + + obj = int.__new__(cls, value) + + +OrderedEnum +^^^^^^^^^^^ + +An ordered enumeration that is not based on :class:`IntEnum` and so maintains +the normal :class:`Enum` invariants (such as not being comparable to other +enumerations):: + + >>> class OrderedEnum(Enum): + ... def __ge__(self, other): + ... if self.__class__ is other.__class__: + ... return self.value >= other.value + ... return NotImplemented + ... 
def __gt__(self, other): + ... if self.__class__ is other.__class__: + ... return self.value > other.value + ... return NotImplemented + ... def __le__(self, other): + ... if self.__class__ is other.__class__: + ... return self.value <= other.value + ... return NotImplemented + ... def __lt__(self, other): + ... if self.__class__ is other.__class__: + ... return self.value < other.value + ... return NotImplemented + ... + >>> class Grade(OrderedEnum): + ... A = 5 + ... B = 4 + ... C = 3 + ... D = 2 + ... F = 1 + ... + >>> Grade.C < Grade.A + True + + +DuplicateFreeEnum +^^^^^^^^^^^^^^^^^ + +Raises an error if a duplicate member value is found instead of creating an +alias:: + + >>> class DuplicateFreeEnum(Enum): + ... def __init__(self, *args): + ... cls = self.__class__ + ... if any(self.value == e.value for e in cls): + ... a = self.name + ... e = cls(self.value).name + ... raise ValueError( + ... "aliases not allowed in DuplicateFreeEnum: %r --> %r" + ... % (a, e)) + ... + >>> class Color(DuplicateFreeEnum): + ... RED = 1 + ... GREEN = 2 + ... BLUE = 3 + ... GRENE = 2 + ... + Traceback (most recent call last): + ... + ValueError: aliases not allowed in DuplicateFreeEnum: 'GRENE' --> 'GREEN' + +.. note:: + + This is a useful example for subclassing Enum to add or change other + behaviors as well as disallowing aliases. If the only desired change is + disallowing aliases, the :func:`unique` decorator can be used instead. + + +MultiValueEnum +^^^^^^^^^^^^^^^^^ + +Supports having more than one value per member:: + + >>> class MultiValueEnum(Enum): + ... def __new__(cls, value, *values): + ... self = object.__new__(cls) + ... self._value_ = value + ... for v in values: + ... self._add_value_alias_(v) + ... return self + ... + >>> class DType(MultiValueEnum): + ... float32 = 'f', 8 + ... double64 = 'd', 9 + ... 
+ >>> DType('f') + + >>> DType(9) + + + +Planet +^^^^^^ + +If :meth:`__new__` or :meth:`__init__` is defined, the value of the enum member +will be passed to those methods:: + + >>> class Planet(Enum): + ... MERCURY = (3.303e+23, 2.4397e6) + ... VENUS = (4.869e+24, 6.0518e6) + ... EARTH = (5.976e+24, 6.37814e6) + ... MARS = (6.421e+23, 3.3972e6) + ... JUPITER = (1.9e+27, 7.1492e7) + ... SATURN = (5.688e+26, 6.0268e7) + ... URANUS = (8.686e+25, 2.5559e7) + ... NEPTUNE = (1.024e+26, 2.4746e7) + ... def __init__(self, mass, radius): + ... self.mass = mass # in kilograms + ... self.radius = radius # in meters + ... @property + ... def surface_gravity(self): + ... # universal gravitational constant (m3 kg-1 s-2) + ... G = 6.67300E-11 + ... return G * self.mass / (self.radius * self.radius) + ... + >>> Planet.EARTH.value + (5.976e+24, 6378140.0) + >>> Planet.EARTH.surface_gravity + 9.802652743337129 + +.. _enum-time-period: + +TimePeriod +^^^^^^^^^^ + +An example to show the :attr:`_ignore_` attribute in use:: + + >>> from datetime import timedelta + >>> class Period(timedelta, Enum): + ... "different lengths of time" + ... _ignore_ = 'Period i' + ... Period = vars() + ... for i in range(367): + ... Period['day_%d' % i] = i + ... + >>> list(Period)[:2] + [, ] + >>> list(Period)[-2:] + [, ] + + +.. _enumtype-examples: + +Subclassing EnumType +-------------------- + +While most enum needs can be met by customizing :class:`Enum` subclasses, +either with class decorators or custom functions, :class:`EnumType` can be +subclassed to provide a different Enum experience. diff --git a/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/free-threading-extensions.rst b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/free-threading-extensions.rst new file mode 100644 index 00000000..6abe93d7 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/free-threading-extensions.rst @@ -0,0 +1,280 @@ +.. highlight:: c + +.. 
_freethreading-extensions-howto: + +****************************************** +C API Extension Support for Free Threading +****************************************** + +Starting with the 3.13 release, CPython has experimental support for running +with the :term:`global interpreter lock` (GIL) disabled in a configuration +called :term:`free threading`. This document describes how to adapt C API +extensions to support free threading. + + +Identifying the Free-Threaded Build in C +======================================== + +The CPython C API exposes the ``Py_GIL_DISABLED`` macro: in the free-threaded +build it's defined to ``1``, and in the regular build it's not defined. +You can use it to enable code that only runs under the free-threaded build:: + + #ifdef Py_GIL_DISABLED + /* code that only runs in the free-threaded build */ + #endif + +Module Initialization +===================== + +Extension modules need to explicitly indicate that they support running with +the GIL disabled; otherwise importing the extension will raise a warning and +enable the GIL at runtime. + +There are two ways to indicate that an extension module supports running with +the GIL disabled depending on whether the extension uses multi-phase or +single-phase initialization. + +Multi-Phase Initialization +.......................... + +Extensions that use multi-phase initialization (i.e., +:c:func:`PyModuleDef_Init`) should add a :c:data:`Py_mod_gil` slot in the +module definition. If your extension supports older versions of CPython, +you should guard the slot with a :c:data:`PY_VERSION_HEX` check. + +:: + + static struct PyModuleDef_Slot module_slots[] = { + ... + #if PY_VERSION_HEX >= 0x030D0000 + {Py_mod_gil, Py_MOD_GIL_NOT_USED}, + #endif + {0, NULL} + }; + + static struct PyModuleDef moduledef = { + PyModuleDef_HEAD_INIT, + .m_slots = module_slots, + ... + }; + + +Single-Phase Initialization +........................... 
+ +Extensions that use single-phase initialization (i.e., +:c:func:`PyModule_Create`) should call :c:func:`PyUnstable_Module_SetGIL` to +indicate that they support running with the GIL disabled. The function is +only defined in the free-threaded build, so you should guard the call with +``#ifdef Py_GIL_DISABLED`` to avoid compilation errors in the regular build. + +:: + + static struct PyModuleDef moduledef = { + PyModuleDef_HEAD_INIT, + ... + }; + + PyMODINIT_FUNC + PyInit_mymodule(void) + { + PyObject *m = PyModule_Create(&moduledef); + if (m == NULL) { + return NULL; + } + #ifdef Py_GIL_DISABLED + PyUnstable_Module_SetGIL(m, Py_MOD_GIL_NOT_USED); + #endif + return m; + } + + +General API Guidelines +====================== + +Most of the C API is thread-safe, but there are some exceptions. + +* **Struct Fields**: Accessing fields in Python C API objects or structs + directly is not thread-safe if the field may be concurrently modified. +* **Macros**: Accessor macros like :c:macro:`PyList_GET_ITEM` and + :c:macro:`PyList_SET_ITEM` do not perform any error checking or locking. + These macros are not thread-safe if the container object may be modified + concurrently. +* **Borrowed References**: C API functions that return + :term:`borrowed references <borrowed reference>` may not be thread-safe if + the containing object is modified concurrently. See the section on + :ref:`borrowed references <borrowed-references>` for more information. + + +Container Thread Safety +....................... + +Containers like :c:struct:`PyListObject`, +:c:struct:`PyDictObject`, and :c:struct:`PySetObject` perform internal locking +in the free-threaded build. For example, :c:func:`PyList_Append` will +lock the list before appending an item. + +.. _PyDict_Next: + +``PyDict_Next`` +''''''''''''''' + +A notable exception is :c:func:`PyDict_Next`, which does not lock the +dictionary. 
You should use :c:macro:`Py_BEGIN_CRITICAL_SECTION` to protect +the dictionary while iterating over it if the dictionary may be concurrently +modified:: + + Py_BEGIN_CRITICAL_SECTION(dict); + PyObject *key, *value; + Py_ssize_t pos = 0; + while (PyDict_Next(dict, &pos, &key, &value)) { + ... + } + Py_END_CRITICAL_SECTION(); + + +Borrowed References +=================== + +.. _borrowed-references: + +Some C API functions return :term:`borrowed references `. +These APIs are not thread-safe if the containing object is modified +concurrently. For example, it's not safe to use :c:func:`PyList_GetItem` +if the list may be modified concurrently. + +The following table lists some borrowed reference APIs and their replacements +that return :term:`strong references `. + ++-----------------------------------+-----------------------------------+ +| Borrowed reference API | Strong reference API | ++===================================+===================================+ +| :c:func:`PyList_GetItem` | :c:func:`PyList_GetItemRef` | ++-----------------------------------+-----------------------------------+ +| :c:func:`PyDict_GetItem` | :c:func:`PyDict_GetItemRef` | ++-----------------------------------+-----------------------------------+ +| :c:func:`PyDict_GetItemWithError` | :c:func:`PyDict_GetItemRef` | ++-----------------------------------+-----------------------------------+ +| :c:func:`PyDict_GetItemString` | :c:func:`PyDict_GetItemStringRef` | ++-----------------------------------+-----------------------------------+ +| :c:func:`PyDict_SetDefault` | :c:func:`PyDict_SetDefaultRef` | ++-----------------------------------+-----------------------------------+ +| :c:func:`PyDict_Next` | none (see :ref:`PyDict_Next`) | ++-----------------------------------+-----------------------------------+ +| :c:func:`PyWeakref_GetObject` | :c:func:`PyWeakref_GetRef` | ++-----------------------------------+-----------------------------------+ +| :c:func:`PyWeakref_GET_OBJECT` | 
:c:func:`PyWeakref_GetRef` | ++-----------------------------------+-----------------------------------+ +| :c:func:`PyImport_AddModule` | :c:func:`PyImport_AddModuleRef` | ++-----------------------------------+-----------------------------------+ + +Not all APIs that return borrowed references are problematic. For +example, :c:func:`PyTuple_GetItem` is safe because tuples are immutable. +Similarly, not all uses of the above APIs are problematic. For example, +:c:func:`PyDict_GetItem` is often used for parsing keyword argument +dictionaries in function calls; those keyword argument dictionaries are +effectively private (not accessible by other threads), so using borrowed +references in that context is safe. + +Some of these functions were added in Python 3.13. You can use the +`pythoncapi-compat <https://github.com/python/pythoncapi-compat>`_ package +to provide implementations of these functions for older Python versions. + + +.. _free-threaded-memory-allocation: + +Memory Allocation APIs +====================== + +Python's memory management C API provides functions in three different +:ref:`allocation domains <allocator-domains>`: "raw", "mem", and "object". +For thread-safety, the free-threaded build requires that only Python objects +are allocated using the object domain, and that all Python objects are +allocated using that domain. This differs from the prior Python versions, +where this was only a best practice and not a hard requirement. + +.. note:: + + Search for uses of :c:func:`PyObject_Malloc` in your + extension and check that the allocated memory is used for Python objects. + Use :c:func:`PyMem_Malloc` to allocate buffers instead of + :c:func:`PyObject_Malloc`.
+ + +Thread State and GIL APIs +========================= + +Python provides a set of functions and macros to manage thread state and the +GIL, such as: + +* :c:func:`PyGILState_Ensure` and :c:func:`PyGILState_Release` +* :c:func:`PyEval_SaveThread` and :c:func:`PyEval_RestoreThread` +* :c:macro:`Py_BEGIN_ALLOW_THREADS` and :c:macro:`Py_END_ALLOW_THREADS` + +These functions should still be used in the free-threaded build to manage +thread state even when the :term:`GIL` is disabled. For example, if you +create a thread outside of Python, you must call :c:func:`PyGILState_Ensure` +before calling into the Python API to ensure that the thread has a valid +Python thread state. + +You should continue to call :c:func:`PyEval_SaveThread` or +:c:macro:`Py_BEGIN_ALLOW_THREADS` around blocking operations, such as I/O or +lock acquisitions, to allow other threads to run the +:term:`cyclic garbage collector <garbage collection>`. + + +Protecting Internal Extension State +=================================== + +Your extension may have internal state that was previously protected by the +GIL. You may need to add locking to protect this state. The approach will +depend on your extension, but some common patterns include: + +* **Caches**: global caches are a common source of shared state. Consider + using a lock to protect the cache or disabling it in the free-threaded build + if the cache is not critical for performance. +* **Global State**: global state may need to be protected by a lock or moved + to thread local storage. C11 and C++11 provide the ``thread_local`` or + ``_Thread_local`` for + `thread-local storage <https://en.cppreference.com/w/c/language/storage_duration>`_. + + +Building Extensions for the Free-Threaded Build +=============================================== + +C API extensions need to be built specifically for the free-threaded build. +The wheels, shared libraries, and binaries are indicated by a ``t`` suffix. + +* `pypa/manylinux <https://github.com/pypa/manylinux>`_ supports the + free-threaded build, with the ``t`` suffix, such as ``python3.13t``.
+* `pypa/cibuildwheel <https://github.com/pypa/cibuildwheel>`_ supports the + free-threaded build if you set + `CIBW_FREE_THREADED_SUPPORT <https://cibuildwheel.pypa.io/en/stable/options/#free-threaded-support>`_. + +Limited C API and Stable ABI +............................ + +The free-threaded build does not currently support the +:ref:`Limited C API <limited-c-api>` or the stable ABI. If you use +`setuptools <https://setuptools.pypa.io/en/latest/setuptools.html>`_ to build +your extension and currently set ``py_limited_api=True``, you can use +``py_limited_api=not sysconfig.get_config_var("Py_GIL_DISABLED")`` to opt out +of the limited API when building with the free-threaded build. + +.. note:: + You will need to build separate wheels specifically for the free-threaded + build. If you currently use the stable ABI, you can continue to build a + single wheel for multiple non-free-threaded Python versions. + + +Windows +....... + +Due to a limitation of the official Windows installer, you will need to +manually define ``Py_GIL_DISABLED=1`` when building extensions from source. + +.. seealso:: + + `Porting Extension Modules to Support Free-Threading + <https://py-free-threading.github.io/porting/>`_: + A community-maintained porting guide for extension authors. diff --git a/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/functional.rst b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/functional.rst new file mode 100644 index 00000000..1f0608fb --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/functional.rst @@ -0,0 +1,1272 @@ +.. _functional-howto: + +******************************** + Functional Programming HOWTO +******************************** + +:Author: A. M. Kuchling +:Release: 0.32 + +In this document, we'll take a tour of Python's features suitable for +implementing programs in a functional style. After an introduction to the +concepts of functional programming, we'll look at language features such as +:term:`iterator`\s and :term:`generator`\s and relevant library modules such as +:mod:`itertools` and :mod:`functools`.
+ + +Introduction +============ + +This section explains the basic concept of functional programming; if +you're just interested in learning about Python language features, +skip to the next section on :ref:`functional-howto-iterators`. + +Programming languages support decomposing problems in several different ways: + +* Most programming languages are **procedural**: programs are lists of + instructions that tell the computer what to do with the program's input. C, + Pascal, and even Unix shells are procedural languages. + +* In **declarative** languages, you write a specification that describes the + problem to be solved, and the language implementation figures out how to + perform the computation efficiently. SQL is the declarative language you're + most likely to be familiar with; a SQL query describes the data set you want + to retrieve, and the SQL engine decides whether to scan tables or use indexes, + which subclauses should be performed first, etc. + +* **Object-oriented** programs manipulate collections of objects. Objects have + internal state and support methods that query or modify this internal state in + some way. Smalltalk and Java are object-oriented languages. C++ and Python + are languages that support object-oriented programming, but don't force the + use of object-oriented features. + +* **Functional** programming decomposes a problem into a set of functions. + Ideally, functions only take inputs and produce outputs, and don't have any + internal state that affects the output produced for a given input. Well-known + functional languages include the ML family (Standard ML, OCaml, and other + variants) and Haskell. + +The designers of some computer languages choose to emphasize one +particular approach to programming. This often makes it difficult to +write programs that use a different approach. Other languages are +multi-paradigm languages that support several different approaches. 
+Lisp, C++, and Python are multi-paradigm; you can write programs or +libraries that are largely procedural, object-oriented, or functional +in all of these languages. In a large program, different sections +might be written using different approaches; the GUI might be +object-oriented while the processing logic is procedural or +functional, for example. + +In a functional program, input flows through a set of functions. Each function +operates on its input and produces some output. Functional style discourages +functions with side effects that modify internal state or make other changes +that aren't visible in the function's return value. Functions that have no side +effects at all are called **purely functional**. Avoiding side effects means +not using data structures that get updated as a program runs; every function's +output must only depend on its input. + +Some languages are very strict about purity and don't even have assignment +statements such as ``a=3`` or ``c = a + b``, but it's difficult to avoid all +side effects, such as printing to the screen or writing to a disk file. Another +example is a call to the :func:`print` or :func:`time.sleep` function, neither +of which returns a useful value. Both are called only for their side effects +of sending some text to the screen or pausing execution for a second. + +Python programs written in functional style usually won't go to the extreme of +avoiding all I/O or all assignments; instead, they'll provide a +functional-appearing interface but will use non-functional features internally. +For example, the implementation of a function will still use assignments to +local variables, but won't modify global variables or have other side effects. + +Functional programming can be considered the opposite of object-oriented +programming. 
Objects are little capsules containing some internal state along +with a collection of method calls that let you modify this state, and programs +consist of making the right set of state changes. Functional programming wants +to avoid state changes as much as possible and works with data flowing between +functions. In Python you might combine the two approaches by writing functions +that take and return instances representing objects in your application (e-mail +messages, transactions, etc.). + +Functional design may seem like an odd constraint to work under. Why should you +avoid objects and side effects? There are theoretical and practical advantages +to the functional style: + +* Formal provability. +* Modularity. +* Composability. +* Ease of debugging and testing. + + +Formal provability +------------------ + +A theoretical benefit is that it's easier to construct a mathematical proof that +a functional program is correct. + +For a long time researchers have been interested in finding ways to +mathematically prove programs correct. This is different from testing a program +on numerous inputs and concluding that its output is usually correct, or reading +a program's source code and concluding that the code looks right; the goal is +instead a rigorous proof that a program produces the right result for all +possible inputs. + +The technique used to prove programs correct is to write down **invariants**, +properties of the input data and of the program's variables that are always +true. For each line of code, you then show that if invariants X and Y are true +**before** the line is executed, the slightly different invariants X' and Y' are +true **after** the line is executed. This continues until you reach the end of +the program, at which point the invariants should match the desired conditions +on the program's output. 
+ +Functional programming's avoidance of assignments arose because assignments are +difficult to handle with this technique; assignments can break invariants that +were true before the assignment without producing any new invariants that can be +propagated onward. + +Unfortunately, proving programs correct is largely impractical and not relevant +to Python software. Even trivial programs require proofs that are several pages +long; the proof of correctness for a moderately complicated program would be +enormous, and few or none of the programs you use daily (the Python interpreter, +your XML parser, your web browser) could be proven correct. Even if you wrote +down or generated a proof, there would then be the question of verifying the +proof; maybe there's an error in it, and you wrongly believe you've proved the +program correct. + + +Modularity +---------- + +A more practical benefit of functional programming is that it forces you to +break apart your problem into small pieces. Programs are more modular as a +result. It's easier to specify and write a small function that does one thing +than a large function that performs a complicated transformation. Small +functions are also easier to read and to check for errors. + + +Ease of debugging and testing +----------------------------- + +Testing and debugging a functional-style program is easier. + +Debugging is simplified because functions are generally small and clearly +specified. When a program doesn't work, each function is an interface point +where you can check that the data are correct. You can look at the intermediate +inputs and outputs to quickly isolate the function that's responsible for a bug. + +Testing is easier because each function is a potential subject for a unit test. +Functions don't depend on system state that needs to be replicated before +running a test; instead you only have to synthesize the right input and then +check that the output matches expectations. 
+ + +Composability +------------- + +As you work on a functional-style program, you'll write a number of functions +with varying inputs and outputs. Some of these functions will be unavoidably +specialized to a particular application, but others will be useful in a wide +variety of programs. For example, a function that takes a directory path and +returns all the XML files in the directory, or a function that takes a filename +and returns its contents, can be applied to many different situations. + +Over time you'll form a personal library of utilities. Often you'll assemble +new programs by arranging existing functions in a new configuration and writing +a few functions specialized for the current task. + + +.. _functional-howto-iterators: + +Iterators +========= + +I'll start by looking at a Python language feature that's an important +foundation for writing functional-style programs: iterators. + +An iterator is an object representing a stream of data; this object returns the +data one element at a time. A Python iterator must support a method called +:meth:`~iterator.__next__` that takes no arguments and always returns the next +element of the stream. If there are no more elements in the stream, +:meth:`~iterator.__next__` must raise the :exc:`StopIteration` exception. +Iterators don't have to be finite, though; it's perfectly reasonable to write +an iterator that produces an infinite stream of data. + +The built-in :func:`iter` function takes an arbitrary object and tries to return +an iterator that will return the object's contents or elements, raising +:exc:`TypeError` if the object doesn't support iteration. Several of Python's +built-in data types support iteration, the most common being lists and +dictionaries. An object is called :term:`iterable` if you can get an iterator +for it. 
+ +You can experiment with the iteration interface manually: + + >>> L = [1, 2, 3] + >>> it = iter(L) + >>> it #doctest: +ELLIPSIS + <...iterator object at ...> + >>> it.__next__() # same as next(it) + 1 + >>> next(it) + 2 + >>> next(it) + 3 + >>> next(it) + Traceback (most recent call last): + File "<stdin>", line 1, in <module> + StopIteration + >>> + +Python expects iterable objects in several different contexts, the most +important being the :keyword:`for` statement. In the statement ``for X in Y``, +Y must be an iterator or some object for which :func:`iter` can create an +iterator. These two statements are equivalent:: + + + for i in iter(obj): + print(i) + + for i in obj: + print(i) + +Iterators can be materialized as lists or tuples by using the :func:`list` or +:func:`tuple` constructor functions: + + >>> L = [1, 2, 3] + >>> iterator = iter(L) + >>> t = tuple(iterator) + >>> t + (1, 2, 3) + +Sequence unpacking also supports iterators: if you know an iterator will return +N elements, you can unpack them into an N-tuple: + + >>> L = [1, 2, 3] + >>> iterator = iter(L) + >>> a, b, c = iterator + >>> a, b, c + (1, 2, 3) + +Built-in functions such as :func:`max` and :func:`min` can take a single +iterator argument and will return the largest or smallest element. The ``"in"`` +and ``"not in"`` operators also support iterators: ``X in iterator`` is true if +X is found in the stream returned by the iterator. You'll run into obvious +problems if the iterator is infinite; :func:`max` and :func:`min` +will never return, and if the element X never appears in the stream, the +``"in"`` and ``"not in"`` operators won't return either. + +Note that you can only go forward in an iterator; there's no way to get the +previous element, reset the iterator, or make a copy of it. Iterator objects +can optionally provide these additional capabilities, but the iterator protocol +only specifies the :meth:`~iterator.__next__` method.
Functions may therefore +consume all of the iterator's output, and if you need to do something different +with the same stream, you'll have to create a new iterator. + + + +Data Types That Support Iterators +--------------------------------- + +We've already seen how lists and tuples support iterators. In fact, any Python +sequence type, such as strings, will automatically support creation of an +iterator. + +Calling :func:`iter` on a dictionary returns an iterator that will loop over the +dictionary's keys:: + + >>> m = {'Jan': 1, 'Feb': 2, 'Mar': 3, 'Apr': 4, 'May': 5, 'Jun': 6, + ... 'Jul': 7, 'Aug': 8, 'Sep': 9, 'Oct': 10, 'Nov': 11, 'Dec': 12} + >>> for key in m: + ... print(key, m[key]) + Jan 1 + Feb 2 + Mar 3 + Apr 4 + May 5 + Jun 6 + Jul 7 + Aug 8 + Sep 9 + Oct 10 + Nov 11 + Dec 12 + +Note that starting with Python 3.7, dictionary iteration order is guaranteed +to be the same as the insertion order. In earlier versions, the behaviour was +unspecified and could vary between implementations. + +Applying :func:`iter` to a dictionary always loops over the keys, but +dictionaries have methods that return other iterators. If you want to iterate +over values or key/value pairs, you can explicitly call the +:meth:`~dict.values` or :meth:`~dict.items` methods to get an appropriate +iterator. + +The :func:`dict` constructor can accept an iterator that returns a finite stream +of ``(key, value)`` tuples: + + >>> L = [('Italy', 'Rome'), ('France', 'Paris'), ('US', 'Washington DC')] + >>> dict(iter(L)) + {'Italy': 'Rome', 'France': 'Paris', 'US': 'Washington DC'} + +Files also support iteration by calling the :meth:`~io.TextIOBase.readline` +method until there are no more lines in the file. This means you can read each +line of a file like this:: + + for line in file: + # do something for each line + ... + +Sets can take their contents from an iterable and let you iterate over the set's +elements:: + + >>> S = {2, 3, 5, 7, 11, 13} + >>> for i in S: + ... 
print(i) + 2 + 3 + 5 + 7 + 11 + 13 + + + +Generator expressions and list comprehensions +============================================= + +Two common operations on an iterator's output are 1) performing some operation +for every element, 2) selecting a subset of elements that meet some condition. +For example, given a list of strings, you might want to strip off trailing +whitespace from each line or extract all the strings containing a given +substring. + +List comprehensions and generator expressions (short form: "listcomps" and +"genexps") are a concise notation for such operations, borrowed from the +functional programming language Haskell (https://www.haskell.org/). You can strip +all the whitespace from a stream of strings with the following code:: + + >>> line_list = [' line 1\n', 'line 2 \n', ' \n', ''] + + >>> # Generator expression -- returns iterator + >>> stripped_iter = (line.strip() for line in line_list) + + >>> # List comprehension -- returns list + >>> stripped_list = [line.strip() for line in line_list] + +You can select only certain elements by adding an ``"if"`` condition:: + + >>> stripped_list = [line.strip() for line in line_list + ... if line != ""] + +With a list comprehension, you get back a Python list; ``stripped_list`` is a +list containing the resulting lines, not an iterator. Generator expressions +return an iterator that computes the values as necessary, not needing to +materialize all the values at once. This means that list comprehensions aren't +useful if you're working with iterators that return an infinite stream or a very +large amount of data. Generator expressions are preferable in these situations. + +Generator expressions are surrounded by parentheses ("()") and list +comprehensions are surrounded by square brackets ("[]"). Generator expressions +have the form:: + + ( expression for expr in sequence1 + if condition1 + for expr2 in sequence2 + if condition2 + for expr3 in sequence3 + ... 
+ if condition3 + for exprN in sequenceN + if conditionN ) + +Again, for a list comprehension only the outside brackets are different (square +brackets instead of parentheses). + +The elements of the generated output will be the successive values of +``expression``. The ``if`` clauses are all optional; if present, ``expression`` +is only evaluated and added to the result when ``condition`` is true. + +Generator expressions always have to be written inside parentheses, but the +parentheses signalling a function call also count. If you want to create an +iterator that will be immediately passed to a function you can write:: + + obj_total = sum(obj.count for obj in list_all_objects()) + +The ``for...in`` clauses contain the sequences to be iterated over. The +sequences do not have to be the same length, because they are iterated over from +left to right, **not** in parallel. For each element in ``sequence1``, +``sequence2`` is looped over from the beginning. ``sequence3`` is then looped +over for each resulting pair of elements from ``sequence1`` and ``sequence2``. + +To put it another way, a list comprehension or generator expression is +equivalent to the following Python code:: + + for expr1 in sequence1: + if not (condition1): + continue # Skip this element + for expr2 in sequence2: + if not (condition2): + continue # Skip this element + ... + for exprN in sequenceN: + if not (conditionN): + continue # Skip this element + + # Output the value of + # the expression. + +This means that when there are multiple ``for...in`` clauses but no ``if`` +clauses, the length of the resulting output will be equal to the product of the +lengths of all the sequences. 
If you have two lists of length 3, the output +list is 9 elements long: + + >>> seq1 = 'abc' + >>> seq2 = (1, 2, 3) + >>> [(x, y) for x in seq1 for y in seq2] #doctest: +NORMALIZE_WHITESPACE + [('a', 1), ('a', 2), ('a', 3), + ('b', 1), ('b', 2), ('b', 3), + ('c', 1), ('c', 2), ('c', 3)] + +To avoid introducing an ambiguity into Python's grammar, if ``expression`` is +creating a tuple, it must be surrounded with parentheses. The first list +comprehension below is a syntax error, while the second one is correct:: + + # Syntax error + [x, y for x in seq1 for y in seq2] + # Correct + [(x, y) for x in seq1 for y in seq2] + + +Generators +========== + +Generators are a special class of functions that simplify the task of writing +iterators. Regular functions compute a value and return it, but generators +return an iterator that returns a stream of values. + +You're doubtless familiar with how regular function calls work in Python or C. +When you call a function, it gets a private namespace where its local variables +are created. When the function reaches a ``return`` statement, the local +variables are destroyed and the value is returned to the caller. A later call +to the same function creates a new private namespace and a fresh set of local +variables. But, what if the local variables weren't thrown away on exiting a +function? What if you could later resume the function where it left off? This +is what generators provide; they can be thought of as resumable functions. + +Here's the simplest example of a generator function: + + >>> def generate_ints(N): + ... for i in range(N): + ... yield i + +Any function containing a :keyword:`yield` keyword is a generator function; +this is detected by Python's :term:`bytecode` compiler which compiles the +function specially as a result. + +When you call a generator function, it doesn't return a single value; instead it +returns a generator object that supports the iterator protocol. 
On executing +the ``yield`` expression, the generator outputs the value of ``i``, similar to a +``return`` statement. The big difference between ``yield`` and a ``return`` +statement is that on reaching a ``yield`` the generator's state of execution is +suspended and local variables are preserved. On the next call to the +generator's :meth:`~generator.__next__` method, the function will resume +executing. + +Here's a sample usage of the ``generate_ints()`` generator: + + >>> gen = generate_ints(3) + >>> gen #doctest: +ELLIPSIS + <generator object generate_ints at ...> + >>> next(gen) + 0 + >>> next(gen) + 1 + >>> next(gen) + 2 + >>> next(gen) + Traceback (most recent call last): + File "stdin", line 1, in <module> + File "stdin", line 2, in generate_ints + StopIteration + +You could equally write ``for i in generate_ints(5)``, or ``a, b, c = +generate_ints(3)``. + +Inside a generator function, ``return value`` causes ``StopIteration(value)`` +to be raised from the :meth:`~generator.__next__` method. Once this happens, or +the bottom of the function is reached, the procession of values ends and the +generator cannot yield any further values. + +You could achieve the effect of generators manually by writing your own class +and storing all the local variables of the generator as instance variables. For +example, returning a list of integers could be done by setting ``self.count`` to +0, and having the :meth:`~iterator.__next__` method increment ``self.count`` and +return it. +However, for a moderately complicated generator, writing a corresponding class +can be much messier. + +The test suite included with Python's library, +:source:`Lib/test/test_generators.py`, contains +a number of more interesting examples. Here's one generator that implements an +in-order traversal of a tree using generators recursively. :: + + # A recursive generator that generates Tree leaves in in-order.
+ def inorder(t): + if t: + for x in inorder(t.left): + yield x + + yield t.label + + for x in inorder(t.right): + yield x + +Two other examples in ``test_generators.py`` produce solutions for the N-Queens +problem (placing N queens on an NxN chess board so that no queen threatens +another) and the Knight's Tour (finding a route that takes a knight to every +square of an NxN chessboard without visiting any square twice). + + + +Passing values into a generator +------------------------------- + +In Python 2.4 and earlier, generators only produced output. Once a generator's +code was invoked to create an iterator, there was no way to pass any new +information into the function when its execution was resumed. You could hack +together this ability by making the generator look at a global variable or by +passing in some mutable object that callers then modify, but these approaches +are messy. + +In Python 2.5 there's a simple way to pass values into a generator. +:keyword:`yield` became an expression, returning a value that can be assigned to +a variable or otherwise operated on:: + + val = (yield i) + +I recommend that you **always** put parentheses around a ``yield`` expression +when you're doing something with the returned value, as in the above example. +The parentheses aren't always necessary, but it's easier to always add them +instead of having to remember when they're needed. + +(:pep:`342` explains the exact rules, which are that a ``yield``-expression must +always be parenthesized except when it occurs at the top-level expression on the +right-hand side of an assignment. This means you can write ``val = yield i`` +but have to use parentheses when there's an operation, as in ``val = (yield i) ++ 12``.) + +Values are sent into a generator by calling its :meth:`send(value) +<generator.send>` method. This method resumes the generator's code and the +``yield`` expression returns the specified value.
If the regular +:meth:`~generator.__next__` method is called, the ``yield`` returns ``None``. + +Here's a simple counter that increments by 1 and allows changing the value of +the internal counter. + +.. testcode:: + + def counter(maximum): + i = 0 + while i < maximum: + val = (yield i) + # If value provided, change counter + if val is not None: + i = val + else: + i += 1 + +And here's an example of changing the counter: + + >>> it = counter(10) #doctest: +SKIP + >>> next(it) #doctest: +SKIP + 0 + >>> next(it) #doctest: +SKIP + 1 + >>> it.send(8) #doctest: +SKIP + 8 + >>> next(it) #doctest: +SKIP + 9 + >>> next(it) #doctest: +SKIP + Traceback (most recent call last): + File "t.py", line 15, in <module> + it.next() + StopIteration + +Because ``yield`` will often be returning ``None``, you should always check for +this case. Don't just use its value in expressions unless you're sure that the +:meth:`~generator.send` method will be the only method used to resume your +generator function. + +In addition to :meth:`~generator.send`, there are two other methods on +generators: + +* :meth:`throw(value) <generator.throw>` is used to + raise an exception inside the generator; the exception is raised by the + ``yield`` expression where the generator's execution is paused. + +* :meth:`~generator.close` raises a :exc:`GeneratorExit` exception inside the + generator to terminate the iteration. On receiving this exception, the + generator's code must either raise :exc:`GeneratorExit` or + :exc:`StopIteration`; catching the exception and doing anything else is + illegal and will trigger a :exc:`RuntimeError`. :meth:`~generator.close` + will also be called by Python's garbage collector when the generator is + garbage-collected. + + If you need to run cleanup code when a :exc:`GeneratorExit` occurs, I suggest + using a ``try: ... finally:`` suite instead of catching :exc:`GeneratorExit`.
+ +The cumulative effect of these changes is to turn generators from one-way +producers of information into both producers and consumers. + +Generators also become **coroutines**, a more generalized form of subroutines. +Subroutines are entered at one point and exited at another point (the top of the +function, and a ``return`` statement), but coroutines can be entered, exited, +and resumed at many different points (the ``yield`` statements). + + +Built-in functions +================== + +Let's look in more detail at built-in functions often used with iterators. + +Two of Python's built-in functions, :func:`map` and :func:`filter`, duplicate the +features of generator expressions: + +:func:`map(f, iterA, iterB, ...) <map>` returns an iterator over the sequence + ``f(iterA[0], iterB[0]), f(iterA[1], iterB[1]), f(iterA[2], iterB[2]), ...``. + + >>> def upper(s): + ... return s.upper() + + >>> list(map(upper, ['sentence', 'fragment'])) + ['SENTENCE', 'FRAGMENT'] + >>> [upper(s) for s in ['sentence', 'fragment']] + ['SENTENCE', 'FRAGMENT'] + +You can of course achieve the same effect with a list comprehension. + +:func:`filter(predicate, iter) <filter>` returns an iterator over all the +sequence elements that meet a certain condition, and is similarly duplicated by +list comprehensions. A **predicate** is a function that returns the truth +value of some condition; for use with :func:`filter`, the predicate must take a +single value. + + >>> def is_even(x): + ... return (x % 2) == 0 + + >>> list(filter(is_even, range(10))) + [0, 2, 4, 6, 8] + + +This can also be written as a list comprehension: + + >>> list(x for x in range(10) if is_even(x)) + [0, 2, 4, 6, 8] + + +:func:`enumerate(iter, start=0) <enumerate>` counts off the elements in the +iterable returning 2-tuples containing the count (from *start*) and +each element. :: + + >>> for item in enumerate(['subject', 'verb', 'object']): + ...
print(item) + (0, 'subject') + (1, 'verb') + (2, 'object') + +:func:`enumerate` is often used when looping through a list and recording the +indexes at which certain conditions are met:: + + f = open('data.txt', 'r') + for i, line in enumerate(f): + if line.strip() == '': + print('Blank line at line #%i' % i) + +:func:`sorted(iterable, key=None, reverse=False) <sorted>` collects all the +elements of the iterable into a list, sorts the list, and returns the sorted +result. The *key* and *reverse* arguments are passed through to the +constructed list's :meth:`~list.sort` method. :: + + >>> import random + >>> # Generate 8 random numbers between [0, 10000) + >>> rand_list = random.sample(range(10000), 8) + >>> rand_list #doctest: +SKIP + [769, 7953, 9828, 6431, 8442, 9878, 6213, 2207] + >>> sorted(rand_list) #doctest: +SKIP + [769, 2207, 6213, 6431, 7953, 8442, 9828, 9878] + >>> sorted(rand_list, reverse=True) #doctest: +SKIP + [9878, 9828, 8442, 7953, 6431, 6213, 2207, 769] + +(For a more detailed discussion of sorting, see the :ref:`sortinghowto`.) + + +The :func:`any(iter) <any>` and :func:`all(iter) <all>` built-ins look at the +truth values of an iterable's contents. :func:`any` returns ``True`` if any element +in the iterable is a true value, and :func:`all` returns ``True`` if all of the +elements are true values: + + >>> any([0, 1, 0]) + True + >>> any([0, 0, 0]) + False + >>> any([1, 1, 1]) + True + >>> all([0, 1, 0]) + False + >>> all([0, 0, 0]) + False + >>> all([1, 1, 1]) + True + + +:func:`zip(iterA, iterB, ...) <zip>` takes one element from each iterable and +returns them in a tuple:: + + zip(['a', 'b', 'c'], (1, 2, 3)) => + ('a', 1), ('b', 2), ('c', 3) + +It doesn't construct an in-memory list and exhaust all the input iterators +before returning; instead tuples are constructed and returned only if they're +requested. (The technical term for this behaviour is `lazy evaluation +<https://en.wikipedia.org/wiki/Lazy_evaluation>`__.) + +This iterator is intended to be used with iterables that are all of the same +length.
If the iterables are of different lengths, the resulting stream will be +the same length as the shortest iterable. :: + + zip(['a', 'b'], (1, 2, 3)) => + ('a', 1), ('b', 2) + +You should avoid doing this, though, because an element may be taken from the +longer iterators and discarded. This means you can't go on to use the iterators +further because you risk skipping a discarded element. + + +The itertools module +==================== + +The :mod:`itertools` module contains a number of commonly used iterators as well +as functions for combining several iterators. This section will introduce the +module's contents by showing small examples. + +The module's functions fall into a few broad classes: + +* Functions that create a new iterator based on an existing iterator. +* Functions for treating an iterator's elements as function arguments. +* Functions for selecting portions of an iterator's output. +* A function for grouping an iterator's output. + +Creating new iterators +---------------------- + +:func:`itertools.count(start, step) ` returns an infinite +stream of evenly spaced values. You can optionally supply the starting number, +which defaults to 0, and the interval between numbers, which defaults to 1:: + + itertools.count() => + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, ... + itertools.count(10) => + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, ... + itertools.count(10, 5) => + 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, ... + +:func:`itertools.cycle(iter) ` saves a copy of the contents of +a provided iterable and returns a new iterator that returns its elements from +first to last. The new iterator will repeat these elements infinitely. :: + + itertools.cycle([1, 2, 3, 4, 5]) => + 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, ... + +:func:`itertools.repeat(elem, [n]) ` returns the provided +element *n* times, or returns the element endlessly if *n* is not provided. :: + + itertools.repeat('abc') => + abc, abc, abc, abc, abc, abc, abc, abc, abc, abc, ... 
+ itertools.repeat('abc', 5) => + abc, abc, abc, abc, abc + +:func:`itertools.chain(iterA, iterB, ...) ` takes an arbitrary +number of iterables as input, and returns all the elements of the first +iterator, then all the elements of the second, and so on, until all of the +iterables have been exhausted. :: + + itertools.chain(['a', 'b', 'c'], (1, 2, 3)) => + a, b, c, 1, 2, 3 + +:func:`itertools.islice(iter, [start], stop, [step]) ` returns +a stream that's a slice of the iterator. With a single *stop* argument, it +will return the first *stop* elements. If you supply a starting index, you'll +get *stop-start* elements, and if you supply a value for *step*, elements +will be skipped accordingly. Unlike Python's string and list slicing, you can't +use negative values for *start*, *stop*, or *step*. :: + + itertools.islice(range(10), 8) => + 0, 1, 2, 3, 4, 5, 6, 7 + itertools.islice(range(10), 2, 8) => + 2, 3, 4, 5, 6, 7 + itertools.islice(range(10), 2, 8, 2) => + 2, 4, 6 + +:func:`itertools.tee(iter, [n]) ` replicates an iterator; it +returns *n* independent iterators that will all return the contents of the +source iterator. +If you don't supply a value for *n*, the default is 2. Replicating iterators +requires saving some of the contents of the source iterator, so this can consume +significant memory if the iterator is large and one of the new iterators is +consumed more than the others. :: + + itertools.tee( itertools.count() ) => + iterA, iterB + + where iterA -> + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, ... + + and iterB -> + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, ... + + +Calling functions on elements +----------------------------- + +The :mod:`operator` module contains a set of functions corresponding to Python's +operators. Some examples are :func:`operator.add(a, b) ` (adds +two values), :func:`operator.ne(a, b) ` (same as ``a != b``), and +:func:`operator.attrgetter('id') ` +(returns a callable that fetches the ``.id`` attribute). 
+ +:func:`itertools.starmap(func, iter) ` assumes that the +iterable will return a stream of tuples, and calls *func* using these tuples as +the arguments:: + + itertools.starmap(os.path.join, + [('/bin', 'python'), ('/usr', 'bin', 'java'), + ('/usr', 'bin', 'perl'), ('/usr', 'bin', 'ruby')]) + => + /bin/python, /usr/bin/java, /usr/bin/perl, /usr/bin/ruby + + +Selecting elements +------------------ + +Another group of functions chooses a subset of an iterator's elements based on a +predicate. + +:func:`itertools.filterfalse(predicate, iter) ` is the +opposite of :func:`filter`, returning all elements for which the predicate +returns false:: + + itertools.filterfalse(is_even, itertools.count()) => + 1, 3, 5, 7, 9, 11, 13, 15, ... + +:func:`itertools.takewhile(predicate, iter) ` returns +elements for as long as the predicate returns true. Once the predicate returns +false, the iterator will signal the end of its results. :: + + def less_than_10(x): + return x < 10 + + itertools.takewhile(less_than_10, itertools.count()) => + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 + + itertools.takewhile(is_even, itertools.count()) => + 0 + +:func:`itertools.dropwhile(predicate, iter) ` discards +elements while the predicate returns true, and then returns the rest of the +iterable's results. :: + + itertools.dropwhile(less_than_10, itertools.count()) => + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, ... + + itertools.dropwhile(is_even, itertools.count()) => + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, ... + +:func:`itertools.compress(data, selectors) ` takes two +iterators and returns only those elements of *data* for which the corresponding +element of *selectors* is true, stopping whenever either one is exhausted:: + + itertools.compress([1, 2, 3, 4, 5], [True, True, False, False, True]) => + 1, 2, 5 + + +Combinatoric functions +---------------------- + +The :func:`itertools.combinations(iterable, r) ` +returns an iterator giving all possible *r*-tuple combinations of the +elements contained in *iterable*. 
:: + + itertools.combinations([1, 2, 3, 4, 5], 2) => + (1, 2), (1, 3), (1, 4), (1, 5), + (2, 3), (2, 4), (2, 5), + (3, 4), (3, 5), + (4, 5) + + itertools.combinations([1, 2, 3, 4, 5], 3) => + (1, 2, 3), (1, 2, 4), (1, 2, 5), (1, 3, 4), (1, 3, 5), (1, 4, 5), + (2, 3, 4), (2, 3, 5), (2, 4, 5), + (3, 4, 5) + +The elements within each tuple remain in the same order as +*iterable* returned them. For example, the number 1 is always before +2, 3, 4, or 5 in the examples above. A similar function, +:func:`itertools.permutations(iterable, r=None) `, +removes this constraint on the order, returning all possible +arrangements of length *r*:: + + itertools.permutations([1, 2, 3, 4, 5], 2) => + (1, 2), (1, 3), (1, 4), (1, 5), + (2, 1), (2, 3), (2, 4), (2, 5), + (3, 1), (3, 2), (3, 4), (3, 5), + (4, 1), (4, 2), (4, 3), (4, 5), + (5, 1), (5, 2), (5, 3), (5, 4) + + itertools.permutations([1, 2, 3, 4, 5]) => + (1, 2, 3, 4, 5), (1, 2, 3, 5, 4), (1, 2, 4, 3, 5), + ... + (5, 4, 3, 2, 1) + +If you don't supply a value for *r* the length of the iterable is used, +meaning that all the elements are permuted. + +Note that these functions produce all of the possible combinations by +position and don't require that the contents of *iterable* are unique:: + + itertools.permutations('aba', 3) => + ('a', 'b', 'a'), ('a', 'a', 'b'), ('b', 'a', 'a'), + ('b', 'a', 'a'), ('a', 'a', 'b'), ('a', 'b', 'a') + +The identical tuple ``('a', 'a', 'b')`` occurs twice, but the two 'a' +strings came from different positions. + +The :func:`itertools.combinations_with_replacement(iterable, r) ` +function relaxes a different constraint: elements can be repeated +within a single tuple. Conceptually an element is selected for the +first position of each tuple and then is replaced before the second +element is selected. 
:: + + itertools.combinations_with_replacement([1, 2, 3, 4, 5], 2) => + (1, 1), (1, 2), (1, 3), (1, 4), (1, 5), + (2, 2), (2, 3), (2, 4), (2, 5), + (3, 3), (3, 4), (3, 5), + (4, 4), (4, 5), + (5, 5) + + +Grouping elements +----------------- + +The last function I'll discuss, :func:`itertools.groupby(iter, key_func=None) +`, is the most complicated. ``key_func(elem)`` is a function +that can compute a key value for each element returned by the iterable. If you +don't supply a key function, the key is simply each element itself. + +:func:`~itertools.groupby` collects all the consecutive elements from the +underlying iterable that have the same key value, and returns a stream of +2-tuples containing a key value and an iterator for the elements with that key. + +:: + + city_list = [('Decatur', 'AL'), ('Huntsville', 'AL'), ('Selma', 'AL'), + ('Anchorage', 'AK'), ('Nome', 'AK'), + ('Flagstaff', 'AZ'), ('Phoenix', 'AZ'), ('Tucson', 'AZ'), + ... + ] + + def get_state(city_state): + return city_state[1] + + itertools.groupby(city_list, get_state) => + ('AL', iterator-1), + ('AK', iterator-2), + ('AZ', iterator-3), ... + + where + iterator-1 => + ('Decatur', 'AL'), ('Huntsville', 'AL'), ('Selma', 'AL') + iterator-2 => + ('Anchorage', 'AK'), ('Nome', 'AK') + iterator-3 => + ('Flagstaff', 'AZ'), ('Phoenix', 'AZ'), ('Tucson', 'AZ') + +:func:`~itertools.groupby` assumes that the underlying iterable's contents will +already be sorted based on the key. Note that the returned iterators also use +the underlying iterable, so you have to consume the results of iterator-1 before +requesting iterator-2 and its corresponding key. + + +The functools module +==================== + +The :mod:`functools` module contains some higher-order functions. +A **higher-order function** takes one or more functions as input and returns a +new function. The most useful tool in this module is the +:func:`functools.partial` function. 
+ +For programs written in a functional style, you'll sometimes want to construct +variants of existing functions that have some of the parameters filled in. +Consider a Python function ``f(a, b, c)``; you may wish to create a new function +``g(b, c)`` that's equivalent to ``f(1, b, c)``; you're filling in a value for +one of ``f()``'s parameters. This is called "partial function application". + +The constructor for :func:`~functools.partial` takes the arguments +``(function, arg1, arg2, ..., kwarg1=value1, kwarg2=value2)``. The resulting +object is callable, so you can just call it to invoke ``function`` with the +filled-in arguments. + +Here's a small but realistic example:: + + import functools + + def log(message, subsystem): + """Write the contents of 'message' to the specified subsystem.""" + print('%s: %s' % (subsystem, message)) + ... + + server_log = functools.partial(log, subsystem='server') + server_log('Unable to open socket') + +:func:`functools.reduce(func, iter, [initial_value]) ` +cumulatively performs an operation on all the iterable's elements and, +therefore, can't be applied to infinite iterables. *func* must be a function +that takes two elements and returns a single value. :func:`functools.reduce` +takes the first two elements A and B returned by the iterator and calculates +``func(A, B)``. It then requests the third element, C, calculates +``func(func(A, B), C)``, combines this result with the fourth element returned, +and continues until the iterable is exhausted. If the iterable returns no +values at all, a :exc:`TypeError` exception is raised. If the initial value is +supplied, it's used as a starting point and ``func(initial_value, A)`` is the +first calculation. :: + + >>> import operator, functools + >>> functools.reduce(operator.concat, ['A', 'BB', 'C']) + 'ABBC' + >>> functools.reduce(operator.concat, []) + Traceback (most recent call last): + ... 
+ TypeError: reduce() of empty sequence with no initial value + >>> functools.reduce(operator.mul, [1, 2, 3], 1) + 6 + >>> functools.reduce(operator.mul, [], 1) + 1 + +If you use :func:`operator.add` with :func:`functools.reduce`, you'll add up all the +elements of the iterable. This case is so common that there's a special +built-in called :func:`sum` to compute it: + + >>> import functools, operator + >>> functools.reduce(operator.add, [1, 2, 3, 4], 0) + 10 + >>> sum([1, 2, 3, 4]) + 10 + >>> sum([]) + 0 + +For many uses of :func:`functools.reduce`, though, it can be clearer to just +write the obvious :keyword:`for` loop:: + + import functools + # Instead of: + product = functools.reduce(operator.mul, [1, 2, 3], 1) + + # You can write: + product = 1 + for i in [1, 2, 3]: + product *= i + +A related function is :func:`itertools.accumulate(iterable, func=operator.add) +`. It performs the same calculation, but instead of +returning only the final result, :func:`~itertools.accumulate` returns an iterator +that also yields each partial result:: + + itertools.accumulate([1, 2, 3, 4, 5]) => + 1, 3, 6, 10, 15 + + itertools.accumulate([1, 2, 3, 4, 5], operator.mul) => + 1, 2, 6, 24, 120 + + +The operator module +------------------- + +The :mod:`operator` module was mentioned earlier. It contains a set of +functions corresponding to Python's operators. These functions are often useful +in functional-style code because they save you from writing trivial functions +that perform a single operation. + +Some of the functions in this module are: + +* Math operations: ``add()``, ``sub()``, ``mul()``, ``floordiv()``, ``abs()``, ... +* Logical operations: ``not_()``, ``truth()``. +* Bitwise operations: ``and_()``, ``or_()``, ``invert()``. +* Comparisons: ``eq()``, ``ne()``, ``lt()``, ``le()``, ``gt()``, and ``ge()``. +* Object identity: ``is_()``, ``is_not()``. + +Consult the operator module's documentation for a complete list. 
+ + +Small functions and the lambda expression +========================================= + +When writing functional-style programs, you'll often need little functions that +act as predicates or that combine elements in some way. + +If there's a Python built-in or a module function that's suitable, you don't +need to define a new function at all:: + + stripped_lines = [line.strip() for line in lines] + existing_files = filter(os.path.exists, file_list) + +If the function you need doesn't exist, you need to write it. One way to write +small functions is to use the :keyword:`lambda` expression. ``lambda`` takes a +number of parameters and an expression combining these parameters, and creates +an anonymous function that returns the value of the expression:: + + adder = lambda x, y: x+y + + print_assign = lambda name, value: name + '=' + str(value) + +An alternative is to just use the ``def`` statement and define a function in the +usual way:: + + def adder(x, y): + return x + y + + def print_assign(name, value): + return name + '=' + str(value) + +Which alternative is preferable? That's a style question; my usual course is to +avoid using ``lambda``. + +One reason for my preference is that ``lambda`` is quite limited in the +functions it can define. The result has to be computable as a single +expression, which means you can't have multiway ``if... elif... else`` +comparisons or ``try... except`` statements. If you try to do too much in a +``lambda`` statement, you'll end up with an overly complicated expression that's +hard to read. Quick, what's the following code doing? :: + + import functools + total = functools.reduce(lambda a, b: (0, a[1] + b[1]), items)[1] + +You can figure it out, but it takes time to disentangle the expression to figure +out what's going on. 
Using a short nested ``def`` statement makes things a +little bit better:: + + import functools + def combine(a, b): + return 0, a[1] + b[1] + + total = functools.reduce(combine, items)[1] + +But it would be best of all if I had simply used a ``for`` loop:: + + total = 0 + for a, b in items: + total += b + +Or the :func:`sum` built-in and a generator expression:: + + total = sum(b for a, b in items) + +Many uses of :func:`functools.reduce` are clearer when written as ``for`` loops. + +Fredrik Lundh once suggested the following set of rules for refactoring uses of +``lambda``: + +1. Write a lambda function. +2. Write a comment explaining what the heck that lambda does. +3. Study the comment for a while, and think of a name that captures the essence + of the comment. +4. Convert the lambda to a def statement, using that name. +5. Remove the comment. + +I really like these rules, but you're free to disagree +about whether this lambda-free style is better. + + +Revision History and Acknowledgements +===================================== + +The author would like to thank the following people for offering suggestions, +corrections and assistance with various drafts of this article: Ian Bicking, +Nick Coghlan, Nick Efford, Raymond Hettinger, Jim Jewett, Mike Krell, Leandro +Lameiro, Jussi Salmela, Collin Winter, Blake Winton. + +Version 0.1: posted June 30 2006. + +Version 0.11: posted July 1 2006. Typo fixes. + +Version 0.2: posted July 10 2006. Merged genexp and listcomp sections into one. +Typo fixes. + +Version 0.21: Added more references suggested on the tutor mailing list. + +Version 0.30: Adds a section on the ``functional`` module written by Collin +Winter; adds short section on the operator module; a few other edits. + + +References +========== + +General +------- + +**Structure and Interpretation of Computer Programs**, by Harold Abelson and +Gerald Jay Sussman with Julie Sussman. The book can be found at +https://mitpress.mit.edu/sicp.
In this classic textbook of computer science, +chapters 2 and 3 discuss the use of sequences and streams to organize the data +flow inside a program. The book uses Scheme for its examples, but many of the +design approaches described in these chapters are applicable to functional-style +Python code. + +https://www.defmacro.org/ramblings/fp.html: A general introduction to functional +programming that uses Java examples and has a lengthy historical introduction. + +https://en.wikipedia.org/wiki/Functional_programming: General Wikipedia entry +describing functional programming. + +https://en.wikipedia.org/wiki/Coroutine: Entry for coroutines. + +https://en.wikipedia.org/wiki/Partial_application: Entry for the concept of partial function application. + +https://en.wikipedia.org/wiki/Currying: Entry for the concept of currying. + +Python-specific +--------------- + +https://gnosis.cx/TPiP/: The first chapter of David Mertz's book +:title-reference:`Text Processing in Python` discusses functional programming +for text processing, in the section titled "Utilizing Higher-Order Functions in +Text Processing". + +Mertz also wrote a 3-part series of articles on functional programming +for IBM's DeveloperWorks site; see +`part 1 `__, +`part 2 `__, and +`part 3 `__, + + +Python documentation +-------------------- + +Documentation for the :mod:`itertools` module. + +Documentation for the :mod:`functools` module. + +Documentation for the :mod:`operator` module. + +:pep:`289`: "Generator Expressions" + +:pep:`342`: "Coroutines via Enhanced Generators" describes the new generator +features in Python 2.5. + +.. comment + + Handy little function for printing part of an iterator -- used + while writing this document. 
+ + import itertools + def print_iter(it): + slice = itertools.islice(it, 10) + for elem in slice[:-1]: + sys.stdout.write(str(elem)) + sys.stdout.write(', ') + print(elem[-1]) diff --git a/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/gdb_helpers.rst b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/gdb_helpers.rst new file mode 100644 index 00000000..53bbf7dd --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/gdb_helpers.rst @@ -0,0 +1,449 @@ +.. _gdb: + +========================================================= +Debugging C API extensions and CPython Internals with GDB +========================================================= + +.. highlight:: none + +This document explains how the Python GDB extension, ``python-gdb.py``, can +be used with the GDB debugger to debug CPython extensions and the +CPython interpreter itself. + +When debugging low-level problems such as crashes or deadlocks, a low-level +debugger, such as GDB, is useful to diagnose and correct the issue. +By default, GDB (or any of its front-ends) doesn't support high-level +information specific to the CPython interpreter. + +The ``python-gdb.py`` extension adds CPython interpreter information to GDB. +The extension helps introspect the stack of currently executing Python functions. +Given a Python object represented by a :c:expr:`PyObject *` pointer, +the extension surfaces the type and value of the object. + +Developers who are working on CPython extensions or tinkering with parts +of CPython that are written in C can use this document to learn how to use the +``python-gdb.py`` extension with GDB. + +.. note:: + + This document assumes that you are familiar with the basics of GDB and the + CPython C API. It consolidates guidance from the + `devguide `_ and the + `Python wiki `_. + + +Prerequisites +============= + +You need to have: + +- GDB 7 or later. 
(For earlier versions of GDB, see ``Misc/gdbinit`` in the + sources of Python 3.11 or earlier.) +- GDB-compatible debugging information for Python and any extension you are + debugging. +- The ``python-gdb.py`` extension. + +The extension is built with Python, but might be distributed separately or +not at all. Below, we include tips for a few common systems as examples. +Note that even if the instructions match your system, they might be outdated. + + +Setup with Python built from source +----------------------------------- + +When you build CPython from source, debugging information should be available, +and the build should add a ``python-gdb.py`` file to the root directory of +your repository. + +To activate support, you must add the directory containing ``python-gdb.py`` +to GDB's "auto-load-safe-path". +If you haven't done this, recent versions of GDB will print out a warning +with instructions on how to do this. + +.. note:: + + If you do not see instructions for your version of GDB, put this in your + configuration file (``~/.gdbinit`` or ``~/.config/gdb/gdbinit``):: + + add-auto-load-safe-path /path/to/cpython + + You can also add multiple paths, separated by ``:``. + + +Setup for Python from a Linux distro +------------------------------------ + +Most Linux systems provide debug information for the system Python +in a package called ``python-debuginfo``, ``python-dbg`` or similar. +For example: + +- Fedora: + + .. code-block:: shell + + sudo dnf install gdb + sudo dnf debuginfo-install python3 + +- Ubuntu: + + .. code-block:: shell + + sudo apt install gdb python3-dbg + +On several recent Linux systems, GDB can download debugging symbols +automatically using *debuginfod*. +However, this will not install the ``python-gdb.py`` extension; +you generally do need to install the debug info package separately. 
+ + +Using the Debug build and Development mode +========================================== + +For easier debugging, you might want to: + +- Use a :ref:`debug build ` of Python. (When building from source, + use ``configure --with-pydebug``. On Linux distros, install and run a package + like ``python-debug`` or ``python-dbg``, if available.) +- Use the runtime :ref:`development mode ` (``-X dev``). + +Both enable extra assertions and disable some optimizations. +Sometimes this hides the bug you are trying to find, but in most cases they +make the process easier. + + +Using the ``python-gdb`` extension +================================== + +When the extension is loaded, it provides two main features: +pretty printers for Python values, and additional commands. + +Pretty-printers +--------------- + +This is what a GDB backtrace looks like (truncated) when this extension is +enabled:: + + #0 0x000000000041a6b1 in PyObject_Malloc (nbytes=Cannot access memory at address 0x7fffff7fefe8 + ) at Objects/obmalloc.c:748 + #1 0x000000000041b7c0 in _PyObject_DebugMallocApi (id=111 'o', nbytes=24) at Objects/obmalloc.c:1445 + #2 0x000000000041b717 in _PyObject_DebugMalloc (nbytes=24) at Objects/obmalloc.c:1412 + #3 0x000000000044060a in _PyUnicode_New (length=11) at Objects/unicodeobject.c:346 + #4 0x00000000004466aa in PyUnicodeUCS2_DecodeUTF8Stateful (s=0x5c2b8d "__lltrace__", size=11, errors=0x0, consumed= + 0x0) at Objects/unicodeobject.c:2531 + #5 0x0000000000446647 in PyUnicodeUCS2_DecodeUTF8 (s=0x5c2b8d "__lltrace__", size=11, errors=0x0) + at Objects/unicodeobject.c:2495 + #6 0x0000000000440d1b in PyUnicodeUCS2_FromStringAndSize (u=0x5c2b8d "__lltrace__", size=11) + at Objects/unicodeobject.c:551 + #7 0x0000000000440d94 in PyUnicodeUCS2_FromString (u=0x5c2b8d "__lltrace__") at Objects/unicodeobject.c:569 + #8 0x0000000000584abd in PyDict_GetItemString (v= + {'Yuck': , '__builtins__': , '__file__': 'Lib/test/crashers/nasty_eq_vs_dict.py', '__package__': None, 'y': , 
'dict': {0: 0, 1: 1, 2: 2, 3: 3}, '__cached__': None, '__name__': '__main__', 'z': , '__doc__': None}, key= + 0x5c2b8d "__lltrace__") at Objects/dictobject.c:2171 + +Notice how the dictionary argument to ``PyDict_GetItemString`` is displayed +as its ``repr()``, rather than an opaque ``PyObject *`` pointer. + +The extension works by supplying a custom printing routine for values of type +``PyObject *``. If you need to access lower-level details of an object, then +cast the value to a pointer of the appropriate type. For example:: + + (gdb) p globals + $1 = {'__builtins__': , '__name__': + '__main__', 'ctypes': , '__doc__': None, + '__package__': None} + + (gdb) p *(PyDictObject*)globals + $2 = {ob_refcnt = 3, ob_type = 0x3dbdf85820, ma_fill = 5, ma_used = 5, + ma_mask = 7, ma_table = 0x63d0f8, ma_lookup = 0x3dbdc7ea70 + , ma_smalltable = {{me_hash = 7065186196740147912, + me_key = '__builtins__', me_value = }, + {me_hash = -368181376027291943, me_key = '__name__', + me_value ='__main__'}, {me_hash = 0, me_key = 0x0, me_value = 0x0}, + {me_hash = 0, me_key = 0x0, me_value = 0x0}, + {me_hash = -9177857982131165996, me_key = 'ctypes', + me_value = }, + {me_hash = -8518757509529533123, me_key = '__doc__', me_value = None}, + {me_hash = 0, me_key = 0x0, me_value = 0x0}, { + me_hash = 6614918939584953775, me_key = '__package__', me_value = None}}} + +Note that the pretty-printers do not actually call ``repr()``. +For basic types, they try to match its result closely. + +An area that can be confusing is that the custom printer for some types look a +lot like GDB's built-in printer for standard types. 
For example, the +pretty-printer for a Python ``int`` (:c:expr:`PyLongObject *`) +gives a representation that is not distinguishable from one of a +regular machine-level integer:: + + (gdb) p some_machine_integer + $3 = 42 + + (gdb) p some_python_integer + $4 = 42 + +The internal structure can be revealed with a cast to :c:expr:`PyLongObject *`:: + + (gdb) p *(PyLongObject*)some_python_integer + $5 = {ob_base = {ob_base = {ob_refcnt = 8, ob_type = 0x3dad39f5e0}, ob_size = 1}, + ob_digit = {42}} + +A similar confusion can arise with the ``str`` type, where the output looks a +lot like GDB's built-in printer for ``char *``:: + + (gdb) p ptr_to_python_str + $6 = '__builtins__' + +The pretty-printer for ``str`` instances defaults to using single-quotes (as +does Python's ``repr`` for strings) whereas the standard printer for ``char *`` +values uses double-quotes and contains a hexadecimal address:: + + (gdb) p ptr_to_char_star + $7 = 0x6d72c0 "hello world" + +Again, the implementation details can be revealed with a cast to +:c:expr:`PyUnicodeObject *`:: + + (gdb) p *(PyUnicodeObject*)$6 + $8 = {ob_base = {ob_refcnt = 33, ob_type = 0x3dad3a95a0}, length = 12, + str = 0x7ffff2128500, hash = 7065186196740147912, state = 1, defenc = 0x0} + +``py-list`` +----------- + + The extension adds a ``py-list`` command, which + lists the Python source code (if any) for the current frame in the selected + thread. The current line is marked with a ">":: + + (gdb) py-list + 901 if options.profile: + 902 options.profile = False + 903 profile_me() + 904 return + 905 + >906 u = UI() + 907 if not u.quit: + 908 try: + 909 gtk.main() + 910 except KeyboardInterrupt: + 911 # properly quit on a keyboard interrupt... + + Use ``py-list START`` to list at a different line number within the Python + source, and ``py-list START,END`` to list a specific range of lines within + the Python source.
+ +``py-up`` and ``py-down`` +------------------------- + + The ``py-up`` and ``py-down`` commands are analogous to GDB's regular ``up`` + and ``down`` commands, but try to move at the level of CPython frames, rather + than C frames. + + GDB is not always able to read the relevant frame information, depending on + the optimization level with which CPython was compiled. Internally, the + commands look for C frames that are executing the default frame evaluation + function (that is, the core bytecode interpreter loop within CPython) and + look up the value of the related ``PyFrameObject *``. + + They emit the frame number (at the C level) within the thread. + + For example:: + + (gdb) py-up + #37 Frame 0x9420b04, for file /usr/lib/python2.6/site-packages/ + gnome_sudoku/main.py, line 906, in start_game () + u = UI() + (gdb) py-up + #40 Frame 0x948e82c, for file /usr/lib/python2.6/site-packages/ + gnome_sudoku/gnome_sudoku.py, line 22, in start_game(main=) + main.start_game() + (gdb) py-up + Unable to find an older python frame + + so we're at the top of the Python stack. + + The frame numbers correspond to those displayed by GDB's standard + ``backtrace`` command. + The command skips C frames which are not executing Python code. 
+ + Going back down:: + + (gdb) py-down + #37 Frame 0x9420b04, for file /usr/lib/python2.6/site-packages/gnome_sudoku/main.py, line 906, in start_game () + u = UI() + (gdb) py-down + #34 (unable to read python frame information) + (gdb) py-down + #23 (unable to read python frame information) + (gdb) py-down + #19 (unable to read python frame information) + (gdb) py-down + #14 Frame 0x99262ac, for file /usr/lib/python2.6/site-packages/gnome_sudoku/game_selector.py, line 201, in run_swallowed_dialog (self=, puzzle=None, saved_games=[{'gsd.auto_fills': 0, 'tracking': {}, 'trackers': {}, 'notes': [], 'saved_at': 1270084485, 'game': '7 8 0 0 0 0 0 5 6 0 0 9 0 8 0 1 0 0 0 4 6 0 0 0 0 7 0 6 5 0 0 0 4 7 9 2 0 0 0 9 0 1 0 0 0 3 9 7 6 0 0 0 1 8 0 6 0 0 0 0 2 8 0 0 0 5 0 4 0 6 0 0 2 1 0 0 0 0 0 4 5\n7 8 0 0 0 0 0 5 6 0 0 9 0 8 0 1 0 0 0 4 6 0 0 0 0 7 0 6 5 1 8 3 4 7 9 2 0 0 0 9 0 1 0 0 0 3 9 7 6 0 0 0 1 8 0 6 0 0 0 0 2 8 0 0 0 5 0 4 0 6 0 0 2 1 0 0 0 0 0 4 5', 'gsd.impossible_hints': 0, 'timer.__absolute_start_time__': , 'gsd.hints': 0, 'timer.active_time': , 'timer.total_time': }], dialog=, saved_game_model=, sudoku_maker=, main_page=0) at remote 0x98fa6e4>, d=) + gtk.main() + (gdb) py-down + #8 (unable to read python frame information) + (gdb) py-down + Unable to find a newer python frame + + and we're at the bottom of the Python stack. + + Note that in Python 3.12 and newer, the same C stack frame can be used for + multiple Python stack frames. This means that ``py-up`` and ``py-down`` + may move multiple Python frames at once. 
For example:: + + (gdb) py-up + #6 Frame 0x7ffff7fb62b0, for file /tmp/rec.py, line 5, in recursive_function (n=0) + time.sleep(5) + #6 Frame 0x7ffff7fb6240, for file /tmp/rec.py, line 7, in recursive_function (n=1) + recursive_function(n-1) + #6 Frame 0x7ffff7fb61d0, for file /tmp/rec.py, line 7, in recursive_function (n=2) + recursive_function(n-1) + #6 Frame 0x7ffff7fb6160, for file /tmp/rec.py, line 7, in recursive_function (n=3) + recursive_function(n-1) + #6 Frame 0x7ffff7fb60f0, for file /tmp/rec.py, line 7, in recursive_function (n=4) + recursive_function(n-1) + #6 Frame 0x7ffff7fb6080, for file /tmp/rec.py, line 7, in recursive_function (n=5) + recursive_function(n-1) + #6 Frame 0x7ffff7fb6020, for file /tmp/rec.py, line 9, in () + recursive_function(5) + (gdb) py-up + Unable to find an older python frame + + +``py-bt`` +--------- + + The ``py-bt`` command attempts to display a Python-level backtrace of the + current thread. + + For example:: + + (gdb) py-bt + #8 (unable to read python frame information) + #11 Frame 0x9aead74, for file /usr/lib/python2.6/site-packages/gnome_sudoku/dialog_swallower.py, line 48, in run_dialog (self=, main_page=0) at remote 0x98fa6e4>, d=) + gtk.main() + #14 Frame 0x99262ac, for file /usr/lib/python2.6/site-packages/gnome_sudoku/game_selector.py, line 201, in run_swallowed_dialog (self=, puzzle=None, saved_games=[{'gsd.auto_fills': 0, 'tracking': {}, 'trackers': {}, 'notes': [], 'saved_at': 1270084485, 'game': '7 8 0 0 0 0 0 5 6 0 0 9 0 8 0 1 0 0 0 4 6 0 0 0 0 7 0 6 5 0 0 0 4 7 9 2 0 0 0 9 0 1 0 0 0 3 9 7 6 0 0 0 1 8 0 6 0 0 0 0 2 8 0 0 0 5 0 4 0 6 0 0 2 1 0 0 0 0 0 4 5\n7 8 0 0 0 0 0 5 6 0 0 9 0 8 0 1 0 0 0 4 6 0 0 0 0 7 0 6 5 1 8 3 4 7 9 2 0 0 0 9 0 1 0 0 0 3 9 7 6 0 0 0 1 8 0 6 0 0 0 0 2 8 0 0 0 5 0 4 0 6 0 0 2 1 0 0 0 0 0 4 5', 'gsd.impossible_hints': 0, 'timer.__absolute_start_time__': , 'gsd.hints': 0, 'timer.active_time': , 'timer.total_time': }], dialog=, saved_game_model=, sudoku_maker=) + main.start_game() + + The 
frame numbers correspond to those displayed by GDB's standard + ``backtrace`` command. + +``py-print`` +------------ + + The ``py-print`` command looks up a Python name and tries to print it. + It looks in locals within the current thread, then globals, then finally + builtins:: + + (gdb) py-print self + local 'self' = , + main_page=0) at remote 0x98fa6e4> + (gdb) py-print __name__ + global '__name__' = 'gnome_sudoku.dialog_swallower' + (gdb) py-print len + builtin 'len' = + (gdb) py-print scarlet_pimpernel + 'scarlet_pimpernel' not found + + If the current C frame corresponds to multiple Python frames, ``py-print`` + only considers the first one. + +``py-locals`` +------------- + + The ``py-locals`` command looks up all Python locals within the current + Python frame in the selected thread, and prints their representations:: + + (gdb) py-locals + self = , + main_page=0) at remote 0x98fa6e4> + d = + + If the current C frame corresponds to multiple Python frames, locals from + all of them will be shown:: + + (gdb) py-locals + Locals for recursive_function + n = 0 + Locals for recursive_function + n = 1 + Locals for recursive_function + n = 2 + Locals for recursive_function + n = 3 + Locals for recursive_function + n = 4 + Locals for recursive_function + n = 5 + Locals for + + +Use with GDB commands +===================== + +The extension commands complement GDB's built-in commands. 
+For example, you can use the frame numbers shown by ``py-bt`` with the ``frame`` +command to go to a specific frame within the selected thread, like this:: + + (gdb) py-bt + (output snipped) + #68 Frame 0xaa4560, for file Lib/test/regrtest.py, line 1548, in () + main() + (gdb) frame 68 + #68 0x00000000004cd1e6 in PyEval_EvalFrameEx (f=Frame 0xaa4560, for file Lib/test/regrtest.py, line 1548, in (), throwflag=0) at Python/ceval.c:2665 + 2665 x = call_function(&sp, oparg); + (gdb) py-list + 1543 # Run the tests in a context manager that temporary changes the CWD to a + 1544 # temporary and writable directory. If it's not possible to create or + 1545 # change the CWD, the original CWD will be used. The original CWD is + 1546 # available from test_support.SAVEDCWD. + 1547 with test_support.temp_cwd(TESTCWD, quiet=True): + >1548 main() + +The ``info threads`` command will give you a list of the threads within the +process, and you can use the ``thread`` command to select a different one:: + + (gdb) info threads + 105 Thread 0x7fffefa18710 (LWP 10260) sem_wait () at ../nptl/sysdeps/unix/sysv/linux/x86_64/sem_wait.S:86 + 104 Thread 0x7fffdf5fe710 (LWP 10259) sem_wait () at ../nptl/sysdeps/unix/sysv/linux/x86_64/sem_wait.S:86 + * 1 Thread 0x7ffff7fe2700 (LWP 10145) 0x00000038e46d73e3 in select () at ../sysdeps/unix/syscall-template.S:82 + +You can use ``thread apply all COMMAND`` (or ``t a a COMMAND`` for short) to run +a command on all threads.
With ``py-bt``, this lets you see what every +thread is doing at the Python level:: + + (gdb) t a a py-bt + + Thread 105 (Thread 0x7fffefa18710 (LWP 10260)): + #5 Frame 0x7fffd00019d0, for file /home/david/coding/python-svn/Lib/threading.py, line 155, in _acquire_restore (self=<_RLock(_Verbose__verbose=False, _RLock__owner=140737354016512, _RLock__block=, _RLock__count=1) at remote 0xd7ff40>, count_owner=(1, 140737213728528), count=1, owner=140737213728528) + self.__block.acquire() + #8 Frame 0x7fffac001640, for file /home/david/coding/python-svn/Lib/threading.py, line 269, in wait (self=<_Condition(_Condition__lock=<_RLock(_Verbose__verbose=False, _RLock__owner=140737354016512, _RLock__block=, _RLock__count=1) at remote 0xd7ff40>, acquire=, _is_owned=, _release_save=, release=, _acquire_restore=, _Verbose__verbose=False, _Condition__waiters=[]) at remote 0xd7fd10>, timeout=None, waiter=, saved_state=(1, 140737213728528)) + self._acquire_restore(saved_state) + #12 Frame 0x7fffb8001a10, for file /home/david/coding/python-svn/Lib/test/lock_tests.py, line 348, in f () + cond.wait() + #16 Frame 0x7fffb8001c40, for file /home/david/coding/python-svn/Lib/test/lock_tests.py, line 37, in task (tid=140737213728528) + f() + + Thread 104 (Thread 0x7fffdf5fe710 (LWP 10259)): + #5 Frame 0x7fffe4001580, for file /home/david/coding/python-svn/Lib/threading.py, line 155, in _acquire_restore (self=<_RLock(_Verbose__verbose=False, _RLock__owner=140737354016512, _RLock__block=, _RLock__count=1) at remote 0xd7ff40>, count_owner=(1, 140736940992272), count=1, owner=140736940992272) + self.__block.acquire() + #8 Frame 0x7fffc8002090, for file /home/david/coding/python-svn/Lib/threading.py, line 269, in wait (self=<_Condition(_Condition__lock=<_RLock(_Verbose__verbose=False, _RLock__owner=140737354016512, _RLock__block=, _RLock__count=1) at remote 0xd7ff40>, acquire=, _is_owned=, _release_save=, release=, _acquire_restore=, _Verbose__verbose=False, _Condition__waiters=[]) at remote 
0xd7fd10>, timeout=None, waiter=, saved_state=(1, 140736940992272)) + self._acquire_restore(saved_state) + #12 Frame 0x7fffac001c90, for file /home/david/coding/python-svn/Lib/test/lock_tests.py, line 348, in f () + cond.wait() + #16 Frame 0x7fffac0011c0, for file /home/david/coding/python-svn/Lib/test/lock_tests.py, line 37, in task (tid=140736940992272) + f() + + Thread 1 (Thread 0x7ffff7fe2700 (LWP 10145)): + #5 Frame 0xcb5380, for file /home/david/coding/python-svn/Lib/test/lock_tests.py, line 16, in _wait () + time.sleep(0.01) + #8 Frame 0x7fffd00024a0, for file /home/david/coding/python-svn/Lib/test/lock_tests.py, line 378, in _check_notify (self=, skipped=[], _mirrorOutput=False, testsRun=39, buffer=False, _original_stderr=, _stdout_buffer=, _stderr_buffer=, _moduleSetUpFailed=False, expectedFailures=[], errors=[], _previousTestClass=, unexpectedSuccesses=[], failures=[], shouldStop=False, failfast=False) at remote 0xc185a0>, _threads=(0,), _cleanups=[], _type_equality_funcs={: , : , : , : , `: + +.. code-block:: none + + checking for --with-dtrace... yes + +On macOS, you can list available DTrace probes by running a Python +process in the background and listing all probes made available by the +Python provider:: + + $ python3.6 -q & + $ sudo dtrace -l -P python$! # or: dtrace -l -m python3.6 + + ID PROVIDER MODULE FUNCTION NAME + 29564 python18035 python3.6 _PyEval_EvalFrameDefault function-entry + 29565 python18035 python3.6 dtrace_function_entry function-entry + 29566 python18035 python3.6 _PyEval_EvalFrameDefault function-return + 29567 python18035 python3.6 dtrace_function_return function-return + 29568 python18035 python3.6 collect gc-done + 29569 python18035 python3.6 collect gc-start + 29570 python18035 python3.6 _PyEval_EvalFrameDefault line + 29571 python18035 python3.6 maybe_dtrace_line line + +On Linux, you can verify if the SystemTap static markers are present in +the built binary by seeing if it contains a ".note.stapsdt" section. 
+ +:: + + $ readelf -S ./python | grep .note.stapsdt + [30] .note.stapsdt NOTE 0000000000000000 00308d78 + +If you've built Python as a shared library +(with the :option:`--enable-shared` configure option), you +need to look instead within the shared library. For example:: + + $ readelf -S libpython3.3dm.so.1.0 | grep .note.stapsdt + [29] .note.stapsdt NOTE 0000000000000000 00365b68 + +Sufficiently modern readelf can print the metadata:: + + $ readelf -n ./python + + Displaying notes found at file offset 0x00000254 with length 0x00000020: + Owner Data size Description + GNU 0x00000010 NT_GNU_ABI_TAG (ABI version tag) + OS: Linux, ABI: 2.6.32 + + Displaying notes found at file offset 0x00000274 with length 0x00000024: + Owner Data size Description + GNU 0x00000014 NT_GNU_BUILD_ID (unique build ID bitstring) + Build ID: df924a2b08a7e89f6e11251d4602022977af2670 + + Displaying notes found at file offset 0x002d6c30 with length 0x00000144: + Owner Data size Description + stapsdt 0x00000031 NT_STAPSDT (SystemTap probe descriptors) + Provider: python + Name: gc__start + Location: 0x00000000004371c3, Base: 0x0000000000630ce2, Semaphore: 0x00000000008d6bf6 + Arguments: -4@%ebx + stapsdt 0x00000030 NT_STAPSDT (SystemTap probe descriptors) + Provider: python + Name: gc__done + Location: 0x00000000004374e1, Base: 0x0000000000630ce2, Semaphore: 0x00000000008d6bf8 + Arguments: -8@%rax + stapsdt 0x00000045 NT_STAPSDT (SystemTap probe descriptors) + Provider: python + Name: function__entry + Location: 0x000000000053db6c, Base: 0x0000000000630ce2, Semaphore: 0x00000000008d6be8 + Arguments: 8@%rbp 8@%r12 -4@%eax + stapsdt 0x00000046 NT_STAPSDT (SystemTap probe descriptors) + Provider: python + Name: function__return + Location: 0x000000000053dba8, Base: 0x0000000000630ce2, Semaphore: 0x00000000008d6bea + Arguments: 8@%rbp 8@%r12 -4@%eax + +The above metadata contains information for SystemTap describing how it +can patch strategically placed machine code instructions to enable the 
+tracing hooks used by a SystemTap script. + + +Static DTrace probes +-------------------- + +The following example DTrace script can be used to show the call/return +hierarchy of a Python script, only tracing within the invocation of +a function called "start". In other words, import-time function +invocations are not going to be listed: + +.. code-block:: none + + self int indent; + + python$target:::function-entry + /copyinstr(arg1) == "start"/ + { + self->trace = 1; + } + + python$target:::function-entry + /self->trace/ + { + printf("%d\t%*s:", timestamp, 15, probename); + printf("%*s", self->indent, ""); + printf("%s:%s:%d\n", basename(copyinstr(arg0)), copyinstr(arg1), arg2); + self->indent++; + } + + python$target:::function-return + /self->trace/ + { + self->indent--; + printf("%d\t%*s:", timestamp, 15, probename); + printf("%*s", self->indent, ""); + printf("%s:%s:%d\n", basename(copyinstr(arg0)), copyinstr(arg1), arg2); + } + + python$target:::function-return + /copyinstr(arg1) == "start"/ + { + self->trace = 0; + } + +It can be invoked like this:: + + $ sudo dtrace -q -s call_stack.d -c "python3.6 script.py" + +The output looks like this: + +.. 
code-block:: none + + 156641360502280 function-entry:call_stack.py:start:23 + 156641360518804 function-entry: call_stack.py:function_1:1 + 156641360532797 function-entry: call_stack.py:function_3:9 + 156641360546807 function-return: call_stack.py:function_3:10 + 156641360563367 function-return: call_stack.py:function_1:2 + 156641360578365 function-entry: call_stack.py:function_2:5 + 156641360591757 function-entry: call_stack.py:function_1:1 + 156641360605556 function-entry: call_stack.py:function_3:9 + 156641360617482 function-return: call_stack.py:function_3:10 + 156641360629814 function-return: call_stack.py:function_1:2 + 156641360642285 function-return: call_stack.py:function_2:6 + 156641360656770 function-entry: call_stack.py:function_3:9 + 156641360669707 function-return: call_stack.py:function_3:10 + 156641360687853 function-entry: call_stack.py:function_4:13 + 156641360700719 function-return: call_stack.py:function_4:14 + 156641360719640 function-entry: call_stack.py:function_5:18 + 156641360732567 function-return: call_stack.py:function_5:21 + 156641360747370 function-return:call_stack.py:start:28 + + +Static SystemTap markers +------------------------ + +The low-level way to use the SystemTap integration is to use the static +markers directly. This requires you to explicitly state the binary file +containing them. + +For example, this SystemTap script can be used to show the call/return +hierarchy of a Python script: + +.. 
code-block:: none + + probe process("python").mark("function__entry") { + filename = user_string($arg1); + funcname = user_string($arg2); + lineno = $arg3; + + printf("%s => %s in %s:%d\\n", + thread_indent(1), funcname, filename, lineno); + } + + probe process("python").mark("function__return") { + filename = user_string($arg1); + funcname = user_string($arg2); + lineno = $arg3; + + printf("%s <= %s in %s:%d\\n", + thread_indent(-1), funcname, filename, lineno); + } + +It can be invoked like this:: + + $ stap \ + show-call-hierarchy.stp \ + -c "./python test.py" + +The output looks like this: + +.. code-block:: none + + 11408 python(8274): => __contains__ in Lib/_abcoll.py:362 + 11414 python(8274): => __getitem__ in Lib/os.py:425 + 11418 python(8274): => encode in Lib/os.py:490 + 11424 python(8274): <= encode in Lib/os.py:493 + 11428 python(8274): <= __getitem__ in Lib/os.py:426 + 11433 python(8274): <= __contains__ in Lib/_abcoll.py:366 + +where the columns are: + +- time in microseconds since start of script +- name of executable +- PID of process + +and the remainder indicates the call/return hierarchy as the script executes. + +For a :option:`--enable-shared` build of CPython, the markers are contained within the +libpython shared library, and the probe's dotted path needs to reflect this. For +example, this line from the above example: + +.. code-block:: none + + probe process("python").mark("function__entry") { + +should instead read: + +.. code-block:: none + + probe process("python").library("libpython3.6dm.so.1.0").mark("function__entry") { + +(assuming a :ref:`debug build ` of CPython 3.6) + + +Available static markers +------------------------ + +.. object:: function__entry(str filename, str funcname, int lineno) + + This marker indicates that execution of a Python function has begun. + It is only triggered for pure-Python (bytecode) functions. 
+ + The filename, function name, and line number are provided back to the + tracing script as positional arguments, which must be accessed using + ``$arg1``, ``$arg2``, ``$arg3``: + + * ``$arg1`` : ``(const char *)`` filename, accessible using ``user_string($arg1)`` + + * ``$arg2`` : ``(const char *)`` function name, accessible using + ``user_string($arg2)`` + + * ``$arg3`` : ``int`` line number + +.. object:: function__return(str filename, str funcname, int lineno) + + This marker is the converse of :c:func:`!function__entry`, and indicates that + execution of a Python function has ended (either via ``return``, or via an + exception). It is only triggered for pure-Python (bytecode) functions. + + The arguments are the same as for :c:func:`!function__entry` + +.. object:: line(str filename, str funcname, int lineno) + + This marker indicates a Python line is about to be executed. It is + the equivalent of line-by-line tracing with a Python profiler. It is + not triggered within C functions. + + The arguments are the same as for :c:func:`!function__entry`. + +.. object:: gc__start(int generation) + + Fires when the Python interpreter starts a garbage collection cycle. + ``arg0`` is the generation to scan, like :func:`gc.collect`. + +.. object:: gc__done(long collected) + + Fires when the Python interpreter finishes a garbage collection + cycle. ``arg0`` is the number of collected objects. + +.. object:: import__find__load__start(str modulename) + + Fires before :mod:`importlib` attempts to find and load the module. + ``arg0`` is the module name. + + .. versionadded:: 3.7 + +.. object:: import__find__load__done(str modulename, int found) + + Fires after :mod:`importlib`'s find_and_load function is called. + ``arg0`` is the module name, ``arg1`` indicates if module was + successfully loaded. + + .. versionadded:: 3.7 + + +.. object:: audit(str event, void *tuple) + + Fires when :func:`sys.audit` or :c:func:`PySys_Audit` is called. 
+ ``arg0`` is the event name as C string, ``arg1`` is a :c:type:`PyObject` + pointer to a tuple object. + + .. versionadded:: 3.8 + + +SystemTap Tapsets +----------------- + +The higher-level way to use the SystemTap integration is to use a "tapset": +SystemTap's equivalent of a library, which hides some of the lower-level +details of the static markers. + +Here is a tapset file, based on a non-shared build of CPython: + +.. code-block:: none + + /* + Provide a higher-level wrapping around the function__entry and + function__return markers: + \*/ + probe python.function.entry = process("python").mark("function__entry") + { + filename = user_string($arg1); + funcname = user_string($arg2); + lineno = $arg3; + frameptr = $arg4 + } + probe python.function.return = process("python").mark("function__return") + { + filename = user_string($arg1); + funcname = user_string($arg2); + lineno = $arg3; + frameptr = $arg4 + } + +If this file is installed in SystemTap's tapset directory (e.g. +``/usr/share/systemtap/tapset``), then these additional probepoints become +available: + +.. object:: python.function.entry(str filename, str funcname, int lineno, frameptr) + + This probe point indicates that execution of a Python function has begun. + It is only triggered for pure-Python (bytecode) functions. + +.. object:: python.function.return(str filename, str funcname, int lineno, frameptr) + + This probe point is the converse of ``python.function.entry``, and + indicates that execution of a Python function has ended (either via + ``return``, or via an exception). It is only triggered for pure-Python + (bytecode) functions. + + +Examples +-------- +This SystemTap script uses the tapset above to more cleanly implement the +example given above of tracing the Python function-call hierarchy, without +needing to directly name the static markers: + +..
code-block:: none + + probe python.function.entry + { + printf("%s => %s in %s:%d\n", + thread_indent(1), funcname, filename, lineno); + } + + probe python.function.return + { + printf("%s <= %s in %s:%d\n", + thread_indent(-1), funcname, filename, lineno); + } + + +The following script uses the tapset above to provide a top-like view of all +running CPython code, showing the top 20 most frequently entered bytecode +frames, each second, across the whole system: + +.. code-block:: none + + global fn_calls; + + probe python.function.entry + { + fn_calls[pid(), filename, funcname, lineno] += 1; + } + + probe timer.ms(1000) { + printf("\033[2J\033[1;1H") /* clear screen \*/ + printf("%6s %80s %6s %30s %6s\n", + "PID", "FILENAME", "LINE", "FUNCTION", "CALLS") + foreach ([pid, filename, funcname, lineno] in fn_calls- limit 20) { + printf("%6d %80s %6d %30s %6d\n", + pid, filename, lineno, funcname, + fn_calls[pid, filename, funcname, lineno]); + } + delete fn_calls; + } + diff --git a/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/ipaddress.rst b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/ipaddress.rst new file mode 100644 index 00000000..e852db98 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/ipaddress.rst @@ -0,0 +1,340 @@ +.. testsetup:: + + import ipaddress + +.. _ipaddress-howto: + +*************************************** +An introduction to the ipaddress module +*************************************** + +:author: Peter Moody +:author: Nick Coghlan + +.. topic:: Overview + + This document aims to provide a gentle introduction to the + :mod:`ipaddress` module. It is aimed primarily at users that aren't + already familiar with IP networking terminology, but may also be useful + to network engineers wanting an overview of how :mod:`ipaddress` + represents IP network addressing concepts. 
+ + +Creating Address/Network/Interface objects +========================================== + +Since :mod:`ipaddress` is a module for inspecting and manipulating IP addresses, +the first thing you'll want to do is create some objects. You can use +:mod:`ipaddress` to create objects from strings and integers. + + +A Note on IP Versions +--------------------- + +For readers that aren't particularly familiar with IP addressing, it's +important to know that the Internet Protocol (IP) is currently in the process +of moving from version 4 of the protocol to version 6. This transition is +occurring largely because version 4 of the protocol doesn't provide enough +addresses to handle the needs of the whole world, especially given the +increasing number of devices with direct connections to the internet. + +Explaining the details of the differences between the two versions of the +protocol is beyond the scope of this introduction, but readers need to at +least be aware that these two versions exist, and it will sometimes be +necessary to force the use of one version or the other. + + +IP Host Addresses +----------------- + +Addresses, often referred to as "host addresses" are the most basic unit +when working with IP addressing. The simplest way to create addresses is +to use the :func:`ipaddress.ip_address` factory function, which automatically +determines whether to create an IPv4 or IPv6 address based on the passed in +value: + + >>> ipaddress.ip_address('192.0.2.1') + IPv4Address('192.0.2.1') + >>> ipaddress.ip_address('2001:DB8::1') + IPv6Address('2001:db8::1') + +Addresses can also be created directly from integers. Values that will +fit within 32 bits are assumed to be IPv4 addresses:: + + >>> ipaddress.ip_address(3221225985) + IPv4Address('192.0.2.1') + >>> ipaddress.ip_address(42540766411282592856903984951653826561) + IPv6Address('2001:db8::1') + +To force the use of IPv4 or IPv6 addresses, the relevant classes can be +invoked directly. 
This is particularly useful to force creation of IPv6 +addresses for small integers:: + + >>> ipaddress.ip_address(1) + IPv4Address('0.0.0.1') + >>> ipaddress.IPv4Address(1) + IPv4Address('0.0.0.1') + >>> ipaddress.IPv6Address(1) + IPv6Address('::1') + + +Defining Networks +----------------- + +Host addresses are usually grouped together into IP networks, so +:mod:`ipaddress` provides a way to create, inspect and manipulate network +definitions. IP network objects are constructed from strings that define the +range of host addresses that are part of that network. The simplest form +for that information is a "network address/network prefix" pair, where the +prefix defines the number of leading bits that are compared to determine +whether or not an address is part of the network and the network address +defines the expected value of those bits. + +As for addresses, a factory function is provided that determines the correct +IP version automatically:: + + >>> ipaddress.ip_network('192.0.2.0/24') + IPv4Network('192.0.2.0/24') + >>> ipaddress.ip_network('2001:db8::0/96') + IPv6Network('2001:db8::/96') + +Network objects cannot have any host bits set. The practical effect of this +is that ``192.0.2.1/24`` does not describe a network. Such definitions are +referred to as interface objects since the ip-on-a-network notation is +commonly used to describe network interfaces of a computer on a given network +and are described further in the next section. + +By default, attempting to create a network object with host bits set will +result in :exc:`ValueError` being raised. To request that the +additional bits instead be coerced to zero, the flag ``strict=False`` can +be passed to the constructor:: + + >>> ipaddress.ip_network('192.0.2.1/24') + Traceback (most recent call last): + ... 
+ ValueError: 192.0.2.1/24 has host bits set + >>> ipaddress.ip_network('192.0.2.1/24', strict=False) + IPv4Network('192.0.2.0/24') + +While the string form offers significantly more flexibility, networks can +also be defined with integers, just like host addresses. In this case, the +network is considered to contain only the single address identified by the +integer, so the network prefix includes the entire network address:: + + >>> ipaddress.ip_network(3221225984) + IPv4Network('192.0.2.0/32') + >>> ipaddress.ip_network(42540766411282592856903984951653826560) + IPv6Network('2001:db8::/128') + +As with addresses, creation of a particular kind of network can be forced +by calling the class constructor directly instead of using the factory +function. + + +Host Interfaces +--------------- + +As mentioned just above, if you need to describe an address on a particular +network, neither the address nor the network classes are sufficient. +Notation like ``192.0.2.1/24`` is commonly used by network engineers and the +people who write tools for firewalls and routers as shorthand for "the host +``192.0.2.1`` on the network ``192.0.2.0/24``". Accordingly, :mod:`ipaddress` +provides a set of hybrid classes that associate an address with a particular +network. The interface for creation is identical to that for defining network +objects, except that the address portion isn't constrained to being a network +address. + + >>> ipaddress.ip_interface('192.0.2.1/24') + IPv4Interface('192.0.2.1/24') + >>> ipaddress.ip_interface('2001:db8::1/96') + IPv6Interface('2001:db8::1/96') + +Integer inputs are accepted (as with networks), and use of a particular IP +version can be forced by calling the relevant constructor directly. + + +Inspecting Address/Network/Interface Objects +============================================ + +You've gone to the trouble of creating an IPv(4|6)(Address|Network|Interface) +object, so you probably want to get information about it.
:mod:`ipaddress` +tries to make doing this easy and intuitive. + +Extracting the IP version:: + + >>> addr4 = ipaddress.ip_address('192.0.2.1') + >>> addr6 = ipaddress.ip_address('2001:db8::1') + >>> addr6.version + 6 + >>> addr4.version + 4 + +Obtaining the network from an interface:: + + >>> host4 = ipaddress.ip_interface('192.0.2.1/24') + >>> host4.network + IPv4Network('192.0.2.0/24') + >>> host6 = ipaddress.ip_interface('2001:db8::1/96') + >>> host6.network + IPv6Network('2001:db8::/96') + +Finding out how many individual addresses are in a network:: + + >>> net4 = ipaddress.ip_network('192.0.2.0/24') + >>> net4.num_addresses + 256 + >>> net6 = ipaddress.ip_network('2001:db8::0/96') + >>> net6.num_addresses + 4294967296 + +Iterating through the "usable" addresses on a network:: + + >>> net4 = ipaddress.ip_network('192.0.2.0/24') + >>> for x in net4.hosts(): + ... print(x) # doctest: +ELLIPSIS + 192.0.2.1 + 192.0.2.2 + 192.0.2.3 + 192.0.2.4 + ... + 192.0.2.252 + 192.0.2.253 + 192.0.2.254 + + +Obtaining the netmask (i.e. 
set bits corresponding to the network prefix) or +the hostmask (any bits that are not part of the netmask): + + >>> net4 = ipaddress.ip_network('192.0.2.0/24') + >>> net4.netmask + IPv4Address('255.255.255.0') + >>> net4.hostmask + IPv4Address('0.0.0.255') + >>> net6 = ipaddress.ip_network('2001:db8::0/96') + >>> net6.netmask + IPv6Address('ffff:ffff:ffff:ffff:ffff:ffff::') + >>> net6.hostmask + IPv6Address('::ffff:ffff') + + +Exploding or compressing the address:: + + >>> addr6.exploded + '2001:0db8:0000:0000:0000:0000:0000:0001' + >>> addr6.compressed + '2001:db8::1' + >>> net6.exploded + '2001:0db8:0000:0000:0000:0000:0000:0000/96' + >>> net6.compressed + '2001:db8::/96' + +While IPv4 doesn't support explosion or compression, the associated objects +still provide the relevant properties so that version neutral code can +easily ensure the most concise or most verbose form is used for IPv6 +addresses while still correctly handling IPv4 addresses. + + +Networks as lists of Addresses +============================== + +It's sometimes useful to treat networks as lists. 
This means it is possible +to index them like this:: + + >>> net4[1] + IPv4Address('192.0.2.1') + >>> net4[-1] + IPv4Address('192.0.2.255') + >>> net6[1] + IPv6Address('2001:db8::1') + >>> net6[-1] + IPv6Address('2001:db8::ffff:ffff') + + +It also means that network objects lend themselves to using the list +membership test syntax like this:: + + if address in network: + # do something + +Containment testing is done efficiently based on the network prefix:: + + >>> addr4 = ipaddress.ip_address('192.0.2.1') + >>> addr4 in ipaddress.ip_network('192.0.2.0/24') + True + >>> addr4 in ipaddress.ip_network('192.0.3.0/24') + False + + +Comparisons +=========== + +:mod:`ipaddress` provides some simple, hopefully intuitive ways to compare +objects, where it makes sense:: + + >>> ipaddress.ip_address('192.0.2.1') < ipaddress.ip_address('192.0.2.2') + True + +A :exc:`TypeError` exception is raised if you try to compare objects of +different versions or different types. + + +Using IP Addresses with other modules +===================================== + +Other modules that use IP addresses (such as :mod:`socket`) usually won't +accept objects from this module directly. Instead, they must be coerced to +an integer or string that the other module will accept:: + + >>> addr4 = ipaddress.ip_address('192.0.2.1') + >>> str(addr4) + '192.0.2.1' + >>> int(addr4) + 3221225985 + + +Getting more detail when instance creation fails +================================================ + +When creating address/network/interface objects using the version-agnostic +factory functions, any errors will be reported as :exc:`ValueError` with +a generic error message that simply says the passed in value was not +recognized as an object of that type. The lack of a specific error is +because it's necessary to know whether the value is *supposed* to be IPv4 +or IPv6 in order to provide more detail on why it has been rejected. 
+ +To support use cases where it is useful to have access to this additional +detail, the individual class constructors actually raise the +:exc:`ValueError` subclasses :exc:`ipaddress.AddressValueError` and +:exc:`ipaddress.NetmaskValueError` to indicate exactly which part of +the definition failed to parse correctly. + +The error messages are significantly more detailed when using the +class constructors directly. For example:: + + >>> ipaddress.ip_address("192.168.0.256") + Traceback (most recent call last): + ... + ValueError: '192.168.0.256' does not appear to be an IPv4 or IPv6 address + >>> ipaddress.IPv4Address("192.168.0.256") + Traceback (most recent call last): + ... + ipaddress.AddressValueError: Octet 256 (> 255) not permitted in '192.168.0.256' + + >>> ipaddress.ip_network("192.168.0.1/64") + Traceback (most recent call last): + ... + ValueError: '192.168.0.1/64' does not appear to be an IPv4 or IPv6 network + >>> ipaddress.IPv4Network("192.168.0.1/64") + Traceback (most recent call last): + ... + ipaddress.NetmaskValueError: '64' is not a valid netmask + +However, both of the module specific exceptions have :exc:`ValueError` as their +parent class, so if you're not concerned with the particular type of error, +you can still write code like the following:: + + try: + network = ipaddress.IPv4Network(address) + except ValueError: + print('address/netmask is invalid for IPv4:', address) + diff --git a/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/isolating-extensions.rst b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/isolating-extensions.rst new file mode 100644 index 00000000..a636e06b --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/howto/isolating-extensions.rst @@ -0,0 +1,629 @@ +.. highlight:: c + +.. _isolating-extensions-howto: + +*************************** +Isolating Extension Modules +*************************** + +.. 
topic:: Abstract + + Traditionally, state belonging to Python extension modules was kept in C + ``static`` variables, which have process-wide scope. This document + describes problems of such per-process state and shows a safer way: + per-module state. + + The document also describes how to switch to per-module state where + possible. This transition involves allocating space for that state, potentially + switching from static types to heap types, and—perhaps most + importantly—accessing per-module state from code. + + +Who should read this +==================== + +This guide is written for maintainers of :ref:`C-API ` extensions +who would like to make that extension safer to use in applications where +Python itself is used as a library. + + +Background +========== + +An *interpreter* is the context in which Python code runs. It contains +configuration (e.g. the import path) and runtime state (e.g. the set of +imported modules). + +Python supports running multiple interpreters in one process. There are +two cases to think about—users may run interpreters: + +- in sequence, with several :c:func:`Py_InitializeEx`/:c:func:`Py_FinalizeEx` + cycles, and +- in parallel, managing "sub-interpreters" using + :c:func:`Py_NewInterpreter`/:c:func:`Py_EndInterpreter`. + +Both cases (and combinations of them) would be most useful when +embedding Python within a library. Libraries generally shouldn't make +assumptions about the application that uses them, which include +assuming a process-wide "main Python interpreter". + +Historically, Python extension modules don't handle this use case well. +Many extension modules (and even some stdlib modules) use *per-process* +global state, because C ``static`` variables are extremely easy to use. +Thus, data that should be specific to an interpreter ends up being shared +between interpreters. 
Unless the extension developer is careful, it is very +easy to introduce edge cases that lead to crashes when a module is loaded in +more than one interpreter in the same process. + +Unfortunately, *per-interpreter* state is not easy to achieve. Extension +authors tend to not keep multiple interpreters in mind when developing, +and it is currently cumbersome to test the behavior. + +Enter Per-Module State +---------------------- + +Instead of focusing on per-interpreter state, Python's C API is evolving +to better support the more granular *per-module* state. +This means that C-level data should be attached to a *module object*. +Each interpreter creates its own module object, keeping the data separate. +For testing the isolation, multiple module objects corresponding to a single +extension can even be loaded in a single interpreter. + +Per-module state provides an easy way to think about lifetime and +resource ownership: the extension module will initialize when a +module object is created, and clean up when it's freed. In this regard, +a module is just like any other :c:expr:`PyObject *`; there are no "on +interpreter shutdown" hooks to think—or forget—about. + +Note that there are use cases for different kinds of "globals": +per-process, per-interpreter, per-thread or per-task state. +With per-module state as the default, these are still possible, +but you should treat them as exceptional cases: +if you need them, you should give them additional care and testing. +(Note that this guide does not cover them.) + + +Isolated Module Objects +----------------------- + +The key point to keep in mind when developing an extension module is +that several module objects can be created from a single shared library. +For example: + +.. 
code-block:: pycon + + >>> import sys + >>> import binascii + >>> old_binascii = binascii + >>> del sys.modules['binascii'] + >>> import binascii # create a new module object + >>> old_binascii == binascii + False + +As a rule of thumb, the two modules should be completely independent. +All objects and state specific to the module should be encapsulated +within the module object, not shared with other module objects, and +cleaned up when the module object is deallocated. +Since this is just a rule of thumb, exceptions are possible +(see `Managing Global State`_), but they will need more +thought and attention to edge cases. + +While some modules could do with less stringent restrictions, isolated +modules make it easier to set clear expectations and guidelines that +work across a variety of use cases. + + +Surprising Edge Cases +--------------------- + +Note that isolated modules do create some surprising edge cases. Most +notably, each module object will typically not share its classes and +exceptions with other similar modules. Continuing from the +`example above <Isolated Module Objects_>`__, +note that ``old_binascii.Error`` and ``binascii.Error`` are +separate objects. In the following code, the exception is *not* caught: + +.. code-block:: pycon + + >>> old_binascii.Error == binascii.Error + False + >>> try: + ... old_binascii.unhexlify(b'qwertyuiop') + ... except binascii.Error: + ... print('boo') + ... + Traceback (most recent call last): + File "<stdin>", line 2, in <module> + binascii.Error: Non-hexadecimal digit found + +This is expected. Notice that pure-Python modules behave the same way: +it is a part of how Python works. + +The goal is to make extension modules safe at the C level, not to make +hacks behave intuitively. Mutating ``sys.modules`` "manually" counts +as a hack. 
+ + +Making Modules Safe with Multiple Interpreters +============================================== + + +Managing Global State +--------------------- + +Sometimes, the state associated with a Python module is not specific to that module, but +to the entire process (or something else "more global" than a module). +For example: + +- The ``readline`` module manages *the* terminal. +- A module running on a circuit board wants to control *the* on-board + LED. + +In these cases, the Python module should provide *access* to the global +state, rather than *own* it. If possible, write the module so that +multiple copies of it can access the state independently (along with +other libraries, whether for Python or other languages). If that is not +possible, consider explicit locking. + +If it is necessary to use process-global state, the simplest way to +avoid issues with multiple interpreters is to explicitly prevent a +module from being loaded more than once per process—see +`Opt-Out: Limiting to One Module Object per Process`_. + + +Managing Per-Module State +------------------------- + +To use per-module state, use +:ref:`multi-phase extension module initialization <multi-phase-initialization>`. +This signals that your module supports multiple interpreters correctly. + +Set ``PyModuleDef.m_size`` to a positive number to request that many +bytes of storage local to the module. Usually, this will be set to the +size of some module-specific ``struct``, which can store all of the +module's C-level state. In particular, it is where you should put +pointers to classes (including exceptions, but excluding static types) +and settings (e.g. ``csv``'s :py:data:`~csv.field_size_limit`) +which the C code needs to function. + +.. note:: + Another option is to store state in the module's ``__dict__``, + but you must avoid crashing when users modify ``__dict__`` from + Python code. This usually means error- and type-checking at the C level, + which is easy to get wrong and hard to test sufficiently. 
+ + However, if module state is not needed in C code, storing it in + ``__dict__`` only is a good idea. + +If the module state includes ``PyObject`` pointers, the module object +must hold references to those objects and implement the module-level hooks +``m_traverse``, ``m_clear`` and ``m_free``. These work like +``tp_traverse``, ``tp_clear`` and ``tp_free`` of a class. Adding them will +require some work and make the code longer; this is the price for +modules which can be unloaded cleanly. + +An example of a module with per-module state is currently available as +`xxlimited <https://github.com/python/cpython/blob/master/Modules/xxlimited.c>`__; +example module initialization is shown at the bottom of the file. + + +Opt-Out: Limiting to One Module Object per Process +-------------------------------------------------- + +A non-negative ``PyModuleDef.m_size`` signals that a module supports +multiple interpreters correctly. If this is not yet the case for your +module, you can explicitly make your module loadable only once per +process. For example:: + + static int loaded = 0; + + static int + exec_module(PyObject* module) + { + if (loaded) { + PyErr_SetString(PyExc_ImportError, + "cannot load module more than once per process"); + return -1; + } + loaded = 1; + // ... rest of initialization + } + + +Module State Access from Functions +---------------------------------- + +Accessing the state from module-level functions is straightforward. +Functions get the module object as their first argument; for extracting +the state, you can use ``PyModule_GetState``:: + + static PyObject * + func(PyObject *module, PyObject *args) + { + my_struct *state = (my_struct*)PyModule_GetState(module); + if (state == NULL) { + return NULL; + } + // ... rest of logic + } + +.. note:: + ``PyModule_GetState`` may return ``NULL`` without setting an + exception if there is no module state, i.e. ``PyModuleDef.m_size`` was + zero. In your own module, you're in control of ``m_size``, so this is + easy to prevent. 
+ + +Heap Types +========== + +Traditionally, types defined in C code are *static*; that is, +``static PyTypeObject`` structures defined directly in code and +initialized using ``PyType_Ready()``. + +Such types are necessarily shared across the process. Sharing them +between module objects requires paying attention to any state they own +or access. To limit the possible issues, static types are immutable at +the Python level: for example, you can't set ``str.myattribute = 123``. + +.. impl-detail:: + Sharing truly immutable objects between interpreters is fine, + as long as they don't provide access to mutable objects. + However, in CPython, every Python object has a mutable implementation + detail: the reference count. Changes to the refcount are guarded by the GIL. + Thus, code that shares any Python objects across interpreters implicitly + depends on CPython's current, process-wide GIL. + +Because they are immutable and process-global, static types cannot access +"their" module state. +If any method of such a type requires access to module state, +the type must be converted to a *heap-allocated type*, or *heap type* +for short. These correspond more closely to classes created by Python's +``class`` statement. + +For new modules, using heap types by default is a good rule of thumb. + + +Changing Static Types to Heap Types +----------------------------------- + +Static types can be converted to heap types, but note that +the heap type API was not designed for "lossless" conversion +from static types—that is, creating a type that works exactly like a given +static type. +So, when rewriting the class definition in a new API, +you are likely to unintentionally change a few details (e.g. pickleability +or inherited slots). +Always test the details that are important to you. + +Watch out for the following two points in particular (but note that this is not +a comprehensive list): + +* Unlike static types, heap type objects are mutable by default. 
+ Use the :c:macro:`Py_TPFLAGS_IMMUTABLETYPE` flag to prevent mutability. +* Heap types inherit :c:member:`~PyTypeObject.tp_new` by default, + so it may become possible to instantiate them from Python code. + You can prevent this with the :c:macro:`Py_TPFLAGS_DISALLOW_INSTANTIATION` flag. + + +Defining Heap Types +------------------- + +Heap types can be created by filling a :c:struct:`PyType_Spec` structure, a +description or "blueprint" of a class, and calling +:c:func:`PyType_FromModuleAndSpec` to construct a new class object. + +.. note:: + Other functions, like :c:func:`PyType_FromSpec`, can also create + heap types, but :c:func:`PyType_FromModuleAndSpec` associates the module + with the class, allowing access to the module state from methods. + +The class should generally be stored in *both* the module state (for +safe access from C) and the module's ``__dict__`` (for access from +Python code). + + +Garbage-Collection Protocol +--------------------------- + +Instances of heap types hold a reference to their type. +This ensures that the type isn't destroyed before all its instances are, +but may result in reference cycles that need to be broken by the +garbage collector. + +To avoid memory leaks, instances of heap types must implement the +garbage collection protocol. +That is, heap types should: + +- Have the :c:macro:`Py_TPFLAGS_HAVE_GC` flag. +- Define a traverse function using ``Py_tp_traverse``, which + visits the type (e.g. using ``Py_VISIT(Py_TYPE(self))``). + +Please refer to the documentation of +:c:macro:`Py_TPFLAGS_HAVE_GC` and :c:member:`~PyTypeObject.tp_traverse` +for additional considerations. + +The API for defining heap types grew organically, leaving it +somewhat awkward to use in its current state. +The following sections will guide you through common issues. + + +``tp_traverse`` in Python 3.8 and lower +....................................... + +The requirement to visit the type from ``tp_traverse`` was added in Python 3.9. 
+If you support Python 3.8 and lower, the traverse function must *not* +visit the type, so it must be more complicated:: + + static int my_traverse(PyObject *self, visitproc visit, void *arg) + { + if (Py_Version >= 0x03090000) { + Py_VISIT(Py_TYPE(self)); + } + return 0; + } + +Unfortunately, :c:data:`Py_Version` was only added in Python 3.11. +As a replacement, use: + +* :c:macro:`PY_VERSION_HEX`, if not using the stable ABI, or +* :py:data:`sys.version_info` (via :c:func:`PySys_GetObject` and + :c:func:`PyArg_ParseTuple`). + + +Delegating ``tp_traverse`` +.......................... + +If your traverse function delegates to the :c:member:`~PyTypeObject.tp_traverse` +of its base class (or another type), ensure that ``Py_TYPE(self)`` is visited +only once. +Note that only heap types are expected to visit the type in ``tp_traverse``. + +For example, if your traverse function includes:: + + base->tp_traverse(self, visit, arg) + +...and ``base`` may be a static type, then it should also include:: + + if (base->tp_flags & Py_TPFLAGS_HEAPTYPE) { + // a heap type's tp_traverse already visited Py_TYPE(self) + } else { + if (Py_Version >= 0x03090000) { + Py_VISIT(Py_TYPE(self)); + } + } + +It is not necessary to handle the type's reference count in +:c:member:`~PyTypeObject.tp_new` and :c:member:`~PyTypeObject.tp_clear`. + + +Defining ``tp_dealloc`` +....................... + +If your type has a custom :c:member:`~PyTypeObject.tp_dealloc` function, +it needs to: + +- call :c:func:`PyObject_GC_UnTrack` before any fields are invalidated, and +- decrement the reference count of the type. + +To keep the type valid while ``tp_free`` is called, the type's refcount needs +to be decremented *after* the instance is deallocated. For example:: + + static void my_dealloc(PyObject *self) + { + PyObject_GC_UnTrack(self); + ... 
+ PyTypeObject *type = Py_TYPE(self); + type->tp_free(self); + Py_DECREF(type); + } + +The default ``tp_dealloc`` function does this, so +if your type does *not* override +``tp_dealloc`` you don't need to add it. + + +Not overriding ``tp_free`` +.......................... + +The :c:member:`~PyTypeObject.tp_free` slot of a heap type must be set to +:c:func:`PyObject_GC_Del`. +This is the default; do not override it. + + +Avoiding ``PyObject_New`` +......................... + +GC-tracked objects need to be allocated using GC-aware functions. + +If you use :c:func:`PyObject_New` or :c:func:`PyObject_NewVar`: + +- Get and call the type's :c:member:`~PyTypeObject.tp_alloc` slot, if possible. + That is, replace ``TYPE *o = PyObject_New(TYPE, typeobj)`` with:: + + TYPE *o = typeobj->tp_alloc(typeobj, 0); + + Replace ``o = PyObject_NewVar(TYPE, typeobj, size)`` with the same, + but use size instead of the 0. + +- If the above is not possible (e.g. inside a custom ``tp_alloc``), + call :c:func:`PyObject_GC_New` or :c:func:`PyObject_GC_NewVar`:: + + TYPE *o = PyObject_GC_New(TYPE, typeobj); + + TYPE *o = PyObject_GC_NewVar(TYPE, typeobj, size); + + +Module State Access from Classes +-------------------------------- + +If you have a type object defined with :c:func:`PyType_FromModuleAndSpec`, +you can call :c:func:`PyType_GetModule` to get the associated module, and then +:c:func:`PyModule_GetState` to get the module's state. + +To save some tedious error-handling boilerplate code, you can combine +these two steps with :c:func:`PyType_GetModuleState`, resulting in:: + + my_struct *state = (my_struct*)PyType_GetModuleState(type); + if (state == NULL) { + return NULL; + } + + +Module State Access from Regular Methods +---------------------------------------- + +Accessing the module-level state from methods of a class is somewhat more +complicated, but is possible thanks to API introduced in Python 3.9. 
+To get the state, you need to first get the *defining class*, and then +get the module state from it. + +The largest roadblock is getting *the class a method was defined in*, or +that method's "defining class" for short. The defining class can have a +reference to the module it is part of. + +Do not confuse the defining class with ``Py_TYPE(self)``. If the method +is called on a *subclass* of your type, ``Py_TYPE(self)`` will refer to +that subclass, which may be defined in a different module than yours. + +.. note:: + The following Python code can illustrate the concept. + ``Base.get_defining_class`` returns ``Base`` even + if ``type(self) == Sub``: + + .. code-block:: python + + class Base: + def get_type_of_self(self): + return type(self) + + def get_defining_class(self): + return __class__ + + class Sub(Base): + pass + +For a method to get its "defining class", it must use the +:ref:`METH_METHOD | METH_FASTCALL | METH_KEYWORDS <METH_METHOD-METH_FASTCALL-METH_KEYWORDS>` +:c:type:`calling convention <PyMethodDef>` +and the corresponding :c:type:`PyCMethod` signature:: + + PyObject *PyCMethod( + PyObject *self, // object the method was called on + PyTypeObject *defining_class, // defining class + PyObject *const *args, // C array of arguments + Py_ssize_t nargs, // length of "args" + PyObject *kwnames) // NULL, or tuple of keyword argument names + +Once you have the defining class, call :c:func:`PyType_GetModuleState` to get +the state of its associated module. + +For example:: + + static PyObject * + example_method(PyObject *self, + PyTypeObject *defining_class, + PyObject *const *args, + Py_ssize_t nargs, + PyObject *kwnames) + { + my_struct *state = (my_struct*)PyType_GetModuleState(defining_class); + if (state == NULL) { + return NULL; + } + ... 
// rest of logic + } + + PyDoc_STRVAR(example_method_doc, "..."); + + static PyMethodDef my_methods[] = { + {"example_method", + (PyCFunction)(void(*)(void))example_method, + METH_METHOD|METH_FASTCALL|METH_KEYWORDS, + example_method_doc}, + {NULL}, + }; + + +Module State Access from Slot Methods, Getters and Setters +---------------------------------------------------------- + +.. note:: + + This is new in Python 3.11. + + .. After adding to limited API: + + If you use the :ref:`limited API <limited-c-api>`, + you must update ``Py_LIMITED_API`` to ``0x030b0000``, losing ABI + compatibility with earlier versions. + +Slot methods—the fast C equivalents for special methods, such as +:c:member:`~PyNumberMethods.nb_add` for :py:attr:`~object.__add__` or +:c:member:`~PyTypeObject.tp_new` for initialization—have a very simple API that +doesn't allow passing in the defining class, unlike with :c:type:`PyCMethod`. +The same goes for getters and setters defined with +:c:type:`PyGetSetDef`. + +To access the module state in these cases, use the +:c:func:`PyType_GetModuleByDef` function, and pass in the module definition. +Once you have the module, call :c:func:`PyModule_GetState` +to get the state:: + + PyObject *module = PyType_GetModuleByDef(Py_TYPE(self), &module_def); + my_struct *state = (my_struct*)PyModule_GetState(module); + if (state == NULL) { + return NULL; + } + +:c:func:`!PyType_GetModuleByDef` works by searching the +:term:`method resolution order` (i.e. all superclasses) for the first +superclass that has a corresponding module. + +.. note:: + + In very exotic cases (inheritance chains spanning multiple modules + created from the same definition), :c:func:`!PyType_GetModuleByDef` might not + return the module of the true defining class. However, it will always + return a module with the same definition, ensuring a compatible + C memory layout. + + +Lifetime of the Module State +---------------------------- + +When a module object is garbage-collected, its module state is freed. 
+For each pointer to (a part of) the module state, you must hold a reference +to the module object. + +Usually this is not an issue, because types created with +:c:func:`PyType_FromModuleAndSpec`, and their instances, hold a reference +to the module. +However, you must be careful in reference counting when you reference +module state from other places, such as callbacks for external +libraries. + + +Open Issues +=========== + +Several issues around per-module state and heap types are still open. + +Discussions about improving the situation are best held on the `capi-sig +mailing list <https://mail.python.org/mailman3/lists/capi-sig.python.org/>`__. + + +Per-Class Scope +--------------- + +It is currently (as of Python 3.11) not possible to attach state to individual +*types* without relying on CPython implementation details (which may change +in the future—perhaps, ironically, to allow a proper solution for +per-class scope). + + +Lossless Conversion to Heap Types +--------------------------------- + +The heap type API was not designed for "lossless" conversion from static types; +that is, creating a type that works exactly like a given static type. diff --git a/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/license.rst b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/license.rst new file mode 100644 index 00000000..674ac5f5 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/license.rst @@ -0,0 +1,1131 @@ +.. highlight:: none + +.. _history-and-license: + +******************* +History and License +******************* + + +History of the software +======================= + +Python was created in the early 1990s by Guido van Rossum at Stichting +Mathematisch Centrum (CWI, see https://www.cwi.nl/) in the Netherlands as a +successor of a language called ABC. Guido remains Python's principal author, +although it includes many contributions from others. 
+ +In 1995, Guido continued his work on Python at the Corporation for National +Research Initiatives (CNRI, see https://www.cnri.reston.va.us/) in Reston, +Virginia where he released several versions of the software. + +In May 2000, Guido and the Python core development team moved to BeOpen.com to +form the BeOpen PythonLabs team. In October of the same year, the PythonLabs +team moved to Digital Creations (now Zope Corporation; see +https://www.zope.org/). In 2001, the Python Software Foundation (PSF, see +https://www.python.org/psf/) was formed, a non-profit organization created +specifically to own Python-related Intellectual Property. Zope Corporation is a +sponsoring member of the PSF. + +All Python releases are Open Source (see https://opensource.org/ for the Open +Source Definition). Historically, most, but not all, Python releases have also +been GPL-compatible; the table below summarizes the various releases. + ++----------------+--------------+------------+------------+-----------------+ +| Release | Derived from | Year | Owner | GPL compatible? 
| ++================+==============+============+============+=================+ +| 0.9.0 thru 1.2 | n/a | 1991-1995 | CWI | yes | ++----------------+--------------+------------+------------+-----------------+ +| 1.3 thru 1.5.2 | 1.2 | 1995-1999 | CNRI | yes | ++----------------+--------------+------------+------------+-----------------+ +| 1.6 | 1.5.2 | 2000 | CNRI | no | ++----------------+--------------+------------+------------+-----------------+ +| 2.0 | 1.6 | 2000 | BeOpen.com | no | ++----------------+--------------+------------+------------+-----------------+ +| 1.6.1 | 1.6 | 2001 | CNRI | no | ++----------------+--------------+------------+------------+-----------------+ +| 2.1 | 2.0+1.6.1 | 2001 | PSF | no | ++----------------+--------------+------------+------------+-----------------+ +| 2.0.1 | 2.0+1.6.1 | 2001 | PSF | yes | ++----------------+--------------+------------+------------+-----------------+ +| 2.1.1 | 2.1+2.0.1 | 2001 | PSF | yes | ++----------------+--------------+------------+------------+-----------------+ +| 2.1.2 | 2.1.1 | 2002 | PSF | yes | ++----------------+--------------+------------+------------+-----------------+ +| 2.1.3 | 2.1.2 | 2002 | PSF | yes | ++----------------+--------------+------------+------------+-----------------+ +| 2.2 and above | 2.1.1 | 2001-now | PSF | yes | ++----------------+--------------+------------+------------+-----------------+ + +.. note:: + + GPL-compatible doesn't mean that we're distributing Python under the GPL. All + Python licenses, unlike the GPL, let you distribute a modified version without + making your changes open source. The GPL-compatible licenses make it possible to + combine Python with other software that is released under the GPL; the others + don't. + +Thanks to the many outside volunteers who have worked under Guido's direction to +make these releases possible. 
+ + +Terms and conditions for accessing or otherwise using Python +============================================================ + +Python software and documentation are licensed under the +:ref:`PSF License Agreement <PSF-license>`. + +Starting with Python 3.8.6, examples, recipes, and other code in +the documentation are dual licensed under the PSF License Agreement +and the :ref:`Zero-Clause BSD license <BSD0>`. + +Some software incorporated into Python is under different licenses. +The licenses are listed with code falling under that license. +See :ref:`OtherLicenses` for an incomplete list of these licenses. + + +.. _PSF-license: + +PSF LICENSE AGREEMENT FOR PYTHON |release| +------------------------------------------ + +.. parsed-literal:: + + 1. This LICENSE AGREEMENT is between the Python Software Foundation ("PSF"), and + the Individual or Organization ("Licensee") accessing and otherwise using Python + |release| software in source or binary form and its associated documentation. + + 2. Subject to the terms and conditions of this License Agreement, PSF hereby + grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, + analyze, test, perform and/or display publicly, prepare derivative works, + distribute, and otherwise use Python |release| alone or in any derivative + version, provided, however, that PSF's License Agreement and PSF's notice of + copyright, i.e., "Copyright © 2001-2024 Python Software Foundation; All Rights + Reserved" are retained in Python |release| alone or in any derivative version + prepared by Licensee. + + 3. In the event Licensee prepares a derivative work that is based on or + incorporates Python |release| or any part thereof, and wants to make the + derivative work available to others as provided herein, then Licensee hereby + agrees to include in any such work a brief summary of the changes made to Python + |release|. + + 4. PSF is making Python |release| available to Licensee on an "AS IS" basis. 
+ PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF + EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND DISCLAIMS ANY REPRESENTATION OR + WARRANTY OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE + USE OF PYTHON |release| WILL NOT INFRINGE ANY THIRD PARTY RIGHTS. + + 5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON |release| + FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A RESULT OF + MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON |release|, OR ANY DERIVATIVE + THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. + + 6. This License Agreement will automatically terminate upon a material breach of + its terms and conditions. + + 7. Nothing in this License Agreement shall be deemed to create any relationship + of agency, partnership, or joint venture between PSF and Licensee. This License + Agreement does not grant permission to use PSF trademarks or trade name in a + trademark sense to endorse or promote products or services of Licensee, or any + third party. + + 8. By copying, installing or otherwise using Python |release|, Licensee agrees + to be bound by the terms and conditions of this License Agreement. + + +BEOPEN.COM LICENSE AGREEMENT FOR PYTHON 2.0 +------------------------------------------- + +BEOPEN PYTHON OPEN SOURCE LICENSE AGREEMENT VERSION 1 + +.. parsed-literal:: + + 1. This LICENSE AGREEMENT is between BeOpen.com ("BeOpen"), having an office at + 160 Saratoga Avenue, Santa Clara, CA 95051, and the Individual or Organization + ("Licensee") accessing and otherwise using this software in source or binary + form and its associated documentation ("the Software"). + + 2. 
Subject to the terms and conditions of this BeOpen Python License Agreement, + BeOpen hereby grants Licensee a non-exclusive, royalty-free, world-wide license + to reproduce, analyze, test, perform and/or display publicly, prepare derivative + works, distribute, and otherwise use the Software alone or in any derivative + version, provided, however, that the BeOpen Python License is retained in the + Software, alone or in any derivative version prepared by Licensee. + + 3. BeOpen is making the Software available to Licensee on an "AS IS" basis. + BEOPEN MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF + EXAMPLE, BUT NOT LIMITATION, BEOPEN MAKES NO AND DISCLAIMS ANY REPRESENTATION OR + WARRANTY OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE + USE OF THE SOFTWARE WILL NOT INFRINGE ANY THIRD PARTY RIGHTS. + + 4. BEOPEN SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF THE SOFTWARE FOR + ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A RESULT OF USING, + MODIFYING OR DISTRIBUTING THE SOFTWARE, OR ANY DERIVATIVE THEREOF, EVEN IF + ADVISED OF THE POSSIBILITY THEREOF. + + 5. This License Agreement will automatically terminate upon a material breach of + its terms and conditions. + + 6. This License Agreement shall be governed by and interpreted in all respects + by the law of the State of California, excluding conflict of law provisions. + Nothing in this License Agreement shall be deemed to create any relationship of + agency, partnership, or joint venture between BeOpen and Licensee. This License + Agreement does not grant permission to use BeOpen trademarks or trade names in a + trademark sense to endorse or promote products or services of Licensee, or any + third party. As an exception, the "BeOpen Python" logos available at + http://www.pythonlabs.com/logos.html may be used according to the permissions + granted on that web page. + + 7. 
By copying, installing or otherwise using the software, Licensee agrees to be + bound by the terms and conditions of this License Agreement. + + +CNRI LICENSE AGREEMENT FOR PYTHON 1.6.1 +--------------------------------------- + +.. parsed-literal:: + + 1. This LICENSE AGREEMENT is between the Corporation for National Research + Initiatives, having an office at 1895 Preston White Drive, Reston, VA 20191 + ("CNRI"), and the Individual or Organization ("Licensee") accessing and + otherwise using Python 1.6.1 software in source or binary form and its + associated documentation. + + 2. Subject to the terms and conditions of this License Agreement, CNRI hereby + grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, + analyze, test, perform and/or display publicly, prepare derivative works, + distribute, and otherwise use Python 1.6.1 alone or in any derivative version, + provided, however, that CNRI's License Agreement and CNRI's notice of copyright, + i.e., "Copyright © 1995-2001 Corporation for National Research Initiatives; All + Rights Reserved" are retained in Python 1.6.1 alone or in any derivative version + prepared by Licensee. Alternately, in lieu of CNRI's License Agreement, + Licensee may substitute the following text (omitting the quotes): "Python 1.6.1 + is made available subject to the terms and conditions in CNRI's License + Agreement. This Agreement together with Python 1.6.1 may be located on the + internet using the following unique, persistent identifier (known as a handle): + 1895.22/1013. This Agreement may also be obtained from a proxy server on the + internet using the following URL: http://hdl.handle.net/1895.22/1013." + + 3. 
In the event Licensee prepares a derivative work that is based on or + incorporates Python 1.6.1 or any part thereof, and wants to make the derivative + work available to others as provided herein, then Licensee hereby agrees to + include in any such work a brief summary of the changes made to Python 1.6.1. + + 4. CNRI is making Python 1.6.1 available to Licensee on an "AS IS" basis. CNRI + MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF EXAMPLE, + BUT NOT LIMITATION, CNRI MAKES NO AND DISCLAIMS ANY REPRESENTATION OR WARRANTY + OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF + PYTHON 1.6.1 WILL NOT INFRINGE ANY THIRD PARTY RIGHTS. + + 5. CNRI SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON 1.6.1 FOR + ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A RESULT OF + MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 1.6.1, OR ANY DERIVATIVE + THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. + + 6. This License Agreement will automatically terminate upon a material breach of + its terms and conditions. + + 7. This License Agreement shall be governed by the federal intellectual property + law of the United States, including without limitation the federal copyright + law, and, to the extent such U.S. federal law does not apply, by the law of the + Commonwealth of Virginia, excluding Virginia's conflict of law provisions. + Notwithstanding the foregoing, with regard to derivative works based on Python + 1.6.1 that incorporate non-separable material that was previously distributed + under the GNU General Public License (GPL), the law of the Commonwealth of + Virginia shall govern this License Agreement only as to issues arising under or + with respect to Paragraphs 4, 5, and 7 of this License Agreement. Nothing in + this License Agreement shall be deemed to create any relationship of agency, + partnership, or joint venture between CNRI and Licensee. 
This License Agreement + does not grant permission to use CNRI trademarks or trade name in a trademark + sense to endorse or promote products or services of Licensee, or any third + party. + + 8. By clicking on the "ACCEPT" button where indicated, or by copying, installing + or otherwise using Python 1.6.1, Licensee agrees to be bound by the terms and + conditions of this License Agreement. + + +CWI LICENSE AGREEMENT FOR PYTHON 0.9.0 THROUGH 1.2 +-------------------------------------------------- + +.. parsed-literal:: + + Copyright © 1991 - 1995, Stichting Mathematisch Centrum Amsterdam, The + Netherlands. All rights reserved. + + Permission to use, copy, modify, and distribute this software and its + documentation for any purpose and without fee is hereby granted, provided that + the above copyright notice appear in all copies and that both that copyright + notice and this permission notice appear in supporting documentation, and that + the name of Stichting Mathematisch Centrum or CWI not be used in advertising or + publicity pertaining to distribution of the software without specific, written + prior permission. + + STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS + SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO + EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE FOR ANY SPECIAL, INDIRECT + OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, + DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS + ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS + SOFTWARE. + + +.. _BSD0: + +ZERO-CLAUSE BSD LICENSE FOR CODE IN THE PYTHON |release| DOCUMENTATION +---------------------------------------------------------------------- + +.. parsed-literal:: + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted. 
+ + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH + REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY + AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, + INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM + LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR + OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + PERFORMANCE OF THIS SOFTWARE. + + +.. _OtherLicenses: + +Licenses and Acknowledgements for Incorporated Software +======================================================= + +This section is an incomplete, but growing list of licenses and acknowledgements +for third-party software incorporated in the Python distribution. + + +Mersenne Twister +---------------- + +The :mod:`!_random` C extension underlying the :mod:`random` module +includes code based on a download from +http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/MT2002/emt19937ar.html. The following are +the verbatim comments from the original code:: + + A C-program for MT19937, with initialization improved 2002/1/26. + Coded by Takuji Nishimura and Makoto Matsumoto. + + Before using, initialize the state by using init_genrand(seed) + or init_by_array(init_key, key_length). + + Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura, + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. 
The names of its contributors may not be used to endorse or promote + products derived from this software without specific prior written + permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + + Any feedback is very welcome. + http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html + email: m-mat @ math.sci.hiroshima-u.ac.jp (remove space) + + +Sockets +------- + +The :mod:`socket` module uses the functions :c:func:`!getaddrinfo` and +:c:func:`!getnameinfo`, which are coded in separate source files from the WIDE +Project, https://www.wide.ad.jp/. :: + + Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3.
Neither the name of the project nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + SUCH DAMAGE. + + +Asynchronous socket services +---------------------------- + +The :mod:`!test.support.asynchat` and :mod:`!test.support.asyncore` +modules contain the following notice:: + + Copyright 1996 by Sam Rushing + + All Rights Reserved + + Permission to use, copy, modify, and distribute this software and + its documentation for any purpose and without fee is hereby + granted, provided that the above copyright notice appear in all + copies and that both that copyright notice and this permission + notice appear in supporting documentation, and that the name of Sam + Rushing not be used in advertising or publicity pertaining to + distribution of the software without specific, written prior + permission. 
+ + SAM RUSHING DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN + NO EVENT SHALL SAM RUSHING BE LIABLE FOR ANY SPECIAL, INDIRECT OR + CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS + OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + +Cookie management +----------------- + +The :mod:`http.cookies` module contains the following notice:: + + Copyright 2000 by Timothy O'Malley + + All Rights Reserved + + Permission to use, copy, modify, and distribute this software + and its documentation for any purpose and without fee is hereby + granted, provided that the above copyright notice appear in all + copies and that both that copyright notice and this permission + notice appear in supporting documentation, and that the name of + Timothy O'Malley not be used in advertising or publicity + pertaining to distribution of the software without specific, written + prior permission. + + Timothy O'Malley DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS + SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY + AND FITNESS, IN NO EVENT SHALL Timothy O'Malley BE LIABLE FOR + ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, + WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS + ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + PERFORMANCE OF THIS SOFTWARE. + + +Execution tracing +----------------- + +The :mod:`trace` module contains the following notice:: + + portions copyright 2001, Autonomous Zones Industries, Inc., all rights... + err... reserved and offered to the public under the terms of the + Python 2.2 license. + Author: Zooko O'Whielacronx + http://zooko.com/ + mailto:zooko@zooko.com + + Copyright 2000, Mojam Media, Inc., all rights reserved. 
+ Author: Skip Montanaro + + Copyright 1999, Bioreason, Inc., all rights reserved. + Author: Andrew Dalke + + Copyright 1995-1997, Automatrix, Inc., all rights reserved. + Author: Skip Montanaro + + Copyright 1991-1995, Stichting Mathematisch Centrum, all rights reserved. + + + Permission to use, copy, modify, and distribute this Python software and + its associated documentation for any purpose without fee is hereby + granted, provided that the above copyright notice appears in all copies, + and that both that copyright notice and this permission notice appear in + supporting documentation, and that the name of neither Automatrix, + Bioreason or Mojam Media be used in advertising or publicity pertaining to + distribution of the software without specific, written prior permission. + + +UUencode and UUdecode functions +------------------------------- + +The ``uu`` codec contains the following notice:: + + Copyright 1994 by Lance Ellinghouse + Cathedral City, California Republic, United States of America. + All Rights Reserved + Permission to use, copy, modify, and distribute this software and its + documentation for any purpose and without fee is hereby granted, + provided that the above copyright notice appear in all copies and that + both that copyright notice and this permission notice appear in + supporting documentation, and that the name of Lance Ellinghouse + not be used in advertising or publicity pertaining to distribution + of the software without specific, written prior permission. + LANCE ELLINGHOUSE DISCLAIMS ALL WARRANTIES WITH REGARD TO + THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND + FITNESS, IN NO EVENT SHALL LANCE ELLINGHOUSE CENTRUM BE LIABLE + FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT + OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ + Modified by Jack Jansen, CWI, July 1995: + - Use binascii module to do the actual line-by-line conversion + between ascii and binary. This results in a 1000-fold speedup. The C + version is still 5 times faster, though. + - Arguments more compliant with Python standard + + +XML Remote Procedure Calls +-------------------------- + +The :mod:`xmlrpc.client` module contains the following notice:: + + The XML-RPC client interface is + + Copyright (c) 1999-2002 by Secret Labs AB + Copyright (c) 1999-2002 by Fredrik Lundh + + By obtaining, using, and/or copying this software and/or its + associated documentation, you agree that you have read, understood, + and will comply with the following terms and conditions: + + Permission to use, copy, modify, and distribute this software and + its associated documentation for any purpose and without fee is + hereby granted, provided that the above copyright notice appears in + all copies, and that both that copyright notice and this permission + notice appear in supporting documentation, and that the name of + Secret Labs AB or the author not be used in advertising or publicity + pertaining to distribution of the software without specific, written + prior permission. + + SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD + TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- + ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR + BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY + DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, + WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS + ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE + OF THIS SOFTWARE. + + +test_epoll +---------- + +The :mod:`!test.test_epoll` module contains the following notice:: + + Copyright (c) 2001-2006 Twisted Matrix Laboratories. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +Select kqueue +------------- + +The :mod:`select` module contains the following notice for the kqueue +interface:: + + Copyright (c) 2000 Doug White, 2006 James Knight, 2007 Christian Heimes + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ + THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + SUCH DAMAGE. + + +SipHash24 +--------- + +The file :file:`Python/pyhash.c` contains Marek Majkowski's implementation of +Dan Bernstein's SipHash24 algorithm. It contains the following note:: + + + Copyright (c) 2013 Marek Majkowski + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software.
+ + + Original location: + https://github.com/majek/csiphash/ + + Solution inspired by code from: + Samuel Neves (supercop/crypto_auth/siphash24/little) + djb (supercop/crypto_auth/siphash24/little2) + Jean-Philippe Aumasson (https://131002.net/siphash/siphash24.c) + + +strtod and dtoa +--------------- + +The file :file:`Python/dtoa.c`, which supplies C functions dtoa and +strtod for conversion of C doubles to and from strings, is derived +from the file of the same name by David M. Gay, currently available +from https://web.archive.org/web/20220517033456/http://www.netlib.org/fp/dtoa.c. +The original file, as retrieved on March 16, 2009, contains the following +copyright and licensing notice:: + + /**************************************************************** + * + * The author of this software is David M. Gay. + * + * Copyright (c) 1991, 2000, 2001 by Lucent Technologies. + * + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHOR NOR LUCENT MAKES ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + * + ***************************************************************/ + + +OpenSSL +------- + +The modules :mod:`hashlib`, :mod:`posix` and :mod:`ssl` use +the OpenSSL library for added performance if made available by the +operating system. Additionally, the Windows and macOS installers for +Python may include a copy of the OpenSSL libraries, so we include a copy +of the OpenSSL license here. 
For the OpenSSL 3.0 release, +and later releases derived from that, the Apache License v2 applies:: + + + Apache License + Version 2.0, January 2004 + https://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. 
Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative 
Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + +expat +----- + +The :mod:`pyexpat <xml.parsers.expat>` extension is built using an included copy of the expat +sources unless the build is configured ``--with-system-expat``:: + + Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd + and Clark Cooper + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + +libffi +------ + +The :mod:`!_ctypes` C extension underlying the :mod:`ctypes` module +is built using an included copy of the libffi +sources unless the build is configured ``--with-system-libffi``:: + + Copyright (c) 1996-2008 Red Hat, Inc and others.
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + + +zlib +---- + +The :mod:`zlib` extension is built using an included copy of the zlib +sources if the zlib version found on the system is too old to be +used for the build:: + + Copyright (C) 1995-2011 Jean-loup Gailly and Mark Adler + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + + 2. 
Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + + 3. This notice may not be removed or altered from any source distribution. + + Jean-loup Gailly Mark Adler + jloup@gzip.org madler@alumni.caltech.edu + + +cfuhash +------- + +The implementation of the hash table used by the :mod:`tracemalloc` module is based +on the cfuhash project:: + + Copyright (c) 2005 Don Owens + All rights reserved. + + This code is released under the BSD license: + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of the author nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + OF THE POSSIBILITY OF SUCH DAMAGE.
+ + +libmpdec +-------- + +The :mod:`!_decimal` C extension underlying the :mod:`decimal` module +is built using an included copy of the libmpdec +library unless the build is configured ``--with-system-libmpdec``:: + + Copyright (c) 2008-2020 Stefan Krah. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + SUCH DAMAGE. + + +W3C C14N test suite +------------------- + +The C14N 2.0 test suite in the :mod:`test` package +(``Lib/test/xmltestdata/c14n-20/``) was retrieved from the W3C website at +https://www.w3.org/TR/xml-c14n2-testcases/ and is distributed under the +3-clause BSD license:: + + Copyright (c) 2013 W3C(R) (MIT, ERCIM, Keio, Beihang), + All Rights Reserved. 
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of works must retain the original copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the original copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the W3C nor the names of its contributors may be + used to endorse or promote products derived from this work without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +.. 
_mimalloc-license: + +mimalloc +-------- + +MIT License:: + + Copyright (c) 2018-2021 Microsoft Corporation, Daan Leijen + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. + + +asyncio +---------- + +Parts of the :mod:`asyncio` module are incorporated from +`uvloop 0.16 <https://github.com/MagicStack/uvloop/tree/v0.16.0>`_, +which is distributed under the MIT license:: + + Copyright (c) 2015-2021 MagicStack Inc. http://magic.io + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software.
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + +Global Unbounded Sequences (GUS) +-------------------------------- + +The file :file:`Python/qsbr.c` is adapted from FreeBSD's "Global Unbounded +Sequences" safe memory reclamation scheme in +`subr_smr.c <https://github.com/freebsd/freebsd-src/blob/main/sys/kern/subr_smr.c>`_. +The file is distributed under the 2-Clause BSD License:: + + Copyright (c) 2019,2020 Jeffrey Roberson + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice unmodified, this list of conditions, and the following + disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/tools/extensions/pyspecific.py b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/tools/extensions/pyspecific.py new file mode 100644 index 00000000..9b3256fa --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_sphinx/data/Doc/tools/extensions/pyspecific.py @@ -0,0 +1,212 @@ +import re + +from docutils import nodes +from docutils.parsers.rst import directives +from docutils.utils import unescape +from sphinx import addnodes +from sphinx.domains.changeset import VersionChange, versionlabels, versionlabel_classes +from sphinx.domains.python import PyFunction, PyMethod, PyModule +from sphinx.locale import _ as sphinx_gettext +from sphinx.util.docutils import SphinxDirective + + +ISSUE_URI = 'https://bugs.python.org/issue?@action=redirect&bpo=%s' +GH_ISSUE_URI = 'https://github.com/python/cpython/issues/%s' +# Used in conf.py and updated here by python/release-tools/run_release.py +SOURCE_URI = 'https://github.com/python/cpython/tree/3.13/%s' + +# monkey-patch reST parser to disable alphabetic and roman enumerated lists +from docutils.parsers.rst.states import Body +Body.enum.converters['loweralpha'] = \ + Body.enum.converters['upperalpha'] = \ + Body.enum.converters['lowerroman'] = \ + Body.enum.converters['upperroman'] = lambda x: None + +# monkey-patch the productionlist directive to allow hyphens in group names +# https://github.com/sphinx-doc/sphinx/issues/11854 +from sphinx.domains import 
std + +std.token_re = re.compile(r'`((~?[\w-]*:)?\w+)`') + +# backport :no-index: +PyModule.option_spec['no-index'] = directives.flag + + +# Support for marking up and linking to bugs.python.org issues + +def issue_role(typ, rawtext, text, lineno, inliner, options={}, content=[]): + issue = unescape(text) + # sanity check: there are no bpo issues within these two values + if 47261 < int(issue) < 400000: + msg = inliner.reporter.error(f'The BPO ID {text!r} seems too high -- ' + 'use :gh:`...` for GitHub IDs', line=lineno) + prb = inliner.problematic(rawtext, rawtext, msg) + return [prb], [msg] + text = 'bpo-' + issue + refnode = nodes.reference(text, text, refuri=ISSUE_URI % issue) + return [refnode], [] + + +# Support for marking up and linking to GitHub issues + +def gh_issue_role(typ, rawtext, text, lineno, inliner, options={}, content=[]): + issue = unescape(text) + # sanity check: all GitHub issues have ID >= 32426 + # even though some of them are also valid BPO IDs + if int(issue) < 32426: + msg = inliner.reporter.error(f'The GitHub ID {text!r} seems too low -- ' + 'use :issue:`...` for BPO IDs', line=lineno) + prb = inliner.problematic(rawtext, rawtext, msg) + return [prb], [msg] + text = 'gh-' + issue + refnode = nodes.reference(text, text, refuri=GH_ISSUE_URI % issue) + return [refnode], [] + + +# Support for marking up implementation details + +class ImplementationDetail(SphinxDirective): + + has_content = True + final_argument_whitespace = True + + # This text is copied to templates/dummy.html + label_text = sphinx_gettext('CPython implementation detail:') + + def run(self): + self.assert_has_content() + pnode = nodes.compound(classes=['impl-detail']) + content = self.content + add_text = nodes.strong(self.label_text, self.label_text) + self.state.nested_parse(content, self.content_offset, pnode) + content = nodes.inline(pnode[0].rawsource, translatable=True) + content.source = pnode[0].source + content.line = pnode[0].line + content += pnode[0].children 
+ pnode[0].replace_self(nodes.paragraph( + '', '', add_text, nodes.Text(' '), content, translatable=False)) + return [pnode] + + +# Support for documenting decorators + +class PyDecoratorMixin(object): + def handle_signature(self, sig, signode): + ret = super(PyDecoratorMixin, self).handle_signature(sig, signode) + signode.insert(0, addnodes.desc_addname('@', '@')) + return ret + + def needs_arglist(self): + return False + + +class PyDecoratorFunction(PyDecoratorMixin, PyFunction): + def run(self): + # a decorator function is a function after all + self.name = 'py:function' + return PyFunction.run(self) + + +# TODO: Use sphinx.domains.python.PyDecoratorMethod when possible +class PyDecoratorMethod(PyDecoratorMixin, PyMethod): + def run(self): + self.name = 'py:method' + return PyMethod.run(self) + + +class PyCoroutineMixin(object): + def handle_signature(self, sig, signode): + ret = super(PyCoroutineMixin, self).handle_signature(sig, signode) + signode.insert(0, addnodes.desc_annotation('coroutine ', 'coroutine ')) + return ret + + +class PyAwaitableMixin(object): + def handle_signature(self, sig, signode): + ret = super(PyAwaitableMixin, self).handle_signature(sig, signode) + signode.insert(0, addnodes.desc_annotation('awaitable ', 'awaitable ')) + return ret + + +class PyCoroutineFunction(PyCoroutineMixin, PyFunction): + def run(self): + self.name = 'py:function' + return PyFunction.run(self) + + +class PyCoroutineMethod(PyCoroutineMixin, PyMethod): + def run(self): + self.name = 'py:method' + return PyMethod.run(self) + + +class PyAwaitableFunction(PyAwaitableMixin, PyFunction): + def run(self): + self.name = 'py:function' + return PyFunction.run(self) + + +class PyAwaitableMethod(PyAwaitableMixin, PyMethod): + def run(self): + self.name = 'py:method' + return PyMethod.run(self) + + +class PyAbstractMethod(PyMethod): + + def handle_signature(self, sig, signode): + ret = super(PyAbstractMethod, self).handle_signature(sig, signode) + signode.insert(0, 
addnodes.desc_annotation('abstractmethod ', + 'abstractmethod ')) + return ret + + def run(self): + self.name = 'py:method' + return PyMethod.run(self) + + +# Support for documenting version of removal in deprecations + +class DeprecatedRemoved(VersionChange): + required_arguments = 2 + + _deprecated_label = sphinx_gettext('Deprecated since version %s, will be removed in version %s') + _removed_label = sphinx_gettext('Deprecated since version %s, removed in version %s') + + def run(self): + # Replace the first two arguments (deprecated version and removed version) + # with a single tuple of both versions. + version_deprecated = self.arguments[0] + version_removed = self.arguments.pop(1) + self.arguments[0] = version_deprecated, version_removed + + # Set the label based on if we have reached the removal version + current_version = tuple(map(int, self.config.version.split('.'))) + removed_version = tuple(map(int, version_removed.split('.'))) + if current_version < removed_version: + versionlabels[self.name] = self._deprecated_label + versionlabel_classes[self.name] = 'deprecated' + else: + versionlabels[self.name] = self._removed_label + versionlabel_classes[self.name] = 'removed' + try: + return super().run() + finally: + # reset versionlabels and versionlabel_classes + versionlabels[self.name] = '' + versionlabel_classes[self.name] = '' + + +def setup(app): + app.add_role('issue', issue_role) + app.add_role('gh', gh_issue_role) + app.add_directive('impl-detail', ImplementationDetail) + app.add_directive('deprecated-removed', DeprecatedRemoved) + app.add_directive_to_domain('py', 'decorator', PyDecoratorFunction) + app.add_directive_to_domain('py', 'decoratormethod', PyDecoratorMethod) + app.add_directive_to_domain('py', 'coroutinefunction', PyCoroutineFunction) + app.add_directive_to_domain('py', 'coroutinemethod', PyCoroutineMethod) + app.add_directive_to_domain('py', 'awaitablefunction', PyAwaitableFunction) + app.add_directive_to_domain('py', 'awaitablemethod', 
PyAwaitableMethod) + app.add_directive_to_domain('py', 'abstractmethod', PyAbstractMethod) + return {'version': '1.0', 'parallel_read_safe': True} diff --git a/pyperformance/data-files/benchmarks/bm_sphinx/pyproject.toml b/pyperformance/data-files/benchmarks/bm_sphinx/pyproject.toml new file mode 100644 index 00000000..743d6553 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_sphinx/pyproject.toml @@ -0,0 +1,13 @@ +[project] +name = "pyperformance_bm_sphinx" +requires-python = ">=3.11" +dependencies = [ + "pyperf", + "sphinx", +] +urls.repository = "https://github.com/python/pyperformance" +dynamic = ["version"] + +[tool.pyperformance] +name = "sphinx" +tags = "apps" diff --git a/pyperformance/data-files/benchmarks/bm_sphinx/requirements.txt b/pyperformance/data-files/benchmarks/bm_sphinx/requirements.txt new file mode 100644 index 00000000..a866afda --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_sphinx/requirements.txt @@ -0,0 +1,2 @@ +sphinx==7.3.7 +python-docs-theme==2024.6 diff --git a/pyperformance/data-files/benchmarks/bm_sphinx/run_benchmark.py b/pyperformance/data-files/benchmarks/bm_sphinx/run_benchmark.py new file mode 100644 index 00000000..1ffc1932 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_sphinx/run_benchmark.py @@ -0,0 +1,127 @@ +""" +Build a subset of Python's documentation using Sphinx +""" + +import io +import os +from pathlib import Path +import shutil + +import pyperf +from sphinx.cmd.build import main as sphinx_main + + +# Sphinx performs a lot of filesystem I/O when it operates. This can cause the +# results to be highly variable. Instead, we pre-load all of the source files +# and then monkeypatch "open" so that Sphinx is reading from in-memory +# `io.BytesIO` and `io.StringIO` objects. 
# Imported here (not via ``__builtins__``) so the ``open`` override below is
# installed the same way whether this file runs as a script or is imported.
import builtins


# Root of the bundled documentation tree that the benchmark builds.
DOC_ROOT = (Path(__file__).parent / "data" / "Doc").resolve()


# The real ``open``, captured before this module shadows and replaces it.
_orig_open = open


# Absolute path (str) -> file contents (bytes) for every file that is served
# from memory: the preloaded sources plus anything written below DOC_ROOT.
preloaded_files = {}


class _CapturedBytesFile(io.BytesIO):
    """Binary write buffer that publishes its bytes to ``preloaded_files``."""

    def __init__(self, path):
        super().__init__()
        self._path = path
        # Mirror the truncation a real ``open(path, "wb")`` performs.
        preloaded_files[path] = b""

    def close(self):
        # Snapshot before closing: ``getvalue()`` fails on a closed buffer.
        if not self.closed:
            preloaded_files[self._path] = self.getvalue()
        super().close()


class _CapturedTextFile(io.StringIO):
    """Text write buffer that publishes its encoded text to ``preloaded_files``."""

    def __init__(self, path, encoding, errors):
        super().__init__()
        self._path = path
        self._codec = encoding
        self._codec_errors = errors
        # Mirror the truncation a real ``open(path, "w")`` performs.
        preloaded_files[path] = b""

    def close(self):
        # Snapshot before closing: ``getvalue()`` fails on a closed buffer.
        if not self.closed:
            text = self.getvalue()
            preloaded_files[self._path] = text.encode(
                self._codec, self._codec_errors
            )
        super().close()


def read_all_files():
    """Load every regular file below ``DOC_ROOT`` into ``preloaded_files``."""
    for filename in DOC_ROOT.glob("**/*"):
        if filename.is_file():
            preloaded_files[str(filename)] = filename.read_bytes()


def open(
    file,
    mode="r",
    buffering=-1,
    encoding=None,
    errors=None,
    newline=None,
    closefd=True,
    opener=None,
):
    """Drop-in replacement for the builtin ``open`` backed by ``preloaded_files``.

    * Reading a path present in ``preloaded_files`` returns an in-memory
      ``io.BytesIO`` / ``io.StringIO`` over the stored bytes.
    * Writing (``"w"`` modes) to a path below ``DOC_ROOT`` returns an
      in-memory buffer; its contents are stored in ``preloaded_files`` as
      bytes when the buffer is closed, so the file can be read back later.
    * Everything else is forwarded unchanged to the real ``open``.

    Text is decoded/encoded with ``encoding`` (UTF-8 when not given) and
    ``errors`` (``"strict"`` when not given). ``buffering``, ``newline``,
    ``closefd`` and ``opener`` only take effect on the fall-through path.
    """
    if isinstance(file, Path):
        file = str(file)

    if isinstance(file, str):
        binary = "b" in mode
        codec = encoding or "utf-8"
        on_error = errors or "strict"

        if "r" in mode and file in preloaded_files:
            data = preloaded_files[file]
            if binary:
                return io.BytesIO(data)
            return io.StringIO(data.decode(codec, on_error))

        if "w" in mode and DOC_ROOT in Path(file).parents:
            if binary:
                return _CapturedBytesFile(file)
            return _CapturedTextFile(file, codec, on_error)

    return _orig_open(
        file,
        mode=mode,
        buffering=buffering,
        encoding=encoding,
        errors=errors,
        newline=newline,
        closefd=closefd,
        opener=opener,
    )


# ``__builtins__`` is a module in ``__main__`` but a plain dict in imported
# modules; the ``builtins`` module is the portable way to install the override.
builtins.open = open


def replace(src, dst):
    """No-op stand-in for ``os.replace``.

    NOTE(review): presumably needed because files written through the patched
    ``open`` never reach the disk, so there is nothing to rename -- confirm.
    """
    pass


os.replace = replace


def build_doc(doc_root):
    """Run one Sphinx build of ``doc_root`` and return the elapsed seconds."""
    # Make sure there is no caching going on: --fresh-env and --write-all are
    # passed on every run.
    t0 = pyperf.perf_counter()
    sphinx_main(
        [
            "--builder",
            "dummy",
            "--doctree-dir",
            str(doc_root / "build" / "doctrees"),
            "--jobs",
            "1",
            "--silent",
            "--fresh-env",
            "--write-all",
            str(doc_root),
            str(doc_root / "build" / "html"),
        ]
    )
    return pyperf.perf_counter() - t0


def bench_sphinx(loops, doc_root):
    """Time ``loops`` Sphinx builds of ``doc_root``; return the summed seconds.

    The ``build`` directory is removed before the first build and after every
    build. Only the time measured inside ``build_doc`` is accumulated, so the
    clean-up is not part of the result.
    """
    build_dir = doc_root / "build"

    def remove_build_dir():
        if build_dir.is_dir():
            shutil.rmtree(build_dir)

    remove_build_dir()
    read_all_files()

    runs_total = 0
    for _ in range(loops):
        runs_total += build_doc(doc_root)
        remove_build_dir()

    return runs_total


if __name__ == "__main__":
    runner = pyperf.Runner()

    runner.metadata["description"] = (
        "Render documentation with Sphinx, like the CPython docs"
    )
    runner.parse_args()

    runner.bench_time_func("sphinx", bench_sphinx, DOC_ROOT)